funkt fast
This commit is contained in:
parent
d24e05841a
commit
5be395a113
File diff suppressed because one or more lines are too long
56
mine.py
56
mine.py
|
|
@ -6,9 +6,7 @@ import json
|
||||||
from time import sleep
|
from time import sleep
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
header_values = {
|
||||||
def getLinks():
|
|
||||||
header_values = {
|
|
||||||
'name': 'Michael Foord',
|
'name': 'Michael Foord',
|
||||||
'location': 'Northampton',
|
'location': 'Northampton',
|
||||||
'language': 'English',
|
'language': 'English',
|
||||||
|
|
@ -17,8 +15,9 @@ def getLinks():
|
||||||
'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
|
'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
|
||||||
'Upgrade-Insecure-Requests': '0',
|
'Upgrade-Insecure-Requests': '0',
|
||||||
'Referrer': 'https://www.google.com/'
|
'Referrer': 'https://www.google.com/'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def getLinks():
|
||||||
links = []
|
links = []
|
||||||
with requests.Session() as session:
|
with requests.Session() as session:
|
||||||
root = "https://www.chefkoch.de/rs/s0/Rezepte.html"
|
root = "https://www.chefkoch.de/rs/s0/Rezepte.html"
|
||||||
|
|
@ -26,9 +25,9 @@ def getLinks():
|
||||||
tree = html.fromstring(site.content)
|
tree = html.fromstring(site.content)
|
||||||
|
|
||||||
# converts: 344.621 Ergebnisse to int(344621)
|
# converts: 344.621 Ergebnisse to int(344621)
|
||||||
max = int(tree.xpath(
|
#max = int(tree.xpath(
|
||||||
'/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", ""))
|
# '/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", ""))
|
||||||
|
max = 2000 # get 2000 recepies :)
|
||||||
for i in range(0, max, 30):
|
for i in range(0, max, 30):
|
||||||
try:
|
try:
|
||||||
root = "https://www.chefkoch.de/rs/s" + \
|
root = "https://www.chefkoch.de/rs/s" + \
|
||||||
|
|
@ -56,8 +55,43 @@ def getLinks():
|
||||||
print(links)
|
print(links)
|
||||||
return links
|
return links
|
||||||
|
|
||||||
|
def getRecipe(links):
    """Download each recipe page in *links* and collect its parsed contents.

    Every link is fetched through one shared ``requests.Session`` using the
    module-level ``header_values``. The page is parsed with ``html.fromstring``
    and three absolute XPath expressions extract the title, the ingredient
    table cells and the preparation text.

    Args:
        links: iterable of recipe page URLs.

    Returns:
        dict mapping the recipe name (first match of the title XPath) to a
        two-element list ``[instructions, ingredients]``. ``instructions`` is
        every preparation text node followed by a blank line, concatenated.
        ``ingredients`` is a dict built from consecutive pairs of ingredient
        text nodes: the second node of each pair is the key, the first is the
        value (presumably name -> amount; verify against the page markup).

    Any exception raised while handling one link is printed and followed by a
    10 second pause; the link is then skipped. After every link, failed or
    not, the function sleeps a random 0-5 seconds before the next request.
    """
    # The XPath expressions never change, so build them once, not per link.
    namePath = "/html/body/main/article[1]/div/div[2]/h1/text()"
    # TODO: fix this
    # NOTE(review): browsers insert <tbody> into tables when rendering, while
    # the raw HTML may not contain it -- confirm this path matches what lxml
    # actually parses.
    ingredPath = "/html/body/main/article[2]/table/tbody/tr/td/span/text()"
    recipPath = "/html/body/main/article[3]/div[1]/text()"

    recs = dict()
    with requests.Session() as session:
        for link in links:
            try:
                site = session.get(link, headers=header_values)
                tree = html.fromstring(site.content)

                # [0] raises IndexError when the title is missing; the handler
                # below turns that into a skipped link.
                name = tree.xpath(namePath)[0]
                ingred = tree.xpath(ingredPath)
                resString = "".join(x + "\n\n" for x in tree.xpath(recipPath))

                # Pair nodes (0, 1), (2, 3), ...; zip drops a trailing
                # unpaired node, as the original index loop did.
                ingredDict = dict(zip(ingred[1::2], ingred[::2]))
                recs[name] = [resString, ingredDict]

                print("")
            except Exception as e:
                # Best-effort scraping: report, back off, go on to next link.
                print(e)
                sleep(10)

            # Random delay between requests.
            sleep(random.randint(0, 5))

    # The original built `recs` but never returned it, discarding all results.
    return recs
|
||||||
|
|
||||||
|
# Link harvesting is currently switched off; the links are read from the
# file that an earlier run produced with the following steps:
#   links = getLinks()
#   with open('./data/links.json', 'w') as file:
#       jsonString = json.dumps(links)
#       file.write(jsonString)

# Read the previously saved recipe links back from disk.
with open('./data/links.json') as file:
    links = json.loads(file.read())

# Fetch and parse every saved recipe page.
getRecipe(links)
|
||||||
|
|
||||||
links = getLinks()
|
|
||||||
with open('./data/links.json', 'w') as file:
|
|
||||||
jsonString = json.dumps(links)
|
|
||||||
file.write(jsonString)
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue