funkt fast
This commit is contained in:
parent
d24e05841a
commit
5be395a113
File diff suppressed because one or more lines are too long
70
mine.py
70
mine.py
|
|
@ -6,19 +6,18 @@ import json
|
|||
from time import sleep
|
||||
import random
|
||||
|
||||
header_values = {
|
||||
'name': 'Michael Foord',
|
||||
'location': 'Northampton',
|
||||
'language': 'English',
|
||||
'User-Agent': 'Mozilla 4/0',
|
||||
'Accept-Encoding': 'gzip',
|
||||
'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
|
||||
'Upgrade-Insecure-Requests': '0',
|
||||
'Referrer': 'https://www.google.com/'
|
||||
}
|
||||
|
||||
def getLinks():
|
||||
header_values = {
|
||||
'name': 'Michael Foord',
|
||||
'location': 'Northampton',
|
||||
'language': 'English',
|
||||
'User-Agent': 'Mozilla 4/0',
|
||||
'Accept-Encoding': 'gzip',
|
||||
'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
|
||||
'Upgrade-Insecure-Requests': '0',
|
||||
'Referrer': 'https://www.google.com/'
|
||||
}
|
||||
|
||||
links = []
|
||||
with requests.Session() as session:
|
||||
root = "https://www.chefkoch.de/rs/s0/Rezepte.html"
|
||||
|
|
@ -26,9 +25,9 @@ def getLinks():
|
|||
tree = html.fromstring(site.content)
|
||||
|
||||
# converts: 344.621 Ergebnisse to int(344621)
|
||||
max = int(tree.xpath(
|
||||
'/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", ""))
|
||||
|
||||
#max = int(tree.xpath(
|
||||
# '/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", ""))
|
||||
max = 2000 # get 2000 recepies :)
|
||||
for i in range(0, max, 30):
|
||||
try:
|
||||
root = "https://www.chefkoch.de/rs/s" + \
|
||||
|
|
@ -56,8 +55,43 @@ def getLinks():
|
|||
print(links)
|
||||
return links
|
||||
|
||||
def getRecipe(links):
    """Fetch every recipe page in *links* and collect name, steps and ingredients.

    Each URL is requested through one shared ``requests`` session using the
    module-level ``header_values``. The page is parsed with ``lxml`` and three
    absolute XPath queries extract the recipe title, the ingredient table
    cells and the preparation text.

    Args:
        links: Iterable of recipe page URLs.

    Returns:
        dict mapping the recipe title to ``[instructions, ingredients]``,
        where ``instructions`` is the preparation text with every text node
        followed by a blank line, and ``ingredients`` maps the second cell of
        each consecutive pair of table cells to the first cell. Pages that
        raise during download or parsing are skipped after printing the
        error.
    """
    # The queries never change, so build them once instead of per page.
    name_path = "/html/body/main/article[1]/div/div[2]/h1/text()"
    # TODO: fix this
    # NOTE(review): possibly the "tbody" step is the problem -- browsers add
    # that element to the DOM even when the served HTML lacks it; confirm
    # against the raw page source.
    ingred_path = "/html/body/main/article[2]/table/tbody/tr/td/span/text()"
    recipe_path = "/html/body/main/article[3]/div[1]/text()"

    recs = {}
    with requests.Session() as session:
        for link in links:
            try:
                site = session.get(link, headers=header_values)
                tree = html.fromstring(site.content)

                # An IndexError here (no title found) is handled below like
                # any other per-page failure.
                name = tree.xpath(name_path)[0]
                cells = tree.xpath(ingred_path)
                steps = tree.xpath(recipe_path)

                instructions = "".join(step + "\n\n" for step in steps)

                # Cells are paired up as (cells[0], cells[1]), (cells[2],
                # cells[3]), ...; the second cell of a pair is the key. A
                # trailing unpaired cell is dropped.
                ingredients = dict(zip(cells[1::2], cells[0::2]))

                recs[name] = [instructions, ingredients]

                print("")
            except Exception as e:
                # Best effort: report the failure, wait, and carry on with
                # the next link rather than aborting the whole crawl.
                print(e)
                sleep(10)

            # Random pause between requests.
            sleep(random.randint(0, 5))

    # The original built this dict but never handed it back to the caller.
    return recs
|
||||
|
||||
#links = getLinks()
|
||||
#with open('./data/links.json', 'w') as file:
|
||||
# jsonString = json.dumps(links)
|
||||
# file.write(jsonString)
|
||||
with open('./data/links.json') as file:
|
||||
links = json.load(file)
|
||||
|
||||
getRecipe(links)
|
||||
|
||||
links = getLinks()
|
||||
with open('./data/links.json', 'w') as file:
|
||||
jsonString = json.dumps(links)
|
||||
file.write(jsonString)
|
||||
|
|
|
|||
Loading…
Reference in New Issue