funkt fast

This commit is contained in:
Askill 2020-04-05 14:05:45 +02:00
parent d24e05841a
commit 5be395a113
2 changed files with 53 additions and 18 deletions

1
data/links.json Normal file

File diff suppressed because one or more lines are too long

70
mine.py
View File

@ -6,19 +6,18 @@ import json
from time import sleep
import random
header_values = {
'name': 'Michael Foord',
'location': 'Northampton',
'language': 'English',
'User-Agent': 'Mozilla 4/0',
'Accept-Encoding': 'gzip',
'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
'Upgrade-Insecure-Requests': '0',
'Referrer': 'https://www.google.com/'
}
def getLinks():
header_values = {
'name': 'Michael Foord',
'location': 'Northampton',
'language': 'English',
'User-Agent': 'Mozilla 4/0',
'Accept-Encoding': 'gzip',
'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
'Upgrade-Insecure-Requests': '0',
'Referrer': 'https://www.google.com/'
}
links = []
with requests.Session() as session:
root = "https://www.chefkoch.de/rs/s0/Rezepte.html"
@ -26,9 +25,9 @@ def getLinks():
tree = html.fromstring(site.content)
# converts: 344.621 Ergebnisse to int(344621)
max = int(tree.xpath(
'/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", ""))
#max = int(tree.xpath(
# '/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", ""))
max = 2000 # get 2000 recepies :)
for i in range(0, max, 30):
try:
root = "https://www.chefkoch.de/rs/s" + \
@ -56,8 +55,43 @@ def getLinks():
print(links)
return links
def getRecipe(links):
    """Download every recipe page in ``links`` and collect the parsed recipes.

    All pages are fetched through one shared ``requests`` session using the
    module-level ``header_values`` and are parsed with fixed XPath
    expressions. A link that fails to download or parse is reported on
    stdout and skipped after a 10 second pause.

    Args:
        links: iterable of recipe page URLs.

    Returns:
        dict mapping each recipe name to ``[text, ingredients]``. ``text`` is
        every text node matched by the recipe-text XPath, each followed by a
        blank line. ``ingredients`` is a dict built from consecutive pairs of
        ingredient text nodes, keyed by the second node of each pair
        (presumably ingredient name -> amount; verify against the page
        markup). A trailing unpaired node is ignored.
    """
    # The XPath expressions do not depend on the link, so define them once.
    namePath = "/html/body/main/article[1]/div/div[2]/h1/text()"
    ingredPath = "/html/body/main/article[2]/table/tbody/tr/td/span/text()"  # TODO: fix this
    recipPath = "/html/body/main/article[3]/div[1]/text()"

    recs = dict()
    with requests.Session() as session:
        for link in links:
            try:
                site = session.get(link, headers=header_values)
                tree = html.fromstring(site.content)

                # Raises IndexError when the page has no matching heading;
                # the handler below then skips this link.
                name = tree.xpath(namePath)[0]
                ingred = tree.xpath(ingredPath)
                resip = tree.xpath(recipPath)

                resString = "".join(x + "\n\n" for x in resip)
                # Pair nodes (0, 1), (2, 3), ...: odd positions become keys,
                # even positions become values.
                ingredDict = dict(zip(ingred[1::2], ingred[0::2]))

                recs[name] = [resString, ingredDict]
                # NOTE(review): prints an empty line after each parsed recipe;
                # looks like a debugging leftover - confirm before removing.
                print("")
            except Exception as e:
                # Best-effort scraping: report the problem, pause, move on.
                print(e)
                sleep(10)
            # Random 0-5 second pause after every link.
            sleep(random.randint(0, 5))
    # Hand the collected recipes back to the caller instead of dropping them.
    return recs
# Link collection is disabled here: the getLinks() call and the dump of its
# result to ./data/links.json are commented out.
#links = getLinks()
#with open('./data/links.json', 'w') as file:
# jsonString = json.dumps(links)
# file.write(jsonString)
# Load the link list stored in ./data/links.json and pass it to getRecipe().
with open('./data/links.json') as file:
links = json.load(file)
getRecipe(links)
# NOTE(review): as shown here, the lines below call getLinks() again and
# overwrite ./data/links.json with its result. The diff markers are not
# visible in this view, so confirm whether these lines are still part of the
# file after this commit.
links = getLinks()
with open('./data/links.json', 'w') as file:
jsonString = json.dumps(links)
file.write(jsonString)