encoding works
This commit is contained in:
parent
0992a8a702
commit
74ef03ae4b
2013
data/links.json
2013
data/links.json
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
36
mine.py
36
mine.py
|
|
@ -1,10 +1,11 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
from time import sleep
|
from time import sleep
|
||||||
import random
|
import random
|
||||||
|
import traceback
|
||||||
|
|
||||||
header_values = {
|
header_values = {
|
||||||
'name': 'Michael Foord',
|
'name': 'Michael Foord',
|
||||||
|
|
@ -58,7 +59,9 @@ def getLinks():
|
||||||
def getRecipe(links):
|
def getRecipe(links):
|
||||||
recs = dict()
|
recs = dict()
|
||||||
with requests.Session() as session:
|
with requests.Session() as session:
|
||||||
for link in links:
|
counter = 0
|
||||||
|
for link in links[:1]:
|
||||||
|
counter += 1
|
||||||
try:
|
try:
|
||||||
site = session.get(link, headers=header_values)
|
site = session.get(link, headers=header_values)
|
||||||
tree = html.fromstring(site.content)
|
tree = html.fromstring(site.content)
|
||||||
|
|
@ -78,28 +81,27 @@ def getRecipe(links):
|
||||||
ingredDict = {}
|
ingredDict = {}
|
||||||
for i in range(0, len(ingred)-1, 2):
|
for i in range(0, len(ingred)-1, 2):
|
||||||
#print(ingred[i+1][0].text)
|
#print(ingred[i+1][0].text)
|
||||||
if ingred[i+1][0].text is not None:
|
if ingred[i+1][0] is not None:
|
||||||
if ingred[i+1][0].text is None:
|
if ingred[i+1][0].text is None:
|
||||||
stuff = ingred[i+1][0][0].text.strip().replace(" ", "")
|
textFromLink = ingred[i+1][0][0].text.strip().replace(" ", "")
|
||||||
|
#print(textFromLink)
|
||||||
|
stuff = textFromLink
|
||||||
else:
|
else:
|
||||||
stuff = ingred[i+1][0].text.strip().replace(" ", "")
|
stuff = ingred[i+1][0].text.strip().replace(" ", "")
|
||||||
else:
|
|
||||||
stuff = ""
|
|
||||||
|
|
||||||
if ingred[i][0].text is not None:
|
if ingred[i] is not None:
|
||||||
if ingred[i][0].text is None:
|
try:
|
||||||
amount = ingred[i][0][0].text.strip().replace(" ", "")
|
|
||||||
else:
|
|
||||||
amount = ingred[i][0].text.strip().replace(" ", "")
|
amount = ingred[i][0].text.strip().replace(" ", "")
|
||||||
else:
|
except:
|
||||||
amount = ""
|
amount = ""
|
||||||
|
#print(stuff, amount)
|
||||||
ingredDict[stuff] = amount
|
ingredDict[stuff] = amount
|
||||||
recs[name] = [resString, ingredDict]
|
recs[name] = [resString, ingredDict, link]
|
||||||
print("")
|
print("")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(traceback.format_exc())
|
||||||
|
|
||||||
print(link)
|
print(format(counter/100, '.2f'), link)
|
||||||
sleep(random.randint(0, 5))
|
sleep(random.randint(0, 5))
|
||||||
return recs
|
return recs
|
||||||
|
|
||||||
|
|
@ -114,6 +116,6 @@ with open('./data/links.json') as file:
|
||||||
|
|
||||||
recs = getRecipe(links)
|
recs = getRecipe(links)
|
||||||
|
|
||||||
with open('./data/recs.json', 'w') as file:
|
with open('./data/recs.json', 'w', encoding="utf-8") as file:
|
||||||
jsonString = json.dumps(recs)
|
json.dump(recs, file, ensure_ascii=False)
|
||||||
file.write(jsonString)
|
|
||||||
Loading…
Reference in New Issue