added api

calcOverlay is way too slow
This commit is contained in:
Askill 2020-04-17 22:29:50 +02:00
parent cfd9d0fcc8
commit 04a1774e74
12 changed files with 69 additions and 2069 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -11,7 +11,7 @@ from flask_sqlalchemy import SQLAlchemy
app = Flask(__name__) app = Flask(__name__)
api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger') api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger')
api.add_resource(endpoints.Recipe,'/api/v1/recipe/') api.add_resource(endpoints.RecipeList,'/api/v1/recipe/')
@app.route("/") @app.route("/")
def index(): def index():

View File

@ -5,19 +5,24 @@ import application.config as config
import json import json
import base64 import base64
from application.db import Session, Recipe, Ingredient from application.db import Session, Recipe, Ingredient
import search
import migrate
class Recipe(Resource): class RecipeList(Resource):
def get(self): def get(self):
""" """ """ """
try:
parser = reqparse.RequestParser()
parser.add_argument('useFace', type=bool, required=False)
args = parser.parse_args()
session = Session() parser = reqparse.RequestParser()
parser.add_argument('ingred', type=str, action='append')
args = parser.parse_args()
ingreds = args["ingred"]
ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr]
indx = search.fastes(ingreds )
recs = search.getRecDict(indx, ingreds )
#print(recs)
return flask.make_response(flask.jsonify({'data': recs}), 200)
return flask.make_response(flask.jsonify({'data': args}), 200)
except Exception as e:
print("error: -", e)
return flask.make_response(flask.jsonify({'error': str(e)}), 400)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -44,4 +44,4 @@ def migrate(path):
counter+=1 counter+=1
print(counter/leng) print(counter/leng)
migrate('./data/recs.json') #migrate('./data/recs.json')

27
mine.py
View File

@ -34,8 +34,8 @@ def getLinks():
# converts: 344.621 Ergebnisse to int(344621) # converts: 344.621 Ergebnisse to int(344621)
#max = int(tree.xpath( #max = int(tree.xpath(
# '/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", "")) # '/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", ""))
max = 2000 # get 2000 recipes :) max = 10000 # get 2000 recipes :)
for i in range(0, max, 30): for i in range(2000, max, 30):
try: try:
root = "https://www.chefkoch.de/rs/s" + \ root = "https://www.chefkoch.de/rs/s" + \
str(i) + "/Rezepte.html" str(i) + "/Rezepte.html"
@ -57,7 +57,7 @@ def getLinks():
i -= 30 i -= 30
sleep(10) sleep(10)
sleep(random.randint(0, 5)) sleep(random.randint(1, 4))
print(links) print(links)
return links return links
@ -110,20 +110,21 @@ def getRecipe(links):
except: except:
amount = "" amount = ""
#print(stuff, amount) #print(stuff, amount)
a = Link(ingredient_amount=amount) #a = Link(ingredient_amount=amount)
a.ingredient = Ingredient(name=stuff) #a.ingredient = Ingredient(name=stuff)
r.ingredient.append(a) #r.ingredient.append(a)
dbSession.add(r) #dbSession.add(r)
dbSession.commit() #dbSession.commit()
ingredDict[stuff] = amount ingredDict[stuff] = amount
recs[name] = [resString, ingredDict, link, img.decode("utf-8")] recs[name] = [resString, ingredDict, link, img.decode("utf-8")]
print("") if counter % 20 == 0:
print(counter)
except Exception as e: except Exception as e:
print(traceback.format_exc()) print(traceback.format_exc())
print(format(counter/len(links), '.2f'), link) print(format(counter/len(links), '.2f'), link)
sleep(random.randint(0, 5)) sleep(random.randint(0, 6))
return recs return recs
@ -149,9 +150,9 @@ def stemIngred():
#with open('./data/links.json', 'w') as file: #with open('./data/links.json', 'w') as file:
# jsonString = json.dumps(links) # jsonString = json.dumps(links)
# file.write(jsonString) # file.write(jsonString)
links = "" #links = ""
with open('./data/links.json') as file: #with open('./data/links.json') as file:
links = json.load(file) # links = json.load(file)
#recs = getRecipe(links) #recs = getRecipe(links)

View File

@ -5,9 +5,6 @@ from nltk.corpus import stopwords
import time import time
dbSession = Session() dbSession = Session()
inputArr = ["butter", "milch", "eier", "mehl", "zucker"]
inputArr += ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
maxMissing = 10
def slow(): def slow():
recipes = dbSession.query(Recipe).all() recipes = dbSession.query(Recipe).all()
@ -40,7 +37,6 @@ def slow():
def faster(inputArr): def faster(inputArr):
indx = {} indx = {}
for inpu in inputArr: for inpu in inputArr:
ids = [] ids = []
for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all(): for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():
@ -49,7 +45,6 @@ def faster(inputArr):
indx[str(x[0])] += 1 indx[str(x[0])] += 1
return(indx) return(indx)
def fastes(inputArr): def fastes(inputArr):
@ -58,8 +53,6 @@ def fastes(inputArr):
for inpu in inputArr: for inpu in inputArr:
ids = [] ids = []
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all(): for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
if str(recipe_id[0]) not in indx: if str(recipe_id[0]) not in indx:
indx[str(recipe_id[0])] = 0 indx[str(recipe_id[0])] = 0
@ -72,22 +65,31 @@ def stemInput(inputArr):
snowball = nltk.SnowballStemmer(language='german') snowball = nltk.SnowballStemmer(language='german')
stopset = set(stopwords.words('german')) stopset = set(stopwords.words('german'))
for word in inputArr: for word in inputArr:
if word in stopset: if word in stopset:
continue continue
inputArr2.append(snowball.stem(word)) inputArr2.append(snowball.stem(word))
return inputArr2 return inputArr2
# #
def printDict(indx):
def getRecDict(indx, inputArr):
outDict = {} outDict = {}
for key, value in sorted(indx.items()): for key, value in indx.items():
ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()
outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key).all())
return outDict
def printDict(indx, inputArr):
outDict = {}
for key, value in indx.items():
ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all() ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()
outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key).all()) outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key).all())
for key, value in outDict.items(): for key, value in sorted(outDict.items()):
if key >= 0.7: if key >= 0.3:
print(key, value)
print(key, value[0], value[1])
for xx in value[2]:
print("\t", xx[0])
def calcOverlay(l1, l2): def calcOverlay(l1, l2):
@ -100,25 +102,29 @@ def calcOverlay(l1, l2):
for x in l2: for x in l2:
for l in l1: for l in l1:
if l == x[0]: if l not in defaultArr and l == x[0]:
#print(l) #print(l)
counter +=1 counter +=1
counter = counter / len(l2) counter = counter / len(l2)
return counter return counter
#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"]
stemmed = stemInput(inputArr) defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
#inputArr += defaultArr
start = time.time() maxMissing = 10
indx = faster(stemmed) #
end = time.time() #stemmed = stemInput(inputArr)
printDict(indx) #
print("\n", end - start, "\n") #start = time.time()
#indx = faster(stemmed)
#end = time.time()
start = time.time() #printDict(indx)
indx = fastes(stemmed) #print("\n", end - start, "\n")
end = time.time() #
printDict(indx) #
print("\n", end - start, "\n") #start = time.time()
#indx = fastes(stemmed)
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")