new workflow works

This commit is contained in:
Askill 2020-04-25 18:53:32 +02:00
parent d145ecee55
commit df22bb7f61
6 changed files with 56 additions and 83 deletions

Binary file not shown.

Binary file not shown.

View File

@ -20,15 +20,15 @@ class RecipeList(Resource):
args = parser.parse_args() args = parser.parse_args()
ingreds = args["ingred"] ingreds = args["ingred"]
ingreds = [migrate.stem(ingred)[0] for ingred in ingreds + search.defaultArr] ingreds = [migrate.stem(ingred)[0] for ingred in ingreds]
start = time.time() start = time.time()
indx = search.search2(ingreds) indx = search.search2(ingreds)
end = time.time() end = time.time()
print("get recipes",end - start, "\n") print("get recipes",end - start, "\n")
start = time.time() #start = time.time()
recs = search.getRecDict(indx, ingreds) recs = search.getRecDict2(indx, ingreds)
end = time.time() end = time.time()
print("calc overlay",end - start, "\n") print("calc overlay",end - start, "\n")

View File

@ -58,7 +58,7 @@ function renderRecipeList(data){
) )
recString = ` recString = `
<a href="${data1[2]}"> <a href="${data1[2]}" target="_blank">
<div class="card text-white bg-primary mb-3" style="max-width: 100%"> <div class="card text-white bg-primary mb-3" style="max-width: 100%">
<div class="card-body recipe-container"> <div class="card-body recipe-container">
<div class="row"> <div class="row">

107
search.py
View File

@ -7,32 +7,17 @@ from nltk.corpus import stopwords
import time import time
import heapq import heapq
from collections import Counter from collections import Counter
import migrate
def fastes(inputArr):
indx = {}
dbSession = g.session
for inpu in inputArr:
ids = []
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
if str(recipe_id[0]) not in indx:
indx[str(recipe_id[0])] = 0
indx[str(recipe_id[0])] += 1
return(indx)
def search2(inputArr): def search2(inputArr):
indx = {} indx = {}
dbSession = db.Session() dbSession = db.Session()
for inpu in inputArr: for inpu in inputArr:
ids = [] x = dbSession.query(db.Trunk.name, db.Recipe.recipe_id).filter(db.Trunk.name == inpu).join(db.IngredTrunk).join(
for recipe in dbSession.query(db.Trunk.ingredients.recipe).filter(db.Trunk.name == inpu).all(): db.Ingredient).join(db.RecIngred).join(db.Recipe).all()
if str(recipe.recipe_id) not in indx: indx[inpu] = [str(y[1]) for y in x]
indx[str(recipe.recipe_id)] = 0
indx[str(recipe.recipe_id)] += 1
return(indx) return(indx)
@ -43,75 +28,63 @@ def stemInput(inputArr):
stopset = set(stopwords.words('german')) stopset = set(stopwords.words('german'))
for word in inputArr: for word in inputArr:
if word in stopset: if word in stopset:
continue continue
inputArr2.append(snowball.stem(word)) inputArr2.append(snowball.stem(word))
return inputArr2 return inputArr2
# #
def getRecDict(indx, inputArr): def getRecDict2(indx, inputArr):
dbSession = g.session dbSession = db.Session()
outDict = {} outDict = {}
# 2d to 1d
indx = sum(indx.values(), [])
k = Counter(indx) k = Counter(indx)
# Finding 1000 highest values TODO: this is not correct
indx = k.most_common(1000) indx = k.most_common(1000)
indx = dict(indx) indx = dict(indx)
for key, value in indx.items():
ingred = [x[0] for x in dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()] ingred = [x for x in dbSession.query(db.Recipe.recipe_id, db.IngredTrunk.trunk_name, db.IngredTrunk.ingredient_name ).filter(db.Recipe.recipe_id.in_(indx.keys())).join(db.RecIngred).join(db.Ingredient).join(db.IngredTrunk).all()]
outDict[calcOverlay(inputArr, ingred)] = int(key) ingredDict = {}
for k,v, i in ingred:
if k not in ingredDict:
ingredDict[k] = {}
if i not in ingredDict[k]:
ingredDict[k][i] = []
ingredDict[k][i].append(v)
inputArr += defaultArr
for key, value in ingredDict.items():
overlay = calcOverlay2(inputArr, value)
while overlay in outDict.keys():
overlay -= 0.0001
outDict[overlay] = int(key)
outDict2 = {} outDict2 = {}
for key in heapq.nlargest(10, outDict.keys()): for key in heapq.nlargest(20, outDict.keys()):
key2 = outDict[key] key2 = outDict[key]
rec = dbSession.query(Recipe).filter(Recipe.recipe_id==key2).first() rec = dbSession.query(db.Recipe).filter(db.Recipe.recipe_id == key2).first()
outDict2[key] = (key2, rec.name, rec.url, [r[0] + ": " + r[1] for r in dbSession.query(Ingredient.name, Ingredient.ingredient_amount).filter(Ingredient.recipe_id==key2).all()], rec.img.decode('utf-8')) outDict2[key] = (key2, rec.name, rec.url, [r[0] + ": " + r[1] for r in dbSession.query(db.Ingredient.name,
db.RecIngred.ingredient_amount).join(db.RecIngred).join(db.Recipe).filter(db.Recipe.recipe_id == key2).all()], rec.img.decode('utf-8'))
return outDict2 return outDict2
def printDict(indx, inputArr):
outDict = getRecDict(indx, inputArr)
for key, value in sorted(outDict.items()):
if key >= 0.3:
print(key, value[0], value[1])
for xx in value[2]:
print("\t", xx[0])
def stem(l1): def stem(l1):
snowball = nltk.SnowballStemmer(language='german') snowball = nltk.SnowballStemmer(language='german')
stopset = set(stopwords.words('german')) stopset = set(stopwords.words('german'))
stopset |= set("(),") stopset |= set("(),")
l1 = [snowball.stem(l) for l in l1]
l1 = [snowball.stem(l) for l in l1]
return l1 return l1
def calcOverlay(l1, l2): def calcOverlay2(l1, l2):
counter = 0 counter = 0
for l in l1: for ll in l2.values():
if l not in defaultArr: for l in ll:
if l in l2: if l in l1:
#print(l) counter += 1
counter +=1 break
counter = counter / len(l2) counter = counter / len(l2)
return counter return counter
# it is assumed that everyone has this
#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"] defaultArr = ["Wasser", "salz", "pfeffer"]
defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this defaultArr = stem(defaultArr)
#inputArr += defaultArr
maxMissing = 10
#
#stemmed = stemInput(inputArr)
#
#start = time.time()
#indx = faster(stemmed)
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")
#
#
#start = time.time()
#indx = fastes(stemmed)
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")