new workflow works

This commit is contained in:
Askill 2020-04-25 18:53:32 +02:00
parent d145ecee55
commit df22bb7f61
6 changed files with 56 additions and 83 deletions

Binary file not shown.

Binary file not shown.

View File

@ -20,15 +20,15 @@ class RecipeList(Resource):
args = parser.parse_args()
ingreds = args["ingred"]
ingreds = [migrate.stem(ingred)[0] for ingred in ingreds + search.defaultArr]
ingreds = [migrate.stem(ingred)[0] for ingred in ingreds]
start = time.time()
indx = search.search2(ingreds)
end = time.time()
print("get recipes",end - start, "\n")
start = time.time()
recs = search.getRecDict(indx, ingreds)
#start = time.time()
recs = search.getRecDict2(indx, ingreds)
end = time.time()
print("calc overlay",end - start, "\n")

View File

@ -58,7 +58,7 @@ function renderRecipeList(data){
)
recString = `
<a href="${data1[2]}">
<a href="${data1[2]}" target="_blank">
<div class="card text-white bg-primary mb-3" style="max-width: 100%">
<div class="card-body recipe-container">
<div class="row">

131
search.py
View File

@ -6,34 +6,19 @@ import nltk as nltk
from nltk.corpus import stopwords
import time
import heapq
from collections import Counter
from collections import Counter
import migrate
def fastes(inputArr):
    """Count, per recipe, how many trunk rows match the given names.

    Args:
        inputArr: iterable of trunk names; each one is compared to
            ``Trunk.name`` with ``==``.

    Returns:
        dict mapping ``str(recipe_id)`` to the number of matching rows
        found across all names in ``inputArr``.
    """
    session = g.session
    hits = {}
    for name in inputArr:
        rows = session.query(Trunk.recipe_id).filter(Trunk.name == name).all()
        for row in rows:
            recipe_key = str(row[0])
            hits[recipe_key] = hits.get(recipe_key, 0) + 1
    return hits
def search2(inputArr):
    """Look up, for every trunk name, the ids of the recipes that use it.

    Args:
        inputArr: iterable of trunk names; each one is compared to
            ``db.Trunk.name`` with ``==``.

    Returns:
        dict mapping every name in ``inputArr`` to a list of recipe ids
        (as strings), one entry per joined row.  A name without matches
        maps to an empty list.
    """
    indx = {}
    # NOTE(review): assumes db.Session() yields a SQLAlchemy session - confirm.
    dbSession = db.Session()
    try:
        for inpu in inputArr:
            rows = (
                dbSession.query(db.Trunk.name, db.Recipe.recipe_id)
                .filter(db.Trunk.name == inpu)
                .join(db.IngredTrunk)
                .join(db.Ingredient)
                .join(db.RecIngred)
                .join(db.Recipe)
                .all()
            )
            # Each row is (trunk name, recipe id); only the id is kept.
            indx[inpu] = [str(row[1]) for row in rows]
    finally:
        # Hand the connection back even when a query fails.
        dbSession.close()
    return indx
def stemInput(inputArr):
@ -43,75 +28,63 @@ def stemInput(inputArr):
stopset = set(stopwords.words('german'))
for word in inputArr:
if word in stopset:
continue
continue
inputArr2.append(snowball.stem(word))
return inputArr2
#
def getRecDict(indx, inputArr):
    """Score the most frequently hit recipes and describe the best ten.

    Args:
        indx: mapping of recipe id to hit count (the shape ``fastes``
            returns).
        inputArr: ingredient names handed on to ``calcOverlay``.

    Returns:
        dict mapping an overlay score to a tuple
        ``(recipe_id, name, url, ["<ingredient>: <amount>", ...], image)``
        where ``image`` is ``rec.img`` decoded as UTF-8.  Recipes that
        receive the same score overwrite each other, so only the last one
        scored is kept.
    """
    session = g.session
    # TODO: keeping only the 1000 most frequently hit recipes is not correct
    shortlist = dict(Counter(indx).most_common(1000))

    by_score = {}
    for recipe_key in shortlist:
        name_rows = session.query(Trunk.name).filter(Trunk.recipe_id == int(recipe_key)).all()
        trunk_names = [row[0] for row in name_rows]
        by_score[calcOverlay(inputArr, trunk_names)] = int(recipe_key)

    result = {}
    for score in heapq.nlargest(10, by_score.keys()):
        recipe_id = by_score[score]
        rec = session.query(Recipe).filter(Recipe.recipe_id == recipe_id).first()
        name, url = rec.name, rec.url
        amount_rows = (
            session.query(Ingredient.name, Ingredient.ingredient_amount)
            .filter(Ingredient.recipe_id == recipe_id)
            .all()
        )
        lines = [row[0] + ": " + row[1] for row in amount_rows]
        result[score] = (recipe_id, name, url, lines, rec.img.decode('utf-8'))
    return result
def getRecDict2(indx, inputArr):
    """Rank the candidate recipes and return details for the best twenty.

    Args:
        indx: dict mapping an ingredient name to a list of recipe ids
            (the shape ``search2`` returns).
        inputArr: ingredient names the user entered.  The list is not
            modified; ``defaultArr`` is appended to a copy before scoring.

    Returns:
        dict mapping an overlay score (float) to a tuple
        ``(recipe_id, name, url, ["<ingredient>: <amount>", ...], image)``
        where ``image`` is ``rec.img`` decoded as UTF-8.  At most 20
        entries; empty when ``indx`` holds no recipe ids.
    """
    # Flatten the per-ingredient id lists and count how often each recipe
    # was hit; only the 1000 most frequently hit recipes are scored.
    hits = Counter(rec_id for rec_ids in indx.values() for rec_id in rec_ids)
    candidates = [rec_id for rec_id, _ in hits.most_common(1000)]
    if not candidates:
        return {}

    # Work on a copy so the caller's list is left untouched.
    available = list(inputArr) + list(defaultArr)

    # NOTE(review): assumes db.Session() yields a SQLAlchemy session - confirm.
    dbSession = db.Session()
    try:
        rows = (
            dbSession.query(
                db.Recipe.recipe_id,
                db.IngredTrunk.trunk_name,
                db.IngredTrunk.ingredient_name,
            )
            .filter(db.Recipe.recipe_id.in_(candidates))
            .join(db.RecIngred)
            .join(db.Ingredient)
            .join(db.IngredTrunk)
            .all()
        )

        # recipe id -> ingredient name -> trunk names of that ingredient
        ingredDict = {}
        for recipe_id, trunk_name, ingredient_name in rows:
            ingredDict.setdefault(recipe_id, {}).setdefault(ingredient_name, []).append(trunk_name)

        outDict = {}
        for recipe_id, ingredients in ingredDict.items():
            overlay = calcOverlay2(available, ingredients)
            # Scores double as dict keys: nudge a score down until it is
            # unique so recipes with equal scores do not overwrite each other.
            while overlay in outDict:
                overlay -= 0.0001
            outDict[overlay] = int(recipe_id)

        outDict2 = {}
        for overlay in heapq.nlargest(20, outDict):
            recipe_id = outDict[overlay]
            rec = dbSession.query(db.Recipe).filter(db.Recipe.recipe_id == recipe_id).first()
            amounts = (
                dbSession.query(db.Ingredient.name, db.RecIngred.ingredient_amount)
                .join(db.RecIngred)
                .join(db.Recipe)
                .filter(db.Recipe.recipe_id == recipe_id)
                .all()
            )
            outDict2[overlay] = (
                recipe_id,
                rec.name,
                rec.url,
                [row[0] + ": " + row[1] for row in amounts],
                rec.img.decode('utf-8'),
            )
    finally:
        # Hand the connection back even when a query fails.
        dbSession.close()
    return outDict2


def printDict(indx, inputArr):
    """Print every recipe from ``getRecDict`` whose score is at least 0.3."""
    outDict = getRecDict(indx, inputArr)
    for key, value in sorted(outDict.items()):
        if key >= 0.3:
            print(key, value[0], value[1])
            # NOTE(review): value[2] is the recipe URL in the tuple built by
            # getRecDict, so this loop walks its characters; the ingredient
            # list sits at value[3] - confirm which one was intended.
            for xx in value[2]:
                print("\t", xx[0])
def stem(l1):
    """Return the German Snowball stem of every word in ``l1``.

    Args:
        l1: iterable of words.

    Returns:
        list with one stem per input word, in input order.
    """
    snowball = nltk.SnowballStemmer(language='german')
    # Each word is stemmed exactly once; no stop-word filtering is applied.
    return [snowball.stem(word) for word in l1]
def calcOverlay(l1, l2):
    """Return how much of a recipe is covered by the non-staple inputs.

    Args:
        l1: ingredient names the user has.
        l2: collection of the recipe's ingredient names.

    Returns:
        The number of entries of ``l1`` that are not in ``defaultArr`` and
        are contained in ``l2``, divided by ``len(l2)``; ``0.0`` when
        ``l2`` is empty.
    """
    if not l2:
        # Nothing to cover; avoids dividing by zero.
        return 0.0
    counter = 0
    for l in l1:
        if l not in defaultArr and l in l2:
            counter += 1
    return counter / len(l2)


def calcOverlay2(l1, l2):
    """Return the share of a recipe's ingredients the user can cover.

    Args:
        l1: ingredient names the user has (hashable values, e.g. strings).
        l2: dict mapping each ingredient of the recipe to the list of
            names that count as a match for it.

    Returns:
        The number of ingredients in ``l2`` with at least one name found
        in ``l1``, divided by ``len(l2)``; ``0.0`` when ``l2`` is empty.
    """
    if not l2:
        # Nothing to cover; avoids dividing by zero.
        return 0.0
    available = set(l1)
    # An ingredient counts once, no matter how many of its names match.
    matched = sum(
        1 for names in l2.values() if any(name in available for name in names)
    )
    return matched / len(l2)
# Staple ingredients every user is assumed to have at home.  They are run
# through stem() once at import time - presumably so they match the stemmed
# names they are compared against; verify against the callers.
defaultArr = ["Wasser", "salz", "pfeffer"]
defaultArr = stem(defaultArr)

# NOTE(review): not referenced by any function visible in this file.
maxMissing = 10