new workflow works

This commit is contained in:
Askill 2020-04-25 18:53:32 +02:00
parent d145ecee55
commit df22bb7f61
6 changed files with 56 additions and 83 deletions

Binary file not shown.

Binary file not shown.

View File

@ -20,15 +20,15 @@ class RecipeList(Resource):
args = parser.parse_args() args = parser.parse_args()
ingreds = args["ingred"] ingreds = args["ingred"]
ingreds = [migrate.stem(ingred)[0] for ingred in ingreds + search.defaultArr] ingreds = [migrate.stem(ingred)[0] for ingred in ingreds]
start = time.time() start = time.time()
indx = search.search2(ingreds) indx = search.search2(ingreds)
end = time.time() end = time.time()
print("get recipes",end - start, "\n") print("get recipes",end - start, "\n")
start = time.time() #start = time.time()
recs = search.getRecDict(indx, ingreds) recs = search.getRecDict2(indx, ingreds)
end = time.time() end = time.time()
print("calc overlay",end - start, "\n") print("calc overlay",end - start, "\n")

View File

@ -58,7 +58,7 @@ function renderRecipeList(data){
) )
recString = ` recString = `
<a href="${data1[2]}"> <a href="${data1[2]}" target="_blank">
<div class="card text-white bg-primary mb-3" style="max-width: 100%"> <div class="card text-white bg-primary mb-3" style="max-width: 100%">
<div class="card-body recipe-container"> <div class="card-body recipe-container">
<div class="row"> <div class="row">

131
search.py
View File

@ -6,34 +6,19 @@ import nltk as nltk
from nltk.corpus import stopwords from nltk.corpus import stopwords
import time import time
import heapq import heapq
from collections import Counter from collections import Counter
import migrate
def fastes(inputArr):
indx = {}
dbSession = g.session
for inpu in inputArr:
ids = []
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
if str(recipe_id[0]) not in indx:
indx[str(recipe_id[0])] = 0
indx[str(recipe_id[0])] += 1
return(indx)
def search2(inputArr): def search2(inputArr):
indx = {} indx = {}
dbSession = db.Session() dbSession = db.Session()
for inpu in inputArr: for inpu in inputArr:
ids = [] x = dbSession.query(db.Trunk.name, db.Recipe.recipe_id).filter(db.Trunk.name == inpu).join(db.IngredTrunk).join(
for recipe in dbSession.query(db.Trunk.ingredients.recipe).filter(db.Trunk.name == inpu).all(): db.Ingredient).join(db.RecIngred).join(db.Recipe).all()
if str(recipe.recipe_id) not in indx:
indx[str(recipe.recipe_id)] = 0
indx[str(recipe.recipe_id)] += 1 indx[inpu] = [str(y[1]) for y in x]
return(indx)
return(indx)
def stemInput(inputArr): def stemInput(inputArr):
@ -43,75 +28,63 @@ def stemInput(inputArr):
stopset = set(stopwords.words('german')) stopset = set(stopwords.words('german'))
for word in inputArr: for word in inputArr:
if word in stopset: if word in stopset:
continue continue
inputArr2.append(snowball.stem(word)) inputArr2.append(snowball.stem(word))
return inputArr2 return inputArr2
# #
def getRecDict(indx, inputArr): def getRecDict2(indx, inputArr):
dbSession = g.session dbSession = db.Session()
outDict = {}
k = Counter(indx)
# Finding 1000 highest values TODO: this is not correct
indx = k.most_common(1000)
indx = dict(indx)
for key, value in indx.items():
ingred = [x[0] for x in dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()]
outDict[calcOverlay(inputArr, ingred)] = int(key)
outDict2 = {}
for key in heapq.nlargest(10, outDict.keys()):
key2 = outDict[key]
rec = dbSession.query(Recipe).filter(Recipe.recipe_id==key2).first()
outDict2[key] = (key2, rec.name, rec.url, [r[0] + ": " + r[1] for r in dbSession.query(Ingredient.name, Ingredient.ingredient_amount).filter(Ingredient.recipe_id==key2).all()], rec.img.decode('utf-8'))
return outDict2
def printDict(indx, inputArr): outDict = {}
outDict = getRecDict(indx, inputArr) # 2d to 1d
for key, value in sorted(outDict.items()): indx = sum(indx.values(), [])
if key >= 0.3: k = Counter(indx)
indx = k.most_common(1000)
print(key, value[0], value[1]) indx = dict(indx)
for xx in value[2]:
print("\t", xx[0]) ingred = [x for x in dbSession.query(db.Recipe.recipe_id, db.IngredTrunk.trunk_name, db.IngredTrunk.ingredient_name ).filter(db.Recipe.recipe_id.in_(indx.keys())).join(db.RecIngred).join(db.Ingredient).join(db.IngredTrunk).all()]
ingredDict = {}
for k,v, i in ingred:
if k not in ingredDict:
ingredDict[k] = {}
if i not in ingredDict[k]:
ingredDict[k][i] = []
ingredDict[k][i].append(v)
inputArr += defaultArr
for key, value in ingredDict.items():
overlay = calcOverlay2(inputArr, value)
while overlay in outDict.keys():
overlay -= 0.0001
outDict[overlay] = int(key)
outDict2 = {}
for key in heapq.nlargest(20, outDict.keys()):
key2 = outDict[key]
rec = dbSession.query(db.Recipe).filter(db.Recipe.recipe_id == key2).first()
outDict2[key] = (key2, rec.name, rec.url, [r[0] + ": " + r[1] for r in dbSession.query(db.Ingredient.name,
db.RecIngred.ingredient_amount).join(db.RecIngred).join(db.Recipe).filter(db.Recipe.recipe_id == key2).all()], rec.img.decode('utf-8'))
return outDict2
def stem(l1): def stem(l1):
snowball = nltk.SnowballStemmer(language='german') snowball = nltk.SnowballStemmer(language='german')
stopset = set(stopwords.words('german')) stopset = set(stopwords.words('german'))
stopset |= set("(),") stopset |= set("(),")
l1 = [snowball.stem(l) for l in l1]
l1 = [snowball.stem(l) for l in l1]
return l1 return l1
def calcOverlay(l1, l2): def calcOverlay2(l1, l2):
counter = 0 counter = 0
for l in l1: for ll in l2.values():
if l not in defaultArr: for l in ll:
if l in l2: if l in l1:
#print(l) counter += 1
counter +=1 break
counter = counter / len(l2)
counter = counter / len(l2)
return counter return counter
# it is assumed that everyone has this
#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"] defaultArr = ["Wasser", "salz", "pfeffer"]
defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this defaultArr = stem(defaultArr)
#inputArr += defaultArr
maxMissing = 10
#
#stemmed = stemInput(inputArr)
#
#start = time.time()
#indx = faster(stemmed)
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")
#
#
#start = time.time()
#indx = fastes(stemmed)
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")