Inverse-Rezeptsuche/search.py

136 lines
3.4 KiB
Python
Raw Normal View History

2020-04-10 21:34:58 +00:00
from application.db import Session, Recipe, Ingredient, Trunk
2020-04-11 20:49:43 +00:00
import nltk as nltk
from nltk.corpus import stopwords
2020-04-15 10:54:02 +00:00
import time
2020-04-18 08:18:13 +00:00
import heapq
2020-04-10 21:34:58 +00:00
# Single Session instance (from application.db) created at import time; every
# query helper in this module issues its queries through it.
dbSession = Session()
def slow(inputArr=None):
    """Brute-force search: bucket every recipe by its number of ingredient hits.

    Every recipe is loaded and each search term is tested with ``in`` against
    each entry returned by ``recipe.ingredients()``. Recipes with more than
    ``len(inputArr) + maxMissing`` ingredients are skipped.

    Args:
        inputArr: Search terms. When omitted, a module-level ``inputArr`` is
            used if one has been defined (the historical behaviour);
            otherwise the search runs with no terms.

    Returns:
        A dict mapping the hit count (as a string) to the list of ingredient
        collections of all recipes that reached that count.
    """
    if inputArr is None:
        # The function used to read a global that this module no longer
        # defines; fall back to it only if a caller has set it.
        inputArr = globals().get("inputArr", [])

    buckets = {}
    for recipe in dbSession.query(Recipe).all():
        ingredients = recipe.ingredients()
        if len(ingredients) > len(inputArr) + maxMissing:
            continue
        # One hit per (term, ingredient) pair for which ``term in ingredient``.
        hits = sum(
            1
            for term in inputArr
            for ingredient in ingredients
            if term in ingredient
        )
        buckets.setdefault(str(hits), []).append(ingredients)
    return buckets
2020-04-10 21:34:58 +00:00
def faster(inputArr):
    """Count, per recipe, the Trunk rows whose name contains a search term.

    Args:
        inputArr: Iterable of search terms. Each term is matched as a
            substring through ``Trunk.name.contains``.

    Returns:
        A dict mapping ``str(recipe_id)`` to the number of matching Trunk
        rows, summed over all terms. A recipe is counted once per matching
        row, so one term can contribute more than one hit to a recipe.
    """
    indx = {}
    for term in inputArr:
        rows = dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(term)).all()
        for row in rows:
            recipeKey = str(row[0])
            indx[recipeKey] = indx.get(recipeKey, 0) + 1
    return indx
2020-04-15 10:54:02 +00:00
def fastes(inputArr):
    """Count, per recipe, the Trunk rows whose name equals a search term.

    Same bookkeeping as ``faster`` but with an exact ``Trunk.name == term``
    comparison instead of a substring match.

    Args:
        inputArr: Iterable of search terms.

    Returns:
        A dict mapping ``str(recipe_id)`` to the number of Trunk rows whose
        name equals one of the terms, summed over all terms.
    """
    indx = {}
    for term in inputArr:
        rows = dbSession.query(Trunk.recipe_id).filter(Trunk.name == term).all()
        for row in rows:
            recipeKey = str(row[0])
            indx[recipeKey] = indx.get(recipeKey, 0) + 1
    return indx
def stemInput(inputArr):
    """Drop German stop words from ``inputArr`` and stem the remaining words.

    The stop-word test is applied to each word exactly as given, before
    stemming; surviving words are reduced with NLTK's German Snowball stemmer.

    Args:
        inputArr: Iterable of words.

    Returns:
        A new list with the stems of all non-stop-word entries, in input order.
    """
    stemmer = nltk.SnowballStemmer(language='german')
    germanStopwords = set(stopwords.words('german'))
    return [stemmer.stem(term) for term in inputArr if term not in germanStopwords]
#
def getRecDict(indx, inputArr):
    """Return the (at most) ten best-scoring recipes for a search.

    Args:
        indx: Mapping whose keys are recipe ids (as produced by ``faster`` /
            ``fastes``). Only the keys are used.
        inputArr: Search terms, forwarded to ``calcOverlay``.

    Returns:
        A dict mapping the overlap score to a tuple
        ``(recipe name, recipe id, ingredient-name rows)``.
    """
    # NOTE(review): the score is used as the dict key, so recipes with an
    # identical score overwrite one another and only the last one survives.
    # Fixing that requires a different return shape, which callers (e.g.
    # printDict) depend on.
    idByScore = {}
    for key in indx:
        recipeId = int(key)
        trunkRows = dbSession.query(Trunk.name).filter(Trunk.recipe_id == recipeId).all()
        idByScore[calcOverlay(inputArr, trunkRows)] = recipeId

    best = {}
    for score in heapq.nlargest(10, idByScore):
        recipeId = idByScore[score]
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == recipeId).first()
        if recipe is None:
            # Trunk rows without a matching recipe: nothing to show, so skip
            # instead of failing on ``None.name``.
            continue
        ingredientRows = dbSession.query(Ingredient.name).filter(Ingredient.recipe_id == recipeId).all()
        best[score] = (recipe.name, recipeId, ingredientRows)
    return best
def printDict(indx, inputArr, minScore=0.3):
    """Print the best-matching recipes and their ingredients to stdout.

    Results come from ``getRecDict`` and are printed in ascending score
    order, so the best match is printed last.

    Args:
        indx: Mapping of recipe ids, forwarded to ``getRecDict``.
        inputArr: Search terms, forwarded to ``getRecDict``.
        minScore: Entries scoring below this value are not printed.
            Defaults to 0.3, the previously hard-coded cut-off.
    """
    outDict = getRecDict(indx, inputArr)
    for score, (name, recipeId, ingredientRows) in sorted(outDict.items()):
        if score < minScore:
            continue
        print(score, name, recipeId)
        for row in ingredientRows:
            print("\t", row[0])
2020-04-15 10:54:02 +00:00
2020-04-11 20:49:43 +00:00
2020-04-18 08:18:13 +00:00
def stem(l1):
    """Return the German Snowball stem of every word in ``l1``.

    No stop words are removed here; every input word yields one stem.

    Args:
        l1: Iterable of words.

    Returns:
        A new list holding the stem of each word, in input order.
    """
    # A stop-word set used to be built here but was never consulted; it has
    # been dropped so the function no longer loads the stop-word corpus.
    snowball = nltk.SnowballStemmer(language='german')
    return [snowball.stem(word) for word in l1]
def calcOverlay(l1, l2, ignored=None):
    """Return the fraction of the rows in ``l2`` that match a term in ``l1``.

    Args:
        l1: Search terms. Duplicates are counted once, so the result never
            exceeds 1.
        l2: Sequence of rows whose first element (``row[0]``) is the name
            compared against the search terms.
        ignored: Terms that never count as a match. Defaults to the
            module-level ``defaultArr``.

    Returns:
        ``matching rows / len(l2)`` as a float, or ``0.0`` when ``l2`` is
        empty (instead of dividing by zero).
    """
    if not l2:
        return 0.0
    if ignored is None:
        ignored = defaultArr
    # A set makes each row lookup O(1) and collapses repeated search terms.
    wanted = set(l1).difference(ignored)
    hits = sum(1 for row in l2 if row[0] in wanted)
    return hits / len(l2)
# Example query, left disabled:
#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"]
# Terms that calcOverlay never counts as a match. The membership test there is
# an exact, case-sensitive string comparison.
# NOTE(review): "Wasser" is capitalised while the other entries are not, and
# search terms presumably arrive stemmed -- confirm these entries can match.
defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
#inputArr += defaultArr
# Slack used by slow(): recipes with more than len(inputArr) + maxMissing
# ingredients are skipped.
maxMissing = 10
#
#stemmed = stemInput(inputArr)
#
#start = time.time()
#indx = faster(stemmed)
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")
#
#
#start = time.time()
#indx = fastes(stemmed)
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")