Inverse-Rezeptsuche/test.py

132 lines
3.5 KiB
Python
Raw Normal View History

2020-04-10 21:34:58 +00:00
2020-04-11 20:49:43 +00:00
from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk
from nltk.corpus import stopwords
2020-04-15 10:54:02 +00:00
import time
2020-04-10 21:34:58 +00:00
# Module-wide database session; every query in this script goes through it.
dbSession = Session()
2020-04-15 10:54:02 +00:00
# Search terms (German ingredient names) that recipes are matched against.
inputArr = ["butter", "milch", "eier", "käse"]
# Size limit for candidates: a recipe is skipped when it has more than
# len(inputArr) + maxMissing ingredients.
maxMissing = 4
2020-04-10 21:34:58 +00:00
def slow():
    """Group the ingredient lists of all recipes by substring-match count.

    Loads every recipe, skips those with more than
    ``len(inputArr) + maxMissing`` ingredients, and counts how many
    (search term, ingredient) pairs match by substring (``term in ingredient``).

    Returns:
        dict: maps the match count, as a string, to the list of ingredient
        lists of the recipes that reached that count.  Previously this
        mapping was built and then discarded; returning it is harmless for
        callers that ignore the result.
    """
    maxIngredients = len(inputArr) + maxMissing
    arr = {}
    for rec in dbSession.query(Recipe).all():
        # Fetch the ingredients once and reuse them for matching and for the
        # stored result instead of calling ingredients() a second time.
        ingredients = rec.ingredients()
        if len(ingredients) > maxIngredients:
            continue
        # NOTE(review): every matching pair is counted, so a single search
        # term can score several times within one recipe. The original loop
        # ended in a no-op `continue`; confirm whether `break` was intended.
        counter = sum(1 for i in inputArr for x in ingredients if i in x)
        arr.setdefault(str(counter), []).append(ingredients)
    return arr
2020-04-10 21:34:58 +00:00
def faster():
    """Find and score candidate recipes via a substring search on ingredients.

    For each search term in ``inputArr`` the ingredients whose name contains
    the term are queried, and the recipe ids reachable through
    ``ingredient.recipe`` are collected unless the recipe has more than
    ``len(inputArr) + maxMissing`` ``Link`` rows.  Every collected recipe is
    then scored with ``calcOverlay`` and the resulting
    ``{score: (name, recipe_id, ingredients)}`` dict is printed.
    """
    maxIngredients = len(inputArr) + maxMissing  # loop-invariant size limit

    # Keyed by str(recipe_id) in first-seen order. The values are hit counts;
    # only the keys are consumed below.
    indx = {}
    for inpu in inputArr:
        matches = dbSession.query(Ingredient).filter(Ingredient.name.contains(inpu)).all()
        for x in matches:
            for y in x.recipe:
                linkCount = dbSession.query(Link).filter(Link.recipe_id == y.recipe_id).count()
                if linkCount > maxIngredients:
                    continue
                recipeId = str(y.recipe_id)
                indx[recipeId] = indx.get(recipeId, 0) + 1

    outDict = {}
    for key in indx:
        # Look the recipe up once; name and ingredients come from the same
        # object instead of two identical queries.
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == key).first()
        ingred = recipe.ingredients()
        # NOTE(review): the dict is keyed by score, so recipes with an equal
        # score overwrite each other and only the last one is printed.
        outDict[calcOverlay(inputArr, ingred)] = (recipe.name, key, ingred)
    print(outDict)
def fastes():
    """Find and score candidate recipes via exact matches on stemmed terms.

    The search terms in ``inputArr`` are filtered against the German stop-word
    list and stemmed with the German Snowball stemmer.  Each stem is looked up
    by equality in ``Trunk.name``; the matching ingredients lead, through
    ``ingredient.recipe``, to recipe ids, which are collected unless the
    recipe has more than ``len(inputArr) + maxMissing`` ``Link`` rows.  Every
    collected recipe is then scored with ``calcOverlay`` and the resulting
    ``{score: (name, recipe_id, ingredients)}`` dict is printed.
    """
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    inputArr2 = [snowball.stem(word) for word in inputArr if word not in stopset]

    # The limit is based on the unfiltered input, exactly as before.
    maxIngredients = len(inputArr) + maxMissing

    # Keyed by str(recipe_id) in first-seen order. The values are hit counts;
    # only the keys are consumed below.
    indx = {}
    for inpu in inputArr2:
        for xx in dbSession.query(Trunk).filter(Trunk.name == inpu).all():
            ingredients = dbSession.query(Ingredient).filter(
                xx.ingredient_id == Ingredient.ingredient_id
            ).all()
            for x in ingredients:
                for y in x.recipe:
                    linkCount = dbSession.query(Link).filter(Link.recipe_id == y.recipe_id).count()
                    if linkCount > maxIngredients:
                        continue
                    recipeId = str(y.recipe_id)
                    indx[recipeId] = indx.get(recipeId, 0) + 1

    outDict = {}
    for key in indx:
        # Look the recipe up once; name and ingredients come from the same
        # object instead of two identical queries.
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == key).first()
        ingred = recipe.ingredients()
        # NOTE(review): the dict is keyed by score, so recipes with an equal
        # score overwrite each other and only the last one is printed.
        outDict[calcOverlay(inputArr, ingred)] = (recipe.name, key, ingred)
    print(outDict)
2020-04-11 20:49:43 +00:00
#
2020-04-15 10:54:02 +00:00
def calcOverlay(l1, l2):
    """Count stem-level matches between search terms and ingredient texts.

    Args:
        l1: iterable of search terms; each one is stemmed with the German
            Snowball stemmer before comparison.
        l2: iterable of strings; each one is split with ``nltk.word_tokenize``.

    Returns:
        int: the number of (token, term) pairs whose stems are equal.  Tokens
        that are German stop words or one of ``(``, ``)``, ``,`` are ignored.
        Repeated terms and repeated tokens each count separately.
    """
    stemmer = nltk.SnowballStemmer(language='german')
    ignored = set(stopwords.words('german')) | set("(),")
    wanted = [stemmer.stem(term) for term in l1]
    return sum(
        wanted.count(stemmer.stem(token))
        for text in l2
        for token in nltk.word_tokenize(text)
        if token not in ignored
    )
#
# Run each search variant once, in order, and print its wall-clock duration.
for benchmark in (slow, faster, fastes):
    start = time.time()
    benchmark()
    end = time.time()
    print("\n", end - start, "\n")