2020-04-10 21:34:58 +00:00
|
|
|
|
2020-04-15 12:16:28 +00:00
|
|
|
from application.db import Session, Recipe, Ingredient, Trunk
|
2020-04-11 20:49:43 +00:00
|
|
|
import nltk as nltk
|
|
|
|
|
from nltk.corpus import stopwords
|
2020-04-15 10:54:02 +00:00
|
|
|
import time
|
2020-04-10 21:34:58 +00:00
|
|
|
|
|
|
|
|
# Single database session shared by every query in this module.
dbSession = Session()

# Ingredients to search recipes for.
inputArr = [
    "butter", "milch", "eier", "mehl", "zucker",
    "Wasser", "salz", "pfeffer",  # it is assumed that everyone has this
]

# slow() skips recipes that have more than len(inputArr) + maxMissing ingredients.
maxMissing = 10
|
2020-04-10 21:34:58 +00:00
|
|
|
|
|
|
|
|
def slow():
    """Group the ingredient collections of all recipes by their hit count.

    Every recipe is loaded through ``dbSession``. Recipes whose ingredient
    count exceeds ``len(inputArr) + maxMissing`` are skipped. For the rest,
    one hit is counted for every (input, ingredient) pair for which
    ``input in ingredient`` is true, using the module-level ``inputArr``.

    Returns:
        dict: maps the hit count, converted to ``str``, to the list of
        ingredient collections (as returned by ``Recipe.ingredients()``)
        of the recipes that reached that count.
    """
    grouped = {}
    for rec in dbSession.query(Recipe).all():
        ingredients = rec.ingredients()

        # Recipes that need far more ingredients than are available are
        # not worth scoring.
        if len(ingredients) > len(inputArr) + maxMissing:
            continue

        # Every matching pair counts, so one input can score several times
        # within the same recipe.
        hits = sum(
            1
            for wanted in inputArr
            for have in ingredients
            if wanted in have
        )

        # Reuse the collection fetched above instead of calling
        # rec.ingredients() a second time.
        grouped.setdefault(str(hits), []).append(ingredients)

    # Hand the grouping back to the caller; without this the work above
    # has no observable effect.
    return grouped
|
2020-04-10 21:34:58 +00:00
|
|
|
|
2020-04-15 12:16:28 +00:00
|
|
|
def faster(inputArr):
    """Tally, per recipe, the trunk rows whose name contains a search term.

    One query is issued per term, selecting ``Trunk.recipe_id`` for every
    row where ``Trunk.name.contains(term)`` holds. Each returned row adds
    one to the tally of its recipe.

    Args:
        inputArr: iterable of search terms.

    Returns:
        dict: maps ``str(recipe_id)`` to the number of matching rows
        accumulated over all terms.
    """
    indx = {}
    for term in inputArr:
        rows = (
            dbSession.query(Trunk.recipe_id)
            .filter(Trunk.name.contains(term))
            .all()
        )
        for row in rows:
            # Rows are 1-tuples; the id is stringified once and used as key.
            key = str(row[0])
            indx[key] = indx.get(key, 0) + 1
    return indx
|
2020-04-15 10:54:02 +00:00
|
|
|
|
2020-04-15 12:16:28 +00:00
|
|
|
def fastes(inputArr):
    """Tally, per recipe, the trunk rows whose name equals a search term.

    One query is issued per term, selecting ``Trunk.recipe_id`` for every
    row where ``Trunk.name == term``. Each returned row adds one to the
    tally of its recipe.

    Args:
        inputArr: iterable of search terms.

    Returns:
        dict: maps ``str(recipe_id)`` to the number of matching rows
        accumulated over all terms.
    """
    indx = {}
    for term in inputArr:
        rows = (
            dbSession.query(Trunk.recipe_id)
            .filter(Trunk.name == term)
            .all()
        )
        for row in rows:
            # Rows are 1-tuples; the id is stringified once and used as key.
            key = str(row[0])
            indx[key] = indx.get(key, 0) + 1
    return indx
|
|
|
|
|
|
|
|
|
|
def stemInput(inputArr):
    """Return the German Snowball stems of ``inputArr``, without stop words.

    Words found in NLTK's German stop-word list are dropped; every other
    word is replaced by its stem. Input order is preserved.
    """
    stemmer = nltk.SnowballStemmer(language='german')
    german_stopwords = set(stopwords.words('german'))
    return [
        stemmer.stem(term)
        for term in inputArr
        if term not in german_stopwords
    ]
|
|
|
|
|
#
|
|
|
|
|
def printDict(indx):
    """Print every recipe in ``indx`` whose overlap score is at least 0.7.

    For each recipe id (visited in sorted key order) the recipe's trunk
    names are scored against the module-level ``inputArr`` with
    ``calcOverlay``. Each qualifying recipe is printed as
    ``score (recipe name, id, ingredient-name rows)``.

    Results are collected in a list rather than a dict keyed by score, so
    recipes that share the same score are all reported.

    Args:
        indx: dict keyed by recipe id as a string; the values are not used.
    """
    threshold = 0.7
    matches = []
    for key in sorted(indx):
        trunk_names = (
            dbSession.query(Trunk.name)
            .filter(Trunk.recipe_id == int(key))
            .all()
        )
        score = calcOverlay(inputArr, trunk_names)
        if score < threshold:
            # Below the cut-off: skip the detail queries entirely.
            continue

        # NOTE(review): the Trunk filter uses int(key) while these two
        # filters compare against the string key - confirm the column types.
        name = dbSession.query(Recipe).filter(Recipe.recipe_id == key).first().name
        ingredient_names = (
            dbSession.query(Ingredient.name)
            .filter(Ingredient.recipe_id == key)
            .all()
        )
        matches.append((score, (name, key, ingredient_names)))

    for score, details in matches:
        print(score, details)
|
2020-04-15 10:54:02 +00:00
|
|
|
|
2020-04-11 20:49:43 +00:00
|
|
|
|
2020-04-15 10:54:02 +00:00
|
|
|
def calcOverlay(l1, l2):
    """Return the share of rows in ``l2`` matched by a stemmed word of ``l1``.

    Args:
        l1: iterable of words; each is stemmed with the German Snowball
            stemmer before comparison.
        l2: sized collection of indexable rows; only ``row[0]`` is compared
            (by equality) against the stems.

    Returns:
        float: matched rows divided by ``len(l2)``, or ``0.0`` when ``l2``
        is empty.
    """
    if not l2:
        # Nothing to compare against; avoids dividing by zero.
        return 0.0

    stemmer = nltk.SnowballStemmer(language='german')
    # A set removes duplicate stems, so each row counts at most once and
    # the ratio cannot exceed 1.
    stems = {stemmer.stem(word) for word in l1}

    hits = sum(1 for row in l2 if row[0] in stems)
    return hits / len(l2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2020-04-15 12:16:28 +00:00
|
|
|
# Time both lookup strategies on the same stemmed input and print the
# matching recipes followed by the elapsed seconds of the lookup itself.
stemmed = stemInput(inputArr)

for lookup in (faster, fastes):
    start = time.time()
    indx = lookup(stemmed)
    end = time.time()
    printDict(indx)
    print("\n", end - start, "\n")
|