90 lines
2.7 KiB
Python
90 lines
2.7 KiB
Python
|
|
import application.db2 as db
|
|
from flask import g
|
|
import nltk as nltk
|
|
from nltk.corpus import stopwords
|
|
import time
|
|
import heapq
|
|
from collections import Counter
|
|
import background.migrate
|
|
|
|
def search2(inputArr):
    """Look up, for every search term, the recipes that use it.

    One query is issued per entry of ``inputArr``. It selects the recipe ids
    reachable from a ``Trunk`` whose name equals the term exactly, joining
    through ``IngredTrunk``, ``Ingredient``, ``RecIngred`` and ``Recipe``.

    Args:
        inputArr: Iterable of trunk names to search for.

    Returns:
        dict mapping each term to the list of matching recipe ids, each
        converted with ``str``. A term without matches maps to ``[]``.
    """
    indx = {}
    dbSession = db.Session()
    try:
        for inpu in inputArr:
            rows = (
                dbSession.query(db.Trunk.name, db.Recipe.recipe_id)
                .filter(db.Trunk.name == inpu)
                .join(db.IngredTrunk)
                .join(db.Ingredient)
                .join(db.RecIngred)
                .join(db.Recipe)
                .all()
            )
            # Only the recipe id (second column) is kept, as a string.
            indx[inpu] = [str(row[1]) for row in rows]
    finally:
        # The session was previously never released; close it even when a
        # query raises so the connection goes back to the pool.
        dbSession.close()

    return indx
|
|
|
|
|
|
def stemInput(inputArr):
    """Drop German stop words from the input and stem the remaining words.

    Args:
        inputArr: Iterable of words (strings).

    Returns:
        A new list holding the German Snowball stem of every word that is
        not a German stop word, in input order.
    """
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))

    # Compare lower-cased: the NLTK stop-word list is lower-case, so a
    # capitalised stop word ("Und", "Der") used to slip through the filter.
    return [
        snowball.stem(word)
        for word in inputArr
        if word.lower() not in stopset
    ]
|
|
#
|
|
|
|
def getRecDict2(indx, inputArr):
    """Score the recipes found by the search and return the 20 best.

    Args:
        indx: dict mapping a search term to a list of recipe ids (the shape
            returned by ``search2``).
        inputArr: list of trunk names the user supplied. It is NOT modified;
            the entries of ``defaultArr`` are added to a private copy.

    Returns:
        dict keyed by score (float). Each value is the tuple
        ``(recipe_id, name, url, ["<ingredient>: <amount>", ...], img)``
        with ``img`` decoded as UTF-8. At most 20 entries; empty when the
        search produced no recipe ids.
    """
    # Flatten the per-term id lists and keep the 1000 ids hit most often.
    # A generator avoids the quadratic list copying of sum(lists, []).
    hits = Counter(rid for ids in indx.values() for rid in ids)
    candidates = [rid for rid, _count in hits.most_common(1000)]
    if not candidates:
        # Nothing to rank, so skip the database round trip entirely.
        return {}

    # Work on a copy: `inputArr += defaultArr` extended the caller's list in
    # place, so it grew with every call.
    available = list(inputArr) + defaultArr

    dbSession = db.Session()
    try:
        rows = (
            dbSession.query(
                db.Recipe.recipe_id,
                db.IngredTrunk.trunk_name,
                db.IngredTrunk.ingredient_name,
            )
            .filter(db.Recipe.recipe_id.in_(candidates))
            .join(db.RecIngred)
            .join(db.Ingredient)
            .join(db.IngredTrunk)
            .all()
        )

        # recipe_id -> ingredient_name -> [trunk_name, ...]
        ingredDict = {}
        for recipe_id, trunk_name, ingredient_name in rows:
            by_ingredient = ingredDict.setdefault(recipe_id, {})
            by_ingredient.setdefault(ingredient_name, []).append(trunk_name)

        # score -> recipe_id. Scores are dict keys, so a tie is nudged down
        # by 0.0001 until it is unique rather than overwriting a recipe.
        outDict = {}
        for recipe_id, by_ingredient in ingredDict.items():
            overlay = calcOverlay2(available, by_ingredient)
            while overlay in outDict:
                overlay -= 0.0001
            outDict[overlay] = int(recipe_id)

        outDict2 = {}
        for score in heapq.nlargest(20, outDict):
            recipe_id = outDict[score]
            rec = (
                dbSession.query(db.Recipe)
                .filter(db.Recipe.recipe_id == recipe_id)
                .first()
            )
            amounts = (
                dbSession.query(db.Ingredient.name, db.RecIngred.ingredient_amount)
                .join(db.RecIngred)
                .join(db.Recipe)
                .filter(db.Recipe.recipe_id == recipe_id)
                .all()
            )
            ingredients = [row[0] + ": " + row[1] for row in amounts]
            outDict2[score] = (
                recipe_id,
                rec.name,
                rec.url,
                ingredients,
                rec.img.decode('utf-8'),
            )
    finally:
        # The session was previously never released; every value placed in
        # the result is a plain Python object, so closing here is safe.
        dbSession.close()

    return outDict2
|
|
|
|
def stem(l1):
    """Return the German Snowball stem of every entry of ``l1``.

    No stop-word filtering happens here: every input word yields exactly one
    output stem, in the same order. (The previous version built a stop-word
    set on each call but never used it; that dead work has been removed.)

    Args:
        l1: Iterable of words (strings).

    Returns:
        A new list of stems.
    """
    snowball = nltk.SnowballStemmer(language='german')
    return [snowball.stem(word) for word in l1]
|
|
|
|
def calcOverlay2(l1, l2):
    """Return the share of a recipe's ingredients covered by ``l1``.

    An ingredient counts as covered when at least one of its trunk names is
    contained in ``l1``; each ingredient is counted at most once.

    Args:
        l1: Container of available trunk names (anything supporting ``in``).
        l2: dict mapping an ingredient to the list of its trunk names.

    Returns:
        Covered ingredients divided by ``len(l2)``, a value between 0 and 1.
        Returns ``0.0`` for an empty ``l2`` instead of raising
        ``ZeroDivisionError``.
    """
    if not l2:
        return 0.0

    covered = sum(
        1
        for trunks in l2.values()
        if any(trunk in l1 for trunk in trunks)
    )
    return covered / len(l2)
|
|
|
|
# Staples every user is assumed to have at hand; getRecDict2 adds them to the
# user's input before scoring recipes.
# NOTE(review): "Wasser" is capitalised while the other entries are lower
# case - confirm this matches how trunk names are stored.
defaultArr = [
    "Wasser",
    "salz",
    "pfeffer",
]
|
|
|