Inverse-Rezeptsuche/app/application/search.py

121 lines
3.6 KiB
Python
Raw Normal View History

2020-04-26 21:44:25 +00:00
import application.db2 as db
from flask import g
import nltk as nltk
from nltk.corpus import stopwords
import time
import heapq
from collections import Counter
import background.migrate
2020-05-15 20:29:43 +00:00
from sqlalchemy import exists
2020-04-26 21:44:25 +00:00
def search2(inputArr):
    """Collect, for each search term, the ids of the recipes linked to it.

    One query is issued per term. It selects (Trunk.name, Recipe.recipe_id)
    for the trunk whose name equals the term, joined through IngredTrunk,
    Ingredient and RecIngred to Recipe.

    Args:
        inputArr: iterable of trunk names to look up.

    Returns:
        dict mapping every term to a list of recipe ids converted to ``str``.
        A term without any matching row maps to an empty list.
    """
    session = db.Session()
    recipes_by_term = {}
    for term in inputArr:
        query = (
            session.query(db.Trunk.name, db.Recipe.recipe_id)
            .filter(db.Trunk.name == term)
            .join(db.IngredTrunk)
            .join(db.Ingredient)
            .join(db.RecIngred)
            .join(db.Recipe)
        )
        # Only the recipe id (second column) of each row is kept.
        recipes_by_term[term] = [
            str(recipe_id) for _trunk_name, recipe_id in query.all()
        ]
    return recipes_by_term
def stemInput(inputArr):
    """Drop German stop words from the input and stem what is left.

    The stop-word check is an exact match on the word as given (before
    stemming); surviving words are reduced with NLTK's German Snowball
    stemmer.

    Args:
        inputArr: iterable of words.

    Returns:
        A new list with the stems of all non-stop-words, in input order.
    """
    stemmer = nltk.SnowballStemmer(language='german')
    german_stopwords = set(stopwords.words('german'))
    return [
        stemmer.stem(token)
        for token in inputArr
        if token not in german_stopwords
    ]
#
2020-04-26 21:44:25 +00:00
def getRecDict2(indx, inputArr):
    """Rank the recipes found by the search by ingredient coverage.

    Args:
        indx: dict mapping a search term to a list of recipe ids (the
            structure returned by ``search2``).
        inputArr: list of search terms. It is NOT modified; the entries of
            ``defaultArr`` are added to a private copy before scoring.

    Returns:
        dict with two entries:
          * ``"ignored"``: the terms of ``inputArr`` for which no
            ``db.Trunk`` row with that name exists.
          * ``"ingred"``: dict keyed by overlay score holding, for at most
            the 20 best-scoring recipes, the tuple
            ``(recipe_id, name, url, ["<ingredient>: <amount>", ...], missing)``
            where ``missing`` lists the ingredient names not covered.
    """
    session = db.Session()

    # Flatten the per-term id lists (2d -> 1d) and keep the 1000 recipe ids
    # that were hit most often. A generator avoids the quadratic list
    # concatenation of sum(lists, []).
    hit_counts = Counter(
        recipe_id for recipe_ids in indx.values() for recipe_id in recipe_ids
    )
    candidates = dict(hit_counts.most_common(1000))

    # Rows are (recipe id, stemmed ingredient, full ingredient name).
    rows = (
        session.query(
            db.Recipe.recipe_id,
            db.IngredTrunk.trunk_name,
            db.IngredTrunk.ingredient_name,
        )
        .filter(db.Recipe.recipe_id.in_(list(candidates)))
        .join(db.RecIngred)
        .join(db.Ingredient)
        .join(db.IngredTrunk)
        .all()
    )

    # Mirror the DB as {recipe id: {full ingredient name: [stemmed names]}}.
    ingred_dict = {}
    for recipe_id, trunk_name, ingredient_name in rows:
        ingred_dict.setdefault(recipe_id, {}).setdefault(
            ingredient_name, []
        ).append(trunk_name)

    # Terms that do not exist as a trunk at all are reported back.
    ignored = [
        term
        for term in inputArr
        if not session.query(exists().where(db.Trunk.name == term)).scalar()
    ]

    # Build a new list instead of `inputArr += defaultArr`, which extended
    # the caller's list in place on every call.
    available = [*inputArr, *defaultArr]

    # Score every candidate recipe. The score is used as dict key, so a
    # colliding score is lowered by an insignificant amount until it is
    # unique; otherwise recipes with equal scores would overwrite each other.
    scored = {}
    for recipe_id, ingredients in ingred_dict.items():
        overlay, missing = calcOverlay2(available, ingredients)
        while overlay in scored:
            overlay -= 0.0001
        scored[overlay] = (int(recipe_id), missing)

    # Load the details of the 20 highest-scoring recipes.
    best = {}
    for score in heapq.nlargest(20, scored):
        recipe_id, missing = scored[score]
        rec = session.query(db.Recipe).filter(
            db.Recipe.recipe_id == recipe_id).first()
        amounts = (
            session.query(db.Ingredient.name, db.RecIngred.ingredient_amount)
            .join(db.RecIngred)
            .join(db.Recipe)
            .filter(db.Recipe.recipe_id == recipe_id)
            .all()
        )
        best[score] = (
            recipe_id,
            rec.name,
            rec.url,
            [name + ": " + amount for name, amount in amounts],
            missing,
        )

    return {"ignored": ignored, "ingred": best}
2020-04-26 21:44:25 +00:00
2020-04-26 21:44:25 +00:00
def stem(l1):
    """Return the German Snowball stem of every word in ``l1``.

    No words are filtered out: the result has the same length and order as
    the input. (The previous version also built a stop-word set that was
    never used; that dead work has been removed.)

    Args:
        l1: iterable of words.

    Returns:
        list of stemmed words.
    """
    snowball = nltk.SnowballStemmer(language='german')
    return [snowball.stem(word) for word in l1]
2020-04-26 21:44:25 +00:00
def calcOverlay2(l1, l2):
    """Calculate how much of a recipe is covered by the available items.

    Args:
        l1: container of available (stemmed) ingredient names.
        l2: dict mapping an ingredient name to the list of stemmed names
            that count as a match for it.

    Returns:
        Tuple ``(score, missing)``: ``score`` is the fraction (float between
        0 and 1) of ``l2``'s ingredients that have at least one stemmed name
        in ``l1``; ``missing`` lists the keys of ``l2`` without a match, in
        ``l2``'s order. An empty ``l2`` yields ``(0.0, [])`` instead of
        raising ZeroDivisionError.
    """
    if not l2:
        # Nothing to cover: avoid dividing by zero.
        return 0.0, []

    # An ingredient is missing when none of its stemmed names is available.
    notIn = [
        name
        for name, trunks in l2.items()
        if not any(trunk in l1 for trunk in trunks)
    ]
    total = len(l2)
    return (total - len(notIn)) / total, notIn
2020-04-26 21:44:25 +00:00
# Staple ingredients that every user is assumed to have at home;
# getRecDict2 adds them to the user's input before scoring recipes.
# NOTE(review): "Wasser" is capitalised while the other entries are lowercase
# - confirm it matches the trunk names stored in the database.
defaultArr = ["Wasser", "salz", "pfeffer"]