Inverse-Rezeptsuche/search.py


from application.db import Session, Recipe, Ingredient, Trunk
import nltk as nltk
from nltk.corpus import stopwords
import time
import heapq

dbSession = Session()

def slow(inputArr=None):
    """Group every recipe's ingredients by how many search-term hits it has.

    Brute-force reference implementation: every recipe is loaded and each
    ingredient is compared against each search term in Python.

    Args:
        inputArr: Search terms. Optional so that existing ``slow()`` calls
            keep working; ``None`` is treated as "no search terms".

    Returns:
        dict mapping the hit count (as a string) to the list of ingredient
        collections of the recipes with that count. Recipes with more than
        ``len(inputArr) + maxMissing`` ingredients are skipped.
    """
    if inputArr is None:
        inputArr = []

    arr = {}
    for rec in dbSession.query(Recipe).all():
        ingredients = rec.ingredients()
        if len(ingredients) > len(inputArr) + maxMissing:
            continue
        # One hit per (term, ingredient) pair in which the term occurs.
        counter = sum(1 for i in inputArr for x in ingredients if i in x)
        arr.setdefault(str(counter), []).append(ingredients)
    return arr
        
#    for y, x in arr.items():
#        for xx in x:
#            print(xx)

def faster(inputArr):
    """Count, per recipe, the Trunk rows whose name contains a search term.

    Issues one query per term, filtering with ``Trunk.name.contains(term)``.

    Args:
        inputArr: Iterable of search terms.

    Returns:
        dict mapping the recipe id (as a string) to the number of matching
        rows found across all terms. Empty when ``inputArr`` is empty.
    """
    indx = {}
    for term in inputArr:
        rows = dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(term)).all()
        for row in rows:
            # Keys stay strings: getRecDict() converts them back with int().
            key = str(row[0])
            indx[key] = indx.get(key, 0) + 1
    return indx

def fastes(inputArr):
    """Count, per recipe, the Trunk rows whose name equals a search term.

    Issues one query per term, filtering with ``Trunk.name == term``.

    Args:
        inputArr: Iterable of search terms.

    Returns:
        dict mapping the recipe id (as a string) to the number of matching
        rows found across all terms. Empty when ``inputArr`` is empty.
    """
    indx = {}
    for term in inputArr:
        rows = dbSession.query(Trunk.recipe_id).filter(Trunk.name == term).all()
        for row in rows:
            # Keys stay strings: getRecDict() converts them back with int().
            key = str(row[0])
            indx[key] = indx.get(key, 0) + 1
    return indx
    
def stemInput(inputArr):
    """Drop German stop words from *inputArr* and stem what remains.

    Args:
        inputArr: Iterable of words. The stop-word test is an exact
            comparison against NLTK's German stop-word list.

    Returns:
        A new list with the Snowball stem of every non-stop-word, in the
        original order.
    """
    stemmer = nltk.SnowballStemmer(language='german')
    german_stopwords = set(stopwords.words('german'))
    return [
        stemmer.stem(token)
        for token in inputArr
        if token not in german_stopwords
    ]
#

def getRecDict(indx, inputArr):
    """Score the candidate recipes and return the ten best by overlap.

    Args:
        indx: Mapping whose keys are recipe ids (anything ``int()`` accepts),
            as produced by ``faster()`` / ``fastes()``. Values are not used.
        inputArr: Search terms handed to ``calcOverlay()``.

    Returns:
        dict mapping overlap score to ``(recipe name, recipe id, ingredient
        name rows)``, inserted in descending score order, at most ten entries.

    NOTE(review): the result is keyed by score, so recipes with an identical
    score overwrite each other and only the last one scored is kept. Fixing
    that needs a different return shape and is left to the callers' owner.
    """
    scored = {}
    for key in indx:
        recipe_id = int(key)
        ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id == recipe_id).all()
        if not ingred:
            # No stemmed ingredients: nothing to score (and nothing to divide by).
            continue
        scored[calcOverlay(inputArr, ingred)] = recipe_id

    outDict = {}
    for score in heapq.nlargest(10, scored):
        recipe_id = scored[score]
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == recipe_id).first()
        if recipe is None:
            # A Trunk row points at a recipe that no longer exists; skip it.
            continue
        ingredients = dbSession.query(Ingredient.name).filter(Ingredient.recipe_id == recipe_id).all()
        outDict[score] = (recipe.name, recipe_id, ingredients)
    return outDict

def printDict(indx, inputArr):
    """Print the ranked recipes whose overlap score is at least 0.3.

    Fetches the ranking from ``getRecDict(indx, inputArr)`` and walks it in
    ascending score order. For each qualifying recipe it prints the score,
    name and id on one line, followed by one tab-prefixed line per
    ingredient name.
    """
    ranked = getRecDict(indx, inputArr)
    for score in sorted(ranked):
        if score < 0.3:
            continue
        name, recipe_id, ingredients = ranked[score]
        print(score, name, recipe_id)
        for row in ingredients:
            print("\t", row[0])
    

def stem(l1):
    """Return the German Snowball stem of every word in *l1*.

    Unlike ``stemInput()``, no stop-word filtering is applied: the result has
    the same length and order as the input.

    Args:
        l1: Iterable of words.

    Returns:
        A new list of stemmed words.
    """
    snowball = nltk.SnowballStemmer(language='german')
    return [snowball.stem(word) for word in l1]

def calcOverlay(l1, l2):
    """Return the share of a recipe's ingredients covered by the search terms.

    Args:
        l1: Search terms. Terms listed in ``defaultArr`` are ignored.
        l2: Sequence of rows whose first element is an ingredient name
            (e.g. the result of ``query(Trunk.name).all()``).

    Returns:
        Number of (ingredient, term) pairs with ``term == row[0]`` divided
        by ``len(l2)``, as a float. ``0.0`` when ``l2`` is empty.
    """
    if not l2:
        # An ingredient-less recipe cannot overlap with anything.
        return 0.0

    # Multiplicity of every usable term, so duplicates in l1 still count
    # once per occurrence.
    wanted = {}
    for term in l1:
        if term not in defaultArr:
            wanted[term] = wanted.get(term, 0) + 1

    hits = sum(wanted.get(row[0], 0) for row in l2)
    return hits / len(l2)


#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"]
# Staple ingredients: calcOverlay() skips any search term found in this list.
# NOTE(review): calcOverlay() compares terms to these entries by exact
# equality. "Wasser" is capitalised and none of the entries look stemmed, so
# stemmed input may never match them. Confirm the form callers pass in.
defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
#inputArr += defaultArr
# Slack used by slow(): recipes with more than len(inputArr) + maxMissing
# ingredients are skipped.
maxMissing = 10
#
#stemmed = stemInput(inputArr)
#
#start = time.time()
#indx = faster(stemmed)  
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")  
#
#
#start = time.time()
#indx = fastes(stemmed)
#end = time.time()
#printDict(indx)
#print("\n", end - start, "\n")
did stuff 2020-04-10 21:34:58 +00:00
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`from application.db import Session, Recipe, Ingredient, Trunk`
added stemmed table 2020-04-11 20:49:43 +00:00			`import nltk as nltk`
			`from nltk.corpus import stopwords`
added migrate 2020-04-15 10:54:02 +00:00			`import time`
optimization 2020-04-18 08:18:13 +00:00			`import heapq`
did stuff 2020-04-10 21:34:58 +00:00
			`dbSession = Session()`

			`def slow():`
			`recipes = dbSession.query(Recipe).all()`


			`arr = {}`
			`for recipe in recipes:`
			`rec = recipe`
			`recipe = recipe.ingredients()`
added migrate 2020-04-15 10:54:02 +00:00			`if len(recipe) > len(inputArr) + maxMissing:`
- 2020-04-10 22:42:37 +00:00			`continue`
did stuff 2020-04-10 21:34:58 +00:00			`counter = 0`
			`for i in inputArr:`
			`for x in recipe:`
			`if i in x:`
			`counter += 1`
			`continue`
			`counter = str(counter)`

			`if counter not in arr:`
			`arr[counter] = []`

added migrate 2020-04-15 10:54:02 +00:00			`arr[counter].append(rec.ingredients())`
did stuff 2020-04-10 21:34:58 +00:00			`#print(rec.name)`

added migrate 2020-04-15 10:54:02 +00:00			`# for y, x in arr.items():`
			`# for xx in x:`
			`# print(xx)`
did stuff 2020-04-10 21:34:58 +00:00
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`def faster(inputArr):`
did stuff 2020-04-10 21:34:58 +00:00			`indx = {}`
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00
did stuff 2020-04-10 21:34:58 +00:00			`for inpu in inputArr:`
			`ids = []`
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():`
			`if str(x[0]) not in indx:`
			`indx[str(x[0])] = 0`
did stuff 2020-04-10 21:34:58 +00:00
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`indx[str(x[0])] += 1`
did stuff 2020-04-10 21:34:58 +00:00
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`return(indx)`
added migrate 2020-04-15 10:54:02 +00:00
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`def fastes(inputArr):`
added migrate 2020-04-15 10:54:02 +00:00			`indx = {}`
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00
			`for inpu in inputArr:`
			`ids = []`
added api calcOverlay is waaaahhhaay to slow 2020-04-17 20:29:50 +00:00			`for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():`
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`if str(recipe_id[0]) not in indx:`
			`indx[str(recipe_id[0])] = 0`

			`indx[str(recipe_id[0])] += 1`
			`return(indx)`

			`def stemInput(inputArr):`
added migrate 2020-04-15 10:54:02 +00:00			`inputArr2 = []`

			`snowball = nltk.SnowballStemmer(language='german')`
			`stopset = set(stopwords.words('german'))`
			`for word in inputArr:`
			`if word in stopset:`
			`continue`
			`inputArr2.append(snowball.stem(word))`
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`return inputArr2`
			`#`
added api calcOverlay is waaaahhhaay to slow 2020-04-17 20:29:50 +00:00
			`def getRecDict(indx, inputArr):`
optimization 2020-04-18 08:18:13 +00:00			`#inputArr = stem(inputArr)`
added api calcOverlay is waaaahhhaay to slow 2020-04-17 20:29:50 +00:00			`outDict = {}`
			`for key, value in indx.items():`
			`ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()`
optimization 2020-04-18 08:18:13 +00:00			`outDict[calcOverlay(inputArr, ingred)] = int(key)`

			`outDict2 = {}`
			`for key in heapq.nlargest(10, outDict.keys()):`
			`key2 = outDict[key]`
			`outDict2[key] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key2).first().name, key2, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key2).all())`
			`return outDict2`
added api calcOverlay is waaaahhhaay to slow 2020-04-17 20:29:50 +00:00
			`def printDict(indx, inputArr):`
optimization 2020-04-18 08:18:13 +00:00			`outDict = getRecDict(indx, inputArr)`
added api calcOverlay is waaaahhhaay to slow 2020-04-17 20:29:50 +00:00			`for key, value in sorted(outDict.items()):`
			`if key >= 0.3:`

			`print(key, value[0], value[1])`
			`for xx in value[2]:`
			`print("\t", xx[0])`
added migrate 2020-04-15 10:54:02 +00:00
added stemmed table 2020-04-11 20:49:43 +00:00
optimization 2020-04-18 08:18:13 +00:00			`def stem(l1):`
added migrate 2020-04-15 10:54:02 +00:00			`snowball = nltk.SnowballStemmer(language='german')`
added stemmed table 2020-04-11 20:49:43 +00:00			`stopset = set(stopwords.words('german'))`
			`stopset \|= set("(),")`

optimization 2020-04-18 08:18:13 +00:00			`l1 = [snowball.stem(l) for l in l1]`
			`return l1`

			`def calcOverlay(l1, l2):`
added migrate 2020-04-15 10:54:02 +00:00			`counter = 0`

			`for x in l2:`
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`for l in l1:`
added api calcOverlay is waaaahhhaay to slow 2020-04-17 20:29:50 +00:00			`if l not in defaultArr and l == x[0]:`
sorted by overlap works, much simpler db model 2020-04-15 12:16:28 +00:00			`#print(l)`
			`counter +=1`
			`counter = counter / len(l2)`
added migrate 2020-04-15 10:54:02 +00:00			`return counter`


added api calcOverlay is waaaahhhaay to slow 2020-04-17 20:29:50 +00:00			`#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"]`
			`defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this`
			`#inputArr += defaultArr`
			`maxMissing = 10`
			`#`
			`#stemmed = stemInput(inputArr)`
			`#`
			`#start = time.time()`
			`#indx = faster(stemmed)`
			`#end = time.time()`
			`#printDict(indx)`
			`#print("\n", end - start, "\n")`
			`#`
			`#`
			`#start = time.time()`
			`#indx = fastes(stemmed)`
			`#end = time.time()`
			`#printDict(indx)`
			`#print("\n", end - start, "\n")`