Inverse-Rezeptsuche/test.py


from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk
from nltk.corpus import stopwords
import time

# Search terms (ingredient names) shared by every search strategy below.
inputArr = ["butter", "milch", "eier", "käse"]
# Size slack: the searches skip recipes larger than len(inputArr) + maxMissing.
maxMissing = 4
# One database session reused for all queries issued by this script.
dbSession = Session()

def slow():
    """Baseline search: load every recipe and match ingredients in Python.

    Recipes whose ingredient list is longer than
    ``len(inputArr) + maxMissing`` are skipped. For the rest, the number of
    (search term, ingredient) pairs where the term is contained in the
    ingredient is counted, and the recipe's ingredient list is filed under
    that count (as a string key). The grouping is built only so the work
    can be timed; nothing is returned or printed.
    """
    by_hits = {}
    for rec in dbSession.query(Recipe).all():
        names = rec.ingredients()
        if len(names) > len(inputArr) + maxMissing:
            continue

        # One hit per (term, ingredient) pair with a containment match.
        hits = sum(1 for term in inputArr for name in names if term in name)

        by_hits.setdefault(str(hits), []).append(rec.ingredients())
        
#    for y, x in arr.items():
#        for xx in x:
#            print(xx)

def faster():
    """Search via a substring filter on ingredient names, then print the ranking.

    For every term in ``inputArr`` the ``Ingredient`` rows whose name
    contains the term are fetched. Each entry of such an ingredient's
    ``recipe`` collection names a candidate recipe through its
    ``recipe_id``; candidates with more than
    ``len(inputArr) + maxMissing`` ``Link`` rows are skipped.

    Every remaining candidate is scored with ``calcOverlay`` and the result
    is printed as a dict mapping each score to a list of
    ``(name, recipe_id, ingredients)`` tuples. Grouping into lists keeps
    all recipes that share a score; keying a single tuple by score would
    let later recipes overwrite earlier ones.
    """
    size_limit = len(inputArr) + maxMissing

    # recipe_id (as str) -> number of matching ingredient links seen.
    indx = {}
    for inpu in inputArr:
        matches = dbSession.query(Ingredient).filter(Ingredient.name.contains(inpu)).all()
        for ingredient in matches:
            for link in ingredient.recipe:
                link_count = dbSession.query(Link).filter(Link.recipe_id == link.recipe_id).count()
                if link_count > size_limit:
                    continue
                recipe_key = str(link.recipe_id)
                indx[recipe_key] = indx.get(recipe_key, 0) + 1

    outDict = {}
    for key in indx:
        # Fetch the recipe once and reuse it for both name and ingredients.
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == key).first()
        if recipe is None:
            # Link row without a matching recipe: nothing to report.
            continue
        ingred = recipe.ingredients()
        score = calcOverlay(inputArr, ingred)
        outDict.setdefault(score, []).append((recipe.name, key, ingred))

    print(outDict)


def fastes():
    """Search via the stemmed ``Trunk`` table, then print the ranking.

    The terms in ``inputArr`` are filtered against the German stopword list
    and stemmed with the German Snowball stemmer. For every stem the
    ``Trunk`` rows with exactly that name are fetched, and the ``Ingredient``
    rows they point to through ``ingredient_id`` are resolved. Each entry of
    such an ingredient's ``recipe`` collection names a candidate recipe
    through its ``recipe_id``; candidates with more than
    ``len(inputArr) + maxMissing`` ``Link`` rows are skipped.

    Every remaining candidate is scored with ``calcOverlay`` (using the
    unstemmed ``inputArr``) and the result is printed as a dict mapping each
    score to a list of ``(name, recipe_id, ingredients)`` tuples. Grouping
    into lists keeps all recipes that share a score; keying a single tuple
    by score would let later recipes overwrite earlier ones.
    """
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    inputArr2 = [snowball.stem(word) for word in inputArr if word not in stopset]

    size_limit = len(inputArr) + maxMissing

    # recipe_id (as str) -> number of matching ingredient links seen.
    indx = {}
    for inpu in inputArr2:
        for trunk in dbSession.query(Trunk).filter(Trunk.name == inpu).all():
            ingredients = dbSession.query(Ingredient).filter(
                Ingredient.ingredient_id == trunk.ingredient_id
            ).all()
            for ingredient in ingredients:
                for link in ingredient.recipe:
                    link_count = dbSession.query(Link).filter(Link.recipe_id == link.recipe_id).count()
                    if link_count > size_limit:
                        continue
                    recipe_key = str(link.recipe_id)
                    indx[recipe_key] = indx.get(recipe_key, 0) + 1

    outDict = {}
    for key in indx:
        # Fetch the recipe once and reuse it for both name and ingredients.
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == key).first()
        if recipe is None:
            # Link row without a matching recipe: nothing to report.
            continue
        ingred = recipe.ingredients()
        score = calcOverlay(inputArr, ingred)
        outDict.setdefault(score, []).append((recipe.name, key, ingred))

    print(outDict)
#

def calcOverlay(l1, l2):
    """Count stem matches between search terms and ingredient strings.

    Every entry of ``l1`` is stemmed with the German Snowball stemmer. Each
    string in ``l2`` is tokenized; tokens that are German stopwords or one
    of the characters ``(``, ``)``, ``,`` are ignored, all others are
    stemmed. A token adds one to the result for every entry of ``l1`` whose
    stem equals the token's stem, so duplicate terms in ``l1`` count
    repeatedly.

    Returns the total number of such matches as an int.
    """
    stemmer = nltk.SnowballStemmer(language='german')
    ignored = set(stopwords.words('german')) | set("(),")
    wanted_stems = [stemmer.stem(term) for term in l1]

    matches = 0
    for entry in l2:
        for tok in nltk.word_tokenize(entry):
            if tok not in ignored:
                matches += wanted_stems.count(stemmer.stem(tok))
    return matches
#


# Run each search strategy once, in order, and print its elapsed
# wall-clock time in seconds surrounded by blank lines.
for strategy in (slow, faster, fastes):
    start = time.time()
    strategy()
    end = time.time()
    print("\n", end - start, "\n")
did stuff 2020-04-10 21:34:58 +00:00
added stemmed table 2020-04-11 20:49:43 +00:00			`from application.db import Session, Recipe, Ingredient, Link, Trunk`
			`import nltk as nltk`
			`from nltk.corpus import stopwords`
added migrate 2020-04-15 10:54:02 +00:00			`import time`
did stuff 2020-04-10 21:34:58 +00:00
			`dbSession = Session()`
added migrate 2020-04-15 10:54:02 +00:00			`inputArr = ["butter", "milch", "eier", "käse"]`
			`maxMissing = 4`
did stuff 2020-04-10 21:34:58 +00:00
			`def slow():`
			`recipes = dbSession.query(Recipe).all()`


			`arr = {}`
			`for recipe in recipes:`
			`rec = recipe`
			`recipe = recipe.ingredients()`
added migrate 2020-04-15 10:54:02 +00:00			`if len(recipe) > len(inputArr) + maxMissing:`
- 2020-04-10 22:42:37 +00:00			`continue`
did stuff 2020-04-10 21:34:58 +00:00			`counter = 0`
			`for i in inputArr:`
			`for x in recipe:`
			`if i in x:`
			`counter += 1`
			`continue`
			`counter = str(counter)`

			`if counter not in arr:`
			`arr[counter] = []`

added migrate 2020-04-15 10:54:02 +00:00			`arr[counter].append(rec.ingredients())`
did stuff 2020-04-10 21:34:58 +00:00			`#print(rec.name)`

added migrate 2020-04-15 10:54:02 +00:00			`# for y, x in arr.items():`
			`# for xx in x:`
			`# print(xx)`
did stuff 2020-04-10 21:34:58 +00:00
			`def faster():`
			`indx = {}`
			`for inpu in inputArr:`
			`ids = []`
			`for x in dbSession.query(Ingredient).filter(Ingredient.name.contains(inpu)).all():`
- 2020-04-10 22:42:37 +00:00
did stuff 2020-04-10 21:34:58 +00:00			`for y in x.recipe:`
- 2020-04-10 22:42:37 +00:00
added migrate 2020-04-15 10:54:02 +00:00			`if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:`
- 2020-04-10 22:42:37 +00:00			`continue`
			`if str(y.recipe_id) not in indx:`
			`indx[str(y.recipe_id)] = 0`
did stuff 2020-04-10 21:34:58 +00:00
- 2020-04-10 22:42:37 +00:00			`indx[str(y.recipe_id)] += 1`
did stuff 2020-04-10 21:34:58 +00:00

added migrate 2020-04-15 10:54:02 +00:00			`outDict = {}`
did stuff 2020-04-10 21:34:58 +00:00			`for key, value in indx.items():`
added migrate 2020-04-15 10:54:02 +00:00			`ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()`
			`outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)`

			`print(outDict)`


			`def fastes():`
			`indx = {}`
			`inputArr2 = []`

			`snowball = nltk.SnowballStemmer(language='german')`
			`stopset = set(stopwords.words('german'))`
			`for word in inputArr:`

			`if word in stopset:`
			`continue`
			`inputArr2.append(snowball.stem(word))`

			`for inpu in inputArr2:`
			`ids = []`
			`for xx in dbSession.query(Trunk).filter(Trunk.name == inpu).all():`
			`for x in dbSession.query(Ingredient).filter(xx.ingredient_id == Ingredient.ingredient_id).all():`
			`for y in x.recipe:`

			`if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:`
			`continue`
			`if str(y.recipe_id) not in indx:`
			`indx[str(y.recipe_id)] = 0`
added stemmed table 2020-04-11 20:49:43 +00:00
added migrate 2020-04-15 10:54:02 +00:00			`indx[str(y.recipe_id)] += 1`

			`outDict = {}`
			`for key, value in indx.items():`
			`ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()`
			`outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)`

			`print(outDict)`
added stemmed table 2020-04-11 20:49:43 +00:00			`#`

added migrate 2020-04-15 10:54:02 +00:00			`def calcOverlay(l1, l2):`
			`snowball = nltk.SnowballStemmer(language='german')`
added stemmed table 2020-04-11 20:49:43 +00:00			`stopset = set(stopwords.words('german'))`
			`stopset \|= set("(),")`

added migrate 2020-04-15 10:54:02 +00:00			`l1 = [snowball.stem(l) for l in l1 ]`
			`counter = 0`

			`for x in l2:`
			`for token in nltk.word_tokenize(x):`
			`if token in stopset:`
added stemmed table 2020-04-11 20:49:43 +00:00			`continue`
			`stemmed = snowball.stem(token)`
added migrate 2020-04-15 10:54:02 +00:00			`for l in l1:`
			`if l == stemmed:`
			`counter +=1`

			`return counter`
			`#`


			`start = time.time()`
			`slow()`
			`end = time.time()`
			`print("\n", end - start, "\n")`


			`start = time.time()`
			`faster()`
			`end = time.time()`
			`print("\n", end - start, "\n")`

added stemmed table 2020-04-11 20:49:43 +00:00
added migrate 2020-04-15 10:54:02 +00:00			`start = time.time()`
			`fastes()`
			`end = time.time()`
			`print("\n", end - start, "\n")`