new workflow works

2020-04-25 18:53:32 +02:00 · 2020-04-25 18:53:32 +02:00 · df22bb7f61
parent d145ecee55
commit df22bb7f61
6 changed files with 56 additions and 83 deletions
--- a/pycache/run.cpython-37.pyc
+++ b/pycache/run.cpython-37.pyc
--- a/pycache/search.cpython-37.pyc
+++ b/pycache/search.cpython-37.pyc
--- a/application/pycache/endpoints.cpython-37.pyc
+++ b/application/pycache/endpoints.cpython-37.pyc
--- a/application/endpoints.py
+++ b/application/endpoints.py
@ -20,15 +20,15 @@ class RecipeList(Resource):
        args = parser.parse_args()
        ingreds = args["ingred"]
-        ingreds = [migrate.stem(ingred)[0] for ingred in ingreds + search.defaultArr] 
+        ingreds = [migrate.stem(ingred)[0] for ingred in ingreds] 
        start = time.time()
        indx = search.search2(ingreds)
        end = time.time()
        print("get recipes",end - start, "\n")  
-        start = time.time()
+        #start = time.time()
-        recs = search.getRecDict(indx, ingreds)
+        recs = search.getRecDict2(indx, ingreds)
        end = time.time()
        print("calc overlay",end - start, "\n")  
--- a/application/static/main.js
+++ b/application/static/main.js
@ -58,7 +58,7 @@ function renderRecipeList(data){
            )
            recString = `
-                <a href="${data1[2]}"> 
+                <a href="${data1[2]}" target="_blank"> 
                    <div class="card text-white bg-primary mb-3" style="max-width: 100%">
                        <div class="card-body recipe-container">
                            <div class="row">
--- a/search.py
+++ b/search.py
@ -6,34 +6,19 @@ import nltk as nltk
 from nltk.corpus import stopwords
 import time
 import heapq
-from collections import Counter 
+from collections import Counter
 import migrate
 def fastes(inputArr):
    indx = {}
    dbSession = g.session
    for inpu in inputArr:
        ids = [] 
        for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():           
            if str(recipe_id[0]) not in indx:
                indx[str(recipe_id[0])] = 0
            indx[str(recipe_id[0])] += 1
    return(indx) 
 def search2(inputArr):
    indx = {}
    dbSession = db.Session()
    for inpu in inputArr:
-        ids = [] 
+        x = dbSession.query(db.Trunk.name, db.Recipe.recipe_id).filter(db.Trunk.name == inpu).join(db.IngredTrunk).join(
-        for recipe in dbSession.query(db.Trunk.ingredients.recipe).filter(db.Trunk.name == inpu).all(): 
+            db.Ingredient).join(db.RecIngred).join(db.Recipe).all()
            if str(recipe.recipe_id) not in indx:
                indx[str(recipe.recipe_id)] = 0
-            indx[str(recipe.recipe_id)] += 1
+        indx[inpu] = [str(y[1]) for y in x]
-    return(indx) 
+
    return(indx)
 def stemInput(inputArr):
@ -43,75 +28,63 @@ def stemInput(inputArr):
    stopset = set(stopwords.words('german'))
    for word in inputArr:
        if word in stopset:
-             continue
+            continue
        inputArr2.append(snowball.stem(word))
    return inputArr2
 #
-def getRecDict(indx, inputArr):
+def getRecDict2(indx, inputArr):
-    dbSession = g.session
+    dbSession = db.Session()
    outDict = {}
    k = Counter(indx) 
    # Finding 1000 highest values TODO: this is not correct
    indx = k.most_common(1000)  
    indx = dict(indx)
    for key, value in indx.items():
        ingred = [x[0] for x in dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()]
        outDict[calcOverlay(inputArr, ingred)] = int(key)
    outDict2 = {}
    for key in heapq.nlargest(10, outDict.keys()):
        key2 = outDict[key]
        rec = dbSession.query(Recipe).filter(Recipe.recipe_id==key2).first()
        outDict2[key] = (key2, rec.name, rec.url,  [r[0] + ": " + r[1] for r in dbSession.query(Ingredient.name, Ingredient.ingredient_amount).filter(Ingredient.recipe_id==key2).all()], rec.img.decode('utf-8'))
    return outDict2
-def printDict(indx, inputArr):
+    outDict = {}
-    outDict = getRecDict(indx, inputArr)
+    # 2d to 1d
-    for key, value in sorted(outDict.items()):
+    indx = sum(indx.values(), [])
-        if key >= 0.3:
+    k = Counter(indx)
-           
+    indx = k.most_common(1000)
-            print(key, value[0], value[1])
+    indx = dict(indx)
-            for xx in value[2]:
+
-                print("\t", xx[0])
+    ingred = [x for x in dbSession.query(db.Recipe.recipe_id, db.IngredTrunk.trunk_name, db.IngredTrunk.ingredient_name ).filter(db.Recipe.recipe_id.in_(indx.keys())).join(db.RecIngred).join(db.Ingredient).join(db.IngredTrunk).all()]
-    
+    ingredDict = {}
    for k,v, i in ingred:
        if k not in ingredDict:
            ingredDict[k] = {}
        if i not in ingredDict[k]:
            ingredDict[k][i] = []
        ingredDict[k][i].append(v)
    inputArr += defaultArr
    for key, value in ingredDict.items():
        overlay = calcOverlay2(inputArr, value)
        while overlay in outDict.keys():
            overlay -= 0.0001
        outDict[overlay] = int(key)
    outDict2 = {}
    for key in heapq.nlargest(20, outDict.keys()):
        key2 = outDict[key]
        rec = dbSession.query(db.Recipe).filter(db.Recipe.recipe_id == key2).first()
        outDict2[key] = (key2, rec.name, rec.url,  [r[0] + ": " + r[1] for r in dbSession.query(db.Ingredient.name,
                                                                                                db.RecIngred.ingredient_amount).join(db.RecIngred).join(db.Recipe).filter(db.Recipe.recipe_id == key2).all()], rec.img.decode('utf-8'))
    return outDict2
 def stem(l1):
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")
-
+    l1 = [snowball.stem(l) for l in l1]
    l1 =  [snowball.stem(l) for l in l1]
    return l1
-def calcOverlay(l1, l2):
+def calcOverlay2(l1, l2):
    counter = 0
-    for l in l1:
+    for ll in l2.values():
-        if l not in defaultArr:
+        for l in ll:
-            if l in l2:
+            if l in l1:
-                #print(l)
+                counter += 1
-                counter +=1
+                break
-    counter = counter / len(l2)                 
+
    counter = counter / len(l2)
    return counter
-
+# it is assumed that everyone has this
-#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"]
+defaultArr = ["Wasser", "salz", "pfeffer"]
-defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
+defaultArr = stem(defaultArr)
 #inputArr += defaultArr
 maxMissing = 10
 #
 #stemmed = stemInput(inputArr)
 #
 #start = time.time()
 #indx = faster(stemmed)  
 #end = time.time()
 #printDict(indx)
 #print("\n", end - start, "\n")  
 #
 #
 #start = time.time()
 #indx = fastes(stemmed)
 #end = time.time()
 #printDict(indx)
 #print("\n", end - start, "\n")