new workflow works

2020-04-25 18:53:32 +02:00 · 2020-04-25 18:53:32 +02:00 · df22bb7f61
parent d145ecee55
commit df22bb7f61
6 changed files with 56 additions and 83 deletions
--- a/__pycache__/run.cpython-37.pyc
+++ b/__pycache__/run.cpython-37.pyc
--- a/__pycache__/search.cpython-37.pyc
+++ b/__pycache__/search.cpython-37.pyc
--- a/application/__pycache__/endpoints.cpython-37.pyc
+++ b/application/__pycache__/endpoints.cpython-37.pyc
--- a/application/endpoints.py
+++ b/application/endpoints.py
@@ -20,15 +20,15 @@ class RecipeList(Resource):
        args = parser.parse_args()
        ingreds = args["ingred"]

-        ingreds = [migrate.stem(ingred)[0] for ingred in ingreds + search.defaultArr] 
+        ingreds = [migrate.stem(ingred)[0] for ingred in ingreds] 

        start = time.time()
        indx = search.search2(ingreds)
        end = time.time()
        print("get recipes",end - start, "\n")  

-        start = time.time()
-        recs = search.getRecDict(indx, ingreds)
+        #start = time.time()
+        recs = search.getRecDict2(indx, ingreds)
        end = time.time()
        print("calc overlay",end - start, "\n")  

--- a/application/static/main.js
+++ b/application/static/main.js
@@ -58,7 +58,7 @@ function renderRecipeList(data){
            )
            
            recString = `
-                <a href="${data1[2]}"> 
+                <a href="${data1[2]}" target="_blank"> 
                    <div class="card text-white bg-primary mb-3" style="max-width: 100%">
                        <div class="card-body recipe-container">
                            <div class="row">
--- a/search.py
+++ b/search.py
@@ -6,34 +6,19 @@ import nltk as nltk
 from nltk.corpus import stopwords
 import time
 import heapq
-from collections import Counter 
+from collections import Counter
+import migrate

-
-
-def fastes(inputArr):
-    indx = {}
-    dbSession = g.session
-    for inpu in inputArr:
-        ids = [] 
-        for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():           
-            if str(recipe_id[0]) not in indx:
-                indx[str(recipe_id[0])] = 0
-
-            indx[str(recipe_id[0])] += 1
-    return(indx) 
-    
 def search2(inputArr):
    indx = {}
    dbSession = db.Session()
    for inpu in inputArr:
-        ids = [] 
-        for recipe in dbSession.query(db.Trunk.ingredients.recipe).filter(db.Trunk.name == inpu).all(): 
-    
-            if str(recipe.recipe_id) not in indx:
-                indx[str(recipe.recipe_id)] = 0
+        x = dbSession.query(db.Trunk.name, db.Recipe.recipe_id).filter(db.Trunk.name == inpu).join(db.IngredTrunk).join(
+            db.Ingredient).join(db.RecIngred).join(db.Recipe).all()

-            indx[str(recipe.recipe_id)] += 1
-    return(indx) 
+        indx[inpu] = [str(y[1]) for y in x]
+
+    return(indx)


 def stemInput(inputArr):
@@ -43,75 +28,63 @@ def stemInput(inputArr):
    stopset = set(stopwords.words('german'))
    for word in inputArr:
        if word in stopset:
-             continue
+            continue
        inputArr2.append(snowball.stem(word))
    return inputArr2
 #

-def getRecDict(indx, inputArr):
-    dbSession = g.session
-    
-    outDict = {}
-    k = Counter(indx) 
-    # Finding 1000 highest values TODO: this is not correct
-    indx = k.most_common(1000)  
-    indx = dict(indx)
-    for key, value in indx.items():
-        ingred = [x[0] for x in dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()]
-        outDict[calcOverlay(inputArr, ingred)] = int(key)
-        
-    outDict2 = {}
-    for key in heapq.nlargest(10, outDict.keys()):
-        key2 = outDict[key]
-        rec = dbSession.query(Recipe).filter(Recipe.recipe_id==key2).first()
-        outDict2[key] = (key2, rec.name, rec.url,  [r[0] + ": " + r[1] for r in dbSession.query(Ingredient.name, Ingredient.ingredient_amount).filter(Ingredient.recipe_id==key2).all()], rec.img.decode('utf-8'))
-    return outDict2
+def getRecDict2(indx, inputArr):
+    dbSession = db.Session()

-def printDict(indx, inputArr):
-    outDict = getRecDict(indx, inputArr)
-    for key, value in sorted(outDict.items()):
-        if key >= 0.3:
-           
-            print(key, value[0], value[1])
-            for xx in value[2]:
-                print("\t", xx[0])
-    
+    outDict = {}
+    # 2d to 1d
+    indx = sum(indx.values(), [])
+    k = Counter(indx)
+    indx = k.most_common(1000)
+    indx = dict(indx)
+
+    ingred = [x for x in dbSession.query(db.Recipe.recipe_id, db.IngredTrunk.trunk_name, db.IngredTrunk.ingredient_name ).filter(db.Recipe.recipe_id.in_(indx.keys())).join(db.RecIngred).join(db.Ingredient).join(db.IngredTrunk).all()]
+    ingredDict = {}
+    for k,v, i in ingred:
+        if k not in ingredDict:
+            ingredDict[k] = {}
+        if i not in ingredDict[k]:
+            ingredDict[k][i] = []
+
+        ingredDict[k][i].append(v)
+    inputArr += defaultArr
+    for key, value in ingredDict.items():
+        overlay = calcOverlay2(inputArr, value)
+        while overlay in outDict.keys():
+            overlay -= 0.0001
+        outDict[overlay] = int(key)
+
+    outDict2 = {}
+    for key in heapq.nlargest(20, outDict.keys()):
+        key2 = outDict[key]
+        rec = dbSession.query(db.Recipe).filter(db.Recipe.recipe_id == key2).first()
+        outDict2[key] = (key2, rec.name, rec.url,  [r[0] + ": " + r[1] for r in dbSession.query(db.Ingredient.name,
+                                                                                                db.RecIngred.ingredient_amount).join(db.RecIngred).join(db.Recipe).filter(db.Recipe.recipe_id == key2).all()], rec.img.decode('utf-8'))
+    return outDict2

 def stem(l1):
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")
-
-    l1 =  [snowball.stem(l) for l in l1]
+    l1 = [snowball.stem(l) for l in l1]
    return l1

-def calcOverlay(l1, l2):
+def calcOverlay2(l1, l2):
    counter = 0
-    for l in l1:
-        if l not in defaultArr:
-            if l in l2:
-                #print(l)
-                counter +=1
-    counter = counter / len(l2)                 
+    for ll in l2.values():
+        for l in ll:
+            if l in l1:
+                counter += 1
+                break
+
+    counter = counter / len(l2)
    return counter

-
-#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"]
-defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
-#inputArr += defaultArr
-maxMissing = 10
-#
-#stemmed = stemInput(inputArr)
-#
-#start = time.time()
-#indx = faster(stemmed)  
-#end = time.time()
-#printDict(indx)
-#print("\n", end - start, "\n")  
-#
-#
-#start = time.time()
-#indx = fastes(stemmed)
-#end = time.time()
-#printDict(indx)
-#print("\n", end - start, "\n")  
+# it is assumed that everyone has this
+defaultArr = ["Wasser", "salz", "pfeffer"]
+defaultArr = stem(defaultArr)