optimization

2020-04-18 10:18:13 +02:00 · 2020-04-18 10:18:13 +02:00 · 002c72094d
parent 04a1774e74
commit 002c72094d
5 changed files with 33 additions and 13 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+
+data/recs.json
--- a/__pycache__/search.cpython-37.pyc
+++ b/__pycache__/search.cpython-37.pyc
--- a/application/__pycache__/endpoints.cpython-37.pyc
+++ b/application/__pycache__/endpoints.cpython-37.pyc
--- a/application/endpoints.py
+++ b/application/endpoints.py
@@ -7,6 +7,7 @@ import base64
 from application.db import Session, Recipe, Ingredient
 import search
 import migrate
+import time

 class RecipeList(Resource):
    def get(self):
@@ -16,12 +17,23 @@ class RecipeList(Resource):
        parser.add_argument('ingred', type=str,  action='append')
        args = parser.parse_args()
        ingreds = args["ingred"]
-        ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr] 

-        indx = search.fastes(ingreds )
+        start = time.time()
+        ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr] 
+        end = time.time()
+        print("stem", end - start, "\n")  
+
+
+        start = time.time()
+        indx = search.fastes(ingreds)
+        end = time.time()
+        print("get recipes",end - start, "\n")  
+
+        start = time.time()
        recs = search.getRecDict(indx, ingreds )
-        
-        #print(recs)
+        end = time.time()
+        print("calc overlay",end - start, "\n")  
+      

        return flask.make_response(flask.jsonify({'data': recs}), 200)

--- a/search.py
+++ b/search.py
@@ -3,6 +3,7 @@ from application.db import Session, Recipe, Ingredient, Trunk
 import nltk as nltk
 from nltk.corpus import stopwords
 import time
+import heapq

 dbSession = Session()

@@ -72,18 +73,20 @@ def stemInput(inputArr):
 #

 def getRecDict(indx, inputArr):
+    #inputArr = stem(inputArr)
    outDict = {}
    for key, value in indx.items():
        ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()
-        outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key).all())
-    return outDict
+        outDict[calcOverlay(inputArr, ingred)] = int(key)
+        
+    outDict2 = {}
+    for key in heapq.nlargest(10, outDict.keys()):
+        key2 = outDict[key]
+        outDict2[key] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key2).first().name, key2, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key2).all())
+    return outDict2

 def printDict(indx, inputArr):
-    outDict = {}
-    for key, value in indx.items():
-        ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()
-        outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key).all())
-    
+    outDict = getRecDict(indx, inputArr)
    for key, value in sorted(outDict.items()):
        if key >= 0.3:
           
@@ -92,12 +95,15 @@ def printDict(indx, inputArr):
                print("\t", xx[0])
    

-def calcOverlay(l1, l2):
+def stem(l1):
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")

-    l1 =  [snowball.stem(l) for l in l1 ]
+    l1 =  [snowball.stem(l) for l in l1]
+    return l1
+
+def calcOverlay(l1, l2):
    counter = 0

    for x in l2: