added api

calcOverlay is way too slow
2020-04-17 22:29:50 +02:00 · 2020-04-17 22:29:50 +02:00 · 04a1774e74
parent cfd9d0fcc8
commit 04a1774e74
12 changed files with 69 additions and 2069 deletions
--- a/pycache/migrate.cpython-37.pyc
+++ b/pycache/migrate.cpython-37.pyc
--- a/pycache/run.cpython-37.pyc
+++ b/pycache/run.cpython-37.pyc
--- a/pycache/search.cpython-37.pyc
+++ b/pycache/search.cpython-37.pyc
--- a/application/init.py
+++ b/application/init.py
@ -11,7 +11,7 @@ from flask_sqlalchemy import SQLAlchemy
 app = Flask(__name__)
 api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger')

-api.add_resource(endpoints.Recipe,'/api/v1/recipe/')
+api.add_resource(endpoints.RecipeList,'/api/v1/recipe/')

@app.route("/")
 def index():
--- a/application/pycache/init.cpython-37.pyc
+++ b/application/pycache/init.cpython-37.pyc
--- a/application/pycache/endpoints.cpython-37.pyc
+++ b/application/pycache/endpoints.cpython-37.pyc
--- a/application/endpoints.py
+++ b/application/endpoints.py
@ -5,19 +5,24 @@ import application.config as config
 import json
 import base64
 from application.db import Session, Recipe, Ingredient
+import search
+import migrate

-class Recipe(Resource):
+class RecipeList(Resource):
    def get(self):
        """  """
-        try:
-            parser = reqparse.RequestParser()
-            parser.add_argument('useFace', type=bool, required=False)
-            args = parser.parse_args()
+    
+        parser = reqparse.RequestParser()
+        parser.add_argument('ingred', type=str,  action='append')
+        args = parser.parse_args()
+        ingreds = args["ingred"]
+        ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr] 

-            session = Session()
+        indx = search.fastes(ingreds )
+        recs = search.getRecDict(indx, ingreds )
+        
+        #print(recs)
+
+        return flask.make_response(flask.jsonify({'data': recs}), 200)

-            return flask.make_response(flask.jsonify({'data': args}), 200)
-        except Exception as e:
-            print("error: -", e)
-            return flask.make_response(flask.jsonify({'error': str(e)}), 400)

--- a/data/links.json
+++ b/data/links.json
--- a/data/recs.json
+++ b/data/recs.json
--- a/migrate.py
+++ b/migrate.py
@ -44,4 +44,4 @@ def migrate(path):
        counter+=1
        print(counter/leng)

-migrate('./data/recs.json')
+#migrate('./data/recs.json')
--- a/mine.py
+++ b/mine.py
@ -34,8 +34,8 @@ def getLinks():
        # converts: 344.621 Ergebnisse to int(344621)
        #max = int(tree.xpath(
        #    '/html/body/main/div[1]/h1/span/text()')[0].split(" ")[0].replace(".", ""))
-        max = 2000 # get 2000 recepies :)
-        for i in range(0, max, 30):
+        max = 10000 # get 2000 recepies :)
+        for i in range(2000, max, 30):
            try:
                root = "https://www.chefkoch.de/rs/s" + \
                    str(i) + "/Rezepte.html"
@ -57,7 +57,7 @@ def getLinks():
                i -= 30
                sleep(10)

-            sleep(random.randint(0, 5))
+            sleep(random.randint(1, 4))

        print(links)
    return links
@ -110,20 +110,21 @@ def getRecipe(links):
                        except:
                            amount = ""
                    #print(stuff, amount)
-                    a = Link(ingredient_amount=amount)
-                    a.ingredient = Ingredient(name=stuff)
-                    r.ingredient.append(a)
-                    dbSession.add(r)
-                    dbSession.commit()
+                    #a = Link(ingredient_amount=amount)
+                    #a.ingredient = Ingredient(name=stuff)
+                    #r.ingredient.append(a)
+                    #dbSession.add(r)
+                    #dbSession.commit()
                    
                    ingredDict[stuff] = amount
                recs[name] = [resString, ingredDict, link, img.decode("utf-8")]
-                print("")
+                if counter % 20 == 0:
+                    print(counter)
            except Exception as e:
                print(traceback.format_exc())
                
            print(format(counter/len(links), '.2f'), link)
-            sleep(random.randint(0, 5))
+            sleep(random.randint(0, 6))
    return recs


@ -149,9 +150,9 @@ def stemIngred():
 #with open('./data/links.json', 'w') as file:
 #    jsonString = json.dumps(links)
 #    file.write(jsonString)
-links = ""
-with open('./data/links.json') as file:
-    links = json.load(file)
+#links = ""
+#with open('./data/links.json') as file:
+#    links = json.load(file)
    

 #recs = getRecipe(links)
--- a/search.py
+++ b/search.py
@ -5,9 +5,6 @@ from nltk.corpus import stopwords
 import time

 dbSession = Session()
-inputArr = ["butter", "milch", "eier", "mehl", "zucker"]
-inputArr += ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
-maxMissing = 10

 def slow():
    recipes = dbSession.query(Recipe).all()
@ -40,7 +37,6 @@ def slow():
 def faster(inputArr):
    indx = {}
 
-
    for inpu in inputArr:
        ids = [] 
        for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():
@ -49,7 +45,6 @@ def faster(inputArr):

            indx[str(x[0])] += 1
        
-
    return(indx)

 def fastes(inputArr):
@ -57,9 +52,7 @@ def fastes(inputArr):

    for inpu in inputArr:
        ids = [] 
-        for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
-                
-
+        for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():           
            if str(recipe_id[0]) not in indx:
                indx[str(recipe_id[0])] = 0

@ -72,22 +65,31 @@ def stemInput(inputArr):
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    for word in inputArr:
-
        if word in stopset:
             continue
        inputArr2.append(snowball.stem(word))
-
    return inputArr2
 #
-def printDict(indx):
+
+def getRecDict(indx, inputArr):
    outDict = {}
-    for key, value in sorted(indx.items()):
+    for key, value in indx.items():
+        ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()
+        outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key).all())
+    return outDict
+
+def printDict(indx, inputArr):
+    outDict = {}
+    for key, value in indx.items():
        ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()
        outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, dbSession.query(Ingredient.name).filter(Ingredient.recipe_id==key).all())
    
-    for key, value in outDict.items():
-        if key >= 0.7:
-            print(key, value)
+    for key, value in sorted(outDict.items()):
+        if key >= 0.3:
+           
+            print(key, value[0], value[1])
+            for xx in value[2]:
+                print("\t", xx[0])
    

 def calcOverlay(l1, l2):
@ -100,25 +102,29 @@ def calcOverlay(l1, l2):

    for x in l2:
        for l in l1:
-            if l == x[0]:
+            if l not in defaultArr and l == x[0]:
                #print(l)
                counter +=1
    counter = counter / len(l2)                 
    return counter


-
-stemmed = stemInput(inputArr)
-
-start = time.time()
-indx = faster(stemmed)  
-end = time.time()
-printDict(indx)
-print("\n", end - start, "\n")  
-
-
-start = time.time()
-indx = fastes(stemmed)
-end = time.time()
-printDict(indx)
-print("\n", end - start, "\n")  
+#inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"]
+defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
+#inputArr += defaultArr
+maxMissing = 10
+#
+#stemmed = stemInput(inputArr)
+#
+#start = time.time()
+#indx = faster(stemmed)  
+#end = time.time()
+#printDict(indx)
+#print("\n", end - start, "\n")  
+#
+#
+#start = time.time()
+#indx = fastes(stemmed)
+#end = time.time()
+#printDict(indx)
+#print("\n", end - start, "\n")