added migrate

This commit is contained in:
Askill 2020-04-15 12:54:02 +02:00
parent 17361c16ee
commit ffc41d617b
5 changed files with 128 additions and 25 deletions

View File

@ -7,7 +7,7 @@ import enum
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from flask import Flask from flask import Flask
engine = db.create_engine('sqlite:///./test.sqlite', echo=False) engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8")
connection = engine.connect() connection = engine.connect()
Base = declarative_base() Base = declarative_base()
Session = sessionmaker(bind=engine) Session = sessionmaker(bind=engine)

26
migrate.py Normal file
View File

@ -0,0 +1,26 @@
import json
import cv2
import base64
from application.db import Session, Recipe, Ingredient, Link, Trunk
def migrate(path):
    """Import a scraped-recipe JSON dump into the database.

    The JSON file maps recipe name -> [instructions, {ingredient: amount},
    url, image-string]; each entry becomes one Recipe row with its
    Ingredient rows attached through the Link association object.

    :param path: path to the UTF-8 encoded JSON dump
    """
    with open(path, encoding="utf-8") as file:
        recs = json.load(file)

    db_session = Session()
    for name, entry in recs.items():
        instructions, ingredients, url, img = entry[0], entry[1], entry[2], entry[3]
        recipe = Recipe(name=name, instructions=instructions, url=url,
                        img=img.encode())
        for ingredient_name, amount in ingredients.items():
            link = Link(ingredient_amount=amount)
            link.ingredient = Ingredient(name=ingredient_name)
            recipe.ingredient.append(link)
        db_session.add(recipe)
    # single commit for the whole import instead of one commit per recipe
    db_session.commit()


if __name__ == "__main__":
    # guard so importing this module does not re-run the migration
    migrate('./data/recs.json')

27
mine.py
View File

@ -9,6 +9,9 @@ import traceback
import cv2 import cv2
import base64 import base64
from application.db import Session, Recipe, Ingredient, Link, Trunk from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk
from nltk.corpus import stopwords
header_values = { header_values = {
'name': 'Michael Foord', 'name': 'Michael Foord',
@ -123,6 +126,23 @@ def getRecipe(links):
sleep(random.randint(0, 5)) sleep(random.randint(0, 5))
return recs return recs
def stemIngred():
    """Tokenize every Ingredient name, stem each token with the German
    Snowball stemmer, and attach the stems as Trunk rows so lookups can
    later match on word stems instead of exact names.

    Prints rough progress (ingredient_id / total row count) after each
    per-ingredient commit.
    """
    dbSession = Session()
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")
    count = dbSession.query(Ingredient).count()
    # the stemmer is stateless and loop-invariant -- build it once,
    # not once per ingredient as before
    snowball = nltk.SnowballStemmer(language='german')
    for ingredient in dbSession.query(Ingredient).all():
        for token in nltk.word_tokenize(ingredient.name):
            # skip stopwords/punctuation and very short fragments
            if token in stopset or len(token) < 3:
                continue
            ingredient.trunks.append(Trunk(name=snowball.stem(token)))
        dbSession.commit()
        print(ingredient.ingredient_id / count)
#links = getLinks() #links = getLinks()
#with open('./data/links.json', 'w') as file: #with open('./data/links.json', 'w') as file:
# jsonString = json.dumps(links) # jsonString = json.dumps(links)
@ -132,8 +152,9 @@ with open('./data/links.json') as file:
links = json.load(file) links = json.load(file)
recs = getRecipe(links) #recs = getRecipe(links)
stemIngred()
with open('./data/recs.json', 'w', encoding="utf-8") as file: #with open('./data/recs.json', 'w', encoding="utf-8") as file:
json.dump(recs, file, ensure_ascii=False) # json.dump(recs, file, ensure_ascii=False)

98
test.py
View File

@ -2,9 +2,11 @@
from application.db import Session, Recipe, Ingredient, Link, Trunk from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk import nltk as nltk
from nltk.corpus import stopwords from nltk.corpus import stopwords
import time
dbSession = Session() dbSession = Session()
inputArr = ["kartoffeln", "zwiebel", "steak", "würfel"] inputArr = ["butter", "milch", "eier", "käse"]
maxMissing = 4
def slow(): def slow():
recipes = dbSession.query(Recipe).all() recipes = dbSession.query(Recipe).all()
@ -14,7 +16,7 @@ def slow():
for recipe in recipes: for recipe in recipes:
rec = recipe rec = recipe
recipe = recipe.ingredients() recipe = recipe.ingredients()
if len(recipe) > len(inputArr) + 2: if len(recipe) > len(inputArr) + maxMissing:
continue continue
counter = 0 counter = 0
for i in inputArr: for i in inputArr:
@ -27,11 +29,12 @@ def slow():
if counter not in arr: if counter not in arr:
arr[counter] = [] arr[counter] = []
arr[counter].append(rec.ingredients()) arr[counter].append(rec.ingredients())
#print(rec.name) #print(rec.name)
# for y, x in arr.items():
print(arr) # for xx in x:
# print(xx)
def faster(): def faster():
indx = {} indx = {}
@ -41,7 +44,7 @@ def faster():
for y in x.recipe: for y in x.recipe:
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + 5: if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
continue continue
if str(y.recipe_id) not in indx: if str(y.recipe_id) not in indx:
indx[str(y.recipe_id)] = 0 indx[str(y.recipe_id)] = 0
@ -49,28 +52,81 @@ def faster():
indx[str(y.recipe_id)] += 1 indx[str(y.recipe_id)] += 1
outDict = {}
for key, value in indx.items(): for key, value in indx.items():
if value >= len(inputArr): ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
print(dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()) outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
#print(key)
print(outDict)
def fastes():
    """Match the stemmed query words in inputArr against the pre-computed
    Trunk (stem) table, count per-recipe hits, and print candidate
    recipes scored by calcOverlay.

    NOTE(review): outDict is keyed by the overlap score, so two recipes
    with the same score overwrite each other -- kept as-is to preserve
    the original output shape; confirm whether a list per score is wanted.
    """
    indx = {}
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    # stem the query terms, dropping German stopwords
    stemmed_input = [snowball.stem(word) for word in inputArr
                     if word not in stopset]
    for stem in stemmed_input:
        for trunk in dbSession.query(Trunk).filter(Trunk.name == stem).all():
            for ingredient in dbSession.query(Ingredient).filter(
                    trunk.ingredient_id == Ingredient.ingredient_id).all():
                for link in ingredient.recipe:
                    # skip recipes needing too many ingredients we lack
                    if dbSession.query(Link).filter(
                            Link.recipe_id == link.recipe_id).count() > len(inputArr) + maxMissing:
                        continue
                    key = str(link.recipe_id)
                    indx[key] = indx.get(key, 0) + 1
    outDict = {}
    for key in indx:
        # fetch the recipe once instead of twice per key as before
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == key).first()
        ingred = recipe.ingredients()
        outDict[calcOverlay(inputArr, ingred)] = (recipe.name, key, ingred)
    print(outDict)
# #
def stemIngred(): def calcOverlay(l1, l2):
snowball = nltk.SnowballStemmer(language='german')
stopset = set(stopwords.words('german')) stopset = set(stopwords.words('german'))
stopset |= set("(),") stopset |= set("(),")
count = dbSession.query(Ingredient).count() l1 = [snowball.stem(l) for l in l1 ]
for x in dbSession.query(Ingredient).all(): counter = 0
snowball = nltk.SnowballStemmer(language='german')
for token in nltk.word_tokenize(x.name): for x in l2:
if token in stopset or len(token) < 3: for token in nltk.word_tokenize(x):
if token in stopset:
continue continue
stemmed = snowball.stem(token) stemmed = snowball.stem(token)
for l in l1:
if l == stemmed:
counter +=1
x.trunks.append(Trunk(name=stemmed)) return counter
dbSession.commit() #
print(x.ingredient_id/count)
#faster()
#slow() start = time.time()
print(dbSession.query(Trunk.name).all()) slow()
end = time.time()
print("\n", end - start, "\n")
# Time each lookup strategy and print the elapsed seconds.
start = time.time()
faster()
print("\n", time.time() - start, "\n")

start = time.time()
fastes()
print("\n", time.time() - start, "\n")