.

2020-04-26 23:44:25 +02:00 · 2020-04-26 23:44:25 +02:00 · 48dd8d5179
parent b70dc0bb54
commit 48dd8d5179
39 changed files with 234 additions and 297 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,5 @@
 data/recs.json
 backup.sql
 app/data/
--- a/pycache/migrate.cpython-37.pyc
+++ b/pycache/migrate.cpython-37.pyc
--- a/app/Dockerfile
+++ b/app/Dockerfile
--- a/app/application/init.py
+++ b/app/application/init.py
@ -5,12 +5,12 @@ import os
 from json import dumps
 import application.endpoints as endpoints
 import application.config as config
 from application.db import Session 
 app = Flask(__name__)
 api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger')
 api.add_resource(endpoints.RecipeList,'/api/v1/recipe/')
 api.add_resource(endpoints.Images,'/api/v1/images/<string:id>')
@app.route("/")
 def index():
--- a/app/application/pycache/init.cpython-37.pyc
+++ b/app/application/pycache/init.cpython-37.pyc
--- a/app/application/pycache/app.cpython-37.pyc
+++ b/app/application/pycache/app.cpython-37.pyc
--- a/app/application/pycache/config.cpython-37.pyc
+++ b/app/application/pycache/config.cpython-37.pyc
--- a/app/application/pycache/db.cpython-37.pyc
+++ b/app/application/pycache/db.cpython-37.pyc
--- a/app/application/pycache/db2.cpython-37.pyc
+++ b/app/application/pycache/db2.cpython-37.pyc
--- a/app/application/pycache/endpoints.cpython-37.pyc
+++ b/app/application/pycache/endpoints.cpython-37.pyc
--- a/app/application/pycache/search.cpython-37.pyc
+++ b/app/application/pycache/search.cpython-37.pyc
--- a/app/application/config.py
+++ b/app/application/config.py
--- a/app/application/db2.py
+++ b/app/application/db2.py
@ -10,7 +10,7 @@ import time
 engine = db.create_engine('mysql+mysqldb://root@server/fs2?charset=utf8mb4', echo=False, encoding="utf8", pool_size=1000, max_overflow=0)
 Base = declarative_base()
-Session = sessionmaker(bind=engine)
+Session = sessionmaker(bind=engine, autoflush=False)
 # https://docs.sqlalchemy.org/en/13/orm/basic_relationships.html#association-object
@ -20,7 +20,7 @@ class Recipe(Base):
    name = Column('name', Text)
    instructions = Column('instructions', Text)
    url = Column('url', Text)
-    img = Column('img', LargeBinary)
+    img = Column('img', LargeBinary(length=(2**32)-1))
    imgURL = Column('imgURL', Text)
    ingredient = relationship("RecIngred", back_populates="recipe")
@ -30,25 +30,26 @@ class RecIngred(Base):
    ingredient_amount = Column('ingredient_amount', Text)
    ingredient_amount_mu = Column('ingredient_amount_mu', Text)    # measurement unit
    recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'), primary_key=True)
    ingredient_name = Column(String(50), ForeignKey('ingredient.name'), primary_key=True)
    recipe = relationship("Recipe", back_populates="ingredient")
    ingredient = relationship("Ingredient", back_populates="recipe")
    recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'), primary_key=True)
    ingredient_name = Column(String(200), ForeignKey('ingredient.name'), primary_key=True)
 class Ingredient(Base):
    __tablename__ = "ingredient"
-    name = Column('name', String(50), primary_key=True)
+    name = Column('name', String(200), primary_key=True)
    recipe = relationship("RecIngred", back_populates="ingredient")
    trunks = relationship("IngredTrunk", back_populates="ingredient")
 class IngredTrunk(Base):
    __tablename__ = 'ingredtrunk'
-    ingredient_name = Column(String(50), ForeignKey('ingredient.name'), primary_key=True)
+    ingredient_name = Column(String(200), ForeignKey('ingredient.name'), primary_key=True)
-    trunk_name = Column(String(50), ForeignKey('ingredient.name'), primary_key=True)
+    trunk_name = Column(String(50), ForeignKey('trunk.name'), primary_key=True)
-    ingredient = relationship("Ingredient", back_populates="trunk")
+    ingredient = relationship("Ingredient", back_populates="trunks")
-    trunk = relationship("Trunk", back_populates="ingredient")
+    trunk = relationship("Trunk", back_populates="ingredients")
 class Trunk(Base):
    __tablename__ = "trunk"
--- a/app/application/endpoints.py
+++ b/app/application/endpoints.py
@ -5,9 +5,9 @@ import requests
 import application.config as config
 import json
 import base64
-from application.db import Session, Recipe, Ingredient
+from application.db2 import Session, Recipe
-import search
+import application.search as search
-import migrate
+import background.migrate as migrate
 import time
 class RecipeList(Resource):
@ -20,15 +20,15 @@ class RecipeList(Resource):
        args = parser.parse_args()
        ingreds = args["ingred"]
-        ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr] 
+        ingreds = [migrate.stem(ingred)[0] for ingred in ingreds] 
        start = time.time()
-        indx = search.fastes(ingreds)
+        indx = search.search2(ingreds)
        end = time.time()
        print("get recipes",end - start, "\n")  
-        start = time.time()
+        #start = time.time()
-        recs = search.getRecDict(indx, ingreds)
+        recs = search.getRecDict2(indx, ingreds)
        end = time.time()
        print("calc overlay",end - start, "\n")  
@ -36,5 +36,15 @@ class RecipeList(Resource):
        g.session.close()
        return flask.make_response(flask.jsonify({'data': recs}), 200)
 class Images(Resource):
    """REST resource that serves the stored image of a recipe."""

    def get(self, id = None):
        """Return the image of the recipe with primary key ``id``.

        The stored ``img`` value is base64-decoded and sent with the
        ``image/png`` MIME type. A missing ``id`` yields a 401 JSON error,
        an unknown recipe or empty image a 404 JSON error.
        """
        if id is None:
            # The original built this response but never returned it.
            return flask.make_response(flask.jsonify({'error': "No ID supplied"}), 401)
        session = Session()
        try:
            row = session.query(Recipe.img).filter(Recipe.recipe_id == id).first()
        finally:
            # Always hand the connection back to the pool.
            session.close()
        if row is None or row[0] is None:
            # ``.first()`` returns None for an unknown id; indexing it crashed before.
            return flask.make_response(flask.jsonify({'error': "Image not found"}), 404)
        image = row[0]
        # b64decode accepts bytes directly; only text needs encoding first.
        if isinstance(image, str):
            image = image.encode()
        image = base64.b64decode(image)
        return flask.Response(image,  mimetype='image/png')
--- a/app/application/search.py
+++ b/app/application/search.py
@ -0,0 +1,89 @@
 import application.db2 as db
 from flask import g
 import nltk as nltk
 from nltk.corpus import stopwords
 import time
 import heapq
 from collections import Counter
 import background.migrate
 def search2(inputArr):
    """Look up, for every search term, the ids of the recipes that use it.

    Args:
        inputArr: iterable of trunk names to search for.

    Returns:
        dict mapping each term to a list of recipe ids as strings, one entry
        per joined row (so an id may appear more than once).
    """
    indx = {}
    dbSession = db.Session()
    try:
        for inpu in inputArr:
            rows = (
                dbSession.query(db.Trunk.name, db.Recipe.recipe_id)
                .filter(db.Trunk.name == inpu)
                .join(db.IngredTrunk)
                .join(db.Ingredient)
                .join(db.RecIngred)
                .join(db.Recipe)
                .all()
            )
            indx[inpu] = [str(row[1]) for row in rows]
    finally:
        # Release the pooled connection; the original never closed the session.
        dbSession.close()
    return indx
 def stemInput(inputArr):
    """Return the German Snowball stems of all words that are not stop words.

    Words are compared against the stop-word list before stemming.
    """
    stemmer = nltk.SnowballStemmer(language='german')
    german_stopwords = set(stopwords.words('german'))
    return [stemmer.stem(term) for term in inputArr if term not in german_stopwords]
 #
 def getRecDict2(indx, inputArr):
    """Rank candidate recipes by the share of their ingredients that is covered.

    Args:
        indx: dict mapping a search term to a list of recipe ids (the shape
            returned by ``search2``).
        inputArr: list of search terms; it is read but no longer modified.

    Returns:
        dict mapping an overlay score to the tuple
        ``(recipe_id, name, url, ["ingredient: amount", ...], img)`` for the
        20 highest-scoring recipes. ``img`` is ``None`` when no image is stored.
    """
    dbSession = db.Session()
    try:
        # Count how often each recipe id was hit across all terms.
        # Counter.update replaces the quadratic ``sum(lists, [])`` flattening
        # and yields the same counts in the same first-seen order.
        hits = Counter()
        for ids in indx.values():
            hits.update(ids)
        candidates = dict(hits.most_common(1000))
        if not candidates:
            # Nothing matched; skip the queries (an empty IN () finds nothing anyway).
            return {}

        rows = dbSession.query(db.Recipe.recipe_id, db.IngredTrunk.trunk_name, db.IngredTrunk.ingredient_name).filter(db.Recipe.recipe_id.in_(candidates.keys())).join(db.RecIngred).join(db.Ingredient).join(db.IngredTrunk).all()

        # recipe id -> ingredient name -> list of trunk names
        ingredDict = {}
        for recipe_id, trunk_name, ingredient_name in rows:
            ingredDict.setdefault(recipe_id, {}).setdefault(ingredient_name, []).append(trunk_name)

        # Build a new list instead of ``inputArr += defaultArr`` so the
        # caller's list is left untouched.
        available = list(inputArr) + defaultArr

        outDict = {}
        for recipe_id, trunks_by_ingredient in ingredDict.items():
            overlay = calcOverlay2(available, trunks_by_ingredient)
            # Scores serve as dict keys; nudge ties down so no recipe is overwritten.
            while overlay in outDict:
                overlay -= 0.0001
            outDict[overlay] = int(recipe_id)

        outDict2 = {}
        for score in heapq.nlargest(20, outDict):
            recipe_id = outDict[score]
            rec = dbSession.query(db.Recipe).filter(db.Recipe.recipe_id == recipe_id).first()
            amounts = dbSession.query(db.Ingredient.name, db.RecIngred.ingredient_amount).join(db.RecIngred).join(db.Recipe).filter(db.Recipe.recipe_id == recipe_id).all()
            # A recipe without an image must not crash the whole search.
            img = rec.img.decode('utf-8') if rec.img is not None else None
            outDict2[score] = (recipe_id, rec.name, rec.url, [r[0] + ": " + r[1] for r in amounts], img)
        return outDict2
    finally:
        # Release the pooled connection; the original never closed the session.
        dbSession.close()
 def stem(l1):
    """Return the German Snowball stem of every word in ``l1``.

    No stop-word filtering takes place: the stop-word set the original built
    on every call was never consulted, so that dead work has been removed.
    """
    snowball = nltk.SnowballStemmer(language='german')
    return [snowball.stem(word) for word in l1]
 def calcOverlay2(l1, l2):
    """Return the fraction of the entries of ``l2`` that are covered by ``l1``.

    Args:
        l1: collection of available terms.
        l2: dict whose values are lists of terms; an entry counts as covered
            when at least one of its terms is contained in ``l1``.

    Returns:
        Covered entries divided by ``len(l2)``; ``0`` when ``l2`` is empty.
    """
    if not l2:
        # Nothing to cover; the original raised ZeroDivisionError here.
        return 0
    covered = sum(1 for terms in l2.values() if any(term in l1 for term in terms))
    return covered / len(l2)
 # Staple ingredients that every user is assumed to have at home.
 defaultArr = ["Wasser", "salz", "pfeffer"]
--- a/app/application/static/coms.js
+++ b/app/application/static/coms.js
--- a/app/application/static/main.css
+++ b/app/application/static/main.css
--- a/app/application/static/main.js
+++ b/app/application/static/main.js
@ -58,7 +58,7 @@ function renderRecipeList(data){
            )
            recString = `
-                <a href="${data1[2]}"> 
+                <a href="${data1[2]}" target="_blank"> 
                    <div class="card text-white bg-primary mb-3" style="max-width: 100%">
                        <div class="card-body recipe-container">
                            <div class="row">
--- a/app/application/static/render.js
+++ b/app/application/static/render.js
--- a/app/application/static/vanta/three.r95.min.js
+++ b/app/application/static/vanta/three.r95.min.js
--- a/app/application/static/vanta/vanta.net.min.js
+++ b/app/application/static/vanta/vanta.net.min.js
--- a/app/application/templates/index.html
+++ b/app/application/templates/index.html
--- a/app/background/pycache/migrate.cpython-37.pyc
+++ b/app/background/pycache/migrate.cpython-37.pyc
--- a/app/background/getImages.py
+++ b/app/background/getImages.py
--- a/app/background/migrate.py
+++ b/app/background/migrate.py
@ -0,0 +1,103 @@
 import json
 import cv2
 import base64
 import nltk as nltk
 from nltk.corpus import stopwords
 #import db as db1
 #import db2 as db2
 def stemWord(word):
    """Tokenize ``word`` and return the German stems of its usable tokens.

    German stop words, the characters ``(``, ``)`` and ``,`` and tokens
    shorter than four characters are skipped.

    Returns:
        list of stems; ``[""]`` when no token is left or processing fails.
    """
    try:
        stopset = set(stopwords.words('german'))
        stopset |= set("(),")
        snowball = nltk.SnowballStemmer(language='german')
        arr = [
            snowball.stem(token)
            for token in nltk.word_tokenize(word)
            if token not in stopset and len(token) >= 4
        ]
    except Exception:
        # Still best-effort, but KeyboardInterrupt/SystemExit are no longer
        # swallowed the way the bare ``except:`` did.
        return [""]
    return arr or [""]
 #migrate('./data/recs.json')
 def migrateRecsDb1ToDb2():
    """Copy recipes from the db1 schema into the db2 schema.

    Only the second half of the db1 recipes is processed, and a recipe is
    skipped when db2 already holds one with the same name. Each copied recipe
    is committed on its own; progress is printed as ``count/length``.

    NOTE(review): ``db1`` and ``db2`` are only imported in commented-out lines
    in the visible part of this file - confirm they are in scope before use.
    """
    session1 = db1.Session()
    session2 = db2.Session()
    count = 0
    length = session1.query(db1.Recipe).count()
    for r1 in list(session1.query(db1.Recipe).all())[int(length/2):]:
        try:
            if not bool(session2.query(db2.Recipe).filter(db2.Recipe.name == r1.name).first()):
                r2 = db2.Recipe(name=r1.name, instructions=r1.instructions, url=r1.url, img=r1.img)
                for ingred in r1.ingredient:
                    ri2 = db2.RecIngred()
                    # Reuse an existing ingredient row, otherwise create it.
                    ingredient2 = session2.query(db2.Ingredient).filter(db2.Ingredient.name == ingred.name).first()
                    if ingredient2 is None:
                        ingredient2 = db2.Ingredient(name=ingred.name)
                    ri2.ingredient_amount = ingred.ingredient_amount
                    ri2.ingredient = ingredient2
                    r2.ingredient.append(ri2)
                session2.add(r2)
                session2.commit()
        except Exception as e:
            # Report the failure (as TrunkDb2 does) instead of hiding it, and
            # let KeyboardInterrupt/SystemExit propagate. Fresh sessions are
            # opened afterwards, as before.
            print(e)
            session1 = db1.Session()
            session2 = db2.Session()
        count+=1
        print(count/length)
 def TrunkDb2():
    """Create trunk rows for every db2 ingredient and link them to it.

    Each ingredient name is passed through ``stem``; for every resulting
    trunk name a ``Trunk`` is looked up or created and an ``IngredTrunk``
    link is appended unless that link already exists. Progress is printed as
    ``count/length`` after each ingredient.

    NOTE(review): ``db2`` is only imported in a commented-out line in the
    visible part of this file - confirm it is in scope before use.
    """
    session2 = db2.Session()
    count = 0
    length = session2.query(db2.Ingredient).count()
    for i2 in session2.query(db2.Ingredient).all():
        try:
            for trunk1 in stem(i2.name):
                ri2 = db2.IngredTrunk()
                # Reuse an existing trunk row, otherwise create it.
                trunk = session2.query(db2.Trunk).filter(db2.Trunk.name == trunk1).first()
                if trunk is None:
                    trunk = db2.Trunk(name=trunk1)
                # Only add the link when this ingredient/trunk pair is not stored yet.
                if session2.query(db2.IngredTrunk).filter(db2.IngredTrunk.ingredient_name == i2.name, db2.IngredTrunk.trunk_name == trunk1).first() is None:
                    ri2.trunk = trunk
                    i2.trunks.append(ri2)
                # Commit once per trunk name.
                session2.commit()
        except Exception as e:
            # On any error: print it and continue with a fresh session.
            print(e)
            session2 = db2.Session()
        count+=1
        print(count/length)
 def stem(l1):
    '''Tokenize the string ``l1`` and return a flat list of its German stems.

    A stem is dropped when it is a stop word or one of ``(``, ``)``, ``,``,
    when it is not purely alphabetic, or when it is shorter than two
    characters. The checks run on the stemmed token.
    '''
    stemmer = nltk.SnowballStemmer(language='german')
    ignored = set(stopwords.words('german'))
    ignored |= set("(),")
    stems = (stemmer.stem(raw) for raw in nltk.word_tokenize(l1))
    return [s for s in stems if s not in ignored and s.isalpha() and len(s) >= 2]
 #migrateDb1ToDb2()
 #TrunkDb2()
--- a/app/background/mine.py
+++ b/app/background/mine.py
--- a/app/requirements.txt
+++ b/app/requirements.txt
--- a/app/run.py
+++ b/app/run.py
@ -0,0 +1,10 @@
 from application import app
 from application.search import defaultArr, stem
 import nltk
 # Fetch the NLTK resources before any stemming or tokenizing happens.
 nltk.download('stopwords')
 nltk.download('punkt')
 # Stem the defaults in place. Rebinding the imported name
 # (``defaultArr = stem(defaultArr)``) only changed this module's variable and
 # left ``application.search.defaultArr`` - the list the search code reads -
 # unstemmed.
 defaultArr[:] = stem(defaultArr)
 app.run(host="0.0.0.0", port='5001', debug=False, threaded=True)
--- a/application/pycache/init.cpython-37.pyc
+++ b/application/pycache/init.cpython-37.pyc
--- a/application/pycache/config.cpython-37.pyc
+++ b/application/pycache/config.cpython-37.pyc
--- a/application/pycache/endpoints.cpython-37.pyc
+++ b/application/pycache/endpoints.cpython-37.pyc
--- a/application/db.py
+++ b/application/db.py
@ -1,85 +0,0 @@
 import sqlalchemy as db
 from sqlalchemy import Column, String, Integer, Numeric, Table, DateTime, ARRAY, ForeignKey, create_engine, LargeBinary, Enum, Text
 from sqlalchemy.orm import sessionmaker, relationship, column_property
 from datetime import datetime
 from sqlalchemy.ext.declarative import declarative_base
 import enum
 from flask import Flask
 import time
 engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8", pool_size=1000, max_overflow=0)
 Base = declarative_base()
 Session = sessionmaker(bind=engine)
 # https://docs.sqlalchemy.org/en/13/orm/basic_relationships.html#association-object
 class Recipe(Base):
    """ORM model of a recipe together with its ingredient and trunk rows."""
    __tablename__ = "recipe"
    recipe_id = Column('recipe_id', Integer, primary_key=True, autoincrement=True)
    name = Column('name', Text)
    instructions = Column('instructions', Text)
    url = Column('url', Text)
    img = Column('img', LargeBinary(length=(2**32)-1))
    ingredient = relationship("Ingredient", backref="recipe")
    trunk = relationship("Trunk", backref="recipe")

    def ingredients(self):
        """Return the names of all ingredients of this recipe."""
        # Ingredient defines no ``ingredients()`` method, so the original call
        # raised AttributeError; the ``name`` column is the ingredient label.
        return [i.name for i in self.ingredient]

    def ingredientDict(self):
        """Map each ingredient name to ``[amount, measurement unit]``."""
        return {
            i.name: [i.ingredient_amount, i.ingredient_amount_mu]
            for i in self.ingredient
        }

    def serialize(self):
        """Return the recipe as a plain dict.

        ``img`` is the stored bytes decoded as UTF-8, or ``None`` when the
        recipe has no image.
        """
        img = self.img.decode('utf-8') if self.img is not None else None
        data = {
            "recipe_id": self.recipe_id,
            "name":self.name,
            "instructions":self.instructions,
            "url": self.url,
            "img": img,
            "ingredients": self.ingredients()
        }
        return data
 class Ingredient(Base):
    """ORM model of one ingredient row; each row references a single recipe."""
    __tablename__ = "ingredient"
    ingredient_id = Column('ingredient_id', Integer,  primary_key=True, autoincrement=True)
    name = Column('name', Text)
    ingredient_amount = Column('ingredient_amount', Text)
    ingredient_amount_mu = Column('ingredient_amount_mu', Text)    # measurement unit
    # Foreign key to the recipe this row belongs to.
    recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'))
 class Trunk(Base):
    """ORM model of one trunk name attached to a single recipe."""
    __tablename__ = "trunk"
    trunk_id = Column('trunk_id', Integer,  primary_key=True, autoincrement=True)
    name = Column('name', Text)
    # Foreign key to the recipe this trunk belongs to.
    recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'))
 def initDB(counter):
    """Create all tables, retrying whenever ``create_all`` raises.

    Args:
        counter: number of failed attempts so far. After each failure the
            exception is printed and the counter incremented; while it is
            below 13 the function sleeps 5 seconds and tries again.
    """
    while True:
        try:
            Base.metadata.create_all(engine)
        except Exception as e:
            print(e)
            counter += 1
            if counter >= 13:
                return
            time.sleep(5)
        else:
            return
 initDB(0)
--- a/data/images.jpeg
+++ b/data/images.jpeg
--- a/data/links.json
+++ b/data/links.json
--- a/ghostdriver.log
+++ b/ghostdriver.log
@ -1,27 +0,0 @@
 [INFO  - 2020-04-20T20:11:22.708Z] GhostDriver - Main - running on port 63975
 [INFO  - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
 [INFO  - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.customHeaders:  - {}
 [INFO  - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"windows-10-32bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}}
 [INFO  - 2020-04-20T20:11:24.918Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: 1c4df520-8343-11ea-a8a6-19d2d02d716b
 [ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - msg: Unhandled promise rejection TypeError: undefined is not an object (evaluating 'w.set')
  phantomjs://platform/console++.js:263 in error
 [ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - stack:
  (anonymous function) (https://www.chefkoch.de/rezepte/207581086939665/Nannie-s-schnelle-Kuchen.html:8626)
  phantomjs://platform/console++.js:263 in error
 [ERROR - 2020-04-20T20:11:28.646Z] WebElementLocator - _handleLocateCommand - Element(s) NOT Found: GAVE UP. Search Stop Time: 15874134[INFO  - 2020-04-20T20:12:30.427Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:13:35.766Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:17:30.429Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:18:35.767Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:22:30.429Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:23:35.768Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:27:30.429Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:28:35.771Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:32:30.439Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:33:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:37:30.439Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:38:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:42:30.439Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:43:35.784Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:47:30.439Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:48:35.787Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:52:30.439Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:53:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T20:57:30.440Z] SessionManagerReqHand - [INFO  - 2020-04-20T20:58:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:02:30.440Z] SessionManagerReqHand - [INFO  - 2020-04-20T21:03:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:07:30.445Z] SessionManagerReqHand - [INFO  - 2020-04-20T21:08:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:12:30.457Z] SessionManagerReqHand - [INFO  - 2020-04-20T21:13:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:17:30.457Z] SessionManagerReqHand 
- [INFO  - 2020-04-20T21:18:35.798Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:22:30.459Z] SessionManagerReqHand - [INFO  - 2020-04-20T21:23:35.803Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:27:30.460Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 OW
 O  - 2020-04-20T21:22:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:27:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 ns - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:07:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:12:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:17:14.190Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:22:14.192Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:27:14.196Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:19:25.242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:24:25.249Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 [INFO  - 2020-04-20T21:27:31.799Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
 179Z] SessionManagerReqHand - [INFO  - 2020-04-20T21:11:22.721Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:13:54.176Z] SessionManagerReqHand - [INFO  - 2020-04-20T21:16:22.724Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:18:54.177Z] SessionManagerReqHand - [INFO  - 2020-04-20T21:21:22.725Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO  - 2020-04-20T21:23:54.181Z] SessionManagerReqHand - [INFO  - 2020-04-20T21:26:22.728Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
--- a/migrate.py
+++ b/migrate.py
@ -1,52 +0,0 @@
 import json
 import cv2
 import base64
 import nltk as nltk
 from nltk.corpus import stopwords
 from application.db import Session, Recipe, Ingredient, Trunk
 def stemWord(word):
    """Tokenize ``word`` and return the German stems of its usable tokens.

    German stop words, the characters ``(``, ``)`` and ``,`` and tokens
    shorter than four characters are skipped.

    Returns:
        list of stems; ``[""]`` when no token is left or processing fails.
    """
    try:
        stopset = set(stopwords.words('german'))
        stopset |= set("(),")
        snowball = nltk.SnowballStemmer(language='german')
        arr = [
            snowball.stem(token)
            for token in nltk.word_tokenize(word)
            if token not in stopset and len(token) >= 4
        ]
    except Exception:
        # Still best-effort, but KeyboardInterrupt/SystemExit are no longer
        # swallowed the way the bare ``except:`` did.
        return [""]
    return arr or [""]
 def migrate(path):
    """Load the recipe JSON file at ``path`` and store every recipe.

    Each JSON entry maps a recipe name to a sequence whose items 0-3 are read
    as instructions, ingredient dict (name -> amount), link and image text.
    Every recipe is committed on its own and the progress ratio is printed.
    """
    with open(path, encoding="utf-8") as file:
        recs = json.load(file)
    dbSession = Session()
    leng = len(recs)
    for counter, (name, value) in enumerate(recs.items(), start=1):
        r = Recipe(name=name, instructions=value[0], url=value[2], img=value[3].encode())
        for ingredient_name, amount in value[1].items():
            a = Ingredient(name=ingredient_name, ingredient_amount=amount)
            r.ingredient.append(a)
            # One Trunk row per stem of the ingredient name.
            for trunk_name in stemWord(a.name):
                r.trunk.append(Trunk(name=trunk_name))
        dbSession.add(r)
        dbSession.commit()
        print(counter/leng)
 #migrate('./data/recs.json')
--- a/run.py
+++ b/run.py
@ -1,11 +0,0 @@
 from application import app
 import nltk
 from search import defaultArr, stem
 # Fetch the NLTK resources before any stemming or tokenizing happens.
 nltk.download('stopwords')
 nltk.download('punkt')
 # Stem the defaults in place so ``search.defaultArr`` itself holds the stems.
 # The original assigned the result to ``delattr``, which shadowed the builtin
 # and threw the stemmed list away.
 defaultArr[:] = stem(defaultArr)
 app.run(host="0.0.0.0", port='5001', debug=True, threaded=True)
--- a/search.py
+++ b/search.py
@ -1,102 +0,0 @@
 from application.db import Session, Recipe, Ingredient, Trunk
 from flask import g
 import nltk as nltk
 from nltk.corpus import stopwords
 import time
 import heapq
 from collections import Counter 
 def fastes(inputArr):
    """Count matching trunk rows per recipe for the given search terms.

    Returns:
        dict mapping a recipe id (as string) to the number of ``Trunk`` rows
        whose name equals one of the terms in ``inputArr``.
    """
    dbSession = g.session
    indx = {}
    for term in inputArr:
        for row in dbSession.query(Trunk.recipe_id).filter(Trunk.name == term).all():
            recipe_key = str(row[0])
            indx[recipe_key] = indx.get(recipe_key, 0) + 1
    return indx
 def stemInput(inputArr):
    """Return the German Snowball stems of all words that are not stop words.

    Words are compared against the stop-word list before stemming.
    """
    stemmer = nltk.SnowballStemmer(language='german')
    german_stopwords = set(stopwords.words('german'))
    return [stemmer.stem(term) for term in inputArr if term not in german_stopwords]
 #
 def getRecDict(indx, inputArr):
    """Score the most frequently hit recipes and return the ten best.

    Args:
        indx: dict mapping a recipe id (string) to its hit count.
        inputArr: search terms that are passed to ``calcOverlay``.

    Returns:
        dict mapping a score to the tuple
        ``(recipe_id, name, url, ["ingredient: amount", ...], img)``.
    """
    dbSession = g.session
    outDict = {}
    k = Counter(indx) 
    # Finding 1000 highest values TODO: this is not correct
    indx = k.most_common(1000)  
    indx = dict(indx)
    for key, value in indx.items():
        # All trunk names stored for this recipe.
        ingred = [x[0] for x in dbSession.query(Trunk.name).filter(Trunk.recipe_id==int(key)).all()]
        # The score is the dict key, so recipes with an equal score overwrite each other.
        outDict[calcOverlay(inputArr, ingred)] = int(key)
    outDict2 = {}
    for key in heapq.nlargest(10, outDict.keys()):
        key2 = outDict[key]
        rec = dbSession.query(Recipe).filter(Recipe.recipe_id==key2).first()
        outDict2[key] = (key2, rec.name, rec.url,  [r[0] + ": " + r[1] for r in dbSession.query(Ingredient.name, Ingredient.ingredient_amount).filter(Ingredient.recipe_id==key2).all()], rec.img.decode('utf-8'))
    return outDict2
 def printDict(indx, inputArr):
    """Print every ranked recipe with a score of at least 0.3 and its ingredients.

    Recipes are printed in ascending score order as ``score id name``,
    followed by one tab-indented line per ingredient.
    """
    outDict = getRecDict(indx, inputArr)
    for key, value in sorted(outDict.items()):
        if key >= 0.3:
            print(key, value[0], value[1])
            # value is (id, name, url, ingredient strings, img): the
            # ingredients sit at index 3. The original iterated index 2, the
            # URL, and so printed it character by character.
            for xx in value[3]:
                print("\t", xx)
 def stem(l1):
    """Return the German Snowball stem of every word in ``l1``.

    No stop-word filtering takes place: the stop-word set the original built
    on every call was never consulted, so that dead work has been removed.
    """
    snowball = nltk.SnowballStemmer(language='german')
    return [snowball.stem(word) for word in l1]
 def calcOverlay(l1, l2):
    """Return how many non-default terms of ``l1`` occur in ``l2``, relative to ``len(l2)``.

    Args:
        l1: search terms; entries contained in ``defaultArr`` are not counted.
        l2: collection of terms belonging to one recipe.

    Returns:
        Match count divided by ``len(l2)``; ``0`` when ``l2`` is empty.
    """
    if not l2:
        # Nothing to match against; the original raised ZeroDivisionError here.
        return 0
    hits = sum(1 for term in l1 if term not in defaultArr and term in l2)
    return hits / len(l2)
 #inputArr = ["reis", "tofu", "bohnen", "kichererbsen", "hackfleisch"]
 defaultArr = ["Wasser", "salz", "pfeffer"] # it is assumed that everyone has this
 #inputArr += defaultArr
 maxMissing = 10
 #
 #stemmed = stemInput(inputArr)
 #
 #start = time.time()
 #indx = faster(stemmed)  
 #end = time.time()
 #printDict(indx)
 #print("\n", end - start, "\n")  
 #
 #
 #start = time.time()
 #indx = fastes(stemmed)
 #end = time.time()
 #printDict(indx)
 #print("\n", end - start, "\n")  
--- a/test.sqlite
+++ b/test.sqlite