added migrate

This commit is contained in:
Askill 2020-04-15 12:54:02 +02:00
parent 17361c16ee
commit ffc41d617b
5 changed files with 128 additions and 25 deletions

View File

@@ -7,7 +7,7 @@ import enum
from flask_sqlalchemy import SQLAlchemy
from flask import Flask
engine = db.create_engine('sqlite:///./test.sqlite', echo=False)
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8")
connection = engine.connect()
Base = declarative_base()
Session = sessionmaker(bind=engine)

26
migrate.py Normal file
View File

@@ -0,0 +1,26 @@
import json
import cv2
import base64
from application.db import Session, Recipe, Ingredient, Link, Trunk
def migrate(path):
    """Import the scraped recipe dump at *path* into the database.

    The JSON file maps recipe name -> [instructions, {ingredient: amount},
    url, image-string] (layout inferred from the indices used below --
    TODO confirm against the scraper that produced the file).

    Args:
        path: filesystem path of the UTF-8 encoded JSON dump.
    """
    # `with` guarantees the file handle is released; the useless
    # `recs = ""` pre-initializer from the original is dropped.
    with open(path, encoding="utf-8") as file:
        recs = json.load(file)
    dbSession = Session()
    try:
        for name, value in recs.items():
            r = Recipe(
                name=name,
                instructions=value[0],
                url=value[2],
                # Stored as bytes; value[3] is presumably base64 text -- TODO confirm.
                img=value[3].encode(),
            )
            for ingredient_name, amount in value[1].items():
                link = Link(ingredient_amount=amount)
                link.ingredient = Ingredient(name=ingredient_name)
                r.ingredient.append(link)
            dbSession.add(r)
            # Commit per recipe (as before) so a mid-run failure keeps
            # the recipes already written.
            dbSession.commit()
    finally:
        # Fix: the session was never released; return its connection to the pool.
        dbSession.close()


if __name__ == "__main__":
    # Fix: guard the entry point so importing this module for reuse of
    # migrate() does not immediately run the migration.
    migrate('./data/recs.json')

27
mine.py
View File

@@ -9,6 +9,9 @@ import traceback
import cv2
import base64
from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk
from nltk.corpus import stopwords
header_values = {
'name': 'Michael Foord',
@@ -123,6 +126,23 @@ def getRecipe(links):
sleep(random.randint(0, 5))
return recs
def stemIngred():
    """Tokenize and stem every Ingredient name, attaching each stem as a Trunk row.

    Walks all Ingredient rows, splits the name into tokens, discards German
    stop words / punctuation / very short fragments, and appends a Trunk
    with the Snowball stem of each remaining token. Commits after every
    ingredient and prints a crude progress ratio.
    """
    dbSession = Session()
    # German stop words plus the bare characters '(', ')' and ',' --
    # set("(),") expands the string into its individual characters.
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")
    # Fix: the stemmer is stateless and loop-invariant; the original
    # constructed a new SnowballStemmer for every ingredient.
    snowball = nltk.SnowballStemmer(language='german')
    count = dbSession.query(Ingredient).count()
    for x in dbSession.query(Ingredient).all():
        for token in nltk.word_tokenize(x.name):
            # Skip stop words and fragments shorter than 3 chars
            # (units, leftover punctuation).
            if token in stopset or len(token) < 3:
                continue
            stemmed = snowball.stem(token)
            x.trunks.append(Trunk(name=stemmed))
        # Commit per ingredient, as in the original.
        dbSession.commit()
        # Progress indicator; assumes ingredient_ids are dense and
        # ascending -- TODO confirm.
        print(x.ingredient_id / count)
#links = getLinks()
#with open('./data/links.json', 'w') as file:
# jsonString = json.dumps(links)
@@ -132,8 +152,9 @@ with open('./data/links.json') as file:
links = json.load(file)
recs = getRecipe(links)
#recs = getRecipe(links)
stemIngred()
with open('./data/recs.json', 'w', encoding="utf-8") as file:
json.dump(recs, file, ensure_ascii=False)
#with open('./data/recs.json', 'w', encoding="utf-8") as file:
# json.dump(recs, file, ensure_ascii=False)

98
test.py
View File

@@ -2,9 +2,11 @@
from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk
from nltk.corpus import stopwords
import time
dbSession = Session()
inputArr = ["kartoffeln", "zwiebel", "steak", "würfel"]
inputArr = ["butter", "milch", "eier", "käse"]
maxMissing = 4
def slow():
recipes = dbSession.query(Recipe).all()
@@ -14,7 +16,7 @@ def slow():
for recipe in recipes:
rec = recipe
recipe = recipe.ingredients()
if len(recipe) > len(inputArr) + 2:
if len(recipe) > len(inputArr) + maxMissing:
continue
counter = 0
for i in inputArr:
@@ -27,11 +29,12 @@ def slow():
if counter not in arr:
arr[counter] = []
arr[counter].append(rec.ingredients())
arr[counter].append(rec.ingredients())
#print(rec.name)
print(arr)
# for y, x in arr.items():
# for xx in x:
# print(xx)
def faster():
indx = {}
@@ -41,7 +44,7 @@ def faster():
for y in x.recipe:
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + 5:
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
continue
if str(y.recipe_id) not in indx:
indx[str(y.recipe_id)] = 0
@@ -49,28 +52,81 @@ def faster():
indx[str(y.recipe_id)] += 1
outDict = {}
for key, value in indx.items():
if value >= len(inputArr):
print(dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients())
#print(key)
ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
print(outDict)
def fastes():
    """Recipe lookup via pre-computed Trunk (stem) rows.

    Stems the global inputArr (dropping German stop words), then for each
    stem walks Trunk -> Ingredient -> Recipe to count how many query
    ingredients each recipe matches. Recipes with more than
    len(inputArr) + maxMissing ingredients are skipped. Finally prints a
    dict keyed by calcOverlay score. Reads module-level dbSession,
    inputArr and maxMissing.
    """
    # recipe_id (as str) -> number of matching query stems
    indx = {}
    inputArr2 = []
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    for word in inputArr:
        if word in stopset:
            continue
        inputArr2.append(snowball.stem(word))
    for inpu in inputArr2:
        ids = []  # NOTE(review): never used -- candidate for removal
        for xx in dbSession.query(Trunk).filter(Trunk.name == inpu).all():
            for x in dbSession.query(Ingredient).filter(xx.ingredient_id == Ingredient.ingredient_id).all():
                for y in x.recipe:
                    # Skip recipes that need too many ingredients beyond the query.
                    if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
                        continue
                    if str(y.recipe_id) not in indx:
                        indx[str(y.recipe_id)] = 0
                    indx[str(y.recipe_id)] += 1
    # overlay score -> (recipe name, recipe_id, ingredient list);
    # NOTE(review): equal scores overwrite each other in this dict.
    outDict = {}
    for key, value in indx.items():
        ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
        outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
    print(outDict)
#
def stemIngred():
def calcOverlay(l1, l2):
snowball = nltk.SnowballStemmer(language='german')
stopset = set(stopwords.words('german'))
stopset |= set("(),")
count = dbSession.query(Ingredient).count()
for x in dbSession.query(Ingredient).all():
snowball = nltk.SnowballStemmer(language='german')
for token in nltk.word_tokenize(x.name):
if token in stopset or len(token) < 3:
l1 = [snowball.stem(l) for l in l1 ]
counter = 0
for x in l2:
for token in nltk.word_tokenize(x):
if token in stopset:
continue
stemmed = snowball.stem(token)
for l in l1:
if l == stemmed:
counter +=1
return counter
#
x.trunks.append(Trunk(name=stemmed))
dbSession.commit()
print(x.ingredient_id/count)
#faster()
#slow()
print(dbSession.query(Trunk.name).all())


def _timed(fn):
    """Run *fn* once and print its wall-clock duration in seconds."""
    # perf_counter is monotonic and higher-resolution than time.time(),
    # making it the right clock for benchmarking.
    start = time.perf_counter()
    fn()
    print("\n", time.perf_counter() - start, "\n")


# Benchmark the three lookup strategies against each other; the
# repeated start/end/print boilerplate is factored into _timed.
_timed(slow)
_timed(faster)
_timed(fastes)