added migrate

This commit is contained in:
Askill 2020-04-15 12:54:02 +02:00
parent 17361c16ee
commit ffc41d617b
5 changed files with 128 additions and 25 deletions

View File

@ -7,7 +7,7 @@ import enum
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from flask import Flask from flask import Flask
engine = db.create_engine('sqlite:///./test.sqlite', echo=False) engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8")
connection = engine.connect() connection = engine.connect()
Base = declarative_base() Base = declarative_base()
Session = sessionmaker(bind=engine) Session = sessionmaker(bind=engine)

26
migrate.py Normal file
View File

@ -0,0 +1,26 @@
import json
import cv2
import base64
from application.db import Session, Recipe, Ingredient, Link, Trunk
def migrate(path):
    """Import a scraped-recipe JSON dump into the database.

    The JSON file maps recipe name -> [instructions, {ingredient: amount},
    url, image-string]; each entry becomes one Recipe row with its
    Ingredient rows attached through the Link association object.

    :param path: path to the UTF-8 encoded JSON dump
    """
    with open(path, encoding="utf-8") as file:
        recs = json.load(file)

    db_session = Session()
    for name, entry in recs.items():
        instructions, ingredients, url, img = entry[0], entry[1], entry[2], entry[3]
        recipe = Recipe(name=name, instructions=instructions, url=url,
                        img=img.encode())
        for ingredient_name, amount in ingredients.items():
            link = Link(ingredient_amount=amount)
            link.ingredient = Ingredient(name=ingredient_name)
            recipe.ingredient.append(link)
        db_session.add(recipe)
    # single commit for the whole import instead of one commit per recipe
    db_session.commit()


if __name__ == "__main__":
    # guard so importing this module does not re-run the migration
    migrate('./data/recs.json')

27
mine.py
View File

@ -9,6 +9,9 @@ import traceback
import cv2 import cv2
import base64 import base64
from application.db import Session, Recipe, Ingredient, Link, Trunk from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk
from nltk.corpus import stopwords
header_values = { header_values = {
'name': 'Michael Foord', 'name': 'Michael Foord',
@ -123,6 +126,23 @@ def getRecipe(links):
sleep(random.randint(0, 5)) sleep(random.randint(0, 5))
return recs return recs
def stemIngred():
    """Tokenize every Ingredient name, stem each token with the German
    Snowball stemmer, and attach the stems as Trunk rows so lookups can
    later match on word stems instead of exact names.

    Prints rough progress (ingredient_id / total row count) after each
    per-ingredient commit.
    """
    dbSession = Session()
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")
    count = dbSession.query(Ingredient).count()
    # the stemmer is stateless and loop-invariant -- build it once,
    # not once per ingredient as before
    snowball = nltk.SnowballStemmer(language='german')
    for ingredient in dbSession.query(Ingredient).all():
        for token in nltk.word_tokenize(ingredient.name):
            # skip stopwords/punctuation and very short fragments
            if token in stopset or len(token) < 3:
                continue
            ingredient.trunks.append(Trunk(name=snowball.stem(token)))
        dbSession.commit()
        print(ingredient.ingredient_id / count)
#links = getLinks() #links = getLinks()
#with open('./data/links.json', 'w') as file: #with open('./data/links.json', 'w') as file:
# jsonString = json.dumps(links) # jsonString = json.dumps(links)
@ -132,8 +152,9 @@ with open('./data/links.json') as file:
links = json.load(file) links = json.load(file)
recs = getRecipe(links) #recs = getRecipe(links)
stemIngred()
with open('./data/recs.json', 'w', encoding="utf-8") as file: #with open('./data/recs.json', 'w', encoding="utf-8") as file:
json.dump(recs, file, ensure_ascii=False) # json.dump(recs, file, ensure_ascii=False)

98
test.py
View File

@ -2,9 +2,11 @@
from application.db import Session, Recipe, Ingredient, Link, Trunk from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk import nltk as nltk
from nltk.corpus import stopwords from nltk.corpus import stopwords
import time
dbSession = Session() dbSession = Session()
inputArr = ["kartoffeln", "zwiebel", "steak", "würfel"] inputArr = ["butter", "milch", "eier", "käse"]
maxMissing = 4
def slow(): def slow():
recipes = dbSession.query(Recipe).all() recipes = dbSession.query(Recipe).all()
@ -14,7 +16,7 @@ def slow():
for recipe in recipes: for recipe in recipes:
rec = recipe rec = recipe
recipe = recipe.ingredients() recipe = recipe.ingredients()
if len(recipe) > len(inputArr) + 2: if len(recipe) > len(inputArr) + maxMissing:
continue continue
counter = 0 counter = 0
for i in inputArr: for i in inputArr:
@ -27,11 +29,12 @@ def slow():
if counter not in arr: if counter not in arr:
arr[counter] = [] arr[counter] = []
arr[counter].append(rec.ingredients()) arr[counter].append(rec.ingredients())
#print(rec.name) #print(rec.name)
# for y, x in arr.items():
print(arr) # for xx in x:
# print(xx)
def faster(): def faster():
indx = {} indx = {}
@ -41,7 +44,7 @@ def faster():
for y in x.recipe: for y in x.recipe:
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + 5: if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
continue continue
if str(y.recipe_id) not in indx: if str(y.recipe_id) not in indx:
indx[str(y.recipe_id)] = 0 indx[str(y.recipe_id)] = 0
@ -49,28 +52,81 @@ def faster():
indx[str(y.recipe_id)] += 1 indx[str(y.recipe_id)] += 1
outDict = {}
for key, value in indx.items(): for key, value in indx.items():
if value >= len(inputArr): ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
print(dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()) outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
#print(key)
print(outDict)
def fastes():
    """Match the stemmed query words in inputArr against the pre-computed
    Trunk (stem) table, count per-recipe hits, and print candidate
    recipes scored by calcOverlay.

    NOTE(review): outDict is keyed by the overlap score, so two recipes
    with the same score overwrite each other -- kept as-is to preserve
    the original output shape; confirm whether a list per score is wanted.
    """
    indx = {}
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    # stem the query terms, dropping German stopwords
    stemmed_input = [snowball.stem(word) for word in inputArr
                     if word not in stopset]
    for stem in stemmed_input:
        for trunk in dbSession.query(Trunk).filter(Trunk.name == stem).all():
            for ingredient in dbSession.query(Ingredient).filter(
                    trunk.ingredient_id == Ingredient.ingredient_id).all():
                for link in ingredient.recipe:
                    # skip recipes needing too many ingredients we lack
                    if dbSession.query(Link).filter(
                            Link.recipe_id == link.recipe_id).count() > len(inputArr) + maxMissing:
                        continue
                    key = str(link.recipe_id)
                    indx[key] = indx.get(key, 0) + 1
    outDict = {}
    for key in indx:
        # fetch the recipe once instead of twice per key as before
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == key).first()
        ingred = recipe.ingredients()
        outDict[calcOverlay(inputArr, ingred)] = (recipe.name, key, ingred)
    print(outDict)
# #
def stemIngred(): def calcOverlay(l1, l2):
snowball = nltk.SnowballStemmer(language='german')
stopset = set(stopwords.words('german')) stopset = set(stopwords.words('german'))
stopset |= set("(),") stopset |= set("(),")
count = dbSession.query(Ingredient).count() l1 = [snowball.stem(l) for l in l1 ]
for x in dbSession.query(Ingredient).all(): counter = 0
snowball = nltk.SnowballStemmer(language='german')
for token in nltk.word_tokenize(x.name): for x in l2:
if token in stopset or len(token) < 3: for token in nltk.word_tokenize(x):
if token in stopset:
continue continue
stemmed = snowball.stem(token) stemmed = snowball.stem(token)
for l in l1:
if l == stemmed:
counter +=1
x.trunks.append(Trunk(name=stemmed)) return counter
dbSession.commit() #
print(x.ingredient_id/count)
#faster()
#slow() start = time.time()
print(dbSession.query(Trunk.name).all()) slow()
end = time.time()
print("\n", end - start, "\n")
# Time each lookup strategy and print the elapsed seconds.
start = time.time()
faster()
print("\n", time.time() - start, "\n")

start = time.time()
fastes()
print("\n", time.time() - start, "\n")