added migrate
This commit is contained in:
parent
17361c16ee
commit
ffc41d617b
Binary file not shown.
|
|
@ -7,7 +7,7 @@ import enum
|
||||||
from flask_sqlalchemy import SQLAlchemy
|
from flask_sqlalchemy import SQLAlchemy
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
|
|
||||||
engine = db.create_engine('sqlite:///./test.sqlite', echo=False)
|
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8")
|
||||||
connection = engine.connect()
|
connection = engine.connect()
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
Session = sessionmaker(bind=engine)
|
Session = sessionmaker(bind=engine)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,26 @@
|
||||||
|
import json
|
||||||
|
import cv2
|
||||||
|
import base64
|
||||||
|
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
||||||
|
|
||||||
|
def migrate(path):
    """Import recipes from a JSON dump into the database.

    The file at ``path`` is read as UTF-8 JSON and iterated as a mapping of
    recipe name -> sequence. From each sequence, index 0 is stored as the
    recipe instructions, index 1 is iterated as a mapping of ingredient
    name -> amount, index 2 is stored as the URL, and index 3 is a string
    that is encoded to bytes and stored as the image.

    Each recipe is committed right after it is added, so recipes imported
    before a failure stay persisted. On any error the pending transaction
    is rolled back and the exception is re-raised; the session is always
    closed.

    Args:
        path: Location of the JSON file to import.
    """
    with open(path, encoding="utf-8") as file:
        recs = json.load(file)

    dbSession = Session()
    try:
        for name, value in recs.items():
            instructions = value[0]
            ingredients = value[1]
            url = value[2]
            img = value[3].encode()

            recipe = Recipe(name=name, instructions=instructions, url=url, img=img)
            for ingredient_name, amount in ingredients.items():
                # A Link row carries the amount and points at the ingredient.
                # NOTE(review): a new Ingredient is created for every link, so
                # identical names are not de-duplicated here - confirm intent.
                link = Link(ingredient_amount=amount)
                link.ingredient = Ingredient(name=ingredient_name)
                recipe.ingredient.append(link)
            dbSession.add(recipe)
            dbSession.commit()
    except Exception:
        # Do not leave a half-written recipe pending in the session.
        dbSession.rollback()
        raise
    finally:
        dbSession.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the one-off import only when this file is executed as a script,
    # so importing the module does not write to the database.
    migrate('./data/recs.json')
|
||||||
27
mine.py
27
mine.py
|
|
@ -9,6 +9,9 @@ import traceback
|
||||||
import cv2
|
import cv2
|
||||||
import base64
|
import base64
|
||||||
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
||||||
|
import nltk as nltk
|
||||||
|
from nltk.corpus import stopwords
|
||||||
|
|
||||||
|
|
||||||
header_values = {
|
header_values = {
|
||||||
'name': 'Michael Foord',
|
'name': 'Michael Foord',
|
||||||
|
|
@ -123,6 +126,23 @@ def getRecipe(links):
|
||||||
sleep(random.randint(0, 5))
|
sleep(random.randint(0, 5))
|
||||||
return recs
|
return recs
|
||||||
|
|
||||||
|
def stemIngred():
    """Attach stemmed "trunk" tokens to every stored ingredient.

    Each ingredient name is tokenized with ``nltk.word_tokenize``. Tokens
    that are German stopwords, one of the characters ``(``, ``)``, ``,``,
    or shorter than three characters are skipped. Every remaining token is
    stemmed with the German Snowball stemmer and appended to the
    ingredient's ``trunks`` as a new ``Trunk``.

    The work is committed after each ingredient, and a progress value
    (``ingredient_id / total ingredient count``) is printed. The session is
    closed when the function finishes or fails.
    """
    dbSession = Session()
    try:
        stopset = set(stopwords.words('german'))
        stopset |= set("(),")
        # The stemmer is stateless across words; build it once, not per row.
        snowball = nltk.SnowballStemmer(language='german')

        count = dbSession.query(Ingredient).count()
        for x in dbSession.query(Ingredient).all():
            for token in nltk.word_tokenize(x.name):
                if token in stopset or len(token) < 3:
                    continue
                x.trunks.append(Trunk(name=snowball.stem(token)))
            dbSession.commit()
            # Rough progress indicator; only meaningful if ids are roughly
            # contiguous - presumably they are, verify against the schema.
            print(x.ingredient_id/count)
    finally:
        dbSession.close()
|
||||||
|
|
||||||
#links = getLinks()
|
#links = getLinks()
|
||||||
#with open('./data/links.json', 'w') as file:
|
#with open('./data/links.json', 'w') as file:
|
||||||
# jsonString = json.dumps(links)
|
# jsonString = json.dumps(links)
|
||||||
|
|
@ -132,8 +152,9 @@ with open('./data/links.json') as file:
|
||||||
links = json.load(file)
|
links = json.load(file)
|
||||||
|
|
||||||
|
|
||||||
recs = getRecipe(links)
|
#recs = getRecipe(links)
|
||||||
|
stemIngred()
|
||||||
|
|
||||||
with open('./data/recs.json', 'w', encoding="utf-8") as file:
|
#with open('./data/recs.json', 'w', encoding="utf-8") as file:
|
||||||
json.dump(recs, file, ensure_ascii=False)
|
# json.dump(recs, file, ensure_ascii=False)
|
||||||
|
|
||||||
98
test.py
98
test.py
|
|
@ -2,9 +2,11 @@
|
||||||
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
||||||
import nltk as nltk
|
import nltk as nltk
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
|
import time
|
||||||
|
|
||||||
dbSession = Session()
|
dbSession = Session()
|
||||||
inputArr = ["kartoffeln", "zwiebel", "steak", "würfel"]
|
inputArr = ["butter", "milch", "eier", "käse"]
|
||||||
|
maxMissing = 4
|
||||||
|
|
||||||
def slow():
|
def slow():
|
||||||
recipes = dbSession.query(Recipe).all()
|
recipes = dbSession.query(Recipe).all()
|
||||||
|
|
@ -14,7 +16,7 @@ def slow():
|
||||||
for recipe in recipes:
|
for recipe in recipes:
|
||||||
rec = recipe
|
rec = recipe
|
||||||
recipe = recipe.ingredients()
|
recipe = recipe.ingredients()
|
||||||
if len(recipe) > len(inputArr) + 2:
|
if len(recipe) > len(inputArr) + maxMissing:
|
||||||
continue
|
continue
|
||||||
counter = 0
|
counter = 0
|
||||||
for i in inputArr:
|
for i in inputArr:
|
||||||
|
|
@ -27,11 +29,12 @@ def slow():
|
||||||
if counter not in arr:
|
if counter not in arr:
|
||||||
arr[counter] = []
|
arr[counter] = []
|
||||||
|
|
||||||
arr[counter].append(rec.ingredients())
|
arr[counter].append(rec.ingredients())
|
||||||
#print(rec.name)
|
#print(rec.name)
|
||||||
|
|
||||||
|
# for y, x in arr.items():
|
||||||
print(arr)
|
# for xx in x:
|
||||||
|
# print(xx)
|
||||||
|
|
||||||
def faster():
|
def faster():
|
||||||
indx = {}
|
indx = {}
|
||||||
|
|
@ -41,7 +44,7 @@ def faster():
|
||||||
|
|
||||||
for y in x.recipe:
|
for y in x.recipe:
|
||||||
|
|
||||||
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + 5:
|
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
|
||||||
continue
|
continue
|
||||||
if str(y.recipe_id) not in indx:
|
if str(y.recipe_id) not in indx:
|
||||||
indx[str(y.recipe_id)] = 0
|
indx[str(y.recipe_id)] = 0
|
||||||
|
|
@ -49,28 +52,81 @@ def faster():
|
||||||
indx[str(y.recipe_id)] += 1
|
indx[str(y.recipe_id)] += 1
|
||||||
|
|
||||||
|
|
||||||
|
outDict = {}
|
||||||
for key, value in indx.items():
|
for key, value in indx.items():
|
||||||
if value >= len(inputArr):
|
ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
|
||||||
print(dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients())
|
outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
|
||||||
#print(key)
|
|
||||||
|
|
||||||
|
print(outDict)
|
||||||
|
|
||||||
|
|
||||||
|
def fastes():
    """Look up candidate recipes through the stemmed ``Trunk`` index and print them.

    Steps:
      1. Stem every word of the module-level ``inputArr`` that is not a
         German stopword.
      2. For each stem, find the ``Trunk`` rows with that name, the
         ingredients they belong to, and the entries of each ingredient's
         ``recipe`` collection. Entries whose recipe has more than
         ``len(inputArr) + maxMissing`` ``Link`` rows are skipped; for the
         others a per-recipe hit counter is increased.
      3. For every counted recipe, compute ``calcOverlay(inputArr,
         ingredients)`` and store ``(name, recipe id, ingredients)`` under
         that score.
      4. Print the resulting dictionary.

    Returns nothing; the result is only printed.
    """
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    inputArr2 = [snowball.stem(word) for word in inputArr if word not in stopset]

    # Loop-invariant upper bound on the number of Link rows a recipe may have.
    limit = len(inputArr) + maxMissing

    indx = {}
    for inpu in inputArr2:
        for xx in dbSession.query(Trunk).filter(Trunk.name == inpu).all():
            for x in dbSession.query(Ingredient).filter(xx.ingredient_id == Ingredient.ingredient_id).all():
                for y in x.recipe:
                    if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > limit:
                        continue
                    recipe_key = str(y.recipe_id)
                    indx[recipe_key] = indx.get(recipe_key, 0) + 1

    outDict = {}
    for key in indx:
        # Fetch the recipe once; name and ingredients come from the same row.
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first()
        ingred = recipe.ingredients()
        # NOTE(review): the overlay score is the dict key, so recipes with the
        # same score overwrite each other and only the last one is printed.
        # Kept as-is to stay comparable with faster(); confirm this is wanted.
        outDict[calcOverlay(inputArr, ingred)] = (recipe.name, key, ingred)

    print(outDict)
|
||||||
#
|
#
|
||||||
|
|
||||||
def stemIngred():
|
def calcOverlay(l1, l2):
|
||||||
|
snowball = nltk.SnowballStemmer(language='german')
|
||||||
stopset = set(stopwords.words('german'))
|
stopset = set(stopwords.words('german'))
|
||||||
stopset |= set("(),")
|
stopset |= set("(),")
|
||||||
|
|
||||||
count = dbSession.query(Ingredient).count()
|
l1 = [snowball.stem(l) for l in l1 ]
|
||||||
for x in dbSession.query(Ingredient).all():
|
counter = 0
|
||||||
snowball = nltk.SnowballStemmer(language='german')
|
|
||||||
for token in nltk.word_tokenize(x.name):
|
for x in l2:
|
||||||
if token in stopset or len(token) < 3:
|
for token in nltk.word_tokenize(x):
|
||||||
|
if token in stopset:
|
||||||
continue
|
continue
|
||||||
stemmed = snowball.stem(token)
|
stemmed = snowball.stem(token)
|
||||||
|
for l in l1:
|
||||||
|
if l == stemmed:
|
||||||
|
counter +=1
|
||||||
|
|
||||||
x.trunks.append(Trunk(name=stemmed))
|
return counter
|
||||||
dbSession.commit()
|
#
|
||||||
print(x.ingredient_id/count)
|
|
||||||
#faster()
|
|
||||||
#slow()
|
start = time.time()
|
||||||
print(dbSession.query(Trunk.name).all())
|
slow()
|
||||||
|
end = time.time()
|
||||||
|
print("\n", end - start, "\n")
|
||||||
|
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
faster()
|
||||||
|
end = time.time()
|
||||||
|
print("\n", end - start, "\n")
|
||||||
|
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
fastes()
|
||||||
|
end = time.time()
|
||||||
|
print("\n", end - start, "\n")
|
||||||
Loading…
Reference in New Issue