103 lines
3.1 KiB
Python
103 lines
3.1 KiB
Python
import json
|
|
import cv2
|
|
import base64
|
|
import nltk as nltk
|
|
from nltk.corpus import stopwords
|
|
#import db as db1
|
|
#import db2 as db2
|
|
|
|
def stemWord(word):
|
|
try:
|
|
arr = []
|
|
stopset = set(stopwords.words('german'))
|
|
stopset |= set("(),")
|
|
snowball = nltk.SnowballStemmer(language='german')
|
|
for token in nltk.word_tokenize(word):
|
|
if token in stopset or len(token) < 4:
|
|
continue
|
|
stemmed = snowball.stem(token)
|
|
arr.append(stemmed)
|
|
if len(arr) == 0:
|
|
arr.append("")
|
|
return arr
|
|
except:
|
|
return [""]
|
|
|
|
#migrate('./data/recs.json')
|
|
|
|
def migrateRecsDb1ToDb2():
|
|
|
|
session1 = db1.Session()
|
|
session2 = db2.Session()
|
|
|
|
|
|
count = 0
|
|
length = session1.query(db1.Recipe).count()
|
|
for r1 in list(session1.query(db1.Recipe).all())[int(length/2):]:
|
|
try:
|
|
if not bool(session2.query(db2.Recipe).filter(db2.Recipe.name == r1.name).first()):
|
|
r2 = db2.Recipe(name=r1.name, instructions=r1.instructions, url=r1.url, img=r1.img)
|
|
|
|
for ingred in r1.ingredient:
|
|
ri2 = db2.RecIngred()
|
|
ingredient2 = session2.query(db2.Ingredient).filter(db2.Ingredient.name == ingred.name).first()
|
|
if ingredient2 is None:
|
|
ingredient2 = db2.Ingredient(name=ingred.name)
|
|
ri2.ingredient_amount = ingred.ingredient_amount
|
|
ri2.ingredient = ingredient2
|
|
r2.ingredient.append(ri2)
|
|
|
|
session2.add(r2)
|
|
session2.commit()
|
|
|
|
except:
|
|
session1 = db1.Session()
|
|
session2 = db2.Session()
|
|
|
|
count+=1
|
|
print(count/length)
|
|
|
|
def TrunkDb2():
|
|
session2 = db2.Session()
|
|
|
|
count = 0
|
|
length = session2.query(db2.Ingredient).count()
|
|
for i2 in session2.query(db2.Ingredient).all():
|
|
try:
|
|
for trunk1 in stem(i2.name):
|
|
|
|
ri2 = db2.IngredTrunk()
|
|
trunk = session2.query(db2.Trunk).filter(db2.Trunk.name == trunk1).first()
|
|
if trunk is None:
|
|
trunk = db2.Trunk(name=trunk1)
|
|
|
|
if session2.query(db2.IngredTrunk).filter(db2.IngredTrunk.ingredient_name == i2.name, db2.IngredTrunk.trunk_name == trunk1).first() is None:
|
|
ri2.trunk = trunk
|
|
i2.trunks.append(ri2)
|
|
|
|
session2.commit()
|
|
|
|
except Exception as e:
|
|
print(e)
|
|
session2 = db2.Session()
|
|
|
|
count+=1
|
|
print(count/length)
|
|
|
|
|
|
def stem(l1):
|
|
'''Tokenize and stem word, result is 1d list'''
|
|
snowball = nltk.SnowballStemmer(language='german')
|
|
stopset = set(stopwords.words('german'))
|
|
stopset |= set("(),")
|
|
l2 = []
|
|
|
|
for token in nltk.word_tokenize(l1):
|
|
token = snowball.stem(token)
|
|
if token in stopset or not token.isalpha() or len(token) < 2:
|
|
continue
|
|
l2.append(token)
|
|
|
|
return l2
|
|
#migrateDb1ToDb2()
|
|
#TrunkDb2() |