Inverse-Rezeptsuche/app/background/migrate.py


import nltk
from nltk.corpus import stopwords

# The db modules are required by the migration functions below.
import db as db1
import db2 as db2
def stemWord(word):
    """Tokenize a German string and return its stemmed tokens.

    Stopwords, bracket/comma characters, and tokens shorter than four
    characters are dropped. The result is never empty: if nothing
    survives the filter, a single empty string is returned.
    """
    try:
        arr = []
        stopset = set(stopwords.words('german'))
        stopset |= set("(),")
        snowball = nltk.SnowballStemmer(language='german')
        for token in nltk.word_tokenize(word):
            if token in stopset or len(token) < 4:
                continue
            arr.append(snowball.stem(token))
        if len(arr) == 0:
            arr.append("")
        return arr
    except Exception:
        # Tokenization can fail on unexpected input; fall back to the
        # same "empty" result the success path would produce.
        return [""]
#migrate('./data/recs.json')
def migrateRecsDb1ToDb2():
    """Copy recipes and their ingredients from db1 into db2.

    Note the slice below: only the second half of db1 is migrated here,
    apparently because the first half was handled in an earlier run.
    """
    session1 = db1.Session()
    session2 = db2.Session()
    count = 0
    length = session1.query(db1.Recipe).count()
    for r1 in list(session1.query(db1.Recipe).all())[int(length / 2):]:
        try:
            # Skip recipes that already exist in db2 (matched by name).
            if not bool(session2.query(db2.Recipe).filter(db2.Recipe.name == r1.name).first()):
                r2 = db2.Recipe(name=r1.name, instructions=r1.instructions, url=r1.url, img=r1.img)
                for ingred in r1.ingredient:
                    ri2 = db2.RecIngred()
                    # Reuse an existing ingredient row, or create a new one.
                    ingredient2 = session2.query(db2.Ingredient).filter(db2.Ingredient.name == ingred.name).first()
                    if ingredient2 is None:
                        ingredient2 = db2.Ingredient(name=ingred.name)
                    ri2.ingredient_amount = ingred.ingredient_amount
                    ri2.ingredient = ingredient2
                    r2.ingredient.append(ri2)
                session2.add(r2)
                session2.commit()
        except Exception as e:
            # Discard the failed transaction and start over with fresh
            # sessions so the remaining recipes can still be migrated.
            print(e)
            session2.rollback()
            session1 = db1.Session()
            session2 = db2.Session()
        count += 1
        print(count / length)  # progress as a fraction of all recipes
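
# Both migration loops repeat the same "query by name, else create"
# pattern. A helper along these lines (a sketch, not part of the original
# module) would express it once:
def _get_or_create(session, model, **kwargs):
    """Return the first `model` row matching `kwargs`, or a new,
    not-yet-persisted instance if none exists."""
    instance = session.query(model).filter_by(**kwargs).first()
    return instance if instance is not None else model(**kwargs)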
def TrunkDb2():
    """Stem every ingredient name in db2 and link each ingredient to its
    word stems ("trunks") via IngredTrunk association rows."""
    session2 = db2.Session()
    count = 0
    length = session2.query(db2.Ingredient).count()
    for i2 in session2.query(db2.Ingredient).all():
        try:
            for trunk1 in stem(i2.name):
                # Reuse an existing stem row, or create a new one.
                trunk = session2.query(db2.Trunk).filter(db2.Trunk.name == trunk1).first()
                if trunk is None:
                    trunk = db2.Trunk(name=trunk1)
                # Only create the association if it does not exist yet.
                if session2.query(db2.IngredTrunk).filter(db2.IngredTrunk.ingredient_name == i2.name, db2.IngredTrunk.trunk_name == trunk1).first() is None:
                    ri2 = db2.IngredTrunk()
                    ri2.trunk = trunk
                    i2.trunks.append(ri2)
            session2.commit()
        except Exception as e:
            print(e)
            session2.rollback()
            session2 = db2.Session()
        count += 1
        print(count / length)  # progress as a fraction of all ingredients
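
# Note: TrunkDb2() iterates db2.Ingredient, so it assumes the recipes and
# ingredients have already been copied over by migrateRecsDb1ToDb2().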
def stem(l1):
    '''Tokenize and stem a word or phrase; the result is a flat (1-d) list.'''
    snowball = nltk.SnowballStemmer(language='german')
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")
    l2 = []
    for token in nltk.word_tokenize(l1):
        token = snowball.stem(token)
        if token in stopset or not token.isalpha() or len(token) < 2:
            continue
        l2.append(token)
    return l2
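
# Usage sketch (hypothetical input). Unlike stemWord, the stopword and
# length checks here run on the token *after* stemming, and purely
# non-alphabetic tokens (such as the comma) are dropped:
#
#     stem("Tomaten, geschält")  # -> something like ['tomat', 'geschalt']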
#migrateRecsDb1ToDb2()
#TrunkDb2()
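
# One-time NLTK setup (assumption: the corpora are not installed yet);
# nltk.word_tokenize needs the 'punkt' model and stopwords.words('german')
# needs the 'stopwords' corpus.
if __name__ == "__main__":
    nltk.download("punkt")
    nltk.download("stopwords")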