added stemmed table

This commit is contained in:
Askill 2020-04-11 22:49:43 +02:00
parent 5289681b09
commit 17361c16ee
6 changed files with 34 additions and 33400 deletions

View File

@ -67,9 +67,16 @@ class Recipe(Base):
class Ingredient(Base): class Ingredient(Base):
__tablename__ = "ingredient" __tablename__ = "ingredient"
ingredient_id = Column('ingredient_id', Integer, primary_key=True) ingredient_id = Column('ingredient_id', Integer, primary_key=True, autoincrement=True)
name = Column('name', Text) name = Column('name', Text)
recipe = relationship("Link", back_populates="ingredient") recipe = relationship("Link", back_populates="ingredient")
trunks = relationship("Trunk")
class Trunk(Base):
    """One word stem ("trunk") stored in the ``trunk`` table.

    Each row carries the stem text in ``name`` and points at a row of the
    ``ingredient`` table through ``ingredient_id``.
    """

    __tablename__ = "trunk"

    # Column names are taken from the attribute names, so they do not need
    # to be spelled out a second time.
    trunk_id = Column(Integer, autoincrement=True, primary_key=True)
    name = Column(Text)
    ingredient_id = Column(
        Integer,
        ForeignKey('ingredient.ingredient_id'),
    )
Base.metadata.create_all(engine) Base.metadata.create_all(engine)

File diff suppressed because one or more lines are too long

View File

@ -8,7 +8,7 @@ import random
import traceback import traceback
import cv2 import cv2
import base64 import base64
from application.db import Session, Recipe, Ingredient, Link from application.db import Session, Recipe, Ingredient, Link, Trunk
header_values = { header_values = {
'name': 'Michael Foord', 'name': 'Michael Foord',

29
test.py
View File

@ -1,5 +1,7 @@
from application.db import Session, Recipe, Ingredient, Link from application.db import Session, Recipe, Ingredient, Link, Trunk
import nltk as nltk
from nltk.corpus import stopwords
dbSession = Session() dbSession = Session()
inputArr = ["kartoffeln", "zwiebel", "steak", "würfel"] inputArr = ["kartoffeln", "zwiebel", "steak", "würfel"]
@ -49,9 +51,26 @@ def faster():
for key, value in indx.items(): for key, value in indx.items():
if value >= len(inputArr): if value >= len(inputArr):
print(dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()) print(dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients())
#print(key) #print(key)
faster()
slow() #
def stemIngred():
    """Fill the trunk table with German word stems of every ingredient name.

    Every Ingredient row is loaded, its name is tokenized, and each token
    that is neither a German stop word, one of the characters ``( ) ,``,
    nor shorter than three characters is stemmed with the Snowball stemmer
    and attached to the ingredient as a new Trunk. Changes are committed
    once per ingredient and the fraction of processed ingredients is
    printed after each one.

    NOTE(review): nothing here checks for trunks that already exist, so
    calling this twice presumably stores every stem twice - confirm
    whether the table is expected to be empty beforehand.
    """
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")

    # The stemmer does not depend on the ingredient, so build it once
    # instead of once per row.
    snowball = nltk.SnowballStemmer(language='german')

    ingredients = dbSession.query(Ingredient).all()
    # The rows are already in memory; no separate COUNT query is needed.
    count = len(ingredients)

    for done, x in enumerate(ingredients, start=1):
        # `name` is a nullable Text column; the tokenizer cannot handle None.
        if x.name:
            # Tokenize with the German model rather than the English default.
            for token in nltk.word_tokenize(x.name, language='german'):
                # The stop-word list is lowercase, so compare case-insensitively
                # to also drop capitalised stop words.
                if len(token) < 3 or token.lower() in stopset:
                    continue
                x.trunks.append(Trunk(name=snowball.stem(token)))
            dbSession.commit()
        # Progress as processed/total; ingredient ids may have gaps and are
        # therefore not a reliable numerator.
        print(done / count)
#faster()
#slow()
# Print the `name` column of every Trunk row for a quick manual check.
print(dbSession.query(Trunk.name).all())

Binary file not shown.