added stemmed table

2020-04-11 22:49:43 +02:00 · 2020-04-11 22:49:43 +02:00 · 17361c16ee
parent 5289681b09
commit 17361c16ee
6 changed files with 34 additions and 33400 deletions
--- a/application/pycache/db.cpython-37.pyc
+++ b/application/pycache/db.cpython-37.pyc
--- a/application/db.py
+++ b/application/db.py
@ -67,9 +67,16 @@ class Recipe(Base):
 class Ingredient(Base):
    __tablename__ = "ingredient"
-    ingredient_id = Column('ingredient_id', Integer,  primary_key=True)
+    ingredient_id = Column('ingredient_id', Integer,  primary_key=True, autoincrement=True)
    name = Column('name', Text)
    recipe = relationship("Link", back_populates="ingredient")
    trunks = relationship("Trunk")
 class Trunk(Base):
    """Word stem ("trunk") attached to a single ingredient; mapped to table "trunk".

    Rows are reachable from an ingredient through ``Ingredient.trunks``.
    In this commit ``stemIngred`` (test.py) fills ``name`` with
    Snowball-stemmed tokens taken from the ingredient's name.
    """
    __tablename__ = "trunk"
    # Integer primary key, auto-incremented.
    trunk_id = Column('trunk_id', Integer,  primary_key=True, autoincrement=True)
    # The stem text itself.
    name = Column('name', Text)
    # Foreign key to the owning row: ingredient.ingredient_id.
    ingredient_id = Column(Integer, ForeignKey('ingredient.ingredient_id'))
 Base.metadata.create_all(engine)
--- a/data/recs.json
+++ b/data/recs.json
--- a/mine.py
+++ b/mine.py
@ -8,7 +8,7 @@ import random
 import traceback
 import cv2
 import base64
-from application.db import Session, Recipe, Ingredient, Link
+from application.db import Session, Recipe, Ingredient, Link, Trunk
 header_values = {
    'name': 'Michael Foord',
--- a/test.py
+++ b/test.py
@ -1,5 +1,7 @@
-from application.db import Session, Recipe, Ingredient, Link
+from application.db import Session, Recipe, Ingredient, Link, Trunk
 import nltk as nltk
 from nltk.corpus import stopwords
 dbSession = Session()
 inputArr = ["kartoffeln", "zwiebel", "steak", "würfel"] 
@ -49,9 +51,26 @@ def faster():
    for key, value in indx.items():
        if value >= len(inputArr):
            print(dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients())
            #print(key)
-faster()    
+
-slow()
+#
 def stemIngred():
    """Create stemmed ``Trunk`` rows for every ingredient in the database.

    Each ingredient name is tokenized with NLTK; German stop words, the
    characters "(", ")" and ",", and tokens shorter than three characters
    are dropped. Every remaining token is reduced to its German Snowball
    stem and appended to the ingredient's ``trunks`` relationship.

    Prints ``ingredient_id / count`` after each ingredient as a rough
    progress indicator. Returns nothing; the result is the persisted rows.
    """
    stopset = set(stopwords.words('german'))
    stopset |= set("(),")
    # The stemmer is loop-invariant: build it once instead of per ingredient.
    snowball = nltk.SnowballStemmer(language='german')
    count = dbSession.query(Ingredient).count()
    for x in dbSession.query(Ingredient).all():
        # The name column is nullable; treat a missing name as "no tokens"
        # instead of letting word_tokenize fail on None.
        for token in nltk.word_tokenize(x.name or ""):
            if token in stopset or len(token) < 3:
                continue
            x.trunks.append(Trunk(name=snowball.stem(token)))
        # One commit per ingredient rather than one per token: the same rows
        # are stored with far fewer transactions.
        dbSession.commit()
        print(x.ingredient_id/count)
 #faster()    
 #slow()
 print(dbSession.query(Trunk.name).all())
--- a/test.sqlite
+++ b/test.sqlite