sorted by overlap works, much simpler db model
This commit is contained in:
parent
ffc41d617b
commit
853b749fde
Binary file not shown.
|
|
@ -13,18 +13,7 @@ Base = declarative_base()
|
|||
Session = sessionmaker(bind=engine)
|
||||
|
||||
# https://docs.sqlalchemy.org/en/13/orm/basic_relationships.html#association-object
|
||||
class Link(Base):
    """Association object connecting one recipe with one ingredient.

    Each row pairs a recipe with an ingredient and carries the amount
    (and its measurement unit) that the recipe needs of that ingredient.
    """

    __tablename__ = 'link'

    # The two foreign keys together form the composite primary key.
    recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'), primary_key=True)
    ingredient_id = Column(Integer, ForeignKey('ingredient.ingredient_id'), primary_key=True)

    # Payload stored on the association itself; both are free-form text.
    ingredient_amount = Column('ingredient_amount', Text)
    ingredient_amount_mu = Column('ingredient_amount_mu', Text)  # measurement unit

    # Both sides of the association; the counterpart attributes are named
    # "ingredient" on Recipe and "recipe" on Ingredient.
    recipe = relationship("Recipe", back_populates="ingredient")
    ingredient = relationship("Ingredient", back_populates="recipe")

    def ingredients(self):
        """Return the name of the ingredient this link points at."""
        linked = self.ingredient
        return linked.name
|
||||
|
||||
class Recipe(Base):
|
||||
__tablename__ = "recipe"
|
||||
|
|
@ -33,7 +22,8 @@ class Recipe(Base):
|
|||
instructions = Column('instructions', Text)
|
||||
url = Column('url', Text)
|
||||
img = Column('img', LargeBinary)
|
||||
ingredient = relationship("Link", back_populates="recipe")
|
||||
ingredient = relationship("Ingredient", backref="recipe")
|
||||
trunk = relationship("Trunk", backref="recipe")
|
||||
|
||||
def ingredients(self):
|
||||
l = []
|
||||
|
|
@ -69,14 +59,18 @@ class Ingredient(Base):
|
|||
__tablename__ = "ingredient"
|
||||
ingredient_id = Column('ingredient_id', Integer, primary_key=True, autoincrement=True)
|
||||
name = Column('name', Text)
|
||||
recipe = relationship("Link", back_populates="ingredient")
|
||||
trunks = relationship("Trunk")
|
||||
ingredient_amount = Column('ingredient_amount', Text)
|
||||
ingredient_amount_mu = Column('ingredient_amount_mu', Text) # measurement unit
|
||||
|
||||
recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'))
|
||||
|
||||
|
||||
class Trunk(Base):
|
||||
__tablename__ = "trunk"
|
||||
trunk_id = Column('trunk_id', Integer, primary_key=True, autoincrement=True)
|
||||
name = Column('name', Text)
|
||||
ingredient_id = Column(Integer, ForeignKey('ingredient.ingredient_id'))
|
||||
|
||||
recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'))
|
||||
|
||||
|
||||
Base.metadata.create_all(engine)
|
||||
|
|
|
|||
27
migrate.py
27
migrate.py
|
|
@ -1,7 +1,21 @@
|
|||
import json
|
||||
import cv2
|
||||
import base64
|
||||
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
||||
import nltk as nltk
|
||||
from nltk.corpus import stopwords
|
||||
from application.db import Session, Recipe, Ingredient, Trunk
|
||||
|
||||
# Lazily-built (stopword set, stemmer) pair shared by every stemWord call.
_STEM_RESOURCES = None


def _stemResources():
    """Return the German stopword set and Snowball stemmer, building them once.

    Loading the stopword corpus and constructing the stemmer on every call is
    wasteful when stemWord runs once per ingredient during a migration.
    """
    global _STEM_RESOURCES
    if _STEM_RESOURCES is None:
        _STEM_RESOURCES = (
            frozenset(stopwords.words('german')),
            nltk.SnowballStemmer(language='german'),
        )
    return _STEM_RESOURCES


def stemWord(word):
    """Split an ingredient description into German word stems.

    Args:
        word: Free-text ingredient name; may contain several words,
            punctuation and quantities.

    Returns:
        A list with the Snowball stem of every token that is at least four
        characters long and is not a German stopword, in input order.
        The list is empty when no token qualifies.
    """
    stopset, snowball = _stemResources()
    return [
        snowball.stem(token)
        for token in nltk.word_tokenize(word)
        # The length filter also discards punctuation tokens such as "(",
        # ")" and ",", so they need no separate stop entry.
        # The stopword list is lowercase, so compare case-insensitively;
        # otherwise capitalised stopwords ("Oder", "Eine") would be stemmed.
        if len(token) >= 4 and token.lower() not in stopset
    ]
|
||||
|
||||
def migrate(path):
|
||||
recs = ""
|
||||
|
|
@ -9,6 +23,8 @@ def migrate(path):
|
|||
recs = json.load(file)
|
||||
|
||||
dbSession = Session()
|
||||
counter = 0
|
||||
leng = len(recs)
|
||||
for key, value in recs.items():
|
||||
name=key
|
||||
resString=value[0]
|
||||
|
|
@ -16,11 +32,16 @@ def migrate(path):
|
|||
img=value[3].encode()
|
||||
|
||||
r = Recipe(name=name, instructions=resString, url=link, img=img)
|
||||
|
||||
for x, y in value[1].items():
|
||||
a = Link(ingredient_amount=y)
|
||||
a.ingredient = Ingredient(name=x)
|
||||
a = Ingredient(name=x, ingredient_amount=y)
|
||||
r.ingredient.append(a)
|
||||
for x in stemWord(a.name):
|
||||
t = Trunk(name=x)
|
||||
r.trunk.append(t)
|
||||
dbSession.add(r)
|
||||
dbSession.commit()
|
||||
counter+=1
|
||||
print(counter/leng)
|
||||
|
||||
migrate('./data/recs.json')
|
||||
|
|
|
|||
8
mine.py
8
mine.py
|
|
@ -8,7 +8,7 @@ import random
|
|||
import traceback
|
||||
import cv2
|
||||
import base64
|
||||
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
||||
from application.db import Session, Recipe, Ingredient, Trunk
|
||||
import nltk as nltk
|
||||
from nltk.corpus import stopwords
|
||||
|
||||
|
|
@ -126,6 +126,8 @@ def getRecipe(links):
|
|||
sleep(random.randint(0, 5))
|
||||
return recs
|
||||
|
||||
|
||||
|
||||
def stemIngred():
|
||||
dbSession = Session()
|
||||
stopset = set(stopwords.words('german'))
|
||||
|
|
@ -135,7 +137,7 @@ def stemIngred():
|
|||
for x in dbSession.query(Ingredient).all():
|
||||
snowball = nltk.SnowballStemmer(language='german')
|
||||
for token in nltk.word_tokenize(x.name):
|
||||
if token in stopset or len(token) < 3:
|
||||
if token in stopset or len(token) < 4:
|
||||
continue
|
||||
stemmed = snowball.stem(token)
|
||||
|
||||
|
|
@ -153,7 +155,7 @@ with open('./data/links.json') as file:
|
|||
|
||||
|
||||
#recs = getRecipe(links)
|
||||
stemIngred()
|
||||
#stemIngred()
|
||||
|
||||
#with open('./data/recs.json', 'w', encoding="utf-8") as file:
|
||||
# json.dump(recs, file, ensure_ascii=False)
|
||||
|
|
|
|||
108
test.py
108
test.py
|
|
@ -1,12 +1,12 @@
|
|||
|
||||
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
||||
from application.db import Session, Recipe, Ingredient, Trunk
|
||||
import nltk as nltk
|
||||
from nltk.corpus import stopwords
|
||||
import time
|
||||
|
||||
dbSession = Session()
|
||||
inputArr = ["butter", "milch", "eier", "käse"]
|
||||
maxMissing = 4
|
||||
inputArr = ["butter", "milch", "eier", "mehl", "zucker"]
|
||||
maxMissing = 10
|
||||
|
||||
def slow():
|
||||
recipes = dbSession.query(Recipe).all()
|
||||
|
|
@ -36,32 +36,36 @@ def slow():
|
|||
# for xx in x:
|
||||
# print(xx)
|
||||
|
||||
def faster():
|
||||
def faster(inputArr):
|
||||
indx = {}
|
||||
|
||||
|
||||
for inpu in inputArr:
|
||||
ids = []
|
||||
for x in dbSession.query(Ingredient).filter(Ingredient.name.contains(inpu)).all():
|
||||
for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():
|
||||
if str(x[0]) not in indx:
|
||||
indx[str(x[0])] = 0
|
||||
|
||||
for y in x.recipe:
|
||||
|
||||
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
|
||||
continue
|
||||
if str(y.recipe_id) not in indx:
|
||||
indx[str(y.recipe_id)] = 0
|
||||
|
||||
indx[str(y.recipe_id)] += 1
|
||||
indx[str(x[0])] += 1
|
||||
|
||||
|
||||
outDict = {}
|
||||
for key, value in indx.items():
|
||||
ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
|
||||
outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
|
||||
|
||||
print(outDict)
|
||||
return(indx)
|
||||
|
||||
|
||||
def fastes():
|
||||
def fastes(inputArr):
|
||||
indx = {}
|
||||
|
||||
for inpu in inputArr:
|
||||
ids = []
|
||||
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
|
||||
|
||||
|
||||
if str(recipe_id[0]) not in indx:
|
||||
indx[str(recipe_id[0])] = 0
|
||||
|
||||
indx[str(recipe_id[0])] += 1
|
||||
return(indx)
|
||||
|
||||
def stemInput(inputArr):
|
||||
inputArr2 = []
|
||||
|
||||
snowball = nltk.SnowballStemmer(language='german')
|
||||
|
|
@ -72,26 +76,18 @@ def fastes():
|
|||
continue
|
||||
inputArr2.append(snowball.stem(word))
|
||||
|
||||
for inpu in inputArr2:
|
||||
ids = []
|
||||
for xx in dbSession.query(Trunk).filter(Trunk.name == inpu).all():
|
||||
for x in dbSession.query(Ingredient).filter(xx.ingredient_id == Ingredient.ingredient_id).all():
|
||||
for y in x.recipe:
|
||||
|
||||
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
|
||||
continue
|
||||
if str(y.recipe_id) not in indx:
|
||||
indx[str(y.recipe_id)] = 0
|
||||
|
||||
indx[str(y.recipe_id)] += 1
|
||||
|
||||
outDict = {}
|
||||
for key, value in indx.items():
|
||||
ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
|
||||
outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
|
||||
|
||||
print(outDict)
|
||||
return inputArr2
|
||||
#
|
||||
def printDict(indx, threshold=0.5):
    """Print the matched recipes ordered by ingredient overlap, best first.

    For every recipe id in *indx* the overlap between the module-level
    ``inputArr`` and the recipe's trunk names is computed with
    ``calcOverlay``. Recipes reaching *threshold* are printed as
    ``overlap (recipe name, recipe id, ingredient names)``.

    Results are collected in a list rather than a dict keyed by the overlap
    score, so recipes that share a score no longer overwrite each other.

    Args:
        indx: Mapping of recipe id (as a string) to match count, as returned
            by ``faster`` / ``fastes``. Only the keys are used here.
        threshold: Minimum overlap a recipe needs in order to be printed.
    """
    scored = []
    for key in sorted(indx):
        # Use the integer id in every query instead of mixing str and int.
        recipe_id = int(key)
        trunks = dbSession.query(Trunk.name).filter(Trunk.recipe_id == recipe_id).all()
        overlap = calcOverlay(inputArr, trunks)
        if overlap < threshold:
            # Skip the remaining queries for recipes that will not be shown.
            continue
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == recipe_id).first()
        ingredients = dbSession.query(Ingredient.name).filter(Ingredient.recipe_id == recipe_id).all()
        scored.append((overlap, (recipe.name, key, ingredients)))

    # Stable sort: recipes with equal overlap keep their recipe-id order.
    scored.sort(key=lambda entry: entry[0], reverse=True)
    for overlap, details in scored:
        print(overlap, details)
|
||||
|
||||
|
||||
def calcOverlay(l1, l2):
|
||||
snowball = nltk.SnowballStemmer(language='german')
|
||||
|
|
@ -102,31 +98,31 @@ def calcOverlay(l1, l2):
|
|||
counter = 0
|
||||
|
||||
for x in l2:
|
||||
for token in nltk.word_tokenize(x):
|
||||
if token in stopset:
|
||||
continue
|
||||
stemmed = snowball.stem(token)
|
||||
for l in l1:
|
||||
if l == stemmed:
|
||||
counter +=1
|
||||
|
||||
for l in l1:
|
||||
if l == x[0]:
|
||||
#print(l)
|
||||
counter +=1
|
||||
counter = counter / len(l2)
|
||||
return counter
|
||||
#
|
||||
|
||||
|
||||
start = time.time()
|
||||
slow()
|
||||
#slow()
|
||||
end = time.time()
|
||||
print("\n", end - start, "\n")
|
||||
|
||||
stemmed = stemInput(inputArr)
|
||||
|
||||
start = time.time()
|
||||
indx = faster(stemmed)
|
||||
end = time.time()
|
||||
printDict(indx)
|
||||
print("\n", end - start, "\n")
|
||||
|
||||
|
||||
start = time.time()
|
||||
faster()
|
||||
end = time.time()
|
||||
print("\n", end - start, "\n")
|
||||
|
||||
|
||||
start = time.time()
|
||||
fastes()
|
||||
indx = fastes(stemmed)
|
||||
end = time.time()
|
||||
printDict(indx)
|
||||
print("\n", end - start, "\n")
|
||||
Loading…
Reference in New Issue