sorted by overlap works, much simpler db model
This commit is contained in:
parent
ffc41d617b
commit
853b749fde
Binary file not shown.
|
|
@ -13,18 +13,7 @@ Base = declarative_base()
|
||||||
Session = sessionmaker(bind=engine)
|
Session = sessionmaker(bind=engine)
|
||||||
|
|
||||||
# https://docs.sqlalchemy.org/en/13/orm/basic_relationships.html#association-object
|
# https://docs.sqlalchemy.org/en/13/orm/basic_relationships.html#association-object
|
||||||
class Link(Base):
|
|
||||||
__tablename__ = 'link'
|
|
||||||
recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'), primary_key=True)
|
|
||||||
ingredient_id = Column(Integer, ForeignKey('ingredient.ingredient_id'), primary_key=True)
|
|
||||||
ingredient_amount = Column('ingredient_amount', Text)
|
|
||||||
ingredient_amount_mu = Column('ingredient_amount_mu', Text) # measurement unit
|
|
||||||
|
|
||||||
recipe = relationship("Recipe", back_populates="ingredient")
|
|
||||||
ingredient = relationship("Ingredient", back_populates="recipe")
|
|
||||||
|
|
||||||
def ingredients(self):
|
|
||||||
return self.ingredient.name
|
|
||||||
|
|
||||||
class Recipe(Base):
|
class Recipe(Base):
|
||||||
__tablename__ = "recipe"
|
__tablename__ = "recipe"
|
||||||
|
|
@ -33,7 +22,8 @@ class Recipe(Base):
|
||||||
instructions = Column('instructions', Text)
|
instructions = Column('instructions', Text)
|
||||||
url = Column('url', Text)
|
url = Column('url', Text)
|
||||||
img = Column('img', LargeBinary)
|
img = Column('img', LargeBinary)
|
||||||
ingredient = relationship("Link", back_populates="recipe")
|
ingredient = relationship("Ingredient", backref="recipe")
|
||||||
|
trunk = relationship("Trunk", backref="recipe")
|
||||||
|
|
||||||
def ingredients(self):
|
def ingredients(self):
|
||||||
l = []
|
l = []
|
||||||
|
|
@ -69,14 +59,18 @@ class Ingredient(Base):
|
||||||
__tablename__ = "ingredient"
|
__tablename__ = "ingredient"
|
||||||
ingredient_id = Column('ingredient_id', Integer, primary_key=True, autoincrement=True)
|
ingredient_id = Column('ingredient_id', Integer, primary_key=True, autoincrement=True)
|
||||||
name = Column('name', Text)
|
name = Column('name', Text)
|
||||||
recipe = relationship("Link", back_populates="ingredient")
|
ingredient_amount = Column('ingredient_amount', Text)
|
||||||
trunks = relationship("Trunk")
|
ingredient_amount_mu = Column('ingredient_amount_mu', Text) # measurement unit
|
||||||
|
|
||||||
|
recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'))
|
||||||
|
|
||||||
|
|
||||||
class Trunk(Base):
|
class Trunk(Base):
|
||||||
__tablename__ = "trunk"
|
__tablename__ = "trunk"
|
||||||
trunk_id = Column('trunk_id', Integer, primary_key=True, autoincrement=True)
|
trunk_id = Column('trunk_id', Integer, primary_key=True, autoincrement=True)
|
||||||
name = Column('name', Text)
|
name = Column('name', Text)
|
||||||
ingredient_id = Column(Integer, ForeignKey('ingredient.ingredient_id'))
|
|
||||||
|
recipe_id = Column(Integer, ForeignKey('recipe.recipe_id'))
|
||||||
|
|
||||||
|
|
||||||
Base.metadata.create_all(engine)
|
Base.metadata.create_all(engine)
|
||||||
|
|
|
||||||
27
migrate.py
27
migrate.py
|
|
@ -1,7 +1,21 @@
|
||||||
import json
|
import json
|
||||||
import cv2
|
import cv2
|
||||||
import base64
|
import base64
|
||||||
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
import nltk as nltk
|
||||||
|
from nltk.corpus import stopwords
|
||||||
|
from application.db import Session, Recipe, Ingredient, Trunk
|
||||||
|
|
||||||
|
def stemWord(word):
    """Return the German Snowball stems of the significant tokens in *word*.

    The text is split with ``nltk.word_tokenize``. A token is dropped when it
    is a German stopword, one of the characters ``(``, ``)`` or ``,``, or
    shorter than four characters. The remaining tokens are stemmed and
    returned as a list in their original order.
    """
    # Stopwords plus the three punctuation characters "(", ")" and ",".
    ignored = set(stopwords.words('german')) | set("(),")
    stemmer = nltk.SnowballStemmer(language='german')
    return [
        stemmer.stem(tok)
        for tok in nltk.word_tokenize(word)
        if tok not in ignored and len(tok) >= 4
    ]
|
||||||
|
|
||||||
def migrate(path):
|
def migrate(path):
|
||||||
recs = ""
|
recs = ""
|
||||||
|
|
@ -9,6 +23,8 @@ def migrate(path):
|
||||||
recs = json.load(file)
|
recs = json.load(file)
|
||||||
|
|
||||||
dbSession = Session()
|
dbSession = Session()
|
||||||
|
counter = 0
|
||||||
|
leng = len(recs)
|
||||||
for key, value in recs.items():
|
for key, value in recs.items():
|
||||||
name=key
|
name=key
|
||||||
resString=value[0]
|
resString=value[0]
|
||||||
|
|
@ -16,11 +32,16 @@ def migrate(path):
|
||||||
img=value[3].encode()
|
img=value[3].encode()
|
||||||
|
|
||||||
r = Recipe(name=name, instructions=resString, url=link, img=img)
|
r = Recipe(name=name, instructions=resString, url=link, img=img)
|
||||||
|
|
||||||
for x, y in value[1].items():
|
for x, y in value[1].items():
|
||||||
a = Link(ingredient_amount=y)
|
a = Ingredient(name=x, ingredient_amount=y)
|
||||||
a.ingredient = Ingredient(name=x)
|
|
||||||
r.ingredient.append(a)
|
r.ingredient.append(a)
|
||||||
|
for x in stemWord(a.name):
|
||||||
|
t = Trunk(name=x)
|
||||||
|
r.trunk.append(t)
|
||||||
dbSession.add(r)
|
dbSession.add(r)
|
||||||
dbSession.commit()
|
dbSession.commit()
|
||||||
|
counter+=1
|
||||||
|
print(counter/leng)
|
||||||
|
|
||||||
migrate('./data/recs.json')
|
migrate('./data/recs.json')
|
||||||
|
|
|
||||||
8
mine.py
8
mine.py
|
|
@ -8,7 +8,7 @@ import random
|
||||||
import traceback
|
import traceback
|
||||||
import cv2
|
import cv2
|
||||||
import base64
|
import base64
|
||||||
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
from application.db import Session, Recipe, Ingredient, Trunk
|
||||||
import nltk as nltk
|
import nltk as nltk
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
|
|
||||||
|
|
@ -126,6 +126,8 @@ def getRecipe(links):
|
||||||
sleep(random.randint(0, 5))
|
sleep(random.randint(0, 5))
|
||||||
return recs
|
return recs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def stemIngred():
|
def stemIngred():
|
||||||
dbSession = Session()
|
dbSession = Session()
|
||||||
stopset = set(stopwords.words('german'))
|
stopset = set(stopwords.words('german'))
|
||||||
|
|
@ -135,7 +137,7 @@ def stemIngred():
|
||||||
for x in dbSession.query(Ingredient).all():
|
for x in dbSession.query(Ingredient).all():
|
||||||
snowball = nltk.SnowballStemmer(language='german')
|
snowball = nltk.SnowballStemmer(language='german')
|
||||||
for token in nltk.word_tokenize(x.name):
|
for token in nltk.word_tokenize(x.name):
|
||||||
if token in stopset or len(token) < 3:
|
if token in stopset or len(token) < 4:
|
||||||
continue
|
continue
|
||||||
stemmed = snowball.stem(token)
|
stemmed = snowball.stem(token)
|
||||||
|
|
||||||
|
|
@ -153,7 +155,7 @@ with open('./data/links.json') as file:
|
||||||
|
|
||||||
|
|
||||||
#recs = getRecipe(links)
|
#recs = getRecipe(links)
|
||||||
stemIngred()
|
#stemIngred()
|
||||||
|
|
||||||
#with open('./data/recs.json', 'w', encoding="utf-8") as file:
|
#with open('./data/recs.json', 'w', encoding="utf-8") as file:
|
||||||
# json.dump(recs, file, ensure_ascii=False)
|
# json.dump(recs, file, ensure_ascii=False)
|
||||||
|
|
|
||||||
108
test.py
108
test.py
|
|
@ -1,12 +1,12 @@
|
||||||
|
|
||||||
from application.db import Session, Recipe, Ingredient, Link, Trunk
|
from application.db import Session, Recipe, Ingredient, Trunk
|
||||||
import nltk as nltk
|
import nltk as nltk
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
import time
|
import time
|
||||||
|
|
||||||
dbSession = Session()
|
dbSession = Session()
|
||||||
inputArr = ["butter", "milch", "eier", "käse"]
|
inputArr = ["butter", "milch", "eier", "mehl", "zucker"]
|
||||||
maxMissing = 4
|
maxMissing = 10
|
||||||
|
|
||||||
def slow():
|
def slow():
|
||||||
recipes = dbSession.query(Recipe).all()
|
recipes = dbSession.query(Recipe).all()
|
||||||
|
|
@ -36,32 +36,36 @@ def slow():
|
||||||
# for xx in x:
|
# for xx in x:
|
||||||
# print(xx)
|
# print(xx)
|
||||||
|
|
||||||
def faster():
|
def faster(inputArr):
|
||||||
indx = {}
|
indx = {}
|
||||||
|
|
||||||
|
|
||||||
for inpu in inputArr:
|
for inpu in inputArr:
|
||||||
ids = []
|
ids = []
|
||||||
for x in dbSession.query(Ingredient).filter(Ingredient.name.contains(inpu)).all():
|
for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():
|
||||||
|
if str(x[0]) not in indx:
|
||||||
|
indx[str(x[0])] = 0
|
||||||
|
|
||||||
for y in x.recipe:
|
indx[str(x[0])] += 1
|
||||||
|
|
||||||
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
|
|
||||||
continue
|
|
||||||
if str(y.recipe_id) not in indx:
|
|
||||||
indx[str(y.recipe_id)] = 0
|
|
||||||
|
|
||||||
indx[str(y.recipe_id)] += 1
|
|
||||||
|
|
||||||
|
|
||||||
outDict = {}
|
return(indx)
|
||||||
for key, value in indx.items():
|
|
||||||
ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
|
|
||||||
outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
|
|
||||||
|
|
||||||
print(outDict)
|
|
||||||
|
|
||||||
|
def fastes(inputArr):
|
||||||
def fastes():
|
|
||||||
indx = {}
|
indx = {}
|
||||||
|
|
||||||
|
for inpu in inputArr:
|
||||||
|
ids = []
|
||||||
|
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
|
||||||
|
|
||||||
|
|
||||||
|
if str(recipe_id[0]) not in indx:
|
||||||
|
indx[str(recipe_id[0])] = 0
|
||||||
|
|
||||||
|
indx[str(recipe_id[0])] += 1
|
||||||
|
return(indx)
|
||||||
|
|
||||||
|
def stemInput(inputArr):
|
||||||
inputArr2 = []
|
inputArr2 = []
|
||||||
|
|
||||||
snowball = nltk.SnowballStemmer(language='german')
|
snowball = nltk.SnowballStemmer(language='german')
|
||||||
|
|
@ -72,26 +76,18 @@ def fastes():
|
||||||
continue
|
continue
|
||||||
inputArr2.append(snowball.stem(word))
|
inputArr2.append(snowball.stem(word))
|
||||||
|
|
||||||
for inpu in inputArr2:
|
return inputArr2
|
||||||
ids = []
|
|
||||||
for xx in dbSession.query(Trunk).filter(Trunk.name == inpu).all():
|
|
||||||
for x in dbSession.query(Ingredient).filter(xx.ingredient_id == Ingredient.ingredient_id).all():
|
|
||||||
for y in x.recipe:
|
|
||||||
|
|
||||||
if dbSession.query(Link).filter(Link.recipe_id==y.recipe_id).count() > len(inputArr) + maxMissing:
|
|
||||||
continue
|
|
||||||
if str(y.recipe_id) not in indx:
|
|
||||||
indx[str(y.recipe_id)] = 0
|
|
||||||
|
|
||||||
indx[str(y.recipe_id)] += 1
|
|
||||||
|
|
||||||
outDict = {}
|
|
||||||
for key, value in indx.items():
|
|
||||||
ingred = dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().ingredients()
|
|
||||||
outDict[calcOverlay(inputArr, ingred)] = (dbSession.query(Recipe).filter(Recipe.recipe_id==key).first().name, key, ingred)
|
|
||||||
|
|
||||||
print(outDict)
|
|
||||||
#
|
#
|
||||||
|
def printDict(indx):
    """Print every recipe in *indx* whose overlap score is at least 0.5.

    For each key of *indx* (a recipe id; it is converted with ``int()`` for
    the ``Trunk`` lookup) the recipe's trunk names are loaded and passed,
    together with the module-level ``inputArr``, to ``calcOverlay``. Each
    printed line shows that score followed by a tuple of
    ``(recipe name, key, ingredient-name rows)``.

    The values of *indx* are not used here.

    Results are kept in a list rather than a dict keyed by the score: with a
    dict, two recipes that reach the same score overwrite each other and only
    the last one would be printed.
    """
    results = []
    # Iterate in sorted key order, as before; keys are unique, so sorting the
    # keys alone gives the same order as sorting the items.
    for key in sorted(indx):
        ingred = dbSession.query(Trunk.name).filter(Trunk.recipe_id == int(key)).all()
        overlap = calcOverlay(inputArr, ingred)
        recipe = dbSession.query(Recipe).filter(Recipe.recipe_id == key).first()
        ingredient_names = dbSession.query(Ingredient.name).filter(Ingredient.recipe_id == key).all()
        results.append((overlap, (recipe.name, key, ingredient_names)))

    for overlap, entry in results:
        if overlap >= 0.5:
            print(overlap, entry)
|
||||||
|
|
||||||
|
|
||||||
def calcOverlay(l1, l2):
|
def calcOverlay(l1, l2):
|
||||||
snowball = nltk.SnowballStemmer(language='german')
|
snowball = nltk.SnowballStemmer(language='german')
|
||||||
|
|
@ -102,31 +98,31 @@ def calcOverlay(l1, l2):
|
||||||
counter = 0
|
counter = 0
|
||||||
|
|
||||||
for x in l2:
|
for x in l2:
|
||||||
for token in nltk.word_tokenize(x):
|
for l in l1:
|
||||||
if token in stopset:
|
if l == x[0]:
|
||||||
continue
|
#print(l)
|
||||||
stemmed = snowball.stem(token)
|
counter +=1
|
||||||
for l in l1:
|
counter = counter / len(l2)
|
||||||
if l == stemmed:
|
|
||||||
counter +=1
|
|
||||||
|
|
||||||
return counter
|
return counter
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
slow()
|
#slow()
|
||||||
end = time.time()
|
end = time.time()
|
||||||
print("\n", end - start, "\n")
|
print("\n", end - start, "\n")
|
||||||
|
|
||||||
|
stemmed = stemInput(inputArr)
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
indx = faster(stemmed)
|
||||||
|
end = time.time()
|
||||||
|
printDict(indx)
|
||||||
|
print("\n", end - start, "\n")
|
||||||
|
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
faster()
|
indx = fastes(stemmed)
|
||||||
end = time.time()
|
|
||||||
print("\n", end - start, "\n")
|
|
||||||
|
|
||||||
|
|
||||||
start = time.time()
|
|
||||||
fastes()
|
|
||||||
end = time.time()
|
end = time.time()
|
||||||
|
printDict(indx)
|
||||||
print("\n", end - start, "\n")
|
print("\n", end - start, "\n")
|
||||||
Loading…
Reference in New Issue