stability + started image download
This commit is contained in:
parent
68added41f
commit
34a0707964
|
|
@ -1,2 +1,4 @@
|
|||
|
||||
data/recs.json
|
||||
|
||||
backup.sql
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -5,6 +5,7 @@ import os
|
|||
from json import dumps
|
||||
import application.endpoints as endpoints
|
||||
import application.config as config
|
||||
from application.db import Session
|
||||
|
||||
app = Flask(__name__)
|
||||
api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger')
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -7,7 +7,7 @@ import enum
|
|||
from flask import Flask
|
||||
import time
|
||||
|
||||
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8")
|
||||
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8", pool_size=1000, max_overflow=0)
|
||||
|
||||
Base = declarative_base()
|
||||
Session = sessionmaker(bind=engine)
|
||||
|
|
@ -21,7 +21,7 @@ class Recipe(Base):
|
|||
name = Column('name', Text)
|
||||
instructions = Column('instructions', Text)
|
||||
url = Column('url', Text)
|
||||
img = Column('img', LargeBinary)
|
||||
img = Column('img', LargeBinary(length=(2**32)-1))
|
||||
ingredient = relationship("Ingredient", backref="recipe")
|
||||
trunk = relationship("Trunk", backref="recipe")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from flask_restful import Resource, reqparse
|
||||
import flask
|
||||
from flask import g
|
||||
import requests
|
||||
import application.config as config
|
||||
import json
|
||||
|
|
@ -12,28 +13,28 @@ import time
|
|||
class RecipeList(Resource):
|
||||
def get(self):
|
||||
""" """
|
||||
try:
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument('ingred', type=str, action='append')
|
||||
args = parser.parse_args()
|
||||
ingreds = args["ingred"]
|
||||
g.session = Session()
|
||||
|
||||
ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr]
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument('ingred', type=str, action='append')
|
||||
args = parser.parse_args()
|
||||
ingreds = args["ingred"]
|
||||
|
||||
start = time.time()
|
||||
indx = search.fastes(ingreds)
|
||||
end = time.time()
|
||||
print("get recipes",end - start, "\n")
|
||||
ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr]
|
||||
|
||||
start = time.time()
|
||||
recs = search.getRecDict(indx, ingreds)
|
||||
end = time.time()
|
||||
print("calc overlay",end - start, "\n")
|
||||
|
||||
return flask.make_response(flask.jsonify({'data': recs}), 200)
|
||||
start = time.time()
|
||||
indx = search.fastes(ingreds)
|
||||
end = time.time()
|
||||
print("get recipes",end - start, "\n")
|
||||
|
||||
start = time.time()
|
||||
recs = search.getRecDict(indx, ingreds)
|
||||
end = time.time()
|
||||
print("calc overlay",end - start, "\n")
|
||||
|
||||
g.session.commit()
|
||||
g.session.close()
|
||||
return flask.make_response(flask.jsonify({'data': recs}), 200)
|
||||
|
||||
except Exception as e:
|
||||
print("error: -", e)
|
||||
return flask.make_response(flask.jsonify({'error': str(e)}), 400)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
|
||||
|
||||
from urllib.parse import urljoin
|
||||
from lxml import html
|
||||
import requests
|
||||
import json
|
||||
from time import sleep
|
||||
import random
|
||||
import traceback
|
||||
import cv2
|
||||
import base64
|
||||
from application.db import Session, Recipe
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as ec
|
||||
import cv2
|
||||
from urllib.request import urlopen
|
||||
import numpy as np
|
||||
|
||||
def getImages():
|
||||
chromePath = 'C:/tools/chromedriver.exe'
|
||||
chrome_options = webdriver.ChromeOptions()
|
||||
chrome_options.add_argument('--headless')
|
||||
chrome_options.add_argument("--log-level=3")
|
||||
|
||||
driver = webdriver.Chrome(chromePath, chrome_options=chrome_options)
|
||||
|
||||
dbSession = Session()
|
||||
counter = 0
|
||||
maxC = dbSession.query(Recipe).count()
|
||||
for recipe in dbSession.query(Recipe).all():
|
||||
|
||||
url = recipe.url
|
||||
string1 = '//*[@id="recipe-image-carousel"]/div/div[1]/div[9]/div/a/amp-img'
|
||||
|
||||
driver.get(url)
|
||||
element = WebDriverWait(driver, 30).until(
|
||||
ec.presence_of_element_located((
|
||||
By.XPATH, string1)))
|
||||
|
||||
|
||||
src = driver.find_element_by_xpath(string1).get_attribute("src")
|
||||
print(src)
|
||||
resp = urlopen(src)
|
||||
image = np.asarray(bytearray(resp.read()), dtype="uint8")
|
||||
image = cv2.imdecode(image, cv2.IMREAD_COLOR)
|
||||
|
||||
ret, jpeg = cv2.imencode(".jpg", image)
|
||||
img = base64.b64encode(jpeg)
|
||||
recipe.img = img
|
||||
dbSession.flush()
|
||||
dbSession.commit()
|
||||
counter +=1
|
||||
print(counter/maxC)
|
||||
sleep(5)
|
||||
|
||||
|
||||
|
||||
getImages()
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
[INFO - 2020-04-20T20:11:22.708Z] GhostDriver - Main - running on port 63975
|
||||
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
|
||||
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.customHeaders: - {}
|
||||
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"windows-10-32bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}}
|
||||
[INFO - 2020-04-20T20:11:24.918Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: 1c4df520-8343-11ea-a8a6-19d2d02d716b
|
||||
[ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - msg: Unhandled promise rejection TypeError: undefined is not an object (evaluating 'w.set')
|
||||
|
||||
phantomjs://platform/console++.js:263 in error
|
||||
[ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - stack:
|
||||
(anonymous function) (https://www.chefkoch.de/rezepte/207581086939665/Nannie-s-schnelle-Kuchen.html:8626)
|
||||
|
||||
phantomjs://platform/console++.js:263 in error
|
||||
[ERROR - 2020-04-20T20:11:28.646Z] WebElementLocator - _handleLocateCommand - Element(s) NOT Found: GAVE UP. Search Stop Time: 15874134[INFO - 2020-04-20T20:12:30.427Z] SessionManagerReqHand - [INFO - 2020-04-20T20:13:35.766Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:17:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:18:35.767Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:22:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:23:35.768Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:27:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:28:35.771Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:32:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:33:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:37:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:38:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:42:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:43:35.784Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:47:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:48:35.787Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:52:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:53:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:57:30.440Z] SessionManagerReqHand - [INFO - 2020-04-20T20:58:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:02:30.440Z] SessionManagerReqHand - [INFO - 2020-04-20T21:03:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:07:30.445Z] SessionManagerReqHand - [INFO - 2020-04-20T21:08:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:12:30.457Z] SessionManagerReqHand - [INFO - 2020-04-20T21:13:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:17:30.457Z] SessionManagerReqHand - [INFO - 2020-04-20T21:18:35.798Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:22:30.459Z] SessionManagerReqHand - [INFO - 2020-04-20T21:23:35.803Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:27:30.460Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
OW
|
||||
O - 2020-04-20T21:22:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:27:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
ns - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:07:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:12:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:17:14.190Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:22:14.192Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:27:14.196Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:19:25.242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:24:25.249Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
[INFO - 2020-04-20T21:27:31.799Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
179Z] SessionManagerReqHand - [INFO - 2020-04-20T21:11:22.721Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:13:54.176Z] SessionManagerReqHand - [INFO - 2020-04-20T21:16:22.724Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:18:54.177Z] SessionManagerReqHand - [INFO - 2020-04-20T21:21:22.725Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:23:54.181Z] SessionManagerReqHand - [INFO - 2020-04-20T21:26:22.728Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||
20
search.py
20
search.py
|
|
@ -1,30 +1,17 @@
|
|||
|
||||
from application.db import Session, Recipe, Ingredient, Trunk
|
||||
from flask import g
|
||||
import nltk as nltk
|
||||
from nltk.corpus import stopwords
|
||||
import time
|
||||
import heapq
|
||||
from collections import Counter
|
||||
|
||||
dbSession = Session()
|
||||
|
||||
|
||||
def faster(inputArr):
|
||||
indx = {}
|
||||
|
||||
for inpu in inputArr:
|
||||
ids = []
|
||||
for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():
|
||||
if str(x[0]) not in indx:
|
||||
indx[str(x[0])] = 0
|
||||
|
||||
indx[str(x[0])] += 1
|
||||
|
||||
return(indx)
|
||||
|
||||
def fastes(inputArr):
|
||||
indx = {}
|
||||
|
||||
dbSession = g.session
|
||||
for inpu in inputArr:
|
||||
ids = []
|
||||
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
|
||||
|
|
@ -47,7 +34,8 @@ def stemInput(inputArr):
|
|||
#
|
||||
|
||||
def getRecDict(indx, inputArr):
|
||||
#inputArr = stem(inputArr)
|
||||
dbSession = g.session
|
||||
|
||||
outDict = {}
|
||||
k = Counter(indx)
|
||||
# Finding 1000 highest values
|
||||
|
|
|
|||
Loading…
Reference in New Issue