stability + started image download
This commit is contained in:
parent
68added41f
commit
34a0707964
|
|
@ -1,2 +1,4 @@
|
||||||
|
|
||||||
data/recs.json
|
data/recs.json
|
||||||
|
|
||||||
|
backup.sql
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
|
|
@ -5,6 +5,7 @@ import os
|
||||||
from json import dumps
|
from json import dumps
|
||||||
import application.endpoints as endpoints
|
import application.endpoints as endpoints
|
||||||
import application.config as config
|
import application.config as config
|
||||||
|
from application.db import Session
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger')
|
api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger')
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -7,7 +7,7 @@ import enum
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
import time
|
import time
|
||||||
|
|
||||||
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8")
|
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8", pool_size=1000, max_overflow=0)
|
||||||
|
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
Session = sessionmaker(bind=engine)
|
Session = sessionmaker(bind=engine)
|
||||||
|
|
@ -21,7 +21,7 @@ class Recipe(Base):
|
||||||
name = Column('name', Text)
|
name = Column('name', Text)
|
||||||
instructions = Column('instructions', Text)
|
instructions = Column('instructions', Text)
|
||||||
url = Column('url', Text)
|
url = Column('url', Text)
|
||||||
img = Column('img', LargeBinary)
|
img = Column('img', LargeBinary(length=(2**32)-1))
|
||||||
ingredient = relationship("Ingredient", backref="recipe")
|
ingredient = relationship("Ingredient", backref="recipe")
|
||||||
trunk = relationship("Trunk", backref="recipe")
|
trunk = relationship("Trunk", backref="recipe")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
from flask_restful import Resource, reqparse
|
from flask_restful import Resource, reqparse
|
||||||
import flask
|
import flask
|
||||||
|
from flask import g
|
||||||
import requests
|
import requests
|
||||||
import application.config as config
|
import application.config as config
|
||||||
import json
|
import json
|
||||||
|
|
@ -12,7 +13,8 @@ import time
|
||||||
class RecipeList(Resource):
|
class RecipeList(Resource):
|
||||||
def get(self):
|
def get(self):
|
||||||
""" """
|
""" """
|
||||||
try:
|
g.session = Session()
|
||||||
|
|
||||||
parser = reqparse.RequestParser()
|
parser = reqparse.RequestParser()
|
||||||
parser.add_argument('ingred', type=str, action='append')
|
parser.add_argument('ingred', type=str, action='append')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
@ -30,10 +32,9 @@ class RecipeList(Resource):
|
||||||
end = time.time()
|
end = time.time()
|
||||||
print("calc overlay",end - start, "\n")
|
print("calc overlay",end - start, "\n")
|
||||||
|
|
||||||
|
g.session.commit()
|
||||||
|
g.session.close()
|
||||||
return flask.make_response(flask.jsonify({'data': recs}), 200)
|
return flask.make_response(flask.jsonify({'data': recs}), 200)
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print("error: -", e)
|
|
||||||
return flask.make_response(flask.jsonify({'error': str(e)}), 400)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
|
||||||
|
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
from lxml import html
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from time import sleep
|
||||||
|
import random
|
||||||
|
import traceback
|
||||||
|
import cv2
|
||||||
|
import base64
|
||||||
|
from application.db import Session, Recipe
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as ec
|
||||||
|
import cv2
|
||||||
|
from urllib.request import urlopen
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def getImages():
    """Fetch the carousel image of every recipe and store it on the recipe row.

    For each ``Recipe`` in the database the recipe's ``url`` is opened in a
    headless Chrome instance, the ``src`` of the carousel image element is
    read, the image is downloaded, re-encoded as JPEG and written to
    ``recipe.img`` as base64-encoded bytes. Each recipe is committed
    individually so that progress survives an interrupted run.

    A failure on a single recipe (page timeout, missing image element,
    download or decode error) is logged with its traceback and the recipe is
    skipped; the remaining recipes are still processed. The browser and the
    database session are always released, even when the run is aborted.
    """
    chromePath = 'C:/tools/chromedriver.exe'
    image_xpath = '//*[@id="recipe-image-carousel"]/div/div[1]/div[9]/div/a/amp-img'

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument("--log-level=3")

    driver = webdriver.Chrome(chromePath, chrome_options=chrome_options)
    try:
        dbSession = Session()
        try:
            # Load the recipes once; the total is only needed for the
            # progress output, so a separate COUNT query is unnecessary.
            recipes = dbSession.query(Recipe).all()
            maxC = len(recipes)

            for counter, recipe in enumerate(recipes, start=1):
                try:
                    driver.get(recipe.url)
                    # The wait returns the located element, so no second
                    # lookup is needed to read its attributes.
                    element = WebDriverWait(driver, 30).until(
                        ec.presence_of_element_located((By.XPATH, image_xpath)))
                    src = element.get_attribute("src")
                    print(src)

                    # Close the HTTP response deterministically and do not
                    # hang forever on an unresponsive image host.
                    with urlopen(src, timeout=30) as resp:
                        raw = np.asarray(bytearray(resp.read()), dtype="uint8")

                    image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
                    if image is None:
                        raise ValueError("could not decode image: " + str(src))

                    ok, jpeg = cv2.imencode(".jpg", image)
                    if not ok:
                        raise ValueError("could not encode image: " + str(src))

                    recipe.img = base64.b64encode(jpeg)
                    # commit() flushes pending changes itself.
                    dbSession.commit()
                except Exception:
                    # Best effort per recipe: report, discard the failed
                    # transaction and carry on with the next recipe.
                    traceback.print_exc()
                    dbSession.rollback()

                print(counter / maxC)
                # Throttle requests to the scraped site.
                sleep(5)
        finally:
            dbSession.close()
    finally:
        driver.quit()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
getImages()
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
[INFO - 2020-04-20T20:11:22.708Z] GhostDriver - Main - running on port 63975
|
||||||
|
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
|
||||||
|
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.customHeaders: - {}
|
||||||
|
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"windows-10-32bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}}
|
||||||
|
[INFO - 2020-04-20T20:11:24.918Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: 1c4df520-8343-11ea-a8a6-19d2d02d716b
|
||||||
|
[ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - msg: Unhandled promise rejection TypeError: undefined is not an object (evaluating 'w.set')
|
||||||
|
|
||||||
|
phantomjs://platform/console++.js:263 in error
|
||||||
|
[ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - stack:
|
||||||
|
(anonymous function) (https://www.chefkoch.de/rezepte/207581086939665/Nannie-s-schnelle-Kuchen.html:8626)
|
||||||
|
|
||||||
|
phantomjs://platform/console++.js:263 in error
|
||||||
|
[ERROR - 2020-04-20T20:11:28.646Z] WebElementLocator - _handleLocateCommand - Element(s) NOT Found: GAVE UP. Search Stop Time: 15874134[INFO - 2020-04-20T20:12:30.427Z] SessionManagerReqHand - [INFO - 2020-04-20T20:13:35.766Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:17:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:18:35.767Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:22:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:23:35.768Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:27:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:28:35.771Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:32:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:33:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:37:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:38:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:42:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:43:35.784Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:47:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:48:35.787Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:52:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:53:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:57:30.440Z] SessionManagerReqHand - [INFO - 2020-04-20T20:58:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:02:30.440Z] SessionManagerReqHand - [INFO - 2020-04-20T21:03:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:07:30.445Z] SessionManagerReqHand - [INFO - 2020-04-20T21:08:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:12:30.457Z] SessionManagerReqHand - [INFO - 2020-04-20T21:13:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:17:30.457Z] SessionManagerReqHand - [INFO - 
2020-04-20T21:18:35.798Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:22:30.459Z] SessionManagerReqHand - [INFO - 2020-04-20T21:23:35.803Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:27:30.460Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
OW
|
||||||
|
O - 2020-04-20T21:22:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:27:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
ns - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:07:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:12:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:17:14.190Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:22:14.192Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:27:14.196Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:19:25.242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:24:25.249Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
[INFO - 2020-04-20T21:27:31.799Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
|
179Z] SessionManagerReqHand - [INFO - 2020-04-20T21:11:22.721Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:13:54.176Z] SessionManagerReqHand - [INFO - 2020-04-20T21:16:22.724Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:18:54.177Z] SessionManagerReqHand - [INFO - 2020-04-20T21:21:22.725Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:23:54.181Z] SessionManagerReqHand - [INFO - 2020-04-20T21:26:22.728Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
|
||||||
20
search.py
20
search.py
|
|
@ -1,30 +1,17 @@
|
||||||
|
|
||||||
from application.db import Session, Recipe, Ingredient, Trunk
|
from application.db import Session, Recipe, Ingredient, Trunk
|
||||||
|
from flask import g
|
||||||
import nltk as nltk
|
import nltk as nltk
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
import time
|
import time
|
||||||
import heapq
|
import heapq
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
|
||||||
dbSession = Session()
|
|
||||||
|
|
||||||
|
|
||||||
def faster(inputArr):
|
|
||||||
indx = {}
|
|
||||||
|
|
||||||
for inpu in inputArr:
|
|
||||||
ids = []
|
|
||||||
for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():
|
|
||||||
if str(x[0]) not in indx:
|
|
||||||
indx[str(x[0])] = 0
|
|
||||||
|
|
||||||
indx[str(x[0])] += 1
|
|
||||||
|
|
||||||
return(indx)
|
|
||||||
|
|
||||||
def fastes(inputArr):
|
def fastes(inputArr):
|
||||||
indx = {}
|
indx = {}
|
||||||
|
dbSession = g.session
|
||||||
for inpu in inputArr:
|
for inpu in inputArr:
|
||||||
ids = []
|
ids = []
|
||||||
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
|
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
|
||||||
|
|
@ -47,7 +34,8 @@ def stemInput(inputArr):
|
||||||
#
|
#
|
||||||
|
|
||||||
def getRecDict(indx, inputArr):
|
def getRecDict(indx, inputArr):
|
||||||
#inputArr = stem(inputArr)
|
dbSession = g.session
|
||||||
|
|
||||||
outDict = {}
|
outDict = {}
|
||||||
k = Counter(indx)
|
k = Counter(indx)
|
||||||
# Finding 1000 highest values
|
# Finding 1000 highest values
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue