stability + started image download

This commit is contained in:
Askill 2020-04-20 23:31:04 +02:00
parent 68added41f
commit 34a0707964
12 changed files with 117 additions and 37 deletions

2
.gitignore vendored
View File

@ -1,2 +1,4 @@
data/recs.json data/recs.json
backup.sql

Binary file not shown.

Binary file not shown.

View File

@ -5,6 +5,7 @@ import os
from json import dumps from json import dumps
import application.endpoints as endpoints import application.endpoints as endpoints
import application.config as config import application.config as config
from application.db import Session
app = Flask(__name__) app = Flask(__name__)
api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger') api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger')

View File

@ -7,7 +7,7 @@ import enum
from flask import Flask from flask import Flask
import time import time
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8") engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8", pool_size=1000, max_overflow=0)
Base = declarative_base() Base = declarative_base()
Session = sessionmaker(bind=engine) Session = sessionmaker(bind=engine)
@ -21,7 +21,7 @@ class Recipe(Base):
name = Column('name', Text) name = Column('name', Text)
instructions = Column('instructions', Text) instructions = Column('instructions', Text)
url = Column('url', Text) url = Column('url', Text)
img = Column('img', LargeBinary) img = Column('img', LargeBinary(length=(2**32)-1))
ingredient = relationship("Ingredient", backref="recipe") ingredient = relationship("Ingredient", backref="recipe")
trunk = relationship("Trunk", backref="recipe") trunk = relationship("Trunk", backref="recipe")

View File

@ -1,5 +1,6 @@
from flask_restful import Resource, reqparse from flask_restful import Resource, reqparse
import flask import flask
from flask import g
import requests import requests
import application.config as config import application.config as config
import json import json
@ -12,7 +13,8 @@ import time
class RecipeList(Resource): class RecipeList(Resource):
def get(self): def get(self):
""" """ """ """
try: g.session = Session()
parser = reqparse.RequestParser() parser = reqparse.RequestParser()
parser.add_argument('ingred', type=str, action='append') parser.add_argument('ingred', type=str, action='append')
args = parser.parse_args() args = parser.parse_args()
@ -30,10 +32,9 @@ class RecipeList(Resource):
end = time.time() end = time.time()
print("calc overlay",end - start, "\n") print("calc overlay",end - start, "\n")
g.session.commit()
g.session.close()
return flask.make_response(flask.jsonify({'data': recs}), 200) return flask.make_response(flask.jsonify({'data': recs}), 200)
except Exception as e:
print("error: -", e)
return flask.make_response(flask.jsonify({'error': str(e)}), 400)

61
getImages.py Normal file
View File

@ -0,0 +1,61 @@
from urllib.parse import urljoin
from lxml import html
import requests
import json
from time import sleep
import random
import traceback
import cv2
import base64
from application.db import Session, Recipe
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import cv2
from urllib.request import urlopen
import numpy as np
def getImages():
    """Download every recipe's carousel image and store it in the database.

    For each ``Recipe`` row the page at ``recipe.url`` is opened in a headless
    Chrome instance, the ``src`` attribute of the carousel image element is
    read, the image is fetched, re-encoded as JPEG, base64-encoded and
    assigned to ``recipe.img``. Every recipe is committed on its own and the
    progress (processed / total) is printed, followed by a 5 second pause.

    Errors (wait timeouts, network failures, decode failures, ...) still
    propagate to the caller, but the browser and the database session are
    always released first.
    """
    chromePath = 'C:/tools/chromedriver.exe'
    imageXPath = '//*[@id="recipe-image-carousel"]/div/div[1]/div[9]/div/a/amp-img'

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument("--log-level=3")
    driver = webdriver.Chrome(chromePath, chrome_options=chrome_options)
    try:
        dbSession = Session()
        try:
            maxC = dbSession.query(Recipe).count()
            for counter, recipe in enumerate(dbSession.query(Recipe).all(), start=1):
                driver.get(recipe.url)
                # until() returns the located element, so no second lookup
                # of the same XPath is needed.
                element = WebDriverWait(driver, 30).until(
                    ec.presence_of_element_located((By.XPATH, imageXPath)))
                src = element.get_attribute("src")
                print(src)

                # Close the HTTP response as soon as the payload is read.
                with urlopen(src) as resp:
                    raw = np.asarray(bytearray(resp.read()), dtype="uint8")

                # Decode and re-encode so the stored payload is always JPEG.
                image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
                _, jpeg = cv2.imencode(".jpg", image)
                recipe.img = base64.b64encode(jpeg)

                # commit() flushes pending changes itself; committing per
                # recipe keeps finished downloads if a later one fails.
                dbSession.commit()
                print(counter / maxC)
                sleep(5)
        finally:
            dbSession.close()
    finally:
        # Always shut the headless browser down, even on failure.
        driver.quit()
getImages()

27
ghostdriver.log Normal file
View File

@ -0,0 +1,27 @@
[INFO - 2020-04-20T20:11:22.708Z] GhostDriver - Main - running on port 63975
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.customHeaders: - {}
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"windows-10-32bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}}
[INFO - 2020-04-20T20:11:24.918Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: 1c4df520-8343-11ea-a8a6-19d2d02d716b
[ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - msg: Unhandled promise rejection TypeError: undefined is not an object (evaluating 'w.set')
phantomjs://platform/console++.js:263 in error
[ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - stack:
(anonymous function) (https://www.chefkoch.de/rezepte/207581086939665/Nannie-s-schnelle-Kuchen.html:8626)
phantomjs://platform/console++.js:263 in error
[ERROR - 2020-04-20T20:11:28.646Z] WebElementLocator - _handleLocateCommand - Element(s) NOT Found: GAVE UP. Search Stop Time: 15874134[INFO - 2020-04-20T20:12:30.427Z] SessionManagerReqHand - [INFO - 2020-04-20T20:13:35.766Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:17:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:18:35.767Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:22:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:23:35.768Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:27:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:28:35.771Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:32:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:33:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:37:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:38:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:42:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:43:35.784Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:47:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:48:35.787Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:52:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:53:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:57:30.440Z] SessionManagerReqHand - [INFO - 2020-04-20T20:58:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:02:30.440Z] SessionManagerReqHand - [INFO - 2020-04-20T21:03:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:07:30.445Z] SessionManagerReqHand - [INFO - 2020-04-20T21:08:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:12:30.457Z] SessionManagerReqHand - [INFO - 2020-04-20T21:13:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:17:30.457Z] SessionManagerReqHand - [INFO - 
2020-04-20T21:18:35.798Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:22:30.459Z] SessionManagerReqHand - [INFO - 2020-04-20T21:23:35.803Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:27:30.460Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
OW
O - 2020-04-20T21:22:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:27:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
ns - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:07:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:12:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:17:14.190Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:22:14.192Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:27:14.196Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:19:25.242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:24:25.249Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:27:31.799Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
179Z] SessionManagerReqHand - [INFO - 2020-04-20T21:11:22.721Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:13:54.176Z] SessionManagerReqHand - [INFO - 2020-04-20T21:16:22.724Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:18:54.177Z] SessionManagerReqHand - [INFO - 2020-04-20T21:21:22.725Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:23:54.181Z] SessionManagerReqHand - [INFO - 2020-04-20T21:26:22.728Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW

View File

@ -1,30 +1,17 @@
from application.db import Session, Recipe, Ingredient, Trunk from application.db import Session, Recipe, Ingredient, Trunk
from flask import g
import nltk as nltk import nltk as nltk
from nltk.corpus import stopwords from nltk.corpus import stopwords
import time import time
import heapq import heapq
from collections import Counter from collections import Counter
dbSession = Session()
def faster(inputArr):
indx = {}
for inpu in inputArr:
ids = []
for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():
if str(x[0]) not in indx:
indx[str(x[0])] = 0
indx[str(x[0])] += 1
return(indx)
def fastes(inputArr): def fastes(inputArr):
indx = {} indx = {}
dbSession = g.session
for inpu in inputArr: for inpu in inputArr:
ids = [] ids = []
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all(): for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
@ -47,7 +34,8 @@ def stemInput(inputArr):
# #
def getRecDict(indx, inputArr): def getRecDict(indx, inputArr):
#inputArr = stem(inputArr) dbSession = g.session
outDict = {} outDict = {}
k = Counter(indx) k = Counter(indx)
# Finding 1000 highest values # Finding 1000 highest values