stability + started image download

This commit is contained in:
Askill 2020-04-20 23:31:04 +02:00
parent 68added41f
commit 34a0707964
12 changed files with 117 additions and 37 deletions

2
.gitignore vendored
View File

@ -1,2 +1,4 @@
data/recs.json
backup.sql

Binary file not shown.

Binary file not shown.

View File

@ -5,6 +5,7 @@ import os
from json import dumps
import application.endpoints as endpoints
import application.config as config
from application.db import Session
app = Flask(__name__)
api = Api(app, version='1', contact={"name":""}, license={"name":"Online Dienst Dokumentation"}, api_spec_url='/api/swagger')

View File

@ -7,7 +7,7 @@ import enum
from flask import Flask
import time
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8")
engine = db.create_engine('mysql+mysqldb://root@server/fs?charset=utf8mb4', echo=False, encoding="utf8", pool_size=1000, max_overflow=0)
Base = declarative_base()
Session = sessionmaker(bind=engine)
@ -21,7 +21,7 @@ class Recipe(Base):
name = Column('name', Text)
instructions = Column('instructions', Text)
url = Column('url', Text)
img = Column('img', LargeBinary)
img = Column('img', LargeBinary(length=(2**32)-1))
ingredient = relationship("Ingredient", backref="recipe")
trunk = relationship("Trunk", backref="recipe")

View File

@ -1,5 +1,6 @@
from flask_restful import Resource, reqparse
import flask
from flask import g
import requests
import application.config as config
import json
@ -12,28 +13,28 @@ import time
class RecipeList(Resource):
def get(self):
""" """
try:
parser = reqparse.RequestParser()
parser.add_argument('ingred', type=str, action='append')
args = parser.parse_args()
ingreds = args["ingred"]
g.session = Session()
ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr]
parser = reqparse.RequestParser()
parser.add_argument('ingred', type=str, action='append')
args = parser.parse_args()
ingreds = args["ingred"]
start = time.time()
indx = search.fastes(ingreds)
end = time.time()
print("get recipes",end - start, "\n")
ingreds = [migrate.stemWord(ingred)[0] for ingred in ingreds + search.defaultArr]
start = time.time()
recs = search.getRecDict(indx, ingreds)
end = time.time()
print("calc overlay",end - start, "\n")
return flask.make_response(flask.jsonify({'data': recs}), 200)
start = time.time()
indx = search.fastes(ingreds)
end = time.time()
print("get recipes",end - start, "\n")
start = time.time()
recs = search.getRecDict(indx, ingreds)
end = time.time()
print("calc overlay",end - start, "\n")
g.session.commit()
g.session.close()
return flask.make_response(flask.jsonify({'data': recs}), 200)
except Exception as e:
print("error: -", e)
return flask.make_response(flask.jsonify({'error': str(e)}), 400)

61
getImages.py Normal file
View File

@ -0,0 +1,61 @@
from urllib.parse import urljoin
from lxml import html
import requests
import json
from time import sleep
import random
import traceback
import cv2
import base64
from application.db import Session, Recipe
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import cv2
from urllib.request import urlopen
import numpy as np
def getImages():
chromePath = 'C:/tools/chromedriver.exe'
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument("--log-level=3")
driver = webdriver.Chrome(chromePath, chrome_options=chrome_options)
dbSession = Session()
counter = 0
maxC = dbSession.query(Recipe).count()
for recipe in dbSession.query(Recipe).all():
url = recipe.url
string1 = '//*[@id="recipe-image-carousel"]/div/div[1]/div[9]/div/a/amp-img'
driver.get(url)
element = WebDriverWait(driver, 30).until(
ec.presence_of_element_located((
By.XPATH, string1)))
src = driver.find_element_by_xpath(string1).get_attribute("src")
print(src)
resp = urlopen(src)
image = np.asarray(bytearray(resp.read()), dtype="uint8")
image = cv2.imdecode(image, cv2.IMREAD_COLOR)
ret, jpeg = cv2.imencode(".jpg", image)
img = base64.b64encode(jpeg)
recipe.img = img
dbSession.flush()
dbSession.commit()
counter +=1
print(counter/maxC)
sleep(5)
getImages()

27
ghostdriver.log Normal file
View File

@ -0,0 +1,27 @@
[INFO - 2020-04-20T20:11:22.708Z] GhostDriver - Main - running on port 63975
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.customHeaders: - {}
[INFO - 2020-04-20T20:11:24.918Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"windows-10-32bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}}
[INFO - 2020-04-20T20:11:24.918Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: 1c4df520-8343-11ea-a8a6-19d2d02d716b
[ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - msg: Unhandled promise rejection TypeError: undefined is not an object (evaluating 'w.set')
phantomjs://platform/console++.js:263 in error
[ERROR - 2020-04-20T20:11:26.927Z] Session [1c4df520-8343-11ea-a8a6-19d2d02d716b] - page.onError - stack:
(anonymous function) (https://www.chefkoch.de/rezepte/207581086939665/Nannie-s-schnelle-Kuchen.html:8626)
phantomjs://platform/console++.js:263 in error
[ERROR - 2020-04-20T20:11:28.646Z] WebElementLocator - _handleLocateCommand - Element(s) NOT Found: GAVE UP. Search Stop Time: 15874134[INFO - 2020-04-20T20:12:30.427Z] SessionManagerReqHand - [INFO - 2020-04-20T20:13:35.766Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:17:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:18:35.767Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:22:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:23:35.768Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:27:30.429Z] SessionManagerReqHand - [INFO - 2020-04-20T20:28:35.771Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:32:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:33:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:37:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:38:35.774Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:42:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:43:35.784Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:47:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:48:35.787Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:52:30.439Z] SessionManagerReqHand - [INFO - 2020-04-20T20:53:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T20:57:30.440Z] SessionManagerReqHand - [INFO - 2020-04-20T20:58:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:02:30.440Z] SessionManagerReqHand - [INFO - 2020-04-20T21:03:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:07:30.445Z] SessionManagerReqHand - [INFO - 2020-04-20T21:08:35.789Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:12:30.457Z] SessionManagerReqHand - [INFO - 2020-04-20T21:13:35.788Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:17:30.457Z] SessionManagerReqHand - [INFO - 2020-04-20T21:18:35.798Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:22:30.459Z] SessionManagerReqHand - [INFO - 2020-04-20T21:23:35.803Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:27:30.460Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
OW
O - 2020-04-20T21:22:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:27:12.188Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
ns - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:07:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:12:14.184Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:17:14.190Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:22:14.192Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:27:14.196Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:19:25.242Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:24:25.249Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
[INFO - 2020-04-20T21:27:31.799Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW
179Z] SessionManagerReqHand - [INFO - 2020-04-20T21:11:22.721Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:13:54.176Z] SessionManagerReqHand - [INFO - 2020-04-20T21:16:22.724Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:18:54.177Z] SessionManagerReqHand - [INFO - 2020-04-20T21:21:22.725Z] SessionManagerReqHand - _cleanupWindowlessSes[INFO - 2020-04-20T21:23:54.181Z] SessionManagerReqHand - [INFO - 2020-04-20T21:26:22.728Z] SessionManagerReqHand - _cleanupWindowlessSessions - Asynchronous Sessions clean-up phase starting NOW

View File

@ -1,30 +1,17 @@
from application.db import Session, Recipe, Ingredient, Trunk
from flask import g
import nltk as nltk
from nltk.corpus import stopwords
import time
import heapq
from collections import Counter
dbSession = Session()
def faster(inputArr):
indx = {}
for inpu in inputArr:
ids = []
for x in dbSession.query(Trunk.recipe_id).filter(Trunk.name.contains(inpu)).all():
if str(x[0]) not in indx:
indx[str(x[0])] = 0
indx[str(x[0])] += 1
return(indx)
def fastes(inputArr):
indx = {}
dbSession = g.session
for inpu in inputArr:
ids = []
for recipe_id in dbSession.query(Trunk.recipe_id).filter(Trunk.name == inpu).all():
@ -47,7 +34,8 @@ def stemInput(inputArr):
#
def getRecDict(indx, inputArr):
#inputArr = stem(inputArr)
dbSession = g.session
outDict = {}
k = Counter(indx)
# Finding 1000 highest values