From 64dbc0b52cd1e06bf6e591ec472137486ac6c343 Mon Sep 17 00:00:00 2001
From: Patrice
Date: Wed, 24 Apr 2019 18:29:24 +0200
Subject: [PATCH] added alexa zeug

---
Review notes (free-form text between the three-dash line and the first
"diff --git" line is commentary, not part of the change):

* Line structure restored: this patch had been flattened onto a few long
  lines. The Python indentation, the two blank context lines that open
  the reader/site.py hunk and the leading context of the reader/tests.py
  hunk are reconstructed assumptions -- TODO confirm against the
  repository before applying.
* reader/main.py, search(): iterate over the headlines directly and put
  a space between the question and each headline.
* reader/site.py, Golem.search_article() and Zeit.search_article(): the
  search term is handed to requests via params= so that it is
  URL-encoded as a whole instead of only having its spaces replaced.
* reader/site.py, Zeit.search_article(): now returns (articles, links)
  like Golem.search_article(); search() in reader/main.py unpacks two
  values from the result.
* All code changes are line-count-neutral, so hunk headers and diffstat
  are unchanged. The "index" blob ids are the ones recorded for the
  original commit and no longer describe the modified post-image.
* NOTE(review): reader/main.py does "import site as s"; "site" is also
  the name of a standard-library module -- confirm that "s" really
  resolves to reader/site.py at runtime.
* NOTE(review): Zeit still queries golem.de URLs and XPaths; this looks
  like a copy-and-paste placeholder -- confirm.

 reader/main.py  | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 reader/site.py  | 70 +++++++++++++++++++++++++++++++++++++++++++++++++
 reader/tests.py | 10 ++++---
 3 files changed, 142 insertions(+), 4 deletions(-)

diff --git a/reader/main.py b/reader/main.py
index e69de29..39324d2 100644
--- a/reader/main.py
+++ b/reader/main.py
@@ -0,0 +1,66 @@
+import logging
+import os
+from flask import Flask
+from flask_ask import Ask, request, session, question, statement
+import random
+import yaml
+import site as s
+
+app = Flask(__name__)
+ask = Ask(app, "/")
+logging.getLogger('flask_ask').setLevel(logging.DEBUG)
+
+
+@ask.intent('Search',
+    mapping={'site': 'Site', 'searchTerm':'SearchTerm'},
+    default={'site': 'golem', 'searchTerm':''})
+def search(site, searchTerm):
+    print(site, searchTerm)
+
+    if site == "golem":
+        obj = s.Golem()
+    elif site == "zeit":
+        obj = s.Zeit()
+    elif site == "welt":
+        obj = s.Welt()
+    else:
+        return statement("Es gab einen Fehler")
+    session.attributes["site"] = obj.url
+
+    articles, links = obj.search_article(searchTerm)
+    session.attributes["lastSearch"] = links  # kept in the session for a follow-up request
+    antwort = "Für welchen der folgenden Artikel interessieren Sie sich?"
+    for title in articles:
+        antwort += " " + title + "..."  # leading space separates the headline from the preceding text
+
+    return question(antwort)
+
+@ask.intent('Read',
+    mapping={'site': 'Site', 'activity':'Activity'},
+    default={'site': 'golem', 'activity':'read_headlines'})
+def read(site, activity):
+    print(site, activity)
+
+    response = ""
+    return statement(response)
+
+@ask.intent('AMAZON.HelpIntent')
+def help():
+    speech_text = 'Dieser Skill erlaubt es Ihnen einige Nachrichten Websites zu nutzen'
+    return statement(speech_text)
+
+@ask.launch
+def launch():
+    return read("golem", "read_headlines")
+
+@ask.session_ended
+def session_ended():
+    return "{}", 200
+
+
+if __name__ == '__main__':
+    if 'ASK_VERIFY_REQUESTS' in os.environ:
+        verify = str(os.environ.get('ASK_VERIFY_REQUESTS', '')).lower()
+        if verify == 'false':
+            app.config['ASK_VERIFY_REQUESTS'] = False
+    app.run()
diff --git a/reader/site.py b/reader/site.py
index 4de01d2..6e3556a 100644
--- a/reader/site.py
+++ b/reader/site.py
@@ -28,6 +28,76 @@ class site:
 
 
 class Golem(site):
+    url = "golem"
+    def search_article(self, topic):
+        searchURL = "https://suche.golem.de/search.php"
+        site = requests.get(searchURL, params={"l": 10, "q": topic}, headers=self.header_values)
+        tree = html.fromstring(site.content)
+
+        articles = tree.xpath('//span[@class="dh2 head2"]/text()')
+        links = tree.xpath('//ol[@class="list-articles"]/li/header//@href')
+        return articles, links
+
+    def get_news(self):
+        searchURL = "https://www.golem.de/"
+        site = requests.get(searchURL, headers=self.header_values)
+        tree = html.fromstring(site.content)
+
+        articles = tree.xpath('//h2[@class="head2"]/text()')
+        return articles
+
+    def read_headlines(self, url):
+        site = requests.get(url, headers=self.header_values)
+        tree = html.fromstring(site.content)
+
+        title = tree.xpath('//header/h1/span[@class="dh1 head5"]/text()')
+        title += tree.xpath('//header/p/text()')
+        return title
+
+    def read_article(self, url):
+        site = requests.get(url, headers=self.header_values)
+        tree = html.fromstring(site.content)
+
+        title = self.read_headlines(url)
+        title += tree.xpath('//div[@class="formatted"]/p/text()')
+        return title
+
+class Zeit(site):
+    url = "zeit"
+    def search_article(self, topic):
+        searchURL = "https://suche.golem.de/search.php"
+        site = requests.get(searchURL, params={"l": 10, "q": topic}, headers=self.header_values)
+        tree = html.fromstring(site.content)
+
+        articles = tree.xpath('//span[@class="dh2 head2"]/text()')
+        return articles, tree.xpath('//ol[@class="list-articles"]/li/header//@href')
+
+    def get_news(self):
+        searchURL = "https://www.golem.de/"
+        site = requests.get(searchURL, headers=self.header_values)
+        tree = html.fromstring(site.content)
+
+        articles = tree.xpath('//h2[@class="head2"]/text()')
+        return articles
+
+    def read_headlines(self, url):
+        site = requests.get(url, headers=self.header_values)
+        tree = html.fromstring(site.content)
+
+        title = tree.xpath('//header/h1/span[@class="dh1 head5"]/text()')
+        title += tree.xpath('//header/p/text()')
+        return title
+
+    def read_article(self, url):
+        site = requests.get(url, headers=self.header_values)
+        tree = html.fromstring(site.content)
+
+        title = self.read_headlines(url)
+        title += tree.xpath('//div[@class="formatted"]/p/text()')
+        return title
+
+class Welt(site):
+    url = "welt"
     def search_article(self, topic):
         searchURL = "https://suche.golem.de/search.php?l=10&q=" + topic.replace(" ", "+")
         site = requests.get(searchURL, headers=self.header_values)
diff --git a/reader/tests.py b/reader/tests.py
index da7566b..8d310a6 100644
--- a/reader/tests.py
+++ b/reader/tests.py
@@ -3,8 +3,10 @@ from lxml import html
 import requests
 import re
 
-url="https://www.golem.de/news/tchap-forscher-gelingt-anmeldung-im-regierungschat-frankreichs-1904-140799.html"
-site = requests.get(url)
+searchURL = "https://suche.golem.de/search.php?l=10&q=gaming"
+site = requests.get(searchURL)
 tree = html.fromstring(site.content)
-title = tree.xpath('//div[@class="formatted"]/p/text()')
-print(title)
\ No newline at end of file
+
+articles = tree.xpath('//span[@class="dh2 head2"]/text()')
+links = tree.xpath('//ol[@class="list-articles"]/li/header//@href')
+print(len(articles), len(links))
\ No newline at end of file