diff --git a/ngrok.exe b/ngrok.exe new file mode 100644 index 0000000..0ec6adb Binary files /dev/null and b/ngrok.exe differ diff --git a/reader/__pycache__/siteobj.cpython-35.pyc b/reader/__pycache__/siteobj.cpython-35.pyc index 09b8b5a..a0b6034 100644 Binary files a/reader/__pycache__/siteobj.cpython-35.pyc and b/reader/__pycache__/siteobj.cpython-35.pyc differ diff --git a/reader/main.py b/reader/main.py index 76e5796..3e88ad5 100644 --- a/reader/main.py +++ b/reader/main.py @@ -14,7 +14,9 @@ logging.getLogger('flask_ask').setLevel(logging.DEBUG) @ask.intent('searchon', mapping={'site': 'Site'}, default={'site': 'golem'}) def search_on(site): try: + session.attributes["siteName"] = site + print(session.attributes["siteName"]) except: print("error") @@ -36,6 +38,8 @@ def search_for(searchTerm): if site == "golem": obj = site2.Golem() + elif site == "spiegel": + obj = site2.Spiegel() elif site is None: session.attributes["searchTerm"] = searchTerm session.attributes["lastCall"] = "searchfor" @@ -48,7 +52,7 @@ def search_for(searchTerm): response = "Für welchen der folgenden Artikel interessieren Sie sich?" if len(articles) > 0: - for i in range(0, max(5, len(articles))): + for i in range(0, min(5, len(articles))): response += articles[i] else: return question("Dazu konnte nichts gefunden werden. Möchten Sie nach etwas anderem Suchen?") @@ -59,34 +63,58 @@ def search_for(searchTerm): @ask.intent('News', mapping={'site': 'Site'}, default={'site': ''}) def news(site): - + try: + site = site.lower() + session.attributes["siteName"] = site + except: + print("error") + print(site) if site == "golem": obj = site2.Golem() + elif site == "spiegel": + obj = site2.Spiegel() elif site == '': session.attributes["lastCall"] = "news" return question("Auf welcher Seite wollen Sie hiernach Suchen?") else: return statement("error") - news = obj.get_news() + news, links = obj.get_news() + print(news) + session.attributes["lastSearch"] = links response = "" - for i in range(0, 5): + for i in range(0, min(5, len(news))): response += news[i] + ". " session.attributes["lastCall"] = "news" - return statement(response) + return question(response) @ask.intent('SearchTwo', mapping={'number': 'Nummer'}, default={'number': 1}) def search_answer(number): - print(number) - obj = site2.Golem() + try: + site = session.attributes["siteName"] + except: + site = None - art = obj.read_headlines(session.attributes["lastSearch"][int(number)-1]) + if site == "golem": + obj = site2.Golem() + elif site == "spiegel": + obj = site2.Spiegel() + + links = session.attributes["lastSearch"] + + newLinks = [] + for link in links: + if "http" not in link: + newLinks.append(obj.baseURL + link) + links = newLinks + + art = obj.read_headlines(links[int(number)-1]) response = "" for element in art: response += element + " " - + print(links) session.attributes["lastCall"] = "search2" return statement(response) diff --git a/reader/siteobj.py b/reader/siteobj.py index 9e60c36..c986a6b 100644 --- a/reader/siteobj.py +++ b/reader/siteobj.py @@ -12,18 +12,11 @@ class Site: xPath["searchArticle"] = "" xPath["searchLinks"] = "" xPath["newsArticle"] = "" + xPath["newsLinks"] = "" xPath["readHeadlineTitle"] = "" xPath["readHeadlineText"] = "" xPath["readArticleText"] = "" - - header_values = { - 'Connection:' : 'Keep-alive', - 'name' : 'Michael Foord', - 'location' : 'Northampton', - 'language' : 'German', - 'User-Agent': 'Mozilla 4/0'} - def __init__(self): return None @@ -42,7 +35,8 @@ class Site: tree = html.fromstring(site.content) articles = tree.xpath(self.xPath["newsArticle"]) - return articles + links = tree.xpath(self.xPath["newsLinks"]) + return articles, links def read_headlines(self, url): site = requests.get(url) @@ -65,10 +59,24 @@ class Golem(Site): siteName = "golem" baseURL = "https://www.golem.de/" searchURLString = "https://suche.golem.de/search.php?l=10&q=" + Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()' Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href' - Site.xPath["newsArticle"] = '//h2[@class="head2"]/text()' + Site.xPath["newsArticle"] = '//li//h2/text()' + Site.xPath["newsLinks"] = '//header[@class="cluster-header"]//@href' Site.xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()' Site.xPath["readHeadlineText"] = '//header/p/text()' Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()' +class Spiegel(Site): + siteName = "spiegel" + baseURL = "https://www.spiegel.de/" + searchURLString = "https://www.spiegel.de/suche/?suchbegriff=" + + Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()' + Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href' + Site.xPath["newsArticle"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//span[@class="headline"]/text()' + Site.xPath["newsLinks"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//@href' + Site.xPath["readHeadlineTitle"] = '//div[@class="column-both"]//span[@class="headline"]//text()' + Site.xPath["readHeadlineText"] = '//div[@class="column-both"]/p/strong/text()' + Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()'