diff --git a/reader/__pycache__/siteobj.cpython-35.pyc b/reader/__pycache__/siteobj.cpython-35.pyc index a0b6034..6f17104 100644 Binary files a/reader/__pycache__/siteobj.cpython-35.pyc and b/reader/__pycache__/siteobj.cpython-35.pyc differ diff --git a/reader/main.py b/reader/main.py index 3e88ad5..4a35a2c 100644 --- a/reader/main.py +++ b/reader/main.py @@ -14,7 +14,6 @@ logging.getLogger('flask_ask').setLevel(logging.DEBUG) @ask.intent('searchon', mapping={'site': 'Site'}, default={'site': 'golem'}) def search_on(site): try: - session.attributes["siteName"] = site print(session.attributes["siteName"]) except: @@ -38,7 +37,7 @@ def search_for(searchTerm): if site == "golem": obj = site2.Golem() - elif site == "spiegel": + elif site.lower() == "spiegel": obj = site2.Spiegel() elif site is None: session.attributes["searchTerm"] = searchTerm @@ -64,14 +63,13 @@ def search_for(searchTerm): @ask.intent('News', mapping={'site': 'Site'}, default={'site': ''}) def news(site): try: - site = site.lower() session.attributes["siteName"] = site except: print("error") print(site) if site == "golem": obj = site2.Golem() - elif site == "spiegel": + elif site.lower() == "spiegel": obj = site2.Spiegel() elif site == '': session.attributes["lastCall"] = "news" @@ -96,25 +94,25 @@ def search_answer(number): site = session.attributes["siteName"] except: site = None - + print(number) if site == "golem": obj = site2.Golem() - elif site == "spiegel": + elif site.lower() == "spiegel": obj = site2.Spiegel() links = session.attributes["lastSearch"] - - newLinks = [] - for link in links: - if "http" not in link: - newLinks.append(obj.baseURL + link) - links = newLinks + if "http" not in str(links): + newLinks = [] + for link in links: + if "http" not in link: + newLinks.append(obj.baseURL + link) + links = newLinks art = obj.read_headlines(links[int(number)-1]) response = "" for element in art: - response += element + " " - print(links) + response += element + session.attributes["lastCall"] = "search2" return statement(response) diff --git a/reader/siteobj.py b/reader/siteobj.py index c986a6b..8a15a86 100644 --- a/reader/siteobj.py +++ b/reader/siteobj.py @@ -59,24 +59,24 @@ class Golem(Site): siteName = "golem" baseURL = "https://www.golem.de/" searchURLString = "https://suche.golem.de/search.php?l=10&q=" - - Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()' - Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href' - Site.xPath["newsArticle"] = '//li//h2/text()' - Site.xPath["newsLinks"] = '//header[@class="cluster-header"]//@href' - Site.xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()' - Site.xPath["readHeadlineText"] = '//header/p/text()' - Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()' + xPath = dict() + xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()' + xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href' + xPath["newsArticle"] = '//li//h2/text()' + xPath["newsLinks"] = '//div[@class="g g4"]//header//@href' + xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()' + xPath["readHeadlineText"] = '//header/p/text()' + xPath["readArticleText"] = '//div[@class="formatted"]/p/text()' class Spiegel(Site): siteName = "spiegel" baseURL = "https://www.spiegel.de/" searchURLString = "https://www.spiegel.de/suche/?suchbegriff=" - - Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()' - Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href' - Site.xPath["newsArticle"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//span[@class="headline"]/text()' - Site.xPath["newsLinks"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//@href' - Site.xPath["readHeadlineTitle"] = '//div[@class="column-both"]//span[@class="headline"]//text()' - Site.xPath["readHeadlineText"] = '//div[@class="column-both"]/p/strong/text()' - Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()' + xPath = dict() + xPath["searchArticle"] = '//div[@class="search-teaser"]/p/text()' + xPath["searchLinks"] = '//div[@class="search-teaser"]/p//@href' + xPath["newsArticle"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//span[@class="headline"]/text()' + xPath["newsLinks"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//@href' + xPath["readHeadlineTitle"] = '//div[@class="column-both"]//span[@class="headline"]//text()' + xPath["readHeadlineText"] = '//div[@class="column-both"]/p/strong/text()' + xPath["readArticleText"] = '//div[@class="article-section clearfix"]/p/text()' diff --git a/reader/tests.py b/reader/tests.py index 8d310a6..261c094 100644 --- a/reader/tests.py +++ b/reader/tests.py @@ -1,12 +1,5 @@ -import urllib.request,urllib.parse,urllib.error -from lxml import html -import requests -import re +import siteobj as site2 -searchURL = "https://suche.golem.de/search.php?l=10&q=gaming" -site = requests.get(searchURL) -tree = html.fromstring(site.content) -articles = tree.xpath('//span[@class="dh2 head2"]/text()') -links = tree.xpath('//ol[@class="list-articles"]/li/header//@href') -print(len(articles), len(links)) \ No newline at end of file +obj = site2.Golem() +news = obj.get_news()