spiegel works / golem not

This commit is contained in:
Patrice 2019-05-07 20:10:28 +02:00
parent 3f0e7937c7
commit 1b71fb1c00
4 changed files with 55 additions and 19 deletions

BIN
ngrok.exe Normal file

Binary file not shown.

View File

@ -14,7 +14,9 @@ logging.getLogger('flask_ask').setLevel(logging.DEBUG)
@ask.intent('searchon', mapping={'site': 'Site'}, default={'site': 'golem'}) @ask.intent('searchon', mapping={'site': 'Site'}, default={'site': 'golem'})
def search_on(site): def search_on(site):
try: try:
session.attributes["siteName"] = site session.attributes["siteName"] = site
print(session.attributes["siteName"])
except: except:
print("error") print("error")
@ -36,6 +38,8 @@ def search_for(searchTerm):
if site == "golem": if site == "golem":
obj = site2.Golem() obj = site2.Golem()
elif site == "spiegel":
obj = site2.Spiegel()
elif site is None: elif site is None:
session.attributes["searchTerm"] = searchTerm session.attributes["searchTerm"] = searchTerm
session.attributes["lastCall"] = "searchfor" session.attributes["lastCall"] = "searchfor"
@ -48,7 +52,7 @@ def search_for(searchTerm):
response = "Für welchen der folgenden Artikel interessieren Sie sich?" response = "Für welchen der folgenden Artikel interessieren Sie sich?"
if len(articles) > 0: if len(articles) > 0:
for i in range(0, max(5, len(articles))): for i in range(0, min(5, len(articles))):
response += articles[i] response += articles[i]
else: else:
return question("Dazu konnte nichts gefunden werden. Möchten Sie nach etwas anderem Suchen?") return question("Dazu konnte nichts gefunden werden. Möchten Sie nach etwas anderem Suchen?")
@ -59,34 +63,58 @@ def search_for(searchTerm):
@ask.intent('News', mapping={'site': 'Site'}, default={'site': ''})
def news(site):
    """Read out up to five current headlines of the requested site.

    The site name is lower-cased and stored in the session under
    "siteName"; the links returned alongside the headlines are stored
    under "lastSearch" and "lastCall" is set to "news".

    Args:
        site: Value of the 'Site' slot; '' when the slot was not filled.

    Returns:
        A question containing the headlines, a question asking for the
        site when ``site`` is '', or the statement "error" for any other
        unrecognised value.
    """
    try:
        site = site.lower()
    except AttributeError:
        # Only a non-string slot value (e.g. None) is expected here; other
        # exceptions are no longer swallowed by a bare except.
        print("error")
    else:
        session.attributes["siteName"] = site
    print(site)

    if site == "golem":
        obj = site2.Golem()
    elif site == "spiegel":
        obj = site2.Spiegel()
    elif site == '':
        session.attributes["lastCall"] = "news"
        return question("Auf welcher Seite wollen Sie hiernach Suchen?")
    else:
        return statement("error")

    # Local is named `headlines` so it does not shadow this handler's name.
    headlines, links = obj.get_news()
    print(headlines)
    session.attributes["lastSearch"] = links

    # Slicing caps the output at five entries and copes with shorter lists.
    response = "".join(headline + ". " for headline in headlines[:5])
    session.attributes["lastCall"] = "news"
    return question(response)
@ask.intent('SearchTwo', mapping={'number': 'Nummer'}, default={'number': 1})
def search_answer(number):
    """Read out the article the user picked from the last result list.

    The site is taken from the session ("siteName") and the candidate
    links from "lastSearch". "lastCall" is set to "search2" on success.

    Args:
        number: 1-based position of the wanted article ('Nummer' slot).

    Returns:
        A statement with the text returned by ``read_headlines`` for the
        chosen link, or the statement "error" when no supported site is
        stored in the session or ``number`` does not select a link.
    """
    from urllib.parse import urljoin

    try:
        site = session.attributes["siteName"]
    except (KeyError, TypeError):
        site = None

    if site == "golem":
        obj = site2.Golem()
    elif site == "spiegel":
        obj = site2.Spiegel()
    else:
        # Without this branch `obj` would be unbound below (NameError).
        return statement("error")

    # Keep every link at its original position: absolute links pass through
    # urljoin unchanged, relative ones are resolved against the site's base
    # URL. Dropping absolute links would shift the indices the user heard.
    links = [urljoin(obj.baseURL, link)
             for link in session.attributes["lastSearch"]]

    try:
        index = int(number) - 1
    except (TypeError, ValueError):
        return statement("error")
    # Reject 0 / negative / too large numbers instead of silently wrapping
    # around to the end of the list or raising IndexError.
    if not 0 <= index < len(links):
        return statement("error")

    art = obj.read_headlines(links[index])
    response = "".join(element + " " for element in art)
    print(links)
    session.attributes["lastCall"] = "search2"
    return statement(response)

View File

@ -12,18 +12,11 @@ class Site:
xPath["searchArticle"] = "" xPath["searchArticle"] = ""
xPath["searchLinks"] = "" xPath["searchLinks"] = ""
xPath["newsArticle"] = "" xPath["newsArticle"] = ""
xPath["newsLinks"] = ""
xPath["readHeadlineTitle"] = "" xPath["readHeadlineTitle"] = ""
xPath["readHeadlineText"] = "" xPath["readHeadlineText"] = ""
xPath["readArticleText"] = "" xPath["readArticleText"] = ""
header_values = {
'Connection:' : 'Keep-alive',
'name' : 'Michael Foord',
'location' : 'Northampton',
'language' : 'German',
'User-Agent': 'Mozilla 4/0'}
def __init__(self): def __init__(self):
return None return None
@ -42,7 +35,8 @@ class Site:
tree = html.fromstring(site.content) tree = html.fromstring(site.content)
articles = tree.xpath(self.xPath["newsArticle"]) articles = tree.xpath(self.xPath["newsArticle"])
return articles links = tree.xpath(self.xPath["newsLinks"])
return articles, links
def read_headlines(self, url): def read_headlines(self, url):
site = requests.get(url) site = requests.get(url)
@ -65,10 +59,24 @@ class Golem(Site):
siteName = "golem" siteName = "golem"
baseURL = "https://www.golem.de/" baseURL = "https://www.golem.de/"
searchURLString = "https://suche.golem.de/search.php?l=10&q=" searchURLString = "https://suche.golem.de/search.php?l=10&q="
Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()' Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()'
Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href' Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href'
Site.xPath["newsArticle"] = '//h2[@class="head2"]/text()' Site.xPath["newsArticle"] = '//li//h2/text()'
Site.xPath["newsLinks"] = '//header[@class="cluster-header"]//@href'
Site.xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()' Site.xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()'
Site.xPath["readHeadlineText"] = '//header/p/text()' Site.xPath["readHeadlineText"] = '//header/p/text()'
Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()' Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()'
class Spiegel(Site):
    """Scraper configuration for spiegel.de: URLs and XPath expressions."""

    siteName = "spiegel"
    baseURL = "https://www.spiegel.de/"
    searchURLString = "https://www.spiegel.de/suche/?suchbegriff="

    # This class carries its own mapping instead of assigning into
    # Site.xPath. Writing into the base-class dict from a class body
    # changes the expressions for every Site subclass, so whichever class
    # is defined last wins. Lookups through self.xPath resolve to this
    # dict for Spiegel instances.
    # NOTE(review): the two search expressions are the same ones used for
    # golem in this file - confirm they match spiegel.de's search markup.
    xPath = {
        "searchArticle": '//span[@class="dh2 head2"]/text()',
        "searchLinks": '//ol[@class="list-articles"]/li/header//@href',
        "newsArticle": '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//span[@class="headline"]/text()',
        "newsLinks": '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//@href',
        "readHeadlineTitle": '//div[@class="column-both"]//span[@class="headline"]//text()',
        "readHeadlineText": '//div[@class="column-both"]/p/strong/text()',
        "readArticleText": '//div[@class="formatted"]/p/text()',
    }