spiegel works / golem not
This commit is contained in:
parent
3f0e7937c7
commit
1b71fb1c00
Binary file not shown.
|
|
@ -14,7 +14,9 @@ logging.getLogger('flask_ask').setLevel(logging.DEBUG)
|
||||||
@ask.intent('searchon', mapping={'site': 'Site'}, default={'site': 'golem'})
|
@ask.intent('searchon', mapping={'site': 'Site'}, default={'site': 'golem'})
|
||||||
def search_on(site):
|
def search_on(site):
|
||||||
try:
|
try:
|
||||||
|
|
||||||
session.attributes["siteName"] = site
|
session.attributes["siteName"] = site
|
||||||
|
print(session.attributes["siteName"])
|
||||||
except:
|
except:
|
||||||
print("error")
|
print("error")
|
||||||
|
|
||||||
|
|
@ -36,6 +38,8 @@ def search_for(searchTerm):
|
||||||
|
|
||||||
if site == "golem":
|
if site == "golem":
|
||||||
obj = site2.Golem()
|
obj = site2.Golem()
|
||||||
|
elif site == "spiegel":
|
||||||
|
obj = site2.Spiegel()
|
||||||
elif site is None:
|
elif site is None:
|
||||||
session.attributes["searchTerm"] = searchTerm
|
session.attributes["searchTerm"] = searchTerm
|
||||||
session.attributes["lastCall"] = "searchfor"
|
session.attributes["lastCall"] = "searchfor"
|
||||||
|
|
@ -48,7 +52,7 @@ def search_for(searchTerm):
|
||||||
response = "Für welchen der folgenden Artikel interessieren Sie sich?"
|
response = "Für welchen der folgenden Artikel interessieren Sie sich?"
|
||||||
|
|
||||||
if len(articles) > 0:
|
if len(articles) > 0:
|
||||||
for i in range(0, max(5, len(articles))):
|
for i in range(0, min(5, len(articles))):
|
||||||
response += articles[i]
|
response += articles[i]
|
||||||
else:
|
else:
|
||||||
return question("Dazu konnte nichts gefunden werden. Möchten Sie nach etwas anderem Suchen?")
|
return question("Dazu konnte nichts gefunden werden. Möchten Sie nach etwas anderem Suchen?")
|
||||||
|
|
@ -59,34 +63,58 @@ def search_for(searchTerm):
|
||||||
|
|
||||||
@ask.intent('News', mapping={'site': 'Site'}, default={'site': ''})
|
@ask.intent('News', mapping={'site': 'Site'}, default={'site': ''})
|
||||||
def news(site):
|
def news(site):
|
||||||
|
try:
|
||||||
|
site = site.lower()
|
||||||
|
session.attributes["siteName"] = site
|
||||||
|
except:
|
||||||
|
print("error")
|
||||||
|
print(site)
|
||||||
if site == "golem":
|
if site == "golem":
|
||||||
obj = site2.Golem()
|
obj = site2.Golem()
|
||||||
|
elif site == "spiegel":
|
||||||
|
obj = site2.Spiegel()
|
||||||
elif site == '':
|
elif site == '':
|
||||||
session.attributes["lastCall"] = "news"
|
session.attributes["lastCall"] = "news"
|
||||||
return question("Auf welcher Seite wollen Sie hiernach Suchen?")
|
return question("Auf welcher Seite wollen Sie hiernach Suchen?")
|
||||||
else:
|
else:
|
||||||
return statement("error")
|
return statement("error")
|
||||||
|
|
||||||
news = obj.get_news()
|
news, links = obj.get_news()
|
||||||
|
print(news)
|
||||||
|
session.attributes["lastSearch"] = links
|
||||||
|
|
||||||
response = ""
|
response = ""
|
||||||
for i in range(0, 5):
|
for i in range(0, min(5, len(news))):
|
||||||
response += news[i] + ". "
|
response += news[i] + ". "
|
||||||
|
|
||||||
session.attributes["lastCall"] = "news"
|
session.attributes["lastCall"] = "news"
|
||||||
return statement(response)
|
return question(response)
|
||||||
|
|
||||||
@ask.intent('SearchTwo', mapping={'number': 'Nummer'}, default={'number': 1})
|
@ask.intent('SearchTwo', mapping={'number': 'Nummer'}, default={'number': 1})
|
||||||
def search_answer(number):
|
def search_answer(number):
|
||||||
print(number)
|
try:
|
||||||
obj = site2.Golem()
|
site = session.attributes["siteName"]
|
||||||
|
except:
|
||||||
|
site = None
|
||||||
|
|
||||||
art = obj.read_headlines(session.attributes["lastSearch"][int(number)-1])
|
if site == "golem":
|
||||||
|
obj = site2.Golem()
|
||||||
|
elif site == "spiegel":
|
||||||
|
obj = site2.Spiegel()
|
||||||
|
|
||||||
|
links = session.attributes["lastSearch"]
|
||||||
|
|
||||||
|
newLinks = []
|
||||||
|
for link in links:
|
||||||
|
if "http" not in link:
|
||||||
|
newLinks.append(obj.baseURL + link)
|
||||||
|
links = newLinks
|
||||||
|
|
||||||
|
art = obj.read_headlines(links[int(number)-1])
|
||||||
response = ""
|
response = ""
|
||||||
for element in art:
|
for element in art:
|
||||||
response += element + " "
|
response += element + " "
|
||||||
|
print(links)
|
||||||
session.attributes["lastCall"] = "search2"
|
session.attributes["lastCall"] = "search2"
|
||||||
return statement(response)
|
return statement(response)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,18 +12,11 @@ class Site:
|
||||||
xPath["searchArticle"] = ""
|
xPath["searchArticle"] = ""
|
||||||
xPath["searchLinks"] = ""
|
xPath["searchLinks"] = ""
|
||||||
xPath["newsArticle"] = ""
|
xPath["newsArticle"] = ""
|
||||||
|
xPath["newsLinks"] = ""
|
||||||
xPath["readHeadlineTitle"] = ""
|
xPath["readHeadlineTitle"] = ""
|
||||||
xPath["readHeadlineText"] = ""
|
xPath["readHeadlineText"] = ""
|
||||||
xPath["readArticleText"] = ""
|
xPath["readArticleText"] = ""
|
||||||
|
|
||||||
header_values = {
|
|
||||||
'Connection:' : 'Keep-alive',
|
|
||||||
'name' : 'Michael Foord',
|
|
||||||
'location' : 'Northampton',
|
|
||||||
'language' : 'German',
|
|
||||||
'User-Agent': 'Mozilla 4/0'}
|
|
||||||
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -42,7 +35,8 @@ class Site:
|
||||||
tree = html.fromstring(site.content)
|
tree = html.fromstring(site.content)
|
||||||
|
|
||||||
articles = tree.xpath(self.xPath["newsArticle"])
|
articles = tree.xpath(self.xPath["newsArticle"])
|
||||||
return articles
|
links = tree.xpath(self.xPath["newsLinks"])
|
||||||
|
return articles, links
|
||||||
|
|
||||||
def read_headlines(self, url):
|
def read_headlines(self, url):
|
||||||
site = requests.get(url)
|
site = requests.get(url)
|
||||||
|
|
@ -65,10 +59,24 @@ class Golem(Site):
|
||||||
siteName = "golem"
|
siteName = "golem"
|
||||||
baseURL = "https://www.golem.de/"
|
baseURL = "https://www.golem.de/"
|
||||||
searchURLString = "https://suche.golem.de/search.php?l=10&q="
|
searchURLString = "https://suche.golem.de/search.php?l=10&q="
|
||||||
|
|
||||||
Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()'
|
Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()'
|
||||||
Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href'
|
Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href'
|
||||||
Site.xPath["newsArticle"] = '//h2[@class="head2"]/text()'
|
Site.xPath["newsArticle"] = '//li//h2/text()'
|
||||||
|
Site.xPath["newsLinks"] = '//header[@class="cluster-header"]//@href'
|
||||||
Site.xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()'
|
Site.xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()'
|
||||||
Site.xPath["readHeadlineText"] = '//header/p/text()'
|
Site.xPath["readHeadlineText"] = '//header/p/text()'
|
||||||
Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()'
|
Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()'
|
||||||
|
|
||||||
|
class Spiegel(Site):
|
||||||
|
siteName = "spiegel"
|
||||||
|
baseURL = "https://www.spiegel.de/"
|
||||||
|
searchURLString = "https://www.spiegel.de/suche/?suchbegriff="
|
||||||
|
|
||||||
|
Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()'
|
||||||
|
Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href'
|
||||||
|
Site.xPath["newsArticle"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//span[@class="headline"]/text()'
|
||||||
|
Site.xPath["newsLinks"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//@href'
|
||||||
|
Site.xPath["readHeadlineTitle"] = '//div[@class="column-both"]//span[@class="headline"]//text()'
|
||||||
|
Site.xPath["readHeadlineText"] = '//div[@class="column-both"]/p/strong/text()'
|
||||||
|
Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()'
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue