parent
1b71fb1c00
commit
22be23224f
Binary file not shown.
|
|
@ -14,7 +14,6 @@ logging.getLogger('flask_ask').setLevel(logging.DEBUG)
|
|||
@ask.intent('searchon', mapping={'site': 'Site'}, default={'site': 'golem'})
|
||||
def search_on(site):
|
||||
try:
|
||||
|
||||
session.attributes["siteName"] = site
|
||||
print(session.attributes["siteName"])
|
||||
except:
|
||||
|
|
@ -38,7 +37,7 @@ def search_for(searchTerm):
|
|||
|
||||
if site == "golem":
|
||||
obj = site2.Golem()
|
||||
elif site == "spiegel":
|
||||
elif site.lower() == "spiegel":
|
||||
obj = site2.Spiegel()
|
||||
elif site is None:
|
||||
session.attributes["searchTerm"] = searchTerm
|
||||
|
|
@ -64,14 +63,13 @@ def search_for(searchTerm):
|
|||
@ask.intent('News', mapping={'site': 'Site'}, default={'site': ''})
|
||||
def news(site):
|
||||
try:
|
||||
site = site.lower()
|
||||
session.attributes["siteName"] = site
|
||||
except:
|
||||
print("error")
|
||||
print(site)
|
||||
if site == "golem":
|
||||
obj = site2.Golem()
|
||||
elif site == "spiegel":
|
||||
elif site.lower() == "spiegel":
|
||||
obj = site2.Spiegel()
|
||||
elif site == '':
|
||||
session.attributes["lastCall"] = "news"
|
||||
|
|
@ -96,25 +94,25 @@ def search_answer(number):
|
|||
site = session.attributes["siteName"]
|
||||
except:
|
||||
site = None
|
||||
|
||||
print(number)
|
||||
if site == "golem":
|
||||
obj = site2.Golem()
|
||||
elif site == "spiegel":
|
||||
elif site.lower() == "spiegel":
|
||||
obj = site2.Spiegel()
|
||||
|
||||
links = session.attributes["lastSearch"]
|
||||
|
||||
newLinks = []
|
||||
for link in links:
|
||||
if "http" not in link:
|
||||
newLinks.append(obj.baseURL + link)
|
||||
links = newLinks
|
||||
if "http" not in str(links):
|
||||
newLinks = []
|
||||
for link in links:
|
||||
if "http" not in link:
|
||||
newLinks.append(obj.baseURL + link)
|
||||
links = newLinks
|
||||
|
||||
art = obj.read_headlines(links[int(number)-1])
|
||||
response = ""
|
||||
for element in art:
|
||||
response += element + " "
|
||||
print(links)
|
||||
response += element
|
||||
|
||||
session.attributes["lastCall"] = "search2"
|
||||
return statement(response)
|
||||
|
||||
|
|
|
|||
|
|
@ -59,24 +59,24 @@ class Golem(Site):
|
|||
siteName = "golem"
|
||||
baseURL = "https://www.golem.de/"
|
||||
searchURLString = "https://suche.golem.de/search.php?l=10&q="
|
||||
|
||||
Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()'
|
||||
Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href'
|
||||
Site.xPath["newsArticle"] = '//li//h2/text()'
|
||||
Site.xPath["newsLinks"] = '//header[@class="cluster-header"]//@href'
|
||||
Site.xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()'
|
||||
Site.xPath["readHeadlineText"] = '//header/p/text()'
|
||||
Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()'
|
||||
xPath = dict()
|
||||
xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()'
|
||||
xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href'
|
||||
xPath["newsArticle"] = '//li//h2/text()'
|
||||
xPath["newsLinks"] = '//div[@class="g g4"]//header//@href'
|
||||
xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()'
|
||||
xPath["readHeadlineText"] = '//header/p/text()'
|
||||
xPath["readArticleText"] = '//div[@class="formatted"]/p/text()'
|
||||
|
||||
class Spiegel(Site):
|
||||
siteName = "spiegel"
|
||||
baseURL = "https://www.spiegel.de/"
|
||||
searchURLString = "https://www.spiegel.de/suche/?suchbegriff="
|
||||
|
||||
Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()'
|
||||
Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href'
|
||||
Site.xPath["newsArticle"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//span[@class="headline"]/text()'
|
||||
Site.xPath["newsLinks"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//@href'
|
||||
Site.xPath["readHeadlineTitle"] = '//div[@class="column-both"]//span[@class="headline"]//text()'
|
||||
Site.xPath["readHeadlineText"] = '//div[@class="column-both"]/p/strong/text()'
|
||||
Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()'
|
||||
xPath = dict()
|
||||
xPath["searchArticle"] = '//div[@class="search-teaser"]/p/text()'
|
||||
xPath["searchLinks"] = '//div[@class="search-teaser"]/p//@href'
|
||||
xPath["newsArticle"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//span[@class="headline"]/text()'
|
||||
xPath["newsLinks"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//@href'
|
||||
xPath["readHeadlineTitle"] = '//div[@class="column-both"]//span[@class="headline"]//text()'
|
||||
xPath["readHeadlineText"] = '//div[@class="column-both"]/p/strong/text()'
|
||||
xPath["readArticleText"] = '//div[@class="article-section clearfix"]/p/text()'
|
||||
|
|
|
|||
|
|
@ -1,12 +1,5 @@
|
|||
import urllib.request,urllib.parse,urllib.error
|
||||
from lxml import html
|
||||
import requests
|
||||
import re
|
||||
import siteobj as site2
|
||||
|
||||
searchURL = "https://suche.golem.de/search.php?l=10&q=gaming"
|
||||
site = requests.get(searchURL)
|
||||
tree = html.fromstring(site.content)
|
||||
|
||||
articles = tree.xpath('//span[@class="dh2 head2"]/text()')
|
||||
links = tree.xpath('//ol[@class="list-articles"]/li/header//@href')
|
||||
print(len(articles), len(links))
|
||||
obj = site2.Golem()
|
||||
news = obj.get_news()
|
||||
|
|
|
|||
Loading…
Reference in New Issue