From 3f0e7937c716d71663a2771fad15acc4d1250538 Mon Sep 17 00:00:00 2001 From: Patrice Date: Thu, 2 May 2019 20:04:10 +0200 Subject: [PATCH] redone site object and working dialog for news and search --- reader/__pycache__/siteobj.cpython-35.pyc | Bin 2721 -> 2744 bytes reader/main.py | 88 +++++++++++++++++----- reader/siteobj.py | 56 ++++++++------ 3 files changed, 104 insertions(+), 40 deletions(-) diff --git a/reader/__pycache__/siteobj.cpython-35.pyc b/reader/__pycache__/siteobj.cpython-35.pyc index 99dc7166b65a4397b85ff95d423031a9d66e0bfc..09b8b5a8198b563f33668a5bff74d2396396b09a 100644 GIT binary patch literal 2744 zcma)8S##7z5N_>CdwJIfhm9d5jzb8FV+*LHs8EoYg2FjqD;2}b#;GkKjlD8jX_=8R zsLT`jGx;T{O63>mHBWiT58x?Zj}9M@Dx{rOcTe}s^f|lG>vjJ6_s^~G+eClUk;eqS zj-g%x`1lzV5)F(JuN}ebC0?V@q_ITJI=>9&S)$%>gz6G&QtHH0U)1 znoJ778r>?edWD%4W>r|D!kWNZzi7-0~JM?nJN*<&p6BFSc$Q)oN_Bb~Y%N0IQ7AdYUPogammth!H`>{Acf_EvA>`PN(#~Tg z-mG{xw|GId z^+P<0w(be_G>Io$8=H@|*d8Ib6r2@5*fX=;6h zzfOzHg~U;=nR6R-fQbfe*I=>Le`74YF;=97-DLy7Zf7%jkRSj2U^9_Hw39WZn1rq; zvX&Ij_Jm4QR^JhcWIM9Xwv5NB5cJ|GfzvbV#l%fU`22*A-IOydX~AoOm$j?q;c3n? zGm!#oyLpbO%t8u1RfhqPv#dRxs|^dW55ZGi05TSh8RJxK-k3Mi)7sI;C99ybjdcv| z?t4ITcfT?yfyZm?cXk`R4adTz@VQkuJa;wA>+03+)lT2EYkkRav|Cy8iK_&>Cg(xR z1qSRPd5*y%gYyhHF>Fh_L|$YDn<77Au*9HByxi^L!&hEOwk4&o2coD8+pEM+V#F{VR4DJO$hRP zo1NA7WO!&c5`+82NyDePxQ3&mr6>oR6z1pQK3|6WxZJy)(0?x{zXem#F08;#yEF(KQZk>c>7Al zb&V?(*E23`lW$cnO1>^}Z|6jQPZmTQGo5l~3PiUgdLX7tVg|%4TH;)0ZN#A%XVwmX z(zBx^nW&qNv%kN;rdeyga8kQut37Wd4kz;7vo;w`z720*yL$QA?et^EQ4=@%W!(#1 zrEV|#qw7oT8j(zVI>s?Dp@UiA|%7W$wT8f>mw<)b+=A0IL_#L zrE_9cjzzix2^~^rbnWChuIY1pb$AXg=X9>pn3U(^3Qw0E#}A%Yn{CIqYm-FyP_G2J zBKjx^8(;Z3gB1o}FsSlPhg_%IJJViWP$hAplMasKt zJ0YBt`)B$)`Y(9xsed7dqBBcNqExs@DtSl4+2uUGd8{|u?bhGvAN|J-fIp$~v5?=# zC0`+l@Xr7a&@)E7S>bCV-UM#Jz=oa;#?3o*=+#H>G@#dj!>9(41;3$63$9U36Gb*e zbuFp`Yr-`uoFj`zZsRXB4_((NtXK_rNKd0r3z>af@&XAPIoC7k%^n&-Mw=?NM-7OY z5TV`{MDviKmli}0L~SyR9!CohEovSeUV>;@^Z3>ZM5~&|*ViCg*F5UEgZ^z4)={EZ z0r~usEKOOck}TcJTR$*1+zEJc!3rx41}twsO~N=}eDfg7L{v0+76x>-H{Z>Kii5#W zWhsh-^yEA^VZHhx6N4blThAmDJKvtLRORzevpnHE*!;qK5E>XXedw3g6I{|lg1{fS zM)!9h&?Mu>qKQ&X-fV|^eNrg<-)C{!|BlHQDjW6>kACh87DP<+B}S7yKkW=(6;0ny z(nR_GGO0F^fSI=`+|Fnx)Kc2OHSGmu&+Hy(yRIqb@IYasu_T=MBPfN%f@FaRV?Pio z2{~IwS!vD4SGBe2{r`x3%BG=h&Khf;Fy*K0Qf^etltGPl-Z7}!x!|)_971yx(EAlj z%Q~Ry?z>qNi>PeY+Je>>qe-5mOx~^7D*>F|@479@EtRG37cJi(WYIY%-tqmj^MIE- z+ElMiYMJma0)Ebi4`>@I9EX4LE;#7vu>1Mr{-9WNnd?t zs;jAz_UWYqB)4@nbu4x-{@J=4_58pNCpmVn?sMX?ds|qC^M!qwaW*LI6Z#oLPR?1l zbnp1c;N-Y6;1n4>Jg!aHcx{40kqK8OTpRGZ2C4?97F;2M8b8edgvno|p0Q0iH;;bJI)`5Ann+ljwQ-v0%q;qi6^= zNDYl4=XRw}mx_drHxgoJ+Q4K)YX|h4 zo>-ZTXu52vT(v12(S|eUG@Yg(IbIVBc(?K7dFN(I6!r4CaBk$oKu9)zVnSr1XYrQ< pUbY8yOJNG8r@Yo#33cp5(fqo+fgaO;5ZaUb#**O}PHpwQe*nWhBisN0 diff --git a/reader/main.py b/reader/main.py index bceb7e8..76e5796 100644 --- a/reader/main.py +++ b/reader/main.py @@ -11,33 +11,83 @@ ask = Ask(app, "/") logging.getLogger('flask_ask').setLevel(logging.DEBUG) -@ask.intent('GolemSearch', - mapping={'site': 'Site', 'searchTerm':'Topic'}, - default={'site': 'golem', 'searchTerm':''}) -def search(site, searchTerm): - print(site, searchTerm) +@ask.intent('searchon', mapping={'site': 'Site'}, default={'site': 'golem'}) +def search_on(site): + try: + session.attributes["siteName"] = site + except: + print("error") - obj = site2.Golem() + if "searchTerm" in session.attributes and session.attributes["searchTerm"] is not None and "lastCall" in session.attributes and session.attributes["lastCall"] == "searchfor": + searchTerm = session.attributes["searchTerm"] + session.attributes["searchTerm"] = None + return search_for(searchTerm) + if "lastCall" in session.attributes and session.attributes["lastCall"] == "news": + return news(site) + session.attributes["lastCall"] = "searchon" + return question("Wonach?") + +@ask.intent('searchfor', mapping={'searchTerm':'Topic'}, default={'searchTerm':''}) +def search_for(searchTerm): + try: + site = session.attributes["siteName"] + except: + site = None + + if site == "golem": + obj = site2.Golem() + elif site is None: + session.attributes["searchTerm"] = searchTerm + session.attributes["lastCall"] = "searchfor" + return question("Auf welcher Seite wollen Sie hiernach Suchen?") + else: + return statement("error") articles, links = obj.search_article(searchTerm) - session.attributes["lastSearch"] = links response = "Für welchen der folgenden Artikel interessieren Sie sich?" - for i in range(0, 5): - response += articles[i] - return question(response) + if len(articles) > 0: + for i in range(0, max(5, len(articles))): + response += articles[i] + else: + return question("Dazu konnte nichts gefunden werden. Möchten Sie nach etwas anderem Suchen?") -@ask.intent('News', - mapping={'site': 'Site'}, - default={'site': 'golem'}) + session.attributes["lastCall"] = "searchfor" + + return question(response + "noch etwas?") + +@ask.intent('News', mapping={'site': 'Site'}, default={'site': ''}) def news(site): - print(site) - obj = site2.Golem() + + if site == "golem": + obj = site2.Golem() + elif site == '': + session.attributes["lastCall"] = "news" + return question("Auf welcher Seite wollen Sie hiernach Suchen?") + else: + return statement("error") + news = obj.get_news() + response = "" for i in range(0, 5): - response += news[i] + response += news[i] + ". " + + session.attributes["lastCall"] = "news" + return statement(response) + +@ask.intent('SearchTwo', mapping={'number': 'Nummer'}, default={'number': 1}) +def search_answer(number): + print(number) + obj = site2.Golem() + + art = obj.read_headlines(session.attributes["lastSearch"][int(number)-1]) + response = "" + for element in art: + response += element + " " + + session.attributes["lastCall"] = "search2" return statement(response) @ask.intent('AMAZON.HelpIntent') @@ -45,9 +95,13 @@ def help(): speech_text = 'Dieser Skill erlaubt es Ihnen einige Nachrichten Websites zu nutzen' return statement(speech_text) +@ask.intent('AMAZON.FallbackIntent') +def fallback(): + return statement("ein fehler ist aufgetreten") + @ask.launch def launch(): - return search("golem", "gaming") + return question("Was möchten Sie tun?") @ask.session_ended def session_ended(): diff --git a/reader/siteobj.py b/reader/siteobj.py index 5ceab51..9e60c36 100644 --- a/reader/siteobj.py +++ b/reader/siteobj.py @@ -5,54 +5,51 @@ import re class Site: - url = "" + siteName = "" + baseURL = "" + searchURLString = "" + xPath = dict() + xPath["searchArticle"] = "" + xPath["searchLinks"] = "" + xPath["newsArticle"] = "" + xPath["readHeadlineTitle"] = "" + xPath["readHeadlineText"] = "" + xPath["readArticleText"] = "" + header_values = { 'Connection:' : 'Keep-alive', 'name' : 'Michael Foord', 'location' : 'Northampton', 'language' : 'German', 'User-Agent': 'Mozilla 4/0'} - + def __init__(self): - return None def search_article(self, topic): - return False - def get_news(self): - return False - def read_article(self, url): - return False - def read_headlines(self, url): - return False - - -class Golem(Site): - url = "golem" - def search_article(self, topic): - searchURL = "https://suche.golem.de/search.php?l=10&q=" + topic.replace(" ", "+") + searchURL = self.searchURLString + topic.replace(" ", "+") site = requests.get(searchURL) tree = html.fromstring(site.content) - articles = tree.xpath('//span[@class="dh2 head2"]/text()') - links = tree.xpath('//ol[@class="list-articles"]/li/header//@href') + articles = tree.xpath(self.xPath["searchArticle"]) + links = tree.xpath(self.xPath["searchLinks"]) return articles, links def get_news(self): - searchURL = "https://www.golem.de/" + searchURL = self.baseURL site = requests.get(searchURL) tree = html.fromstring(site.content) - articles = tree.xpath('//h2[@class="head2"]/text()') + articles = tree.xpath(self.xPath["newsArticle"]) return articles def read_headlines(self, url): site = requests.get(url) tree = html.fromstring(site.content) - title = tree.xpath('//header/h1/span[@class="dh1 head5"]/text()') - title += tree.xpath('//header/p/text()') + title = tree.xpath(self.xPath["readHeadlineTitle"] ) + title += tree.xpath(self.xPath["readHeadlineText"]) return title def read_article(self, url): @@ -60,5 +57,18 @@ class Golem(Site): tree = html.fromstring(site.content) title = self.read_headlines(url) - title += tree.xpath('//div[@class="formatted"]/p/text()') + title += tree.xpath(self.xPath["readArticleText"]) return title + + +class Golem(Site): + siteName = "golem" + baseURL = "https://www.golem.de/" + searchURLString = "https://suche.golem.de/search.php?l=10&q=" + Site.xPath["searchArticle"] = '//span[@class="dh2 head2"]/text()' + Site.xPath["searchLinks"] = '//ol[@class="list-articles"]/li/header//@href' + Site.xPath["newsArticle"] = '//h2[@class="head2"]/text()' + Site.xPath["readHeadlineTitle"] = '//header/h1/span[@class="dh1 head5"]/text()' + Site.xPath["readHeadlineText"] = '//header/p/text()' + Site.xPath["readArticleText"] = '//div[@class="formatted"]/p/text()' +