Done-ish
This commit is contained in:
parent
3b8a5d1ace
commit
41da4ee275
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
|
@ -1,76 +0,0 @@
|
|||
SentiWS
|
||||
~~~~~~~
|
||||
|
||||
SentimentWortschatz, or SentiWS for short, is a publicly available German-language resource for sentiment analysis, opinion mining, etc. It lists positive and negative polarity-bearing words weighted within the interval of [-1; 1] plus their part-of-speech tag and, if applicable, their inflections. The current version of SentiWS (v2.0) contains around 1,650 positive and 1,800 negative words, which sum up to around 16,000 positive and around 18,000 negative word forms including their inflections, respectively. It not only contains adjectives and adverbs explicitly expressing a sentiment, but also nouns and verbs implicitly containing one.
|
||||
|
||||
|
||||
License
|
||||
~~~~~~~
|
||||
|
||||
SentiWS is licensed under a Creative Commons Attribution-Noncommercial-Share Alike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).
|
||||
|
||||
|
||||
Obtain a Copy
|
||||
~~~~~~~~~~~~~
|
||||
The latest version of SentiWS can be found at https://wortschatz.uni-leipzig.de/download/.
|
||||
|
||||
|
||||
Data Format
|
||||
~~~~~~~~~~~
|
||||
SentiWS is organised in two UTF-8-encoded text files structured in the following way:
|
||||
|
||||
<Word>|<POS tag> \t <Polarity weight> \t <Infl_1>,...,<Infl_k> \n
|
||||
|
||||
where \t denotes a tab, and \n denotes a new line.
|
||||
|
||||
|
||||
Citation
|
||||
~~~~~~~~
|
||||
|
||||
If you use SentiWS in your work we kindly ask you to cite
|
||||
|
||||
R. Remus, U. Quasthoff & G. Heyer: SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.
|
||||
In: Proceedings of the 7th International Language Resources and Evaluation (LREC'10), 2010
|
||||
|
||||
or use the following BibTeX-code snippet:
|
||||
|
||||
@INPROCEEDINGS{remquahey2010,
|
||||
title = {SentiWS -- a Publicly Available German-language Resource for Sentiment Analysis},
|
||||
booktitle = {Proceedings of the 7th International Language Resources and Evaluation (LREC'10)},
|
||||
author = {Remus, R. and Quasthoff, U. and Heyer, G.},
|
||||
year = {2010}
|
||||
}
|
||||
|
||||
|
||||
Version History
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
SentiWS is "work in progress" and hence far from being fully-fledged and error-free. It will be continuously refined by adding missing words and word forms and removing ambiguous ones.
|
||||
|
||||
v1.8b, 2010-05-19: First publicly available version as described in Remus et al. (2010).
|
||||
v1.8c, 2012-03-21: Second publicly available version in which some POS tags were corrected.
|
||||
v2.0, 2018-10-19: Third publicly available version in which the inflected forms were extended.
|
||||
|
||||
|
||||
Statistics
|
||||
~~~~~~~~~~
|
||||
|
||||
Positive Negative
|
||||
Adjectives Baseforms 792 712
|
||||
Inflections 10,922 10,461
|
||||
Adverbs Baseforms 7 4
|
||||
Inflections 5 0
|
||||
Nouns Baseforms 548 688
|
||||
Inflections 731 1,154
|
||||
Verbs Baseforms 297 423
|
||||
Inflections 3,235 4,568
|
||||
All Baseforms 1,644 1,827
|
||||
Inflections 14,893 16,183
|
||||
|
||||
Total 16,537 18,010
|
||||
|
||||
Table: Overview of the dictionary's content
|
||||
|
||||
|
||||
|
||||
SentiWS.txt was last updated on 2018-12-19.
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
|
@ -111,22 +111,25 @@ def search_answer(number):
|
|||
|
||||
|
||||
links = util.get_session_value(session.attributes, "lastSearch")
|
||||
|
||||
|
||||
# if the site uses relative links, make absolute ones
|
||||
if "http" not in str(links):
|
||||
if str(links).count("http") < len(links):
|
||||
newLinks = []
|
||||
for link in links:
|
||||
if "http" not in link:
|
||||
newLinks.append(obj.baseURL + link)
|
||||
else:
|
||||
newLinks.append( link)
|
||||
links = newLinks
|
||||
|
||||
if int(number) > len(links):
|
||||
return question("Dieser Artikel existiert leider nicht, versuchen Sie eine andere Nummer.")
|
||||
art = obj.read_headlines(links[int(number)-1])
|
||||
response = ""
|
||||
for element in art:
|
||||
response += element
|
||||
|
||||
session.attributes["lastCall"] = "search2"
|
||||
return statement(response)
|
||||
return question(response)
|
||||
|
||||
|
||||
@ask.intent('Senti', mapping={'number': 'Nummer'}, default={'number': 1})
|
||||
|
|
@ -144,6 +147,18 @@ def get_sentiment(number):
|
|||
|
||||
|
||||
links = util.get_session_value(session.attributes, "lastSearch")
|
||||
# if the site uses relative links, make absolute ones
|
||||
if str(links).count("http") < len(links):
|
||||
newLinks = []
|
||||
for link in links:
|
||||
if "http" not in link:
|
||||
newLinks.append(obj.baseURL + link)
|
||||
else:
|
||||
newLinks.append( link)
|
||||
links = newLinks
|
||||
|
||||
if int(number) > len(links):
|
||||
return question("Dieser Artikel existiert leider nicht, versuchen Sie eine andere Nummer.")
|
||||
|
||||
url = links[int(number)-1]
|
||||
NewsText = obj.read_article(url)
|
||||
|
|
@ -160,7 +175,7 @@ def get_sentiment(number):
|
|||
else:
|
||||
good = "nice"
|
||||
|
||||
return statement(good)
|
||||
return question(good)
|
||||
|
||||
@ask.intent('AMAZON.HelpIntent')
|
||||
def help():
|
||||
|
|
@ -186,4 +201,4 @@ if __name__ == '__main__':
|
|||
if verify == 'false':
|
||||
app.config['ASK_VERIFY_REQUESTS'] = False
|
||||
|
||||
app.run(host='127.0.0.1',port=443)
|
||||
app.run(host='127.0.0.1',port=5000)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,261 @@
|
|||
{
|
||||
"interactionModel": {
|
||||
"languageModel": {
|
||||
"invocationName": "testing",
|
||||
"intents": [
|
||||
{
|
||||
"name": "AMAZON.FallbackIntent",
|
||||
"samples": []
|
||||
},
|
||||
{
|
||||
"name": "AMAZON.CancelIntent",
|
||||
"samples": [
|
||||
"schnauze",
|
||||
"ruhe ",
|
||||
"halt",
|
||||
"stop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "AMAZON.HelpIntent",
|
||||
"samples": []
|
||||
},
|
||||
{
|
||||
"name": "AMAZON.StopIntent",
|
||||
"samples": [
|
||||
"tschüss",
|
||||
"danke",
|
||||
"stop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "AMAZON.NavigateHomeIntent",
|
||||
"samples": []
|
||||
},
|
||||
{
|
||||
"name": "searchfor",
|
||||
"slots": [
|
||||
{
|
||||
"name": "Topic",
|
||||
"type": "AMAZON.SearchQuery"
|
||||
}
|
||||
],
|
||||
"samples": [
|
||||
"ja {Topic}",
|
||||
"ja nach {Topic}",
|
||||
"{Topic} zusammengefasst",
|
||||
"zu {Topic} ",
|
||||
"zu {Topic} zusammengefasst",
|
||||
"suche nach {Topic} ",
|
||||
"suche nach {Topic} zusammengefasst"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "News",
|
||||
"slots": [
|
||||
{
|
||||
"name": "Site",
|
||||
"type": "Site"
|
||||
}
|
||||
],
|
||||
"samples": [
|
||||
"gib mir die Nachrichten auf {Site}",
|
||||
"gib mir die Nachrichten bei {Site}",
|
||||
"was gibt es neues bei {Site}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "SearchTwo",
|
||||
"slots": [
|
||||
{
|
||||
"name": "Nummer",
|
||||
"type": "AMAZON.NUMBER"
|
||||
}
|
||||
],
|
||||
"samples": [
|
||||
"die {Nummer}",
|
||||
"fasse {Nummer} zusammen",
|
||||
"ließ {Nummer}",
|
||||
"fasse den {Nummer} Artikel zusammen",
|
||||
"ließ den {Nummer}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "searchon",
|
||||
"slots": [
|
||||
{
|
||||
"name": "Site",
|
||||
"type": "Site"
|
||||
}
|
||||
],
|
||||
"samples": [
|
||||
"auf {Site}",
|
||||
"{Site}",
|
||||
"öffne {Site}",
|
||||
"suche auf {Site}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Senti",
|
||||
"slots": [
|
||||
{
|
||||
"name": "number",
|
||||
"type": "AMAZON.NUMBER"
|
||||
}
|
||||
],
|
||||
"samples": [
|
||||
"Gefühl in {number}",
|
||||
"sentiment in {number}",
|
||||
"über {number}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"types": [
|
||||
{
|
||||
"name": "Site",
|
||||
"values": [
|
||||
{
|
||||
"name": {
|
||||
"value": "spiegel"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "welt"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "zeit"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "golem"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Activity",
|
||||
"values": [
|
||||
{
|
||||
"name": {
|
||||
"value": "suche",
|
||||
"synonyms": [
|
||||
"gucke nach",
|
||||
"suche nach"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "fasse zusammen"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "ließ"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Nummer",
|
||||
"values": [
|
||||
{
|
||||
"name": {
|
||||
"value": "10",
|
||||
"synonyms": [
|
||||
"zehnten",
|
||||
"zehn"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "9",
|
||||
"synonyms": [
|
||||
"neunten",
|
||||
"neun"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "8",
|
||||
"synonyms": [
|
||||
"achten",
|
||||
"acht"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "7",
|
||||
"synonyms": [
|
||||
"siebenten",
|
||||
"sieben"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "6",
|
||||
"synonyms": [
|
||||
"sechs",
|
||||
"sechsten"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "5",
|
||||
"synonyms": [
|
||||
"fünften",
|
||||
"fünf"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "4",
|
||||
"synonyms": [
|
||||
"vierten",
|
||||
"vier"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "3",
|
||||
"synonyms": [
|
||||
"dritten",
|
||||
"drei"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "2",
|
||||
"synonyms": [
|
||||
"zwei",
|
||||
"zweiten"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": {
|
||||
"value": "1",
|
||||
"synonyms": [
|
||||
"ersten",
|
||||
"eins"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -75,8 +75,8 @@ class Spiegel(Site):
|
|||
xPath = dict()
|
||||
xPath["searchArticle"] = '//div[@class="search-teaser"]/p/text()'
|
||||
xPath["searchLinks"] = '//div[@class="search-teaser"]/p//@href'
|
||||
xPath["newsArticle"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//span[@class="headline"]/text()'
|
||||
xPath["newsLinks"] = '//div[@class="column-wide pano_xxl"]//div[@class="teaser"]//h2[@class="article-title"]//@href'
|
||||
xPath["newsArticle"] = '//*[@class="teaser"]/div/h2/a/span[2]/text()'
|
||||
xPath["newsLinks"] = '//*[@class="teaser"]/div/h2//@href'
|
||||
xPath["readHeadlineTitle"] = '//div[@class="column-both"]//span[@class="headline"]//text()'
|
||||
xPath["readHeadlineText"] = '//div[@class="column-both"]/p/strong/text()'
|
||||
xPath["readArticleText"] = '//div[@class="article-section clearfix"]/p/text()'
|
||||
xPath["readArticleText"] = '//*[@id="js-article-column"]/div/p[1]/text()'
|
||||
|
|
|
|||
Loading…
Reference in New Issue