added senti

This commit is contained in:
Patrice 2019-05-27 21:17:52 +02:00
parent 78054e59d0
commit 3b8a5d1ace
10 changed files with 40466 additions and 36937 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -0,0 +1,76 @@
SentiWS
~~~~~~~
SentimentWortschatz, or SentiWS for short, is a publicly available German-language resource for sentiment analysis, opinion mining etc. It lists positive and negative polarity bearing words weighted within the interval of [-1; 1] plus their part of speech tag, and if applicable, their inflections. The current version of SentiWS (v2.0) contains around 1,650 positive and 1,800 negative words, which sum up to around 16,000 positive and around 18,000 negative word forms incl. their inflections, respectively. It not only contains adjectives and adverbs explicitly expressing a sentiment, but also nouns and verbs implicitly containing one.
License
~~~~~~~
SentiWS is licensed under a Creative Commons Attribution-Noncommercial-Share Alike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).
Obtain a Copy
~~~~~~~~~~~~~
The latest version of SentiWS can be found at https://wortschatz.uni-leipzig.de/download/.
Data Format
~~~~~~~~~~~
SentiWS is organised in two utf8-encoded text files structured the following way:
<Word>|<POS tag> \t <Polarity weight> \t <Infl_1>,...,<Infl_k> \n
where \t denotes a tab, and \n denotes a new line.
Citation
~~~~~~~~
If you use SentiWS in your work we kindly ask you to cite
R. Remus, U. Quasthoff & G. Heyer: SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.
In: Proceedings of the 7th International Conference on Language Resources and Evaluation (LREC'10), 2010
or use the following BibTeX-code snippet:
@INPROCEEDINGS{remquahey2010,
title = {SentiWS -- a Publicly Available German-language Resource for Sentiment Analysis},
booktitle = {Proceedings of the 7th International Language Resources and Evaluation (LREC'10)},
author = {Remus, R. and Quasthoff, U. and Heyer, G.},
year = {2010}
}
Version History
~~~~~~~~~~~~~~~
SentiWS is "work in progress" and hence far from being fully-fledged and error-free. It will be continuously refined by adding missing words and word forms and removing ambiguous ones.
v1.8b, 2010-05-19: First publicly available version as described in Remus et al. (2010).
v1.8c, 2012-03-21: Second publicly available version in which some POS tags were corrected.
v2.0, 2018-10-19: Third publicly available version in which the inflected forms were extended.
Statistics
~~~~~~~~~~
Positive Negative
Adjectives Baseforms 792 712
Inflections 10,922 10,461
Adverbs Baseforms 7 4
Inflections 5 0
Nouns Baseforms 548 688
Inflections 731 1154
Verbs Baseforms 297 423
Inflections 3,235 4,568
All Baseforms 1,644 1,827
Inflections 14,893 16,183
Total 16,537 18,010
Table: Overview of the dictionary's content
SentiWS.txt was last updated on 2018-12-19.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,8 @@ import os
from flask import Flask from flask import Flask
from flask_ask import Ask, request, session, question, statement from flask_ask import Ask, request, session, question, statement
import yaml import yaml
from nltk.corpus import treebank
from textblob_de import TextBlobDE as TextBlob
import siteobj as site2 import siteobj as site2
import util import util
@ -126,6 +128,40 @@ def search_answer(number):
session.attributes["lastCall"] = "search2" session.attributes["lastCall"] = "search2"
return statement(response) return statement(response)
@ask.intent('Senti', mapping={'number': 'Nummer'}, default={'number': 1})
def get_sentiment(number):
    """Alexa intent: give a crude sentiment verdict for a searched article.

    ``number`` (1-based, spoken by the user) selects a link from the
    "lastSearch" result list stored in the session.  The article text is
    scored with textblob-de; polarity < 0 answers "shit", otherwise "nice".
    """
    site = util.get_session_value(session.attributes, "siteName")
    if site is not None:
        obj = get_site_obj(site)
    else:
        # No site chosen yet -- remember why we asked and re-prompt.
        session.attributes["lastCall"] = "senti"
        return question("Wonach wollen Sie suchen?")
    if obj is None:  # should never be called
        return question("Error. Wonach wollen Sie suchen?")
    links = util.get_session_value(session.attributes, "lastSearch")
    # Guard: without a previous search there is nothing to score, and an
    # out-of-range number would raise IndexError and crash the skill.
    if not links or not 1 <= int(number) <= len(links):
        return question("Wonach wollen Sie suchen?")
    url = links[int(number) - 1]
    article = obj.read_article(url)
    text = ""
    for part in article:
        text += part
    blob = TextBlob(text)
    # textblob-de sentiment is (polarity, subjectivity); use polarity only.
    polarity = blob.sentiment[0]
    good = "shit" if polarity < 0 else "nice"
    return statement(good)
@ask.intent('AMAZON.HelpIntent') @ask.intent('AMAZON.HelpIntent')
def help(): def help():
speech_text = 'Dieser Skill erlaubt es Ihnen einige Nachrichten Websites zu nutzen' speech_text = 'Dieser Skill erlaubt es Ihnen einige Nachrichten Websites zu nutzen'
@ -150,10 +186,4 @@ if __name__ == '__main__':
if verify == 'false': if verify == 'false':
app.config['ASK_VERIFY_REQUESTS'] = False app.config['ASK_VERIFY_REQUESTS'] = False
context = SSL.Context(SSL.TLSv1_2_METHOD)
cer = os.path.join(os.path.dirname(__file__), 'certificate.pem')
key = os.path.join(os.path.dirname(__file__), 'privkey.pem')
context = (cer, key)
app.run(host='127.0.0.1',port=443) app.run(host='127.0.0.1',port=443)

View File

@ -1,6 +1,7 @@
# http://www.ulliwaltinger.de/sentiment/ # http://www.ulliwaltinger.de/sentiment/
# https://github.com/solariz/german_stopwords # https://github.com/solariz/german_stopwords
#!/usr/bin/env python #!/usr/bin/env python
# https://github.com/markuskiller/textblob-de
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import nltk import nltk
import copy import copy
@ -8,80 +9,28 @@ import encodings
import csv import csv
from siteobj import * from siteobj import *
from nltk.corpus import treebank from nltk.corpus import treebank
from textblob_de import TextBlobDE as TextBlob
negatives = {}
positives = {}
neutrals = {}


def _load_clues(path, target):
    # Parse one GermanPolarityClues TSV: column 0 is the word form, column 4
    # a "pos/neg/neutral" probability triple.  Rows whose triple contains "-"
    # carry no usable numbers and are skipped.
    with open(path, "r", encoding="utf-8") as tsvfile:
        for row in csv.reader(tsvfile, delimiter='\t'):
            parts = row[4].split("/")
            if "-" not in parts:
                target[row[0]] = [float(parts[0]), float(parts[1]), float(parts[2])]


_load_clues("./reader/GermanPolarityClues-2012/GermanPolarityClues-Negative.tsv", negatives)
_load_clues("./reader/GermanPolarityClues-2012/GermanPolarityClues-Neutral.tsv", neutrals)
_load_clues("./reader/GermanPolarityClues-2012/GermanPolarityClues-Positive.tsv", positives)

# get stopwords -- each entry keeps its trailing newline, exactly as read.
with open("./reader/stopwords.txt", 'r', encoding='utf-8') as f:
    stopwords = list(f)
extraSW = [".", ",", "´´", "``", "'", '"', ]
stopwords += extraSW

# Fetch one fixed Spiegel article and concatenate its paragraphs.
obj = Spiegel()
NewsText = obj.read_article("https://www.spiegel.de/netzwelt/games/labo-vr-set-von-nintendo-im-test-erst-basteln-dann-staunen-a-1265633.html")
newText = "".join(NewsText)

# Tokenize and drop every stop-word occurrence.
tokens = [tok for tok in nltk.word_tokenize(newText) if tok not in stopwords]

# Accumulate positive / negative / neutral mass; a token is looked up in the
# negative table first, then positive, then neutral (first hit wins).
p = 0
ne = 0
nu = 0
for tok in tokens:
    for table in (negatives, positives, neutrals):
        if tok in table:
            p += table[tok][0]
            ne += table[tok][1]
            nu += table[tok][2]
            break
def get_sentiment(url):
    """Fetch the article at *url* via the module-level site object ``obj``,
    print a crude sentiment verdict, and return it.

    Returns "shit" for negative textblob-de polarity, "nice" otherwise.
    NOTE(review): relies on a module-level ``obj`` providing
    ``read_article`` -- confirm it is assigned before this is called.
    """
    NewsText = obj.read_article(url)
    newText = ""
    for text in NewsText:
        newText += text
    newText = TextBlob(newText)
    # sentiment is a (polarity, subjectivity) pair; polarity < 0 => negative.
    sent = newText.sentiment[0]
    if sent < 0:
        good = "shit"
    else:
        good = "nice"
    # BUG FIX: originally printed the global ``link`` leaked from the
    # caller's loop (NameError when called standalone); use the ``url``
    # parameter instead.
    print(good, newText.sentiment, "\n", url.split("/")[-1], "\n")
    return good
# Driver: score every article currently linked from the Golem front page.
obj = Golem()
news, links = obj.get_news()
for link in links:
    get_sentiment(link)

View File

@ -5,3 +5,5 @@ nltk
lxml lxml
urllib urllib
yaml yaml
pip install -U textblob-de
python -m textblob.download_corpora