Alexa-news-stentiment-evalu.../reader/sentiment.py

88 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Polarity lexicon (GermanPolarityClues): http://www.ulliwaltinger.de/sentiment/
# German stopword list: https://github.com/solariz/german_stopwords
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import nltk
import copy
import encodings
import csv
from siteobj import *
from nltk.corpus import treebank
def _load_polarity_lexicon(path):
    """Read one polarity TSV file into a ``{word: [s0, s1, s2]}`` dict.

    The first column of each row is used as the key. The fifth column is
    split on "/" and its first three parts are converted to floats.
    Rows whose fifth column contains a "-" part are skipped, as are rows
    with fewer than five columns (e.g. blank lines), which would otherwise
    raise ``IndexError``.
    """
    lexicon = dict()
    with open(path, "r", encoding="utf-8") as tsvfile:
        for row in csv.reader(tsvfile, delimiter='\t'):
            if len(row) < 5:
                continue
            # Split once instead of re-splitting the same field per value.
            scores = row[4].split("/")
            if "-" in scores:
                continue
            lexicon[row[0]] = [float(scores[0]), float(scores[1]), float(scores[2])]
    return lexicon


# The three lexicons are loaded in the same order as before:
# negative, neutral, positive.
negatives = _load_polarity_lexicon("./reader/GermanPolarityClues-2012/GermanPolarityClues-Negative.tsv")
neutrals = _load_polarity_lexicon("./reader/GermanPolarityClues-2012/GermanPolarityClues-Neutral.tsv")
positives = _load_polarity_lexicon("./reader/GermanPolarityClues-2012/GermanPolarityClues-Positive.tsv")
# Load the stopword list: one entry per line of stopwords.txt.
stopwords = []
with open("./reader/stopwords.txt", 'r', encoding='utf-8') as f:
    for line in f:
        # Iterating a text file yields lines including their trailing
        # newline; strip it so entries can actually equal a token.
        word = line.strip()
        if word:
            stopwords.append(word)
# Extra punctuation / quote tokens that are dropped along with the stopwords.
# NOTE(review): "´´" is two acute accents (U+00B4); NLTK's default tokenizer
# presumably emits "''" for closing double quotes - confirm which was meant.
extraSW = [".", ",", "´´", "``", "'", '"', ]
stopwords += extraSW
# Fetch the article and flatten the returned pieces into one string.
# NOTE(review): Spiegel is presumably provided by the `siteobj` star import.
obj = Spiegel()
NewsText = obj.read_article("https://www.spiegel.de/netzwelt/games/labo-vr-set-von-nintendo-im-test-erst-basteln-dann-staunen-a-1265633.html")
# str.join concatenates in one pass instead of growing a string with +=.
newText = "".join(NewsText)

# NOTE(review): word_tokenize uses its default language here; the lexicons
# are German, so language="german" may be intended - confirm before changing.
tokens = nltk.word_tokenize(newText)

# Drop every stopword occurrence in a single pass. This yields the same
# token list as repeatedly calling tokens.remove(), without the quadratic
# rescans; the set makes each membership test constant-time.
stopword_lookup = set(stopwords)
tokens = [token for token in tokens if token not in stopword_lookup]
# Accumulate the three per-word scores over all remaining tokens.
# Index 0 feeds p, index 1 feeds ne, index 2 feeds nu
# (presumably positive / negative / neutral - verify against the lexicon).
p = 0
ne = 0
nu = 0
for token in tokens:
    # The first lexicon containing the token wins, in the same priority
    # order as before: negatives, then positives, then neutrals.
    for lexicon in (negatives, positives, neutrals):
        if token in lexicon:
            scores = lexicon[token]
            p += scores[0]
            ne += scores[1]
            nu += scores[2]
            break

# Normalise so the three values sum to 1. When no token matched any lexicon
# the total is 0; skip the division instead of raising ZeroDivisionError and
# report the raw zeros.
total = p + ne + nu
if total:
    p /= total
    nu /= total
    ne /= total
# Output order is p, nu, ne (unchanged).
print(p, nu, ne)