added sentiment, removed docker stuff for now
commit 78054e59d0
parent 44879f10e7
@@ -0,0 +1,17 @@
+FROM nginx:1.7
+
+# Copy in conf files
+COPY nginx.conf /etc/nginx/nginx.conf
+COPY mime.types /etc/nginx/mime.types
+COPY ssl.conf /etc/nginx/
+COPY site.conf /etc/nginx/sites-enabled/
+
+# Copy in certs
+COPY ssl.crt /etc/nginx/ssl.crt
+COPY ssl.key /etc/nginx/ssl.key
+
+# Expose both the HTTP (80) and HTTPS (443) ports
+EXPOSE 80 443
+
+CMD ["nginx"]
@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal"
+        }
+    ]
+}
@@ -1,13 +0,0 @@
-FROM tiangolo/uwsgi-nginx-flask:python3.5
-
-RUN python -m pip install pip==9.0.3
-RUN apt-get update
-RUN apt-get install -y gcc libevent-dev python-dev
-
-COPY ./requirements.txt /
-COPY ./ /app
-
-COPY ./nginx.conf /etc/nginx/nginx.conf
-#COPY /app/certificate.pem /etc/nginx/certs
-RUN pip install -r /requirements.txt
-
(6 file diffs suppressed because they are too large; 1 binary file not shown)
@@ -1,9 +1,7 @@
 import logging
-from OpenSSL import SSL
 import os
 from flask import Flask
 from flask_ask import Ask, request, session, question, statement
-import random
 import yaml
 import siteobj as site2
 import util
@@ -158,4 +156,4 @@ if __name__ == '__main__':
     key = os.path.join(os.path.dirname(__file__), 'privkey.pem')
     context = (cer, key)
 
-    app.run(host='127.0.0.1',port=443,ssl_context=context)
+    app.run(host='127.0.0.1',port=443)
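Review note (not part of the commit): the second hunk drops the ssl_context argument, so the Flask dev server now answers plain HTTP on port 443 while TLS termination presumably moves elsewhere. Should direct TLS come back later, app.run accepts the same (certfile, keyfile) tuple the removed line passed; a minimal sketch, reusing the cer/key paths built just above:

    # Sketch only: serve TLS straight from the dev server again.
    # ssl_context takes a (certfile, keyfile) tuple, exactly what `context` was.
    app.run(host='127.0.0.1', port=443, ssl_context=(cer, key))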
@@ -1 +0,0 @@
-openssl req -newkey rsa:2048 -nodes -keyout privkey.pem -x509 -days 365 -out certificate.pem -subj "/C=US/ST=NRW/L=Earth/O=CompanyName/OU=IT/CN=alexa.jopa.dev"
@@ -1,61 +0,0 @@
-
-user  nginx;
-worker_processes  1;
-
-error_log  /var/log/nginx/error.log warn;
-pid        /var/run/nginx.pid;
-
-
-events {
-    worker_connections  1024;
-}
-
-
-http {
-    include       /etc/nginx/mime.types;
-    default_type  application/octet-stream;
-
-    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
-                      '$status $body_bytes_sent "$http_referer" '
-                      '"$http_user_agent" "$http_x_forwarded_for"';
-
-    access_log  /var/log/nginx/access.log  main;
-
-    sendfile        on;
-    #tcp_nopush     on;
-
-    keepalive_timeout  65;
-
-    #gzip  on;
-
-    server {
-        # enables SSLv3/TLSv1, but not SSLv2 which is weak and should no longer be used.
-        ssl_protocols SSLv3 TLSv1;
-
-        # disables all weak ciphers
-        ssl_ciphers ALL:!aNULL:!ADH:!eNULL:!LOW:!EXP:RC4+RSA:+HIGH:+MEDIUM;
-
-        server_name alexa.jopa.dev jopa.dev;
-
-        ## Access and error logs.
-        access_log /var/log/nginx/access.log;
-        error_log /var/log/nginx/error.log info;
-
-        ## Keep alive timeout set to a greater value for SSL/TLS.
-        keepalive_timeout 75 75;
-
-        ## See the keepalive_timeout directive in nginx.conf.
-        ## Server certificate and key.
-        ssl on;
-        ssl_certificate /app/cert/certificate.pem;
-        ssl_certificate_key /app/cert/privkey.pem;
-        ssl_session_timeout 5m;
-
-        ## Strict Transport Security header for enhanced security. See
-        ## http://www.chromium.org/sts. I've set it to 2 hours; set it to
-        ## whichever age you want.
-        add_header Strict-Transport-Security "max-age=7200";
-
-    }
-    include /etc/nginx/conf.d/*.conf;
-}
-
@@ -1,3 +0,0 @@
-requests
-flask-ask
-lxml
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Sentiment lexicon: http://www.ulliwaltinger.de/sentiment/
+# Stopword list: https://github.com/solariz/german_stopwords
+import nltk
+import copy
+import encodings
+import csv
+from siteobj import *
+from nltk.corpus import treebank
+
+negatives = dict()
+positives = dict()
+neutrals = dict()
+
+# Each GermanPolarityClues row carries a "pos/neg/neutral" probability triple in
+# column 4; rows whose triple contains "-" have no numeric scores and are skipped.
+with open("./reader/GermanPolarityClues-2012/GermanPolarityClues-Negative.tsv", "r", encoding="utf-8") as tsvfile:
+    reader = csv.reader(tsvfile, delimiter='\t')
+    for row in reader:
+        if "-" not in row[4].split("/"):
+            negatives[row[0]] = [float(row[4].split("/")[0]), float(row[4].split("/")[1]), float(row[4].split("/")[2])]
+
+with open("./reader/GermanPolarityClues-2012/GermanPolarityClues-Neutral.tsv", "r", encoding="utf-8") as tsvfile:
+    reader = csv.reader(tsvfile, delimiter='\t')
+    for row in reader:
+        if "-" not in row[4].split("/"):
+            neutrals[row[0]] = [float(row[4].split("/")[0]), float(row[4].split("/")[1]), float(row[4].split("/")[2])]
+
+with open("./reader/GermanPolarityClues-2012/GermanPolarityClues-Positive.tsv", "r", encoding="utf-8") as tsvfile:
+    reader = csv.reader(tsvfile, delimiter='\t')
+    for row in reader:
+        if "-" not in row[4].split("/"):
+            positives[row[0]] = [float(row[4].split("/")[0]), float(row[4].split("/")[1]), float(row[4].split("/")[2])]
+
+# get stopwords
+stopwords = []
+with open("./reader/stopwords.txt", 'r', encoding='utf-8') as f:
+    for line in f:
+        stopwords.append(line.strip())  # strip the newline so tokens can match
+
+extraSW = [".", ",", "´´", "``", "'", '"']
+stopwords += extraSW
+
+obj = Spiegel()
+
+NewsText = obj.read_article("https://www.spiegel.de/netzwelt/games/labo-vr-set-von-nintendo-im-test-erst-basteln-dann-staunen-a-1265633.html")
+
+newText = ""
+for text in NewsText:
+    newText += text
+
+tokens = nltk.word_tokenize(newText)
+
+# drop stopword tokens
+toDelete = []
+for token in tokens:
+    if token in stopwords:
+        toDelete.append(token)
+
+for token in toDelete:
+    while token in tokens:
+        tokens.remove(token)
+
+# accumulate positive / negative / neutral scores over the remaining tokens
+p = 0
+ne = 0
+nu = 0
+for token in tokens:
+    if token in negatives:
+        p += negatives[token][0]
+        ne += negatives[token][1]
+        nu += negatives[token][2]
+    elif token in positives:
+        p += positives[token][0]
+        ne += positives[token][1]
+        nu += positives[token][2]
+    elif token in neutrals:
+        p += neutrals[token][0]
+        ne += neutrals[token][1]
+        nu += neutrals[token][2]
+
+# normalize so the three scores sum to 1
+total = p + ne + nu
+
+p /= total
+nu /= total
+ne /= total
+
+print(p, nu, ne)
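Review note (not part of the commit): the scoring loop above sums the three GermanPolarityClues probabilities (which the script reads as positive/negative/neutral, in that order per lexicon entry) over every token found in one of the lexicons, then normalizes so the scores add up to 1. A minimal sketch of the same logic as a reusable function; the names score_text and lexicons are illustrative, not from the commit:

    # Sketch only: the elif chain above, folded into one function.
    def score_text(tokens, lexicons):
        """Accumulate pos/neg/neutral scores over all tokens found in any lexicon."""
        p = ne = nu = 0.0
        for token in tokens:
            for lexicon in lexicons:      # e.g. (negatives, positives, neutrals)
                if token in lexicon:
                    p += lexicon[token][0]
                    ne += lexicon[token][1]
                    nu += lexicon[token][2]
                    break                 # first matching lexicon wins, like the elif chain
        total = p + ne + nu
        if total == 0:                    # guard the division the script performs unchecked
            return 0.0, 0.0, 0.0
        return p / total, nu / total, ne / total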
@@ -1,8 +1,6 @@
 import urllib.request,urllib.parse,urllib.error
 from lxml import html
 import requests
-import re
-
 
 class Site:
     siteName = ""
(1 file diff suppressed because it is too large)
@@ -1,5 +0,0 @@
-import siteobj as site2
-
-
-obj = site2.Golem()
-news = obj.get_news()
@@ -1,135 +0,0 @@
-import urllib.request,urllib.parse,urllib.error
-from urllib.error import HTTPError
-from urllib.error import URLError
-from urllib.parse import urljoin
-import requests
-import re
-
-import networkx as nx
-import matplotlib.pyplot as plt
-
-class url:
-
-    url = ""                # the url of the website to be checked
-    sites = dict()          # dict of all sites and the urls found on them
-    does_work = []          # list of all previously positively tested urls
-    does_not_work = dict()  # dict of all non-working urls and the site that linked there
-    header_values = {
-        'Connection:' : 'Keep-alive',
-        'name' : 'Michael Foord',
-        'location' : 'Northampton',
-        'language' : 'English',
-        'User-Agent': 'Mozilla 4/0'}
-
-    def __init__(self, url):
-        self.url = urllib.request.urlopen(url).geturl()
-
-
-    def make_url(self, link, start):
-        ret_link = urljoin(start, link)
-
-        return ret_link
-
-    def test_url(self, link, root):
-
-        if link in self.sites or link in self.does_work:
-            return True
-        elif link in self.does_not_work:
-            return False
-        else:
-            try:
-
-                header = urllib.parse.urlencode(self.header_values)
-                header = header.encode('ascii')
-                request = urllib.request.Request(link, header)
-                response = urllib.request.urlopen(request)
-                self.does_work.append(link)
-                print(" works " + link)
-                return True
-
-            except (urllib.error.HTTPError, urllib.error.URLError, ValueError):
-                self.does_not_work[link] = root
-                print(" doesn't work " + link)
-                return False
-
-    def get_actual_urls(self, links, root):
-        temp_links = []
-        for each_link in links:
-
-            if each_link.startswith("http") | each_link.startswith("//"):
-                temp_links.append(each_link)
-            else:
-                temp_links.append(urljoin(root, each_link))
-
-        for each_temp_link in temp_links:
-            self.test_url(each_temp_link, root)
-
-        return temp_links
-
-    def run_check(self, root=None):  # root is the url of the current site
-
-        if root == None:
-            root = self.url
-        else:
-            pass
-
-        if root in self.sites or self.url.rsplit('/', 1)[0] not in root or not self.test_url(root, root):
-            return
-
-        header = urllib.parse.urlencode(self.header_values)
-        header = header.encode('ascii')
-
-        request = urllib.request.Request(root, header)
-        http_response = urllib.request.urlopen(request)
-        root = http_response.geturl()
-        response_data = http_response.read()
-
-
-        links = re.findall(r'href="(.*?)"', str(response_data))
-
-        links = self.get_actual_urls(links, root)
-
-
-        self.sites[root] = links
-        for each_link in links:
-            self.run_check(each_link)
-
-
-    def graph(self):
-
-        G = nx.Graph(self.sites)
-
-        label_dict = {}
-        for key, value in self.sites.items():  # that's not how it works... todo: later
-            label_dict[key] = self.remove_root(value)
-
-        nx.draw(G, with_labels=True, font_size=8, node_size=1000, node_color="skyblue", edge_color='#A0FFA2', pos=nx.spring_layout(G))
-
-        plt.show()
-
-    def remove_root(self, links):
-        ret_links = []
-        for link in links:
-            ret_links.append(link.rsplit('.', 1)[0])
-
-        return ret_links
-
-    def clean(self):
-        self.sites.clear()
-        self.does_not_work.clear()
-        self.does_work.clear()
-
-
@@ -0,0 +1,7 @@
+germalemma
+flask
+flask-ask
+nltk
+lxml
+urllib
+yaml
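Review note (not part of the commit): two of these names don't match their PyPI packages, so this file as committed would likely fail to pip-install. urllib is part of the Python 3 standard library and needs no entry, and yaml is published on PyPI as PyYAML. A hypothetical corrected version, assuming PyYAML is the intended package:

    # requirements.txt sketch; urllib dropped (stdlib on Python 3), yaml -> PyYAML
    germalemma
    flask
    flask-ask
    nltk
    lxml
    PyYAML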