From edd27b63eb1bb170e762f9ef0ffd7ef8ac0104ec Mon Sep 17 00:00:00 2001
From: Patrice Matz
Date: Sat, 24 Nov 2018 20:56:54 +0100
Subject: [PATCH] Add ignored-words filter and switch to scatter plot
---
.gitignore | 4 ++++
count.py | 31 +++++++++++++++++++++++--------
2 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/.gitignore b/.gitignore
index f4fa250..11be36a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,6 @@
\.vscode/
+
+test\.txt
+
+*.txt
diff --git a/count.py b/count.py
index 93b9e9b..3664ace 100644
--- a/count.py
+++ b/count.py
@@ -1,6 +1,14 @@
import matplotlib.pyplot as plt
+filtered = ["der", "die", "das", "ein", "eine", "einer", "es", "ist", "für", "im", "wird",
+"auch", "mit", "aus", "von", "als", "in", "werden", "wurde", "oder", "auf", "wie", "den" ,
+"zu", "dieser", "nicht", "sind", "des", "einen", "um", "können" , "nur", "diese", "wird",
+"eines", "über", "hier", "dem", "so", "werde," ,"werde.", "werden." ,"dies", "muss", "alle",
+"an" , "das", "der", "nach", "zum", "gibt", "da", "mehr", "dass", "gibt", "zum" ]
+
+
+
def main():
filename = "test.txt"
allWords = {}
@@ -17,21 +25,28 @@ def main():
counter+=1
# x,y,z for ribbon Plot
- wordArray = [] #x
- indexes = [] #y
- counts = [] #z
+ wordArray = [] #x
+ #indexes = [] #y
+ #counts = [] #z
+
+
+
for word in allWords:
- if len(allWords[word]) > 10:
+ if len(allWords[word]) > 10 and word.lower() not in filtered:
tmpArray = []
for index in allWords[word]:
+
tmpArray.append(len(tmpArray))
- counts.append(tmpArray)
- indexes.append(allWords[word])
+ #counts.append(tmpArray)
+ #indexes.append(allWords[word])
wordArray.append(word)
- plt.plot(allWords[word], tmpArray)
-
+ plt.scatter(allWords[word], tmpArray)
+ print(word, len(tmpArray))
+ #plt.yscale('log')
+
+
plt.legend(wordArray)
plt.show()