From edd27b63eb1bb170e762f9ef0ffd7ef8ac0104ec Mon Sep 17 00:00:00 2001 From: Patrice Matz Date: Sat, 24 Nov 2018 20:56:54 +0100 Subject: [PATCH] added ignored words --- .gitignore | 4 ++++ count.py | 31 +++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index f4fa250..11be36a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ \.vscode/ + +test\.txt + +*.txt diff --git a/count.py b/count.py index 93b9e9b..3664ace 100644 --- a/count.py +++ b/count.py @@ -1,6 +1,14 @@ import matplotlib.pyplot as plt +filtered = ["der", "die", "das", "ein", "eine", "einer", "es", "ist", "für", "im", "wird", +"auch", "mit", "aus", "von", "als", "in", "werden", "wurde", "oder", "auf", "wie", "den" , +"zu", "dieser", "nicht", "sind", "des", "einen", "um", "können" , "nur", "diese", "wird", +"eines", "über", "hier", "dem", "so", "werde," ,"werde.", "werden." ,"dies", "muss", "alle", +"an" , "das", "der", "nach", "zum", "gibt", "da", "mehr", "dass", "gibt", "zum" ] + + + def main(): filename = "test.txt" allWords = {} @@ -17,21 +25,28 @@ def main(): counter+=1 # x,y,z for ribbon Plot - wordArray = [] #x - indexes = [] #y - counts = [] #z + wordArray = [] #x + #indexes = [] #y + #counts = [] #z + + + for word in allWords: - if len(allWords[word]) > 10: + if len(allWords[word]) > 10 and word.lower() not in filtered: tmpArray = [] for index in allWords[word]: + tmpArray.append(len(tmpArray)) - counts.append(tmpArray) - indexes.append(allWords[word]) + #counts.append(tmpArray) + #indexes.append(allWords[word]) wordArray.append(word) - plt.plot(allWords[word], tmpArray) - + plt.scatter(allWords[word], tmpArray) + print(word, len(tmpArray)) + #plt.yscale('log') + + plt.legend(wordArray) plt.show()