"""Wordcount: plot the use of words in a text over time.

For every word that occurs often enough in a text file, a line is drawn
whose x-values are the positions (0-based word indexes) at which the word
occurs and whose y-values are the number of earlier occurrences of that
word at each position.
"""

# Kept for backward compatibility; nothing in this module reads it.
filePath = "./test.docx"


def collect_word_positions(lines):
    """Map each word to the list of positions at which it occurs.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        A dict mapping each word to an ascending list of 0-based word
        indexes, counted across all lines. Keys appear in order of first
        occurrence.
    """
    positions = {}
    index = 0
    for line in lines:
        # split() without an argument splits on any whitespace run, so the
        # trailing newline is not glued to the last word of a line and
        # repeated spaces do not yield empty "words".
        for word in line.split():
            positions.setdefault(word, []).append(index)
            index += 1
    return positions


def frequent_word_series(positions, min_occurrences=10):
    """Build the plot series for words occurring more than a threshold.

    Args:
        positions: Mapping of word -> list of positions, as returned by
            ``collect_word_positions``.
        min_occurrences: A word is kept only if it occurs strictly more
            often than this value.

    Returns:
        A dict mapping each kept word to a ``(xs, ys)`` tuple, where ``xs``
        is the word's position list and ``ys`` is ``[0, 1, 2, ...]`` of the
        same length (the number of earlier occurrences at each position).
    """
    return {
        word: (indexes, list(range(len(indexes))))
        for word, indexes in positions.items()
        if len(indexes) > min_occurrences
    }


def main(filename="test.txt", min_occurrences=10):
    """Read a text file and plot the running use of its frequent words.

    Args:
        filename: Path of the text file to analyse.
        min_occurrences: Only words occurring strictly more often than
            this are plotted.
    """
    # Imported here so the counting helpers above remain importable on
    # systems without a plotting backend.
    import matplotlib.pyplot as plt

    # The context manager guarantees the file is closed after reading.
    with open(filename, "r") as text_file:
        positions = collect_word_positions(text_file)

    series = frequent_word_series(positions, min_occurrences)
    for xs, ys in series.values():
        plt.plot(xs, ys)

    # Legend labels follow plotting order; skip the legend when there is
    # nothing to label.
    if series:
        plt.legend(list(series))
    plt.show()


if __name__ == "__main__":
    main()