2018-11-18 08:24:34 +00:00
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
2018-11-24 19:56:54 +00:00
|
|
|
# Common German words (plus a few variants with punctuation still attached)
# that are excluded from the plot. All entries are lowercase because main()
# tests membership with ``word.lower()``. A few entries appear more than
# once; that has no effect on the membership test.
filtered = [
    "der", "die", "das", "ein", "eine", "einer", "es", "ist",
    "für", "im", "wird", "auch", "mit", "aus", "von", "als",
    "in", "werden", "wurde", "oder", "auf", "wie", "den", "zu",
    "dieser", "nicht", "sind", "des", "einen", "um", "können", "nur",
    "diese", "wird", "eines", "über", "hier", "dem", "so", "werde,",
    "werde.", "werden.", "dies", "muss", "alle", "an", "das", "der",
    "nach", "zum", "gibt", "da", "mehr", "dass", "gibt", "zum",
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2018-11-18 08:24:34 +00:00
|
|
|
def _index_word_positions(lines):
    """Map every word to the list of positions at which it occurs.

    Args:
        lines: Iterable of text lines (for example an open text file).

    Returns:
        A dict whose keys are the whitespace-separated words exactly as
        they appear in the text (case-sensitive) and whose values are lists
        of 0-based positions in the running sequence of all words.
    """
    positions = {}
    # split() without an argument splits on any run of whitespace, so line
    # endings and repeated spaces never produce '' or 'word\n' tokens.
    tokens = (token for line in lines for token in line.split())
    for position, token in enumerate(tokens):
        positions.setdefault(token, []).append(position)
    return positions


def main(filename="test.txt", min_occurrences=10):
    """Scatter-plot the running occurrence count of frequent words.

    Reads ``filename``, records the position of every word, and for each
    word that occurs more than ``min_occurrences`` times and whose
    lowercase form is not in ``filtered`` draws one scatter series:
    x is the word's position in the text, y is the number of earlier
    occurrences of that word (0 for the first one). The word and its total
    count are also printed, and the plot is shown with a legend.

    Args:
        filename: Path of the text file to analyse.
        min_occurrences: A word is plotted only if it occurs strictly more
            often than this.
    """
    # NOTE(review): the file is decoded with the platform default encoding;
    # pass encoding= explicitly once the input encoding is confirmed.
    with open(filename, 'r') as text_file:
        allWords = _index_word_positions(text_file)

    wordArray = []  # legend labels, in the order the series are drawn
    for word, positions in allWords.items():
        if len(positions) <= min_occurrences or word.lower() in filtered:
            continue

        # y-values: 0, 1, 2, ... one per occurrence of this word.
        running_count = list(range(len(positions)))

        wordArray.append(word)
        plt.scatter(positions, running_count)
        print(word, len(running_count))

    plt.legend(wordArray)
    plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the analysis only when this file is executed as a script, so that
# importing the module does not open a plot window.
if __name__ == "__main__":
    main()
|