persist + edge nodes

This commit is contained in:
Patrice 2019-05-01 14:26:11 +02:00
parent 7fa4b4dcfd
commit 285d1cc6c5
5 changed files with 7804 additions and 257 deletions

69
app.py
View File

@ -4,6 +4,7 @@ import urlchecker
import sitemapper import sitemapper
import _pickle as cPickle import _pickle as cPickle
import json import json
import sys
#----------------------------------------------------------------------------# #----------------------------------------------------------------------------#
# App Config. # App Config.
#----------------------------------------------------------------------------# #----------------------------------------------------------------------------#
@ -21,21 +22,32 @@ def index():
obj = sitemapper.url(url) obj = sitemapper.url(url)
obj.run_check(url) obj.run_check(url)
with open('your_file.txt', 'w') as f:
for item in obj.sites:
f.write("%s\n" % item)
nodes = "" nodes = ""
for link in obj.sites: drawn = []
nodes += '{' + 'id: "{}", label: "{}", group: {}'.format(link, link.rsplit('/')[-1], 0) + '},' for key, values in obj.sites.items():
nodes = nodes[:-1] label = key.rsplit('/')[-1]
if label == "":
label = key.rsplit('/')[-2]
nodes += '{' + 'id: "{}", label: "{}", group: {}'.format(key, label, 0) + '},\n'
drawn.append(key)
for key, values in obj.sites.items():
for value in values:
if value not in drawn and value not in obj.sites:
nodes += '{' + 'id: "{}", label: "{}", group: {}'.format(value, value, 1) + '},\n'
drawn.append(value)
nodes = nodes[:-2] + "\n"
edges = "" edges = ""
for key, values in obj.sites.items(): for key, values in obj.sites.items():
for value in values: for value in values:
edges += '{' + 'from: "{}", to: "{}"'.format(key, value) + '},' edges += '{' + 'from: "{}", to: "{}"'.format(key, value) + '},\n'
edges = edges[:-1] edges = edges[:-2] + "\n"
with open('./cached/' + url.rsplit('/')[2] + '.txt', 'w') as f:
f.write(nodes)
f.write(edges)
results = ''' results = '''
<script src="https://cdnjs.cloudflare.com/ajax/libs/vis/4.21.0/vis.min.js"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/vis/4.21.0/vis.min.js"></script>
@ -60,36 +72,40 @@ def index():
var options = { var options = {
autoResize: true, autoResize: true,
layout: { layout: {
improvedLayout:false, improvedLayout:true,
randomSeed: undefined, randomSeed: 10,
hierarchical: {
enabled:false,
levelSeparation: 150,
nodeSpacing: 100,
treeSpacing: 200,
blockShifting: true,
edgeMinimization: true,
parentCentralization: true,
direction: 'UD', // UD, DU, LR, RL
sortMethod: 'hubsize' // hubsize, directed
}
}, },
height: '100%', height: '100%',
width: '100%', width: '100%',
nodes: { nodes: {
shape: 'dot', shape: 'dot',
size: 30, size: 8,
font: { font: {
size: 32, size: 5,
color: '#ffffff' color: '#ffffff'
}, },
borderWidth: 2 borderWidth: 1
}, },
edges: { edges: {
width: 2 width: 1,
color: {
color:'#356b6b',
highlight:'#4286f4',
hover: '#41f4f4',
inherit: 'from',
opacity:1.0
},
},
interaction: {
hoverConnectedEdges: true,
tooltipDelay: 200
} }
}; };
network = new vis.Network(container, data, options); network = new vis.Network(container, data, options);
network.on("stabilizationIterationsDone", function () {
network.setOptions( { physics: false } );
});
</script> </script>
''' '''
return results return results
@ -97,6 +113,7 @@ def index():
if __name__ == '__main__': if __name__ == '__main__':
port = int(os.environ.get('PORT', 80)) port = int(os.environ.get('PORT', 80))
sys.setrecursionlimit(2000)
app.run(host='0.0.0.0', port=port) app.run(host='0.0.0.0', port=port)

7758
cached/www.google.de.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -7,8 +7,6 @@ class url:
url = "" # the url of the website to be checked url = "" # the url of the website to be checked
sites = dict() # dic. with all sites and urls on those sites sites = dict() # dic. with all sites and urls on those sites
does_work = [] # array with all prev. positiv tested urls
does_not_work = dict() # dic. with all not working urls and the site that linked there
header_values = { header_values = {
'Connection:' : 'Keep-alive', 'Connection:' : 'Keep-alive',
'name' : 'Michael Foord', 'name' : 'Michael Foord',
@ -32,7 +30,8 @@ class url:
if root in self.sites or self.url.rsplit('/')[2] not in root: if root in self.sites or self.url.rsplit('/')[2] not in root:
#print(self.url.rsplit('/')[2]) #print(self.url.rsplit('/')[2])
return return
if "https" not in root:
return
for element in self.exclude: for element in self.exclude:
if element in root: if element in root:
return return

View File

@ -1,227 +0,0 @@
https://www.andreasgehrke.com/proj/idee/idee_09/idee_09_01.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_01.html
https://www.andreasgehrke.com/proj/idee/idee_18/idee_18_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_07/real_proj_07_01.html
http://www.andreasgehrke.com/proj/idee/idee_04/idee_04_02.html
http://www.andreasgehrke.com/proj/idee/idee_01/idee_02_01.html
http://www.andreasgehrke.com/proj/idee/idee_07.html#i07
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_04.html
http://www.andreasgehrke.com/kont/DSH.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_06.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_03.html
https://www.andreasgehrke.com/proj/idee/idee_02/idee_02_01.html
https://www.andreasgehrke.com/proj/idee/idee_03/idee_03_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_09.html
https://www.andreasgehrke.com/proj/idee/idee_19/idee_19_03.html
http://www.andreasgehrke.com/proj/idee/idee_03/idee_03_03.html
https://www.andreasgehrke.com/proj/idee/idee_04/idee_04_03.html
http://www.andreasgehrke.com/kont/DS.html
http://www.andreasgehrke.com/proj/idee/idee_16/idee_16_02.html
https://www.andreasgehrke.com/proj/idee/idee_06.html#i06
http://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_03.html
https://www.andreasgehrke.com/proj/idee/idee_03/idee_03_01.html
http://www.andreasgehrke.com/prof/leis/leis.html
http://www.andreasgehrke.com/proj/proj.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_01.html
https://www.andreasgehrke.com/proj/idee/idee_11/idee_11_01.html
http://www.andreasgehrke.com/proj/proj_21/proj_21_01.html
http://www.andreasgehrke.com/proj/idee/idee_09/idee_09_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_05.html
https://www.andreasgehrke.com/proj/idee/idee_07.html#i07
http://www.andreasgehrke.com/prof/news/images/pdf2.pdf
http://www.andreasgehrke.com/proj/proj_21/proj_21_04.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_05.html
https://www.andreasgehrke.com/proj/idee/idee_04/idee_04_01.html
http://www.andreasgehrke.com/proj/proj_22/proj_22_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_13.html
https://www.andreasgehrke.com/kont/DS.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_07.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_04.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_08/real_proj_08_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_08/real_proj_08_03.html
http://www.andreasgehrke.com/kont/buer/buer.html
http://www.andreasgehrke.com/proj/idee/idee_01.html#i01
http://www.andreasgehrke.com/proj/idee/idee_06.html#i06
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_03.html
http://www.andreasgehrke.com/proj/idee/idee_19/idee_19_02.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_07.html
https://www.andreasgehrke.com/prof/news/images/pdf2.pdf
https://www.andreasgehrke.com/proj/idee/idee_03/idee_03_03.html
https://www.andreasgehrke.com/proj/idee/idee_01/idee_02_05.html
https://www.andreasgehrke.com/proj/proj_22/proj_22_03.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_08.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_01.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_13.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_10.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_08.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_10.html
http://www.andreasgehrke.com/index.html
https://www.andreasgehrke.com/images/ver/e2.pdf
https://www.andreasgehrke.com/prof/ver/ver.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_09/real_proj_09_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_07.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_01.html
https://www.andreasgehrke.com/proj/idee/idee_04/idee_04_02.html
https://www.andreasgehrke.com/proj/proj_21/proj_21_03.html
https://www.andreasgehrke.com/kont/DSH.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_05.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_07/real_proj_07_01.html
http://www.andreasgehrke.com/proj/idee/idee_03/idee_03_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_04.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_08.html
http://www.andreasgehrke.com/prof/ver/ver.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_08/real_proj_08_02.html
https://www.andreasgehrke.com/proj/proj_23/proj_23_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_01.html
https://www.andreasgehrke.com/proj/idee/idee_19/idee_19_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_12.html
https://www.andreasgehrke.com/proj/idee/idee_01/idee_02_04.html
https://www.andreasgehrke.com/proj/idee/idee_01/idee_02_06.html
http://www.andreasgehrke.com/proj/idee/idee_02/idee_02_01.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_09.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_03.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_11.html#r11
https://www.andreasgehrke.com/proj/idee/idee_08.html#i08
http://www.andreasgehrke.com/proj/idee/idee_09/idee_09_04.html
https://www.andreasgehrke.com/index.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_07.html
https://www.andreasgehrke.com/proj/idee/idee_09/idee_09_02.html
http://www.andreasgehrke.com/proj/idee/idee_01/idee_02_07.html
https://www.andreasgehrke.com/proj/proj_21/proj_21_04.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_04.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_12.html
http://www.andreasgehrke.com/proj/idee/idee_02/idee_02_04.html
https://www.andreasgehrke.com/proj/idee/idee_16/idee_16_01.html
https://www.andreasgehrke.com/proj/proj_22/proj_22_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_08/real_proj_08_01.html
http://www.andreasgehrke.com/proj/idee/idee_02/idee_02_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_10/real_proj_10_01.html
http://www.andreasgehrke.com/prof/pers/pers.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_06.html
http://www.andreasgehrke.com/proj/idee/idee_04/idee_04_03.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_07.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_07.html
https://www.andreasgehrke.com/proj/proj.html
https://www.andreasgehrke.com/proj/proj_22/proj_22_02.html
http://www.andreasgehrke.com/proj/proj_22/proj_22_02.html
https://www.andreasgehrke.com/proj/idee/idee_19/idee_19_02.html
https://www.andreasgehrke.com/kont/impr.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_02.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_04.html
http://www.andreasgehrke.com/proj/idee/idee_09/idee_09_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_02.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_04.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_11.html#r11
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_05.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_09/real_proj_09_01.html
https://www.andreasgehrke.com/prof/leis/leis.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_11.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_04.html
http://www.andreasgehrke.com/proj/proj_23/proj_23_01.html
http://www.andreasgehrke.com/proj/idee/idee_01/idee_02_06.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_05.html
http://www.andreasgehrke.com/proj/idee/idee_18/idee_18_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_03.html
http://www.andreasgehrke.com/proj/idee/idee_04/idee_04_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_05.html
https://www.andreasgehrke.com/proj/idee/idee_05.html#i05
https://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_05.html
https://www.andreasgehrke.com
http://www.andreasgehrke.com/proj/idee/idee_05.html#i05
http://www.andreasgehrke.com/proj/proj_22/proj_22_03.html
https://www.andreasgehrke.com/proj/idee/idee_09/idee_09_04.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_11.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_07.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_08/real_proj_08_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_10.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_04.html
https://www.andreasgehrke.com/proj/idee/idee_20/idee_20.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_10.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_01.html
http://www.andreasgehrke.com/proj/idee/idee_02/idee_02_02.html
https://www.andreasgehrke.com/proj/idee/idee_17/idee_17_01.html
http://www.andreasgehrke.com/proj/idee/idee_20/idee_20.html
https://www.andreasgehrke.com/proj/idee/idee_11/idee_11_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_08.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_01.html
http://www.andreasgehrke.com/proj/idee/idee_01/idee_02_02.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_02.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_08.html
http://www.andreasgehrke.com/proj/idee/idee_01/idee_02_03.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_03.html
https://www.andreasgehrke.com/prof/pers/pers.html
http://www.andreasgehrke.com
http://www.andreasgehrke.com/proj/idee/idee_17/idee_17_01.html
http://www.andreasgehrke.com/images/ver/e2.pdf
http://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_01.html
http://www.andreasgehrke.com/proj/idee/idee_11/idee_11_02.html
http://www.andreasgehrke.com/proj/idee/idee_11/idee_11_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_02.html
http://www.andreasgehrke.com/proj/idee/idee_11/idee_11_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_03.html
https://www.andreasgehrke.com/prof/ref/ref.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_07.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_06.html
https://www.andreasgehrke.com/proj/idee/idee_01.html#i01
http://www.andreasgehrke.com/proj/proj_21/proj_21_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_06.html
http://www.andreasgehrke.com/prof/news/news.html
http://www.andreasgehrke.com/prof/ref/ref.html
http://www.andreasgehrke.com/proj/proj_23/proj_23_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_02.html
http://www.andreasgehrke.com/proj/ref_anfrage.html
http://www.andreasgehrke.com/proj/idee/idee_01/idee_02_05.html
https://www.andreasgehrke.com/proj/idee/idee_02/idee_02_04.html
http://www.andreasgehrke.com/proj/idee/idee_08.html#i08
https://www.andreasgehrke.com/proj/idee/idee_01/idee_02_03.html
http://www.andreasgehrke.com/proj/proj_21/proj_21_03.html
https://www.andreasgehrke.com/proj/idee/idee_09/idee_09_03.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_05.html
https://www.andreasgehrke.com/proj/idee/idee_16/idee_16_02.html
https://www.andreasgehrke.com/prof/news/news.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_04.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_09.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_06.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_09.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_08.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_05.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_06.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_02.html
http://www.andreasgehrke.com/prof/moti/moti.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_08.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_08/real_proj_08_02.html
http://www.andreasgehrke.com/proj/idee/idee_16/idee_16_01.html
http://www.andreasgehrke.com/proj/idee/idee_01/idee_02_04.html
http://www.andreasgehrke.com/kont/impr.html
http://www.andreasgehrke.com/proj/idee/idee_18/idee_18_01.html
https://www.andreasgehrke.com/proj/idee/idee_01/idee_02_07.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_06.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_02.html
https://www.andreasgehrke.com/proj/proj_21/proj_21_01.html
https://www.andreasgehrke.com/proj/idee/idee_02/idee_02_03.html
https://www.andreasgehrke.com/kont/buer/buer.html
https://www.andreasgehrke.com/proj/idee/idee_02/idee_02_02.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_05.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_12/real_proj_12_04.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_06.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_04/real_proj_04_01.html
https://www.andreasgehrke.com/prof/moti/moti.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_01/real_proj_01_02.html
http://www.andreasgehrke.com/proj/idee/idee_03/idee_03_01.html
https://www.andreasgehrke.com/proj/idee/idee_01/idee_02_02.html
https://www.andreasgehrke.com/proj/idee/idee_18/idee_18_02.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_01.html
http://www.andreasgehrke.com/proj/idee/idee_19/idee_19_01.html
https://www.andreasgehrke.com/proj/idee/idee_11/idee_11_02.html
http://www.andreasgehrke.com/proj/real_proj/real_proj_06/real_proj_06_02.html
https://www.andreasgehrke.com/proj/proj_23/proj_23_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_10/real_proj_10_01.html
https://www.andreasgehrke.com/proj/idee/idee_01/idee_02_01.html
https://www.andreasgehrke.com/proj/proj_21/proj_21_02.html
http://www.andreasgehrke.com/proj/idee/idee_19/idee_19_03.html
http://www.andreasgehrke.com/proj/idee/idee_09/idee_09_01.html
https://www.andreasgehrke.com/proj/real_proj/real_proj_05/real_proj_05_08.html