added cli, removed flask

Askill 2022-01-01 16:51:10 +01:00
parent 2c4faeeafd
commit 715269e726
5 changed files with 36 additions and 63 deletions

Star.py (26 changes)

@@ -1,13 +1,8 @@
-from time import sleep, time
+from time import sleep
 from urllib.parse import urljoin
 from lxml import html
-from networkx.readwrite.json_graph import tree
 import requests
 import logging
-import networkx as nx
-from pyvis.network import Network
-import matplotlib.pyplot as plt
-import os
 
 
 class Crawler:
@@ -74,7 +69,7 @@ class Crawler:
             except:
                 continue
-            nlinks=[]
+            nlinks = []
             for link in links:
                 if link not in nlinks:
                     if link.startswith("http"):
@@ -101,19 +96,4 @@ class Crawler:
         for node in nodes:
             g.add_node(node)
         for f, t in edges:
-            g.add_edge(f,t)
-
-    def draw(self):
-        net = Network(directed=True, layout=False, bgcolor="black", font_color="white")
-        G = nx.DiGraph()
-        self.makeGraph(G)
-        net.from_nx(G)
-        net.height = "100%"
-        net.width = "100%"
-        net.margin = "0"
-        net.padding = "0"
-        net.show(os.path.join(os.path.dirname(__file__), './mygraph.html'))
+            g.add_edge(f, t)
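draw() was the only method here that needed networkx, pyvis, and os, so those imports (and the unused matplotlib one) disappear in the first hunk. Anyone who still wants the interactive pyvis view can rebuild it outside Star.py; a minimal sketch, assuming networkx and pyvis are installed separately and a cache file written by app.py exists (the path is illustrative):

    import json
    import networkx as nx
    from pyvis.network import Network

    # load a node/edge dump written by app.py's graph() helper
    with open("cached/example.com.json", encoding="utf-8") as f:
        data = json.load(f)

    G = nx.DiGraph()
    G.add_nodes_from(data["nodes"])
    G.add_edges_from(data["edges"])  # edges are (from, to) pairs

    net = Network(directed=True, bgcolor="black", font_color="white")
    net.from_nx(G)
    net.show("mygraph.html")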

app.py (57 changes)

@@ -1,13 +1,8 @@
-from flask import Flask, request, render_template
 import os
 from Star import Crawler
 import json
-import sys
-
-#----------------------------------------------------------------------------#
-# App Config.
-#----------------------------------------------------------------------------#
-app = Flask(__name__)
+import argparse
 
 
 def transformForDrawing(n, e):
     nodes = []
@@ -33,14 +28,15 @@ def transformForDrawing(n, e):
     return nodes, edges
 
 
-def graph(url):
+def graph(url, limit):
     obj = Crawler()
-    obj.run(url, 5000)
+    obj.run(url, limit)
     current = os.path.dirname(__file__)
     n, e = obj.getNodesEdges()
     with open(os.path.join(current, './cached/' + url.rsplit('/')[2] + '.json'), 'w', encoding='utf-8') as f:
-        f.write(json.dumps({"nodes": n,"edges": e}))
+        f.write(json.dumps({"nodes": n, "edges": e}))
     nodes, edges = transformForDrawing(n, e)
     return nodes, edges
@@ -49,37 +45,40 @@ def graph(url):
 def load(url):
     print("Loaded from cache: " + url)
     current = os.path.dirname(__file__)
-    with open(os.path.join(current,'./cached/{}.json'.format(url)), 'r', encoding='utf-8') as f:
+    with open(os.path.join(current, './cached/{}.json'.format(url)), 'r', encoding='utf-8') as f:
         content = f.read()
         jsonContent = json.loads(content)
     return transformForDrawing(jsonContent["nodes"], jsonContent["edges"])
 
 
-#----------------------------------------------------------------------------#
-# Controllers.
-#----------------------------------------------------------------------------#
-
-# input for urls over url
-@app.route('/')
-def index():
-    url = request.args.get("url")
-    cached = os.listdir(os.path.join(os.path.dirname(__file__), "./cached"))
+def main(url, pathToCached, limit):
     withoutProtocol = url.split("/")[2]
-    if withoutProtocol + '.json' not in cached:
-        nodes, edges = graph(url)
+    if pathToCached is None:
+        nodes, edges = graph(url, limit)
     else:
         nodes, edges = load(withoutProtocol)
-    print(url)
-    return render_template('graph.html', nodes = json.dumps(nodes), edges = json.dumps(edges))
+    pathToTemplate = os.path.join(os.path.dirname(
+        __file__), "templates", "graph.html")
+    with open(pathToTemplate, "rt") as fin:
+        with open(withoutProtocol + ".html", "wt") as fout:
+            fout.write(fin.read().replace('{{nodes}}', json.dumps(
+                nodes)).replace('{{edges}}', json.dumps(edges)))
 
 
 if __name__ == '__main__':
-    port = int(os.environ.get('PORT', 80))
-    app.run(host='0.0.0.0', port=port)
+    parser = argparse.ArgumentParser(
+        description='Map any website. Only map websites you own: this tool opens every link on a given site, which can incur high costs for the owner and be interpreted as a small-scale DoS attack.')
+    parser.add_argument('-url', type=str, help='url to map', required=True)
+    parser.add_argument('--plot-cached', type=str,
+                        help='path to cached file', required=False)
+    parser.add_argument('-limit', type=int, required=False, default=5000,
+                        help='maximum number of nodes on the original site')
+    args = parser.parse_args()
+    url = args.url
+    pathToCached = args.plot_cached
+    limit = args.limit
+    main(url, pathToCached, limit)
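(As committed, main() was declared with two parameters but called with three, the cached/fresh branch was inverted relative to the --plot-cached help text, and -limit parsed as a string; the hunk above shows the corrected form. With the Flask routes gone, app.py is driven entirely by these flags; a hedged usage sketch, with domain and cache path illustrative:

    python app.py -url https://example.com -limit 500
    python app.py -url https://example.com --plot-cached cached/example.com.json

Either way main() writes the filled-in template to <domain>.html in the working directory.)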

requirements.txt

@@ -1,3 +1,3 @@
-Flask-WTF
 requests
 lxml
+urllib3

templates/graph.html

@@ -101,9 +101,9 @@
     function draw() {
         var color = 'gray';
-        var nodes = {{ nodes | safe }} ;
-        var edges = {{ edges | safe }} ;
+        var nodes = {{nodes}} ;
+        var edges = {{edges}} ;
 
         // create a network
         var container = document.getElementById('mynetwork');
         var data = {
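The Jinja-only "| safe" filter goes away because rendering is now plain string substitution: main() in app.py treats {{nodes}} and {{edges}} as literal markers and swaps them for json.dumps output, roughly:

    # no template engine, just substitution (mirrors main() in app.py)
    html = fin.read().replace('{{nodes}}', json.dumps(nodes)) \
                     .replace('{{edges}}', json.dumps(edges))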

x.py (6 changes)

@@ -1,6 +0,0 @@
-import Star
-
-crawler = Star.Crawler()
-crawler.run("https://www.google.de/", 5000)
-print(crawler.getNodesEdges())
-crawler.draw()
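x.py was a small smoke-test script, and its last call (crawler.draw()) no longer exists after this commit, so deleting it keeps the repository consistent. A minimal equivalent today, as a sketch, would stop at the node/edge dump:

    import Star

    crawler = Star.Crawler()
    crawler.run("https://www.google.de/", 5000)  # crawl up to 5000 pages
    print(crawler.getNodesEdges())  # inspect raw nodes/edges; draw() is gone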