added cli, removed flask

Askill 2022-01-01 16:51:10 +01:00
parent 2c4faeeafd
commit 715269e726
5 changed files with 36 additions and 63 deletions

Star.py

@@ -1,13 +1,8 @@
from time import sleep, time
from time import sleep
from urllib.parse import urljoin
from lxml import html
from networkx.readwrite.json_graph import tree
import requests
import logging
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt
import os
class Crawler:
@@ -102,18 +97,3 @@ class Crawler:
            g.add_node(node)
        for f, t in edges:
            g.add_edge(f, t)
    def draw(self):
        net = Network(directed=True, layout=False, bgcolor="black", font_color="white")
        G = nx.DiGraph()
        self.makeGraph(G)
        net.from_nx(G)
        net.height = "100%"
        net.width = "100%"
        net.margin = "0"
        net.padding = "0"
        net.show(os.path.join(os.path.dirname(__file__), './mygraph.html'))
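The removed draw() baked the pyvis HTML rendering into the crawler itself. If a standalone rendering is still wanted, the deleted lines can be reproduced outside the class; a minimal sketch based on them (URL and node limit are placeholders, pyvis and networkx assumed installed):

# sketch: rebuild the deleted draw() outside Star.Crawler
import networkx as nx
from pyvis.network import Network
from Star import Crawler

crawler = Crawler()
crawler.run("https://example.com/", 100)   # placeholder URL and node limit
G = nx.DiGraph()
crawler.makeGraph(G)                       # same helper the class still exposes
net = Network(directed=True, bgcolor="black", font_color="white")
net.from_nx(G)
net.show("mygraph.html")                   # writes and opens the HTML file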

app.py

@@ -1,13 +1,8 @@
from flask import Flask, request, render_template
import os
from Star import Crawler
import json
import sys
#----------------------------------------------------------------------------#
# App Config.
#----------------------------------------------------------------------------#
import argparse
app = Flask(__name__)
def transformForDrawing(n, e):
    nodes = []
@@ -33,9 +28,10 @@ def transformForDrawing(n, e):
    return nodes, edges
def graph(url):
def graph(url, limit):
    obj = Crawler()
    obj.run(url, 5000)
    obj.run(url, limit)
    current = os.path.dirname(__file__)
    n, e = obj.getNodesEdges()
@@ -54,32 +50,35 @@ def load(url):
    jsonContent = json.loads(content)
    return transformForDrawing(jsonContent["nodes"], jsonContent["edges"])
#----------------------------------------------------------------------------#
# Controllers.
#----------------------------------------------------------------------------#
# the URL to map is passed via the ?url= query parameter
@app.route('/')
def index():
    url = request.args.get("url")
    cached = os.listdir(os.path.join(os.path.dirname(__file__), "./cached"))
def main(url, pathToCached, limit):
    withoutProtocol = url.split("/")[2]
    if withoutProtocol + '.json' not in cached:
        nodes, edges = graph(url)
    # crawl only when no cached file was supplied
    if pathToCached is None:
        nodes, edges = graph(url, limit)
    else:
        nodes, edges = load(withoutProtocol)
    print(url)
    return render_template('graph.html', nodes = json.dumps(nodes), edges = json.dumps(edges))
    pathToTemplate = os.path.join(os.path.dirname(
        __file__), "templates", "graph.html")
    with open(pathToTemplate, "rt") as fin:
        with open(withoutProtocol + ".html", "wt") as fout:
            fout.write(fin.read().replace('{{nodes}}', json.dumps(
                nodes)).replace('{{edges}}', json.dumps(edges)))
if __name__ == '__main__':
    port = int(os.environ.get('PORT', 80))
    app.run(host='0.0.0.0', port=port)
    parser = argparse.ArgumentParser(
        description='Map any website. Only map websites you own, as this tool will open any link on a given website, which can incur high costs for the owner and be interpreted as a small-scale DoS attack.')
    parser.add_argument('-url', type=str, help='url to map', required=True)
    parser.add_argument('--plot-cached', type=str,
                        help='path to cached file', required=False)
    parser.add_argument(
        '-limit', type=int, help='maximum number of nodes on the original site', required=False, default=5000)
    args = parser.parse_args()
    url = args.url
    pathToCached = args.plot_cached
    limit = args.limit
    main(url, pathToCached, limit)
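With Flask gone, the mapper is started from the command line; a hedged usage sketch based on the argparse flags above (URL and path are placeholders, output is written to <host>.html in the current working directory):

# crawl a site (up to 500 nodes) and write www.example.com.html
python app.py -url https://www.example.com/ -limit 500

# re-plot from a previously cached crawl instead of crawling again
python app.py -url https://www.example.com/ --plot-cached cached/www.example.com.json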

requirements.txt

@@ -1,3 +1,3 @@
Flask-WTF
requests
lxml
urllib3

templates/graph.html

@@ -101,9 +101,9 @@
function draw() {
    var color = 'gray';
    var nodes = {{ nodes | safe }} ;
    var nodes = {{nodes}} ;
    var edges = {{ edges | safe }} ;
    var edges = {{edges}} ;
    // create a network
    var container = document.getElementById('mynetwork');
    var data = {
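The dropped | safe filters were Jinja syntax; the bare {{nodes}} and {{edges}} markers are now plain text placeholders that the rewritten app.py fills in with str.replace and json.dumps, so the template no longer depends on Flask's renderer.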

x.py

@@ -1,6 +0,0 @@
import Star
crawler = Star.Crawler()
crawler.run("https://www.google.de/", 5000)
print(crawler.getNodesEdges())
crawler.draw()