From 60496222078ba97111d9927bd110ec5a70b9eb56 Mon Sep 17 00:00:00 2001 From: Askill Date: Sat, 1 Jan 2022 23:43:11 +0100 Subject: [PATCH] added package stuff --- .gitignore | 7 ++ README.md | 4 +- docs/LICENSE.txt | 8 +++ google.png => docs/google.png | Bin setup.py | 63 ++++++++++++++++++ Star.py => src/Star-Mapper/Star.py | 2 +- src/Star-Mapper/__init__.py | 1 + app.py => src/Star-Mapper/main.py | 23 +++---- .../Star-Mapper/templates}/graph.html | 0 9 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 docs/LICENSE.txt rename google.png => docs/google.png (100%) create mode 100644 setup.py rename Star.py => src/Star-Mapper/Star.py (97%) create mode 100644 src/Star-Mapper/__init__.py rename app.py => src/Star-Mapper/main.py (79%) rename {templates => src/Star-Mapper/templates}/graph.html (100%) diff --git a/.gitignore b/.gitignore index 5818221..645b060 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,10 @@ __pycache__/ cached/beauty.json cached/www.budgetbytes.com.json templates/data.js +dist/ +*.egg-info +VERSION +**/__meta__.py +cached/visjs.github.io.json +cached/www.dinneratthezoo.com.json +cached/www.patricematz.de.json diff --git a/README.md b/README.md index ad5aa3d..e74ddbd 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ Calls every link on a given website and produces an explorable graph visualizati Please note that the graph layout can take a long time since it is JS based. Loading a graph with 3000 Nodes may take 5 minutes or more. +``` Map any website. Only map websites you own, as this tool will open any link on a given website, which can potentially incure high costs for the owner and be interpreted as a small scale DOS attack. @@ -13,7 +14,8 @@ Please note that the graph layout can take a long time since it is JS based. 
Loa -url url to map --plot-cached path to cached file -limit maximum number of nodes on original site +``` ## Examples: ### Google.de: -![google.de](./google.png) \ No newline at end of file +![google.de](./docs/google.png) \ No newline at end of file diff --git a/docs/LICENSE.txt b/docs/LICENSE.txt new file mode 100644 index 0000000..2601df6 --- /dev/null +++ b/docs/LICENSE.txt @@ -0,0 +1,8 @@ +Copyright (c) 2021 Patrice Matz +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/google.png b/docs/google.png similarity index 100% rename from google.png rename to docs/google.png diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..11935af --- /dev/null +++ b/setup.py @@ -0,0 +1,63 @@ +# -*- coding: utf8 -*- +# +# This file was created by Python Boilerplate. Use Python Boilerplate to start +# simple, usable and best-practices compliant Python projects. +# +# Learn more about it at: http://github.com/fabiommendes/python-boilerplate/ +# + +import os + +from setuptools import setup, find_packages + +# Meta information +version = open('VERSION').read().strip() +dirname = os.path.dirname(__file__) + +# Save version and author to __meta__.py +path = os.path.join(dirname, 'src', 'Star-Mapper', '__meta__.py') +data = '''# Automatically created. Please do not edit. 
+__version__ = u'%s' +__author__ = u'Patrice Matz' +''' % version +with open(path, 'wb') as F: + F.write(data.encode()) + +setup( + # Basic info + name='Star-Mapper', + version=version, + author='Patrice Matz', + author_email='mail@patricematz.de', + url='https://github.com/Askill/Star-Mapper', + description='Calls every link on a given website and produces an explorable graph visualization.', + long_description=open('README.md').read(), + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: GNU General Public License (GPL)', + 'Operating System :: POSIX', + 'Programming Language :: Python', + 'Topic :: Software Development :: Libraries', + ], + keywords=['graph', "web crawler", 'network', 'visualization'], + # Packages and dependencies + package_dir={'': 'src'}, + packages=find_packages('src'), + install_requires=[ + "requests", + "lxml", + "urllib3" + ], + # Data files + package_data={ + "docs":["*"] + }, + # Scripts + entry_points={ + }, + + # Other configurations + zip_safe=False, + platforms='any', +) diff --git a/Star.py b/src/Star-Mapper/Star.py similarity index 97% rename from Star.py rename to src/Star-Mapper/Star.py index eb01a1c..8ffd095 100644 --- a/Star.py +++ b/src/Star-Mapper/Star.py @@ -61,7 +61,7 @@ class Crawler: if not clean: continue - self.logger.warning(f"{len(self.links)} {root}") + self.logger.info(f"{len(self.links)} {root}") try: site = requests.get(root) tree = html.fromstring(site.content) diff --git a/src/Star-Mapper/__init__.py b/src/Star-Mapper/__init__.py new file mode 100644 index 0000000..45b599d --- /dev/null +++ b/src/Star-Mapper/__init__.py @@ -0,0 +1 @@ +from .main import mapSite \ No newline at end of file diff --git a/app.py b/src/Star-Mapper/main.py similarity index 79% rename from app.py rename to src/Star-Mapper/main.py index dd967ca..74b42f0 100644 --- a/app.py +++ b/src/Star-Mapper/main.py @@ -4,7 +4,7 @@ import json import argparse -def 
transformForDrawing(n, e): +def transformForPlotting(n, e): nodes = [] drawn = [] edges = [] @@ -33,31 +33,28 @@ def graph(url, limit): obj = Crawler() obj.run(url, limit) - current = os.path.dirname(__file__) + current = os.getcwd() n, e = obj.getNodesEdges() with open(os.path.join(current, './cached/' + url.rsplit('/')[2] + '.json'), 'w', encoding='utf-8') as f: f.write(json.dumps({"nodes": n, "edges": e})) - nodes, edges = transformForDrawing(n, e) - return nodes, edges + return transformForPlotting(n, e) -def load(url): - print("Loaded from cache: " + url) - current = os.path.dirname(__file__) - with open(os.path.join(current, './cached/{}.json'.format(url)), 'r', encoding='utf-8') as f: +def load(pathToCached): + with open(pathToCached, 'r', encoding='utf-8') as f: content = f.read() jsonContent = json.loads(content) - return transformForDrawing(jsonContent["nodes"], jsonContent["edges"]) + return transformForPlotting(jsonContent["nodes"], jsonContent["edges"]) -def main(url, pathToCached): +def mapSite(url, pathToCached, limit): withoutProtocol = url.split("/")[2] - if pathToCached is not None: + if pathToCached is None: nodes, edges = graph(url, limit) else: - nodes, edges = load(withoutProtocol) + nodes, edges = load(pathToCached) pathToTemplate = os.path.join(os.path.dirname( __file__), "templates", "graph.html") @@ -81,4 +78,4 @@ if __name__ == '__main__': pathToCached = args.plot_cached limit = args.limit - main(url, pathToCached, limit) + mapSite(url, pathToCached, limit) diff --git a/templates/graph.html b/src/Star-Mapper/templates/graph.html similarity index 100% rename from templates/graph.html rename to src/Star-Mapper/templates/graph.html