added package stuff
parent 3c33441380
commit 6049622207
@@ -4,3 +4,10 @@ __pycache__/
 cached/beauty.json
 cached/www.budgetbytes.com.json
 templates/data.js
+dist/
+*.egg-info
+VERSION
+**/__meta__.py
+cached/visjs.github.io.json
+cached/www.dinneratthezoo.com.json
+cached/www.patricematz.de.json
@@ -4,6 +4,7 @@ Calls every link on a given website and produces an explorable graph visualization
Please note that the graph layout can take a long time, since it is JS based. Loading a graph with 3000 nodes may take 5 minutes or more.

```
Map any website. Only map websites you own, as this tool will open any link on a given
website, which can potentially incur high costs for the owner and be interpreted
as a small-scale DOS attack.
@@ -13,7 +14,8 @@ Please note that the graph layout can take a long time, since it is JS based. Loading a graph with 3000 nodes may take 5 minutes or more.
-url            URL to map
--plot-cached   path to a cached file
-limit          maximum number of nodes on the original site
```

## Examples:
### Google.de:


@@ -0,0 +1,8 @@
Copyright (c) 2021 Patrice Matz
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,63 @@
# -*- coding: utf8 -*-
#
# This file was created by Python Boilerplate. Use Python Boilerplate to start
# simple, usable and best-practices compliant Python projects.
#
# Learn more about it at: http://github.com/fabiommendes/python-boilerplate/
#

import os

from setuptools import setup, find_packages

# Meta information
version = open('VERSION').read().strip()
dirname = os.path.dirname(__file__)

# Save version and author to __meta__.py
path = os.path.join(dirname, 'src', 'Star-Mapper', '__meta__.py')
data = '''# Automatically created. Please do not edit.
__version__ = u'%s'
__author__ = u'Patrice Matz'
''' % version
with open(path, 'wb') as F:
    F.write(data.encode())

setup(
    # Basic info
    name='Star-Mapper',
    version=version,
    author='Patrice Matz',
    author_email='mail@patricematz.de',
    url='https://github.com/Askill/Star-Mapper',
    description='Calls every link on a given website and produces an explorable graph visualization.',
    long_description=open('./docs/README.md').read(),
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU General Public License (GPL)',
        'Operating System :: POSIX',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries',
    ],
    keywords=['graph', 'web crawler', 'network', 'visualization'],

    # Packages and dependencies
    package_dir={'': 'src'},
    packages=find_packages('src'),
    install_requires=[
        "requests",
        "lxml",
        "urllib3",
    ],

    # Data files
    package_data={
        "docs": ["*"],
    },

    # Scripts
    entry_points={
    },

    # Other configurations
    zip_safe=False,
    platforms='any',
)
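For reference, the generated meta module is tiny. A sketch of what src/Star-Mapper/__meta__.py would contain after running the setup script, assuming a hypothetical VERSION file holding 0.1.0 (the version string below is a placeholder, not taken from the repository):

```python
# Automatically created. Please do not edit.
__version__ = u'0.1.0'   # placeholder value; the real string is read from VERSION
__author__ = u'Patrice Matz'
```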
@@ -61,7 +61,7 @@ class Crawler:
         if not clean:
             continue

-        self.logger.warning(f"{len(self.links)} {root}")
+        self.logger.info(f"{len(self.links)} {root}")
         try:
             site = requests.get(root)
             tree = html.fromstring(site.content)
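The per-page message above is demoted from warning to info, so it will be hidden under Python's default WARNING log level. A minimal sketch for keeping it visible, assuming Crawler's self.logger comes from the standard logging module (not confirmed by this diff):

```python
import logging

# Assumption: Crawler's self.logger is a stdlib logging logger.
# Raise the level to INFO so the "<link count> <url>" progress
# messages changed in this commit still appear on the console.
logging.basicConfig(level=logging.INFO)
```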
@@ -0,0 +1 @@
from main import mapSite
@@ -4,7 +4,7 @@ import json
 import argparse


-def transformForDrawing(n, e):
+def transformForPlotting(n, e):
     nodes = []
     drawn = []
     edges = []
@@ -33,31 +33,28 @@ def graph(url, limit):
     obj = Crawler()
     obj.run(url, limit)

-    current = os.path.dirname(__file__)
+    current = os.getcwd()
     n, e = obj.getNodesEdges()
     with open(os.path.join(current, './cached/' + url.rsplit('/')[2] + '.json'), 'w', encoding='utf-8') as f:
         f.write(json.dumps({"nodes": n, "edges": e}))

-    nodes, edges = transformForDrawing(n, e)
-    return nodes, edges
+    return transformForPlotting(n, e)


-def load(url):
-    print("Loaded from cache: " + url)
-    current = os.path.dirname(__file__)
-    with open(os.path.join(current, './cached/{}.json'.format(url)), 'r', encoding='utf-8') as f:
+def load(pathToCached):
+    with open(pathToCached, 'r', encoding='utf-8') as f:
         content = f.read()
         jsonContent = json.loads(content)
-        return transformForDrawing(jsonContent["nodes"], jsonContent["edges"])
+        return transformForPlotting(jsonContent["nodes"], jsonContent["edges"])


-def main(url, pathToCached):
+def mapSite(url, pathToCached, limit):
     withoutProtocol = url.split("/")[2]

-    if pathToCached is not None:
+    if pathToCached is None:
         nodes, edges = graph(url, limit)
     else:
-        nodes, edges = load(withoutProtocol)
+        nodes, edges = load(pathToCached)

     pathToTemplate = os.path.join(os.path.dirname(
         __file__), "templates", "graph.html")
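Switching graph() from os.path.dirname(__file__) to os.getcwd() means the crawl cache is now written relative to the directory the tool is launched from, not next to the module. A small sketch of the resulting path, using a placeholder URL and the same host-extraction expression as graph():

```python
import os

url = "https://www.example.com"    # placeholder URL, not from the repository
host = url.rsplit('/')[2]          # 'www.example.com'
cache_path = os.path.join(os.getcwd(), 'cached', host + '.json')
print(cache_path)                  # e.g. <cwd>/cached/www.example.com.json
```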
@@ -81,4 +78,4 @@ if __name__ == '__main__':
     pathToCached = args.plot_cached
     limit = args.limit

-    main(url, pathToCached, limit)
+    mapSite(url, pathToCached, limit)
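With main() renamed to mapSite() and the new one-line module re-exporting it (presumably the package __init__.py added by this commit), the crawler can also be driven programmatically. A minimal sketch under that assumption; the URL, cache path and limit are placeholders:

```python
from main import mapSite   # re-exported by the new one-line module in this commit

# Crawl a site live, capped at 200 nodes (placeholder values).
mapSite("https://www.example.com", None, 200)

# Re-plot a previous crawl from its cache file instead of crawling again.
mapSite("https://www.example.com", "cached/www.example.com.json", 200)
```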