added package stuff
parent 3c33441380
commit 6049622207
@@ -4,3 +4,10 @@ __pycache__/
 cached/beauty.json
 cached/www.budgetbytes.com.json
 templates/data.js
+dist/
+*.egg-info
+VERSION
+**/__meta__.py
+cached/visjs.github.io.json
+cached/www.dinneratthezoo.com.json
+cached/www.patricematz.de.json
@@ -4,6 +4,7 @@ Calls every link on a given website and produces an explorable graph visualization
Please note that the graph layout can take a long time, since it is JS based. Loading a graph with 3000 nodes may take 5 minutes or more.

```
Map any website. Only map websites you own, as this tool will open any link on a given
website, which can potentially incur high costs for the owner and be interpreted
as a small-scale DOS attack.
@@ -13,7 +14,8 @@ Please note that the graph layout can take a long time, since it is JS based. Loading a graph with 3000 nodes may take 5 minutes or more.
-url            URL to map
--plot-cached   path to a cached file
-limit          maximum number of nodes on the original site
```

## Examples:
### Google.de:


@@ -0,0 +1,8 @@
Copyright (c) 2021 Patrice Matz
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,63 @@
# -*- coding: utf8 -*-
#
# This file was created by Python Boilerplate. Use Python Boilerplate to start
# simple, usable and best-practices compliant Python projects.
#
# Learn more about it at: http://github.com/fabiommendes/python-boilerplate/
#

import os

from setuptools import setup, find_packages

# Meta information
version = open('VERSION').read().strip()
dirname = os.path.dirname(__file__)

# Save version and author to __meta__.py
path = os.path.join(dirname, 'src', 'Star-Mapper', '__meta__.py')
data = '''# Automatically created. Please do not edit.
__version__ = u'%s'
__author__ = u'Patrice Matz'
''' % version
with open(path, 'wb') as F:
    F.write(data.encode())

setup(
    # Basic info
    name='Star-Mapper',
    version=version,
    author='Patrice Matz',
    author_email='mail@patricematz.de',
    url='https://github.com/Askill/Star-Mapper',
    description='Calls every link on a given website and produces an explorable graph visualization.',
    long_description=open('./docs/README.md').read(),
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: GNU General Public License (GPL)',
        'Operating System :: POSIX',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries',
    ],
    keywords=['graph', 'web crawler', 'network', 'visualization'],

    # Packages and dependencies
    package_dir={'': 'src'},
    packages=find_packages('src'),
    install_requires=[
        "requests",
        "lxml",
        "urllib3",
    ],

    # Data files
    package_data={
        "docs": ["*"],
    },

    # Scripts
    entry_points={
    },

    # Other configurations
    zip_safe=False,
    platforms='any',
)
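For reference, the generated meta module is tiny. A sketch of what src/Star-Mapper/__meta__.py would contain after running the setup script, assuming a hypothetical VERSION file holding 0.1.0 (the version string below is a placeholder, not taken from the repository):

```python
# Automatically created. Please do not edit.
__version__ = u'0.1.0'   # placeholder value; the real string is read from VERSION
__author__ = u'Patrice Matz'
```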
@@ -61,7 +61,7 @@ class Crawler:
         if not clean:
             continue

-        self.logger.warning(f"{len(self.links)} {root}")
+        self.logger.info(f"{len(self.links)} {root}")
         try:
             site = requests.get(root)
             tree = html.fromstring(site.content)
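The per-page message above is demoted from warning to info, so it will be hidden under Python's default WARNING log level. A minimal sketch for keeping it visible, assuming Crawler's self.logger comes from the standard logging module (not confirmed by this diff):

```python
import logging

# Assumption: Crawler's self.logger is a stdlib logging logger.
# Raise the level to INFO so the "<link count> <url>" progress
# messages changed in this commit still appear on the console.
logging.basicConfig(level=logging.INFO)
```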
@@ -0,0 +1 @@
from main import mapSite
@@ -4,7 +4,7 @@ import json
 import argparse


-def transformForDrawing(n, e):
+def transformForPlotting(n, e):
     nodes = []
     drawn = []
     edges = []
@@ -33,31 +33,28 @@ def graph(url, limit):
     obj = Crawler()
     obj.run(url, limit)

-    current = os.path.dirname(__file__)
+    current = os.getcwd()
     n, e = obj.getNodesEdges()
     with open(os.path.join(current, './cached/' + url.rsplit('/')[2] + '.json'), 'w', encoding='utf-8') as f:
         f.write(json.dumps({"nodes": n, "edges": e}))

-    nodes, edges = transformForDrawing(n, e)
-    return nodes, edges
+    return transformForPlotting(n, e)


-def load(url):
-    print("Loaded from cache: " + url)
-    current = os.path.dirname(__file__)
-    with open(os.path.join(current, './cached/{}.json'.format(url)), 'r', encoding='utf-8') as f:
+def load(pathToCached):
+    with open(pathToCached, 'r', encoding='utf-8') as f:
         content = f.read()
         jsonContent = json.loads(content)
-        return transformForDrawing(jsonContent["nodes"], jsonContent["edges"])
+        return transformForPlotting(jsonContent["nodes"], jsonContent["edges"])


-def main(url, pathToCached):
+def mapSite(url, pathToCached, limit):
     withoutProtocol = url.split("/")[2]

-    if pathToCached is not None:
+    if pathToCached is None:
         nodes, edges = graph(url, limit)
     else:
-        nodes, edges = load(withoutProtocol)
+        nodes, edges = load(pathToCached)

     pathToTemplate = os.path.join(os.path.dirname(
         __file__), "templates", "graph.html")
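Switching graph() from os.path.dirname(__file__) to os.getcwd() means the crawl cache is now written relative to the directory the tool is launched from, not next to the module. A small sketch of the resulting path, using a placeholder URL and the same host-extraction expression as graph():

```python
import os

url = "https://www.example.com"    # placeholder URL, not from the repository
host = url.rsplit('/')[2]          # 'www.example.com'
cache_path = os.path.join(os.getcwd(), 'cached', host + '.json')
print(cache_path)                  # e.g. <cwd>/cached/www.example.com.json
```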
@@ -81,4 +78,4 @@ if __name__ == '__main__':
     pathToCached = args.plot_cached
     limit = args.limit

-    main(url, pathToCached, limit)
+    mapSite(url, pathToCached, limit)
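With main() renamed to mapSite() and the new one-line module re-exporting it (presumably the package __init__.py added by this commit), the crawler can also be driven programmatically. A minimal sketch under that assumption; the URL, cache path and limit are placeholders:

```python
from main import mapSite   # re-exported by the new one-line module in this commit

# Crawl a site live, capped at 200 nodes (placeholder values).
mapSite("https://www.example.com", None, 200)

# Re-plot a previous crawl from its cache file instead of crawling again.
mapSite("https://www.example.com", "cached/www.example.com.json", 200)
```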