added package stuff
parent 3c33441380
commit 6049622207
@@ -4,3 +4,10 @@ __pycache__/
 cached/beauty.json
 cached/www.budgetbytes.com.json
 templates/data.js
+dist/
+*.egg-info
+VERSION
+**/__meta__.py
+cached/visjs.github.io.json
+cached/www.dinneratthezoo.com.json
+cached/www.patricematz.de.json
@@ -4,6 +4,7 @@ Calls every link on a given website and produces an explorable graph visualization
 
 Please note that the graph layout can take a long time since it is JS based. Loading a graph with 3000 nodes may take 5 minutes or more.
 
+```
 Map any website. Only map websites you own, as this tool will open any link on a given
 website, which can potentially incur high costs for the owner and be interpreted
 as a small-scale DoS attack.
@@ -13,7 +14,8 @@ Please note that the graph layout can take a long time since it is JS based. Loading a graph with 3000 nodes may take 5 minutes or more.
 -url            url to map
 --plot-cached   path to cached file
 -limit          maximum number of nodes on original site
+```
 
 ## Examples:
 ### Google.de:
 ![Google Graph](./google.de.png)
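The three flags documented above are consumed by the script's argparse entry point shown further down in this diff (`args.plot_cached`, `args.limit`). Below is a minimal sketch of how that wiring could look; the help strings mirror the README, but the exact parser definition, the `int` type for `-limit`, and the commented call are assumptions, not repository code.

```
# Sketch of an argparse setup matching the documented flags (assumed, not
# copied from the repository).
import argparse

parser = argparse.ArgumentParser(
    description="Map every link on a website into an explorable graph.")
parser.add_argument("-url", help="url to map")
parser.add_argument("--plot-cached", help="path to cached file")
parser.add_argument("-limit", type=int,
                    help="maximum number of nodes on original site")

if __name__ == "__main__":
    args = parser.parse_args()
    # Mirrors the call at the bottom of this diff:
    # mapSite(args.url, args.plot_cached, args.limit)
```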
@@ -0,0 +1,8 @@
+Copyright (c) 2021 Patrice Matz
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
(binary image: 2.3 MiB before, 2.3 MiB after)
@@ -0,0 +1,63 @@
+# -*- coding: utf8 -*-
+#
+# This file was created by Python Boilerplate. Use Python Boilerplate to start
+# simple, usable and best-practices compliant Python projects.
+#
+# Learn more about it at: http://github.com/fabiommendes/python-boilerplate/
+#
+
+import os
+
+from setuptools import setup, find_packages
+
+# Meta information
+version = open('VERSION').read().strip()
+dirname = os.path.dirname(__file__)
+
+# Save version and author to __meta__.py
+path = os.path.join(dirname, 'src', 'Star-Mapper', '__meta__.py')
+data = '''# Automatically created. Please do not edit.
+__version__ = u'%s'
+__author__ = u'Patrice Matz'
+''' % version
+with open(path, 'wb') as F:
+    F.write(data.encode())
+
+setup(
+    # Basic info
+    name='Star-Mapper',
+    version=version,
+    author='Patrice Matz',
+    author_email='mail@patricematz.de',
+    url='https://github.com/Askill/Star-Mapper',
+    description='Calls every link on a given website and produces an explorable graph visualization.',
+    long_description=open('./docs/README.md').read(),
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: GNU General Public License (GPL)',
+        'Operating System :: POSIX',
+        'Programming Language :: Python',
+        'Topic :: Software Development :: Libraries',
+    ],
+    keywords=['graph', "web crawler", 'network', 'visualization'],
+
+    # Packages and dependencies
+    package_dir={'': 'src'},
+    packages=find_packages('src'),
+    install_requires=[
+        "requests",
+        "lxml",
+        "urllib3"
+    ],
+
+    # Data files
+    package_data={
+        "docs": ["*"]
+    },
+
+    # Scripts
+    entry_points={
+    },
+
+    # Other configurations
+    zip_safe=False,
+    platforms='any',
+)
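setup.py regenerates `src/Star-Mapper/__meta__.py` from the VERSION file on every build. Because the package directory name contains a dash, that file cannot be pulled in with a plain `import` statement; the sketch below reads it with `runpy` instead. This is an illustration only, and it assumes setup.py has already been run so the file exists.

```
# Hypothetical consumer of the generated metadata (assumes setup.py has
# already written src/Star-Mapper/__meta__.py).
import runpy

meta = runpy.run_path("src/Star-Mapper/__meta__.py")
print(meta["__version__"], meta["__author__"])
```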
@@ -61,7 +61,7 @@ class Crawler:
         if not clean:
             continue
 
-        self.logger.warning(f"{len(self.links)} {root}")
+        self.logger.info(f"{len(self.links)} {root}")
         try:
             site = requests.get(root)
             tree = html.fromstring(site.content)
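Downgrading the per-page progress line from warning to info means it is only emitted when the crawler's logger is configured at INFO level or below; a minimal sketch of such a configuration (the format string is an assumption):

```
# Logging setup under which the crawler's per-page info line becomes visible.
import logging

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
```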
@@ -0,0 +1 @@
+from main import mapSite
@@ -4,7 +4,7 @@ import json
 import argparse
 
 
-def transformForDrawing(n, e):
+def transformForPlotting(n, e):
     nodes = []
     drawn = []
     edges = []
@@ -33,31 +33,28 @@ def graph(url, limit):
     obj = Crawler()
     obj.run(url, limit)
 
-    current = os.path.dirname(__file__)
+    current = os.getcwd()
     n, e = obj.getNodesEdges()
     with open(os.path.join(current, './cached/' + url.rsplit('/')[2] + '.json'), 'w', encoding='utf-8') as f:
         f.write(json.dumps({"nodes": n, "edges": e}))
 
-    nodes, edges = transformForDrawing(n, e)
-    return nodes, edges
+    return transformForPlotting(n, e)
 
 
-def load(url):
-    print("Loaded from cache: " + url)
-    current = os.path.dirname(__file__)
-    with open(os.path.join(current, './cached/{}.json'.format(url)), 'r', encoding='utf-8') as f:
+def load(pathToCached):
+    with open(pathToCached, 'r', encoding='utf-8') as f:
         content = f.read()
     jsonContent = json.loads(content)
-    return transformForDrawing(jsonContent["nodes"], jsonContent["edges"])
+    return transformForPlotting(jsonContent["nodes"], jsonContent["edges"])
 
 
-def main(url, pathToCached):
+def mapSite(url, pathToCached, limit):
     withoutProtocol = url.split("/")[2]
 
-    if pathToCached is not None:
+    if pathToCached is None:
         nodes, edges = graph(url, limit)
     else:
-        nodes, edges = load(withoutProtocol)
+        nodes, edges = load(pathToCached)
 
     pathToTemplate = os.path.join(os.path.dirname(
         __file__), "templates", "graph.html")
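With this refactor, `graph()` returns the plot-ready nodes and edges directly, `load()` takes an explicit path instead of re-deriving one from the hostname, and the corrected `is None` check means a fresh crawl happens only when no cached file is supplied. A hedged usage sketch of the renamed entry point follows; the URL, node limit, and cache path are assumptions, and the import assumes the module is on `sys.path`.

```
# Illustrative calls to the refactored entry point (values are made up).
from main import mapSite  # re-exported by the new __init__.py

# No cached file: crawls the site (up to 200 nodes), writes cached/<host>.json,
# then renders the graph template.
mapSite("https://www.patricematz.de", None, 200)

# Cached file given: skips the crawl and rebuilds the graph from the JSON
# written by an earlier run.
mapSite("https://www.patricematz.de", "cached/www.patricematz.de.json", 200)
```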
@@ -81,4 +78,4 @@ if __name__ == '__main__':
     pathToCached = args.plot_cached
     limit = args.limit
 
-    main(url, pathToCached, limit)
+    mapSite(url, pathToCached, limit)