This commit is contained in:
Askill 2026-03-07 14:24:32 +01:00
parent a515964e8b
commit 99c7badd72
4 changed files with 2319 additions and 1893 deletions

3
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,3 @@
{
"nuxt.isNuxtApp": false
}

100
README.md
View File

@ -1,21 +1,93 @@
# Star-Mapper
Calls every link on a given website and produces an explorable graph visualization.
Star-Mapper is a Flask-based graph exploration service for Neo4j.
Please note that the graph layout can take a long time since it is JS-based. Loading a graph with 3000 nodes may take 5 minutes or more.
It provides an interactive browser UI where you can run Cypher queries, visualize large graph results, inspect schema metadata, and tune layout/visual settings in real time. Layout computation is performed server-side in Python (igraph/networkx) for better performance on larger graphs.
```
Map any website. Only map websites you own, as this tool will open any link on a given
website, which can potentially incur high costs for the owner and be interpreted
as a small-scale DoS attack.
## Current Goal
optional arguments:
-h, --help show this help message and exit
-url url to map
--plot-cached path to cached file
-limit maximum number of nodes on original site
Make Neo4j graph data explorable and understandable through:
- Fast query-to-visualization workflow.
- Multiple layout algorithms with automatic selection by graph size.
- Interactive graph navigation (zoom/pan/highlight/search) plus a tabular result view.
## Core Functionality
- Neo4j HTTP Transactional API integration.
- Cypher execution endpoint with graph extraction (`nodes`, `relationships`) and tabular rows.
- Server-side layout precomputation with algorithms such as:
- `auto`
- `force_directed`
- `force_directed_hq`
- `community`
- `circle`
- `drl` / `kamada_kawai` (when `python-igraph` is available)
- `spectral` (fallback when igraph is unavailable)
- Node coloring by label and size scaling by degree.
- Client features:
- Graph/table view toggle.
- Hover/select neighborhood highlighting.
- Node search and focus.
- Minimap.
- Visual controls (edge style, node/label size, spacing, iterations).
- Built-in demo graph generation (`/api/demo`) so the UI can be tested without Neo4j data.
## Project Structure
- `app.py`: Flask app and API endpoints.
- `layout_engine.py`: Graph layout computation and algorithm selection.
- `templates/index.html`: Frontend UI (canvas rendering with D3-powered interactions).
- `src/Star-Mapper/`: Legacy website crawler code (kept in repository, not the primary current service path).
## API Endpoints
- `GET /`: Serves the explorer UI.
- `POST /api/query`: Execute Cypher and return graph + records + stats.
- `GET /api/schema`: Return labels, relationship types, property keys.
- `GET /api/connection-test`: Verify Neo4j connectivity.
- `POST /api/reconnect`: Update Neo4j connection settings at runtime.
- `GET /api/layouts`: Return available layout algorithms.
- `GET /api/sample-queries`: Return built-in sample Cypher queries.
- `POST /api/demo`: Generate synthetic graph data for demo/testing.
## Configuration
Environment variables used by `app.py`:
- `NEO4J_HTTP_URL` (default: `http://localhost`)
- `NEO4J_USER` (default: `neo4j`)
- `NEO4J_PASSWORD` (default: empty)
- `NEO4J_DATABASE` (default: `neo4j`)
## Local Development
1. Install dependencies:
```bash
pip install -r requirements.txt
```
## Examples:
### Google.de:
![google.de](./docs/google.png)
2. Optionally set Neo4j connection details:
```bash
# bash/zsh syntax; in Windows cmd use `set NAME=value` instead of `export NAME=value`
export NEO4J_HTTP_URL=https://your-neo4j-host
export NEO4J_USER=neo4j
export NEO4J_PASSWORD=your-password
export NEO4J_DATABASE=neo4j
```
3. Run the app:
```bash
python app.py
```
4. Open:
`http://localhost:5555`
## Notes
- The current service is the Flask app in `app.py`.
- Legacy crawler functionality still exists in `src/Star-Mapper/main.py`, but the existing web UI and API are designed for Neo4j graph exploration.

394
app.py
View File

@ -23,13 +23,15 @@ from layout_engine import compute_layout, get_available_algorithms
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)
app = Flask(__name__)
# Neo4j HTTP API endpoint (not Bolt)
NEO4J_HTTP_URL = os.environ.get("NEO4J_HTTP_URL", "https://neo4j.develop.cortex.cloud.otto.de")
NEO4J_HTTP_URL = os.environ.get("NEO4J_HTTP_URL", "http://localhost")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")
@ -42,13 +44,17 @@ def _neo4j_auth_header():
"""Build Basic auth header for Neo4j HTTP API."""
cred = f"{NEO4J_USER}:{NEO4J_PASSWORD}"
b64 = base64.b64encode(cred.encode()).decode()
return {"Authorization": f"Basic {b64}", "Content-Type": "application/json", "Accept": "application/json;charset=UTF-8"}
return {
"Authorization": f"Basic {b64}",
"Content-Type": "application/json",
"Accept": "application/json;charset=UTF-8",
}
def _neo4j_tx_url(database=None):
"""Build the transactional commit endpoint URL."""
db = database or NEO4J_DATABASE
base = NEO4J_HTTP_URL.rstrip('/')
base = NEO4J_HTTP_URL.rstrip("/")
return f"{base}/db/{db}/tx/commit"
@ -60,11 +66,13 @@ def execute_cypher(cypher: str, params: dict | None = None):
url = _neo4j_tx_url()
headers = _neo4j_auth_header()
payload = {
"statements": [{
"statement": cypher,
"parameters": params or {},
"resultDataContents": ["row", "graph"]
}]
"statements": [
{
"statement": cypher,
"parameters": params or {},
"resultDataContents": ["row", "graph"],
}
]
}
resp = http_requests.post(url, json=payload, headers=headers, timeout=120)
@ -102,30 +110,32 @@ def execute_cypher(cypher: str, params: dict | None = None):
labels = node_data.get("labels", [])
props = node_data.get("properties", {})
display = (
props.get('name')
or props.get('title')
or props.get('id')
or props.get('sku')
props.get("name")
or props.get("title")
or props.get("id")
or props.get("sku")
or (labels[0] if labels else nid)
)
nodes[nid] = {
'id': nid,
'labels': labels,
'properties': props,
'label': str(display)[:80],
"id": nid,
"labels": labels,
"properties": props,
"label": str(display)[:80],
}
for rel_data in graph_data.get("relationships", []):
eid = str(rel_data["id"])
if eid not in seen_edges:
seen_edges.add(eid)
edges.append({
'id': eid,
'source': str(rel_data["startNode"]),
'target': str(rel_data["endNode"]),
'type': rel_data.get("type", "RELATED"),
'properties': rel_data.get("properties", {}),
})
edges.append(
{
"id": eid,
"source": str(rel_data["startNode"]),
"target": str(rel_data["endNode"]),
"type": rel_data.get("type", "RELATED"),
"properties": rel_data.get("properties", {}),
}
)
return nodes, edges, records_out, keys
@ -152,10 +162,26 @@ def _execute_simple(cypher: str):
# Color generation
# ---------------------------------------------------------------------------
_PALETTE = [
'#00d4ff', '#ff6b6b', '#ffd93d', '#6bcb77', '#9b59b6',
'#e67e22', '#1abc9c', '#e74c3c', '#3498db', '#f39c12',
'#2ecc71', '#e91e63', '#00bcd4', '#ff9800', '#8bc34a',
'#673ab7', '#009688', '#ff5722', '#607d8b', '#cddc39',
"#00d4ff",
"#ff6b6b",
"#ffd93d",
"#6bcb77",
"#9b59b6",
"#e67e22",
"#1abc9c",
"#e74c3c",
"#3498db",
"#f39c12",
"#2ecc71",
"#e91e63",
"#00bcd4",
"#ff9800",
"#8bc34a",
"#673ab7",
"#009688",
"#ff5722",
"#607d8b",
"#cddc39",
]
@ -168,21 +194,21 @@ def color_for_label(label: str) -> str:
# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@app.route('/')
@app.route("/")
def index():
return render_template('index.html')
return render_template("index.html")
@app.route('/api/query', methods=['POST'])
@app.route("/api/query", methods=["POST"])
def api_query():
data = request.get_json(force=True)
cypher = data.get('query', '').strip()
layout_algo = data.get('layout', 'auto')
spacing = float(data.get('spacing', 1.0))
iterations = int(data.get('iterations', 300))
cypher = data.get("query", "").strip()
layout_algo = data.get("layout", "auto")
spacing = float(data.get("spacing", 1.0))
iterations = int(data.get("iterations", 300))
if not cypher:
return jsonify({'error': 'Empty query'}), 400
return jsonify({"error": "Empty query"}), 400
try:
t0 = time.time()
@ -192,93 +218,107 @@ def api_query():
# Assign colours
label_colors: dict[str, str] = {}
for nd in nodes_dict.values():
for lb in nd.get('labels', []):
for lb in nd.get("labels", []):
if lb not in label_colors:
label_colors[lb] = color_for_label(lb)
# Compute layout server-side
t1 = time.time()
positions = compute_layout(nodes_dict, edges, algorithm=layout_algo, spacing=spacing, iterations=iterations)
positions = compute_layout(
nodes_dict,
edges,
algorithm=layout_algo,
spacing=spacing,
iterations=iterations,
)
t_layout = time.time() - t1
# Degree for sizing
degree: dict[str, int] = defaultdict(int)
for e in edges:
degree[e['source']] += 1
degree[e['target']] += 1
degree[e["source"]] += 1
degree[e["target"]] += 1
max_deg = max(degree.values()) if degree else 1
nodes_list = []
for nid, nd in nodes_dict.items():
pos = positions.get(nid, {'x': 0, 'y': 0})
primary = nd['labels'][0] if nd.get('labels') else 'Unknown'
nd['x'] = pos['x']
nd['y'] = pos['y']
nd['color'] = label_colors.get(primary, '#888888')
pos = positions.get(nid, {"x": 0, "y": 0})
primary = nd["labels"][0] if nd.get("labels") else "Unknown"
nd["x"] = pos["x"]
nd["y"] = pos["y"]
nd["color"] = label_colors.get(primary, "#888888")
d = degree.get(nid, 0)
nd['size'] = 3 + (d / max(max_deg, 1)) * 22
nd["size"] = 3 + (d / max(max_deg, 1)) * 22
nodes_list.append(nd)
# Deduplicate edges (keep unique source-target-type combos)
seen = set()
unique_edges = []
for e in edges:
key = (e['source'], e['target'], e['type'])
key = (e["source"], e["target"], e["type"])
if key not in seen:
seen.add(key)
unique_edges.append(e)
return jsonify({
'nodes': nodes_list,
'edges': unique_edges,
'label_colors': label_colors,
'records': records[:500], # cap tabular results
'keys': keys,
'stats': {
'node_count': len(nodes_list),
'edge_count': len(unique_edges),
'labels': list(label_colors.keys()),
'query_time_ms': round(t_query * 1000),
'layout_time_ms': round(t_layout * 1000),
},
})
return jsonify(
{
"nodes": nodes_list,
"edges": unique_edges,
"label_colors": label_colors,
"records": records[:500], # cap tabular results
"keys": keys,
"stats": {
"node_count": len(nodes_list),
"edge_count": len(unique_edges),
"labels": list(label_colors.keys()),
"query_time_ms": round(t_query * 1000),
"layout_time_ms": round(t_layout * 1000),
},
}
)
except Exception as exc:
logger.exception("Query failed")
return jsonify({'error': str(exc)}), 400
return jsonify({"error": str(exc)}), 400
@app.route('/api/schema')
@app.route("/api/schema")
def api_schema():
try:
labels = [r[0] for r in _execute_simple("CALL db.labels()")]
rel_types = [r[0] for r in _execute_simple("CALL db.relationshipTypes()")]
prop_keys = [r[0] for r in _execute_simple("CALL db.propertyKeys()")]
return jsonify({'labels': labels, 'relationship_types': rel_types, 'property_keys': prop_keys})
return jsonify(
{
"labels": labels,
"relationship_types": rel_types,
"property_keys": prop_keys,
}
)
except Exception as exc:
return jsonify({'error': str(exc)}), 400
return jsonify({"error": str(exc)}), 400
@app.route('/api/connection-test')
@app.route("/api/connection-test")
def api_connection_test():
try:
rows = _execute_simple("RETURN 1 AS ok")
if rows and rows[0][0] == 1:
return jsonify({'status': 'connected', 'uri': NEO4J_HTTP_URL})
return jsonify({"status": "connected", "uri": NEO4J_HTTP_URL})
raise RuntimeError("Unexpected response")
except Exception as exc:
return jsonify({'status': 'error', 'message': str(exc)}), 500
return jsonify({"status": "error", "message": str(exc)}), 500
@app.route('/api/reconnect', methods=['POST'])
@app.route("/api/reconnect", methods=["POST"])
def api_reconnect():
global NEO4J_HTTP_URL, NEO4J_USER, NEO4J_PASSWORD, NEO4J_DATABASE
data = request.get_json(force=True)
new_url = data.get('uri', '').strip()
new_user = data.get('user', '').strip()
new_pass = data.get('password', '')
new_url = data.get("uri", "").strip()
new_user = data.get("user", "").strip()
new_pass = data.get("password", "")
if not new_url:
return jsonify({'status': 'error', 'message': 'URL is required'}), 400
return jsonify({"status": "error", "message": "URL is required"}), 400
NEO4J_HTTP_URL = new_url
NEO4J_USER = new_user
@ -287,63 +327,119 @@ def api_reconnect():
try:
rows = _execute_simple("RETURN 1 AS ok")
if rows and rows[0][0] == 1:
return jsonify({'status': 'connected', 'uri': NEO4J_HTTP_URL})
return jsonify({"status": "connected", "uri": NEO4J_HTTP_URL})
raise RuntimeError("Unexpected response")
except Exception as exc:
return jsonify({'status': 'error', 'message': str(exc)}), 500
return jsonify({"status": "error", "message": str(exc)}), 500
@app.route('/api/layouts')
@app.route("/api/layouts")
def api_layouts():
return jsonify(get_available_algorithms())
@app.route('/api/sample-queries')
@app.route("/api/sample-queries")
def api_sample_queries():
queries = [
{'name': 'Sample Graph (100)',
'query': 'MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 100'},
{'name': 'Sample Graph (500)',
'query': 'MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 500'},
{'name': 'Sample Graph (2000)',
'query': 'MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 2000'},
{'name': 'Node Label Counts',
'query': 'MATCH (n) RETURN labels(n)[0] AS label, count(*) AS count ORDER BY count DESC LIMIT 25'},
{'name': 'Relationship Type Counts',
'query': 'MATCH ()-[r]->() RETURN type(r) AS type, count(*) AS count ORDER BY count DESC LIMIT 25'},
{'name': 'High-Connectivity Nodes',
'query': 'MATCH (n)-[r]-() WITH n, count(r) AS degree ORDER BY degree DESC LIMIT 20 MATCH (n)-[r2]->(m) RETURN n, r2, m LIMIT 300'},
{'name': 'Shortest Path (sample)',
'query': 'MATCH (a), (b) WHERE a <> b WITH a, b LIMIT 1 MATCH path = shortestPath((a)-[*..5]-(b)) RETURN path'},
{'name': 'Connected Component (depth 3)',
'query': 'MATCH (start) WITH start LIMIT 1 MATCH path = (start)-[*1..3]-(connected) RETURN path LIMIT 300'},
{'name': 'Schema Visualization',
'query': 'CALL db.schema.visualization()'},
{
"name": "Sample Graph (100)",
"query": "MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 100",
},
{
"name": "Sample Graph (500)",
"query": "MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 500",
},
{
"name": "Sample Graph (2000)",
"query": "MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 2000",
},
{
"name": "Node Label Counts",
"query": "MATCH (n) RETURN labels(n)[0] AS label, count(*) AS count ORDER BY count DESC LIMIT 25",
},
{
"name": "Relationship Type Counts",
"query": "MATCH ()-[r]->() RETURN type(r) AS type, count(*) AS count ORDER BY count DESC LIMIT 25",
},
{
"name": "High-Connectivity Nodes",
"query": "MATCH (n)-[r]-() WITH n, count(r) AS degree ORDER BY degree DESC LIMIT 20 MATCH (n)-[r2]->(m) RETURN n, r2, m LIMIT 300",
},
{
"name": "Shortest Path (sample)",
"query": "MATCH (a), (b) WHERE a <> b WITH a, b LIMIT 1 MATCH path = shortestPath((a)-[*..5]-(b)) RETURN path",
},
{
"name": "Connected Component (depth 3)",
"query": "MATCH (start) WITH start LIMIT 1 MATCH path = (start)-[*1..3]-(connected) RETURN path LIMIT 300",
},
{"name": "Schema Visualization", "query": "CALL db.schema.visualization()"},
]
return jsonify(queries)
@app.route('/api/demo', methods=['POST'])
@app.route("/api/demo", methods=["POST"])
def api_demo():
"""Generate a demo graph for testing the visualization without Neo4j."""
import random
data = request.get_json(force=True) if request.is_json else {}
size = min(int(data.get('size', 300)), 5000)
layout_algo = data.get('layout', 'auto')
spacing = float(data.get('spacing', 1.0))
iterations = int(data.get('iterations', 300))
size = min(int(data.get("size", 300)), 5000)
layout_algo = data.get("layout", "auto")
spacing = float(data.get("spacing", 1.0))
iterations = int(data.get("iterations", 300))
random.seed(42)
label_types = ['Product', 'Category', 'Brand', 'Supplier', 'Attribute',
'Color', 'Material', 'Tag', 'Collection', 'Review']
rel_types = ['BELONGS_TO', 'MADE_BY', 'SUPPLIED_BY', 'HAS_ATTRIBUTE',
'HAS_COLOR', 'MADE_OF', 'TAGGED_WITH', 'PART_OF', 'REVIEWED_IN', 'SIMILAR_TO']
label_types = [
"Product",
"Category",
"Brand",
"Supplier",
"Attribute",
"Color",
"Material",
"Tag",
"Collection",
"Review",
]
rel_types = [
"BELONGS_TO",
"MADE_BY",
"SUPPLIED_BY",
"HAS_ATTRIBUTE",
"HAS_COLOR",
"MADE_OF",
"TAGGED_WITH",
"PART_OF",
"REVIEWED_IN",
"SIMILAR_TO",
]
adj_names = ['Premium', 'Eco', 'Organic', 'Classic', 'Modern', 'Vintage',
'Smart', 'Ultra', 'Compact', 'Deluxe']
noun_names = ['Widget', 'Gadget', 'Module', 'Unit', 'Element', 'Component',
'System', 'Kit', 'Bundle', 'Pack']
adj_names = [
"Premium",
"Eco",
"Organic",
"Classic",
"Modern",
"Vintage",
"Smart",
"Ultra",
"Compact",
"Deluxe",
]
noun_names = [
"Widget",
"Gadget",
"Module",
"Unit",
"Element",
"Component",
"System",
"Kit",
"Bundle",
"Pack",
]
nodes_dict = {}
edges = []
@ -364,10 +460,14 @@ def api_demo():
name = f"{random.choice(adj_names)} {random.choice(noun_names)} {i}"
nid = f"demo_{i}"
nodes_dict[nid] = {
'id': nid,
'labels': [chosen_label],
'properties': {'name': name, 'sku': f"SKU-{i:05d}", 'price': round(random.uniform(5, 500), 2)},
'label': name,
"id": nid,
"labels": [chosen_label],
"properties": {
"name": name,
"sku": f"SKU-{i:05d}",
"price": round(random.uniform(5, 500), 2),
},
"label": name,
}
# Create edges — mix of random & preferential attachment
@ -378,18 +478,22 @@ def api_demo():
src = random.choice(node_ids)
# Preferential attachment: higher-degree nodes more likely as targets
if random.random() < 0.3 and degree:
top = sorted(degree, key=degree.get, reverse=True)[:max(1, len(top) if 'top' in dir() else 10)]
top = sorted(degree, key=degree.get, reverse=True)[
: max(1, len(top) if "top" in dir() else 10)
]
tgt = random.choice(top)
else:
tgt = random.choice(node_ids)
if src != tgt:
edges.append({
'id': f"edge_{len(edges)}",
'source': src,
'target': tgt,
'type': random.choice(rel_types),
'properties': {},
})
edges.append(
{
"id": f"edge_{len(edges)}",
"source": src,
"target": tgt,
"type": random.choice(rel_types),
"properties": {},
}
)
degree[src] += 1
degree[tgt] += 1
@ -398,37 +502,41 @@ def api_demo():
# Layout
t1 = time.time()
positions = compute_layout(nodes_dict, edges, algorithm=layout_algo, spacing=spacing, iterations=iterations)
positions = compute_layout(
nodes_dict, edges, algorithm=layout_algo, spacing=spacing, iterations=iterations
)
t_layout = time.time() - t1
max_deg = max(degree.values()) if degree else 1
nodes_list = []
for nid, nd in nodes_dict.items():
pos = positions.get(nid, {'x': 0, 'y': 0})
primary = nd['labels'][0]
nd['x'] = pos['x']
nd['y'] = pos['y']
nd['color'] = label_colors.get(primary, '#888')
pos = positions.get(nid, {"x": 0, "y": 0})
primary = nd["labels"][0]
nd["x"] = pos["x"]
nd["y"] = pos["y"]
nd["color"] = label_colors.get(primary, "#888")
d = degree.get(nid, 0)
nd['size'] = 3 + (d / max(max_deg, 1)) * 22
nd["size"] = 3 + (d / max(max_deg, 1)) * 22
nodes_list.append(nd)
return jsonify({
'nodes': nodes_list,
'edges': edges,
'label_colors': label_colors,
'records': [],
'keys': [],
'stats': {
'node_count': len(nodes_list),
'edge_count': len(edges),
'labels': list(label_colors.keys()),
'query_time_ms': 0,
'layout_time_ms': round(t_layout * 1000),
},
})
return jsonify(
{
"nodes": nodes_list,
"edges": edges,
"label_colors": label_colors,
"records": [],
"keys": [],
"stats": {
"node_count": len(nodes_list),
"edge_count": len(edges),
"labels": list(label_colors.keys()),
"query_time_ms": 0,
"layout_time_ms": round(t_layout * 1000),
},
}
)
# ---------------------------------------------------------------------------
if __name__ == '__main__':
app.run(debug=True, host='0.0.0.0', port=5555)
if __name__ == "__main__":
app.run(debug=True, host="0.0.0.0", port=5555)

File diff suppressed because it is too large Load Diff