diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..13ee2b0 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "nuxt.isNuxtApp": false +} \ No newline at end of file diff --git a/README.md b/README.md index 4cf26e4..1102557 100644 --- a/README.md +++ b/README.md @@ -62,22 +62,94 @@ _Original Star-Mapper description below._ ## Original: Website Mapper -Calls every link on a given website and produces an explorable graph visualization. +Star-Mapper is a Flask-based graph exploration service for Neo4j. -Please note that the graph layout can take a long time since it is JS based. Loading a graph with 3000 Nodes may take 5 minutes or more. +It provides an interactive browser UI where you can run Cypher queries, visualize large graph results, inspect schema metadata, and tune layout/visual settings in real time. Layout computation is performed server-side in Python (igraph/networkx) for better performance on larger graphs. -``` - Map any website. Only map websites you own, as this tool will open any link on a given - website, which can potentially incure high costs for the owner and be interpreted - as a small scale DOS attack. +## Current Goal - optional arguments: - -h, --help show this help message and exit - -url url to map - --plot-cached path to cached file - -limit maximum number of nodes on original site +Make Neo4j graph data explorable and understandable through: + +- Fast query-to-visualization workflow. +- Multiple layout algorithms with automatic selection by graph size. +- Interactive graph navigation (zoom/pan/highlight/search) plus a tabular result view. + +## Core Functionality + +- Neo4j HTTP Transactional API integration. +- Cypher execution endpoint with graph extraction (`nodes`, `relationships`) and tabular rows. 
+- Server-side layout precomputation with algorithms such as: + - `auto` + - `force_directed` + - `force_directed_hq` + - `community` + - `circle` + - `drl` / `kamada_kawai` (when `python-igraph` is available) + - `spectral` (fallback when igraph is unavailable) +- Node coloring by label and size scaling by degree. +- Client features: + - Graph/table view toggle. + - Hover/select neighborhood highlighting. + - Node search and focus. + - Minimap. + - Visual controls (edge style, node/label size, spacing, iterations). +- Built-in demo graph generation (`/api/demo`) so the UI can be tested without Neo4j data. + +## Project Structure + +- `app.py`: Flask app and API endpoints. +- `layout_engine.py`: Graph layout computation and algorithm selection. +- `templates/index.html`: Frontend UI (canvas rendering with D3-powered interactions). +- `src/Star-Mapper/`: Legacy website crawler code (kept in the repository, not the primary current service path). + +## API Endpoints + +- `GET /`: Serves the explorer UI. +- `POST /api/query`: Execute Cypher and return graph + records + stats. +- `GET /api/schema`: Return labels, relationship types, property keys. +- `GET /api/connection-test`: Verify Neo4j connectivity. +- `POST /api/reconnect`: Update Neo4j connection settings at runtime. +- `GET /api/layouts`: Return available layout algorithms. +- `GET /api/sample-queries`: Return built-in sample Cypher queries. +- `POST /api/demo`: Generate synthetic graph data for demo/testing. + +## Configuration + +Environment variables used by `app.py`: + +- `NEO4J_HTTP_URL` (default: `http://localhost`) +- `NEO4J_USER` (default: `neo4j`) +- `NEO4J_PASSWORD` (default: empty) +- `NEO4J_DATABASE` (default: `neo4j`) + +## Local Development + +1. Install dependencies: + +```bash +pip install -r requirements.txt ``` -## Examples: -### Google.de: - \ No newline at end of file +2. 
Optionally set Neo4j connection details: + +```bash +set NEO4J_HTTP_URL=https://your-neo4j-host +set NEO4J_USER=neo4j +set NEO4J_PASSWORD=your-password +set NEO4J_DATABASE=neo4j +``` + +3. Run the app: + +```bash +python app.py +``` + +4. Open: + +`http://localhost:5555` + +## Notes + +- The current service is the Flask app in `app.py`. +- Legacy crawler functionality still exists in `src/Star-Mapper/main.py`, but the existing web UI and API are designed for Neo4j graph exploration. \ No newline at end of file diff --git a/app.py b/app.py index ed60c2a..db72cad 100644 --- a/app.py +++ b/app.py @@ -23,13 +23,15 @@ from layout_engine import compute_layout, get_available_algorithms # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- -logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s') +logging.basicConfig( + level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s" +) logger = logging.getLogger(__name__) app = Flask(__name__) # Neo4j HTTP API endpoint (not Bolt) -NEO4J_HTTP_URL = os.environ.get("NEO4J_HTTP_URL", "https://neo4j.develop.cortex.cloud.otto.de") +NEO4J_HTTP_URL = os.environ.get("NEO4J_HTTP_URL", "http://localhost") NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j") NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "") NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j") @@ -42,13 +44,17 @@ def _neo4j_auth_header(): """Build Basic auth header for Neo4j HTTP API.""" cred = f"{NEO4J_USER}:{NEO4J_PASSWORD}" b64 = base64.b64encode(cred.encode()).decode() - return {"Authorization": f"Basic {b64}", "Content-Type": "application/json", "Accept": "application/json;charset=UTF-8"} + return { + "Authorization": f"Basic {b64}", + "Content-Type": "application/json", + "Accept": "application/json;charset=UTF-8", + } def _neo4j_tx_url(database=None): """Build the transactional commit endpoint URL.""" 
db = database or NEO4J_DATABASE - base = NEO4J_HTTP_URL.rstrip('/') + base = NEO4J_HTTP_URL.rstrip("/") return f"{base}/db/{db}/tx/commit" @@ -60,11 +66,13 @@ def execute_cypher(cypher: str, params: dict | None = None): url = _neo4j_tx_url() headers = _neo4j_auth_header() payload = { - "statements": [{ - "statement": cypher, - "parameters": params or {}, - "resultDataContents": ["row", "graph"] - }] + "statements": [ + { + "statement": cypher, + "parameters": params or {}, + "resultDataContents": ["row", "graph"], + } + ] } resp = http_requests.post(url, json=payload, headers=headers, timeout=120) @@ -102,30 +110,32 @@ def execute_cypher(cypher: str, params: dict | None = None): labels = node_data.get("labels", []) props = node_data.get("properties", {}) display = ( - props.get('name') - or props.get('title') - or props.get('id') - or props.get('sku') + props.get("name") + or props.get("title") + or props.get("id") + or props.get("sku") or (labels[0] if labels else nid) ) nodes[nid] = { - 'id': nid, - 'labels': labels, - 'properties': props, - 'label': str(display)[:80], + "id": nid, + "labels": labels, + "properties": props, + "label": str(display)[:80], } for rel_data in graph_data.get("relationships", []): eid = str(rel_data["id"]) if eid not in seen_edges: seen_edges.add(eid) - edges.append({ - 'id': eid, - 'source': str(rel_data["startNode"]), - 'target': str(rel_data["endNode"]), - 'type': rel_data.get("type", "RELATED"), - 'properties': rel_data.get("properties", {}), - }) + edges.append( + { + "id": eid, + "source": str(rel_data["startNode"]), + "target": str(rel_data["endNode"]), + "type": rel_data.get("type", "RELATED"), + "properties": rel_data.get("properties", {}), + } + ) return nodes, edges, records_out, keys @@ -152,10 +162,26 @@ def _execute_simple(cypher: str): # Color generation # --------------------------------------------------------------------------- _PALETTE = [ - '#00d4ff', '#ff6b6b', '#ffd93d', '#6bcb77', '#9b59b6', - '#e67e22', 
'#1abc9c', '#e74c3c', '#3498db', '#f39c12', - '#2ecc71', '#e91e63', '#00bcd4', '#ff9800', '#8bc34a', - '#673ab7', '#009688', '#ff5722', '#607d8b', '#cddc39', + "#00d4ff", + "#ff6b6b", + "#ffd93d", + "#6bcb77", + "#9b59b6", + "#e67e22", + "#1abc9c", + "#e74c3c", + "#3498db", + "#f39c12", + "#2ecc71", + "#e91e63", + "#00bcd4", + "#ff9800", + "#8bc34a", + "#673ab7", + "#009688", + "#ff5722", + "#607d8b", + "#cddc39", ] @@ -168,21 +194,21 @@ def color_for_label(label: str) -> str: # --------------------------------------------------------------------------- # Routes # --------------------------------------------------------------------------- -@app.route('/') +@app.route("/") def index(): - return render_template('index.html') + return render_template("index.html") -@app.route('/api/query', methods=['POST']) +@app.route("/api/query", methods=["POST"]) def api_query(): data = request.get_json(force=True) - cypher = data.get('query', '').strip() - layout_algo = data.get('layout', 'auto') - spacing = float(data.get('spacing', 1.0)) - iterations = int(data.get('iterations', 300)) + cypher = data.get("query", "").strip() + layout_algo = data.get("layout", "auto") + spacing = float(data.get("spacing", 1.0)) + iterations = int(data.get("iterations", 300)) if not cypher: - return jsonify({'error': 'Empty query'}), 400 + return jsonify({"error": "Empty query"}), 400 try: t0 = time.time() @@ -192,93 +218,107 @@ def api_query(): # Assign colours label_colors: dict[str, str] = {} for nd in nodes_dict.values(): - for lb in nd.get('labels', []): + for lb in nd.get("labels", []): if lb not in label_colors: label_colors[lb] = color_for_label(lb) # Compute layout server-side t1 = time.time() - positions = compute_layout(nodes_dict, edges, algorithm=layout_algo, spacing=spacing, iterations=iterations) + positions = compute_layout( + nodes_dict, + edges, + algorithm=layout_algo, + spacing=spacing, + iterations=iterations, + ) t_layout = time.time() - t1 # Degree for sizing degree: 
dict[str, int] = defaultdict(int) for e in edges: - degree[e['source']] += 1 - degree[e['target']] += 1 + degree[e["source"]] += 1 + degree[e["target"]] += 1 max_deg = max(degree.values()) if degree else 1 nodes_list = [] for nid, nd in nodes_dict.items(): - pos = positions.get(nid, {'x': 0, 'y': 0}) - primary = nd['labels'][0] if nd.get('labels') else 'Unknown' - nd['x'] = pos['x'] - nd['y'] = pos['y'] - nd['color'] = label_colors.get(primary, '#888888') + pos = positions.get(nid, {"x": 0, "y": 0}) + primary = nd["labels"][0] if nd.get("labels") else "Unknown" + nd["x"] = pos["x"] + nd["y"] = pos["y"] + nd["color"] = label_colors.get(primary, "#888888") d = degree.get(nid, 0) - nd['size'] = 3 + (d / max(max_deg, 1)) * 22 + nd["size"] = 3 + (d / max(max_deg, 1)) * 22 nodes_list.append(nd) # Deduplicate edges (keep unique source-target-type combos) seen = set() unique_edges = [] for e in edges: - key = (e['source'], e['target'], e['type']) + key = (e["source"], e["target"], e["type"]) if key not in seen: seen.add(key) unique_edges.append(e) - return jsonify({ - 'nodes': nodes_list, - 'edges': unique_edges, - 'label_colors': label_colors, - 'records': records[:500], # cap tabular results - 'keys': keys, - 'stats': { - 'node_count': len(nodes_list), - 'edge_count': len(unique_edges), - 'labels': list(label_colors.keys()), - 'query_time_ms': round(t_query * 1000), - 'layout_time_ms': round(t_layout * 1000), - }, - }) + return jsonify( + { + "nodes": nodes_list, + "edges": unique_edges, + "label_colors": label_colors, + "records": records[:500], # cap tabular results + "keys": keys, + "stats": { + "node_count": len(nodes_list), + "edge_count": len(unique_edges), + "labels": list(label_colors.keys()), + "query_time_ms": round(t_query * 1000), + "layout_time_ms": round(t_layout * 1000), + }, + } + ) except Exception as exc: logger.exception("Query failed") - return jsonify({'error': str(exc)}), 400 + return jsonify({"error": str(exc)}), 400 -@app.route('/api/schema') 
+@app.route("/api/schema") def api_schema(): try: labels = [r[0] for r in _execute_simple("CALL db.labels()")] rel_types = [r[0] for r in _execute_simple("CALL db.relationshipTypes()")] prop_keys = [r[0] for r in _execute_simple("CALL db.propertyKeys()")] - return jsonify({'labels': labels, 'relationship_types': rel_types, 'property_keys': prop_keys}) + return jsonify( + { + "labels": labels, + "relationship_types": rel_types, + "property_keys": prop_keys, + } + ) except Exception as exc: - return jsonify({'error': str(exc)}), 400 + return jsonify({"error": str(exc)}), 400 -@app.route('/api/connection-test') +@app.route("/api/connection-test") def api_connection_test(): try: rows = _execute_simple("RETURN 1 AS ok") if rows and rows[0][0] == 1: - return jsonify({'status': 'connected', 'uri': NEO4J_HTTP_URL}) + return jsonify({"status": "connected", "uri": NEO4J_HTTP_URL}) raise RuntimeError("Unexpected response") except Exception as exc: - return jsonify({'status': 'error', 'message': str(exc)}), 500 + return jsonify({"status": "error", "message": str(exc)}), 500 -@app.route('/api/reconnect', methods=['POST']) +@app.route("/api/reconnect", methods=["POST"]) def api_reconnect(): global NEO4J_HTTP_URL, NEO4J_USER, NEO4J_PASSWORD, NEO4J_DATABASE data = request.get_json(force=True) - new_url = data.get('uri', '').strip() - new_user = data.get('user', '').strip() - new_pass = data.get('password', '') + new_url = data.get("uri", "").strip() + new_user = data.get("user", "").strip() + new_pass = data.get("password", "") if not new_url: - return jsonify({'status': 'error', 'message': 'URL is required'}), 400 + return jsonify({"status": "error", "message": "URL is required"}), 400 NEO4J_HTTP_URL = new_url NEO4J_USER = new_user @@ -287,63 +327,119 @@ def api_reconnect(): try: rows = _execute_simple("RETURN 1 AS ok") if rows and rows[0][0] == 1: - return jsonify({'status': 'connected', 'uri': NEO4J_HTTP_URL}) + return jsonify({"status": "connected", "uri": NEO4J_HTTP_URL}) 
raise RuntimeError("Unexpected response") except Exception as exc: - return jsonify({'status': 'error', 'message': str(exc)}), 500 + return jsonify({"status": "error", "message": str(exc)}), 500 -@app.route('/api/layouts') +@app.route("/api/layouts") def api_layouts(): return jsonify(get_available_algorithms()) -@app.route('/api/sample-queries') +@app.route("/api/sample-queries") def api_sample_queries(): queries = [ - {'name': 'Sample Graph (100)', - 'query': 'MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 100'}, - {'name': 'Sample Graph (500)', - 'query': 'MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 500'}, - {'name': 'Sample Graph (2000)', - 'query': 'MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 2000'}, - {'name': 'Node Label Counts', - 'query': 'MATCH (n) RETURN labels(n)[0] AS label, count(*) AS count ORDER BY count DESC LIMIT 25'}, - {'name': 'Relationship Type Counts', - 'query': 'MATCH ()-[r]->() RETURN type(r) AS type, count(*) AS count ORDER BY count DESC LIMIT 25'}, - {'name': 'High-Connectivity Nodes', - 'query': 'MATCH (n)-[r]-() WITH n, count(r) AS degree ORDER BY degree DESC LIMIT 20 MATCH (n)-[r2]->(m) RETURN n, r2, m LIMIT 300'}, - {'name': 'Shortest Path (sample)', - 'query': 'MATCH (a), (b) WHERE a <> b WITH a, b LIMIT 1 MATCH path = shortestPath((a)-[*..5]-(b)) RETURN path'}, - {'name': 'Connected Component (depth 3)', - 'query': 'MATCH (start) WITH start LIMIT 1 MATCH path = (start)-[*1..3]-(connected) RETURN path LIMIT 300'}, - {'name': 'Schema Visualization', - 'query': 'CALL db.schema.visualization()'}, + { + "name": "Sample Graph (100)", + "query": "MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 100", + }, + { + "name": "Sample Graph (500)", + "query": "MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 500", + }, + { + "name": "Sample Graph (2000)", + "query": "MATCH (n)-[r]->(m) RETURN n, r, m LIMIT 2000", + }, + { + "name": "Node Label Counts", + "query": "MATCH (n) RETURN labels(n)[0] AS label, count(*) AS count ORDER BY count DESC LIMIT 25", + }, + { + "name": 
"Relationship Type Counts", + "query": "MATCH ()-[r]->() RETURN type(r) AS type, count(*) AS count ORDER BY count DESC LIMIT 25", + }, + { + "name": "High-Connectivity Nodes", + "query": "MATCH (n)-[r]-() WITH n, count(r) AS degree ORDER BY degree DESC LIMIT 20 MATCH (n)-[r2]->(m) RETURN n, r2, m LIMIT 300", + }, + { + "name": "Shortest Path (sample)", + "query": "MATCH (a), (b) WHERE a <> b WITH a, b LIMIT 1 MATCH path = shortestPath((a)-[*..5]-(b)) RETURN path", + }, + { + "name": "Connected Component (depth 3)", + "query": "MATCH (start) WITH start LIMIT 1 MATCH path = (start)-[*1..3]-(connected) RETURN path LIMIT 300", + }, + {"name": "Schema Visualization", "query": "CALL db.schema.visualization()"}, ] return jsonify(queries) -@app.route('/api/demo', methods=['POST']) +@app.route("/api/demo", methods=["POST"]) def api_demo(): """Generate a demo graph for testing the visualization without Neo4j.""" import random + data = request.get_json(force=True) if request.is_json else {} - size = min(int(data.get('size', 300)), 5000) - layout_algo = data.get('layout', 'auto') - spacing = float(data.get('spacing', 1.0)) - iterations = int(data.get('iterations', 300)) + size = min(int(data.get("size", 300)), 5000) + layout_algo = data.get("layout", "auto") + spacing = float(data.get("spacing", 1.0)) + iterations = int(data.get("iterations", 300)) random.seed(42) - label_types = ['Product', 'Category', 'Brand', 'Supplier', 'Attribute', - 'Color', 'Material', 'Tag', 'Collection', 'Review'] - rel_types = ['BELONGS_TO', 'MADE_BY', 'SUPPLIED_BY', 'HAS_ATTRIBUTE', - 'HAS_COLOR', 'MADE_OF', 'TAGGED_WITH', 'PART_OF', 'REVIEWED_IN', 'SIMILAR_TO'] + label_types = [ + "Product", + "Category", + "Brand", + "Supplier", + "Attribute", + "Color", + "Material", + "Tag", + "Collection", + "Review", + ] + rel_types = [ + "BELONGS_TO", + "MADE_BY", + "SUPPLIED_BY", + "HAS_ATTRIBUTE", + "HAS_COLOR", + "MADE_OF", + "TAGGED_WITH", + "PART_OF", + "REVIEWED_IN", + "SIMILAR_TO", + ] - adj_names = 
['Premium', 'Eco', 'Organic', 'Classic', 'Modern', 'Vintage', - 'Smart', 'Ultra', 'Compact', 'Deluxe'] - noun_names = ['Widget', 'Gadget', 'Module', 'Unit', 'Element', 'Component', - 'System', 'Kit', 'Bundle', 'Pack'] + adj_names = [ + "Premium", + "Eco", + "Organic", + "Classic", + "Modern", + "Vintage", + "Smart", + "Ultra", + "Compact", + "Deluxe", + ] + noun_names = [ + "Widget", + "Gadget", + "Module", + "Unit", + "Element", + "Component", + "System", + "Kit", + "Bundle", + "Pack", + ] nodes_dict = {} edges = [] @@ -364,10 +460,14 @@ def api_demo(): name = f"{random.choice(adj_names)} {random.choice(noun_names)} {i}" nid = f"demo_{i}" nodes_dict[nid] = { - 'id': nid, - 'labels': [chosen_label], - 'properties': {'name': name, 'sku': f"SKU-{i:05d}", 'price': round(random.uniform(5, 500), 2)}, - 'label': name, + "id": nid, + "labels": [chosen_label], + "properties": { + "name": name, + "sku": f"SKU-{i:05d}", + "price": round(random.uniform(5, 500), 2), + }, + "label": name, } # Create edges — mix of random & preferential attachment @@ -378,18 +478,22 @@ def api_demo(): src = random.choice(node_ids) # Preferential attachment: higher-degree nodes more likely as targets if random.random() < 0.3 and degree: - top = sorted(degree, key=degree.get, reverse=True)[:max(1, len(top) if 'top' in dir() else 10)] + top = sorted(degree, key=degree.get, reverse=True)[ + : max(1, len(top) if "top" in dir() else 10) + ] tgt = random.choice(top) else: tgt = random.choice(node_ids) if src != tgt: - edges.append({ - 'id': f"edge_{len(edges)}", - 'source': src, - 'target': tgt, - 'type': random.choice(rel_types), - 'properties': {}, - }) + edges.append( + { + "id": f"edge_{len(edges)}", + "source": src, + "target": tgt, + "type": random.choice(rel_types), + "properties": {}, + } + ) degree[src] += 1 degree[tgt] += 1 @@ -398,37 +502,41 @@ def api_demo(): # Layout t1 = time.time() - positions = compute_layout(nodes_dict, edges, algorithm=layout_algo, spacing=spacing, 
iterations=iterations) + positions = compute_layout( + nodes_dict, edges, algorithm=layout_algo, spacing=spacing, iterations=iterations + ) t_layout = time.time() - t1 max_deg = max(degree.values()) if degree else 1 nodes_list = [] for nid, nd in nodes_dict.items(): - pos = positions.get(nid, {'x': 0, 'y': 0}) - primary = nd['labels'][0] - nd['x'] = pos['x'] - nd['y'] = pos['y'] - nd['color'] = label_colors.get(primary, '#888') + pos = positions.get(nid, {"x": 0, "y": 0}) + primary = nd["labels"][0] + nd["x"] = pos["x"] + nd["y"] = pos["y"] + nd["color"] = label_colors.get(primary, "#888") d = degree.get(nid, 0) - nd['size'] = 3 + (d / max(max_deg, 1)) * 22 + nd["size"] = 3 + (d / max(max_deg, 1)) * 22 nodes_list.append(nd) - return jsonify({ - 'nodes': nodes_list, - 'edges': edges, - 'label_colors': label_colors, - 'records': [], - 'keys': [], - 'stats': { - 'node_count': len(nodes_list), - 'edge_count': len(edges), - 'labels': list(label_colors.keys()), - 'query_time_ms': 0, - 'layout_time_ms': round(t_layout * 1000), - }, - }) + return jsonify( + { + "nodes": nodes_list, + "edges": edges, + "label_colors": label_colors, + "records": [], + "keys": [], + "stats": { + "node_count": len(nodes_list), + "edge_count": len(edges), + "labels": list(label_colors.keys()), + "query_time_ms": 0, + "layout_time_ms": round(t_layout * 1000), + }, + } + ) # --------------------------------------------------------------------------- -if __name__ == '__main__': - app.run(debug=True, host='0.0.0.0', port=5555) +if __name__ == "__main__": + app.run(debug=True, host="0.0.0.0", port=5555) diff --git a/templates/index.html b/templates/index.html index d16dd3b..f03daf4 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,1887 +1,2130 @@ +
- - -