293 lines
10 KiB
Python
293 lines
10 KiB
Python
"""
|
|
Layout Engine - Efficient graph layout precomputation for large graphs.
|
|
|
|
Uses igraph (C-based) for maximum performance, with networkx fallback.
|
|
Supports multiple layout algorithms optimized for different graph sizes and structures.
|
|
"""
|
|
|
|
import numpy as np
|
|
import time
|
|
import logging
|
|
from collections import defaultdict
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
try:
|
|
import igraph as ig
|
|
HAS_IGRAPH = True
|
|
except ImportError:
|
|
HAS_IGRAPH = False
|
|
logger.warning("igraph not available, falling back to networkx layouts")
|
|
|
|
import networkx as nx
|
|
|
|
|
|
def compute_layout(nodes_dict, edges, algorithm='auto', spacing=1.0, iterations=300):
|
|
"""
|
|
Precompute node positions using efficient graph layout algorithms.
|
|
|
|
Strategy based on graph size:
|
|
- Small (<300 nodes): High-quality force-directed with many iterations
|
|
- Medium (300-3000): Force-directed with optimized parameters
|
|
- Large (3000-20000): Community-based layout (Louvain + per-community layout)
|
|
- Very large (>20000): DrL or spectral layout
|
|
|
|
Args:
|
|
spacing: Multiplier for target_range (1.0 = default, >1 = more spread)
|
|
iterations: Number of layout iterations (higher = better quality, slower)
|
|
|
|
Returns dict of {node_id: {'x': float, 'y': float}}
|
|
"""
|
|
if not nodes_dict:
|
|
return {}
|
|
|
|
start_time = time.time()
|
|
node_ids = list(nodes_dict.keys())
|
|
n = len(node_ids)
|
|
|
|
if n == 0:
|
|
return {}
|
|
|
|
if n == 1:
|
|
return {node_ids[0]: {'x': 0.0, 'y': 0.0}}
|
|
|
|
# Build adjacency
|
|
node_index = {nid: i for i, nid in enumerate(node_ids)}
|
|
edge_list = []
|
|
for edge in edges:
|
|
src = edge.get('source')
|
|
tgt = edge.get('target')
|
|
if src in node_index and tgt in node_index:
|
|
si, ti = node_index[src], node_index[tgt]
|
|
if si != ti: # skip self-loops for layout
|
|
edge_list.append((si, ti))
|
|
|
|
# Choose algorithm
|
|
if algorithm == 'auto':
|
|
if n > 20000:
|
|
algorithm = 'drl' if HAS_IGRAPH else 'spectral'
|
|
elif n > 3000:
|
|
algorithm = 'community'
|
|
elif n > 300:
|
|
algorithm = 'force_directed'
|
|
else:
|
|
algorithm = 'force_directed_hq'
|
|
|
|
logger.info(f"Computing layout for {n} nodes, {len(edge_list)} edges using '{algorithm}' (spacing={spacing}, iterations={iterations})")
|
|
|
|
if HAS_IGRAPH and algorithm != 'spectral':
|
|
positions = _layout_igraph(node_ids, edge_list, n, algorithm, iterations)
|
|
else:
|
|
positions = _layout_networkx(node_ids, edge_list, n, algorithm, iterations)
|
|
|
|
# Apply spacing multiplier by re-normalizing with scaled range
|
|
target_range = 2000 * spacing
|
|
elapsed = time.time() - start_time
|
|
logger.info(f"Layout computed in {elapsed:.2f}s")
|
|
|
|
# Re-normalize if spacing != 1.0
|
|
if abs(spacing - 1.0) > 0.01:
|
|
coords_list = [[positions[nid]['x'], positions[nid]['y']] for nid in node_ids]
|
|
coords = np.array(coords_list)
|
|
return _normalize_and_scale(node_ids, coords, target_range=target_range)
|
|
|
|
return positions
|
|
|
|
|
|
def _layout_igraph(node_ids, edge_list, n, algorithm, iterations=300):
|
|
"""Use igraph's C-based layout algorithms for maximum speed."""
|
|
g = ig.Graph(n=n, edges=edge_list, directed=False)
|
|
|
|
iters = max(50, int(iterations))
|
|
|
|
if algorithm == 'drl':
|
|
# DrL: Distributed Recursive Layout - excellent for very large graphs
|
|
layout = g.layout_drl()
|
|
elif algorithm == 'community':
|
|
# Community-based: detect communities, then layout
|
|
layout = _community_layout_igraph(g, iterations=iters)
|
|
elif algorithm == 'force_directed_hq':
|
|
# High quality Fruchterman-Reingold with more iterations
|
|
layout = g.layout_fruchterman_reingold(niter=max(iters, 500))
|
|
elif algorithm == 'fruchterman_reingold':
|
|
layout = g.layout_fruchterman_reingold(niter=iters)
|
|
elif algorithm == 'kamada_kawai':
|
|
if n < 1000:
|
|
layout = g.layout_kamada_kawai()
|
|
else:
|
|
layout = g.layout_fruchterman_reingold(niter=iters)
|
|
elif algorithm == 'circle':
|
|
layout = g.layout_circle()
|
|
else:
|
|
# Default: Fruchterman-Reingold
|
|
layout = g.layout_fruchterman_reingold(niter=iters)
|
|
|
|
coords = np.array(layout.coords)
|
|
return _normalize_and_scale(node_ids, coords)
|
|
|
|
|
|
def _community_layout_igraph(g, iterations=200):
|
|
"""
|
|
Community-based layout: detect communities with Louvain,
|
|
arrange communities in a circle, then layout nodes within each community.
|
|
"""
|
|
try:
|
|
communities = g.community_multilevel()
|
|
membership = communities.membership
|
|
except Exception:
|
|
return g.layout_fruchterman_reingold(niter=200)
|
|
|
|
n = g.vcount()
|
|
positions = np.zeros((n, 2))
|
|
|
|
# Group nodes by community
|
|
comm_nodes = defaultdict(list)
|
|
for i, comm_id in enumerate(membership):
|
|
comm_nodes[comm_id].append(i)
|
|
|
|
num_communities = len(comm_nodes)
|
|
|
|
# Arrange communities in a circle
|
|
comm_positions = {}
|
|
radius = max(500, num_communities * 50)
|
|
for idx, comm_id in enumerate(sorted(comm_nodes.keys())):
|
|
angle = 2 * np.pi * idx / max(num_communities, 1)
|
|
comm_positions[comm_id] = (radius * np.cos(angle), radius * np.sin(angle))
|
|
|
|
# Layout nodes within each community
|
|
for comm_id, node_indices in comm_nodes.items():
|
|
cx, cy = comm_positions[comm_id]
|
|
|
|
if len(node_indices) == 1:
|
|
positions[node_indices[0]] = [cx, cy]
|
|
continue
|
|
|
|
# Create subgraph
|
|
subgraph = g.subgraph(node_indices)
|
|
|
|
# Use FR layout for the subgraph
|
|
sub_layout = subgraph.layout_fruchterman_reingold(niter=iterations)
|
|
sub_coords = np.array(sub_layout.coords)
|
|
|
|
# Scale based on community size
|
|
scale = max(30, np.sqrt(len(node_indices)) * 15)
|
|
if sub_coords.std() > 0:
|
|
sub_coords = (sub_coords - sub_coords.mean(axis=0)) / max(sub_coords.std(), 1e-6) * scale
|
|
|
|
# Offset to community position
|
|
for local_idx, global_idx in enumerate(node_indices):
|
|
positions[global_idx] = [cx + sub_coords[local_idx][0], cy + sub_coords[local_idx][1]]
|
|
|
|
return ig.Layout(positions.tolist())
|
|
|
|
|
|
def _layout_networkx(node_ids, edge_list, n, algorithm, iterations=300):
|
|
"""Fallback to networkx layouts."""
|
|
G = nx.Graph()
|
|
G.add_nodes_from(range(n))
|
|
G.add_edges_from(edge_list)
|
|
|
|
iters = max(50, int(iterations))
|
|
|
|
if algorithm == 'spectral':
|
|
try:
|
|
pos = nx.spectral_layout(G, dim=2, scale=1000)
|
|
except Exception:
|
|
pos = nx.spring_layout(G, seed=42, scale=1000, iterations=iters)
|
|
elif algorithm in ('force_directed_hq', 'force_directed'):
|
|
k = 2.0 / np.sqrt(max(n, 1))
|
|
pos = nx.spring_layout(G, k=k, iterations=iters, scale=1000, seed=42)
|
|
elif algorithm == 'community':
|
|
pos = _community_layout_networkx(G, n)
|
|
elif algorithm == 'kamada_kawai':
|
|
if n < 500:
|
|
pos = nx.kamada_kawai_layout(G, scale=1000)
|
|
else:
|
|
pos = nx.spring_layout(G, seed=42, scale=1000, iterations=iters)
|
|
elif algorithm == 'circle':
|
|
pos = nx.circular_layout(G, scale=1000)
|
|
else:
|
|
k = 2.0 / np.sqrt(max(n, 1))
|
|
pos = nx.spring_layout(G, k=k, iterations=iters, scale=1000, seed=42)
|
|
|
|
coords = np.array([pos[i] for i in range(n)])
|
|
return _normalize_and_scale(node_ids, coords)
|
|
|
|
|
|
def _community_layout_networkx(G, n):
|
|
"""Community-based layout using networkx."""
|
|
try:
|
|
from networkx.algorithms.community import greedy_modularity_communities
|
|
communities = list(greedy_modularity_communities(G))
|
|
except Exception:
|
|
return nx.spring_layout(G, seed=42, scale=1000, iterations=200)
|
|
|
|
positions = {}
|
|
num_communities = len(communities)
|
|
radius = max(500, num_communities * 50)
|
|
|
|
for idx, comm in enumerate(communities):
|
|
angle = 2 * np.pi * idx / max(num_communities, 1)
|
|
cx = radius * np.cos(angle)
|
|
cy = radius * np.sin(angle)
|
|
|
|
comm_nodes = list(comm)
|
|
if len(comm_nodes) == 1:
|
|
positions[comm_nodes[0]] = np.array([cx, cy])
|
|
continue
|
|
|
|
subG = G.subgraph(comm_nodes)
|
|
sub_pos = nx.spring_layout(subG, seed=42, scale=max(20, np.sqrt(len(comm_nodes)) * 15), iterations=200)
|
|
|
|
for node, pos in sub_pos.items():
|
|
positions[node] = pos + np.array([cx, cy])
|
|
|
|
return positions
|
|
|
|
|
|
def _normalize_and_scale(node_ids, coords, target_range=2000):
|
|
"""Normalize coordinates to a centered range for consistent rendering."""
|
|
if len(coords) == 0:
|
|
return {}
|
|
|
|
# Center
|
|
center = coords.mean(axis=0)
|
|
coords = coords - center
|
|
|
|
# Scale to target range
|
|
max_extent = max(np.abs(coords).max(), 1e-6)
|
|
coords = coords / max_extent * (target_range / 2)
|
|
|
|
# Add slight jitter to prevent perfect overlaps
|
|
jitter = np.random.RandomState(42).uniform(-1, 1, coords.shape) * 2
|
|
coords += jitter
|
|
|
|
positions = {}
|
|
for i, node_id in enumerate(node_ids):
|
|
positions[node_id] = {'x': float(coords[i][0]), 'y': float(coords[i][1])}
|
|
|
|
return positions
|
|
|
|
|
|
def get_available_algorithms():
|
|
"""Return list of available layout algorithms."""
|
|
algos = [
|
|
{'id': 'auto', 'name': 'Auto (best for size)', 'description': 'Automatically selects based on graph size'},
|
|
{'id': 'force_directed', 'name': 'Force-Directed', 'description': 'Classic spring-electric layout'},
|
|
{'id': 'force_directed_hq', 'name': 'Force-Directed (HQ)', 'description': 'Higher quality, more iterations'},
|
|
{'id': 'community', 'name': 'Community-Based', 'description': 'Groups by community detection'},
|
|
{'id': 'circle', 'name': 'Circular', 'description': 'Nodes arranged in a circle'},
|
|
]
|
|
|
|
if HAS_IGRAPH:
|
|
algos.extend([
|
|
{'id': 'drl', 'name': 'DrL (Large Graphs)', 'description': 'Distributed Recursive Layout for 10k+ nodes'},
|
|
{'id': 'kamada_kawai', 'name': 'Kamada-Kawai', 'description': 'Energy-based layout (small graphs)'},
|
|
])
|
|
else:
|
|
algos.append(
|
|
{'id': 'spectral', 'name': 'Spectral', 'description': 'Eigenvalue-based layout (fast for large graphs)'},
|
|
)
|
|
|
|
return algos
|