Source code for graphem.influence

"""
Influence maximization functionality for Graphem.
"""

import numpy as np
import ndlib.models.ModelConfig as mc
import ndlib.models.epidemics as ep


[docs] def graphem_seed_selection(embedder, k, num_iterations=20): """ Run the GraphEmbedder layout to get an embedding, then select k seeds by choosing the nodes with the highest radial distances. Parameters: embedder: GraphEmbedder The initialized graph embedder object k: int Number of seed nodes to select num_iterations: int Number of layout iterations to run Returns: seeds: list List of k vertices selected as seeds """ # Run the layout algorithm embedder.run_layout(num_iterations=num_iterations) # Compute radial distances from the origin (0, 0, 0) positions = np.array(embedder.positions) radial_distances = np.linalg.norm(positions, axis=1) # Select the k nodes with the highest radial distances seeds = np.argsort(-radial_distances)[:k] return seeds.tolist()
[docs] def ndlib_estimated_influence(G, seeds, p=0.1, iterations_count=200): """ Run NDlib's Independent Cascades model on graph G, starting with the given seeds, and return the estimated final influence (number of nodes in state 2) and the number of iterations executed. Parameters: G: networkx.Graph The graph to run influence propagation on seeds: list The list of seed nodes p: float Propagation probability iterations_count: int Maximum number of simulation iterations Returns: influence: float The estimated influence (average number of influenced nodes) iterations: int The number of iterations run """ # Configure the Independent Cascades model model = ep.IndependentCascadesModel(G) config = mc.Configuration() # Set edge propagation probabilities for e in G.edges(): config.add_edge_configuration("threshold", e, p) # Initialize the model with configuration model.set_initial_status(config) # Set initial seeds to infected state for seed in seeds: config.add_node_configuration("status", seed, 1) # Run the simulation iterations = model.iteration_bunch(iterations_count) # Get the number of nodes in state 2 (influenced) at the end final_status = iterations[-1]['status'] influenced_count = sum(1 for node_state in final_status.values() if node_state == 2) return influenced_count, len(iterations)
[docs] def greedy_seed_selection(G, k, p=0.1, iterations_count=200): """ Greedy seed selection using NDlib influence estimation. For each candidate node evaluation, it calls NDlib's simulation and accumulates the total number of iterations used across all evaluations. Returns: seeds: the selected seed set (list of nodes) total_iters: the total number of NDlib iterations run during selection. """ seeds = [] total_iters = 0 n = G.number_of_nodes() # Create a copy of the graph for evaluations G_copy = G.copy() # Marginal gain of each node for _ in range(k): best_node = None best_influence = -1 # Evaluate each node not already in the seed set for node in range(n): if node in seeds: continue # Evaluate influence with this node added to the seed set candidate_seeds = seeds + [node] influence, iters = ndlib_estimated_influence(G_copy, candidate_seeds, p, iterations_count) total_iters += iters # Update best node if influence is improved if influence > best_influence: best_influence = influence best_node = node # Add the best node to the seed set if best_node is not None: seeds.append(best_node) return seeds, total_iters