Source code for graphem.influence

"""
Influence maximization functionality for Graphem.
"""

import numpy as np
import ndlib.models.ModelConfig as mc
import ndlib.models.epidemics as ep



[docs]
def graphem_seed_selection(embedder, k, num_iterations=20):
    """
    Run the GraphEmbedder layout to get an embedding, then select
    k seeds by choosing the nodes with the highest radial distances.
    
    Parameters:
        embedder: GraphEmbedder
            The initialized graph embedder object
        k: int
            Number of seed nodes to select
        num_iterations: int
            Number of layout iterations to run
    
    Returns:
        seeds: list
            List of k vertices selected as seeds
    """
    # Run the layout algorithm
    embedder.run_layout(num_iterations=num_iterations)
    
    # Compute radial distances from the origin (0, 0, 0)
    positions = np.array(embedder.positions)
    radial_distances = np.linalg.norm(positions, axis=1)
    
    # Select the k nodes with the highest radial distances
    seeds = np.argsort(-radial_distances)[:k]
    
    return seeds.tolist()




[docs]
def ndlib_estimated_influence(G, seeds, p=0.1, iterations_count=200):
    """
    Run NDlib's Independent Cascades model on graph G, starting with the given seeds,
    and return the estimated final influence (number of nodes in state 2) and
    the number of iterations executed.
    
    Parameters:
        G: networkx.Graph
            The graph to run influence propagation on
        seeds: list
            The list of seed nodes
        p: float
            Propagation probability
        iterations_count: int
            Maximum number of simulation iterations
    
    Returns:
        influence: float
            The estimated influence (average number of influenced nodes)
        iterations: int
            The number of iterations run
    """
    # Configure the Independent Cascades model
    model = ep.IndependentCascadesModel(G)
    config = mc.Configuration()
    
    # Set edge propagation probabilities
    for e in G.edges():
        config.add_edge_configuration("threshold", e, p)
    
    # Initialize the model with configuration
    model.set_initial_status(config)
    
    # Set initial seeds to infected state
    for seed in seeds:
        config.add_node_configuration("status", seed, 1)
    
    # Run the simulation
    iterations = model.iteration_bunch(iterations_count)
    
    # Get the number of nodes in state 2 (influenced) at the end
    final_status = iterations[-1]['status']
    influenced_count = sum(1 for node_state in final_status.values() if node_state == 2)
    
    return influenced_count, len(iterations)




[docs]
def greedy_seed_selection(G, k, p=0.1, iterations_count=200):
    """
    Greedy seed selection using NDlib influence estimation.
    For each candidate node evaluation, it calls NDlib's simulation and accumulates
    the total number of iterations used across all evaluations.

    Returns:
        seeds: the selected seed set (list of nodes)
        total_iters: the total number of NDlib iterations run during selection.
    """
    seeds = []
    total_iters = 0
    n = G.number_of_nodes()
    
    # Create a copy of the graph for evaluations
    G_copy = G.copy()
    
    # Marginal gain of each node
    for _ in range(k):
        best_node = None
        best_influence = -1
        
        # Evaluate each node not already in the seed set
        for node in range(n):
            if node in seeds:
                continue
                
            # Evaluate influence with this node added to the seed set
            candidate_seeds = seeds + [node]
            influence, iters = ndlib_estimated_influence(G_copy, candidate_seeds, p, iterations_count)
            total_iters += iters
            
            # Update best node if influence is improved
            if influence > best_influence:
                best_influence = influence
                best_node = node
        
        # Add the best node to the seed set
        if best_node is not None:
            seeds.append(best_node)
    
    return seeds, total_iters