# Source: hiper.metrics.entropy_loss

# -*- coding: utf-8 -*-
"""
entropy_loss.py

This module implements the Entropy Loss (EL) metric for measuring
how perturbations affect the entropy of various distributions in a hypergraph.
"""

from collections import Counter
from typing import TYPE_CHECKING, List

import numpy as np

if TYPE_CHECKING:
    from hiper.core import Hypernetwork


class EntropyLoss:
    """
    Computes the Entropy Loss (EL) metric.

    The Entropy Loss measures how much entropy is lost in a distribution
    after a perturbation:

    :math:`\\text{EL} = H(P^{\\text{pre}}) - H(P^{\\text{post}})`

    where:

    - :math:`H(P) = -\\sum p_i \\log(p_i)` is the Shannon entropy
    - :math:`P^{\\text{pre}}` is the distribution before perturbation
    - :math:`P^{\\text{post}}` is the distribution after perturbation

    Positive values indicate entropy loss (reduced diversity), negative
    values indicate entropy gain (increased diversity), and zero indicates
    no change in entropy.

    The metric can be applied to various distributions such as:

    - Node degree distribution
    - Hyperedge size distribution
    - Hyperedge degree distribution
    """

    def __init__(self, distribution_type: str = 'node_degree',
                 log_base: float = 2.0):
        """
        Initialize the Entropy Loss metric.

        Args:
            distribution_type: Type of distribution to analyze; one of
                'node_degree', 'hyperedge_size' or 'hyperedge_degree'.
            log_base: Base for logarithm in entropy calculation
                (2.0 yields entropy in bits).

        Raises:
            ValueError: If ``distribution_type`` is not a supported type.
        """
        valid_types = ['node_degree', 'hyperedge_size', 'hyperedge_degree']
        # Validate before assigning any state so a rejected call never
        # leaves a partially configured instance behind.
        if distribution_type not in valid_types:
            raise ValueError(f"distribution_type must be one of {valid_types}")

        self.name = "Entropy Loss"
        self.symbol = "EL"
        self.distribution_type = distribution_type
        self.log_base = log_base

    def compute(self, before_hypergraph: 'Hypernetwork',
                after_hypergraph: 'Hypernetwork') -> float:
        """
        Compute the Entropy Loss metric.

        Args:
            before_hypergraph: The hypergraph before perturbation.
            after_hypergraph: The hypergraph after perturbation.

        Returns:
            The entropy loss value (positive = loss, negative = gain).
        """
        dist_before = self._extract_distribution(before_hypergraph)
        dist_after = self._extract_distribution(after_hypergraph)

        entropy_before = self._compute_shannon_entropy(dist_before)
        entropy_after = self._compute_shannon_entropy(dist_after)

        # Entropy loss is defined as H(before) - H(after).
        return entropy_before - entropy_after

    def _extract_distribution(self, hypergraph: 'Hypernetwork') -> List[int]:
        """Extract the configured distribution from the hypergraph."""
        extractors = {
            'node_degree': self._get_node_degree_distribution,
            'hyperedge_size': self._get_hyperedge_size_distribution,
            'hyperedge_degree': self._get_hyperedge_degree_distribution,
        }
        try:
            extractor = extractors[self.distribution_type]
        except KeyError:
            # Defensive: distribution_type is validated in __init__, but it
            # is a public attribute and may have been reassigned since.
            raise ValueError(
                f"Unknown distribution type: {self.distribution_type}")
        return extractor(hypergraph)

    @staticmethod
    def _get_node_degree_distribution(hypergraph: 'Hypernetwork') -> List[int]:
        """Get the degree of each node (number of hyperedges it belongs to)."""
        return [len(hypergraph.get_hyperedges(node))
                for node in hypergraph.nodes]

    @staticmethod
    def _get_hyperedge_size_distribution(
            hypergraph: 'Hypernetwork') -> List[int]:
        """Get the size of each hyperedge (number of nodes it contains)."""
        return [len(hypergraph.get_nodes(he_id))
                for he_id in hypergraph.edges]

    @staticmethod
    def _get_hyperedge_degree_distribution(
            hypergraph: 'Hypernetwork') -> List[int]:
        """
        Get the degree of each hyperedge (number of other hyperedges it
        shares nodes with).
        """
        # Build each hyperedge's node set once up front; rebuilding the sets
        # inside the pairwise loop would make the quadratic scan needlessly
        # expensive.
        node_sets = {he_id: set(hypergraph.get_nodes(he_id))
                     for he_id in hypergraph.edges}

        degrees = []
        for he_id, he_nodes in node_sets.items():
            # Count the other hyperedges sharing at least one node.
            degree = sum(
                1 for other_id, other_nodes in node_sets.items()
                if other_id != he_id and not he_nodes.isdisjoint(other_nodes))
            degrees.append(degree)
        return degrees

    def _compute_shannon_entropy(self, distribution: List[int]) -> float:
        """
        Compute Shannon entropy of a distribution.

        Args:
            distribution: List of values representing the distribution.

        Returns:
            Shannon entropy value (0.0 for an empty distribution).
        """
        if not distribution:
            return 0.0

        counts = Counter(distribution)
        total = len(distribution)

        # Change-of-base: log_b(p) = ln(p) / ln(b). Counter values are
        # always >= 1, so every probability is strictly positive.
        log_of_base = np.log(self.log_base)
        entropy = 0.0
        for count in counts.values():
            probability = count / total
            entropy -= probability * np.log(probability) / log_of_base
        return entropy

    def compute_detailed(self, before_hypergraph: 'Hypernetwork',
                         after_hypergraph: 'Hypernetwork') -> dict:
        """
        Compute detailed entropy analysis.

        Args:
            before_hypergraph: The hypergraph before perturbation.
            after_hypergraph: The hypergraph after perturbation.

        Returns:
            Dictionary containing detailed entropy analysis: entropies,
            raw values, frequency counts, and summary statistics for both
            hypergraphs.
        """
        dist_before = self._extract_distribution(before_hypergraph)
        dist_after = self._extract_distribution(after_hypergraph)

        entropy_before = self._compute_shannon_entropy(dist_before)
        entropy_after = self._compute_shannon_entropy(dist_after)
        entropy_loss = entropy_before - entropy_after

        counts_before = Counter(dist_before)
        counts_after = Counter(dist_after)

        return {
            'entropy_loss': entropy_loss,
            'entropy_before': entropy_before,
            'entropy_after': entropy_after,
            'distribution_type': self.distribution_type,
            'log_base': self.log_base,
            'values_before': dist_before,
            'values_after': dist_after,
            'unique_values_before': len(counts_before),
            'unique_values_after': len(counts_after),
            'total_values_before': len(dist_before),
            'total_values_after': len(dist_after),
            'frequency_counts_before': dict(counts_before),
            'frequency_counts_after': dict(counts_after),
            # Guard np.mean/np.std against empty inputs (they would warn
            # and return nan).
            'mean_before': np.mean(dist_before) if dist_before else 0.0,
            'mean_after': np.mean(dist_after) if dist_after else 0.0,
            'std_before': np.std(dist_before) if dist_before else 0.0,
            'std_after': np.std(dist_after) if dist_after else 0.0
        }

    def compute_multiple_distributions(self,
                                       before_hypergraph: 'Hypernetwork',
                                       after_hypergraph: 'Hypernetwork') \
            -> dict:
        """
        Compute entropy loss for multiple distribution types.

        Args:
            before_hypergraph: The hypergraph before perturbation.
            after_hypergraph: The hypergraph after perturbation.

        Returns:
            Dictionary containing entropy loss for each distribution type
            (``np.nan`` where the computation failed).
        """
        distribution_types = ['node_degree', 'hyperedge_size',
                              'hyperedge_degree']
        results = {}

        original_type = self.distribution_type
        # self.distribution_type is temporarily rebound for each pass;
        # restore it in a finally so an unexpected exception cannot leave
        # the instance configured with the wrong distribution type.
        try:
            for dist_type in distribution_types:
                self.distribution_type = dist_type
                try:
                    results[dist_type] = self.compute(before_hypergraph,
                                                      after_hypergraph)
                except (ValueError, ZeroDivisionError):
                    results[dist_type] = np.nan
        finally:
            self.distribution_type = original_type

        return results

    def compute_relative_entropy_loss(self,
                                      before_hypergraph: 'Hypernetwork',
                                      after_hypergraph: 'Hypernetwork') \
            -> float:
        """
        Compute relative entropy loss (normalized by initial entropy).

        Args:
            before_hypergraph: The hypergraph before perturbation.
            after_hypergraph: The hypergraph after perturbation.

        Returns:
            Relative entropy loss in [0, 1] or negative for entropy gain.
            Returns 0.0 when both entropies are zero, and -inf when entropy
            appears from a zero-entropy starting point.
        """
        dist_before = self._extract_distribution(before_hypergraph)
        dist_after = self._extract_distribution(after_hypergraph)

        entropy_before = self._compute_shannon_entropy(dist_before)
        entropy_after = self._compute_shannon_entropy(dist_after)

        if entropy_before == 0:
            # Cannot normalize by zero: no change -> 0.0, any gain -> -inf.
            return 0.0 if entropy_after == 0 else -float('inf')

        return (entropy_before - entropy_after) / entropy_before

    def __str__(self) -> str:
        """String representation of the metric."""
        return f"{self.name} ({self.symbol}) - {self.distribution_type}"

    def __repr__(self) -> str:
        """Detailed string representation."""
        return (f"EntropyLoss(name='{self.name}', symbol='{self.symbol}', "
                f"distribution_type='{self.distribution_type}', "
                f"log_base={self.log_base})")