# Source code for src.metrics.evaluation_metrics

"""Evaluation metrics for text embeddings based on similarity scores.

This module provides functions for calculating clustering quality metrics
on embedding spaces, particularly silhouette-based metrics that decompose
into intra-cluster cohesion and inter-cluster separation.

Example:
    Calculate metrics for document embeddings::

        from src.metrics.evaluation_metrics import calculate_all_metrics

        metrics = calculate_all_metrics(ref_embeddings, doc_embeddings, doc_labels)
        print(f"Silhouette score: {metrics['silhouette_score']}")
"""

import numpy as np
from sklearn.metrics import pairwise_distances, silhouette_score

from .silhouette_decomposition import enhanced_silhouette_analysis


def calculate_silhouette_metrics(
    embeddings: np.ndarray,
    labels: np.ndarray,
    metric: str = "cosine",
) -> dict[str, float]:
    """Compute silhouette-style clustering metrics with normalized components.

    The silhouette score is broken down into its two ingredients — mean
    intra-cluster distance a(i) (cohesion) and mean distance to the nearest
    foreign cluster b(i) (separation) — and each is rescaled by the largest
    observed pairwise distance so the components land in an interpretable
    0-1 range.

    Args:
        embeddings: The embeddings of the text chunks, shape (n_samples, n_dims).
        labels: The theme label for each chunk, shape (n_samples,).
        metric: Distance metric to use for calculations. Defaults to 'cosine'.

    Returns:
        Dictionary containing:
            - intra_cluster_distance_normalized: Normalized intra-cluster
              quality (0-1, higher is better).
            - inter_cluster_distance_normalized: Normalized inter-cluster
              separation (0-1, higher is better).
            - silhouette_score: Standard silhouette score (-1 to 1,
              higher is better).
    """
    unique_labels = np.unique(labels)

    # Silhouette is undefined with fewer than two clusters; report sentinels.
    if unique_labels.size < 2:
        return {
            "intra_cluster_distance_normalized": 0.0,
            "inter_cluster_distance_normalized": 0.0,
            "silhouette_score": -1.0,
        }

    # Full pairwise distance matrix, reused for both a(i) and b(i).
    dist = pairwise_distances(embeddings, metric=metric)
    n = len(embeddings)
    indices = np.arange(n)

    cohesion: list[float] = []    # a(i): mean distance within own cluster
    separation: list[float] = []  # b(i): mean distance to nearest other cluster

    for i, own_label in enumerate(labels):
        # a(i): average distance to the *other* members of i's own cluster;
        # 0.0 when i is the sole member (no peers to average over).
        peers = (labels == own_label) & (indices != i)
        if np.sum(peers) > 0:
            cohesion.append(float(np.mean(dist[i][peers])))
        else:
            cohesion.append(0.0)

        # b(i): smallest mean distance from i to any foreign cluster.
        b_i = np.inf
        for candidate in unique_labels:
            if candidate == own_label:
                continue
            members = labels == candidate
            if np.sum(members) > 0:
                b_i = min(b_i, np.mean(dist[i][members]))
        separation.append(float(b_i) if b_i != np.inf else 0.0)

    a_mean = np.mean(np.array(cohesion))
    b_mean = np.mean(np.array(separation))

    # Delegate the headline score to sklearn for robustness; fall back to the
    # -1.0 sentinel if it rejects the input.
    try:
        overall = silhouette_score(embeddings, labels, metric=metric)
    except (ValueError, IndexError):
        overall = -1.0

    # Normalize by the largest observed pairwise distance (guard against an
    # all-zero matrix, where we divide by 1.0 instead).
    largest = np.max(dist)
    scale = largest if largest > 0 else 1.0

    # Cohesion: 1 - (mean a / max distance) — higher means tighter clusters.
    intra = 1 - (a_mean / scale)
    # Separation: mean b / max distance — higher means better-spread clusters.
    inter = b_mean / scale

    return {
        "intra_cluster_distance_normalized": float(max(0.0, float(intra))),
        "inter_cluster_distance_normalized": float(inter),
        "silhouette_score": float(overall),
    }
def calculate_all_metrics(
    ref_embeddings: np.ndarray,
    doc_embeddings: np.ndarray,
    doc_labels: np.ndarray,
) -> dict[str, float]:
    """Calculate minimal essential evaluation metrics.

    Only the core clustering-quality numbers are produced: the silhouette
    score plus its normalized intra/inter-cluster decomposition, as reported
    by :func:`enhanced_silhouette_analysis`.

    Args:
        ref_embeddings: Embeddings for reference themes, shape
            (n_themes, n_dims). Currently not used by the computation.
        doc_embeddings: Embeddings for document chunks, shape
            (n_chunks, n_dims).
        doc_labels: Theme labels for each document chunk, shape (n_chunks,).

    Returns:
        Dictionary with keys:
            - silhouette_score
            - intra_cluster_distance_normalized
            - inter_cluster_distance_normalized
    """
    results: dict[str, float] = {}

    if len(np.unique(doc_labels)) <= 1:
        # Fewer than two clusters: silhouette is undefined, so emit the
        # sentinel defaults instead of running the analysis.
        results.update(
            {
                "silhouette_score": -1.0,
                "intra_cluster_distance_normalized": 0.0,
                "inter_cluster_distance_normalized": 0.0,
            }
        )
        return results

    # Multi-cluster case: pull the global numbers out of the enhanced
    # silhouette decomposition and map them onto our metric names.
    analysis = enhanced_silhouette_analysis(doc_embeddings, doc_labels)
    stats = analysis["global_metrics"]
    results.update(
        {
            "silhouette_score": stats["silhouette_score"],
            "intra_cluster_distance_normalized": stats["intra_cluster_quality"],
            "inter_cluster_distance_normalized": stats["inter_cluster_separation"],
        }
    )
    return results