Remove deprecated consensus and utility functions

cafferychen777 · cafferychen777 · commit 2865f9d1f9be · 2026-01-26T03:25:31.000-06:00
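Removed the unused 'find_agreement', 'facilitate_cluster_discussion', and 'summarize_discussion' functions and dropped them from the package imports, from `__all__`, and from the consensus example script. Refactored cache management so that 'get_cache_info' and 'clear_mllmcelltype_cache' delegate to get_cache_stats(detailed=False); 'get_cache_info' still returns the 'file_count' and 'total_size' keys for backward compatibility. In 'process_controversial_clusters', the consensus-reached branch now reuses the already computed cp_value and h_value instead of re-extracting them from the response, which also removes the previous fallback to 1.0 / 0.0 when no metrics were found.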
diff --git a/python/examples/consensus_example.py b/python/examples/consensus_example.py
@@ -11,7 +11,6 @@
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "python"))
 
 from mllmcelltype.consensus import check_consensus
-from mllmcelltype.utils import find_agreement
 
 # Sample data for testing
 test_predictions = {
@@ -105,11 +104,6 @@ def simple_consensus_check_prompt(annotations):
     print("\nGenerated prompt for consensus check:")
     print(prompt)
 
-    # Test find_agreement function
-    print("\nTesting find_agreement function:")
-    consensus, consensus_proportion, entropy = find_agreement(test_predictions)
-    print_results("Results from find_agreement", consensus, consensus_proportion, entropy)
-
     # Test check_consensus function
     print("\nTesting check_consensus function:")
     consensus, consensus_proportion, entropy, controversial = check_consensus(test_predictions)
diff --git a/python/mllmcelltype/__init__.py b/python/mllmcelltype/__init__.py
@@ -4,11 +4,9 @@
 from .cache_manager import get_cache_info
 from .consensus import (
     check_consensus,
-    facilitate_cluster_discussion,
     interactive_consensus_annotation,
     print_consensus_summary,
     process_controversial_clusters,
-    summarize_discussion,
 )
 from .functions import get_provider
 from .logger import setup_logging, write_log
@@ -27,7 +25,6 @@
     clean_annotation,
     clear_cache,
     create_cache_key,
-    find_agreement,
     format_results,
     get_cache_stats,
     load_api_key,
@@ -57,7 +54,6 @@
     "get_cache_stats",
     "get_cache_info",
     "format_results",
-    "find_agreement",
     # Prompts
     "create_prompt",
     "create_batch_prompt",
@@ -68,8 +64,6 @@
     "process_controversial_clusters",
     "interactive_consensus_annotation",
     "print_consensus_summary",
-    "facilitate_cluster_discussion",
-    "summarize_discussion",
     # URL utilities
     "resolve_provider_base_url",
     "get_default_api_url",
diff --git a/python/mllmcelltype/cache_manager.py b/python/mllmcelltype/cache_manager.py
@@ -5,24 +5,24 @@
 
 Functions:
     clear_mllmcelltype_cache(): Interactive cache clearing
-    get_cache_info(): Get information about current cache
+    get_cache_info(): Get basic information about current cache (delegates to get_cache_stats)
     clear_cache_cli(): Command-line interface for cache management
 """
 
 import os
 import shutil
 
+from .utils import get_cache_stats
+
 
 def clear_mllmcelltype_cache():
     """Clear the mLLMCelltype cache directory."""
-    cache_dir = os.path.join(os.path.expanduser("~"), ".mllmcelltype", "cache")
+    info = get_cache_stats(detailed=False)
+    cache_dir = info["path"]
 
-    if os.path.exists(cache_dir):
+    if info["exists"]:
         print(f"Found cache directory: {cache_dir}")
-
-        # Count cache files
-        cache_files = [f for f in os.listdir(cache_dir) if f.endswith(".json")]
-        print(f"Found {len(cache_files)} cache files")
+        print(f"Found {info['count']} cache files")
 
         # Ask for confirmation
         response = input("Do you want to clear all cache files? (yes/no): ")
@@ -37,22 +37,21 @@ def clear_mllmcelltype_cache():
 
 
 def get_cache_info():
-    """Get information about the current cache state."""
-    cache_dir = os.path.join(os.path.expanduser("~"), ".mllmcelltype", "cache")
-
-    if not os.path.exists(cache_dir):
-        return {"exists": False, "path": cache_dir, "file_count": 0, "total_size": 0}
-
-    cache_files = [f for f in os.listdir(cache_dir) if f.endswith(".json")]
-    total_size = sum(os.path.getsize(os.path.join(cache_dir, f)) for f in cache_files)
-
-    return {
-        "exists": True,
-        "path": cache_dir,
-        "file_count": len(cache_files),
-        "total_size": total_size,
-        "size_mb": total_size / (1024 * 1024),
-    }
+    """Get basic information about the current cache state.
+
+    This is a convenience wrapper around get_cache_stats(detailed=False).
+    For detailed statistics including provider counts and timestamps,
+    use get_cache_stats() instead.
+
+    Returns:
+        dict: Cache info with keys: exists, path, count, size, size_mb
+              (also includes file_count and total_size for backward compatibility)
+    """
+    stats = get_cache_stats(detailed=False)
+    # Add backward-compatible keys
+    stats["file_count"] = stats["count"]
+    stats["total_size"] = stats["size"]
+    return stats
 
 
 def clear_cache_cli():
diff --git a/python/mllmcelltype/consensus.py b/python/mllmcelltype/consensus.py
@@ -707,15 +707,9 @@ def process_controversial_clusters(
                             level="info",
                         )
 
-                        # Extract CP and H from the discussion if available
-                        cp_value, h_value = _extract_metrics_from_text(response)[:2]
-                        if cp_value is not None and h_value is not None:
-                            updated_consensus_proportion[cluster_id] = cp_value
-                            updated_entropy[cluster_id] = h_value
-                        else:
-                            # If not found in discussion, set high consensus values
-                            updated_consensus_proportion[cluster_id] = 1.0
-                            updated_entropy[cluster_id] = 0.0
+                        # Use already computed cp_value and h_value from above
+                        updated_consensus_proportion[cluster_id] = cp_value
+                        updated_entropy[cluster_id] = h_value
 
                         rounds_history.append(
                             f"Consensus reached in round {current_round}\n"
@@ -1333,84 +1327,3 @@ def print_consensus_summary(result: dict[str, Any]) -> None:
             print()
 
 
-def facilitate_cluster_discussion(
-    cluster_id: str,
-    marker_genes: list[str],
-    model_votes: dict[str, str],
-    species: str,
-    tissue: Optional[str] = None,
-    provider: str = "openai",
-    model: Optional[str] = None,
-    api_key: Optional[str] = None,
-    use_cache: bool = True,
-    base_url: Optional[str] = None,
-) -> str:
-    """Facilitate a discussion between different model predictions for a controversial cluster.
-
-    Args:
-        cluster_id: ID of the cluster
-        marker_genes: List of marker genes for the cluster
-        model_votes: Dictionary mapping model names to cell type annotations
-        species: Species name (e.g., 'human', 'mouse')
-        tissue: Optional tissue name (e.g., 'brain', 'liver')
-        provider: LLM provider for the discussion
-        model: Model name for the discussion
-        api_key: API key for the provider
-        use_cache: Whether to use cache
-
-    Returns:
-        str: Discussion result
-
-    """
-    # Generate discussion prompt
-    prompt = create_discussion_prompt(
-        cluster_id=cluster_id,
-        marker_genes=marker_genes,
-        model_votes=model_votes,
-        species=species,
-        tissue=tissue,
-    )
-
-    # Get response
-    response = get_model_response(prompt, provider, model, api_key, use_cache, base_url=base_url)
-
-    # Extract final decision
-    cell_type = extract_cell_type_from_discussion(response)
-
-    # Return the full discussion and the extracted cell type
-    return f"{response}\n\nFINAL DETERMINATION: {cell_type}"
-
-
-def summarize_discussion(discussion: str) -> str:
-    """Summarize a model discussion about cell type annotation.
-
-    Args:
-        discussion: Full discussion text
-
-    Returns:
-        str: Summary of the discussion
-
-    """
-    # Extract key points from the discussion
-    lines = discussion.strip().split("\n")
-    summary_lines = []
-
-    # Look for common summary indicators
-    for line in lines:
-        line = line.strip()
-        if line.lower().startswith(
-            ("conclusion", "summary", "final", "therefore", "overall", "in summary")
-        ):
-            summary_lines.append(line)
-
-    # If we found summary lines, join them
-    if summary_lines:
-        return "\n".join(summary_lines)
-
-    # Otherwise, extract the final decision
-    cell_type = extract_cell_type_from_discussion(discussion)
-    if cell_type:
-        return f"Final cell type determination: {cell_type}"
-
-    # If all else fails, return the last few lines
-    return "\n".join(lines[-3:])
diff --git a/python/mllmcelltype/utils.py b/python/mllmcelltype/utils.py