Skip to content

Commit 2865f9d

Browse files
Remove deprecated consensus and utility functions
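Removed the unused 'find_agreement', 'facilitate_cluster_discussion', and 'summarize_discussion' functions from the codebase and updated the package exports, imports, and example script that referenced them. Refactored cache management so that get_cache_info() and clear_mllmcelltype_cache() delegate to get_cache_stats(detailed=False) for basic cache info (get_cache_info() still returns the 'file_count' and 'total_size' keys for backward compatibility). Streamlined consensus processing in process_controversial_clusters: it now reuses the already-computed cp_value and h_value instead of re-extracting them from the response text, and no longer falls back to 1.0 / 0.0 when extraction fails.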
1 parent 0cfee8e commit 2865f9d

5 files changed

Lines changed: 67 additions & 235 deletions

File tree

python/examples/consensus_example.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "python"))
1212

1313
from mllmcelltype.consensus import check_consensus
14-
from mllmcelltype.utils import find_agreement
1514

1615
# Sample data for testing
1716
test_predictions = {
@@ -105,11 +104,6 @@ def simple_consensus_check_prompt(annotations):
105104
print("\nGenerated prompt for consensus check:")
106105
print(prompt)
107106

108-
# Test find_agreement function
109-
print("\nTesting find_agreement function:")
110-
consensus, consensus_proportion, entropy = find_agreement(test_predictions)
111-
print_results("Results from find_agreement", consensus, consensus_proportion, entropy)
112-
113107
# Test check_consensus function
114108
print("\nTesting check_consensus function:")
115109
consensus, consensus_proportion, entropy, controversial = check_consensus(test_predictions)

python/mllmcelltype/__init__.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,9 @@
44
from .cache_manager import get_cache_info
55
from .consensus import (
66
check_consensus,
7-
facilitate_cluster_discussion,
87
interactive_consensus_annotation,
98
print_consensus_summary,
109
process_controversial_clusters,
11-
summarize_discussion,
1210
)
1311
from .functions import get_provider
1412
from .logger import setup_logging, write_log
@@ -27,7 +25,6 @@
2725
clean_annotation,
2826
clear_cache,
2927
create_cache_key,
30-
find_agreement,
3128
format_results,
3229
get_cache_stats,
3330
load_api_key,
@@ -57,7 +54,6 @@
5754
"get_cache_stats",
5855
"get_cache_info",
5956
"format_results",
60-
"find_agreement",
6157
# Prompts
6258
"create_prompt",
6359
"create_batch_prompt",
@@ -68,8 +64,6 @@
6864
"process_controversial_clusters",
6965
"interactive_consensus_annotation",
7066
"print_consensus_summary",
71-
"facilitate_cluster_discussion",
72-
"summarize_discussion",
7367
# URL utilities
7468
"resolve_provider_base_url",
7569
"get_default_api_url",

python/mllmcelltype/cache_manager.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,24 @@
55
66
Functions:
77
clear_mllmcelltype_cache(): Interactive cache clearing
8-
get_cache_info(): Get information about current cache
8+
get_cache_info(): Get basic information about current cache (delegates to get_cache_stats)
99
clear_cache_cli(): Command-line interface for cache management
1010
"""
1111

1212
import os
1313
import shutil
1414

15+
from .utils import get_cache_stats
16+
1517

1618
def clear_mllmcelltype_cache():
1719
"""Clear the mLLMCelltype cache directory."""
18-
cache_dir = os.path.join(os.path.expanduser("~"), ".mllmcelltype", "cache")
20+
info = get_cache_stats(detailed=False)
21+
cache_dir = info["path"]
1922

20-
if os.path.exists(cache_dir):
23+
if info["exists"]:
2124
print(f"Found cache directory: {cache_dir}")
22-
23-
# Count cache files
24-
cache_files = [f for f in os.listdir(cache_dir) if f.endswith(".json")]
25-
print(f"Found {len(cache_files)} cache files")
25+
print(f"Found {info['count']} cache files")
2626

2727
# Ask for confirmation
2828
response = input("Do you want to clear all cache files? (yes/no): ")
@@ -37,22 +37,21 @@ def clear_mllmcelltype_cache():
3737

3838

3939
def get_cache_info():
40-
"""Get information about the current cache state."""
41-
cache_dir = os.path.join(os.path.expanduser("~"), ".mllmcelltype", "cache")
42-
43-
if not os.path.exists(cache_dir):
44-
return {"exists": False, "path": cache_dir, "file_count": 0, "total_size": 0}
45-
46-
cache_files = [f for f in os.listdir(cache_dir) if f.endswith(".json")]
47-
total_size = sum(os.path.getsize(os.path.join(cache_dir, f)) for f in cache_files)
48-
49-
return {
50-
"exists": True,
51-
"path": cache_dir,
52-
"file_count": len(cache_files),
53-
"total_size": total_size,
54-
"size_mb": total_size / (1024 * 1024),
55-
}
40+
"""Get basic information about the current cache state.
41+
42+
This is a convenience wrapper around get_cache_stats(detailed=False).
43+
For detailed statistics including provider counts and timestamps,
44+
use get_cache_stats() instead.
45+
46+
Returns:
47+
dict: Cache info with keys: exists, path, count, size, size_mb
48+
(also includes file_count and total_size for backward compatibility)
49+
"""
50+
stats = get_cache_stats(detailed=False)
51+
# Add backward-compatible keys
52+
stats["file_count"] = stats["count"]
53+
stats["total_size"] = stats["size"]
54+
return stats
5655

5756

5857
def clear_cache_cli():

python/mllmcelltype/consensus.py

Lines changed: 3 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -707,15 +707,9 @@ def process_controversial_clusters(
707707
level="info",
708708
)
709709

710-
# Extract CP and H from the discussion if available
711-
cp_value, h_value = _extract_metrics_from_text(response)[:2]
712-
if cp_value is not None and h_value is not None:
713-
updated_consensus_proportion[cluster_id] = cp_value
714-
updated_entropy[cluster_id] = h_value
715-
else:
716-
# If not found in discussion, set high consensus values
717-
updated_consensus_proportion[cluster_id] = 1.0
718-
updated_entropy[cluster_id] = 0.0
710+
# Use already computed cp_value and h_value from above
711+
updated_consensus_proportion[cluster_id] = cp_value
712+
updated_entropy[cluster_id] = h_value
719713

720714
rounds_history.append(
721715
f"Consensus reached in round {current_round}\n"
@@ -1333,84 +1327,3 @@ def print_consensus_summary(result: dict[str, Any]) -> None:
13331327
print()
13341328

13351329

1336-
def facilitate_cluster_discussion(
1337-
cluster_id: str,
1338-
marker_genes: list[str],
1339-
model_votes: dict[str, str],
1340-
species: str,
1341-
tissue: Optional[str] = None,
1342-
provider: str = "openai",
1343-
model: Optional[str] = None,
1344-
api_key: Optional[str] = None,
1345-
use_cache: bool = True,
1346-
base_url: Optional[str] = None,
1347-
) -> str:
1348-
"""Facilitate a discussion between different model predictions for a controversial cluster.
1349-
1350-
Args:
1351-
cluster_id: ID of the cluster
1352-
marker_genes: List of marker genes for the cluster
1353-
model_votes: Dictionary mapping model names to cell type annotations
1354-
species: Species name (e.g., 'human', 'mouse')
1355-
tissue: Optional tissue name (e.g., 'brain', 'liver')
1356-
provider: LLM provider for the discussion
1357-
model: Model name for the discussion
1358-
api_key: API key for the provider
1359-
use_cache: Whether to use cache
1360-
1361-
Returns:
1362-
str: Discussion result
1363-
1364-
"""
1365-
# Generate discussion prompt
1366-
prompt = create_discussion_prompt(
1367-
cluster_id=cluster_id,
1368-
marker_genes=marker_genes,
1369-
model_votes=model_votes,
1370-
species=species,
1371-
tissue=tissue,
1372-
)
1373-
1374-
# Get response
1375-
response = get_model_response(prompt, provider, model, api_key, use_cache, base_url=base_url)
1376-
1377-
# Extract final decision
1378-
cell_type = extract_cell_type_from_discussion(response)
1379-
1380-
# Return the full discussion and the extracted cell type
1381-
return f"{response}\n\nFINAL DETERMINATION: {cell_type}"
1382-
1383-
1384-
def summarize_discussion(discussion: str) -> str:
1385-
"""Summarize a model discussion about cell type annotation.
1386-
1387-
Args:
1388-
discussion: Full discussion text
1389-
1390-
Returns:
1391-
str: Summary of the discussion
1392-
1393-
"""
1394-
# Extract key points from the discussion
1395-
lines = discussion.strip().split("\n")
1396-
summary_lines = []
1397-
1398-
# Look for common summary indicators
1399-
for line in lines:
1400-
line = line.strip()
1401-
if line.lower().startswith(
1402-
("conclusion", "summary", "final", "therefore", "overall", "in summary")
1403-
):
1404-
summary_lines.append(line)
1405-
1406-
# If we found summary lines, join them
1407-
if summary_lines:
1408-
return "\n".join(summary_lines)
1409-
1410-
# Otherwise, extract the final decision
1411-
cell_type = extract_cell_type_from_discussion(discussion)
1412-
if cell_type:
1413-
return f"Final cell type determination: {cell_type}"
1414-
1415-
# If all else fails, return the last few lines
1416-
return "\n".join(lines[-3:])

0 commit comments

Comments
 (0)