Skip to content

Commit 05a29d2

Browse files
Remove unused consensus and metadata functions
Deleted unused functions related to consensus checking and annotation metadata from prompts.py and utils.py. Also updated __init__.py to remove corresponding imports and exports, simplifying the codebase.
1 parent 1e85f38 commit 05a29d2

File tree

3 files changed

+1
-174
lines changed

3 files changed

+1
-174
lines changed

python/mllmcelltype/__init__.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,10 @@
3232
create_cache_key,
3333
find_agreement,
3434
format_results,
35-
get_annotation_metadata,
3635
get_cache_stats,
3736
load_api_key,
3837
load_from_cache,
3938
save_to_cache,
40-
validate_cache,
4139
)
4240

4341
# LangExtract components (simplified)
@@ -71,11 +69,9 @@
7169
"create_cache_key",
7270
"save_to_cache",
7371
"load_from_cache",
74-
"validate_cache",
7572
"clear_cache",
7673
"get_cache_stats",
7774
"get_cache_info",
78-
"get_annotation_metadata",
7975
"format_results",
8076
"find_agreement",
8177
# Prompts

python/mllmcelltype/prompts.py

Lines changed: 0 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -146,24 +146,6 @@ def create_consensus_check_prompt(annotations: list[str]) -> str:
146146
You MUST provide numerical values for both CP and H, not just qualitative descriptions.
147147
"""
148148

149-
# Template for checking consensus across models
150-
DEFAULT_CONSENSUS_CHECK_TEMPLATE = """You are an expert in single-cell RNA-seq analysis, evaluating the consensus cell type annotations across different models.
151-
152-
Species: {species}
153-
Tissue: {tissue}
154-
155-
Here are the model predictions for each cluster:
156-
157-
{predictions}
158-
159-
For each cluster, assess:
160-
1. The level of agreement between models
161-
2. Which annotation is most accurate based on consensus
162-
3. Any clusters where annotations significantly differ, which require further investigation
163-
164-
Provide a final consensus annotation for each cluster and note any controversial clusters that need additional review.
165-
"""
166-
167149
# Template for checking if consensus is reached after discussion
168150
DEFAULT_DISCUSSION_CONSENSUS_CHECK_TEMPLATE = """You are an expert in single-cell RNA-seq analysis, evaluating whether a consensus has been reached after discussion about a controversial cluster annotation.
169151
@@ -372,63 +354,6 @@ def create_discussion_prompt(
372354
return prompt
373355

374356

375-
def create_model_consensus_check_prompt(
376-
predictions: dict[str, dict[str, str]],
377-
species: str,
378-
tissue: Optional[str] = None,
379-
prompt_template: Optional[str] = None,
380-
) -> str:
381-
"""Create a prompt for checking consensus across model predictions.
382-
383-
Args:
384-
predictions: Dictionary mapping model names to dictionaries of cluster annotations
385-
species: Species name (e.g., 'human', 'mouse')
386-
tissue: Tissue name (e.g., 'brain', 'blood')
387-
prompt_template: Custom prompt template
388-
389-
Returns:
390-
str: The generated prompt
391-
392-
"""
393-
write_log(f"Creating consensus check prompt for {len(predictions)} models")
394-
395-
# Use default template if none provided
396-
if not prompt_template:
397-
prompt_template = DEFAULT_CONSENSUS_CHECK_TEMPLATE
398-
399-
# Default tissue if none provided
400-
tissue_text = tissue if tissue else "unknown tissue"
401-
402-
# Get all model names
403-
models = list(predictions.keys())
404-
405-
# Get all cluster IDs
406-
clusters = set()
407-
for model_results in predictions.values():
408-
clusters.update(model_results.keys())
409-
clusters = sorted(clusters)
410-
411-
# Format predictions text
412-
predictions_lines = []
413-
414-
for cluster in clusters:
415-
predictions_lines.append(f"Cluster {cluster}:")
416-
for model in models:
417-
if cluster in predictions[model]:
418-
predictions_lines.append(f"- {model}: {predictions[model][cluster]}")
419-
predictions_lines.append("")
420-
421-
predictions_text = "\n".join(predictions_lines)
422-
423-
# Fill in the template
424-
prompt = prompt_template.format(
425-
species=species, tissue=tissue_text, predictions=predictions_text
426-
)
427-
428-
write_log(f"Generated consensus check prompt with {len(prompt)} characters")
429-
return prompt
430-
431-
432357
def create_discussion_consensus_check_prompt(
433358
cluster_id: str,
434359
discussion: str,

python/mllmcelltype/utils.py

Lines changed: 1 addition & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def _get_cache_dir(cache_dir: Optional[str] = None) -> str:
2525
str: Cache directory path
2626
"""
2727
if cache_dir is None:
28-
cache_dir = os.path.join(os.path.expanduser("~"), ".llmcelltype", "cache")
28+
cache_dir = os.path.join(os.path.expanduser("~"), ".mllmcelltype", "cache")
2929
return cache_dir
3030

3131

@@ -249,36 +249,6 @@ def parse_marker_genes(marker_genes_df: pd.DataFrame) -> dict[str, list[str]]:
249249
return result
250250

251251

252-
def get_annotation_metadata(
253-
annotation_result: dict[str, str],
254-
) -> dict[str, dict[str, Any]]:
255-
"""Retrieve metadata for a specific annotation result.
256-
257-
Args:
258-
annotation_result: Dictionary mapping cluster IDs to cell type annotations
259-
260-
Returns:
261-
dict[str, dict[str, Any]]: Dictionary mapping cluster IDs to metadata
262-
263-
"""
264-
try:
265-
# Create a unique key for this annotation result
266-
key = hashlib.sha256(str(annotation_result).encode()).hexdigest()
267-
268-
# Check if metadata exists in cache
269-
cache_dir = os.path.expanduser("~/.mllmcelltype/metadata")
270-
cache_file = os.path.join(cache_dir, f"{key}.json")
271-
272-
if os.path.exists(cache_file):
273-
with open(cache_file) as f:
274-
return json.load(f)
275-
write_log("No metadata found for the given annotation result", level="debug")
276-
return {}
277-
except (KeyError, TypeError, AttributeError, ValueError) as e:
278-
write_log(f"Failed to retrieve metadata: {str(e)}", level="debug")
279-
return {}
280-
281-
282252
def format_results(results: list[str], clusters: list[str]) -> dict[str, str]:
283253
"""Format results into a dictionary mapping cluster names to annotations.
284254
@@ -290,8 +260,6 @@ def format_results(results: list[str], clusters: list[str]) -> dict[str, str]:
290260
dict[str, str]: Dictionary mapping cluster names to annotations
291261
292262
"""
293-
import json
294-
295263
# Clean up results (remove empty lines and whitespace)
296264
clean_results = [line.strip() for line in results if line.strip()]
297265

@@ -373,24 +341,6 @@ def format_results(results: list[str], clusters: list[str]) -> dict[str, str]:
373341
# If we found annotations for all clusters, return the result
374342
if len(json_result) == len(clusters):
375343
write_log("Successfully parsed JSON response", level="info")
376-
377-
# Store metadata in cache for later retrieval if needed
378-
if metadata:
379-
try:
380-
cache_dir = os.path.expanduser("~/.mllmcelltype/metadata")
381-
os.makedirs(cache_dir, exist_ok=True)
382-
383-
# Create a unique key for this annotation result
384-
key = hashlib.sha256(str(json_result).encode()).hexdigest()
385-
cache_file = os.path.join(cache_dir, f"{key}.json")
386-
387-
with open(cache_file, "w") as f:
388-
json.dump(metadata, f, indent=2)
389-
390-
write_log(f"Stored annotation metadata to {cache_file}", level="debug")
391-
except (OSError, TypeError, ValueError) as e:
392-
write_log(f"Failed to store metadata: {str(e)}", level="debug")
393-
394344
return json_result
395345
except (json.JSONDecodeError, ValueError, KeyError, TypeError, AttributeError) as e:
396346
write_log(f"Failed to parse JSON response: {str(e)}", level="debug")
@@ -564,8 +514,6 @@ def normalize_annotation_for_comparison(annotation: str) -> str:
564514
for old, new in replacements.items():
565515
if "$" in old:
566516
# Handle regex patterns
567-
import re
568-
569517
normalized = re.sub(old, new, normalized)
570518
else:
571519
normalized = normalized.replace(old, new)
@@ -654,48 +602,6 @@ def find_agreement(
654602
return consensus, confidence, entropy_scores
655603

656604

657-
def validate_cache(cache_key: str, cache_dir: Optional[str] = None) -> bool:
658-
"""Validate cache content for a specific key.
659-
660-
Args:
661-
cache_key: The cache key to validate
662-
cache_dir: The cache directory. If None, uses default directory.
663-
664-
Returns:
665-
bool: True if cache is valid, False otherwise
666-
667-
"""
668-
cache_dir = _get_cache_dir(cache_dir)
669-
cache_file = os.path.join(cache_dir, f"{cache_key}.json")
670-
671-
# Check if cache file exists
672-
if not os.path.exists(cache_file):
673-
return False
674-
675-
# Validate cache content
676-
try:
677-
with open(cache_file) as f:
678-
cache_content = json.load(f)
679-
680-
# Check if cache content is in the new format
681-
if (
682-
isinstance(cache_content, dict)
683-
and "version" in cache_content
684-
and "data" in cache_content
685-
):
686-
# New format with metadata
687-
return True
688-
if isinstance(cache_content, (list, dict)):
689-
# Legacy format - still valid but will be converted on next save
690-
return True
691-
# Invalid format
692-
write_log(f"Invalid cache format for key {cache_key}", level="warning")
693-
return False
694-
except (OSError, json.JSONDecodeError, TypeError, ValueError) as e:
695-
write_log(f"Error validating cache for {cache_file}: {e}", level="warning")
696-
return False
697-
698-
699605
def clear_cache(cache_dir: Optional[str] = None, older_than: Optional[int] = None) -> int:
700606
"""Clear cache.
701607

0 commit comments

Comments (0)