Skip to content

Commit 12ac807

Browse files
Remove batch annotation functionality and related tests
Eliminated the batch_annotate_clusters function and its prompt generator from the codebase, as well as all references and tests related to batch annotation. Updated the tutorial and README to remove batch annotation examples and documentation.
1 parent e773b7c commit 12ac807

7 files changed

Lines changed: 3 additions & 532 deletions

File tree

notebooks/mLLMCelltype_Tutorial.ipynb

Lines changed: 1 addition & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -548,99 +548,7 @@
548548
"execution_count": null,
549549
"metadata": {},
550550
"outputs": [],
551-
"source": [
552-
"# 1. Batch processing multiple datasets\n",
553-
"print(\"🔄 Batch Processing Example:\")\n",
554-
"print(\"\"\"\n",
555-
"from mllmcelltype import batch_annotate_clusters\n",
556-
"\n",
557-
"# Process multiple datasets\n",
558-
"datasets = {\n",
559-
" 'sample1': marker_genes_1,\n",
560-
" 'sample2': marker_genes_2,\n",
561-
" 'sample3': marker_genes_3\n",
562-
"}\n",
563-
"\n",
564-
"batch_results = batch_annotate_clusters(\n",
565-
" datasets=datasets,\n",
566-
" species='human',\n",
567-
" models=selected_models,\n",
568-
" api_keys=api_keys\n",
569-
")\n",
570-
"\"\"\")\n",
571-
"\n",
572-
"# 2. Custom prompts for specialized contexts\n",
573-
"print(\"\\n🎨 Custom Prompt Example:\")\n",
574-
"print(\"\"\"\n",
575-
"# For specialized tissues or conditions\n",
576-
"custom_prompt = '''You are analyzing {species} {tissue} from a patient with autoimmune disease.\n",
577-
"Focus on immune cell subtypes and activation states.'''\n",
578-
"\n",
579-
"results = annotate_clusters(\n",
580-
" marker_genes=marker_genes,\n",
581-
" custom_prompt=custom_prompt,\n",
582-
" species='human',\n",
583-
" tissue='synovial fluid',\n",
584-
" model=model,\n",
585-
" api_key=api_key\n",
586-
")\n",
587-
"\"\"\")\n",
588-
"\n",
589-
"# 3. Hierarchical annotation\n",
590-
"print(\"\\n🌳 Hierarchical Annotation Example:\")\n",
591-
"print(\"\"\"\n",
592-
"# First level: broad cell types\n",
593-
"level1_results = annotate_clusters(\n",
594-
" marker_genes, \n",
595-
" species=species,\n",
596-
" model=model,\n",
597-
" api_key=api_key\n",
598-
")\n",
599-
"\n",
600-
"# Second level: detailed subtypes for immune cells\n",
601-
"immune_clusters = [c for c, ct in level1_results.items() \n",
602-
" if any(term in ct.lower() for term in ['immune', 't cell', 'b cell'])]\n",
603-
"immune_markers = {k: marker_genes[k] for k in immune_clusters}\n",
604-
"\n",
605-
"level2_results = annotate_clusters(\n",
606-
" immune_markers,\n",
607-
" species=species,\n",
608-
" tissue=tissue,\n",
609-
" custom_prompt='Focus on detailed immune cell subtypes and activation states.',\n",
610-
" model=model,\n",
611-
" api_key=api_key\n",
612-
")\n",
613-
"\"\"\")\n",
614-
"\n",
615-
"# 4. Cost estimation\n",
616-
"print(\"\\n💰 Cost Estimation:\")\n",
617-
"total_clusters = len(marker_genes)\n",
618-
"models_used = len(selected_models)\n",
619-
"avg_tokens_per_annotation = 500 # Approximate\n",
620-
"\n",
621-
"print(f\"Clusters to annotate: {total_clusters}\")\n",
622-
"print(f\"Models used: {models_used}\")\n",
623-
"print(f\"Total API calls: ~{total_clusters * models_used}\")\n",
624-
"print(\"\\nEstimated costs vary by provider:\")\n",
625-
"print(\"- OpenRouter free models: $0\")\n",
626-
"print(\"- GPT-4: ~$0.01-0.02 per cluster\")\n",
627-
"print(\"- Claude: ~$0.01-0.015 per cluster\")\n",
628-
"print(\"- Gemini: ~$0.001-0.005 per cluster\")\n",
629-
"\n",
630-
"# 5. Cache management\n",
631-
"print(\"\\n🗄️ Cache Management:\")\n",
632-
"print(\"\"\"\n",
633-
"from mllmcelltype import get_cache_stats, clear_cache\n",
634-
"\n",
635-
"# Check cache statistics\n",
636-
"stats = get_cache_stats()\n",
637-
"print(f\"Cache size: {stats['total_size_mb']:.2f} MB\")\n",
638-
"print(f\"Cached results: {stats['total_entries']}\")\n",
639-
"\n",
640-
"# Clear cache if needed\n",
641-
"# clear_cache() # Uncomment to clear\n",
642-
"\"\"\")"
643-
]
551+
"source": "# 1. Processing multiple datasets\nprint(\"🔄 Processing Multiple Datasets:\")\nprint(\"\"\"\n# Process multiple datasets with a simple loop\ndatasets = {\n 'sample1': marker_genes_1,\n 'sample2': marker_genes_2,\n 'sample3': marker_genes_3\n}\n\nresults = {}\nfor name, markers in datasets.items():\n results[name] = annotate_clusters(\n marker_genes=markers,\n species='human',\n provider='openai',\n model='gpt-4-turbo'\n )\n\"\"\")\n\n# 2. Custom prompts for specialized contexts\nprint(\"\\n🎨 Custom Prompt Example:\")\nprint(\"\"\"\n# For specialized tissues or conditions\ncustom_prompt = '''You are analyzing {species} {tissue} from a patient with autoimmune disease.\nFocus on immune cell subtypes and activation states.'''\n\nresults = annotate_clusters(\n marker_genes=marker_genes,\n custom_prompt=custom_prompt,\n species='human',\n tissue='synovial fluid',\n model=model,\n api_key=api_key\n)\n\"\"\")\n\n# 3. Hierarchical annotation\nprint(\"\\n🌳 Hierarchical Annotation Example:\")\nprint(\"\"\"\n# First level: broad cell types\nlevel1_results = annotate_clusters(\n marker_genes, \n species=species,\n model=model,\n api_key=api_key\n)\n\n# Second level: detailed subtypes for immune cells\nimmune_clusters = [c for c, ct in level1_results.items() \n if any(term in ct.lower() for term in ['immune', 't cell', 'b cell'])]\nimmune_markers = {k: marker_genes[k] for k in immune_clusters}\n\nlevel2_results = annotate_clusters(\n immune_markers,\n species=species,\n tissue=tissue,\n custom_prompt='Focus on detailed immune cell subtypes and activation states.',\n model=model,\n api_key=api_key\n)\n\"\"\")\n\n# 4. Cost estimation\nprint(\"\\n💰 Cost Estimation:\")\ntotal_clusters = len(marker_genes)\nmodels_used = len(selected_models)\navg_tokens_per_annotation = 500 # Approximate\n\nprint(f\"Clusters to annotate: {total_clusters}\")\nprint(f\"Models used: {models_used}\")\nprint(f\"Total API calls: ~{total_clusters * models_used}\")\nprint(\"\\nEstimated costs vary by provider:\")\nprint(\"- OpenRouter free models: $0\")\nprint(\"- GPT-4: ~$0.01-0.02 per cluster\")\nprint(\"- Claude: ~$0.01-0.015 per cluster\")\nprint(\"- Gemini: ~$0.001-0.005 per cluster\")\n\n# 5. Cache management\nprint(\"\\n🗄️ Cache Management:\")\nprint(\"\"\"\nfrom mllmcelltype import get_cache_stats, clear_cache\n\n# Check cache statistics\nstats = get_cache_stats()\nprint(f\"Cache size: {stats['total_size_mb']:.2f} MB\")\nprint(f\"Cached results: {stats['total_entries']}\")\n\n# Clear cache if needed\n# clear_cache() # Uncomment to clear\n\"\"\")"
644552
},
645553
{
646554
"cell_type": "markdown",

python/README.md

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -241,30 +241,6 @@ consensus_results = interactive_consensus_annotation(
241241

242242
## Advanced Usage
243243

244-
### Batch Annotation
245-
246-
```python
247-
from mllmcelltype import batch_annotate_clusters
248-
249-
# Prepare multiple sets of marker genes (e.g., from different samples)
250-
marker_genes_list = [marker_genes_df1, marker_genes_df2, marker_genes_df3]
251-
252-
# Batch annotate multiple datasets efficiently
253-
batch_annotations = batch_annotate_clusters(
254-
marker_genes_list=marker_genes_list,
255-
species='mouse', # Organism species
256-
provider='anthropic', # LLM provider
257-
model='claude-sonnet-4-5-20250929', # Latest Sonnet model (recommended)
258-
tissue='brain' # Optional tissue context
259-
)
260-
261-
# Process and utilize results
262-
for i, annotations in enumerate(batch_annotations):
263-
print(f"Dataset {i+1} annotations:")
264-
for cluster, annotation in annotations.items():
265-
print(f" Cluster {cluster}: {annotation}")
266-
```
267-
268244
### Targeted Analysis: New Enhanced Parameters
269245

270246
mLLMCelltype v1.3.0+ introduces two powerful parameters for more precise control over cell type annotation:

python/mllmcelltype/__init__.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""mLLMCelltype: A Python module for cell type annotation using various LLMs."""
22

3-
from .annotate import annotate_clusters, batch_annotate_clusters, get_model_response
3+
from .annotate import annotate_clusters, get_model_response
44
from .cache_manager import get_cache_info
55
from .consensus import (
66
check_consensus,
@@ -11,7 +11,6 @@
1111
from .functions import get_provider
1212
from .logger import setup_logging, write_log
1313
from .prompts import (
14-
create_batch_prompt,
1514
create_consensus_check_prompt,
1615
create_discussion_prompt,
1716
create_prompt,
@@ -37,7 +36,6 @@
3736
__all__ = [
3837
# Core annotation
3938
"annotate_clusters",
40-
"batch_annotate_clusters",
4139
"get_model_response",
4240
# Functions
4341
"get_provider",
@@ -56,7 +54,6 @@
5654
"format_results",
5755
# Prompts
5856
"create_prompt",
59-
"create_batch_prompt",
6057
"create_discussion_prompt",
6158
"create_consensus_check_prompt",
6259
# Consensus

0 commit comments

Comments
 (0)