cafferychen777
diff --git a/notebooks/mLLMCelltype_Tutorial.ipynb b/notebooks/mLLMCelltype_Tutorial.ipynb
Lines changed: 1 addition & 93 deletions
diff --git a/python/README.md b/python/README.md
Lines changed: 0 additions & 24 deletions
diff --git a/python/mllmcelltype/__init__.py b/python/mllmcelltype/__init__.py
Lines changed: 1 addition & 4 deletions
@@ -548,99 +548,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "# 1. Batch processing multiple datasets\n",
-    "print(\"🔄 Batch Processing Example:\")\n",
-    "print(\"\"\"\n",
-    "from mllmcelltype import batch_annotate_clusters\n",
-    "\n",
-    "# Process multiple datasets\n",
-    "datasets = {\n",
-    "    'sample1': marker_genes_1,\n",
-    "    'sample2': marker_genes_2,\n",
-    "    'sample3': marker_genes_3\n",
-    "}\n",
-    "\n",
-    "batch_results = batch_annotate_clusters(\n",
-    "    datasets=datasets,\n",
-    "    species='human',\n",
-    "    models=selected_models,\n",
-    "    api_keys=api_keys\n",
-    ")\n",
-    "\"\"\")\n",
-    "\n",
-    "# 2. Custom prompts for specialized contexts\n",
-    "print(\"\\n🎨 Custom Prompt Example:\")\n",
-    "print(\"\"\"\n",
-    "# For specialized tissues or conditions\n",
-    "custom_prompt = '''You are analyzing {species} {tissue} from a patient with autoimmune disease.\n",
-    "Focus on immune cell subtypes and activation states.'''\n",
-    "\n",
-    "results = annotate_clusters(\n",
-    "    marker_genes=marker_genes,\n",
-    "    custom_prompt=custom_prompt,\n",
-    "    species='human',\n",
-    "    tissue='synovial fluid',\n",
-    "    model=model,\n",
-    "    api_key=api_key\n",
-    ")\n",
-    "\"\"\")\n",
-    "\n",
-    "# 3. Hierarchical annotation\n",
-    "print(\"\\n🌳 Hierarchical Annotation Example:\")\n",
-    "print(\"\"\"\n",
-    "# First level: broad cell types\n",
-    "level1_results = annotate_clusters(\n",
-    "    marker_genes, \n",
-    "    species=species,\n",
-    "    model=model,\n",
-    "    api_key=api_key\n",
-    ")\n",
-    "\n",
-    "# Second level: detailed subtypes for immune cells\n",
-    "immune_clusters = [c for c, ct in level1_results.items() \n",
-    "                  if any(term in ct.lower() for term in ['immune', 't cell', 'b cell'])]\n",
-    "immune_markers = {k: marker_genes[k] for k in immune_clusters}\n",
-    "\n",
-    "level2_results = annotate_clusters(\n",
-    "    immune_markers,\n",
-    "    species=species,\n",
-    "    tissue=tissue,\n",
-    "    custom_prompt='Focus on detailed immune cell subtypes and activation states.',\n",
-    "    model=model,\n",
-    "    api_key=api_key\n",
-    ")\n",
-    "\"\"\")\n",
-    "\n",
-    "# 4. Cost estimation\n",
-    "print(\"\\n💰 Cost Estimation:\")\n",
-    "total_clusters = len(marker_genes)\n",
-    "models_used = len(selected_models)\n",
-    "avg_tokens_per_annotation = 500  # Approximate\n",
-    "\n",
-    "print(f\"Clusters to annotate: {total_clusters}\")\n",
-    "print(f\"Models used: {models_used}\")\n",
-    "print(f\"Total API calls: ~{total_clusters * models_used}\")\n",
-    "print(\"\\nEstimated costs vary by provider:\")\n",
-    "print(\"- OpenRouter free models: $0\")\n",
-    "print(\"- GPT-4: ~$0.01-0.02 per cluster\")\n",
-    "print(\"- Claude: ~$0.01-0.015 per cluster\")\n",
-    "print(\"- Gemini: ~$0.001-0.005 per cluster\")\n",
-    "\n",
-    "# 5. Cache management\n",
-    "print(\"\\n🗄️ Cache Management:\")\n",
-    "print(\"\"\"\n",
-    "from mllmcelltype import get_cache_stats, clear_cache\n",
-    "\n",
-    "# Check cache statistics\n",
-    "stats = get_cache_stats()\n",
-    "print(f\"Cache size: {stats['total_size_mb']:.2f} MB\")\n",
-    "print(f\"Cached results: {stats['total_entries']}\")\n",
-    "\n",
-    "# Clear cache if needed\n",
-    "# clear_cache()  # Uncomment to clear\n",
-    "\"\"\")"
-   ]
+   "source": "# 1. Processing multiple datasets\nprint(\"🔄 Processing Multiple Datasets:\")\nprint(\"\"\"\n# Process multiple datasets with a simple loop\ndatasets = {\n    'sample1': marker_genes_1,\n    'sample2': marker_genes_2,\n    'sample3': marker_genes_3\n}\n\nresults = {}\nfor name, markers in datasets.items():\n    results[name] = annotate_clusters(\n        marker_genes=markers,\n        species='human',\n        provider='openai',\n        model='gpt-4-turbo'\n    )\n\"\"\")\n\n# 2. Custom prompts for specialized contexts\nprint(\"\\n🎨 Custom Prompt Example:\")\nprint(\"\"\"\n# For specialized tissues or conditions\ncustom_prompt = '''You are analyzing {species} {tissue} from a patient with autoimmune disease.\nFocus on immune cell subtypes and activation states.'''\n\nresults = annotate_clusters(\n    marker_genes=marker_genes,\n    custom_prompt=custom_prompt,\n    species='human',\n    tissue='synovial fluid',\n    model=model,\n    api_key=api_key\n)\n\"\"\")\n\n# 3. Hierarchical annotation\nprint(\"\\n🌳 Hierarchical Annotation Example:\")\nprint(\"\"\"\n# First level: broad cell types\nlevel1_results = annotate_clusters(\n    marker_genes, \n    species=species,\n    model=model,\n    api_key=api_key\n)\n\n# Second level: detailed subtypes for immune cells\nimmune_clusters = [c for c, ct in level1_results.items() \n                  if any(term in ct.lower() for term in ['immune', 't cell', 'b cell'])]\nimmune_markers = {k: marker_genes[k] for k in immune_clusters}\n\nlevel2_results = annotate_clusters(\n    immune_markers,\n    species=species,\n    tissue=tissue,\n    custom_prompt='Focus on detailed immune cell subtypes and activation states.',\n    model=model,\n    api_key=api_key\n)\n\"\"\")\n\n# 4. Cost estimation\nprint(\"\\n💰 Cost Estimation:\")\ntotal_clusters = len(marker_genes)\nmodels_used = len(selected_models)\navg_tokens_per_annotation = 500  # Approximate\n\nprint(f\"Clusters to annotate: {total_clusters}\")\nprint(f\"Models used: {models_used}\")\nprint(f\"Total API calls: ~{total_clusters * models_used}\")\nprint(\"\\nEstimated costs vary by provider:\")\nprint(\"- OpenRouter free models: $0\")\nprint(\"- GPT-4: ~$0.01-0.02 per cluster\")\nprint(\"- Claude: ~$0.01-0.015 per cluster\")\nprint(\"- Gemini: ~$0.001-0.005 per cluster\")\n\n# 5. Cache management\nprint(\"\\n🗄️ Cache Management:\")\nprint(\"\"\"\nfrom mllmcelltype import get_cache_stats, clear_cache\n\n# Check cache statistics\nstats = get_cache_stats()\nprint(f\"Cache size: {stats['total_size_mb']:.2f} MB\")\nprint(f\"Cached results: {stats['total_entries']}\")\n\n# Clear cache if needed\n# clear_cache()  # Uncomment to clear\n\"\"\")"
   },
   {
    "cell_type": "markdown",
 
@@ -241,30 +241,6 @@ consensus_results = interactive_consensus_annotation(
 
 ## Advanced Usage
 
-### Batch Annotation
-
-```python
-from mllmcelltype import batch_annotate_clusters
-
-# Prepare multiple sets of marker genes (e.g., from different samples)
-marker_genes_list = [marker_genes_df1, marker_genes_df2, marker_genes_df3]
-
-# Batch annotate multiple datasets efficiently
-batch_annotations = batch_annotate_clusters(
-    marker_genes_list=marker_genes_list,
-    species='mouse',                      # Organism species
-    provider='anthropic',                 # LLM provider
-    model='claude-sonnet-4-5-20250929',  # Latest Sonnet model (recommended)
-    tissue='brain'                       # Optional tissue context
-)
-
-# Process and utilize results
-for i, annotations in enumerate(batch_annotations):
-    print(f"Dataset {i+1} annotations:")
-    for cluster, annotation in annotations.items():
-        print(f"  Cluster {cluster}: {annotation}")
-```
-
 ### Targeted Analysis: New Enhanced Parameters
 
 mLLMCelltype v1.3.0+ introduces two powerful parameters for more precise control over cell type annotation:
 
@@ -1,6 +1,6 @@
 """mLLMCelltype: A Python module for cell type annotation using various LLMs."""
 
-from .annotate import annotate_clusters, batch_annotate_clusters, get_model_response
+from .annotate import annotate_clusters, get_model_response
 from .cache_manager import get_cache_info
 from .consensus import (
     check_consensus,
@@ -11,7 +11,6 @@
 from .functions import get_provider
 from .logger import setup_logging, write_log
 from .prompts import (
-    create_batch_prompt,
     create_consensus_check_prompt,
     create_discussion_prompt,
     create_prompt,
@@ -37,7 +36,6 @@
 __all__ = [
     # Core annotation
     "annotate_clusters",
-    "batch_annotate_clusters",
     "get_model_response",
     # Functions
     "get_provider",
@@ -56,7 +54,6 @@
     "format_results",
     # Prompts
     "create_prompt",
-    "create_batch_prompt",
     "create_discussion_prompt",
     "create_consensus_check_prompt",
     # Consensus