|
548 | 548 | "execution_count": null, |
549 | 549 | "metadata": {}, |
550 | 550 | "outputs": [], |
551 | | - "source": [ |
552 | | - "# 1. Batch processing multiple datasets\n", |
553 | | - "print(\"🔄 Batch Processing Example:\")\n", |
554 | | - "print(\"\"\"\n", |
555 | | - "from mllmcelltype import batch_annotate_clusters\n", |
556 | | - "\n", |
557 | | - "# Process multiple datasets\n", |
558 | | - "datasets = {\n", |
559 | | - " 'sample1': marker_genes_1,\n", |
560 | | - " 'sample2': marker_genes_2,\n", |
561 | | - " 'sample3': marker_genes_3\n", |
562 | | - "}\n", |
563 | | - "\n", |
564 | | - "batch_results = batch_annotate_clusters(\n", |
565 | | - " datasets=datasets,\n", |
566 | | - " species='human',\n", |
567 | | - " models=selected_models,\n", |
568 | | - " api_keys=api_keys\n", |
569 | | - ")\n", |
570 | | - "\"\"\")\n", |
571 | | - "\n", |
572 | | - "# 2. Custom prompts for specialized contexts\n", |
573 | | - "print(\"\\n🎨 Custom Prompt Example:\")\n", |
574 | | - "print(\"\"\"\n", |
575 | | - "# For specialized tissues or conditions\n", |
576 | | - "custom_prompt = '''You are analyzing {species} {tissue} from a patient with autoimmune disease.\n", |
577 | | - "Focus on immune cell subtypes and activation states.'''\n", |
578 | | - "\n", |
579 | | - "results = annotate_clusters(\n", |
580 | | - " marker_genes=marker_genes,\n", |
581 | | - " custom_prompt=custom_prompt,\n", |
582 | | - " species='human',\n", |
583 | | - " tissue='synovial fluid',\n", |
584 | | - " model=model,\n", |
585 | | - " api_key=api_key\n", |
586 | | - ")\n", |
587 | | - "\"\"\")\n", |
588 | | - "\n", |
589 | | - "# 3. Hierarchical annotation\n", |
590 | | - "print(\"\\n🌳 Hierarchical Annotation Example:\")\n", |
591 | | - "print(\"\"\"\n", |
592 | | - "# First level: broad cell types\n", |
593 | | - "level1_results = annotate_clusters(\n", |
594 | | - " marker_genes, \n", |
595 | | - " species=species,\n", |
596 | | - " model=model,\n", |
597 | | - " api_key=api_key\n", |
598 | | - ")\n", |
599 | | - "\n", |
600 | | - "# Second level: detailed subtypes for immune cells\n", |
601 | | - "immune_clusters = [c for c, ct in level1_results.items() \n", |
602 | | - " if any(term in ct.lower() for term in ['immune', 't cell', 'b cell'])]\n", |
603 | | - "immune_markers = {k: marker_genes[k] for k in immune_clusters}\n", |
604 | | - "\n", |
605 | | - "level2_results = annotate_clusters(\n", |
606 | | - " immune_markers,\n", |
607 | | - " species=species,\n", |
608 | | - " tissue=tissue,\n", |
609 | | - " custom_prompt='Focus on detailed immune cell subtypes and activation states.',\n", |
610 | | - " model=model,\n", |
611 | | - " api_key=api_key\n", |
612 | | - ")\n", |
613 | | - "\"\"\")\n", |
614 | | - "\n", |
615 | | - "# 4. Cost estimation\n", |
616 | | - "print(\"\\n💰 Cost Estimation:\")\n", |
617 | | - "total_clusters = len(marker_genes)\n", |
618 | | - "models_used = len(selected_models)\n", |
619 | | - "avg_tokens_per_annotation = 500 # Approximate\n", |
620 | | - "\n", |
621 | | - "print(f\"Clusters to annotate: {total_clusters}\")\n", |
622 | | - "print(f\"Models used: {models_used}\")\n", |
623 | | - "print(f\"Total API calls: ~{total_clusters * models_used}\")\n", |
624 | | - "print(\"\\nEstimated costs vary by provider:\")\n", |
625 | | - "print(\"- OpenRouter free models: $0\")\n", |
626 | | - "print(\"- GPT-4: ~$0.01-0.02 per cluster\")\n", |
627 | | - "print(\"- Claude: ~$0.01-0.015 per cluster\")\n", |
628 | | - "print(\"- Gemini: ~$0.001-0.005 per cluster\")\n", |
629 | | - "\n", |
630 | | - "# 5. Cache management\n", |
631 | | - "print(\"\\n🗄️ Cache Management:\")\n", |
632 | | - "print(\"\"\"\n", |
633 | | - "from mllmcelltype import get_cache_stats, clear_cache\n", |
634 | | - "\n", |
635 | | - "# Check cache statistics\n", |
636 | | - "stats = get_cache_stats()\n", |
637 | | - "print(f\"Cache size: {stats['total_size_mb']:.2f} MB\")\n", |
638 | | - "print(f\"Cached results: {stats['total_entries']}\")\n", |
639 | | - "\n", |
640 | | - "# Clear cache if needed\n", |
641 | | - "# clear_cache() # Uncomment to clear\n", |
642 | | - "\"\"\")" |
643 | | - ] |
| 551 | + "source": "# 1. Processing multiple datasets\nprint(\"🔄 Processing Multiple Datasets:\")\nprint(\"\"\"\n# Process multiple datasets with a simple loop\ndatasets = {\n 'sample1': marker_genes_1,\n 'sample2': marker_genes_2,\n 'sample3': marker_genes_3\n}\n\nresults = {}\nfor name, markers in datasets.items():\n results[name] = annotate_clusters(\n marker_genes=markers,\n species='human',\n provider='openai',\n model='gpt-4-turbo'\n )\n\"\"\")\n\n# 2. Custom prompts for specialized contexts\nprint(\"\\n🎨 Custom Prompt Example:\")\nprint(\"\"\"\n# For specialized tissues or conditions\ncustom_prompt = '''You are analyzing {species} {tissue} from a patient with autoimmune disease.\nFocus on immune cell subtypes and activation states.'''\n\nresults = annotate_clusters(\n marker_genes=marker_genes,\n custom_prompt=custom_prompt,\n species='human',\n tissue='synovial fluid',\n model=model,\n api_key=api_key\n)\n\"\"\")\n\n# 3. Hierarchical annotation\nprint(\"\\n🌳 Hierarchical Annotation Example:\")\nprint(\"\"\"\n# First level: broad cell types\nlevel1_results = annotate_clusters(\n marker_genes, \n species=species,\n model=model,\n api_key=api_key\n)\n\n# Second level: detailed subtypes for immune cells\nimmune_clusters = [c for c, ct in level1_results.items() \n if any(term in ct.lower() for term in ['immune', 't cell', 'b cell'])]\nimmune_markers = {k: marker_genes[k] for k in immune_clusters}\n\nlevel2_results = annotate_clusters(\n immune_markers,\n species=species,\n tissue=tissue,\n custom_prompt='Focus on detailed immune cell subtypes and activation states.',\n model=model,\n api_key=api_key\n)\n\"\"\")\n\n# 4. Cost estimation\nprint(\"\\n💰 Cost Estimation:\")\ntotal_clusters = len(marker_genes)\nmodels_used = len(selected_models)\navg_tokens_per_annotation = 500 # Approximate\n\nprint(f\"Clusters to annotate: {total_clusters}\")\nprint(f\"Models used: {models_used}\")\nprint(f\"Total API calls: ~{total_clusters * models_used}\")\nprint(\"\\nEstimated costs vary by provider:\")\nprint(\"- OpenRouter free models: $0\")\nprint(\"- GPT-4: ~$0.01-0.02 per cluster\")\nprint(\"- Claude: ~$0.01-0.015 per cluster\")\nprint(\"- Gemini: ~$0.001-0.005 per cluster\")\n\n# 5. Cache management\nprint(\"\\n🗄️ Cache Management:\")\nprint(\"\"\"\nfrom mllmcelltype import get_cache_stats, clear_cache\n\n# Check cache statistics\nstats = get_cache_stats()\nprint(f\"Cache size: {stats['total_size_mb']:.2f} MB\")\nprint(f\"Cached results: {stats['total_entries']}\")\n\n# Clear cache if needed\n# clear_cache() # Uncomment to clear\n\"\"\")" |
644 | 552 | }, |
645 | 553 | { |
646 | 554 | "cell_type": "markdown", |
|
0 commit comments