|
336 | 336 | { |
337 | 337 | "cell_type": "markdown", |
338 | 338 | "metadata": {}, |
339 | | - "source": [ |
340 | | - "## 🚀 5. Run Cell Type Annotation\n", |
341 | | - "\n", |
342 | | - "Now let's perform the annotation. This section includes both single-model and multi-model consensus options:" |
343 | | - ] |
| 339 | + "source": "## 🚀 5. Run Cell Type Annotation\n\nNow let's perform the annotation. This section includes both single-model and multi-model consensus options.\n\n### 💡 Demo Mode Available\n\n**Don't have an API key?** No problem! This notebook includes a **demo mode** that automatically loads pre-computed results for the example PBMC dataset. This allows you to:\n- Experience the complete workflow without API costs\n- See example outputs for visualization and analysis\n- Understand the tool's capabilities before committing resources\n\n**How it works:**\n- If you have API keys configured, the notebook will perform live annotation using LLMs\n- If no API keys are found, it will automatically load cached demo results\n- You'll always see a clear message indicating which mode is active\n\n**Note:** Demo mode only works with the example PBMC data. For your own datasets, you'll need to configure at least one API key (free options available via OpenRouter)." |
344 | 340 | }, |
345 | 341 | { |
346 | 342 | "cell_type": "code", |
347 | 343 | "execution_count": null, |
348 | 344 | "metadata": {}, |
349 | 345 | "outputs": [], |
350 | | - "source": [ |
351 | | - "from mllmcelltype import interactive_consensus_annotation, annotate_clusters\n", |
352 | | - "\n", |
353 | | - "# Set up parameters\n", |
354 | | - "annotation_params = {\n", |
355 | | - " 'marker_genes': marker_genes,\n", |
356 | | - " 'species': species,\n", |
357 | | - " 'tissue': tissue if tissue else None,\n", |
358 | | - " 'use_cache': True, # Save API costs by caching results\n", |
359 | | - " 'verbose': True # Show progress\n", |
360 | | - "}\n", |
361 | | - "\n", |
362 | | - "print(\"🔬 Starting annotation...\\n\")\n", |
363 | | - "\n", |
364 | | - "if len(selected_models) == 1:\n", |
365 | | - " # Single model annotation\n", |
366 | | - " model = selected_models[0]\n", |
367 | | - " print(f\"Using single model: {model['provider']}/{model['model']}\")\n", |
368 | | - " \n", |
369 | | - " # Use the correct parameter name\n", |
370 | | - " results = annotate_clusters(\n", |
371 | | - " **annotation_params,\n", |
372 | | - " model=model, # Changed from model_config\n", |
373 | | - " api_key=api_keys.get(model['provider'], '')\n", |
374 | | - " )\n", |
375 | | - " \n", |
376 | | - " # Format results for consistency\n", |
377 | | - " formatted_results = {\n", |
378 | | - " 'consensus': results,\n", |
379 | | - " 'consensus_proportion': {k: 1.0 for k in results.keys()},\n", |
380 | | - " 'entropy': {k: 0.0 for k in results.keys()},\n", |
381 | | - " 'model_annotations': {k: {model['model']: v} for k, v in results.items()},\n", |
382 | | - " 'controversial_clusters': []\n", |
383 | | - " }\n", |
384 | | - " \n", |
385 | | - "else:\n", |
386 | | - " # Multi-model consensus annotation\n", |
387 | | - " print(f\"Using {len(selected_models)} models for consensus annotation\")\n", |
388 | | - " \n", |
389 | | - " # Advanced parameters for consensus\n", |
390 | | - " consensus_params = {\n", |
391 | | - " 'consensus_threshold': 0.6, # Minimum agreement (lowered for efficiency)\n", |
392 | | - " 'entropy_threshold': 1.2, # Maximum entropy (raised for efficiency)\n", |
393 | | - " 'max_discussion_rounds': 3, # Maximum discussion rounds\n", |
394 | | - " 'consensus_model': None # Auto-select best model for consensus\n", |
395 | | - " }\n", |
396 | | - " \n", |
397 | | - " # Show advanced options\n", |
398 | | - " use_advanced = input(\"\\nUse advanced consensus settings? (y/n) [default: n]: \") or 'n'\n", |
399 | | - " if use_advanced.lower() == 'y':\n", |
400 | | - " print(\"\\nAdvanced settings (press Enter for defaults):\")\n", |
401 | | - " ct = input(f\"Consensus threshold (default {consensus_params['consensus_threshold']}): \")\n", |
402 | | - " if ct: \n", |
403 | | - " consensus_params['consensus_threshold'] = float(ct)\n", |
404 | | - " \n", |
405 | | - " et = input(f\"Entropy threshold (default {consensus_params['entropy_threshold']}): \")\n", |
406 | | - " if et: \n", |
407 | | - " consensus_params['entropy_threshold'] = float(et)\n", |
408 | | - " \n", |
409 | | - " mr = input(f\"Max discussion rounds (default {consensus_params['max_discussion_rounds']}): \")\n", |
410 | | - " if mr: \n", |
411 | | - " consensus_params['max_discussion_rounds'] = int(mr)\n", |
412 | | - " \n", |
413 | | - " # Run consensus annotation\n", |
414 | | - " formatted_results = interactive_consensus_annotation(\n", |
415 | | - " **annotation_params,\n", |
416 | | - " models=selected_models,\n", |
417 | | - " api_keys=api_keys,\n", |
418 | | - " **consensus_params\n", |
419 | | - " )\n", |
420 | | - "\n", |
421 | | - "print(\"\\n✅ Annotation complete!\")" |
422 | | - ] |
| 346 | + "source": "from mllmcelltype import interactive_consensus_annotation, annotate_clusters\nimport os\nimport json\n\n# Check if we have API keys and can run actual annotation\nhas_api_keys = any(\n os.environ.get(f'{provider.upper()}_API_KEY') \n for provider in ['OPENAI', 'ANTHROPIC', 'GOOGLE', 'OPENROUTER', 'DEEPSEEK', 'QWEN']\n)\n\n# Check if using example PBMC data\nusing_example_data = use_example.lower() == 'y' if 'use_example' in globals() else False\n\n# Determine if we should use demo mode\nuse_demo_mode = not has_api_keys and using_example_data\n\nif use_demo_mode:\n    print(\"=\" * 70)\n    print(\"🎬 DEMO MODE ACTIVATED\")\n    print(\"=\" * 70)\n    print(\"\\n📢 Notice: No API keys detected.\")\n    print(\"📂 Loading pre-computed demo results for the PBMC example dataset...\")\n    print(\"💡 This allows you to experience the workflow without API costs.\")\n    print(\"\\n⚠️  These are cached results from a previous run, not live LLM predictions.\")\n    print(\"   To run actual annotations, please configure at least one API key above.\")\n    print(\"=\" * 70)\n    print(\"\\n⏳ Loading cached results...\\n\")\n    \n    # Load cached results\n    try:\n        # Load the detailed results JSON\n        with open('demo_data/cached_detailed_results.json', 'r') as f:\n            formatted_results = json.load(f)\n        \n        print(\"✅ Demo results loaded successfully!\")\n        print(f\"📊 Loaded annotations for {len(formatted_results['consensus'])} clusters\")\n        print(f\"🤖 Simulating multi-model consensus with 3 models\")\n        \n    except FileNotFoundError:\n        print(\"❌ Error: Demo data files not found.\")\n        print(\"Please ensure 'demo_data/' directory exists with cached results.\")\n        raise\n    \nelse:\n    # Original annotation code - run actual LLM annotation\n    print(\"🔬 Starting LIVE annotation with LLMs...\\n\")\n    \n    # Set up parameters\n    annotation_params = {\n        'marker_genes': marker_genes,\n        'species': species,\n        'tissue': tissue if tissue else None,\n        'use_cache': True,  # Save API costs by caching results\n        'verbose': True     # Show progress\n    }\n    \n    if len(selected_models) == 1:\n        # Single model annotation\n        model = selected_models[0]\n        print(f\"Using single model: {model['provider']}/{model['model']}\")\n        \n        # Use the correct parameter name\n        results = annotate_clusters(\n            **annotation_params,\n            model=model,\n            api_key=api_keys.get(model['provider'], '')\n        )\n        \n        # Format results for consistency\n        formatted_results = {\n            'consensus': results,\n            'consensus_proportion': {k: 1.0 for k in results.keys()},\n            'entropy': {k: 0.0 for k in results.keys()},\n            'model_annotations': {k: {model['model']: v} for k, v in results.items()},\n            'controversial_clusters': []\n        }\n        \n    else:\n        # Multi-model consensus annotation\n        print(f\"Using {len(selected_models)} models for consensus annotation\")\n        \n        # Advanced parameters for consensus\n        consensus_params = {\n            'consensus_threshold': 0.6,      # Minimum agreement (lowered for efficiency)\n            'entropy_threshold': 1.2,        # Maximum entropy (raised for efficiency)\n            'max_discussion_rounds': 3,      # Maximum discussion rounds\n            'consensus_model': None          # Auto-select best model for consensus\n        }\n        \n        # Show advanced options\n        use_advanced = input(\"\\nUse advanced consensus settings? (y/n) [default: n]: \") or 'n'\n        if use_advanced.lower() == 'y':\n            print(\"\\nAdvanced settings (press Enter for defaults):\")\n            ct = input(f\"Consensus threshold (default {consensus_params['consensus_threshold']}): \")\n            if ct: \n                consensus_params['consensus_threshold'] = float(ct)\n            \n            et = input(f\"Entropy threshold (default {consensus_params['entropy_threshold']}): \")\n            if et: \n                consensus_params['entropy_threshold'] = float(et)\n            \n            mr = input(f\"Max discussion rounds (default {consensus_params['max_discussion_rounds']}): \")\n            if mr: \n                consensus_params['max_discussion_rounds'] = int(mr)\n        \n        # Run consensus annotation\n        formatted_results = interactive_consensus_annotation(\n            **annotation_params,\n            models=selected_models,\n            api_keys=api_keys,\n            **consensus_params\n        )\n    \n    print(\"\\n✅ Annotation complete!\")\n\n# Summary regardless of mode\nprint(f\"\\n📈 Results Summary:\")\nprint(f\"   - Annotated clusters: {len(formatted_results['consensus'])}\")\nprint(f\"   - Average consensus: {sum(formatted_results['consensus_proportion'].values()) / len(formatted_results['consensus_proportion']):.2%}\")\nif formatted_results.get('controversial_clusters'):\n    print(f\"   - Controversial clusters: {len(formatted_results['controversial_clusters'])}\")" |
423 | 347 | }, |
424 | 348 | { |
425 | 349 | "cell_type": "markdown", |
|
0 commit comments