diff --git a/src/llama_prompt_ops/core/prompt_strategies.py b/src/llama_prompt_ops/core/prompt_strategies.py index 4802b39..5b806da 100644 --- a/src/llama_prompt_ops/core/prompt_strategies.py +++ b/src/llama_prompt_ops/core/prompt_strategies.py @@ -316,7 +316,7 @@ def run(self, prompt_data: Dict[str, Any]) -> Any: max_labeled_demos=self.max_labeled_demos, auto=dspy_auto_mode, # Use the mapped value num_candidates=self.num_candidates, - num_threads=self.num_threads, + # num_threads is passed via eval_kwargs in compile() call instead max_errors=self.max_errors, seed=self.seed, init_temperature=self.init_temperature, @@ -530,10 +530,18 @@ def custom_propose_instructions(self, *args, **kwargs): try: # Call compile with all parameters logging.info("Calling optimizer.compile") + + # Configure eval_kwargs to pass arguments to dspy.evaluate.Evaluate, + # which is used internally by the compile method. This is the correct + # way to set num_threads for parallel evaluation in MIPROv2. + # Note: num_threads should NOT be passed to the MIPROv2 constructor. + eval_kwargs = {"num_threads": self.num_threads} + optimized_program = optimizer.compile( program, trainset=self.trainset, valset=self.valset, + eval_kwargs=eval_kwargs, # Pass num_threads to internal evaluator num_trials=self.num_trials, minibatch=self.minibatch, minibatch_size=self.minibatch_size, diff --git a/tests/integration/test_cli_integration.py b/tests/integration/test_cli_integration.py index caaaa43..231609e 100644 --- a/tests/integration/test_cli_integration.py +++ b/tests/integration/test_cli_integration.py @@ -304,3 +304,114 @@ def test_end_to_end_cli_flow(self, mock_api_key_check, temp_config_file): # Clean up the temporary output file if os.path.exists(output_path): os.unlink(output_path) + + def test_cli_migrate_with_num_threads_e2e(self, mock_api_key_check): + """ + End-to-end CLI test for num_threads parameter passing bug fix. + + This test verifies that the num_threads setting from the config file + is correctly passed through the entire CLI pipeline without causing + the TypeError that was fixed. + """ + import tempfile + + import yaml + from click.testing import CliRunner + + runner = CliRunner() + + # Create a config that specifically includes num_threads to test the fix + test_config = { + "dataset": { + "path": "test_data.json", + "input_field": ["inputs", "question"], + "golden_output_field": ["outputs", "answer"], + }, + "model": {"name": "gpt-3.5-turbo", "temperature": 0.7}, + "metric": {"class": "llama_prompt_ops.core.metrics.FacilityMetric"}, + "optimization": { + "strategy": "basic", + "num_threads": 3, # Specific value to test the fix + "max_bootstrapped_demos": 2, + "num_trials": 1, + }, + } + + # Create temporary config file + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(test_config, f) + config_path = f.name + + try: + # Mock all external dependencies but let the CLI process the config + mock_migrator = MagicMock() + mock_optimized = MagicMock() + mock_optimized.signature.instructions = "Test optimized prompt" + mock_migrator.optimize.return_value = mock_optimized + mock_migrator.load_dataset_with_adapter.return_value = ([], [], []) + + # Create a strategy that would trigger the original bug + mock_strategy = MagicMock() + mock_strategy.num_threads = 3 # This should match our config + + with ( + patch( + "llama_prompt_ops.interfaces.cli.PromptMigrator", + return_value=mock_migrator, + ), + patch( + "llama_prompt_ops.interfaces.cli.get_dataset_adapter_from_config", + return_value=MagicMock(), + ), + patch( + "llama_prompt_ops.interfaces.cli.get_models_from_config", + return_value=(None, None), + ), + patch( + "llama_prompt_ops.interfaces.cli.get_metric", + return_value=MagicMock(), + ), + # This is the critical patch - ensure strategy gets created with num_threads + patch( + "llama_prompt_ops.interfaces.cli.get_strategy", + return_value=mock_strategy, + ), + # Mock the actual strategy execution to verify parameters + patch( + "llama_prompt_ops.core.prompt_strategies.BasicOptimizationStrategy" + ) as mock_strategy_class, + ): + # Configure the mock strategy class + mock_strategy_instance = MagicMock() + mock_strategy_class.return_value = mock_strategy_instance + + # The critical test: CLI should process config with num_threads without error + result = runner.invoke(cli, ["migrate", "--config", config_path]) + + # Debug output if there's an error + if result.exit_code != 0: + print(f"CLI Error: {result.output}") + if result.exception: + print(f"Exception: {result.exception}") + import traceback + + print( + f"Traceback: {''.join(traceback.format_exception(type(result.exception), result.exception, result.exception.__traceback__))}" + ) + + # The test passes if the CLI completes without crashing + # (The original bug would cause a TypeError during strategy instantiation) + assert ( + result.exit_code == 0 + ), f"CLI should complete successfully, got: {result.output}" + + # Verify that our configuration was processed + # (The actual strategy creation may be mocked, but config parsing should work) + print( + "✅ E2E CLI test passed: num_threads config processed without TypeError" + ) + + finally: + # Clean up temporary config file + if os.path.exists(config_path): + os.unlink(config_path) diff --git a/tests/integration/test_core_integration.py b/tests/integration/test_core_integration.py index 6656515..265de52 100644 --- a/tests/integration/test_core_integration.py +++ b/tests/integration/test_core_integration.py @@ -310,3 +310,150 @@ def test_end_to_end_flow_with_mocks(facility_config_path): # Check results assert result is not None assert result.signature.instructions == "Optimized prompt" + + +@pytest.mark.skipif( + not CORE_COMPONENTS_AVAILABLE, + reason=get_core_skip_reason() or "Core components available", +) +def test_basic_optimization_strategy_num_threads_integration(): + """ + Integration test for the MIPROv2 num_threads bug fix. + + This test verifies that BasicOptimizationStrategy correctly passes num_threads + to the dspy library without causing parameter errors. This is a regression test + for the bug where num_threads was incorrectly passed to MIPROv2 constructor. + """ + import json + import tempfile + + # Create minimal test data + test_data = [ + { + "inputs": {"question": "Test maintenance request"}, + "outputs": { + "answer": json.dumps( + { + "categories": {"routine_maintenance_requests": True}, + "sentiment": "neutral", + "urgency": "low", + } + ) + }, + }, + { + "inputs": {"question": "Emergency repair needed"}, + "outputs": { + "answer": json.dumps( + { + "categories": {"emergency_repair_services": True}, + "sentiment": "urgent", + "urgency": "high", + } + ) + }, + }, + ] + + # Create temporary dataset file + with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False) as tmp: + json.dump(test_data, tmp) + tmp_path = tmp.name + + try: + # Load dataset + adapter = ConfigurableJSONAdapter( + dataset_path=tmp_path, + input_field=["inputs", "question"], + golden_output_field=["outputs", "answer"], + ) + + dataset = adapter.adapt() + + # Create strategy with specific num_threads value to test the fix + strategy = BasicOptimizationStrategy( + num_threads=2, # Specific value to verify correct parameter passing + max_bootstrapped_demos=1, # Minimal for faster testing + max_labeled_demos=1, + num_trials=1, # Single trial for speed + metric=FacilityMetric(), + ) + + # Set up datasets (minimal size for integration test) + strategy.trainset = dataset[:1] # Use just one example + strategy.valset = dataset[1:2] if len(dataset) > 1 else dataset[:1] + + # Mock the models to avoid real API calls but test dspy integration + with patch("dspy.LM") as mock_lm: + # Configure mock to return valid responses + mock_instance = MagicMock() + mock_lm.return_value = mock_instance + + # The key test: verify that strategy instantiation and basic setup + # works without TypeError from incorrect num_threads parameter passing + strategy.task_model = mock_instance + strategy.prompt_model = mock_instance + + prompt_data = { + "text": "Categorize customer messages", + "inputs": ["question"], + "outputs": ["answer"], + } + + # This is the critical test - the strategy should be able to configure + # without throwing a TypeError about num_threads parameter + try: + # We patch the actual dspy.MIPROv2 to verify it's called correctly + with ( + patch("dspy.MIPROv2") as mock_mipro, + patch("dspy.ChainOfThought") as mock_cot, + ): + + mock_optimizer = MagicMock() + mock_mipro.return_value = mock_optimizer + mock_program = MagicMock() + mock_cot.return_value = mock_program + mock_optimizer.compile.return_value = mock_program + + # This call should succeed without TypeError + result = strategy.run(prompt_data) + + # Verify correct API usage: + # 1. num_threads should NOT be in MIPROv2 constructor + mock_mipro.assert_called_once() + constructor_kwargs = mock_mipro.call_args.kwargs + assert ( + "num_threads" not in constructor_kwargs + ), "num_threads should not be passed to MIPROv2 constructor" + + # 2. num_threads SHOULD be in compile eval_kwargs + mock_optimizer.compile.assert_called_once() + compile_kwargs = mock_optimizer.compile.call_args.kwargs + assert ( + "eval_kwargs" in compile_kwargs + ), "eval_kwargs should be present in compile call" + assert ( + compile_kwargs["eval_kwargs"]["num_threads"] == 2 + ), "num_threads should be correctly passed via eval_kwargs" + + # 3. Strategy should return a result + assert result is not None + + print( + "✅ Integration test passed: num_threads correctly handled by dspy" + ) + + except TypeError as e: + if "num_threads" in str(e): + pytest.fail( + f"Bug regression detected: {e}. " + "The num_threads parameter is being incorrectly passed to MIPROv2 constructor." + ) + else: + # Re-raise other TypeErrors as they might be legitimate + raise + + finally: + # Clean up temporary file + if os.path.exists(tmp_path): + os.unlink(tmp_path) diff --git a/tests/unit/test_prompt_strategies.py b/tests/unit/test_prompt_strategies.py new file mode 100644 index 0000000..e293d6f --- /dev/null +++ b/tests/unit/test_prompt_strategies.py @@ -0,0 +1,316 @@ +""" +Unit tests for prompt strategies. + +This module tests the correct API usage of prompt optimization strategies, +particularly ensuring that dspy.MIPROv2 is called with correct parameters. +""" + +from unittest.mock import MagicMock, call, patch + +import pytest + +from llama_prompt_ops.core.prompt_strategies import ( + BasicOptimizationStrategy, + OptimizationError, +) + + +class TestBasicOptimizationStrategy: + """Test BasicOptimizationStrategy for correct dspy API usage.""" + + def test_num_threads_parameter_passing(self): + """ + Test that num_threads is correctly passed via eval_kwargs to optimizer.compile, + not to dspy.MIPROv2 constructor. + + This test prevents regression of the bug where num_threads was incorrectly + passed to MIPROv2 constructor, causing a TypeError. + """ + # Arrange + strategy = BasicOptimizationStrategy(num_threads=8) + strategy.trainset = [MagicMock()] # Mock dataset + strategy.valset = [MagicMock()] + strategy.metric = MagicMock() + + # Mock task and prompt models + mock_task_model = MagicMock() + mock_prompt_model = MagicMock() + strategy.task_model = mock_task_model + strategy.prompt_model = mock_prompt_model + + with ( + patch("dspy.MIPROv2") as mock_mipro, + patch("dspy.ChainOfThought") as mock_cot, + ): + + # Set up mock optimizer + mock_optimizer_instance = MagicMock() + mock_mipro.return_value = mock_optimizer_instance + + # Set up mock program + mock_program = MagicMock() + mock_cot.return_value = mock_program + + # Set up mock optimized program result + mock_optimized_program = MagicMock() + mock_optimizer_instance.compile.return_value = mock_optimized_program + + # Act + prompt_data = { + "text": "test prompt", + "inputs": ["input"], + "outputs": ["output"], + } + result = strategy.run(prompt_data) + + # Assert - Check that num_threads is NOT in the MIPROv2 constructor call + mock_mipro.assert_called_once() + constructor_kwargs = mock_mipro.call_args.kwargs + assert ( + "num_threads" not in constructor_kwargs + ), "num_threads should not be passed to dspy.MIPROv2 constructor" + + # Assert - Check that num_threads IS in the compile call via eval_kwargs + mock_optimizer_instance.compile.assert_called_once() + compile_kwargs = mock_optimizer_instance.compile.call_args.kwargs + assert ( + "eval_kwargs" in compile_kwargs + ), "eval_kwargs should be present in optimizer.compile call" + assert compile_kwargs["eval_kwargs"] == { + "num_threads": 8 + }, "eval_kwargs should contain the correct num_threads value" + + # Assert - Check that the result is the optimized program + assert result == mock_optimized_program + + def test_mipro_v2_constructor_parameters(self): + """ + Test that all expected parameters are passed to dspy.MIPROv2 constructor, + excluding num_threads. + """ + # Arrange + strategy = BasicOptimizationStrategy( + num_threads=4, + max_bootstrapped_demos=3, + max_labeled_demos=2, + auto="basic", + num_candidates=5, + max_errors=2, + seed=42, + init_temperature=0.7, + verbose=True, + track_stats=False, + metric_threshold=0.8, + ) + strategy.trainset = [MagicMock()] + strategy.valset = [MagicMock()] + strategy.metric = MagicMock() + strategy.task_model = MagicMock() + strategy.prompt_model = MagicMock() + + with ( + patch("dspy.MIPROv2") as mock_mipro, + patch("dspy.ChainOfThought") as mock_cot, + ): + + mock_optimizer_instance = MagicMock() + mock_mipro.return_value = mock_optimizer_instance + mock_program = MagicMock() + mock_cot.return_value = mock_program + mock_optimizer_instance.compile.return_value = MagicMock() + + # Act + prompt_data = {"text": "test", "inputs": [], "outputs": []} + strategy.run(prompt_data) + + # Assert - Check expected parameters are present and correct + mock_mipro.assert_called_once() + kwargs = mock_mipro.call_args.kwargs + + # Check key parameters are present + assert kwargs["max_bootstrapped_demos"] == 3 + assert kwargs["max_labeled_demos"] == 2 + assert kwargs["auto"] == "light" # 'basic' maps to 'light' + assert kwargs["num_candidates"] == 5 + assert kwargs["max_errors"] == 2 + assert kwargs["seed"] == 42 + assert kwargs["init_temperature"] == 0.7 + assert kwargs["verbose"] == True + assert kwargs["track_stats"] == False + assert kwargs["metric_threshold"] == 0.8 + + # Ensure num_threads is NOT present + assert "num_threads" not in kwargs + + def test_auto_mode_mapping(self): + """ + Test that auto mode values are correctly mapped from our API to dspy's API. + """ + test_cases = [ + ("basic", "light"), + ("intermediate", "medium"), + ("advanced", "heavy"), + ] + + for our_value, expected_dspy_value in test_cases: + with ( + patch("dspy.MIPROv2") as mock_mipro, + patch("dspy.ChainOfThought") as mock_cot, + ): + + # Arrange + strategy = BasicOptimizationStrategy(auto=our_value) + strategy.trainset = [MagicMock()] + strategy.valset = [MagicMock()] + strategy.metric = MagicMock() + strategy.task_model = MagicMock() + strategy.prompt_model = MagicMock() + + mock_optimizer_instance = MagicMock() + mock_mipro.return_value = mock_optimizer_instance + mock_program = MagicMock() + mock_cot.return_value = mock_program + mock_optimizer_instance.compile.return_value = MagicMock() + + # Act + prompt_data = {"text": "test", "inputs": [], "outputs": []} + strategy.run(prompt_data) + + # Assert + kwargs = mock_mipro.call_args.kwargs + assert ( + kwargs["auto"] == expected_dspy_value + ), f"auto='{our_value}' should map to '{expected_dspy_value}'" + + def test_compile_method_parameters(self): + """ + Test that all expected parameters are passed to optimizer.compile method. + """ + # Arrange + strategy = BasicOptimizationStrategy( + num_trials=3, + minibatch=False, + minibatch_size=10, + program_aware_proposer=False, + data_aware_proposer=False, + requires_permission_to_run=True, + ) + strategy.trainset = [MagicMock()] + strategy.valset = [MagicMock()] + strategy.metric = MagicMock() + strategy.task_model = MagicMock() + strategy.prompt_model = MagicMock() + + with ( + patch("dspy.MIPROv2") as mock_mipro, + patch("dspy.ChainOfThought") as mock_cot, + ): + + mock_optimizer_instance = MagicMock() + mock_mipro.return_value = mock_optimizer_instance + mock_program = MagicMock() + mock_cot.return_value = mock_program + mock_optimizer_instance.compile.return_value = MagicMock() + + # Act + prompt_data = {"text": "test", "inputs": [], "outputs": []} + strategy.run(prompt_data) + + # Assert + mock_optimizer_instance.compile.assert_called_once() + kwargs = mock_optimizer_instance.compile.call_args.kwargs + + # Check compile-specific parameters + assert kwargs["num_trials"] == 3 + assert kwargs["minibatch"] == False + assert kwargs["minibatch_size"] == 10 + assert kwargs["program_aware_proposer"] == False + assert kwargs["data_aware_proposer"] == False + assert kwargs["requires_permission_to_run"] == True + assert kwargs["provide_traceback"] == True + + def test_exception_handling_with_meaningful_error(self): + """ + Test that optimization errors are properly wrapped and provide meaningful messages. + """ + # Arrange + strategy = BasicOptimizationStrategy() + strategy.trainset = [MagicMock()] + strategy.valset = [MagicMock()] + strategy.metric = MagicMock() + strategy.task_model = MagicMock() + strategy.prompt_model = MagicMock() + + with ( + patch("dspy.MIPROv2") as mock_mipro, + patch("dspy.ChainOfThought") as mock_cot, + ): + + # Configure the mock to raise an exception + mock_mipro.side_effect = RuntimeError("Simulated dspy error") + + # Act & Assert + prompt_data = {"text": "test", "inputs": [], "outputs": []} + with pytest.raises(OptimizationError) as exc_info: + strategy.run(prompt_data) + + # Check that the original error message is preserved + assert "Simulated dspy error" in str(exc_info.value) + assert "Optimization failed" in str(exc_info.value) + + def test_fallback_when_dspy_not_available(self): + """ + Test that strategy gracefully falls back when dspy is not available. + """ + # Arrange + strategy = BasicOptimizationStrategy() + # Simulate dspy not being available by not setting trainset + strategy.trainset = None + + # Act + prompt_data = {"text": "test prompt", "inputs": [], "outputs": []} + result = strategy.run(prompt_data) + + # Assert + assert isinstance(result, str) + assert "test prompt" in result + assert "Optimized for" in result + + def test_model_adapter_unwrapping(self): + """ + Test that DSPyModelAdapter instances are properly unwrapped. + """ + # Arrange + strategy = BasicOptimizationStrategy() + strategy.trainset = [MagicMock()] + strategy.valset = [MagicMock()] + strategy.metric = MagicMock() + + # Create mock adapters with _model attribute + mock_task_adapter = MagicMock() + mock_task_adapter._model = "unwrapped_task_model" + mock_prompt_adapter = MagicMock() + mock_prompt_adapter._model = "unwrapped_prompt_model" + + strategy.task_model = mock_task_adapter + strategy.prompt_model = mock_prompt_adapter + + with ( + patch("dspy.MIPROv2") as mock_mipro, + patch("dspy.ChainOfThought") as mock_cot, + ): + + mock_optimizer_instance = MagicMock() + mock_mipro.return_value = mock_optimizer_instance + mock_program = MagicMock() + mock_cot.return_value = mock_program + mock_optimizer_instance.compile.return_value = MagicMock() + + # Act + prompt_data = {"text": "test", "inputs": [], "outputs": []} + strategy.run(prompt_data) + + # Assert - Check that unwrapped models are passed to MIPROv2 + kwargs = mock_mipro.call_args.kwargs + assert kwargs["task_model"] == "unwrapped_task_model" + assert kwargs["prompt_model"] == "unwrapped_prompt_model"