diff --git a/src/strands_evals/__init__.py b/src/strands_evals/__init__.py index 229dba0..35919db 100644 --- a/src/strands_evals/__init__.py +++ b/src/strands_evals/__init__.py @@ -1,4 +1,4 @@ -from . import detectors, evaluators, extractors, generators, providers, simulation, telemetry, types +from . import chaos, detectors, evaluators, extractors, generators, providers, simulation, telemetry, types from .case import Case from .eval_task_handler import EvalTaskHandler, TracedHandler, eval_task from .evaluation_data_store import EvaluationDataStore @@ -17,6 +17,7 @@ "EvalTaskHandler", "TracedHandler", "eval_task", + "chaos", "detectors", "evaluators", "extractors", diff --git a/src/strands_evals/chaos/__init__.py b/src/strands_evals/chaos/__init__.py new file mode 100644 index 0000000..e04544a --- /dev/null +++ b/src/strands_evals/chaos/__init__.py @@ -0,0 +1,32 @@ +"""Chaos testing module for Strands Evals. + +Provides deterministic fault injection for evaluating agent resilience +under tool failures and response corruption scenarios. +""" + +from .case import ChaosCase +from .effects import ( + ChaosEffect, + CorruptValues, + RemoveFields, + ToolCallFailure, + ToolEffect, + TruncateFields, +) +from .experiment import ChaosExperiment +from .plugin import ChaosPlugin + +__all__ = [ + # Core classes + "ChaosCase", + "ChaosExperiment", + "ChaosPlugin", + # Effect hierarchy + "ChaosEffect", + "ToolEffect", + # Concrete effects + "ToolCallFailure", + "TruncateFields", + "RemoveFields", + "CorruptValues", +] diff --git a/src/strands_evals/chaos/_context.py b/src/strands_evals/chaos/_context.py new file mode 100644 index 0000000..74118e5 --- /dev/null +++ b/src/strands_evals/chaos/_context.py @@ -0,0 +1,21 @@ +"""Internal context variable for tracking the active chaos case. + +The ChaosPlugin reads from this ContextVar at hook time. +The ChaosExperiment sets and resets it around each case's task invocation. 
+
+Using a ContextVar ensures correct behavior under:
+- Sequential execution (trivially correct)
+- Async execution (each asyncio.Task inherits the var from its parent)
+- Threaded execution (each thread gets its own copy)
+"""
+
+from contextvars import ContextVar
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .case import ChaosCase
+
+_current_chaos_case: ContextVar["ChaosCase | None"] = ContextVar(
+    "chaos_current_case",
+    default=None,
+)
diff --git a/src/strands_evals/chaos/case.py b/src/strands_evals/chaos/case.py
new file mode 100644
index 0000000..4d66fe4
--- /dev/null
+++ b/src/strands_evals/chaos/case.py
@@ -0,0 +1,123 @@
+"""Chaos case definition.
+
+A ChaosCase extends Case with chaos-specific fields, providing a stable
+extension point for failure injection configuration without modifying the
+base Case class.
+"""
+
+import uuid
+from typing import Generic
+
+from pydantic import Field
+
+from ..case import Case
+from ..types.evaluation import InputT, OutputT
+from .effects import ChaosEffect
+
+
+class ChaosCase(Case, Generic[InputT, OutputT]):
+    """A test case with associated chaos effects.
+
+    Extends Case to carry the effects mapping that the ChaosPlugin reads
+    at hook time. A ChaosCase with empty effects is a baseline run.
+
+    The ``expand`` class method provides the Cartesian product of cases ×
+    effect maps, producing a flat list of ChaosCase objects ready for
+    ChaosExperiment.
+
+    Attributes:
+        effects: Mapping of tool_name -> list of effects to inject for this case.
+            Tools not listed behave normally. Empty dict means baseline (no chaos).
+ + Example:: + + from strands_evals import Case + from strands_evals.chaos import ChaosCase + from strands_evals.chaos.effects import ToolCallFailure, TruncateFields + + # Direct construction + chaos_case = ChaosCase( + name="search_timeout", + input="Find flights to Tokyo", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + + # Expansion from base cases × named effect maps + cases = [ + Case(name="flight_search", input="Find flights to Tokyo"), + Case(name="hotel_search", input="Find hotels in Tokyo"), + ] + effect_maps = { + "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}, + "search_truncated": {"search_tool": [TruncateFields(max_length=5)]}, + } + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + # Produces 6 ChaosCase objects: 2 cases × (2 effect maps + 1 baseline) + """ + + effects: dict[str, list[ChaosEffect]] = Field( + default_factory=dict, + description="Mapping of tool_name -> list of effects to inject for this case. " + "Empty dict means baseline (no chaos).", + ) + + @classmethod + def expand( + cls, + cases: list[Case], + effect_maps: dict[str, dict[str, list[ChaosEffect]]], + include_no_effect_baseline: bool = False, + ) -> list["ChaosCase"]: + """Generate the Cartesian product of cases × named effect maps. + + Produces a flat list of ChaosCase objects, one for each (case, effect_map) + combination. Each ChaosCase gets a fresh session_id and a composite name + built from the case name and the effect map key. + + Args: + cases: Base test cases to expand. + effect_maps: Named effect configurations. Keys are short human-readable + names (used in the composite case name); values are mappings of + tool_name -> list of ChaosEffect instances. + include_no_effect_baseline: If True, includes a baseline (no chaos) + variant for each case. Defaults to False. 
+ + Returns: + Flat list of ChaosCase objects with composite names like + "flight_search|baseline" or "flight_search|search_timeout". + """ + all_entries: list[tuple[str, dict[str, list[ChaosEffect]]]] = [] + + if include_no_effect_baseline: + all_entries.append(("baseline", {})) + + for name, effects in effect_maps.items(): + all_entries.append((name, effects)) + + expanded: list[ChaosCase] = [] + for case in cases: + for condition_name, effects in all_entries: + session_id = str(uuid.uuid4()) + expanded_name = f"{case.name}|{condition_name}" if case.name else condition_name + expanded.append( + cls( + name=expanded_name, + session_id=session_id, + input=case.input, + expected_output=case.expected_output, + expected_assertion=case.expected_assertion, + expected_trajectory=case.expected_trajectory, + expected_interactions=case.expected_interactions, + expected_environment_state=case.expected_environment_state, + metadata=case.metadata, + effects=effects, + ) + ) + + return expanded + + def __repr__(self) -> str: + effects_str = ", ".join( + f"{target}: [{', '.join(type(e).__name__ for e in effs)}]" for target, effs in self.effects.items() + ) + return f"ChaosCase(name='{self.name}', effects={{{effects_str}}})" diff --git a/src/strands_evals/chaos/effects.py b/src/strands_evals/chaos/effects.py new file mode 100644 index 0000000..f65af48 --- /dev/null +++ b/src/strands_evals/chaos/effects.py @@ -0,0 +1,248 @@ +"""Chaos effect definitions. + +Effects are first-class parameterized classes organized in a hierarchy: + ChaosEffect → ToolEffect → concrete effects (ToolCallFailure, TruncateFields, etc.) + +Each concrete effect carries only the parameters meaningful to it. +The `hook` class variable indicates whether the effect fires pre-tool-call +(error effects) or post-tool-call (corruption effects). 
+""" + +import math +import random +from abc import abstractmethod +from typing import Any, ClassVar, Literal + +from pydantic import BaseModel, Field + + +class ChaosEffect(BaseModel): + """Base for all chaos effects. + + Attributes: + apply_rate: Probability that this effect fires, defaults to 1 (always fire). + hook: Whether this effect fires pre-call ("pre") or post-call ("post"). + """ + + hook: ClassVar[Literal["pre", "post"]] + + apply_rate: float = Field( + default=1.0, + ge=0.0, + le=1.0, + description="Probability that this effect fires (1.0 = always).", + ) + + @abstractmethod + def apply(self, context: Any = None) -> Any: + """Apply the chaos effect to the given context and return the result.""" + ... + + +class ToolEffect(ChaosEffect): + """Effect that operates at the tool invocation boundary. + + This intermediate class enables type-based dispatch so the plugin can + distinguish tool-level effects from other planned effect categories + (e.g., upcoming ``ModelEffect`` for LLM input and output chaos injection). + """ + + +# All supported failure types +ToolCallFailureType = Literal["timeout", "network_error", "execution_error", "validation_error"] + +# Default error messages per failure type +_DEFAULT_ERROR_MESSAGES: dict[str, str] = { + "timeout": "Tool call timed out", + "network_error": "Network unreachable", + "execution_error": "Tool execution failed", + "validation_error": "Tool input validation failed", +} + + +class ToolCallFailure(ToolEffect): + """Simulates a tool call failure that prevents the tool from executing. + + The tool call is cancelled before execution with a simulated error message. 
+ + Example:: + + ChaosCase( + name="search_timeout", + input="Find flights", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + + ChaosCase( + name="db_network_error", + input="Query database", + effects={"database_tool": [ToolCallFailure( + error_type="network_error", + error_message="Connection refused on port 5432", + )]}, + ) + """ + + hook: ClassVar[Literal["pre", "post"]] = "pre" + error_type: ToolCallFailureType = Field( + default="execution_error", + description="Type of failure to simulate.", + ) + error_message: str | None = Field( + default=None, + description="Custom error message. If None, uses a default for the error_type.", + ) + + def apply(self, context: Any = None) -> str: + """Return the error message to cancel the tool call with.""" + if self.error_message is not None: + return self.error_message + return _DEFAULT_ERROR_MESSAGES[self.error_type] + + +class TruncateFields(ToolEffect): + """Truncates string values in the tool response. + + The tool executes normally, but string fields in the response are + truncated to at most `max_length` characters. + + Example:: + + ChaosCase( + name="search_truncated", + input="Find flights", + effects={ + "search_tool": [TruncateFields(max_length=5)], + }, + ) + """ + + hook: ClassVar[Literal["pre", "post"]] = "post" + max_length: int = Field(default=10, ge=0, description="Maximum length to truncate string values to") + + def apply(self, response: Any = None) -> Any: + """Truncate string values to max_length. + + Args: + response: The tool response dict to corrupt. + + Returns: + Response with string values truncated. 
+ """ + if not isinstance(response, dict): + return response + result: dict[str, Any] = {} + for key, value in response.items(): + if isinstance(value, str) and len(value) > self.max_length: + result[key] = value[: self.max_length] + elif isinstance(value, dict): + result[key] = self.apply(value) + else: + result[key] = value + return result + + +class RemoveFields(ToolEffect): + """Removes a fraction of fields from the tool response. + + The tool executes normally, but a portion of the response fields + are deleted. + + Example:: + + ChaosCase( + name="db_remove_fields", + input="Query database", + effects={ + "database_tool": [RemoveFields(remove_ratio=0.5)], + }, + ) + """ + + hook: ClassVar[Literal["pre", "post"]] = "post" + remove_ratio: float = Field( + default=0.5, + ge=0.0, + le=1.0, + description="Fraction of fields to remove from the response", + ) + + def apply(self, response: Any = None) -> Any: + """Remove a fraction of fields from the response. + + Always removes at least 1 field when called. + + Args: + response: The tool response dict to corrupt. + + Returns: + Response with fields removed. + """ + if not isinstance(response, dict): + return response + keys = list(response.keys()) + if not keys: + return response + + num_to_remove = max(1, math.ceil(len(keys) * self.remove_ratio)) + keys_to_remove = set(random.sample(keys, min(num_to_remove, len(keys)))) + return {k: v for k, v in response.items() if k not in keys_to_remove} + + +class CorruptValues(ToolEffect): + """Replaces a fraction of values with garbage data. + + The tool executes normally, but a portion of the response values + are replaced with wrong types or nonsense data. 
+ + Example:: + + ChaosCase( + name="db_corrupt", + input="Query database", + effects={ + "database_tool": [CorruptValues(corrupt_ratio=0.8)], + }, + ) + """ + + hook: ClassVar[Literal["pre", "post"]] = "post" + corrupt_ratio: float = Field( + default=0.5, + ge=0.0, + le=1.0, + description="Fraction of values to corrupt in the response", + ) + + _CORRUPTIONS: ClassVar[list[Any]] = [None, 99999, "", True, [], "CORRUPTED_DATA"] + + def apply(self, response: Any = None) -> Any: + """Replace a fraction of values with wrong types or garbage data. + + Always corrupts at least 1 field when called. + + Args: + response: The tool response dict to corrupt. + + Returns: + Response with corrupted values. + """ + if not isinstance(response, dict): + return response + keys = list(response.keys()) + if not keys: + return response + + num_to_corrupt = max(1, math.ceil(len(keys) * self.corrupt_ratio)) + keys_to_corrupt = set(random.sample(keys, min(num_to_corrupt, len(keys)))) + + result: dict[str, Any] = {} + for key, value in response.items(): + if key in keys_to_corrupt: + candidates = [c for c in self._CORRUPTIONS if c != value] + result[key] = random.choice(candidates) if candidates else "CORRUPTED_DATA" + elif isinstance(value, dict): + result[key] = self.apply(value) + else: + result[key] = value + return result diff --git a/src/strands_evals/chaos/experiment.py b/src/strands_evals/chaos/experiment.py new file mode 100644 index 0000000..e841da3 --- /dev/null +++ b/src/strands_evals/chaos/experiment.py @@ -0,0 +1,183 @@ +"""Chaos Experiment. + +Composes the base Experiment to run ChaosCase objects through evaluators, +providing deterministic evaluation of agent resilience under tool failures. 
+"""
+
+import logging
+from collections.abc import Callable
+from typing import Any, Optional
+
+from ..evaluators.evaluator import Evaluator
+from ..experiment import Experiment
+from ..types.evaluation_report import EvaluationReport
+from ._context import _current_chaos_case
+from .case import ChaosCase
+
+logger = logging.getLogger(__name__)
+
+
+class ChaosExperiment:
+    """Runs ChaosCase objects through evaluators with chaos-aware dispatch.
+
+    Sets the active ChaosCase via ContextVar before each task invocation so
+    the ChaosPlugin can read the case's effects at hook time. The user's task
+    body contains zero chaos concepts — the plugin reads the active case from
+    the ContextVar.
+
+    Use ``ChaosCase.expand()`` to generate the Cartesian product of base cases
+    × effect maps before passing them to this experiment.
+
+    Example::
+
+        from strands_evals import Case
+        from strands_evals.chaos import (
+            ChaosCase,
+            ChaosExperiment,
+            ChaosPlugin,
+        )
+        from strands_evals.chaos.effects import ToolCallFailure
+
+        chaos = ChaosPlugin()
+
+        cases = [Case(input="Find flights to Tokyo", name="flight_search")]
+        effect_maps = {
+            "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]},
+            "db_network_error": {"database_tool": [ToolCallFailure(error_type="network_error")]},
+        }
+        chaos_cases = ChaosCase.expand(cases, effect_maps)
+
+        def my_task(case):
+            agent = Agent(tools=[search_tool, database_tool], plugins=[chaos])
+            return {"output": str(agent(case.input))}
+
+        experiment = ChaosExperiment(
+            cases=chaos_cases,
+            evaluators=[my_evaluator],
+        )
+
+        reports = experiment.run_evaluations(task=my_task)
+    """
+
+    def __init__(
+        self,
+        cases: list[ChaosCase],
+        evaluators: Optional[list[Evaluator]] = None,
+    ):
+        """Initialize a ChaosExperiment.
+
+        Args:
+            cases: ChaosCase objects to evaluate. Use ``ChaosCase.expand()``
+                to generate these from base cases and effect maps.
+            evaluators: Evaluators to assess results.
+ """ + self._cases = cases + self._evaluators = evaluators + + # Internal Experiment with the chaos cases + self._experiment = Experiment( + cases=list(cases), + evaluators=evaluators, + ) + + @property + def cases(self) -> list[ChaosCase]: + """The ChaosCase objects configured for this experiment.""" + return self._cases + + def _wrap_task(self, task: Callable[[ChaosCase], Any]) -> Callable[[ChaosCase], Any]: + """Wrap a task function to activate the correct ChaosCase via ContextVar. + + Handles both sync and async tasks — returns a sync wrapper for sync tasks + and an async wrapper for async tasks, so the base Experiment dispatches + correctly. + + Args: + task: The original task function (sync or async). + + Returns: + A wrapped callable that sets/resets the ContextVar around each invocation. + """ + import asyncio + + if asyncio.iscoroutinefunction(task): + + async def chaos_aware_task_async(case: ChaosCase) -> Any: + token = _current_chaos_case.set(case) + try: + return await task(case) + finally: + _current_chaos_case.reset(token) + + return chaos_aware_task_async + else: + + def chaos_aware_task(case: ChaosCase) -> Any: + token = _current_chaos_case.set(case) + try: + return task(case) + finally: + _current_chaos_case.reset(token) + + return chaos_aware_task + + def run_evaluations( + self, + task: Callable[[ChaosCase], Any], + **kwargs, + ) -> list[EvaluationReport]: + """Run evaluations across all ChaosCase objects. + + Delegates to run_evaluations_async with max_workers=1, mirroring the + base Experiment pattern. + + Args: + task: The task function to evaluate. Takes a ChaosCase and returns output. + The task body should contain zero chaos concepts — just construct + the agent with plugins=[chaos] and call it. + **kwargs: Additional kwargs passed to the base Experiment.run_evaluations_async. + + Returns: + List of EvaluationReport objects. + + Raises: + ValueError: If an async task is passed (use run_evaluations_async instead). 
+ """ + import asyncio + + if asyncio.iscoroutinefunction(task): + raise ValueError( + "Async task is not supported in run_evaluations. Please use run_evaluations_async instead." + ) + + return asyncio.run(self.run_evaluations_async(task, max_workers=1, **kwargs)) + + async def run_evaluations_async( + self, + task: Callable[[ChaosCase], Any], + max_workers: int = 10, + **kwargs, + ) -> list[EvaluationReport]: + """Run evaluations asynchronously across all ChaosCase objects. + + Wraps the user's task to set the ContextVar before each case execution. + The base Experiment handles sync-to-async dispatch internally. + + Args: + task: The task function (sync or async). + max_workers: Maximum number of parallel workers. + **kwargs: Additional kwargs passed to the base Experiment.run_evaluations_async. + + Returns: + List of EvaluationReport objects. + """ + wrapped = self._wrap_task(task) + reports = await self._experiment.run_evaluations_async(wrapped, max_workers=max_workers, **kwargs) + + logger.info( + "cases=<%d>, reports=<%d> | chaos experiment complete", + len(self._cases), + len(reports), + ) + + return reports diff --git a/src/strands_evals/chaos/plugin.py b/src/strands_evals/chaos/plugin.py new file mode 100644 index 0000000..8d1dbed --- /dev/null +++ b/src/strands_evals/chaos/plugin.py @@ -0,0 +1,132 @@ +"""Chaos Plugin for Strands Agents. + +Implements chaos injection as a standard Strands Plugin using the SDK's +native hook system (BeforeToolCallEvent / AfterToolCallEvent). + +The plugin reads the active ChaosCase from a module-level ContextVar at hook +time. The ChaosExperiment manages the ContextVar lifecycle. 
+""" + +import json +import logging +import random + +from strands.hooks import AfterToolCallEvent, BeforeToolCallEvent +from strands.plugins import Plugin, hook + +from ._context import _current_chaos_case +from .effects import ChaosEffect, TruncateFields + +logger = logging.getLogger(__name__) + + +class ChaosPlugin(Plugin): + """Strands Plugin that injects deterministic chaos based on the active ChaosCase. + + The plugin intercepts tool calls via Strands' native hook system: + - BeforeToolCallEvent: cancels tool calls for pre-hook effects (ToolCallFailure) + - AfterToolCallEvent: corrupts tool responses for post-hook effects (TruncateFields, etc.) + + The active ChaosCase is managed via a ContextVar (set by ChaosExperiment). + When no ChaosCase is active or the case has no effects, all tools behave normally. + + Example:: + + from strands import Agent + from strands_evals.chaos import ChaosPlugin + + chaos = ChaosPlugin() + agent = Agent( + model=my_model, + tools=[search_tool, database_tool], + plugins=[chaos], + ) + + # The ChaosExperiment handles ChaosCase activation via ContextVar. + # The user's task body contains zero chaos concepts. + """ + + name = "chaos-testing" + + def __init__(self) -> None: + super().__init__() + + @hook # type: ignore[call-overload] + def before_tool_call(self, event: BeforeToolCallEvent) -> None: + """Intercept tool calls to inject pre-hook (error) effects. + + For ToolCallFailure effects (with error_type='timeout', 'network_error', + etc.), cancels the tool call with the effect's error_message before the + tool executes. 
+ """ + chaos_case = _current_chaos_case.get() + if chaos_case is None or not chaos_case.effects: + return + + tool_name = event.tool_use.get("name", "") + effects = chaos_case.effects.get(tool_name, []) + if not effects: + return + + # First pre-hook effect wins (tool is cancelled once) + for effect in effects: + if effect.hook == "pre": + if random.random() > effect.apply_rate: + continue + event.cancel_tool = effect.apply() + logger.info("effect=<%s>, tool=<%s> | injected chaos pre-hook", type(effect).__name__, tool_name) + return + + @hook # type: ignore[call-overload] + def after_tool_call(self, event: AfterToolCallEvent) -> None: + """Intercept tool results to inject post-hook (corruption) effects. + + For corruption effects (TruncateFields, RemoveFields, CorruptValues), + applies effect.apply() to JSON content blocks in the tool response. + """ + chaos_case = _current_chaos_case.get() + if chaos_case is None or not chaos_case.effects: + return + + tool_name = event.tool_use.get("name", "") + effects = chaos_case.effects.get(tool_name, []) + if not effects: + return + + # Apply all post-hook effects sequentially (they compose) + for effect in effects: + if effect.hook != "post": + continue + + if random.random() > effect.apply_rate: + continue + + if event.result is None: + continue + + result = event.result + content = result.get("content") + + if isinstance(content, list): + result["content"] = self._apply_to_blocks(effect, content) # type: ignore[assignment] + + logger.info("effect=<%s>, tool=<%s> | applied chaos post-hook", type(effect).__name__, tool_name) + + def _apply_to_blocks(self, effect: ChaosEffect, blocks: list) -> list: + """Apply effect to text blocks in a content list.""" + corrupted_blocks = [] + for block in blocks: + if isinstance(block, dict) and "text" in block: + text_data = block["text"] + if isinstance(text_data, str): + try: + parsed = json.loads(text_data) + if isinstance(parsed, dict): + corrupted = effect.apply(parsed) + block = 
{**block, "text": json.dumps(corrupted)} + except (json.JSONDecodeError, ValueError): + # Plain text — apply truncation if effect is TruncateFields + if isinstance(effect, TruncateFields): + block = {**block, "text": text_data[: effect.max_length]} + corrupted_blocks.append(block) + return corrupted_blocks diff --git a/tests/strands_evals/chaos/__init__.py b/tests/strands_evals/chaos/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/strands_evals/chaos/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/strands_evals/chaos/test_case.py b/tests/strands_evals/chaos/test_case.py new file mode 100644 index 0000000..7a57ea7 --- /dev/null +++ b/tests/strands_evals/chaos/test_case.py @@ -0,0 +1,176 @@ +"""Unit tests for ChaosCase.""" + +from strands_evals import Case +from strands_evals.chaos import ChaosCase +from strands_evals.chaos.effects import CorruptValues, ToolCallFailure, TruncateFields + + +class TestChaosCase: + """Tests for the ChaosCase data model.""" + + def test_baseline_case_has_no_effects(self): + case = ChaosCase(name="baseline", input="hello") + assert case.effects == {} + + def test_case_with_effects(self): + case = ChaosCase( + name="search_timeout", + input="hello", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + assert len(case.effects) == 1 + assert isinstance(case.effects["search_tool"][0], ToolCallFailure) + + def test_case_with_multiple_tools(self): + case = ChaosCase( + name="compound", + input="hello", + effects={ + "search_tool": [ToolCallFailure(error_type="timeout")], + "db_tool": [CorruptValues(corrupt_ratio=0.8)], + }, + ) + assert len(case.effects) == 2 + + def test_case_with_multiple_effects_per_tool(self): + case = ChaosCase( + name="multi_effect", + input="hello", + effects={ + "tool_a": [ + TruncateFields(max_length=5), + CorruptValues(corrupt_ratio=0.3), + ], + }, + ) + assert len(case.effects["tool_a"]) == 2 + + def test_inherits_case_fields(self): + case = ChaosCase( + 
name="with_expected", + input="hello", + expected_output="world", + expected_trajectory=["tool_a"], + metadata={"key": "value"}, + effects={"tool_a": [ToolCallFailure()]}, + ) + assert case.input == "hello" + assert case.expected_output == "world" + assert case.expected_trajectory == ["tool_a"] + assert case.metadata == {"key": "value"} + + def test_repr_shows_effects(self): + case = ChaosCase( + name="test", + input="hello", + effects={"tool": [ToolCallFailure()]}, + ) + repr_str = repr(case) + assert "test" in repr_str + assert "ToolCallFailure" in repr_str + + +class TestChaosCaseExpand: + """Tests for the ChaosCase.expand() class method.""" + + def test_expand_with_baseline(self): + cases = [ + Case(name="case_a", input="hello"), + Case(name="case_b", input="world"), + ] + effect_maps = { + "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}, + "db_corrupt": {"db_tool": [CorruptValues(corrupt_ratio=0.8)]}, + } + result = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + # 2 cases × (2 effect maps + 1 baseline) = 6 + assert len(result) == 6 + + def test_expand_without_baseline(self): + cases = [ + Case(name="case_a", input="hello"), + Case(name="case_b", input="world"), + ] + effect_maps = { + "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}, + } + result = ChaosCase.expand(cases, effect_maps) + # 2 cases × 1 effect map = 2 (no baseline by default) + assert len(result) == 2 + + def test_expand_baseline_names(self): + cases = [Case(name="case_a", input="hello")] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + names = [c.name for c in result] + assert "case_a|baseline" in names + + def test_expand_uses_dict_keys_as_names(self): + cases = [Case(name="case_a", input="hello")] + effect_maps = {"search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}} + result = ChaosCase.expand(cases, 
effect_maps) + assert result[0].name == "case_a|search_timeout" + + def test_expand_compound_effect_name(self): + cases = [Case(name="case_a", input="hello")] + effect_maps = { + "multi_failure": { + "search_tool": [ToolCallFailure(error_type="timeout")], + "db_tool": [CorruptValues()], + } + } + result = ChaosCase.expand(cases, effect_maps) + assert result[0].name == "case_a|multi_failure" + + def test_expand_unique_session_ids(self): + cases = [Case(name="case_a", input="hello"), Case(name="case_b", input="world")] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps) + session_ids = [c.session_id for c in result] + assert len(session_ids) == len(set(session_ids)) + + def test_expand_preserves_case_fields(self): + cases = [ + Case( + name="case_a", + input="hello", + expected_output="world", + expected_trajectory=["tool_a"], + metadata={"key": "value"}, + ) + ] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps) + expanded = result[0] + assert expanded.input == "hello" + assert expanded.expected_output == "world" + assert expanded.expected_trajectory == ["tool_a"] + assert expanded.metadata == {"key": "value"} + + def test_expand_baseline_has_empty_effects(self): + cases = [Case(name="case_a", input="hello")] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + baseline = [c for c in result if "baseline" in c.name][0] + assert baseline.effects == {} + + def test_expand_empty_effect_maps_with_baseline(self): + cases = [Case(name="case_a", input="hello")] + result = ChaosCase.expand(cases, {}, include_no_effect_baseline=True) + # Only baseline + assert len(result) == 1 + assert "baseline" in result[0].name + + def test_expand_empty_effect_maps_without_baseline(self): + cases = [Case(name="case_a", input="hello")] + result = ChaosCase.expand(cases, {}) + # No baseline by 
default, no effect maps → empty + assert len(result) == 0 + + def test_expand_case_without_name(self): + cases = [Case(input="hello")] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + names = [c.name for c in result] + assert "baseline" in names + assert "timeout" in names diff --git a/tests/strands_evals/chaos/test_context.py b/tests/strands_evals/chaos/test_context.py new file mode 100644 index 0000000..ebc2d03 --- /dev/null +++ b/tests/strands_evals/chaos/test_context.py @@ -0,0 +1,42 @@ +"""Unit tests for the chaos _context module.""" + +from strands_evals.chaos import ChaosCase +from strands_evals.chaos._context import _current_chaos_case + + +class TestContextVar: + """Tests for the _current_chaos_case ContextVar.""" + + def test_default_is_none(self): + assert _current_chaos_case.get() is None + + def test_set_and_get(self): + case = ChaosCase(name="test_case", input="hello") + token = _current_chaos_case.set(case) + try: + assert _current_chaos_case.get() is case + assert _current_chaos_case.get().name == "test_case" + finally: + _current_chaos_case.reset(token) + + def test_nested_set_and_reset(self): + c1 = ChaosCase(name="outer", input="hello") + c2 = ChaosCase(name="inner", input="world") + + token1 = _current_chaos_case.set(c1) + try: + assert _current_chaos_case.get().name == "outer" + token2 = _current_chaos_case.set(c2) + try: + assert _current_chaos_case.get().name == "inner" + finally: + _current_chaos_case.reset(token2) + assert _current_chaos_case.get().name == "outer" + finally: + _current_chaos_case.reset(token1) + + def test_reset_restores_none(self): + case = ChaosCase(name="test", input="hello") + token = _current_chaos_case.set(case) + _current_chaos_case.reset(token) + assert _current_chaos_case.get() is None diff --git a/tests/strands_evals/chaos/test_effects.py b/tests/strands_evals/chaos/test_effects.py new file mode 100644 index 
0000000..c55821d --- /dev/null +++ b/tests/strands_evals/chaos/test_effects.py @@ -0,0 +1,163 @@ +"""Unit tests for chaos effect classes.""" + +import random + +import pytest + +from strands_evals.chaos.effects import ( + CorruptValues, + RemoveFields, + ToolCallFailure, + TruncateFields, +) + + +class TestToolCallFailure: + """Tests for the ToolCallFailure pre-hook effect.""" + + @pytest.mark.parametrize( + "error_type,expected_message", + [ + ("timeout", "Tool call timed out"), + ("network_error", "Network unreachable"), + ("execution_error", "Tool execution failed"), + ("validation_error", "Tool input validation failed"), + ], + ) + def test_apply_returns_default_message(self, error_type, expected_message): + effect = ToolCallFailure(error_type=error_type) + assert effect.apply() == expected_message + + def test_apply_returns_custom_message_when_provided(self): + effect = ToolCallFailure(error_type="timeout", error_message="Custom timeout msg") + assert effect.apply() == "Custom timeout msg" + + def test_apply_rate_defaults_to_one(self): + effect = ToolCallFailure() + assert effect.apply_rate == 1.0 + + +class TestTruncateFields: + """Tests for the TruncateFields post-hook effect.""" + + def test_truncates_long_strings(self): + effect = TruncateFields(max_length=5) + response = {"name": "hello world", "short": "hi"} + result = effect.apply(response) + assert result["name"] == "hello" + assert result["short"] == "hi" + + def test_preserves_non_string_values(self): + effect = TruncateFields(max_length=3) + response = {"count": 42, "flag": True, "items": [1, 2, 3]} + result = effect.apply(response) + assert result["count"] == 42 + assert result["flag"] is True + assert result["items"] == [1, 2, 3] + + def test_truncates_nested_dicts(self): + effect = TruncateFields(max_length=3) + response = {"nested": {"deep_value": "abcdef"}} + result = effect.apply(response) + assert result["nested"]["deep_value"] == "abc" + + def test_empty_dict_returns_empty(self): + effect = 
TruncateFields(max_length=5) + assert effect.apply({}) == {} + + def test_non_dict_input_returned_as_is(self): + effect = TruncateFields(max_length=5) + assert effect.apply("not a dict") == "not a dict" + assert effect.apply(None) is None + + def test_zero_max_length_truncates_all_strings(self): + effect = TruncateFields(max_length=0) + response = {"text": "hello"} + result = effect.apply(response) + assert result["text"] == "" + + +class TestRemoveFields: + """Tests for the RemoveFields post-hook effect.""" + + def test_removes_at_least_one_field(self): + random.seed(42) + effect = RemoveFields(remove_ratio=0.1) + response = {"a": 1, "b": 2, "c": 3, "d": 4} + result = effect.apply(response) + assert len(result) < len(response) + + def test_removes_half_fields(self): + random.seed(42) + effect = RemoveFields(remove_ratio=0.5) + response = {"a": 1, "b": 2, "c": 3, "d": 4} + result = effect.apply(response) + assert len(result) == 2 + + def test_removes_all_fields_at_ratio_one(self): + random.seed(42) + effect = RemoveFields(remove_ratio=1.0) + response = {"a": 1, "b": 2, "c": 3} + result = effect.apply(response) + assert len(result) == 0 + + def test_empty_dict_returns_empty(self): + effect = RemoveFields(remove_ratio=0.5) + assert effect.apply({}) == {} + + def test_non_dict_input_returned_as_is(self): + effect = RemoveFields(remove_ratio=0.5) + assert effect.apply("not a dict") == "not a dict" + assert effect.apply(None) is None + + def test_single_field_always_removed(self): + random.seed(42) + effect = RemoveFields(remove_ratio=0.5) + response = {"only_key": "value"} + result = effect.apply(response) + assert len(result) == 0 + + +class TestCorruptValues: + """Tests for the CorruptValues post-hook effect.""" + + def test_corrupts_at_least_one_field(self): + random.seed(42) + effect = CorruptValues(corrupt_ratio=0.1) + response = {"a": "original_a", "b": "original_b", "c": "original_c", "d": "original_d"} + result = effect.apply(response) + corrupted_count = sum(1 
for k in response if result[k] != response[k]) + assert corrupted_count >= 1 + + def test_corrupted_values_come_from_corruption_pool(self): + random.seed(42) + effect = CorruptValues(corrupt_ratio=1.0) + response = {"a": "original", "b": "data"} + result = effect.apply(response) + corruption_pool = [None, 99999, "", True, [], "CORRUPTED_DATA"] + for key in response: + assert result[key] in corruption_pool + + def test_corrupts_nested_dicts_recursively(self): + random.seed(42) + effect = CorruptValues(corrupt_ratio=1.0) + response = {"top": "value", "nested": {"inner": "deep_value"}} + result = effect.apply(response) + # The nested dict should also be processed + assert "nested" in result or "top" in result + + def test_empty_dict_returns_empty(self): + effect = CorruptValues(corrupt_ratio=0.5) + assert effect.apply({}) == {} + + def test_non_dict_input_returned_as_is(self): + effect = CorruptValues(corrupt_ratio=0.5) + assert effect.apply("not a dict") == "not a dict" + assert effect.apply(None) is None + + def test_corrupted_value_differs_from_original(self): + random.seed(42) + effect = CorruptValues(corrupt_ratio=1.0) + response = {"key": "unique_original_value"} + result = effect.apply(response) + assert result["key"] != "unique_original_value" diff --git a/tests/strands_evals/chaos/test_experiment.py b/tests/strands_evals/chaos/test_experiment.py new file mode 100644 index 0000000..4fb0c57 --- /dev/null +++ b/tests/strands_evals/chaos/test_experiment.py @@ -0,0 +1,132 @@ +"""Unit tests for ChaosExperiment.""" + +import pytest + +from strands_evals import Case +from strands_evals.chaos import ChaosCase, ChaosExperiment +from strands_evals.chaos._context import _current_chaos_case +from strands_evals.chaos.effects import CorruptValues, ToolCallFailure +from strands_evals.evaluators.evaluator import Evaluator +from strands_evals.types import EvaluationData, EvaluationOutput + + +class MockChaosEvaluator(Evaluator): + """Simple evaluator that always passes.""" + + 
def evaluate(self, evaluation_case: EvaluationData) -> list[EvaluationOutput]: + return [EvaluationOutput(score=1.0, test_pass=True, reason="Mock pass")] + + +@pytest.fixture +def cases(): + return [ + Case(name="case_a", input="hello"), + Case(name="case_b", input="world"), + ] + + +@pytest.fixture +def effect_maps(): + return { + "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}, + "db_corrupt": {"db_tool": [CorruptValues(corrupt_ratio=0.8)]}, + } + + +@pytest.fixture +def evaluator(): + return MockChaosEvaluator() + + +class TestChaosExperiment: + """Tests for ChaosExperiment initialization and execution.""" + + def test_cases_count_with_baseline(self, cases, effect_maps, evaluator): + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + # 2 cases × (2 effect maps + 1 baseline) = 6 + assert len(experiment.cases) == 6 + + def test_cases_count_without_baseline(self, cases, effect_maps, evaluator): + chaos_cases = ChaosCase.expand(cases, effect_maps) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + # 2 cases × 2 effect maps = 4 + assert len(experiment.cases) == 4 + + def test_case_names_include_effect_map_key(self, cases, effect_maps, evaluator): + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + names = [c.name for c in experiment.cases] + assert "case_a|baseline" in names + assert "case_b|baseline" in names + assert "case_a|search_timeout" in names + assert "case_b|db_corrupt" in names + + def test_each_case_has_unique_session_id(self, cases, effect_maps, evaluator): + chaos_cases = ChaosCase.expand(cases, effect_maps) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + session_ids = [c.session_id for c in experiment.cases] + assert len(session_ids) == len(set(session_ids)) 
+ + def test_context_var_set_and_reset(self, cases, effect_maps, evaluator): + """Verify the ContextVar is set to the correct ChaosCase during task execution and reset after.""" + observed_cases = [] + + def capturing_task(case: ChaosCase): + active_case = _current_chaos_case.get() + observed_cases.append((case.name, active_case.name if active_case else None)) + return "output" + + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + experiment.run_evaluations(task=capturing_task) + + # Should have 6 observations (2 cases × 3 conditions) + assert len(observed_cases) == 6 + + # Verify the ContextVar matched the case being executed + for case_name, active_name in observed_cases: + assert case_name == active_name + + # After all runs, the ContextVar should be back to None + assert _current_chaos_case.get() is None + + def test_context_var_reset_on_task_exception(self, evaluator): + """Verify the ContextVar is reset even if the task raises.""" + cases = [Case(name="failing", input="x")] + effect_maps = {"chaos": {"t": [ToolCallFailure()]}} + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + + call_count = [0] + + def failing_task(case: ChaosCase): + call_count[0] += 1 + if call_count[0] == 1: + raise RuntimeError("Task failed") + return "output" + + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + + # The base Experiment should handle the exception internally + # ContextVar should still be reset + try: + experiment.run_evaluations(task=failing_task) + except Exception: + pass + + assert _current_chaos_case.get() is None + + def test_returns_evaluation_reports(self, cases, effect_maps, evaluator): + """Verify run_evaluations returns reports.""" + + def task(case: ChaosCase): + return "output" + + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + experiment = 
ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + reports = experiment.run_evaluations(task=task) + + assert len(reports) >= 1 + report = reports[0] + # 2 cases × 3 conditions = 6 scores + assert len(report.scores) == 6 diff --git a/tests/strands_evals/chaos/test_plugin.py b/tests/strands_evals/chaos/test_plugin.py new file mode 100644 index 0000000..45b1844 --- /dev/null +++ b/tests/strands_evals/chaos/test_plugin.py @@ -0,0 +1,268 @@ +"""Unit tests for ChaosPlugin.""" + +import json +from unittest.mock import MagicMock + +import pytest + +from strands_evals.chaos import ChaosCase, ChaosPlugin +from strands_evals.chaos._context import _current_chaos_case +from strands_evals.chaos.effects import ( + ToolCallFailure, + TruncateFields, +) + + +@pytest.fixture +def chaos_plugin(): + return ChaosPlugin() + + +@pytest.fixture +def before_event(): + """Create a mock BeforeToolCallEvent.""" + event = MagicMock() + event.tool_use = {"name": "search_tool"} + event.cancel_tool = None + return event + + +@pytest.fixture +def after_event(): + """Create a mock AfterToolCallEvent with list content.""" + event = MagicMock() + event.tool_use = {"name": "search_tool"} + event.result = { + "content": [{"text": json.dumps({"title": "Long Title Here", "count": 42})}], + "status": "success", + "toolUseId": "tool-123", + } + return event + + +class TestChaosPluginBeforeToolCall: + """Tests for the before_tool_call hook.""" + + def test_no_case_active_does_nothing(self, chaos_plugin, before_event): + token = _current_chaos_case.set(None) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool is None + finally: + _current_chaos_case.reset(token) + + def test_case_without_matching_tool_does_nothing(self, chaos_plugin, before_event): + case = ChaosCase( + name="other_tool_fails", + input="test", + effects={"other_tool": [ToolCallFailure(error_type="timeout")]}, + ) + token = _current_chaos_case.set(case) + try: + 
chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool is None + finally: + _current_chaos_case.reset(token) + + def test_pre_hook_effect_cancels_tool(self, chaos_plugin, before_event): + case = ChaosCase( + name="search_timeout", + input="test", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool == "Tool call timed out" + finally: + _current_chaos_case.reset(token) + + def test_post_hook_effect_does_not_cancel_tool(self, chaos_plugin, before_event): + case = ChaosCase( + name="search_truncated", + input="test", + effects={"search_tool": [TruncateFields(max_length=5)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool is None + finally: + _current_chaos_case.reset(token) + + def test_first_pre_hook_effect_wins(self, chaos_plugin, before_event): + case = ChaosCase( + name="multi_pre", + input="test", + effects={ + "search_tool": [ + ToolCallFailure(error_type="timeout"), + ToolCallFailure(error_type="network_error"), + ] + }, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool == "Tool call timed out" + finally: + _current_chaos_case.reset(token) + + +class TestChaosPluginAfterToolCall: + """Tests for the after_tool_call hook.""" + + def test_no_case_active_does_nothing(self, chaos_plugin, after_event): + token = _current_chaos_case.set(None) + try: + original_content = after_event.result["content"][0]["text"] + chaos_plugin.after_tool_call(after_event) + assert after_event.result["content"][0]["text"] == original_content + finally: + _current_chaos_case.reset(token) + + def test_case_without_matching_tool_does_nothing(self, chaos_plugin, after_event): + case = ChaosCase( + name="other_tool", + input="test", + effects={"other_tool": 
[TruncateFields(max_length=3)]}, + ) + token = _current_chaos_case.set(case) + try: + original_content = after_event.result["content"][0]["text"] + chaos_plugin.after_tool_call(after_event) + assert after_event.result["content"][0]["text"] == original_content + finally: + _current_chaos_case.reset(token) + + def test_post_hook_corrupts_json_text_blocks(self, chaos_plugin, after_event): + case = ChaosCase( + name="truncate", + input="test", + effects={"search_tool": [TruncateFields(max_length=3)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.after_tool_call(after_event) + corrupted = json.loads(after_event.result["content"][0]["text"]) + assert corrupted["title"] == "Lon" + assert corrupted["count"] == 42 # non-string preserved + finally: + _current_chaos_case.reset(token) + + def test_pre_hook_effect_ignored_in_after_hook(self, chaos_plugin, after_event): + case = ChaosCase( + name="pre_only", + input="test", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + token = _current_chaos_case.set(case) + try: + original_content = after_event.result["content"][0]["text"] + chaos_plugin.after_tool_call(after_event) + assert after_event.result["content"][0]["text"] == original_content + finally: + _current_chaos_case.reset(token) + + def test_none_result_is_skipped(self, chaos_plugin): + event = MagicMock() + event.tool_use = {"name": "search_tool"} + event.result = None + + case = ChaosCase( + name="truncate", + input="test", + effects={"search_tool": [TruncateFields(max_length=3)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.after_tool_call(event) # Should not raise + finally: + _current_chaos_case.reset(token) + + def test_plain_text_truncation(self, chaos_plugin): + """Test that plain (non-JSON) text blocks get truncated if effect has max_length.""" + event = MagicMock() + event.tool_use = {"name": "search_tool"} + event.result = { + "content": [{"text": "This is plain text, not JSON"}], + "status": 
"success", + "toolUseId": "tool-456", + } + + case = ChaosCase( + name="truncate", + input="test", + effects={"search_tool": [TruncateFields(max_length=4)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.after_tool_call(event) + assert event.result["content"][0]["text"] == "This" + finally: + _current_chaos_case.reset(token) + + +class TestApplyRate: + """Tests for the apply_rate probability check in ChaosPlugin.""" + + def test_apply_rate_zero_skips_pre_hook_effect(self, chaos_plugin, before_event): + """Effect with apply_rate=0.0 should never fire.""" + case = ChaosCase( + name="never_fires", + input="test", + effects={"search_tool": [ToolCallFailure(error_type="timeout", apply_rate=0.0)]}, + ) + token = _current_chaos_case.set(case) + try: + # Run multiple times to confirm it never fires + for _ in range(20): + before_event.cancel_tool = None + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool is None + finally: + _current_chaos_case.reset(token) + + def test_apply_rate_one_always_fires_pre_hook(self, chaos_plugin, before_event): + """Effect with apply_rate=1.0 should always fire.""" + case = ChaosCase( + name="always_fires", + input="test", + effects={"search_tool": [ToolCallFailure(error_type="timeout", apply_rate=1.0)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool == "Tool call timed out" + finally: + _current_chaos_case.reset(token) + + def test_apply_rate_zero_skips_post_hook_effect(self, chaos_plugin, after_event): + """Post-hook effect with apply_rate=0.0 should never fire.""" + case = ChaosCase( + name="never_truncates", + input="test", + effects={"search_tool": [TruncateFields(max_length=3, apply_rate=0.0)]}, + ) + token = _current_chaos_case.set(case) + try: + original_content = after_event.result["content"][0]["text"] + chaos_plugin.after_tool_call(after_event) + assert after_event.result["content"][0]["text"] == 
original_content + finally: + _current_chaos_case.reset(token) + + def test_apply_rate_one_always_fires_post_hook(self, chaos_plugin, after_event): + """Post-hook effect with apply_rate=1.0 should always fire.""" + case = ChaosCase( + name="always_truncates", + input="test", + effects={"search_tool": [TruncateFields(max_length=3, apply_rate=1.0)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.after_tool_call(after_event) + corrupted = json.loads(after_event.result["content"][0]["text"]) + assert corrupted["title"] == "Lon" + finally: + _current_chaos_case.reset(token)