diff --git a/src/strands_evals/__init__.py b/src/strands_evals/__init__.py index 229dba0..35919db 100644 --- a/src/strands_evals/__init__.py +++ b/src/strands_evals/__init__.py @@ -1,4 +1,4 @@ -from . import detectors, evaluators, extractors, generators, providers, simulation, telemetry, types +from . import chaos, detectors, evaluators, extractors, generators, providers, simulation, telemetry, types from .case import Case from .eval_task_handler import EvalTaskHandler, TracedHandler, eval_task from .evaluation_data_store import EvaluationDataStore @@ -17,6 +17,7 @@ "EvalTaskHandler", "TracedHandler", "eval_task", + "chaos", "detectors", "evaluators", "extractors", diff --git a/src/strands_evals/chaos/__init__.py b/src/strands_evals/chaos/__init__.py new file mode 100644 index 0000000..e04544a --- /dev/null +++ b/src/strands_evals/chaos/__init__.py @@ -0,0 +1,32 @@ +"""Chaos testing module for Strands Evals. + +Provides deterministic fault injection for evaluating agent resilience +under tool failures and response corruption scenarios. +""" + +from .case import ChaosCase +from .effects import ( + ChaosEffect, + CorruptValues, + RemoveFields, + ToolCallFailure, + ToolEffect, + TruncateFields, +) +from .experiment import ChaosExperiment +from .plugin import ChaosPlugin + +__all__ = [ + # Core classes + "ChaosCase", + "ChaosExperiment", + "ChaosPlugin", + # Effect hierarchy + "ChaosEffect", + "ToolEffect", + # Concrete effects + "ToolCallFailure", + "TruncateFields", + "RemoveFields", + "CorruptValues", +] diff --git a/src/strands_evals/chaos/_context.py b/src/strands_evals/chaos/_context.py new file mode 100644 index 0000000..74118e5 --- /dev/null +++ b/src/strands_evals/chaos/_context.py @@ -0,0 +1,21 @@ +"""Internal context variable for tracking the active chaos case. + +The ChaosPlugin reads from this ContextVar at hook time. +The ChaosExperiment sets and resets it around each case's task invocation. 
+
+Using a ContextVar ensures correct behavior under:
+- Sequential execution (trivially correct)
+- Async execution (each asyncio.Task inherits the var from its parent)
+- Threaded execution (each thread gets its own copy)
+"""
+
+from contextvars import ContextVar
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .case import ChaosCase
+
+_current_chaos_case: ContextVar["ChaosCase | None"] = ContextVar(
+    "chaos_current_case",
+    default=None,
+)
diff --git a/src/strands_evals/chaos/case.py b/src/strands_evals/chaos/case.py
new file mode 100644
index 0000000..4d66fe4
--- /dev/null
+++ b/src/strands_evals/chaos/case.py
@@ -0,0 +1,123 @@
+"""Chaos case definition.
+
+A ChaosCase extends Case with chaos-specific fields, providing a stable
+extension point for failure injection configuration without modifying the
+base Case class.
+"""
+
+import uuid
+from typing import Generic
+
+from pydantic import Field
+
+from ..case import Case
+from ..types.evaluation import InputT, OutputT
+from .effects import ChaosEffect
+
+
+class ChaosCase(Case, Generic[InputT, OutputT]):
+    """A test case with associated chaos effects.
+
+    Extends Case to carry the effects mapping that the ChaosPlugin reads
+    at hook time. A ChaosCase with empty effects is a baseline run.
+
+    The ``expand`` class method provides the Cartesian product of cases ×
+    effect maps, producing a flat list of ChaosCase objects ready for
+    ChaosExperiment.
+
+    Attributes:
+        effects: Mapping of tool_name -> list of effects to inject for this case.
+            Tools not listed behave normally. Empty dict means baseline (no chaos).
+ + Example:: + + from strands_evals import Case + from strands_evals.chaos import ChaosCase + from strands_evals.chaos.effects import ToolCallFailure, TruncateFields + + # Direct construction + chaos_case = ChaosCase( + name="search_timeout", + input="Find flights to Tokyo", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + + # Expansion from base cases × named effect maps + cases = [ + Case(name="flight_search", input="Find flights to Tokyo"), + Case(name="hotel_search", input="Find hotels in Tokyo"), + ] + effect_maps = { + "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}, + "search_truncated": {"search_tool": [TruncateFields(max_length=5)]}, + } + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + # Produces 6 ChaosCase objects: 2 cases × (2 effect maps + 1 baseline) + """ + + effects: dict[str, list[ChaosEffect]] = Field( + default_factory=dict, + description="Mapping of tool_name -> list of effects to inject for this case. " + "Empty dict means baseline (no chaos).", + ) + + @classmethod + def expand( + cls, + cases: list[Case], + effect_maps: dict[str, dict[str, list[ChaosEffect]]], + include_no_effect_baseline: bool = False, + ) -> list["ChaosCase"]: + """Generate the Cartesian product of cases × named effect maps. + + Produces a flat list of ChaosCase objects, one for each (case, effect_map) + combination. Each ChaosCase gets a fresh session_id and a composite name + built from the case name and the effect map key. + + Args: + cases: Base test cases to expand. + effect_maps: Named effect configurations. Keys are short human-readable + names (used in the composite case name); values are mappings of + tool_name -> list of ChaosEffect instances. + include_no_effect_baseline: If True, includes a baseline (no chaos) + variant for each case. Defaults to False. 
+ + Returns: + Flat list of ChaosCase objects with composite names like + "flight_search|baseline" or "flight_search|search_timeout". + """ + all_entries: list[tuple[str, dict[str, list[ChaosEffect]]]] = [] + + if include_no_effect_baseline: + all_entries.append(("baseline", {})) + + for name, effects in effect_maps.items(): + all_entries.append((name, effects)) + + expanded: list[ChaosCase] = [] + for case in cases: + for condition_name, effects in all_entries: + session_id = str(uuid.uuid4()) + expanded_name = f"{case.name}|{condition_name}" if case.name else condition_name + expanded.append( + cls( + name=expanded_name, + session_id=session_id, + input=case.input, + expected_output=case.expected_output, + expected_assertion=case.expected_assertion, + expected_trajectory=case.expected_trajectory, + expected_interactions=case.expected_interactions, + expected_environment_state=case.expected_environment_state, + metadata=case.metadata, + effects=effects, + ) + ) + + return expanded + + def __repr__(self) -> str: + effects_str = ", ".join( + f"{target}: [{', '.join(type(e).__name__ for e in effs)}]" for target, effs in self.effects.items() + ) + return f"ChaosCase(name='{self.name}', effects={{{effects_str}}})" diff --git a/src/strands_evals/chaos/effects.py b/src/strands_evals/chaos/effects.py new file mode 100644 index 0000000..f65af48 --- /dev/null +++ b/src/strands_evals/chaos/effects.py @@ -0,0 +1,248 @@ +"""Chaos effect definitions. + +Effects are first-class parameterized classes organized in a hierarchy: + ChaosEffect → ToolEffect → concrete effects (ToolCallFailure, TruncateFields, etc.) + +Each concrete effect carries only the parameters meaningful to it. +The `hook` class variable indicates whether the effect fires pre-tool-call +(error effects) or post-tool-call (corruption effects). 
+""" + +import math +import random +from abc import abstractmethod +from typing import Any, ClassVar, Literal + +from pydantic import BaseModel, Field + + +class ChaosEffect(BaseModel): + """Base for all chaos effects. + + Attributes: + apply_rate: Probability that this effect fires, defaults to 1 (always fire). + hook: Whether this effect fires pre-call ("pre") or post-call ("post"). + """ + + hook: ClassVar[Literal["pre", "post"]] + + apply_rate: float = Field( + default=1.0, + ge=0.0, + le=1.0, + description="Probability that this effect fires (1.0 = always).", + ) + + @abstractmethod + def apply(self, context: Any = None) -> Any: + """Apply the chaos effect to the given context and return the result.""" + ... + + +class ToolEffect(ChaosEffect): + """Effect that operates at the tool invocation boundary. + + This intermediate class enables type-based dispatch so the plugin can + distinguish tool-level effects from other planned effect categories + (e.g., upcoming ``ModelEffect`` for LLM input and output chaos injection). + """ + + +# All supported failure types +ToolCallFailureType = Literal["timeout", "network_error", "execution_error", "validation_error"] + +# Default error messages per failure type +_DEFAULT_ERROR_MESSAGES: dict[str, str] = { + "timeout": "Tool call timed out", + "network_error": "Network unreachable", + "execution_error": "Tool execution failed", + "validation_error": "Tool input validation failed", +} + + +class ToolCallFailure(ToolEffect): + """Simulates a tool call failure that prevents the tool from executing. + + The tool call is cancelled before execution with a simulated error message. 
+ + Example:: + + ChaosCase( + name="search_timeout", + input="Find flights", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + + ChaosCase( + name="db_network_error", + input="Query database", + effects={"database_tool": [ToolCallFailure( + error_type="network_error", + error_message="Connection refused on port 5432", + )]}, + ) + """ + + hook: ClassVar[Literal["pre", "post"]] = "pre" + error_type: ToolCallFailureType = Field( + default="execution_error", + description="Type of failure to simulate.", + ) + error_message: str | None = Field( + default=None, + description="Custom error message. If None, uses a default for the error_type.", + ) + + def apply(self, context: Any = None) -> str: + """Return the error message to cancel the tool call with.""" + if self.error_message is not None: + return self.error_message + return _DEFAULT_ERROR_MESSAGES[self.error_type] + + +class TruncateFields(ToolEffect): + """Truncates string values in the tool response. + + The tool executes normally, but string fields in the response are + truncated to at most `max_length` characters. + + Example:: + + ChaosCase( + name="search_truncated", + input="Find flights", + effects={ + "search_tool": [TruncateFields(max_length=5)], + }, + ) + """ + + hook: ClassVar[Literal["pre", "post"]] = "post" + max_length: int = Field(default=10, ge=0, description="Maximum length to truncate string values to") + + def apply(self, response: Any = None) -> Any: + """Truncate string values to max_length. + + Args: + response: The tool response dict to corrupt. + + Returns: + Response with string values truncated. 
+ """ + if not isinstance(response, dict): + return response + result: dict[str, Any] = {} + for key, value in response.items(): + if isinstance(value, str) and len(value) > self.max_length: + result[key] = value[: self.max_length] + elif isinstance(value, dict): + result[key] = self.apply(value) + else: + result[key] = value + return result + + +class RemoveFields(ToolEffect): + """Removes a fraction of fields from the tool response. + + The tool executes normally, but a portion of the response fields + are deleted. + + Example:: + + ChaosCase( + name="db_remove_fields", + input="Query database", + effects={ + "database_tool": [RemoveFields(remove_ratio=0.5)], + }, + ) + """ + + hook: ClassVar[Literal["pre", "post"]] = "post" + remove_ratio: float = Field( + default=0.5, + ge=0.0, + le=1.0, + description="Fraction of fields to remove from the response", + ) + + def apply(self, response: Any = None) -> Any: + """Remove a fraction of fields from the response. + + Always removes at least 1 field when called. + + Args: + response: The tool response dict to corrupt. + + Returns: + Response with fields removed. + """ + if not isinstance(response, dict): + return response + keys = list(response.keys()) + if not keys: + return response + + num_to_remove = max(1, math.ceil(len(keys) * self.remove_ratio)) + keys_to_remove = set(random.sample(keys, min(num_to_remove, len(keys)))) + return {k: v for k, v in response.items() if k not in keys_to_remove} + + +class CorruptValues(ToolEffect): + """Replaces a fraction of values with garbage data. + + The tool executes normally, but a portion of the response values + are replaced with wrong types or nonsense data. 
+ + Example:: + + ChaosCase( + name="db_corrupt", + input="Query database", + effects={ + "database_tool": [CorruptValues(corrupt_ratio=0.8)], + }, + ) + """ + + hook: ClassVar[Literal["pre", "post"]] = "post" + corrupt_ratio: float = Field( + default=0.5, + ge=0.0, + le=1.0, + description="Fraction of values to corrupt in the response", + ) + + _CORRUPTIONS: ClassVar[list[Any]] = [None, 99999, "", True, [], "CORRUPTED_DATA"] + + def apply(self, response: Any = None) -> Any: + """Replace a fraction of values with wrong types or garbage data. + + Always corrupts at least 1 field when called. + + Args: + response: The tool response dict to corrupt. + + Returns: + Response with corrupted values. + """ + if not isinstance(response, dict): + return response + keys = list(response.keys()) + if not keys: + return response + + num_to_corrupt = max(1, math.ceil(len(keys) * self.corrupt_ratio)) + keys_to_corrupt = set(random.sample(keys, min(num_to_corrupt, len(keys)))) + + result: dict[str, Any] = {} + for key, value in response.items(): + if key in keys_to_corrupt: + candidates = [c for c in self._CORRUPTIONS if c != value] + result[key] = random.choice(candidates) if candidates else "CORRUPTED_DATA" + elif isinstance(value, dict): + result[key] = self.apply(value) + else: + result[key] = value + return result diff --git a/src/strands_evals/chaos/experiment.py b/src/strands_evals/chaos/experiment.py new file mode 100644 index 0000000..e841da3 --- /dev/null +++ b/src/strands_evals/chaos/experiment.py @@ -0,0 +1,183 @@ +"""Chaos Experiment. + +Composes the base Experiment to run ChaosCase objects through evaluators, +providing deterministic evaluation of agent resilience under tool failures. 
+"""
+
+import logging
+from collections.abc import Callable
+from typing import Any, Optional
+
+from ..evaluators.evaluator import Evaluator
+from ..experiment import Experiment
+from ..types.evaluation_report import EvaluationReport
+from ._context import _current_chaos_case
+from .case import ChaosCase
+
+logger = logging.getLogger(__name__)
+
+
+class ChaosExperiment:
+    """Runs ChaosCase objects through evaluators with chaos-aware dispatch.
+
+    Sets the active ChaosCase via ContextVar before each task invocation so
+    the ChaosPlugin can read the case's effects at hook time. The user's task
+    body contains zero chaos concepts — the plugin reads the active case from
+    the ContextVar.
+
+    Use ``ChaosCase.expand()`` to generate the Cartesian product of base cases
+    × effect maps before passing them to this experiment.
+
+    Example::
+
+        from strands_evals import Case
+        from strands_evals.chaos import (
+            ChaosCase,
+            ChaosExperiment,
+            ChaosPlugin,
+        )
+        from strands_evals.chaos.effects import ToolCallFailure
+
+        chaos = ChaosPlugin()
+
+        cases = [Case(input="Find flights to Tokyo", name="flight_search")]
+        effect_maps = {
+            "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]},
+            "db_network_error": {"database_tool": [ToolCallFailure(error_type="network_error")]},
+        }
+        chaos_cases = ChaosCase.expand(cases, effect_maps)
+
+        def my_task(case):
+            agent = Agent(tools=[search_tool, database_tool], plugins=[chaos])
+            return {"output": str(agent(case.input))}
+
+        experiment = ChaosExperiment(
+            cases=chaos_cases,
+            evaluators=[my_evaluator],
+        )
+
+        reports = experiment.run_evaluations(task=my_task)
+    """
+
+    def __init__(
+        self,
+        cases: list[ChaosCase],
+        evaluators: Optional[list[Evaluator]] = None,
+    ):
+        """Initialize a ChaosExperiment.
+
+        Args:
+            cases: ChaosCase objects to evaluate. Use ``ChaosCase.expand()``
+                to generate these from base cases and effect maps.
+            evaluators: Evaluators to assess results.
+ """ + self._cases = cases + self._evaluators = evaluators + + # Internal Experiment with the chaos cases + self._experiment = Experiment( + cases=list(cases), + evaluators=evaluators, + ) + + @property + def cases(self) -> list[ChaosCase]: + """The ChaosCase objects configured for this experiment.""" + return self._cases + + def _wrap_task(self, task: Callable[[ChaosCase], Any]) -> Callable[[ChaosCase], Any]: + """Wrap a task function to activate the correct ChaosCase via ContextVar. + + Handles both sync and async tasks — returns a sync wrapper for sync tasks + and an async wrapper for async tasks, so the base Experiment dispatches + correctly. + + Args: + task: The original task function (sync or async). + + Returns: + A wrapped callable that sets/resets the ContextVar around each invocation. + """ + import asyncio + + if asyncio.iscoroutinefunction(task): + + async def chaos_aware_task_async(case: ChaosCase) -> Any: + token = _current_chaos_case.set(case) + try: + return await task(case) + finally: + _current_chaos_case.reset(token) + + return chaos_aware_task_async + else: + + def chaos_aware_task(case: ChaosCase) -> Any: + token = _current_chaos_case.set(case) + try: + return task(case) + finally: + _current_chaos_case.reset(token) + + return chaos_aware_task + + def run_evaluations( + self, + task: Callable[[ChaosCase], Any], + **kwargs, + ) -> list[EvaluationReport]: + """Run evaluations across all ChaosCase objects. + + Delegates to run_evaluations_async with max_workers=1, mirroring the + base Experiment pattern. + + Args: + task: The task function to evaluate. Takes a ChaosCase and returns output. + The task body should contain zero chaos concepts — just construct + the agent with plugins=[chaos] and call it. + **kwargs: Additional kwargs passed to the base Experiment.run_evaluations_async. + + Returns: + List of EvaluationReport objects. + + Raises: + ValueError: If an async task is passed (use run_evaluations_async instead). 
+ """ + import asyncio + + if asyncio.iscoroutinefunction(task): + raise ValueError( + "Async task is not supported in run_evaluations. Please use run_evaluations_async instead." + ) + + return asyncio.run(self.run_evaluations_async(task, max_workers=1, **kwargs)) + + async def run_evaluations_async( + self, + task: Callable[[ChaosCase], Any], + max_workers: int = 10, + **kwargs, + ) -> list[EvaluationReport]: + """Run evaluations asynchronously across all ChaosCase objects. + + Wraps the user's task to set the ContextVar before each case execution. + The base Experiment handles sync-to-async dispatch internally. + + Args: + task: The task function (sync or async). + max_workers: Maximum number of parallel workers. + **kwargs: Additional kwargs passed to the base Experiment.run_evaluations_async. + + Returns: + List of EvaluationReport objects. + """ + wrapped = self._wrap_task(task) + reports = await self._experiment.run_evaluations_async(wrapped, max_workers=max_workers, **kwargs) + + logger.info( + "cases=<%d>, reports=<%d> | chaos experiment complete", + len(self._cases), + len(reports), + ) + + return reports diff --git a/src/strands_evals/chaos/plugin.py b/src/strands_evals/chaos/plugin.py new file mode 100644 index 0000000..8d1dbed --- /dev/null +++ b/src/strands_evals/chaos/plugin.py @@ -0,0 +1,132 @@ +"""Chaos Plugin for Strands Agents. + +Implements chaos injection as a standard Strands Plugin using the SDK's +native hook system (BeforeToolCallEvent / AfterToolCallEvent). + +The plugin reads the active ChaosCase from a module-level ContextVar at hook +time. The ChaosExperiment manages the ContextVar lifecycle. 
+""" + +import json +import logging +import random + +from strands.hooks import AfterToolCallEvent, BeforeToolCallEvent +from strands.plugins import Plugin, hook + +from ._context import _current_chaos_case +from .effects import ChaosEffect, TruncateFields + +logger = logging.getLogger(__name__) + + +class ChaosPlugin(Plugin): + """Strands Plugin that injects deterministic chaos based on the active ChaosCase. + + The plugin intercepts tool calls via Strands' native hook system: + - BeforeToolCallEvent: cancels tool calls for pre-hook effects (ToolCallFailure) + - AfterToolCallEvent: corrupts tool responses for post-hook effects (TruncateFields, etc.) + + The active ChaosCase is managed via a ContextVar (set by ChaosExperiment). + When no ChaosCase is active or the case has no effects, all tools behave normally. + + Example:: + + from strands import Agent + from strands_evals.chaos import ChaosPlugin + + chaos = ChaosPlugin() + agent = Agent( + model=my_model, + tools=[search_tool, database_tool], + plugins=[chaos], + ) + + # The ChaosExperiment handles ChaosCase activation via ContextVar. + # The user's task body contains zero chaos concepts. + """ + + name = "chaos-testing" + + def __init__(self) -> None: + super().__init__() + + @hook # type: ignore[call-overload] + def before_tool_call(self, event: BeforeToolCallEvent) -> None: + """Intercept tool calls to inject pre-hook (error) effects. + + For ToolCallFailure effects (with error_type='timeout', 'network_error', + etc.), cancels the tool call with the effect's error_message before the + tool executes. 
+ """ + chaos_case = _current_chaos_case.get() + if chaos_case is None or not chaos_case.effects: + return + + tool_name = event.tool_use.get("name", "") + effects = chaos_case.effects.get(tool_name, []) + if not effects: + return + + # First pre-hook effect wins (tool is cancelled once) + for effect in effects: + if effect.hook == "pre": + if random.random() > effect.apply_rate: + continue + event.cancel_tool = effect.apply() + logger.info("effect=<%s>, tool=<%s> | injected chaos pre-hook", type(effect).__name__, tool_name) + return + + @hook # type: ignore[call-overload] + def after_tool_call(self, event: AfterToolCallEvent) -> None: + """Intercept tool results to inject post-hook (corruption) effects. + + For corruption effects (TruncateFields, RemoveFields, CorruptValues), + applies effect.apply() to JSON content blocks in the tool response. + """ + chaos_case = _current_chaos_case.get() + if chaos_case is None or not chaos_case.effects: + return + + tool_name = event.tool_use.get("name", "") + effects = chaos_case.effects.get(tool_name, []) + if not effects: + return + + # Apply all post-hook effects sequentially (they compose) + for effect in effects: + if effect.hook != "post": + continue + + if random.random() > effect.apply_rate: + continue + + if event.result is None: + continue + + result = event.result + content = result.get("content") + + if isinstance(content, list): + result["content"] = self._apply_to_blocks(effect, content) # type: ignore[assignment] + + logger.info("effect=<%s>, tool=<%s> | applied chaos post-hook", type(effect).__name__, tool_name) + + def _apply_to_blocks(self, effect: ChaosEffect, blocks: list) -> list: + """Apply effect to text blocks in a content list.""" + corrupted_blocks = [] + for block in blocks: + if isinstance(block, dict) and "text" in block: + text_data = block["text"] + if isinstance(text_data, str): + try: + parsed = json.loads(text_data) + if isinstance(parsed, dict): + corrupted = effect.apply(parsed) + block = 
{**block, "text": json.dumps(corrupted)} + except (json.JSONDecodeError, ValueError): + # Plain text — apply truncation if effect is TruncateFields + if isinstance(effect, TruncateFields): + block = {**block, "text": text_data[: effect.max_length]} + corrupted_blocks.append(block) + return corrupted_blocks diff --git a/tests/strands_evals/chaos/__init__.py b/tests/strands_evals/chaos/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/strands_evals/chaos/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/strands_evals/chaos/test_case.py b/tests/strands_evals/chaos/test_case.py new file mode 100644 index 0000000..7a57ea7 --- /dev/null +++ b/tests/strands_evals/chaos/test_case.py @@ -0,0 +1,176 @@ +"""Unit tests for ChaosCase.""" + +from strands_evals import Case +from strands_evals.chaos import ChaosCase +from strands_evals.chaos.effects import CorruptValues, ToolCallFailure, TruncateFields + + +class TestChaosCase: + """Tests for the ChaosCase data model.""" + + def test_baseline_case_has_no_effects(self): + case = ChaosCase(name="baseline", input="hello") + assert case.effects == {} + + def test_case_with_effects(self): + case = ChaosCase( + name="search_timeout", + input="hello", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + assert len(case.effects) == 1 + assert isinstance(case.effects["search_tool"][0], ToolCallFailure) + + def test_case_with_multiple_tools(self): + case = ChaosCase( + name="compound", + input="hello", + effects={ + "search_tool": [ToolCallFailure(error_type="timeout")], + "db_tool": [CorruptValues(corrupt_ratio=0.8)], + }, + ) + assert len(case.effects) == 2 + + def test_case_with_multiple_effects_per_tool(self): + case = ChaosCase( + name="multi_effect", + input="hello", + effects={ + "tool_a": [ + TruncateFields(max_length=5), + CorruptValues(corrupt_ratio=0.3), + ], + }, + ) + assert len(case.effects["tool_a"]) == 2 + + def test_inherits_case_fields(self): + case = ChaosCase( + 
name="with_expected", + input="hello", + expected_output="world", + expected_trajectory=["tool_a"], + metadata={"key": "value"}, + effects={"tool_a": [ToolCallFailure()]}, + ) + assert case.input == "hello" + assert case.expected_output == "world" + assert case.expected_trajectory == ["tool_a"] + assert case.metadata == {"key": "value"} + + def test_repr_shows_effects(self): + case = ChaosCase( + name="test", + input="hello", + effects={"tool": [ToolCallFailure()]}, + ) + repr_str = repr(case) + assert "test" in repr_str + assert "ToolCallFailure" in repr_str + + +class TestChaosCaseExpand: + """Tests for the ChaosCase.expand() class method.""" + + def test_expand_with_baseline(self): + cases = [ + Case(name="case_a", input="hello"), + Case(name="case_b", input="world"), + ] + effect_maps = { + "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}, + "db_corrupt": {"db_tool": [CorruptValues(corrupt_ratio=0.8)]}, + } + result = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + # 2 cases × (2 effect maps + 1 baseline) = 6 + assert len(result) == 6 + + def test_expand_without_baseline(self): + cases = [ + Case(name="case_a", input="hello"), + Case(name="case_b", input="world"), + ] + effect_maps = { + "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}, + } + result = ChaosCase.expand(cases, effect_maps) + # 2 cases × 1 effect map = 2 (no baseline by default) + assert len(result) == 2 + + def test_expand_baseline_names(self): + cases = [Case(name="case_a", input="hello")] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + names = [c.name for c in result] + assert "case_a|baseline" in names + + def test_expand_uses_dict_keys_as_names(self): + cases = [Case(name="case_a", input="hello")] + effect_maps = {"search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}} + result = ChaosCase.expand(cases, 
effect_maps) + assert result[0].name == "case_a|search_timeout" + + def test_expand_compound_effect_name(self): + cases = [Case(name="case_a", input="hello")] + effect_maps = { + "multi_failure": { + "search_tool": [ToolCallFailure(error_type="timeout")], + "db_tool": [CorruptValues()], + } + } + result = ChaosCase.expand(cases, effect_maps) + assert result[0].name == "case_a|multi_failure" + + def test_expand_unique_session_ids(self): + cases = [Case(name="case_a", input="hello"), Case(name="case_b", input="world")] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps) + session_ids = [c.session_id for c in result] + assert len(session_ids) == len(set(session_ids)) + + def test_expand_preserves_case_fields(self): + cases = [ + Case( + name="case_a", + input="hello", + expected_output="world", + expected_trajectory=["tool_a"], + metadata={"key": "value"}, + ) + ] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps) + expanded = result[0] + assert expanded.input == "hello" + assert expanded.expected_output == "world" + assert expanded.expected_trajectory == ["tool_a"] + assert expanded.metadata == {"key": "value"} + + def test_expand_baseline_has_empty_effects(self): + cases = [Case(name="case_a", input="hello")] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + baseline = [c for c in result if "baseline" in c.name][0] + assert baseline.effects == {} + + def test_expand_empty_effect_maps_with_baseline(self): + cases = [Case(name="case_a", input="hello")] + result = ChaosCase.expand(cases, {}, include_no_effect_baseline=True) + # Only baseline + assert len(result) == 1 + assert "baseline" in result[0].name + + def test_expand_empty_effect_maps_without_baseline(self): + cases = [Case(name="case_a", input="hello")] + result = ChaosCase.expand(cases, {}) + # No baseline by 
default, no effect maps → empty + assert len(result) == 0 + + def test_expand_case_without_name(self): + cases = [Case(input="hello")] + effect_maps = {"timeout": {"tool": [ToolCallFailure()]}} + result = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + names = [c.name for c in result] + assert "baseline" in names + assert "timeout" in names diff --git a/tests/strands_evals/chaos/test_context.py b/tests/strands_evals/chaos/test_context.py new file mode 100644 index 0000000..ebc2d03 --- /dev/null +++ b/tests/strands_evals/chaos/test_context.py @@ -0,0 +1,42 @@ +"""Unit tests for the chaos _context module.""" + +from strands_evals.chaos import ChaosCase +from strands_evals.chaos._context import _current_chaos_case + + +class TestContextVar: + """Tests for the _current_chaos_case ContextVar.""" + + def test_default_is_none(self): + assert _current_chaos_case.get() is None + + def test_set_and_get(self): + case = ChaosCase(name="test_case", input="hello") + token = _current_chaos_case.set(case) + try: + assert _current_chaos_case.get() is case + assert _current_chaos_case.get().name == "test_case" + finally: + _current_chaos_case.reset(token) + + def test_nested_set_and_reset(self): + c1 = ChaosCase(name="outer", input="hello") + c2 = ChaosCase(name="inner", input="world") + + token1 = _current_chaos_case.set(c1) + try: + assert _current_chaos_case.get().name == "outer" + token2 = _current_chaos_case.set(c2) + try: + assert _current_chaos_case.get().name == "inner" + finally: + _current_chaos_case.reset(token2) + assert _current_chaos_case.get().name == "outer" + finally: + _current_chaos_case.reset(token1) + + def test_reset_restores_none(self): + case = ChaosCase(name="test", input="hello") + token = _current_chaos_case.set(case) + _current_chaos_case.reset(token) + assert _current_chaos_case.get() is None diff --git a/tests/strands_evals/chaos/test_effects.py b/tests/strands_evals/chaos/test_effects.py new file mode 100644 index 
0000000..c55821d --- /dev/null +++ b/tests/strands_evals/chaos/test_effects.py @@ -0,0 +1,163 @@ +"""Unit tests for chaos effect classes.""" + +import random + +import pytest + +from strands_evals.chaos.effects import ( + CorruptValues, + RemoveFields, + ToolCallFailure, + TruncateFields, +) + + +class TestToolCallFailure: + """Tests for the ToolCallFailure pre-hook effect.""" + + @pytest.mark.parametrize( + "error_type,expected_message", + [ + ("timeout", "Tool call timed out"), + ("network_error", "Network unreachable"), + ("execution_error", "Tool execution failed"), + ("validation_error", "Tool input validation failed"), + ], + ) + def test_apply_returns_default_message(self, error_type, expected_message): + effect = ToolCallFailure(error_type=error_type) + assert effect.apply() == expected_message + + def test_apply_returns_custom_message_when_provided(self): + effect = ToolCallFailure(error_type="timeout", error_message="Custom timeout msg") + assert effect.apply() == "Custom timeout msg" + + def test_apply_rate_defaults_to_one(self): + effect = ToolCallFailure() + assert effect.apply_rate == 1.0 + + +class TestTruncateFields: + """Tests for the TruncateFields post-hook effect.""" + + def test_truncates_long_strings(self): + effect = TruncateFields(max_length=5) + response = {"name": "hello world", "short": "hi"} + result = effect.apply(response) + assert result["name"] == "hello" + assert result["short"] == "hi" + + def test_preserves_non_string_values(self): + effect = TruncateFields(max_length=3) + response = {"count": 42, "flag": True, "items": [1, 2, 3]} + result = effect.apply(response) + assert result["count"] == 42 + assert result["flag"] is True + assert result["items"] == [1, 2, 3] + + def test_truncates_nested_dicts(self): + effect = TruncateFields(max_length=3) + response = {"nested": {"deep_value": "abcdef"}} + result = effect.apply(response) + assert result["nested"]["deep_value"] == "abc" + + def test_empty_dict_returns_empty(self): + effect = 
TruncateFields(max_length=5) + assert effect.apply({}) == {} + + def test_non_dict_input_returned_as_is(self): + effect = TruncateFields(max_length=5) + assert effect.apply("not a dict") == "not a dict" + assert effect.apply(None) is None + + def test_zero_max_length_truncates_all_strings(self): + effect = TruncateFields(max_length=0) + response = {"text": "hello"} + result = effect.apply(response) + assert result["text"] == "" + + +class TestRemoveFields: + """Tests for the RemoveFields post-hook effect.""" + + def test_removes_at_least_one_field(self): + random.seed(42) + effect = RemoveFields(remove_ratio=0.1) + response = {"a": 1, "b": 2, "c": 3, "d": 4} + result = effect.apply(response) + assert len(result) < len(response) + + def test_removes_half_fields(self): + random.seed(42) + effect = RemoveFields(remove_ratio=0.5) + response = {"a": 1, "b": 2, "c": 3, "d": 4} + result = effect.apply(response) + assert len(result) == 2 + + def test_removes_all_fields_at_ratio_one(self): + random.seed(42) + effect = RemoveFields(remove_ratio=1.0) + response = {"a": 1, "b": 2, "c": 3} + result = effect.apply(response) + assert len(result) == 0 + + def test_empty_dict_returns_empty(self): + effect = RemoveFields(remove_ratio=0.5) + assert effect.apply({}) == {} + + def test_non_dict_input_returned_as_is(self): + effect = RemoveFields(remove_ratio=0.5) + assert effect.apply("not a dict") == "not a dict" + assert effect.apply(None) is None + + def test_single_field_always_removed(self): + random.seed(42) + effect = RemoveFields(remove_ratio=0.5) + response = {"only_key": "value"} + result = effect.apply(response) + assert len(result) == 0 + + +class TestCorruptValues: + """Tests for the CorruptValues post-hook effect.""" + + def test_corrupts_at_least_one_field(self): + random.seed(42) + effect = CorruptValues(corrupt_ratio=0.1) + response = {"a": "original_a", "b": "original_b", "c": "original_c", "d": "original_d"} + result = effect.apply(response) + corrupted_count = sum(1 
for k in response if result[k] != response[k]) + assert corrupted_count >= 1 + + def test_corrupted_values_come_from_corruption_pool(self): + random.seed(42) + effect = CorruptValues(corrupt_ratio=1.0) + response = {"a": "original", "b": "data"} + result = effect.apply(response) + corruption_pool = [None, 99999, "", True, [], "CORRUPTED_DATA"] + for key in response: + assert result[key] in corruption_pool + + def test_corrupts_nested_dicts_recursively(self): + random.seed(42) + effect = CorruptValues(corrupt_ratio=1.0) + response = {"top": "value", "nested": {"inner": "deep_value"}} + result = effect.apply(response) + # The nested dict should also be processed + assert "nested" in result or "top" in result + + def test_empty_dict_returns_empty(self): + effect = CorruptValues(corrupt_ratio=0.5) + assert effect.apply({}) == {} + + def test_non_dict_input_returned_as_is(self): + effect = CorruptValues(corrupt_ratio=0.5) + assert effect.apply("not a dict") == "not a dict" + assert effect.apply(None) is None + + def test_corrupted_value_differs_from_original(self): + random.seed(42) + effect = CorruptValues(corrupt_ratio=1.0) + response = {"key": "unique_original_value"} + result = effect.apply(response) + assert result["key"] != "unique_original_value" diff --git a/tests/strands_evals/chaos/test_experiment.py b/tests/strands_evals/chaos/test_experiment.py new file mode 100644 index 0000000..4fb0c57 --- /dev/null +++ b/tests/strands_evals/chaos/test_experiment.py @@ -0,0 +1,132 @@ +"""Unit tests for ChaosExperiment.""" + +import pytest + +from strands_evals import Case +from strands_evals.chaos import ChaosCase, ChaosExperiment +from strands_evals.chaos._context import _current_chaos_case +from strands_evals.chaos.effects import CorruptValues, ToolCallFailure +from strands_evals.evaluators.evaluator import Evaluator +from strands_evals.types import EvaluationData, EvaluationOutput + + +class MockChaosEvaluator(Evaluator): + """Simple evaluator that always passes.""" + + 
def evaluate(self, evaluation_case: EvaluationData) -> list[EvaluationOutput]: + return [EvaluationOutput(score=1.0, test_pass=True, reason="Mock pass")] + + +@pytest.fixture +def cases(): + return [ + Case(name="case_a", input="hello"), + Case(name="case_b", input="world"), + ] + + +@pytest.fixture +def effect_maps(): + return { + "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]}, + "db_corrupt": {"db_tool": [CorruptValues(corrupt_ratio=0.8)]}, + } + + +@pytest.fixture +def evaluator(): + return MockChaosEvaluator() + + +class TestChaosExperiment: + """Tests for ChaosExperiment initialization and execution.""" + + def test_cases_count_with_baseline(self, cases, effect_maps, evaluator): + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + # 2 cases × (2 effect maps + 1 baseline) = 6 + assert len(experiment.cases) == 6 + + def test_cases_count_without_baseline(self, cases, effect_maps, evaluator): + chaos_cases = ChaosCase.expand(cases, effect_maps) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + # 2 cases × 2 effect maps = 4 + assert len(experiment.cases) == 4 + + def test_case_names_include_effect_map_key(self, cases, effect_maps, evaluator): + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + names = [c.name for c in experiment.cases] + assert "case_a|baseline" in names + assert "case_b|baseline" in names + assert "case_a|search_timeout" in names + assert "case_b|db_corrupt" in names + + def test_each_case_has_unique_session_id(self, cases, effect_maps, evaluator): + chaos_cases = ChaosCase.expand(cases, effect_maps) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + session_ids = [c.session_id for c in experiment.cases] + assert len(session_ids) == len(set(session_ids)) 
+ + def test_context_var_set_and_reset(self, cases, effect_maps, evaluator): + """Verify the ContextVar is set to the correct ChaosCase during task execution and reset after.""" + observed_cases = [] + + def capturing_task(case: ChaosCase): + active_case = _current_chaos_case.get() + observed_cases.append((case.name, active_case.name if active_case else None)) + return "output" + + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + experiment.run_evaluations(task=capturing_task) + + # Should have 6 observations (2 cases × 3 conditions) + assert len(observed_cases) == 6 + + # Verify the ContextVar matched the case being executed + for case_name, active_name in observed_cases: + assert case_name == active_name + + # After all runs, the ContextVar should be back to None + assert _current_chaos_case.get() is None + + def test_context_var_reset_on_task_exception(self, evaluator): + """Verify the ContextVar is reset even if the task raises.""" + cases = [Case(name="failing", input="x")] + effect_maps = {"chaos": {"t": [ToolCallFailure()]}} + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + + call_count = [0] + + def failing_task(case: ChaosCase): + call_count[0] += 1 + if call_count[0] == 1: + raise RuntimeError("Task failed") + return "output" + + experiment = ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + + # The base Experiment should handle the exception internally + # ContextVar should still be reset + try: + experiment.run_evaluations(task=failing_task) + except Exception: + pass + + assert _current_chaos_case.get() is None + + def test_returns_evaluation_reports(self, cases, effect_maps, evaluator): + """Verify run_evaluations returns reports.""" + + def task(case: ChaosCase): + return "output" + + chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True) + experiment = 
ChaosExperiment(cases=chaos_cases, evaluators=[evaluator]) + reports = experiment.run_evaluations(task=task) + + assert len(reports) >= 1 + report = reports[0] + # 2 cases × 3 conditions = 6 scores + assert len(report.scores) == 6 diff --git a/tests/strands_evals/chaos/test_plugin.py b/tests/strands_evals/chaos/test_plugin.py new file mode 100644 index 0000000..45b1844 --- /dev/null +++ b/tests/strands_evals/chaos/test_plugin.py @@ -0,0 +1,268 @@ +"""Unit tests for ChaosPlugin.""" + +import json +from unittest.mock import MagicMock + +import pytest + +from strands_evals.chaos import ChaosCase, ChaosPlugin +from strands_evals.chaos._context import _current_chaos_case +from strands_evals.chaos.effects import ( + ToolCallFailure, + TruncateFields, +) + + +@pytest.fixture +def chaos_plugin(): + return ChaosPlugin() + + +@pytest.fixture +def before_event(): + """Create a mock BeforeToolCallEvent.""" + event = MagicMock() + event.tool_use = {"name": "search_tool"} + event.cancel_tool = None + return event + + +@pytest.fixture +def after_event(): + """Create a mock AfterToolCallEvent with list content.""" + event = MagicMock() + event.tool_use = {"name": "search_tool"} + event.result = { + "content": [{"text": json.dumps({"title": "Long Title Here", "count": 42})}], + "status": "success", + "toolUseId": "tool-123", + } + return event + + +class TestChaosPluginBeforeToolCall: + """Tests for the before_tool_call hook.""" + + def test_no_case_active_does_nothing(self, chaos_plugin, before_event): + token = _current_chaos_case.set(None) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool is None + finally: + _current_chaos_case.reset(token) + + def test_case_without_matching_tool_does_nothing(self, chaos_plugin, before_event): + case = ChaosCase( + name="other_tool_fails", + input="test", + effects={"other_tool": [ToolCallFailure(error_type="timeout")]}, + ) + token = _current_chaos_case.set(case) + try: + 
chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool is None + finally: + _current_chaos_case.reset(token) + + def test_pre_hook_effect_cancels_tool(self, chaos_plugin, before_event): + case = ChaosCase( + name="search_timeout", + input="test", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool == "Tool call timed out" + finally: + _current_chaos_case.reset(token) + + def test_post_hook_effect_does_not_cancel_tool(self, chaos_plugin, before_event): + case = ChaosCase( + name="search_truncated", + input="test", + effects={"search_tool": [TruncateFields(max_length=5)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool is None + finally: + _current_chaos_case.reset(token) + + def test_first_pre_hook_effect_wins(self, chaos_plugin, before_event): + case = ChaosCase( + name="multi_pre", + input="test", + effects={ + "search_tool": [ + ToolCallFailure(error_type="timeout"), + ToolCallFailure(error_type="network_error"), + ] + }, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool == "Tool call timed out" + finally: + _current_chaos_case.reset(token) + + +class TestChaosPluginAfterToolCall: + """Tests for the after_tool_call hook.""" + + def test_no_case_active_does_nothing(self, chaos_plugin, after_event): + token = _current_chaos_case.set(None) + try: + original_content = after_event.result["content"][0]["text"] + chaos_plugin.after_tool_call(after_event) + assert after_event.result["content"][0]["text"] == original_content + finally: + _current_chaos_case.reset(token) + + def test_case_without_matching_tool_does_nothing(self, chaos_plugin, after_event): + case = ChaosCase( + name="other_tool", + input="test", + effects={"other_tool": 
[TruncateFields(max_length=3)]}, + ) + token = _current_chaos_case.set(case) + try: + original_content = after_event.result["content"][0]["text"] + chaos_plugin.after_tool_call(after_event) + assert after_event.result["content"][0]["text"] == original_content + finally: + _current_chaos_case.reset(token) + + def test_post_hook_corrupts_json_text_blocks(self, chaos_plugin, after_event): + case = ChaosCase( + name="truncate", + input="test", + effects={"search_tool": [TruncateFields(max_length=3)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.after_tool_call(after_event) + corrupted = json.loads(after_event.result["content"][0]["text"]) + assert corrupted["title"] == "Lon" + assert corrupted["count"] == 42 # non-string preserved + finally: + _current_chaos_case.reset(token) + + def test_pre_hook_effect_ignored_in_after_hook(self, chaos_plugin, after_event): + case = ChaosCase( + name="pre_only", + input="test", + effects={"search_tool": [ToolCallFailure(error_type="timeout")]}, + ) + token = _current_chaos_case.set(case) + try: + original_content = after_event.result["content"][0]["text"] + chaos_plugin.after_tool_call(after_event) + assert after_event.result["content"][0]["text"] == original_content + finally: + _current_chaos_case.reset(token) + + def test_none_result_is_skipped(self, chaos_plugin): + event = MagicMock() + event.tool_use = {"name": "search_tool"} + event.result = None + + case = ChaosCase( + name="truncate", + input="test", + effects={"search_tool": [TruncateFields(max_length=3)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.after_tool_call(event) # Should not raise + finally: + _current_chaos_case.reset(token) + + def test_plain_text_truncation(self, chaos_plugin): + """Test that plain (non-JSON) text blocks get truncated if effect has max_length.""" + event = MagicMock() + event.tool_use = {"name": "search_tool"} + event.result = { + "content": [{"text": "This is plain text, not JSON"}], + "status": 
"success", + "toolUseId": "tool-456", + } + + case = ChaosCase( + name="truncate", + input="test", + effects={"search_tool": [TruncateFields(max_length=4)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.after_tool_call(event) + assert event.result["content"][0]["text"] == "This" + finally: + _current_chaos_case.reset(token) + + +class TestApplyRate: + """Tests for the apply_rate probability check in ChaosPlugin.""" + + def test_apply_rate_zero_skips_pre_hook_effect(self, chaos_plugin, before_event): + """Effect with apply_rate=0.0 should never fire.""" + case = ChaosCase( + name="never_fires", + input="test", + effects={"search_tool": [ToolCallFailure(error_type="timeout", apply_rate=0.0)]}, + ) + token = _current_chaos_case.set(case) + try: + # Run multiple times to confirm it never fires + for _ in range(20): + before_event.cancel_tool = None + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool is None + finally: + _current_chaos_case.reset(token) + + def test_apply_rate_one_always_fires_pre_hook(self, chaos_plugin, before_event): + """Effect with apply_rate=1.0 should always fire.""" + case = ChaosCase( + name="always_fires", + input="test", + effects={"search_tool": [ToolCallFailure(error_type="timeout", apply_rate=1.0)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.before_tool_call(before_event) + assert before_event.cancel_tool == "Tool call timed out" + finally: + _current_chaos_case.reset(token) + + def test_apply_rate_zero_skips_post_hook_effect(self, chaos_plugin, after_event): + """Post-hook effect with apply_rate=0.0 should never fire.""" + case = ChaosCase( + name="never_truncates", + input="test", + effects={"search_tool": [TruncateFields(max_length=3, apply_rate=0.0)]}, + ) + token = _current_chaos_case.set(case) + try: + original_content = after_event.result["content"][0]["text"] + chaos_plugin.after_tool_call(after_event) + assert after_event.result["content"][0]["text"] == 
original_content + finally: + _current_chaos_case.reset(token) + + def test_apply_rate_one_always_fires_post_hook(self, chaos_plugin, after_event): + """Post-hook effect with apply_rate=1.0 should always fire.""" + case = ChaosCase( + name="always_truncates", + input="test", + effects={"search_tool": [TruncateFields(max_length=3, apply_rate=1.0)]}, + ) + token = _current_chaos_case.set(case) + try: + chaos_plugin.after_tool_call(after_event) + corrupted = json.loads(after_event.result["content"][0]["text"]) + assert corrupted["title"] == "Lon" + finally: + _current_chaos_case.reset(token)