Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/strands_evals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import detectors, evaluators, extractors, generators, providers, simulation, telemetry, types
from . import chaos, detectors, evaluators, extractors, generators, providers, simulation, telemetry, types
from .case import Case
from .eval_task_handler import EvalTaskHandler, TracedHandler, eval_task
from .evaluation_data_store import EvaluationDataStore
Expand All @@ -17,6 +17,7 @@
"EvalTaskHandler",
"TracedHandler",
"eval_task",
"chaos",
"detectors",
"evaluators",
"extractors",
Expand Down
32 changes: 32 additions & 0 deletions src/strands_evals/chaos/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Chaos testing module for Strands Evals.

Provides deterministic fault injection for evaluating agent resilience
under tool failures and response corruption scenarios.
"""

from .case import ChaosCase
from .effects import (
ChaosEffect,
CorruptValues,
RemoveFields,
ToolCallFailure,
ToolEffect,
TruncateFields,
)
from .experiment import ChaosExperiment
from .plugin import ChaosPlugin

# Public API of the chaos package. Every name listed here is imported at the
# top of this module (from .case, .effects, .experiment and .plugin) and
# re-exported for ``from strands_evals.chaos import ...``.
__all__ = [
    # Core classes
    "ChaosCase",
    "ChaosExperiment",
    "ChaosPlugin",
    # Effect hierarchy
    "ChaosEffect",
    "ToolEffect",
    # Concrete effects
    "ToolCallFailure",
    "TruncateFields",
    "RemoveFields",
    "CorruptValues",
]
21 changes: 21 additions & 0 deletions src/strands_evals/chaos/_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Internal context variable for tracking the active chaos case.

The ChaosPlugin reads from this ContextVar at hook time.
The ChaosExperiment sets and resets it around each case's task invocation.

Using a ContextVar ensures correct behavior under:
- Sequential execution (trivially correct)
- Async execution (each asyncio.Task inherits the var from its parent)
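- Threaded execution (each thread has its own context, so a value set in one
  thread is not visible in another; note that a newly started thread does not,
  by default, inherit values set by the thread that started it and will see
  the default ``None`` unless the context is propagated explicitly, e.g. with
  ``contextvars.copy_context().run`` or ``asyncio.to_thread``)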
"""

from contextvars import ContextVar
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from .case import ChaosCase

# The chaos case that is active in the current context, or None (the default)
# when no case has been set. Per the module docstring, ChaosExperiment sets and
# resets this around each task invocation and ChaosPlugin reads it at hook time.
# The annotation is a string because ChaosCase is imported only under
# TYPE_CHECKING and is therefore not available at runtime in this module.
_current_chaos_case: ContextVar["ChaosCase | None"] = ContextVar(
    "chaos_current_case",
    default=None,
)
123 changes: 123 additions & 0 deletions src/strands_evals/chaos/case.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Chaos case definition.

A ChaosCase extends Case with chaos-specific fields, providing a stable
extension point for failure injection configuration without modifying the
base Case class.
"""

import uuid

from pydantic import Field
from typing_extensions import Generic

from ..case import Case
from ..types.evaluation import InputT, OutputT
from .effects import ChaosEffect


class ChaosCase(Case, Generic[InputT, OutputT]):
    """A test case with associated chaos effects.

    Extends Case to carry the effects mapping that the ChaosPlugin reads
    at hook time. A ChaosCase with empty effects is a baseline run.

    The ``expand`` class method provides the Cartesian product of cases ×
    effect maps, producing a flat list of ChaosCase objects ready for
    ChaosExperiment.

    Attributes:
        effects: Mapping of tool_name -> list of effects to inject for this case.
            Tools not listed behave normally. Empty dict means baseline (no chaos).

    Example::

        from strands_evals import Case
        from strands_evals.chaos import ChaosCase
        from strands_evals.chaos.effects import ToolCallFailure, TruncateFields

        # Direct construction
        chaos_case = ChaosCase(
            name="search_timeout",
            input="Find flights to Tokyo",
            effects={"search_tool": [ToolCallFailure(error_type="timeout")]},
        )

        # Expansion from base cases × named effect maps
        cases = [
            Case(name="flight_search", input="Find flights to Tokyo"),
            Case(name="hotel_search", input="Find hotels in Tokyo"),
        ]
        effect_maps = {
            "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]},
            "search_truncated": {"search_tool": [TruncateFields(max_length=5)]},
        }
        chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True)
        # Produces 6 ChaosCase objects: 2 cases × (2 effect maps + 1 baseline)
    """

    effects: dict[str, list[ChaosEffect]] = Field(
        default_factory=dict,
        description="Mapping of tool_name -> list of effects to inject for this case. "
        "Empty dict means baseline (no chaos).",
    )

    @classmethod
    def expand(
        cls,
        cases: list[Case],
        effect_maps: dict[str, dict[str, list[ChaosEffect]]],
        include_no_effect_baseline: bool = False,
    ) -> list["ChaosCase"]:
        """Build one ChaosCase for every (case, named effect map) pair.

        The result is ordered case-major: all conditions for the first case,
        then all conditions for the second, and so on. Within a case the
        optional baseline comes first, followed by the effect maps in the
        iteration order of ``effect_maps``.

        Every produced ChaosCase receives a newly generated UUID4 string as
        its ``session_id`` and copies ``input``, the ``expected_*`` fields and
        ``metadata`` from the base case.

        Args:
            cases: Base test cases to expand.
            effect_maps: Named effect configurations. Each key is a short
                human-readable label that becomes part of the composite case
                name; each value maps tool_name -> list of ChaosEffect
                instances and is passed through as the case's ``effects``.
            include_no_effect_baseline: When True, a condition labelled
                "baseline" with an empty effects mapping is added ahead of
                the named effect maps for each case. Defaults to False.

        Returns:
            Flat list of ChaosCase objects named "<case name>|<label>", e.g.
            "flight_search|baseline" or "flight_search|search_timeout". When
            the base case has no (or an empty) name, the label alone is used.
        """
        # Conditions applied to every base case, as (label, effects) pairs.
        conditions: list[tuple[str, dict[str, list[ChaosEffect]]]] = (
            [("baseline", {})] if include_no_effect_baseline else []
        )
        conditions.extend(effect_maps.items())

        return [
            cls(
                name=f"{base.name}|{label}" if base.name else label,
                # Each expanded case gets its own session identifier.
                session_id=str(uuid.uuid4()),
                input=base.input,
                expected_output=base.expected_output,
                expected_assertion=base.expected_assertion,
                expected_trajectory=base.expected_trajectory,
                expected_interactions=base.expected_interactions,
                expected_environment_state=base.expected_environment_state,
                metadata=base.metadata,
                effects=injected,
            )
            for base in cases
            for label, injected in conditions
        ]

    def __repr__(self) -> str:
        """Return a compact summary: the case name plus effect class names per tool."""
        per_tool: list[str] = []
        for tool_name, tool_effects in self.effects.items():
            class_names = ", ".join(type(effect).__name__ for effect in tool_effects)
            per_tool.append(f"{tool_name}: [{class_names}]")
        summary = ", ".join(per_tool)
        return f"ChaosCase(name='{self.name}', effects={{{summary}}})"
Loading
Loading