Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/strands_evals/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import detectors, evaluators, extractors, generators, providers, simulation, telemetry, types
from . import chaos, detectors, evaluators, extractors, generators, providers, simulation, telemetry, types
from .case import Case
from .eval_task_handler import EvalTaskHandler, TracedHandler, eval_task
from .evaluation_data_store import EvaluationDataStore
Expand All @@ -17,6 +17,7 @@
"EvalTaskHandler",
"TracedHandler",
"eval_task",
"chaos",
"detectors",
"evaluators",
"extractors",
Expand Down
32 changes: 32 additions & 0 deletions src/strands_evals/chaos/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Chaos testing module for Strands Evals.

Provides deterministic fault injection for evaluating agent resilience
under tool failures and response corruption scenarios.
"""

from .case import ChaosCase
from .effects import (
ChaosEffect,
CorruptValues,
RemoveFields,
ToolCallFailure,
ToolEffect,
TruncateFields,
)
from .experiment import ChaosExperiment
from .plugin import ChaosPlugin

# Public API of the chaos package. Every name listed here is imported from
# one of the sibling submodules (case, effects, experiment, plugin) above.
__all__ = [
# Core classes
"ChaosCase",
"ChaosExperiment",
"ChaosPlugin",
# Effect hierarchy (base types)
"ChaosEffect",
"ToolEffect",
# Concrete effects
"ToolCallFailure",
"TruncateFields",
"RemoveFields",
"CorruptValues",
]
21 changes: 21 additions & 0 deletions src/strands_evals/chaos/_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Internal context variable for tracking the active chaos case.

The ChaosPlugin reads from this ContextVar at hook time.
The ChaosExperiment sets and resets it around each case's task invocation.

Using a ContextVar ensures correct behavior under:
- Sequential execution (trivially correct)
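- Async execution (each asyncio.Task runs in a copy of the context that was
  current when the task was created, so it sees the value set by its creator)
- Threaded execution (each thread runs in its own context, so setting the var
  in one thread does not change what other threads see)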
"""

from contextvars import ContextVar
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from .case import ChaosCase

# Context variable holding the chaos case that is active in the current
# execution context. It defaults to None, meaning no chaos case is active.
# The type is written as a string because ChaosCase is imported only under
# TYPE_CHECKING and is therefore not available at runtime in this module.
_current_chaos_case: ContextVar["ChaosCase | None"] = ContextVar(
"chaos_current_case",
default=None,
)
123 changes: 123 additions & 0 deletions src/strands_evals/chaos/case.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Chaos case definition.

A ChaosCase extends Case with chaos-specific fields, providing a stable
extension point for failure injection configuration without modifying the
base Case class.
"""

import uuid

from pydantic import Field
from typing_extensions import Generic

from ..case import Case
from ..types.evaluation import InputT, OutputT
from .effects import ChaosEffect


class ChaosCase(Case, Generic[InputT, OutputT]):
    """Test case that carries the chaos effects to inject while it runs.

    Adds an ``effects`` mapping on top of ``Case``; the ChaosPlugin reads that
    mapping at hook time. A ChaosCase whose ``effects`` is empty is a baseline
    run.

    The ``expand`` class method builds the Cartesian product of base cases ×
    named effect maps and returns it as a flat list of ChaosCase objects ready
    for ChaosExperiment.

    Attributes:
        effects: Mapping of tool_name -> list of effects to inject for this
            case. Tools that are not listed behave normally. An empty dict
            means baseline (no chaos).

    Example::

        from strands_evals import Case
        from strands_evals.chaos import ChaosCase
        from strands_evals.chaos.effects import ToolCallFailure, TruncateFields

        # Direct construction
        chaos_case = ChaosCase(
            name="search_timeout",
            input="Find flights to Tokyo",
            effects={"search_tool": [ToolCallFailure(error_type="timeout")]},
        )

        # Expansion from base cases × named effect maps
        cases = [
            Case(name="flight_search", input="Find flights to Tokyo"),
            Case(name="hotel_search", input="Find hotels in Tokyo"),
        ]
        effect_maps = {
            "search_timeout": {"search_tool": [ToolCallFailure(error_type="timeout")]},
            "search_truncated": {"search_tool": [TruncateFields(max_length=5)]},
        }
        chaos_cases = ChaosCase.expand(cases, effect_maps, include_no_effect_baseline=True)
        # Produces 6 ChaosCase objects: 2 cases × (2 effect maps + 1 baseline)
    """

    effects: dict[str, list[ChaosEffect]] = Field(
        default_factory=dict,
        description="Mapping of tool_name -> list of effects to inject for this case. "
        "Empty dict means baseline (no chaos).",
    )

    @classmethod
    def expand(
        cls,
        cases: list[Case],
        effect_maps: dict[str, dict[str, list[ChaosEffect]]],
        include_no_effect_baseline: bool = False,
    ) -> list["ChaosCase"]:
        """Build one ChaosCase per (case, named effect map) combination.

        The result is ordered case by case. Within a case the optional
        baseline comes first, followed by the effect maps in the iteration
        order of ``effect_maps``. Every generated ChaosCase receives a newly
        generated UUID4 string as ``session_id``. The remaining fields
        (input, expected_* values, metadata) are passed through from the base
        case, and the effect map is passed through as given.

        Args:
            cases: Base test cases to expand.
            effect_maps: Named effect configurations. Each key is a short
                human-readable name that becomes part of the composite case
                name; each value maps tool_name -> list of ChaosEffect
                instances.
            include_no_effect_baseline: When True, a condition named
                "baseline" with an empty effects dict is added for every
                case. Defaults to False.

        Returns:
            Flat list of ChaosCase objects named "<case name>|<condition>",
            e.g. "flight_search|baseline" or "flight_search|search_timeout".
            When the base case has no (or an empty) name, the condition name
            alone is used.
        """
        # Conditions are (label, effects) pairs; the baseline, if requested,
        # is placed ahead of the user-supplied effect maps.
        conditions: list[tuple[str, dict[str, list[ChaosEffect]]]] = (
            [("baseline", {})] if include_no_effect_baseline else []
        )
        conditions.extend(effect_maps.items())

        return [
            cls(
                name=f"{base.name}|{label}" if base.name else label,
                session_id=str(uuid.uuid4()),
                input=base.input,
                expected_output=base.expected_output,
                expected_assertion=base.expected_assertion,
                expected_trajectory=base.expected_trajectory,
                expected_interactions=base.expected_interactions,
                expected_environment_state=base.expected_environment_state,
                metadata=base.metadata,
                effects=tool_effects,
            )
            for base in cases
            for label, tool_effects in conditions
        ]

    def __repr__(self) -> str:
        """Return a compact summary: the case name and, per tool, its effect class names."""
        per_tool: list[str] = []
        for tool, tool_effects in self.effects.items():
            class_names = ", ".join(type(effect).__name__ for effect in tool_effects)
            per_tool.append(f"{tool}: [{class_names}]")
        summary = ", ".join(per_tool)
        return f"ChaosCase(name='{self.name}', effects={{{summary}}})"
Loading
Loading