Skip to content

Commit caa13e3

Browse files
Graders fix
1 parent 06b0159 commit caa13e3

3 files changed

Lines changed: 27 additions & 2 deletions

File tree

graders.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22

33
from typing import Any, Dict, List
44

5+
_SCORE_MIN = 1e-9
6+
_SCORE_MAX = 1.0 - 1e-9
7+
58

69
def _grade_with_cap(trajectory: List[Dict[str, Any]], max_total_reward: float) -> float:
710
if not trajectory or max_total_reward <= 0:
8-
return 0.0
11+
return _SCORE_MIN
912
total_reward = sum(float(step.get("reward") or 0.0) for step in trajectory)
1013
score = total_reward / max_total_reward
11-
return float(max(0.0, min(1.0, score)))
14+
return float(max(_SCORE_MIN, min(_SCORE_MAX, score)))
1215

1316

1417
class RefundPolicyRecoveryGrader:
Binary file not shown.

tests/test_metrics_baselines.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
from __future__ import annotations
22

3+
from shopOps.graders import (
4+
FraudStockoutCascadeGrader,
5+
RefundPolicyRecoveryGrader,
6+
SlaQueueJuggleGrader,
7+
)
38
from shopOps.eval import TASKS, aggregate_results, run_episode
49

510

@@ -29,3 +34,20 @@ def test_baseline_scores_are_monotonic_by_difficulty_seed_1() -> None:
2934
for task in TASKS
3035
]
3136
assert scores[0] >= scores[1] >= scores[2]
37+
38+
39+
def test_graders_return_open_interval_scores() -> None:
40+
graders = [
41+
RefundPolicyRecoveryGrader(),
42+
SlaQueueJuggleGrader(),
43+
FraudStockoutCascadeGrader(),
44+
]
45+
trajectories = [
46+
[],
47+
[{"reward": 999.0}],
48+
[{"reward": -999.0}],
49+
]
50+
for grader in graders:
51+
for trajectory in trajectories:
52+
score = grader.grade(trajectory)
53+
assert 0.0 < score < 1.0

0 commit comments

Comments (0)