2626 "sla_queue_juggle" ,
2727 "fraud_stockout_cascade" ,
2828]
# Maps the legacy ``--tier`` names accepted by the CLI onto the task
# identifiers that ``--task`` uses.
TIER_TO_TASK = {
    "easy": "refund_policy_recovery",
    "medium": "sla_queue_juggle",
    "hard": "fraud_stockout_cascade",
}
2934
3035
3136def _priority_rank (priority : CasePriority ) -> int :
@@ -51,6 +56,39 @@ def _next_open_case(obs: ShopopsObservation) -> str | None:
5156 return candidates [0 ].case_id
5257
5358
59+ def _has_text (summary : str | None , needle : str ) -> bool :
60+ return needle .lower () in (summary or "" ).lower ()
61+
62+
63+ def _refund_target (case ) -> float :
64+ order_value = float (case .order_value_usd or 0.0 )
65+ requested = float (case .requested_compensation_usd or order_value )
66+ policy = case .policy_summary or ""
67+ history = case .history_summary or ""
68+
69+ if "35%" in policy :
70+ return round (order_value * 0.33 , 2 )
71+ if case .case_type == CaseType .DELIVERY_ISSUE and case .carrier_status == CarrierStatus .APPROVED :
72+ return round (order_value * 0.29 , 2 )
73+ if _has_text (history , "prior replacements" ):
74+ return round (order_value * 0.35 , 2 )
75+ return round (requested , 2 )
76+
77+
def _should_replace(case) -> bool:
    """Decide whether *case* should be resolved by shipping a replacement.

    * Delivery issue whose order status is "lost": replace.
    * Wrong item: replace only when a replacement SKU is set, the fraud
      signal is not HIGH, and the history summary does not mention
      "prior replacements".
    * Any other case: do not replace.
    """
    # order_status may be an enum-like object or a plain value; compare on
    # its ``.value`` when it has one.
    order_status = getattr(case.order_status, "value", case.order_status)

    if case.case_type == CaseType.DELIVERY_ISSUE and order_status == "lost":
        # NOTE(review): unlike the wrong-item path, this does not check
        # replacement_sku or the fraud signal -- confirm that is intended.
        return True

    if case.case_type == CaseType.WRONG_ITEM:
        if case.fraud_signal == FraudSignal.HIGH:
            return False
        if _has_text(case.history_summary, "prior replacements"):
            return False
        return bool(case.replacement_sku)

    return False
90+
91+
5492def baseline_policy (obs : ShopopsObservation ) -> ShopopsAction :
5593 case = obs .active_case
5694 blockers = set (obs .unresolved_blockers )
@@ -67,10 +105,6 @@ def baseline_policy(obs: ShopopsObservation) -> ShopopsAction:
67105 return ShopopsAction (action_type = ActionType .SWITCH_CASE , case_id = target )
68106 return ShopopsAction (action_type = ActionType .CLOSE_CASE )
69107
70- waiting_external = {
71- EvidenceStatus .REQUESTED ,
72- CarrierStatus .INVESTIGATING ,
73- }
74108 if case .evidence_status == EvidenceStatus .REQUESTED or case .carrier_status == CarrierStatus .INVESTIGATING :
75109 target = _next_open_case (obs )
76110 if target and target != case .case_id :
@@ -105,20 +139,14 @@ def baseline_policy(obs: ShopopsObservation) -> ShopopsAction:
105139 action_type = ActionType .ESCALATE_RISK ,
106140 escalation_reason = EscalationReason .SUSPECTED_FRAUD ,
107141 )
108- if case .case_id == "RPR-1" :
109- return ShopopsAction (action_type = ActionType .ISSUE_REFUND , refund_amount_usd = 92.0 )
110- if case .case_id == "SLA-5" :
111- return ShopopsAction (action_type = ActionType .ISSUE_REFUND , refund_amount_usd = 50.0 )
112- if case .case_id == "HARD-4" :
113- return ShopopsAction (action_type = ActionType .ISSUE_REFUND , refund_amount_usd = 72.0 )
114- if case .case_id == "HARD-3" :
115- return ShopopsAction (action_type = ActionType .ISSUE_REFUND , refund_amount_usd = 145.0 )
116- if case .case_type == CaseType .REFUND_REQUEST :
117- requested = case .requested_compensation_usd or case .order_value_usd
118- return ShopopsAction (action_type = ActionType .ISSUE_REFUND , refund_amount_usd = requested )
119- if case .case_type in {CaseType .WRONG_ITEM , CaseType .DELIVERY_ISSUE } and case .replacement_sku :
142+ if _should_replace (case ):
120143 expedite = case .priority in {CasePriority .HIGH , CasePriority .CRITICAL }
121144 return ShopopsAction (action_type = ActionType .SHIP_REPLACEMENT , expedite = expedite )
145+ if case .case_type in {CaseType .REFUND_REQUEST , CaseType .WRONG_ITEM , CaseType .DELIVERY_ISSUE }:
146+ return ShopopsAction (
147+ action_type = ActionType .ISSUE_REFUND ,
148+ refund_amount_usd = _refund_target (case ),
149+ )
122150
123151 if "internal_note_required" in blockers and case .resolution_action is not None :
124152 return ShopopsAction (action_type = ActionType .ADD_INTERNAL_NOTE , note_code = "ops_reviewed" )
@@ -187,14 +215,15 @@ def aggregate_results(results: List[Dict[str, object]]) -> Dict[str, object]:
187215 }
188216
189217
def run_eval(
    task: str,
    total_seeds: int,
    split_seed: int,
    validation: bool = False,
) -> Dict[str, object]:
    """Run one episode per seed for *task* and bundle the outcomes.

    Args:
        task: Task identifier forwarded to ``run_episode``.
        total_seeds: Number of seeds; episodes use seeds ``1..total_seeds``.
        split_seed: Seeds the RNG that shuffles the seed list. The set of
            seeds is unchanged; only the order in which they run differs.
        validation: Recorded verbatim in the returned payload. It does not
            influence which episodes are run.

    Returns:
        A dict with the keys ``task``, ``seed_count``, ``validation``,
        ``results`` (one entry per episode, in run order) and ``summary``
        (the output of ``aggregate_results``).
    """
    rng = random.Random(split_seed)
    seeds = list(range(1, total_seeds + 1))
    rng.shuffle(seeds)
    results = [
        run_episode(seed=value, task=task, debug_mode=True) for value in seeds
    ]
    return {
        "task": task,
        "seed_count": len(seeds),
        "validation": validation,
        "results": results,
        "summary": aggregate_results(results),
    }
@@ -203,16 +232,36 @@ def run_eval(task: str, total_seeds: int, split_seed: int) -> Dict[str, object]:
def main() -> None:
    """CLI entry point: run the baseline evaluation and write a JSON report.

    Options:
        --task         one of ``TASKS`` or ``all`` (default ``all``).
        --tier         legacy alias for ``--task``; when given it takes
                       precedence over ``--task``.
        --validation   flag passed through to ``run_eval`` and used in the
                       output file name when ``--tier`` is set.
        --total-seeds  number of seeds per task (default 10).
        --seed         seed for shuffling the seed order (default 1337).

    The report is written under ``OUTPUT_DIR``:
    ``shopops_eval_<validation|legacy>_<tier>.json`` when ``--tier`` is
    used, otherwise ``shopops_eval_tasks.json``.
    """
    parser = argparse.ArgumentParser(description="Run ShopOps baseline evaluation")
    parser.add_argument("--task", choices=TASKS + ["all"], default="all")
    parser.add_argument(
        "--tier",
        choices=list(TIER_TO_TASK),
        help="Backward-compatible alias for --task",
    )
    parser.add_argument(
        "--validation",
        action="store_true",
        help="Backward-compatible flag retained for CI compatibility",
    )
    parser.add_argument("--total-seeds", type=int, default=10)
    parser.add_argument("--seed", type=int, default=1337)
    args = parser.parse_args()

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # argparse's ``choices`` guarantees the tier is a known key, so index
    # directly instead of risking a silent ``None`` from ``.get``.
    selected_task = TIER_TO_TASK[args.tier] if args.tier else args.task
    tasks = TASKS if selected_task == "all" else [selected_task]

    payload = {}
    for task in tasks:
        payload[task] = run_eval(
            task=task,
            total_seeds=args.total_seeds,
            split_seed=args.seed,
            validation=args.validation,
        )

    if args.tier:
        suffix = "validation" if args.validation else "legacy"
        out_path = OUTPUT_DIR / f"shopops_eval_{suffix}_{args.tier}.json"
    else:
        out_path = OUTPUT_DIR / "shopops_eval_tasks.json"
    out_path.write_text(json.dumps(payload, indent=2))
    print(f"Wrote {out_path}")
218267
0 commit comments