5555INVALID_LIMIT = 4
5656SCORE_MIN = 1e-9
5757SCORE_MAX = 1.0 - 1e-9
58+ STEP_REWARD_MIN = 0.01
5859TASK_ALIASES = {
5960 "easy" : "refund_policy_recovery" ,
6061 "medium" : "sla_queue_juggle" ,
@@ -336,7 +337,7 @@ def step(
336337
337338 if self ._is_done ():
338339 return self ._build_observation (
339- reward = 0.0 ,
340+ reward = STEP_REWARD_MIN ,
340341 done = True ,
341342 info = {"already_done" : True , "termination_reason" : self ._termination_reason ()},
342343 )
@@ -349,7 +350,7 @@ def step(
349350 self ._state .step_count += 1
350351 self ._advance_events ()
351352 self ._update_sla_breaches ()
352- reward = - 0.25
353+ reward = STEP_REWARD_MIN
353354 self ._cumulative_reward += reward
354355 self ._latest_tool_result = ToolResult (
355356 action_type = action .action_type ,
@@ -364,8 +365,8 @@ def step(
364365 "last_action_error" : validation_error ,
365366 "reward_breakdown" : {
366367 "information_gain" : 0.0 ,
367- "workflow_progress" : - 0.15 ,
368- "business_outcome" : - 0.1 ,
368+ "workflow_progress" : STEP_REWARD_MIN ,
369+ "business_outcome" : STEP_REWARD_MIN ,
369370 },
370371 }
371372 if done :
@@ -793,9 +794,9 @@ def _inspect_order(self, case: CaseInternal, action: ShopopsAction) -> ActionOut
793794 del action
794795 if "order" in case .completed_checks :
795796 return ActionOutcome (
796- reward = - 0.03 ,
797+ reward = 0.02 ,
797798 summary = "Order details were already inspected." ,
798- details = {"reward_breakdown" : {"information_gain" : - 0.03 }},
799+ details = {"reward_breakdown" : {"information_gain" : 0.02 }},
799800 )
800801 case .completed_checks .add ("order" )
801802 case .order_summary = case .order_details_text
@@ -812,9 +813,9 @@ def _inspect_policy(self, case: CaseInternal, action: ShopopsAction) -> ActionOu
812813 del action
813814 if "policy" in case .completed_checks :
814815 return ActionOutcome (
815- reward = - 0.03 ,
816+ reward = 0.02 ,
816817 summary = "Policy details were already inspected." ,
817- details = {"reward_breakdown" : {"information_gain" : - 0.03 }},
818+ details = {"reward_breakdown" : {"information_gain" : 0.02 }},
818819 )
819820 case .completed_checks .add ("policy" )
820821 case .policy_summary = case .policy_details_text or "No special policy guidance for this case."
@@ -832,9 +833,9 @@ def _inspect_inventory(self, case: CaseInternal, action: ShopopsAction) -> Actio
832833 del action
833834 if "inventory" in case .completed_checks :
834835 return ActionOutcome (
835- reward = - 0.03 ,
836+ reward = 0.02 ,
836837 summary = "Inventory was already inspected." ,
837- details = {"reward_breakdown" : {"information_gain" : - 0.03 }},
838+ details = {"reward_breakdown" : {"information_gain" : 0.02 }},
838839 )
839840 case .completed_checks .add ("inventory" )
840841 sku = case .replacement_sku or "none"
@@ -853,9 +854,9 @@ def _inspect_customer_history(self, case: CaseInternal, action: ShopopsAction) -
853854 del action
854855 if "history" in case .completed_checks :
855856 return ActionOutcome (
856- reward = - 0.03 ,
857+ reward = 0.02 ,
857858 summary = "Customer history was already inspected." ,
858- details = {"reward_breakdown" : {"information_gain" : - 0.03 }},
859+ details = {"reward_breakdown" : {"information_gain" : 0.02 }},
859860 )
860861 case .completed_checks .add ("history" )
861862 case .history_summary = case .history_details_text or "No significant customer history was found."
@@ -873,21 +874,21 @@ def _request_evidence(self, case: CaseInternal, action: ShopopsAction) -> Action
873874 del action
874875 if not case .needs_evidence :
875876 return ActionOutcome (
876- reward = - 0.05 ,
877+ reward = 0.01 ,
877878 summary = "This case does not require customer evidence." ,
878- details = {"reward_breakdown" : {"workflow_progress" : - 0.05 }},
879+ details = {"reward_breakdown" : {"workflow_progress" : 0.01 }},
879880 )
880881 if case .evidence_status == EvidenceStatus .REQUESTED :
881882 return ActionOutcome (
882- reward = - 0.03 ,
883+ reward = 0.02 ,
883884 summary = "Evidence request is already pending." ,
884- details = {"reward_breakdown" : {"workflow_progress" : - 0.03 }},
885+ details = {"reward_breakdown" : {"workflow_progress" : 0.02 }},
885886 )
886887 if case .evidence_status in {EvidenceStatus .RECEIVED , EvidenceStatus .INSUFFICIENT }:
887888 return ActionOutcome (
888- reward = - 0.02 ,
889+ reward = 0.02 ,
889890 summary = "Evidence result is already available." ,
890- details = {"reward_breakdown" : {"workflow_progress" : - 0.02 }},
891+ details = {"reward_breakdown" : {"workflow_progress" : 0.02 }},
891892 )
892893 case .evidence_status = EvidenceStatus .REQUESTED
893894 case .status = CaseStatus .WAITING_CUSTOMER
@@ -912,21 +913,21 @@ def _contact_carrier(self, case: CaseInternal, action: ShopopsAction) -> ActionO
912913 del action
913914 if not case .needs_carrier_contact :
914915 return ActionOutcome (
915- reward = - 0.05 ,
916+ reward = 0.01 ,
916917 summary = "Carrier contact is not needed for this case." ,
917- details = {"reward_breakdown" : {"workflow_progress" : - 0.05 }},
918+ details = {"reward_breakdown" : {"workflow_progress" : 0.01 }},
918919 )
919920 if case .carrier_status == CarrierStatus .INVESTIGATING :
920921 return ActionOutcome (
921- reward = - 0.03 ,
922+ reward = 0.02 ,
922923 summary = "Carrier investigation is already pending." ,
923- details = {"reward_breakdown" : {"workflow_progress" : - 0.03 }},
924+ details = {"reward_breakdown" : {"workflow_progress" : 0.02 }},
924925 )
925926 if case .carrier_status in {CarrierStatus .APPROVED , CarrierStatus .DENIED }:
926927 return ActionOutcome (
927- reward = - 0.02 ,
928+ reward = 0.02 ,
928929 summary = "Carrier result is already available." ,
929- details = {"reward_breakdown" : {"workflow_progress" : - 0.02 }},
930+ details = {"reward_breakdown" : {"workflow_progress" : 0.02 }},
930931 )
931932 case .carrier_status = CarrierStatus .INVESTIGATING
932933 case .status = CaseStatus .WAITING_CARRIER
@@ -956,13 +957,13 @@ def _issue_refund(self, case: CaseInternal, action: ShopopsAction) -> ActionOutc
956957 fit = self ._refund_fit (case , amount )
957958 workflow = self ._check_coverage (case )
958959 business = 0.22 + fit
959- workflow_reward = 0.08 if workflow >= 1.0 else max (- 0.08 , 0.08 * ( workflow - 1.0 ) )
960+ workflow_reward = 0.08 if workflow >= 1.0 else max (0.01 , 0.08 * workflow )
960961 if case .fraud_signal == FraudSignal .HIGH and case .evidence_status in {
961962 EvidenceStatus .NOT_REQUESTED ,
962963 EvidenceStatus .REQUESTED ,
963964 }:
964965 business -= 0.18
965- reward = max (- 0.2 , business + workflow_reward )
966+ reward = max (STEP_REWARD_MIN , business + workflow_reward )
966967 case .resolution_summary = f"Refund of ${ amount :.2f} prepared."
967968 return ActionOutcome (
968969 reward = reward ,
@@ -982,9 +983,9 @@ def _ship_replacement(self, case: CaseInternal, action: ShopopsAction) -> Action
982983 if units <= 0 :
983984 self ._stockouts += 1
984985 return ActionOutcome (
985- reward = - 0.2 ,
986+ reward = STEP_REWARD_MIN ,
986987 summary = "Replacement failed because inventory is exhausted." ,
987- details = {"reward_breakdown" : {"business_outcome" : - 0.2 }},
988+ details = {"reward_breakdown" : {"business_outcome" : STEP_REWARD_MIN }},
988989 )
989990 self ._inventory [sku ] = units - 1
990991 case .resolution_action = ActionType .SHIP_REPLACEMENT
@@ -995,7 +996,7 @@ def _ship_replacement(self, case: CaseInternal, action: ShopopsAction) -> Action
995996 workflow = self ._check_coverage (case )
996997 expedite_bonus = 0.08 if action .expedite == case .preferred_expedite else - 0.04
997998 resolution_bonus = 0.26 if case .preferred_resolution == ActionType .SHIP_REPLACEMENT else - 0.12
998- reward = max (- 0.2 , resolution_bonus + expedite_bonus + 0.06 * workflow )
999+ reward = max (STEP_REWARD_MIN , resolution_bonus + expedite_bonus + 0.06 * workflow )
9991000 case .resolution_summary = (
10001001 f"Replacement for { sku } queued{ ' with expedite' if action .expedite else '' } ."
10011002 )
@@ -1038,9 +1039,9 @@ def _add_internal_note(self, case: CaseInternal, action: ShopopsAction) -> Actio
10381039 note_code = action .note_code or "general_note"
10391040 if note_code in case .notes :
10401041 return ActionOutcome (
1041- reward = - 0.02 ,
1042+ reward = 0.02 ,
10421043 summary = "That note already exists on the case." ,
1043- details = {"reward_breakdown" : {"workflow_progress" : - 0.02 }},
1044+ details = {"reward_breakdown" : {"workflow_progress" : 0.02 }},
10441045 )
10451046 case .notes .append (note_code )
10461047 reward = 0.05 if case .requires_note else 0.01
@@ -1070,12 +1071,12 @@ def _close_case(self, case: CaseInternal, action: ShopopsAction) -> ActionOutcom
10701071 )
10711072 case .resolution_summary = case .resolution_summary or "Case closed."
10721073 return ActionOutcome (
1073- reward = max (- 0.25 , reward ),
1074+ reward = max (STEP_REWARD_MIN , reward ),
10741075 summary = f"Case { case .case_id } closed." ,
10751076 details = {
10761077 "reward_breakdown" : {
10771078 "workflow_progress" : 0.12 ,
1078- "business_outcome" : max (- 0.37 , reward - 0.12 ),
1079+ "business_outcome" : max (STEP_REWARD_MIN , reward - 0.12 ),
10791080 },
10801081 "closure_quality" : round (quality , 4 ),
10811082 "remaining_blockers" : blockers ,
@@ -1087,15 +1088,15 @@ def _switch_case(self, case: CaseInternal, action: ShopopsAction) -> ActionOutco
10871088 target = self ._case_by_id (action .case_id or "" )
10881089 if target is None :
10891090 return ActionOutcome (
1090- reward = - 0.1 ,
1091+ reward = STEP_REWARD_MIN ,
10911092 summary = "Cannot switch because the target case does not exist." ,
1092- details = {"reward_breakdown" : {"workflow_progress" : - 0.1 }},
1093+ details = {"reward_breakdown" : {"workflow_progress" : STEP_REWARD_MIN }},
10931094 )
10941095 if target .case_id == self ._active_case_id :
10951096 return ActionOutcome (
1096- reward = - 0.02 ,
1097+ reward = 0.02 ,
10971098 summary = "The target case is already active." ,
1098- details = {"reward_breakdown" : {"workflow_progress" : - 0.02 }},
1099+ details = {"reward_breakdown" : {"workflow_progress" : 0.02 }},
10991100 )
11001101 current = self ._active_case ()
11011102 self ._active_case_id = target .case_id
0 commit comments