Skip to content

Commit 6a5f1a8

Browse files
Final Env Creation done
1 parent 971ce6e commit 6a5f1a8

15 files changed

Lines changed: 550 additions & 136 deletions

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,23 @@ Outputs JSON metrics to `outputs/evals/` with:
132132
- Controlled via `--seed` in the eval runner
133133
- Hard-tier validation seeds available via `--validation`
134134

135+
## Inference Script (Hackathon Compliance)
136+
137+
The repo includes `inference.py` at the project root. It uses the OpenAI client
138+
and emits strict `[START]`, `[STEP]`, `[END]` logs.
139+
140+
Required environment variables:
141+
- `API_BASE_URL`
142+
- `MODEL_NAME`
143+
- `HF_TOKEN`
144+
- `ENV_URL` (optional, defaults to `http://localhost:8000`)
145+
146+
Example:
147+
```bash
148+
export API_BASE_URL="https://api.openai.com/v1"
149+
export MODEL_NAME="gpt-4o"
export HF_TOKEN="<your_key>"
export ENV_URL="http://localhost:8000"
python inference.py
150+
```
151+
135152
## Test Results
136153

137154
Latest scenario test report:

inference.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import json
2+
import os
3+
import re
4+
import sys
5+
from typing import Any, Dict
6+
7+
import requests
8+
from openai import OpenAI
9+
10+
# Settings read once from the process environment at import time.
# The first three are None when unset; ENV_URL falls back to a local server.
API_BASE_URL = os.environ.get("API_BASE_URL")
MODEL_NAME = os.environ.get("MODEL_NAME")
HF_TOKEN = os.environ.get("HF_TOKEN")
ENV_URL = os.environ.get("ENV_URL", "http://localhost:8000")

# Variable name -> captured value for every setting that must be non-empty.
REQUIRED_VARS = dict(
    API_BASE_URL=API_BASE_URL,
    MODEL_NAME=MODEL_NAME,
    HF_TOKEN=HF_TOKEN,
)
20+
21+
22+
def _require_env() -> None:
    """Exit with status 2 if any required environment variable is unset or empty.

    The names and captured values come from the module-level ``REQUIRED_VARS``
    mapping. The diagnostic is written to stderr so that stdout carries only
    the ``[START]`` / ``[STEP]`` / ``[END]`` log lines.

    Raises:
        SystemExit: with code 2 when at least one required variable is missing.
    """
    missing = [name for name, value in REQUIRED_VARS.items() if not value]
    if not missing:
        return
    print("Missing required env vars: " + ", ".join(missing), file=sys.stderr)
    sys.exit(2)
27+
28+
29+
def _parse_action(text: str) -> Dict[str, Any]:
    """Extract the action JSON object from a model reply.

    The whole reply is tried as JSON first. If that fails, or yields something
    other than an object, the text is scanned left to right and the first
    ``{`` that begins a decodable JSON object wins. This copes with replies
    wrapped in prose or Markdown code fences, replies holding several
    objects, and stray braces in surrounding text -- cases where a greedy
    first-``{``-to-last-``}`` match breaks.

    Args:
        text: Raw text returned by the model.

    Returns:
        The first JSON object found in ``text``, as a dict.

    Raises:
        json.JSONDecodeError: if no JSON object can be decoded from ``text``.
    """
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # Decoding from a "{" can only ever produce a dict.
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return candidate
    raise json.JSONDecodeError("No JSON object found in model output", text, 0)
37+
38+
39+
def _safe_action() -> Dict[str, Any]:
    """Build the fallback action: a plain ``reject`` with no extras.

    A new dict is created on every call, so callers may mutate the result.
    """
    fallback: Dict[str, Any] = dict(
        action_type="reject",
        refund_amount_usd=None,
        replacement_expedite=False,
        escalation_reason=None,
    )
    return fallback
46+
47+
48+
def main() -> None:
    """Run one episode against the environment server and print its log.

    Steps:
      1. Verify the required environment variables and build the OpenAI client.
      2. POST ``{ENV_URL}/reset`` with the seed and print the ``[START]`` header.
      3. Until the server reports ``done`` or 20 steps have run: ask the model
         for an action, POST it to ``{ENV_URL}/step`` and print a ``[STEP]``
         record. If the model call or the parsing of its reply fails, the
         fallback from ``_safe_action()`` is sent instead and the reason is
         reported on stderr (stdout stays reserved for the log format).
      4. Print ``[END]`` with the final score taken from the last observation's
         ``metadata.episode_summary.final_score`` (``None`` when absent).

    Environment variables read here:
        SEED: integer seed for ``/reset`` (default ``42``).
        REQUEST_TIMEOUT: per-request timeout in seconds for calls to the
            environment server (default ``30``).

    Raises:
        requests.HTTPError: if the environment server returns an error status.
        requests.Timeout: if the environment server does not answer in time.
    """
    _require_env()
    client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)

    seed = int(os.getenv("SEED", "42"))
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    request_timeout = float(os.getenv("REQUEST_TIMEOUT", "30"))
    max_steps = 20

    print("[START]")
    # Placeholder line kept so the emitted header is unchanged; the real id is
    # printed again once /reset has answered.
    print("episode_id=unknown")
    print(f"seed={seed}")
    print(f"model={MODEL_NAME}")
    print(f"env_url={ENV_URL}")

    reset_resp = requests.post(
        f"{ENV_URL}/reset",
        json={"seed": seed},
        timeout=request_timeout,
    )
    reset_resp.raise_for_status()
    payload = reset_resp.json()
    obs = payload["observation"]
    episode_id = obs.get("episode_id", "unknown")
    print(f"episode_id={episode_id}")

    # Loop-invariant part of the prompt.
    instructions = (
        "You are an e-commerce ops agent. Return ONLY JSON with keys: "
        "action_type, refund_amount_usd, replacement_expedite, escalation_reason. "
    )

    step = 0
    done = payload.get("done", False)

    while not done and step < max_steps:
        prompt = f"{instructions}Observation: {json.dumps(obs)}"

        try:
            response = client.responses.create(
                model=MODEL_NAME,
                input=prompt,
            )
            action = _parse_action(response.output_text)
        except Exception as exc:
            # Best-effort by design: keep the episode going, but make the
            # failure visible instead of silently rejecting every step.
            print(
                f"step {step}: model call or parse failed ({exc!r}); "
                "using fallback action",
                file=sys.stderr,
            )
            action = _safe_action()

        step_resp = requests.post(
            f"{ENV_URL}/step",
            json={"action": action, "episode_id": episode_id},
            timeout=request_timeout,
        )
        step_resp.raise_for_status()
        step_payload = step_resp.json()

        print("[STEP]")
        print(f"step={step}")
        print(f"action={json.dumps(action)}")
        print(f"reward={step_payload.get('reward')}")
        print(f"done={step_payload.get('done')}")

        obs = step_payload["observation"]
        done = step_payload.get("done", False)
        step += 1

    # "or {}" also covers keys that are present but null in the payload.
    metadata = obs.get("metadata") or {}
    summary = metadata.get("episode_summary") or {}
    final_score = summary.get("final_score")
    print("[END]")
    print(f"final_score={final_score}")


if __name__ == "__main__":
    main()

openenv.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
spec_version: 1
2-
schema_version: 1.0.0
32
name: shopOps
43
type: space
54
runtime: fastapi

outputs/inference_output.txt

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
[START]
2+
episode_id=unknown
3+
seed=42
4+
model=gpt-4o
5+
env_url=http://localhost:8000
6+
episode_id=aac7d410-41f4-40bb-bcac-c0f0eaae5672
7+
[STEP]
8+
step=0
9+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
10+
reward=0.0
11+
done=False
12+
[STEP]
13+
step=1
14+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
15+
reward=0.0
16+
done=False
17+
[STEP]
18+
step=2
19+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
20+
reward=0.0
21+
done=False
22+
[STEP]
23+
step=3
24+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
25+
reward=0.0
26+
done=False
27+
[STEP]
28+
step=4
29+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
30+
reward=1.0
31+
done=False
32+
[STEP]
33+
step=5
34+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
35+
reward=0.0
36+
done=False
37+
[STEP]
38+
step=6
39+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
40+
reward=0.985
41+
done=False
42+
[STEP]
43+
step=7
44+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
45+
reward=1.0
46+
done=False
47+
[STEP]
48+
step=8
49+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
50+
reward=0.0
51+
done=False
52+
[STEP]
53+
step=9
54+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
55+
reward=1.0
56+
done=False
57+
[STEP]
58+
step=10
59+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
60+
reward=0.0
61+
done=False
62+
[STEP]
63+
step=11
64+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
65+
reward=0.0
66+
done=False
67+
[STEP]
68+
step=12
69+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
70+
reward=0.9978009999999999
71+
done=False
72+
[STEP]
73+
step=13
74+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
75+
reward=0.0
76+
done=False
77+
[STEP]
78+
step=14
79+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
80+
reward=0.0
81+
done=False
82+
[STEP]
83+
step=15
84+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
85+
reward=0.0
86+
done=False
87+
[STEP]
88+
step=16
89+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
90+
reward=0.0
91+
done=False
92+
[STEP]
93+
step=17
94+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
95+
reward=0.0
96+
done=False
97+
[STEP]
98+
step=18
99+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
100+
reward=0.8838415
101+
done=False
102+
[STEP]
103+
step=19
104+
action={"action_type": "reject", "refund_amount_usd": null, "replacement_expedite": false, "escalation_reason": null}
105+
reward=0.0
106+
done=True
107+
[END]
108+
final_score=None

0 commit comments

Comments
 (0)