openenv.yaml
name: adaptive-tutor-env
version: "1.0.0"
description: >
  AdaptiveTutor-Env is an OpenEnv-compliant reinforcement learning environment
  that simulates a personalized AI tutoring system for an individual student
  across five academic subjects. The environment models the sequential
  instructional decision-making process a human tutor performs, and is designed
  for training and evaluating RL agents on long-horizon pedagogical optimization.
author: "AdaptiveTutor Team"
license: MIT
tags:
  - openenv
  - education
  - personalized-learning
  - reinforcement-learning
  - irt
  - edtech
tasks:
  - id: single_subject_mastery
    name: "Single Subject Mastery"
    difficulty: easy
    max_steps: 20
    description: >
      The agent must teach a student all 5 Mathematics topics (algebra, geometry,
      calculus, statistics, number_theory) to mastery (>= 0.75 average) within
      20 steps. Scored on final average mastery of Mathematics topics.
  - id: multi_subject_balancing
    name: "Multi Subject Balancing"
    difficulty: medium
    max_steps: 25
    description: >
      The agent must balance mastery across all 5 subjects within 25 steps.
      Scored on overall mastery gain AND inter-subject balance (low Gini
      coefficient across subject-level average masteries). Requires strategic
      allocation of teaching effort.
  - id: long_horizon_retention
    name: "Long Horizon Retention under Forgetting"
    difficulty: hard
    max_steps: 40
    description: >
      The agent must maximize mastery across all subjects AND ensure topics
      retain high mastery at episode end under Ebbinghaus exponential
      forgetting. Topics not revisited decay. Requires proactive
      spaced-repetition scheduling.
action_space:
  type: discrete_composite
  description: "5-dimensional action: subject, topic, activity_type, difficulty, strategy"
  fields:
    subject:
      type: string
      choices:
        - mathematics
        - science
        - computer_science
        - english
        - social_studies
      description: "Academic subject to focus teaching on"
    topic:
      type: string
      description: >
        Topic within the selected subject. mathematics: [algebra, geometry,
        calculus, statistics, number_theory]. science: [physics, chemistry,
        biology, earth_science, astronomy]. computer_science: [algorithms,
        data_structures, programming, networking, databases]. english:
        [grammar, vocabulary, reading_comprehension, writing, literature].
        social_studies: [history, geography, civics, economics, culture].
    activity_type:
      type: integer
      min: 0
      max: 3
      description: "0=video_lesson, 1=practice_exercise, 2=quiz, 3=revision"
    difficulty:
      type: integer
      min: 0
      max: 2
      description: "0=easy (IRT b=-1.0), 1=medium (b=0.0), 2=hard (b=+1.5)"
    strategy:
      type: integer
      min: 0
      max: 2
      description: "0=introduce_new_concept, 1=reinforce_existing, 2=spaced_repetition_review"
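# Illustrative example of one complete action drawn from the space above. The
# JSON wire format is an assumption -- this file specifies the fields and their
# ranges, not how the server expects them to be serialized:
#   {"subject": "mathematics", "topic": "algebra",
#    "activity_type": 1, "difficulty": 0, "strategy": 0}
#   i.e. an easy practice exercise that introduces a new algebra concept.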
observation_space:
  type: dict
  description: "Student state representation with per-topic mastery, engagement, and fatigue"
  fields:
    masteries:
      type: dict
      description: "Nested dict subject->topic->mastery_score [0.0, 1.0]"
    engagement:
      type: float
      min: 0.0
      max: 1.0
      description: "Student engagement/attention level"
    fatigue:
      type: float
      min: 0.0
      max: 1.0
      description: "Student cognitive fatigue level"
    step:
      type: integer
      description: "Current episode step index"
    subject_masteries:
      type: dict
      description: "Average mastery per subject"
    overall_mastery:
      type: float
      min: 0.0
      max: 1.0
      description: "Global average mastery across all topics"
    time_since_last_visit:
      type: dict
      description: "Steps elapsed since each topic was last visited (for forgetting)"
    last_action:
      type: dict
      nullable: true
      description: "The most recently taken action"
reward_space:
  type: float
  min: -1.0
  max: 1.0
  description: >
    Dense composite reward at every step:
    total = w1*learning_gain + w2*engagement_bonus + w3*balance_reward - w4*retention_penalty,
    where w1=0.50, w2=0.20, w3=0.20, w4=0.10.
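# Worked example of the composite reward, using illustrative component values
# (these values are assumptions, not part of the spec; only the weights are):
#   learning_gain=0.30, engagement_bonus=0.50, balance_reward=0.20, retention_penalty=0.10
#   total = 0.50*0.30 + 0.20*0.50 + 0.20*0.20 - 0.10*0.10
#         = 0.150 + 0.100 + 0.040 - 0.010 = 0.280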
base_url: "http://localhost:8000"
endpoints:
  reset: "POST /reset"
  step: "POST /step"
  state: "GET /state"
  tasks: "GET /tasks"
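The endpoints above can be exercised with a minimal Python client. This is a sketch under stated assumptions: the spec defines routes and spaces but not the JSON wire format, so the payload shapes here (the output of `build_action` and the `/reset` body) are hypothetical and would need to match the actual server implementation.

```python
import json
import urllib.request

BASE_URL = "http://localhost:8000"  # matches base_url in openenv.yaml


def build_action(subject, topic, activity_type, difficulty, strategy):
    """Validate and assemble a 5-field action dict per the action_space spec."""
    subjects = {"mathematics", "science", "computer_science",
                "english", "social_studies"}
    if subject not in subjects:
        raise ValueError(f"unknown subject: {subject}")
    if not 0 <= activity_type <= 3:
        raise ValueError("activity_type must be in [0, 3]")
    if not 0 <= difficulty <= 2:
        raise ValueError("difficulty must be in [0, 2]")
    if not 0 <= strategy <= 2:
        raise ValueError("strategy must be in [0, 2]")
    return {"subject": subject, "topic": topic, "activity_type": activity_type,
            "difficulty": difficulty, "strategy": strategy}


def post(path, payload=None):
    """POST a JSON payload to the environment server and decode the JSON reply."""
    data = json.dumps(payload or {}).encode()
    req = urllib.request.Request(BASE_URL + path, data=data,
                                 headers={"Content-Type": "application/json"})
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)


# Example episode interaction (requires a running server; payload keys such
# as "task_id" and "action" are assumptions about the wire format):
#   obs = post("/reset", {"task_id": "single_subject_mastery"})
#   action = build_action("mathematics", "algebra", 1, 0, 0)
#   result = post("/step", {"action": action})
```

Validating client-side with `build_action` keeps malformed actions from ever reaching the server, which simplifies debugging when an agent explores the composite action space.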