Skip to content

Commit 3ee4191

Browse files
committed
fix: repair issue→PR automation (checkbox parsing, simulator support)
- process_issue.py: add parse_checkboxes() to extract only the checked `[x]` items (the old code used parse_list(), which treated "- [x] foo" and "- [ ] foo" identically)
- process_issue.py: normalize form field key names to match GitHub's rendering of label text (parentheses stripped, spaces → underscores)
- process_issue.py: add build_tool_entry() for add-simulator issues
- process_issue.py: accept ISSUE_TYPE=tool in addition to model/dataset
- process-issue.yml: add the add-simulator label to the trigger condition
- process-issue.yml: add an else branch that sets issue_type=tool
- process-issue.yml: git add docs/index.html so the embedded data is included in the PR
1 parent 52316e8 commit 3ee4191

2 files changed

Lines changed: 115 additions & 40 deletions

File tree

.github/workflows/process-issue.yml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ permissions:
1111

1212
jobs:
1313
process:
14-
# Only run for issues using our "Add Model" or "Add Dataset" templates
1514
if: |
1615
contains(github.event.issue.labels.*.name, 'add-model') ||
17-
contains(github.event.issue.labels.*.name, 'add-dataset')
16+
contains(github.event.issue.labels.*.name, 'add-dataset') ||
17+
contains(github.event.issue.labels.*.name, 'add-simulator')
1818
runs-on: ubuntu-latest
1919
steps:
2020
- uses: actions/checkout@v4
@@ -34,8 +34,10 @@ jobs:
3434
LABELS='${{ toJson(github.event.issue.labels.*.name) }}'
3535
if echo "$LABELS" | grep -q "add-model"; then
3636
echo "issue_type=model" >> $GITHUB_OUTPUT
37-
else
37+
elif echo "$LABELS" | grep -q "add-dataset"; then
3838
echo "issue_type=dataset" >> $GITHUB_OUTPUT
39+
else
40+
echo "issue_type=tool" >> $GITHUB_OUTPUT
3941
fi
4042
4143
- name: Parse issue and append entry
@@ -57,7 +59,7 @@ jobs:
5759
git config user.name "github-actions[bot]"
5860
git config user.email "github-actions[bot]@users.noreply.github.com"
5961
git checkout -b "$BRANCH"
60-
git add data/ docs/data.json README.md
62+
git add data/ docs/data.json docs/index.html README.md
6163
git commit -m "feat: add entry from issue #${{ github.event.issue.number }}"
6264
git push origin "$BRANCH"
6365
gh pr create \

scripts/process_issue.py

Lines changed: 109 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,19 @@
22
"""
33
process_issue.py — Parse a GitHub Issue form body and append a new entry to the YAML.
44
5-
Called by .github/workflows/process-issue.yml when a new "Add Model" or
6-
"Add Dataset" issue is opened. The workflow passes the issue body and metadata
5+
Called by .github/workflows/process-issue.yml when a new "Add Model", "Add Dataset",
6+
or "Add Simulator" issue is opened. The workflow passes the issue body and metadata
77
via environment variables, and this script:
8-
1. Parses the structured issue form
8+
1. Parses the structured issue form (handles both text inputs and checkboxes)
99
2. Validates the entry
10-
3. Writes it to data/models.yaml or data/datasets.yaml
10+
3. Writes it to the appropriate YAML file
1111
4. The workflow then creates a PR for admin review
1212
1313
Environment variables (set by the GitHub Actions workflow):
1414
ISSUE_BODY — raw issue body text
15-
ISSUE_TYPE — "model" or "dataset"
16-
ISSUE_NUMBER — GitHub issue number (for PR description)
15+
ISSUE_TYPE — "model", "dataset", or "tool"
16+
ISSUE_NUMBER — GitHub issue number
1717
ISSUE_AUTHOR — GitHub username of issue author
18-
19-
Usage (from GitHub Actions):
20-
python scripts/process_issue.py
2118
"""
2219

2320
import os
@@ -33,12 +30,20 @@
3330
TODAY = date.today().isoformat()
3431

3532

33+
# ─────────────────────────────────────────────────────────────────────────────
34+
# Form parsing
35+
# ─────────────────────────────────────────────────────────────────────────────
36+
3637
def parse_form(body: str) -> dict[str, str]:
3738
"""Parse GitHub issue form body into a key→value dict.
3839
3940
GitHub Forms render as:
4041
### Field Label
41-
value
42+
value text (for inputs / textareas)
43+
44+
### Checkboxes
45+
- [x] checked item
46+
- [ ] unchecked item
4247
"""
4348
result: dict[str, str] = {}
4449
current_key = None
@@ -49,7 +54,7 @@ def parse_form(body: str) -> dict[str, str]:
4954
if heading:
5055
if current_key is not None:
5156
result[current_key] = "\n".join(current_lines).strip()
52-
current_key = heading.group(1).strip().lower().replace(" ", "_")
57+
current_key = heading.group(1).strip().lower().replace(" ", "_").replace("(", "").replace(")", "")
5358
current_lines = []
5459
elif current_key is not None:
5560
if line.strip() not in ("_No response_", ""):
@@ -61,8 +66,27 @@ def parse_form(body: str) -> dict[str, str]:
6166
return result
6267

6368

69+
def parse_checkboxes(value: str) -> list[str]:
    """Return the labels of the checked items in a markdown checkbox block.

    GitHub renders checkboxes as:
        - [x] checked value
        - [ ] unchecked value

    Only ``[x]`` / ``[X]`` lines are kept. Anything that follows the label
    after a whitespace-delimited dash (em dash, en dash or hyphen) is treated
    as a description and dropped, e.g.
    "physics_engine — physics engine (MuJoCo ...)" → "physics_engine".
    Hyphens without surrounding whitespace ("sim-to-real") are left intact.

    Args:
        value: Raw text of one form field (may be empty).

    Returns:
        The checked labels in document order; blank labels are skipped.
    """
    # Compile once, outside the loop.
    checked_line = re.compile(r"^\s*-\s*\[x\]\s+(.+)$", re.IGNORECASE)
    description_sep = re.compile(r"\s+[—–-]\s+")

    checked: list[str] = []
    for line in value.splitlines():
        m = checked_line.match(line)
        if m is None:
            continue
        label = description_sep.split(m.group(1).strip())[0].strip()
        # A line such as "- [x]   " still matches through backtracking and
        # yields an empty label; never emit "" as a selected option.
        if label:
            checked.append(label)
    return checked
86+
87+
6488
def parse_list(value: str) -> list[str]:
    """Split *value* on commas and/or newlines into trimmed, non-blank items."""
    pieces = (chunk.strip() for chunk in re.split(r"[,\n]+", value))
    return [piece for piece in pieces if piece]
6791

6892

@@ -73,50 +97,54 @@ def to_int(value: str, default: int = 0) -> int:
7397
return default
7498

7599

100+
# ─────────────────────────────────────────────────────────────────────────────
101+
# Entry builders
102+
# ─────────────────────────────────────────────────────────────────────────────
103+
76104
def build_model_entry(form: dict) -> dict:
    """Build the YAML record for an "Add Model" issue form.

    Args:
        form: Mapping of normalized field keys to raw string values, as
            returned by ``parse_form``.

    Returns:
        A dict holding the model's id, names, URLs, checkbox-derived
        taxonomy lists, zero-initialized stats and today's date. ``id`` is
        an empty string when the form carries no id field.
    """

    def field(*keys: str, default: str = "") -> str:
        # Value of the first key that is present in the form.
        for key in keys:
            if key in form:
                return form[key]
        return default

    # parse_form() removes parentheses from headings, so a label such as
    # "ID (slug)" arrives as "id_slug". The parenthesized spellings are kept
    # only as fallbacks for bodies parsed by the older normalization.
    raw_id = field("id_slug", "id_(slug)", "id")
    slug = re.sub(r"[^a-z0-9-]", "", raw_id.lower().replace(" ", "-"))

    return {
        "id": slug,
        "name": form.get("name", ""),
        "org": form.get("organization", ""),
        "year": to_int(form.get("year", str(date.today().year))),
        "description_en": field("description_english", "description_en"),
        "description_ko": field("description_korean", "description_ko"),
        "github_url": form.get("github_url", ""),
        "paper_url": field("paper_url_arxiv", "paper_url"),
        "hf_url": form.get("huggingface_url", ""),
        "project_url": field("project_page_url", "project_/_docs_url"),
        "categories": parse_checkboxes(form.get("categories", "")),
        "hardware": parse_checkboxes(form.get("hardware_targets", "")),
        "learning": parse_checkboxes(form.get("learning_methods", "")),
        "framework": parse_checkboxes(form.get("framework", "")),
        "communication": parse_checkboxes(form.get("communication", "")),
        "stats": {
            "github_stars": 0,
            "github_forks": 0,
            "hf_downloads": 0,
            "last_updated": TODAY,
        },
        "added_date": TODAY,
        "tags": parse_list(field("tags_optional", "tags_(optional)", "tags")),
    }
102130

103131

104132
def build_dataset_entry(form: dict) -> dict:
105133
return {
106-
"id": re.sub(r"[^a-z0-9-]", "", form.get("id", "").lower().replace(" ", "-")),
134+
"id": re.sub(r"[^a-z0-9-]", "", form.get("id_(slug)", form.get("id", "")).lower().replace(" ", "-")),
107135
"name": form.get("name", ""),
108136
"org": form.get("organization", ""),
109137
"year": to_int(form.get("year", str(date.today().year))),
110-
"description_en": form.get("description_(english)", ""),
111-
"description_ko": form.get("description_(korean)", ""),
138+
"description_en": form.get("description_english", form.get("description_en", "")),
139+
"description_ko": form.get("description_korean", form.get("description_ko", "")),
112140
"github_url": form.get("github_url", ""),
113-
"paper_url": form.get("paper_url_(arxiv)", ""),
141+
"paper_url": form.get("paper_url_arxiv", form.get("paper_url", "")),
114142
"hf_url": form.get("huggingface_url", ""),
115143
"project_url": form.get("project_page_url", ""),
116-
"categories": parse_list(form.get("categories", "")),
117-
"hardware": parse_list(form.get("hardware_targets", "")),
118-
"source": parse_list(form.get("data_source", "")),
119-
"modality": parse_list(form.get("modality", "")),
144+
"categories": parse_checkboxes(form.get("categories", "")),
145+
"hardware": parse_checkboxes(form.get("hardware_targets", "")),
146+
"source": parse_checkboxes(form.get("data_source", "")),
147+
"modality": parse_checkboxes(form.get("modality", "")),
120148
"scale": {
121149
"trajectories": to_int(form.get("number_of_trajectories", "0")),
122150
"hours": to_int(form.get("total_hours", "0")),
@@ -129,15 +157,53 @@ def build_dataset_entry(form: dict) -> dict:
129157
"last_updated": TODAY,
130158
},
131159
"added_date": TODAY,
132-
"tags": parse_list(form.get("tags", "")),
160+
"tags": parse_list(form.get("tags_(optional)", form.get("tags", ""))),
133161
}
134162

135163

164+
def build_tool_entry(form: dict) -> dict:
    """Build the YAML record for an "Add Simulator" (tool) issue form.

    Args:
        form: Mapping of normalized field keys to raw string values, as
            returned by ``parse_form``.

    Returns:
        A dict holding the tool's id, names, URLs, type, capability flags
        derived from the checked "features", language list, zero-initialized
        stats and today's date. ``id`` is an empty string when the form
        carries no id field.
    """

    def field(*keys: str, default: str = "") -> str:
        # Value of the first key that is present in the form.
        for key in keys:
            if key in form:
                return form[key]
        return default

    # The dropdown "Type" value may carry a " — description" suffix; keep only
    # the part before the first spaced dash on the first line.
    raw_type = form.get("type", "")
    first_line = raw_type.splitlines()[0] if raw_type else ""
    tool_type = re.split(r"\s+[—–-]\s+", first_line)[0].strip()

    features = [f.lower() for f in parse_checkboxes(form.get("features", ""))]
    gpu = any("gpu" in f for f in features)
    # Require that "ros" is not preceded by a letter, so words that merely
    # contain it ("cross-platform", "gyroscope") do not set the flag, while
    # "ros", "ros2", "ros_support" and "micro-ros" still do.
    ros = any(re.search(r"(?<![a-z])ros", f) for f in features)

    languages = parse_list(field("primary_languages", "primary_language_s"))

    # parse_form() removes parentheses from headings, so a label such as
    # "ID (slug)" arrives as "id_slug". The parenthesized spellings are kept
    # only as fallbacks for bodies parsed by the older normalization.
    raw_id = field("id_slug", "id_(slug)", "id")
    slug = re.sub(r"[^a-z0-9-]", "", raw_id.lower().replace(" ", "-"))

    return {
        "id": slug,
        "name": form.get("name", ""),
        "org": form.get("organization", ""),
        "year": to_int(form.get("year", str(date.today().year))),
        "description_en": field("description_english", "description_en"),
        "description_ko": field("description_korean", "description_ko"),
        "github_url": form.get("github_url", ""),
        "paper_url": field("paper_url_arxiv", "paper_url"),
        "project_url": field("project_/_docs_url", "project_url"),
        "type": tool_type,
        "gpu_accelerated": gpu,
        "ros_support": ros,
        "language": languages,
        "stats": {
            "github_stars": 0,
            "last_updated": TODAY,
        },
        "added_date": TODAY,
        "tags": parse_list(field("tags_optional", "tags_(optional)", "tags")),
    }
197+
198+
199+
# ─────────────────────────────────────────────────────────────────────────────
200+
# YAML I/O
201+
# ─────────────────────────────────────────────────────────────────────────────
202+
136203
def append_entry(yaml_path: Path, entry: dict) -> None:
137204
with open(yaml_path, encoding="utf-8") as f:
138205
entries = yaml.safe_load(f) or []
139206

140-
# Check for duplicate id
141207
existing_ids = {e.get("id") for e in entries}
142208
if entry["id"] in existing_ids:
143209
print(f"::error::Entry with id '{entry['id']}' already exists in {yaml_path.name}")
@@ -149,6 +215,10 @@ def append_entry(yaml_path: Path, entry: dict) -> None:
149215
print(f"✅ Appended '{entry['id']}' to {yaml_path.name}")
150216

151217

218+
# ─────────────────────────────────────────────────────────────────────────────
219+
# Entry point
220+
# ─────────────────────────────────────────────────────────────────────────────
221+
152222
def main() -> None:
153223
body = os.environ.get("ISSUE_BODY", "")
154224
issue_type = os.environ.get("ISSUE_TYPE", "").lower()
@@ -159,8 +229,8 @@ def main() -> None:
159229
print("::error::ISSUE_BODY is empty")
160230
sys.exit(1)
161231

162-
if issue_type not in ("model", "dataset"):
163-
print(f"::error::ISSUE_TYPE must be 'model' or 'dataset', got: '{issue_type}'")
232+
if issue_type not in ("model", "dataset", "tool"):
233+
print(f"::error::ISSUE_TYPE must be 'model', 'dataset', or 'tool', got: '{issue_type}'")
164234
sys.exit(1)
165235

166236
form = parse_form(body)
@@ -169,9 +239,12 @@ def main() -> None:
169239
if issue_type == "model":
170240
entry = build_model_entry(form)
171241
yaml_path = DATA_DIR / "models.yaml"
172-
else:
242+
elif issue_type == "dataset":
173243
entry = build_dataset_entry(form)
174244
yaml_path = DATA_DIR / "datasets.yaml"
245+
else:
246+
entry = build_tool_entry(form)
247+
yaml_path = DATA_DIR / "tools.yaml"
175248

176249
if not entry["id"]:
177250
print("::error::Could not determine entry 'id' from form")

0 commit comments

Comments
 (0)