22"""
33process_issue.py — Parse a GitHub Issue form body and append a new entry to the YAML.
44
5- Called by .github/workflows/process-issue.yml when a new "Add Model" or
6- "Add Dataset " issue is opened. The workflow passes the issue body and metadata
5+ Called by .github/workflows/process-issue.yml when a new "Add Model", "Add Dataset",
6+ or "Add Simulator " issue is opened. The workflow passes the issue body and metadata
77via environment variables, and this script:
8- 1. Parses the structured issue form
8+ 1. Parses the structured issue form (handles both text inputs and checkboxes)
99 2. Validates the entry
10- 3. Writes it to data/models.yaml or data/datasets.yaml
10+ 3. Writes it to the appropriate YAML file
1111 4. The workflow then creates a PR for admin review
1212
1313Environment variables (set by the GitHub Actions workflow):
1414 ISSUE_BODY — raw issue body text
15- ISSUE_TYPE — "model" or "dataset "
16- ISSUE_NUMBER — GitHub issue number (for PR description)
15+ ISSUE_TYPE — "model", "dataset", or "tool "
16+ ISSUE_NUMBER — GitHub issue number
1717 ISSUE_AUTHOR — GitHub username of issue author
18-
19- Usage (from GitHub Actions):
20- python scripts/process_issue.py
2118"""
2219
2320import os
3330TODAY = date .today ().isoformat ()
3431
3532
33+ # ─────────────────────────────────────────────────────────────────────────────
34+ # Form parsing
35+ # ─────────────────────────────────────────────────────────────────────────────
36+
3637def parse_form (body : str ) -> dict [str , str ]:
3738 """Parse GitHub issue form body into a key→value dict.
3839
3940 GitHub Forms render as:
4041 ### Field Label
41- value
42+ value text (for inputs / textareas)
43+
44+ ### Checkboxes
45+ - [x] checked item
46+ - [ ] unchecked item
4247 """
4348 result : dict [str , str ] = {}
4449 current_key = None
@@ -49,7 +54,7 @@ def parse_form(body: str) -> dict[str, str]:
4954 if heading :
5055 if current_key is not None :
5156 result [current_key ] = "\n " .join (current_lines ).strip ()
52- current_key = heading .group (1 ).strip ().lower ().replace (" " , "_" )
57+ current_key = heading .group (1 ).strip ().lower ().replace (" " , "_" ). replace ( "(" , "" ). replace ( ")" , "" )
5358 current_lines = []
5459 elif current_key is not None :
5560 if line .strip () not in ("_No response_" , "" ):
@@ -61,8 +66,27 @@ def parse_form(body: str) -> dict[str, str]:
6166 return result
6267
6368
def parse_checkboxes(value: str) -> list[str]:
    """Return the labels of the ticked boxes in a markdown checkbox block.

    GitHub renders a checkbox group as one list item per option:
        - [x] checked value
        - [ ] unchecked value

    Only ``[x]`` / ``[X]`` items are kept, in their original order. If a
    label carries a description after a whitespace-surrounded dash
    (em dash, en dash or hyphen), only the part before the first such dash
    is returned, e.g. "physics_engine — some description" → "physics_engine".
    """
    ticked_re = re.compile(r"^\s*-\s*\[x\]\s+(.+)$", re.IGNORECASE)
    suffix_re = re.compile(r"\s+[—–-]\s+")

    # Match line by line; lines that are not ticked list items yield None.
    hits = (ticked_re.match(row) for row in value.splitlines())
    return [
        suffix_re.split(hit.group(1).strip())[0].strip()
        for hit in hits
        if hit
    ]
86+
87+
def parse_list(value: str) -> list[str]:
    """Split *value* on commas and/or newlines and return the non-blank items.

    Each item is stripped of surrounding whitespace; items that are empty
    after stripping are dropped.
    """
    pieces = (chunk.strip() for chunk in re.split(r"[,\n]+", value))
    return [piece for piece in pieces if piece]
6791
6892
@@ -73,50 +97,54 @@ def to_int(value: str, default: int = 0) -> int:
7397 return default
7498
7599
100+ # ─────────────────────────────────────────────────────────────────────────────
101+ # Entry builders
102+ # ─────────────────────────────────────────────────────────────────────────────
103+
def build_model_entry(form: dict) -> dict:
    """Build the entry dict for a model from a parsed issue form.

    Args:
        form: key→value mapping produced by ``parse_form``.

    Returns:
        A dict with the model's identity, descriptions, URLs, the checked
        taxonomy options, zero-initialised stats and today's date.
        ``id`` is lower-cased, spaces become hyphens, and every character
        outside ``[a-z0-9-]`` is removed; it is an empty string when the
        form has no id field.
    """
    # parse_form removes "(" and ")" from heading keys, so a heading such as
    # "ID (slug)" is stored as "id_slug" (not "id_(slug)"). Look up the
    # normalised key first and keep the older spellings as fallbacks.
    raw_id = form.get("id_slug", form.get("id_(slug)", form.get("id", "")))
    raw_tags = form.get(
        "tags_optional", form.get("tags_(optional)", form.get("tags", ""))
    )

    return {
        "id": re.sub(r"[^a-z0-9-]", "", raw_id.lower().replace(" ", "-")),
        "name": form.get("name", ""),
        "org": form.get("organization", ""),
        "year": to_int(form.get("year", str(date.today().year))),
        "description_en": form.get("description_english", form.get("description_en", "")),
        "description_ko": form.get("description_korean", form.get("description_ko", "")),
        "github_url": form.get("github_url", ""),
        "paper_url": form.get("paper_url_arxiv", form.get("paper_url", "")),
        "hf_url": form.get("huggingface_url", ""),
        "project_url": form.get("project_page_url", form.get("project_/_docs_url", "")),
        # Taxonomy fields are checkbox groups: keep only the ticked options.
        "categories": parse_checkboxes(form.get("categories", "")),
        "hardware": parse_checkboxes(form.get("hardware_targets", "")),
        "learning": parse_checkboxes(form.get("learning_methods", "")),
        "framework": parse_checkboxes(form.get("framework", "")),
        "communication": parse_checkboxes(form.get("communication", "")),
        "stats": {
            "github_stars": 0,
            "github_forks": 0,
            "hf_downloads": 0,
            "last_updated": TODAY,
        },
        "added_date": TODAY,
        "tags": parse_list(raw_tags),
    }
102130
103131
104132def build_dataset_entry (form : dict ) -> dict :
105133 return {
106- "id" : re .sub (r"[^a-z0-9-]" , "" , form .get ("id" , "" ).lower ().replace (" " , "-" )),
134+ "id" : re .sub (r"[^a-z0-9-]" , "" , form .get ("id_(slug)" , form . get ( " id" , "" ) ).lower ().replace (" " , "-" )),
107135 "name" : form .get ("name" , "" ),
108136 "org" : form .get ("organization" , "" ),
109137 "year" : to_int (form .get ("year" , str (date .today ().year ))),
110- "description_en" : form .get ("description_(english)" , "" ),
111- "description_ko" : form .get ("description_(korean)" , "" ),
138+ "description_en" : form .get ("description_english" , form . get ( "description_en" , "" ) ),
139+ "description_ko" : form .get ("description_korean" , form . get ( "description_ko" , "" ) ),
112140 "github_url" : form .get ("github_url" , "" ),
113- "paper_url" : form .get ("paper_url_(arxiv)" , "" ),
141+ "paper_url" : form .get ("paper_url_arxiv" , form . get ( "paper_url" , "" ) ),
114142 "hf_url" : form .get ("huggingface_url" , "" ),
115143 "project_url" : form .get ("project_page_url" , "" ),
116- "categories" : parse_list (form .get ("categories" , "" )),
117- "hardware" : parse_list (form .get ("hardware_targets" , "" )),
118- "source" : parse_list (form .get ("data_source" , "" )),
119- "modality" : parse_list (form .get ("modality" , "" )),
144+ "categories" : parse_checkboxes (form .get ("categories" , "" )),
145+ "hardware" : parse_checkboxes (form .get ("hardware_targets" , "" )),
146+ "source" : parse_checkboxes (form .get ("data_source" , "" )),
147+ "modality" : parse_checkboxes (form .get ("modality" , "" )),
120148 "scale" : {
121149 "trajectories" : to_int (form .get ("number_of_trajectories" , "0" )),
122150 "hours" : to_int (form .get ("total_hours" , "0" )),
@@ -129,15 +157,53 @@ def build_dataset_entry(form: dict) -> dict:
129157 "last_updated" : TODAY ,
130158 },
131159 "added_date" : TODAY ,
132- "tags" : parse_list (form .get ("tags" , "" )),
160+ "tags" : parse_list (form .get ("tags_(optional)" , form . get ( " tags" , "" ) )),
133161 }
134162
135163
def build_tool_entry(form: dict) -> dict:
    """Build the entry dict for a tool from a parsed issue form.

    Args:
        form: key→value mapping produced by ``parse_form``.

    Returns:
        A dict with the tool's identity, descriptions, URLs, type,
        capability flags, languages, zero-initialised stats and today's
        date. ``id`` is lower-cased, spaces become hyphens, and every
        character outside ``[a-z0-9-]`` is removed; it is an empty string
        when the form has no id field.
    """
    # The dropdown "Type" field may include a description suffix after a
    # whitespace-surrounded dash — keep only the first line's leading part.
    raw_type = form.get("type", "")
    first_line = raw_type.splitlines()[0] if raw_type else ""
    tool_type = re.split(r"\s+[—–-]\s+", first_line)[0].strip()

    # Capability flags are inferred from the ticked feature labels
    # (case-insensitive substring match).
    features = parse_checkboxes(form.get("features", ""))
    gpu = any("gpu" in f.lower() for f in features)
    ros = any("ros" in f.lower() for f in features)

    raw_lang = form.get("primary_languages", form.get("primary_language_s", ""))
    languages = parse_list(raw_lang)

    # parse_form removes "(" and ")" from heading keys, so a heading such as
    # "ID (slug)" is stored as "id_slug" (not "id_(slug)"). Look up the
    # normalised key first and keep the older spellings as fallbacks.
    raw_id = form.get("id_slug", form.get("id_(slug)", form.get("id", "")))
    raw_tags = form.get(
        "tags_optional", form.get("tags_(optional)", form.get("tags", ""))
    )

    return {
        "id": re.sub(r"[^a-z0-9-]", "", raw_id.lower().replace(" ", "-")),
        "name": form.get("name", ""),
        "org": form.get("organization", ""),
        "year": to_int(form.get("year", str(date.today().year))),
        "description_en": form.get("description_english", form.get("description_en", "")),
        "description_ko": form.get("description_korean", form.get("description_ko", "")),
        "github_url": form.get("github_url", ""),
        "paper_url": form.get("paper_url_arxiv", form.get("paper_url", "")),
        "project_url": form.get("project_/_docs_url", form.get("project_url", "")),
        "type": tool_type,
        "gpu_accelerated": gpu,
        "ros_support": ros,
        "language": languages,
        "stats": {
            "github_stars": 0,
            "last_updated": TODAY,
        },
        "added_date": TODAY,
        "tags": parse_list(raw_tags),
    }
197+
198+
199+ # ─────────────────────────────────────────────────────────────────────────────
200+ # YAML I/O
201+ # ─────────────────────────────────────────────────────────────────────────────
202+
136203def append_entry (yaml_path : Path , entry : dict ) -> None :
137204 with open (yaml_path , encoding = "utf-8" ) as f :
138205 entries = yaml .safe_load (f ) or []
139206
140- # Check for duplicate id
141207 existing_ids = {e .get ("id" ) for e in entries }
142208 if entry ["id" ] in existing_ids :
143209 print (f"::error::Entry with id '{ entry ['id' ]} ' already exists in { yaml_path .name } " )
@@ -149,6 +215,10 @@ def append_entry(yaml_path: Path, entry: dict) -> None:
149215 print (f"✅ Appended '{ entry ['id' ]} ' to { yaml_path .name } " )
150216
151217
218+ # ─────────────────────────────────────────────────────────────────────────────
219+ # Entry point
220+ # ─────────────────────────────────────────────────────────────────────────────
221+
152222def main () -> None :
153223 body = os .environ .get ("ISSUE_BODY" , "" )
154224 issue_type = os .environ .get ("ISSUE_TYPE" , "" ).lower ()
@@ -159,8 +229,8 @@ def main() -> None:
159229 print ("::error::ISSUE_BODY is empty" )
160230 sys .exit (1 )
161231
162- if issue_type not in ("model" , "dataset" ):
163- print (f"::error::ISSUE_TYPE must be 'model' or 'dataset ', got: '{ issue_type } '" )
232+ if issue_type not in ("model" , "dataset" , "tool" ):
233+ print (f"::error::ISSUE_TYPE must be 'model', 'dataset', or 'tool ', got: '{ issue_type } '" )
164234 sys .exit (1 )
165235
166236 form = parse_form (body )
@@ -169,9 +239,12 @@ def main() -> None:
169239 if issue_type == "model" :
170240 entry = build_model_entry (form )
171241 yaml_path = DATA_DIR / "models.yaml"
172- else :
242+ elif issue_type == "dataset" :
173243 entry = build_dataset_entry (form )
174244 yaml_path = DATA_DIR / "datasets.yaml"
245+ else :
246+ entry = build_tool_entry (form )
247+ yaml_path = DATA_DIR / "tools.yaml"
175248
176249 if not entry ["id" ]:
177250 print ("::error::Could not determine entry 'id' from form" )
0 commit comments