@@ -38,7 +38,16 @@ class CifParserError(Sorry):
3838
3939
4040# Parser engine selection.
41- # Callers can override per-instance via the engine= kwarg on reader.__init__.
41+ #
42+ # "ucif" is the historical ANTLR-generated parser that has shipped with
43+ # iotbx.cif since 2011; it is the default for backward compatibility.
44+ # "xcif" is a newer hand-written C++ parser in cctbx_project/xcif that
45+ # is API-compatible through the iotbx.cif.reader adapter but materially
46+ # faster on large files (memory-mapped zero-copy parse_file path).
47+ #
48+ # Callers can override per-instance via the engine= kwarg on
49+ # reader.__init__. DEFAULT_ENGINE is read at construction time; changing
50+ # it at runtime affects only subsequent reader() calls.
4251DEFAULT_ENGINE = "ucif"
4352_VALID_ENGINES = ("ucif" , "xcif" )
4453
@@ -155,6 +164,81 @@ def _drive_builder_from_xcif_file(builder, file_path, strict):
155164
156165
157166class reader (object ):
167+ """Parse a CIF / mmCIF input and populate an iotbx.cif.model tree.
168+
169+ Accepts input via exactly one of file_path, file_object, or
170+ input_string. Parsing is delegated to a selected engine (see
171+ engine= below). On success, model() returns the populated
172+ iotbx.cif.model.cif tree. On error, the behavior depends on the
173+ raise_if_errors flag and the selected engine (see engine= for the
174+ divergence).
175+
176+ Arguments
177+ ---------
178+ file_path : str or None
179+ Path to a CIF file on disk. Handles .gz / .Z / .bz2 via
180+ smart_open. Mutually exclusive with file_object / input_string.
181+ file_object : file-like or None
182+ Readable stream containing CIF text. Read to EOF. Mutually
183+ exclusive with file_path / input_string.
184+ input_string : str or None
185+ CIF text already in memory. Mutually exclusive with the above.
186+ cif_object : iotbx.cif.model.cif or None
187+ Pre-existing model to append into. Mutually exclusive with
188+ builder.
189+ builder : object or None
190+ Custom builder receiving parse callbacks. Must implement
191+ add_data_block(heading), add_data_item(tag, value),
192+ add_loop(header, data), start_save_frame(heading), and
193+ end_save_frame(). IMPORTANT: heading strings are passed WITH
194+ the "data_" / "save_" prefix intact (e.g. "data_foo",
195+ "save_bar"); the default cif_model_builder strips the prefix
196+ at the first underscore, so a third-party builder that
197+ string-compares the full token needs to account for the
198+ prefix. Mutually exclusive with cif_object.
199+ raise_if_errors : bool, default True
200+ If True, raise CifParserError on the first parse error. If
201+ False, collect errors (see error_count() / show_errors()) and
202+ leave whatever model state the engine produced accessible via
203+ model(). See engine= for the partial-model divergence.
204+ strict : bool, default True
205+ If False, accept STAR/DDL2 global_ blocks and content
206+ appearing before the first data_ block (attached to an
207+ implicit global_ block). Required for the cctbx monomer
208+ library.
209+ engine : {"ucif", "xcif", None}, default None
210+ Selects the underlying parser implementation. None means
211+ DEFAULT_ENGINE. Behavioral differences that matter for
212+ callers:
213+ * Error messages. xcif prefixes diagnostics with
214+ "<source>:line:col: "; ucif emits bare messages. Code
215+ that greps CifParserError strings must handle both.
216+ * Partial-model-on-error (raise_if_errors=False). ucif
217+ continues past the first error, accumulates multiple
218+ diagnostics, and yields a partial model in which the blocks
219+ parsed before the fault are populated. xcif stops at the
220+ first CifError and yields an EMPTY model with exactly
221+ one diagnostic. Callers that salvage partial state from
222+ malformed files must use engine="ucif".
223+ * Source order. Both engines preserve pair/loop/save-frame
224+ source order within a block (str(model) output matches).
225+ * File path fast-path. engine="xcif" with file_path set
226+ and an uncompressed extension dispatches to a
227+ memory-mapped C++ parser, skipping the Python-string
228+ copy of the file. Compressed files (.gz/.Z/.bz2) and
229+ file_object= inputs fall back to the read-into-string
230+ path.
231+
232+ Attributes
233+ ----------
234+ engine : str
235+ The engine actually used for this parse (never None after
236+ __init__).
237+ file_path : str or None
238+ As passed in.
239+ builder : object
240+ The builder that received callbacks.
241+ """
158242
159243 def __init__ (self ,
160244 file_path = None ,
0 commit comments