Skip to content

Commit 99f588d

Browse files
authored
add support for import/transform schema autodetection (#198) (#262)
* add support for import/transform schema autodetection (#198) * fix PR comments
1 parent ebeb0bf commit 99f588d

5 files changed

Lines changed: 151 additions & 34 deletions

File tree

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,14 @@ pygeometa metadata validate path/to/file.yml
6464
# import a metadata document to MCF
6565
pygeometa metadata import path/to/file.xml --schema=iso19139
6666

67+
# import a metadata document to MCF, autodetecting the metadata file format
68+
pygeometa metadata import path/to/file.xml --schema=autodetect # --schema=autodetect is default
69+
6770
# transform from one metadata representation to another
6871
pygeometa metadata transform path/to/file.xml --input-schema=iso19139 --output-schema=oarec-record
72+
73+
# transform from one metadata representation to another, autodetecting the metadata file format
74+
pygeometa metadata transform path/to/file.xml --input-schema=autodetect --output-schema=oarec-record # --input-schema=autodetect is default
6975
```
7076

7177
### Supported schemas

docs/content/tutorial.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,14 @@ pygeometa validate path/to/file.yml
6262
# import a metadata document to MCF
6363
pygeometa metadata import path/to/file.xml --schema=iso19139
6464

65+
# import a metadata document to MCF, autodetecting the metadata file format
66+
pygeometa metadata import path/to/file.xml --schema=autodetect # --schema=autodetect is default
67+
6568
# transform from one metadata representation to another
6669
pygeometa metadata transform path/to/file.xml --input-schema=iso19139 --output-schema=oarec-record
70+
71+
# transform from one metadata representation to another, autodetecting the metadata file format
72+
pygeometa metadata transform path/to/file.xml --input-schema=autodetect --output-schema=oarec-record # --input-schema=autodetect is default
6773
```
6874

6975
## For Developers

pygeometa/core.py

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,58 @@ def __parse_mcf_dict_recursive(dict2):
324324
return mcf_dict
325325

326326

327+
def import_metadata(schema: str, metadata: str) -> dict:
    """
    Import metadata

    :param schema: schema / format, or `autodetect` to try every
                   supported schema in turn until one imports successfully
    :param metadata: metadata string

    :returns: MCF object

    :raises RuntimeError: if an explicitly requested schema does not support
                          import or fails to import, or if no supported
                          schema is able to import in `autodetect` mode
    """

    autodetect = schema == 'autodetect'

    if autodetect:
        schemas = get_supported_schemas()
    else:
        schemas = [schema]

    for s in schemas:
        LOGGER.debug(f'Attempting to import into {s}')
        schema_object = load_schema(s)

        try:
            return schema_object.import_(metadata)
        except NotImplementedError as err:
            # an explicitly requested schema must fail loudly; when
            # autodetecting, a schema without import support is simply
            # not a candidate, so move on to the next one
            if not autodetect:
                raise RuntimeError(f'Import not supported for {s}') from err
            LOGGER.debug(f'Import not supported for {s}')
        except Exception as err:
            # same reasoning: a parse failure while autodetecting only
            # means the metadata is not in this format
            if not autodetect:
                raise RuntimeError(f'Import failed: {err}') from err
            LOGGER.debug(f'Import failed for {s}: {err}')

    # only reachable when every candidate schema was tried without success
    raise RuntimeError('Import failed for all supported schemas')
352+
353+
354+
def transform_metadata(input_schema: str, output_schema: str,
                       metadata: str) -> str:
    """
    Transform metadata from one representation to another

    The metadata is first imported into an MCF object via
    `import_metadata`, then written out using the output schema.

    :param input_schema: input schema / format
    :param output_schema: output schema / format
    :param metadata: metadata string

    :returns: transformed metadata, or `None` if the import or the
              write step raised any exception
    """

    try:
        mcf = import_metadata(input_schema, metadata)

        LOGGER.info(f'Processing into {output_schema}')
        return load_schema(output_schema).write(mcf)
    except Exception as err:
        # failures are reported to the caller as `None`; the detail is
        # only available in the debug log
        LOGGER.debug(err)

    return None
377+
378+
327379
def pretty_print(xml: str) -> str:
328380
"""
329381
clean up indentation and spacing
@@ -479,23 +531,20 @@ class MCFValidationError(Exception):
479531
@cli_options.OPTION_OUTPUT
480532
@cli_options.OPTION_VERBOSITY
481533
@click.option('--schema', required=True,
482-
type=click.Choice(get_supported_schemas()),
534+
type=click.Choice(get_supported_schemas(include_autodetect=True)), # noqa
535+
default='autodetect',
483536
help='Metadata schema')
484537
def import_(ctx, metadata_file, schema, output, verbosity):
485538
"""import metadata"""
486539

487-
LOGGER.info(f'Importing {metadata_file} into {schema}')
488-
schema_object = load_schema(schema)
489-
490540
try:
491-
content = schema_object.import_(metadata_file.read())
492-
except NotImplementedError:
493-
raise click.ClickException(f'Import not supported for {schema}')
494-
495-
if output is None:
496-
click.echo(yaml.dump(content))
497-
else:
498-
output.write(yaml.dump(content, indent=4))
541+
content = import_metadata(schema, metadata_file.read())
542+
if output is None:
543+
click.echo(yaml.dump(content))
544+
else:
545+
output.write(yaml.dump(content, indent=4))
546+
except Exception as err:
547+
raise click.ClickException(f'No supported schema detecte/found: {err}')
499548

500549

501550
@click.command()
@@ -585,7 +634,8 @@ def validate(ctx, mcf, verbosity):
585634
@cli_options.OPTION_OUTPUT
586635
@cli_options.OPTION_VERBOSITY
587636
@click.option('--input-schema', required=True,
588-
type=click.Choice(get_supported_schemas()),
637+
type=click.Choice(get_supported_schemas(include_autodetect=True)), # noqa
638+
default='autodetect',
589639
help='Metadata schema of input file')
590640
@click.option('--output-schema', required=True,
591641
type=click.Choice(get_supported_schemas()),
@@ -594,18 +644,11 @@ def transform(ctx, metadata_file, input_schema, output_schema, output,
594644
verbosity):
595645
"""transform metadata"""
596646

597-
LOGGER.info(f'Importing {metadata_file} into {input_schema}')
598-
schema_object_input = load_schema(input_schema)
599-
content = None
600-
601-
try:
602-
content = schema_object_input.import_(metadata_file.read())
603-
except NotImplementedError:
604-
raise click.ClickException(f'Import not supported for {input_schema}')
647+
content = transform_metadata(input_schema, output_schema,
648+
metadata_file.read())
605649

606-
LOGGER.info(f'Processing into {output_schema}')
607-
schema_object_output = load_schema(output_schema)
608-
content = schema_object_output.write(content)
650+
if content is None:
651+
raise click.ClickException('No supported input schema detected/found')
609652

610653
if output is None:
611654
click.echo(content)

pygeometa/schemas/__init__.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,13 @@
6565
}
6666

6767

68-
def get_supported_schemas(details: bool = False) -> list:
68+
def get_supported_schemas(details: bool = False,
69+
include_autodetect: bool = False) -> list:
6970
"""
7071
Get supported schemas
7172
7273
:param details: provide read/write details
74+
:param include_autodetect: include magic auto detection mode
7375
7476
:returns: list of supported schemas
7577
"""
@@ -91,7 +93,12 @@ def has_mode(plugin: BaseOutputSchema, mode: str) -> bool:
9193
LOGGER.debug('Generating list of supported schemas')
9294

9395
if not details:
94-
return SCHEMAS.keys()
96+
if include_autodetect:
97+
schemas_keys = list(SCHEMAS.keys())
98+
schemas_keys.append('autodetect')
99+
return schemas_keys
100+
else:
101+
return SCHEMAS.keys()
95102

96103
for key in SCHEMAS.keys():
97104
schema = load_schema(key)
@@ -105,6 +112,14 @@ def has_mode(plugin: BaseOutputSchema, mode: str) -> bool:
105112
'write': can_write
106113
})
107114

115+
if include_autodetect:
116+
schema_matrix.append({
117+
'id': 'autodetect',
118+
'description': 'Auto schema detection',
119+
'read': True,
120+
'write': False
121+
})
122+
108123
return schema_matrix
109124

110125

tests/run_tests.py

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#
2121
# Copyright (c) 2015 Government of Canada
2222
# Copyright (c) 2016 ERT Inc.
23-
# Copyright (c) 2024 Tom Kralidis
23+
# Copyright (c) 2025 Tom Kralidis
2424
#
2525
# Permission is hereby granted, free of charge, to any person
2626
# obtaining a copy of this software and associated documentation
@@ -54,10 +54,11 @@
5454
import yaml
5555

5656
from pygeometa.core import (read_mcf, pretty_print, render_j2_template,
57-
get_charstring, normalize_datestring,
58-
prune_distribution_formats,
57+
get_charstring, import_metadata,
58+
normalize_datestring, prune_distribution_formats,
5959
prune_transfer_option, MCFReadError,
60-
MCFValidationError, SCHEMAS, validate_mcf)
60+
MCFValidationError, SCHEMAS, transform_metadata,
61+
validate_mcf)
6162
from pygeometa.helpers import json_dumps
6263
from pygeometa.schemas import (get_supported_schemas, InvalidSchemaError,
6364
load_schema)
@@ -229,10 +230,16 @@ def test_get_supported_schemas(self):
229230
'Expected specific number of supported schemas')
230231
self.assertEqual(sorted(schemas),
231232
sorted(['dcat', 'iso19139', 'iso19139-2',
232-
'iso19139-hnap', 'oarec-record', 'stac-item',
233-
'wmo-cmp', 'wmo-wcmp2', 'wmo-wigos']),
233+
'iso19139-hnap', 'oarec-record',
234+
'stac-item', 'wmo-cmp', 'wmo-wcmp2',
235+
'wmo-wigos']),
234236
'Expected exact list of supported schemas')
235237

238+
schemas = get_supported_schemas(include_autodetect=True)
239+
self.assertEqual(len(schemas), 10,
240+
'Expected specific number of supported schemas')
241+
self.assertIn('autodetect', schemas, 'Expected autodetect in list')
242+
236243
def test_render_j2_template(self):
237244
"""test template rendering"""
238245

@@ -397,8 +404,8 @@ def test_validate_mcf(self):
397404
with self.assertRaises(MCFValidationError):
398405
is_valid = validate_mcf({'foo': 'bar'})
399406

400-
def test_import(self):
401-
"""test metadata import"""
407+
def test_schema_import(self):
408+
"""test direct metadata schema import"""
402409

403410
schema = ISO19139OutputSchema()
404411

@@ -434,6 +441,46 @@ def test_import(self):
434441
self.assertEqual(expected_bbox, result_bbox,
435442
'Expected specific BBOX')
436443

444+
def test_import_metadata(self):
    """test metadata import with an explicit schema and with autodetect"""

    expected_title = 'WIS/GTS bulletin SMJP01 RJTD in FM12 SYNOP'

    # same document, same expectation: explicit schema first, then autodetect
    for schema_name in ('iso19139', 'autodetect'):
        with open(get_abspath('md-SMJP01RJTD-gmd.xml')) as fh:
            mcf = import_metadata(schema_name, fh.read())

        self.assertEqual(
            mcf['identification']['title'],
            expected_title,
            'Expected specific title')
462+
463+
def test_transform_metadata(self):
    """test metadata transform with an explicit schema and with autodetect"""

    expected_title = 'WIS/GTS bulletin SMJP01 RJTD in FM12 SYNOP'

    # same document, same expectation: explicit schema first, then autodetect
    for input_schema in ('iso19139', 'autodetect'):
        with open(get_abspath('md-SMJP01RJTD-gmd.xml')) as fh:
            transformed = transform_metadata(input_schema, 'oarec-record',
                                             fh.read())

        record = json.loads(transformed)
        self.assertEqual(
            record['properties']['title'],
            expected_title,
            'Expected specific title')
483+
437484

438485
def get_abspath(filepath):
439486
"""helper function absolute file access"""

0 commit comments

Comments
 (0)