Skip to content

Commit 5e6a088

Browse files
authored
Feature/validation fixes (#11)
* Validate duplicate parameters/sensors correctly * Differentiate deprecated terms when validating
1 parent 8027a4c commit 5e6a088

File tree

6 files changed

+243
-20
lines changed

6 files changed

+243
-20
lines changed

argo_metadata_validator/validation.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from argo_metadata_validator.models.sensor import Sensor
1414
from argo_metadata_validator.schema_utils import get_json_validator, infer_schema_from_data, infer_version_from_data
1515
from argo_metadata_validator.utils import load_json
16-
from argo_metadata_validator.vocab_utils import expand_vocab, get_all_terms_from_argo_vocabs
16+
from argo_metadata_validator.vocab_utils import VocabTerms, expand_vocab, get_all_terms_from_argo_vocabs
1717

1818

1919
def _parse_json_error(error: JsonValidationError) -> ValidationError:
@@ -25,11 +25,11 @@ class ArgoValidator:
2525

2626
all_json_data: dict[str, Any] = {} # Keyed by the original filename
2727
validation_errors: dict[str, list[ValidationError]] = {} # Keyed by the original filename
28-
valid_argo_vocab_terms: list[str] = []
28+
argo_vocab_terms: VocabTerms
2929

3030
def __init__(self):
3131
"""Initialise by pre-loading the ARGO vocab terms."""
32-
self.valid_argo_vocab_terms = get_all_terms_from_argo_vocabs()
32+
self.argo_vocab_terms = get_all_terms_from_argo_vocabs()
3333

3434
def load_json_data(self, json_files: list[str]):
3535
"""Take a list of JSON files and load content into memory.
@@ -143,6 +143,23 @@ def _validate_vocabs(self, json_data: Any) -> list[ValidationError]:
143143
)
144144
return validation_errors
145145

146+
def _is_term_found(self, uri: str, term_list: list[str]) -> bool:
    """Check whether a vocab term URI is present in a list of known URIs.

    A URI matches either exactly or, when it ends in ``_N/`` (N being one
    or more digits), after that suffix is replaced by ``/``. The suffix is
    how a duplicate term is written, so ``.../OPTODE_DOXY_2/`` is accepted
    when ``.../OPTODE_DOXY/`` is in the list.

    Args:
        uri: Term URI to look up; the duplicate check expects a trailing "/".
        term_list: Known term URIs to search.

    Returns:
        True if the URI, or its form without the duplicate suffix, is in
        ``term_list``; False otherwise.
    """
    if uri in term_list:
        return True
    # Strip a trailing "_N/" in a single pass. A zero substitution count means
    # there was no duplicate suffix, so the term cannot be a duplicate.
    base_uri, stripped = re.subn(r"_\d+/$", "/", uri)
    return bool(stripped) and base_uri in term_list
156+
157+
def _is_active_term(self, uri: str):
    """Report whether ``uri`` is found among the active ARGO vocab terms."""
    active_terms = self.argo_vocab_terms.active
    return self._is_term_found(uri, active_terms)
159+
160+
def _is_deprecated_term(self, uri: str):
    """Report whether ``uri`` is found among the deprecated ARGO vocab terms."""
    deprecated_terms = self.argo_vocab_terms.deprecated
    return self._is_term_found(uri, deprecated_terms)
162+
146163
def validate_vocab_terms(self, json_data: Any, field: str, sub_fields: list[str]) -> list[ValidationError]:
147164
"""Check that specific fields in the JSON match ARGO vocab terms.
148165
@@ -172,6 +189,10 @@ def validate_vocab_terms(self, json_data: Any, field: str, sub_fields: list[str]
172189
# Vocab terms can have optional text enclosed in square brackets
173190
val = re.sub(r"\s+\[\w+\]", "", val)
174191
val = expand_vocab(context, val)
175-
if val not in self.valid_argo_vocab_terms:
176-
errors.append(ValidationError(message=f"Unknown NSV term: {val}", path=f"{field}.{idx}.{x}"))
192+
if not self._is_active_term(val):
193+
if self._is_deprecated_term(val):
194+
error = ValidationError(message=f"Deprecated NSV term: {val}", path=f"{field}.{idx}.{x}")
195+
else:
196+
error = ValidationError(message=f"Unknown NSV term: {val}", path=f"{field}.{idx}.{x}")
197+
errors.append(error)
177198
return errors

argo_metadata_validator/vocab_utils.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Utilities related to NVS/vocabularies."""
22

33
import requests
4+
from pydantic import BaseModel
45

56
NVS_HOST = "http://vocab.nerc.ac.uk"
67

@@ -20,6 +21,13 @@
2021
]
2122

2223

24+
class VocabTerms(BaseModel):
    """Model to hold fetched vocab terms from NVS.

    Term URIs are kept in two separate lists so that a deprecated term can be
    told apart from one that is still active.
    """

    # URIs of terms that are not flagged as deprecated.
    active: list[str]
    # URIs of terms that are flagged as deprecated.
    deprecated: list[str]
29+
30+
2331
def expand_vocab(context: dict, value: str):
2432
"""Use context from the JSON to expand vocab terms to full URIs."""
2533
val = value
@@ -31,19 +39,21 @@ def expand_vocab(context: dict, value: str):
3139
return val
3240

3341

34-
def get_all_terms_from_argo_vocabs() -> list[str]:
42+
def get_all_terms_from_argo_vocabs() -> VocabTerms:
    """Fetch the terms of every ARGO vocabulary, split by deprecation status.

    Calls get_all_terms_from_vocab once for each entry in ALL_ARGO_VOCABS and
    merges the per-vocabulary results into a single collection.

    Returns:
        VocabTerms: Term URIs gathered across all vocabularies, with active
            terms in ``active`` and deprecated terms in ``deprecated``.
    """
    terms = VocabTerms(active=[], deprecated=[])
    for vocab in ALL_ARGO_VOCABS:
        vocab_terms = get_all_terms_from_vocab(vocab)
        terms.active += vocab_terms.active
        terms.deprecated += vocab_terms.deprecated
    return terms
4454

4555

46-
def get_all_terms_from_vocab(vocab: str):
56+
def get_all_terms_from_vocab(vocab: str) -> VocabTerms:
4757
"""SPARQL query to fetch all active terms from a given vocab.
4858
4959
Args:
@@ -53,17 +63,21 @@ def get_all_terms_from_vocab(vocab: str):
5363
sparql_query = f"""
5464
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
5565
PREFIX owl: <http://www.w3.org/2002/07/owl#>
56-
SELECT DISTINCT (?c as ?uri)
66+
SELECT DISTINCT (?c as ?uri) ?isDeprecated
5767
WHERE {{
5868
<{NVS_HOST}/collection/{vocab}/current/> skos:member ?c .
59-
?c owl:deprecated ?isDeprecated .
60-
FILTER (?isDeprecated = "false")
69+
?c owl:deprecated ?isDeprecated
6170
}}
6271
"""
6372

6473
resp = requests.post(
6574
query_url, data=sparql_query, headers={"Content-Type": "application/sparql-query"}, timeout=120
6675
)
6776
resp.raise_for_status()
68-
results = [x["uri"]["value"] for x in resp.json()["results"]["bindings"]]
77+
results = VocabTerms(active=[], deprecated=[])
78+
for x in resp.json()["results"]["bindings"]:
79+
if x["isDeprecated"]["value"] == "true":
80+
results.deprecated.append(x["uri"]["value"])
81+
else:
82+
results.active.append(x["uri"]["value"])
6983
return results
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
{
2+
"sensor_info": {
3+
"created_by": "BAK test",
4+
"date_creation": "2023-02-03T07:38:07Z",
5+
"link" : "./argo.sensor.schema.json",
6+
"format_version": "0.4.0",
7+
"contents": "json file to describe a sensor for Argo. v0.4.0 draft",
8+
"sensor_described": "AANDERAA-AANDERAA_OPTODE_4330-3901"
9+
},
10+
"@context": {
11+
"SDN:R03::": "http://vocab.nerc.ac.uk/collection/R03/current/",
12+
"SDN:R25::": "http://vocab.nerc.ac.uk/collection/R25/current/",
13+
"SDN:R26::": "http://vocab.nerc.ac.uk/collection/R26/current/",
14+
"SDN:R27::": "http://vocab.nerc.ac.uk/collection/R27/current/"
15+
},
16+
"SENSORS": [
17+
{
18+
"SENSOR": "SDN:R25::OPTODE_DOXY",
19+
"SENSOR_MAKER": "SDN:R26::AANDERAA",
20+
"SENSOR_MODEL": "SDN:R27::AANDERAA_OPTODE_4330",
21+
"SENSOR_MODEL_FIRMWARE": " ",
22+
"SENSOR_SERIAL_NO": "3901"
23+
}
24+
],
25+
"PARAMETERS": [
26+
{
27+
"PARAMETER": "SDN:R03::NB_SAMPLE",
28+
"PARAMETER_SENSOR": "SDN:R25::OPTODE_DOXY",
29+
"PARAMETER_UNITS": "degree",
30+
"PARAMETER_ACCURACY": " ",
31+
"PARAMETER_RESOLUTION": " ",
32+
"PREDEPLOYMENT_CALIB_EQUATION": "none",
33+
"PREDEPLOYMENT_CALIB_COEFFICIENT_LIST": { },
34+
"PREDEPLOYMENT_CALIB_COMMENT": "Phase measurement with blue excitation light; see TD269 Operating manual oxygen optode 4330, 4835, 483",
35+
"PREDEPLOYMENT_CALIB_DATE": " "
36+
},
37+
{
38+
"PARAMETER": "SDN:R03::C2PHASE_DOXY",
39+
"PARAMETER_SENSOR": "SDN:R25::OPTODE_DOXY",
40+
"PARAMETER_UNITS": "degree",
41+
"PARAMETER_ACCURACY": " ",
42+
"PARAMETER_RESOLUTION": " ",
43+
"PREDEPLOYMENT_CALIB_EQUATION": "none",
44+
"PREDEPLOYMENT_CALIB_COEFFICIENT_LIST": { },
45+
"PREDEPLOYMENT_CALIB_COMMENT": "Phase measurement with red excitation light; see TD269 Operating manual oxygen optode 4330, 4835, 4831",
46+
"PREDEPLOYMENT_CALIB_DATE": " "
47+
},
48+
{
49+
"PARAMETER": "SDN:R03::TEMP_DOXY",
50+
"PARAMETER_SENSOR": "SDN:R25::OPTODE_DOXY",
51+
"PARAMETER_UNITS": "degC",
52+
"PARAMETER_ACCURACY": "0.03",
53+
"PARAMETER_RESOLUTION": "0.01",
54+
"PREDEPLOYMENT_CALIB_EQUATION": "TEMP_DOXY=T0+T1*TEMP_VOLTAGE_DOXY+T2*TEMP_VOLTAGE_DOXY^2+T3*TEMP_VOLTAGE_DOXY^3+T4*TEMP_VOLTAGE_DOXY^4+T5*TEMP_VOLTAGE_DOXY^5; with TEMP_VOLTAGE_DOXY=voltage from thermistor bridge (mV)",
55+
"PREDEPLOYMENT_CALIB_COEFFICIENT_LIST": {
56+
"T0": "not available",
57+
"T1": "not available",
58+
"T2": "not available",
59+
"T3": "not available",
60+
"T4": "not available",
61+
"T5": "not available"
62+
},
63+
"PREDEPLOYMENT_CALIB_COMMENT": "optode temperature, see TD269 Operating manual oxygen optode 4330, 4835, 4831",
64+
"PREDEPLOYMENT_CALIB_DATE": "2020-01-01T00:00:00Z"
65+
},
66+
{
67+
"PARAMETER": "SDN:R03::DOXY",
68+
"PARAMETER_SENSOR": "SDN:R25::OPTODE_DOXY",
69+
"PARAMETER_UNITS": "umol/kg",
70+
"PARAMETER_ACCURACY": "8 umol/kg or 10%",
71+
"PARAMETER_RESOLUTION": "1 umol/kg",
72+
"PREDEPLOYMENT_CALIB_EQUATION": "TPHASE_DOXY=C1PHASE_DOXY-C2PHASE_DOXY; Phase_Pcorr=TPHASE_DOXY+Pcoef1*PRES/1000; CalPhase=PhaseCoef0+PhaseCoef1*Phase_Pcorr+PhaseCoef2*Phase_Pcorr^2+PhaseCoef3*Phase_Pcorr^3; MOLAR_DOXY=[((c3+c4*TEMP_DOXY)/(c5+c6*CalPhase))-1]/Ksv; Ksv=c0+c1*TEMP_DOXY+c2*TEMP_DOXY^2; O2=MOLAR_DOXY*Scorr*Pcorr; Scorr=A*exp[PSAL*(B0+B1*Ts+B2*Ts^2+B3*Ts^3)+C0*PSAL^2]; A=[(1013.25-pH2O(TEMP,Spreset))/(1013.25-pH2O(TEMP,PSAL))]; pH2O(TEMP,S)=1013.25*exp[D0+D1*(100/(TEMP+273.15))+D2*ln((TEMP+273.15)/100)+D3*S]; Pcorr=1+((Pcoef2*TEMP+Pcoef3)*PRES)/1000; Ts=ln[(298.15-TEMP)/(273.15+TEMP)]; DOXY=O2/rho, where rho is the potential density [kg/L] calculated from CTD data",
73+
"PREDEPLOYMENT_CALIB_COEFFICIENT_LIST": {
74+
"Spreset": "0",
75+
"Pcoef1": "0.1",
76+
"Pcoef2": "0.00022",
77+
"Pcoef3": "0.0419",
78+
"B0": "0.00624523",
79+
"B1": "-0.00737614",
80+
"B2": "-0.010341",
81+
"B3": "-0.00817083",
82+
"C0": "3.1201642E-3",
83+
"PhaseCoef0": "-1.652",
84+
"PhaseCoef1": "1",
85+
"PhaseCoef2": "0",
86+
"PhaseCoef3": "0",
87+
"c0": "0.00261275",
88+
"c1": "0.00011268",
89+
"c2": "2.2309e-06",
90+
"c3": "200.183",
91+
"c4": "-0.223497",
92+
"c5": "-43.6776",
93+
"c6": "4.10578",
94+
"D0": "24.4543",
95+
"D1": "-67.4509",
96+
"D2": "-4.8489",
97+
"D3": "-0.000544",
98+
"C1": "-285.56594E-6",
99+
"C2": "316.32993E-9",
100+
"C3": "-1.0767272E-6"
101+
},
102+
"PREDEPLOYMENT_CALIB_COMMENT": "see TD269 Operating manual oxygen optode 4330, 4835, 4831; see Processing Argo OXYGEN data at the DAC level, Version 2.2 (DOI: http://dx.doi.org/10.13155/39795)",
103+
"PREDEPLOYMENT_CALIB_DATE": "2020-01-01T00:00:00Z",
104+
"parameter_vendorinfo": {
105+
"example": "Vendors can add their own parameter info here; they can add any field they like as a fieldname below parameter_vendorinfo"
106+
},
107+
"predeployment_vendorinfo": {
108+
"example": "Vendors can add their own predeployment info here; they can add any field they like as a fieldname below predeployment_vendorinfo"
109+
}
110+
},
111+
{
112+
"PARAMETER": "SDN:R03::PPOX_DOXY",
113+
"PARAMETER_SENSOR": "SDN:R25::OPTODE_DOXY",
114+
"PARAMETER_UNITS": "mbar",
115+
"PARAMETER_ACCURACY": " ",
116+
"PARAMETER_RESOLUTION": " ",
117+
"PREDEPLOYMENT_CALIB_EQUATION": "TPHASE_DOXY=C1PHASE_DOXY-C2PHASE_DOXY; Phase_Pcorr=TPHASE_DOXY+Pcoef1*PRES/1000; CalPhase=PhaseCoef0+PhaseCoef1*Phase_Pcorr+PhaseCoef2*Phase_Pcorr^2+PhaseCoef3*Phase_Pcorr^3; Ksv=c0+c1*TEMP_DOXY+c2*TEMP_DOXY^2; MOLAR_DOXY=[((c3+c4*TEMP_DOXY)/(c5+c6*CalPhase))-1]/Ksv; Pcorr=1+((Pcoef2*TEMP+Pcoef3)*PRES)/1000; MOLAR_DOXY=MOLAR_DOXY*Pcorr; pH2Osat=1013.25*exp[D0+D1*(100/(TEMP+273.15))+D2*ln((TEMP+273.15)/100)]; Tcorr=44.6596*exp[2.00907+3.22014*Ts+4.05010*Ts^2+4.94457*Ts^3-2.56847e-1*Ts^4+3.88767*Ts^5]; Ts=ln[(298.15-TEMP)/(273.15+TEMP)]; PPOX_DOXY=MOLAR_DOXY*(0.20946*(1013.25-pH2Osat))/Tcorr*exp[0.317*PRES/(8.314*(TEMP+273.15))]",
118+
"PREDEPLOYMENT_CALIB_COEFFICIENT_LIST": {
119+
"Pcoef1": "0.1",
120+
"Pcoef2": "0.00022",
121+
"Pcoef3": "0.0419",
122+
"PhaseCoef0": "-1.652",
123+
"PhaseCoef1": "1",
124+
"PhaseCoef2": "0",
125+
"PhaseCoef3": "0",
126+
"c0": "0.00261275",
127+
"c1": "0.00011268",
128+
"c2": "2.2309e-06",
129+
"c3": "200.183",
130+
"c4": "-0.223497",
131+
"c5": "-43.6776",
132+
"c6": "4.10578",
133+
"D0": "24.4543",
134+
"D1": "-67.4509",
135+
"D2": "-4.8489",
136+
"C0": "3.1201642E-3",
137+
"C1": "-285.56594E-6",
138+
"C2": "316.32993E-9",
139+
"C3": "-1.0767272E-6"
140+
},
141+
"PREDEPLOYMENT_CALIB_COMMENT": "see TD269 Operating manual oxygen optode 4330, 4835, 4831; see Processing Argo OXYGEN data at the DAC level, Version 2.2 (DOI: http://dx.doi.org/10.13155/39795)",
142+
"PREDEPLOYMENT_CALIB_DATE": "2020-01-01T00:00:00Z",
143+
"parameter_vendorinfo": {
144+
"example": "Vendors can add their own parameter info here; they can add any field they like as a fieldname below parameter_vendorinfo"
145+
},
146+
"predeployment_vendorinfo": {
147+
"example": "Vendors can add their own predeployment info here; they can add any field they like as a fieldname below predeployment_vendorinfo"
148+
}
149+
}
150+
],
151+
"instrument_vendorinfo": {
152+
"example": "Vendors can add something about the whole instrument here; they can add field they like as a fieldname below instrument_vendorinfo"
153+
}
154+
}

tests/files/valid_float.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,13 @@
174174
"SENSOR_MODEL_FIRMWARE": " ",
175175
"SENSOR_SERIAL_NO": "3901"
176176
},
177+
{
178+
"SENSOR": "SDN:R25::OPTODE_DOXY_2",
179+
"SENSOR_MAKER": "SDN:R26::AANDERAA",
180+
"SENSOR_MODEL": "SDN:R27::AANDERAA_OPTODE_4330",
181+
"SENSOR_MODEL_FIRMWARE": " ",
182+
"SENSOR_SERIAL_NO": "3901"
183+
},
177184
{
178185
"SENSOR": "SDN:R25::CTD_PRES",
179186
"SENSOR_MAKER": "SDN:R26::RBR",
@@ -496,6 +503,17 @@
496503
"PREDEPLOYMENT_CALIB_COMMENT": "Phase measurement with blue excitation light; see TD269 Operating manual oxygen optode 4330, 4835, 483",
497504
"PREDEPLOYMENT_CALIB_DATE": " "
498505
},
506+
{
507+
"PARAMETER": "SDN:R03::C1PHASE_DOXY_2",
508+
"PARAMETER_SENSOR": "SDN:R25::OPTODE_DOXY_2",
509+
"PARAMETER_UNITS": "degree",
510+
"PARAMETER_ACCURACY": " ",
511+
"PARAMETER_RESOLUTION": " ",
512+
"PREDEPLOYMENT_CALIB_EQUATION": "none",
513+
"PREDEPLOYMENT_CALIB_COEFFICIENT_LIST": {},
514+
"PREDEPLOYMENT_CALIB_COMMENT": "Phase measurement with blue excitation light; see TD269 Operating manual oxygen optode 4330, 4835, 483",
515+
"PREDEPLOYMENT_CALIB_DATE": " "
516+
},
499517
{
500518
"PARAMETER": "SDN:R03::C2PHASE_DOXY",
501519
"PARAMETER_SENSOR": "SDN:R25::OPTODE_DOXY",

tests/integration_tests/test_file_validation.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,17 @@
4343
]
4444
},
4545
],
46+
[
47+
"sensor_deprecated_vocab.json",
48+
{
49+
"sensor_deprecated_vocab.json": [
50+
ValidationError(
51+
message="Deprecated NSV term: http://vocab.nerc.ac.uk/collection/R03/current/NB_SAMPLE/",
52+
path="PARAMETERS.0.PARAMETER",
53+
)
54+
]
55+
},
56+
],
4657
],
4758
)
4859
def test_validating_files(file_path, expected_output):

tests/unit_tests/test_vocab_utils.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from argo_metadata_validator.vocab_utils import (
77
ALL_ARGO_VOCABS,
88
NVS_HOST,
9+
VocabTerms,
910
expand_vocab,
1011
get_all_terms_from_argo_vocabs,
1112
get_all_terms_from_vocab,
@@ -34,24 +35,27 @@ def test_expand_vocab(input_val, expected_result):
3435

3536
def test_get_all_terms_from_argo_vocabs(mocker):
    """Test for get_all_terms_from_argo_vocabs calling mocked version of sub-method."""
    # Every vocab returns one active and one deprecated term so that the
    # aggregation of both lists can be checked.
    per_vocab_terms = VocabTerms(active=["1"], deprecated=["2"])
    mock_get = mocker.patch(
        "argo_metadata_validator.vocab_utils.get_all_terms_from_vocab",
        return_value=per_vocab_terms,
    )

    result = get_all_terms_from_argo_vocabs()

    # Check the per-vocab call happens the right number of times
    assert mock_get.call_count == len(ALL_ARGO_VOCABS)
    # Check that both term lists are merged across every vocab
    assert isinstance(result, VocabTerms)
    assert result.active == ["1"] * len(ALL_ARGO_VOCABS)
    assert result.deprecated == ["2"] * len(ALL_ARGO_VOCABS)
4650

4751

4852
def test_get_all_terms_from_vocab():
4953
"""Simple unit test for get_all_terms_from_vocab, mocking the HTTP call."""
5054
example_response = {
5155
"results": {
5256
"bindings": [
53-
{"uri": {"value": "http://vocab/hi"}},
54-
{"uri": {"value": "http://vocab/bye"}},
57+
{"uri": {"value": "http://vocab/hi"}, "isDeprecated": {"value": "false"}},
58+
{"uri": {"value": "http://vocab/bye"}, "isDeprecated": {"value": "true"}},
5559
]
5660
}
5761
}
@@ -60,4 +64,5 @@ def test_get_all_terms_from_vocab():
6064
mock_req.post(f"{NVS_HOST}/sparql/sparql", json=example_response)
6165
result = get_all_terms_from_vocab("R01")
6266

63-
assert result == ["http://vocab/hi", "http://vocab/bye"]
67+
assert result.active == ["http://vocab/hi"]
68+
assert result.deprecated == ["http://vocab/bye"]

0 commit comments

Comments
 (0)