Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions argopy/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
PARALLEL = "parallel"
PARALLEL_DEFAULT_METHOD = "parallel_default_method"
LON = "longitude_convention"
NVS = "nvs"

# Define the list of available options and default values:
OPTIONS = {
Expand All @@ -69,6 +70,7 @@
PARALLEL: False,
PARALLEL_DEFAULT_METHOD: "thread",
LON: "180",
NVS: "https://vocab.nerc.ac.uk/collection",
}
DEFAULT = OPTIONS.copy()

Expand Down Expand Up @@ -134,6 +136,7 @@ def validate_parallel_method(method):
PARALLEL: validate_parallel,
PARALLEL_DEFAULT_METHOD: validate_parallel_method,
LON: lambda x: x in ['180', '360'],
NVS: lambda x: isinstance(x, str) or x is None,
}


Expand Down Expand Up @@ -229,6 +232,9 @@ class set_options:
- '180': longitude goes from -180 to 180
- '360': longitude goes from 0 to 360

nvs: str, default: 'https://vocab.nerc.ac.uk/collection'
Argo NVS server

Other Parameters
----------------
server: str, default: None
Expand Down
268 changes: 152 additions & 116 deletions argopy/related/reference_tables.py
Original file line number Diff line number Diff line change
@@ -1,100 +1,39 @@
import pandas as pd
from functools import lru_cache
import collections
from pathlib import Path

from ..stores import httpstore, filestore
from ..options import OPTIONS
from ..utils import path2assets
from argopy.stores import httpstore
from argopy.options import OPTIONS
from argopy.utils.format import urnparser
from argopy.utils.locals import Asset

VALID_REF = Asset().load("nvs_reference_tables")['data']['valid_ref']

VALID_REF = filestore(cache=True).open_json(Path(path2assets).joinpath("nvs_reference_tables.json"))['data']['valid_ref']


class ArgoNVSReferenceTables:
"""Argo Reference Tables

Utility function to retrieve Argo Reference Tables from a NVS server.

By default, this relies on: https://vocab.nerc.ac.uk/collection

Examples
--------
Methods:

>>> R = ArgoNVSReferenceTables()
>>> R.search('sensor')
>>> R.tbl(3)
>>> R.tbl('R09')

Properties:

>>> R.all_tbl_name
>>> R.all_tbl
>>> R.valid_ref

Notes
-----
This class relies on a list of valid reference table ids that is updated on every argopy release.

"""
valid_ref = VALID_REF.copy()

"""List of all available Reference Tables"""
class NVScollection:
""" A class to handle any NVS collection table """

def __init__(
self,
nvs="https://vocab.nerc.ac.uk/collection",
cache: bool = True,
cachedir: str = "",
**kwargs,
):
"""Argo Reference Tables from NVS"""
"""Reference Tables from NVS collection"""
self.nvs = kwargs.get("nvs", OPTIONS["nvs"])

cachedir = OPTIONS["cachedir"] if cachedir == "" else cachedir
self.fs = httpstore(cache=cache, cachedir=cachedir)
self.nvs = nvs
self.fs = kwargs.get("fs", None)
if self.fs is None:
self._cache = kwargs.get("cache", True)
self._cachedir = kwargs.get("cachedir", OPTIONS["cachedir"])
self._timeout = kwargs.get("timeout", OPTIONS["api_timeout"])
self.fs = httpstore(cache=self._cache, cachedir=self._cachedir, timeout=self._timeout)

def _valid_ref(self, rtid):
"""
Validate any rtid argument and return the corresponding valid ID from the list.

Parameters
----------
rtid: Input reference ID. Can be a string (e.g., "R12", "12", "r12") or a number (e.g., 12).

Returns:
str: Valid reference ID from the list, or None if not found.
"""
# Convert rtid to a string and standardize its format
if isinstance(rtid, (int, float)):
# If rtid is a number, format it as "RXX"
rtid_str = f"R{int(rtid):02d}"
else:
# If rtid is a string, convert to uppercase and standardize
rtid_str = str(rtid).strip().upper()
if rtid_str.startswith('R') and len(rtid_str) > 1:
# If it starts with 'R', ensure the numeric part is two digits
prefix = rtid_str[0]
suffix = rtid_str[1:]
try:
num = int(suffix)
rtid_str = f"{prefix}{num:02d}"
except ValueError:
pass # Keep the original string if conversion fails
elif ~rtid_str.startswith('R'):
try:
num = int(rtid_str)
rtid_str = f"R{num}"
except ValueError:
pass # Keep the original string if conversion fails
@property
def valid_ref(self):
df = self._FullCollection()
return df['ID'].to_list()

# Check if the standardized rtid_str is in the valid_refs list
if rtid_str in self.valid_ref:
return rtid_str
else:
raise ValueError(
f"Invalid Argo Reference Table '{rtid}', must be one in: {', '.join(self.valid_ref)}"
)
def _valid_ref(self, rtid):
"""No validation"""
return rtid

def _jsConcept2df(self, data):
Expand All @@ -104,18 +43,21 @@ def _jsConcept2df(self, data):
"prefLabel": [],
"definition": [],
"deprecated": [],
"urn": [],
"id": [],
}
for k in data["@graph"]:
if k["@type"] == "skos:Collection":
Collection_name = k["dc:alternative"]
elif k["@type"] == "skos:Concept":
content["altLabel"].append(k["skos:altLabel"])
content["altLabel"].append(urnparser(k['skos:notation'])['termid'])
content["prefLabel"].append(k["skos:prefLabel"]["@value"])
content["definition"].append(k["skos:definition"]["@value"] if k["skos:definition"] != '' else None)
content["definition"].append(k["skos:definition"]["@value"])
content["deprecated"].append(k["owl:deprecated"])
content["urn"].append(k['skos:notation'])
content["id"].append(k["@id"])
df = pd.DataFrame.from_dict(content)
df['deprecated'] = df.apply(lambda x: True if x['deprecated']=='true' else False, axis=1)
df.name = Collection_name
return df

Expand All @@ -128,6 +70,25 @@ def _jsCollection(self, data):
rtid = k["@id"]
return (name, desc, rtid)

def _jsFullCollection(self, data):
"""Return all skos:Collection information as data"""
result = []
for k in data["@graph"]:
if k["@type"] == "skos:Collection":
title = k["dc:title"]
name = k["dc:alternative"]
desc = k["dc:description"]
url = k["@id"]
tid = k['@id'].split('/')[-3]
result.append((tid, title, name, desc, url))
return result

@lru_cache
def _FullCollection(self):
    """Fetch the index of all collections on the NVS server as a table

    The JSON-LD listing is requested from ``self.nvs`` through ``self.fs`` and
    reduced to one row per collection by :meth:`_jsFullCollection`.

    Returns
    -------
    :class:`pandas.DataFrame`
        Columns: 'ID', 'title', 'name', 'description', 'url'

    Notes
    -----
    The result is memoised by ``lru_cache``; since ``self`` is the only
    argument, the cache is keyed on the instance.
    """
    # NOTE(review): lru_cache on a method keeps a reference to ``self`` for as
    # long as the entry stays in the cache.
    columns = ['ID', 'title', 'name', 'description', 'url']
    payload = self.fs.open_json(
        f"{self.nvs}/?_profile=nvs&_mediatype=application/ld+json"
    )
    records = self._jsFullCollection(payload)
    return pd.DataFrame(records, columns=columns)

def get_url(self, rtid, fmt="ld+json"):
"""Return URL toward a given reference table for a given format

Expand Down Expand Up @@ -158,7 +119,7 @@ def get_url(self, rtid, fmt="ld+json"):

@lru_cache
def tbl(self, rtid):
"""Return an Argo Reference table
"""Return a Reference table

Parameters
----------
Expand All @@ -174,8 +135,9 @@ def tbl(self, rtid):
df = self._jsConcept2df(js)
return df

@lru_cache
def tbl_name(self, rtid):
"""Return name of an Argo Reference table
"""Return name of a Reference table

Parameters
----------
Expand All @@ -190,6 +152,41 @@ def tbl_name(self, rtid):
js = self.fs.open_json(self.get_url(rtid))
return self._jsCollection(js)

@property
def all_tbl(self):
    """Return all Reference tables

    One URL is built with ``get_url`` for each ID in ``valid_ref``; they are
    loaded together with ``fs.open_mfjson`` and each payload is converted by
    ``_jsConcept2df``.

    Returns
    -------
    OrderedDict
        Dictionary with all table short names as key and table content as :class:`pandas.DataFrame`,
        sorted by short name.
    """
    urls = [self.get_url(rtid) for rtid in self.valid_ref]
    tables = self.fs.open_mfjson(urls, preprocess=self._jsConcept2df)
    # Index by table name first: keys are then unique, so the sort below only
    # ever compares the string keys and never the DataFrame values. If two
    # tables share a name, the later one wins.
    by_name = {table.name: table for table in tables}
    return collections.OrderedDict(sorted(by_name.items()))

@property
def all_tbl_name(self):
    """Return names of all Reference tables

    One URL is built with ``get_url`` for each ID in ``valid_ref``; they are
    loaded together with ``fs.open_mfjson`` and each payload is reduced by
    ``_jsCollection``.

    Returns
    -------
    OrderedDict
        Dictionary with all table short names as key and table names as tuple('short name', 'description', 'NVS id link'),
        sorted by key.
    """
    urls = [self.get_url(rtid) for rtid in self.valid_ref]
    names = self.fs.open_mfjson(urls, preprocess=self._jsCollection)
    # The key is the third-from-last "/"-separated piece of the NVS id link.
    # Keys are unique once indexed, so the sort only compares the string keys.
    by_id = {
        link.split("/")[-3]: (name, desc, link)
        for name, desc, link in names
    }
    return collections.OrderedDict(sorted(by_id.items()))

def search(self, txt, where="all"):
"""Search for string in tables title and/or description

Expand Down Expand Up @@ -218,37 +215,76 @@ def search(self, txt, where="all"):
results.append(tbl_id)
return results

@property
def all_tbl(self):
"""Return all Argo Reference tables

Returns
-------
OrderedDict
Dictionary with all table short names as key and table content as class:`pandas.DataFrame`
class ArgoNVSReferenceTables(NVScollection):
    """Argo Reference Tables

    Utility function to retrieve Argo Reference Tables from a NVS server.

    By default, this relies on: https://vocab.nerc.ac.uk/collection

    Examples
    --------
    Methods:

    >>> R = ArgoNVSReferenceTables()
    >>> R.search('sensor')
    >>> R.tbl(3)
    >>> R.tbl('R09')

    Properties:

    >>> R.all_tbl_name
    >>> R.all_tbl
    >>> R.valid_ref

    Notes
    -----
    This class relies on a list of valid reference table ids that is updated on every argopy release.

    """
    valid_ref = VALID_REF.copy()

    """List of all available Reference Tables"""

    def _valid_ref(self, rtid):
        """Validate any rtid argument and return the corresponding valid ID from the list.

        Parameters
        ----------
        rtid: str, int or float
            Input reference ID. Can be a string (e.g., "R12", "12", "r12", "3")
            or a number (e.g., 12).

        Returns
        -------
        str
            Valid reference ID from the list.

        Raises
        ------
        ValueError
            If the standardized ID is not in the list of valid reference tables.
        """
        if isinstance(rtid, (int, float)):
            # A number is formatted as "RXX"
            rtid_str = f"R{int(rtid):02d}"
        else:
            # A string is trimmed and upper-cased, then its numeric part (with
            # or without the leading "R") is zero-padded to two digits, so that
            # 3, "3", "r3" and "R03" all resolve to the same ID.
            rtid_str = str(rtid).strip().upper()
            digits = rtid_str[1:] if rtid_str.startswith("R") else rtid_str
            try:
                rtid_str = f"R{int(digits):02d}"
            except ValueError:
                pass  # Not numeric (e.g. "RD2"): keep the string unchanged

        # Check if the standardized rtid_str is in the valid_ref list
        if rtid_str in self.valid_ref:
            return rtid_str
        raise ValueError(
            f"Invalid Argo Reference Table '{rtid}', must be one in: {', '.join(self.valid_ref)}"
        )
3 changes: 3 additions & 0 deletions argopy/sensors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from argopy.sensors.references import ArgoSensorReferences

# The trailing comma makes this a one-element tuple; without it the parentheses
# are ignored and ``__all__`` is a plain string, which breaks ``import *``.
__all__ = ("ArgoSensorReferences",)
Loading
Loading