import pandas as pd
from functools import lru_cache
import collections
from argopy.stores import httpstore
from argopy.options import OPTIONS
from argopy.utils.locals import Asset
from argopy.utils.decorators import deprecated
from argopy.utils.format import urnparser
# Valid reference table IDs, read from the 'valid_ref' entry of the
# 'vocabulary:description' asset (copied into ArgoNVSReferenceTables.valid_ref).
VALID_REF = Asset.load('vocabulary:description')['data']['valid_ref']
class NVScollection:
    """A class to handle any NVS collection table.

    Collection tables are downloaded as JSON-LD from a NVS server through a
    file store object (``self.fs``) and converted to :class:`pandas.DataFrame`
    or plain tuples.
    """

    def __init__(
        self,
        **kwargs,
    ):
        """Reference Tables from NVS collection

        Parameters
        ----------
        nvs: str, optional
            Root URL of the NVS server. Defaults to ``OPTIONS["nvs"]``.
        fs: optional
            File store used to fetch JSON content; it must provide
            ``open_json`` and ``open_mfjson``. When missing or None, a
            ``httpstore`` is created with the 3 options below.
        cache: bool, default: True
            Passed to ``httpstore`` (only used when ``fs`` is not provided).
        cachedir: optional
            Passed to ``httpstore``. Defaults to ``OPTIONS["cachedir"]``.
        timeout: optional
            Passed to ``httpstore``. Defaults to ``OPTIONS["api_timeout"]``.
        """
        self.nvs = kwargs.get("nvs", OPTIONS["nvs"])
        self.fs = kwargs.get("fs", None)
        if self.fs is None:
            self._cache = kwargs.get("cache", True)
            self._cachedir = kwargs.get("cachedir", OPTIONS["cachedir"])
            self._timeout = kwargs.get("timeout", OPTIONS["api_timeout"])
            self.fs = httpstore(
                cache=self._cache, cachedir=self._cachedir, timeout=self._timeout
            )

    @property
    def valid_ref(self):
        """List of all collection IDs found in the server collection index"""
        return self._FullCollection()["ID"].to_list()

    def _valid_ref(self, rtid):
        """No validation"""
        return rtid

    def _jsConcept2df(self, data):
        """Return all skos:Concept as class:`pandas.DataFrame`

        The returned frame carries the collection short name
        (``dc:alternative`` of the last skos:Collection entry) in a ``name``
        attribute.

        Raises
        ------
        ValueError
            If ``data`` holds no skos:Collection entry to name the table.
        """
        content = {
            "altLabel": [],
            "prefLabel": [],
            "definition": [],
            "deprecated": [],
            "urn": [],
            "id": [],
        }
        collection_name = None
        has_collection = False
        for k in data["@graph"]:
            if k["@type"] == "skos:Collection":
                collection_name = k["dc:alternative"]
                has_collection = True
            elif k["@type"] == "skos:Concept":
                notation = k["skos:notation"]
                definition = k["skos:definition"]
                content["altLabel"].append(urnparser(notation)["termid"])
                content["prefLabel"].append(k["skos:prefLabel"]["@value"])
                # An empty string stands for "no definition"
                content["definition"].append(
                    definition["@value"] if definition != "" else None
                )
                content["deprecated"].append(k["owl:deprecated"])
                content["urn"].append(notation)
                content["id"].append(k["@id"])

        if not has_collection:
            # Fail explicitly rather than with an UnboundLocalError further down
            raise ValueError("No 'skos:Collection' entry found in NVS data")

        df = pd.DataFrame.from_dict(content)
        # Vectorized string-to-bool conversion; also safe on an empty table
        df["deprecated"] = df["deprecated"] == "true"
        df.name = collection_name
        return df

    def _jsCollection(self, data):
        """Return last skos:Collection information as data

        Returns
        -------
        tuple('short name', 'description', 'NVS id link')

        Raises
        ------
        ValueError
            If ``data`` holds no skos:Collection entry.
        """
        info = None
        for k in data["@graph"]:
            if k["@type"] == "skos:Collection":
                info = (k["dc:alternative"], k["dc:description"], k["@id"])
        if info is None:
            raise ValueError("No 'skos:Collection' entry found in NVS data")
        return info

    def _jsFullCollection(self, data):
        """Return all skos:Collection information as data

        Returns
        -------
        list of tuple('ID', 'title', 'short name', 'description', 'NVS id link')
            The ID is the 3rd item from the end of the '/'-split ``@id`` link.
        """
        return [
            (
                k["@id"].split("/")[-3],
                k["dc:title"],
                k["dc:alternative"],
                k["dc:description"],
                k["@id"],
            )
            for k in data["@graph"]
            if k["@type"] == "skos:Collection"
        ]

    # NOTE: lru_cache on a method keys on ``self``: cached entries are shared
    # by the class and keep instances alive as long as they stay in the cache.
    @lru_cache
    def _FullCollection(self):
        """Return the server collection index as class:`pandas.DataFrame`

        Columns are: 'ID', 'title', 'name', 'description', 'url'.
        """
        url = f"{self.nvs}/collection/?_profile=nvs&_mediatype=application/ld+json"
        js = self.fs.open_json(url)
        return pd.DataFrame(
            self._jsFullCollection(js),
            columns=["ID", "title", "name", "description", "url"],
        )

    def get_url(self, rtid, fmt="ld+json"):
        """Return URL toward a given reference table for a given format

        Parameters
        ----------
        rtid: {str, int}
            Name or number of the reference table to retrieve. Eg: 'R01', 12
        fmt: str, default: "ld+json"
            Format of the NVS server response. Can be: "ld+json", "rdf+xml" or "text/turtle".

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If ``fmt`` is not one of the supported formats.
        """
        rtid = self._valid_ref(rtid)
        if fmt == "ld+json":
            fmt_ext = "?_profile=nvs&_mediatype=application/ld+json"
        elif fmt == "rdf+xml":
            fmt_ext = "?_profile=nvs&_mediatype=application/rdf+xml"
        elif fmt == "text/turtle":
            fmt_ext = "?_profile=nvs&_mediatype=text/turtle"
        else:
            raise ValueError(
                "Invalid format. Must be in: 'ld+json', 'rdf+xml' or 'text/turtle'."
            )
        return f"{self.nvs}/collection/{rtid}/current/{fmt_ext}"

    @lru_cache
    def tbl(self, rtid):
        """Return a Reference table

        Parameters
        ----------
        rtid: {str, int}
            Name or number of the reference table to retrieve. Eg: 'R01', 12

        Returns
        -------
        class:`pandas.DataFrame`
        """
        rtid = self._valid_ref(rtid)
        js = self.fs.open_json(self.get_url(rtid))
        return self._jsConcept2df(js)

    @lru_cache
    def tbl_name(self, rtid):
        """Return name of a Reference table

        Parameters
        ----------
        rtid: {str, int}
            Name or number of the reference table to retrieve. Eg: 'R01', 12

        Returns
        -------
        tuple('short name', 'description', 'NVS id link')
        """
        rtid = self._valid_ref(rtid)
        js = self.fs.open_json(self.get_url(rtid))
        return self._jsCollection(js)

    @property
    def all_tbl(self):
        """Return all Reference tables

        Returns
        -------
        OrderedDict
            Dictionary with all table short names as key and table content as class:`pandas.DataFrame`
        """
        URLs = [self.get_url(rtid) for rtid in self.valid_ref]
        df_list = self.fs.open_mfjson(URLs, preprocess=self._jsConcept2df)
        by_name = {t.name: t for t in df_list}
        # Sort on keys only, table contents are never compared
        return collections.OrderedDict(
            (name, by_name[name]) for name in sorted(by_name)
        )

    @property
    def all_tbl_name(self):
        """Return names of all Reference tables

        Returns
        -------
        OrderedDict
            Dictionary with all table ids as key and table names as tuple('short name', 'description', 'NVS id link')
        """
        URLs = [self.get_url(rtid) for rtid in self.valid_ref]
        name_list = self.fs.open_mfjson(URLs, preprocess=self._jsCollection)
        # The table id is the 3rd item from the end of the '/'-split NVS id link
        by_id = {
            rtid.split("/")[-3]: (name, desc, rtid) for name, desc, rtid in name_list
        }
        return collections.OrderedDict((tid, by_id[tid]) for tid in sorted(by_id))

    def search(self, txt, where="all"):
        """Search for string in tables title and/or description

        The search is case-insensitive. The table "title" is the short name
        listed by :attr:`all_tbl_name`.

        Parameters
        ----------
        txt: str
        where: str, default='all'
            Where to search, can be: 'title', 'description', 'all'.
            Any other value leads to an empty result.

        Returns
        -------
        list of table id matching the search
        """
        needle = txt.lower()
        results = []
        # all_tbl_name already holds (short name, description, link) for each
        # table: no need to request every table again through tbl_name()
        for tbl_id, (title, description, _) in self.all_tbl_name.items():
            if where == "title":
                found = needle in title.lower()
            elif where == "description":
                found = needle in description.lower()
            elif where == "all":
                found = needle in description.lower() or needle in title.lower()
            else:
                found = False
            if found:
                results.append(tbl_id)
        return results
# (stray "[docs]" documentation-link marker removed: it is not Python code)
@deprecated("Update your code to use 'ArgoReference' instead.", version='[TBD]')
class ArgoNVSReferenceTables(NVScollection):
    """Argo Reference Tables

    Utility function to retrieve Argo Reference Tables from a NVS server.

    By default, this relies on: https://vocab.nerc.ac.uk/collection

    Examples
    --------
    Methods:

    >>> R = ArgoNVSReferenceTables()
    >>> R.search('sensor')
    >>> R.tbl(3)
    >>> R.tbl('R09')

    Properties:

    >>> R.all_tbl_name
    >>> R.all_tbl
    >>> R.valid_ref

    Notes
    -----
    This class relies on a list of valid reference table ids that is updated on every argopy release.
    """

    valid_ref = VALID_REF.copy()
    """List of all available Reference Tables"""

    def _valid_ref(self, rtid):
        """Validate any rtid argument and return the corresponding valid ID from the list.

        Numbers and numeric strings, with or without a leading 'R' (any case),
        are normalized to 'R' followed by a number zero-padded to 2 digits.
        Any other string is only stripped and upper-cased before the check.

        Parameters
        ----------
        rtid: {str, int, float}
            Input reference ID. Can be a string (e.g., "R12", "12", "r12", "3")
            or a number (e.g., 12).

        Returns
        -------
        str
            Valid reference ID from the list.

        Raises
        ------
        ValueError
            If the normalized ID is not in the list of valid reference tables.
        """
        if isinstance(rtid, (int, float)):
            # A number is formatted as "RXX"
            rtid_str = f"R{int(rtid):02d}"
        else:
            rtid_str = str(rtid).strip().upper()
            # Numeric part: everything after the optional leading 'R'
            digits = rtid_str[1:] if rtid_str.startswith("R") else rtid_str
            try:
                # Same zero-padding with or without the 'R' prefix,
                # so that "3", "R3" and 3 all lead to "R03"
                rtid_str = f"R{int(digits):02d}"
            except ValueError:
                pass  # Not a number: keep the upper-cased string as is

        # Check if the standardized rtid_str is in the valid_ref list
        if rtid_str in self.valid_ref:
            return rtid_str
        raise ValueError(
            f"Invalid Argo Reference Table '{rtid}', must be one in: {', '.join(self.valid_ref)}"
        )