import warnings
import pandas as pd
from typing import Any
from copy import deepcopy
import numpy as np
from argopy.options import OPTIONS
from argopy.stores.nvs import NVS
from argopy.stores.nvs.utils import bindings2df, id2urn, url2predicate
from argopy.stores.nvs.utils import known_mappings
from argopy.utils.format import ppliststr, urnparser
from argopy.utils.locals import Asset
def id2concept(x):
    """Return the ``termid`` entry of the URN designated by the identifier ``x``.

    ``x`` is first converted with :func:`id2urn`, the result is parsed with
    :func:`urnparser` and the ``"termid"`` item of the parsed result is returned.
    """
    return urnparser(id2urn(x))["termid"]
def predicate(x):
    """Return the predicate designated by ``x``, without its prefix.

    ``x`` is converted with :func:`url2predicate` and only the text following
    the last ``:`` is kept, i.e. the NVS jargon ('skos:', or 'owl:') is removed.
    """
    return url2predicate(x).split(":")[-1]
class ArgoReferenceMapping:
    """A class to work with Argo Reference Value Relationships, i.e. a NVS "mapping"

    More details from the AVTT documentation:
    https://github.com/OneArgo/ArgoVocabs?tab=readme-ov-file#ivb-mappings

        > Mappings are used to inform relationship between concepts. For instance, inform all the sensor_models manufactured by one sensor_maker, or all the platform_types manufactures by one platform_maker, etc.

        > They are used by the FileChecker to ensure the consistency between these metadata fields in the Argo dataset.

    Examples
    --------
    .. code-block:: python
        :caption: Creation

        from argopy import ArgoReferenceMapping

        # Use two Argo parameter names, documented by one of the Argo reference tables:
        ArgoReferenceMapping('PLATFORM_MAKER', 'PLATFORM_TYPE')

        # or reference table identifiers:
        ArgoReferenceMapping('R24', 'R23')

    .. code-block:: python
        :caption: Indexing and values

        from argopy import ArgoReferenceMapping

        arm = ArgoReferenceMapping('R24', 'R23')

        # Relationships within this reference mapping:
        len(arm)        # Number of relationships
        arm.subjects    # Ordered list of unique 'subject' reference values names
        arm.objects     # Ordered list of unique 'object' reference values names
        arm.predicates  # Ordered list of unique 'predicate', aka relationships, in this mapping

        # Check if a reference value is in this mapping as a subject or an object:
        'SBE' in arm  # Return True

        # Indexing is by subject values:
        arm['SBE']  # Return a dict with predicate as keys and objects as values

        # Iterate over all relationships:
        for relation in arm:
            print(relation['subject'], relation['predicate'])

    .. code-block:: python
        :caption: Export method

        from argopy import ArgoReferenceMapping

        arm = ArgoReferenceMapping('R24', 'R23')

        # Export all mapping relationships in a DataFrame:
        arm.to_dataframe()

        # To export mapping using AVTT jargon:
        arm.to_dataframe(raw=True)
    """

    __slots__ = (
        "_subjects",
        "_objects",
        "_predicates",
        "_nvs_store",
        "_d",
        "_Vocabulary2Parameter",
        "sub_id",
        "sub_parameter",
        "obj_id",
        "obj_parameter",
        "nvs",
    )

    def __init__(self, sub: str, obj: str, **kwargs):
        """Load the mapping between a subject and an object reference table.

        Parameters
        ----------
        sub: str
            Subject reference table, given either as an identifier (e.g. ``'R24'``)
            or as the associated parameter name (e.g. ``'PLATFORM_MAKER'``).
        obj: str
            Object reference table, given either as an identifier (e.g. ``'R23'``)
            or as the associated parameter name (e.g. ``'PLATFORM_TYPE'``).
        **kwargs
            Only ``nvs`` is read: it is forwarded to :class:`NVS` and defaults to
            ``OPTIONS["nvs"]``.

        Raises
        ------
        ValueError
            If ``sub`` or ``obj`` is neither a known identifier nor a known
            parameter name.

        Warns
        -----
        UserWarning
            If the (subject, object) identifiers pair is not listed by
            ``known_mappings()``. The mapping is loaded anyway.
        """
        # Get an NVS store to retrieve data:
        self._nvs_store: NVS = NVS(nvs=kwargs.get("nvs", OPTIONS["nvs"]))

        # Validate subject and object (subject first, so that its error wins):
        self._Vocabulary2Parameter: dict[str, str] = Asset.load("vocabulary:mapping")[
            "data"
        ]["Vocabulary2Parameter"]
        self.sub_id, self.sub_parameter = self._resolve_table(sub, "subject")
        self.obj_id, self.obj_parameter = self._resolve_table(obj, "object")

        known = known_mappings()
        if (self.sub_id, self.obj_id) not in known:
            warnings.warn(
                f"This mapping '{(self.sub_id, self.obj_id)}'is not known to the AVTT ! Known mappings are {known}"
            )

        # Retrieve NVS raw data
        # We use a deepcopy because we will modify the nvs raw data with complementary data
        self.nvs: dict[str, Any] = deepcopy(
            self._nvs_store.load_mapping(self.sub_id, self.obj_id)
        )

        # Internal placeholders (lazily filled by the properties and by __getitem__):
        self._subjects: list[str] | None = None
        self._objects: list[str] | None = None
        self._predicates: list[str] | None = None
        # Cache of __getitem__ results: {subject: {predicate: [objects]}}
        self._d: dict[str, dict[str, list[str]]] = {}

    def _resolve_table(self, value: str, role: str):
        """Return the ``(identifier, parameter name)`` pair designated by ``value``.

        ``value`` is looked up among the keys (identifiers) and then among the
        values (parameter names) of the ``Vocabulary2Parameter`` asset. ``role``
        ('subject' or 'object') is only used to build the error message.

        Raises
        ------
        ValueError
            If ``value`` is neither a key nor a value of the asset.
        """
        vocab = self._Vocabulary2Parameter
        if value in vocab:
            return value, vocab[value]
        if value in vocab.values():
            # First identifier documenting this parameter name
            return next(k for k, v in vocab.items() if v == value), value
        raise ValueError(
            f"Unknown {role} Reference Table '{value}'. Possible values are: \nIDs like: {ppliststr(list(vocab), last='or')}\nNames like: {ppliststr(list(vocab.values()), last='or')}"
        )

    def _unique_terms(self, field: str, parse):
        """Return the sorted unique values of ``parse(binding[field]["value"])`` over all bindings."""
        return sorted(
            {parse(binding[field]["value"]) for binding in self.nvs["results"]["bindings"]}
        )

    def __repr__(self):
        summary = [
            f"<argo.reference.mapping> subject('{self.sub_id}'/'{self.sub_parameter}') vs object('{self.obj_id}'/'{self.obj_parameter}')"
        ]
        summary.append(f"{len(self)} relationships in this mapping")
        return "\n".join(summary)

    @property
    def subjects(self):
        """Ordered list of unique 'subject' reference values names (computed once, then cached)"""
        if self._subjects is None:
            self._subjects = self._unique_terms("subj", id2concept)
        return self._subjects

    @property
    def objects(self):
        """Ordered list of unique 'object' reference values names (computed once, then cached)"""
        if self._objects is None:
            self._objects = self._unique_terms("obj", id2concept)
        return self._objects

    @property
    def predicates(self):
        """Ordered list of unique 'predicate', aka relationships, in this mapping (computed once, then cached)"""
        if self._predicates is None:
            self._predicates = self._unique_terms("pred", predicate)
        return self._predicates

    def __len__(self):
        """Number of relationships, i.e. of raw NVS bindings, in this mapping"""
        return len(self.nvs["results"]["bindings"])

    def __iter__(self):
        """Yield one ``{'subject': ..., 'predicate': ...}`` dict per subject, in :attr:`subjects` order"""
        for sub in self.subjects:
            yield {"subject": sub, "predicate": self[sub]}

    def __contains__(self, item):
        """Check if a reference value is in this mapping as a subject or an object"""
        return item in self.subjects or item in self.objects

    def __getitem__(self, key: str):
        """Return relationships of one subject as a dict with predicates as keys and sorted lists of objects as values

        Raises
        ------
        ValueError
            If ``key`` is not one of the :attr:`subjects` of this mapping.
        """
        if key not in self.subjects:
            raise ValueError(f"Invalid subject mapping value '{key}'")

        if self._d.get(key) is None:
            relations: dict[str, list[str]] = {}
            for b in self.nvs["results"]["bindings"]:
                if id2concept(b["subj"]["value"]) == key:
                    relations.setdefault(predicate(b["pred"]["value"]), []).append(
                        id2concept(b["obj"]["value"])
                    )
            for objs in relations.values():
                objs.sort()
            self._d[key] = relations
        return self._d[key]

    def to_dataframe(self, raw: bool = False) -> pd.DataFrame:
        """Return mapping as a :class:`pd.DataFrame`

        Parameters
        ----------
        raw: bool, default False
            If True, 'subject' and 'object' columns hold the ``subject_uri`` and
            ``object_uri`` values and predicates are left untouched (AVTT jargon).
            If False, the uri columns are dropped and the predicate prefix
            ('skos:', or 'owl:') is removed.

        Returns
        -------
        :class:`pd.DataFrame` or None
            None if this mapping holds no relationship.
        """
        bindings = self.nvs["results"]["bindings"]
        if len(bindings) == 0:
            # Nothing to export: be explicit rather than failing on a missing frame
            return None

        df = bindings2df(bindings)
        if raw:
            df = df.drop(["subject", "object"], axis=1)
            df = df.rename({"subject_uri": "subject", "object_uri": "object"}, axis=1)
            return df[["subject", "predicate", "object"]]

        df = df.drop(["subject_uri", "object_uri"], axis=1)
        # Remove NVS jargon ('skos:', or 'owl:')
        df["predicate"] = df["predicate"].map(lambda x: x.split(":")[-1])
        return df