Source code for argopy.reference.concept

import pandas as pd
from typing import Any, TypeAlias
from os import PathLike
from pathlib import Path
import json
from dataclasses import dataclass

from argopy.options import OPTIONS
from argopy.errors import OptionValueError
from argopy.utils.locals import Asset, caller_function
from argopy.utils.format import urnparser, ppliststr
from argopy.utils.checkers import to_list
from argopy.utils.casting import Encoder
from argopy.stores.nvs import NVS
from argopy.stores.nvs.utils import (
    concept2vocabulary,
    check_vocabulary,
    id2urn,
    curate_r03definition,
    curate_r14definition,
    curate_r18definition,
)

FilePath: TypeAlias = str | PathLike[str]


@dataclass(frozen=True)
class Props:
    """:class:`ArgoReferenceValue` property holder

    This should allow to make the difference between the class logic/attributes and the meta-data to expose.

    slots > attrs > keys
    """

    slots = (
        "name",
        "reference",
        "long_name",
        "definition",
        "deprecated",
        "version",
        "date",
        "uri",
        "urn",
        "parameter",
        "related",
        "broader",
        "narrower",
        "sameas",
        "_context",
        "_extra",
        "_nvs",
        "_from",
        "_nvs_store",
    )
    """All possible class attributes"""

    attrs = (
        "name",
        "reference",
        "long_name",
        "definition",
        "deprecated",
        "version",
        "date",
        "uri",
        "urn",
        "parameter",
        "related",
        "broader",
        "narrower",
        "sameas",
        "context",
        "extra",
        "nvs",
    )
    """A subset of slots, to be publicly exposed (and are read-only)"""

    keys = (
        "name",
        "reference",
        "long_name",
        "definition",
        "deprecated",
        "version",
        "date",
        "uri",
        "urn",
        "parameter",
        "related",
        "broader",
        "narrower",
        "sameas",
        "context",
    )
    """A subset of attrs, to be used to validate export/search possible values"""

    extra = ('R03', 'R14', 'R18')
    """List of Vocabularies for which Concept definition string provides 'extra' meta-data"""


[docs] class ArgoReferenceValue: """A class to work with an Argo Reference Value, i.e. a NVS vocabulary "concept" An Argo Reference Value is one possible and documented value for one Argo parameter. For instance, 'AANDERAA_OPTODE_3835' is an Argo Reference Value for the 'SENSOR_MODEL' parameter. All other possible values for this parameter are in the Argo reference table 27 for "Argo sensor models" (see :class:`ArgoReferenceTable`). Examples -------- .. code-block:: python :caption: Creation from argopy import ArgoReferenceValue # One possible value for the Argo parameter 'SENSOR_MODEL': arv = ArgoReferenceValue('AANDERAA_OPTODE_3835') # For ambiguous value seen in more than one Reference Table arv = ArgoReferenceValue('4', reference='RT_QC_FLAG') arv = ArgoReferenceValue('4', reference='RR2') # From NVS/URN jargon: arv = ArgoReferenceValue.from_urn('SDN:R27::AANDERAA_OPTODE_3835') .. code-block:: python :caption: Read attributes from argopy import ArgoReferenceValue arv = ArgoReferenceValue('AANDERAA_OPTODE_3835') # All possible attributes are listed in: arv.attrs # Reference Value attributes (and NVS origin): arv.name # nvs["skos:altLabel"] or urnparser(id2urn(nvs["@id"]))["termid"] if altLabel is None arv.long_name # nvs["skos:prefLabel"]["@value"] arv.definition # nvs["skos:definition"]["@value"] arv.deprecated # nvs["owl:deprecated"] arv.reference # The reference table this concept belongs to, can be used with ArgoReferenceTable (eg 'R27') arv.parameter # The netcdf parameter this concept applies to, can be used with ArgoReferenceTable (eg 'SENSOR_MODEL') # Other reference Value attributes (more technical): arv.version # nvs["owl:versionInfo"] arv.date # nvs["dc:date"] arv.uri # nvs["@id"] arv.urn # nvs["skos:notation"] # Relationships with other Reference Values or Context: arv.broader # nvs["skos:broader"] arv.narrower # nvs["skos:narrower"] arv.related # nvs["skos:related"] arv.sameas # nvs["owl:sameAs"] arv.context # nvs["@context"] # Extra attributes for R03, R14, R18 values (content curated from the value definition string, see e.g. below) arv.extra # Raw NVS json data: arv.nvs .. code-block:: python :caption: Extra attributes (R03, R14, R18) from argopy import ArgoReferenceValue # For Values from R03 table arv = ArgoReferenceValue('BBP470') arv.extra arv.extra['Local_Attributes'].long_name arv.extra['Properties'].category # For Values from R14 table arv = ArgoReferenceValue('T000015') arv.extra arv.extra['Template_Values'].unit # For Values from R18 table arv = ArgoReferenceValue('CB00001') arv.extra arv.extra['Template_Values'].short_sensor_name .. code-block:: python :caption: Export methods from argopy import ArgoReferenceValue arv = ArgoReferenceValue('AANDERAA_OPTODE_3835') # Export to a dictionary: arv.to_dict() arv.to_dict(keys=['name', 'deprecated']) # Select attributes to export in dictionary keys # Export to json structure: arv.to_json() # In memory arv.to_json('reference_value.json') # To a json file arv.to_json('reference_value.json', keys=['name', 'deprecated']) # Select attributes to export """ __slots__ = Props.slots attrs: tuple[str] = Props.attrs """Public attributes""" keys: tuple[str] = Props.keys """Attributes used in exporting this reference value""" def __init_implicit( self, name: str | None = None, reference: str | None = None, **kwargs ) -> None: """Create instance with JSON fetched from NVS using name and reference""" self._from = "nvs" self.name = name reftable: list[str] | None = concept2vocabulary( name ) # Return vocabulary IDs with this concept if reftable is None: raise ValueError("Invalid Reference Value") if reference is not None: reference = check_vocabulary( reference ) # Return a table ID, whatever the input if reference not in reftable: raise ValueError( f"Reference Table '{reference}' not valid for the '{name}' Reference Value, should be one in: {reftable}" ) if reference is None: if len(reftable) > 1: raise ValueError( f"This Reference Value appears in more than one Reference Table: {reftable}. You must specified with the 'reference' argument which one to use." ) else: self.reference = reftable[0] else: self.reference = reference # eg 'R27' # Once we have a 'name' and a 'reference', we can load raw data from NVS self._nvs = self._nvs_store.load_concept(self.name, self.reference) def __init_explicit(self, data: Any) -> None: """Create instance with JSON data provided, typically using ArgoReferenceValue.from_dict()""" self._from = "json" self._nvs = data self.name = self.nvs["skos:altLabel"] if self.name == "" or self.name is None: self.name = urnparser(id2urn(self.nvs["@id"]))["termid"] self.reference = urnparser(self.nvs["dce:identifier"])[ "listid" ] # eg 'dce:identifier' = 'SDN:R27::UNKNOWN'
[docs] def __init__(self, name: str, reference: str | None = None, **kwargs) -> None: self._nvs_store : NVS = NVS(nvs=kwargs.get("nvs", OPTIONS["nvs"])) if kwargs.get("data", None) is None: self.__init_implicit(name=name, reference=reference) else: self.__init_explicit(data=kwargs.get("data")) # And populate all attributes: self.long_name = self.nvs["skos:prefLabel"]["@value"] self.definition = ( self.nvs["skos:definition"]["@value"] if isinstance(self.nvs["skos:definition"], dict) else self.nvs["skos:definition"] ) self.deprecated = True if self.nvs["owl:deprecated"] == "True" else False self.version = self.nvs["owl:versionInfo"] self.date = pd.to_datetime(self.nvs["dc:date"]) self.uri = self.nvs["@id"] self.urn = self.nvs["skos:notation"] self.parameter = Asset().load("vocabulary:mapping")["data"][ "Vocabulary2Parameter" ][self.reference] self._context = self.nvs.get("@context", None) self._extra = None if self.reference == "R03": self._extra = curate_r03definition(self.definition) if self.reference == "R14": self._extra = curate_r14definition(self.definition) if self.reference == "R18": self._extra = curate_r18definition(self.definition) # todo: support mapping (https://github.com/OneArgo/ArgoVocabs?tab=readme-ov-file#ivb-mappings) # Relation can be: # "narrower/broader" when there is a hierarchy between the subject and the object # "related" when the subject is related to the object without strict hierarchy # Eg: 'AANDERAA_OPTODE_3830' concept: # 'skos:related': {'@id': 'http://vocab.nerc.ac.uk/collection/R25/current/OPTODE_DOXY/'}, # 'skos:broader': {'@id': 'http://vocab.nerc.ac.uk/collection/R26/current/AANDERAA/'}, self.related = None if self.nvs.get("skos:related", None) is not None: self.related = to_list(self.nvs.get("skos:related", None)) self.broader = None if self.nvs.get("skos:broader", None) is not None: self.broader = to_list(self.nvs.get("skos:broader", None)) self.narrower = None if self.nvs.get("skos:narrower", None) is not None: self.narrower = to_list(self.nvs.get("skos:narrower", None)) self.sameas = None if self.nvs.get("owl:sameAs", None) is not None: self.sameas = to_list(self.nvs.get("owl:sameAs", None))
def __setattr__(self, attr, value): """Set attribute value, with read-only after instantiation policy for public attributes""" if attr in self.attrs and not caller_function().startswith("__init"): raise AttributeError(f"'{attr}' is read-only after instantiation.") ArgoReferenceValue.__dict__[attr].__set__(self, value) def __repr__(self): summary = [f"<argo.reference.table.value> '{self.name}'"] summary.append(f'long_name: "{self.long_name}"') summary.append(f'definition: "{self.definition}"') summary.append(f'urn: "{self.urn}"') summary.append(f"uri: {self.uri}") summary.append(f"version: {self.version} ({self.date})") summary.append(f'deprecated: {"True" if self.deprecated else "False"}') summary.append(f"reference/parameter: {self.reference}/{self.parameter}") nrel = sum( [ len(getattr(self, rel)) if getattr(self, rel, None) is not None else 0 for rel in ["broader", "narrower", "related", "sameas"] ] ) if nrel > 0: summary.append(f"relations[{nrel}]:") else: summary.append( f'relations[{ppliststr(["broader", "narrower", "related", "sameas"], last="or")}]: -' ) for relation in ["broader", "narrower", "related", "sameas"]: if getattr(self, relation, None) is not None: # list of items like: {"@id": "http://vocab.nerc.ac.uk/collection/R23/current/PROVOR_II/"} rels = getattr(self, relation) # Format the list as a list of items like "R23/PROVOR_II": urns = [urnparser(id2urn(r["@id"])) for r in rels] urns = [f'{u["listid"]}/{u["termid"]}' for u in urns] # Final print: if relation == "related": summary.append( f' - "{relation}" to {len(urns)} value{"s" if len(urns) > 1 else ""} : {ppliststr(urns)}' ) elif relation == "sameas": summary.append( f' - "{relation}" {len(urns)} value{"s" if len(urns) > 1 else ""} : {ppliststr(urns)}' ) else: summary.append( f' - {len(urns)} "{relation}" value{"s" if len(urns)>1 else ""}: {ppliststr(urns)}' ) if getattr(self, "context", None) is not None: keys = list(self.context.keys()) summary.append(f"context[{len(keys)}]: {ppliststr(keys)}") else: summary.append( f'context: {"(not loaded yet, use key indexing to load)" if self._from == "json" else "-"}' ) if getattr(self, "extra", None) is not None: keys = list(self.extra.keys()) summary.append(f"extra[{len(keys)}]:") for key in keys: summary.append(f' - "{key}": {self.extra[key]}') else: summary.append("extra: (no extra attributes from definition string)") return "\n".join(summary) def __str__(self): return f"ArgoReferenceValue(name='{self.name}', reference='{self.reference}', parameter='{self.parameter}')" def __getitem__(self, key): if key == "context": """'context' requires a special treatment because this is the only attribute that is not filled when the ArgoReferenceValue instance is created using json data from a Reference Table graph concept and the from_dict method, typically in this use-case: >>> val = ArgoReferenceTable('PLATFORM_FAMILY')['FLOAT_COASTAL'] This 'val' instance has no 'context' attribute. So, when we call on "val['context']" we need to trigger full NVS data fetching of the concept, which also update the internal nvs object. """ if self.nvs.get("@context", None) is None: # Update NVS data: self._nvs: dict[str, str] = self._nvs_store.load_concept( urnparser(self.urn)["termid"], self.reference ) # Fill in context attribute: self._context: str | None = self.nvs.get("@context", None) return getattr(self, "context") elif key in self.__slots__: return getattr(self, key) raise ValueError(f"Unknown attribute '{key}'") def _ipython_key_completions_(self): """Provide method for key-autocompletions in IPython.""" return [p for p in Props.keys] @property def nvs(self): return self._nvs @property def context(self): return self._context @property def extra(self): return self._extra
[docs] @classmethod def from_urn(cls, urn: str = None) -> "ArgoReferenceValue": urn = urnparser(urn) return cls(urn["termid"], reference=urn["listid"])
[docs] @classmethod def from_dict(cls, data: dict = None) -> "ArgoReferenceValue": """Create a :class:`ArgoReferenceValue` from a dictionary (JSON-like) Examples -------- .. code-block :: python :caption: Expected dictionary structure {'@id': 'http://vocab.nerc.ac.uk/collection/R27/current/UNKNOWN/', 'pav:authoredOn': '2019-10-11 14:49:00.0', 'pav:hasCurrentVersion': {'@id': 'http://vocab.nerc.ac.uk/collection/R27/current/UNKNOWN/1/'}, 'dce:identifier': 'SDN:R27::UNKNOWN', 'pav:version': '1', 'skos:notation': 'SDN:R27::UNKNOWN', 'skos:altLabel': 'UNKNOWN', 'dc:date': '2019-10-11 14:49:00.0', 'owl:versionInfo': '1', 'skos:prefLabel': {'@language': 'en', '@value': 'Unknown sensor model'}, 'dc:identifier': 'SDN:R27::UNKNOWN', 'skos:note': {'@language': 'en', '@value': 'accepted'}, 'owl:deprecated': 'false', 'void:inDataset': {'@id': 'http://vocab.nerc.ac.uk/.well-known/void'}, 'skos:definition': {'@language': 'en', '@value': 'Sensor model is unknown.'}, '@type': 'skos:Concept'} """ return cls("", data=data)
def to_dict(self, keys: list[str] | None = None) -> dict[str, Any]: """Export reference value attributes to a dictionary Parameters ---------- keys: list[str], optional, default = None List of attributes to output as keys in the dictionary. All by default if set to None. Returns ------- dict[str, Any] """ if keys is None: validated_keys = Props.keys else: validated_keys = [] for k in to_list(keys): if k not in Props.keys: raise OptionValueError( f"Invalid key name '{k}'. Valid values are: {ppliststr(Props.keys)}" ) validated_keys.append(k) d = {} for key in validated_keys: d.update({key: getattr(self, key)}) return d
[docs] def to_json( self, path: FilePath | None = None, keys: list[str] | None = None, **kwargs ): """Export to a JSON string or path Parameters ---------- path: str, path object, file-like object, or None, default None String, path object (implementing os.PathLike[str]), or file-like object implementing a write() function. If None, the result is returned as a string. keys: list[str], optional, default = None List of attributes to output as keys in the JSON structure. All by default if set to None. **kwargs All other arguments are passed to :class:`json.dumps` or :class:`json.dump` Returns ------- None or str If path is None, returns the resulting json format as a string. Otherwise, returns None. """ # Get data to export: data = self.to_dict(keys=keys) # Make sure we have an appropriate JSON encoder for pandas data types if kwargs.get("cls", None) is None: kwargs.update({"cls": Encoder}) # Export: if path is None: return json.dumps(data, **kwargs) else: if getattr(path, "write", None) is None: with open(Path(path), "w") as fp: return json.dump(data, fp, **kwargs) else: return json.dump(data, path, **kwargs)