Source code for argopy.related.reference_tables

import pandas as pd
from functools import lru_cache
import collections
from ..stores import httpstore
from ..options import OPTIONS


class ArgoNVSReferenceTables:
    """Argo Reference Tables

    Utility function to retrieve Argo Reference Tables from a NVS server.

    By default, this relies on: https://vocab.nerc.ac.uk/collection

    Examples
    --------
    Methods:

    >>> R = ArgoNVSReferenceTables()
    >>> R.search('sensor')
    >>> R.tbl(3)
    >>> R.tbl('R09')

    Properties:

    >>> R.all_tbl_name
    >>> R.all_tbl
    >>> R.valid_ref

    """

    valid_ref = [
        "R01",
        "RR2",
        "RD2",
        "RP2",
        "R03",
        "R04",
        "R05",
        "R06",
        "R07",
        "R08",
        "R09",
        "R10",
        "R11",
        "R12",
        "R13",
        "R15",
        "RMC",
        "RTV",
        "R16",
        "R18",
        "R19",
        "R20",
        "R21",
        "R22",
        "R23",
        "R24",
        "R25",
        "R26",
        "R27",
        "R28",
        "R29",
        "R30",
        "R40",
    ]
    """List of all available Reference Tables"""

    def __init__(
        self,
        nvs="https://vocab.nerc.ac.uk/collection",
        cache: bool = True,
        cachedir: str = "",
    ):
        """Argo Reference Tables from NVS

        Parameters
        ----------
        nvs: str, default: "https://vocab.nerc.ac.uk/collection"
            Base URL of the NVS collection, used to build table URLs.
        cache: bool, default: True
            Forwarded to the ``httpstore`` used to fetch data.
        cachedir: str, default: ""
            Forwarded to the ``httpstore``. An empty string means: use
            ``OPTIONS["cachedir"]``.
        """
        cachedir = OPTIONS["cachedir"] if cachedir == "" else cachedir
        self.fs = httpstore(cache=cache, cachedir=cachedir)
        self.nvs = nvs

    def _valid_ref(self, rtid):
        """Validate a reference table identifier and return its string ID

        Parameters
        ----------
        rtid: {str, int}
            Table ID listed in :attr:`valid_ref` (eg: 'R01'), or a number
            that is formatted as 'R' + 2 digits (eg: 3 -> 'R03').

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If ``rtid`` cannot be mapped to an entry of :attr:`valid_ref`.
        """
        if rtid in self.valid_ref:
            return rtid
        try:
            candidate = "R%0.2d" % rtid
        except (TypeError, ValueError, OverflowError):
            # Not a number (eg: an unknown string or None): this must end up
            # as the ValueError below, not as a formatting error.
            candidate = None
        if candidate not in self.valid_ref:
            raise ValueError(
                "Invalid Argo Reference Table, should be one in: %s"
                % ", ".join(self.valid_ref)
            )
        return candidate

    def _jsConcept2df(self, data):
        """Return all skos:Concept as class:`pandas.DataFrame`

        The name of the last skos:Collection found in ``data["@graph"]`` is
        attached to the returned DataFrame as its ``name`` attribute.

        Raises
        ------
        ValueError
            If ``data["@graph"]`` holds no skos:Collection entry.
        """
        content = {
            "altLabel": [],
            "prefLabel": [],
            "definition": [],
            "deprecated": [],
            "id": [],
        }
        collection_found = False
        collection_name = None
        for entry in data["@graph"]:
            if entry["@type"] == "skos:Collection":
                collection_name = entry["alternative"]
                collection_found = True
            elif entry["@type"] == "skos:Concept":
                content["altLabel"].append(entry["altLabel"])
                content["prefLabel"].append(entry["prefLabel"]["@value"])
                content["definition"].append(entry["definition"]["@value"])
                content["deprecated"].append(entry["deprecated"])
                content["id"].append(entry["@id"])
        if not collection_found:
            raise ValueError("No skos:Collection entry found in NVS response")
        df = pd.DataFrame.from_dict(content)
        # Plain attribute (not a column): it is read back by ``all_tbl``.
        df.name = collection_name
        return df

    def _jsCollection(self, data):
        """Return last skos:Collection information as data

        Returns
        -------
        tuple('short name', 'description', 'NVS id link')

        Raises
        ------
        ValueError
            If ``data["@graph"]`` holds no skos:Collection entry.
        """
        info = None
        for entry in data["@graph"]:
            if entry["@type"] == "skos:Collection":
                info = (entry["alternative"], entry["description"], entry["@id"])
        if info is None:
            raise ValueError("No skos:Collection entry found in NVS response")
        return info

    def get_url(self, rtid, fmt="ld+json"):
        """Return URL toward a given reference table for a given format

        Parameters
        ----------
        rtid: {str, int}
            Name or number of the reference table to retrieve. Eg: 'R01', 12
        fmt: str, default: "ld+json"
            Format of the NVS server response. Can be: "ld+json", "rdf+xml" or "text/turtle".

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If ``rtid`` is not a valid table or ``fmt`` is not a supported format.
        """
        rtid = self._valid_ref(rtid)
        mediatypes = {
            "ld+json": "application/ld+json",
            "rdf+xml": "application/rdf+xml",
            "text/turtle": "text/turtle",
        }
        if fmt not in mediatypes:
            raise ValueError(
                "Invalid format. Must be in: 'ld+json', 'rdf+xml' or 'text/turtle'."
            )
        return "{}/{}/current/?_profile=nvs&_mediatype={}".format(
            self.nvs, rtid, mediatypes[fmt]
        )

    # NOTE: lru_cache on a method keys on (self, rtid): the cache keeps a
    # reference to every instance it has seen, and repeated calls return the
    # very same DataFrame object (mutating it affects later callers).
    @lru_cache
    def tbl(self, rtid):
        """Return an Argo Reference table

        Parameters
        ----------
        rtid: {str, int}
            Name or number of the reference table to retrieve. Eg: 'R01', 12

        Returns
        -------
        class:`pandas.DataFrame`
        """
        rtid = self._valid_ref(rtid)
        js = self.fs.open_json(self.get_url(rtid))
        return self._jsConcept2df(js)

    def tbl_name(self, rtid):
        """Return name of an Argo Reference table

        Parameters
        ----------
        rtid: {str, int}
            Name or number of the reference table to retrieve. Eg: 'R01', 12

        Returns
        -------
        tuple('short name', 'description', 'NVS id link')
        """
        rtid = self._valid_ref(rtid)
        js = self.fs.open_json(self.get_url(rtid))
        return self._jsCollection(js)

    def search(self, txt, where="all"):
        """Search for string in tables title and/or description

        The search is case-insensitive.

        Parameters
        ----------
        txt: str
        where: str, default='all'
            Where to search, can be: 'title', 'description', 'all'.
            Any other value matches nothing and yields an empty list.

        Returns
        -------
        list of table id matching the search
        """
        needle = txt.lower()
        results = []
        # ``all_tbl_name`` already holds (name, description, id) for each
        # table: read it directly rather than re-fetching every table.
        for tbl_id, (title, description, _) in self.all_tbl_name.items():
            if where == "title":
                found = needle in title.lower()
            elif where == "description":
                found = needle in description.lower()
            elif where == "all":
                found = needle in description.lower() or needle in title.lower()
            else:
                found = False
            if found:
                results.append(tbl_id)
        return results

    @property
    def all_tbl(self):
        """Return all Argo Reference tables

        Returns
        -------
        OrderedDict
            Dictionary with all table short names as key and table content as class:`pandas.DataFrame`
        """
        URLs = [self.get_url(rtid) for rtid in self.valid_ref]
        df_list = self.fs.open_mfjson(URLs, preprocess=self._jsConcept2df)
        all_tables = {t.name: t for t in df_list}
        return collections.OrderedDict(sorted(all_tables.items()))

    @property
    def all_tbl_name(self):
        """Return names of all Argo Reference tables

        Returns
        -------
        OrderedDict
            Dictionary with table IDs as key (taken from the NVS id link, third
            item from the end once split on "/") and table names as
            tuple('short name', 'description', 'NVS id link')
        """
        URLs = [self.get_url(rtid) for rtid in self.valid_ref]
        name_list = self.fs.open_mfjson(URLs, preprocess=self._jsCollection)
        # NOTE(review): presumably the id link looks like ".../<ID>/current/",
        # which is what makes [-3] the table ID -- confirm against NVS output.
        all_tables = {
            rtid.split("/")[-3]: (name, desc, rtid) for name, desc, rtid in name_list
        }
        return collections.OrderedDict(sorted(all_tables.items()))