import pandas as pd
import numpy as np
import warnings
from typing import Union
# from matplotlib.colors import to_hex
# from IPython.display import IFrame
from ..stores import httpstore
[docs]
class DOIrecord:
    """Meta-data holder for an Argo GDAC snapshot DOI record

    This is a low-level class that is not intended to be instantiated directly.
    Please use the :class:`ArgoDOI` instead.

    Examples
    --------
    .. code-block:: python
        :caption: API description

        d = DOIrecord()
        d = DOIrecord('42182')
        d = DOIrecord('42182#103075')
        d = DOIrecord(hashtag='103075')
        d = DOIrecord(hashtag='103088')

        d.doi
        d.dx
        d.isvalid()
        d.date
        d.network
        d.data
        d.file
    """

    root = ""

    def __init__(
        self,
        doi: str = "10.17882/42182",
        hashtag: str = None,
        fs: "httpstore" = None,
        autoload: bool = True,
        api_root: str = "https://www.seanoe.org/api/",
    ):
        """Create a DOI record

        Parameters
        ----------
        doi: str, default: "10.17882/42182"
            DOI, possibly suffixed with a ``#<hashtag>`` fragment. Non-string
            values are converted with :func:`str`.
        hashtag: str, optional
            Fragment (snapshot) identifier. It is overridden by a non-empty
            fragment found in ``doi``.
        fs: httpstore, optional
            File system used to call the API. One is created by :meth:`load`
            if none is provided.
        autoload: bool, default: True
            Call :meth:`load` right away.
        api_root: str
            Root url of the API, the value returned by :attr:`uri` is appended to it.
        """
        self.api_root = api_root
        self._fs = fs  # A httpstore will be created if necessary if self.load() is called
        self._data = None

        # Accept numeric identifiers: the membership test and split below need a string
        doi = "10.17882/42182" if doi is None else str(doi)
        self._doi = doi
        self._hashtag = hashtag
        if "#" in doi:
            parts = doi.split("#")
            self._doi = parts[0]
            # A trailing '#' gives an empty fragment: fall back on the hashtag argument
            # rather than sending an empty fragmentId to the API
            self._hashtag = parts[-1] or hashtag

        if autoload:
            self.load()

    @staticmethod
    def _network_of(title: str) -> str:
        """Return the network label deduced from a record or fragment title"""
        return "BGC" if "BGC" in title else "core+BGC+deep"

    @staticmethod
    def _sizeof_fmt(num, suffix: str = "B") -> str:
        """Return a human readable size, using binary (1024) prefixes"""
        for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"):
            if abs(num) < 1024.0:
                return f"{num:3.1f}{unit}{suffix}"
            num /= 1024.0
        return f"{num:.1f}Yi{suffix}"

    @property
    def doi(self) -> str:
        """DOI component (without hashtag)"""
        return self._doi

    @property
    def hashtag(self) -> str:
        """Hashtag of the full doi"""
        return self._hashtag

    @property
    def dx(self) -> str:
        """DOI url"""
        return "https://dx.doi.org/%s" % str(self)

    def isvalid(self) -> bool:
        """Check that the DOI component contains the '42182' identifier

        This is a method, not a property: call it with ``d.isvalid()``.
        """
        return "42182" in self.doi

    @property
    def data(self) -> dict:
        """DOI record meta-data holder

        Trigger data (down)load if not available
        """
        if self._data is None:
            self.load()
        return self._data

    @property
    def date(self) -> pd.Timestamp:
        """Date associated with the DOI record"""
        return self.data["date"]

    @property
    def network(self) -> str:
        """Network of the Argo data pointed by the DOI

        Returns
        -------
        str: 'core+BGC+deep' or 'BGC'
        """
        return self._network_of(self.data["title"])

    @property
    def file(self) -> list:
        """Return a pretty list of files properties associated with this DOI

        Each item is a dictionary with keys: ``openAccess``, ``path`` (None when
        the file is not open access), ``update``, ``date``, ``size`` and ``network``.
        """
        results = []
        for f in self.data["files"]:
            is_open = bool(f["openAccess"])
            results.append(
                {
                    "openAccess": is_open,
                    "path": f["fileUrl"] if is_open else None,
                    "update": pd.to_datetime(f["lastUpdateDate"]),
                    "date": pd.to_datetime(f["fragment"]["date"]),
                    "size": f["size"],
                    "network": self._network_of(f["fragment"]["title"]),
                }
            )
        return results

    @property
    def uri(self) -> str:
        """url to API call to retrieve DOI data"""
        if self.hashtag is None:
            url = "find-by-id/{id}".format
        else:
            url = "find-by-fragment/{id}?fragmentId={hashtag}".format
        # Only the last component of the DOI (after the last '/') is sent to the API
        return self.api_root + url(id=self.doi.split("/")[-1], hashtag=self.hashtag)

    def __str__(self):
        txt = "%s" % (self.doi)
        if self.hashtag is not None:
            txt = "%s#%s" % (txt, self._hashtag)
        return txt

    def _process_data(self, data: dict) -> dict:
        """Synthetic dict from data return by API

        Note that ``data["files"]`` is sorted in place.
        """
        Nfiles = len(data["files"])
        if Nfiles > 1:
            # Sort files resources by date (most recent first)
            data["files"].sort(
                key=lambda x: x.get("fragment").get("date"), reverse=True
            )
        return {
            "title": data["title"]["en"],
            "date": pd.to_datetime(data["date"]),
            "authors": data["authors"],
            "files": data["files"],
            "Nfiles": Nfiles,
        }

    def load(self, cache: bool = False):
        """Load DOI record data from API call

        Nothing is done if data are already loaded.

        Parameters
        ----------
        cache: bool, default: False
            Cache option of the httpstore created when the instance has none.

        Returns
        -------
        self
        """
        if self._data is None:
            if self._fs is None:
                self._fs = httpstore(cache=cache)
            data = self._fs.open_json(self.uri)
            self._data = self._process_data(data)
        return self

    def from_dict(self, d: dict):
        """Load DOI record data from a dictionary

        The dictionary is ignored, and the record left unchanged, if it does not
        have all of the ``title`` (with an ``en`` entry), ``date``, ``authors``
        and ``files`` keys.

        Returns
        -------
        self
        """
        has_required_keys = (
            "title" in d
            and "en" in d["title"]
            and "date" in d
            and "authors" in d
            and "files" in d
        )
        if has_required_keys:
            self._data = self._process_data(d)
        return self

    def search(self, **kwargs):
        """Not available on a single record, see :meth:`ArgoDOI.search`

        Raises
        ------
        ValueError
        """
        raise ValueError(
            "Searching is not possible from a DOIrecord, please use ArgoDOI.search() instead"
        )

    def _repr_file(self, file, with_label=False) -> str:
        """Return a pretty string from a single file dict"""
        summary = []
        if with_label:
            summary.append("%s" % file["label"]["en"])
        # Only open access files have a url worth showing
        if bool(file["openAccess"]):
            summary.append("%s" % file["fileUrl"])
        else:
            summary.append("%s" % file["fileName"])
        attrs = [
            "%s" % self._sizeof_fmt(file["size"]),
            "openAccess=%s" % file["openAccess"],
        ]
        summary.append("(%s)" % (", ".join(attrs)))
        return " ".join(summary)

    def _repr_file_group(self, bgc: bool, limit: int = 10) -> list:
        """Return repr lines for the first ``limit`` files of the BGC (or not BGC) network"""
        selected = [
            f for f in self.data["files"] if ("BGC" in f["fragment"]["title"]) == bgc
        ]
        return [
            " - #%s %s" % (f["id"], self._repr_file(f, with_label=True))
            for f in selected[:limit]
        ]

    def __repr__(self):
        summary = ["<argopy.DOI.record>", "DOI: %s" % self.__str__()]
        # Do not trigger a download just to print the record
        if self._data is None:
            return "\n".join(summary)

        summary.append("Title: %s" % self.data["title"])
        summary.append("Date: %s" % self.date.strftime("%Y-%m-%d"))
        summary.append("Network: %s" % self.network)
        summary.append("Link: %s" % self.dx)

        if self.data["Nfiles"] == 1:
            summary.append("File: %s" % self._repr_file(self.data["files"][0]))
        else:
            summary.append("File: %i files in total" % (self.data["Nfiles"]))
            summary.append("Files for core+BGC+deep:")
            summary.extend(self._repr_file_group(bgc=False))
            summary.append("Files for BGC only:")
            summary.extend(self._repr_file_group(bgc=True))

        return "\n".join(summary)
# @property
# def html(self) -> str:
# fs = 12
#
# def td_msg(bgcolor, txtcolor, txt):
# style = "background-color:%s;" % to_hex(bgcolor, keep_alpha=True)
# style += "border-width:0px;"
# style += "padding: 2px 2px 2px 0px;"
# style += "text-align:left;"
# style += "color:%s" % to_hex(txtcolor, keep_alpha=True)
# return "<td style='%s'>%s</td>" % (style, str(txt))
#
# def td_a(bgcolor, txtcolor, txt, link):
# style = "background-color:%s;" % to_hex(bgcolor, keep_alpha=True)
# style += "border-width:0px;"
# style += "padding: 2px 0px 2px 5px;"
# style += "text-align:right;"
# style += "color:%s" % to_hex(txtcolor, keep_alpha=True)
# return "<td style='%s'><a href='%s'>%s</a></td>" % (style, link, str(txt))
#
# td_empty = "<td style='border-width:0px;padding: 2px 5px 2px 5px;text-align:left'> </td>"
#
# html = []
# html.append(
# "<table style='border-collapse:collapse;border-spacing:0;font-size:%ipx'>"
# % fs
# )
# html.append("<tbody>")
#
# rows = []
#
# # 1st row:
# cols = []
# cols.append(td_msg("dimgray", "w", "doi: "))
# cols.append(td_msg("green", "w", "%s/" % self.root))
# cols.append(td_msg("yellowgreen", "w", self.doi))
# if self.hashtag is not None:
# cols.append(td_msg("darkorange", "w", "#%s" % self.hashtag))
# cols.append(td_a("white", "w", "β", self.dx))
# cols.append(td_empty)
# rows.append("<tr>%s</tr>" % "\n".join(cols))
#
# # # 2nd row (if data have been loaded):
# # if self._data is not None:
# # cols = []
# # cols.append(td_msg('dimgray', 'w', "Title: "))
# # cols.append(td_msg('white', 'w', "%s" % self.data['title']))
# # # cols.append(td_msg('yellowgreen', 'w', self.doi))
# # # if self.hashtag is not None:
# # # cols.append(td_msg("darkorange", 'w', "#%s" % self.hashtag))
# # # cols.append(td_a("white", 'w', "β", self.dx))
# # # cols.append(td_empty)
# # rows.append("<tr>%s</tr>" % "\n".join(cols))
#
# # print(rows)
# # # Fix colspan:
# # Nrows = np.max([len(r.split("<td ")) for r in rows])
# # print(Nrows)
# # rowss = []
# # for r in rows:
# # rowss.append(r.replace("<tr>", "<tr colspan='%i'>" % Nrows))
# # print(rowss)
#
# # Finalize
# html.append("\n".join(rows))
# html.append("</tbody>")
# html.append("</table>")
# html = "\n".join(html)
# return html
# def _repr_html_(self):
# return self.html
[docs]
class ArgoDOI:
    """Argo GDAC snapshot DOI access and discovery

    Examples
    --------
    .. code-block:: python
        :caption: Load DOI meta-data

        from argopy import ArgoDOI

        doi = ArgoDOI()  # If you don't know where to start, just load the primary Argo DOI record
        doi = ArgoDOI('95141')  # To point directly to a snapshot ID
        doi = ArgoDOI(hashtag='95141')
        doi = ArgoDOI(fs=httpstore(cache=True))

    .. code-block:: python
        :caption: Searching for a specific DOI snapshot

        # Return doi closest to a given date:
        ArgoDOI().search('2020-02')

        # Return doi closest to a given date for a specific network:
        ArgoDOI().search('2020-02', network='BGC')

    .. code-block:: python
        :caption: Working with DOIs

        doi = ArgoDOI('95141')
        doi.download()  # Trigger download of the DOI file
        doi.file  # Easy to read list of file(s) associated with a DOI record
        doi.dx  # http link toward the DOI snapshot webpage
    """

    def __init__(self,
                 hashtag=None,
                 fs=None,
                 cache=True):
        """Load a DOI record

        Parameters
        ----------
        hashtag: str or int, optional
            Snapshot identifier, with or without a leading ``...42182#``.
        fs: httpstore, optional
            File system to use. Anything that is not a httpstore is ignored and
            replaced by a new ``httpstore(cache=cache)``.
        cache: bool, default: True
            Cache option of the httpstore created when ``fs`` is not usable.
        """
        self._fs = fs if isinstance(fs, httpstore) else httpstore(cache=cache)

        if hashtag is not None:
            # Hashtags returned by dates() are integers: make the membership test safe
            hashtag = str(hashtag)
            if '42182#' in hashtag:
                hashtag = hashtag.split('42182#')[-1]

        self._doi = DOIrecord(hashtag=hashtag, fs=self._fs, autoload=True)

    @staticmethod
    def _as_utc(stamp) -> pd.Timestamp:
        """Return a timezone aware timestamp, tz-naive values are taken as UTC"""
        stamp = pd.Timestamp(stamp)
        return stamp.tz_localize("UTC") if stamp.tzinfo is None else stamp

    @property
    def doi(self) -> str:
        """Full DOI string, as returned by ``str()`` on the underlying record"""
        return str(self._doi)

    def __repr__(self):
        # Same content as the underlying record, with another header line
        summary = self._doi.__repr__().split("\n")
        summary[0] = '<argopy.DOI>'
        return "\n".join(summary)

    def dates(self, network: str = None) -> dict:
        """Mapping of DOI snapshot hashtag(s) to their publication date(s)

        Parameters
        ----------
        network: str, optional
            Allows to specify a network, like 'BGC'. The network of the loaded
            record is used by default. Any value other than 'BGC' selects the
            files without 'BGC' in their title.

        Returns
        -------
        dict
            Dictionary where keys are DOI hashtag and values are publication dates as :class:`pandas.Timestamp`
        """
        network = self._doi.network if network is None else network
        want_bgc = network == "BGC"
        return {
            int(f["id"]): pd.to_datetime(f["fragment"]["date"])
            for f in self._doi.data["files"]
            if ("BGC" in f["fragment"]["title"]) == want_bgc
        }

    def search(self, date: Union[str, pd.Timestamp], network: str = None) -> "DOIrecord":
        """Search the DOI record the closest to a given date

        Parameters
        ----------
        date: str, :class:`pandas.Timestamp`
            Date to search a DOI for
        network: str, optional
            Allows to specify a network, like 'BGC'

        Returns
        -------
        :class:`argopy.related.doi_snapshot.DOIrecord`
            A list of records is returned if several snapshots share the closest date.

        Raises
        ------
        ValueError
            If there is no snapshot to search in for this network.

        Warns
        -----
        UserWarning
            If the single closest snapshot is more than 30 days away from ``date``,
            whether before or after it.
        """
        dates = self.dates(network=network)
        if not dates:
            raise ValueError(
                "No DOI snapshot to search in for network '%s'" % network
            )

        target = pd.to_datetime(date, utc=True)
        hashtags = list(dates)
        # Absolute time offset of each snapshot to the target date
        offsets = [abs(target - self._as_utc(dates[h])) for h in hashtags]
        nearest = min(range(len(offsets)), key=offsets.__getitem__)
        close = dates[hashtags[nearest]]

        # Several snapshots can share the same publication date
        found = [h for h in hashtags if dates[h] == close]
        results = [DOIrecord(hashtag=h, fs=self._fs) for h in found]

        if len(results) == 1:
            # Use the absolute offset: a snapshot older than the target gives a
            # negative (close - target) that would never trigger the warning
            if offsets[nearest].days > 30:
                warnings.warn(
                    "This snapshot is more than 30 days off your search dates !"
                )
            return results[0]
        return results

    @property
    def file(self) -> list:
        """DOI tar.gz file properties"""
        return self._doi.file

    @property
    def dx(self) -> str:
        """DOI url"""
        return self._doi.dx

    def download(self):
        """Trigger download of a DOI tar.gz file

        This will simply make the web browser to open the DOI file.

        Nothing is opened, and a warning is issued, if the DOI has more than one
        file, no file at all, or a file without a path.
        """
        flist = self.file
        if len(flist) > 1:
            warnings.warn("For safety reasons, we don't trigger download of a DOI when it has more than one file. This is probably happening because you did not specified a hashtag to your ArgoDOI instance.")
            return
        if not flist:
            warnings.warn("There is no file associated with this DOI, nothing to download.")
            return

        path = flist[0]['path']
        if path is None:
            warnings.warn("The file of this DOI has no download path, nothing to download.")
            return

        import webbrowser
        webbrowser.open_new(path)