# Source code for argopy.stores.implementations.gdac
import logging
from typing import Union
from pathlib import Path
from fsspec.core import split_protocol
from urllib.parse import urlparse
from socket import gaierror
import fsspec
import os
from ...options import OPTIONS
from ...errors import GdacPathError
from ...utils.lists import shortcut2gdac
from .. import filestore, httpstore, ftpstore, s3store
log = logging.getLogger("argopy.stores.gdac")
# [docs]
class gdacfs:
    """Argo file system for any GDAC path

    Instantiating this class does not return a ``gdacfs`` instance: ``__new__``
    picks the store class registered in :attr:`protocol2fs` for the protocol of
    ``path``, instantiates it, and returns that store with its underlying
    fsspec file system re-rooted at ``path``.

    Parameters
    ----------
    path: str or :class:`pathlib.Path`, optional
        GDAC path to create a file system for. Support any possible GDAC protocol.
        If not specified, value from the global option ``gdac`` will be used.
        A string may also be a GDAC shortcut name (see examples below).
        A :class:`pathlib.Path` is always taken literally as a local path.
    cache: bool (False)
        Forwarded as-is to the store.
    cachedir: str (from OPTIONS)
        Forwarded as-is to the store. The default here is an empty string;
        falling back on the OPTIONS value is presumably done by the
        store (not visible in this module).
    **kwargs: (optional)
        Other arguments are passed to :func:`fsspec.filesystem`

    Returns
    -------
    A directory based file system based on :class:`argopy.stores.ArgoStoreProto`

    Raises
    ------
    GdacPathError
        If the protocol of ``path`` is not supported, or if address
        information cannot be obtained while creating the store
        (:class:`socket.gaierror`).

    Examples
    --------
    .. code-block:: python
        :caption: Explicit GDAC stores

        fs = gdacfs("https://data-argo.ifremer.fr")
        fs = gdacfs("https://usgodae.org/pub/outgoing/argo")
        fs = gdacfs("ftp://ftp.ifremer.fr/ifremer/argo")
        fs = gdacfs("/home/ref-argo/gdac")
        fs = gdacfs("s3://argo-gdac-sandbox/pub")

        with argopy.set_options(gdac="s3://argo-gdac-sandbox/pub"):
            fs = gdacfs()

    .. code-block:: python
        :caption: GDAC stores by shortcut name

        fs = gdacfs("http")     # "https" > https://data-argo.ifremer.fr
        fs = gdacfs("us-http")  # "us-https" > https://usgodae.org/pub/outgoing/argo
        fs = gdacfs("ftp")      # > ftp://ftp.ifremer.fr/ifremer/argo
        fs = gdacfs("s3")       # or "aws" > s3://argo-gdac-sandbox/pub

    Warnings
    --------
    This class does not check if the path is a valid Argo GDAC

    See Also
    --------
    :meth:`argopy.utils.check_gdac_path`, :meth:`argopy.utils.list_gdac_servers`, :meth:`argopy.utils.shortcut2gdac`
    """

    protocol2fs = {"file": filestore, "http": httpstore, "ftp": ftpstore, "s3": s3store}
    """Dictionary mapping path protocol to Argo file system to instantiate"""

    @staticmethod
    def path2protocol(path: Union[str, Path]) -> str:
        """Narrow down any path to a supported protocol, raise GdacPathError if protocol not supported

        Parameters
        ----------
        path: str or :class:`pathlib.Path`
            Path or URL to analyse.

        Returns
        -------
        str
            One of ``"file"``, ``"http"``, ``"ftp"`` or ``"s3"``.

        Raises
        ------
        GdacPathError
            If the protocol prefix contains none of the supported names.
        """
        # A Path object can only designate something local
        if isinstance(path, Path):
            return "file"

        split = split_protocol(path)[0]
        # No protocol prefix found: plain local path
        if split is None:
            return "file"

        # Substring matching is kept on purpose: any prefix containing the
        # name is narrowed down to it.
        if "http" in split:  # will also catch "https"
            return "http"
        if "ftp" in split:
            return "ftp"
        if "s3" in split:
            return "s3"
        raise GdacPathError("Unknown protocol for an Argo GDAC host: %s" % split)

    def __new__(cls, path: Union[str, Path, None] = None, cache: bool = False, cachedir: str = "", **kwargs):
        """Create a file system for any Argo GDAC compliant path

        See the class docstring for the description of parameters.

        Returns
        -------
        An instance of the store class mapped to the path protocol in
        :attr:`protocol2fs` (not an instance of ``gdacfs``), whose ``fs``
        attribute is a "dir" fsspec file system rooted at ``path``.

        Raises
        ------
        GdacPathError
            If the protocol is not supported, or if a
            :class:`socket.gaierror` is raised while creating the store.
        """
        if path is None:
            path = OPTIONS["gdac"]
        elif not isinstance(path, Path):
            # Shortcut names ("http", "ftp", "s3", ...) are strings. A Path
            # object is never a shortcut, so it is not sent through the
            # shortcut resolution, which works on strings.
            path = shortcut2gdac(path)

        protocol = cls.path2protocol(path)

        # NOTE: these are stored on the class itself, hence they always
        # reflect the most recent call and are shared by all callers.
        cls.root = path
        cls.target_protocol = protocol

        store_class = cls.protocol2fs[protocol]
        fs_args = {'cache': cache, 'cachedir': cachedir}
        if protocol == "ftp":
            # Parse the URL only once to get both host and port
            url = urlparse(path)
            fs_args['host'] = url.hostname
            # Port 0 when the URL has no explicit port; presumably understood
            # downstream as "use the default FTP port" (to be confirmed
            # against ftpstore).
            fs_args['port'] = 0 if url.port is None else url.port

        try:
            fs = store_class(**fs_args, **kwargs)
        except gaierror as e:
            # Explicitly chain the low-level socket error as the cause
            raise GdacPathError(
                "Can't get address info from FTP host: %s\nGAIerror: %s"
                % (fs_args, str(e))
            ) from e

        # Wrap the store's underlying file system in a "dir" file system
        # rooted at the GDAC path.
        fs.fs = fsspec.filesystem("dir", fs=fs.fs, path=path)
        return fs