# Source code for argopy.xarray

import sys
import warnings

import numpy as np
import pandas as pd
import xarray as xr
from sklearn import preprocessing

try:
    import gsw

    with_gsw = True
except ModuleNotFoundError:
    with_gsw = False

from argopy.utilities import linear_interpolation_remap
from argopy.errors import InvalidDatasetStructure


@xr.register_dataset_accessor("argo")
class ArgoAccessor:
    """Class registered under scope ``argo`` to access a :class:`xarray.Dataset` object.

    - Ensure all variables are of the Argo required dtype with: ds.argo.cast_types()
    - Convert a collection of points into a collection of profiles: ds.argo.point2profile()
    - Convert a collection of profiles to a collection of points: ds.argo.profile2point()
    """

    def __init__(self, xarray_obj):
        """ Init """
        self._obj = xarray_obj
        self._added = list()  # Will record all new variables added by argo
        # self._register = collections.OrderedDict()  # Will register mutable instances of sub-modules like 'plot'

        # Variables present in the initial dataset
        self._vars = list(xarray_obj.variables.keys())
        # Store the initial list of dimensions
        self._dims = list(xarray_obj.dims.keys())
        self.encoding = xarray_obj.encoding
        self.attrs = xarray_obj.attrs

        if "N_PROF" in self._dims:
            self._type = "profile"
        elif "N_POINTS" in self._dims:
            self._type = "point"
        else:
            raise InvalidDatasetStructure("Argo dataset structure not recognised")

        if "PRES_ADJUSTED" in self._vars:
            self._mode = "expert"
        elif "PRES" in self._vars:
            self._mode = "standard"
        else:
            raise InvalidDatasetStructure("Argo dataset structure not recognised")

    def _add_history(self, txt):
        if "history" in self._obj.attrs:
            self._obj.attrs["history"] += "; %s" % txt
        else:
            self._obj.attrs["history"] = txt
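    # Accessor usage sketch (hypothetical dataset `ds`; the fetcher call is only an
    # illustration of how such a dataset is typically obtained with argopy):
    #
    #     import argopy
    #     ds = argopy.DataFetcher().float(6902746).to_xarray()
    #     ds.argo._type   # 'point' or 'profile', set in __init__ from the dimensions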
    def cast_types(self):  # noqa: C901
        """ Make sure variables are of the appropriate types

        This is hard coded, but should be retrieved from an API somewhere.
        Should be able to handle all possible variables encountered in the Argo dataset.
        """
        ds = self._obj
        list_str = [
            "PLATFORM_NUMBER",
            "DATA_MODE",
            "DIRECTION",
            "DATA_CENTRE",
            "DATA_TYPE",
            "FORMAT_VERSION",
            "HANDBOOK_VERSION",
            "PROJECT_NAME",
            "PI_NAME",
            "STATION_PARAMETERS",
            "DATA_CENTER",
            "DC_REFERENCE",
            "DATA_STATE_INDICATOR",
            "PLATFORM_TYPE",
            "FIRMWARE_VERSION",
            "POSITIONING_SYSTEM",
            "PROFILE_PRES_QC",
            "PROFILE_PSAL_QC",
            "PROFILE_TEMP_QC",
            "PARAMETER",
            "SCIENTIFIC_CALIB_EQUATION",
            "SCIENTIFIC_CALIB_COEFFICIENT",
            "SCIENTIFIC_CALIB_COMMENT",
            "HISTORY_INSTITUTION",
            "HISTORY_STEP",
            "HISTORY_SOFTWARE",
            "HISTORY_SOFTWARE_RELEASE",
            "HISTORY_REFERENCE",
            "HISTORY_QCTEST",
            "HISTORY_ACTION",
            "HISTORY_PARAMETER",
            "VERTICAL_SAMPLING_SCHEME",
            "FLOAT_SERIAL_NO",
        ]
        list_int = [
            "PLATFORM_NUMBER",
            "WMO_INST_TYPE",
            "CYCLE_NUMBER",
            "CONFIG_MISSION_NUMBER",
        ]
        list_datetime = [
            "REFERENCE_DATE_TIME",
            "DATE_CREATION",
            "DATE_UPDATE",
            "JULD",
            "JULD_LOCATION",
            "SCIENTIFIC_CALIB_DATE",
            "HISTORY_DATE",
        ]

        def cast_this(da, type):
            """ Low-level casting of DataArray values """
            try:
                da.values = da.values.astype(type)
                da.attrs["casted"] = 1
            except Exception:
                print("Oops!", sys.exc_info()[0], "occurred.")
                print("Fail to cast: ", da.dtype, "into:", type, "for: ", da.name)
                print("Encountered unique values:", np.unique(da))
            return da

        def cast_this_da(da):
            """ Cast any DataArray

            Note: `v` is the name of the variable being processed, taken from the
            enclosing loop over ``ds.data_vars`` below.
            """
            da.attrs["casted"] = 0
            if v in list_str and da.dtype == "O":  # Object
                da = cast_this(da, str)

            if v in list_int:  # and da.dtype == 'O':  # Object
                da = cast_this(da, int)

            if v in list_datetime and da.dtype == "O":  # Object
                if (
                    "conventions" in da.attrs
                    and da.attrs["conventions"] == "YYYYMMDDHHMISS"
                ):
                    if da.size != 0:
                        if len(da.dims) <= 1:
                            val = da.astype(str).values.astype("U14")
                            # This should not happen, but still ! That's real world data
                            val[val == " " * 14] = "nan"  # fully blank 14-character field
                            da.values = pd.to_datetime(val, format="%Y%m%d%H%M%S")
                        else:
                            s = da.stack(dummy_index=da.dims)
                            val = s.astype(str).values.astype("U14")
                            # This should not happen, but still ! That's real world data
                            val[val == " " * 14] = "nan"  # fully blank 14-character field
                            s.values = pd.to_datetime(val, format="%Y%m%d%H%M%S")
                            da.values = s.unstack("dummy_index")
                        da = cast_this(da, np.datetime64)
                    else:
                        da = cast_this(da, np.datetime64)

                elif v == "SCIENTIFIC_CALIB_DATE":
                    da = cast_this(da, str)
                    s = da.stack(dummy_index=da.dims)
                    s.values = pd.to_datetime(s.values, format="%Y%m%d%H%M%S")
                    da.values = (s.unstack("dummy_index")).values
                    da = cast_this(da, np.datetime64)

            if "QC" in v and "PROFILE" not in v and "QCTEST" not in v:
                if da.dtype == "O":  # convert object to string
                    da = cast_this(da, str)

                # Address weird string values:
                # (replace missing or nan values by a '0' that will be cast as an integer later)
                if da.dtype == "<U3":  # string, len 3 because of a 'nan' somewhere
                    ii = (
                        da == "   "  # three blanks
                    )  # This should not happen, but still ! That's real world data
                    da = xr.where(ii, "0", da)

                    ii = (
                        da == "nan"
                    )  # This should not happen, but still ! That's real world data
                    da = xr.where(ii, "0", da)

                    # Get back to regular U1 string
                    da = cast_this(da, np.dtype("U1"))

                if da.dtype == "<U1":  # string
                    ii = (
                        da == ""
                    )  # This should not happen, but still ! That's real world data
                    da = xr.where(ii, "0", da)

                    ii = (
                        da == " "
                    )  # This should not happen, but still ! That's real world data
                    da = xr.where(ii, "0", da)

                    ii = (
                        da == "n"
                    )  # This should not happen, but still ! That's real world data
                    da = xr.where(ii, "0", da)

                # finally convert QC strings to integers:
                da = cast_this(da, int)

            if da.dtype != "O":
                da.attrs["casted"] = 1

            return da

        for v in ds.data_vars:
            try:
                ds[v] = cast_this_da(ds[v])
            except Exception:
                print("Oops!", sys.exc_info()[0], "occurred.")
                print("Fail to cast: %s " % v)
                print("Encountered unique values:", np.unique(ds[v]))
                raise

        return ds
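    # Usage sketch (hypothetical `ds`, an Argo dataset loaded with argopy):
    #
    #     ds = ds.argo.cast_types()  # enforce Argo dtypes on all known variables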
    def filter_data_mode(self, keep_error: bool = True, errors: str = "raise"):  # noqa: C901
        """ Filter variables according to their data mode

        This applies to <PARAM> and <PARAM_QC>.

        For data mode 'R' and 'A': keep <PARAM> (eg: 'PRES', 'TEMP' and 'PSAL').
        For data mode 'D': keep <PARAM_ADJUSTED> (eg: 'PRES_ADJUSTED', 'TEMP_ADJUSTED' and 'PSAL_ADJUSTED').

        Parameters
        ----------
        keep_error: bool, optional
            If True (default), keep the measurement error fields.
        errors: {'raise','ignore'}, optional
            If 'raise' (default), raises an InvalidDatasetStructure error if any of the
            expected dataset variables is not found. If 'ignore', fails silently and
            returns the unmodified dataset.

        Returns
        -------
        :class:`xarray.Dataset`
        """
        if self._type != "point":
            raise InvalidDatasetStructure(
                "Method only available to a collection of points"
            )

        #########
        # Sub-functions
        #########
        def safe_where_eq(xds, key, value):
            # xds.where(xds[key] == value, drop=True) is not safe with empty time variables, cf issue #64
            try:
                return xds.where(xds[key] == value, drop=True)
            except ValueError as v:
                if v.args[0] == (
                    "zero-size array to reduction operation "
                    "minimum which has no identity"
                ):
                    # A bug in xarray will cause a ValueError if trying to
                    # decode the times in a NetCDF file with length 0.
                    # See:
                    # https://github.com/pydata/xarray/issues/1329
                    # https://github.com/euroargodev/argopy/issues/64
                    # Here, we just need to return an empty array
                    TIME = xds["TIME"]
                    xds = xds.drop_vars("TIME")
                    xds = xds.where(xds[key] == value, drop=True)
                    xds["TIME"] = xr.DataArray(
                        np.arange(len(xds["N_POINTS"])),
                        dims="N_POINTS",
                        attrs=TIME.attrs,
                    ).astype(np.datetime64)
                    xds = xds.set_coords("TIME")
                    return xds
                else:
                    raise  # Re-raise unexpected errors

        def ds_split_datamode(xds):
            """ Create one dataset for each of the data_mode

            Split the full dataset into 3 datasets.
            """
            # Real-time:
            argo_r = safe_where_eq(xds, "DATA_MODE", "R")
            for v in plist:
                vname = v.upper() + "_ADJUSTED"
                if vname in argo_r:
                    argo_r = argo_r.drop_vars(vname)
                vname = v.upper() + "_ADJUSTED_QC"
                if vname in argo_r:
                    argo_r = argo_r.drop_vars(vname)
                vname = v.upper() + "_ADJUSTED_ERROR"
                if vname in argo_r:
                    argo_r = argo_r.drop_vars(vname)
            # Real-time adjusted:
            argo_a = safe_where_eq(xds, "DATA_MODE", "A")
            for v in plist:
                vname = v.upper()
                if vname in argo_a:
                    argo_a = argo_a.drop_vars(vname)
                vname = v.upper() + "_QC"
                if vname in argo_a:
                    argo_a = argo_a.drop_vars(vname)
            # Delayed mode:
            argo_d = safe_where_eq(xds, "DATA_MODE", "D")
            return argo_r, argo_a, argo_d

        def fill_adjusted_nan(ds, vname):
            """ Fill in the adjusted field with the non-adjusted wherever it is NaN

            Ensure to have values even for bad QC data in delayed mode.
            """
            ii = ds.where(np.isnan(ds[vname + "_ADJUSTED"]), drop=1)["N_POINTS"]
            ds[vname + "_ADJUSTED"].loc[dict(N_POINTS=ii)] = ds[vname].loc[
                dict(N_POINTS=ii)
            ]
            return ds

        def merge_arrays(this_argo_r, this_argo_a, this_argo_d, this_vname):
            """ Merge one variable from 3 DataArrays

            Based on the xarray merge function with 'no_conflicts': only values
            which are not null in all datasets must be equal. The returned dataset
            then contains the combination of all non-null values.

            Return a xarray.DataArray
            """

            def merge_this(a1, a2, a3):
                return xr.merge((xr.merge((a1, a2)), a3))

            DA = merge_this(
                this_argo_r[this_vname],
                this_argo_a[this_vname + "_ADJUSTED"].rename(this_vname),
                this_argo_d[this_vname + "_ADJUSTED"].rename(this_vname),
            )
            DA_QC = merge_this(
                this_argo_r[this_vname + "_QC"],
                this_argo_a[this_vname + "_ADJUSTED_QC"].rename(this_vname + "_QC"),
                this_argo_d[this_vname + "_ADJUSTED_QC"].rename(this_vname + "_QC"),
            )
            if keep_error:
                DA_ERROR = xr.merge((
                    this_argo_a[this_vname + "_ADJUSTED_ERROR"].rename(this_vname + "_ERROR"),
                    this_argo_d[this_vname + "_ADJUSTED_ERROR"].rename(this_vname + "_ERROR"),
                ))
                DA = merge_this(DA, DA_QC, DA_ERROR)
            else:
                DA = xr.merge((DA, DA_QC))
            return DA

        #########
        # Filter
        #########
        ds = self._obj
        if "DATA_MODE" not in ds:
            if errors == "raise":
                raise InvalidDatasetStructure(
                    "Method only available for dataset with a 'DATA_MODE' variable "
                )
            else:
                # todo should raise a warning instead ?
                return ds

        # Define variables to filter:
        possible_list = [
            "PRES",
            "TEMP",
            "PSAL",
            "DOXY",
            "CHLA",
            "BBP532",
            "BBP700",
            "DOWNWELLING_PAR",
            "DOWN_IRRADIANCE380",
            "DOWN_IRRADIANCE412",
            "DOWN_IRRADIANCE490",
        ]
        plist = [p for p in possible_list if p in ds.data_vars]

        # Create one dataset for each of the data_mode:
        argo_r, argo_a, argo_d = ds_split_datamode(ds)

        # Fill in the adjusted field with the non-adjusted wherever it is NaN:
        for v in plist:
            argo_d = fill_adjusted_nan(argo_d, v.upper())

        # Drop QC fields in delayed mode dataset:
        for v in plist:
            vname = v.upper()
            if vname in argo_d:
                argo_d = argo_d.drop_vars(vname)
            vname = v.upper() + "_QC"
            if vname in argo_d:
                argo_d = argo_d.drop_vars(vname)

        # Create new arrays with the appropriate variables:
        vlist = [merge_arrays(argo_r, argo_a, argo_d, v) for v in plist]

        # Create final dataset by merging all available variables:
        final = xr.merge(vlist)

        # Merge with all other variables:
        other_variables = list(
            set([v for v in list(ds.data_vars) if "ADJUSTED" not in v])
            - set(list(final.data_vars))
        )
        # other_variables.remove('DATA_MODE')  # Not necessary anymore
        for p in other_variables:
            final = xr.merge((final, ds[p]))

        final.attrs = ds.attrs
        final.argo._add_history("Variables filtered according to DATA_MODE")
        final = final[np.sort(final.data_vars)]

        # Cast data types and add attributes:
        final = final.argo.cast_types()

        return final
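    # Usage sketch (hypothetical `ds`, a collection of points in 'expert' mode):
    #
    #     ds = ds.argo.filter_data_mode(keep_error=True)
    #     # 'R'/'A' points keep <PARAM>; 'D' points get <PARAM_ADJUSTED> renamed to <PARAM>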
    def filter_qc(self, QC_list=[1, 2], drop=True, mode="all", mask=False):  # noqa: C901
        """ Filter data set according to QC values

        Mask the dataset for points where 'all' or 'any' of the QC fields has a value
        in the list of integer QC flags. This method can return the filtered dataset
        or the filter mask.
        """
        if self._type != "point":
            raise InvalidDatasetStructure(
                "Method only available to a collection of points"
            )

        if mode not in ["all", "any"]:
            raise ValueError("Mode must be 'all' or 'any'")

        this = self._obj

        # Extract QC fields:
        QC_fields = []
        for v in this.data_vars:
            if "QC" in v and "PROFILE" not in v:
                QC_fields.append(v)
        QC_fields = this[QC_fields]
        for v in QC_fields.data_vars:
            QC_fields[v] = QC_fields[v].astype(int)

        # Now apply filter
        this_mask = xr.DataArray(
            np.zeros_like(QC_fields["N_POINTS"]),
            dims=["N_POINTS"],
            coords={"N_POINTS": QC_fields["N_POINTS"]},
        )
        for v in QC_fields.data_vars:
            for qc in QC_list:
                this_mask += QC_fields[v] == qc
        if mode == "all":
            this_mask = this_mask == len(QC_fields)  # all
        else:
            this_mask = this_mask >= 1  # any

        if not mask:
            this = this.where(this_mask, drop=drop)
            for v in this.data_vars:
                if "QC" in v and "PROFILE" not in v:
                    this[v] = this[v].astype(int)
            this.argo._add_history("Variables selected according to QC")
            this = this.argo.cast_types()
            return this
        else:
            return this_mask
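    # Usage sketch (hypothetical `ds`): keep only points where every QC flag is 1 or 2,
    # or retrieve the boolean mask instead of the filtered dataset:
    #
    #     ds_good = ds.argo.filter_qc(QC_list=[1, 2], mode='all')
    #     good_mask = ds.argo.filter_qc(QC_list=[1, 2], mode='any', mask=True)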
    def uid(self, wmo_or_uid, cyc=None, direction=None):
        """ UID encoder/decoder

        Parameters
        ----------
        wmo_or_uid: int
            WMO number (to encode) or UID (to decode)
        cyc: int, optional
            Cycle number (to encode), not used to decode
        direction: str, optional
            Direction of the profile, must be 'A' (Ascending) or 'D' (Descending)

        Returns
        -------
        int or tuple of int

        Examples
        --------
        unique_float_profile_id = uid(690024, 13, 'A')  # Encode
        wmo, cyc, drc = uid(unique_float_profile_id)  # Decode
        """
        le = preprocessing.LabelEncoder()
        le.fit(["A", "D"])

        def encode_direction(x):
            y = 1 - le.transform(x)
            return np.where(y == 0, -1, y)

        def decode_direction(x):
            y = 1 - np.where(x == -1, 0, x)
            return le.inverse_transform(y)

        offset = 1e5

        if cyc is not None:
            # ENCODER
            if direction is not None:
                return (
                    encode_direction(direction)
                    * np.vectorize(int)(offset * wmo_or_uid + cyc).ravel()
                )
            else:
                return np.vectorize(int)(offset * wmo_or_uid + cyc).ravel()
        else:
            # DECODER
            drc = decode_direction(np.sign(wmo_or_uid))
            wmo = np.vectorize(int)(np.abs(wmo_or_uid) / offset)
            cyc = -np.vectorize(int)(offset * wmo - np.abs(wmo_or_uid))
            return wmo, cyc, drc
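    # Worked example of the encoding above: with offset = 1e5, WMO 690024, cycle 13 and
    # an ascending profile ('A' encodes to +1), the UID is 690024 * 100000 + 13 = 69002400013.
    # A descending profile ('D' encodes to -1) gives the same magnitude with a negative sign.
    # Decoding reverses this: wmo = int(69002400013 / 1e5) = 690024 and
    # cyc = -(int(1e5 * 690024 - 69002400013)) = 13.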
    def point2profile(self):  # noqa: C901
        """ Transform a collection of points into a collection of profiles """
        if self._type != "point":
            raise InvalidDatasetStructure(
                "Method only available to a collection of points"
            )
        this = self._obj  # Should not be modified

        def fillvalue(da):
            """ Return fillvalue for a dataarray """
            # https://docs.scipy.org/doc/numpy/reference/generated/numpy.dtype.kind.html#numpy.dtype.kind
            if da.dtype.kind in ["U"]:
                fillvalue = " "
            elif da.dtype.kind == "i":
                fillvalue = 99999
            elif da.dtype.kind == "M":
                fillvalue = np.datetime64("NaT")
            else:
                fillvalue = np.nan
            return fillvalue

        # Find the number of profiles (N_PROF) and vertical levels (N_LEVELS):
        dummy_argo_uid = xr.DataArray(
            self.uid(
                this["PLATFORM_NUMBER"].values,
                this["CYCLE_NUMBER"].values,
                this["DIRECTION"].values,
            ),
            dims="N_POINTS",
            coords={"N_POINTS": this["N_POINTS"]},
            name="dummy_argo_uid",
        )
        N_PROF = len(np.unique(dummy_argo_uid))
        # that = this.groupby(dummy_argo_uid)

        N_LEVELS = int(
            xr.DataArray(
                np.ones_like(this["N_POINTS"].values),
                dims="N_POINTS",
                coords={"N_POINTS": this["N_POINTS"]},
            )
            .groupby(dummy_argo_uid)
            .sum()
            .max()
            .values
        )
        assert N_PROF * N_LEVELS >= len(this["N_POINTS"])

        # Store the initial set of coordinates:
        coords_list = list(this.coords)
        this = this.reset_coords()

        # For each variable, determine if it has a unique value per profile,
        # if yes: the transformed variable should be [N_PROF]
        # if no: the transformed variable should be [N_PROF, N_LEVELS]
        count = np.zeros((N_PROF, len(this.data_vars)), "int")
        for i_prof, grp in enumerate(this.groupby(dummy_argo_uid)):
            i_uid, prof = grp
            for iv, vname in enumerate(this.data_vars):
                count[i_prof, iv] = len(np.unique(prof[vname]))
        # Variables with a unique value for each profile:
        list_1d = list(np.array(this.data_vars)[count.sum(axis=0) == count.shape[0]])
        # Variables with more than 1 value for each profile:
        list_2d = list(np.array(this.data_vars)[count.sum(axis=0) != count.shape[0]])

        # Create new empty dataset:
        new_ds = []
        for vname in list_2d:
            new_ds.append(
                xr.DataArray(
                    np.full(
                        (N_PROF, N_LEVELS),
                        fillvalue(this[vname]),
                        dtype=this[vname].dtype,
                    ),
                    dims=["N_PROF", "N_LEVELS"],
                    coords={
                        "N_PROF": np.arange(N_PROF),
                        "N_LEVELS": np.arange(N_LEVELS),
                    },
                    attrs=this[vname].attrs,
                    name=vname,
                )
            )
        for vname in list_1d:
            new_ds.append(
                xr.DataArray(
                    np.full((N_PROF,), fillvalue(this[vname]), dtype=this[vname].dtype),
                    dims=["N_PROF"],
                    coords={"N_PROF": np.arange(N_PROF)},
                    attrs=this[vname].attrs,
                    name=vname,
                )
            )
        new_ds = xr.merge(new_ds)

        # Now fill in each profile values:
        for i_prof, grp in enumerate(this.groupby(dummy_argo_uid)):
            i_uid, prof = grp
            for iv, vname in enumerate(this.data_vars):
                # ['N_PROF', 'N_LEVELS'] array:
                if len(new_ds[vname].dims) == 2:
                    y = new_ds[vname].values
                    x = prof[vname].values
                    try:
                        y[i_prof, 0: len(x)] = x
                    except Exception:
                        print(vname, "input", x.shape, "output", y[i_prof, :].shape)
                        raise
                    new_ds[vname].values = y
                else:  # ['N_PROF', ] array:
                    y = new_ds[vname].values
                    x = prof[vname].values
                    y[i_prof] = np.unique(x)[0]

        # Restore coordinate variables:
        new_ds = new_ds.set_coords([c for c in coords_list if c in new_ds])

        # Misc formatting
        new_ds = new_ds.sortby("TIME")
        new_ds = new_ds.argo.cast_types()
        new_ds = new_ds[np.sort(new_ds.data_vars)]
        new_ds.encoding = self.encoding  # Preserve low-level encoding information
        new_ds.attrs = self.attrs  # Preserve original attributes
        new_ds.argo._add_history("Transformed with point2profile")
        new_ds.argo._type = "profile"
        return new_ds
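    # Usage sketch (hypothetical `ds` indexed by N_POINTS):
    #
    #     dsp = ds.argo.point2profile()   # dimensions become (N_PROF, N_LEVELS)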
    def profile2point(self):
        """ Convert a collection of profiles to a collection of points """
        if self._type != "profile":
            raise InvalidDatasetStructure(
                "Method only available for a collection of profiles (N_PROF dimension)"
            )
        ds = self._obj

        # Remove all variables for which a dimension is length=0 (eg: N_HISTORY)
        dim_list = []
        for v in ds.data_vars:
            dims = ds[v].dims
            for d in dims:
                if len(ds[d]) == 0:
                    dim_list.append(d)
                    break

        # Drop dimensions and associated variables from this dataset
        ds = ds.drop_dims(np.unique(dim_list))

        # Remove any variable that is not with dimensions (N_PROF,) or (N_PROF, N_LEVELS)
        for v in ds:
            dims = list(ds[v].dims)
            dims = ".".join(dims)
            if dims not in ["N_PROF", "N_PROF.N_LEVELS"]:
                ds = ds.drop_vars(v)

        (ds,) = xr.broadcast(ds)
        ds = ds.stack({"N_POINTS": list(ds.dims)})
        ds = ds.reset_index("N_POINTS").drop_vars(["N_PROF", "N_LEVELS"])
        possible_coords = ["LATITUDE", "LONGITUDE", "TIME", "JULD", "N_POINTS"]
        for c in [c for c in possible_coords if c in ds.data_vars]:
            ds = ds.set_coords(c)

        # Remove index without data (useless points)
        ds = ds.where(~np.isnan(ds["PRES"]), drop=1)
        ds = ds.sortby("TIME")
        ds["N_POINTS"] = np.arange(0, len(ds["N_POINTS"]))
        ds = ds.argo.cast_types()
        ds = ds[np.sort(ds.data_vars)]
        ds.encoding = self.encoding  # Preserve low-level encoding information
        ds.attrs = self.attrs  # Preserve original attributes
        ds.argo._add_history("Transformed with profile2point")
        ds.argo._type = "point"
        return ds
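    # Usage sketch (hypothetical `dsp` indexed by N_PROF/N_LEVELS):
    #
    #     ds = dsp.argo.profile2point()   # back to a single N_POINTS dimension,
    #                                     # dropping levels where PRES is NaN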
    def interp_std_levels(self, std_lev):
        """ Returns a new dataset interpolated to new input levels

        Parameters
        ----------
        std_lev: list or np.array
            Standard levels used for interpolation

        Returns
        -------
        :class:`xarray.Dataset`
        """
        if (type(std_lev) is np.ndarray) | (type(std_lev) is list):
            std_lev = np.array(std_lev)
            if (np.any(sorted(std_lev) != std_lev)) | (np.any(std_lev < 0)):
                raise ValueError(
                    "Standard levels must be a list or a numpy array of positive and sorted values"
                )
        else:
            raise ValueError(
                "Standard levels must be a list or a numpy array of positive and sorted values"
            )

        if self._type != "profile":
            raise InvalidDatasetStructure(
                "Method only available for a collection of profiles"
            )
        ds = self._obj

        # Select profiles that have a max(pressure) > max(std_lev) to avoid extrapolation in that direction.
        # For levels < min(pressure), first level values of the profile are extended to the surface.
        i1 = ds["PRES"].max("N_LEVELS") >= std_lev[-1]
        dsp = ds.where(i1, drop=True)

        # Check if any profile is left, ie if any profile matches the requested depth
        if len(dsp["N_PROF"]) == 0:
            raise Warning(
                "None of the profiles can be interpolated (not reaching the requested depth range)."
            )

        # Add new vertical dimension, this has to be in the dataset to apply ufunc later
        dsp["Z_LEVELS"] = xr.DataArray(std_lev, dims={"Z_LEVELS": std_lev})

        # Init
        ds_out = xr.Dataset()

        # Vars to interpolate
        datavars = [
            dv
            for dv in list(dsp.variables)
            if set(["N_LEVELS", "N_PROF"]) == set(dsp[dv].dims)
            and "QC" not in dv
            and "ERROR" not in dv
        ]
        # Coords
        coords = [dv for dv in list(dsp.coords)]
        # Vars depending on N_PROF only
        solovars = [
            dv
            for dv in list(dsp.variables)
            if dv not in datavars
            and dv not in coords
            and "QC" not in dv
            and "ERROR" not in dv
        ]

        for dv in datavars:
            ds_out[dv] = linear_interpolation_remap(
                dsp.PRES,
                dsp[dv],
                dsp["Z_LEVELS"],
                z_dim="N_LEVELS",
                z_regridded_dim="Z_LEVELS",
            )
        ds_out = ds_out.rename({"remapped": "PRES_INTERPOLATED"})

        for sv in solovars:
            ds_out[sv] = dsp[sv]

        for co in coords:
            ds_out.coords[co] = dsp[co]

        ds_out = ds_out.drop_vars(["N_LEVELS", "Z_LEVELS"])
        ds_out = ds_out[np.sort(ds_out.data_vars)]
        ds_out.attrs = self.attrs  # Preserve original attributes
        ds_out.argo._add_history("Interpolated on standard levels")

        return ds_out
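    # Usage sketch (hypothetical `dsp`, a collection of profiles): interpolate onto
    # standard pressure levels every 10 dbar down to 1000 dbar:
    #
    #     ds_interp = dsp.argo.interp_std_levels(np.arange(0, 1010, 10))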
    def teos10(  # noqa: C901
        self,
        vlist: list = ["SA", "CT", "SIG0", "N2", "PV", "PTEMP"],
        inplace: bool = True,
    ):
        """ Add TEOS10 variables to the dataset

        By default, adds: 'SA', 'CT'
        Other possible variables: 'SIG0', 'N2', 'PV', 'PTEMP', 'SOUND_SPEED'
        Relies on the gsw library.

        If one exists, the correct CF standard name will be added to the attrs.

        Parameters
        ----------
        vlist: list(str)
            List with the name of variables to add.
            Must be a list containing one or more of the following string values:

            * `"SA"`
                Adds an absolute salinity variable
            * `"CT"`
                Adds a conservative temperature variable
            * `"SIG0"`
                Adds a potential density anomaly variable referenced to 0 dbar
            * `"N2"`
                Adds a buoyancy (Brunt-Vaisala) frequency squared variable.
                This variable has been regridded to the original pressure levels
                in the Dataset using a linear interpolation.
            * `"PV"`
                Adds a planetary vorticity variable calculated from
                :math:`\\frac{f N^2}{\\text{gravity}}`.
                This is not a TEOS-10 variable from the gsw toolbox, but is provided
                for convenience. This variable has been regridded to the original
                pressure levels in the Dataset using a linear interpolation.
            * `"PTEMP"`
                Adds a potential temperature variable
            * `"SOUND_SPEED"`
                Adds a sound speed variable
        inplace: boolean, True by default
            If True, return the input :class:`xarray.Dataset` with new TEOS10 variables
            added as a new :class:`xarray.DataArray`.
            If False, return a :class:`xarray.Dataset` with new TEOS10 variables.

        Returns
        -------
        :class:`xarray.Dataset`
        """
        if not with_gsw:
            raise ModuleNotFoundError("This functionality requires the gsw library")

        allowed = ["SA", "CT", "SIG0", "N2", "PV", "PTEMP", "SOUND_SPEED"]
        if any(var not in allowed for var in vlist):
            raise ValueError(f"vlist must be a subset of {allowed}, instead found {vlist}")

        warnings.warn(
            "Default variables will be reduced to 'SA' and 'CT' in 0.1.9",
            category=FutureWarning,
        )

        this = self._obj

        to_profile = False
        if self._type == "profile":
            to_profile = True
            this = this.argo.profile2point()

        # Get base variables as numpy arrays:
        psal = this["PSAL"].values
        temp = this["TEMP"].values
        pres = this["PRES"].values
        lon = this["LONGITUDE"].values
        lat = this["LATITUDE"].values

        # Coriolis
        f = gsw.f(lat)

        # Absolute salinity
        sa = gsw.SA_from_SP(psal, pres, lon, lat)

        # Conservative temperature
        ct = gsw.CT_from_t(sa, temp, pres)

        # Potential Temperature
        if "PTEMP" in vlist:
            pt = gsw.pt_from_CT(sa, ct)

        # Potential density referenced to surface
        if "SIG0" in vlist:
            sig0 = gsw.sigma0(sa, ct)

        # N2
        if "N2" in vlist or "PV" in vlist:
            n2_mid, p_mid = gsw.Nsquared(sa, ct, pres, lat)
            # N2 on the CT grid:
            ishallow = (slice(0, -1), Ellipsis)
            ideep = (slice(1, None), Ellipsis)

            def mid(x):
                return 0.5 * (x[ideep] + x[ishallow])

            n2 = np.zeros(ct.shape) * np.nan
            n2[1:-1] = mid(n2_mid)

        # PV:
        if "PV" in vlist:
            pv = f * n2 / gsw.grav(lat, pres)

        # Sound Speed:
        if "SOUND_SPEED" in vlist:
            cs = gsw.sound_speed(sa, ct, pres)

        # Back to the dataset:
        that = []
        if "SA" in vlist:
            SA = xr.DataArray(sa, coords=this["PSAL"].coords, name="SA")
            SA.attrs["long_name"] = "Absolute Salinity"
            SA.attrs["standard_name"] = "sea_water_absolute_salinity"
            SA.attrs["unit"] = "g/kg"
            that.append(SA)

        if "CT" in vlist:
            CT = xr.DataArray(ct, coords=this["TEMP"].coords, name="CT")
            CT.attrs["long_name"] = "Conservative Temperature"
            CT.attrs["standard_name"] = "sea_water_conservative_temperature"
            CT.attrs["unit"] = "degC"
            that.append(CT)

        if "SIG0" in vlist:
            SIG0 = xr.DataArray(sig0, coords=this["TEMP"].coords, name="SIG0")
            SIG0.attrs["long_name"] = "Potential density anomaly with reference pressure of 0 dbar"
            SIG0.attrs["standard_name"] = "sea_water_sigma_theta"
            SIG0.attrs["unit"] = "kg/m^3"
            that.append(SIG0)

        if "N2" in vlist:
            N2 = xr.DataArray(n2, coords=this["TEMP"].coords, name="N2")
            N2.attrs["long_name"] = "Squared buoyancy frequency"
            N2.attrs["unit"] = "1/s^2"
            that.append(N2)

        if "PV" in vlist:
            PV = xr.DataArray(pv, coords=this["TEMP"].coords, name="PV")
            PV.attrs["long_name"] = "Planetary Potential Vorticity"
            PV.attrs["unit"] = "1/m/s"
            that.append(PV)

        if "PTEMP" in vlist:
            PTEMP = xr.DataArray(pt, coords=this["TEMP"].coords, name="PTEMP")
            PTEMP.attrs["long_name"] = "Potential Temperature"
            PTEMP.attrs["standard_name"] = "sea_water_potential_temperature"
            PTEMP.attrs["unit"] = "degC"
            that.append(PTEMP)

        if "SOUND_SPEED" in vlist:
            CS = xr.DataArray(cs, coords=this["TEMP"].coords, name="SOUND_SPEED")
            CS.attrs["long_name"] = "Speed of sound"
            CS.attrs["standard_name"] = "speed_of_sound_in_sea_water"
            CS.attrs["unit"] = "m/s"
            that.append(CS)

        # Create a dataset with all new variables:
        that = xr.merge(that)

        # Add to the dataset essential Argo variables (allows to keep using the argo accessor):
        that = that.assign(
            {
                k: this[k]
                for k in [
                    "TIME",
                    "LATITUDE",
                    "LONGITUDE",
                    "PRES",
                    "PRES_ADJUSTED",
                    "PLATFORM_NUMBER",
                    "CYCLE_NUMBER",
                    "DIRECTION",
                ]
                if k in this
            }
        )

        # Manage output:
        if inplace:
            # Merge previous with new variables
            for v in that.variables:
                this[v] = that[v]
            if to_profile:
                this = this.argo.point2profile()
            for k in this:
                if k not in self._obj:
                    self._obj[k] = this[k]
            return self._obj
        else:
            if to_profile:
                return that.argo.point2profile()
            else:
                return that
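    # Usage sketch (hypothetical `ds` with PSAL/TEMP/PRES): add absolute salinity and
    # conservative temperature only, returned as a separate dataset:
    #
    #     ds_ts = ds.argo.teos10(vlist=['SA', 'CT'], inplace=False)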
    # @property
    # def plot(self):
    #     """Access plotting functions"""
    #     # Create a mutable instance on 1st call so that later changes will be reflected in future calls
    #     # https://stackoverflow.com/a/8140747
    #     if "plot" not in self._register:
    #         self._register["plot"] = [_PlotMethods(self)]
    #     return self._register["plot"][0]