"""
Mathematically or statistically compute something out of xarray objects
"""
import numpy as np
from scipy import interpolate
import xarray as xr
from packaging import version
import logging
from ..errors import InvalidOption
log = logging.getLogger("argopy.utils.compute")
#
# From xarrayutils : https://github.com/jbusecke/xarrayutils/blob/master/xarrayutils/vertical_coordinates.py
# Direct integration of those 2 functions to minimize dependencies and possibility of tuning them to our needs
#
[docs]
def linear_interpolation_remap(
z, data, z_regridded, z_dim=None, z_regridded_dim="regridded", output_dim="remapped"
):
# interpolation called in xarray ufunc
def _regular_interp(x, y, target_values):
# remove all nans from input x and y
idx = np.logical_or(np.isnan(x), np.isnan(y))
x = x[~idx]
y = y[~idx]
# Need at least 5 points in the profile to interpolate, otherwise, return NaNs
if len(y) < 5:
interpolated = np.empty(len(target_values))
interpolated[:] = np.nan
else:
# replace nans in target_values without of bound Values (just in case)
target_values = np.where(
~np.isnan(target_values), target_values, np.nanmax(x) + 1
)
# Interpolate with fill value parameter to extend min pressure toward 0
interpolated = interpolate.interp1d(
x, y, bounds_error=False, fill_value=(y[0], y[-1])
)(target_values)
return interpolated
# infer dim from input
if z_dim is None:
if len(z.dims) != 1:
raise RuntimeError("if z_dim is not specified, x must be a 1D array.")
dim = z.dims[0]
else:
dim = z_dim
# if dataset is passed drop all data_vars that dont contain dim
if isinstance(data, xr.Dataset):
raise ValueError("Dataset input is not supported yet")
# TODO: for a dataset input just apply the function for each appropriate array
if version.parse(xr.__version__) > version.parse("0.15.0"):
kwargs = dict(
input_core_dims=[[dim], [dim], [z_regridded_dim]],
output_core_dims=[[output_dim]],
vectorize=True,
dask="parallelized",
output_dtypes=[data.dtype],
dask_gufunc_kwargs={
"output_sizes": {output_dim: len(z_regridded[z_regridded_dim])}
},
)
else:
kwargs = dict(
input_core_dims=[[dim], [dim], [z_regridded_dim]],
output_core_dims=[[output_dim]],
vectorize=True,
dask="parallelized",
output_dtypes=[data.dtype],
output_sizes={output_dim: len(z_regridded[z_regridded_dim])},
)
remapped = xr.apply_ufunc(_regular_interp, z, data, z_regridded, **kwargs)
remapped.coords[output_dim] = z_regridded.rename(
{z_regridded_dim: output_dim}
).coords[output_dim]
return remapped
[docs]
def groupby_remap(
z,
data,
z_regridded, # noqa C901
z_dim=None,
z_regridded_dim="regridded",
output_dim="remapped",
select="deep",
right=False,
):
"""todo: Need a docstring here !"""
# sub-sampling called in xarray ufunc
def _subsample_bins(x, y, target_values):
# remove all nans from input x and y
try:
idx = np.logical_or(np.isnan(x), np.isnan(y))
except TypeError:
log.debug(
"Error with this '%s' y data content: %s" % (type(y), str(np.unique(y)))
)
raise
x = x[~idx]
y = y[~idx]
ifound = np.digitize(
x, target_values, right=right
) # ``bins[i-1] <= x < bins[i]``
ifound -= 1 # Because digitize returns a 1-based indexing, we need to remove 1
y_binned = np.ones_like(target_values) * np.nan
for ib, this_ibin in enumerate(np.unique(ifound)):
ix = np.where(ifound == this_ibin)
iselect = ix[-1]
# Map to y value at specific x index in the bin:
if select == "shallow":
iselect = iselect[0] # min/shallow
mapped_value = y[iselect]
elif select == "deep":
iselect = iselect[-1] # max/deep
mapped_value = y[iselect]
elif select == "middle":
iselect = iselect[
np.where(x[iselect] >= np.median(x[iselect]))[0][0]
] # median/middle
mapped_value = y[iselect]
elif select == "random":
iselect = iselect[np.random.randint(len(iselect))]
mapped_value = y[iselect]
# or Map to y statistics in the bin:
elif select == "mean":
mapped_value = np.nanmean(y[iselect])
elif select == "min":
mapped_value = np.nanmin(y[iselect])
elif select == "max":
mapped_value = np.nanmax(y[iselect])
elif select == "median":
mapped_value = np.median(y[iselect])
else:
raise InvalidOption("`select` option has invalid value (%s)" % select)
y_binned[this_ibin] = mapped_value
return y_binned
# infer dim from input
if z_dim is None:
if len(z.dims) != 1:
raise RuntimeError("if z_dim is not specified, x must be a 1D array.")
dim = z.dims[0]
else:
dim = z_dim
# if dataset is passed drop all data_vars that don't contain dim
if isinstance(data, xr.Dataset):
raise ValueError("Dataset input is not supported yet")
# TODO: for a dataset input just apply the function for each appropriate array
if version.parse(xr.__version__) > version.parse("0.15.0"):
kwargs = dict(
input_core_dims=[[dim], [dim], [z_regridded_dim]],
output_core_dims=[[output_dim]],
vectorize=True,
dask="parallelized",
output_dtypes=[data.dtype],
dask_gufunc_kwargs={
"output_sizes": {output_dim: len(z_regridded[z_regridded_dim])}
},
)
else:
kwargs = dict(
input_core_dims=[[dim], [dim], [z_regridded_dim]],
output_core_dims=[[output_dim]],
vectorize=True,
dask="parallelized",
output_dtypes=[data.dtype],
output_sizes={output_dim: len(z_regridded[z_regridded_dim])},
)
remapped = xr.apply_ufunc(_subsample_bins, z, data, z_regridded, **kwargs)
remapped.coords[output_dim] = z_regridded.rename(
{z_regridded_dim: output_dim}
).coords[output_dim]
return remapped