Source code for argopy.extensions.carbonate_content

import numpy as np
import xarray as xr
from typing import Optional, Union

try:
    import PyCO2SYS as pyco2

    HAS_PYCO2SYS = True
except ImportError:
    HAS_PYCO2SYS = False
    pyco2 = None

try:
    from joblib import Parallel, delayed

    HAS_JOBLIB = True
except ImportError:
    HAS_JOBLIB = False
    Parallel = None
    delayed = None

from ..errors import InvalidDatasetStructure, DataNotFound
from . import register_argo_accessor, ArgoAccessorExtension

# import carbonate utilities
from ..utils.carbonate import (
    error_propagation,
    ChemistryConstants,
    CalculationOptions,
    Measurements,
    MeasurementErrors,
    CANYONData,
)


[docs] @register_argo_accessor("content") class CONTENT(ArgoAccessorExtension): """Nutrients and Carbonate System Variables predictor made consistent with chemistry constraints with CONTENT This is an implementation of the CONTENT method: a combination of CANYON-B Bayesian neural network mappings of AT, CT, pH and pCO2 made consistent with carbonate chemistry constraints for any set of water column P, T, S, O2, location data as an alternative to (spatial) climatological interpolation ([1]_). When using this method, please cite the paper. See Also -------- :meth:`content.predict`, :attr:`content.input_list`, :attr:`content.output_list` Examples -------- Load data, they must contain oxygen measurements: .. code-block:: python from argopy import DataFetcher # ArgoSet = DataFetcher(ds='bgc', mode='standard', params='DOXY', measured='DOXY').float(1902605) ArgoSet = DataFetcher(ds='bgc', mode='standard', params='DOXY', measured='DOXY').region([-75, -60, 20, 30, 0, 1000, '20250101', '20250201']) ds = ArgoSet.to_xarray() Once input data are loaded, make parameters predictions with or without specifying input errors on pressure (epres, in dbar), temperature (etemp, in degC), salinity (epsal, in PSU) and oxygen (edoxy, in micromole/kg). For interested users, uncertainties on predicted parameters can also be included. .. code-block:: python ds.argo.content.predict() ds.argo.content.predict(include_uncertainties=True) ds.argo.content.predict(epres=0.5, etemp=0.005, epsal=0.005, edoxy=0.01) ds.argo.content.predict(epres=0.5, etemp=0.005, epsal=0.005, edoxy=0.01, include_uncertainties=True) ds.argo.content.predict(epres=0.5, etemp=0.005, epsal=0.005, edoxy=np.array([...])) By default, if no input errors are specified, the following default values are used: - epres = 0.5 dbar - etemp = 0.005 degC - epsal = 0.005 PSU - edoxy = 1% of DOXY value Notes ----- This Python implementation is largely inspired by work from HCBSciencesProducts (https://github.com/HCBScienceProducts) which is available at https://github.com/HCBScienceProducts/CONTENT. This implementation relies heavily on the great PyCO2SYS package (https://github.com/mvdh7/PyCO2SYS) for carbonate chemistry calculations [2]_ which itself is a Python adaptation of the original CO2SYS software by C. Lewis and D. Wallace [3_] and subsequent Matlab functions CO2SYS.m by Van Heuven et al. [4]_ and errors.m and derivnum.m by Orr et al. [5]_. References ---------- .. [1] Bittig, H. C., Steinhoff, T., Claustre, H., Fiedler, B., Williams, N. L., Sauzede, R., Kortzinger, A., and Gattuso, J. P. (2018). An alternative to static climatologies: Robust estimation of open ocean CO2 variables and nutrient concentrations from T, S, and O2 data using Bayesian neural networks. Frontiers in Marine Science, 5, 328. doi:10.3389/fmars.2018.00328 .. [2] Humphreys, M. P., Lewis, E. R., Sharp, J. D., & Pierrot, D. (2022). PyCO2SYS v1.8: Marine carbonate system calculations in Python. Geoscientific Model Development, 15(1), 15-43. doi:10.5194/gmd-15-15-2022 .. [3] Lewis, E. R., & Wallace, D. W. R. (1998). Program developed for CO2 system calculations (No. cdiac: CDIAC-105). Environmental System Science Data Infrastructure for a Virtual Ecosystem (ESS-DIVE)(United States). doi:10.15485/1464255 .. [4] Van Heuven, S. M. A. C., Pierrot, D., Rae, J. W. B., Lewis, E., & Wallace, D. W. R. (2011). MATLAB program developed for CO2 system calculations. doi: 10.3334/CDIAC/otg.CO2SYS_MATLAB_v1.1 .. [5] Orr, J. C., Epitalon, J. M., Dickson, A. G., & Gattuso, J. P. (2018). Routine uncertainty propagation for the marine carbon dioxide system. Marine Chemistry, 207, 84-107. doi: 10.1016/j.marchem.2018.10.006 """ # Input parameter pairs for each of 6 carbonate calculations (pCO2/AT, pHT/AT, pCO2/pHT, pCO2/DIC, pHT/DIC, AT/DIC) _inpar = np.array([[3, 0], [2, 0], [3, 2], [3, 1], [2, 1], [0, 1]]) # Output parameters (complement of input parameters) _outpar = np.array([np.setdiff1d([0, 1, 2, 3], row) for row in _inpar]) # Flag type of carbonate parameters (used for PyCO2SYS) _flag_type = dict(AT=1, DIC=2, pHT=3, pCO2=4) # Parameter names used for CONTENT carbonate calculations _parameters = ["AT", "DIC", "pHT", "pCO2"] # Save indices for output parameters (i.e. _outpar) _svi = np.array([[1, 3], [2, 2], [3, 3], [1, 2], [2, 3], [1, 1]]) _input_list = ["LATITUDE", "LONGITUDE", "PRES", "TEMP", "PSAL", "DOXY"] """List of parameters required to make predictions""" _output_list = [ "PO4", "NO3", "SiOH4", "AT", "AT_SIGMA", "AT_SIGMA_MIN", "DIC", "DIC_SIGMA", "DIC_SIGMA_MIN", "pHT", "pHT_SIGMA", "pHT_SIGMA_MIN", "pCO2", "pCO2_SIGMA", "pCO2_SIGMA_MIN", ] """List of all possible output variables for CONTENT"""
[docs] def __init__(self, *args, **kwargs): if not HAS_PYCO2SYS: raise ImportError( "PyCO2SYS is required for the CONTENT extension. " "Install it with: pip install PyCO2SYS" ) super().__init__(*args, **kwargs) if self._argo._type != "point": raise InvalidDatasetStructure( "Method only available for a collection of points" ) if self._argo.N_POINTS == 0: raise DataNotFound("Empty dataset, no data to transform !")
@property def input_list(self) -> list[str]: """List of parameters required to make predictions with CONTENT Returns ------- list[str], default = ["LATITUDE", "LONGITUDE", "PRES", "TEMP", "PSAL", "DOXY"] """ return self._input_list.copy() @property def output_list(self) -> list[str]: """List of all possible output variables for CONTENT Returns ------- list[str], default = ['PO4', 'NO3', 'SiOH4', 'AT', 'AT_SIGMA', 'AT_SIGMA_MIN', 'DIC', 'DIC_SIGMA', 'DIC_SIGMA_MIN', 'pHT', 'pHT_SIGMA', 'pHT_SIGMA_MIN', 'pCO2', 'pCO2_SIGMA', 'pCO2_SIGMA_MIN'] Notes ----- DIC = CT in Bittig et al., (2018), keep it that way to be consistent with the canyon-b and canyon-med extensions. """ return self._output_list.copy() def get_param_attrs(self, param: str) -> dict: """ Get attributes for a given predicted parameter. Parameters ---------- param : str Parameter name. Valid options are: - 'NO3': Nitrate - 'PO4': Phosphate - 'SiOH4': Silicate - 'AT': Total alkalinity - 'DIC': Dissolved inorganic carbon - 'pHT': Total pH - 'pCO2': Partial pressure of CO2 Returns ------- dict Attribute dictionary containing: - 'units': Measurement units - 'long_name': Descriptive parameter name - 'comment': Data provenance note - 'reference': CANYON-B/CONTENT digital object identifier (DOI) """ attrs = {} if param in ["NO3", "PO4", "SiOH4", "AT", "DIC"]: attrs.update({"units": "micromole/kg"}) if param == "NO3": attrs.update({"long_name": "Nitrate concentration"}) attrs.update({"comment": "Synthetic variable predicted using CANYON-B"}) if param == "PO4": attrs.update({"long_name": "Phosphate concentration"}) attrs.update({"comment": "Synthetic variable predicted using CANYON-B"}) if param == "SiOH4": attrs.update({"long_name": "Silicate concentration"}) attrs.update({"comment": "Synthetic variable predicted using CANYON-B"}) if param == "AT": attrs.update({"long_name": "Total alkalinity"}) attrs.update({"comment": "Synthetic variable predicted using CONTENT"}) if param == "DIC": attrs.update({"long_name": "Total dissolved inorganic carbon"}) attrs.update({"comment": "Synthetic variable predicted using CONTENT"}) if param == "pHT": attrs.update({"long_name": "Total pH"}) attrs.update({"units": "insitu total scale"}) attrs.update({"comment": "Synthetic variable predicted using CONTENT"}) if param == "pCO2": attrs.update({"long_name": "Partial pressure of CO2"}) attrs.update({"units": "micro atm"}) attrs.update({"comment": "Synthetic variable predicted using CONTENT"}) attrs.update({"reference": "https://doi.org/10.3389/fmars.2018.00328"}) return attrs def get_canyon_b_raw_predictions( self, params: list, epres: Optional[float] = None, etemp: Optional[float] = None, epsal: Optional[float] = None, edoxy: Optional[Union[float, np.ndarray]] = None, n_jobs: Optional[int] = -1, ) -> dict: """Get raw CANYON-B predictions for specified parameters. The raw predicted values contains the predicted parameter values along with their associated uncertainties (ci, cim, cin, cii) and input effects (inx). See `CanyonB._predict` for more details. Parameters ---------- params : list Parameters that will be predicted. Must be a list containing one or more parameters from 'AT', 'DIC', 'pHT', 'pCO2', 'NO3', 'PO4', 'SiOH4' epres, etemp, epsal : float, optional Input errors (defaults: 0.5, 0.005, 0.005) edoxy : float or np.ndarray, optional Oxygen input error (default: 1% of doxy) n_jobs : int, optional Number of parallel jobs to run (default: -1, use all available CPUs) Returns ------- dict Dictionary with raw CANYON-B outputs for each parameter See Also -------- :class:`Dataset.argo.canyon_b` """ # Compute input matrix once for all parameters (optimization) data = self._obj.argo.canyon_b.create_canyonb_input_matrix() # Helper function to predict a single parameter def predict_param(param): """Process a single parameter prediction""" out = self._obj.argo.canyon_b._predict( param=param, epres=epres, etemp=etemp, epsal=epsal, edoxy=edoxy, data=data, ) return param, out # Get raw predictions for each parameter (in parallel) with Parallel(n_jobs=n_jobs, prefer=None) as parallel: results = parallel(delayed(predict_param)(param) for param in params) # Convert results list to dict raw_outputs = {param: out for param, out in results} return raw_outputs def setup_pre_carbonate_calculations( self, canyonb_results: dict, epres: Optional[float] = None, etemp: Optional[float] = None, epsal: Optional[float] = None, edoxy: Optional[Union[float, np.ndarray]] = None, ) -> dict: """ Prepare the results from CANYON-B for carbonate calculations. This function processes the raw results and sets up the necessary matrices for further carbonate chemistry calculations, returning structured data objects. Parameters ---------- canyonb_results : dict Raw results from CANYON-B predictions. epres, etemp, epsal : float, optional Input errors (defaults: 0.5, 0.005, 0.005) edoxy : float or np.ndarray, optional Oxygen input error (default: 1% of doxy) Returns ------- canyon_data : CANYONData Object with raw predictions, covariance, and correlation errors : MeasurementErrors Measurement uncertainties for salinity and temperature rawout : dict Raw output arrays for each carbonate parameter sigma : dict Uncertainty arrays for each carbonate parameter """ # Number of observations nol = self._argo.N_POINTS # Setting up default uncertainties if not provided (must match canyon_b._predict defaults) epres = 0.5 if epres is None else epres etemp = 0.005 if etemp is None else etemp epsal = 0.005 if epsal is None else epsal edoxy = 0.01 * self._obj.DOXY.values if edoxy is None else edoxy # Create MeasurementErrors object (see utils/carbonate.py) errors = MeasurementErrors(salinity=epsal, temperature=etemp) # Copy to raw output and start mixing calculations rawout = {} for param in self._parameters: rawout[param] = np.full((nol, 4), np.nan) rawout[param][:, 0] = canyonb_results[param][param].flatten() # Deal with weights and uncertainties sigma = {} for param in self._parameters: sigma[param] = np.full((nol, 4), np.nan) # Nutrient uncertainty (variance(inputs) + variance(training data) + variance(MLP)) where MLP is the CANYON-B neural network canyonb_results["SiOH4"]["eSiOH4"] = canyonb_results["SiOH4"]["SiOH4_ci"] canyonb_results["PO4"]["ePO4"] = canyonb_results["PO4"]["PO4_ci"] # covariance matrix of CANYON-B AT, CT, pH, pCO2 due to common inputs canyon_cov = np.full((4, 4, nol), np.nan) # Create error array error_array = [etemp, epsal, edoxy, epres] error_array = [ np.full(nol, e) if np.isscalar(e) else np.asarray(e).flatten() for e in error_array ] etemp, epsal, edoxy, epres = error_array # Compute error matrix error_matrix = np.column_stack([etemp, epsal, edoxy, epres]) ** 2 # Construct covariance matrix for i in range(4): for j in range(i, 4): inx_i = canyonb_results[self._parameters[i]][ f"{self._parameters[i]}_inx" ] inx_j = canyonb_results[self._parameters[j]][ f"{self._parameters[j]}_inx" ] canyon_cov[i, j, :] = np.sum(inx_i * inx_j * error_matrix, axis=1) canyon_cov[j, i, :] = canyon_cov[i, j, :] # Correlation matrix of CANYON-B AT, CT, pH, pCO2 due to common inputs and # CANYON-B estimation uncertainty cyr = canyon_cov * 0 # Add full variance on diagonal # variance(inputs)[local] + variance(training data)[global] + variance(MLP)[local] sigma["AT"][:, 0] = canyonb_results["AT"]["AT_ci"] canyon_cov[0, 0, :] = canyonb_results["AT"]["AT_ci"] ** 2 sigma["DIC"][:, 0] = canyonb_results["DIC"]["DIC_ci"] canyon_cov[1, 1, :] = canyonb_results["DIC"]["DIC_ci"] ** 2 sigma["pHT"][:, 0] = canyonb_results["pHT"]["pHT_ci"] canyon_cov[2, 2, :] = canyonb_results["pHT"]["pHT_ci"] ** 2 sigma["pCO2"][:, 0] = canyonb_results["pCO2"]["pCO2_ci"] canyon_cov[3, 3, :] = canyonb_results["pCO2"]["pCO2_ci"] ** 2 # Convert covariance matrices to correlation matrices for i in range(nol): # Extract standard deviations from diagonal elements std_devs = np.sqrt(np.diag(canyon_cov[:, :, i])) # Create normalization matrix (std[i] * std[j] for all i,j pairs) std_outer = np.outer(std_devs, std_devs) # Convert covariance to correlation: corr[i,j] = cov[i,j] / (std[i] * std[j]) cyr[:, :, i] = canyon_cov[:, :, i] / std_outer # Create CANYONData object canyon_data = CANYONData( b_raw=canyonb_results, covariance=canyon_cov, correlation=cyr ) return canyon_data, errors, rawout, sigma def compute_derivatives_carbonate_system( self, canyon_data: CANYONData, options: CalculationOptions = None, n_jobs: Optional[int] = -1, ) -> np.ndarray: """ Compute derivatives of carbonate system calculations for all 6 parameter pairs (pCO2/AT, pHT/AT, pCO2/pHT, pCO2/DIC, pHT/DIC, AT/DIC). Parameters ---------- canyon_data : CANYONData CANYON-B predictions with covariance and correlation matrices options : CalculationOptions, optional PyCO2SYS calculation options (uses defaults if None) n_jobs : int, optional Number of parallel jobs to run (default: -1, use all available CPUs) Returns ------- np.ndarray Array containing computed derivatives """ if options is None: options = CalculationOptions() # Create Measurements object from self._obj data measurements = Measurements( salinity=self._obj.PSAL.values, temperature=self._obj.TEMP.values, pressure=self._obj.PRES.values, total_silicate=canyon_data.b_raw["SiOH4"]["SiOH4"], total_phosphate=canyon_data.b_raw["PO4"]["PO4"], total_borate=0.0, # 0 following CONTENT matlab implementation ) # Number of observations nol = self._argo.N_POINTS # Initialize output array dcout = np.full((4, 4, 2, nol), np.nan) # Parameter name mapping for derivatives parameters_derived = { "AT": {"par1": "d_alkalinity__d_par1", "par2": "d_alkalinity__d_par2"}, "DIC": {"par1": "d_dic__d_par1", "par2": "d_dic__d_par2"}, "pHT": {"par1": "d_pH__d_par1", "par2": "d_pH__d_par2"}, "pCO2": {"par1": "d_pCO2__d_par1", "par2": "d_pCO2__d_par2"}, } # Helper function to compute derivatives for one parameter pair def compute_pair_derivatives(p): # Get parameter names for this combination par1_name = self._parameters[self._inpar[p, 0]] par2_name = self._parameters[self._inpar[p, 1]] # Get parameter values and types par1 = canyon_data.b_raw[par1_name][par1_name] par2 = canyon_data.b_raw[par2_name][par2_name] par1_type = self._flag_type[par1_name] par2_type = self._flag_type[par2_name] # Compute derivatives with respect to both par1 and par2 deriv = pyco2.sys( par1=par1, par2=par2, par1_type=par1_type, par2_type=par2_type, salinity=measurements.salinity, temperature=measurements.temperature, pressure=measurements.pressure, total_silicate=measurements.total_silicate, total_phosphate=measurements.total_phosphate, opt_pH_scale=options.pH_scale, opt_k_carbonic=options.k_carbonic, opt_k_bisulfate=options.k_bisulfate, grads_of=["pH", "pCO2", "alkalinity", "dic"], grads_wrt=["par1", "par2"], ) # Get output parameter names out_param1 = self._parameters[self._outpar[p, 0]] out_param2 = self._parameters[self._outpar[p, 1]] # Return derivatives for this pair return (p, deriv, out_param1, out_param2) # Compute derivatives for all 6 parameter pairs (in parallel) with Parallel(n_jobs=n_jobs, prefer=None) as parallel: results = parallel(delayed(compute_pair_derivatives)(p) for p in range(6)) # Store results in dcout array for p, deriv, out_param1, out_param2 in results: # Store derivatives with respect to par1 dcout[self._inpar[p, 0], self._inpar[p, 1], 0, :] = deriv[ parameters_derived[out_param1]["par1"] ] dcout[self._inpar[p, 0], self._inpar[p, 1], 1, :] = deriv[ parameters_derived[out_param2]["par1"] ] # Store derivatives with respect to par2 dcout[self._inpar[p, 1], self._inpar[p, 0], 0, :] = deriv[ parameters_derived[out_param1]["par2"] ] dcout[self._inpar[p, 1], self._inpar[p, 0], 1, :] = deriv[ parameters_derived[out_param2]["par2"] ] return dcout def compute_uncertainties_carbonate_system( self, canyon_data: CANYONData, errors: MeasurementErrors, rawout: dict, sigma: dict, constants: ChemistryConstants = None, options: CalculationOptions = None, n_jobs: Optional[int] = -1, ) -> tuple[dict, dict]: """ Compute carbonate system values and uncertainties for all 6 parameter pairs. This function uses the K1K2 constants of Lueker et al., (2000), KSO4 of Dickson 1990 & TB of Uppstrom 1979 and adds localized error calculations including parameter correlation due to common inputs. Parameters ---------- canyon_data : CANYONData CANYON-B predictions with covariance and correlation matrices errors : MeasurementErrors Measurement uncertainties for salinity and temperature rawout : dict Raw output arrays for each parameter (i.e. AT, DIC, pHT, pCO2) sigma : dict Uncertainty arrays for each parameter (i.e. AT, DIC, pHT, pCO2) constants : ChemistryConstants, optional Equilibrium constant uncertainties (uses defaults if None) options : CalculationOptions, optional PyCO2SYS calculation options (uses defaults if None) n_jobs : int, optional Number of parallel jobs to run (default: -1, use all available CPUs) Returns ------- rawout : dict Computed carbonate system values for each parameter (updated) sigma : dict Uncertainties for each parameter (updated) """ # Set defaults if not provided if constants is None: constants = ChemistryConstants() if options is None: options = CalculationOptions() # Create Measurements object from self._obj data measurements = Measurements( salinity=self._obj.PSAL.values, temperature=self._obj.TEMP.values, pressure=self._obj.PRES.values, total_silicate=canyon_data.b_raw["SiOH4"]["SiOH4"], total_phosphate=canyon_data.b_raw["PO4"]["PO4"], total_borate=0.0, # 0 following CONTENT matlab implementation ) # Parameter name mapping for output parameters_derived = dict(AT="alkalinity", DIC="dic", pHT="pH", pCO2="pCO2") # Helper function to compute uncertainties for one parameter combination def compute_pair_uncertainties(p): # Get parameter types and values for this combination par1_name = self._parameters[self._inpar[p, 0]] par2_name = self._parameters[self._inpar[p, 1]] par1_value = canyon_data.b_raw[par1_name][par1_name] par2_value = canyon_data.b_raw[par2_name][par2_name] par1_type = self._flag_type[par1_name] par2_type = self._flag_type[par2_name] # Compute carbonate system with error propagation deriv, uncertainties = error_propagation( inpar=self._inpar, carbonate_index=p, parameter_1=par1_value, parameter_2=par2_value, parameter_1_type=par1_type, parameter_2_type=par2_type, measurements=measurements, errors=errors, canyon_data=canyon_data, constants=constants, options=options, ) # Get output parameter names out_param1 = self._parameters[self._outpar[p, 0]] out_param2 = self._parameters[self._outpar[p, 1]] return (p, deriv, uncertainties, out_param1, out_param2) # Compute uncertainties for all 6 parameter combinations (in parallel) with Parallel(n_jobs=n_jobs, prefer=None) as parallel: results = parallel(delayed(compute_pair_uncertainties)(p) for p in range(6)) # Store results for p, deriv, uncertainties, out_param1, out_param2 in results: # Store output values for the two derived parameters rawout[out_param1][:, self._svi[p, 0]] = deriv[ parameters_derived[out_param1] ] rawout[out_param2][:, self._svi[p, 1]] = deriv[ parameters_derived[out_param2] ] # Store uncertainty values sigma[out_param1][:, self._svi[p, 0]] = uncertainties[ parameters_derived[out_param1] ] sigma[out_param2][:, self._svi[p, 1]] = uncertainties[ parameters_derived[out_param2] ] return rawout, sigma def compute_weighted_mean_covariance( self, dcout: np.ndarray, canyon_data: CANYONData, sigma: np.ndarray, ) -> dict: """ Compute weighted mean and covariance for each carbonate parameter. Parameters ---------- dcout : np.ndarray Derivatives of carbonate system calculations canyon_data : CANYONData CANYON-B predictions with covariance and correlation matrices sigma : np.ndarray Uncertainty arrays for each carbonate parameter Returns ------- cocov : dict Weighted mean covariance matrices for each carbonate parameter """ # Number of observations nol = self._argo.N_POINTS # Build weighted mean covariance matrix for all parameters cocov = {} for k in range(4): cocov[self._parameters[k]] = np.full((4, 4, nol), np.nan) # Fill diagonal with variances for i in range(4): cocov[self._parameters[k]][i, i, :] = ( sigma[self._parameters[k]][:, i] ** 2 ) for p in range(6): if k in self._outpar[p, :]: # find calc (p) that calculated parameter k i = np.where(self._outpar[p, :] == k)[0][0] # Covariance from direct CANYON-B and calc (p): inpar[p,:] cocov[self._parameters[k]][0, self._svi[p, i], :] = ( 1 * dcout[self._inpar[p, 0], self._inpar[p, 1], i, :] * canyon_data.covariance[k, self._inpar[p, 0], :] + 1 * dcout[self._inpar[p, 1], self._inpar[p, 0], i, :] * canyon_data.covariance[k, self._inpar[p, 1], :] ) # Mirror the covariance cocov[self._parameters[k]][self._svi[p, i], 0, :] = cocov[ self._parameters[k] ][0, self._svi[p, i], :] # Find second calc (o): inpar[o,:] for covariance term between calc (p) and calc (o) if p < 5: for o in range(p + 1, 6): if ( k in self._outpar[o, :] ): # find calc (o) that calculated parameter k j = np.where(self._outpar[o, :] == k)[0][0] # Covariance from calcs (p) and (o) cocov[self._parameters[k]][ self._svi[p, i], self._svi[o, j], : ] = ( dcout[self._inpar[p, 0], self._inpar[p, 1], i, :] * dcout[self._inpar[o, 0], self._inpar[o, 1], j, :] * canyon_data.covariance[ self._inpar[p, 0], self._inpar[o, 0], : ] + dcout[self._inpar[p, 0], self._inpar[p, 1], i, :] * dcout[self._inpar[o, 1], self._inpar[o, 0], j, :] * canyon_data.covariance[ self._inpar[p, 0], self._inpar[o, 1], : ] + dcout[self._inpar[p, 1], self._inpar[p, 0], i, :] * dcout[self._inpar[o, 0], self._inpar[o, 1], j, :] * canyon_data.covariance[ self._inpar[p, 1], self._inpar[o, 0], : ] + dcout[self._inpar[p, 1], self._inpar[p, 0], i, :] * dcout[self._inpar[o, 1], self._inpar[o, 0], j, :] * canyon_data.covariance[ self._inpar[p, 1], self._inpar[o, 1], : ] ) # Mirror the covariance cocov[self._parameters[k]][ self._svi[o, j], self._svi[p, i], : ] = cocov[self._parameters[k]][ self._svi[p, i], self._svi[o, j], : ] return cocov def define_weights(self, sigma: dict) -> dict: """ Define weights for each carbonate parameter based on uncertainties. Parameters ---------- sigma : dict Uncertainty arrays for each carbonate parameter Returns ------- weights : dict Weights for each carbonate parameter """ weights = {} for i in range(4): weights[self._parameters[i]] = ( 1 / sigma[self._parameters[i]] ** 2 ) # weights weights[f"{self._parameters[i]}sum"] = np.sum( weights[self._parameters[i]], axis=1 ) # sum all to normalize weights to 1 return weights def compute_weighted_mean_outputs_and_uncertainties( self, rawout: dict, sigma: dict, cocov: dict, canyon_data: CANYONData ) -> dict: """ Compute weighted mean outputs and their uncertainties for each carbonate parameter. Parameters ---------- rawout : dict Raw output arrays for each carbonate parameter sigma : dict Uncertainty arrays for each carbonate parameter cocov : dict Weighted mean covariance matrices for each carbonate parameter canyon_data : CANYONData CANYON-B predictions with raw results Returns ------- out : dict Final output dictionary containing: - {param}: weighted mean values for each carbonate parameter - {param}_sigma: total uncertainty of the CONTENT estimate, that is the sum of sigma_min and sigma_mean where : - sigma_mean = standard deviation associated with the weighted mean (of the four carbonate system variable estimates) - sigma_min = standard deviation propagated from the uncertainty of the terms of the weighted mean (see Eq. 6 in [1]_) - {param}_sigma_min: standard deviation propagated from the uncertainty of the terms of the weighted mean (see Eq. 6 in [1]_) - {param}_raw: raw calculation values - sigma: record of weighted sigmas - canyon_b_raw: CANYON-B structure Reference --------- .. [1] Bittig, H. C., Steinhoff, T., Claustre, H., Fiedler, B., Williams, N. L., Sauzede, R., Kortzinger, A., and Gattuso, J. P. (2018). An alternative to static climatologies: Robust estimation of open ocean CO2 variables and nutrient concentrations from T, S, and O2 data using Bayesian neural networks. Frontiers in Marine Science, 5, 328. doi:10.3389/fmars.2018.00328 """ # Number of observations nol = self._argo.N_POINTS # Define weights based on uncertainties w = self.define_weights(sigma) # Output dictionary out = {} # Output each variable for i in range(4): param = self._parameters[i] # Weighted mean out[param] = ( np.sum(w[param] * rawout[param], axis=1) / w[f"{param}sum"] ) # / [param unit] # Standard deviation about the mean (of weighted mean...; for 4 samples.) sigma_delta = np.sqrt( np.sum( w[param] * (rawout[param] - out[param][:, np.newaxis]) ** 2, axis=1 ) / (w[f"{param}sum"] - np.sum(w[param] ** 2, axis=1) / w[f"{param}sum"]) ) # / [param unit]; is localized # Std propagation from correlated inputs for weighted mean weight_ratio = w[param] / w[f"{param}sum"][:, np.newaxis] # Create the weighted covariance product weighted_cov = np.zeros(nol) for row in range(4): for col in range(4): weighted_cov += ( weight_ratio[:, row] * weight_ratio[:, col] * cocov[param][row, col, :] ) sigma_propagated = np.sqrt(weighted_cov) # / [param unit]; is localized out[f"{param}_sigma"] = sigma_delta + sigma_propagated # / [param unit] out[f"{param}_sigma_min"] = sigma_propagated # / [param unit] # Add raw calculations for i in range(4): param = self._parameters[i] out[f"{param}_raw"] = rawout[param] out["sigma"] = sigma # record of weighted sigmas out["canyon_b_raw"] = canyon_data.b_raw # CANYON-B structure return out def _predict( self, epres: Optional[float] = None, etemp: Optional[float] = None, epsal: Optional[float] = None, edoxy: Optional[Union[float, np.ndarray]] = None, ) -> dict: """ Predict CONTENT variables Parameters ---------- epres, etemp, epsal : float, optional Input errors edoxy : float or np.ndarray, optional Oxygen input error (default: 1% of doxy) Returns ------- predictions : dict Dictionary containing predicted variables with uncertainties """ # Step 1: Get raw CANYON-B predictions for all carbonate parameters + nutrients params_to_predict = [ "AT", "DIC", "pHT", "pCO2", "PO4", "SiOH4", "NO3", ] # Technically, NO3 is not needed for CONTENT but I included it for the final output. canyonb_results = self.get_canyon_b_raw_predictions( params=params_to_predict, epres=epres, etemp=etemp, epsal=epsal, edoxy=edoxy ) # Step 2: Setup pre-carbonate calculations (covariance, correlation, etc.) canyon_data, errors, rawout, sigma = self.setup_pre_carbonate_calculations( canyonb_results=canyonb_results, epres=epres, etemp=etemp, epsal=epsal, edoxy=edoxy, ) # Step 3: Compute derivatives for carbonate system dcout = self.compute_derivatives_carbonate_system(canyon_data=canyon_data) # Step 4: Compute uncertainties for carbonate system rawout, sigma = self.compute_uncertainties_carbonate_system( canyon_data=canyon_data, errors=errors, rawout=rawout, sigma=sigma ) # Step 5: Compute weighted mean covariance cocov = self.compute_weighted_mean_covariance( dcout=dcout, canyon_data=canyon_data, sigma=sigma ) # Step 6: Compute weighted mean outputs and uncertainties results = self.compute_weighted_mean_outputs_and_uncertainties( rawout=rawout, sigma=sigma, cocov=cocov, canyon_data=canyon_data ) return results
[docs] def predict( self, epres: Optional[float] = None, etemp: Optional[float] = None, epsal: Optional[float] = None, edoxy: Optional[Union[float, np.ndarray]] = None, include_uncertainties: Optional[bool] = False, ) -> xr.Dataset: """ Make predictions using the CONTENT method. Parameters ---------- epres, etemp, epsal : float, optional Input errors edoxy : float or np.ndarray, optional Oxygen input error (default: 1% of doxy) include_uncertainties : bool, optional If True, include uncertainty estimates for each predicted parameter Returns ------- :class:`xr.Dataset` Input dataset augmented with predicted parameters """ # Get predictions for all 4 carbonate parameters (CONTENT method) and nutrients (CANYON-B method) prediction = self._predict(epres=epres, etemp=etemp, epsal=epsal, edoxy=edoxy) for param in ["NO3", "PO4", "SiOH4"]: self._obj[param] = xr.zeros_like(self._obj["TEMP"]) self._obj[param].attrs = self.get_param_attrs(param) values = ( prediction["canyon_b_raw"][param][param].astype(np.float32).squeeze() ) self._obj[param].values = np.atleast_1d(values) # Update history for nutrients if self._argo: self._argo.add_history( "Added CANYON-B predictions for [%s]" % (",".join(["NO3", "PO4", "SiOH4"])) ) for param in self._parameters: self._obj[param] = xr.zeros_like(self._obj["TEMP"]) self._obj[param].attrs = self.get_param_attrs(param) values = prediction[param].astype(np.float32).squeeze() self._obj[param].values = np.atleast_1d(values) if include_uncertainties: # sigma self._obj[f"{param}_SIGMA"] = xr.zeros_like(self._obj[param]) self._obj[f"{param}_SIGMA"].attrs = { "long_name": f"Total uncertainty on {param} using CONTENT", "units": self._obj[param].attrs.get("units", ""), } values = prediction[f"{param}_sigma"].astype(np.float32).squeeze() self._obj[f"{param}_SIGMA"].values = np.atleast_1d(values) # sigma min self._obj[f"{param}_SIGMA_MIN"] = xr.zeros_like(self._obj[param]) self._obj[f"{param}_SIGMA_MIN"].attrs = { "long_name": f"Uncertainty propagated from the input variables on {param} using CONTENT", "units": self._obj[param].attrs.get("units", ""), } values = prediction[f"{param}_sigma_min"].astype(np.float32).squeeze() self._obj[f"{param}_SIGMA_MIN"].values = np.atleast_1d(values) # Update history for carbonate parameters if self._argo: self._argo.add_history( "Added CONTENT predictions for [%s]" % (",".join(self._parameters)) ) # Create and return xarray Dataset with predictions return self._obj