# Source code for cde.density_simulation.BaseConditionalDensitySimulation

import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import warnings
from mpl_toolkits.mplot3d import Axes3D
from cde import ConditionalDensity



class BaseConditionalDensitySimulation(ConditionalDensity):
    """Base class for simulators of a conditional density p(y|x).

    ``pdf``, ``cdf``, ``simulate`` and ``simulate_conditional`` raise
    NotImplementedError here and are meant to be overridden. The moment and
    tail-risk methods dispatch on the flags ``has_pdf``, ``has_cdf`` and
    ``can_sample`` to helper methods (``_mean_mc``, ``_covariance_pdf``, ...)
    that are not defined in this class - presumably they are inherited from
    ConditionalDensity.
    """

    def pdf(self, X, Y):
        """ Conditional probability density function p(y|x) of the underlying probability model

        Args:
          X: x to be conditioned on - numpy array of shape (n_points, ndim_x)
          Y: y target values for which the pdf shall be evaluated - numpy array of shape (n_points, ndim_y)

        Returns:
          p(Y|X) conditional density values for the provided X and Y - numpy array of shape (n_points, )
        """
        raise NotImplementedError

    def log_pdf(self, X, Y):
        """ Conditional log-probability log p(y|x), computed as np.log(self.pdf(X, Y)).

        Args:
          X: numpy array to be conditioned on - shape: (n_samples, n_dim_x)
          Y: numpy array of y targets - shape: (n_samples, n_dim_y)

        Returns:
          conditional log-probability log p(y|x) - numpy array of shape (n_query_samples, )
        """
        # This method is numerically unfavorable and should be overwritten
        # with a numerically stable method.
        with warnings.catch_warnings():
            # Silence warnings raised while taking the log (e.g. log of zero density).
            warnings.simplefilter("ignore")
            log_prob = np.log(self.pdf(X, Y))
        return log_prob

    def cdf(self, X, Y):
        """ Conditional cumulated probability density function P(Y < y | x) of the underlying probability model

        Args:
          X: x to be conditioned on - numpy array of shape (n_points, ndim_x)
          Y: y target values for which the cdf shall be evaluated - numpy array of shape (n_points, ndim_y)

        Returns:
          P(Y < y | x) cumulated density values for the provided X and Y - numpy array of shape (n_points, )
        """
        raise NotImplementedError

    def simulate_conditional(self, X):
        """ Draws random samples from the conditional distribution

        Args:
          X: x to be conditioned on when drawing a sample from y ~ p(y|x) - numpy array of shape (n_samples, ndim_x)

        Returns:
          Conditional random samples y drawn from p(y|x) - numpy array of shape (n_samples, ndim_y)
        """
        raise NotImplementedError

    def simulate(self, n_samples):
        """ Draws random samples from the unconditional distribution p(x,y)

        Args:
          n_samples: (int) number of samples to be drawn from the joint distribution

        Returns:
          (X,Y) - random samples drawn from p(x,y) - numpy arrays of shape (n_samples, ndim_x) and (n_samples, ndim_y)
        """
        raise NotImplementedError

    def plot(self, xlim=(-5, 5), ylim=(-5, 5), resolution=100, mode="pdf", show=False, numpyfig=False):
        """ Plots the distribution specified in mode if x and y are 1-dimensional each

        Args:
          xlim: 2-tuple specifying the x axis limits
          ylim: 2-tuple specifying the y axis limits
          resolution: integer specifying the resolution of plot
          mode: specify which dist to plot ["pdf", "cdf", "joint_pdf"]
          show: if True, plt.show() is called after drawing
          numpyfig: if True, the rendered figure is returned as a uint8 RGB numpy
            array of shape (height, width, 3) instead of the figure object

        Returns:
          the matplotlib figure, or its RGB image as numpy array if numpyfig is True
        """
        modes = ["pdf", "cdf", "joint_pdf"]
        # Join the list into the message: `str + list` would raise TypeError
        # instead of the intended AssertionError.
        assert mode in modes, "mode must be one of the following: " + ", ".join(modes)
        assert self.ndim == 2, "Can only plot two dimensional distributions"

        if not show and mpl.is_interactive():
            plt.ioff()

        # prepare mesh
        linspace_x = np.linspace(xlim[0], xlim[1], num=resolution)
        linspace_y = np.linspace(ylim[0], ylim[1], num=resolution)
        X, Y = np.meshgrid(linspace_x, linspace_y)
        X, Y = X.flatten(), Y.flatten()

        # calculate values of distribution
        if mode == "pdf":
            Z = self.pdf(X, Y)
        elif mode == "cdf":
            Z = self.cdf(X, Y)
        elif mode == "joint_pdf":
            Z = self.joint_pdf(X, Y)

        grid_shape = [resolution, resolution]
        X, Y, Z = X.reshape(grid_shape), Y.reshape(grid_shape), Z.reshape(grid_shape)

        fig = plt.figure(dpi=300)
        # fig.gca(projection=...) is no longer accepted by recent Matplotlib;
        # add_subplot creates the same single 3D axes on the fresh figure.
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(X, Y, Z, cmap=cm.coolwarm, rcount=resolution, ccount=resolution,
                        linewidth=100, antialiased=True)
        ax.set_xlabel("x")
        ax.set_ylabel("y")

        if show:
            plt.show()

        if numpyfig:
            fig.tight_layout(pad=0)
            fig.canvas.draw()
            if hasattr(fig.canvas, "tostring_rgb"):
                width, height = fig.canvas.get_width_height()
                # frombuffer replaces the deprecated binary mode of np.fromstring;
                # copy() keeps the returned array writable as before.
                rgb = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
                return rgb.reshape((height, width, 3)).copy()
            # Matplotlib builds without tostring_rgb: drop the alpha channel
            # of the RGBA buffer instead.
            return np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()

        return fig

    def mean_(self, x_cond, n_samples=10**6):
        """ Mean of the fitted distribution conditioned on x_cond

        Args:
          x_cond: different x values to condition on - numpy array of shape (n_values, ndim_x)
          n_samples: number of samples passed to the monte carlo helper (used if can_sample)

        Returns:
          Means E[y|x] corresponding to x_cond - numpy array of shape (n_values, ndim_y)
        """
        # Accept 1-D input like std_/skewness/kurtosis do.
        x_cond = self._handle_input_dimensionality(x_cond)
        assert x_cond.ndim == 2

        if self.can_sample:
            return self._mean_mc(x_cond, n_samples=n_samples)
        else:
            return self._mean_pdf(x_cond)

    def std_(self, x_cond, n_samples=10 ** 6):
        """ Standard deviation of the fitted distribution conditioned on x_cond

        Args:
          x_cond: different x values to condition on - numpy array of shape (n_values, ndim_x)
          n_samples: forwarded to the _std_pdf helper

        Returns:
          Standard deviations sqrt(Var[y|x]) corresponding to x_cond - numpy array of shape (n_values, ndim_y)
        """
        x_cond = self._handle_input_dimensionality(x_cond)
        assert x_cond.ndim == 2
        return self._std_pdf(x_cond, n_samples=n_samples)

    def covariance(self, x_cond, n_samples=10**6):
        """ Covariance of the fitted distribution conditioned on x_cond

        Args:
          x_cond: different x values to condition on - numpy array of shape (n_values, ndim_x)
          n_samples: number of samples for monte carlo model_fitting

        Returns:
          Covariances Cov[y|x] corresponding to x_cond - numpy array of shape (n_values, ndim_y, ndim_y)

        Raises:
          NotImplementedError: if the object supports neither pdf nor sampling
        """
        # Accept 1-D input like the other moment methods.
        x_cond = self._handle_input_dimensionality(x_cond)

        if self.has_pdf:
            return self._covariance_pdf(x_cond)
        elif self.can_sample:
            return self._covariance_mc(x_cond, n_samples=n_samples)
        else:
            raise NotImplementedError()

    def skewness(self, x_cond, n_samples=10 ** 6):
        """ Skewness of the fitted distribution conditioned on x_cond

        Args:
          x_cond: different x values to condition on - numpy array of shape (n_values, ndim_x)
          n_samples: number of samples forwarded to the helper

        Returns:
          Skewness Skew[y|x] corresponding to x_cond - numpy array.
          NOTE(review): the original docstring stated shape (n_values, ndim_y, ndim_y),
          which looks copied from covariance - confirm against the helpers.

        Raises:
          NotImplementedError: if the object supports neither pdf nor sampling
        """
        x_cond = self._handle_input_dimensionality(x_cond)
        assert x_cond.ndim == 2

        if self.has_pdf:
            return self._skewness_pdf(x_cond, n_samples=n_samples)
        elif self.can_sample:
            # The original called _skewness_pdf here as well, which cannot work
            # for a sampling-only model; use the monte carlo variant in parallel
            # to kurtosis(). NOTE(review): confirm _skewness_mc exists on the base class.
            return self._skewness_mc(x_cond, n_samples=n_samples)
        else:
            raise NotImplementedError()

    def kurtosis(self, x_cond, n_samples=10 ** 6):
        """ Kurtosis of the fitted distribution conditioned on x_cond

        Args:
          x_cond: different x values to condition on - numpy array of shape (n_values, ndim_x)
          n_samples: number of samples forwarded to the helper

        Returns:
          Kurtosis Kurt[y|x] corresponding to x_cond - numpy array.
          NOTE(review): the original docstring stated shape (n_values, ndim_y, ndim_y),
          which looks copied from covariance - confirm against the helpers.

        Raises:
          NotImplementedError: if the object supports neither pdf nor sampling
        """
        x_cond = self._handle_input_dimensionality(x_cond)
        assert x_cond.ndim == 2

        if self.has_pdf:
            return self._kurtosis_pdf(x_cond, n_samples=n_samples)
        elif self.can_sample:
            return self._kurtosis_mc(x_cond, n_samples=n_samples)
        else:
            raise NotImplementedError()

    def value_at_risk(self, x_cond, alpha=0.01, n_samples=10**6):
        """ Computes the Value-at-Risk (VaR) of the fitted distribution. Only if ndim_y = 1

        Args:
          x_cond: different x values to condition on - numpy array of shape (n_values, ndim_x)
          alpha: quantile percentage of the distribution
          n_samples: number of samples for monte carlo model_fitting

        Returns:
          VaR values for each x to condition on - numpy array of shape (n_values)

        Raises:
          NotImplementedError: if the object supports neither cdf nor sampling
        """
        assert self.ndim_y == 1, "Value at Risk can only be computed when ndim_y = 1"
        # Accept 1-D input like conditional_value_at_risk does.
        x_cond = self._handle_input_dimensionality(x_cond)
        assert x_cond.ndim == 2

        if self.has_cdf:
            return self._quantile_cdf(x_cond, alpha=alpha)
        elif self.can_sample:
            return self._quantile_mc(x_cond, alpha=alpha, n_samples=n_samples)
        else:
            raise NotImplementedError()

    def conditional_value_at_risk(self, x_cond, alpha=0.01, n_samples=10**6):
        """ Computes the Conditional Value-at-Risk (CVaR) / Expected Shortfall of the fitted distribution. Only if ndim_y = 1

        Args:
          x_cond: different x values to condition on - numpy array of shape (n_values, ndim_x)
          alpha: quantile percentage of the distribution
          n_samples: number of samples for monte carlo model_fitting

        Returns:
          CVaR values for each x to condition on - numpy array of shape (n_values)

        Raises:
          NotImplementedError: if the object supports neither pdf nor sampling
        """
        assert self.ndim_y == 1, "Value at Risk can only be computed when ndim_y = 1"
        x_cond = self._handle_input_dimensionality(x_cond)
        assert x_cond.ndim == 2

        # The CVaR helpers take the VaR values as their starting point.
        VaRs = self.value_at_risk(x_cond, alpha=alpha, n_samples=n_samples)

        if self.has_pdf:
            return self._conditional_value_at_risk_mc_pdf(VaRs, x_cond, alpha=alpha, n_samples=n_samples)
        elif self.can_sample:
            return self._conditional_value_at_risk_sampling(VaRs, x_cond, n_samples=n_samples)
        else:
            raise NotImplementedError("Distribution object must either support pdf or sampling in order to compute CVaR")

    def tail_risk_measures(self, x_cond, alpha=0.01, n_samples=10**6):
        """ Computes the Value-at-Risk (VaR) and Conditional Value-at-Risk (CVaR)

        Args:
          x_cond: different x values to condition on - numpy array of shape (n_values, ndim_x)
          alpha: quantile percentage of the distribution
          n_samples: number of samples for monte carlo model_fitting

        Returns:
          - VaR values for each x to condition on - numpy array of shape (n_values)
          - CVaR values for each x to condition on - numpy array of shape (n_values)

        Raises:
          NotImplementedError: if the object supports neither pdf nor sampling
        """
        assert self.ndim_y == 1, "Value at Risk can only be computed when ndim_y = 1"
        # Accept 1-D input like conditional_value_at_risk does.
        x_cond = self._handle_input_dimensionality(x_cond)
        assert x_cond.ndim == 2

        VaRs = self.value_at_risk(x_cond, alpha=alpha, n_samples=n_samples)

        if self.has_pdf:
            CVaRs = self._conditional_value_at_risk_mc_pdf(VaRs, x_cond, alpha=alpha, n_samples=n_samples)
        elif self.can_sample:
            CVaRs = self._conditional_value_at_risk_sampling(VaRs, x_cond, n_samples=n_samples)
        else:
            raise NotImplementedError("Distribution object must either support pdf or sampling in order to compute CVaR")

        # Both results must be flat arrays with one entry per conditioning point.
        assert VaRs.shape == CVaRs.shape == (len(x_cond),)
        return VaRs, CVaRs

    def get_configuration(self, deep=True):
        """ Returns the parameter dict from get_params, extended by the key
        'simulator' holding the name of the concrete class

        Args:
          deep: forwarded to get_params of the parent class

        Returns:
          dict of parameters including the 'simulator' entry
        """
        param_dict = super(BaseConditionalDensitySimulation, self).get_params(deep=deep)
        param_dict['simulator'] = self.__class__.__name__
        return param_dict

    def _handle_input_dimensionality(self, X, Y=None):
        """ Expands 1-D inputs to column vectors of shape (n_samples, 1)

        Args:
          X: numpy array, 1-D arrays get a trailing axis appended
          Y: (optional) numpy array, treated like X and checked against it

        Returns:
          X if Y is None, otherwise the tuple (X, Y)
        """
        # assert that both X and Y are 2D arrays with shape (n_samples, n_dim)
        if X.ndim == 1:
            X = np.expand_dims(X, axis=1)

        if Y is not None:
            if Y.ndim == 1:
                Y = np.expand_dims(Y, axis=1)

            assert X.shape[0] == Y.shape[0], "X and Y must have the same length along axis 0"
            assert X.ndim == Y.ndim == 2, "X and Y must be matrices"

        if Y is None:
            return X
        else:
            return X, Y

    def _compute_data_statistics(self):
        """ Estimates mean and standard deviation of y along axis 0 from 10**4 simulated samples

        Returns:
          tuple (mean of Y, std of Y)
        """
        _, Y = self.simulate(n_samples=10**4)
        return np.mean(Y, axis=0), np.std(Y, axis=0)