Source code for drippy.univariate

"""Plotting functions for univariate models (y = c + e)."""

from __future__ import annotations
from typing import TYPE_CHECKING
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from lmfit.models import LinearModel
from drippy.utilities import get_figure_and_axes

if TYPE_CHECKING:
    from collections.abc import Callable
    from matplotlib.axes import Axes
    from matplotlib.figure import Figure
    from drippy.data import EDAData

[docs] _ORDERED_VALUES = "Ordered Values"
[docs] def run_sequence_plot( data: EDAData, fig: Figure | None = None, ax: Axes | None = None, ) -> tuple[Figure, Axes]: """Creates a run sequence plot of y vs index or continuous index t. Args: data: EDAData container. Uses t as x-axis if present, else index. fig: Matplotlib figure. If None, creates new figure. ax: Matplotlib axes. If None, creates new axes. Returns: The figure and axes containing the plot. """ fig, ax = get_figure_and_axes(fig, ax) x_vals = data.t if data.t is not None else np.arange(len(data.y)) ax.plot(x_vals, data.y, label="Data") ax.legend() ax.set_xlabel("Index" if data.t is None else "t") ax.set_ylabel("Y") ax.set_title("Run Sequence Plot") fig.tight_layout() return fig, ax
[docs] def lag_plot( data: EDAData, fig: Figure | None = None, ax: Axes | None = None, lag: int = 1, ) -> tuple[Figure, Axes]: """Creates a lag plot of the data. Args: data: EDAData container. Requires y. fig: Matplotlib figure. If None, creates new figure. ax: Matplotlib axes. If None, creates new axes. lag: Number of lags. Must be positive and less than len(y). Returns: The figure and axes containing the plot. """ lag = int(lag) if lag <= 0: msg = "Lag must be a positive integer" raise ValueError(msg) if lag >= len(data.y): msg = "Lag must be less than the length of the data" raise ValueError(msg) fig, ax = get_figure_and_axes(fig, ax) y_original = data.y[lag:] y_lagged = data.y[:-lag] colors = np.arange(len(y_lagged)) scatter = ax.scatter( y_lagged, y_original, c=colors, cmap="viridis", label="Lag Plot" ) fig.colorbar(scatter, ax=ax, label="Index") ax.set_xlabel(rf"Y$_{{i-{lag}}}$") ax.set_ylabel("Y$_{i}$") ax.set_title(f"Lag Plot (lag={lag})") fig.tight_layout() return fig, ax
[docs] def histogram( data: EDAData, fig: Figure | None = None, ax: Axes | None = None, bins: int | str = "auto", ) -> tuple[Figure, Axes]: """Creates a histogram of the data. Args: data: EDAData container. Requires y. fig: Matplotlib figure. If None, creates new figure. ax: Matplotlib axes. If None, creates new axes. bins: Number of bins or bin strategy. Returns: The figure and axes containing the plot. """ if isinstance(bins, int) and bins <= 0: msg = f"Number of bins must be positive, got {bins}" raise ValueError(msg) fig, ax = get_figure_and_axes(fig, ax) ax.hist(data.y, bins=bins) ax.set_xlabel("Value") ax.set_ylabel("Frequency") ax.set_title("Histogram of Y values") fig.tight_layout() return fig, ax
[docs] def normal_probability_plot( data: EDAData, fig: Figure | None = None, ax: Axes | None = None, *, return_rsquared: bool = False, ) -> tuple[Figure, Axes, float | None]: """Creates a normal probability plot. Args: data: EDAData container. Requires y. fig: Matplotlib figure. If None, creates new figure. ax: Matplotlib axes. If None, creates new axes. return_rsquared: If True, includes R-squared as the third element. Returns: (fig, ax, r_squared) where r_squared is None if return_rsquared is False. """ fig, ax = get_figure_and_axes(fig, ax) _, (_, _, r_value) = sp.stats.probplot( data.y, dist="norm", plot=ax, rvalue=True ) ax.set_ylabel(_ORDERED_VALUES) ax.set_xlabel("Theoretical Quantiles") ax.set_title("Normal Probability Plot") fig.tight_layout() r_squared = r_value**2 if return_rsquared else None return fig, ax, r_squared
[docs] def four_plot( data: EDAData, fig: Figure | None = None, axes: np.ndarray | None = None, ) -> tuple[Figure, np.ndarray]: """Creates a 4-plot (run sequence, lag, histogram, normal probability). Args: data: EDAData container. Requires y. fig: Matplotlib figure. If None, creates new figure. axes: ndarray of Axes with shape (2, 2). If None, creates new axes. Returns: (fig, axes_flat) where axes_flat has shape (4,). """ if fig is None and axes is None: fig, axes = plt.subplots(2, 2) elif axes is None: axes = fig.subplots(2, 2) elif fig is None: fig = axes.flat[0].get_figure() if axes.shape != (2, 2): msg = "Axes must be an iterable of (2, 2) Axes objects." raise ValueError(msg) axes = axes.flatten() run_sequence_plot(data, fig, axes[0]) lag_plot(data, fig, axes[1]) histogram(data, fig, axes[2]) normal_probability_plot(data, fig, axes[3]) return fig, axes
[docs] def ppcc_plot( # noqa: PLR0913 data: EDAData, fig: Figure | None = None, ax: np.ndarray | None = None, rough_range: tuple[float, float] = (-2, 2), n_rough: int = 50, n_fine: int = 100, ) -> tuple[Figure, np.ndarray]: """Creates a PPCC plot (rough + fine) for distribution shape estimation. Args: data: EDAData container. Requires y. fig: Matplotlib figure. If None, creates new figure. ax: ndarray of Axes with shape (2,). If None, creates new axes. rough_range: (min, max) range for rough search. n_rough: Points in rough plot. n_fine: Points in fine plot. Returns: (fig, axes) where axes has shape (2,). """ if len(rough_range) != 2: # noqa: PLR2004 msg = "Rough range must contain exactly 2 elements" raise ValueError(msg) if rough_range[0] >= rough_range[1]: msg = "Rough range must be (min, max) with min < max" raise ValueError(msg) if n_rough <= 0: msg = "Number of points must be positive" raise ValueError(msg) if n_fine <= 0: msg = "Number of points must be positive" raise ValueError(msg) if fig is None and ax is None: fig, ax = plt.subplots(1, 2) elif ax is None: ax = fig.subplots(1, 2) elif fig is None: fig = ax.flat[0].get_figure() if ax.shape != (2,): msg = "Axes must be an iterable of 2 Axes objects." raise ValueError(msg) rough_shape_values, rough_ppcc = sp.stats.ppcc_plot( data.y, rough_range[0], rough_range[1], N=n_rough, plot=ax[0], ) rough_max_index = np.argmax(rough_ppcc) fine_shape_values, fine_ppcc = sp.stats.ppcc_plot( data.y, rough_shape_values[rough_max_index] - 0.5, rough_shape_values[rough_max_index] + 0.5, N=n_fine, plot=ax[1], ) fine_max_index = np.argmax(fine_ppcc) max_shape = rough_shape_values[rough_max_index] ax[0].vlines( max_shape, 0, rough_ppcc[rough_max_index], color="r", label=f"Max PPCC at shape={max_shape:.3g}", ) ax[0].legend() max_shape_fine = fine_shape_values[fine_max_index] ax[1].axvline( max_shape_fine, color="r", label=f"Max PPCC at shape={max_shape_fine:.3g}", ) ax[1].legend() ax[0].set_title("Rough PPCC Plot") ax[1].set_title("Fine PPCC Plot") fig.tight_layout() return fig, ax
[docs] def weibull_plot( data: EDAData, fig: Figure | None = None, ax: Axes | None = None, ) -> tuple[Figure, Axes]: """Creates a Weibull probability plot. Args: data: EDAData container. Requires y > 0. fig: Matplotlib figure. If None, creates new figure. ax: Matplotlib axes. If None, creates new axes. Returns: The figure and axes containing the plot. """ if not np.all(data.y > 0): msg = "Weibull plot requires all y values to be positive." raise ValueError(msg) fig, ax = get_figure_and_axes(fig, ax) n = len(data.y) p = (np.arange(1, n + 1) - 0.3) / (n + 0.4) weibull_probabilities = np.log(-np.log(1 - p)) ordered_data = np.log(np.sort(data.y)) model = LinearModel() params = model.make_params(intercept=0, slope=1) result = model.fit(weibull_probabilities, params, x=ordered_data) shape = result.params["slope"].value intercept = result.params["intercept"].value scale = np.exp(-intercept / shape) x_fit = np.linspace(min(ordered_data), max(ordered_data), 100) y_fit = intercept + shape * x_fit ax.plot(ordered_data, weibull_probabilities, ".", label="Data") ax.plot( x_fit, y_fit, "-", label=f"Fit with R$^2$={result.rsquared:.3g}", ) ax.set_xlabel(_ORDERED_VALUES) ax.set_ylabel("Theoretical Quantiles (Weibull)") ax.set_title("Weibull Probability Plot") min_percentage = np.floor( np.log10(1 - np.exp(np.exp(min(weibull_probabilities)) * -1)), ) percentages = np.concatenate( ( np.logspace(min_percentage - 1, -1, num=int(-min_percentage) + 1), [0.5, 0.9, 0.99], ), ) ax.set_yticks( np.log(-np.log(1 - percentages)), labels=[f"{p * 100:.3g}" for p in percentages], ) ax.axhline(np.log(-np.log(1 - 0.632)), color="black", linestyle="--") ax.axvline(np.log(scale), color="black", linestyle="--") ax.legend() fig.tight_layout() return fig, ax
[docs] def probability_plot( data: EDAData, fig: Figure | None = None, ax: Axes | None = None, distribution: str = "norm", ) -> tuple[Figure, Axes]: """Creates a probability plot for a specified distribution. Args: data: EDAData container. Requires y. fig: Matplotlib figure. If None, creates new figure. ax: Matplotlib axes. If None, creates new axes. distribution: scipy.stats distribution name. Returns: The figure and axes containing the plot. """ fig, ax = get_figure_and_axes(fig, ax) sp.stats.probplot(data.y, dist=distribution, plot=ax) ax.set_ylabel(_ORDERED_VALUES) ax.set_xlabel("Theoretical Quantiles") ax.set_title(f"{distribution.capitalize()} Probability Plot") fig.tight_layout() return fig, ax
[docs] def box_cox_normality_plot( data: EDAData, fig: Figure | None = None, axes: np.ndarray | None = None, ) -> tuple[Figure, np.ndarray]: """Creates a Box-Cox normality plot (2x2 grid). Shows: original histogram, Box-Cox normality curve, transformed histogram, normal probability plot of transformed data. Args: data: EDAData container. Requires y > 0. fig: Matplotlib figure. If None, creates new figure. axes: ndarray of Axes with shape (2, 2). If None, creates new. Returns: (fig, axes) where axes has shape (2, 2). """ if not np.all(data.y > 0): msg = "Box-Cox transformation requires all y values to be positive." raise ValueError(msg) if fig is None and axes is None: fig, axes = plt.subplots(2, 2) elif axes is None: axes = fig.subplots(2, 2) elif fig is None: fig = axes.flat[0].get_figure() if axes.shape != (2, 2): msg = "Axes must be an iterable of (2, 2) Axes objects." raise ValueError(msg) sp.stats.boxcox_normplot(data.y, -2, 2, plot=axes[0, 1], N=200) transformed_y, maxlog = sp.stats.boxcox(data.y) axes[0, 0].hist(data.y, bins="auto") axes[0, 0].set_xlabel("Value") axes[0, 0].set_ylabel("Frequency") axes[0, 0].set_title("Histogram of Original (positive shifted) Y values") axes[0, 1].set_title("Box-Cox Normality Plot") axes[0, 1].axvline( maxlog, color="r", linestyle="--", label=f"Max log={maxlog:.3g}" ) axes[0, 1].legend() axes[1, 0].hist(transformed_y, bins="auto") axes[1, 0].set_xlabel("Value") axes[1, 0].set_ylabel("Frequency") axes[1, 0].set_title("Histogram of transformed Y values") sp.stats.probplot(transformed_y, dist="norm", plot=axes[1, 1], rvalue=True) axes[1, 1].set_title("Normal Probability Plot of Transformed Data") fig.tight_layout() return fig, axes
[docs] def bootstrap_plot( data: EDAData, fig: Figure | None = None, ax: Axes | None = None, statistic: Callable = np.mean, n_bootstrap: int = 10000, ) -> tuple[Figure, Axes]: """Creates a bootstrap distribution plot. Args: data: EDAData container. Requires y. fig: Matplotlib figure. If None, creates new figure. ax: Matplotlib axes. If None, creates new axes. statistic: Callable to bootstrap. Must be callable. n_bootstrap: Number of bootstrap resamples. Returns: The figure and axes containing the plot. """ if not callable(statistic): msg = "Statistic must be callable" raise TypeError(msg) if n_bootstrap <= 0: msg = "Number of bootstrap samples must be positive" raise ValueError(msg) fig, ax = get_figure_and_axes(fig, ax) result = sp.stats.bootstrap((data.y,), statistic, n_resamples=n_bootstrap) variability_label = ( f"Variability of {statistic.__name__}: {result.standard_error:.3g}" ) ax.hist( result.bootstrap_distribution, bins="auto", density=True, label=variability_label, ) ax.set_xlabel(f"Bootstrap {statistic.__name__} values") ax.set_ylabel("Density") ax.set_title("Bootstrap Distribution") ax.legend() fig.tight_layout() return fig, ax
[docs] def box_cox_linearity_plot( data: EDAData, fig: Figure | None = None, ax: Axes | None = None, lmbda_range: tuple[float, float] = (-2, 2), n: int = 100, ) -> tuple[Figure, Axes]: """Creates a Box-Cox linearity plot. Plots ``abs(corr(Y, X^λ))`` across a range of λ values to find the power transformation of X that maximises linearity with Y. Args: data: EDAData container. Requires x > 0. fig: Matplotlib figure. If None, creates new figure. ax: Matplotlib axes. If None, creates new axes. lmbda_range: (min, max) range of λ to evaluate. n: Number of λ values to evaluate. Returns: The figure and axes containing the plot. """ if data.x is None: msg = "box_cox_linearity_plot requires x" raise ValueError(msg) if not np.all(data.x > 0): msg = "box_cox_linearity_plot requires all x values to be positive" raise ValueError(msg) fig, ax = get_figure_and_axes(fig, ax) lambdas = np.linspace(lmbda_range[0], lmbda_range[1], n) correlations = np.array( [ abs( np.corrcoef( np.log(data.x) if lmbda == 0 else (data.x**lmbda - 1) / lmbda, data.y, )[0, 1] ) for lmbda in lambdas ] ) optimal_lambda = lambdas[np.argmax(correlations)] ax.plot(lambdas, correlations) ax.axvline( optimal_lambda, color="r", linestyle="--", label=f"Optimal λ={optimal_lambda:.3g}", ) ax.set_xlabel("λ (power transformation)") ax.set_ylabel("|Correlation(Y, X^λ)|") ax.set_title("Box-Cox Linearity Plot") ax.legend() fig.tight_layout() return fig, ax