"""Plotting functions for univariate models (y = c + e)."""
from __future__ import annotations
from typing import TYPE_CHECKING
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from lmfit.models import LinearModel
from drippy.utilities import get_figure_and_axes
if TYPE_CHECKING:
from collections.abc import Callable
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from drippy.data import EDAData
[docs]
_ORDERED_VALUES = "Ordered Values"
[docs]
def run_sequence_plot(
data: EDAData,
fig: Figure | None = None,
ax: Axes | None = None,
) -> tuple[Figure, Axes]:
"""Creates a run sequence plot of y vs index or continuous index t.
Args:
data: EDAData container. Uses t as x-axis if present, else index.
fig: Matplotlib figure. If None, creates new figure.
ax: Matplotlib axes. If None, creates new axes.
Returns:
The figure and axes containing the plot.
"""
fig, ax = get_figure_and_axes(fig, ax)
x_vals = data.t if data.t is not None else np.arange(len(data.y))
ax.plot(x_vals, data.y, label="Data")
ax.legend()
ax.set_xlabel("Index" if data.t is None else "t")
ax.set_ylabel("Y")
ax.set_title("Run Sequence Plot")
fig.tight_layout()
return fig, ax
[docs]
def lag_plot(
data: EDAData,
fig: Figure | None = None,
ax: Axes | None = None,
lag: int = 1,
) -> tuple[Figure, Axes]:
"""Creates a lag plot of the data.
Args:
data: EDAData container. Requires y.
fig: Matplotlib figure. If None, creates new figure.
ax: Matplotlib axes. If None, creates new axes.
lag: Number of lags. Must be positive and less than len(y).
Returns:
The figure and axes containing the plot.
"""
lag = int(lag)
if lag <= 0:
msg = "Lag must be a positive integer"
raise ValueError(msg)
if lag >= len(data.y):
msg = "Lag must be less than the length of the data"
raise ValueError(msg)
fig, ax = get_figure_and_axes(fig, ax)
y_original = data.y[lag:]
y_lagged = data.y[:-lag]
colors = np.arange(len(y_lagged))
scatter = ax.scatter(
y_lagged, y_original, c=colors, cmap="viridis", label="Lag Plot"
)
fig.colorbar(scatter, ax=ax, label="Index")
ax.set_xlabel(rf"Y$_{{i-{lag}}}$")
ax.set_ylabel("Y$_{i}$")
ax.set_title(f"Lag Plot (lag={lag})")
fig.tight_layout()
return fig, ax
[docs]
def histogram(
data: EDAData,
fig: Figure | None = None,
ax: Axes | None = None,
bins: int | str = "auto",
) -> tuple[Figure, Axes]:
"""Creates a histogram of the data.
Args:
data: EDAData container. Requires y.
fig: Matplotlib figure. If None, creates new figure.
ax: Matplotlib axes. If None, creates new axes.
bins: Number of bins or bin strategy.
Returns:
The figure and axes containing the plot.
"""
if isinstance(bins, int) and bins <= 0:
msg = f"Number of bins must be positive, got {bins}"
raise ValueError(msg)
fig, ax = get_figure_and_axes(fig, ax)
ax.hist(data.y, bins=bins)
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
ax.set_title("Histogram of Y values")
fig.tight_layout()
return fig, ax
[docs]
def normal_probability_plot(
data: EDAData,
fig: Figure | None = None,
ax: Axes | None = None,
*,
return_rsquared: bool = False,
) -> tuple[Figure, Axes, float | None]:
"""Creates a normal probability plot.
Args:
data: EDAData container. Requires y.
fig: Matplotlib figure. If None, creates new figure.
ax: Matplotlib axes. If None, creates new axes.
return_rsquared: If True, includes R-squared as the third element.
Returns:
(fig, ax, r_squared) where r_squared is None if return_rsquared
is False.
"""
fig, ax = get_figure_and_axes(fig, ax)
_, (_, _, r_value) = sp.stats.probplot(
data.y, dist="norm", plot=ax, rvalue=True
)
ax.set_ylabel(_ORDERED_VALUES)
ax.set_xlabel("Theoretical Quantiles")
ax.set_title("Normal Probability Plot")
fig.tight_layout()
r_squared = r_value**2 if return_rsquared else None
return fig, ax, r_squared
[docs]
def four_plot(
data: EDAData,
fig: Figure | None = None,
axes: np.ndarray | None = None,
) -> tuple[Figure, np.ndarray]:
"""Creates a 4-plot (run sequence, lag, histogram, normal probability).
Args:
data: EDAData container. Requires y.
fig: Matplotlib figure. If None, creates new figure.
axes: ndarray of Axes with shape (2, 2). If None, creates new axes.
Returns:
(fig, axes_flat) where axes_flat has shape (4,).
"""
if fig is None and axes is None:
fig, axes = plt.subplots(2, 2)
elif axes is None:
axes = fig.subplots(2, 2)
elif fig is None:
fig = axes.flat[0].get_figure()
if axes.shape != (2, 2):
msg = "Axes must be an iterable of (2, 2) Axes objects."
raise ValueError(msg)
axes = axes.flatten()
run_sequence_plot(data, fig, axes[0])
lag_plot(data, fig, axes[1])
histogram(data, fig, axes[2])
normal_probability_plot(data, fig, axes[3])
return fig, axes
[docs]
def ppcc_plot( # noqa: PLR0913
data: EDAData,
fig: Figure | None = None,
ax: np.ndarray | None = None,
rough_range: tuple[float, float] = (-2, 2),
n_rough: int = 50,
n_fine: int = 100,
) -> tuple[Figure, np.ndarray]:
"""Creates a PPCC plot (rough + fine) for distribution shape estimation.
Args:
data: EDAData container. Requires y.
fig: Matplotlib figure. If None, creates new figure.
ax: ndarray of Axes with shape (2,). If None, creates new axes.
rough_range: (min, max) range for rough search.
n_rough: Points in rough plot.
n_fine: Points in fine plot.
Returns:
(fig, axes) where axes has shape (2,).
"""
if len(rough_range) != 2: # noqa: PLR2004
msg = "Rough range must contain exactly 2 elements"
raise ValueError(msg)
if rough_range[0] >= rough_range[1]:
msg = "Rough range must be (min, max) with min < max"
raise ValueError(msg)
if n_rough <= 0:
msg = "Number of points must be positive"
raise ValueError(msg)
if n_fine <= 0:
msg = "Number of points must be positive"
raise ValueError(msg)
if fig is None and ax is None:
fig, ax = plt.subplots(1, 2)
elif ax is None:
ax = fig.subplots(1, 2)
elif fig is None:
fig = ax.flat[0].get_figure()
if ax.shape != (2,):
msg = "Axes must be an iterable of 2 Axes objects."
raise ValueError(msg)
rough_shape_values, rough_ppcc = sp.stats.ppcc_plot(
data.y,
rough_range[0],
rough_range[1],
N=n_rough,
plot=ax[0],
)
rough_max_index = np.argmax(rough_ppcc)
fine_shape_values, fine_ppcc = sp.stats.ppcc_plot(
data.y,
rough_shape_values[rough_max_index] - 0.5,
rough_shape_values[rough_max_index] + 0.5,
N=n_fine,
plot=ax[1],
)
fine_max_index = np.argmax(fine_ppcc)
max_shape = rough_shape_values[rough_max_index]
ax[0].vlines(
max_shape,
0,
rough_ppcc[rough_max_index],
color="r",
label=f"Max PPCC at shape={max_shape:.3g}",
)
ax[0].legend()
max_shape_fine = fine_shape_values[fine_max_index]
ax[1].axvline(
max_shape_fine,
color="r",
label=f"Max PPCC at shape={max_shape_fine:.3g}",
)
ax[1].legend()
ax[0].set_title("Rough PPCC Plot")
ax[1].set_title("Fine PPCC Plot")
fig.tight_layout()
return fig, ax
[docs]
def weibull_plot(
data: EDAData,
fig: Figure | None = None,
ax: Axes | None = None,
) -> tuple[Figure, Axes]:
"""Creates a Weibull probability plot.
Args:
data: EDAData container. Requires y > 0.
fig: Matplotlib figure. If None, creates new figure.
ax: Matplotlib axes. If None, creates new axes.
Returns:
The figure and axes containing the plot.
"""
if not np.all(data.y > 0):
msg = "Weibull plot requires all y values to be positive."
raise ValueError(msg)
fig, ax = get_figure_and_axes(fig, ax)
n = len(data.y)
p = (np.arange(1, n + 1) - 0.3) / (n + 0.4)
weibull_probabilities = np.log(-np.log(1 - p))
ordered_data = np.log(np.sort(data.y))
model = LinearModel()
params = model.make_params(intercept=0, slope=1)
result = model.fit(weibull_probabilities, params, x=ordered_data)
shape = result.params["slope"].value
intercept = result.params["intercept"].value
scale = np.exp(-intercept / shape)
x_fit = np.linspace(min(ordered_data), max(ordered_data), 100)
y_fit = intercept + shape * x_fit
ax.plot(ordered_data, weibull_probabilities, ".", label="Data")
ax.plot(
x_fit,
y_fit,
"-",
label=f"Fit with R$^2$={result.rsquared:.3g}",
)
ax.set_xlabel(_ORDERED_VALUES)
ax.set_ylabel("Theoretical Quantiles (Weibull)")
ax.set_title("Weibull Probability Plot")
min_percentage = np.floor(
np.log10(1 - np.exp(np.exp(min(weibull_probabilities)) * -1)),
)
percentages = np.concatenate(
(
np.logspace(min_percentage - 1, -1, num=int(-min_percentage) + 1),
[0.5, 0.9, 0.99],
),
)
ax.set_yticks(
np.log(-np.log(1 - percentages)),
labels=[f"{p * 100:.3g}" for p in percentages],
)
ax.axhline(np.log(-np.log(1 - 0.632)), color="black", linestyle="--")
ax.axvline(np.log(scale), color="black", linestyle="--")
ax.legend()
fig.tight_layout()
return fig, ax
[docs]
def probability_plot(
data: EDAData,
fig: Figure | None = None,
ax: Axes | None = None,
distribution: str = "norm",
) -> tuple[Figure, Axes]:
"""Creates a probability plot for a specified distribution.
Args:
data: EDAData container. Requires y.
fig: Matplotlib figure. If None, creates new figure.
ax: Matplotlib axes. If None, creates new axes.
distribution: scipy.stats distribution name.
Returns:
The figure and axes containing the plot.
"""
fig, ax = get_figure_and_axes(fig, ax)
sp.stats.probplot(data.y, dist=distribution, plot=ax)
ax.set_ylabel(_ORDERED_VALUES)
ax.set_xlabel("Theoretical Quantiles")
ax.set_title(f"{distribution.capitalize()} Probability Plot")
fig.tight_layout()
return fig, ax
[docs]
def box_cox_normality_plot(
data: EDAData,
fig: Figure | None = None,
axes: np.ndarray | None = None,
) -> tuple[Figure, np.ndarray]:
"""Creates a Box-Cox normality plot (2x2 grid).
Shows: original histogram, Box-Cox normality curve, transformed
histogram, normal probability plot of transformed data.
Args:
data: EDAData container. Requires y > 0.
fig: Matplotlib figure. If None, creates new figure.
axes: ndarray of Axes with shape (2, 2). If None, creates new.
Returns:
(fig, axes) where axes has shape (2, 2).
"""
if not np.all(data.y > 0):
msg = "Box-Cox transformation requires all y values to be positive."
raise ValueError(msg)
if fig is None and axes is None:
fig, axes = plt.subplots(2, 2)
elif axes is None:
axes = fig.subplots(2, 2)
elif fig is None:
fig = axes.flat[0].get_figure()
if axes.shape != (2, 2):
msg = "Axes must be an iterable of (2, 2) Axes objects."
raise ValueError(msg)
sp.stats.boxcox_normplot(data.y, -2, 2, plot=axes[0, 1], N=200)
transformed_y, maxlog = sp.stats.boxcox(data.y)
axes[0, 0].hist(data.y, bins="auto")
axes[0, 0].set_xlabel("Value")
axes[0, 0].set_ylabel("Frequency")
axes[0, 0].set_title("Histogram of Original (positive shifted) Y values")
axes[0, 1].set_title("Box-Cox Normality Plot")
axes[0, 1].axvline(
maxlog, color="r", linestyle="--", label=f"Max log={maxlog:.3g}"
)
axes[0, 1].legend()
axes[1, 0].hist(transformed_y, bins="auto")
axes[1, 0].set_xlabel("Value")
axes[1, 0].set_ylabel("Frequency")
axes[1, 0].set_title("Histogram of transformed Y values")
sp.stats.probplot(transformed_y, dist="norm", plot=axes[1, 1], rvalue=True)
axes[1, 1].set_title("Normal Probability Plot of Transformed Data")
fig.tight_layout()
return fig, axes
[docs]
def bootstrap_plot(
data: EDAData,
fig: Figure | None = None,
ax: Axes | None = None,
statistic: Callable = np.mean,
n_bootstrap: int = 10000,
) -> tuple[Figure, Axes]:
"""Creates a bootstrap distribution plot.
Args:
data: EDAData container. Requires y.
fig: Matplotlib figure. If None, creates new figure.
ax: Matplotlib axes. If None, creates new axes.
statistic: Callable to bootstrap. Must be callable.
n_bootstrap: Number of bootstrap resamples.
Returns:
The figure and axes containing the plot.
"""
if not callable(statistic):
msg = "Statistic must be callable"
raise TypeError(msg)
if n_bootstrap <= 0:
msg = "Number of bootstrap samples must be positive"
raise ValueError(msg)
fig, ax = get_figure_and_axes(fig, ax)
result = sp.stats.bootstrap((data.y,), statistic, n_resamples=n_bootstrap)
variability_label = (
f"Variability of {statistic.__name__}: {result.standard_error:.3g}"
)
ax.hist(
result.bootstrap_distribution,
bins="auto",
density=True,
label=variability_label,
)
ax.set_xlabel(f"Bootstrap {statistic.__name__} values")
ax.set_ylabel("Density")
ax.set_title("Bootstrap Distribution")
ax.legend()
fig.tight_layout()
return fig, ax
[docs]
def box_cox_linearity_plot(
data: EDAData,
fig: Figure | None = None,
ax: Axes | None = None,
lmbda_range: tuple[float, float] = (-2, 2),
n: int = 100,
) -> tuple[Figure, Axes]:
"""Creates a Box-Cox linearity plot.
Plots ``abs(corr(Y, X^λ))`` across a range of λ values to find the
power transformation of X that maximises linearity with Y.
Args:
data: EDAData container. Requires x > 0.
fig: Matplotlib figure. If None, creates new figure.
ax: Matplotlib axes. If None, creates new axes.
lmbda_range: (min, max) range of λ to evaluate.
n: Number of λ values to evaluate.
Returns:
The figure and axes containing the plot.
"""
if data.x is None:
msg = "box_cox_linearity_plot requires x"
raise ValueError(msg)
if not np.all(data.x > 0):
msg = "box_cox_linearity_plot requires all x values to be positive"
raise ValueError(msg)
fig, ax = get_figure_and_axes(fig, ax)
lambdas = np.linspace(lmbda_range[0], lmbda_range[1], n)
correlations = np.array(
[
abs(
np.corrcoef(
np.log(data.x)
if lmbda == 0
else (data.x**lmbda - 1) / lmbda,
data.y,
)[0, 1]
)
for lmbda in lambdas
]
)
optimal_lambda = lambdas[np.argmax(correlations)]
ax.plot(lambdas, correlations)
ax.axvline(
optimal_lambda,
color="r",
linestyle="--",
label=f"Optimal λ={optimal_lambda:.3g}",
)
ax.set_xlabel("λ (power transformation)")
ax.set_ylabel("|Correlation(Y, X^λ)|")
ax.set_title("Box-Cox Linearity Plot")
ax.legend()
fig.tight_layout()
return fig, ax