Code for comparing models of blackbody radiation#

\(\require{mathtools} \newcommand{\notag}{} \newcommand{\tag}{} \newcommand{\label}[1]{} \newcommand{\sfrac}[2]{#1/#2} \newcommand{\bm}[1]{\boldsymbol{#1}} \newcommand{\num}[1]{#1} \newcommand{\qty}[2]{#1\,#2} \renewenvironment{align} {\begin{aligned}} {\end{aligned}} \renewenvironment{alignat} {\begin{alignedat}} {\end{alignedat}} \newcommand{\pdfmspace}[1]{} % Ignore PDF-only spacing commands \newcommand{\htmlmspace}[1]{\mspace{#1}} % Ignore PDF-only spacing commands \newcommand{\scaleto}[2]{#1} % Allow to use scaleto from scalerel package \newcommand{\RR}{\mathbb R} \newcommand{\NN}{\mathbb N} \newcommand{\PP}{\mathbb P} \newcommand{\EE}{\mathbb E} \newcommand{\XX}{\mathbb X} \newcommand{\ZZ}{\mathbb Z} \newcommand{\QQ}{\mathbb Q} \newcommand{\fF}{\mathcal F} \newcommand{\dD}{\mathcal D} \newcommand{\lL}{\mathcal L} \newcommand{\gG}{\mathcal G} \newcommand{\hH}{\mathcal H} \newcommand{\nN}{\mathcal N} \newcommand{\pP}{\mathcal P} \newcommand{\BB}{\mathbb B} \newcommand{\Exp}{\operatorname{Exp}} \newcommand{\Binomial}{\operatorname{Binomial}} \newcommand{\Poisson}{\operatorname{Poisson}} \newcommand{\linop}{\mathcal{L}(\mathbb{B})} \newcommand{\linopell}{\mathcal{L}(\ell_1)} \DeclareMathOperator{\trace}{trace} \DeclareMathOperator{\Var}{Var} \DeclareMathOperator{\Span}{span} \DeclareMathOperator{\proj}{proj} \DeclareMathOperator{\col}{col} \DeclareMathOperator*{\argmin}{arg\,min} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator*{\gt}{>} \definecolor{highlight-blue}{RGB}{0,123,255} % definition, theorem, proposition \definecolor{highlight-yellow}{RGB}{255,193,7} % lemma, conjecture, example \definecolor{highlight-orange}{RGB}{253,126,20} % criterion, corollary, property \definecolor{highlight-red}{RGB}{220,53,69} % criterion \newcommand{\logL}{\ell} \newcommand{\eE}{\mathcal{E}} \newcommand{\oO}{\mathcal{O}} \newcommand{\defeq}{\stackrel{\mathrm{def}}{=}} \newcommand{\Bspec}{\mathcal{B}} % Spectral radiance \newcommand{\X}{\mathcal{X}} % X space \newcommand{\Y}{\mathcal{Y}} % Y space \newcommand{\M}{\mathcal{M}} % Model \newcommand{\Tspace}{\mathcal{T}} \newcommand{\Vspace}{\mathcal{V}} \newcommand{\Mtrue}{\mathcal{M}_{\mathrm{true}}} \newcommand{\MP}{\M_{\mathrm{P}}} \newcommand{\MRJ}{\M_{\mathrm{RJ}}} \newcommand{\qproc}{\mathfrak{Q}} \newcommand{\D}{\mathcal{D}} % Data (true or generic) \newcommand{\Dt}{\tilde{\mathcal{D}}} \newcommand{\Phit}{\widetilde{\Phi}} \newcommand{\Phis}{\Phi^*} \newcommand{\qt}{\tilde{q}} \newcommand{\qs}{q^*} \newcommand{\qh}{\hat{q}} \newcommand{\AB}[1]{\mathtt{AB}~\mathtt{#1}} \newcommand{\LP}[1]{\mathtt{LP}~\mathtt{#1}} \newcommand{\NML}{\mathrm{NML}} \newcommand{\iI}{\mathcal{I}} \newcommand{\true}{\mathrm{true}} \newcommand{\dist}{D} \newcommand{\Mtheo}[1]{\mathcal{M}_{#1}} % Model (theoretical model); index: param set \newcommand{\DL}[1][L]{\mathcal{D}^{(#1)}} % Data (RV or generic) \newcommand{\DLp}[1][L]{\mathcal{D}^{(#1')}} % Data (RV or generic) \newcommand{\DtL}[1][L]{\tilde{\mathcal{D}}^{(#1)}} % Data (RV or generic) \newcommand{\DpL}[1][L]{{\mathcal{D}'}^{(#1)}} % Data (RV or generic) \newcommand{\Dobs}[1][]{\mathcal{D}_{\mathrm{obs}}^{#1}} % Data (observed) \newcommand{\calibset}{\mathcal{C}} \newcommand{\N}{\mathcal{N}} % Normal distribution \newcommand{\Z}{\mathcal{Z}} % Partition function \newcommand{\VV}{\mathbb{V}} % Variance \newcommand{\T}{\mathsf{T}} % Transpose \newcommand{\EMD}{\mathrm{EMD}} \newcommand{\dEMD}{d_{\mathrm{EMD}}} \newcommand{\dEMDtilde}{\tilde{d}_{\mathrm{EMD}}} 
\newcommand{\dEMDsafe}{d_{\mathrm{EMD}}^{\text{(safe)}}} \newcommand{\e}{ε} % Model confusion threshold \newcommand{\falsifythreshold}{ε} \newcommand{\bayes}[1][]{B_{#1}} \newcommand{\bayesthresh}[1][]{B_{0}} \newcommand{\bayesm}[1][]{B^{\mathcal{M}}_{#1}} \newcommand{\bayesl}[1][]{B^l_{#1}} \newcommand{\bayesphys}[1][]{B^{{p}}_{#1}} \newcommand{\Bconf}[1]{B^{\mathrm{epis}}_{#1}} \newcommand{\Bemd}[1]{B^{\mathrm{EMD}}_{#1}} \newcommand{\Bconfbin}[1][]{\bar{B}^{\mathrm{conf}}_{#1}} \newcommand{\Bemdbin}[1][]{\bar{B}_{#1}^{\mathrm{EMD}}} \newcommand{\bin}{\mathcal{B}} \newcommand{\Bconft}[1][]{\tilde{B}^{\mathrm{conf}}_{#1}} \newcommand{\fc}{f_c} \newcommand{\fcbin}{\bar{f}_c} \newcommand{\paramphys}[1][]{Θ^{{p}}_{#1}} \newcommand{\paramobs}[1][]{Θ^{ε}_{#1}} \newcommand{\test}{\mathrm{test}} \newcommand{\train}{\mathrm{train}} \newcommand{\synth}{\mathrm{synth}} \newcommand{\rep}{\mathrm{rep}} \newcommand{\MNtrue}{\mathcal{M}^{{p}}_{\text{true}}} \newcommand{\MN}[1][]{\mathcal{M}^{{p}}_{#1}} \newcommand{\MNA}{\mathcal{M}^{{p}}_{Θ_A}} \newcommand{\MNB}{\mathcal{M}^{{p}}_{Θ_B}} \newcommand{\Me}[1][]{\mathcal{M}^ε_{#1}} \newcommand{\Metrue}{\mathcal{M}^ε_{\text{true}}} \newcommand{\Meobs}{\mathcal{M}^ε_{\text{obs}}} \newcommand{\Meh}[1][]{\hat{\mathcal{M}}^ε_{#1}} \newcommand{\MNa}{\mathcal{M}^{\mathcal{N}}_a} \newcommand{\MeA}{\mathcal{M}^ε_A} \newcommand{\MeB}{\mathcal{M}^ε_B} \newcommand{\Ms}{\mathcal{M}^*} \newcommand{\MsA}{\mathcal{M}^*_A} \newcommand{\MsB}{\mathcal{M}^*_B} \newcommand{\Msa}{\mathcal{M}^*_a} \newcommand{\MsAz}{\mathcal{M}^*_{A,z}} \newcommand{\MsBz}{\mathcal{M}^*_{B,z}} \newcommand{\Msaz}{\mathcal{M}^*_{a,z}} \newcommand{\MeAz}{\mathcal{M}^ε_{A,z}} \newcommand{\MeBz}{\mathcal{M}^ε_{B,z}} \newcommand{\Meaz}{\mathcal{M}^ε_{a,z}} \newcommand{\zo}{z^{0}} \renewcommand{\lL}[2][]{\mathcal{L}_{#1|{#2}}} % likelihood \newcommand{\Lavg}[2][]{\mathcal{L}^{/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLphys}[2][]{\mathcal{L}^{{p}}_{#1|#2}} \newcommand{\Lavgphys}[2][]{\mathcal{L}^{{p}/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLL}[3][]{\mathcal{L}^{(#3)}_{#1|#2}} \newcommand{\lLphysL}[3][]{\mathcal{L}^{{p},(#3)}_{#1|#2}} \newcommand{\lnL}[2][]{l_{#1|#2}} % Per-sample log likelihood \newcommand{\lnLt}[2][]{\widetilde{l}_{#1|#2}} \newcommand{\lnLtt}{\widetilde{l}} % Used only in path_sampling \newcommand{\lnLh}[1][]{\hat{l}_{#1}} \newcommand{\lnLphys}[2][]{l^{{p}}_{#1|#2}} \newcommand{\lnLphysL}[3][]{l^{{p},(#3)}_{#1|#2}} \newcommand{\Elmu}[2][1]{μ_{{#2}}^{(#1)}} \newcommand{\Elmuh}[2][1]{\hat{μ}_{{#2}}^{(#1)}} \newcommand{\Elsig}[2][1]{Σ_{{#2}}^{(#1)}} \newcommand{\Elsigh}[2][1]{\hat{Σ}_{{#2}}^{(#1)}} \newcommand{\pathP}{\mathop{{p}}} % Path-sampling process (generic) \newcommand{\pathPhb}{\mathop{{p}}_{\mathrm{Beta}}} % Path-sampling process (hierarchical beta) \newcommand{\interval}{\mathcal{I}} \newcommand{\Phiset}[1]{\{\Phi\}^{\small (#1)}} \newcommand{\Phipart}[1]{\{\mathcal{I}_Φ\}^{\small (#1)}} \newcommand{\qhset}[1]{\{\qh\}^{\small (#1)}} \newcommand{\Dqpart}[1]{\{Δ\qh_{2^{#1}}\}} \newcommand{\LsAzl}{\mathcal{L}_{\smash{{}^{\,*}_A},z,L}} \newcommand{\LsBzl}{\mathcal{L}_{\smash{{}^{\,*}_B},z,L}} \newcommand{\lsA}{l_{\smash{{}^{\,*}_A}}} \newcommand{\lsB}{l_{\smash{{}^{\,*}_B}}} \newcommand{\lsAz}{l_{\smash{{}^{\,*}_A},z}} \newcommand{\lsAzj}{l_{\smash{{}^{\,*}_A},z_j}} \newcommand{\lsAzo}{l_{\smash{{}^{\,*}_A},z^0}} \newcommand{\leAz}{l_{\smash{{}^{\,ε}_A},z}} \newcommand{\lsAez}{l_{\smash{{}^{*ε}_A},z}} \newcommand{\lsBz}{l_{\smash{{}^{\,*}_B},z}} 
\newcommand{\lsBzj}{l_{\smash{{}^{\,*}_B},z_j}} \newcommand{\lsBzo}{l_{\smash{{}^{\,*}_B},z^0}} \newcommand{\leBz}{l_{\smash{{}^{\,ε}_B},z}} \newcommand{\lsBez}{l_{\smash{{}^{*ε}_B},z}} \newcommand{\LaszL}{\mathcal{L}_{\smash{{}^{*}_a},z,L}} \newcommand{\lasz}{l_{\smash{{}^{*}_a},z}} \newcommand{\laszj}{l_{\smash{{}^{*}_a},z_j}} \newcommand{\laszo}{l_{\smash{{}^{*}_a},z^0}} \newcommand{\laez}{l_{\smash{{}^{ε}_a},z}} \newcommand{\lasez}{l_{\smash{{}^{*ε}_a},z}} \newcommand{\lhatasz}{\hat{l}_{\smash{{}^{*}_a},z}} \newcommand{\pasz}{p_{\smash{{}^{*}_a},z}} \newcommand{\paez}{p_{\smash{{}^{ε}_a},z}} \newcommand{\pasez}{p_{\smash{{}^{*ε}_a},z}} \newcommand{\phatsaz}{\hat{p}_{\smash{{}^{*}_a},z}} \newcommand{\phateaz}{\hat{p}_{\smash{{}^{ε}_a},z}} \newcommand{\phatseaz}{\hat{p}_{\smash{{}^{*ε}_a},z}} \newcommand{\Phil}[2][]{Φ_{#1|#2}} % Φ_{\la} \newcommand{\Philt}[2][]{\widetilde{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philhat}[2][]{\hat{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philsaz}{Φ_{\smash{{}^{*}_a},z}} % Φ_{\lasz} \newcommand{\Phileaz}{Φ_{\smash{{}^{ε}_a},z}} % Φ_{\laez} \newcommand{\Philseaz}{Φ_{\smash{{}^{*ε}_a},z}} % Φ_{\lasez} \newcommand{\mus}[1][1]{μ^{(#1)}_*} \newcommand{\musA}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\SigsA}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\musB}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\SigsB}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\musa}[1][1]{μ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Sigsa}[1][1]{Σ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Msah}{{\color{highlight-red}\mathcal{M}^{*}_a}} \newcommand{\Msazh}{{\color{highlight-red}\mathcal{M}^{*}_{a,z}}} \newcommand{\Meah}{{\color{highlight-blue}\mathcal{M}^{ε}_a}} \newcommand{\Meazh}{{\color{highlight-blue}\mathcal{M}^{ε}_{a,z}}} \newcommand{\lsazh}{{\color{highlight-red}l_{\smash{{}^{*}_a},z}}} \newcommand{\leazh}{{\color{highlight-blue}l_{\smash{{}^{ε}_a},z}}} \newcommand{\lseazh}{{\color{highlight-orange}l_{\smash{{}^{*ε}_a},z}}} \newcommand{\Philsazh}{{\color{highlight-red}Φ_{\smash{{}^{*}_a},z}}} % Φ_{\lasz} \newcommand{\Phileazh}{{\color{highlight-blue}Φ_{\smash{{}^{ε}_a},z}}} % Φ_{\laez} \newcommand{\Philseazh}{{\color{highlight-orange}Φ_{\smash{{}^{*ε}_a},z}}} % Φ_{\lasez} \newcommand{\emdstd}{\tilde{σ}} \DeclareMathOperator{\Mvar}{Mvar} \DeclareMathOperator{\AIC}{AIC} \DeclareMathOperator{\epll}{epll} \DeclareMathOperator{\elpd}{elpd} \DeclareMathOperator{\MDL}{MDL} \DeclareMathOperator{\comp}{COMP} \DeclareMathOperator{\Lognorm}{Lognorm} \DeclareMathOperator{\erf}{erf} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator{\Image}{Image} \DeclareMathOperator{\sgn}{sgn} \DeclareMathOperator{\SE}{SE} % standard error \DeclareMathOperator{\Unif}{Unif} \DeclareMathOperator{\Poisson}{Poisson} \DeclareMathOperator{\SkewNormal}{SkewNormal} \DeclareMathOperator{\TruncNormal}{TruncNormal} \DeclareMathOperator{\Exponential}{Exponential} \DeclareMathOperator{\exGaussian}{exGaussian} \DeclareMathOperator{\IG}{IG} \DeclareMathOperator{\NIG}{NIG} \DeclareMathOperator{\Gammadist}{Gamma} \DeclareMathOperator{\Lognormal}{Lognormal} \DeclareMathOperator{\Beta}{Beta} \newcommand{\sinf}{{s_{\infty}}}\)

Hide code cell source
import logging
import time
import numpy as np
import pandas as pd
import pint
import holoviews as hv

from numpy.typing import ArrayLike
from scipy import stats
from scipy import optimize
from scipy import integrate   # used below to integrate empirical PPFs

from collections import OrderedDict, namedtuple
from collections.abc import Callable
from dataclasses import dataclass, replace
from functools import cache, lru_cache, wraps, partial
from pathlib import Path
from types import SimpleNamespace
from typing import Literal
from warnings import filterwarnings, catch_warnings

from tqdm.notebook import tqdm
from addict import Dict
from joblib import Memory
from scityping.base import Dataclass
from scityping.functions import PureFunction
from scityping.numpy import RNGenerator
from scityping.scipy import Distribution
from scityping.pint import PintQuantity

logger = logging.getLogger(__name__)

EMD imports

import emdcmp as emd
import emdcmp.tasks
import emdcmp.viz

Project imports

import utils
import viz

Configuration#

The project configuration object allows modifying not just the configuration options for this project: it also exposes the config objects of those dependencies which use ValConfig.

from config import config

Dimensions#

Hide code cell source
ureg = pint.get_application_registry()
ureg.default_format = "~P"
if "photons" not in ureg: ureg.define("photons = count")
Q_  = pint.Quantity
K   = ureg.K
μm  = ureg.μm
nm  = ureg.nm
kW  = ureg.kW
c   = ureg.c
h   = ureg.planck_constant
kB  = ureg.boltzmann_constant
photons = ureg.photons

Bunits = ureg.kW / ureg.steradian / ureg.m**2 / ureg.nm
sunits = photons / Bunits

Notebook parameters#

# Dataset sizes
L_small    = 2**9     # 512
L_med      = 2**12    # 4096
L_large    = 2**15    # 32768  –  Unnecessarily large for most cases.
                      # Used only to check that a criterion saturates.
L_synth    = 2**12    # Dataset size for constructing empirical PPFs.
                      # Generally this is cheap, so we want to make this large enough
                      # to make numerical errors in the PPF negligible.
# Dataset parameters
data_λ_min   = 15*μm
#data_λ_min   = 20*μm
data_λ_max   = 30*μm
data_T       = 4000*K
data_noise_s = 1e5 * sunits  # Determines the variance of the Poisson noise
                             # (larger s ⇒ smaller variance)
data_B0      = 2.5e-6 * Bunits

# Fitting parameters (used for the illustrative temperature fits)
Nfits = 64

Plotting#

Hide code cell source
@dataclass
class colors(viz.ColorScheme):
    scale  : str = "#222222"
    data   : str = "#b2b2b2"  # Greyscale version of "high-constrast:yellow"
    RJ     : str = config.figures.colors["high-contrast"].red
    Planck : str = config.figures.colors["high-contrast"].blue
    calib_curves: hv.Palette = hv.Palette("YlOrBr", range=(0.1, .65), reverse=True)

@dataclass
class color_labels:
    data   : str = "grey"
    RJ     : str = "red"
    Planck : str = "blue"

dash_patterns = ["dotted", "dashed", "solid"]
sanitize = hv.core.util.sanitize_identifier_fn
hv.extension("matplotlib", "bokeh")
colors

Model definition#

Generative physical model#

To compare candidate models using the EMD criterion, we need three things:

  • A generative model for each candidate, also called a “forward model”.

  • A risk function for each candidate. When available, the negative log likelihood is often a good choice.

  • A data generative process. This may be an actual experiment, or a simulated experiment as we do here. In the case of a simulated experiment, we may use the shorthand “true model” for this, but it is important to remember that it is different from the candidate models.

Important

The true model is treated as a black box, because in real applications we don’t have access to it. We have no equations for the true model – only data. Consequently, there are no “true parameters” against which to compare, nor is the notion of “true risk” even defined. The only thing we can do with the true model is request new data.

Nevertheless, in the case of simulated experiments where we actually do have equations for the true model, it is still useful as a sanity check to compare parameters of the true and candidate models. However such a comparison cannot be part of any quantitative assessment.

Model signatures & implementation#

  • The data generating model should take one argument – an integer \(L\) – and return a dataset with \(L\) samples:

\[\begin{split}\begin{aligned} \texttt{data-model}&:& L &\mapsto \begin{cases} \bigl[(x_1, y_1), (x_2, y_2), \dotsc, (x_L, y_L)\bigr] \\ (xy)_1, (xy)_2, \dotsc, (xy)_L \end{cases} \end{aligned}\end{split}\]

Note

A process with no independent variable (like drawing from a static distribution) is equivalent to the \((xy)_i\) format with a dummy value for \(x\).

This dataset will normally be composed of pairs of independent (\(x\)) and dependent (\(y\)) variables. Whether these are returned as separate values \((x,y)\) or combined \((xy)\) is up to user preference.

  • The candidate models attempt to predict the set of \(\{y_i\}\) from the set of \(\{x_i\}\); we denote these predictions \(\{\hat{y}_i\}\). Therefore they normally take one of the following forms:

\[\begin{split}\begin{aligned} \texttt{candidate-model}&:& \{(x_i,y_i)\} &\mapsto \{\hat{y}_i\} \\ \texttt{candidate-model}&:& \bigl\{(xy)_i\bigr\} &\mapsto \{\hat{y}_i\} \end{aligned}\end{split}\]

In addition, they may also accept the simplified form

\[\begin{aligned} \texttt{candidate-model}&:& \{x_i\} &\mapsto \{\hat{y}_i\} \,. \end{aligned}\]

This can be done by inspecting the argument, to see if it matches the form \(\{(x_i,y_i)\}\) or \(\{x_i\}\).
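A minimal sketch of this argument inspection, using a hypothetical toy model (the notebook's own CandidateModel below uses the same pattern):

import numpy as np

def toy_candidate(data, slope=2.0):
    # Accept either the {x_i} form or the {(x_i, y_i)} form produced by a data model
    if isinstance(data, tuple) and len(data) == 2:
        x, _ = data          # {(x, y)} form: the y component is discarded
    else:
        x = data             # {x} form
    return slope * np.asarray(x)   # the predictions {ŷ_i}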

Important

The output of the data model must be a valid input for a candidate model. Generally the latter will disregard the \(y_i\) component when making its prediction \(\hat{y}_i\). (There may be cases where \(y_i\) is actually used, for example if we want to test models based on their one-step-ahead prediction.)

Important

Our implementation assumes that a) any arbitrary number of samples \(L\) can be requested, and b) all samples are of equivalent quality.

For example, with data provided by solving an ODE, the correct way to increase the number of samples is to keep the time step \(Δt\) fixed and to increase the integration time. Decreasing \(Δt\) would NOT work: although it results in more samples, they are also more correlated, and thus of “lesser quality”.

In contrast, for a static relationship like the radiance of a black body, we do want to keep the same upper and lower bounds but increase the density of points within those bounds. (Extending the bounds would mean testing a different physical regime.) For static relationships, points can be generated independently, so generating more points within the same interval does increase statistical power.

Note

In theory, data and candidate models can also be defined using only plain functions, but classes make it easier to control how the model is pickled. (Pickling is used to send data to multiprocessing subprocesses.)

def BRayleighJeans(λ, T, c=c, kB=kB): return 2*c*kB*T/λ**4
def BPlanck(λ, T, h=h, c=c, kB=kB): return 2*h*c**2/λ**5 / ( np.exp(h*c/(λ*kB*T)) - 1 )

phys_models = {
    "Rayleigh-Jeans":BRayleighJeans,
    "Planck"        :BPlanck
}
@dataclass(frozen=True)
class DataModel:
    T         : PintQuantity    # units: K
    λ_min     : PintQuantity    # units: nm
    λ_max     : PintQuantity    # units: nm
    phys_model: Literal["Rayleigh-Jeans", "Planck"]
    
    def __call__(self, L: int, rng=None) -> tuple[ArrayLike, ArrayLike]:
        λarr = np.linspace(self.λ_min, self.λ_max, L)
        # NB: Returning as tuple (rather than 2D array) allows to keep units
        return λarr, phys_models[self.phys_model](λarr, self.T).to(Bunits)

@dataclass(frozen=True)
class CandidateModel:
    """Model variant used to compute candidate predictions given observed data.
    Uses the {x} -> {ŷ} signature. Also accepts {(x,y)} -> {ŷ}.
    """
    phys_model: Literal["Rayleigh-Jeans", "Planck"]
    T         : PintQuantity    # units: K
    
    def __call__(self, λarr: ArrayLike, rng=None) -> tuple[ArrayLike, ArrayLike]:
        if isinstance(λarr, tuple):  # Allow passing output of data directly
            assert len(λarr) == 2, "CandidateModel expects either a single array `λarr`, or a tuple `(λarr, Barr)`."
            λarr = λarr[0]
        return λarr, phys_models[self.phys_model](λarr, self.T).to(Bunits)

Generative observation model#

An observation model is a function which takes both the independent and dependent variables (generically \(x\) and \(y\)), and returns a transformed value \(\tilde{y}\). Processes which are time-independent (such as simple additive Gaussian noise) can simply ignore their \(x\) argument.

The pattern for defining a noise model is a pair of nested functions. The outer function sets the noise parameters, the inner function applies the noise to data:

\[\begin{split}\begin{aligned} &\texttt{def public-noise-name}([arguments...], rng): \\ &\qquad\texttt{def }ξ(x, y, rng=rng): \\ &\qquad\qquad\texttt{return}\; \tilde{y} \\ &\qquad\texttt{return}\; ξ \end{aligned}\end{split}\]

For consistency, noise arguments should always include a random number generator rng, even if the noise is actually deterministic. The value passed as rng should be a Generator instance, as created by numpy.random.default_rng or numpy.random.Generator.
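A minimal Python sketch of this pattern, assuming simple additive Gaussian noise (the notebook itself uses frozen dataclasses with __call__ below, which follow the same calling convention):

import numpy as np

def additive_gaussian_noise(σ, rng):
    """Outer function: fixes the noise parameters."""
    def ξ(x, y, rng=rng):
        """Inner function: applies the noise to the data."""
        return y + σ * rng.normal(size=np.shape(y))
    return ξ

noise = additive_gaussian_noise(σ=0.1, rng=np.random.default_rng(0))
y_noisy = noise(x=None, y=np.ones(5))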

We use two types of noise for this example:

Poisson noise

is used to generate the actual data. (Eq. 2.33)

(1)#\[\tilde{\Bspec} \mid \Bspec \sim \frac{1}{s}\Poisson\bigl(s \, \Bspec\bigr) + \Bspec_0\]
Gaussian noise

is used by the candidate models (since we use squared difference to quantify error)

\[\tilde{\Bspec} \mid \Bspec \sim \nN\bigl(\Bspec + \Bspec_0, σ^2\bigr)\]
@dataclass(frozen=True)
class poisson_noise:
    label="poisson"
    B0 : PintQuantity
    s  : PintQuantity
    rng: RNGenerator|None=None
    def __call__(self, λ_B: tuple[ArrayLike, ArrayLike], rng: np.random.Generator=None) -> tuple[ArrayLike, ArrayLike]:
        λ, B = λ_B
        rng = rng or self.rng
        sB = self.s*B
        assert sB.dimensionless, "Dimension error: s*B should be dimensionless"
        assert rng, "No RNG was specified"
        return λ, rng.poisson(sB.m) / self.s + self.B0
        #return xy + self.rv.rvs(size=xy.shape, random_state=rng)

@dataclass(frozen=True)
class gaussian_noise:
    label="gaussian"
    B0 : PintQuantity
    σ  : PintQuantity
    rng: RNGenerator|None=None
    def __call__(self, λ_B: tuple[ArrayLike, ArrayLike], rng: np.random.Generator=None) -> tuple[ArrayLike, ArrayLike]:
        λ, B = λ_B
        rng = rng or self.rng
        assert rng, "No RNG was specified"
        return λ, B + self.B0 + self.σ*rng.normal(0, 1, size=B.shape)

Full data model definition#

Our full data generating model is the Planck model composed with Poisson noise.

Important

The purpose attribute serves as our RNG seed. We take care not to make the RNG itself an attribute: sending random number generators is fragile and memory-hungry, while sending seeds is cheap and easy. Within the Calibrate task, each data model is shipped to an MP subprocess with pickle: by making Dataset callable, we can ship the Dataset instance (which only needs to serialize a seed) instead of the utils.compose instance (which would serialize an RNG).

Hint

utils.compose(g, f) works like \(g \circ f\).
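To make the hint concrete, here is a hypothetical minimal sketch of what such a compose helper could look like; the project's actual utils.compose may differ in its details (e.g. how it forwards keyword arguments, or whether it supports indexing):

class compose_sketch:
    """compose_sketch(g, f)(x, **kw) == g(f(x, **kw), **kw)"""
    def __init__(self, g, f):
        self.g, self.f = g, f
    def __getitem__(self, i):
        return (self.g, self.f)[i]   # [0] → outer (noise), [1] → inner (physical model)
    def __call__(self, x, **kwargs):
        return self.g(self.f(x, **kwargs), **kwargs)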

data_model = utils.compose(poisson_noise(s=data_noise_s, B0=data_B0),
                           DataModel(λ_min=data_λ_min, λ_max=data_λ_max, T=data_T, phys_model="Planck"))
Hide code cell source
@dataclass(frozen=True)
class Dataset:
    purpose: str                    # Used to seed the RNG
    L      : int
    λmin   : PintQuantity
    λmax   : PintQuantity
    s      : PintQuantity
    T      : PintQuantity=data_T
    B0     : PintQuantity=0*Bunits
    phys_model: str="Planck"        # Only changed during calibration
    @property
    def rng(self): return utils.get_rng("UV", self.purpose)
    @property
    def data_model(self):
        return utils.compose(
            poisson_noise(s=self.s, B0=self.B0, rng=self.rng),
            DataModel(λ_min=self.λmin, λ_max=self.λmax, T=self.T, phys_model=self.phys_model) )
    @cache
    def get_data(self, rng=None):
        return self.__call__(self.L, rng)
    def __call__(self, L, rng=None):
        return self.data_model(L, rng=(rng or self.rng))

Observed data set#

Using the data model, we generate a unique dataset which simulates the real recorded data. The goal for the rest of this notebook is to compare candidate models against this dataset.

λ_data, B_data = data_model(L_med, rng=utils.get_rng("UV", "data"))
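Later cells (the fit-error estimate and the sample-count check) refer to this dataset through an observed_dataset object; assuming it simply wraps the same parameters as data_model above in a Dataset instance, a consistent definition is:

# Assumed definition: a `Dataset` with the same parameters as `data_model` above,
# so that later cells can reuse (and vary) the observed dataset.
observed_dataset = Dataset(purpose="data", L=L_med,
                           λmin=data_λ_min, λmax=data_λ_max,
                           s=data_noise_s, T=data_T, B0=data_B0)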

Loss function#

Loss functions follow from the observation model assumed by a candidate model. In this example we assume Gaussian observation noise and choose the risk to be the negative log likelihood – so for a model candidate \(a\), observed data point \(\tilde{\Bspec}\) and model prediction \(\hat{\Bspec}\), the risk \(q\) is given by

\[q_a(λ,\tilde{\Bspec}) = -\log p\Bigl(\tilde{\Bspec} \,\Bigm|\, \hat{\Bspec}_{a}(λ; T), σ\Bigr) = \log \sqrt{2π}σ + \frac{(\tilde{\Bspec}-\hat{\Bspec}_{a}(λ; T))^2}{2σ^2} \,.\]

The variables \(T\) and \(σ\) are model parameters – \(T\) is a parameter of the physical model itself, and \(σ\) of the observation model we assumed. To get the fairest comparison between models, we fit \(σ\) to the data.
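As a quick sanity check (a self-contained sketch with arbitrary numbers), the closed-form expression above is simply the negative of the Gaussian log-pdf, which is what the Q class below evaluates via scipy:

import numpy as np
from scipy import stats

σ_, resid = 2.0, 0.7   # arbitrary illustrative values
q_closed = np.log(np.sqrt(2*np.pi)*σ_) + resid**2 / (2*σ_**2)
q_scipy  = -stats.norm.logpdf(resid, loc=0, scale=σ_)
assert np.isclose(q_closed, q_scipy)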

Important

Observation models – like the Gaussian noise we assume here – are added by necessity: we mostly care about the physical model (here \(\Bspec_{\mathrm{RJ}}\) or \(\Bspec_{\mathrm{P}}\)), but since that doesn’t predict the data perfectly, we need the observation model to account for discrepancies. Therefore, barring a perfect physical model, a candidate model is generally composed of both:

\[\text{candidate model} = \text{physical model} + \text{observation model} \,.\]

(If the inputs to the physical model are also unknown, then the r.h.s. may also have a third input model term.)

The concrete takeaway is that to define the candidate models, we need to determine the value not only of \(T\) but also of \(σ\). Both of these must be fitted.

It is also important to tailor the observation model to the specifics of the system – consider for example the difference between testing a model using 1000 measurements from the same device, versus using 1000 single measurements from 1000 different devices. In the latter case, it might make more sense to use a posterior probability as an observation model.

Loss functions \(q\) typically only require the observed data as argument, so depending whether data_model() returns \(x\) and \(y\) separately as x and y or as single object xy, the signature for \(q\) should be either

q(x, y)

or

q(xy)

Additional parameters can be fixed at definition time.

In this notebook we use the q(xy) signature, with xy being the (λ, B) tuple returned by the data model.

@dataclass(frozen=True)
class Q:
    """Loss of a model assuming Gaussian observation noise.

    :param:candidate_model: One of the phys_model labels; will be passed to `CandidateModel`.
    :param:σ: The standard deviation of the assumed Gaussian noise.
        Lower values mean that deviations between data and model are
        less strongly penalized.
    """
    candidate_model: str
    σ              : float|PintQuantity=1.*Bunits   # Used in fitting function, so must
    T              : float|PintQuantity=data_T      # support plain floats as well

    def __post_init__(self):
        # If parameters were given as plain floats, convert to default units
        if not isinstance(self.σ, pint.Quantity):
            object.__setattr__(self, "σ", self.σ * Bunits)   # IMO bypassing frozen dataclass in __post_init__ is acceptable
        if not isinstance(self.T, pint.Quantity):
            object.__setattr__(self, "T", self.T * ureg.kelvin)
        # We use σ in logpdf, which needs a plain float. Might as well convert it now
        object.__setattr__(self, "σ", self.σ.to(Bunits).magnitude)
    def __call__(self, λ_B):
        λ, Bdata = λ_B
        physmodel = CandidateModel(self.candidate_model, T=self.T)
        _, Btheory = physmodel(λ)
        return -stats.norm.logpdf((Bdata - Btheory).m, loc=0, scale=self.σ)

Candidate models#

As mentioned above, the observation model parameters (here \(σ\)) are also part of the candidate models, so to construct candidate models we also need to set \(σ\). We do this by maximizing the likelihood of \(σ\).

The true observation model is not always the best

It would actually be a poor choice to use the log likelihood of the true Poisson noise to compute the risk: since the Poisson distribution is discrete, any discrepancy between model and data would lead to an infinite risk. In general, even when the data are generated with discrete noise, candidate models must be assessed with a continuous noise model. Likewise, an observation model with finite support (such as a uniform distribution) is often a poor choice, since data points outside that support will also cause the risk to diverge.

On the flip side, some distributions are perfectly fine for fitting but make things more challenging when used to generate data. For example, when estimating the expected negative log likelihood from samples, the number of samples required is about 10x greater if they are generated from a Cauchy compared to a Normal distribution.

Hint

Fitting \(\log σ\) and \(\log T\) is justified on ontological grounds: they are positive quantities, and we are fitting their scale as much as their precise value. Fitting logarithms is also hugely beneficial for numerical stability.

For similar reasons, we use priors to regularize the fit and prevent runoff towards values like 5 K or \(10^{10}\) K. This is also easily justified by the fact that an experimenter would have some idea of the temperature of their black body source and of the noise level of their device.

Hide code cell source
FitResult = namedtuple("FitResult", ["σ", "T"])
def fit_gaussian_σT(data) -> Dict[str, FitResult]:
    """
    The candidate models depend on the temperature T and use a Gaussian
    observation model with std dev σ to compute their risk.
    Both T and σ are chosen by maximizing the likelihood.
    """
    
    fitted_σT = Dict()

    log2_T0    = np.log2(data_T.m)
    priorT_std = 12  # Broad prior on log₂ T (log₂ 4000 ≈ 12)
    def f(log2_σT, candidate_model, _λ_B):
        σ, T = 2**log2_σT
        risk = Q(candidate_model, σ, T=T)(_λ_B).mean()
        priorσ = 2**(-log2_σT[0]/128)  # Soft floor on σ so it cannot go too low
        priorT = (log2_σT[1] - log2_T0)**2 / (2*priorT_std**2)  
        return risk + priorσ + priorT
    
    res = optimize.minimize(f, np.log2([1e-4, data_T.m]), ("Rayleigh-Jeans", data), tol=1e-5)
    σ, T = 2**res.x
    fitted_σT.RJ = FitResult(σ*Bunits, T*K)
    
    res = optimize.minimize(f, np.log2([1e-4, data_T.m]), ("Planck", data))
    σ, T = 2**res.x
    fitted_σT.Planck = FitResult(σ*Bunits, T*K)

    return fitted_σT
fitted = fit_gaussian_σT((λ_data, B_data))
candidate_models = Dict({
    "Rayleigh-Jeans": utils.compose(gaussian_noise(0, fitted.RJ.σ), 
                                    CandidateModel("Rayleigh-Jeans", T=fitted.RJ.T)),
    "Planck": utils.compose(gaussian_noise(0, fitted.Planck.σ),
                            CandidateModel("Planck", T=fitted.Planck.T))
})
Qrisk = Dict({
    "Rayleigh-Jeans": Q(candidate_model="Rayleigh-Jeans", σ=fitted.RJ.σ),
    "Planck"        : Q(candidate_model="Planck",         σ=fitted.Planck.σ)
})
We could alternatively use moment matching, which yields similar results.

In that case, since

\[\operatorname{Var}\biggl[\frac{1}{s} \Poisson(s \Bspec) \biggr] = \frac{\Bspec}{s} \,,\]

we would either set \(σ\) to

\[σ = \sqrt{\frac{\langle \Bspec \rangle_{\text{data}}}{s}} \approx 3\times 10^{-5} \,\mathrm{kW\,m^{-2}\,nm^{-1}\,sr^{-1}}\]

or

\[σ(\Bspec) = \sqrt{\frac{\Bspec}{s}} \approx 3\times 10^{-5} \,\mathrm{kW\,m^{-2}\,nm^{-1}\,sr^{-1}} \,.\]

The first expression gives very similar results to the maximum likelihood fit. The second remains more accurate further from the data used for fitting. Of course, both presume that we know the parameters of the data generation process.

σ = np.sqrt(B_data.mean()/data_noise_s)

candidate_models = Dict(
    RJ     = utils.compose(gaussian_noise(0, σ), CandidateModel("Rayleigh-Jeans", T=data_T)),
    Planck = utils.compose(gaussian_noise(0, σ), CandidateModel("Planck", T=data_T))
)
Qrisk = Dict(
    RJ    =Q(candidate_model="Rayleigh-Jeans", σ=σ),
    Planck=Q(candidate_model="Planck",         σ=σ)
)

However, moment matching isn’t as reliable when we don’t know the data generation model. Therefore it does not work as well during calibration experiments.

Hide code cell source
rng = utils.get_rng("uv", "compare ML vs moment matching")
σ_ml = fitted.Planck.σ.m  # max likelihood
σ_mm = np.sqrt(B_data.mean()/data_noise_s).m  # moment matched
_, B = poisson_noise(s=data_noise_s, B0=0*Bunits, rng=rng)((None, B_data))
ξ = (B - B_data).m

centers = np.sort(np.unique(ξ)); Δ = np.diff(centers).mean()
centers = np.concatenate((centers[:1], centers[1:][np.diff(centers)>Δ]))  # Combine values which are actually very close
edges = np.concatenate((centers[:1] - np.diff(centers)[0],
                        (centers[:-1] + centers[1:])/2,
                        centers[-1:] + np.diff(centers)[-1:]))

bins, edges = np.histogram(ξ, bins=edges, density=True)
pois = hv.Histogram((bins, edges), kdims="ξ", label="data").opts(xlabel="deviation from theory")
ξarr = np.linspace(edges[0], edges[-1])
norm_maxL  = hv.Curve(zip(ξarr, stats.norm(0, σ_ml).pdf(ξarr)), label="max likelihood")
norm_match = hv.Curve(zip(ξarr, stats.norm(0, σ_mm).pdf(ξarr)), label="moment matched")
fig = pois*norm_maxL*norm_match
fig.opts(
    hv.opts.Histogram(line_color=None, fill_color=colors.data, backend="bokeh"),
    hv.opts.Curve(f"Curve.{sanitize('max likelihood')}", backend="bokeh",
                  color=config.figures.colors.vibrant.orange),
    hv.opts.Curve(f"Curve.{sanitize('moment matched')}", backend="bokeh",
                  color=config.figures.colors.vibrant.blue, line_dash="dashed"),
    hv.opts.Overlay(width=500, height=300, legend_position="top", backend="bokeh")
)
hv.output(fig, backend="bokeh")

Hint

utils.compose(g, f) works like \(g \circ f\).

Example spectra#

Hide code cell source
def plot_data(L, T=data_T, λmin=data_λ_min, λmax=data_λ_max, s=data_noise_s, B0=0*Bunits):
    """
    Alternative signatures:
    
    - plot_data(Dataset)
    - plot_data(L, utils.compose[Obsmodel, DataModel])
    - plot_data(L, DataModel, s=s, B0=B0)
    """
    if isinstance(L, Dataset):
        𝒟 = L
        del L, T, λmin, λmax, s, B0  # Ensure no code accidentally uses these variables
    elif isinstance(T, utils.compose):
        𝒟 = Dataset("data", L, T[1].λ_min, T[1].λ_max, T[0].s, T[1].T, T[0].B0)
        del T, λmin, λmax, s, B0     # Ensure no code accidentally uses these variables
    elif isinstance(T, DataModel):
        𝒟 = Dataset("data", L, T.λ_min, T.λ_max, s, T.T, B0)
        del T, λmin, λmax            # Ensure no code accidentally uses these variables
    else:
        𝒟 = Dataset("data", L, λmin, λmax, s, T, B0)
    λ_data, B_data = 𝒟.get_data()
    
    candidate_spectra = Dict(
        RayleighJeans = CandidateModel("Rayleigh-Jeans", T=𝒟.T)(λ_data),
        Planck        = CandidateModel("Planck", T=𝒟.T)(λ_data)
    )
    
    scat_data   = hv.Scatter({"λ":λ_data.m, "B":B_data.m}, kdims=[dims.λ], vdims=[dims.B],
                              label="observed data")
    λ, B = candidate_spectra.RayleighJeans
    curve_RJ     = hv.Curve({"λ":λ.m, "B":B.m}, kdims=[dims.λ], vdims=[dims.B],
                            group="candidate", label="Rayleigh-Jeans")
    λ, B = candidate_spectra.Planck
    curve_Planck = hv.Curve({"λ":λ.m, "B":B.m}, kdims=[dims.λ], vdims=[dims.B],
                            group="candidate", label="Planck")
    
    return scat_data * curve_RJ * curve_Planck
Hide code cell source
Tlist = [data_T]
λmin_list = [data_λ_min]
slist = [data_noise_s]
#B0list = [0*Bunits]
B0list = [data_B0]
# Uncomment the lines below to check the bounds of the calibration distribution
# Uncomment also the line at the bottom of this cell
# Tlist = [1000, 3000, 5000]*K
# λmin_list = [5, data_λ_min]*μm
# slist = np.array([2**8, 2**6, 2**3, 1])*data_noise_s
#B0list = 1e-4 * np.array([-0.025, 0, 0.025]) * Bunits

panelA = hv.HoloMap({(T.m, λmin.m, s.m, B0.m): plot_data(L_small, T=T, λmin=λmin, s=s, B0=B0)
                     for T in Tlist for λmin in λmin_list for s in slist for B0 in B0list},
                    kdims=["T", "λmin", "s", "B₀"])
panelB = hv.HoloMap({(T.m, λmin.m, s.m, B0.m): plot_data(L_large, T=T, λmin=λmin, s=s, B0=B0)
                     for T in Tlist for λmin in λmin_list for s in slist for B0 in B0list},
                    kdims=["T", "λmin", "s", "B₀"])

fig = panelA + panelB
xticks = [14, 16, 18, 20, 22, 24, 26, 28, 30]
fig.opts(
    hv.opts.Scatter(edgecolor="none", facecolors=colors.data,
                    hooks=[viz.set_xticks_hook(xticks), viz.despine_hook],
                    backend="matplotlib"),
    hv.opts.Scatter(color=colors.data, xticks=xticks, backend="bokeh"),
    hv.opts.Curve(f"Candidate.{sanitize('Rayleigh-Jeans')}", color=colors.RJ, backend="matplotlib"),
    hv.opts.Curve(f"Candidate.{sanitize('Rayleigh-Jeans')}", color=colors.RJ, backend="bokeh"),
    hv.opts.Curve("Candidate.Planck", color=colors.Planck, linestyle="dashed", backend="matplotlib"),
    hv.opts.Curve("Candidate.Planck", color=colors.Planck, line_dash="dashed", backend="bokeh"),
    hv.opts.Layout(hspace=0.05, vspace=0.05, fontscale=1.3,
                   fig_inches=config.figures.matplotlib.defaults.fig_inches,
                   sublabel_position=(0.3, .8), sublabel_format="({alpha})",
                   backend="matplotlib")
)#.opts(fig_inches=config.figures.matplotlib.defaults.fig_inches, backend="matplotlib")
panelB.opts(
    hv.opts.Overlay(show_legend=False),
)
panelA.opts(
    hv.opts.Curve(hooks=[viz.xaxis_off_hook]),  # Have to do this last to leave panelB untouched
)

#hv.output(fig.opts(legend_position="right", width=500, clone=True, backend="bokeh"), backend="bokeh")
fig.cols(1)

# Use this when checking calibration distribution
#panelA.opts(framewise=True)
Hide code cell source
# Rearrange figure for HTML output
#panelA.opts(hv.opts.Curve(hooks=[]))  # Make xaxis visible again
#panelB.opts(hv.opts.Curve(hooks=[viz.yaxis_off_hook]))  # Hide yaxis instead
panelA.opts(hv.opts.Scatter(
                hooks=[viz.set_yticks_hook([0, 0.0004, 0.0008]),
                       viz.set_yticklabels_hook(["0", "", "0.0008"]),
                       viz.xaxis_off_hook,
                       viz.ylabel_shift_hook(20),
                       viz.despine_hook])
           )
panelB.opts(hv.opts.Curve(
                hooks=[viz.set_yticks_hook([0, 0.0004, 0.0008]),
                       viz.set_yticklabels_hook(["0", "", "0.0008"]),
                       viz.ylabel_shift_hook(10)]))
figcol = panelA * hv.Text(30, 0.00055, f"$s = {viz.format_pow10(data_noise_s, format='latex')}$\n\n$L = {viz.format_pow2(L_small, format='latex')}$", halign="right") \
         + (panelB * hv.Text(30, 0.00055, f"$s = {viz.format_pow10(data_noise_s, format='latex')}$\n\n$L = {viz.format_pow2(L_large, format='latex')}$", halign="right")).opts(
             show_legend=False)
figcol.opts(
    hv.opts.Layout(hspace=0.05, vspace=0.05, fontscale=1.3,
                   fig_inches=7.5/2,
                   sublabel_position=(0.3, .8), sublabel_format="({alpha})",
                   backend="matplotlib"),
    
).cols(1)

Standard error of fits#

To get an estimate of the fitting error for \(T\), we repeat the fit multiple times and use a bootstrap estimate. Fits also estimate \(σ\), the standard deviation of the Gaussian noise process assumed by candidate models.

def f(log2σ_log2T, phys_model, data):
    return Q(phys_model, *(2**log2σ_log2T))(data).mean()
def fit(phys_model, data_model, L, rng):
    res = optimize.minimize(f, [-16., 12.], (phys_model, data_model(L, rng=rng)))
    assert res.success
    return 2**res.x
def do_fits(phys_model, data_model, L):
    rng = utils.get_rng("uv", "T fit", L)
    return np.array(
        [fit(phys_model, data_model, L, rng)
         for _ in tqdm(range(Nfits), f"{phys_model} - {L=}")])
data_model = observed_dataset.data_model
fit_results = {(phys_model, L): do_fits(phys_model, data_model, L)
               for phys_model in ["Rayleigh-Jeans", "Planck"]
               for L in [L_small, L_large]}
Hide code cell content
df = pd.DataFrame({(phys_model, L, varname): values
                  for (phys_model, L), results in fit_results.items()
                  for varname, values in zip(["σ", "T"], results.T)})
df
Rayleigh-Jeans Planck
512 32768 512 32768
σ T σ T σ T σ T
0 0.000040 3712.176350 0.000042 3627.201766 0.000039 4119.279150 0.000041 4033.662057
1 0.000040 3598.146288 0.000042 3627.075854 0.000040 4004.868663 0.000041 4033.533052
2 0.000043 3570.100175 0.000042 3621.462061 0.000043 3976.752135 0.000042 4027.901580
3 0.000043 3670.005400 0.000041 3625.281622 0.000042 4076.998385 0.000041 4031.726293
4 0.000040 3614.520627 0.000042 3618.944033 0.000039 4021.326721 0.000042 4025.371380
... ... ... ... ... ... ... ... ...
59 0.000040 3647.234458 0.000042 3618.207936 0.000040 4054.093362 0.000041 4024.637786
60 0.000042 3653.789740 0.000042 3620.214110 0.000042 4060.695994 0.000042 4026.651163
61 0.000041 3678.976048 0.000042 3621.369883 0.000041 4085.960738 0.000041 4027.812920
62 0.000040 3598.848617 0.000042 3625.025855 0.000040 4005.561838 0.000042 4031.468355
63 0.000039 3611.644723 0.000042 3631.805397 0.000039 4018.392016 0.000042 4038.282365

64 rows × 8 columns

Hide code cell source
fit_stats = pd.DataFrame({"mean": df.mean(axis="index"),
              "std": df.std(axis="index")}).T
fit_stats.style.format(partial(viz.format_scientific, sig_digits=4, min_power=5))
  Rayleigh-Jeans Planck
  512 32768 512 32768
  σ T σ T σ T σ T
mean 4.184×10⁻⁵ 3635 4.185×10⁻⁵ 3624 4.165×10⁻⁵ 4041 4.165×10⁻⁵ 4031
std 1.669×10⁻⁶ 46.24 2.079×10⁻⁷ 5.324 1.671×10⁻⁶ 46.40 2.056×10⁻⁷ 5.342

Candidate model PPFs#

In order to create a set of risk samples for each candidate model, we define for each a generative model which includes the expected noise (in this case additive Gaussian noise with parameter \(σ\) fitted above). For each model, we then generate a synthetic dataset, evaluate the risk \(q\) at every data point, and get a distribution for \(q\). We represent this distribution by its quantile function, a.k.a. percent point function (PPF).

Hint

Note how we use different random seeds for each candidate model to avoid accidental correlations.

def generate_synth_samples(model: CandidateModel, L_synth: int=L_synth, λmin: float=data_λ_min, λmax: float=data_λ_max):
    rng = utils.get_rng("uv", "synth_ppf", model[1].phys_model)
    λarr = np.linspace(λmin, λmax, L_synth)
    return model(λarr, rng=rng)
synth_ppf = Dict({
    "Rayleigh-Jeans": emd.make_empirical_risk_ppf(
        Qrisk["Rayleigh-Jeans"](generate_synth_samples(candidate_models["Rayleigh-Jeans"]))),
    "Planck": emd.make_empirical_risk_ppf(
        Qrisk["Planck"](generate_synth_samples(candidate_models["Planck"])))
})
Hide code cell source
Φarr = np.linspace(0, 1, 200)
curve_synth_ppfRJ = hv.Curve(zip(Φarr, synth_ppf["Rayleigh-Jeans"](Φarr)), kdims=dims.Φ, vdims=dims.q, group="synth", label="synth PPF (Rayleigh-Jeans)")
curve_synth_ppfP = hv.Curve(zip(Φarr, synth_ppf["Planck"](Φarr)), kdims=dims.Φ, vdims=dims.q, group="synth", label="synth PPF (Planck)")
fig_synth = curve_synth_ppfRJ * curve_synth_ppfP
# Plot styling
curve_synth_ppfRJ.opts(color=colors.RJ)
curve_synth_ppfP.opts(color=colors.Planck, linestyle="dashdot")
fig_synth.opts(hv.opts.Curve(linewidth=3, backend="matplotlib"),
               hv.opts.Overlay(show_title=False, legend_position="top_left"),
               hv.opts.Overlay(legend_position="right", fontscale=1.75),
               hv.opts.Overlay(fig_inches=4, backend="matplotlib"))

The EMD criterion works by comparing a synthetic PPF with a mixed PPF. The mixed PPF is obtained with the same risk function but evaluated on the actual data.

  • If the theoretical models are good, differences between synthetic and mixed PPFs can be quite small. They may only be visible by zooming into the plot. This is fine – in fact, models which are very close to each other are easier to calibrate, since it is easier to generate datasets for which either model is an equally good fit.

  • Note that the mixed PPF curves lie below the synthetic ones at low \(Φ\). Although there are counter-examples (e.g. if a model overestimates the variance of the noise), this is generally expected, especially if models are fitted by minimizing the expected risk.

mixed_ppf = Dict({
    "Rayleigh-Jeans": emd.make_empirical_risk_ppf(Qrisk["Rayleigh-Jeans"]((λ_data, B_data))),
    "Planck"        : emd.make_empirical_risk_ppf(Qrisk["Planck"]((λ_data, B_data)))
})
Hide code cell source
curve_mixed_ppfRJ = hv.Curve(zip(Φarr, mixed_ppf["Rayleigh-Jeans"](Φarr)), kdims=dims.Φ, vdims=dims.q, group="mixed", label="mixed PPF (Rayleigh-Jeans)")
curve_mixed_ppfP = hv.Curve(zip(Φarr, mixed_ppf["Planck"](Φarr)), kdims=dims.Φ, vdims=dims.q, group="mixed", label="mixed PPF (Planck)")
fig_emp = curve_mixed_ppfRJ * curve_mixed_ppfP
fig_emp.opts(hv.opts.Curve("mixed", linestyle="dashed", backend="matplotlib"),
             hv.opts.Curve("mixed", line_dash="dashed", backend="bokeh"))
curve_mixed_ppfRJ.opts(color=colors.RJ)
curve_mixed_ppfP.opts(color=colors.Planck)

panel = fig_synth * fig_emp
fig = panel# + panel.redim.range(risk=(0.5, 1.5))
fig.opts(hv.opts.Overlay(fontscale=1.75, fig_inches=4, legend_position="upper_left", backend="matplotlib"),
         hv.opts.Overlay(width=600, legend_position="top_left", backend="bokeh"),
         hv.opts.Layout(sublabel_format=""))
#hv.output(fig.redim.range(q=(-10, 200)), backend="bokeh")
hv.output(fig, backend="bokeh")

Hint

We recommend always inspecting quantile functions visually. For example, if the risk function is continuous, then we expect the PPF to be smooth (since it involves integrating the risk) – if this isn’t the case, then we likely need more samples to get a reliable estimate.

def get_ppfs(_dataset, _fitted: None|Dict[str, FitResult]=None) -> tuple[Dict[str, emd.interp1d], Dict[str, emd.interp1d]]:
    """
    Returns a total of four quantile functions (PPFs) organized into two dictionaries:
    
    - The first contains mixed PPFs.
    - The second contains synthetic PPFs.

    If the fitted σ and T are not given through `_fitted`, they are obtained with
    a call to `fit_gaussian_σT`.

    Dictionary keys are the labels used to identify the models.
    """
    _data = _dataset.get_data()
    if _fitted is None:
        _fitted = fit_gaussian_σT(_data)
    _candidate_models = Dict({
        "Rayleigh-Jeans": utils.compose(gaussian_noise(0, _fitted.RJ.σ),
                                      CandidateModel("Rayleigh-Jeans", T=_fitted.RJ.T)),
        "Planck"       : utils.compose(gaussian_noise(0, _fitted.Planck.σ),
                                      CandidateModel("Planck", T=_fitted.Planck.T))
    })
    _Qrisk = Dict({
        "Rayleigh-Jeans": Q(candidate_model="Rayleigh-Jeans", σ=_fitted.RJ.σ),
        "Planck": Q(candidate_model="Planck",         σ=_fitted.Planck.σ)
    })

    _synth_ppf = Dict({
        "Rayleigh-Jeans": emd.make_empirical_risk_ppf(
            _Qrisk["Rayleigh-Jeans"](
                generate_synth_samples(_candidate_models["Rayleigh-Jeans"],
                                       λmin=_dataset.λmin, λmax=_dataset.λmax))),
        "Planck": emd.make_empirical_risk_ppf(
            _Qrisk["Planck"](
                generate_synth_samples(_candidate_models["Planck"],
                                       λmin=_dataset.λmin, λmax=_dataset.λmax)))
    })

    _mixed_ppf = Dict({
        "Rayleigh-Jeans": emd.make_empirical_risk_ppf(
            _Qrisk["Rayleigh-Jeans"](_data)),
        "Planck": emd.make_empirical_risk_ppf(
            _Qrisk["Planck"](_data))
    })

    return _mixed_ppf, _synth_ppf

How many samples do we need?#

Samples are used ultimately to estimate the expected risk. This is done in two ways:

  • By integrating the PPF.

  • By directly averaging the risk of each sample.

When computing the \(\Bemd{AB;c}\) criterion we use the first approach. During calibration we compare this with \(\Bconf{AB}\), which uses the second approach. When computing \(\Bconf{AB}\), we ideally use enough samples to reliably determine which of \(A\) or \(B\) has the higher expected risk.

In the figure below we show the expected risk as a function of the number of samples, computed either by constructing the PPF from samples and then integrating it (left) or averaging the samples directly (right).

The takeaway from this verification is that a value Linf=10000 (\(\approx 2^{13}\)) should be appropriate for the calibration.

Rvals_ppf = {"Rayleigh-Jeans": {}, "Planck": {}}
Rvals_avg = {"Rayleigh-Jeans": {}, "Planck": {}}
for model in Rvals_ppf:
    for _L in np.logspace(2, 6, 30):  # L ranges from 10² to 10⁶ (log-spaced)
        _L = int(_L)
        if _L % 2 == 0: _L+=1  # Ensure L is odd
        # q_list = Qrisk[model](data_model(_L, rng=nsamples_rng))
        q_list = Qrisk[model](
            replace(observed_dataset, L=_L, purpose=f"n samples ppf - {model}")
            .get_data())
        ppf = emd.make_empirical_risk_ppf(q_list)
        Φ_arr = np.linspace(0, 1, _L+1)
        Rvals_ppf[model][_L] = integrate.simpson(ppf(Φ_arr), x=Φ_arr)
        Rvals_avg[model][_L] = q_list.mean()

Note

Although these curves look identical, they really are computed in two different ways. Their similarity confirms that estimating the expected risk by empirically constructing the quantile function (PPF) and then integrating it is just as accurate as averaging samples.

Hide code cell source
curves = {"ppf": [], "avg": []}
for model in Rvals_ppf:
    curves["ppf"].append(
        hv.Curve(list(Rvals_ppf[model].items()),
                 kdims=hv.Dimension("L", label="num. samples"),
                 vdims=hv.Dimension("R", label="exp. risk"),
                 label=f"{model} model - true samples"))
    curves["avg"].append(
        hv.Curve(list(Rvals_avg[model].items()),
                 kdims=hv.Dimension("L", label="num. samples"),
                 vdims=hv.Dimension("R", label="exp. risk"),
                 label=f"{model} model - true samples"))
fig_ppf = hv.Overlay(curves["ppf"]).opts(title="Integrating $q$ PPF")
fig_avg = hv.Overlay(curves["avg"]).opts(title="Averaging $q$ samples")
fig_ppf.opts(show_legend=False)
fig = fig_ppf + fig_avg
# Plot styling
fig.opts(
    hv.opts.Overlay(logx=True, fig_inches=5, aspect=3, fontscale=1.7, legend_position="right", backend="matplotlib")
)

Calibration#

Calibration distributions#

Calibration is a way to test that \(\Bemd{AB;c}\) actually does provide a bound on the probability that \(R_A < R_B\), where the probability is over a variety of experimental conditions.

There is no unique distribution over experimental conditions. Since this example is meant for illustration we define only one distribution, but in normal practice we expect one would define many, and look for a value of \(c\) which satisfies all of them. Here again, knowledge of the system being modelled is key: for any given \(c\), it is always possible to define an epistemic distribution for which calibration fails – for example, a model which is 99% random noise would be difficult to calibrate against. The question is whether the uncertainty on experimental conditions justifies including such a model in our distribution. Epistemic distributions are how we quantify our uncertainty in experimental conditions, or describe what we think are reasonable variations of those conditions.

The epistemic distribution we use varies the parameters \(s\) and \(\Bspec_0\) of the Poisson noise (Eq. 1), as well as the temperature \(T\) (the wavelength window is varied too; see the EpistemicDist definition below):

\[\begin{split}\begin{aligned} \log_2 \bigl(s \, [\Bspec]\bigr) &\sim \Unif(13, 20) \\ \frac{\Bspec_0}{[\Bspec]} &\sim \nN\bigl(0, (10^{-4})^2\bigr) \\ \frac{T}{[T]} &\sim \Unif(1000, 5000) \end{aligned}\end{split}\]

(Here \([x]\) indicates the units of \(x\); units are the same as those used elsewhere in this analysis.) The actual data were generated with \(s = 10^5 \, [\Bspec]^{-1}\).

We like to avoid Gaussian distributions for calibration, because Gaussians are often “too nice” and may hide or suppress rare behaviours. (For example, neural network weights are often not initialized with Gaussians: distributions with heavier tails tend to produce more exploitable initial features.) Uniform distributions are convenient because they can produce extreme values with high probability, without also producing unphysically extreme values. This is of course only a choice, and in many cases a Gaussian calibration distribution can also be justified: in this example we use a Gaussian distribution for \(\Bspec_0\) because it actually promotes a more even distribution of \(\Bemd{}\). (Datasets with large biases are unambiguous and so all end up in the same \((\Bemd{}, \Bconf{})\) bin.)

Important

Calibration is concerned with the transition from “certainty in model \(A\)” to “equivocal evidence” to “certainty in model \(B\)”. It is of no use to us if all calibration datasets are best fitted with the same model. The easiest way to avoid this is to use the candidate models to generate the calibration datasets, randomly selecting which candidate for each dataset. All other things being equal, the calibration curve will resolve fastest if we select each candidate with 50% probability.

We also need datasets to span the entire range of certainties, with both clear and ambiguous decisions in favour of \(A\) or \(B\). One way to do this is to start from a dataset we expect to be ambiguous, and identify a control parameter which can reduce that ambiguity. In this example, the calibration datasets are generated with different \(λ_{min}\): this is effective at generating a range of certainties, since for high \(λ_{min}\) the two models almost perfectly overlap, while for low \(λ_{min}\) the differences are unmistakable. Equivalently, we could start from a dataset with near-perfect discriminability, and identify a control parameter which makes the decision problem ambiguous – this is what we do in the Prinz example.

Note that since we use the candidate models to generate the datasets during calibration, we don’t need to know the true data model. We only need to identify a regime where model predictions differ.

We can conclude from these remarks that calibration works best when the models are close and allow for ambiguity. However this is not too strong a limitation: if we are unable to calibrate the \(\Bemd{}\) because there is no ambiguity between models, then we don’t need the \(\Bemd{}\) to falsify one of them.

Hide code cell source
@dataclass(frozen=True)
class FittedCandidateModels:
    """
    Candidate models need to be tuned to a dataset by fitting T and σ.
    This encapsulates that functionality.
    """
    dataset: Dataset

    @property
    @cache
    def fitted(self):  # Fitting is deferred until we actually need it.
        return fit_gaussian_σT(self.dataset.get_data())

    @property
    def Planck(self):
        rng = utils.get_rng(*self.dataset.purpose, "candidate Planck")
        return utils.compose(
            gaussian_noise(0, self.fitted.Planck.σ, rng=rng),
            CandidateModel("Planck", T=self.fitted.Planck.T)
        )
    @property
    def RJ(self):
        rng = utils.get_rng(*self.dataset.purpose, "candidate RJ")
        return utils.compose(
            gaussian_noise(0, self.fitted.RJ.σ, rng=rng),
            CandidateModel("Rayleigh-Jeans", T=self.fitted.RJ.T)
        )

    @property
    def QPlanck(self):
        return Q("Planck", σ=self.fitted.Planck.σ, T=self.dataset.T)
    @property
    def QRJ(self):
        return Q("Rayleigh-Jeans", σ=self.fitted.RJ.σ, T=self.dataset.T)

Hint

A calibration distribution type doesn’t need to subclass emd.tasks.EpistemicDist, but it should be a dataclass satisfying the following:

  • Iterating over it yields data models.

    • Iteration must be non-consuming, because we iterate over models multiple times.

  • __len__ is defined.

  • All parameters are serializable.[1]

  • Created with frozen=True.

To view these requirements in IPython or Jupyter, along with sample code, type emd.tasks.EpistemicDist??.
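For concreteness, a hypothetical minimal distribution satisfying these requirements (without subclassing emd.tasks.EpistemicDist, and reusing the dataclass/numpy imports from the top of the notebook) might look like the sketch below; the real EpistemicDist defined further down yields full emd.tasks.Experiment objects instead of bare parameters:

@dataclass(frozen=True)
class ToyEpistemicDist:
    N: int = 16
    seed: int = 0
    def __iter__(self):
        # Non-consuming: each call to __iter__ creates a fresh generator and RNG
        rng = np.random.default_rng(self.seed)
        for n in range(self.N):
            yield {"T": rng.uniform(1000, 5000)}   # stand-in for a data model / Experiment
    def __len__(self):
        return self.N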

Hint

The Dataset instance takes care of initializing its RNG based on its purpose arguments; here we use ("uv", "calibration", "fit candidates", n) so each dataset uses a different seed.

We found it much more reliable to send an object which sets its own seed, compared to creating the RNGs already within EpistemicDist.__iter__.

@dataclass(frozen=True)  # frozen allows dataclass to be hashed
class EpistemicDist(emd.tasks.EpistemicDist):
    N: int|Literal[np.inf] = np.inf

    s_p_range   : tuple[int,int] = (13, 20)  # log₂ data_s ≈ 16.6
    B0_std      : PintQuantity   = 1e-4 * Bunits
    T_range     : PintQuantity   = (1000, 5000) * K
    λmin_range  : PintQuantity   = (10, 20) * μm
    λwidth_range: PintQuantity   = (5, 20) * μm
    
    __version__: int       = 5  # If the distribution is changed, update this number
                                # to make sure previous tasks are invalidated
    def get_s(self, rng):
        p = rng.uniform(*self.s_p_range)
        return 2**p * Bunits**-1    # NB: Larger values => less noise
    def get_B0(self, rng):          # NB: sunits would be better, but we did the original runs with Bunits, and changing to sunits would change the task hash
        return self.B0_std * rng.normal()
    def get_T(self, rng):
        return rng.uniform(*self.T_range.to(K).m) * K
    def get_λmin(self, rng):
        return rng.uniform(*self.λmin_range.to(μm).m) * μm
    def get_λwidth(self, rng):
        return rng.uniform(*self.λwidth_range.to(μm).m) * μm

    ## Experiment generator ##

    def __iter__(self):
        rng = utils.get_rng("uv", "calibration")
        n = 0
        while n < self.N:
            n += 1
            dataset = Dataset(
                ("uv", "calibration", "fit candidates", n),
                L    = L_med,          # L only used to fit model candidates; `CalibrateTask` will draw its own datasets of size `Ldata`/`Linf`.
                λmin = (λmin:= self.get_λmin(rng)),
                λmax = λmin + self.get_λwidth(rng),
                s    = self.get_s(rng),
                T    = self.get_T(rng),
                B0   = self.get_B0(rng),
                phys_model = rng.choice(["Rayleigh-Jeans", "Planck"])
            )
            # Fit the candidate models to the data
            candidates = FittedCandidateModels(dataset)
            # Yield the data model, candidate models along with their loss functions
            yield emd.tasks.Experiment(
                data_model=dataset,
                candidateA=candidates.Planck, candidateB=candidates.RJ,
                QA=candidates.QPlanck, QB=candidates.QRJ)

Execution#

How many experiments

The function emd.viz.calibration_plot attempts to collect results into 16 bins, so making \(N\) a multiple of 16 works nicely, with the constraint that no bin should have fewer than 16 points.

For an initial pilot run, we found \(N=64\) or \(N=128\) to be good numbers. These produce 4 and 8 bins respectively, which is often enough to check that \(\Bemd{}\) and \(\Bconf{}\) are reasonably distributed and that the epistemic distribution is actually probing the transition from strong to equivocal evidence. A subsequent run with \(N \in \{256, 512, 1024\}\) can then refine and smooth the curve.
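The binning arithmetic behind these numbers can be sketched as follows; this is only an illustration of the constraint above, not the actual emd.viz.calibration_plot implementation.

def approx_n_bins(N, target_bins=16, min_per_bin=16):
    """Roughly how many Bemd bins a calibration run of size N supports."""
    return min(target_bins, N // min_per_bin)

for N in (64, 128, 256, 512, 1024):
    print(N, approx_n_bins(N))   # 64 → 4, 128 → 8, larger N → 16 bins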

N = 1024
Ωdct = {"infrared": EpistemicDist(),
        "microwave": EpistemicDist(λmin_range   = (20, 500) * μm,
                                   λwidth_range = (200, 500) * μm)
       }
tasks = {}
for Ωkey, Ω in Ωdct.items():
    task = emd.tasks.Calibrate(
        reason = f"UV calibration – RJ vs Planck – {Ωkey} – Gaussian obs. model",
        #c_list = [.5, 1, 2],
        #c_list = [2**-8, 2**-7, 2**-6, 2**-5, 2**-4, 2**-3, 2**-2, 2**-1, 2**0],
        #c_list = [2**-8, 2**-6, 2**-4, 2**-2, 1],
        #c_list = [2**-8, 2**-2],
        #c_list = [1, 2**2, 2**4],
        #c_list = [2**-2, 2**-6, 2**-4, 2**-8, 2**-10, 2**-12, 2**-14, 2**-16],
        #c_list = [2**-16, 2**-12, 2**-8, 2**-4, 2**-2, 2**0, 2**2],
        #c_list = [2**-8, 2**-4, 2**0],
        c_list = [2**-4, 2**-3, 2**-2, 2**-1, 2**0, 2**1, 2**2, 2**3, 2**4],
        #c_list = [2**0],
        # Collection of generative data model
        #data_models = Ω.generate(1000),
        experiments = Ω.generate(N),
        # # Theory models
        # riskA           = Qrisk["Rayleigh-Jeans"],
        # riskB           = Qrisk["Planck"],
        # Calibration parameters
        Ldata = 1024,
        Linf = 12288  # 2¹³ + 2¹²
        #Linf = 32767 # 2¹⁵ - 1
    )
    tasks[Ωkey]=task

The code below creates task files which can be executed from the command line with the following:

smttask run --import config <task file>

for key, task in tasks.items():
    if not task.has_run:  # Don’t create task files for tasks which have already run
        Ω = task.experiments
        taskfilename = f"uv_calibration_{key}_N={Ω.N}_c={task.c_list}"
        task.save(taskfilename)

Analysis#

Workaround to be able to run the notebook while a new calibration is running
# Use the last finished task
from smttask.view import RecordStoreView
rsview = RecordStoreView()
params = rsview.get('20231029-161729_cf5215').parameters  # Latest run with UV as of 09.11
if "models_Qs" in params.inputs:
    params.inputs.experiments = params.inputs.models_Qs
    del params.inputs["models_Qs"]
task = emd.tasks.Calibrate.from_desc(params)

Select which of the epistemic distributions we want the calibration plots for.

Ωkey = "infrared"
#Ωkey = "microwave"
task = tasks[Ωkey]
assert task.has_run, "Run the calibration from the command line environment, using `smttask run`. Executing it as part of a Jupyter Book build would take a **long** time."
Ωdesc = f"N={task.experiments.N}_{Ωkey}"
calib_results = task.unpack_results(task.run())

We can check the efficiency of sampling by plotting histograms of \(\Bemd{}\) and \(\Bconf{}\): ideally the distribution of \(\Bemd{}\) is flat, and that of \(\Bconf{}\) is split equally between 0 and 1. Since we need enough samples in every subinterval of \(\Bemd{}\), it is the most sparsely sampled regions which determine how many calibration datasets we need to generate (and therefore how long the computation needs to run). Beyond making for shorter compute times, however, a flat distribution isn’t in and of itself a good thing: what matters more is that the criterion is able to resolve the models when it should.

Hide code cell source
hists_emd = {}
hists_conf = {}
for c, res in calib_results.items():
    hists_emd[c] = hv.Histogram(np.histogram(res["Bemd"], bins="auto", density=False), kdims=["Bemd"], label="Bemd")
    hists_conf[c] = hv.Histogram(np.histogram(res["Bconf"].astype(int), bins="auto", density=False), kdims=["Bconf"], label="Bconf")
frames = {viz.format_pow2(c): hists_emd[c] * hists_conf[c] for c in hists_emd}
    
hmap = hv.HoloMap(frames, kdims=["c"])
hmap.opts(
    hv.opts.Histogram(backend="bokeh",
                      line_color=None, alpha=0.75,
                      color=hv.Cycle(values=config.figures.colors.light.cycle)),
    hv.opts.Overlay(backend="bokeh", legend_position="top", width=400)
)
hv.output(hmap, backend="bokeh", widget_location="right")

Hint

Default properties of calibration_plot can be changed by updating the configuration object config.emd.viz.matplotlib (the relevant fields are calibration_curves, prohibited_areas and discouraged_area). As with any HoloViews plot element, they can also be changed on a per-object basis by calling their .opts method.

c_chosen = 2**-1
c_list = [2**-4, 2**-2, 2**-1, 2**0, 2**1, 2**3]
Hide code cell source
_hists_emd = {c: h.relabel(group="Bemd", label=f"$c={viz.format_pow2(c, format='latex')}$")
              for c, h in hists_emd.items() if c in c_list}
for c in c_list:
    α = 1 if c == c_chosen else 0.8
    _hists_emd[c].opts(alpha=α)
histpanel_emd = hv.Overlay(_hists_emd.values())

calib_curves, prohibited_areas, discouraged_areas = emd.viz.calibration_plot(calib_results)

for c, curve in calib_curves.items():
    calib_curves[c] = curve.relabel(label=f"$c={viz.format_pow2(c, format='latex')}$")

for c in c_list:  # We highlight the curve for the chosen c by making the line wider
    α = 1 #if c == c_chosen else 0.85
    w = 3 if c == c_chosen else 2
    calib_curves[c].opts(alpha=α, linewidth=w)

main_panel = prohibited_areas * discouraged_areas * hv.Overlay(calib_curves.select(c=c_list).values())

# redim returns a new object, so assign the result back
histpanel_emd = histpanel_emd.redim(Bemd=dims.Bemd, Bconf=dims.Bconf, c=dims.c)
main_panel = main_panel.redim(Bemd=dims.Bemd, Bconf=dims.Bconf, c=dims.c)

main_panel.opts(
    legend_position="top_left", legend_cols=1,
    hooks=[viz.despine_hook],
)
histpanel_emd.opts(
    legend_cols=3,
    legend_opts={"columnspacing": .5, "alignment": "center",
                 "loc": "upper center"},
    hooks=[viz.yaxis_off_hook, partial(viz.despine_hook, left=True)]
)

fig = main_panel << hv.Empty() << histpanel_emd.opts(show_legend=True)

# Plot styling
# NB: If we also set the Bokeh options, somehow that corrupts the matplotlib options
fig.opts(
    hv.opts.Curve(color=colors.calib_curves),
    #hv.opts.Curve(color=colors.calib_curves, line_width=3, backend="bokeh"),
    hv.opts.Area(alpha=0.5),
    #hv.opts.Area(alpha=0.5, backend="bokeh"),
    hv.opts.Histogram(color="none", edgecolor="none", facecolor=colors.calib_curves),
    #hv.opts.Histogram(line_color=None, fill_color=colors.calib_curves, backend="bokeh"),
    hv.opts.Overlay(fig_inches=config.figures.defaults.fig_inches,
                    #fig_inches=4,
                    aspect=1,
                    xlabel=r"$B^{\mathrm{EMD}}$", ylabel=r"$B^{\mathrm{conf}}$",
                    #show_legend=False,
                    #legend_position="right", legend_cols=1,
                    fontscale=1.3,
                    backend="matplotlib"),
)
fig.opts(backend="matplotlib", fig_inches=5)
WARNING:param.OverlayPlot03037: Plotting hook functools.partial(<function despine_hook at 0x7f7c7ea059e0>, left=True) could not be applied:

 despine_hook() got an unexpected keyword argument 'left'

Finalized with Inkscape:

  • Align histogram axis with curves axis (use vertical lines with 0.8 pt width)

  • ~~Improve placement of legends~~

  • Put the curve corresponding to uv_c_chosen on top. Highlight curve with white surround (2x curve width).

  • Remove whitespace

Additional comments on choosing \(c\)

  • Both the calibration curve and the \(\Bemd{}\) histogram should check out. For example, small values of \(c\) may seem to have good statistics on average, but be heavily biased towards the ends at 0 and 1.
    (In fact \(\Bemd{}\) becomes asymptotically equivalent to \(\Bconf{}\) when \(c\) approaches 0. We don’t want this: the point of \(\Bemd{}\) is that it estimates the probability of \(\Bconf{}\), without having to average over an ensemble of datasets.)

  • Larger values of \(c\), in addition to being more conservative, may also take longer to sample: since they sample a wider variety of quantile curves, more samples are needed to achieve the target accuracy on \(R\).

EMD model comparison#

Based on the figure above, we choose the value \(c = 2^{-1}\) to compute the \(\Bemd{}\) criterion between models.

R_samples = Dict({
    "Rayleigh-Jeans": emd.draw_R_samples(mixed_ppf["Rayleigh-Jeans"], synth_ppf["Rayleigh-Jeans"], c=c_chosen),
    "Planck": emd.draw_R_samples(mixed_ppf["Planck"], synth_ppf["Planck"], c=c_chosen),
})
Hide code cell source
Rdists = [hv.Distribution(_Rlst, kdims=[dims.R], label=f"{a}")
          for a, _Rlst in R_samples.items()]
Rcurves = [hv.operation.stats.univariate_kde(dist).to.curve()
           for dist in Rdists]
fig = hv.Overlay(Rdists) * hv.Overlay(Rcurves)
# Plot styling
color_cycle = hv.Cycle(values=[colors.RJ, colors.Planck])
#fig = fig.redim.range(R=(-9.08, -8.89))
fig.opts(
    hv.opts.Distribution(alpha=.3),
    hv.opts.Distribution(facecolor=color_cycle, color="none", edgecolor="none", backend="matplotlib"),
    hv.opts.Curve(color=color_cycle),
    hv.opts.Curve(linestyle="solid", backend="matplotlib"),
    hv.opts.Overlay(legend_position="top_left", legend_cols=1,
                    fontscale=1.3, hooks=[viz.despine_hook], backend="matplotlib")
)
fig.opts(clone=True, backend="matplotlib", fig_inches=5)

EMD estimates for the probabilities \(P(R_a < R_b)\) are best reported as a table:

df = emd.utils.compare_matrix(R_samples)
df.index = pd.MultiIndex.from_tuples([("a", a) for a in df.index])
df.columns = pd.MultiIndex.from_tuples([("b", b) for b in df.columns])
df.style.format(precision=3)
                       b
                       Rayleigh-Jeans   Planck
a   Rayleigh-Jeans     0.500            0.304
    Planck             0.696            0.500
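Each entry is the empirical probability that a draw of \(R_a\) is smaller than an independent draw of \(R_b\). A hedged sketch of the equivalent computation (emd.utils.compare_matrix may differ in its details):

import numpy as np

def prob_a_less_than_b(Ra, Rb):
    """Empirical P(R_a < R_b) from two independent sets of R samples."""
    Ra, Rb = np.asarray(Ra), np.asarray(Rb)
    return (Ra[:, None] < Rb[None, :]).mean()

# e.g. prob_a_less_than_b(R_samples["Planck"], R_samples["Rayleigh-Jeans"])
# should reproduce the (a=Planck, b=Rayleigh-Jeans) entry above.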

Dependence of \(R\) distributions on bandwidth and noise level#

Approximate noise level ranges (low \(s\) ⤇ high noise):

  • High \(λ\): \(\frac{s}{[s]} \in [2, 40]\).

  • Low \(λ\): \(\frac{s}{[s]} \in [500, 15000]\).

Note

  • The Poisson noise (Eq. 1) has variance \(\frac{\Bspec}{s}\), and therefore its standard deviation scales only as \(\Bspec^{1/2}\). Since different \(λ\) ranges lead to different orders of magnitude for \(\Bspec\), if we kept the same \(s\) for all rows, we would not see similar amounts of spread. This is why we decrease \(s\) in the last row, where wavelength (\(λ\)) is lower.
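To make this concrete, a quick back-of-the-envelope check, using assumed representative magnitudes of \(\Bspec\) read off the plot ranges below together with the middle entries of each row’s s_list:

import numpy as np

# Assumed representative values (not computed from the data):
# B ≈ 0.01 Bunits for the infrared rows, B ≈ 5 Bunits for the low-λ row;
# s values are the middle entries of the corresponding s_list below.
for row, B, s in [("infrared rows", 0.01, 2**12),
                  ("low-λ row",     5.0,  2**12 * 2**-9)]:
    rel_spread = 1 / np.sqrt(s * B)   # std/B when Var = B/s
    print(f"{row}: relative spread ≈ {rel_spread:.2f}")
# Both come out ≈ 0.16, i.e. similar visual spread despite very different B.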

def panel_param_iterator(L=L_small, purpose="data", progbar=True):
    #λmin_list = [6. , 1. , .2] * μm
    λmin_list = [6. , 6. , .2] * μm
    λmax_list = [20., 20., 3.] * μm
    B0_list = [2**-10+2**-11, 0, 0] * Bunits
    #s_list = [2**-11, 2**-8, 2**-6] * (10e5 * Bunits**-1)
    
    for λmin, λmax, B0 in tqdm(zip(λmin_list, λmax_list, B0_list), desc="λ", total=len(λmin_list), disable=not progbar):
        if λmin < 2 * μm:
            # 3rd row
            s_list = [2**-11, 2**-9, 2**-7] * (2**12 * Bunits**-1)
            #B0 = 2**-5 * Bunits
            #B0 = (2**-10 + 2**-9) * Bunits
        else:
            # 1st & 2nd row
            s_list = [2**-3 , 2**0 , 2**2]  * (2**12 * Bunits**-1)
            #B0 = (2**-10 + 2**-9) * Bunits
        for s in tqdm(s_list, desc="s", leave=False, disable=not progbar):
            yield Dataset(purpose, L, λmin, λmax, s, T=data_T, B0=B0)
data_panels = [plot_data(L=D.L, T=D.T, λmin=D.λmin, λmax=D.λmax, s=D.s, B0=D.B0)
               for D in panel_param_iterator()]
Hide code cell source
legend_specs = hv.plotting.mpl.LegendPlot.legend_specs
legend_specs["top_orig"] = dict(legend_specs["top"])  # `dict` to force a copy
legend_specs["top"].update({"mode": None, 'bbox_to_anchor': (-0.3, 1.02, 1.0, 0.102)})

Note

For the last row we use the \(σ\) and \(T\) values obtained by fitting the original dataset. This is because the Rayleigh-Jeans model is so bad in the UV region that the fit doesn’t converge. For pedagogical reasons we want to illustrate this extreme case, but in practice the fact that the fit doesn’t converge should be reason enough to reject the Rayleigh-Jeans model.

Hide code cell source
          #+ [panel.redim.range(B=(0, 5)) for panel in data_panels[3:6]] \
_panels = [panel.redim.range(B=(0, 0.03)) for panel in data_panels[:3]] \
          + [panel.redim.range(B=(0, 0.03)) for panel in data_panels[3:6]] \
          + [panel.redim.range(B=(0, 7)) for panel in data_panels[6:9]]
for (i, panel), D in zip(enumerate(_panels), panel_param_iterator(progbar=False)):
    if i < 6:
        panel *= hv.Text(10, 0.02, f"$s={viz.format_pow2(D.s.m, format='latex')}$",
                         halign="left")
    else:
        panel *= hv.Text(0.25, 6, f"$s={viz.format_pow2(D.s.m, format='latex')}$",
                         halign="center")
    _panels[i] = panel
data_layout = hv.Layout(_panels)
for i, panel in enumerate(_panels):
    hooks = []
    if i != 0:
        panel.opts(show_legend=False)
    if i == 0:
        panel.opts(legend_position="top", legend_cols=3)
    if i != 6:
        panel.opts(ylabel="")
    if i == 6:
        hooks.append(viz.set_ylabel_hook(f"{dims.B.label} ({dims.B.unit})", loc="bottom"))
    if i != 7:
        panel.opts(xlabel="")
    if i % 3:
        hooks.extend([viz.no_spine_hook("left"), viz.no_yticks_hook])
    if i < 6:
        def major_formatter(x, pos): return f"{x:.3g}"
        def minor_formatter(x, pos): return r"$6$" if x == 6 else r"$20$" if x == 20 else ""
        hooks.extend([viz.hide_minor_ticks_hook,
                      viz.set_minor_xticks_formatter(minor_formatter),
                      viz.set_major_xticks_formatter(major_formatter)])
    else:
        def major_formatter(x, pos): return f"{x:.3g}"
        def minor_formatter(x, pos): return r"$0.2$" if x == 0.2 else r"$3$" if x == 3 else ""
        hooks.extend([viz.set_major_xticks_formatter(major_formatter),
                      viz.set_minor_xticks_formatter(minor_formatter)])
    panel.opts(logx=True, aspect=3, hooks=hooks, fontscale=1.3)
    #_panels[i] = panel
data_layout = hv.Layout(_panels)

data_layout.opts(
    hv.opts.Layout(backend="matplotlib", sublabel_format="({alpha})", sublabel_position=(0.3, 0.7),
                   hspace=0.04, vspace=0.5, fig_inches=7/6),  # full width fig is about 7in wide
    hv.opts.Scatter(backend="matplotlib", axiswise=True,
                    color="#888888", s=3),
    hv.opts.Overlay(axiswise=True),
    hv.opts.Text(axiswise=True),
    hv.opts.Curve(backend="matplotlib", axiswise=True, linewidth=3, alpha=.6),
    hv.opts.Curve(f"Candidate.{sanitize('Rayleigh-Jeans')}", color=colors.RJ),
    hv.opts.Curve(f"Candidate.Planck", color=colors.lighten(.1).Planck),
)
data_layout.cols(3)

Hint

Together, get_ppfs and plot_Rdists condense almost the entire EMD pipeline into two functions.

def plot_Rdists(_dataset, c):
    _R_samples = _draw_R_samples(_dataset, c)

    _Rdists = [hv.Distribution(_Rlst, kdims=[dims.R], label=f"{a}")
               for a, _Rlst in _R_samples.items()]
    _Rcurves = [hv.operation.stats.univariate_kde(dist).to.curve()
                for dist in _Rdists]
    Bemd = (_R_samples["Planck"] < _R_samples["Rayleigh-Jeans"][:,None]).mean()
    _fig = hv.Overlay(_Rdists) * hv.Overlay(_Rcurves)
    _fig *= hv.Text(0, 0, r"$B^{\mathrm{EMD}}="+f"{Bemd:.2f}$", halign="left", label="Bemd")
        # The x,y position of the Bemd will be set later

    return _fig

@memory.cache
def _draw_R_samples(_dataset, c):  # This function mostly exists so it can be cached
    if _dataset.λmax <= 5*μm:
        _mixed_ppf, _synth_ppf = get_ppfs(_dataset, fitted)
    else:
        _mixed_ppf, _synth_ppf = get_ppfs(_dataset)
    return Dict({
        "Rayleigh-Jeans": emd.draw_R_samples(
            _mixed_ppf["Rayleigh-Jeans"], _synth_ppf["Rayleigh-Jeans"], c=c),
        "Planck": emd.draw_R_samples(
            _mixed_ppf["Planck"], _synth_ppf["Planck"], c=c),
    })
Rdist_panels = [plot_Rdists(D, c=c_chosen) for D in panel_param_iterator()]
Hide code cell source
_panels = [panel.redim.range(R=(-7, 0), Density=(0, 7), R_density=(0,7)) for panel in Rdist_panels[:3]] \
          + [panel.redim.range(R=(-10, 20), Density=(0, 1.5), R_density=(0,1.5)) for panel in Rdist_panels[3:6]] \
          + [panel.redim.range(R=(-1e17, 6e17), Density=(0, 0.7e-17), R_density=(0,0.7e-17)) for panel in Rdist_panels[6:9]]
hooks = {i: [] for i in range(len(_panels))}
for i, panel in enumerate(_panels):
    text = panel.Text.Bemd
    if i < 3:
        panel.opts(xticks=[-6, -2], yticks=(0, 4))
        text.x = -3.5  # NB: This doesn’t change the position of the Text
        text.y = 4     #     but will set the position of _cloned_ Text
    elif 3 <= i < 6:
        panel.opts(xticks=[-5, 15], yticks=(0, 1))
        text.x = 5
        text.y = 1
    elif 6 <= i < 9:
        panel *= hv.VLine(panel.Curve.Planck.data["R"].mean()).opts(color=colors.Planck, linewidth=2)
        hooks[i].extend([viz.set_yticks_hook([0, 6e-18], labels=["0", "$6\\times 10^{-18}$"], rotation=90),
                         #viz.set_yticklabels_hook(["0", "$4\\times 10^{-18}$"]),
                         viz.set_xticks_hook([0, 3e17 ], labels=["0", "$3\\times 10^{17}$"])])
                         #viz.set_xticklabels_hook(["0", "$3\\times 10^{17}$"])])
        _panels[i] = panel
        text.x = 3e17
        text.y = 0.4e-17
    text.data = (text.x, text.y, *text.data[2:])
    if i != 7:
        panel.opts(xlabel="")
    if i != 3:
        panel.opts(hv.opts.Curve(ylabel=""))
    if i % 3:
        hooks[i].extend([viz.yaxis_off_hook, viz.no_yticks_hook])
for i, hooklist in hooks.items():
    _panels[i].opts(hv.opts.Curve(hooks=hooklist))

Rdist_layout = hv.Layout(_panels)

Rdist_layout.opts(
    hv.opts.Layout(backend="matplotlib", sublabel_format="({alpha})", sublabel_position=(0.4, 0.6),
                   hspace=0.04, vspace=0.1, fig_inches=7/6),  # full width fig is about 7in wide
    hv.opts.Overlay(show_legend=False),
    hv.opts.Distribution(backend="matplotlib", axiswise=True),
    hv.opts.Curve(backend="matplotlib", axiswise=True, linewidth=1),
    # Colors
    hv.opts.Curve(f"Curve.{sanitize('Rayleigh-Jeans')}", color=colors.RJ),
    hv.opts.Distribution(f"Distribution.{sanitize('Rayleigh-Jeans')}", facecolor=colors.RJ, backend="matplotlib"),
    hv.opts.Curve(f"Curve.Planck", color=colors.Planck),
    hv.opts.Distribution(f"Distribution.Planck", facecolor=colors.Planck, backend="matplotlib"),
    hv.opts.Distribution(edgecolor="none", color="none", backend="matplotlib")
)
for i, panel in enumerate(Rdist_layout):
    if 3 <= i < 6:
        panel.redim.range(Density=(0, 2), R_density=(0,2))
Rdist_layout.cols(3)

(For insertion into the manuscript, these two figures should be combined into one wide figure with Inkscape and saved as uv_dataset-and-Rdist-grids, both as Optimized SVG and PDF.)

Comparison with other criteria#

See Comparison with other criteria.

Ω = task.experiments

2025-04-11   git: emd-paper main #39053df5