Code: Aleatoric vs Sampling vs Replication uncertainty#

\(\require{mathtools} \newcommand{\notag}{} \newcommand{\tag}{} \newcommand{\label}[1]{} \newcommand{\sfrac}[2]{#1/#2} \newcommand{\bm}[1]{\boldsymbol{#1}} \newcommand{\num}[1]{#1} \newcommand{\qty}[2]{#1\,#2} \renewenvironment{align} {\begin{aligned}} {\end{aligned}} \renewenvironment{alignat} {\begin{alignedat}} {\end{alignedat}} \newcommand{\pdfmspace}[1]{} % Ignore PDF-only spacing commands \newcommand{\htmlmspace}[1]{\mspace{#1}} % Ignore PDF-only spacing commands \newcommand{\scaleto}[2]{#1} % Allow to use scaleto from scalerel package \newcommand{\RR}{\mathbb R} \newcommand{\NN}{\mathbb N} \newcommand{\PP}{\mathbb P} \newcommand{\EE}{\mathbb E} \newcommand{\XX}{\mathbb X} \newcommand{\ZZ}{\mathbb Z} \newcommand{\QQ}{\mathbb Q} \newcommand{\fF}{\mathcal F} \newcommand{\dD}{\mathcal D} \newcommand{\lL}{\mathcal L} \newcommand{\gG}{\mathcal G} \newcommand{\hH}{\mathcal H} \newcommand{\nN}{\mathcal N} \newcommand{\pP}{\mathcal P} \newcommand{\BB}{\mathbb B} \newcommand{\Exp}{\operatorname{Exp}} \newcommand{\Binomial}{\operatorname{Binomial}} \newcommand{\Poisson}{\operatorname{Poisson}} \newcommand{\linop}{\mathcal{L}(\mathbb{B})} \newcommand{\linopell}{\mathcal{L}(\ell_1)} \DeclareMathOperator{\trace}{trace} \DeclareMathOperator{\Var}{Var} \DeclareMathOperator{\Span}{span} \DeclareMathOperator{\proj}{proj} \DeclareMathOperator{\col}{col} \DeclareMathOperator*{\argmin}{arg\,min} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator*{\gt}{>} \definecolor{highlight-blue}{RGB}{0,123,255} % definition, theorem, proposition \definecolor{highlight-yellow}{RGB}{255,193,7} % lemma, conjecture, example \definecolor{highlight-orange}{RGB}{253,126,20} % criterion, corollary, property \definecolor{highlight-red}{RGB}{220,53,69} % criterion \newcommand{\logL}{\ell} \newcommand{\eE}{\mathcal{E}} \newcommand{\oO}{\mathcal{O}} \newcommand{\defeq}{\stackrel{\mathrm{def}}{=}} \newcommand{\Bspec}{\mathcal{B}} % Spectral radiance \newcommand{\X}{\mathcal{X}} % X space \newcommand{\Y}{\mathcal{Y}} % Y space \newcommand{\M}{\mathcal{M}} % Model \newcommand{\Tspace}{\mathcal{T}} \newcommand{\Vspace}{\mathcal{V}} \newcommand{\Mtrue}{\mathcal{M}_{\mathrm{true}}} \newcommand{\MP}{\M_{\mathrm{P}}} \newcommand{\MRJ}{\M_{\mathrm{RJ}}} \newcommand{\qproc}{\mathfrak{Q}} \newcommand{\D}{\mathcal{D}} % Data (true or generic) \newcommand{\Dt}{\tilde{\mathcal{D}}} \newcommand{\Phit}{\widetilde{\Phi}} \newcommand{\Phis}{\Phi^*} \newcommand{\qt}{\tilde{q}} \newcommand{\qs}{q^*} \newcommand{\qh}{\hat{q}} \newcommand{\AB}[1]{\mathtt{AB}~\mathtt{#1}} \newcommand{\LP}[1]{\mathtt{LP}~\mathtt{#1}} \newcommand{\NML}{\mathrm{NML}} \newcommand{\iI}{\mathcal{I}} \newcommand{\true}{\mathrm{true}} \newcommand{\dist}{D} \newcommand{\Mtheo}[1]{\mathcal{M}_{#1}} % Model (theoretical model); index: param set \newcommand{\DL}[1][L]{\mathcal{D}^{(#1)}} % Data (RV or generic) \newcommand{\DLp}[1][L]{\mathcal{D}^{(#1')}} % Data (RV or generic) \newcommand{\DtL}[1][L]{\tilde{\mathcal{D}}^{(#1)}} % Data (RV or generic) \newcommand{\DpL}[1][L]{{\mathcal{D}'}^{(#1)}} % Data (RV or generic) \newcommand{\Dobs}[1][]{\mathcal{D}_{\mathrm{obs}}^{#1}} % Data (observed) \newcommand{\calibset}{\mathcal{C}} \newcommand{\N}{\mathcal{N}} % Normal distribution \newcommand{\Z}{\mathcal{Z}} % Partition function \newcommand{\VV}{\mathbb{V}} % Variance \newcommand{\T}{\mathsf{T}} % Transpose \newcommand{\EMD}{\mathrm{EMD}} \newcommand{\dEMD}{d_{\mathrm{EMD}}} \newcommand{\dEMDtilde}{\tilde{d}_{\mathrm{EMD}}} 
\newcommand{\dEMDsafe}{d_{\mathrm{EMD}}^{\text{(safe)}}} \newcommand{\e}{ε} % Model confusion threshold \newcommand{\falsifythreshold}{ε} \newcommand{\bayes}[1][]{B_{#1}} \newcommand{\bayesthresh}[1][]{B_{0}} \newcommand{\bayesm}[1][]{B^{\mathcal{M}}_{#1}} \newcommand{\bayesl}[1][]{B^l_{#1}} \newcommand{\bayesphys}[1][]{B^{{p}}_{#1}} \newcommand{\Bconf}[1]{B^{\mathrm{epis}}_{#1}} \newcommand{\Bemd}[1]{B^{\mathrm{EMD}}_{#1}} \newcommand{\Bconfbin}[1][]{\bar{B}^{\mathrm{conf}}_{#1}} \newcommand{\Bemdbin}[1][]{\bar{B}_{#1}^{\mathrm{EMD}}} \newcommand{\bin}{\mathcal{B}} \newcommand{\Bconft}[1][]{\tilde{B}^{\mathrm{conf}}_{#1}} \newcommand{\fc}{f_c} \newcommand{\fcbin}{\bar{f}_c} \newcommand{\paramphys}[1][]{Θ^{{p}}_{#1}} \newcommand{\paramobs}[1][]{Θ^{ε}_{#1}} \newcommand{\test}{\mathrm{test}} \newcommand{\train}{\mathrm{train}} \newcommand{\synth}{\mathrm{synth}} \newcommand{\rep}{\mathrm{rep}} \newcommand{\MNtrue}{\mathcal{M}^{{p}}_{\text{true}}} \newcommand{\MN}[1][]{\mathcal{M}^{{p}}_{#1}} \newcommand{\MNA}{\mathcal{M}^{{p}}_{Θ_A}} \newcommand{\MNB}{\mathcal{M}^{{p}}_{Θ_B}} \newcommand{\Me}[1][]{\mathcal{M}^ε_{#1}} \newcommand{\Metrue}{\mathcal{M}^ε_{\text{true}}} \newcommand{\Meobs}{\mathcal{M}^ε_{\text{obs}}} \newcommand{\Meh}[1][]{\hat{\mathcal{M}}^ε_{#1}} \newcommand{\MNa}{\mathcal{M}^{\mathcal{N}}_a} \newcommand{\MeA}{\mathcal{M}^ε_A} \newcommand{\MeB}{\mathcal{M}^ε_B} \newcommand{\Ms}{\mathcal{M}^*} \newcommand{\MsA}{\mathcal{M}^*_A} \newcommand{\MsB}{\mathcal{M}^*_B} \newcommand{\Msa}{\mathcal{M}^*_a} \newcommand{\MsAz}{\mathcal{M}^*_{A,z}} \newcommand{\MsBz}{\mathcal{M}^*_{B,z}} \newcommand{\Msaz}{\mathcal{M}^*_{a,z}} \newcommand{\MeAz}{\mathcal{M}^ε_{A,z}} \newcommand{\MeBz}{\mathcal{M}^ε_{B,z}} \newcommand{\Meaz}{\mathcal{M}^ε_{a,z}} \newcommand{\zo}{z^{0}} \renewcommand{\lL}[2][]{\mathcal{L}_{#1|{#2}}} % likelihood \newcommand{\Lavg}[2][]{\mathcal{L}^{/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLphys}[2][]{\mathcal{L}^{{p}}_{#1|#2}} \newcommand{\Lavgphys}[2][]{\mathcal{L}^{{p}/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLL}[3][]{\mathcal{L}^{(#3)}_{#1|#2}} \newcommand{\lLphysL}[3][]{\mathcal{L}^{{p},(#3)}_{#1|#2}} \newcommand{\lnL}[2][]{l_{#1|#2}} % Per-sample log likelihood \newcommand{\lnLt}[2][]{\widetilde{l}_{#1|#2}} \newcommand{\lnLtt}{\widetilde{l}} % Used only in path_sampling \newcommand{\lnLh}[1][]{\hat{l}_{#1}} \newcommand{\lnLphys}[2][]{l^{{p}}_{#1|#2}} \newcommand{\lnLphysL}[3][]{l^{{p},(#3)}_{#1|#2}} \newcommand{\Elmu}[2][1]{μ_{{#2}}^{(#1)}} \newcommand{\Elmuh}[2][1]{\hat{μ}_{{#2}}^{(#1)}} \newcommand{\Elsig}[2][1]{Σ_{{#2}}^{(#1)}} \newcommand{\Elsigh}[2][1]{\hat{Σ}_{{#2}}^{(#1)}} \newcommand{\pathP}{\mathop{{p}}} % Path-sampling process (generic) \newcommand{\pathPhb}{\mathop{{p}}_{\mathrm{Beta}}} % Path-sampling process (hierarchical beta) \newcommand{\interval}{\mathcal{I}} \newcommand{\Phiset}[1]{\{\Phi\}^{\small (#1)}} \newcommand{\Phipart}[1]{\{\mathcal{I}_Φ\}^{\small (#1)}} \newcommand{\qhset}[1]{\{\qh\}^{\small (#1)}} \newcommand{\Dqpart}[1]{\{Δ\qh_{2^{#1}}\}} \newcommand{\LsAzl}{\mathcal{L}_{\smash{{}^{\,*}_A},z,L}} \newcommand{\LsBzl}{\mathcal{L}_{\smash{{}^{\,*}_B},z,L}} \newcommand{\lsA}{l_{\smash{{}^{\,*}_A}}} \newcommand{\lsB}{l_{\smash{{}^{\,*}_B}}} \newcommand{\lsAz}{l_{\smash{{}^{\,*}_A},z}} \newcommand{\lsAzj}{l_{\smash{{}^{\,*}_A},z_j}} \newcommand{\lsAzo}{l_{\smash{{}^{\,*}_A},z^0}} \newcommand{\leAz}{l_{\smash{{}^{\,ε}_A},z}} \newcommand{\lsAez}{l_{\smash{{}^{*ε}_A},z}} \newcommand{\lsBz}{l_{\smash{{}^{\,*}_B},z}} 
\newcommand{\lsBzj}{l_{\smash{{}^{\,*}_B},z_j}} \newcommand{\lsBzo}{l_{\smash{{}^{\,*}_B},z^0}} \newcommand{\leBz}{l_{\smash{{}^{\,ε}_B},z}} \newcommand{\lsBez}{l_{\smash{{}^{*ε}_B},z}} \newcommand{\LaszL}{\mathcal{L}_{\smash{{}^{*}_a},z,L}} \newcommand{\lasz}{l_{\smash{{}^{*}_a},z}} \newcommand{\laszj}{l_{\smash{{}^{*}_a},z_j}} \newcommand{\laszo}{l_{\smash{{}^{*}_a},z^0}} \newcommand{\laez}{l_{\smash{{}^{ε}_a},z}} \newcommand{\lasez}{l_{\smash{{}^{*ε}_a},z}} \newcommand{\lhatasz}{\hat{l}_{\smash{{}^{*}_a},z}} \newcommand{\pasz}{p_{\smash{{}^{*}_a},z}} \newcommand{\paez}{p_{\smash{{}^{ε}_a},z}} \newcommand{\pasez}{p_{\smash{{}^{*ε}_a},z}} \newcommand{\phatsaz}{\hat{p}_{\smash{{}^{*}_a},z}} \newcommand{\phateaz}{\hat{p}_{\smash{{}^{ε}_a},z}} \newcommand{\phatseaz}{\hat{p}_{\smash{{}^{*ε}_a},z}} \newcommand{\Phil}[2][]{Φ_{#1|#2}} % Φ_{\la} \newcommand{\Philt}[2][]{\widetilde{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philhat}[2][]{\hat{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philsaz}{Φ_{\smash{{}^{*}_a},z}} % Φ_{\lasz} \newcommand{\Phileaz}{Φ_{\smash{{}^{ε}_a},z}} % Φ_{\laez} \newcommand{\Philseaz}{Φ_{\smash{{}^{*ε}_a},z}} % Φ_{\lasez} \newcommand{\mus}[1][1]{μ^{(#1)}_*} \newcommand{\musA}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\SigsA}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\musB}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\SigsB}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\musa}[1][1]{μ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Sigsa}[1][1]{Σ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Msah}{{\color{highlight-red}\mathcal{M}^{*}_a}} \newcommand{\Msazh}{{\color{highlight-red}\mathcal{M}^{*}_{a,z}}} \newcommand{\Meah}{{\color{highlight-blue}\mathcal{M}^{ε}_a}} \newcommand{\Meazh}{{\color{highlight-blue}\mathcal{M}^{ε}_{a,z}}} \newcommand{\lsazh}{{\color{highlight-red}l_{\smash{{}^{*}_a},z}}} \newcommand{\leazh}{{\color{highlight-blue}l_{\smash{{}^{ε}_a},z}}} \newcommand{\lseazh}{{\color{highlight-orange}l_{\smash{{}^{*ε}_a},z}}} \newcommand{\Philsazh}{{\color{highlight-red}Φ_{\smash{{}^{*}_a},z}}} % Φ_{\lasz} \newcommand{\Phileazh}{{\color{highlight-blue}Φ_{\smash{{}^{ε}_a},z}}} % Φ_{\laez} \newcommand{\Philseazh}{{\color{highlight-orange}Φ_{\smash{{}^{*ε}_a},z}}} % Φ_{\lasez} \newcommand{\emdstd}{\tilde{σ}} \DeclareMathOperator{\Mvar}{Mvar} \DeclareMathOperator{\AIC}{AIC} \DeclareMathOperator{\epll}{epll} \DeclareMathOperator{\elpd}{elpd} \DeclareMathOperator{\MDL}{MDL} \DeclareMathOperator{\comp}{COMP} \DeclareMathOperator{\Lognorm}{Lognorm} \DeclareMathOperator{\erf}{erf} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator{\Image}{Image} \DeclareMathOperator{\sgn}{sgn} \DeclareMathOperator{\SE}{SE} % standard error \DeclareMathOperator{\Unif}{Unif} \DeclareMathOperator{\Poisson}{Poisson} \DeclareMathOperator{\SkewNormal}{SkewNormal} \DeclareMathOperator{\TruncNormal}{TruncNormal} \DeclareMathOperator{\Exponential}{Exponential} \DeclareMathOperator{\exGaussian}{exGaussian} \DeclareMathOperator{\IG}{IG} \DeclareMathOperator{\NIG}{NIG} \DeclareMathOperator{\Gammadist}{Gamma} \DeclareMathOperator{\Lognormal}{Lognormal} \DeclareMathOperator{\Beta}{Beta} \newcommand{\sinf}{{s_{\infty}}}\)

Some readers may ask why we go through the trouble of computing \(\Bemd{}\) when we could simply use a bootstrap to estimate the uncertainty on the expected risk. The short answer is that

  1. a bootstrap estimate of the variance measures the aleatoric uncertainty, not the epistemic uncertainty;

  2. the aleatoric uncertainty on the expected risk vanishes in the limit of large data.

Indeed, the aleatoric uncertainty amounts to the standard error, which typically decreases as \(L^{-1/2}\) as the dataset grows. A bootstrap estimate of the aleatoric uncertainty therefore contributes to the spread of the \(R\)-distribution, in addition to the epistemic contribution estimated with \(δ^\EMD\) and \(\qproc\). With the dataset size \(L=4000\) used in this example, a bootstrap estimate adds about 0.07 to the spread of the \(R\)-distribution (measured as the difference between the 95% and 5% percentiles). In contrast, the \(R\)-distributions shown in the figure have spreads around 0.5.
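To make the scaling concrete, here is a self-contained sketch (using a synthetic Gaussian stand-in for the per-sample losses, not the Prinz losses) showing that the 5%–95% spread of bootstrap means shrinks roughly as \(L^{-1/2}\):

import numpy as np

rng = np.random.default_rng(0)
losses = rng.normal(loc=4.3, scale=1.0, size=40_000)  # synthetic stand-in for per-sample losses

def bootstrap_spread(loss_arr, n_resamples=1000):
    # Difference between the 95% and 5% quantiles of bootstrap means
    L = len(loss_arr)
    means = [loss_arr[rng.integers(L, size=L)].mean() for _ in range(n_resamples)]
    return np.quantile(means, 0.95) - np.quantile(means, 0.05)

for L in (400, 4000, 40_000):
    print(f"L = {L:>6}: spread ≈ {bootstrap_spread(losses[:L]):.4f}")
# Each tenfold increase of L shrinks the spread by roughly √10 ≈ 3.2.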

To illustrate this, we compare \(R\)-distributions with

  • distributions of the loss itself over the dataset;

  • re-samples of the dataset (what one typically estimates with a bootstrap approach).

from Ex_Prinz2004 import *
from tqdm.auto import tqdm, trange
import itertools

The block below is copied from Ex_Prinz2004 because it is only executed when that file is run as a notebook.

candidate_models = Dict()
Qrisk = Dict()
for a in "ABCD":
    fitted_σ = fit_gaussian_σ(LP_data, phys_models[a], "Gaussian")
    candidate_models[a] = utils.compose(AdditiveNoise("Gaussian", fitted_σ),
                                        phys_models[a])
    Qrisk[a] = Q(phys_model=phys_models[a], obs_model="Gaussian", σ=fitted_σ)
hv.extension("matplotlib")
precomputing = False  # Only executed in notebook

Generating long 40,000-sample datasets means integrating the ODE for 40,000 time steps, which takes a long time, so we use a memoized function to avoid redoing this every time.

LP_data_big = replace(LP_data, L=40_000)  # Reuse the same datasets for each dataset size
                                          # (The last 16 calls to fit_gaussian_dataset are cached, with the dataset as an arg)
                                          # (Each size is still done many times.)
@memory.cache
def get_data(LP_model="LP 1", seed=0):
    dataset = replace(LP_data_big, LP_model=LP_model)
    return dataset.get_data(rng=utils.get_rng("prinz", "aleatoric-comparison", seed))
@memory.cache
def get_Qarr(candidate: Literal["A","B","C","D"], L: int,
             LP_model="LP 1", seed=0):
    dataset = replace(LP_data_big, LP_model=LP_model)
    fitted_σ = fit_gaussian_σ(dataset, phys_models[candidate], "Gaussian")

    data = get_data(LP_model, seed).iloc[:L]
    return Q(phys_model=phys_models[candidate], obs_model="Gaussian", σ=fitted_σ)(data)
# Temporary hack to invalidate just a subset of the cache
def is_valid(metadata):
    return int(metadata["input_args"]["L"]) <= 40_000
get_Qarr.cache_validation_callback = is_valid

Bootstrapping comparison#

n_bootstrap_resamples = 1200
Ls_bs = [400, 4000, 40_000]  # Dataset sizes for bootstrapping illustration
rng_bs = utils.get_rng("prinz", "aleatoric", "bootstrap", "subsampling")  # Used to create bootstrap samples by subsampling the dataset

assert LP_data_big.L >= max(Ls_bs)  

models = "ABCD"
R_bootstrap_samples = {L_bs: {a: [] for a in models}
                       for L_bs in Ls_bs}
for L_bs, a in tqdm(itertools.product(Ls_bs, models), desc="Computing bootstrap samples", total=len(Ls_bs)*len(models)):
    Qarr = get_Qarr(a, L=L_bs)
    for subsample in range(n_bootstrap_resamples):
        R_bootstrap_samples[L_bs][a].append(Qarr[rng_bs.integers(L_bs, size=L_bs)].mean())
index = pd.MultiIndex.from_product((Ls_bs, models))
bootstrap_stats = pd.DataFrame(
    [(q05:=np.quantile(Rvals, 0.05), q95:=np.quantile(Rvals, 0.95), q95-q05)
     for L_bs in Ls_bs
     for Rvals in R_bootstrap_samples[L_bs].values()],
    index=index, columns=["5% quantile ($q_{05}$)", "95% quantile ($q_{95}$)", "$q_{95} - q_{05}$"])
bootstrap_stats.unstack(level=0)
         5% quantile ($q_{05}$)            95% quantile ($q_{95}$)            $q_{95} - q_{05}$
     L=400     L=4000    L=40000      L=400     L=4000    L=40000      L=400    L=4000   L=40000
A  4.275290  4.201492   4.570985   4.325467   4.229739   4.584441   0.050177  0.028247  0.013456
B  4.285103  4.129929   4.525927   4.344873   4.156519   4.539568   0.059771  0.026590  0.013641
C  4.430444  4.349413   4.677843   4.523319   4.379492   4.690432   0.092875  0.030080  0.012588
D  4.481351  4.343034   4.652231   4.611937   4.376171   4.664875   0.130585  0.033137  0.012643
figs = []
for L_bs, samples in R_bootstrap_samples.items():
    fig = viz.make_Rdist_fig(R_bootstrap_samples[L_bs],
                             colors=colors.LP_candidates,
                             xticks = [4, 4.2, 4.4, 4.6, 4.8],
                             xticklabels = ["4", "", "", "", "4.8"],
                             yticks = [])
    ymax = fig.range("Density")[1]
    _opts = fig.opts  # Adding Text replaces the Overlay, so we need to transfer opts over
    fig = fig * hv.Text(4.8, 0.9*ymax, f"L = {L_bs:<5}", halign="left", valign="top",
                        kdims=[dims.R, "R_density"], fontsize=10)
    figs.append(fig.opts(_opts.get()))
hooks = figs[-1].opts.get().options.get("hooks", [])
for panel in figs[:-1]:
    panel.opts(hv.opts.Overlay(hooks=hooks+[viz.yaxis_off_hook, viz.xaxis_off_hook]))
figs[-1].opts(hooks = hooks+[viz.yaxis_off_hook])
fig_bootstrap = hv.Layout(figs)
fig_bootstrap.opts(
    hv.opts.Overlay(xlim=(4, 5), fontscale=1.3, fig_inches=4,
                    aspect=4, show_legend=False, sublabel_format=""),
    hv.opts.Layout(vspace=0.0, shared_axes=False)
)
fig_bootstrap.cols(1)

Loss distribution#

What about simply taking the distribution of losses computed on all the samples? Apart from again measuring only aleatoric, not epistemic, uncertainty, it would make for a very underpowered criterion:

Having the loss distributions of two models overlap does not mean that they cannot be compared.

What matters when comparing models is their expected risk. Of course, if the loss distributions are so different that they don’t overlap, then it is also clear which model has the lower expected risk. But such domination of one model over the other is rare, especially if both were fitted to the data. Much more commonly, the loss distributions overlap substantially.
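As a minimal numerical illustration (hypothetical Gaussian losses, not the Prinz ones): two loss distributions can overlap almost completely while their expected risks remain separated by several standard errors.

import numpy as np

rng = np.random.default_rng(1)
L = 4000
lossA = rng.normal(4.2, 1.0, size=L)   # hypothetical per-sample losses of model A
lossB = rng.normal(4.3, 1.0, size=L)   # hypothetical per-sample losses of model B

diff = lossB.mean() - lossA.mean()              # ≈ 0.1, small compared to the per-sample spread (≈ 1)
se = np.sqrt(lossA.var()/L + lossB.var()/L)     # standard error on the difference of means
print(f"difference of expected risks ≈ {diff:.3f}, standard error ≈ {se:.3f}")
# The per-sample distributions overlap heavily, yet the difference of means is
# several standard errors wide, so the expected risks are clearly distinguishable.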

Qdists = []
for a, c in zip("ABCD", colors.LP_candidates.values):
    Qdist = hv.Distribution(Qrisk[a](LP_data.get_data()), kdims=["loss"], label=f"Model {a}").opts(facecolor=c, edgecolor=c)
    Qcurve = hv.operation.stats.univariate_kde(Qdist).to.curve().opts(color=c)
    Qdists.append(Qdist*Qcurve)
fig_Qdist = hv.Overlay(Qdists)
fig_Qdist.opts(
    hv.opts.Distribution(alpha=0.5, xlim=(2.8, 9),
                         xticks=[3, 5, 7, 9],
                         xformatter=lambda x: f"{x:.0f}" if x == 3 or x == 9 else "",
                         yformatter=lambda y: f"{y:.1f}" if np.isclose(y, [0, 1]).any() else ""),
    hv.opts.Curve(linewidth=3, alpha=0.7),
    hv.opts.Overlay(fig_inches=4, fontscale=1.3, aspect=3,
                    show_legend=True, legend_opts={"labelspacing": 0.4}),
)
fig_Qdist.opts(
    hooks=[viz.despine_hook, viz.xlabel_shift_hook(8), viz.ylabel_shift_hook(10)]
)

Fully-synthetic resampling#

As a final comparison, we make a completely synthetic test, where each candidate model’s loss is evaluated on data generated from that same model. This has two immediate drawbacks:

  • It still captures only aleatoric variability, all the more so since the computation no longer depends on the actual data at all.

  • The value of the loss on synthetic samples can be very different from its value on the real samples, especially since epistemic errors are neglected.

We do this for both the Prinz models (A, B, C, D) and black body radiation models (Planck, Rayleigh-Jeans).

n_synthetic_resamples = 400
Ls_bs = [400, 4000, 40_000, 400_000]  # Dataset sizes for the synthetic resampling illustration
#assert LP_data_big.L >= max(Ls_bs)

Prinz models#

  • a ∈ {A,B,C,D} : The candidate model we consider.

  • LP_model ∈ {LP 1, LP 2, LP 3, LP 4, LP 5}: The model used to generate the dataset.

    • In this experiment, this is always the model corresponding to a. (So effectively LP 1 is never used.)

  • L_synth: Number of synthetic data samples in one dataset

  • subsample_idx: Arbitrary integer; used to seed the RNG, so that we get different datasets.

  • n_synthetic_resamples: Number of times we recreate a new dataset.

For each dataset, we:

  1. Generate the dataset given LP_model and the ground truth \(σ_\mathrm{obs}\) used everywhere in the paper. (Currently 2 mV.)

  2. Fit the observation \(σ_\mathrm{obs}\) by maximum likelihood.

    • For the synthetic experiments this is somewhat redundant, since the MLE will be very close to the ground truth value used to generate the dataset.

    • For experiments where there is model mismatch, this is essential both to represent actual practice and to get consistent results.

  3. Evaluate \(Q(\mathcal{D}^{\mathrm{test}} | \mathcal{M}_a, σ_\mathrm{obs})\).
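These are exactly the steps that the memoized get_Qarr defined earlier performs. Spelled out for a single synthetic dataset, with placeholder values for the candidate, data model, size and seed, they amount to the following sketch:

# Illustration of steps 1–3 for one synthetic dataset (placeholder values; the
# loop further below iterates over all models, dataset sizes and seeds).
a_ill, LP_model_ill, L_ill, seed_ill = "A", "LP 2", 400, 0

# 1. Generate (or retrieve from the cache) the synthetic dataset produced with LP_model_ill
data_ill = get_data(LP_model_ill, seed_ill).iloc[:L_ill]

# 2. Fit the observation noise σ_obs by maximum likelihood
σ_ill = fit_gaussian_σ(replace(LP_data_big, LP_model=LP_model_ill),
                       phys_models[a_ill], "Gaussian")

# 3. Evaluate the loss on the synthetic data; its mean is one sample of R
R_ill = Q(phys_model=phys_models[a_ill], obs_model="Gaussian", σ=σ_ill)(data_ill).mean()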

# Sanity check of dataset sizes (placeholder values; the loops below set these systematically)
L_synth, LP_model = Ls_bs[0], "LP 2"
LP_data_big = replace(LP_data_big, L=L_synth)  # Reuse the same datasets for each dataset size
dataset = replace(LP_data_big, LP_model=LP_model)
#fitted_σ = fit_gaussian_σ(dataset, phys_models[a], "Gaussian")

data = get_data(LP_model, 0)#.iloc[:L_synth]
print(LP_data_big.L)
print(len(data))

Caution – Hack

We enlarge LP_data_big here because it is hard-coded into the memoized functions, and we don’t want to invalidate the cache for the experiments above. This means the notebook won’t work as expected if executed out of order.

# Hack to enlarge the dataset size
LP_data_big = replace(LP_data_big, L=400_000)
# These two packages are used to track missing computations and print a summary message
from collections import Counter
from tabulate import tabulate
# Temporary hack to invalidate the part of the cache related to synth datasets
def invalid(metadata):
    return False
get_Qarr.cache_validation_callback = invalid
get_data.cache_validation_callback = invalid

The loop below has two code paths:

  • If precomputing is True: Assume that we are pre-computing the datasets and Qarr functions.

    • No point in generating the R samples since we won’t use them.

  • Otherwise: Assume that we want to generate the plots.

    • To allow plotting results before a batch of computations is done, we skip examples for which Qarr is not precomputed.

So as currently coded, this loop must be executed in two different ways.

  • First with multiple processes, to generate all the datasets and Qarr functions.

  • Then in a single process, to generate the plots.

If needed, we could add flags to allow generating everything in one go (at the cost of potentially locking up the process for a long time).

models = "ABCD"
R_synthetic_samples = {L_synth: {a: [] for a in models}
                       for L_synth in Ls_bs}
skipped = Counter()
print_status = precomputing   # Don’t print status messages while preparing plots in the notebook
for L_synth, a in tqdm(itertools.product(Ls_bs, models), total=len(Ls_bs)*len(models),
                       desc="Pure synthetic bootstrap (Prinz model)"):
    #if print_status: print(f"Synthetic resamples with dataset size L={L_synth}")
    if precomputing:
        print(f"Caching the Qarr function for {n_synthetic_resamples // num_processes} resamples of model {a}. (Subset #{process_idx})")
    else:
        if print_status: print(f"Computing {n_synthetic_resamples} resamples of model {a}.")

    LP_model = f"LP {ord(a) - ord('A') + 2}"
    for subsample_idx in range(n_synthetic_resamples):
        # print(f"{a}, L={L_synth}, LP_model={LP_model}, seed={subsample_idx} – "
        #       f"{get_Qarr.check_call_in_cache(a, L=L_synth, LP_model=LP_model, seed=subsample_idx)}")
        # continue
        if subsample_idx % num_processes != process_idx:   # Poor man’s parallelization: execute script multiple times with different args
            continue
        if print_status: tqdm.write(f"Computing synthetic sample #{subsample_idx}")

        if precomputing:
            # We are pre-computing the Qarr function – no point to generate R samples
            get_Qarr(a, L=L_synth, LP_model=LP_model, seed=subsample_idx)
        else:
            # We are (probably) generating the plots
            if get_Qarr.check_call_in_cache(a, L=L_synth, LP_model=LP_model, seed=subsample_idx):
                Qarr = get_Qarr(a, L=L_synth, LP_model=LP_model, seed=subsample_idx)
                R_synthetic_samples[L_synth][a].append(Qarr.mean())
            else:
                # To prevent locking the process, we skip those examples that don’t have pre-computed Qarr
                skipped[(L_synth, a)] += 1                

if skipped.total() > 0:
    total_examples = len(Ls_bs) * len("ABCD") * n_synthetic_resamples
    print(f"\nA total of {skipped.total()} examples (out of {total_examples}) were skipped because they don’t have a precomputed Qarr:")
    print(tabulate([(L, model, v) for (L, model), v in skipped.items()], headers=["dataset size (L)", "model", "# skipped"]))
for L, sampledict in R_synthetic_samples.items():
    to_remove = [a for a, samplelist in sampledict.items()
                   if len(samplelist) == 0]
    for a in to_remove:
        del sampledict[a]
R_synthetic_samples_prinz = R_synthetic_samples

Black body radiation models#

import Ex_UV

Hint

The RNG seed is determined from a dataset’s purpose.
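As a rough illustration of the idea (this is not the actual implementation of utils.get_rng or of the dataset class), a purpose string can be hashed into a reproducible seed:

import hashlib
import numpy as np

def rng_from_purpose(purpose: str) -> np.random.Generator:
    # Hash the purpose string into a 64-bit integer seed (illustrative only)
    seed = int.from_bytes(hashlib.sha256(purpose.encode()).digest()[:8], "little")
    return np.random.default_rng(seed)

rng_ill = rng_from_purpose("Pure synthetic bootstrap – 400, Planck, 0")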

models = ["Planck", "Rayleigh-Jeans"]
R_synthetic_samples = {L_synth: {a: [] for a in models}
                       for L_synth in Ls_bs}

for L_synth, a in tqdm(itertools.product(Ls_bs, models), total=len(Ls_bs)*len(models),
                       desc="Pure synthetic bootstrap (BB radiation)"):
    for subsample_idx in range(n_synthetic_resamples):
        dataset = replace(Ex_UV.observed_dataset,
                          L=L_synth, phys_model=a,
                          purpose=f"Pure synthetic bootstrap – {L_synth}, {a}, {subsample_idx}")
        Qarr = Ex_UV.Qrisk[a](dataset.get_data())
        R_synthetic_samples[L_synth][a].append(Qarr.mean())
R_synthetic_samples_blackbody = R_synthetic_samples

Plot results#

Accurately representing the distributions is delicate: we want to show that they become Dirac deltas, but the effect is so pronounced that they can’t all be plotted on the same scale. We could plot them with a logarithmic y scale, but this is not what readers expect; moreover, while accurate, it falsely gives the impression that there is meaningful probability mass in the tails.

Our solution is to keep the same scale for all plots, but to allow the lower ones to stretch into the upper ones (with some shading effects so the main plot on each level stands out). This is easiest to achieve by reducing the aspect (so each plot comes out taller at the same width) and then stacking the levels in Inkscape.

def plot_synthetic_stack(R_synthetic_samples, colors, xticks, xtext):
    figs = []
    for L_synth, samples in R_synthetic_samples.items():
        fig = viz.make_Rdist_fig(R_synthetic_samples[L_synth],
                                 colors = colors, xticks = xticks, yticks = []
                                )
        ymax = fig.range("Density")[1]
        _opts = fig.opts  # Adding Text replaces the Overlay, so we need to transfer opts over
        fig = fig * hv.Text(xtext, 0.9*ymax, f"L = {L_synth:<5}", halign="left", valign="top",
                            kdims=[dims.R, "R_density"], fontsize=10)
        figs.append(fig.opts(_opts.get()))
    hooks = figs[-1].opts.get().options.get("hooks", [])
    for panel in figs[:-1]:
        panel.opts(hv.opts.Overlay(hooks=hooks+[viz.yaxis_off_hook, viz.xaxis_off_hook]))
    figs[-1].opts(hooks = hooks+[viz.yaxis_off_hook])
    fig_synthetic = hv.Layout(figs)
    fig_synthetic.opts(
        hv.opts.Overlay(#xlim=(4.15, 5),
                        fontscale=1.3, fig_inches=4,
                        aspect=5, show_legend=False, sublabel_format=""),
        hv.opts.Layout(vspace=0.0, shared_axes=False)
    )
    return fig_synthetic.cols(1)
fig_synthetic_Prinz = plot_synthetic_stack(
    R_synthetic_samples_prinz,
    colors = colors.LP_candidates,
    xticks = [3, 6],
    xtext  = 5
)
fig_synthetic_Prinz
fig_synthetic_Prinz.opts(
    hv.opts.Overlay(aspect=1, ylim=(0,18.5))
)
fig_synthetic_UV = plot_synthetic_stack(
    R_synthetic_samples_blackbody,
    colors=hv.Cycle([Ex_UV.colors.Planck, Ex_UV.colors.RJ]),
    xticks = [-8.7, -8.6, -8.5, -8.4],
    xtext  = -8.45
)
fig_synthetic_UV.opts(hv.opts.Overlay(aspect=0.3, ylim=(0, 240)))
hv.save(fig_synthetic_Prinz, config.paths.figures/"synth-bootstrap_Prinz_raw.svg")
hv.save(fig_synthetic_UV, config.paths.figures/"synth-bootstrap_UV_raw.svg")

Note

We decided not to include the black body figure.

Finalization in Inkscape#

  • Combine all figures together.

  • Remove vertical white space between subpanels.

  • Align the \(L\) labels

  • Add vertical guidelines (#f2f2f2)

  • Add vertical scale bars for each panel (0.4, 2 and 40)

  • Add subfigure labels

  • Replace all axis labels with text generated with TexText.

2025-02-28   git: emd-paper develop #f0d7a261