Rejection probability is not predicted by loss distribution#

\(\require{mathtools} \newcommand{\notag}{} \newcommand{\tag}{} \newcommand{\label}[1]{} \newcommand{\sfrac}[2]{#1/#2} \newcommand{\bm}[1]{\boldsymbol{#1}} \newcommand{\num}[1]{#1} \newcommand{\qty}[2]{#1\,#2} \renewenvironment{align} {\begin{aligned}} {\end{aligned}} \renewenvironment{alignat} {\begin{alignedat}} {\end{alignedat}} \newcommand{\pdfmspace}[1]{} % Ignore PDF-only spacing commands \newcommand{\htmlmspace}[1]{\mspace{#1}} % Ignore PDF-only spacing commands \newcommand{\scaleto}[2]{#1} % Allow to use scaleto from scalerel package \newcommand{\RR}{\mathbb R} \newcommand{\NN}{\mathbb N} \newcommand{\PP}{\mathbb P} \newcommand{\EE}{\mathbb E} \newcommand{\XX}{\mathbb X} \newcommand{\ZZ}{\mathbb Z} \newcommand{\QQ}{\mathbb Q} \newcommand{\fF}{\mathcal F} \newcommand{\dD}{\mathcal D} \newcommand{\lL}{\mathcal L} \newcommand{\gG}{\mathcal G} \newcommand{\hH}{\mathcal H} \newcommand{\nN}{\mathcal N} \newcommand{\pP}{\mathcal P} \newcommand{\BB}{\mathbb B} \newcommand{\Exp}{\operatorname{Exp}} \newcommand{\Binomial}{\operatorname{Binomial}} \newcommand{\Poisson}{\operatorname{Poisson}} \newcommand{\linop}{\mathcal{L}(\mathbb{B})} \newcommand{\linopell}{\mathcal{L}(\ell_1)} \DeclareMathOperator{\trace}{trace} \DeclareMathOperator{\Var}{Var} \DeclareMathOperator{\Span}{span} \DeclareMathOperator{\proj}{proj} \DeclareMathOperator{\col}{col} \DeclareMathOperator*{\argmin}{arg\,min} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator*{\gt}{>} \definecolor{highlight-blue}{RGB}{0,123,255} % definition, theorem, proposition \definecolor{highlight-yellow}{RGB}{255,193,7} % lemma, conjecture, example \definecolor{highlight-orange}{RGB}{253,126,20} % criterion, corollary, property \definecolor{highlight-red}{RGB}{220,53,69} % criterion \newcommand{\logL}{\ell} \newcommand{\eE}{\mathcal{E}} \newcommand{\oO}{\mathcal{O}} \newcommand{\defeq}{\stackrel{\mathrm{def}}{=}} \newcommand{\Bspec}{\mathcal{B}} % Spectral radiance \newcommand{\X}{\mathcal{X}} % X space 
\newcommand{\Y}{\mathcal{Y}} % Y space \newcommand{\M}{\mathcal{M}} % Model \newcommand{\Tspace}{\mathcal{T}} \newcommand{\Vspace}{\mathcal{V}} \newcommand{\Mtrue}{\mathcal{M}_{\mathrm{true}}} \newcommand{\MP}{\M_{\mathrm{P}}} \newcommand{\MRJ}{\M_{\mathrm{RJ}}} \newcommand{\qproc}{\mathfrak{Q}} \newcommand{\D}{\mathcal{D}} % Data (true or generic) \newcommand{\Dt}{\tilde{\mathcal{D}}} \newcommand{\Phit}{\widetilde{\Phi}} \newcommand{\Phis}{\Phi^*} \newcommand{\qt}{\tilde{q}} \newcommand{\qs}{q^*} \newcommand{\qh}{\hat{q}} \newcommand{\AB}[1]{\mathtt{AB}~\mathtt{#1}} \newcommand{\LP}[1]{\mathtt{LP}~\mathtt{#1}} \newcommand{\NML}{\mathrm{NML}} \newcommand{\iI}{\mathcal{I}} \newcommand{\true}{\mathrm{true}} \newcommand{\dist}{D} \newcommand{\Mtheo}[1]{\mathcal{M}_{#1}} % Model (theoretical model); index: param set \newcommand{\DL}[1][L]{\mathcal{D}^{(#1)}} % Data (RV or generic) \newcommand{\DLp}[1][L]{\mathcal{D}^{(#1')}} % Data (RV or generic) \newcommand{\DtL}[1][L]{\tilde{\mathcal{D}}^{(#1)}} % Data (RV or generic) \newcommand{\DpL}[1][L]{{\mathcal{D}'}^{(#1)}} % Data (RV or generic) \newcommand{\Dobs}[1][]{\mathcal{D}_{\mathrm{obs}}^{#1}} % Data (observed) \newcommand{\calibset}{\mathcal{C}} \newcommand{\N}{\mathcal{N}} % Normal distribution \newcommand{\Z}{\mathcal{Z}} % Partition function \newcommand{\VV}{\mathbb{V}} % Variance \newcommand{\T}{\mathsf{T}} % Transpose \newcommand{\EMD}{\mathrm{EMD}} \newcommand{\dEMD}{d_{\mathrm{EMD}}} \newcommand{\dEMDtilde}{\tilde{d}_{\mathrm{EMD}}} \newcommand{\dEMDsafe}{d_{\mathrm{EMD}}^{\text{(safe)}}} \newcommand{\e}{ε} % Model confusion threshold \newcommand{\falsifythreshold}{ε} \newcommand{\bayes}[1][]{B_{#1}} \newcommand{\bayesthresh}[1][]{B_{0}} \newcommand{\bayesm}[1][]{B^{\mathcal{M}}_{#1}} \newcommand{\bayesl}[1][]{B^l_{#1}} \newcommand{\bayesphys}[1][]{B^{{p}}_{#1}} \newcommand{\Bconf}[1]{B^{\mathrm{epis}}_{#1}} \newcommand{\Bemd}[1]{B^{\mathrm{EMD}}_{#1}} \newcommand{\BQ}[1]{B^{Q}_{#1}} 
\newcommand{\Bconfbin}[1][]{\bar{B}^{\mathrm{conf}}_{#1}} \newcommand{\Bemdbin}[1][]{\bar{B}_{#1}^{\mathrm{EMD}}} \newcommand{\bin}{\mathcal{B}} \newcommand{\Bconft}[1][]{\tilde{B}^{\mathrm{conf}}_{#1}} \newcommand{\fc}{f_c} \newcommand{\fcbin}{\bar{f}_c} \newcommand{\paramphys}[1][]{Θ^{{p}}_{#1}} \newcommand{\paramobs}[1][]{Θ^{ε}_{#1}} \newcommand{\test}{\mathrm{test}} \newcommand{\train}{\mathrm{train}} \newcommand{\synth}{\mathrm{synth}} \newcommand{\rep}{\mathrm{rep}} \newcommand{\MNtrue}{\mathcal{M}^{{p}}_{\text{true}}} \newcommand{\MN}[1][]{\mathcal{M}^{{p}}_{#1}} \newcommand{\MNA}{\mathcal{M}^{{p}}_{Θ_A}} \newcommand{\MNB}{\mathcal{M}^{{p}}_{Θ_B}} \newcommand{\Me}[1][]{\mathcal{M}^ε_{#1}} \newcommand{\Metrue}{\mathcal{M}^ε_{\text{true}}} \newcommand{\Meobs}{\mathcal{M}^ε_{\text{obs}}} \newcommand{\Meh}[1][]{\hat{\mathcal{M}}^ε_{#1}} \newcommand{\MNa}{\mathcal{M}^{\mathcal{N}}_a} \newcommand{\MeA}{\mathcal{M}^ε_A} \newcommand{\MeB}{\mathcal{M}^ε_B} \newcommand{\Ms}{\mathcal{M}^*} \newcommand{\MsA}{\mathcal{M}^*_A} \newcommand{\MsB}{\mathcal{M}^*_B} \newcommand{\Msa}{\mathcal{M}^*_a} \newcommand{\MsAz}{\mathcal{M}^*_{A,z}} \newcommand{\MsBz}{\mathcal{M}^*_{B,z}} \newcommand{\Msaz}{\mathcal{M}^*_{a,z}} \newcommand{\MeAz}{\mathcal{M}^ε_{A,z}} \newcommand{\MeBz}{\mathcal{M}^ε_{B,z}} \newcommand{\Meaz}{\mathcal{M}^ε_{a,z}} \newcommand{\zo}{z^{0}} \renewcommand{\lL}[2][]{\mathcal{L}_{#1|{#2}}} % likelihood \newcommand{\Lavg}[2][]{\mathcal{L}^{/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLphys}[2][]{\mathcal{L}^{{p}}_{#1|#2}} \newcommand{\Lavgphys}[2][]{\mathcal{L}^{{p}/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLL}[3][]{\mathcal{L}^{(#3)}_{#1|#2}} \newcommand{\lLphysL}[3][]{\mathcal{L}^{{p},(#3)}_{#1|#2}} \newcommand{\lnL}[2][]{l_{#1|#2}} % Per-sample log likelihood \newcommand{\lnLt}[2][]{\widetilde{l}_{#1|#2}} \newcommand{\lnLtt}{\widetilde{l}} % Used only in path_sampling \newcommand{\lnLh}[1][]{\hat{l}_{#1}} 
\newcommand{\lnLphys}[2][]{l^{{p}}_{#1|#2}} \newcommand{\lnLphysL}[3][]{l^{{p},(#3)}_{#1|#2}} \newcommand{\Elmu}[2][1]{μ_{{#2}}^{(#1)}} \newcommand{\Elmuh}[2][1]{\hat{μ}_{{#2}}^{(#1)}} \newcommand{\Elsig}[2][1]{Σ_{{#2}}^{(#1)}} \newcommand{\Elsigh}[2][1]{\hat{Σ}_{{#2}}^{(#1)}} \newcommand{\pathP}{\mathop{{p}}} % Path-sampling process (generic) \newcommand{\pathPhb}{\mathop{{p}}_{\mathrm{Beta}}} % Path-sampling process (hierarchical beta) \newcommand{\interval}{\mathcal{I}} \newcommand{\Phiset}[1]{\{\Phi\}^{\small (#1)}} \newcommand{\Phipart}[1]{\{\mathcal{I}_Φ\}^{\small (#1)}} \newcommand{\qhset}[1]{\{\qh\}^{\small (#1)}} \newcommand{\Dqpart}[1]{\{Δ\qh_{2^{#1}}\}} \newcommand{\LsAzl}{\mathcal{L}_{\smash{{}^{\,*}_A},z,L}} \newcommand{\LsBzl}{\mathcal{L}_{\smash{{}^{\,*}_B},z,L}} \newcommand{\lsA}{l_{\smash{{}^{\,*}_A}}} \newcommand{\lsB}{l_{\smash{{}^{\,*}_B}}} \newcommand{\lsAz}{l_{\smash{{}^{\,*}_A},z}} \newcommand{\lsAzj}{l_{\smash{{}^{\,*}_A},z_j}} \newcommand{\lsAzo}{l_{\smash{{}^{\,*}_A},z^0}} \newcommand{\leAz}{l_{\smash{{}^{\,ε}_A},z}} \newcommand{\lsAez}{l_{\smash{{}^{*ε}_A},z}} \newcommand{\lsBz}{l_{\smash{{}^{\,*}_B},z}} \newcommand{\lsBzj}{l_{\smash{{}^{\,*}_B},z_j}} \newcommand{\lsBzo}{l_{\smash{{}^{\,*}_B},z^0}} \newcommand{\leBz}{l_{\smash{{}^{\,ε}_B},z}} \newcommand{\lsBez}{l_{\smash{{}^{*ε}_B},z}} \newcommand{\LaszL}{\mathcal{L}_{\smash{{}^{*}_a},z,L}} \newcommand{\lasz}{l_{\smash{{}^{*}_a},z}} \newcommand{\laszj}{l_{\smash{{}^{*}_a},z_j}} \newcommand{\laszo}{l_{\smash{{}^{*}_a},z^0}} \newcommand{\laez}{l_{\smash{{}^{ε}_a},z}} \newcommand{\lasez}{l_{\smash{{}^{*ε}_a},z}} \newcommand{\lhatasz}{\hat{l}_{\smash{{}^{*}_a},z}} \newcommand{\pasz}{p_{\smash{{}^{*}_a},z}} \newcommand{\paez}{p_{\smash{{}^{ε}_a},z}} \newcommand{\pasez}{p_{\smash{{}^{*ε}_a},z}} \newcommand{\phatsaz}{\hat{p}_{\smash{{}^{*}_a},z}} \newcommand{\phateaz}{\hat{p}_{\smash{{}^{ε}_a},z}} \newcommand{\phatseaz}{\hat{p}_{\smash{{}^{*ε}_a},z}} \newcommand{\Phil}[2][]{Φ_{#1|#2}} % Φ_{\la} 
\newcommand{\Philt}[2][]{\widetilde{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philhat}[2][]{\hat{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philsaz}{Φ_{\smash{{}^{*}_a},z}} % Φ_{\lasz} \newcommand{\Phileaz}{Φ_{\smash{{}^{ε}_a},z}} % Φ_{\laez} \newcommand{\Philseaz}{Φ_{\smash{{}^{*ε}_a},z}} % Φ_{\lasez} \newcommand{\mus}[1][1]{μ^{(#1)}_*} \newcommand{\musA}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\SigsA}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\musB}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\SigsB}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\musa}[1][1]{μ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Sigsa}[1][1]{Σ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Msah}{{\color{highlight-red}\mathcal{M}^{*}_a}} \newcommand{\Msazh}{{\color{highlight-red}\mathcal{M}^{*}_{a,z}}} \newcommand{\Meah}{{\color{highlight-blue}\mathcal{M}^{ε}_a}} \newcommand{\Meazh}{{\color{highlight-blue}\mathcal{M}^{ε}_{a,z}}} \newcommand{\lsazh}{{\color{highlight-red}l_{\smash{{}^{*}_a},z}}} \newcommand{\leazh}{{\color{highlight-blue}l_{\smash{{}^{ε}_a},z}}} \newcommand{\lseazh}{{\color{highlight-orange}l_{\smash{{}^{*ε}_a},z}}} \newcommand{\Philsazh}{{\color{highlight-red}Φ_{\smash{{}^{*}_a},z}}} % Φ_{\lasz} \newcommand{\Phileazh}{{\color{highlight-blue}Φ_{\smash{{}^{ε}_a},z}}} % Φ_{\laez} \newcommand{\Philseazh}{{\color{highlight-orange}Φ_{\smash{{}^{*ε}_a},z}}} % Φ_{\lasez} \newcommand{\emdstd}{\tilde{σ}} \DeclareMathOperator{\Mvar}{Mvar} \DeclareMathOperator{\AIC}{AIC} \DeclareMathOperator{\epll}{epll} \DeclareMathOperator{\elpd}{elpd} \DeclareMathOperator{\MDL}{MDL} \DeclareMathOperator{\comp}{COMP} \DeclareMathOperator{\Lognorm}{Lognorm} \DeclareMathOperator{\erf}{erf} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator{\Image}{Image} \DeclareMathOperator{\sgn}{sgn} \DeclareMathOperator{\SE}{SE} % standard error \DeclareMathOperator{\Unif}{Unif} \DeclareMathOperator{\Poisson}{Poisson} \DeclareMathOperator{\SkewNormal}{SkewNormal} 
\DeclareMathOperator{\TruncNormal}{TruncNormal} \DeclareMathOperator{\Exponential}{Exponential} \DeclareMathOperator{\exGaussian}{exGaussian} \DeclareMathOperator{\IG}{IG} \DeclareMathOperator{\NIG}{NIG} \DeclareMathOperator{\Gammadist}{Gamma} \DeclareMathOperator{\Lognormal}{Lognormal} \DeclareMathOperator{\Beta}{Beta} \newcommand{\sinf}{{s_{\infty}}}\)

This notebook explores a similar question as Aleatoric vs Sampling vs Replication uncertainty, namely why we cannot use the loss distribution directly and instead really need to estimate the epistemic distribution.

Here we explore the question in the context of our calibration experiments. Ultimately the goal is to predict the probability that a replication of the experiment would select model \(A\) over model \(B\), and this is exactly what our calibration experiments test (with synthetic replications). So if we can replace \(\Bemd{}\) by a hypothetical \(\BQ{}\) defined solely in terms of the loss distribution and still find a correlation with \(\Bconf{}\), that would suggest that we don’t really need \(\Bemd{}\) (which depends on a more complicated distribution and ad hoc parameter \(c\)) – we could get away with considering just the loss distribution, which is much simpler and therefore more appealing.

Of course, there is no reason to expect that the loss distribution (which is a result of aleatoric uncertainty) will be related to replication uncertainty – indeed, that is the point of the aforementioned notebook. But the question gets asked, so in addition to arguing on the basis of principle, it is good to tackle the question head-on, in the most direct way possible, with an actual experiment.

One challenge is how best to define \(B^Q\): since it measures aleatoric instead of replication/epistemic uncertainty, there is no ontologically obvious way to do this. So instead we will use the mathematically obvious way[1] and “hope for the best”:[2] we match the form of Eq. 2.13, replacing the \(R\)-distribution by the \(Q\)-distribution:

\[\begin{split}\BQ{AB;c_Q} &\coloneqq P(Q_A < Q_B + η)\,, \\ η &\sim \nN(0, c_Q^2) \,.\end{split}\]

Note that the probability is now over the samples of \(\Mtrue\) rather than over replications. The parameter \(c_Q\) is an additional degree of freedom; it effectively convolves the \(Q\) distributions with a Gaussian, thus increasing their spread and (similar to the effect of \(c\) on \(\Bemd{}\)) decreasing the sensitivity of \(\BQ{}\). We include \(η\) so that both \(\BQ{}\) and \(\Bemd{}\) have the same number of free parameters, and thus make the comparison more fair. We will select the \(c_Q\) which maximizes the correlation with \(\Bconf{}\), thus giving \(\BQ{}\) the best chance to match the performance of \(\Bemd{}\).

The advantage of Eq. 7.1 is that it is easily estimated from observed samples, with no need to define an ad hoc distribution over quantile functions. The disadvantage is that it replaces an ad hoc distribution by an ad hoc ontology, since there is no clear link between the distribution of \(Q\) values and the ability of a model to replicate. (The two are not unrelated, but the relation is highly confounded by the aleatoric uncertainty.) As we will see below, this makes it a poor predictor for \(\Bconf{}\).

Note also that appropriate values of \(c_Q\) are entirely dependent on what the typical values of the loss are. If the loss is given in terms of a probability density, rather than a probability mass, then this is not scale independent. There is therefore even less reason to expect the values of \(c_Q\) to generalize to other problems, in contrast to what we observed with \(\Bemd{}\).

Imports#

import numpy as np
from functools import partial
from itertools import chain
import holoviews as hv
hv.extension("matplotlib", "bokeh")

emdcmp library

import emdcmp

Project code

from config import config
import viz
import viz.emdcmp   # Backported updates to emdcmp.viz

Load the epistemic dists used in the calibration experiments

Load the customized calibration task which computes \(\BQ{}\) instead of \(\Bemd{}\).

Execution#

How many experiments

The function emd.viz.calibration_plot attempts to collect results into 16 bins,[3] so making \(N\) a multiple of 16 works nicely.

Numbers that worked well for us

For an initial pilot run, we found \(N=64\) or \(N=128\) to be good numbers. These numbers produce respectively 4 or 8 bins, which is often enough to check that \(\Bemd{}\) and \(\Bconf{}\) are reasonably distributed and that the epistemic distribution is actually probing the transition from strong to equivocal evidence. A subsequent run with \(N \in \{256, 512, 1024, 2048, 4096\}\) can then refine and smooth the curve.

We only run the experiments on the 6 epistemic distributions used in the main paper.

# Number of calibration experiments. A multiple of 16 bins nicely (see note above).
#N = 64
#N = 128
#N = 512
N = 2048

# The six epistemic distributions used in the main paper:
# three model pairs × two input strengths, all with Gaussian low-noise
# observations and short input correlations.
_model_pairs = [("C", "D"), ("A", "B"), ("A", "D")]
Ωdct = {}
for σi in ("weak input", "strong input"):
    for a, b in _model_pairs:
        Ω = EpistemicDist(N, a, b, "Gaussian", "low noise",
                          "short input correlations", σi)
        Ωdct[(f"{Ω.a} vs {Ω.b}", Ω.ξ_name, Ω.σo_dist, Ω.τ_dist, Ω.σi_dist)] = Ω

Hint

Calibrate will iterate over the data models twice, so it is important that the iterable passed as data_models not be consumable.

# Candidate scale parameters: c for the Bemd criterion, cQ for the BQ variant.
c_list = [2**-6, 2**-4, 2**-2, 2**0, 2**2, 2**4]
LQ = 4000  # Number of draws for the Monte Carlo estimate of normal dist addition
cQ_chosen = -2**-3
cQ_list = [#-2**1,  2**1,
           #-2**0,  2**0,
           -2**-1, 2**-1,
           #-2**-2, 2**-2,
           -2**-3, 2**-3,
           #0
          ]

# One BQ task and one standard Bemd task per epistemic distribution,
# both built on the same experiment generator so they see the same data.
tasks_normal = {}
tasks_BQ = {}
for Ωkey, Ω in Ωdct.items():
    tasks_BQ[Ωkey] = CalibrateBQ(
        reason = f"BQ calibration attempt – {Ω.a} vs {Ω.b} - {Ω.ξ_name} - {Ω.σo_dist} - {Ω.τ_dist} - {Ω.σi_dist} - {N=}",
        #c_list = cQ_list,
        c_list = c_list,
        experiments = Ω.generate(N),
        Ldata = L_data,
        Linf = Linf,
        LQ = LQ
    )
    tasks_normal[Ωkey] = emdcmp.tasks.Calibrate(
        reason = f"Prinz calibration – {Ω.a} vs {Ω.b} - {Ω.ξ_name} - {Ω.σo_dist} - {Ω.τ_dist} - {Ω.σi_dist} - {N=}",
        c_list = c_list,
        #c_list = [2**-6, 2**-4, 2**-2, 2**0, 2**4],
        experiments = Ω.generate(N),
        Ldata = L_data,
        Linf = Linf
    )

The code below creates task files for any missing tasks

    # Write a task file for every task that does not yet have results on disk.
    for task in chain(tasks_normal.values(), tasks_BQ.values()):
        if task.has_run:
            continue  # Don’t create task files for tasks which have already run
        Ω = task.experiments
        taskfilename = f"prinz_{type(task).__qualname__}__{Ω.a}vs{Ω.b}_{Ω.ξ_name}_{Ω.σo_dist}_{Ω.τ_dist}_{Ω.σi_dist}_N={Ω.N}_c={task.c_list}"
        task.save(taskfilename)

If any files were created, run those tasks from the command line with

smttask run -n1 --import config <task file>

before continuing.

# Guard: every task must already have results; if this fails, run the saved
# task files from the command line (see above) before continuing.
assert all(task.has_run for task in chain(tasks_normal.values(), tasks_BQ.values()))

Comparison of \(\Bemd{}\) and \(\BQ{}\) calibration curves#

Important

The basic principles for calibration

Wide support

The goal of calibration is to probe the intermediate regime where selection of either model \(A\) or \(B\) is not certain. It is therefore important that we obtain values of \(\Bemd{}\) over the entire interval \([0, 1]\).
Whether this is the case will be a function of two things:

  • the design of the calibration experiments: whether it produces ambiguous selection problems;

  • the choice of \(c\): generally, a larger \(c\) will concentrate \(\Bemd{}\) towards 0.5, a smaller \(c\) will concentrate them towards 0 and 1.

So we want the support of \(\Bemd{}\) to be as large as possible.

Flat distribution

As a secondary goal, we also want it to be as flat as possible, since this will lead to more efficient sampling: Since we need enough samples at every subinterval of \(\Bemd{}\), it is the most sparsely sampled regions which determine how many calibration datasets we need to generate. (And therefore how long the computation needs to run.)

Beyond making for shorter compute times, however, a flat distribution isn’t in and of itself a good thing: more important is that the criterion is able to resolve the models when it should.

# Run (or retrieve cached results for) each task and build its calibration plot.
def _make_calib_plots(task_dict):
    # One calibration-plot object per task key
    return {key: viz.emdcmp.calibration_plot(task.unpack_results(task.run()))
            for key, task in task_dict.items()}
calibs_normal = _make_calib_plots(tasks_normal)
calibs_BQ     = _make_calib_plots(tasks_BQ)
Hide code cell source
def mkkey(taskkey):
    # Keep only the model-pair and input-strength components of the full task key
    return taskkey[0], taskkey[4]
taskdims = ["models", "input"]
# Collect Bemd and Bepis histograms from both criteria into a single HoloMap,
# keyed by (uncertainty type, B variant, c, model pair, input strength).
_hist_entries = {}
for _uncert, _calibs in (("EMD", calibs_normal), ("Q", calibs_BQ)):
    for taskkey, calplot in _calibs.items():
        for c, hist in calplot.Bemd_hists.items():
            _hist_entries[(_uncert, "Bemd", c, *mkkey(taskkey))] = hist
        for c, hist in calplot.Bepis_hists.items():
            _hist_entries[(_uncert, "Bepis", c, *mkkey(taskkey))] = hist
hm = hv.HoloMap(_hist_entries, kdims=["uncertainty", "B", "c", *taskdims])
# NB: The set of c values is disjoint, so attempting to plot the two uncertainty dists together will lead to artifacts
hm.select(uncertainty="EMD").overlay("B").layout(taskdims).cols(3).opts(
    hv.opts.Layout(backend="matplotlib", fig_inches=3, tight=True),
    hv.opts.NdLayout(backend="matplotlib", fig_inches=3, tight=True),
    hv.opts.Histogram(backend="matplotlib", aspect=2)
)
Hide code cell source
# Same histogram layout as above, but for the loss-based (Q) criterion.
_q_layout = hm.select(uncertainty="Q").overlay("B").layout(taskdims).cols(3)
_q_layout.opts(
    hv.opts.Layout(backend="matplotlib", fig_inches=3, tight=True),
    hv.opts.NdLayout(backend="matplotlib", fig_inches=3, tight=True),
    hv.opts.Histogram(backend="matplotlib", aspect=2)
)
Hide code cell source
# Bemd and BQ histograms side by side, overlaying all c values in each panel.
_left  = hm.select(uncertainty="EMD", B="Bemd").drop_dimension(["uncertainty", "B"]).overlay("c")
_right = (hm.select(uncertainty="Q", B="Bemd").drop_dimension(["uncertainty", "B"]).overlay("c")
          .redim(Bemd="BQ").opts(title="BQ"))
fig = _left + _right
fig.opts(hv.opts.Layout(backend="matplotlib", fig_inches=2, sublabel_format=""),
         hv.opts.Histogram(backend="matplotlib", aspect=1.5),
         hv.opts.NdOverlay(legend_position="best")
        )
# Shared styling options applied to every calibration-curve figure below.
# (Commented-out entries are kept as a record of alternatives that were tried.)
calibopts = (
    hv.opts.Overlay(backend="matplotlib",
                    #legend_cols=3,
                    #legend_opts={"columnspacing": .5, "alignment": "center",
                    #             "loc": "upper center"},
                    #hooks=[partial(viz.despine_hook(), left=False)],
                    #fig_inches=config.figures.defaults.fig_inches,
                    aspect=1, fontscale=1.3),
    hv.opts.Scatter(backend="matplotlib", s=20),
    #hv.opts.Scatter(color=hv.Palette("copper", range=(0., 1), reverse=True)),
    hv.opts.Layout(sublabel_format=""),
    hv.opts.Layout(backend="matplotlib", hspace=0.1, vspace=0.05,
                   fig_inches=0.65*config.figures.defaults.fig_inches)
)
def format_calib_curves(panels, tasks):
    """Format six calibration panels as a 2×3 grid (weak input on top,
    strong input below; one model pair per column).

    Mutates the panels in place — titles, row labels, legend visibility,
    tick/axis hooks — and returns the same list.
    """
    assert len(panels) == 6

    # Column titles: the model pairs of the top and bottom rows must agree
    pairs = [(task.experiments.a, task.experiments.b) for task in tasks]
    assert pairs[:3] == pairs[3:]
    for panel, (a, b) in zip(panels[:3], pairs[:3]):
        panel.opts(title=rf"$\mathcal{{{{M}}}}_{a}$ vs $\mathcal{{{{M}}}}_{b}$")

    # Row titles: input strength is constant within each row
    assert all(task.experiments.σi_dist == "weak input" for task in tasks[:3])
    assert all(task.experiments.σi_dist == "strong input" for task in tasks[3:])
    panels[0].opts(ylabel=r"weak $I_{\mathrm{ext}}$")
    panels[3].opts(ylabel=r"strong $I_{\mathrm{ext}}$")

    # Only the first panel carries the legend
    panels[0].opts(hv.opts.Overlay(legend_cols=5, legend_position="top_left", legend_opts={"columnspacing": 2.}))
    for idx in range(1, 6):
        panels[idx].opts(hv.opts.Overlay(show_legend=False))

    # xlabel only on the centred bottom panel (index 4)
    for idx in (0, 1, 2, 3, 5):
        panels[idx].opts(xlabel="")

    # Despine and set ticks everywhere; tick labels only on outer panels
    hooks = {idx: [viz.despine_hook, viz.set_xticks_hook([0, 0.5, 1]), viz.set_yticks_hook([0, 0.5, 1])]
             for idx in range(6)}
    for idx in (0, 3):          # left column: y tick labels
        hooks[idx].append(viz.set_yticklabels_hook(["$0$", "$0.5$", "$1$"]))
    for idx in (3, 4, 5):       # bottom row: x tick labels
        hooks[idx].append(viz.set_xticklabels_hook(["$0$", "$0.5$", "$1$"]))
    for idx in (0, 1, 2):       # top row: hide x axis
        hooks[idx].append(viz.xaxis_off_hook)
    for idx in (1, 2, 4, 5):    # non-left columns: hide y axis
        hooks[idx].append(viz.yaxis_off_hook)
    for idx, hook_lst in hooks.items():
        panels[idx].opts(hooks=hook_lst)

    return panels
Hide code cell source
def mkkey(taskkey):
    # Reduce the full task key to (model pair, input strength)
    return taskkey[0], taskkey[4]
taskdims = ["models", "input"]
# Gather the per-c scatter elements of both criteria into one HoloMap.
_scatter_entries = {}
for _uncert, _calibs in (("EMD", calibs_normal), ("Q", calibs_BQ)):
    for taskkey, calplot in _calibs.items():
        for scat in calplot.overlayed_scatters.Scatter.values():
            # scat.label[2:] strips a 2-char prefix — presumably "c=" — to
            # recover the numeric c value; TODO confirm against the plot labels
            _scatter_entries[(_uncert, "Bemd", float(scat.label[2:]), *mkkey(taskkey))] = scat
hm = hv.HoloMap(_scatter_entries, kdims=["uncertainty", "B", "c", *taskdims])
hm.select(uncertainty="EMD")
# Calibration curves (scatter) for the standard Bemd criterion.
_emd_panels = [calplot.overlayed_scatters.redim(Bemd=hv.Dimension("Bemd", label=r"$B^{\mathrm{EMD}}$"))
               for calplot in calibs_normal.values()]
fig = hv.Layout(
    format_calib_curves(_emd_panels, list(tasks_normal.values()))
).cols(3)
fig = fig.opts(*calibopts, hv.opts.Scatter(backend="matplotlib", s=12),
               hv.opts.Overlay(show_legend=False),
               hv.opts.Layout(fig_inches=1.15))
display(fig)
# The legend will use Curve data, which are all grey dotted lines.
# To get a legend with coloured dots, we do another figure with only the scatter plots
# We don’t want the figure (just the legend), so we make the figure tiny, remove its axes, and let the legend overflow
#hv.Overlay(list(fig.Overlay.II.Scatter.data.values())).opts(
#    fig_inches=0.01,
#    hooks=[viz.xaxis_off_hook, viz.yaxis_off_hook])
Hide code cell source
# Calibration curves (scatter) for the loss-based BQ criterion.
_bq_panels = [calplot.overlayed_scatters.redim(Bemd=hv.Dimension("BQ", label="$B^Q$"))
              for calplot in calibs_BQ.values()]
fig = hv.Layout(
    format_calib_curves(_bq_panels, list(tasks_BQ.values()))
).cols(3)
fig = fig.opts(*calibopts, hv.opts.Scatter(backend="matplotlib", s=12),
               hv.opts.Overlay(show_legend=False),
               hv.opts.Layout(fig_inches=1.15))
display(fig)
Hide code cell source
# Standalone legend (one scatter per c value) plus a table mapping c ↔ log2(c).
_first_calplot = next(iter(calibs_normal.values()))
_legend_scatters = []
for (c,), plot in _first_calplot.scatters.overlay("c", sort=False).data.items():
    _legend_scatters.append(next(iter(plot.Scatter.values())).clone())
legend = hv.Overlay(_legend_scatters)
c_values = hv.Table([(c, np.log2(c)) for c in c_list], kdims=["c"], vdims=["log2(c)"])
(legend + c_values).opts(
    hv.opts.Overlay(fontscale=2), # fig_inches=0.01
    hv.opts.Scatter(hooks=[viz.xaxis_off_hook, viz.yaxis_off_hook], s=40, xlim=(0,.01), ylim=(0,.01)),
    hv.opts.Layout(sublabel_format="")
)
Hide code cell source
# Print panel descriptions
from tabulate import tabulate
headers = ["models", "input corr", "input strength", "obs noise", "obs dist"]
data = [(f"Panel ({lbl})",
         f"{(Ω:=task.experiments).a} vs {Ω.b}", f"{Ω.τ_dist}", f"{Ω.σi_dist}", f"{Ω.σo_dist}", f"{Ω.ξ_name} noise")
        for lbl, task in zip("abcdef", tasks_BQ.values())]
print(tabulate(data, headers, tablefmt="simple_outline"))
┌───────────┬──────────┬──────────────────────────┬──────────────────┬─────────────┬────────────────┐
│           │ models   │ input corr               │ input strength   │ obs noise   │ obs dist       │
├───────────┼──────────┼──────────────────────────┼──────────────────┼─────────────┼────────────────┤
│ Panel (a) │ C vs D   │ short input correlations │ weak input       │ low noise   │ Gaussian noise │
│ Panel (b) │ A vs B   │ short input correlations │ weak input       │ low noise   │ Gaussian noise │
│ Panel (c) │ A vs D   │ short input correlations │ weak input       │ low noise   │ Gaussian noise │
│ Panel (d) │ C vs D   │ short input correlations │ strong input     │ low noise   │ Gaussian noise │
│ Panel (e) │ A vs B   │ short input correlations │ strong input     │ low noise   │ Gaussian noise │
│ Panel (f) │ A vs D   │ short input correlations │ strong input     │ low noise   │ Gaussian noise │
└───────────┴──────────┴──────────────────────────┴──────────────────┴─────────────┴────────────────┘

Redraw the calibration curves in the main text#

Since in the course of this experiment, we redid the original six panel calibration with more experiments, we might as well update the figure in the main text with a higher-resolution one. The original code is found here.

# High-resolution replacement for the main-text calibration figure,
# using the overlayed line plots instead of scatters.
_line_panels = [calplot.overlayed_lines.redim(Bemd=hv.Dimension("Bemd", label=r"$B^{\mathrm{EMD}}$"))
                for calplot in calibs_normal.values()]
fig_calib_highres = hv.Layout(
    format_calib_curves(_line_panels, list(tasks_normal.values()))
).cols(3)
fig_calib_highres = fig_calib_highres.opts(
    *calibopts,
    hv.opts.Overlay(show_legend=False),
    hv.opts.Layout(fig_inches=1.15),
    hv.opts.Curve(color=hv.Palette("copper", range=(0., 1), reverse=True))
)
display(fig_calib_highres)

The legend is created separately and assembled with Inkscape.

import math
# Build the legend figure: one relabelled curve per c value ("c=2^k"),
# shrunk to a tiny extent so only the legend itself is visible.
_legend_curves = []
for c, curve in zip(c_list, fig_calib_highres.Overlay.I.Curve):
    _exponent = int(round(math.log2(c)))
    _legend_curves.append(
        curve.clone().redim.range(Bemd=(0,0.1), Bepis=(0.5,0.55))
             .relabel(label=f"$c=2^{{{_exponent}}}$"))
legend_calib_highres = hv.Overlay(_legend_curves).opts(
    hv.opts.Overlay(show_legend=True, legend_cols=6,
                    hooks=[viz.xaxis_off_hook, viz.yaxis_off_hook],
                    aspect=6)
)
legend_calib_highres
hv.save(fig_calib_highres, config.paths.figures/"prinz_calibrations_high-res_raw.svg")
hv.save(legend_calib_highres, config.paths.figures/"prinz_calibrations_high-res_legend_raw.svg")

Compute comparison tables for each \(c\) value#

from addict import Dict
from functools import partial
import math
import emdcmp

import utils
from Ex_Prinz2004 import LP_data, phys_models, AdditiveNoise, fit_gaussian_σ, generate_synth_samples, Q
import holoviews as hv
hv.extension("matplotlib")

Recreate the synthetic and mixed PPFs as we do in the base notebook.

# Fit a Gaussian observation-noise σ for each candidate model A–D (in parallel),
# then assemble the full candidate models and their per-model loss functions Q.
# NOTE(review): mp.Pool forks the process; the runtime warning below says
# os.fork() is unsafe with JAX's threads — consider a "spawn" context; confirm.
candidate_models = Dict()
Qrisk = Dict()
with mp.Pool(4) as pool:
    fitted_σs = pool.starmap(fit_gaussian_σ, [(LP_data, phys_models[a], "Gaussian") for a in "ABCD"])
for a, σ in zip("ABCD", fitted_σs):
    # Candidate model = physical model composed with additive Gaussian observation noise
    candidate_models[a] = utils.compose(AdditiveNoise("Gaussian", σ),
                                        phys_models[a])
    Qrisk[a] = Q(phys_model=phys_models[a], obs_model="Gaussian", σ=σ)
/usr/lib64/python3.11/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.
  self.pid = os.fork()
def get_synth_ppf(a):
    # Empirical risk PPF from synthetic samples drawn from candidate model `a` itself
    samples = generate_synth_samples(candidate_models[a])
    return emdcmp.make_empirical_risk_ppf(Qrisk[a](samples))

def get_mixed_ppf(a):
    # Empirical risk PPF of candidate model `a` evaluated on the observed data
    observed = LP_data.get_data()
    return emdcmp.make_empirical_risk_ppf(Qrisk[a](observed))

with mp.Pool(4) as pool:
    synth_ppf = Dict(zip("ABCD", pool.map(get_synth_ppf, "ABCD")))
    mixed_ppf = Dict(zip("ABCD", pool.map(get_mixed_ppf, "ABCD")))

Draw sets of R samples for each value of \(c\).

c_list = [2**-6, 2**-4, 2**-2, 2**0, 2**2, 2**4]

def draw_R_samples(a, c):
    # Draw R-distribution samples for model `a` at sensitivity parameter c
    return emdcmp.draw_R_samples(mixed_ppf[a], synth_ppf[a], c=c)

# Sample all four models for every c, parallelized over models.
R_samples = {}
with mp.Pool(4) as pool:
    for c in c_list:
        R_samples[c] = Dict(zip("ABCD", pool.map(partial(draw_R_samples, c=c), "ABCD")))

# One comparison table per c, browsable by log2(c).
_tables = {}
for c in c_list:
    _cmp = emdcmp.utils.compare_matrix(R_samples[c]).reset_index().rename(columns={"index": "models"})
    _tables[int(round(math.log2(c)))] = hv.Table(_cmp)
hm = hv.HoloMap(_tables, kdims=["log2(c)"])
hm
# Provenance stamp: records the git SHA and the versions of the listed packages
emdcmp.utils.GitSHA(packages=["emdcmp", "pyloric-network-simulator"])

2025-07-18   git: emd-paper main #05b68aa4


emdcmp : 1.1.0
pyloric-network-simulator : 0.1.4.dev0+g2093368.d20250708