Comparison with other criteria – large sample asymptotics#

\(\require{mathtools} \newcommand{\notag}{} \newcommand{\tag}{} \newcommand{\label}[1]{} \newcommand{\sfrac}[2]{#1/#2} \newcommand{\bm}[1]{\boldsymbol{#1}} \newcommand{\num}[1]{#1} \newcommand{\qty}[2]{#1\,#2} \renewenvironment{align} {\begin{aligned}} {\end{aligned}} \renewenvironment{alignat} {\begin{alignedat}} {\end{alignedat}} \newcommand{\pdfmspace}[1]{} % Ignore PDF-only spacing commands \newcommand{\htmlmspace}[1]{\mspace{#1}} % Ignore PDF-only spacing commands \newcommand{\scaleto}[2]{#1} % Allow to use scaleto from scalerel package \newcommand{\RR}{\mathbb R} \newcommand{\NN}{\mathbb N} \newcommand{\PP}{\mathbb P} \newcommand{\EE}{\mathbb E} \newcommand{\XX}{\mathbb X} \newcommand{\ZZ}{\mathbb Z} \newcommand{\QQ}{\mathbb Q} \newcommand{\fF}{\mathcal F} \newcommand{\dD}{\mathcal D} \newcommand{\lL}{\mathcal L} \newcommand{\gG}{\mathcal G} \newcommand{\hH}{\mathcal H} \newcommand{\nN}{\mathcal N} \newcommand{\pP}{\mathcal P} \newcommand{\BB}{\mathbb B} \newcommand{\Exp}{\operatorname{Exp}} \newcommand{\Binomial}{\operatorname{Binomial}} \newcommand{\Poisson}{\operatorname{Poisson}} \newcommand{\linop}{\mathcal{L}(\mathbb{B})} \newcommand{\linopell}{\mathcal{L}(\ell_1)} \DeclareMathOperator{\trace}{trace} \DeclareMathOperator{\Var}{Var} \DeclareMathOperator{\Span}{span} \DeclareMathOperator{\proj}{proj} \DeclareMathOperator{\col}{col} \DeclareMathOperator*{\argmin}{arg\,min} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator*{\gt}{>} \definecolor{highlight-blue}{RGB}{0,123,255} % definition, theorem, proposition \definecolor{highlight-yellow}{RGB}{255,193,7} % lemma, conjecture, example \definecolor{highlight-orange}{RGB}{253,126,20} % criterion, corollary, property \definecolor{highlight-red}{RGB}{220,53,69} % criterion \newcommand{\logL}{\ell} \newcommand{\eE}{\mathcal{E}} \newcommand{\oO}{\mathcal{O}} \newcommand{\defeq}{\stackrel{\mathrm{def}}{=}} \newcommand{\Bspec}{\mathcal{B}} % Spectral radiance \newcommand{\X}{\mathcal{X}} % X space 
\newcommand{\Y}{\mathcal{Y}} % Y space \newcommand{\M}{\mathcal{M}} % Model \newcommand{\Tspace}{\mathcal{T}} \newcommand{\Vspace}{\mathcal{V}} \newcommand{\Mtrue}{\mathcal{M}_{\mathrm{true}}} \newcommand{\MP}{\M_{\mathrm{P}}} \newcommand{\MRJ}{\M_{\mathrm{RJ}}} \newcommand{\qproc}{\mathfrak{Q}} \newcommand{\D}{\mathcal{D}} % Data (true or generic) \newcommand{\Dt}{\tilde{\mathcal{D}}} \newcommand{\Phit}{\widetilde{\Phi}} \newcommand{\Phis}{\Phi^*} \newcommand{\qt}{\tilde{q}} \newcommand{\qs}{q^*} \newcommand{\qh}{\hat{q}} \newcommand{\AB}[1]{\mathtt{AB}~\mathtt{#1}} \newcommand{\LP}[1]{\mathtt{LP}~\mathtt{#1}} \newcommand{\NML}{\mathrm{NML}} \newcommand{\iI}{\mathcal{I}} \newcommand{\true}{\mathrm{true}} \newcommand{\dist}{D} \newcommand{\Mtheo}[1]{\mathcal{M}_{#1}} % Model (theoretical model); index: param set \newcommand{\DL}[1][L]{\mathcal{D}^{(#1)}} % Data (RV or generic) \newcommand{\DLp}[1][L]{\mathcal{D}^{(#1')}} % Data (RV or generic) \newcommand{\DtL}[1][L]{\tilde{\mathcal{D}}^{(#1)}} % Data (RV or generic) \newcommand{\DpL}[1][L]{{\mathcal{D}'}^{(#1)}} % Data (RV or generic) \newcommand{\Dobs}[1][]{\mathcal{D}_{\mathrm{obs}}^{#1}} % Data (observed) \newcommand{\calibset}{\mathcal{C}} \newcommand{\N}{\mathcal{N}} % Normal distribution \newcommand{\Z}{\mathcal{Z}} % Partition function \newcommand{\VV}{\mathbb{V}} % Variance \newcommand{\T}{\mathsf{T}} % Transpose \newcommand{\EMD}{\mathrm{EMD}} \newcommand{\dEMD}{d_{\mathrm{EMD}}} \newcommand{\dEMDtilde}{\tilde{d}_{\mathrm{EMD}}} \newcommand{\dEMDsafe}{d_{\mathrm{EMD}}^{\text{(safe)}}} \newcommand{\e}{ε} % Model confusion threshold \newcommand{\falsifythreshold}{ε} \newcommand{\bayes}[1][]{B_{#1}} \newcommand{\bayesthresh}[1][]{B_{0}} \newcommand{\bayesm}[1][]{B^{\mathcal{M}}_{#1}} \newcommand{\bayesl}[1][]{B^l_{#1}} \newcommand{\bayesphys}[1][]{B^{{p}}_{#1}} \newcommand{\Bconf}[1]{B^{\mathrm{epis}}_{#1}} \newcommand{\Bemd}[1]{B^{\mathrm{EMD}}_{#1}} \newcommand{\BQ}[1]{B^{Q}_{#1}} 
\newcommand{\Bconfbin}[1][]{\bar{B}^{\mathrm{conf}}_{#1}} \newcommand{\Bemdbin}[1][]{\bar{B}_{#1}^{\mathrm{EMD}}} \newcommand{\bin}{\mathcal{B}} \newcommand{\Bconft}[1][]{\tilde{B}^{\mathrm{conf}}_{#1}} \newcommand{\fc}{f_c} \newcommand{\fcbin}{\bar{f}_c} \newcommand{\paramphys}[1][]{Θ^{{p}}_{#1}} \newcommand{\paramobs}[1][]{Θ^{ε}_{#1}} \newcommand{\test}{\mathrm{test}} \newcommand{\train}{\mathrm{train}} \newcommand{\synth}{\mathrm{synth}} \newcommand{\rep}{\mathrm{rep}} \newcommand{\MNtrue}{\mathcal{M}^{{p}}_{\text{true}}} \newcommand{\MN}[1][]{\mathcal{M}^{{p}}_{#1}} \newcommand{\MNA}{\mathcal{M}^{{p}}_{Θ_A}} \newcommand{\MNB}{\mathcal{M}^{{p}}_{Θ_B}} \newcommand{\Me}[1][]{\mathcal{M}^ε_{#1}} \newcommand{\Metrue}{\mathcal{M}^ε_{\text{true}}} \newcommand{\Meobs}{\mathcal{M}^ε_{\text{obs}}} \newcommand{\Meh}[1][]{\hat{\mathcal{M}}^ε_{#1}} \newcommand{\MNa}{\mathcal{M}^{\mathcal{N}}_a} \newcommand{\MeA}{\mathcal{M}^ε_A} \newcommand{\MeB}{\mathcal{M}^ε_B} \newcommand{\Ms}{\mathcal{M}^*} \newcommand{\MsA}{\mathcal{M}^*_A} \newcommand{\MsB}{\mathcal{M}^*_B} \newcommand{\Msa}{\mathcal{M}^*_a} \newcommand{\MsAz}{\mathcal{M}^*_{A,z}} \newcommand{\MsBz}{\mathcal{M}^*_{B,z}} \newcommand{\Msaz}{\mathcal{M}^*_{a,z}} \newcommand{\MeAz}{\mathcal{M}^ε_{A,z}} \newcommand{\MeBz}{\mathcal{M}^ε_{B,z}} \newcommand{\Meaz}{\mathcal{M}^ε_{a,z}} \newcommand{\zo}{z^{0}} \renewcommand{\lL}[2][]{\mathcal{L}_{#1|{#2}}} % likelihood \newcommand{\Lavg}[2][]{\mathcal{L}^{/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLphys}[2][]{\mathcal{L}^{{p}}_{#1|#2}} \newcommand{\Lavgphys}[2][]{\mathcal{L}^{{p}/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLL}[3][]{\mathcal{L}^{(#3)}_{#1|#2}} \newcommand{\lLphysL}[3][]{\mathcal{L}^{{p},(#3)}_{#1|#2}} \newcommand{\lnL}[2][]{l_{#1|#2}} % Per-sample log likelihood \newcommand{\lnLt}[2][]{\widetilde{l}_{#1|#2}} \newcommand{\lnLtt}{\widetilde{l}} % Used only in path_sampling \newcommand{\lnLh}[1][]{\hat{l}_{#1}} 
\newcommand{\lnLphys}[2][]{l^{{p}}_{#1|#2}} \newcommand{\lnLphysL}[3][]{l^{{p},(#3)}_{#1|#2}} \newcommand{\Elmu}[2][1]{μ_{{#2}}^{(#1)}} \newcommand{\Elmuh}[2][1]{\hat{μ}_{{#2}}^{(#1)}} \newcommand{\Elsig}[2][1]{Σ_{{#2}}^{(#1)}} \newcommand{\Elsigh}[2][1]{\hat{Σ}_{{#2}}^{(#1)}} \newcommand{\pathP}{\mathop{{p}}} % Path-sampling process (generic) \newcommand{\pathPhb}{\mathop{{p}}_{\mathrm{Beta}}} % Path-sampling process (hierarchical beta) \newcommand{\interval}{\mathcal{I}} \newcommand{\Phiset}[1]{\{\Phi\}^{\small (#1)}} \newcommand{\Phipart}[1]{\{\mathcal{I}_Φ\}^{\small (#1)}} \newcommand{\qhset}[1]{\{\qh\}^{\small (#1)}} \newcommand{\Dqpart}[1]{\{Δ\qh_{2^{#1}}\}} \newcommand{\LsAzl}{\mathcal{L}_{\smash{{}^{\,*}_A},z,L}} \newcommand{\LsBzl}{\mathcal{L}_{\smash{{}^{\,*}_B},z,L}} \newcommand{\lsA}{l_{\smash{{}^{\,*}_A}}} \newcommand{\lsB}{l_{\smash{{}^{\,*}_B}}} \newcommand{\lsAz}{l_{\smash{{}^{\,*}_A},z}} \newcommand{\lsAzj}{l_{\smash{{}^{\,*}_A},z_j}} \newcommand{\lsAzo}{l_{\smash{{}^{\,*}_A},z^0}} \newcommand{\leAz}{l_{\smash{{}^{\,ε}_A},z}} \newcommand{\lsAez}{l_{\smash{{}^{*ε}_A},z}} \newcommand{\lsBz}{l_{\smash{{}^{\,*}_B},z}} \newcommand{\lsBzj}{l_{\smash{{}^{\,*}_B},z_j}} \newcommand{\lsBzo}{l_{\smash{{}^{\,*}_B},z^0}} \newcommand{\leBz}{l_{\smash{{}^{\,ε}_B},z}} \newcommand{\lsBez}{l_{\smash{{}^{*ε}_B},z}} \newcommand{\LaszL}{\mathcal{L}_{\smash{{}^{*}_a},z,L}} \newcommand{\lasz}{l_{\smash{{}^{*}_a},z}} \newcommand{\laszj}{l_{\smash{{}^{*}_a},z_j}} \newcommand{\laszo}{l_{\smash{{}^{*}_a},z^0}} \newcommand{\laez}{l_{\smash{{}^{ε}_a},z}} \newcommand{\lasez}{l_{\smash{{}^{*ε}_a},z}} \newcommand{\lhatasz}{\hat{l}_{\smash{{}^{*}_a},z}} \newcommand{\pasz}{p_{\smash{{}^{*}_a},z}} \newcommand{\paez}{p_{\smash{{}^{ε}_a},z}} \newcommand{\pasez}{p_{\smash{{}^{*ε}_a},z}} \newcommand{\phatsaz}{\hat{p}_{\smash{{}^{*}_a},z}} \newcommand{\phateaz}{\hat{p}_{\smash{{}^{ε}_a},z}} \newcommand{\phatseaz}{\hat{p}_{\smash{{}^{*ε}_a},z}} \newcommand{\Phil}[2][]{Φ_{#1|#2}} % Φ_{\la} 
\newcommand{\Philt}[2][]{\widetilde{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philhat}[2][]{\hat{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philsaz}{Φ_{\smash{{}^{*}_a},z}} % Φ_{\lasz} \newcommand{\Phileaz}{Φ_{\smash{{}^{ε}_a},z}} % Φ_{\laez} \newcommand{\Philseaz}{Φ_{\smash{{}^{*ε}_a},z}} % Φ_{\lasez} \newcommand{\mus}[1][1]{μ^{(#1)}_*} \newcommand{\musA}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\SigsA}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\musB}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\SigsB}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\musa}[1][1]{μ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Sigsa}[1][1]{Σ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Msah}{{\color{highlight-red}\mathcal{M}^{*}_a}} \newcommand{\Msazh}{{\color{highlight-red}\mathcal{M}^{*}_{a,z}}} \newcommand{\Meah}{{\color{highlight-blue}\mathcal{M}^{ε}_a}} \newcommand{\Meazh}{{\color{highlight-blue}\mathcal{M}^{ε}_{a,z}}} \newcommand{\lsazh}{{\color{highlight-red}l_{\smash{{}^{*}_a},z}}} \newcommand{\leazh}{{\color{highlight-blue}l_{\smash{{}^{ε}_a},z}}} \newcommand{\lseazh}{{\color{highlight-orange}l_{\smash{{}^{*ε}_a},z}}} \newcommand{\Philsazh}{{\color{highlight-red}Φ_{\smash{{}^{*}_a},z}}} % Φ_{\lasz} \newcommand{\Phileazh}{{\color{highlight-blue}Φ_{\smash{{}^{ε}_a},z}}} % Φ_{\laez} \newcommand{\Philseazh}{{\color{highlight-orange}Φ_{\smash{{}^{*ε}_a},z}}} % Φ_{\lasez} \newcommand{\emdstd}{\tilde{σ}} \DeclareMathOperator{\Mvar}{Mvar} \DeclareMathOperator{\AIC}{AIC} \DeclareMathOperator{\epll}{epll} \DeclareMathOperator{\elpd}{elpd} \DeclareMathOperator{\MDL}{MDL} \DeclareMathOperator{\comp}{COMP} \DeclareMathOperator{\Lognorm}{Lognorm} \DeclareMathOperator{\erf}{erf} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator{\Image}{Image} \DeclareMathOperator{\sgn}{sgn} \DeclareMathOperator{\SE}{SE} % standard error \DeclareMathOperator{\Unif}{Unif} \DeclareMathOperator{\Poisson}{Poisson} \DeclareMathOperator{\SkewNormal}{SkewNormal} 
\DeclareMathOperator{\TruncNormal}{TruncNormal} \DeclareMathOperator{\Exponential}{Exponential} \DeclareMathOperator{\exGaussian}{exGaussian} \DeclareMathOperator{\IG}{IG} \DeclareMathOperator{\NIG}{NIG} \DeclareMathOperator{\Gammadist}{Gamma} \DeclareMathOperator{\Lognormal}{Lognormal} \DeclareMathOperator{\Beta}{Beta} \newcommand{\sinf}{{s_{\infty}}}\)

from emdcmp import draw_R_samples
from other_criteria import FactorizedPrior, get_ppfs, R, AIC, BIC, DIC, logℰ, elpd, 𝓁
import mdl_uv
Hide code cell source
import viz  # Local module
from paul_tol_rainbow import discrete_rainbow_scheme  # Local module
import matplotlib as mpl
import matplotlib.pyplot as plt
import panel as pn
import holoviews as hv
from cycler import cycler

hv.extension("matplotlib")  # Render HoloViews objects with the matplotlib backend

Use sans-serif for LaTeX axis labels.

Hide code cell source
# Matplotlib config: render math with the STIX sans-serif math fonts rather than TeX,
# so axis labels match the sans-serif body text without requiring a LaTeX install.
params = {'text.usetex': False,
          'font.family': 'STIXGeneral',
          'mathtext.fontset': 'stixsans'
          #'mathtext.fontset': 'custom',
          #'mathtext.rm'     : 'Bitstream Vera Sans',
          #'mathtext.it'     : 'Bitstream Vera Sans:italic',
          #'mathtext.bf'     : 'Bitstream Vera Sans:bold',
         }
mpl.rcParams.update(params)

We need to extend the color scheme from Ex_UV to support 4 candidates.

For this we use the discrete scheme proposed by Paul Tol [78, Fig. 22], which supports sequences between 1 and 23 steps. This is implemented by our custom function discrete_rainbow_scheme, which is essentially just a table lookup.

Some other decent options for candidates are listed below, from most to least discriminable. The greater variety of colours in the most discriminable palettes makes them a bit jarring.

# Alternative candidate palettes, from most to least discriminable (not used below):
hv.Palette("TolRainbow", range=(0.1, 1.), reverse=True)
hv.Palette("Sunset", range=(0., 1.), reverse=True)
hv.Palette("Magma", range=(0., .9), reverse=False)
hv.Palette("TolYlOrBr", range=(0.3, 1.), reverse=True)
# Extend the colour scheme with a 4-step discrete rainbow for the four candidate models.
# NOTE(review): this subclasses the previously defined `colors` namespace (defined
# outside this view — presumably in Ex_UV); only `candidates` is added here.
@dataclass
class colors(colors):
    candidates : hv.Cycle = hv.Cycle(values = discrete_rainbow_scheme(4)[::-1])
colors
scaledataRJPlanckcalib_curvescandidates

Note

See Fig. 2.8 for a more complete version of this table

Criterion

Not restricted to nested models

No restriction on noise model

Not restricted to log likelihood risk

Not asymptotic

Allows singular models

Allows Bayesian models

Symmetric

Information-theoretic

Consistent

Robust wrt. misspecification

Source of variability

\(\Bemd{}\)

Imperfect replications

WAIC

✘⁽²⁾

Posterior

Bayes factor

✘⁽²⁾

Posterior

MDL

✔/✘⁽¹⁾

✔/✘⁽¹⁾

✘⁽³⁾

Uniform dist on event space

(BIC)

✔/✘

Posterior

DIC

Posterior

AIC

Perfect replications

⁽¹⁾ Some entries are marked ✔/✘ for MDL because although the general form may not have these restrictions, they are usually required to obtain tractable approximations.
⁽²⁾ Bayesian methods are sometimes described as accounting for epistemic errors, but only for errors which are within the hypothesis class. So specifically not misspecification.
⁽³⁾ There has been at least one attempt to make MDL robust in the case of misspecified models, but it is far from standardized. It is also unclear whether it would be broadly applicable, since it depends on an ad hoc scaling of the likelihood, which itself introduces an additional hyperparameter.

Data#

For this experiment we want one of the candidates to be the true model. Also, in order to compute the NML complexity exactly, we want to be able to enumerate all possible datasets. This is possible thanks to how we set up the problem:

  • The use of Poisson noise means that possible values are discretized. Technically there are infinitely many possible values, but their probability quickly vanishes, so we need to consider only a dozen or so.

  • Fixed, exact \(λ\) values: for a given dataset size \(L\), we always generate data with the abscissa \(λ_k = λ_{\mathrm{min}} + k \frac{λ_{\mathrm{max}} - λ_{\mathrm{min}}}{L-1}\), where \(k=0, \dotsc, L-1\).

Hint

This is implemented as the EnumerableDataset class within the MDL utilities.

This means we can implement enumeration as follows:

  1. For each \(λ_k\), determine a set of possible values for \(\Bspec_k\). Below we use an interval containing 99.98% of the probability mass. Each \(\Bspec_k\) is a sequential array with equal steps.

  2. A dataset is generated by picking a random value from each \(\Bspec_k\): \(\{(λ_k, b_k) : b_k \sim \Bspec_k\}\). The total number of datasets is therefore \(\prod_{k=0}^{L-1} \lvert \Bspec_k \rvert\).

  3. In practice there are still an unfeasibly large number of datasets. Therefore we generate them from most to least likely: the hope is that the integrand in an NML integral (which in this case is a series) should correlate with the true data likelihood, and that we can truncate the series once we have enough dominant terms.

# Dataset whose outcomes can be enumerated (Poisson noise ⇒ discretized radiance
# values; fixed, evenly spaced λ abscissae) — required to compute NML exactly.
𝒟 = mdl_uv.EnumerableDataset(
    "comparison with other criteria",
    L   =100,            # number of data points
    λmin=0.6*μm,         # wavelength window near the distribution's peak
    λmax=2  *μm,
    s   =16*Bunits**-1,  # noise scale parameter
    T   =data_T)         # true temperature
Hide code cell source
#data   = Dataset("", L=4000, λmin=0.1*μm, λmax=2*μm, s=(2e-3*Bunits)**-1, T=3000*K)
# Nearly noise-free copy of the dataset, used to draw the mean curve through the scatter
𝒟_mean = replace(𝒟, s=(4e-5*Bunits)**-1)
dims = dict(kdims=hv.Dimension("λ", label="λ — wavelength", unit="μm"),
            vdims=hv.Dimension("B", label="$\\tilde{B}$ – radiance", unit="kW/m² nm sr"))
fig = hv.Scatter(𝒟.get_data(strip_units=True), **dims) * hv.Curve(𝒟_mean.get_data(strip_units=True), **dims)
fig.opts(
    hv.opts.Curve(color="#AA0022"),
    hv.opts.Scatter(size=1.5, backend="bokeh"),
    hv.opts.Scatter(s=12, backend="matplotlib"),
    # Figure size given in mm, converted to the inches matplotlib expects
    hv.opts.Overlay(fig_inches=tuple(2*((40, 34)*ureg.mm).to("in").m),
                    fontscale=2, backend="matplotlib"),
    hv.opts.Overlay(width=500, backend="bokeh")
)
hv.save(fig, config.paths.figures/"compare-other-methods_sample-data.svg")

Note that in contrast to the main setup, we consider a different regime, with wavelength window closer to the distribution’s peak and lower relative noise. This makes the likelihood essentially Gaussian, up to a normalization factor.

Candidate models#

Most established model comparison methods are meant to compare nested models, where preferably one of the alternatives is the true model. For criteria typically interpreted in terms of information theory, like AIC and MDL, that interpretation usually only holds when the true model is also the simplest. Therefore, to give existing criteria the fairest comparison, we set up a model comparison problem where the model alternatives are variants of the Planck radiance model augmented with a polynomial:

\[\tilde{\Bspec} \mid λ \sim \nN\biggl(\MP(λ\,;\, T) + \sum_{j=0}^{m-1} b_j λ^j \;,\;\; \frac{σ^2}{\MP(λ\,;\, T)}\biggr)\]

Note that in contrast to the main setup, the difference between Poisson and Gaussian noise is not too important, for two reasons:

  • We increased the signal to noise ratio, partly by choosing a regime (i.e. a \(λ\) window) which contains the largest radiance values.

  • We scale the variance in accordance with the Poisson distribution of the underlying data.

This ensures that whether we fit the model using Poisson or Gaussian likelihood, the true model is recovered when \(m=0\).

It is worth emphasizing that in order to accommodate some criteria, we are posing a different problem than the one we actually want to solve. First, instead of comparing the Planck (\(\MP\)) and Rayleigh-Jeans (\(\MRJ\)) models, we compare some artificially augmented version of \(\MP\). Second, we need to be in the regime where the Poisson is effectively a discretized Gaussian. This allows us to substitute the Poisson’s discrete PMF with the Gaussian’s continuous PDF for evaluating the loss. (If we tried to fit a discrete model, its PMF would vanish as soon as one data point is not in the support.) This also ensures that the asymptotic criteria, which assume that we have reached the regime where the likelihood is “Gaussian-like”, are valid. Finally, the discreteness of the data distribution nevertheless allows us to enumerate all possible datasets, a feature which is convenient for computing the NML distribution.

Making these choices of regime and data-generating model ensures no method is disadvantaged, but of course in practice a practitioner does not get to choose their data, and should choose their model(s) based on what makes sense for those data, not what simplifies the statistical analysis.

In practice of course we would like to be able to compare models without restriction on their structure or the type of noise, as we did in Fig. 2.7 comparing the Planck and Rayleigh-Jeans models with \(\Bemd{}\). Bayesian methods are also agnostic to the choice of model. Bayesian methods however consider a different type of epistemic error, and in particular ignore errors due to misspecification. We illustrate this with a comparison table in Table 6.1. For completeness, this table also includes values obtained with the other criteria – since in practice they are often used even when they are invalid.

@dataclass(frozen=True)
class CandidateModel:
    """Physical radiance model (Planck or Rayleigh-Jeans) augmented with a
    spurious polynomial of order ``m = len(coeffs)``, observed through Gaussian
    noise whose variance scales with the physical radiance (mimicking the
    variance of Poisson noise).
    """
    # FIX: "Plank" → "Planck". The rest of the file instantiates this class with
    # physmodel="Planck", which the old annotation disallowed.
    physmodel: Literal["Planck","Rayleigh-Jeans"]
    σ : float|PintQuantity  # noise scale; plain floats are converted to Bunits in __post_init__
    T : PintQuantity  # units: K
    coeffs: list[float]  # polynomial coefficients b₀ … b_{m−1}

    @property
    def m(self): 
        """Returns the order of the model’s spurious polynomial."""
        return len(self.coeffs)

    def __post_init__(self):
        # If parameters were given as plain floats, convert to default units
        if not isinstance(self.σ, pint.Quantity):
            object.__setattr__(self, "σ", self.σ * Bunits)   # IMO bypassing frozen dataclass in __post_init__ is acceptable
        if not isinstance(self.T, pint.Quantity):
            object.__setattr__(self, "T", self.T * ureg.kelvin)

    def _phys_B(self, λ):
        """Radiance of the bare physical model (no polynomial), in Bunits.
        Single home for the expression that was previously copy-pasted
        (flagged HACK) into `Q` and `apply_obsmodel`."""
        return phys_models[self.physmodel](λ, self.T).to(Bunits)

    def Q(self, λ_B):
        """
        The loss is computed from differences between the observed data `λ_B`
        and predictions of the _physical_ model. The logpdf of the _observation_
        model is used to convert those differences into a loss.
        """
        λ, Bdata = λ_B
        _, Btheory = self.apply_physmodel(λ)
        Bphys = self._phys_B(λ)  # bare physical radiance sets the noise scale
        return -stats.norm.logpdf((Bdata - Btheory).m, loc=0, scale=self.σ.m*np.sqrt(Bphys.m))  # IMPORTANT: Must match def in obsmodel

    def apply_obsmodel(self, λ_B, rng=None):
        """Add Gaussian observation noise (scale σ·√Bphys) to the data `λ_B`."""
        λ, Bdata = λ_B
        Bphys = self._phys_B(λ)
        return λ, Bdata + stats.norm.rvs(size=len(Bdata), loc=0, scale=self.σ.m*np.sqrt(Bphys.m), random_state=rng)*Bunits
    
    def apply_physmodel(self, λarr: ArrayLike, rng=None) -> tuple[ArrayLike, ArrayLike]:
        """Noise-free model prediction: physical radiance plus the spurious polynomial."""
        if isinstance(λarr, tuple):  # Allow passing output of data directly
            assert len(λarr) == 2, "CandidateModel expects either a single array `λarr`, or a tuple `(λarr, Barr)`."
            λarr = λarr[0]
        Barr = self._phys_B(λarr)
        if self.coeffs:
            Barr += Polynomial(self.coeffs)(λarr.m) * Bunits
        return λarr, Barr

    def gen_data(self, L, λmin=data_λ_min, λmax=data_λ_max, rng=None):
        """Generate `L` noisy observations with evenly spaced abscissae in [λmin, λmax]."""
        λarr = np.linspace(λmin, λmax, L)
        return self.apply_obsmodel(self.apply_physmodel(λarr), rng=rng)
    
    def __call__(self, λarr, rng=None):
        return self.apply_obsmodel(self.apply_physmodel(λarr), rng=rng)

Criteria & loss definitions#

Risk functional#

We use the negative log likelihood as our risk functional, with a Gaussian observation model.

(For technical reasons the risk functional is defined above as a method of the candidate model. The comparison functions expect a separate functional, so Q[phys,σ,T,coeffs] is a wrapper which calls Q from a properly parameterized model.)

Hide code cell source
class QMeta(type):
    """Metaclass giving the Q classes subscript syntax: ``Q[σ, T, *coeffs]``
    builds a parameterized risk function without instantiating the class."""
    def __getitem__(cls, Θ):
        # Θ is a flat (σ, T, *coeffs) tuple, e.g. the output of fitΘ
        σ, T, *coeffs = Θ
        return cls.__call__(σ, T, coeffs)
    def __call__(cls, σ, T, coeffs=()):
        # Accept both Q(σ, T, [b0, b1]) and Q(σ, T, (b0, b1)) nesting styles
        if len(coeffs) == 1 and isinstance(coeffs[0], (list, tuple)):
            coeffs = coeffs[0]
        #return Q(σ, T, coeffs)
        model = CandidateModel(cls.physmodel, σ, T, coeffs)
        return model.Q  # bound loss method of a properly parameterized model
#@dataclass
class QPlanck(metaclass=QMeta):
    physmodel="Planck"
class QRJ(metaclass=QMeta):
    physmodel="Rayleigh-Jeans"
# Dispatch table: physical model name → risk-function factory
Q = {"Planck": QPlanck, "Rayleigh-Jeans": QRJ}

Fitting function#

Given some observed data and the order of the extra polynomial, returns a tuple (σ, T, coeffs) of fitted parameters.

Parameters are fitted by minimizing \(Q\), subject to some mild regularization on \(σ\) and \(T\).

Hide code cell source
def _fitΘ(λℬ, physmodel, m):
    """Fit the candidate model to the data `λℬ`.

    σ and T are optimized in log₂ space; mild regularization keeps σ away
    from zero and T close to the true temperature. Returns (σ, T, coeffs).
    """
    log2_T0    = np.log2(data_T.m)
    priorT_std = 12  # width of the soft Gaussian prior on log₂ T

    def objective(Θtilde, physmodel=physmodel, λℬ=λℬ):
        log2σ, log2T = Θtilde[0], Θtilde[1]
        coeffs = list(Θtilde[2:])
        risk = Q[physmodel](2**log2σ, 2**log2T, coeffs)(λℬ).mean()
        # Regularization: soft floor on σ plus a broad prior pulling T toward data_T
        reg_σ = 2**(-log2σ/128)
        reg_T = (log2T - log2_T0)**2 / (2*priorT_std**2)
        return risk + reg_σ + reg_T

    x0 = np.r_[np.log2([.1, data_T.m]), [0]*m]
    res = optimize.minimize(objective, x0, tol=1e-5)

    return 2**res.x[0], 2**res.x[1], tuple(res.x[2:])

_fitΘ_cache = {}  # memoization table: (λ values, B values, physmodel, m) → (σ, T, coeffs)
def fitΘ(λℬ, physmodel, m):
    """Memoized wrapper around `_fitΘ`.

    The key includes the data values themselves, so a cache hit requires
    identical data, not merely identical (physmodel, m).
    """
    key = (tuple(λℬ[0].m), tuple(λℬ[1].m), physmodel, m)
    res = _fitΘ_cache.get(key)
    if res is None:
        # FIX: the result was previously never stored, so the cache stayed
        # empty and every call re-ran the optimizer.
        res = _fitΘ_cache[key] = _fitΘ(λℬ, physmodel, m)
    return res

Bayesian prior#

# Priors on the physical parameters (σ, T); used by the Bayesian criteria.
# “Loose”: wide log-uniform supports. “Tight”: supports concentrated around the truth.
π_phys_loose = FactorizedPrior(["loguniform", "loguniform"],
                               [(2**-10, 2**-1), (1000, 10000)],
                               rng="prior - compare - models")
π_phys_tight = FactorizedPrior(["loguniform", "loguniform"],
                               [(2**-5, 2**-4), (2000, 5000)],
                               rng="prior - compare - models")
def π_coeffs_loose(m):
    """Loose iid normal prior on the m polynomial coefficients (params (0, 5) — presumably loc/scale)."""
    return FactorizedPrior(["norm"]*m, [(0, 5)]*m,
                           rng="prior - compare - models - coeffs")
def π_coeffs_tight(m):
    """Tight iid normal prior on the m polynomial coefficients (params (0, 2) — presumably loc/scale)."""
    return FactorizedPrior(["norm"]*m, [(0, 2)]*m,
                           rng="prior - compare - models - coeffs")

NML distribution#

# Log likelihood in the signature mdl_uv expects: parameters first, returns a function of data
def logL_mdl(*Θ, Q): return lambda λB: -Q(*Θ)(λB)

def MDL_criterion(Q, θˆ, 𝒟, physmodel, m):
    """MDL score: negative maximized log likelihood plus the NML complexity term.

    Returns None when the complexity computation was skipped (no_compute) or failed.
    """
    #return Q[θˆ](𝒟.get_data()) \
    # NOTE(review): "Plank" below is a typo, but this string is a cache key —
    # fixing it would orphan existing cached results, so it is left as-is.
    comp = mdl_uv.comp(𝒟, r=30, m=m, logL=partial(logL_mdl, Q=Q),
                       fitΘ=lambda λℬ, m: fitΘ(λℬ, physmodel, m), 
                       cache_key="UV - Plank + poly", rng=3014,
                       no_compute=refresh_shelves)
    return None if comp is None else -𝓁(θˆ, Q, 𝒟) + comp

Compute criteria#

Note: We don’t change the dataset’s seed, so there is correlation in the data, but not exactly the same \((x_i, y_i)\) pairs are drawn. (Because the \(x_i\) are distributed evenly across \([λ_{min}, λ_{max}]\).)

mlist = [0,2,4,6]  # polynomial orders of the candidate models
Llist       = [2**6, 2**7, 2**8, 2**9, 2**10, 2**11, 2**12]   # np.logspace(6, 12, 7, base=2)
Llist_short = [2**6,       2**8,       2**10,        2**12] 
# NOTE(review): this line overrides the coarse Llist above with a finer 25-point grid;
# Llist_short (used for the expensive criteria) keeps the coarse subset.
Llist       = np.logspace(6, 12, 25, base=2).astype(int)
assert len(mlist) <= len(colors.candidates)

The PPF curves are useful diagnostics in and of themselves, to check that the fitted model behaves as expected.

Hide code cell source
# Diagnostic panels: mixed/synth PPF curves and model-vs-data scatter plots
# for every (physmodel, L, m) combination.
Φarr = np.linspace(0, 1)
ppf_panels = {}
scatter_panels = {}
def no_units(args): return tuple(a.m for a in args)
# FIX: derive each resized dataset from the saved base; rebinding 𝒟 from itself
# made the purpose string accumulate a new " - L=…" suffix every iteration.
𝒟_base = 𝒟
for physmodel in ["Planck", "Rayleigh-Jeans"]:
    for L in Llist:
        for m in mlist:
            𝒟 = replace(𝒟_base, L=L, purpose=𝒟_base.purpose + f" - {L=}")
            θˆ = fitΘ(𝒟.get_data(), physmodel, m=m)
            𝓜 = CandidateModel(physmodel, *θˆ)
            mixed_ppf, synth_ppf = get_ppfs(𝓜, 𝒟, rng=utils.get_rng("synth ppf - compare - models"))
            ppf_panels[physmodel, L, m] = \
                hv.Curve((Φarr, mixed_ppf(Φarr)), kdims="Φ", vdims="PPF", label="mixed") \
                * hv.Curve((Φarr, synth_ppf(Φarr)), kdims="Φ", vdims="PPF", label="synth")
            # FIX: `D` was undefined here (NameError); the dataset is named 𝒟.
            scatter_panels[physmodel, L, m] = \
                hv.Scatter(no_units(𝓜.gen_data(𝒟.L, λmin=𝒟.λmin, λmax=𝒟.λmax)), kdims="λ", vdims="B", label="model") \
                * hv.Scatter(no_units(𝒟.get_data()), kdims="λ", vdims="B", label="true")

Note

Unbiased estimators should “flip-flop” between equivalent models. To show this, we change the seed for each dataset, i.e. for each different \(L\). (We don’t change for different \(m\), because for a given dataset size, all models must be fitted to the same data.)

Note

elpd is reported with “deviance” scaling, so that it is comparable with other information criteria.

Compute MDL criteria#

Note

Because the MDL calculations are so onerous, it is worth manually managing our own cache under data/*-model-compare to ensure we never trigger a run accidentally, or delete an old run accidentally.

import multiprocessing as mp
import psutil

def compute_MDL(phys_L_m, 𝒟=𝒟):
    """Worker for mp.Pool: compute the MDL score for one (physmodel, L, m) triple.

    `𝒟` is bound at definition time to the module-level dataset and resized to
    `L` inside the worker. Returns (physmodel, L, m, score) so results can be
    matched to their arguments when collected out of order.
    """
    physmodel, L, m = phys_L_m
    𝒟 = replace(𝒟, L=L, purpose=f"compare models - nested -- data -- {L}")
    θˆ = fitΘ(𝒟.get_data(), physmodel, m=m)
    _Q = Q[physmodel]

    try:
        mdl_score = MDL_criterion(_Q, θˆ, 𝒟, physmodel, m)
    except ValueError:
        # If computations for a particular set of args fails, don’t let the exception terminate other computations
        # Instead store a flag value, so we can diagnose this later
        mdl_score = None
    return physmodel, L, m, mdl_score

# Compute (or load from the manually managed shelf) the MDL scores.
MDL_scores = {}
arglist = [(phys,L,m) for phys in ["Planck", "Rayleigh-Jeans"] for L in Llist for m in mlist
           if L <= 2**10   # Computations too slow otherwise
          ]
dellist = []  # indices of args which have already been computed, and can thus be removed from arglist
with shelve.open(str(config.paths.data/"MDL-model-compare")) as shelf:
    if refresh_shelves:
        shelf.clear()
    else:
        # Load previously computed scores and remember their indices so we can skip them
        for i, (phys,L,m) in enumerate(arglist):
            shelfkey = f"{phys} - {L} - {m}"
            if (mdl_score := shelf.get(shelfkey)) is not None:
                MDL_scores["MDL", phys,m,L] = mdl_score
                dellist.append(i)
    
        for i in sorted(dellist, reverse=True):  # reverse order keeps earlier indices valid
            del arglist[i]

if arglist and do_long_computations:
    cores = min(psutil.cpu_count(logical=False), len(arglist))
    with mp.Pool(cores) as pool:
        for phys, L, m, mdl_score in tqdm(pool.imap_unordered(compute_MDL, arglist),
                                           desc="MDL scores", total=len(arglist), miniters=1):
            # By opening & closing the shelf every time we ensure a) that results are written immediately and b) that we don’t lock the shelf
            # TODO: Implement timeout & retry in case the shelf is locked by another process
            with shelve.open(str(config.paths.data/"MDL-model-compare")) as shelf:
                shelfkey = f"{phys} - {L} - {m}"
                # FIX: the in-memory key now includes the "MDL" criterion label,
                # matching the keys used in the loading branch above and the
                # 4-element (criterion, phys, m, L) keys the DataFrame build expects.
                shelf[shelfkey] = MDL_scores["MDL", phys, m, L] = mdl_score

Compute other criteria#

# Compute (or load from the shelf) the non-MDL criteria for every
# (physmodel, m, L) combination. Results are keyed (criterion, phys, m, L).
scores = {}
crits_long = ["R", "AIC", "DIC", "BIC", "logE", "elpd"]
crits_short = []#["elpd"]   # criteria computed only for the sizes in Llist_short
with shelve.open(str(config.paths.data/"criteria-model-compare")) as shelf_scores:
    for physmodel in ["Planck", "Rayleigh-Jeans"]:
        print(physmodel)
        progbar_L = tqdm(desc="L", total=len(Llist), miniters=1)
        progbar_m = tqdm(desc="m", total=len(mlist), miniters=1)
        for L in Llist:
            progbar_m.reset()
            for m in mlist:
                # The expensive criteria in crits_short are only computed for a subset of sizes
                crits = (crits_long + crits_short) if L in Llist_short else crits_long
                keys = [(C, physmodel, m, L) for C in crits]
                strkeys = {key: ",".join(str(k) for k in key) for key in keys}
                _scores = {key: shelf_scores.get(strkeys[key]) for key in keys}
                missing = {C for (C, *_), s in _scores.items() if s is None}

                subkey = (physmodel, m, L)
                
                if missing:
                    π = π_phys_loose & π_coeffs_loose(m)
                    _𝒟 = replace(𝒟, L=L, purpose=f"compare models - nested -- data -- {L}")
                    _Q = Q[physmodel]
                    if missing & {"R", "AIC", "DIC", "BIC"}:
                        # FIX: fit on the size-L dataset `_𝒟`; previously this used the
                        # module-level `𝒟`, so every L was fitted to the same data.
                        θˆ = fitΘ(_𝒟.get_data(), physmodel, m=m)
                    else:
                        θˆ = None  # the purely Bayesian criteria need no point estimate

                    # Compute each missing criterion and persist it to the shelf immediately
                    if "R"    in missing: _scores[(key:=("R"   , *subkey))] = shelf_scores[strkeys[key]] = R(_Q, θˆ, _𝒟)
                    if "AIC"  in missing: _scores[(key:=("AIC" , *subkey))] = shelf_scores[strkeys[key]] = AIC(_Q, θˆ, _𝒟)
                    if "DIC"  in missing: _scores[(key:=("DIC" , *subkey))] = shelf_scores[strkeys[key]] = DIC(_Q, θˆ, π, _𝒟)
                    if "BIC"  in missing: _scores[(key:=("BIC" , *subkey))] = shelf_scores[strkeys[key]] = BIC(_Q, θˆ, _𝒟)
                    if "logE" in missing: _scores[(key:=("logE", *subkey))] = shelf_scores[strkeys[key]] = logℰ(_Q, π, _𝒟)
                    if "elpd" in missing: _scores[(key:=("elpd", *subkey))] = shelf_scores[strkeys[key]] = elpd(_Q, π, _𝒟, method="waic", scale="log")

                    del π, _𝒟, _Q, θˆ
                    
                scores[("R", *subkey)]   = _scores[("R", *subkey)]
                scores[("AIC", *subkey)] = _scores[("AIC", *subkey)]
                scores[("DIC", *subkey)] = _scores[("DIC", *subkey)]
                scores[("BIC", *subkey)] = _scores[("BIC", *subkey)]
                # logE and elpd return (value, standard error) pairs
                scores[("logE", *subkey)], scores[("logE_se", *subkey)] = _scores[("logE", *subkey)]
                # FIX: was `if L in ("elpd", *subkey) in _scores` — an accidental chained
                # comparison whose first clause is always True (L is a member of subkey).
                if ("elpd", *subkey) in _scores:
                    scores[("elpd", *subkey)], scores[("elpd_se", *subkey)] = _scores[("elpd", *subkey)]

                progbar_m.update()
            progbar_L.update()

Compute \(R\)-distributions#

def compute_Rdist(phys_L_m, 𝒟=𝒟):
    """
    Draw R samples for a single (physical model, L, m) configuration.

    `phys_L_m`: tuple (physmodel, L, m) — physical model label, dataset size
        and polynomial order passed to `fitΘ`.
    `𝒟`: dataset template; a copy with the requested `L` is created.

    Returns ``(physmodel, L, m, R_samples)`` so results from an unordered
    parallel map can be matched back to their arguments.
    """
    physmodel, L, m = phys_L_m
    dataset = replace(𝒟, L=L, purpose=f"compare models - nested -- data -- {L}")
    θˆ = fitΘ(dataset.get_data(), physmodel, m=m)
    candidate = CandidateModel(physmodel, *θˆ)

    rng = utils.get_rng("synth ppf - compare - models", "nested", "EMD", L, m)
    mixed_ppf, synth_ppf = get_ppfs(candidate, dataset, rng=rng)
    Rsamples = draw_R_samples(mixed_ppf, synth_ppf, c=0.5, max_M=2**14)
    return physmodel, L, m, Rsamples

# Compute (or load from the on-disk cache) the R-sample distributions for
# every (physical model, L, m) combination.
Rdists = {}
arglist = [(phys, L, m) for phys in ["Planck", "Rayleigh-Jeans"] for L in Llist_short for m in mlist]
with shelve.open(str(config.paths.data/"Rdists-model-compare")) as shelf:
    # First pass: pull already-computed results out of the shelf cache and
    # keep only the combinations that still need computing.
    remaining = []
    for phys, L, m in arglist:
        shelfkey = f"{phys} - {L} - {m}"
        Rsamples = shelf.get(shelfkey)
        if Rsamples is None:
            remaining.append((phys, L, m))
        else:
            Rdists[phys, m, L] = Rsamples
    arglist = remaining

    if arglist:
        # Second pass: compute the missing results in parallel
        # (one worker per physical core, capped by the number of tasks).
        cores = min(psutil.cpu_count(logical=False), len(arglist))
        with mp.Pool(cores) as pool:
            results = pool.imap_unordered(compute_Rdist, arglist)
            for phys, L, m, Rsamples in tqdm(results, desc="R distributions", total=len(arglist)):
                shelfkey = f"{phys} - {L} - {m}"
                # Store both in memory and in the shelf so reruns are cheap.
                shelf[shelfkey] = Rdists[phys, m, L] = Rsamples
# Long-format table: one row per (criterion, phys model, m, L) score.
df = pd.DataFrame([(*key, score) for key, score in (scores|MDL_scores).items()],
                  columns=["criterion", "phys", "m", "L", "score"])
# Pivot to wide format — rows indexed by L, columns by (criterion, m, phys) —
# then drop the redundant top 'score' level of the column index.
df = df.pivot(index=["L"], columns=["criterion", "m", "phys"], values=["score"])
df = df.droplevel(0, axis="columns")
df
criterion R AIC DIC BIC logE logE_se elpd elpd_se R AIC ... elpd elpd_se MDL
m 0 0 0 0 0 0 0 0 2 2 ... 6 6 0 2 4 6 0 2 4 6
phys Planck Planck Planck Planck Planck Planck Planck Planck Planck Planck ... Rayleigh-Jeans Rayleigh-Jeans Planck Planck Planck Planck Rayleigh-Jeans Rayleigh-Jeans Rayleigh-Jeans Rayleigh-Jeans
L
64 0.630972 34.212413 34.099891 38.530179 -23.763575 0.089247 -15.520759 6.170950 0.635304 37.446421 ... -308.729600 83.927159 NaN NaN NaN NaN NaN NaN NaN NaN
76 0.590268 74.693873 74.794400 79.355339 -43.279302 0.088922 -35.056534 5.935170 0.606385 78.441006 ... -410.527259 99.914677 145.818206 251.072729 253.645187 261.059533 253.735672 265.810171 270.504725 275.633965
90 0.590268 63.787197 63.923645 68.786816 -38.474928 0.090904 -29.924548 7.074875 0.606385 67.595652 ... -393.532347 95.722918 159.669791 283.960879 289.550367 298.856449 296.208005 304.965702 308.431948 312.555011
107 0.590268 93.090067 93.058435 98.435725 -53.120108 0.091431 -44.500003 8.308889 0.606385 97.067552 ... -513.638107 109.594226 200.248460 344.733875 351.005956 359.291008 357.443339 366.390585 372.302910 379.484959
128 0.590268 106.762285 106.931420 112.466346 -60.612310 0.092781 -51.745718 8.390453 0.606385 112.582422 ... -597.582195 117.332536 237.708262 410.966339 416.901510 422.989940 431.198851 435.528584 435.949517 440.663500
152 0.590268 126.749901 126.643467 132.797662 -70.649692 0.093250 -61.704673 8.940944 0.606385 128.474031 ... -766.633382 131.917327 281.624622 486.405564 494.847813 507.664457 504.804909 521.813047 521.824407 528.667014
181 0.590268 147.995945 148.133914 154.392940 -80.419961 0.093936 -71.316444 9.519865 0.606385 159.259083 ... -793.708423 132.163846 333.279752 572.890648 587.451006 597.327621 596.747340 616.611591 622.158419 623.672471
215 0.590268 182.903371 182.994254 189.644647 -98.791659 0.094568 -89.601963 11.353392 0.606385 187.362177 ... -966.294704 145.378500 401.113934 690.851795 705.100029 713.599367 713.752588 733.011338 733.178870 735.227082
256 0.590268 217.951592 217.952916 225.041947 -113.383919 0.095355 -104.063599 13.162413 0.606385 223.539403 ... -1284.943342 175.440460 NaN NaN NaN NaN NaN NaN NaN NaN
304 0.590268 217.990374 217.854283 225.424430 -115.791232 0.097920 -105.966137 12.741243 0.606385 226.452395 ... -1388.079441 171.260925 546.264487 947.635087 968.084218 983.712663 1003.555774 1028.787487 1031.182846 1031.767008
362 0.590268 333.414407 333.461190 341.197696 -172.868606 0.096922 -163.231930 14.316732 0.606385 343.690404 ... -1640.577967 176.041484 688.233567 1163.105387 1190.005217 1207.655251 1193.905209 1236.455671 1253.690735 1260.298738
430 0.590268 389.178881 389.315344 397.306451 -201.431139 0.099242 -191.359164 15.659166 0.606385 407.677396 ... -2139.883813 209.656286 814.540922 1371.647368 1401.453053 1422.242105 1413.520312 1453.415892 1467.929369 1478.798829
512 0.590268 369.577805 369.393745 378.054455 -191.519205 0.100085 -181.319885 17.556974 0.606385 372.004981 ... -2317.411669 219.170199 923.339660 1584.383012 1623.831553 1653.403065 1697.402223 1703.701715 1719.385035 1737.511908
608 0.590268 480.907840 480.866468 489.728190 -245.189913 0.101918 -234.628295 18.912407 0.606385 487.623812 ... -2950.879791 251.512690 1116.914686 1905.810839 1946.096864 1981.328390 2002.976233 2043.816195 2044.678672 2066.932739
724 0.590268 526.174407 526.214624 535.343990 -263.967185 0.101589 -253.470697 23.289231 0.606385 534.783330 ... -3421.283716 278.362624 1303.252426 2231.906062 2292.637024 2327.147044 2385.355880 2405.120929 2422.105215 2431.659940
861 0.590268 676.001813 675.926759 685.518002 -345.029696 0.102294 -334.396497 22.755336 0.606385 686.972846 ... -4439.554636 304.233163 1584.722994 2678.501867 2748.802783 2795.240825 2861.590420 2871.903842 2895.447354 2905.372485
1024 0.590268 797.005646 797.053144 806.868590 -403.366233 0.103106 -392.556843 24.811266 0.606385 813.906939 ... -5062.862907 317.800180 NaN NaN NaN NaN NaN NaN NaN NaN
1217 0.590268 965.547357 965.689909 975.755646 -486.146521 0.102893 -475.357856 26.464297 0.606385 991.463982 ... -5572.303122 323.451579 NaN NaN NaN NaN NaN NaN NaN NaN
1448 0.590268 1168.254348 1168.100968 1178.810225 -586.427307 0.103941 -575.454907 29.416060 0.606385 1205.970035 ... -6931.336305 405.671523 NaN NaN NaN NaN NaN NaN NaN NaN
1722 0.590268 1315.818874 1315.674767 1326.721357 -655.986223 0.105830 -644.623474 31.521313 0.606385 1347.522375 ... -7717.987053 400.589072 NaN NaN NaN NaN NaN NaN NaN NaN
2048 0.590268 1591.982806 1591.956635 1603.232044 -800.800418 0.107504 -789.088417 34.073232 0.606385 1639.752945 ... -9995.485396 439.348871 NaN NaN NaN NaN NaN NaN NaN NaN
2435 0.590268 1897.286722 1897.268913 1908.882126 -951.066446 0.106863 -939.484573 38.532618 0.606385 1920.949141 ... -10769.259172 449.601008 NaN NaN NaN NaN NaN NaN NaN NaN
2896 0.590268 2303.465136 2303.586378 2315.407308 -1150.158621 0.109348 -1138.031372 40.746489 0.606385 2343.460546 ... -14412.216490 520.935334 NaN NaN NaN NaN NaN NaN NaN NaN
3444 0.590268 2668.068307 2668.044462 2680.357085 -1335.238423 0.109623 -1323.082958 44.591955 0.606385 2714.944900 ... -16655.095014 609.238300 NaN NaN NaN NaN NaN NaN NaN NaN
4096 0.590268 3333.769198 3333.736458 3346.404730 -1653.628551 0.108972 -1641.603046 48.380477 0.606385 3376.738766 ... -18302.979627 590.357457 NaN NaN NaN NaN NaN NaN NaN NaN

25 rows × 72 columns

Computed columns#

For comparability, we take the negative log evidence and negative elpd, so that for each criterion, smaller is better.

We also display criteria with respect to a reference column (the one corresponding to the model with \(m=0\)).

Hide code cell source
def add_Δ_cols(df: pd.DataFrame, vdim, ref_col=None):
    """
    Add columns to `df` corresponding to the values of `vdim` subtracted by the
    reference column.
    By default, the `ref_col` is (`vdim`, 0).
    The new columns will have the criterion name 'Δ {vdim}'.
    If the Δ columns are already present, `df` is returned unchanged.
    """
    # BUG FIX: the original `Δvdim = f{vdim}"` was a broken f-string
    # (SyntaxError); the intended label — per the docstring and the call
    # sites using "Δ BIC"/"Δ logE" — is 'Δ {vdim}'.
    Δvdim = f"Δ {vdim}"
    if Δvdim in df.columns:
        # Δ cols have already been added
        return df
    if ref_col is None:
        ref_col = (vdim, 0)
    cols = df[vdim].columns
    # Subtract the reference column row-wise from every `vdim` column.
    Δdf = df[vdim].sub(df[ref_col], axis="index")
    # Rebuild the column MultiIndex with the new criterion label prepended.
    cols_df = cols.to_frame()
    cols_df.insert(0, "criterion", Δvdim)
    Δdf.columns = pd.MultiIndex.from_frame(cols_df)
    return pd.concat((df, Δdf), axis="columns")

def add_neg_cols(df: pd.DataFrame, vdim):
    """
    Add columns to `df` holding the negated values of criterion `vdim`.
    The new columns will have the criterion name '-{vdim}'.
    If the negated columns are already present, `df` is returned unchanged.
    """
    neg_vdim = f"-{vdim}"
    if neg_vdim in df.columns:
        return df  # negated columns already present
    sub_cols = df[vdim].columns
    negated = -df[vdim]
    # Prepend the new criterion label as the first level of the column index.
    header = sub_cols.to_frame()
    header.insert(0, "criterion", neg_vdim)
    negated.columns = pd.MultiIndex.from_frame(header)
    return pd.concat((df, negated), axis="columns")
# Add the Δ columns (difference to the m=0 reference) for every criterion,
# then negated versions of Δ logE and Δ elpd so that, for each criterion,
# smaller values are better.
for crit in ["BIC", "AIC", "logE", "elpd", "MDL"]:
    df = add_Δ_cols(df, crit).sort_index()
for crit in ["Δ logE", "Δ elpd"]:
    df = add_neg_cols(df, crit).sort_index()

Standard error on \(\log \eE\) is negligible:

df["logE_se"].max(None)
np.float64(0.3755792683276744)

Criteria comparison grid#

Hide code cell source
def plot_Rdists(Rdists: dict,
                orientation: Literal["horizontal","vertical"]="horizontal",
                offset: bool|float=False,
                ref_m=0,
                width=0.25,
                ylabel="$R$ distribution (EMD)",
                colors=colors.candidates.values,
                alpha=0.65,
                linewidth=1,
                ax=None):
    """
    Plot the sample sets in `Rdists` as one-sided violins against L on a log axis.

    `Rdists`: mapping {(m, L): R samples}. (The structural-comparison caller
        uses a model label in place of m; `ref_m` must then match that label.)
    `orientation`: "horizontal" puts L on the x axis; only the first three
        characters are checked.
    `offset`: if truthy, shift violins multiplicatively along the L axis so
        curves for different m don't overlap; `True` selects 0.025.
    `ref_m`: The grey reference line will be the mean of this distribution
    `width`: width or height of distributions. Scaled with L so they are visually consistent.
    `colors`: list of colors, cycled over the m values.
    `ax`: Axes to draw into; when None, a new Figure is created and returned.

    Returns the created Figure, or None when `ax` was supplied.
    """
    assert orientation[:3] in {"hor", "ver"}
    vertical = bool(orientation.startswith("ver"))
    # NOTE(review): the default `colors.candidates.values` may not satisfy this
    # assert; all callers in this file pass an explicit list — confirm default.
    assert isinstance(colors, list)
    
    if offset is True:
        offset = 0.025
    
    if ax is None:
        # Avoid using the `plt` interface to avoid memory leaks.
        # See https://panel.holoviz.org/reference/panes/Matplotlib.html#using-the-matplotlib-pyplot-interface
        fig = mpl.figure.Figure(figsize=(4,2))
        ax = fig.subplots()
    else:
        fig = None

    # Distinct m and L values, taken from the dict keys.
    mlist = sorted({m for (m, L) in Rdists})
    Llist = sorted({L for (m, L) in Rdists})
    
    data = []
    Lvals = set()  # L values before any offset  (NOTE(review): never populated or read)
    positions = []
    widths = []
    #n_m = len(np.unique(df.index.get_level_values("m")))
    #for (L,m), Rvals in df.groupby(["L", "m"]):
    # NB: Color cycle logic assumes the fast loop is over m
    for i, (L, m) in enumerate(product(Llist, mlist)):
        Rvals = Rdists[m,L]
        data.append(Rvals.flatten())
        if offset:
            # Multiplicative shift => constant visual shift on the log axis.
            # NOTE(review): the exponent uses the global index i, so the shift
            # does not reset for each new L value — confirm this is intended.
            L = L * 10**(offset*(i-len(mlist)/2))
        positions.append( L )
        widths.append( width * L )  # scale with L so all violins look the same size on a log axis

    # Grey reference line at the mean of the ref_m distribution for the largest L.
    if vertical:
        ax.axvline(Rdists[ref_m, Llist[-1]].mean(), color="#AAAAAA", alpha=0.15, zorder=-1)
    else:
        ax.axhline(Rdists[ref_m, Llist[-1]].mean(), color="#AAAAAA", alpha=0.15, zorder=-1)

    # NB: A horizontal plot (L along x axis) needs *vertical* distributions
    # NOTE(review): `orientation=` and `side=` require a recent matplotlib — verify version pin.
    parts = ax.violinplot(data, positions, widths=widths, orientation="horizontal" if vertical else "vertical",
                          showextrema=False, showmeans=False, side="low")
    for i, (m, polycoll, c) in enumerate(zip(cycle(mlist), parts["bodies"], cycle(colors[:len(mlist)]))):
        polycoll.set_facecolor(c)
        polycoll.set_edgecolor("none")
        polycoll.set_alpha(alpha=alpha)
        # AFAICT, it is not possible to set separate alpha values for face and edge colors (PolyCollection has its own alpha setting, which overrides the alpha of both facecolor and edgecolor)
        # Moreover, it draws the line down the middle of the violin, which in our case adds a useless flat line to the bottom of each plot.
        # So instead we extract the edge path and draw it ourselves
        path = polycoll._paths[0]  # Path is formed of two parts: first the curved edge, then the straight line in the middle of the violin
        edge = mpl.patches.PathPatch(mpl.path.Path(path.vertices[:len(path)//2], path.codes[:len(path)//2]),  # Draw only the curved part
                                     edgecolor=c, linewidth=linewidth,
                                     label=str(m) if i < len(mlist) else "_")  # Only apply one label for each m value, otherwise we can duplicate entries in the legend
        ax.add_patch(edge)

    # kaxis carries L; vaxis carries the R values.
    if vertical:
        kaxis = ax.yaxis
        vaxis = ax.xaxis
        ax.invert_yaxis()
    else:
        kaxis = ax.xaxis
        vaxis = ax.yaxis
    
    # NOTE(review): _set_axes_scale and _set_tick_locations are private
    # matplotlib Axis methods — fragile across matplotlib versions.
    kaxis._set_axes_scale("log")
    kaxis.minorticks_off()
    kaxis._set_tick_locations(Llist)
    kaxis.set_major_formatter(mpl.ticker.FuncFormatter(pow2_formatter))
    
    kaxis.set_label_text("$L$")
    vaxis.set_label_text(ylabel)

    return fig

Basic analysis#

Hide code cell source
# Restrict to the Planck family; keys become (m, L).
_Rdists = {key[1:]: Rdist for key, Rdist in Rdists.items() if key[0] == "Planck"}
pn.pane.Matplotlib(
    plot_Rdists(_Rdists, "vert", offset=0.05, width=1,
                colors=discrete_rainbow_scheme(len(mlist)), alpha=.65),
    tight=True,
)

Note

Even with the Planck model, there is still some small level of mismatch because the candidates use Gaussian noise, while the data use Poisson. This is almost certainly why higher-order polynomials obtain tighter \(R\)-distributions.

Hide code cell source
# Restrict to the Rayleigh-Jeans family; keys become (m, L).
# ref_m=6: the reference line uses the highest-order polynomial correction.
_Rdists = {key[1:]: Rdist for key, Rdist in Rdists.items() if key[0] == "Rayleigh-Jeans"}
pn.pane.Matplotlib(
    plot_Rdists(_Rdists, "vert", offset=0.05, ref_m=6, width=1,
                colors=discrete_rainbow_scheme(len(mlist)), alpha=.65),
    tight=True,
)
Hide code cell source
# Structural comparison: keep only m=0 and key by (phys, L), so the "m" slot
# of plot_Rdists carries the physical-model label instead.
_Rdists = {(key[0], key[2]): Rdist for key, Rdist in Rdists.items() if key[1] == 0}
pn.pane.Matplotlib(
    plot_Rdists(_Rdists, "vert", offset=0, ref_m="Planck", width=1,
                colors=discrete_rainbow_scheme(2), alpha=.65),
    tight=True,
)

Planck is favoured by \(B^{\mathrm{EMD}}\), although the difference is smaller when we add polynomials (which can partially compensate for the incorrect Rayleigh-Jeans model).

Hide code cell source
# Pairwise B^EMD table at L = 2**12: each cell is the fraction of sample pairs
# where the Planck R sample is below the Rayleigh-Jeans R sample.
P_index  = pd.MultiIndex.from_tuples([("Planck", m_P) for m_P in mlist], names=["phys model", "m"])
# BUG FIX: `MultiIndex.from_tuples` takes `names=` (plural), as on the line
# above; the original passed the invalid keyword `name=`.
RJ_index = pd.MultiIndex.from_tuples([("Rayleigh-Jeans", m_P) for m_P in mlist], names=["phys model", "m"])
pd.DataFrame(
    [[np.less.outer(Rdists[(*P_model, 2**12)], Rdists[(*RJ_model, 2**12)]).mean()
      for RJ_model in RJ_index] for P_model in P_index],
    index=P_index, columns=RJ_index
).style.set_caption(r"$B^{\mathrm{EMD}}$ between Planck & Rayleigh-Jeans models")
$B^{\mathrm{EMD}}$ between Planck & Rayleigh-Jeans models
  phys model Rayleigh-Jeans
  m 0 2 4 6
phys model m        
Planck 0 0.981366 0.778191 0.883850 0.753235
2 0.966974 0.731175 0.723118 0.669835
4 0.988357 0.808201 0.942017 0.801147
6 0.988236 0.801918 0.975159 0.782227

Final comparison grid figure#

Hide code cell source
# Figure layout: 3 rows (model families) × 6 columns (criteria + R distributions).
w = 30.548  # per-panel width in mm — NOTE(review): confirm provenance of this value
fig = mpl.figure.Figure(figsize=((6*w + 50, 3*40)*ureg.mm).to("in").m)  # pint: mm → inches
axes = fig.subplots(3,6)

## Nested models with true (Planck + poly) ##
axes_row = axes[0]
color_cycle = cycler(color=discrete_rainbow_scheme(len(mlist)))  # one color per m value
for ax in axes_row:
    ax.set_prop_cycle(color_cycle)
# Select the Planck columns; the 'phys' level is then constant, so drop it.
_df = df.loc[:,(np.s_[:],np.s_[:],"Planck")].droplevel("phys", axis="columns")
_Rdists = {(m, L): Rd for (phys, m, L), Rd in Rdists.items() if phys=="Planck"}
plot_curve(_df, "Δ BIC" ,       None, "m", "vert", ax=axes_row[0])
plot_curve(_df, "-Δ logE", "logE_se", "m", "vert", ax=axes_row[1], error_style="fill")
# Server issues corrupted the results for L=256 and L=64. Don’t drop L=64 because it would slightly shift the axis limits
plot_curve(_df.drop([256]), "Δ MDL" ,       None, "m", "vert", ax=axes_row[2])
plot_curve(_df, "Δ AIC" ,       None, "m", "vert", ax=axes_row[3])
# One doesn’t see anything when plotting the elpd as curves, so plot only the error bars instead
plot_curve(_df.loc[Llist_short], "Δ elpd", "elpd_se", "m", "vert", ax=axes_row[4], error_style="bar", linestyle="", offset=0.025)
plot_Rdists(_Rdists,                 "vert", ax=axes_row[5], offset=0.05, width=1, colors=color_cycle.by_key()["color"], alpha=.15)
# Single legend for the whole row, placed outside the last panel.
axes_row[-1].legend(loc="upper left", bbox_to_anchor=(1,1))

## Nested models without true (Rayleigh-Jeans + poly) ##
# Same layout as the Planck row, but for the misspecified Rayleigh-Jeans family.
axes_row = axes[1]
color_cycle = cycler(color=discrete_rainbow_scheme(len(mlist)))
for ax in axes_row:
    ax.set_prop_cycle(color_cycle)
_df = df.loc[:,(np.s_[:],np.s_[:],"Rayleigh-Jeans")].droplevel("phys", axis="columns")
_Rdists = {(m, L): Rd for (phys, m, L), Rd in Rdists.items() if phys=="Rayleigh-Jeans"}
plot_curve(_df, "Δ BIC" ,       None, "m", "vert", ax=axes_row[0])
plot_curve(_df, "-Δ logE", "logE_se", "m", "vert", ax=axes_row[1], error_style="fill")
# L=256 MDL results were corrupted by server issues, hence the drop.
plot_curve(_df.drop([256]), "Δ MDL" ,       None, "m", "vert", ax=axes_row[2])
plot_curve(_df, "Δ AIC" ,       None, "m", "vert", ax=axes_row[3])
plot_curve(_df, "-Δ elpd", "elpd_se", "m", "vert", ax=axes_row[4], error_style="fill")
plot_Rdists(_Rdists,                 "vert", ax=axes_row[5], offset=0.05, width=1, colors=color_cycle.by_key()["color"], alpha=.15)

## Structurally different models (Planck vs Rayleigh-Jeans, m=0) ##
axes_row = axes[2]
color_cycle = cycler(color=discrete_rainbow_scheme(2))  # two colors: one per physical model
for ax in axes_row:
    ax.set_prop_cycle(color_cycle)
# Keep only the m=0 columns; remaining column levels are (criterion, phys).
_df = df.loc[:,(np.s_[:],0)].droplevel("m", axis="columns")
_Rdists = {(phys, L): Rd for (phys, m, L), Rd in Rdists.items() if m==0}
# Recompute relative values wrt to Planck, m=0
# First remove the previously computed Δ/- columns (criterion names starting
# with '-' or 'Δ').
# NOTE(review): the comprehension enumerates criteria of the full `df` rather
# than `_df` — the labels coincide here, but this in-place drop on a slice,
# over a non-lexsorted column index, is what triggers the pandas
# PerformanceWarning seen in the output.
_df.drop([C for C in df.columns.unique("criterion") if C[0] in "-Δ"],
          axis="columns", inplace=True)
for vdim in ["BIC", "AIC", "logE", "elpd", "MDL"]:
    _df = add_Δ_cols(_df, vdim, ref_col=(vdim, "Planck")).sort_index()
for vdim in ["Δ logE", "Δ elpd"]:
    _df = add_neg_cols(_df, vdim).sort_index()
# Now do the plots
plot_curve(_df, "Δ BIC" ,       None, "phys", "vert", ax=axes_row[0])
plot_curve(_df, "-Δ logE", "logE_se", "phys", "vert", ax=axes_row[1], error_style="bar")
plot_curve(_df.drop([256]), "Δ MDL" ,       None, "phys", "vert", ax=axes_row[2])
plot_curve(_df, "Δ AIC" ,       None, "phys", "vert", ax=axes_row[3])
plot_curve(_df, "-Δ elpd", "elpd_se", "phys", "vert", ax=axes_row[4], error_style="fill")
plot_Rdists(_Rdists,                 "vert", ax=axes_row[5], offset=0, ref_m="Planck", width=1, colors=color_cycle.by_key()["color"], alpha=.15)
axes_row[-1].legend(loc="upper left", bbox_to_anchor=(1,1))

# Tidy shared axes: only the bottom row keeps x labels; only the leftmost
# column keeps a visible y axis.
for ax in axes[:-1,:].flat:
    ax.set_xlabel(None)
#    ax.xaxis.set_visible(False)
for ax in axes[:,1:].flat:
    ax.yaxis.set_visible(False)
    ax.spines["left"].set_visible(False)
for ax in axes[:,0]:
    ax.set_yticks(Llist_short)

pn.pane.Matplotlib(fig, tight=True, width=800)
/tmp/ipykernel_55955/3783345752.py:44: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.
  _df.drop([C for C in df.columns.unique("criterion") if C[0] in "-Δ"],

2025-07-19   git: emd-paper main #af8cfb86