Comparison with other criteria – large sample asymptotics#
\(\require{mathtools} \newcommand{\notag}{} \newcommand{\tag}{} \newcommand{\label}[1]{} \newcommand{\sfrac}[2]{#1/#2} \newcommand{\bm}[1]{\boldsymbol{#1}} \newcommand{\num}[1]{#1} \newcommand{\qty}[2]{#1\,#2} \renewenvironment{align} {\begin{aligned}} {\end{aligned}} \renewenvironment{alignat} {\begin{alignedat}} {\end{alignedat}} \newcommand{\pdfmspace}[1]{} % Ignore PDF-only spacing commands \newcommand{\htmlmspace}[1]{\mspace{#1}} % Ignore PDF-only spacing commands \newcommand{\scaleto}[2]{#1} % Allow to use scaleto from scalerel package \newcommand{\RR}{\mathbb R} \newcommand{\NN}{\mathbb N} \newcommand{\PP}{\mathbb P} \newcommand{\EE}{\mathbb E} \newcommand{\XX}{\mathbb X} \newcommand{\ZZ}{\mathbb Z} \newcommand{\QQ}{\mathbb Q} \newcommand{\fF}{\mathcal F} \newcommand{\dD}{\mathcal D} \newcommand{\lL}{\mathcal L} \newcommand{\gG}{\mathcal G} \newcommand{\hH}{\mathcal H} \newcommand{\nN}{\mathcal N} \newcommand{\pP}{\mathcal P} \newcommand{\BB}{\mathbb B} \newcommand{\Exp}{\operatorname{Exp}} \newcommand{\Binomial}{\operatorname{Binomial}} \newcommand{\Poisson}{\operatorname{Poisson}} \newcommand{\linop}{\mathcal{L}(\mathbb{B})} \newcommand{\linopell}{\mathcal{L}(\ell_1)} \DeclareMathOperator{\trace}{trace} \DeclareMathOperator{\Var}{Var} \DeclareMathOperator{\Span}{span} \DeclareMathOperator{\proj}{proj} \DeclareMathOperator{\col}{col} \DeclareMathOperator*{\argmin}{arg\,min} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator*{\gt}{>} \definecolor{highlight-blue}{RGB}{0,123,255} % definition, theorem, proposition \definecolor{highlight-yellow}{RGB}{255,193,7} % lemma, conjecture, example \definecolor{highlight-orange}{RGB}{253,126,20} % criterion, corollary, property \definecolor{highlight-red}{RGB}{220,53,69} % criterion \newcommand{\logL}{\ell} \newcommand{\eE}{\mathcal{E}} \newcommand{\oO}{\mathcal{O}} \newcommand{\defeq}{\stackrel{\mathrm{def}}{=}} \newcommand{\Bspec}{\mathcal{B}} % Spectral radiance \newcommand{\X}{\mathcal{X}} % X space 
\newcommand{\Y}{\mathcal{Y}} % Y space \newcommand{\M}{\mathcal{M}} % Model \newcommand{\Tspace}{\mathcal{T}} \newcommand{\Vspace}{\mathcal{V}} \newcommand{\Mtrue}{\mathcal{M}_{\mathrm{true}}} \newcommand{\MP}{\M_{\mathrm{P}}} \newcommand{\MRJ}{\M_{\mathrm{RJ}}} \newcommand{\qproc}{\mathfrak{Q}} \newcommand{\D}{\mathcal{D}} % Data (true or generic) \newcommand{\Dt}{\tilde{\mathcal{D}}} \newcommand{\Phit}{\widetilde{\Phi}} \newcommand{\Phis}{\Phi^*} \newcommand{\qt}{\tilde{q}} \newcommand{\qs}{q^*} \newcommand{\qh}{\hat{q}} \newcommand{\AB}[1]{\mathtt{AB}~\mathtt{#1}} \newcommand{\LP}[1]{\mathtt{LP}~\mathtt{#1}} \newcommand{\NML}{\mathrm{NML}} \newcommand{\iI}{\mathcal{I}} \newcommand{\true}{\mathrm{true}} \newcommand{\dist}{D} \newcommand{\Mtheo}[1]{\mathcal{M}_{#1}} % Model (theoretical model); index: param set \newcommand{\DL}[1][L]{\mathcal{D}^{(#1)}} % Data (RV or generic) \newcommand{\DLp}[1][L]{\mathcal{D}^{(#1')}} % Data (RV or generic) \newcommand{\DtL}[1][L]{\tilde{\mathcal{D}}^{(#1)}} % Data (RV or generic) \newcommand{\DpL}[1][L]{{\mathcal{D}'}^{(#1)}} % Data (RV or generic) \newcommand{\Dobs}[1][]{\mathcal{D}_{\mathrm{obs}}^{#1}} % Data (observed) \newcommand{\calibset}{\mathcal{C}} \newcommand{\N}{\mathcal{N}} % Normal distribution \newcommand{\Z}{\mathcal{Z}} % Partition function \newcommand{\VV}{\mathbb{V}} % Variance \newcommand{\T}{\mathsf{T}} % Transpose \newcommand{\EMD}{\mathrm{EMD}} \newcommand{\dEMD}{d_{\mathrm{EMD}}} \newcommand{\dEMDtilde}{\tilde{d}_{\mathrm{EMD}}} \newcommand{\dEMDsafe}{d_{\mathrm{EMD}}^{\text{(safe)}}} \newcommand{\e}{ε} % Model confusion threshold \newcommand{\falsifythreshold}{ε} \newcommand{\bayes}[1][]{B_{#1}} \newcommand{\bayesthresh}[1][]{B_{0}} \newcommand{\bayesm}[1][]{B^{\mathcal{M}}_{#1}} \newcommand{\bayesl}[1][]{B^l_{#1}} \newcommand{\bayesphys}[1][]{B^{{p}}_{#1}} \newcommand{\Bconf}[1]{B^{\mathrm{epis}}_{#1}} \newcommand{\Bemd}[1]{B^{\mathrm{EMD}}_{#1}} \newcommand{\BQ}[1]{B^{Q}_{#1}} 
\newcommand{\Bconfbin}[1][]{\bar{B}^{\mathrm{conf}}_{#1}} \newcommand{\Bemdbin}[1][]{\bar{B}_{#1}^{\mathrm{EMD}}} \newcommand{\bin}{\mathcal{B}} \newcommand{\Bconft}[1][]{\tilde{B}^{\mathrm{conf}}_{#1}} \newcommand{\fc}{f_c} \newcommand{\fcbin}{\bar{f}_c} \newcommand{\paramphys}[1][]{Θ^{{p}}_{#1}} \newcommand{\paramobs}[1][]{Θ^{ε}_{#1}} \newcommand{\test}{\mathrm{test}} \newcommand{\train}{\mathrm{train}} \newcommand{\synth}{\mathrm{synth}} \newcommand{\rep}{\mathrm{rep}} \newcommand{\MNtrue}{\mathcal{M}^{{p}}_{\text{true}}} \newcommand{\MN}[1][]{\mathcal{M}^{{p}}_{#1}} \newcommand{\MNA}{\mathcal{M}^{{p}}_{Θ_A}} \newcommand{\MNB}{\mathcal{M}^{{p}}_{Θ_B}} \newcommand{\Me}[1][]{\mathcal{M}^ε_{#1}} \newcommand{\Metrue}{\mathcal{M}^ε_{\text{true}}} \newcommand{\Meobs}{\mathcal{M}^ε_{\text{obs}}} \newcommand{\Meh}[1][]{\hat{\mathcal{M}}^ε_{#1}} \newcommand{\MNa}{\mathcal{M}^{\mathcal{N}}_a} \newcommand{\MeA}{\mathcal{M}^ε_A} \newcommand{\MeB}{\mathcal{M}^ε_B} \newcommand{\Ms}{\mathcal{M}^*} \newcommand{\MsA}{\mathcal{M}^*_A} \newcommand{\MsB}{\mathcal{M}^*_B} \newcommand{\Msa}{\mathcal{M}^*_a} \newcommand{\MsAz}{\mathcal{M}^*_{A,z}} \newcommand{\MsBz}{\mathcal{M}^*_{B,z}} \newcommand{\Msaz}{\mathcal{M}^*_{a,z}} \newcommand{\MeAz}{\mathcal{M}^ε_{A,z}} \newcommand{\MeBz}{\mathcal{M}^ε_{B,z}} \newcommand{\Meaz}{\mathcal{M}^ε_{a,z}} \newcommand{\zo}{z^{0}} \renewcommand{\lL}[2][]{\mathcal{L}_{#1|{#2}}} % likelihood \newcommand{\Lavg}[2][]{\mathcal{L}^{/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLphys}[2][]{\mathcal{L}^{{p}}_{#1|#2}} \newcommand{\Lavgphys}[2][]{\mathcal{L}^{{p}/#2}_{#1}} % Geometric average of likelihood \newcommand{\lLL}[3][]{\mathcal{L}^{(#3)}_{#1|#2}} \newcommand{\lLphysL}[3][]{\mathcal{L}^{{p},(#3)}_{#1|#2}} \newcommand{\lnL}[2][]{l_{#1|#2}} % Per-sample log likelihood \newcommand{\lnLt}[2][]{\widetilde{l}_{#1|#2}} \newcommand{\lnLtt}{\widetilde{l}} % Used only in path_sampling \newcommand{\lnLh}[1][]{\hat{l}_{#1}} 
\newcommand{\lnLphys}[2][]{l^{{p}}_{#1|#2}} \newcommand{\lnLphysL}[3][]{l^{{p},(#3)}_{#1|#2}} \newcommand{\Elmu}[2][1]{μ_{{#2}}^{(#1)}} \newcommand{\Elmuh}[2][1]{\hat{μ}_{{#2}}^{(#1)}} \newcommand{\Elsig}[2][1]{Σ_{{#2}}^{(#1)}} \newcommand{\Elsigh}[2][1]{\hat{Σ}_{{#2}}^{(#1)}} \newcommand{\pathP}{\mathop{{p}}} % Path-sampling process (generic) \newcommand{\pathPhb}{\mathop{{p}}_{\mathrm{Beta}}} % Path-sampling process (hierarchical beta) \newcommand{\interval}{\mathcal{I}} \newcommand{\Phiset}[1]{\{\Phi\}^{\small (#1)}} \newcommand{\Phipart}[1]{\{\mathcal{I}_Φ\}^{\small (#1)}} \newcommand{\qhset}[1]{\{\qh\}^{\small (#1)}} \newcommand{\Dqpart}[1]{\{Δ\qh_{2^{#1}}\}} \newcommand{\LsAzl}{\mathcal{L}_{\smash{{}^{\,*}_A},z,L}} \newcommand{\LsBzl}{\mathcal{L}_{\smash{{}^{\,*}_B},z,L}} \newcommand{\lsA}{l_{\smash{{}^{\,*}_A}}} \newcommand{\lsB}{l_{\smash{{}^{\,*}_B}}} \newcommand{\lsAz}{l_{\smash{{}^{\,*}_A},z}} \newcommand{\lsAzj}{l_{\smash{{}^{\,*}_A},z_j}} \newcommand{\lsAzo}{l_{\smash{{}^{\,*}_A},z^0}} \newcommand{\leAz}{l_{\smash{{}^{\,ε}_A},z}} \newcommand{\lsAez}{l_{\smash{{}^{*ε}_A},z}} \newcommand{\lsBz}{l_{\smash{{}^{\,*}_B},z}} \newcommand{\lsBzj}{l_{\smash{{}^{\,*}_B},z_j}} \newcommand{\lsBzo}{l_{\smash{{}^{\,*}_B},z^0}} \newcommand{\leBz}{l_{\smash{{}^{\,ε}_B},z}} \newcommand{\lsBez}{l_{\smash{{}^{*ε}_B},z}} \newcommand{\LaszL}{\mathcal{L}_{\smash{{}^{*}_a},z,L}} \newcommand{\lasz}{l_{\smash{{}^{*}_a},z}} \newcommand{\laszj}{l_{\smash{{}^{*}_a},z_j}} \newcommand{\laszo}{l_{\smash{{}^{*}_a},z^0}} \newcommand{\laez}{l_{\smash{{}^{ε}_a},z}} \newcommand{\lasez}{l_{\smash{{}^{*ε}_a},z}} \newcommand{\lhatasz}{\hat{l}_{\smash{{}^{*}_a},z}} \newcommand{\pasz}{p_{\smash{{}^{*}_a},z}} \newcommand{\paez}{p_{\smash{{}^{ε}_a},z}} \newcommand{\pasez}{p_{\smash{{}^{*ε}_a},z}} \newcommand{\phatsaz}{\hat{p}_{\smash{{}^{*}_a},z}} \newcommand{\phateaz}{\hat{p}_{\smash{{}^{ε}_a},z}} \newcommand{\phatseaz}{\hat{p}_{\smash{{}^{*ε}_a},z}} \newcommand{\Phil}[2][]{Φ_{#1|#2}} % Φ_{\la} 
\newcommand{\Philt}[2][]{\widetilde{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philhat}[2][]{\hat{Φ}_{#1|#2}} % Φ_{\la} \newcommand{\Philsaz}{Φ_{\smash{{}^{*}_a},z}} % Φ_{\lasz} \newcommand{\Phileaz}{Φ_{\smash{{}^{ε}_a},z}} % Φ_{\laez} \newcommand{\Philseaz}{Φ_{\smash{{}^{*ε}_a},z}} % Φ_{\lasez} \newcommand{\mus}[1][1]{μ^{(#1)}_*} \newcommand{\musA}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\SigsA}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_A}}} \newcommand{\musB}[1][1]{μ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\SigsB}[1][1]{Σ^{(#1)}_{\smash{{}^{\,*}_B}}} \newcommand{\musa}[1][1]{μ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Sigsa}[1][1]{Σ^{(#1)}_{\smash{{}^{*}_a}}} \newcommand{\Msah}{{\color{highlight-red}\mathcal{M}^{*}_a}} \newcommand{\Msazh}{{\color{highlight-red}\mathcal{M}^{*}_{a,z}}} \newcommand{\Meah}{{\color{highlight-blue}\mathcal{M}^{ε}_a}} \newcommand{\Meazh}{{\color{highlight-blue}\mathcal{M}^{ε}_{a,z}}} \newcommand{\lsazh}{{\color{highlight-red}l_{\smash{{}^{*}_a},z}}} \newcommand{\leazh}{{\color{highlight-blue}l_{\smash{{}^{ε}_a},z}}} \newcommand{\lseazh}{{\color{highlight-orange}l_{\smash{{}^{*ε}_a},z}}} \newcommand{\Philsazh}{{\color{highlight-red}Φ_{\smash{{}^{*}_a},z}}} % Φ_{\lasz} \newcommand{\Phileazh}{{\color{highlight-blue}Φ_{\smash{{}^{ε}_a},z}}} % Φ_{\laez} \newcommand{\Philseazh}{{\color{highlight-orange}Φ_{\smash{{}^{*ε}_a},z}}} % Φ_{\lasez} \newcommand{\emdstd}{\tilde{σ}} \DeclareMathOperator{\Mvar}{Mvar} \DeclareMathOperator{\AIC}{AIC} \DeclareMathOperator{\epll}{epll} \DeclareMathOperator{\elpd}{elpd} \DeclareMathOperator{\MDL}{MDL} \DeclareMathOperator{\comp}{COMP} \DeclareMathOperator{\Lognorm}{Lognorm} \DeclareMathOperator{\erf}{erf} \DeclareMathOperator*{\argmax}{arg\,max} \DeclareMathOperator{\Image}{Image} \DeclareMathOperator{\sgn}{sgn} \DeclareMathOperator{\SE}{SE} % standard error \DeclareMathOperator{\Unif}{Unif} \DeclareMathOperator{\Poisson}{Poisson} \DeclareMathOperator{\SkewNormal}{SkewNormal} 
\DeclareMathOperator{\TruncNormal}{TruncNormal} \DeclareMathOperator{\Exponential}{Exponential} \DeclareMathOperator{\exGaussian}{exGaussian} \DeclareMathOperator{\IG}{IG} \DeclareMathOperator{\NIG}{NIG} \DeclareMathOperator{\Gammadist}{Gamma} \DeclareMathOperator{\Lognormal}{Lognormal} \DeclareMathOperator{\Beta}{Beta} \newcommand{\sinf}{{s_{\infty}}}\)
from emdcmp import draw_R_samples
from other_criteria import FactorizedPrior, get_ppfs, R, AIC, BIC, DIC, logℰ, elpd, 𝓁
import mdl_uv
Show code cell source
import viz # Local module
from paul_tol_rainbow import discrete_rainbow_scheme # Local module
import matplotlib as mpl
import matplotlib.pyplot as plt
import panel as pn
import holoviews as hv
from cycler import cycler
hv.extension("matplotlib")
Use sans-serif for LaTeX axis labels.
STIX is maybe not the prettiest font, but it is very legible.
Bitstream Vera Sans is the matplotlib default for non-TeX labels.
Show code cell source
# Font configuration for matplotlib: keep TeX rendering off and use the STIX
# family, so that mathtext axis labels remain legible without a LaTeX install.
params = {
    'text.usetex': False,            # render math with mathtext, not external TeX
    'font.family': 'STIXGeneral',
    'mathtext.fontset': 'stixsans',
    # Alternative fully-custom mathtext setup based on Bitstream Vera Sans:
    #'mathtext.fontset': 'custom',
    #'mathtext.rm' : 'Bitstream Vera Sans',
    #'mathtext.it' : 'Bitstream Vera Sans:italic',
    #'mathtext.bf' : 'Bitstream Vera Sans:bold',
}
mpl.rcParams.update(params)
We need to extend the color scheme from Ex_UV
to support 4 candidates.
For this we use the discrete scheme proposed by Paul Tol [78, Fig. 22], which supports sequences between 1 and 23 steps. This is implemented by our custom function discrete_rainbow_scheme
, which is essentially just a table lookup.
Some other decent options for candidates are listed below, from most to least discriminable. The greater variety of colours in the most discriminable palettes makes them a bit jarring.
# Candidate palettes, listed from most to least discriminable (see note above).
# Each expression just constructs a palette for visual inspection in the notebook.
hv.Palette("TolRainbow", range=(0.1, 1.), reverse=True)
hv.Palette("Sunset", range=(0., 1.), reverse=True)
hv.Palette("Magma", range=(0., .9), reverse=False)
hv.Palette("TolYlOrBr", range=(0.3, 1.), reverse=True)
@dataclass
class colors(colors):
    # Extend the inherited `colors` scheme with a 4-step discrete rainbow cycle
    # (reversed) for the candidate models; `discrete_rainbow_scheme` is the
    # Paul Tol lookup-table helper imported above.
    candidates : hv.Cycle = hv.Cycle(values = discrete_rainbow_scheme(4)[::-1])
colors
scale | data | RJ | Planck | calib_curves | candidates |
---|---|---|---|---|---|
Note
See Fig. 2.8 for a more complete version of this table
Criterion |
Not restricted to nested models |
No restriction on noise model |
Not restricted to log likelihood risk |
Not asymptotic |
Allows singular models |
Allows Bayesian models |
Symmetric |
Information-theoretic |
Consistent |
Robust wrt. misspecification |
Source of variability |
---|---|---|---|---|---|---|---|---|---|---|---|
\(\Bemd{}\) |
✔ |
✔ |
✔ |
✔ |
✔ |
✔ |
✔ |
✘ |
✔ |
✔ |
Imperfect replications |
WAIC |
✔ |
✔ |
✔ |
✔ |
✔ |
✔ |
✔ |
✘ |
✔ |
✘⁽²⁾ |
Posterior |
Bayes factor |
✔ |
✔ |
✘ |
✔ |
✘ |
✔ |
✔ |
✘ |
✔ |
✘⁽²⁾ |
Posterior |
MDL |
✔/✘⁽¹⁾ |
✔ |
✔/✘⁽¹⁾ |
✔ |
✘ |
✔ |
✘ |
✔ |
✔ |
✘⁽³⁾ |
Uniform dist on event space |
(BIC) |
✔ |
✔ |
✘ |
✘ |
✘ |
✔ |
✔ |
✘ |
✔/✘ |
✘ |
Posterior |
DIC |
✘ |
✘ |
✘ |
✘ |
✘ |
✔ |
✘ |
✔ |
✔ |
✘ |
Posterior |
AIC |
✘ |
✘ |
✘ |
✘ |
✘ |
✘ |
✘ |
✔ |
✘ |
✘ |
Perfect replications |
⁽¹⁾ Some entries are marked ✔/✘ for MDL because although the general form may not have these restrictions, they are usually required to obtain tractable approximations.
⁽²⁾ Bayesian methods are sometimes described as accounting for epistemic errors, but only for errors which are within the hypothesis class. So specifically not misspecification.
⁽³⁾ There has been at least one attempt to make MDL robust in the case of misspecified models, but it is far from standardized. It is also unclear whether it would be broadly applicable, since it depends on an ad hoc scaling of the likelihood, which itself introduces an additional hyperparameter.
Data#
For this experiment we want one of the candidates to be the true model. Also, in order to compute the NML complexity exactly, we want to be able to enumerate all possible datasets. This is possible thanks to how we set up the problem:
The use of Poisson noise means that possible values are discretized. Technically there are infinitely many possible values, but their probability quickly vanishes, so we need to consider only a dozen or so.
Fixed, exact \(λ\) values: for a given dataset size \(L\), we always generate data with the abscissa \(λ_k = λ_{\mathrm{min}} + k \frac{λ_{\mathrm{max}} - λ_{\mathrm{min}}}{L-1}\), where \(k=0, \dotsc, L-1\).
Hint
This is implemented as the EnumerableDataset
class within the MDL utilities.
This means we can implement enumeration as follows:
For each \(λ_k\), determine a set of possible values for \(\Bspec_k\). Below we use an interval containing 99.98% of the probability mass. Each \(\Bspec_k\) is a sequential array with equal steps.
A dataset is generated by picking a random value from each \(\Bspec_k\): \(\{(λ_k, b_k) : b_k \sim \Bspec_k\}\). The total number of datasets is therefore \(\prod_{k=0}^{L-1} \lvert \Bspec_k \rvert\).
In practice there are still an unfeasibly large number of datasets. Therefore we generate them from most to least likely: the hope is that the integrand in an NML integral (which in this case is a series) should correlate with the true data likelihood, and that we can truncate the series once we have enough dominant terms.
# Base dataset for this experiment. It is "enumerable" because the Poisson
# noise discretizes the possible radiance values (see discussion above).
# NOTE: `L` is overridden per experiment below via `dataclasses.replace`.
𝒟 = mdl_uv.EnumerableDataset(
    "comparison with other criteria",   # purpose string
    L   =100,
    λmin=0.6*μm,
    λmax=2  *μm,
    s   =16*Bunits**-1,                 # noise-scale parameter — presumably inverse scaling; confirm against EnumerableDataset
    T   =data_T)
Show code cell source
#data = Dataset("", L=4000, λmin=0.1*μm, λmax=2*μm, s=(2e-3*Bunits)**-1, T=3000*K)
# Near-noiseless copy of the dataset, plotted as a curve to show the mean.
𝒟_mean = replace(𝒟, s=(4e-5*Bunits)**-1)
dims = dict(kdims=hv.Dimension("λ", label="λ — wavelength", unit="μm"),
            vdims=hv.Dimension("B", label="$\\tilde{B}$ – radiance", unit="kW/m² nm sr"))
# Scatter: noisy sample data; Curve: (near-)mean radiance.
fig = hv.Scatter(𝒟.get_data(strip_units=True), **dims) * hv.Curve(𝒟_mean.get_data(strip_units=True), **dims)
fig.opts(
    hv.opts.Curve(color="#AA0022"),
    hv.opts.Scatter(size=1.5, backend="bokeh"),
    hv.opts.Scatter(s=12, backend="matplotlib"),
    # fig_inches: 2 × (40 mm, 34 mm) converted to inches
    hv.opts.Overlay(fig_inches=tuple(2*((40, 34)*ureg.mm).to("in").m),
                    fontscale=2, backend="matplotlib"),
    hv.opts.Overlay(width=500, backend="bokeh")
)
hv.save(fig, config.paths.figures/"compare-other-methods_sample-data.svg")
Note that in contrast to the main setup, we consider a different regime, with wavelength window closer to the distribution’s peak and lower relative noise. This makes the likelihood essentially Gaussian, up to a normalization factor.
Candidate models#
Most established model comparison methods are meant to compare nested models, where preferably one of the alternatives is the true model. For criteria typically interpreted in terms of information theory, like AIC and MDL, that interpretation usually only holds when the true model is also the simplest. Therefore, to give existing criteria the fairest comparison, we set up a model comparison problem where the model alternatives are variants of the Planck radiance model augmented with a polynomial:
Note that in contrast to the main setup, the difference between Poisson and Gaussian noise is not too important, for two reasons:
We increased the signal to noise ratio, partly by choosing a regime (i.e. a \(λ\) window) which contains the largest radiance values.
We scale the variance in accordance with the Poisson distribution of the underlying data.
This ensures that whether we fit the model using Poisson or Gaussian likelihood, the true model is recovered when \(m=0\).
It is worth emphasizing that in order to accommodate some criteria, we are posing a different problem than the one we actually want to solve. First, instead of comparing the Planck (\(\MP\)) and Rayleigh-Jeans (\(\MRJ\)) models, we compare some artificially augmented version of \(\MP\). Second, we need to be in the regime where the Poisson is effectively a discretized Gaussian. This allows us to substitute the Poisson’s discrete PMF with the Gaussian’s continuous PDF for evaluating the loss. (If we tried to fit a discrete model, its PMF would vanish as soon as one data point is not in the support.) This also ensures that the asymptotic criteria, which assume that we have reached the regime where the likelihood is “Gaussian-like”, are valid. Finally, the discreteness of the data distribution nevertheless allows us to enumerate all possible datasets, a feature which is convenient for computing the NML distribution.
Making these choices of regime and data-generating model ensures no method is disadvantaged, but of course in practice a practitioner does not get to choose their data, and should choose their model(s) based on what makes sense for those data, not what simplifies the statistical analysis.
In practice of course we would like to be able to compare models without restriction on their structure or the type of noise, as we did in Fig. 2.7 comparing the Planck and Rayleigh-Jeans models with \(\Bemd{}\). Bayesian methods are also agnostic to the choice of model. Bayesian methods however consider a different type of epistemic error, and in particular ignore errors due to misspecification. We illustrate this with a comparison table in Table 6.1. For completeness, this table also includes values obtained with the other criteria – since in practice they are often used even when they are invalid.
@dataclass(frozen=True)
class CandidateModel:
    """
    A physical radiance model (Planck or Rayleigh-Jeans), optionally augmented
    with a spurious polynomial of order ``m = len(coeffs)``, combined with a
    Gaussian observation model whose variance scales with the physical
    radiance (mimicking the Poisson noise of the data-generating process).
    """
    # FIX: was Literal["Plank", ...] (typo) — must match the keys of
    # `phys_models` and the "Planck" label used throughout this notebook.
    physmodel: Literal["Planck","Rayleigh-Jeans"]
    σ     : float|PintQuantity   # noise scale; converted to `Bunits` in __post_init__ if given as a float
    T     : PintQuantity         # temperature; units: K
    coeffs: list[float]          # coefficients of the spurious polynomial

    @property
    def m(self):
        """Returns the order of the model’s spurious polynomial."""
        return len(self.coeffs)

    def __post_init__(self):
        # If parameters were given as plain floats, convert to default units
        if not isinstance(self.σ, pint.Quantity):
            object.__setattr__(self, "σ", self.σ * Bunits)  # IMO bypassing frozen dataclass in __post_init__ is acceptable
        if not isinstance(self.T, pint.Quantity):
            object.__setattr__(self, "T", self.T * ureg.kelvin)

    def _phys_radiance(self, λ):
        """
        Radiance of the *pure* physical model (no polynomial), in `Bunits`.
        Determines the noise scale in `Q` and `apply_obsmodel`.
        (Single definition replacing the duplicated inline computations that
        were flagged with "HACK … this isn’t DRY".)
        """
        return phys_models[self.physmodel](λ, self.T).to(Bunits)

    def Q(self, λ_B):
        """
        The loss is computed from differences between the observed data `λ_B`
        and predictions of the _physical_ model. The logpdf of the _observation_
        model is used to convert those differences into a loss.
        """
        λ, Bdata = λ_B
        _, Btheory = self.apply_physmodel(λ)   # prediction, incl. spurious polynomial
        Bphys = self._phys_radiance(λ)         # noise scale from the pure physical model
        # IMPORTANT: Must match the observation model in `apply_obsmodel`
        return -stats.norm.logpdf((Bdata - Btheory).m, loc=0,
                                  scale=self.σ.m*np.sqrt(Bphys.m))

    def apply_obsmodel(self, λ_B, rng=None):
        """Add Gaussian observation noise (variance ∝ physical radiance) to `λ_B`."""
        λ, Bdata = λ_B
        Bphys = self._phys_radiance(λ)
        return λ, Bdata + stats.norm.rvs(size=len(Bdata), loc=0,
                                         scale=self.σ.m*np.sqrt(Bphys.m),
                                         random_state=rng)*Bunits

    def apply_physmodel(self, λarr: ArrayLike, rng=None) -> tuple[ArrayLike, ArrayLike]:
        """Return ``(λarr, B(λarr))`` where B includes the spurious polynomial term."""
        if isinstance(λarr, tuple):  # Allow passing output of data directly
            assert len(λarr) == 2, "CandidateModel expects either a single array `λarr`, or a tuple `(λarr, Barr)`."
            λarr = λarr[0]
        Barr = self._phys_radiance(λarr)
        if self.coeffs:
            Barr += Polynomial(self.coeffs)(λarr.m) * Bunits
        return λarr, Barr

    def gen_data(self, L, λmin=data_λ_min, λmax=data_λ_max, rng=None):
        """Generate a synthetic dataset of L evenly spaced points on [λmin, λmax]."""
        λarr = np.linspace(λmin, λmax, L)
        return self.apply_obsmodel(self.apply_physmodel(λarr), rng=rng)

    def __call__(self, λarr, rng=None):
        # Convenience: model(λ) == noisy prediction at λ.
        return self.apply_obsmodel(self.apply_physmodel(λarr), rng=rng)
Criteria & loss definitions#
Risk functional#
We use the negative log likelihood as our risk functional, with a Gaussian observation model.
(For technical reasons the risk functional is defined above as a method of the candidate model. The comparison functions expect a separate functional, so Q[phys,σ,T,coeffs]
is a wrapper which calls Q
from a properly parameterized model.)
Show code cell source
class QMeta(type):
    """
    Metaclass providing two equivalent ways to parameterize a risk functional:
    ``Q[σ, T, c0, c1, …]`` (subscript, flat parameter tuple) or
    ``Q(σ, T, coeffs)`` (call). Both return the bound ``Q`` method of a
    freshly constructed `CandidateModel`.
    """
    def __getitem__(cls, Θ):
        # Θ is a flat tuple (σ, T, *coeffs), as returned by `fitΘ`
        σ, T, *coeffs = Θ
        return cls.__call__(σ, T, coeffs)
    def __call__(cls, σ, T, coeffs=()):
        # NB: Overriding the *metaclass* __call__ means that "instantiating"
        # QPlanck / QRJ returns a loss function, not a class instance.
        if len(coeffs) == 1 and isinstance(coeffs[0], (list, tuple)):
            coeffs = coeffs[0]  # allow passing coeffs as a single sequence
        #return Q(σ, T, coeffs)
        model = CandidateModel(cls.physmodel, σ, T, coeffs)
        return model.Q
#@dataclass
class QPlanck(metaclass=QMeta):
    # Risk functional bound to the Planck physical model
    physmodel="Planck"
class QRJ(metaclass=QMeta):
    # Risk functional bound to the Rayleigh-Jeans physical model
    physmodel="Rayleigh-Jeans"
# Lookup: physical-model name -> parameterizable risk functional
Q = {"Planck": QPlanck, "Rayleigh-Jeans": QRJ}
Fitting function#
Given some observed data and the order of the extra polynomial, returns a tuple (σ, T, coeffs)
of fitted parameters.
Parameters are fitted by minimizing \(Q\), subject to some mild regularization on \(σ\) and \(T\).
Show code cell source
def _fitΘ(λℬ, physmodel, m):
    """
    Fit (σ, T, coeffs) by minimizing the risk Q on the data `λℬ`,
    with mild regularization: a soft floor on σ and a Gaussian prior on log₂T.
    Optimization is done in log₂ space for σ and T, so both stay positive.

    Returns a tuple ``(σ, T, coeffs)`` of plain floats.
    """
    log2_T0 = np.log2(data_T.m)
    # NOTE(review): the original comment read "|log₂ 4000 - log₂ 5000| ≈ 12",
    # but that difference is ≈ 0.32; 12 ≈ log₂ 4000 itself, so this is a very
    # weak prior on log₂T — confirm the intended width.
    priorT_std = 12
    def f(Θtilde, physmodel=physmodel, λℬ=λℬ):
        # Θtilde packs (log₂σ, log₂T, *coeffs)
        log2σ, log2T, *coeffs = Θtilde
        σ = 2**log2σ; T = 2**log2T
        risk = Q[physmodel](σ, T, coeffs)(λℬ).mean()
        priorσ = 2**(-log2σ/128)  # Soft floor on σ so it cannot go too low
        priorT = (log2T - log2_T0)**2 / (2*priorT_std**2)
        #priorσ = 0
        #priorT = 0
        return risk + priorσ + priorT
    # Initial guess: σ=0.1, T=data_T, all polynomial coefficients 0
    res = optimize.minimize(f, np.r_[np.log2([.1, data_T.m]), [0]*m], tol=1e-5)
    log2σ, log2T, *coeffs = res.x
    σ = 2**log2σ; T = 2**log2T
    return σ, T, tuple(coeffs)
# Memoization cache for `_fitΘ`; keyed on the data values and fit arguments.
_fitΘ_cache = {}
def fitΘ(λℬ, physmodel, m):
    """
    Memoized wrapper around `_fitΘ`: identical data and arguments return the
    cached fit instead of re-running the optimizer.
    """
    key = (tuple(λℬ[0].m), tuple(λℬ[1].m), physmodel, m)
    res = _fitΘ_cache.get(key)
    if res is None:
        res = _fitΘ(λℬ, physmodel, m)
        _fitΘ_cache[key] = res  # FIX: result was never stored, so the cache never hit
    return res
Bayesian prior#
# Priors over the physical parameters (σ, T).
# "loose" spans several orders of magnitude; "tight" concentrates near the true values.
π_phys_loose = FactorizedPrior(["loguniform", "loguniform"],
                               [(2**-10, 2**-1), (1000, 10000)],
                               rng="prior - compare - models")
π_phys_tight = FactorizedPrior(["loguniform", "loguniform"],
                               [(2**-5, 2**-4), (2000, 5000)],
                               rng="prior - compare - models")
def π_coeffs_loose(m):
    # Independent normal priors over each of the m polynomial coefficients
    # (parameters presumably (loc, scale) = (0, 5) — confirm FactorizedPrior convention)
    return FactorizedPrior(["norm"]*m, [(0, 5)]*m,
                           rng="prior - compare - models - coeffs")
def π_coeffs_tight(m):
    # Same, but tighter (scale 2)
    return FactorizedPrior(["norm"]*m, [(0, 2)]*m,
                           rng="prior - compare - models - coeffs")
NML distribution#
# Per-sample log likelihood = negative risk, in the form `mdl_uv.comp` expects.
def logL_mdl(*Θ, Q): return lambda λB: -Q(*Θ)(λB)
def MDL_criterion(Q, θˆ, 𝒟, physmodel, m):
    """
    MDL score = −𝓁(θˆ) + COMP, where COMP is the NML complexity computed by
    `mdl_uv.comp`. Returns None when the complexity was not computed
    (`no_compute` flag, or `comp` itself returned None).
    """
    #return Q[θˆ](𝒟.get_data()) \
    # NOTE(review): "Plank" in cache_key is a typo, but it keys previously
    # computed shelf entries — changing it would invalidate the cache.
    comp = mdl_uv.comp(𝒟, r=30, m=m, logL=partial(logL_mdl, Q=Q),
                       fitΘ=lambda λℬ, m: fitΘ(λℬ, physmodel, m),
                       cache_key="UV - Plank + poly", rng=3014,
                       no_compute=refresh_shelves)
    return None if comp is None else -𝓁(θˆ, Q, 𝒟) + comp
Compute criteria#
Note: We don’t change the dataset’s seed, so there is correlation in the data, but not exactly the same \((x_i, y_i)\) pairs are drawn. (Because the \(x_i\) are distributed evenly across \([λ_{min}, λ_{max}]\).)
# Polynomial orders m of the candidate models
mlist = [0,2,4,6]
Llist = [2**6, 2**7, 2**8, 2**9, 2**10, 2**11, 2**12] # np.logspace(6, 12, 7, base=2)
# Subset of dataset sizes used for the expensive criteria (MDL, R distributions)
Llist_short = [2**6, 2**8, 2**10, 2**12]
# NOTE: this finer 25-point grid supersedes the Llist defined two lines above
Llist = np.logspace(6, 12, 25, base=2).astype(int)
# One plotting colour per candidate model
assert len(mlist) <= len(colors.candidates)
The PPF curves are useful diagnostics in and of themselves, to check that the fitted model behaves as expected.
Show code cell source
# Diagnostic PPF curves and data-vs-model scatter plots for each
# (physmodel, L, m) combination.
Φarr = np.linspace(0, 1)   # quantile grid at which the PPFs are evaluated
ppf_panels = {}
scatter_panels = {}

def no_units(args):
    """Strip pint units from each element, returning plain magnitudes."""
    return tuple(a.m for a in args)

# FIX: keep the unmodified base dataset. Previously each iteration did
# `replace(𝒟, purpose=𝒟.purpose + f" - {L=}")` on the *rebound* 𝒟, so the
# purpose string accumulated one " - L=…" suffix per iteration.
𝒟_base = 𝒟
for physmodel in ["Planck", "Rayleigh-Jeans"]:
    for L in Llist:
        for m in mlist:
            𝒟 = replace(𝒟_base, L=L, purpose=𝒟_base.purpose + f" - {L=}")
            θˆ = fitΘ(𝒟.get_data(), physmodel, m=m)
            𝓜 = CandidateModel(physmodel, *θˆ)
            mixed_ppf, synth_ppf = get_ppfs(𝓜, 𝒟, rng=utils.get_rng("synth ppf - compare - models"))
            ppf_panels[physmodel, L, m] = \
                hv.Curve((Φarr, mixed_ppf(Φarr)), kdims="Φ", vdims="PPF", label="mixed") \
                * hv.Curve((Φarr, synth_ppf(Φarr)), kdims="Φ", vdims="PPF", label="synth")
            # FIX: `D.L`, `D.λmin`, `D.λmax` referenced an undefined name `D`;
            # the dataset variable is `𝒟`.
            scatter_panels[physmodel, L, m] = \
                hv.Scatter(no_units(𝓜.gen_data(𝒟.L, λmin=𝒟.λmin, λmax=𝒟.λmax)), kdims="λ", vdims="B", label="model") \
                * hv.Scatter(no_units(𝒟.get_data()), kdims="λ", vdims="B", label="true")
Note
Unbiased estimators should “flip-flop” between equivalent models. To show this, we change the seed for each dataset, i.e. for each different \(L\). (We don’t change for different \(m\), because for a given dataset size, all models must be fitted to the same data.)
Note
elpd
is reported with “deviance” scaling, so that it is comparable with other information criteria.
Compute MDL criteria#
Note
Because the MDL calculations are so onerous, it is worth manually managing our own cache under data/*-model-compare
to ensure we never trigger a run accidentally, or delete an old run accidentally.
import multiprocessing as mp
import psutil
def compute_MDL(phys_L_m, 𝒟=𝒟):
    """
    Multiprocessing worker: compute the MDL score for one (physmodel, L, m) triple.

    The base dataset is captured via an early-binding default (`𝒟=𝒟`) so each
    worker process starts from the same dataset object.
    Returns ``(physmodel, L, m, mdl_score)``, with ``mdl_score=None`` on failure.
    """
    physmodel, L, m = phys_L_m
    𝒟 = replace(𝒟, L=L, purpose=f"compare models - nested -- data -- {L}")
    θˆ = fitΘ(𝒟.get_data(), physmodel, m=m)
    _Q = Q[physmodel]
    try:
        mdl_score = MDL_criterion(_Q, θˆ, 𝒟, physmodel, m)
    except ValueError:
        # If computations for a particular set of args fails, don’t let the exception terminate other computations
        # Instead store a flag value, so we can diagnose this later
        mdl_score = None
    return physmodel, L, m, mdl_score
# Compute (or load from the shelf cache) MDL scores for every argument triple.
MDL_scores = {}
arglist = [(phys,L,m) for phys in ["Planck", "Rayleigh-Jeans"] for L in Llist for m in mlist
           if L <= 2**10  # Computations too slow otherwise
          ]
dellist = []  # indices of args which have already been computed, and can thus be removed from arglist
with shelve.open(str(config.paths.data/"MDL-model-compare")) as shelf:
    if refresh_shelves:
        shelf.clear()
    else:
        # Load previously computed scores and mark their args for removal
        for i, (phys,L,m) in enumerate(arglist):
            shelfkey = f"{phys} - {L} - {m}"
            if (mdl_score := shelf.get(shelfkey)) is not None:
                MDL_scores["MDL", phys, m, L] = mdl_score
                dellist.append(i)
for i in sorted(dellist, reverse=True):
    del arglist[i]
if arglist and do_long_computations:
    cores = min(psutil.cpu_count(logical=False), len(arglist))
    with mp.Pool(cores) as pool:
        for phys, L, m, mdl_score in tqdm(pool.imap_unordered(compute_MDL, arglist),
                                          desc="MDL scores", total=len(arglist), miniters=1):
            # By opening & closing the shelf every time we ensure a) that results are written immediately and b) that we don’t lock the shelf
            # TODO: Implement timeout & retry in case the shelf is locked by another process
            with shelve.open(str(config.paths.data/"MDL-model-compare")) as shelf:
                shelfkey = f"{phys} - {L} - {m}"
                # FIX: was `MDL_scores[phys, m, L]`, inconsistent with the
                # ("MDL", phys, m, L) keys used in the cache-load branch above
                # and expected by the (criterion, phys, m, L) DataFrame columns.
                shelf[shelfkey] = MDL_scores["MDL", phys, m, L] = mdl_score
Compute other criteria#
# Compute (or load from the shelf cache) all non-MDL criteria for every
# (physmodel, m, L) combination. Results accumulate in `scores`, keyed
# (criterion, physmodel, m, L) to match the DataFrame assembly below.
scores = {}
crits_long  = ["R", "AIC", "DIC", "BIC", "logE", "elpd"]
crits_short = []  #["elpd"]  # extra criteria computed only for L in Llist_short
with shelve.open(str(config.paths.data/"criteria-model-compare")) as shelf_scores:
    for physmodel in ["Planck", "Rayleigh-Jeans"]:
        print(physmodel)
        progbar_L = tqdm(desc="L", total=len(Llist), miniters=1)
        progbar_m = tqdm(desc="m", total=len(mlist), miniters=1)
        for L in Llist:
            progbar_m.reset()
            for m in mlist:
                #tqdm.write(f"{L=}, {m=}")
                crits = (crits_long + crits_short) if L in Llist_short else crits_long
                keys = [(C, physmodel, m, L) for C in crits]
                # Shelve keys must be strings; serialize each tuple key
                strkeys = {key: ",".join(str(k) for k in key) for key in keys}
                _scores = {key: shelf_scores.get(strkeys[key]) for key in keys}
                missing = {C for (C, *_), s in _scores.items() if s is None}
                subkey = (physmodel, m, L)
                if missing:
                    π  = π_phys_loose & π_coeffs_loose(m)
                    _𝒟 = replace(𝒟, L=L, purpose=f"compare models - nested -- data -- {L}")
                    _Q = Q[physmodel]
                    if missing & {"R", "AIC", "DIC", "BIC"}:
                        # FIX: fit on the size-L dataset `_𝒟` actually being scored.
                        # Previously this used the module-level `𝒟`, whose L does not
                        # match; `compute_MDL`/`compute_Rdist` both fit on the replaced
                        # dataset.
                        θˆ = fitΘ(_𝒟.get_data(), physmodel, m=m)
                    else:
                        θˆ = None
                    if "R"    in missing: _scores[(key:=("R"   , *subkey))] = shelf_scores[strkeys[key]] = R(_Q, θˆ, _𝒟)
                    if "AIC"  in missing: _scores[(key:=("AIC" , *subkey))] = shelf_scores[strkeys[key]] = AIC(_Q, θˆ, _𝒟)
                    if "DIC"  in missing: _scores[(key:=("DIC" , *subkey))] = shelf_scores[strkeys[key]] = DIC(_Q, θˆ, π, _𝒟)
                    if "BIC"  in missing: _scores[(key:=("BIC" , *subkey))] = shelf_scores[strkeys[key]] = BIC(_Q, θˆ, _𝒟)
                    if "logE" in missing: _scores[(key:=("logE", *subkey))] = shelf_scores[strkeys[key]] = logℰ(_Q, π, _𝒟)
                    if "elpd" in missing: _scores[(key:=("elpd", *subkey))] = shelf_scores[strkeys[key]] = elpd(_Q, π, _𝒟, method="waic", scale="log")
                    del π, _𝒟, _Q, θˆ
                scores[("R", *subkey)]   = _scores[("R", *subkey)]
                scores[("AIC", *subkey)] = _scores[("AIC", *subkey)]
                scores[("DIC", *subkey)] = _scores[("DIC", *subkey)]
                scores[("BIC", *subkey)] = _scores[("BIC", *subkey)]
                # logE and elpd return (value, standard error) pairs
                scores[("logE", *subkey)], scores[("logE_se", *subkey)] = _scores[("logE", *subkey)]
                # FIX: was `if L in ("elpd", *subkey) in _scores:` — an accidental
                # chained comparison; the intended test is key membership.
                if ("elpd", *subkey) in _scores:
                    scores[("elpd", *subkey)], scores[("elpd_se", *subkey)] = _scores[("elpd", *subkey)]
                progbar_m.update()
            progbar_L.update()
Compute \(R\)-distributions#
def compute_Rdist(phys_L_m, 𝒟=𝒟):
    """
    Multiprocessing worker: fit the candidate model for one (physmodel, L, m)
    triple and draw samples of its R distribution with `draw_R_samples`.

    The base dataset is captured via an early-binding default (`𝒟=𝒟`).
    Returns ``(physmodel, L, m, R_samples)``.
    """
    physmodel, L, m = phys_L_m
    𝒟 = replace(𝒟, L=L, purpose=f"compare models - nested -- data -- {L}")
    θˆ = fitΘ(𝒟.get_data(), physmodel, m=m)
    𝓜 = CandidateModel(physmodel, *θˆ)
    #scores[("MDL", m, L)] = MDL_criterion(Q, θ^, 𝒟, m)
    # Seed depends on (L, m) so each panel gets an independent but reproducible draw
    mixed_ppf, synth_ppf = get_ppfs(𝓜, 𝒟, rng=utils.get_rng("synth ppf - compare - models", "nested", "EMD", L, m))
    return physmodel, L, m, draw_R_samples(mixed_ppf, synth_ppf, c=0.5, max_M=2**14)
# Draw R distributions for every (physical model, L, m) combination, caching
# the samples on disk so that reruns only compute the missing combinations.
Rdists = {}
arglist = [(phys,L,m) for phys in ["Planck", "Rayleigh-Jeans"] for L in Llist_short for m in mlist]
dellist = [] # indices of args which have already been computed, and can thus be removed from arglist
with shelve.open(str(config.paths.data/"Rdists-model-compare")) as shelf:
# First pass: recover any samples already stored in the shelf.
for i, (phys,L,m) in enumerate(arglist):
shelfkey = f"{phys} - {L} - {m}"
Rsamples = shelf.get(shelfkey)
if Rsamples is not None:
Rdists[phys,m,L] = Rsamples
dellist.append(i)
# Delete in reverse order so the earlier indices stay valid.
for i in sorted(dellist, reverse=True):
del arglist[i]
# Second pass: compute the remaining combinations in parallel (one physical
# core per task, capped by the number of tasks) and shelve results as they come.
if arglist:
cores = min(psutil.cpu_count(logical=False), len(arglist))
with mp.Pool(cores) as pool:
for phys, L, m, Rsamples in tqdm(pool.imap_unordered(compute_Rdist, arglist),
desc="R distributions", total=len(arglist)):
shelfkey = f"{phys} - {L} - {m}"
shelf[shelfkey] = Rdists[phys, m, L] = Rsamples
# Assemble all criterion scores into a long-format table, then pivot so rows
# are the dataset size L and columns are (criterion, m, phys).
df = pd.DataFrame([(*k, v) for k,v in (scores|MDL_scores).items()],
columns=["criterion", "phys", "m", "L", "score"])
df = df.pivot(index=["L"], columns=["criterion", "m", "phys"], values=["score"]) \
.droplevel(0, axis="columns")
df
criterion | R | AIC | DIC | BIC | logE | logE_se | elpd | elpd_se | R | AIC | ... | elpd | elpd_se | MDL | |||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
m | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | ... | 6 | 6 | 0 | 2 | 4 | 6 | 0 | 2 | 4 | 6 |
phys | Planck | Planck | Planck | Planck | Planck | Planck | Planck | Planck | Planck | Planck | ... | Rayleigh-Jeans | Rayleigh-Jeans | Planck | Planck | Planck | Planck | Rayleigh-Jeans | Rayleigh-Jeans | Rayleigh-Jeans | Rayleigh-Jeans |
L | |||||||||||||||||||||
64 | 0.630972 | 34.212413 | 34.099891 | 38.530179 | -23.763575 | 0.089247 | -15.520759 | 6.170950 | 0.635304 | 37.446421 | ... | -308.729600 | 83.927159 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
76 | 0.590268 | 74.693873 | 74.794400 | 79.355339 | -43.279302 | 0.088922 | -35.056534 | 5.935170 | 0.606385 | 78.441006 | ... | -410.527259 | 99.914677 | 145.818206 | 251.072729 | 253.645187 | 261.059533 | 253.735672 | 265.810171 | 270.504725 | 275.633965 |
90 | 0.590268 | 63.787197 | 63.923645 | 68.786816 | -38.474928 | 0.090904 | -29.924548 | 7.074875 | 0.606385 | 67.595652 | ... | -393.532347 | 95.722918 | 159.669791 | 283.960879 | 289.550367 | 298.856449 | 296.208005 | 304.965702 | 308.431948 | 312.555011 |
107 | 0.590268 | 93.090067 | 93.058435 | 98.435725 | -53.120108 | 0.091431 | -44.500003 | 8.308889 | 0.606385 | 97.067552 | ... | -513.638107 | 109.594226 | 200.248460 | 344.733875 | 351.005956 | 359.291008 | 357.443339 | 366.390585 | 372.302910 | 379.484959 |
128 | 0.590268 | 106.762285 | 106.931420 | 112.466346 | -60.612310 | 0.092781 | -51.745718 | 8.390453 | 0.606385 | 112.582422 | ... | -597.582195 | 117.332536 | 237.708262 | 410.966339 | 416.901510 | 422.989940 | 431.198851 | 435.528584 | 435.949517 | 440.663500 |
152 | 0.590268 | 126.749901 | 126.643467 | 132.797662 | -70.649692 | 0.093250 | -61.704673 | 8.940944 | 0.606385 | 128.474031 | ... | -766.633382 | 131.917327 | 281.624622 | 486.405564 | 494.847813 | 507.664457 | 504.804909 | 521.813047 | 521.824407 | 528.667014 |
181 | 0.590268 | 147.995945 | 148.133914 | 154.392940 | -80.419961 | 0.093936 | -71.316444 | 9.519865 | 0.606385 | 159.259083 | ... | -793.708423 | 132.163846 | 333.279752 | 572.890648 | 587.451006 | 597.327621 | 596.747340 | 616.611591 | 622.158419 | 623.672471 |
215 | 0.590268 | 182.903371 | 182.994254 | 189.644647 | -98.791659 | 0.094568 | -89.601963 | 11.353392 | 0.606385 | 187.362177 | ... | -966.294704 | 145.378500 | 401.113934 | 690.851795 | 705.100029 | 713.599367 | 713.752588 | 733.011338 | 733.178870 | 735.227082 |
256 | 0.590268 | 217.951592 | 217.952916 | 225.041947 | -113.383919 | 0.095355 | -104.063599 | 13.162413 | 0.606385 | 223.539403 | ... | -1284.943342 | 175.440460 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
304 | 0.590268 | 217.990374 | 217.854283 | 225.424430 | -115.791232 | 0.097920 | -105.966137 | 12.741243 | 0.606385 | 226.452395 | ... | -1388.079441 | 171.260925 | 546.264487 | 947.635087 | 968.084218 | 983.712663 | 1003.555774 | 1028.787487 | 1031.182846 | 1031.767008 |
362 | 0.590268 | 333.414407 | 333.461190 | 341.197696 | -172.868606 | 0.096922 | -163.231930 | 14.316732 | 0.606385 | 343.690404 | ... | -1640.577967 | 176.041484 | 688.233567 | 1163.105387 | 1190.005217 | 1207.655251 | 1193.905209 | 1236.455671 | 1253.690735 | 1260.298738 |
430 | 0.590268 | 389.178881 | 389.315344 | 397.306451 | -201.431139 | 0.099242 | -191.359164 | 15.659166 | 0.606385 | 407.677396 | ... | -2139.883813 | 209.656286 | 814.540922 | 1371.647368 | 1401.453053 | 1422.242105 | 1413.520312 | 1453.415892 | 1467.929369 | 1478.798829 |
512 | 0.590268 | 369.577805 | 369.393745 | 378.054455 | -191.519205 | 0.100085 | -181.319885 | 17.556974 | 0.606385 | 372.004981 | ... | -2317.411669 | 219.170199 | 923.339660 | 1584.383012 | 1623.831553 | 1653.403065 | 1697.402223 | 1703.701715 | 1719.385035 | 1737.511908 |
608 | 0.590268 | 480.907840 | 480.866468 | 489.728190 | -245.189913 | 0.101918 | -234.628295 | 18.912407 | 0.606385 | 487.623812 | ... | -2950.879791 | 251.512690 | 1116.914686 | 1905.810839 | 1946.096864 | 1981.328390 | 2002.976233 | 2043.816195 | 2044.678672 | 2066.932739 |
724 | 0.590268 | 526.174407 | 526.214624 | 535.343990 | -263.967185 | 0.101589 | -253.470697 | 23.289231 | 0.606385 | 534.783330 | ... | -3421.283716 | 278.362624 | 1303.252426 | 2231.906062 | 2292.637024 | 2327.147044 | 2385.355880 | 2405.120929 | 2422.105215 | 2431.659940 |
861 | 0.590268 | 676.001813 | 675.926759 | 685.518002 | -345.029696 | 0.102294 | -334.396497 | 22.755336 | 0.606385 | 686.972846 | ... | -4439.554636 | 304.233163 | 1584.722994 | 2678.501867 | 2748.802783 | 2795.240825 | 2861.590420 | 2871.903842 | 2895.447354 | 2905.372485 |
1024 | 0.590268 | 797.005646 | 797.053144 | 806.868590 | -403.366233 | 0.103106 | -392.556843 | 24.811266 | 0.606385 | 813.906939 | ... | -5062.862907 | 317.800180 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1217 | 0.590268 | 965.547357 | 965.689909 | 975.755646 | -486.146521 | 0.102893 | -475.357856 | 26.464297 | 0.606385 | 991.463982 | ... | -5572.303122 | 323.451579 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1448 | 0.590268 | 1168.254348 | 1168.100968 | 1178.810225 | -586.427307 | 0.103941 | -575.454907 | 29.416060 | 0.606385 | 1205.970035 | ... | -6931.336305 | 405.671523 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1722 | 0.590268 | 1315.818874 | 1315.674767 | 1326.721357 | -655.986223 | 0.105830 | -644.623474 | 31.521313 | 0.606385 | 1347.522375 | ... | -7717.987053 | 400.589072 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2048 | 0.590268 | 1591.982806 | 1591.956635 | 1603.232044 | -800.800418 | 0.107504 | -789.088417 | 34.073232 | 0.606385 | 1639.752945 | ... | -9995.485396 | 439.348871 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2435 | 0.590268 | 1897.286722 | 1897.268913 | 1908.882126 | -951.066446 | 0.106863 | -939.484573 | 38.532618 | 0.606385 | 1920.949141 | ... | -10769.259172 | 449.601008 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2896 | 0.590268 | 2303.465136 | 2303.586378 | 2315.407308 | -1150.158621 | 0.109348 | -1138.031372 | 40.746489 | 0.606385 | 2343.460546 | ... | -14412.216490 | 520.935334 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3444 | 0.590268 | 2668.068307 | 2668.044462 | 2680.357085 | -1335.238423 | 0.109623 | -1323.082958 | 44.591955 | 0.606385 | 2714.944900 | ... | -16655.095014 | 609.238300 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4096 | 0.590268 | 3333.769198 | 3333.736458 | 3346.404730 | -1653.628551 | 0.108972 | -1641.603046 | 48.380477 | 0.606385 | 3376.738766 | ... | -18302.979627 | 590.357457 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
25 rows × 72 columns
Computed columns#
For comparability, we take the negative log evidence and negative elpd, so that for each criterion, smaller is better.
We also display criteria with respect to a reference column (the one corresponding to the model with \(m=0\)).
Show code cell source
def add_Δ_cols(df: pd.DataFrame, vdim, ref_col=None):
    """
    Return `df` augmented with columns holding the difference between each
    `vdim` column and the reference column `ref_col`.
    By default, `ref_col` is (`vdim`, 0).
    The new columns carry the criterion name 'Δ {vdim}'.
    If those columns already exist, `df` is returned unchanged.
    """
    Δvdim = f"Δ {vdim}"
    if Δvdim in df.columns:
        return df  # Δ cols have already been added
    ref = (vdim, 0) if ref_col is None else ref_col
    # Row-wise subtraction of the reference column from every vdim column.
    Δdf = df[vdim].sub(df[ref], axis="index")
    # Rebuild the column MultiIndex with the new criterion name prepended.
    new_cols = df[vdim].columns.to_frame()
    new_cols.insert(0, "criterion", Δvdim)
    Δdf.columns = pd.MultiIndex.from_frame(new_cols)
    return pd.concat((df, Δdf), axis="columns")
def add_neg_cols(df: pd.DataFrame, vdim):
    """
    Return `df` augmented with the negation of each `vdim` column.
    The new columns carry the criterion name '-{vdim}'.
    If those columns already exist, `df` is returned unchanged.
    """
    neg_vdim = f"-{vdim}"
    if neg_vdim in df.columns:
        return df  # neg cols have already been added
    neg_df = -df[vdim]
    # Rebuild the column MultiIndex with the new criterion name prepended.
    new_cols = df[vdim].columns.to_frame()
    new_cols.insert(0, "criterion", neg_vdim)
    neg_df.columns = pd.MultiIndex.from_frame(new_cols)
    return pd.concat((df, neg_df), axis="columns")
# Add Δ columns (relative to the m=0 model) and negated columns so that
# "smaller is better" holds for every displayed criterion.
for vdim in ["BIC", "AIC", "logE", "elpd", "MDL"]:
df = add_Δ_cols(df, vdim).sort_index()
for vdim in ["Δ logE", "Δ elpd"]:
df = add_neg_cols(df, vdim).sort_index()
Standard error on \(\log \eE\) is negligible:
df["logE_se"].max(None)
np.float64(0.3755792683276744)
Criteria comparison grid#
Show code cell source
def plot_Rdists(Rdists: dict,
orientation: Literal["horizontal","vertical"]="horizontal",
offset: bool=False,
ref_m=0,
width=0.25,
ylabel="$R$ distribution (EMD)",
colors=colors.candidates.values,
alpha=0.65,
linewidth=1,
ax=None):
"""
Violin plot of the R distributions in `Rdists` as a function of sample size L.

`Rdists`: dict mapping (m, L) keys to arrays of R samples.
`orientation`: "horizontal" places L along the x axis (drawn with *vertical*
violins); "vertical" the converse. Only the first three letters matter.
`offset`: if truthy, stagger violins of different m multiplicatively along L;
passing True is shorthand for an offset of 0.025.
`ref_m`: The grey reference line will be the mean of this distribution
`width`: width or height of distributions. Scaled with L so they are visually consistent.
`ax`: existing axes to draw into; if None, a new figure is created here.
Returns the created figure, or None when drawing into an existing `ax`.
"""
assert orientation[:3] in {"hor", "ver"}
vertical = bool(orientation.startswith("ver"))
assert isinstance(colors, list)
if offset is True:
offset = 0.025
if ax is None:
# Avoid using the `plt` interface to avoid memory leaks.
# See https://panel.holoviz.org/reference/panes/Matplotlib.html#using-the-matplotlib-pyplot-interface
fig = mpl.figure.Figure(figsize=(4,2))
ax = fig.subplots()
else:
fig = None
# Rdists keys are (m, L) pairs.
mlist = sorted({m for (m, L) in Rdists})
Llist = sorted({L for (m, L) in Rdists})
data = []
Lvals = set() # L values before any offset
positions = []
widths = []
#n_m = len(np.unique(df.index.get_level_values("m")))
#for (L,m), Rvals in df.groupby(["L", "m"]):
# NB: Color cycle logic assumes the fast loop is over m
for i, (L, m) in enumerate(product(Llist, mlist)):
Rvals = Rdists[m,L]
data.append(Rvals.flatten())
if offset:
# Multiplicative stagger (the L axis is log-scaled), centred on the true L.
L = L * 10**(offset*(i-len(mlist)/2))
positions.append( L )
widths.append( width * L )
# Grey reference line at the mean R of the ref_m distribution at the largest L.
if vertical:
ax.axvline(Rdists[ref_m, Llist[-1]].mean(), color="#AAAAAA", alpha=0.15, zorder=-1)
else:
ax.axhline(Rdists[ref_m, Llist[-1]].mean(), color="#AAAAAA", alpha=0.15, zorder=-1)
# NB: A horizontal plot (L along x axis) needs *vertical* distributions
parts = ax.violinplot(data, positions, widths=widths, orientation="horizontal" if vertical else "vertical",
showextrema=False, showmeans=False, side="low")
for i, (m, polycoll, c) in enumerate(zip(cycle(mlist), parts["bodies"], cycle(colors[:len(mlist)]))):
polycoll.set_facecolor(c)
polycoll.set_edgecolor("none")
polycoll.set_alpha(alpha=alpha)
# AFAICT, it is not possible to set separate alpha values for face and edge colors (PolyCollection has its own alpha setting, which overrides the alpha of both facecolor and edgecolor)
# Moreover, it draws the line down the middle of the violin, which in our case adds a useless flat line to the bottom of each plot.
# So instead we extract the edge path and draw it ourselves
path = polycoll._paths[0] # Path is formed of two parts: first the curved edge, then the straight line in the middle of the violin
edge = mpl.patches.PathPatch(mpl.path.Path(path.vertices[:len(path)//2], path.codes[:len(path)//2]), # Draw only the curved part
edgecolor=c, linewidth=linewidth,
label=str(m) if i < len(mlist) else "_") # Only apply one label for each m value, otherwise we can duplicate entries in the legend
ax.add_patch(edge)
# kaxis carries the L values; vaxis carries the R values.
if vertical:
kaxis = ax.yaxis
vaxis = ax.xaxis
ax.invert_yaxis()
else:
kaxis = ax.xaxis
vaxis = ax.yaxis
# NOTE(review): _set_axes_scale and _set_tick_locations are private Matplotlib
# Axis APIs — verify they still exist when upgrading Matplotlib.
kaxis._set_axes_scale("log")
kaxis.minorticks_off()
kaxis._set_tick_locations(Llist)
kaxis.set_major_formatter(mpl.ticker.FuncFormatter(pow2_formatter))
kaxis.set_label_text("$L$")
vaxis.set_label_text(ylabel)
return fig
Basic analysis#
Show code cell source
# R distributions for the Planck-based candidates only, keyed by (m, L).
_Rdists = {(m, L): Rdist for (phys, m, L), Rdist in Rdists.items() if phys == "Planck"}
pn.pane.Matplotlib(
plot_Rdists(_Rdists, "vert", offset=0.05, width=1, colors=discrete_rainbow_scheme(len(mlist)), alpha=.65),
tight=True
)
Note
Even with the Planck model, there is still some small level of mismatch because the candidates use Gaussian noise, while the data use Poisson. This is almost certainly why higher-order polynomials obtain tighter \(R\)-distributions.
Show code cell source
# R distributions for the Rayleigh-Jeans-based candidates, keyed by (m, L);
# the reference line is the m=6 distribution.
_Rdists = {(m, L): Rdist for (phys, m, L), Rdist in Rdists.items() if phys == "Rayleigh-Jeans"}
pn.pane.Matplotlib(
plot_Rdists(_Rdists, "vert", offset=0.05, ref_m=6, width=1, colors=discrete_rainbow_scheme(len(mlist)), alpha=.65),
tight=True
)
Show code cell source
# Planck vs Rayleigh-Jeans at m=0, keyed by (phys, L); here the first key
# component plays the role of m in plot_Rdists.
_Rdists = {(phys, L): Rdist for (phys, m, L), Rdist in Rdists.items() if m == 0}
pn.pane.Matplotlib(
plot_Rdists(_Rdists, "vert", offset=0, ref_m="Planck", width=1, colors=discrete_rainbow_scheme(2), alpha=.65),
tight=True
)
Planck is favoured by \(B^{\mathrm{EMD}}\), although the difference is less when we add polynomials (which can partially compensate for the incorrect Rayleigh-Jeans model).
Show code cell source
# Pairwise B^EMD table at L = 2**12: entry (P, RJ) is the probability that a
# Planck-based candidate draws a lower (better) R than a Rayleigh-Jeans one.
P_index = pd.MultiIndex.from_tuples([("Planck", m_P) for m_P in mlist], names=["phys model", "m"])
# FIX: the original passed `name=`, but pd.MultiIndex.from_tuples takes `names=`
# (as P_index above correctly does).
RJ_index = pd.MultiIndex.from_tuples([("Rayleigh-Jeans", m_P) for m_P in mlist], names=["phys model", "m"])
pd.DataFrame(
    [[np.less.outer(Rdists[(*P_model, 2**12)], Rdists[(*RJ_model, 2**12)]).mean()
      for RJ_model in RJ_index] for P_model in P_index],
    index=P_index, columns=RJ_index
).style.set_caption(r"$B^{\mathrm{EMD}}$ between Planck & Rayleigh-Jeans models")
phys model | Rayleigh-Jeans | ||||
---|---|---|---|---|---|
m | 0 | 2 | 4 | 6 | |
phys model | m | ||||
Planck | 0 | 0.981366 | 0.778191 | 0.883850 | 0.753235 |
2 | 0.966974 | 0.731175 | 0.723118 | 0.669835 | |
4 | 0.988357 | 0.808201 | 0.942017 | 0.801147 | |
6 | 0.988236 | 0.801918 | 0.975159 | 0.782227 |
Final comparison grid figure#
Show code cell source
# Final 3x6 comparison grid: one row per model-comparison scenario, one column
# per criterion (BIC, -logE, MDL, AIC, elpd, EMD R distributions).
w = 30.548
fig = mpl.figure.Figure(figsize=((6*w + 50, 3*40)*ureg.mm).to("in").m)
axes = fig.subplots(3,6)
## Nested models with true (Planck + poly) ##
axes_row = axes[0]
color_cycle = cycler(color=discrete_rainbow_scheme(len(mlist)))
for ax in axes_row:
ax.set_prop_cycle(color_cycle)
_df = df.loc[:,(np.s_[:],np.s_[:],"Planck")].droplevel("phys", axis="columns")
_Rdists = {(m, L): Rd for (phys, m, L), Rd in Rdists.items() if phys=="Planck"}
plot_curve(_df, "Δ BIC" , None, "m", "vert", ax=axes_row[0])
plot_curve(_df, "-Δ logE", "logE_se", "m", "vert", ax=axes_row[1], error_style="fill")
# Server issues corrupted the results for L=256 and L=64. Don’t drop L=64 because it would slightly shift the axis limits
plot_curve(_df.drop([256]), "Δ MDL" , None, "m", "vert", ax=axes_row[2])
plot_curve(_df, "Δ AIC" , None, "m", "vert", ax=axes_row[3])
# One doesn’t see anything when plotting the elpd as curves, so plot only the error bars instead
plot_curve(_df.loc[Llist_short], "Δ elpd", "elpd_se", "m", "vert", ax=axes_row[4], error_style="bar", linestyle="", offset=0.025)
plot_Rdists(_Rdists, "vert", ax=axes_row[5], offset=0.05, width=1, colors=color_cycle.by_key()["color"], alpha=.15)
axes_row[-1].legend(loc="upper left", bbox_to_anchor=(1,1))
## Nested models without true (Rayleigh-Jeans + poly) ##
axes_row = axes[1]
color_cycle = cycler(color=discrete_rainbow_scheme(len(mlist)))
for ax in axes_row:
ax.set_prop_cycle(color_cycle)
_df = df.loc[:,(np.s_[:],np.s_[:],"Rayleigh-Jeans")].droplevel("phys", axis="columns")
_Rdists = {(m, L): Rd for (phys, m, L), Rd in Rdists.items() if phys=="Rayleigh-Jeans"}
plot_curve(_df, "Δ BIC" , None, "m", "vert", ax=axes_row[0])
plot_curve(_df, "-Δ logE", "logE_se", "m", "vert", ax=axes_row[1], error_style="fill")
plot_curve(_df.drop([256]), "Δ MDL" , None, "m", "vert", ax=axes_row[2])
plot_curve(_df, "Δ AIC" , None, "m", "vert", ax=axes_row[3])
plot_curve(_df, "-Δ elpd", "elpd_se", "m", "vert", ax=axes_row[4], error_style="fill")
plot_Rdists(_Rdists, "vert", ax=axes_row[5], offset=0.05, width=1, colors=color_cycle.by_key()["color"], alpha=.15)
## Structurally different models (Planck vs Rayleigh-Jeans, m=0) ##
axes_row = axes[2]
color_cycle = cycler(color=discrete_rainbow_scheme(2))
for ax in axes_row:
ax.set_prop_cycle(color_cycle)
_df = df.loc[:,(np.s_[:],0)].droplevel("m", axis="columns")
_Rdists = {(phys, L): Rd for (phys, m, L), Rd in Rdists.items() if m==0}
# Recompute relative values wrt to Planck, m=0
# NOTE(review): filters on `df.columns` rather than `_df.columns`; the criterion
# sets coincide here, but confirm if the pivot ever changes. This drop on a
# non-lexsorted MultiIndex is also what triggers the PerformanceWarning below.
_df.drop([C for C in df.columns.unique("criterion") if C[0] in "-Δ"],
axis="columns", inplace=True)
for vdim in ["BIC", "AIC", "logE", "elpd", "MDL"]:
_df = add_Δ_cols(_df, vdim, ref_col=(vdim, "Planck")).sort_index()
for vdim in ["Δ logE", "Δ elpd"]:
_df = add_neg_cols(_df, vdim).sort_index()
# Now do the plots
plot_curve(_df, "Δ BIC" , None, "phys", "vert", ax=axes_row[0])
plot_curve(_df, "-Δ logE", "logE_se", "phys", "vert", ax=axes_row[1], error_style="bar")
plot_curve(_df.drop([256]), "Δ MDL" , None, "phys", "vert", ax=axes_row[2])
plot_curve(_df, "Δ AIC" , None, "phys", "vert", ax=axes_row[3])
plot_curve(_df, "-Δ elpd", "elpd_se", "phys", "vert", ax=axes_row[4], error_style="fill")
plot_Rdists(_Rdists, "vert", ax=axes_row[5], offset=0, ref_m="Planck", width=1, colors=color_cycle.by_key()["color"], alpha=.15)
axes_row[-1].legend(loc="upper left", bbox_to_anchor=(1,1))
# Tidy shared axes: only the bottom row keeps x labels; only the left column
# keeps a visible y axis.
for ax in axes[:-1,:].flat:
ax.set_xlabel(None)
# ax.xaxis.set_visible(False)
for ax in axes[:,1:].flat:
ax.yaxis.set_visible(False)
ax.spines["left"].set_visible(False)
for ax in axes[:,0]:
ax.set_yticks(Llist_short)
pn.pane.Matplotlib(fig, tight=True, width=800)
/tmp/ipykernel_55955/3783345752.py:44: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.
_df.drop([C for C in df.columns.unique("criterion") if C[0] in "-Δ"],
2025-07-19 git: emd-paper main #af8cfb86