What do you do when you don't know what's going on?
A Bayesian nonparametric model is a Bayesian model on an infinite-dimensional parameter space.
A Gaussian process regression is a Bayesian linear regression with infinitely many basis functions.
library('rjags')
library('ggplot2')

fit_model = function(ms, y, X, n.iter = 2000) {
  dat = list('y' = y, 'X' = X, 'n' = length(y), 'p' = ncol(X))
  model = jags.model(textConnection(ms), dat, n.chains = 3, quiet = TRUE)
  coda.samples(model, variable.names = 'beta', n.iter = n.iter)
}

extract_coefs = function(samples) apply(as.matrix(samples), 2, mean)

expand = function(x, powers) sapply(powers, function(i) x^i)

plot_fit = function(fn) {
  ggplot(dat, aes(x = Timebin, y = basePupil)) +
    theme_bw() +
    geom_line(colour = 'red1') +
    geom_point() +
    stat_function(fun = fn, colour = 'darkblue')
}
regression = '
model {
  sigma ~ dunif(0, 100)
  tau = pow(sigma, -2)

  for (i in 1:p) {
    beta[i] ~ dnorm(0, .001)
  }

  for (i in 1:n) {
    y[i] ~ dnorm(mu[i], tau)
    mu[i] = inprod(X[i, ], beta[])
  }
}'
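As a usage sketch, assuming the pupil data frame dat (columns Timebin and basePupil) that plot_fit already expects: fit a cubic basis expansion with the helpers above and overlay the fitted curve.

b = extract_coefs(fit_model(regression, dat$basePupil,
                            cbind(1, expand(dat$Timebin, 1:3))))
plot_fit(function(x) as.vector(cbind(1, expand(x, 1:3)) %*% b))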
\[ RSS = \sum_{i=1}^n (y_i - f(x_i))^2 \]
I think we are onto something.
Are we explaining anything?
\[ PRSS = \sum_{i=1}^n (y_i - f(x_i))^2 + \lambda \int f''(t)^2 \, \mathrm{d}t \]
library('mgcv')

m = gam(basePupil ~ s(Timebin, k = 10), data = dat)
plot(m)
\[ p(\beta|y, X) = \frac{\pi(\beta)\prod_{i=1}^n p(y_i|X, \beta)}{\int \pi(\beta)\prod_{i=1}^n p(y_i|X, \beta) \mathrm{d}\beta} \]
\[
\begin{align*}
\beta &\sim \mathcal{N}(m_0, \Sigma_p) \\
y|\beta, X &\sim \mathcal{N}(X\beta, C := I\sigma^2) \\
\beta|y, X &\sim \mathcal{N}(\mu, A^{-1})
\end{align*}
\]
\[ \begin{align*} A &= nC^{-1} + \Sigma_p^{-1} \\[1.5ex] \mu &= A^{-1} (nC^{-1} \bar{\boldsymbol{y}} + \Sigma_p^{-1} m_0) \end{align*} \]
(for gory details, see Rasmussen & Williams, 2006; p.12)
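To make the update concrete, a minimal numerical sketch for a scalar mean with known variance (all numbers illustrative):

n = 50; sigma_sq = 4
y = rnorm(n, mean = 1, sd = sqrt(sigma_sq))

m0 = 0; s0_sq = 10                        # prior mean and variance
A = n / sigma_sq + 1 / s0_sq              # posterior precision
mu = (1 / A) * (n * mean(y) / sigma_sq + m0 / s0_sq)
c(mu, 1 / A)                              # posterior mean and variance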
A Gaussian process is an infinite-dimensional multivariate Normal distribution.
\[ \begin{align*} \begin{pmatrix} X_1 \\ X_2 \\ X_3 \end{pmatrix} &\sim N \begin{bmatrix} \begin{pmatrix} \mu_1 \\ \mu_2 \\ \mu_3 \end{pmatrix}, \begin{pmatrix} \Sigma_{11} & \Sigma_{12} & \Sigma_{13} \\ \Sigma_{21} & \Sigma_{22} & \Sigma_{23} \\ \Sigma_{31} & \Sigma_{32} & \Sigma_{33} \end{pmatrix} \end{bmatrix} \\[2ex] \begin{pmatrix} X_1 \\ X_2 \end{pmatrix} &\sim N \begin{bmatrix} \begin{pmatrix} \mu_1 \\ \mu_2 \end{pmatrix}, \begin{pmatrix} \Sigma_{11} & \Sigma_{12} \\ \Sigma_{21} & \Sigma_{22} \end{pmatrix} \end{bmatrix} \\[2ex] X_1 &\sim N(\mu_1, \Sigma_{11}) \end{align*} \]
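A quick numerical check of this marginalisation property (MASS::mvrnorm is used as the sampler here; any multivariate Normal sampler works):

library('MASS')

mu = c(0, 1, 2)
Sigma = matrix(c(1, .5, .2,
                 .5, 1, .3,
                 .2, .3, 1), nrow = 3)

draws = mvrnorm(1e5, mu, Sigma)
colMeans(draws[, 1:2])  # close to (mu_1, mu_2)
cov(draws[, 1:2])       # close to the top-left 2x2 block of Sigma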
Instead of fixing the mean and covariance in advance, we estimate them from the data.
\[
f(x) \sim \mathcal{GP}(m(x), k(x, x'))
\]
A function is just a very long vector!
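To make this concrete, here is a sketch drawing such "very long vectors" from a GP prior with the squared-exponential kernel used in the Stan model below (values illustrative):

k = function(x, xp, eta_sq = 1, rho_sq = 1) eta_sq * exp(-rho_sq * (x - xp)^2)

xs = seq(0, 10, length.out = 100)
K = outer(xs, xs, k) + diag(1e-8, length(xs))  # jitter for numerical stability
f = MASS::mvrnorm(3, rep(0, length(xs)), K)    # three functions from the prior
matplot(xs, t(f), type = 'l', lty = 1, ylab = 'f(x)')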
gp = '
data {
  int N1;
  int N2;
  vector[N1] y1;
  vector[N1] x1;
  vector[N2] x2;
}
transformed data {
  int<lower=1> N;
  vector[N1 + N2] x;
  vector[N1 + N2] mu;
  N = N1 + N2;
  for (i in 1:N) mu[i] = 0;
  for (i in 1:N1) x[i] = x1[i];
  for (i in 1:N2) x[N1 + i] = x2[i];
}'  # parameters and model block follow below
gp = paste0(gp, '
parameters {
  vector[N2] y2;
  real<lower=0> eta_sq;
  real<lower=0> rho_sq;
  real<lower=0> sigma_sq;
}
model {
  vector[N] y;
  matrix[N, N] Sigma;

  eta_sq ~ cauchy(0, 5);
  rho_sq ~ cauchy(0, 5);
  sigma_sq ~ cauchy(0, 5);

  for (i in 1:N1) y[i] = y1[i];
  for (i in 1:N2) y[N1 + i] = y2[i];

  // squared-exponential covariance plus observation noise on the diagonal
  for (i in 1:N) {
    for (j in 1:N) {
      Sigma[i, j] = eta_sq * exp(-rho_sq * pow(x[i] - x[j], 2))
                    + (i == j ? sigma_sq : 0.0);
    }
  }

  y ~ multi_normal(mu, Sigma);
}')
library('rstan')
library('parallel')

# maybe find a better data set??
N1 = 21
N = length(y)
N2 = N - N1

stan_dat = list(
  'y1' = y[1:N1], 'y2' = y[(N1+1):N],
  'x1' = X[1:N1], 'x2' = X[(N1+1):N],
  'N' = N, 'N1' = N1, 'N2' = N2
)

# compile once, then run the chains in parallel
compfit = stan(model_code = gp, data = stan_dat, chains = 0)
sflist = mclapply(1:4, mc.cores = 4, function(i) {
  stan(fit = compfit, data = stan_dat, seed = sample.int(1e6, 1),
       chains = 1, chain_id = i, refresh = -1, iter = 8000)
})
fit = sflist2stanfit(sflist)
ypred = apply(rstan::extract(fit, 'y2')[['y2']], 2, mean)
dat[(N1+1):N, 2] = ypred  # beware of the high variance!

ggplot(dat, aes(x = Timebin, y = basePupil)) +
  theme_bw() +
  geom_point() +
  geom_line(colour = 'blue') +
  ggtitle('Pupil dilation over time')
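The warning in the comment can be quantified with posterior quantiles of the same draws:

yq = apply(rstan::extract(fit, 'y2')[['y2']], 2, quantile, probs = c(.025, .975))
summary(yq[2, ] - yq[1, ])  # widths of the 95% intervals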
(Kruschke, chapter 9.2.4)
library('ggmcmc')
library('mixtools')

fit_mixture = function(ms, y, k, n.iter = 2000) {
  dat = list('y' = y, 'n' = length(y), 'alpha' = rep(1, k), 'k' = k)
  model = jags.model(textConnection(ms), dat, n.chains = 3, quiet = TRUE)
  update(model, n.iter / 4)  # burn-in
  coda.samples(model, variable.names = c('mu', 'sigma'), n.iter = n.iter)
}
mix = '
model {
  theta[1:k] ~ ddirch(alpha[])  # mixing distribution

  # class-conditional Gaussians
  for (i in 1:k) {
    mu[i] ~ dnorm(0, .0001)
    sigma[i] ~ dunif(0, 100)
    tau[i] = pow(sigma[i], -2)
  }

  # likelihood
  for (i in 1:n) {
    c[i] ~ dcat(theta[1:k])
    y[i] ~ dnorm(mu[c[i]], tau[c[i]])
  }
}'
ss = ggs(fit_mixture(mix, faithful$waiting, 2))
ggs_density(ss)
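For comparison, mixtools (loaded above) provides a maximum-likelihood fit of the same two-component Gaussian mixture via EM:

em = normalmixEM(faithful$waiting, k = 2)
c(em$mu, em$sigma)  # should be close to the posterior means above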
\[ f(p_i) = \alpha f_{H_0}(p_i) + (1 - \alpha) f_{H_1}(p_i) \]
x = replicate(10000, rnorm(100))
ps = apply(x, 2, function(dat) t.test(dat)[['p.value']])
hist(ps)
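If some of the studies measure a real effect, the histogram turns into the mixture from the equation above. A sketch with alpha = .5 and an assumed effect of .3:

x1 = replicate(10000, rnorm(100, mean = .3))
ps1 = apply(x1, 2, function(dat) t.test(dat)[['p.value']])
hist(c(ps, ps1))  # flat H0 component plus a spike of small p-values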
\[ p(c_n = k|c_{1:n-1}) = \begin{cases} \frac{m_k}{n - 1 + \alpha} & \mbox{if $k \le K_{+}$} \\ \frac{\alpha}{n - 1 + \alpha} & \mbox{otherwise} \end{cases} \]
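A minimal simulation of this seating process (the helper crp is illustrative):

crp = function(n, alpha) {
  c = integer(n)
  c[1] = 1
  for (i in 2:n) {
    m = table(c[1:(i - 1)])                # customers at each occupied table
    probs = c(m, alpha) / (i - 1 + alpha)  # existing tables, then a new one
    c[i] = sample(length(probs), 1, prob = probs)
  }
  c
}

table(crp(100, alpha = 1))  # cluster sizes for one draw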
\[ \begin{align*} G &\sim DP(\cdot|G_0, \alpha) \\[1.5ex] \theta_i &\sim G(\cdot) \\[1.5ex] \Phi^{-1}(p_i^{H_1}) &\sim \mathcal{N}(\cdot|\theta_i)\,T(, -1.64485) \end{align*} \]
bayesmix = '
model {
  alpha ~ dunif(.5, 7)  # hyperprior
  phi ~ dbeta(1, 1)     # mixing weight H0 & H1

  for (i in 1:n) {
    ind[i] ~ dbern(phi)
    z[i] ~ dcat(prop_stick[])
    z1[i] = z[i] + 1
    z2[i] = ifelse(ind[i] == 1, 1, z1[i])
    qp[i] ~ dnorm(mu[z2[i]], lambda[z2[i]])T(, -1.64485)
  }
'  # model continues below
bayesmix = paste0(bayesmix, '
  # stick-breaking construction of the mixing weights
  for (j in 1:(nmaxclust - 1)) {
    prop_remaining[j] ~ dbeta(1, alpha)
  }

  prop_stick[1] = prop_remaining[1]
  for (j in 2:(nmaxclust - 1)) {
    prop_stick[j] = prop_remaining[j] * prod(1 - prop_remaining[1:(j-1)])
  }
  prop_stick[nmaxclust] = 1 - sum(prop_stick[1:(nmaxclust - 1)])

  # cluster parameters under H1 (truncated)
  for (j in 1:nmaxclust) {
    mu_h1[j] ~ dnorm(0, 1)T(-6, 0)
    lambda_h1[j] ~ dgamma(0.001, 0.001)T(, 1)
  }

  # under H0, probit-transformed p-values are standard Normal
  mu[1] = 0
  lambda[1] = 1
  mu[2:(nmaxclust + 1)] = mu_h1
  lambda[2:(nmaxclust + 1)] = lambda_h1
}')
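A hedged usage sketch, assuming ps from the p-value simulation above: probit-transform the significant p-values (matching the T(, -1.64485) truncation) and fit the model; the truncation level nmaxclust = 10 is an illustrative choice.

dat_dp = list('qp' = qnorm(ps[ps < .05]), 'n' = sum(ps < .05), 'nmaxclust' = 10)
model = jags.model(textConnection(bayesmix), dat_dp, n.chains = 3, quiet = TRUE)
samples = coda.samples(model, variable.names = c('phi', 'alpha'), n.iter = 2000)
summary(samples)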