Sometimes, I run some pretty heavy duty models, including several latent variables and large datasets. I am also a perfectionist and so I insist on simulating and testing these models extensively.
To improve my efficiency, I can run JAGS in parallel using a variety of tools, thereby saving considerable time.
Running JAGS in parallel
Here is a quick script I wrote to show a few different methods and how their timing compares on my (new and very fast) laptop.
# Startup ----
# NOTE(review): rm(list = ls()) wipes the workspace; kept from the original
# script, but starting a fresh R session is the safer habit.
rm(list = ls())

# library() errors immediately if a package is missing, whereas require()
# only returns FALSE -- use library() for hard dependencies.
library(dclone)
library(rjags)
library(foreach)
library(doParallel)
library(snow)
library(snowfall)

# Number of worker processes used by every parallel method below.
n.cores <- 3

# Elapsed-time results, one slot per fitting method (6 methods compared).
timings <- vector('numeric', 6)

# For easy plotting of the timings afterwards.
library(ggplot2)
# MCMC settings ----
# Base (quick-test) settings; scaled up by 'setts.m' when 'mSetts' is on.
setts <- list('n.iter' = 100, 'n.thin' = 1, 'n.burn' = 50)
setts.m <- 1000  # multiplier applied to every base setting
mSetts <- 1      # flag: 1 = scale settings up for a real run, 0 = quick test
if (mSetts) setts <- lapply(setts, function(v) v * setts.m)
# Added after the scaling step so the chain count is NOT multiplied.
setts$n.chains <- 3
# Data ----
# Simulate a Poisson GLM: log(mu) = X %*% beta, intercept 2, slope -1.
# NOTE(review): no set.seed(), so each run simulates a different dataset.
n <- 20
x <- runif(n, -1, 1)
X <- model.matrix(~ x)       # n x 2 design matrix (intercept + x)
beta <- c(2, -1)
mu <- crossprod(t(X), beta)  # equivalent to X %*% beta
Y <- rpois(n, exp(mu))
# Everything JAGS needs, including dimensions for the model's loops.
dat <- list(Y = Y, X = X, n = n, np = ncol(X))
# Model ----
# JAGS/BUGS model written as an R function (the dclone convention): the
# body is never evaluated in R, only deparsed and handed to JAGS.
glm.model <- function() {
  for (i in 1:n) {
    Y[i] ~ dpois(lambda[i])
    log(lambda[i]) <- inprod(X[i, ], beta[1, ])
  }
  # Vague normal priors on the regression coefficients.
  for (j in 1:np) {
    beta[1, j] ~ dnorm(0, 0.001)
  }
}
# Monitors; 'deviance' could be added but is left out here for easy plotting.
params <- c('beta')
# Fit with jags.fit (serial baseline) ----
timer <- proc.time()
load.module('glm')
load.module('lecuyer')
load.module('dic')
m0 <- jags.fit(data = dat, params = params, model = glm.model,
               n.chains = setts$n.chains,
               n.adapt = 100,
               n.update = setts$n.burn,
               n.iter = setts$n.iter,
               thin = setts$n.thin)
time.taken <- proc.time() - timer
timings[1] <- time.taken[3]  # element 3 = elapsed (wall-clock) seconds
# Fit with dclone::jags.parfit (one chain per worker) ----
timer <- proc.time()
cl <- makePSOCKcluster(n.cores)
tmp <- clusterEvalQ(cl, library(dclone))  # load dclone on every worker
parLoadModule(cl, 'glm')
parLoadModule(cl, 'lecuyer')
parLoadModule(cl, 'dic')
m1 <- jags.parfit(cl = cl, data = dat, params = params, model = glm.model,
                  n.chains = setts$n.chains,
                  n.adapt = 100,
                  n.update = setts$n.burn,
                  n.iter = setts$n.iter,
                  thin = setts$n.thin)
stopCluster(cl)
time.taken <- proc.time() - timer
timings[2] <- time.taken[3]
# Fit with dclone's parJagsModel / parUpdate / parCodaSamples ----
# The cluster stays open between steps, so the model could be updated in
# stages (e.g. extra burn-in) before sampling.
timer <- proc.time()
cl <- makePSOCKcluster(n.cores)
parLoadModule(cl, 'glm')
parLoadModule(cl, 'lecuyer')
parLoadModule(cl, 'dic')
parJagsModel(cl = cl, name = 'res', file = glm.model, data = dat,
             n.chains = setts$n.chains, n.adapt = 100)
parUpdate(cl = cl, object = 'res', n.iter = setts$n.burn)
m2 <- parCodaSamples(cl = cl, model = 'res', variable.names = params,
                     n.iter = setts$n.iter, thin = setts$n.thin)
stopCluster(cl)
time.taken <- proc.time() - timer
timings[3] <- time.taken[3]
# Fit with foreach/doParallel (one chain per iteration) ----
timer <- proc.time()
cl <- makePSOCKcluster(n.cores)
clusterSetRNGStream(cl)  # give workers independent RNG streams
registerDoParallel(cl)
m3 <- foreach(i = 1:setts$n.chains, .packages = c('dclone', 'rjags'),
              .combine = 'c', .final = mcmc.list) %dopar% {
  load.module('glm')
  load.module('lecuyer')
  load.module('dic')
  # Each worker runs a single chain with its own lecuyer RNG seed; the
  # fitted chain is the block's value, collected by .combine/.final.
  jags.fit(data = dat, params = params, model = glm.model,
           n.chains = 1,
           n.adapt = 100,
           n.update = setts$n.burn,
           n.iter = setts$n.iter,
           thin = setts$n.thin,
           inits = list(.RNG.name = 'lecuyer::RngStream',
                        .RNG.seed = sample(1:1e6, 1)))
}
stopCluster(cl)
time.taken <- proc.time() - timer
timings[4] <- time.taken[3]
# Fit with snow (clusterApply over chains) ----
timer <- proc.time()

# Run one chain; called on each worker with the chain index 'i' (unused,
# but required by clusterApply's signature).
coda.samples.wrapper <- function(i) {
  load.module('glm')
  load.module('lecuyer')
  load.module('dic')
  jags.fit(data = dat, params = params, model = glm.model,
           n.chains = 1,
           n.adapt = 100,
           n.update = setts$n.burn,
           n.iter = setts$n.iter,
           thin = setts$n.thin,
           inits = list(.RNG.name = 'lecuyer::RngStream',
                        .RNG.seed = sample(1:1e6, 1)))
}

cl <- makeCluster(n.cores, "SOCK")
clusterEvalQ(cl, library('dclone'))
clusterEvalQ(cl, library('rjags'))
# Workers need the data, monitors, model and settings from this session.
clusterExport(cl, list('dat', 'params', 'glm.model', 'setts'))
m4 <- clusterApply(cl, 1:setts$n.chains, coda.samples.wrapper)

# Each worker returns a one-chain mcmc.list; unwrap each element and
# rebuild a single mcmc.list across all chains.
for (i in seq_along(m4)) {
  m4[[i]] <- m4[[i]][[1]]
}
class(m4) <- "mcmc.list"
stopCluster(cl)
time.taken <- proc.time() - timer
timings[5] <- time.taken[3]
# Fit with snowfall (sfLapply over chains) ----
timer <- proc.time()
sfInit(parallel = TRUE, cpus = n.cores)
sfLibrary(rjags)
sfLibrary(dclone)
sfExportAll()  # push the whole global environment to the workers
m5 <- sfLapply(1:setts$n.chains, function(i) {
  load.module('glm')
  load.module('lecuyer')
  load.module('dic')
  jags.fit(data = dat, params = params, model = glm.model,
           n.chains = 1,
           n.adapt = 100,
           n.update = setts$n.burn,
           n.iter = setts$n.iter,
           thin = setts$n.thin,
           inits = list(.RNG.name = 'lecuyer::RngStream',
                        .RNG.seed = sample(1:1e6, 1)))
})
sfStop()

# Each worker returns a one-chain mcmc.list; unwrap each element and
# rebuild a single mcmc.list across all chains.
for (i in seq_along(m5)) {
  m5[[i]] <- m5[[i]][[1]]
}
class(m5) <- "mcmc.list"
time.taken <- proc.time() - timer
timings[6] <- time.taken[3]
## jags.fit jags.parfit par... functions foreach
## 29.81 12.17 12.41 12.24
## snow snowfall
## 12.42 12.32
Of course, this would be better done using proper benchmarking procedures, but I think it is interesting to note that there are no obvious differences in the timing of the different parallel procedures.
My preferences
Personally, I like the dclone::par... functions for single-run models, because I have become accustomed to using them (including using them to update particular parameters at different stages by keeping the clusters open), and I like snowfall for model testing, when I want to run models many times over simulated datasets, because it has fewer dependencies.
I hope this is useful to someone.
Reuse
Citation
@online{d. gregory2017,
author = {D. Gregory, Stephen},
title = {Running {JAGS} in Parallel},
date = {2017-04-16},
url = {https://stephendavidgregory.github.io/posts/jags-in-parallel},
langid = {en}
}