R code for Chapter 11, Count Data
######## Poisson regression for rates. 10 year CHD mortality in British male doctors
A <- c(1,2,3,4,5,1,2,3,4,5)
S <- c(1,1,1,1,1,0,0,0,0,0)
y <- c(32,104,206,186,102, 2,12,28,28,31)
M <- c(52407,43248,28612,12663,5317,18790,10673,5710,2585,1462)
# Model 2a from WB code. For log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*S[i]
PostMeanRate1 <- 10000*c(0.001034000,0.002381000,0.005488000,0.012660000,0.029230000,
0.000689500,0.001588000,0.003660000,0.008442000,0.019490000)
# Model 2b from WB code. For log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*S[i] + beta[4]*A[i]*S[i]
PostMeanRate2 <- 10000*c(0.00113100,0.00250900,0.00556800,0.01237000,0.02751000,
0.00040880,0.00114900,0.00325000,0.00924700,0.02646000)
# Model 2c from WB code. For log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*A[i]*A[i] + beta[4]*S[i] + # beta[5]*A[i]*S[i] + beta[6]*A[i]*A[i]*S[i]
PostMeanRate3 <- 10000*c(0.0005805,0.0024720,0.0072250,0.0143800,0.0195300,0.0001549,0.0010390,0.0045520,0.0122200,0.0201800)
## Figure 11.1. Empirical rates
plot(A[1:5],10000*y[1:5]/M[1:5], type='b',pch="S",
xlab="Age", ylab="Deaths per 10,000 person years", cex=1.5,lwd=2,ylim=c(0,250),axes=F)
lines(A[6:10],10000*y[6:10]/M[6:10], type='b',pch="N", lty=2, lwd=2, cex=1.5)
legend("bottomright",lty=c(1,2),c("Smokers","Nonsmokers"),lwd=2)
axis(2)
axis(1,at=c(1,2,3,4,5),labels=c("35-44","45-54","55-64","65-74","75-84"))
## Figure 11.2. Empirical rates; log scale
plot(A[1:5],log(10000*y[1:5]/M[1:5]), type='b',pch="S", ylim=c(0,6),
xlab="Age", ylab="Deaths per 10,000 person years (log scale)", cex=1.5,lwd=2,axes=F)
lines(A[6:10],log(10000*y[6:10]/M[6:10]), type='b',pch="N", lty=2, lwd=2, cex=1.5)
legend("bottomright",lty=c(1,2),c("Smokers","Nonsmokers"),lwd=2)
axis(2)
axis(1,at=c(1,2,3,4,5),labels=c("35-44","45-54","55-64","65-74","75-84"))
## LPML for Model 2a. log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*S[i]
cpoinv <- c(35390.00,31.73,931600.00,753.90,9095000000.00,14680.00,25.01,64.69,45.45,20.88)
cpo <- 1/cpoinv
LPML1 <- sum(log(cpo))
LPML1
## LPML for Model 2b. log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*S[i] + beta[4]*A[i]*S[i]
cpoinv <- c(2882000.000,36.970,321300.000,2082.000,112400000.000,303.500,10.750,306.300,24.810,869.400)
cpo <- 1/cpoinv
LPML2 <- sum(log(cpo))
LPML2
## LPML for Model 2c. log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*A[i]*A[i] + beta[4]*S[i] + beta[5]*A[i]*S[i] + beta[6]*A[i]*A[i]*S[i]
cpoinv <- c(27.20,36.34,55.76,50.24,64.93,6.96,13.03,24.03,27.26,59.11)
cpo <- 1/cpoinv
LPML3 <- sum(log(cpo))
LPML3
### MLE estimates of beta in Models 1-3. Can be used as starting values in the Gibbs sampler
A2 <- A*A
MLEfit1 <- glm(y ~ A + S , family=poisson, offset=log(M))
summary(MLEfit1)
MLEfit2 <- glm(y ~ A + S + A*S, family=poisson, offset=log(M))
summary(MLEfit2)
MLEfit3 <- glm(y ~ A + A2 + S + A*S + A2*S, family=poisson, offset=log(M))
summary(MLEfit3)