The book Publication Bias in Meta-Analysis: Prevention,
Assessment and Adjustments by Rothstein et al. (2005) provides a
very comprehensive treatment of the topic of publication bias. In this
document, I provide the R code to reproduce the worked examples and
analyses from various chapters. Emphasis will be on using the
metafor
package, but a few other packages will also be
used. To read more about the metafor
package, see the package website and the package
documentation.
The package can be installed with:
Once the package is installed, we can load it with:
A few additional notes:
We will actually start with Appendix A, which provides the three datasets used throughout the book for illustrative purposes.
# data for the meta-analysis on teacher expectancy effects
dat1 <- dat.raudenbush1985
dat1$vi <- c(0.126, 0.147, 0.167, 0.397, 0.371, 0.103, 0.103, 0.221, 0.165, 0.260,
0.307, 0.223, 0.289, 0.291, 0.159, 0.167, 0.139, 0.094, 0.174)^2
dat1$weeks <- ifelse(dat1$weeks > 1, 1, 0) # dummy-code weeks variable into 'high' vs 'low'
dat1
## study author year weeks setting tester n1i n2i yi vi
## 1 1 Rosenthal et al. 1974 1 group aware 77 339 0.0300 0.0159
## 2 2 Conn et al. 1968 1 group aware 60 198 0.1200 0.0216
## 3 3 Jose & Cody 1971 1 group aware 72 72 -0.1400 0.0279
## 4 4 Pellegrini & Hicks 1972 0 group aware 11 22 1.1800 0.1576
## 5 5 Pellegrini & Hicks 1972 0 group blind 11 22 0.2600 0.1376
## 6 6 Evans & Rosenthal 1969 1 group aware 129 348 -0.0600 0.0106
## 7 7 Fielder et al. 1971 1 group blind 110 636 -0.0200 0.0106
## 8 8 Claiborn 1969 1 group aware 26 99 -0.3200 0.0488
## 9 9 Kester 1969 0 group aware 75 74 0.2700 0.0272
## 10 10 Maxwell 1970 0 indiv blind 32 32 0.8000 0.0676
## 11 11 Carter 1970 0 group blind 22 22 0.5400 0.0942
## 12 12 Flowers 1966 0 group blind 43 38 0.1800 0.0497
## 13 13 Keshock 1970 0 indiv blind 24 24 -0.0200 0.0835
## 14 14 Henrikson 1970 1 indiv blind 19 32 0.2300 0.0847
## 15 15 Fine 1972 1 group aware 80 79 -0.1800 0.0253
## 16 16 Grieger 1970 1 group blind 72 72 -0.0600 0.0279
## 17 17 Rosenthal & Jacobson 1968 0 group aware 65 255 0.3000 0.0193
## 18 18 Fleming & Anttonen 1971 1 group blind 233 224 0.0700 0.0088
## 19 19 Ginsburg 1970 1 group aware 65 67 -0.0700 0.0303
# note: 'yi' are the standardized mean differences and 'vi' the corresponding sampling variances;
# the sampling variances in 'dat.raudenbush1985' were computed in a slightly different way than in
# the dataset included in the book, so to make sure we can obtain the same results as provided in
# the book, we just overwrite 'vi' with the squared standard errors given in Table A.1
# equal-effects model for studies where teachers had more than one week of contact with the students
res.h <- rma(yi, vi, data=dat1, subset=weeks==1, method="EE", digits=2)
res.h
## Equal-Effects Model (k = 11)
##
## I^2 (total heterogeneity / total variability): 0.00%
## H^2 (total variability / sampling variability): 0.64
##
## Test for Heterogeneity:
## Q(df = 10) = 6.38, p-val = 0.78
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## -0.02 0.04 -0.52 0.60 -0.10 0.06
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# equal-effects model for studies where teachers had a week or less of contact with the students
res.l <- rma(yi, vi, data=dat1, subset=weeks==0, method="EE", digits=2)
res.l
## Equal-Effects Model (k = 8)
##
## I^2 (total heterogeneity / total variability): 32.65%
## H^2 (total variability / sampling variability): 1.48
##
## Test for Heterogeneity:
## Q(df = 7) = 10.39, p-val = 0.17
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.35 0.08 4.41 <.01 0.19 0.50 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Figure A.1
par(mar=c(4,4,2,2))
tmp <- rbind(dat1[dat1$weeks == 1,], dat1[dat1$weeks == 0,])
tmp$weeks <- ifelse(tmp$weeks == 1, "High", "Low")
with(tmp, forest(yi, vi, slab=paste0(author, ", ", year), psize=1, efac=c(0,1),
xlim=c(-5, 3.5), ylim=c(-1,25), header=TRUE, at=seq(-1,1.5,by=0.5),
rows=c(22:12, 9:2), ilab=weeks, ilab.xpos=-1.2, ilab.pos=2))
text(-1.6, 24, "Prior Contact", font=2)
addpoly(res.h, row=11, mlab="High", col="white", font=c(sans=2))
addpoly(res.l, row= 1, mlab="Low", col="white", font=c(sans=2))
res <- rma(yi, vi, data=tmp, method="EE")
addpoly(res, row=-1, mlab="Overall", font=c(sans=2))
abline(h=0)
# data for the meta-analysis on the effect of environmental tobacco smoke on lung cancer risk
dat2 <- dat.hackshaw1998
head(dat2, 10) # show the first 10 rows
## study author year country design cases or or.lb or.ub yi vi
## 1 1 Garfinkel 1981 USA cohort 153 1.18 0.90 1.54 0.1655 0.0188
## 2 2 Hirayama 1984 Japan cohort 200 1.45 1.02 2.08 0.3716 0.0330
## 3 3 Butler 1988 USA cohort 8 2.02 0.48 8.56 0.7031 0.5402
## 4 4 Cardenas 1997 USA cohort 150 1.20 0.80 1.60 0.1823 0.0313
## 5 5 Chan 1982 Hong Kong case-control 84 0.75 0.43 1.30 -0.2877 0.0797
## 6 6 Correa 1983 USA case-control 22 2.07 0.81 5.25 0.7275 0.2273
## 7 7 Trichopolous 1983 Greece case-control 62 2.13 1.19 3.83 0.7561 0.0889
## 8 8 Buffler 1984 USA case-control 41 0.80 0.34 1.90 -0.2231 0.1927
## 9 9 Kabat 1984 USA case-control 24 0.79 0.25 2.45 -0.2357 0.3390
## 10 10 Lam 1985 Hong Kong case-control 60 2.01 1.09 3.72 0.6981 0.0981
# note: 'yi' are the log odds ratios and 'vi' the corresponding sampling variances; the studies in
# 'dat.hackshaw1998' are ordered by their publication year and hence are not in the same order as in
# Figure A.2, but this is (with some minor rounding discrepancies) the same dataset; also note that
# the 'yi' values are in some chapters referred to as log risk ratios
# random-effects model
res <- rma(yi, vi, data=dat2, method="DL", digits=2, slab=paste0(author, ", ", year))
predict(res, transf=exp)
## pred ci.lb ci.ub pi.lb pi.ub
## 1.24 1.13 1.36 0.94 1.63
# Figure A.2
par(mar=c(4,4,2,2))
wi <- fmtx(weights(res), digits=2)
forest(res, atransf=exp, psize=1, header=TRUE, xlim=c(-4.5,7), ylim=c(-0.6,40),
at=log(c(0.2, 0.5, 1, 2, 5, 10)), efac=c(0,1),
ilab=wi, ilab.xpos=3.8, ilab.pos=2)
text(3.3, 39, "Weight", font=2)