knitr::opts_chunk$set(warning=F, mesage=F, dpi=200, fig.path='figures/', dev=c('png', 'svg'))
library(meta)
library(metasens)
library(ggplot2)
library(dplyr)
library(tidyr)
Z = qnorm(0.975) * 2
wide_test = function(data, method, title='', vac_c, vac_v, n, n_backup, events, cont, ci1, ci2, funnel=F, subgroup=NULL) {
if(!(n %in% colnames(data))) stop(paste(n, 'not in columns'))
if(!(n_backup %in% colnames(data))) stop(paste(n_backup, 'not in columns'))
d <- data %>% mutate(id=paste(study_id, split), N=as.integer(ifelse(is.na(!!sym(n)), !!sym(n_backup), !!sym(n))))
if(method=='metabin') {
if(!events %in% colnames(data)) stop(paste(events, 'not in columns'))
d = d %>%
filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(!!sym(events)))
ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0)
if(nrow(ids)<1) return('no results')
d = d %>% filter(id %in% ids$id) %>%
group_by(study_id, split, vac_mode, author, year) %>%
summarize(n=sum(N, na.rm=T), e=sum(as.integer(!!sym(events)), na.rm=T), age=mean(age), .groups='drop') %>%
pivot_wider(names_from=vac_mode, values_from=c(n, e, age), names_glue="{.value}.{vac_mode}")# %>%
# filter(!is.na(n.CO) & !is.na(n.SEQ) & !is.na(e.CO) & !is.na(e.SEQ))
if(nrow(d) < 1) return('no results')
# p = ggplot(d, aes(x=e.CO/n.CO, y=e.SEQ/n.SEQ)) + geom_point() + geom_label(aes(label=study_id)) + geom_abline(slope=1, intercept=0, color='#00000055')
# print(p)
m = metabin(event.e=e.CO, n.e=n.CO, event.c=e.SEQ, n.c=n.SEQ, studlab=paste(author, year, split), data=d, subset=!is.na(n.CO) & !is.na(n.SEQ) & !is.na(e.CO) & !is.na(e.SEQ), subgroup=subgroup, method='Inverse')
# return(list(TE=m$TE.random, SE=m$seTE.random))
} else if(method=='metacont') {
if(!cont %in% colnames(data)) stop(paste(cont, 'not in columns'))
if(!ci1 %in% colnames(data)) stop(paste(ci1, 'not in columns'))
if(!ci2 %in% colnames(data)) stop(paste(ci2, 'not in columns'))
d <- d %>%
filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(!!sym(cont)) & !is.na(!!sym(ci1)) & !is.na(!!sym(ci2)))
ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0 & !is.na(seq) & !is.na(co))
if(nrow(ids)<1) return('no results')
# print(ids)
d <- d %>%
filter(id %in% ids$id) %>% mutate(sd=(as.numeric(!!sym(ci2))-as.numeric(!!sym(ci1)))/Z) %>%
group_by(study_id, split, vac_mode, author, year) %>%
summarize(n=sum(N, na.rm=T),
cont=sum(N*as.numeric(!!sym(cont)))/sum(N, na.rm=T),
SD=sum(N*sd)/sum(N, na.rm=T), .groups='drop')
# print(d)
d = d %>%
pivot_wider(names_from=vac_mode, values_from=c(n, cont, SD), names_glue="{.value}.{vac_mode}")
if(nrow(d) < 1 | all(is.na(d$n.CO)) | all(is.na(d$n.SEQ))) return('no results')
# print(d)
# if(nrow(d) > 2) {
# p = ggplot(d, aes(x=cont.CO, y=cont.SEQ)) + geom_point() + geom_errorbarh(aes(xmin=cont.CO-SD.CO, xmax=cont.CO+SD.CO), width=.2) + geom_errorbar(aes(ymin=cont.SEQ-SD.SEQ, ymax=cont.SEQ+SD.SEQ)) + geom_label(aes(label=study_id)) + geom_density_2d_filled(alpha=0.1) + geom_histogram(aes(y=..density..), alpha=0.5) + geom_histogram(aes(x=..density..), alpha=0.5) + geom_abline(slope=1, intercept=0, color='#00000055')
# print(p)
# }
#
# if(!is.na(cont) & 'gmt_bl' %in% cont) {
# p = ggplot(d, aes(x=cont.CO, y=cont.SEQ)) + geom_density_2d(alpha=0.1) + geom_histogram(aes(y=..density..), alpha=0.5) + geom_point() + geom_abline(slope=1, intercept=0, color='#00000055')
# print(p)
# }
#
# if(!is.na(cont) & 'gmfr' %in% cont) {
# p = ggplot(d, aes(x=cont.CO)) + geom_density(alpha=0.1) + geom_histogram(aes(y=..density..), alpha=0.5) + geom_point()
# print(p)
# }
m = metacont(mean.e=cont.CO, n.e=n.CO, sd.e=SD.CO, mean.c=cont.SEQ, n.c=n.SEQ, sd.c=SD.SEQ, studlab=paste(author, year, split), data=d, sm='ROM')#, subset=!is.na(n.CO) & !is.na(n.SEQ) & !is.na(cont.CO) & !is.na(cont.SEQ))
} else {
stop(paste0('undefined method "', method, '"'))
}
forest(m, test.overall.random=T, lab.e='CO', lab.c='SEQ')
if(funnel) {
funnel(m, random=T)
metabias(m, method.bias='Egger', k.min=6)
metabias(m, method.bias='Begg', k.min=6)
}
return(m)
}
datasource = 'data/v.1.5_26.03.2025_data_extraction_samuel_CT.xlsx'
Synopsis
Meta analysis of Covid vaccination effectiveness with other
co-vaccinations.
Methods
Statistical analysis was carried out using the
meta (v8.0.2)
package (Balduzzi,
RmĂĽcker, and Schwarzer 2019) in the
R version 4.4.2 (2024-10-31)
programming language (R Core Team 2021).
Meta-analysis of proportions was pooled by fitting a random intercept
logistic regression model with the metaprop
function to
logit transformed proportions in order to include valid estimates for
studies with very few or no events. Study estimates are shown with
computed Clopper-Pearson 95% confidence intervals. The same pooled
estimates were conducted for subgroups and tested by \(\chi^2\)-test for significant pairwise
differences.
Heterogeneity was assessed by estimating the maximum-likelihood of
\(\tau^2\) and quantified with the
\(I^2\) index.
Groupwise comparisons of studies were analyzed using odds ratios
between treatment and control group with the metabin
function using a random effects model for the pooled odds ratio using
inverse variance weighting (instead of the Mantel-Haenszel method (Mantel and Haenszel 1959)). Heterogeneity was
assessed using a restricted maximum-likelihood estimator of \(\tau^2\).
Publication bias was assessed using a funnelplot of the logit
transformed prevalence and inverse variance and tested using the
metabias
function with linear regression test (Egger et al. 1997) and rank correlation test
(Begg and Mazumdar 1994) for
asymmetry.
Study Summaries
d = readxl::read_xlsx(datasource, sheet='main sheet', na='N/A', skip=5)
ids = d %>% group_by(study_id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0)
d = d %>% filter(study_id %in% ids$study_id & vac_mode %in% c('CO', 'SEQ'))
d$age = as.numeric(ifelse(is.na(d$age_median), d$age_mean, d$age_median))
for(col in grep('(gmr|gmfr|gmt|spr|scr|ae)_', names(d), value=T)){d[,col] = as.numeric(unlist(d[,col]))}
d$gmfr_allb_n = d$gmfr_n_total_fu
d$gmfr_allb = rowMeans(d[,grep('gmfr_b_.*_mean_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci1 = rowMeans(d[,grep('gmfr_b_.*_ci1_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci2 = rowMeans(d[,grep('gmfr_b_.*_ci2_fu', names(d), value=T)], na.rm=T)
# d[,c('study_id', grep('gmfr_allb', names(d), value=T))]
#
# wide_test(d, 'metacont', 'Total B', vac_c='vac_f', vac_v='F', n='gmfr_allb_n', n_backup='gmfr_n_total_fu', cont='gmfr_allb', ci1='gmfr_allb_ci1', ci2='gmfr_allb_ci2')
studies = subset(d, main==1)
rownames(studies) = as.character(studies$study_id)
d$author = studies[d$study_id,]$author
d$year = as.character(studies[d$study_id,]$year)
d$split[is.na(d$split)] = ''
options(scipen=999)
breaks = 10^(1:10)
ggplot(studies, aes(x=paste(study_id, author, year), y=as.integer(participants_total))) + geom_bar(stat='identity') + scale_y_log10(breaks=breaks, labels=breaks) + geom_text(aes(label=as.integer(participants_total)), size=3, color='white', vjust=0.5, hjust=1.2) + theme_classic() + annotation_logticks(sides='b') + theme(panel.grid.minor=element_blank()) + coord_flip() + ylab('participants') + xlab('')

ggplot(studies, aes(x=paste(study_id, author, year))) + geom_point(aes(y=as.integer(age_mean), color='mean'), stat='identity') + geom_point(aes(y=as.integer(age_median), color='median'), stat='identity') + coord_flip() + scale_color_manual(values=c('mean'='skyblue', 'median'='blue')) + xlab('') + ylab('age')

ggplot(studies, aes(x=paste(study_id, author, year))) + geom_point(aes(y=age, color='mean'), stat='identity') + coord_flip() + ggtitle('Age (median OR mean if N/A)') + xlab('')

ggplot(studies, aes(x=age)) + geom_density() + geom_histogram(aes(y=..density..), alpha=0.5)

d.age = d %>% mutate(age=as.numeric(ifelse(is.na(age_median), age_mean, age_median))) %>% group_by(study_id, vac_mode) %>% summarize(age=mean(age)) %>% pivot_wider(names_from=vac_mode, values_from=c(age), names_glue="{.value}.{vac_mode}")
ggplot(d.age, aes(x=age.CO, y=age.SEQ)) + geom_point() + geom_label(aes(label=as.integer(study_id)))

comps = readxl::read_xlsx(datasource, sheet='comparisons')
comps = subset(comps, use==1)
# d3 = readxl::read_xlsx(datasource, sheet=2, na='N/A', skip=1)
There are 57 studies with a total of 7677520 participants.
Primary Outcomes
# ggplot(d) + geom_point(aes(x=as.numeric(gmt_h1n1_mean_bl), y=as.numeric(gmt_h1n1_mean_fu))) + coord_trans('log10', 'log10')
# ggplot(d, aes(x=as.numeric(gmr_h1n1_mean_fu), y=as.numeric(gmt_h1n1_mean_fu)/as.numeric(gmt_h1n1_mean_bl))) + geom_point() + geom_text(aes(label=paste(study_id, split))) + coord_trans('log10', 'log10')
cols = unique(c(comps$n, comps$n_backup, comps$events, comps$cont, comps$ci1, comps$ci2))
if(any(!is.na(cols) & !cols %in% colnames(d))) {
stop(paste('the following columns are missing from the data:', paste(cols[!is.na(cols) & !cols %in% colnames(d)], collapse=', ')))
}
comps$TE = NA; comps$SE = NA; comps$n.seq = NA; comps$n.co = NA
for(vac in unique(comps$vaccination)) {
cat(paste('\n\n##', vac), '\n\n')
for(ab in unique(comps[comps$vaccination==vac,]$antibody)) {
gmt_bl = subset(comps, vaccination==vac & antibody==ab & timepoint=='Baseline' & outcome=='GMT')
gmt_fu = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMT')
gmfr = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMFR')
gmr = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMR')
if(nrow(gmt_bl)==1 & nrow(gmt_fu)==1 & nrow(gmfr)==1 & nrow(gmr)==1){
d[,gmt_bl$n[1]] = as.numeric(unlist(d[,gmt_bl$n[1]])); d[,gmt_fu$n[1]] = as.numeric(unlist(d[,gmt_fu$n[1]])); d[,gmfr$n[1]] = as.numeric(unlist(d[,gmfr$n[1]]))
d[,gmt_bl$n_backup[1]] = as.numeric(unlist(d[,gmt_bl$n_backup[1]])); d[,gmt_fu$n_backup[1]] = as.numeric(unlist(d[,gmt_fu$n_backup[1]])); d[,gmfr$n_backup[1]] = as.numeric(unlist(d[,gmfr$n_backup[1]]))
d[,gmt_bl$cont[1]] = as.numeric(unlist(d[,gmt_bl$cont[1]])); d[,gmt_bl$ci1[1]] = as.numeric(unlist(d[,gmt_bl$ci1[1]])); d[,gmt_bl$ci2[1]] = as.numeric(unlist(d[,gmt_bl$ci2[1]]))
d[,gmt_fu$cont[1]] = as.numeric(unlist(d[,gmt_fu$cont[1]])); d[,gmt_fu$ci1[1]] = as.numeric(unlist(d[,gmt_fu$ci1[1]])); d[,gmt_fu$ci2[1]] = as.numeric(unlist(d[,gmt_fu$ci2[1]]))
d[,gmr$cont[1]] = as.numeric(unlist(d[,gmr$cont[1]])); d[,gmr$ci1[1]] = as.numeric(unlist(d[,gmr$ci1[1]])); d[,gmr$ci2[1]] = as.numeric(unlist(d[,gmr$ci2[1]]))
d[,gmfr$cont[1]] = as.numeric(unlist(d[,gmfr$cont[1]])); d[,gmfr$ci1[1]] = as.numeric(unlist(d[,gmfr$ci1[1]])); d[,gmfr$ci2[1]] = as.numeric(unlist(d[,gmfr$ci2[1]]))
sel = is.na(is.na(d[,gmt_bl$n[1]]) & !is.na(d[,gmt_bl$n_backup[1]]))
d[sel, gmt_bl$n[1]] = d[sel, gmt_bl$n_backup[1]]
sel = is.na(is.na(d[,gmt_fu$n[1]]) & !is.na(d[,gmt_fu$n_backup[1]]))
d[sel, gmt_fu$n[1]] = d[sel, gmt_fu$n_backup[1]]
d$temp_gmfr = as.numeric(unlist(d[,gmfr$cont[1]])); d$temp_gmr = as.numeric(unlist(d[,gmr$cont[1]])); d$temp_gmt_bl = as.numeric(unlist(d[,gmt_bl$cont[1]])); d$temp_gmt_fu = as.numeric(unlist(d[,gmt_fu$cont[1]]))
# p = ggplot(d) + geom_point(aes(x=temp_gmfr, y=temp_gmr)) + coord_trans('log10', 'log10') + ggtitle('Correlation of GMFR vs GMR', ab)
# print(p)
# p = ggplot(d) + geom_point(aes(x=temp_gmfr, y=temp_gmt_fu/temp_gmt_bl)) + coord_trans('log10', 'log10') + ggtitle('Correlation of GMFR vs GMT_fu/GMT_bl', ab)
# print(p)
sel = d[,gmt_bl$n[1]] == d[,gmt_fu$n[1]]
sel[is.na(sel)] = F
sel = is.na(d[,gmfr$cont[1]]) & !is.na(d[,gmt_bl$cont[1]]) & !is.na(d[,gmt_fu$cont[1]]) & sel
d[sel, gmfr$cont[1]] = as.numeric(unlist(d[sel, gmt_fu$cont[1]]))/as.numeric(unlist(d[sel, gmt_bl$cont[1]]))
d[sel, gmfr$ci1[1]] = as.numeric(unlist(d[sel, gmt_fu$ci1[1]]))/as.numeric(unlist(d[sel, gmt_bl$ci1[1]]))
d[sel, gmfr$ci2[1]] = as.numeric(unlist(d[sel, gmt_fu$ci2[1]]))/as.numeric(unlist(d[sel, gmt_bl$ci2[1]]))
d[sel, gmfr$n[1]] = as.numeric(unlist(d[sel, gmt_bl$n[1]]))
}
}
for(out in unique(comps[comps$vaccination==vac,]$outcome)) {
cat(paste('\n\n###', out), '\n\n')
for(tp in unique(comps[comps$vaccination==vac & comps$outcome==out,]$timepoint)) {
cat(paste('\n\n####', tp), '\n\n')
for(ab in unique(comps[comps$vaccination==vac & comps$outcome==out & comps$timepoint==tp,]$antibody)) {
cat(paste('\n\n#####', ab, '\n\n'))
i = which(comps$vaccination==vac & comps$outcome==out & comps$timepoint==tp & comps$antibody==ab)
if(length(i) != 1) next
res = wide_test(d, method=comps$method[i], vac_c=comps$vac_c[i], vac_v=comps$vac_v[i], n=comps$n[i], n_backup=comps$n_backup[i], events=comps$events[i], cont=comps$cont[i], ci1=comps$ci1[i], ci2=comps$ci2[i])
# cop = copas(res)
# plot(cop)
if(is.character(res)) {cat('\nNO DATA\n\n'); next}
comps$TE[i]=res$TE.random; comps$SE[i]=res$seTE.random; comps$n.seq[i]=sum(res$n.c); comps$n.co[i]=sum(res$n.e)
}
cat('\n\n##### Summary\n\n')
m = metagen(TE=TE, seTE=SE, n.c=n.seq, n.e=n.co, studlab=antibody, data=comps, subset=vaccination==vac & outcome==out & timepoint==tp, sm="RR", label.e='CO', label.c='SEQ')
forest(m)
}
}
}
Influenza
SCR
Follow-up
H1N1
