knitr::opts_chunk$set(warning=F, mesage=F, dpi=200, fig.path='figures/', dev=c('png', 'svg'))
library(meta)
library(metasens)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)
Z = qnorm(0.975) * 2
wide_bin_test = function(data, title='', vac_c, vac_v, n, n_backup, events, funnel=F, subgroup_var=NULL) {
d <- data %>% mutate(id=study_id,
N=as.integer(ifelse(is.na(!!sym(n)), !!sym(n_backup), !!sym(n))),
e=as.integer(!!sym(events)),
subgroup=rep('', nrow(data))) %>%
filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(e))
if(!is.null(subgroup_var)){ d = d %>% mutate(subgroup = !!sym(subgroup_var), subgroup = factor(ifelse(is.na(subgroup), 'undetermined', as.character(subgroup))), id = paste(id, as.character(subgroup)))}
ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T), .groups='drop') %>% filter(seq>0 & co>0)
if(nrow(ids)<1) return('no results')
d = d %>% filter(id %in% ids$id) %>%
group_by(id, subgroup, author, year, vac_mode) %>%
summarize(N=sum(N, na.rm=T), e=sum(e, na.rm=T)) %>%
pivot_wider(id_cols=c(id, author, year, subgroup), names_from=vac_mode, values_from=c(N, e), names_glue="{.value}.{vac_mode}")
if(nrow(d) < 1) return('no results')
if(!is.null(subgroup_var)){
m = metabin(event.e=e.CO, n.e=N.CO, event.c=e.SEQ, n.c=N.SEQ, studlab=paste(author, year), data=d, subset=!is.na(N.CO) & !is.na(N.SEQ) & !is.na(e.CO) & !is.na(e.SEQ), subgroup=subgroup, method='Inverse')
} else {
m = metabin(event.e=e.CO, n.e=N.CO, event.c=e.SEQ, n.c=N.SEQ, studlab=paste(author, year), data=d, subset=!is.na(N.CO) & !is.na(N.SEQ) & !is.na(e.CO) & !is.na(e.SEQ), method='Inverse')
}
forest(m, test.overall.random=T, lab.e='CO', lab.c='SEQ')
if(funnel) {
funnel(m, random=T)
metabias(m, method.bias='Egger', k.min=6)
metabias(m, method.bias='Begg', k.min=6)
}
return(m)
}
wide_cont_test = function(data, title='', vac_c, vac_v, n, n_backup, cont, ci1, ci2, funnel=F, subgroup_var=NULL) {
d <- data %>% mutate(id=study_id,
N=as.integer(ifelse(is.na(!!sym(n)), !!sym(n_backup), !!sym(n))),
mean=as.numeric(!!sym(cont)),
sd=ifelse(!!sym(ci2)==-1, !!sym(ci1), (as.numeric(!!sym(ci2))-as.numeric(!!sym(ci1)))/Z),
subgroup=rep('', nrow(data))) %>%
filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(mean) & !is.na(sd))
if(!is.null(subgroup_var)){ d = d %>% mutate(subgroup = !!sym(subgroup_var), subgroup = factor(ifelse(is.na(subgroup), 'undetermined', as.character(subgroup))), id = paste(id, as.character(subgroup)))}
ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0 & !is.na(seq) & !is.na(co))
if(nrow(ids)<1) return('no results')
d <- d %>%
filter(id %in% ids$id) %>%
group_by(id, subgroup, author, year, vac_mode) %>%
summarize(mean=sum(N*mean, na.rm=T)/sum(N, na.rm=T),
sd=sum(N*sd, na.rm=T)/sum(N, na.rm=T), N=sum(N, na.rm=T)) %>%
pivot_wider(id_cols=c(id, author, year, subgroup), names_from=vac_mode, values_from=c(N, mean, sd), names_glue="{.value}.{vac_mode}")
if(nrow(d) < 1 | all(is.na(d$N.CO)) | all(is.na(d$N.SEQ))) return('no results')
if(!is.null(subgroup_var)){
m = metacont(mean.e=mean.CO, n.e=N.CO, sd.e=sd.CO, mean.c=mean.SEQ, n.c=N.SEQ, sd.c=sd.SEQ, studlab=paste(author, year), data=d, sm='ROM', subgroup=subgroup)
} else {
m = metacont(mean.e=mean.CO, n.e=N.CO, sd.e=sd.CO, mean.c=mean.SEQ, n.c=N.SEQ, sd.c=sd.SEQ, studlab=paste(author, year), data=d, sm='ROM')
}
forest(m, test.overall.random=T, lab.e='CO', lab.c='SEQ')
if(funnel) {
funnel(m, random=T)
metabias(m, method.bias='Egger', k.min=6)
metabias(m, method.bias='Begg', k.min=6)
}
return(m)
}
datasource = 'data/v.1.9_05.06.2025_data_extraction_samuel.xlsx'
Meta analysis of Covid vaccination effectiveness with other co-vaccinations.
Statistical analysis was carried out using the
meta (v8.1.0)
package (Balduzzi,
Rmücker, and Schwarzer 2019) in the
R version 4.5.0 (2025-04-11)
programming language (R Core Team 2021).
Meta-analysis of proportions was pooled by fitting a random intercept
logistic regression model with the metaprop
function to
logit transformed proportions in order to include valid estimates for
studies with very few or no events. Study estimates are shown with
computed Clopper-Pearson 95% confidence intervals. The same pooled
estimates were conducted for subgroups and tested by \(\chi^2\)-test for significant pairwise
differences.
Heterogeneity was assessed by estimating the maximum-likelihood of \(\tau^2\) and quantified with the \(I^2\) index.
Groupwise comparisons of studies were analyzed using odds ratios
between treatment and control group with the metabin
function using a random effects model for the pooled odds ratio using
inverse variance weighting (instead of the Mantel-Haenszel method (Mantel and Haenszel 1959)). Heterogeneity was
assessed using a restricted maximum-likelihood estimator of \(\tau^2\).
Publication bias was assessed using a funnelplot of the logit
transformed prevalence and inverse variance and tested using the
metabias
function with linear regression test (Egger et al. 1997) and rank correlation test
(Begg and Mazumdar 1994) for
asymmetry.
d = readxl::read_xlsx(datasource, sheet='main sheet', na='N/A', skip=5) %>%
mutate(study_id = as.character(as.integer(study_id)),
year = as.integer(year),
main = as.integer(main),
study_design = as.integer(study_design),
valency = as.integer(valency))
ids = d %>% group_by(study_id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0)
d = d %>% filter(study_id %in% ids$study_id & vac_mode %in% c('CO', 'SEQ'))
for(col in grep('(gmr|gmfr|gmt|spr|scr|ae|age|participants|sex|n)_', names(d), value=T)){d[,col] = as.numeric(unlist(d[,col]))}
d = d %>%
mutate(age = ifelse(is.na(age_median), age_mean, age_median),
sex_female_ratio = sex_female/participants_total_groups,
rct = factor(ifelse(!is.na(study_design) & study_design=='4', 'RCT', 'non-RCT')),
valency = factor(ifelse(valency==1, 'quadrivalent', ifelse(valency==2, 'trivalent', NA)))) %>%
mutate(age_dic = factor(ifelse(!is.na(age_range_min) & age_range_max<65 & age_range_min<60 | !is.na(age_sd) & age+age_sd<60 | age<60, 'young', ifelse(!is.na(age_range_min) & age_range_min>=65 & age_range_max>70 | !is.na(age_sd) & age-age_sd>70 | age>70, 'aged', 'other')), levels=c('young', 'aged', 'other')))
d$sex_female_ratio[is.na(d$sex_female_ratio)] = d$sex_rate_female[is.na(d$sex_female_ratio)]
d = d %>% mutate(sex_dic = factor(cut(sex_female_ratio, c(0, .45, .55, 1), labels=c('male', 'neutral', 'female')), levels=c('male', 'neutral', 'female', 'unknown')))
d$age_dic[is.na(d$age_dic)] = 'other'
d$sex_dic[is.na(d$sex_dic)] = 'unknown'
comps = readxl::read_xlsx(datasource, sheet='comparisons')
# recalculating GMFR values from GMT fu/bl if GMFR not available
for(vac in unique(comps$vaccination)) {
for(ab in unique(comps[comps$vaccination==vac,]$antibody)) {
gmt_bl = subset(comps, vaccination==vac & antibody==ab & timepoint=='Baseline' & outcome=='GMT')
gmt_fu = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMT')
gmfr = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMFR')
if(nrow(gmt_bl)!=1 | nrow(gmt_fu)!=1 | nrow(gmfr)!=1){
# print(paste('GMFR recalculation skipping', vac, ab))
next
}
sel = is.na(d[,gmfr$cont[1]]) & !is.na(d[,gmt_bl$cont[1]]) & !is.na(d[,gmt_fu$cont[1]])
d[sel, gmfr$n[1]] = as.integer(unlist(d[sel, gmt_fu$n[1]]))
log_gmfr = log(as.numeric(unlist(d[sel, gmt_fu$cont[1]])))-log(as.numeric(unlist(d[sel, gmt_bl$cont[1]])))
d[sel, gmfr$cont[1]] = exp(log_gmfr)
bl_ci1 = as.numeric(unlist(d[sel, gmt_bl$ci1[1]]))
bl_ci2 = as.numeric(unlist(d[sel, gmt_bl$ci2[1]]))
fu_ci1 = as.numeric(unlist(d[sel, gmt_fu$ci1[1]]))
fu_ci2 = as.numeric(unlist(d[sel, gmt_fu$ci2[1]]))
se = sqrt( ((log(fu_ci2)-log(fu_ci1))/Z)^2 + ((log(bl_ci2)-log(bl_ci1))/Z)^2 )
d[sel, gmfr$ci1[1]] = exp(log_gmfr - 1.96 * se)
d[sel, gmfr$ci2[1]] = exp(log_gmfr + 1.96 * se)
}
}
d$gmfr_allb_n = d$gmfr_n_total_fu
d$gmfr_allb = rowMeans(d[,grep('gmfr_b_.*_mean_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci1 = rowMeans(d[,grep('gmfr_b_.*_ci1_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci2 = rowMeans(d[,grep('gmfr_b_.*_ci2_fu', names(d), value=T)], na.rm=T)
studies = subset(d, main==1)
writexl::write_xlsx(subset(studies, select=c(author, year, jounral, participants_total, center_n, country, sex_female_ratio, age, age_sd)), 'summary_table.xlsx')
DT::datatable(subset(studies, select=c(author, year, jounral, participants_total, center_n, country, sex_female_ratio, age, age_sd)), filter='top', extensions='Buttons', options=list(dom='Bfrtip', buttons=c('copy', 'csv', 'excel', 'pdf', 'print')))
rownames(studies) = as.character(studies$study_id)
d$author = studies[d$study_id,]$author
d$rct = studies[d$study_id,]$rct
d$year = as.character(studies[d$study_id,]$year)
cols = unique(c(comps$n, comps$n_backup, comps$events, comps$cont, comps$ci1, comps$ci2))
if(any(!is.na(cols) & !cols %in% colnames(d))) {
stop(paste('the following columns are missing from the data:', paste(cols[!is.na(cols) & !cols %in% colnames(d)], collapse=', ')))
}
comps = subset(comps, use==1)
comps$TE = NA; comps$SE = NA; comps$n.seq = NA; comps$n.co = NA
There are 57 studies with a total of 7677520 participants.
options(scipen=999)
breaks = 10^(1:10)
ggplot(studies, aes(x=paste(study_id, author, year), y=as.integer(participants_total))) + geom_bar(stat='identity') + scale_y_log10(breaks=breaks, labels=breaks) + geom_text(aes(label=as.integer(participants_total)), size=3, color='white', vjust=0.5, hjust=1.2) + theme_classic() + annotation_logticks(sides='b') + theme(panel.grid.minor=element_blank()) + coord_flip() + ylab('participants') + xlab('')