knitr::opts_chunk$set(warning=F, mesage=F, dpi=200, fig.path='figures/', dev=c('png', 'svg'))
library(meta)
library(metasens)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)

Z = qnorm(0.975) * 2

wide_bin_test = function(data, title='', vac_c, vac_v, n, n_backup, events, funnel=F, subgroup_var=NULL) {
   d <- data %>% mutate(id=study_id,
                        N=as.integer(ifelse(is.na(!!sym(n)), !!sym(n_backup), !!sym(n))),
                        e=as.integer(!!sym(events)),
                        subgroup=rep('', nrow(data))) %>%
      filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(e)) 
      
   if(!is.null(subgroup_var)){ d = d %>% mutate(subgroup = !!sym(subgroup_var), subgroup = factor(ifelse(is.na(subgroup), 'undetermined', as.character(subgroup))), id = paste(id, as.character(subgroup)))}
   
   ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T), .groups='drop') %>% filter(seq>0 & co>0)
   if(nrow(ids)<1) return('no results')
   
   d = d %>% filter(id %in% ids$id) %>%
      group_by(id, subgroup, author, year, vac_mode) %>%
      summarize(N=sum(N, na.rm=T), e=sum(e, na.rm=T)) %>%
      pivot_wider(id_cols=c(id, author, year, subgroup), names_from=vac_mode, values_from=c(N, e), names_glue="{.value}.{vac_mode}")
   
   if(nrow(d) < 1) return('no results')
   
   if(!is.null(subgroup_var)){
      m = metabin(event.e=e.CO, n.e=N.CO, event.c=e.SEQ, n.c=N.SEQ, studlab=paste(author, year), data=d, subset=!is.na(N.CO) & !is.na(N.SEQ) & !is.na(e.CO) & !is.na(e.SEQ), subgroup=subgroup, method='Inverse')
   } else {
      m = metabin(event.e=e.CO, n.e=N.CO, event.c=e.SEQ, n.c=N.SEQ, studlab=paste(author, year), data=d, subset=!is.na(N.CO) & !is.na(N.SEQ) & !is.na(e.CO) & !is.na(e.SEQ), method='Inverse')
   }

   forest(m, test.overall.random=T, lab.e='CO', lab.c='SEQ')
   if(funnel) {
      funnel(m, random=T)
      metabias(m, method.bias='Egger', k.min=6)
      metabias(m, method.bias='Begg', k.min=6)
   }
   return(m)
}
      
wide_cont_test = function(data, title='', vac_c, vac_v, n, n_backup, cont, ci1, ci2, funnel=F, subgroup_var=NULL) {
   d <- data %>% mutate(id=study_id,
                        N=as.integer(ifelse(is.na(!!sym(n)), !!sym(n_backup), !!sym(n))),
                        mean=as.numeric(!!sym(cont)),
                        sd=ifelse(!!sym(ci2)==-1, !!sym(ci1), (as.numeric(!!sym(ci2))-as.numeric(!!sym(ci1)))/Z),
                        subgroup=rep('', nrow(data))) %>%
      filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(mean) & !is.na(sd))
   
   if(!is.null(subgroup_var)){ d = d %>% mutate(subgroup = !!sym(subgroup_var), subgroup = factor(ifelse(is.na(subgroup), 'undetermined', as.character(subgroup))), id = paste(id, as.character(subgroup)))}
   
   ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0 & !is.na(seq) & !is.na(co))
   if(nrow(ids)<1) return('no results')

   d <- d %>%
      filter(id %in% ids$id) %>%
      group_by(id, subgroup, author, year, vac_mode) %>%
      summarize(mean=sum(N*mean, na.rm=T)/sum(N, na.rm=T),
                sd=sum(N*sd, na.rm=T)/sum(N, na.rm=T), N=sum(N, na.rm=T)) %>%
      pivot_wider(id_cols=c(id, author, year, subgroup), names_from=vac_mode, values_from=c(N, mean, sd), names_glue="{.value}.{vac_mode}")
   
   if(nrow(d) < 1 | all(is.na(d$N.CO)) | all(is.na(d$N.SEQ))) return('no results')
   
   if(!is.null(subgroup_var)){
      m = metacont(mean.e=mean.CO, n.e=N.CO, sd.e=sd.CO, mean.c=mean.SEQ, n.c=N.SEQ, sd.c=sd.SEQ, studlab=paste(author, year), data=d, sm='ROM', subgroup=subgroup)
   } else {
      m = metacont(mean.e=mean.CO, n.e=N.CO, sd.e=sd.CO, mean.c=mean.SEQ, n.c=N.SEQ, sd.c=sd.SEQ, studlab=paste(author, year), data=d, sm='ROM')
   }

   forest(m, test.overall.random=T, lab.e='CO', lab.c='SEQ')
   if(funnel) {
      funnel(m, random=T)
      metabias(m, method.bias='Egger', k.min=6)
      metabias(m, method.bias='Begg', k.min=6)
   }
   return(m)
}

datasource = 'data/v.1.9_05.06.2025_data_extraction_samuel.xlsx'

Synopsis

Meta analysis of Covid vaccination effectiveness with other co-vaccinations.

Methods

Statistical analysis was carried out using the meta (v8.1.0) package (Balduzzi, Rmücker, and Schwarzer 2019) in the R version 4.5.0 (2025-04-11) programming language (R Core Team 2021).

Meta-analysis of proportions was pooled by fitting a random intercept logistic regression model with the metaprop function to logit transformed proportions in order to include valid estimates for studies with very few or no events. Study estimates are shown with computed Clopper-Pearson 95% confidence intervals. The same pooled estimates were conducted for subgroups and tested by \(\chi^2\)-test for significant pairwise differences.

Heterogeneity was assessed by estimating the maximum-likelihood of \(\tau^2\) and quantified with the \(I^2\) index.

Groupwise comparisons of studies were analyzed using odds ratios between treatment and control group with the metabin function using a random effects model for the pooled odds ratio using inverse variance weighting (instead of the Mantel-Haenszel method (Mantel and Haenszel 1959)). Heterogeneity was assessed using a restricted maximum-likelihood estimator of \(\tau^2\).

Publication bias was assessed using a funnelplot of the logit transformed prevalence and inverse variance and tested using the metabias function with linear regression test (Egger et al. 1997) and rank correlation test (Begg and Mazumdar 1994) for asymmetry.

Study Summaries

d = readxl::read_xlsx(datasource, sheet='main sheet', na='N/A', skip=5) %>%
   mutate(study_id = as.character(as.integer(study_id)),
          year = as.integer(year),
          main = as.integer(main),
          study_design = as.integer(study_design),
          valency = as.integer(valency))
ids = d %>% group_by(study_id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0)
d = d %>% filter(study_id %in% ids$study_id & vac_mode %in% c('CO', 'SEQ'))

for(col in grep('(gmr|gmfr|gmt|spr|scr|ae|age|participants|sex|n)_', names(d), value=T)){d[,col] = as.numeric(unlist(d[,col]))}

d = d %>% 
   mutate(age = ifelse(is.na(age_median), age_mean, age_median),
          sex_female_ratio = sex_female/participants_total_groups,
          rct = factor(ifelse(!is.na(study_design) & study_design=='4', 'RCT', 'non-RCT')),
          valency = factor(ifelse(valency==1, 'quadrivalent', ifelse(valency==2, 'trivalent', NA)))) %>%
   mutate(age_dic = factor(ifelse(!is.na(age_range_min) & age_range_max<65 & age_range_min<60 | !is.na(age_sd) & age+age_sd<60 | age<60, 'young', ifelse(!is.na(age_range_min) & age_range_min>=65 & age_range_max>70 | !is.na(age_sd) & age-age_sd>70 | age>70, 'aged', 'other')), levels=c('young', 'aged', 'other')))

d$sex_female_ratio[is.na(d$sex_female_ratio)] = d$sex_rate_female[is.na(d$sex_female_ratio)]
d = d %>% mutate(sex_dic = factor(cut(sex_female_ratio, c(0, .45, .55, 1), labels=c('male', 'neutral', 'female')), levels=c('male', 'neutral', 'female', 'unknown')))

d$age_dic[is.na(d$age_dic)] = 'other'
d$sex_dic[is.na(d$sex_dic)] = 'unknown'

comps = readxl::read_xlsx(datasource, sheet='comparisons')

# recalculating GMFR values from GMT fu/bl if GMFR not available
for(vac in unique(comps$vaccination)) {
   for(ab in unique(comps[comps$vaccination==vac,]$antibody)) {
      gmt_bl = subset(comps, vaccination==vac & antibody==ab & timepoint=='Baseline' & outcome=='GMT')
      gmt_fu = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMT')
      gmfr = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMFR')
      if(nrow(gmt_bl)!=1 | nrow(gmt_fu)!=1 | nrow(gmfr)!=1){
         # print(paste('GMFR recalculation skipping', vac, ab))
         next
      }
      sel = is.na(d[,gmfr$cont[1]]) & !is.na(d[,gmt_bl$cont[1]]) & !is.na(d[,gmt_fu$cont[1]])
      d[sel, gmfr$n[1]] = as.integer(unlist(d[sel, gmt_fu$n[1]]))
      log_gmfr = log(as.numeric(unlist(d[sel, gmt_fu$cont[1]])))-log(as.numeric(unlist(d[sel, gmt_bl$cont[1]])))
      d[sel, gmfr$cont[1]] = exp(log_gmfr)
      bl_ci1 = as.numeric(unlist(d[sel, gmt_bl$ci1[1]]))
      bl_ci2 = as.numeric(unlist(d[sel, gmt_bl$ci2[1]]))
      fu_ci1 = as.numeric(unlist(d[sel, gmt_fu$ci1[1]]))
      fu_ci2 = as.numeric(unlist(d[sel, gmt_fu$ci2[1]]))
      se = sqrt( ((log(fu_ci2)-log(fu_ci1))/Z)^2 + ((log(bl_ci2)-log(bl_ci1))/Z)^2 )
      d[sel, gmfr$ci1[1]] = exp(log_gmfr - 1.96 * se)
      d[sel, gmfr$ci2[1]] = exp(log_gmfr + 1.96 * se)
   }
}

d$gmfr_allb_n = d$gmfr_n_total_fu
d$gmfr_allb = rowMeans(d[,grep('gmfr_b_.*_mean_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci1 = rowMeans(d[,grep('gmfr_b_.*_ci1_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci2 = rowMeans(d[,grep('gmfr_b_.*_ci2_fu', names(d), value=T)], na.rm=T)

studies = subset(d, main==1)
writexl::write_xlsx(subset(studies, select=c(author, year, jounral, participants_total, center_n, country, sex_female_ratio, age, age_sd)), 'summary_table.xlsx')
DT::datatable(subset(studies, select=c(author, year, jounral, participants_total, center_n, country, sex_female_ratio, age, age_sd)), filter='top', extensions='Buttons', options=list(dom='Bfrtip', buttons=c('copy', 'csv', 'excel', 'pdf', 'print')))
rownames(studies) = as.character(studies$study_id)
d$author = studies[d$study_id,]$author
d$rct = studies[d$study_id,]$rct
d$year = as.character(studies[d$study_id,]$year)

cols = unique(c(comps$n, comps$n_backup, comps$events, comps$cont, comps$ci1, comps$ci2))
if(any(!is.na(cols) & !cols %in% colnames(d))) {
   stop(paste('the following columns are missing from the data:', paste(cols[!is.na(cols) & !cols %in% colnames(d)], collapse=', ')))
}
comps = subset(comps, use==1)
comps$TE = NA; comps$SE = NA; comps$n.seq = NA; comps$n.co = NA

There are 57 studies with a total of 7677520 participants.

Number participants

options(scipen=999)
breaks = 10^(1:10)
ggplot(studies, aes(x=paste(study_id, author, year), y=as.integer(participants_total))) + geom_bar(stat='identity') + scale_y_log10(breaks=breaks, labels=breaks) + geom_text(aes(label=as.integer(participants_total)), size=3, color='white', vjust=0.5, hjust=1.2) + theme_classic() + annotation_logticks(sides='b') + theme(panel.grid.minor=element_blank()) + coord_flip() + ylab('participants') + xlab('')