knitr::opts_chunk$set(warning=F, mesage=F, dpi=200, fig.path='figures/', dev=c('png', 'svg'))
library(meta)
library(metasens)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)

Z = qnorm(0.975) * 2

wide_bin_test = function(data, title='', vac_c, vac_v, n, n_backup, events, funnel=F, subgroup_var=NULL) {
   d <- data %>% mutate(id=study_id,
                        N=as.integer(ifelse(is.na(!!sym(n)), !!sym(n_backup), !!sym(n))),
                        e=as.integer(!!sym(events)),
                        subgroup=rep('', nrow(data))) %>%
      filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(e)) 
      
   if(!is.null(subgroup_var)){ d = d %>% mutate(subgroup = !!sym(subgroup_var), subgroup = factor(ifelse(is.na(subgroup), 'undetermined', as.character(subgroup))), id = paste(id, as.character(subgroup)))}
   
   ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T), .groups='drop') %>% filter(seq>0 & co>0)
   if(nrow(ids)<1) return('no results')
   
   d = d %>% filter(id %in% ids$id) %>%
      group_by(id, subgroup, author, year, vac_mode) %>%
      summarize(N=sum(N, na.rm=T), e=sum(e, na.rm=T)) %>%
      pivot_wider(id_cols=c(id, author, year, subgroup), names_from=vac_mode, values_from=c(N, e), names_glue="{.value}.{vac_mode}") %>%
      arrange(year)
   
   if(nrow(d) < 1) return('no results')
   
   if(!is.null(subgroup_var)){
      m = metabin(event.e=e.CO, n.e=N.CO, event.c=e.SEQ, n.c=N.SEQ, studlab=paste(author, year), data=d, subset=!is.na(N.CO) & !is.na(N.SEQ) & !is.na(e.CO) & !is.na(e.SEQ), subgroup=subgroup, method='Inverse')
   } else {
      m = metabin(event.e=e.CO, n.e=N.CO, event.c=e.SEQ, n.c=N.SEQ, studlab=paste(author, year), data=d, subset=!is.na(N.CO) & !is.na(N.SEQ) & !is.na(e.CO) & !is.na(e.SEQ), method='Inverse')
   }

   forest(m, test.overall.random=T, lab.e='CO', lab.c='SEQ')
   if(funnel) {
      funnel(m, random=T)
      metabias(m, method.bias='Egger', k.min=6)
      metabias(m, method.bias='Begg', k.min=6)
   }
   return(m)
}
      
wide_cont_test = function(data, title='', vac_c, vac_v, n, n_backup, cont, ci1, ci2, funnel=F, subgroup_var=NULL) {
   d <- data %>% mutate(id=study_id,
                        N=as.integer(ifelse(is.na(!!sym(n)), !!sym(n_backup), !!sym(n))),
                        mean=as.numeric(!!sym(cont)),
                        sd=ifelse(!!sym(ci2)==-1, !!sym(ci1), (as.numeric(!!sym(ci2))-as.numeric(!!sym(ci1)))/Z),
                        subgroup=rep('', nrow(data))) %>%
      filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(mean) & !is.na(sd))
   
   if(!is.null(subgroup_var)){ d = d %>% mutate(subgroup = !!sym(subgroup_var), subgroup = factor(as.character(subgroup)), id = paste(id, as.character(subgroup))) %>% filter(!is.na(subgroup))}
   
   ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0 & !is.na(seq) & !is.na(co))
   if(nrow(ids)<1) return('no results')

   d <- d %>%
      filter(id %in% ids$id) %>%
      group_by(id, subgroup, author, year, vac_mode) %>%
      summarize(mean=sum(N*mean, na.rm=T)/sum(N, na.rm=T),
                sd=sum(N*sd, na.rm=T)/sum(N, na.rm=T), N=sum(N, na.rm=T)) %>%
      pivot_wider(id_cols=c(id, author, year, subgroup), names_from=vac_mode, values_from=c(N, mean, sd), names_glue="{.value}.{vac_mode}") %>%
      arrange(year)
   
   if(nrow(d) < 1 | all(is.na(d$N.CO)) | all(is.na(d$N.SEQ))) return('no results')
   
   if(!is.null(subgroup_var)){
      m = metacont(mean.e=mean.CO, n.e=N.CO, sd.e=sd.CO, mean.c=mean.SEQ, n.c=N.SEQ, sd.c=sd.SEQ, studlab=paste(author, year), data=d, sm='ROM', subgroup=subgroup)
   } else {
      m = metacont(mean.e=mean.CO, n.e=N.CO, sd.e=sd.CO, mean.c=mean.SEQ, n.c=N.SEQ, sd.c=sd.SEQ, studlab=paste(author, year), data=d, sm='ROM')
   }

   forest(m, test.overall.random=T, lab.e='CO', lab.c='SEQ')
   if(funnel) {
      funnel(m, random=T)
      metabias(m, method.bias='Egger', k.min=6)
      metabias(m, method.bias='Begg', k.min=6)
   }
   return(m)
}

datasource = 'data/v.2.3_08.07.2025_data_extraction_samuel.xlsx'

Synopsis

Meta analysis of Covid vaccination effectiveness with other co-vaccinations.

Methods

Short version

Meta-analyses were conducted with the meta package in R using random-effects models (metaprop, metabin) with logit transformation and Clopper–Pearson 95% CIs. Heterogeneity was quantified with I², subgroup differences tested by χ², and publication bias assessed with funnel plots plus Egger’s and Begg’s tests.

Long version

Statistical analysis was carried out using the meta (v8.2.1) package (Balduzzi et al. 2019) in the R version 4.5.1 (2025-06-13) programming language (R Core Team 2021).

For meta-analysis of proportions, pooled estimates were calculated using the metaprop function, applying a random-intercept logistic regression model to logit-transformed proportions and reported as relative risk (RR). Study-level estimates are reported with Clopper–Pearson 95% confidence intervals. The same pooling procedure was applied to predefined subgroups, and subgroup differences were evaluated using a \(\chi^2\) test for heterogeneity between subgroups.

For comparisons between treatment and control groups, relative risk (RR) was calculated using the metabin function. A random-effects model with inverse-variance weighting was applied (in preference to the Mantel–Haenszel method; (Mantel and Haenszel 1959)) to account for between-study variability. Given the diversity in study characteristics—particularly in population age structures, vaccine types, and study designs—the random-effects model was chosen over a fixed-effect approach, which would otherwise be dominated by studies with very large sample sizes. In some cases, studies with zero events in one group were excluded from the comparison, potentially introducing a selection bias that may overestimate the pooled OR for the group with more consistently reported events.

Between-study heterogeneity was assessed by estimating \(\tau^2\) using restricted maximum likelihood (REML) and quantified with the \(I^2\) statistic, with higher values indicating greater heterogeneity.

For publication bias, funnel plots of logit-transformed proportions against their inverse standard errors were visually inspected. Asymmetry was formally assessed using both a linear regression test (Egger et al. 1997) and a rank correlation test (Begg and Mazumdar 1994) via the metabias function.

Where possible, geometric mean fold rises (GMFR) were either extracted directly from the studies or calculated as the ratio of follow-up to baseline geometric mean titres (GMTs). When 95% confidence intervals for GMTs were available, standard deviations were derived on the log scale and propagated to estimate uncertainty in GMFR.

Studies were partitioned for subgroup analysis by age into ‘young’ with upper age range below 65 years or mean/median age plus one standard deviation below 60 years, ‘aged’ with lower age range above 65 years or mean/median age minus one standard deviation above 70 years and ‘other’ for all in between or missing data. Similarly, studies were stratified by sex into ‘male overrepresented’ with a female participant ratio below 45%, ‘female overrepresented’ with a ratio above 55%, ‘neutral’ in between.

Study Summaries

d = readxl::read_excel(datasource, sheet='main sheet', na='N/A', skip=5) %>%
   mutate(study_id = as.character(as.integer(study_id)),
          year = as.integer(year),
          main = as.integer(main),
          study_design = as.integer(study_design),
          valency = as.integer(valency),
          # LAIV = as.integer(LAIV),
          production_type = as.integer(production_type))
ids = d %>% group_by(study_id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0)
d = d %>% filter(study_id %in% ids$study_id & vac_mode %in% c('CO', 'SEQ'))

for(col in grep('(gmr|gmfr|gmt|spr|scr|ae|age|participants|sex|n)_', names(d), value=T)){d[,col] = as.numeric(unlist(d[,col]))}

d = d %>% 
   mutate(age = ifelse(is.na(age_median), age_mean, age_median),
          sex_female_ratio = round(sex_female/participants_total_groups, 2),
          rct = factor(ifelse(!is.na(study_design) & study_design=='4', 'RCT', 'non-RCT')),
          valency = factor(ifelse(valency==1, 'quadrivalent', ifelse(valency==2, 'trivalent', NA))),
          LAIV = factor(ifelse(LAIV==0, 'live vaccine', ifelse(LAIV==1, 'non-live vaccine', NA))),
          adjuvans = factor(ifelse(adjuvans=='0', 'none', ifelse(adjuvans=='a', 'with adjuvans', NA))),
          production_type = factor(ifelse(production_type==1, 'egg based', ifelse(production_type==2, 'recombinant', ifelse(production_type==3, 'cell based', NA))))) %>%
   mutate(age_dic = factor(ifelse(!is.na(age_range_min) & age_range_max<65 & age_range_min<60 | !is.na(age_sd) & age+age_sd<60 | age<60, 'young', ifelse(!is.na(age_range_min) & age_range_min>=65 & age_range_max>70 | !is.na(age_sd) & age-age_sd>70 | age>70, 'aged', 'other')), levels=c('young', 'aged', 'other')))

d$sex_female_ratio[is.na(d$sex_female_ratio)] = d$sex_rate_female[is.na(d$sex_female_ratio)]
d = d %>% mutate(sex_dic = factor(cut(sex_female_ratio, c(0, .45, .55, 1), labels=c('male', 'neutral', 'female')), levels=c('male', 'neutral', 'female', 'unknown')))

d$age_dic[is.na(d$age_dic)] = 'other'
d$sex_dic[is.na(d$sex_dic)] = 'unknown'

comps = readxl::read_excel(datasource, sheet='comparisons')

# recalculating GMFR values from GMT fu/bl if GMFR not available
for(vac in unique(comps$vaccination)) {
   for(ab in unique(comps[comps$vaccination==vac,]$antibody)) {
      gmt_bl = subset(comps, vaccination==vac & antibody==ab & timepoint=='Baseline' & outcome=='GMT')
      gmt_fu = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMT')
      gmfr = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMFR')
      if(nrow(gmt_bl)!=1 | nrow(gmt_fu)!=1 | nrow(gmfr)!=1){
         # print(paste('GMFR recalculation skipping', vac, ab))
         next
      }
      sel = is.na(d[,gmfr$cont[1]]) & !is.na(d[,gmt_bl$cont[1]]) & !is.na(d[,gmt_fu$cont[1]])
      d[is.na(d[,gmt_bl$n[1]]), gmt_bl$n[1]] = d[is.na(d[,gmt_bl$n[1]]), gmt_bl$n_backup[1]]
      d[is.na(d[,gmt_fu$n[1]]), gmt_fu$n[1]] = d[is.na(d[,gmt_fu$n[1]]), gmt_fu$n_backup[1]]
      d[is.na(d[,gmfr$n[1]]), gmfr$n[1]] = d[is.na(d[,gmfr$n[1]]), gmfr$n_backup[1]]
      d[sel, gmfr$n[1]] = as.integer(unlist(d[sel, gmt_fu$n[1]]))
      log_gmfr = log(as.numeric(unlist(d[sel, gmt_fu$cont[1]])))-log(as.numeric(unlist(d[sel, gmt_bl$cont[1]])))
      d[sel, gmfr$cont[1]] = exp(log_gmfr)
      bl_ci1 = as.numeric(unlist(d[sel, gmt_bl$ci1[1]]))
      bl_ci2 = as.numeric(unlist(d[sel, gmt_bl$ci2[1]]))
      fu_ci1 = as.numeric(unlist(d[sel, gmt_fu$ci1[1]]))
      fu_ci2 = as.numeric(unlist(d[sel, gmt_fu$ci2[1]]))
      se = sqrt( ((log(fu_ci2)-log(fu_ci1))/Z)^2 + ((log(bl_ci2)-log(bl_ci1))/Z)^2 )
      d[sel, gmfr$ci1[1]] = exp(log_gmfr - 1.96 * se)
      d[sel, gmfr$ci2[1]] = exp(log_gmfr + 1.96 * se)
   }
}

d$gmfr_allb_n = d$gmfr_n_total_fu
d$gmfr_allb = rowMeans(d[,grep('gmfr_b_.*_mean_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci1 = rowMeans(d[,grep('gmfr_b_.*_ci1_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci2 = rowMeans(d[,grep('gmfr_b_.*_ci2_fu', names(d), value=T)], na.rm=T)

studies = subset(d, main==1) %>% arrange(year)
studies_mini = subset(studies, select=c(author, year, jounral, participants_total, center_n, country, sex_female_ratio, age, age_sd))
writexl::write_xlsx(studies_mini, 'summary_table.xlsx')
DT::datatable(studies_mini, filter='top', extensions='Buttons', options=list(dom='Bfrtip', buttons=c('copy', 'csv', 'excel', 'pdf', 'print')))
rownames(studies) = as.character(studies$study_id)
d$author = studies[d$study_id,]$author
d$rct = studies[d$study_id,]$rct
d$year = as.character(studies[d$study_id,]$year)
d = d %>%
  mutate(study_label = paste(study_id, author, year),
         study_label = factor(study_label, levels = unique(study_label[order(as.integer(year))])) ) %>%
   arrange(year)

cols = unique(c(comps$n, comps$n_backup, comps$events, comps$cont, comps$ci1, comps$ci2))
if(any(!is.na(cols) & !cols %in% colnames(d))) {
   stop(paste('the following columns are missing from the data:', paste(cols[!is.na(cols) & !cols %in% colnames(d)], collapse=', ')))
}
comps = subset(comps, use==1)
comps$TE = NA; comps$SE = NA; comps$n.seq = NA; comps$n.co = NA

There are 57 studies with a total of 7677520 participants.

Number participants

options(scipen=999)
breaks = 10^(1:10)
ggplot(studies, aes(x=paste(study_id, author, year), y=as.integer(participants_total))) + geom_bar(stat='identity') + scale_y_log10(breaks=breaks, labels=breaks) + geom_text(aes(label=as.integer(participants_total)), size=3, color='white', vjust=0.5, hjust=1.2) + theme_classic() + annotation_logticks(sides='b') + theme(panel.grid.minor=element_blank()) + coord_flip() + ylab('participants') + xlab('')