knitr::opts_chunk$set(warning=F, mesage=F, dpi=200, fig.path='figures/', dev=c('png', 'svg'))
library(meta)
library(metasens)
library(ggplot2)
library(dplyr)
library(tidyr)

Z = qnorm(0.975) * 2

wide_test = function(data, method, title='', vac_c, vac_v, n, n_backup, events, cont, ci1, ci2, funnel=F, subgroup=NULL) {
   if(!(n %in% colnames(data))) stop(paste(n, 'not in columns'))
   if(!(n_backup %in% colnames(data))) stop(paste(n_backup, 'not in columns'))
   
   d <- data %>% mutate(id=paste(study_id, split), N=as.integer(ifelse(is.na(!!sym(n)), !!sym(n_backup), !!sym(n))))
   
   if(method=='metabin') {
      if(!events %in% colnames(data)) stop(paste(events, 'not in columns'))
      d = d %>%
         filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(!!sym(events)))
      ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0)
      if(nrow(ids)<1) return('no results')
      
      d = d %>% filter(id %in% ids$id) %>%
         group_by(study_id, split, vac_mode, author, year) %>%
         summarize(n=sum(N, na.rm=T), e=sum(as.integer(!!sym(events)), na.rm=T), age=mean(age), .groups='drop') %>%
         pivot_wider(names_from=vac_mode, values_from=c(n, e, age), names_glue="{.value}.{vac_mode}")# %>%
         # filter(!is.na(n.CO) & !is.na(n.SEQ) & !is.na(e.CO) & !is.na(e.SEQ))
      if(nrow(d) < 1) return('no results')
       
      # p = ggplot(d, aes(x=e.CO/n.CO, y=e.SEQ/n.SEQ)) + geom_point() + geom_label(aes(label=study_id)) + geom_abline(slope=1, intercept=0, color='#00000055')
      # print(p)
      
      m = metabin(event.e=e.CO, n.e=n.CO, event.c=e.SEQ, n.c=n.SEQ, studlab=paste(author, year, split), data=d, subset=!is.na(n.CO) & !is.na(n.SEQ) & !is.na(e.CO) & !is.na(e.SEQ), subgroup=subgroup, method='Inverse')
      # return(list(TE=m$TE.random, SE=m$seTE.random))
      
   } else if(method=='metacont') {
      if(!cont %in% colnames(data)) stop(paste(cont, 'not in columns'))
      if(!ci1 %in% colnames(data)) stop(paste(ci1, 'not in columns'))
      if(!ci2 %in% colnames(data)) stop(paste(ci2, 'not in columns'))
      d <- d %>%
         filter(!!sym(vac_c)==vac_v & !is.na(N) & !is.na(!!sym(cont)) & !is.na(!!sym(ci1)) & !is.na(!!sym(ci2)))
      
      ids = d %>% group_by(id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0 & !is.na(seq) & !is.na(co))
      if(nrow(ids)<1) return('no results')

      # print(ids)
      d <- d %>%
         filter(id %in% ids$id) %>% mutate(sd=(as.numeric(!!sym(ci2))-as.numeric(!!sym(ci1)))/Z) %>%
         group_by(study_id, split, vac_mode, author, year) %>%
         summarize(n=sum(N, na.rm=T),
                   cont=sum(N*as.numeric(!!sym(cont)))/sum(N, na.rm=T),
                   SD=sum(N*sd)/sum(N, na.rm=T), .groups='drop')
      # print(d)
      d = d %>%
         pivot_wider(names_from=vac_mode, values_from=c(n, cont, SD), names_glue="{.value}.{vac_mode}")
      if(nrow(d) < 1 | all(is.na(d$n.CO)) | all(is.na(d$n.SEQ))) return('no results')
      # print(d)
      # if(nrow(d) > 2) {
      #    p = ggplot(d, aes(x=cont.CO, y=cont.SEQ)) + geom_point() + geom_errorbarh(aes(xmin=cont.CO-SD.CO, xmax=cont.CO+SD.CO), width=.2) + geom_errorbar(aes(ymin=cont.SEQ-SD.SEQ, ymax=cont.SEQ+SD.SEQ)) + geom_label(aes(label=study_id)) + geom_density_2d_filled(alpha=0.1) + geom_histogram(aes(y=..density..), alpha=0.5) + geom_histogram(aes(x=..density..), alpha=0.5) + geom_abline(slope=1, intercept=0, color='#00000055')
      #    print(p)
      # }
      # 
      # if(!is.na(cont) & 'gmt_bl' %in% cont) {
      #    p = ggplot(d, aes(x=cont.CO, y=cont.SEQ)) + geom_density_2d(alpha=0.1) + geom_histogram(aes(y=..density..), alpha=0.5) + geom_point()  + geom_abline(slope=1, intercept=0, color='#00000055')
      #    print(p)
      # }
      # 
      # if(!is.na(cont) & 'gmfr' %in% cont) {
      #    p = ggplot(d, aes(x=cont.CO)) + geom_density(alpha=0.1) + geom_histogram(aes(y=..density..), alpha=0.5) + geom_point()
      #    print(p)
      # }
      
      m = metacont(mean.e=cont.CO, n.e=n.CO, sd.e=SD.CO, mean.c=cont.SEQ, n.c=n.SEQ, sd.c=SD.SEQ, studlab=paste(author, year, split), data=d, sm='ROM')#, subset=!is.na(n.CO) & !is.na(n.SEQ) & !is.na(cont.CO) & !is.na(cont.SEQ))
   } else {
      stop(paste0('undefined method "', method, '"'))
   }
   forest(m, test.overall.random=T, lab.e='CO', lab.c='SEQ')
   if(funnel) {
      funnel(m, random=T)
      metabias(m, method.bias='Egger', k.min=6)
      metabias(m, method.bias='Begg', k.min=6)
   }
   return(m)
}

datasource = 'data/v.1.5_26.03.2025_data_extraction_samuel_CT.xlsx'

Synopsis

Meta analysis of Covid vaccination effectiveness with other co-vaccinations.

Methods

Statistical analysis was carried out using the meta (v8.0.2) package (Balduzzi, RmĂĽcker, and Schwarzer 2019) in the R version 4.4.2 (2024-10-31) programming language (R Core Team 2021).

Meta-analysis of proportions was pooled by fitting a random intercept logistic regression model with the metaprop function to logit transformed proportions in order to include valid estimates for studies with very few or no events. Study estimates are shown with computed Clopper-Pearson 95% confidence intervals. The same pooled estimates were conducted for subgroups and tested by \(\chi^2\)-test for significant pairwise differences.

Heterogeneity was assessed by estimating the maximum-likelihood of \(\tau^2\) and quantified with the \(I^2\) index.

Groupwise comparisons of studies were analyzed using odds ratios between treatment and control group with the metabin function using a random effects model for the pooled odds ratio using inverse variance weighting (instead of the Mantel-Haenszel method (Mantel and Haenszel 1959)). Heterogeneity was assessed using a restricted maximum-likelihood estimator of \(\tau^2\).

Publication bias was assessed using a funnelplot of the logit transformed prevalence and inverse variance and tested using the metabias function with linear regression test (Egger et al. 1997) and rank correlation test (Begg and Mazumdar 1994) for asymmetry.

Study Summaries

d = readxl::read_xlsx(datasource, sheet='main sheet', na='N/A', skip=5)
ids = d %>% group_by(study_id) %>% summarize(seq=sum(vac_mode=='SEQ', na.rm=T), co=sum(vac_mode=='CO', na.rm=T)) %>% filter(seq>0 & co>0)
d = d %>% filter(study_id %in% ids$study_id & vac_mode %in% c('CO', 'SEQ'))
d$age = as.numeric(ifelse(is.na(d$age_median), d$age_mean, d$age_median))

for(col in grep('(gmr|gmfr|gmt|spr|scr|ae)_', names(d), value=T)){d[,col] = as.numeric(unlist(d[,col]))}
d$gmfr_allb_n = d$gmfr_n_total_fu
d$gmfr_allb = rowMeans(d[,grep('gmfr_b_.*_mean_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci1 = rowMeans(d[,grep('gmfr_b_.*_ci1_fu', names(d), value=T)], na.rm=T)
d$gmfr_allb_ci2 = rowMeans(d[,grep('gmfr_b_.*_ci2_fu', names(d), value=T)], na.rm=T)

# d[,c('study_id', grep('gmfr_allb', names(d), value=T))]
# 
# wide_test(d, 'metacont', 'Total B', vac_c='vac_f', vac_v='F', n='gmfr_allb_n', n_backup='gmfr_n_total_fu', cont='gmfr_allb', ci1='gmfr_allb_ci1', ci2='gmfr_allb_ci2')



studies = subset(d, main==1)
rownames(studies) = as.character(studies$study_id)
d$author = studies[d$study_id,]$author
d$year = as.character(studies[d$study_id,]$year)
d$split[is.na(d$split)] = ''

options(scipen=999)
breaks = 10^(1:10)
ggplot(studies, aes(x=paste(study_id, author, year), y=as.integer(participants_total))) + geom_bar(stat='identity') + scale_y_log10(breaks=breaks, labels=breaks) + geom_text(aes(label=as.integer(participants_total)), size=3, color='white', vjust=0.5, hjust=1.2) + theme_classic() + annotation_logticks(sides='b') + theme(panel.grid.minor=element_blank()) + coord_flip() + ylab('participants') + xlab('')

ggplot(studies, aes(x=paste(study_id, author, year))) + geom_point(aes(y=as.integer(age_mean), color='mean'), stat='identity') + geom_point(aes(y=as.integer(age_median), color='median'), stat='identity') + coord_flip() + scale_color_manual(values=c('mean'='skyblue', 'median'='blue')) + xlab('') + ylab('age')

ggplot(studies, aes(x=paste(study_id, author, year))) + geom_point(aes(y=age, color='mean'), stat='identity') + coord_flip() + ggtitle('Age (median OR mean if N/A)') + xlab('')

ggplot(studies, aes(x=age)) + geom_density() + geom_histogram(aes(y=..density..), alpha=0.5)

d.age = d %>% mutate(age=as.numeric(ifelse(is.na(age_median), age_mean, age_median))) %>% group_by(study_id, vac_mode) %>% summarize(age=mean(age)) %>% pivot_wider(names_from=vac_mode, values_from=c(age), names_glue="{.value}.{vac_mode}")

ggplot(d.age, aes(x=age.CO, y=age.SEQ)) + geom_point() + geom_label(aes(label=as.integer(study_id)))

comps = readxl::read_xlsx(datasource, sheet='comparisons')
comps = subset(comps, use==1)

# d3 = readxl::read_xlsx(datasource, sheet=2, na='N/A', skip=1)

There are 57 studies with a total of 7677520 participants.

Primary Outcomes

# ggplot(d) + geom_point(aes(x=as.numeric(gmt_h1n1_mean_bl), y=as.numeric(gmt_h1n1_mean_fu))) + coord_trans('log10', 'log10')

# ggplot(d, aes(x=as.numeric(gmr_h1n1_mean_fu), y=as.numeric(gmt_h1n1_mean_fu)/as.numeric(gmt_h1n1_mean_bl))) + geom_point() + geom_text(aes(label=paste(study_id, split))) + coord_trans('log10', 'log10')

cols = unique(c(comps$n, comps$n_backup, comps$events, comps$cont, comps$ci1, comps$ci2))
if(any(!is.na(cols) & !cols %in% colnames(d))) {
   stop(paste('the following columns are missing from the data:', paste(cols[!is.na(cols) & !cols %in% colnames(d)], collapse=', ')))
}
comps$TE = NA; comps$SE = NA; comps$n.seq = NA; comps$n.co = NA
for(vac in unique(comps$vaccination)) {
   cat(paste('\n\n##', vac), '\n\n')
   for(ab in unique(comps[comps$vaccination==vac,]$antibody)) {
      gmt_bl = subset(comps, vaccination==vac & antibody==ab & timepoint=='Baseline' & outcome=='GMT')
      gmt_fu = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMT')
      gmfr = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMFR')
      gmr = subset(comps, vaccination==vac & antibody==ab & timepoint=='Follow-up' & outcome=='GMR')
      if(nrow(gmt_bl)==1 & nrow(gmt_fu)==1 & nrow(gmfr)==1 & nrow(gmr)==1){
         d[,gmt_bl$n[1]] = as.numeric(unlist(d[,gmt_bl$n[1]])); d[,gmt_fu$n[1]] = as.numeric(unlist(d[,gmt_fu$n[1]])); d[,gmfr$n[1]] = as.numeric(unlist(d[,gmfr$n[1]]))
         d[,gmt_bl$n_backup[1]] = as.numeric(unlist(d[,gmt_bl$n_backup[1]])); d[,gmt_fu$n_backup[1]] = as.numeric(unlist(d[,gmt_fu$n_backup[1]])); d[,gmfr$n_backup[1]] = as.numeric(unlist(d[,gmfr$n_backup[1]]))
         d[,gmt_bl$cont[1]] = as.numeric(unlist(d[,gmt_bl$cont[1]])); d[,gmt_bl$ci1[1]] = as.numeric(unlist(d[,gmt_bl$ci1[1]])); d[,gmt_bl$ci2[1]] = as.numeric(unlist(d[,gmt_bl$ci2[1]]))
         d[,gmt_fu$cont[1]] = as.numeric(unlist(d[,gmt_fu$cont[1]])); d[,gmt_fu$ci1[1]] = as.numeric(unlist(d[,gmt_fu$ci1[1]])); d[,gmt_fu$ci2[1]] = as.numeric(unlist(d[,gmt_fu$ci2[1]]))
         d[,gmr$cont[1]] = as.numeric(unlist(d[,gmr$cont[1]])); d[,gmr$ci1[1]] = as.numeric(unlist(d[,gmr$ci1[1]])); d[,gmr$ci2[1]] = as.numeric(unlist(d[,gmr$ci2[1]]))
         d[,gmfr$cont[1]] = as.numeric(unlist(d[,gmfr$cont[1]])); d[,gmfr$ci1[1]] = as.numeric(unlist(d[,gmfr$ci1[1]])); d[,gmfr$ci2[1]] = as.numeric(unlist(d[,gmfr$ci2[1]]))
         
         sel = is.na(is.na(d[,gmt_bl$n[1]]) & !is.na(d[,gmt_bl$n_backup[1]]))
         d[sel, gmt_bl$n[1]] = d[sel, gmt_bl$n_backup[1]]
         sel = is.na(is.na(d[,gmt_fu$n[1]]) & !is.na(d[,gmt_fu$n_backup[1]]))
         d[sel, gmt_fu$n[1]] = d[sel, gmt_fu$n_backup[1]]
         
         d$temp_gmfr = as.numeric(unlist(d[,gmfr$cont[1]])); d$temp_gmr = as.numeric(unlist(d[,gmr$cont[1]])); d$temp_gmt_bl = as.numeric(unlist(d[,gmt_bl$cont[1]])); d$temp_gmt_fu = as.numeric(unlist(d[,gmt_fu$cont[1]]))
         # p = ggplot(d) + geom_point(aes(x=temp_gmfr, y=temp_gmr)) + coord_trans('log10', 'log10') + ggtitle('Correlation of GMFR vs GMR', ab)
         # print(p)
         # p = ggplot(d) + geom_point(aes(x=temp_gmfr, y=temp_gmt_fu/temp_gmt_bl)) + coord_trans('log10', 'log10') + ggtitle('Correlation of GMFR vs GMT_fu/GMT_bl', ab)
         # print(p)
         
         sel = d[,gmt_bl$n[1]] == d[,gmt_fu$n[1]]
         sel[is.na(sel)] = F
         sel = is.na(d[,gmfr$cont[1]]) & !is.na(d[,gmt_bl$cont[1]]) & !is.na(d[,gmt_fu$cont[1]]) & sel
         d[sel, gmfr$cont[1]] = as.numeric(unlist(d[sel, gmt_fu$cont[1]]))/as.numeric(unlist(d[sel, gmt_bl$cont[1]]))
         d[sel, gmfr$ci1[1]] = as.numeric(unlist(d[sel, gmt_fu$ci1[1]]))/as.numeric(unlist(d[sel, gmt_bl$ci1[1]]))
         d[sel, gmfr$ci2[1]] = as.numeric(unlist(d[sel, gmt_fu$ci2[1]]))/as.numeric(unlist(d[sel, gmt_bl$ci2[1]]))
         d[sel, gmfr$n[1]] = as.numeric(unlist(d[sel, gmt_bl$n[1]]))
      }
   }
   
   for(out in unique(comps[comps$vaccination==vac,]$outcome)) {
      cat(paste('\n\n###', out), '\n\n')
      for(tp in unique(comps[comps$vaccination==vac & comps$outcome==out,]$timepoint)) {
         cat(paste('\n\n####', tp), '\n\n')
         for(ab in unique(comps[comps$vaccination==vac & comps$outcome==out & comps$timepoint==tp,]$antibody)) {
            cat(paste('\n\n#####', ab, '\n\n'))
            i = which(comps$vaccination==vac & comps$outcome==out & comps$timepoint==tp & comps$antibody==ab)
            if(length(i) != 1) next
         
            res = wide_test(d, method=comps$method[i], vac_c=comps$vac_c[i], vac_v=comps$vac_v[i], n=comps$n[i], n_backup=comps$n_backup[i], events=comps$events[i], cont=comps$cont[i], ci1=comps$ci1[i], ci2=comps$ci2[i])
            # cop = copas(res)
            # plot(cop)
            if(is.character(res)) {cat('\nNO DATA\n\n'); next}
            comps$TE[i]=res$TE.random; comps$SE[i]=res$seTE.random; comps$n.seq[i]=sum(res$n.c); comps$n.co[i]=sum(res$n.e)
         }
         cat('\n\n##### Summary\n\n')
         m = metagen(TE=TE, seTE=SE, n.c=n.seq, n.e=n.co, studlab=antibody, data=comps, subset=vaccination==vac & outcome==out & timepoint==tp, sm="RR", label.e='CO', label.c='SEQ')
         forest(m)
      }
   }
}

Influenza

SCR

Follow-up

H1N1