# PRELIMINARIES ---------------------------------------------------------------

library(here)
setwd(here())

# some helper fns
source("helper.R")

# data-wrangling packages
library(dplyr)
library(ggplot2)
library(data.table)
library(tidyverse)
# meta-analysis packages
library(metafor)
library(robumeta)
library(MetaUtility)
library(PublicationBias)
# other
library(xtable)
library(testthat)

# this is only for use with the function "my_ggsave"
# lets me auto-save figures to dir where I'm writing the slide deck
overleaf.dir = "~/Dropbox/Apps/Overleaf/2021 PHS meta-analysis slides/figures"
results.dir = here()


# ENTER DATA FROM APPLETON META-ANALYSIS ---------------------------------------------------------------

# http://ajcn.nutrition.org/content/91/3/757/F1.large.jpg

# point estimates from forest plot
SMD.B = c(1.39, 0.09, 1.06, 0.60, 0.06, 
          0.27, 0.09, 0.53, -0.12, -0.15,
          0.81, -0.20, 1.91, 0.34, -0.23, 
          0.09, 0.69, 0.53, 1.74, 1.06, 
          0.45, -0.24, 0.60, 0.11, 0.62, 
          1.02, -0.56, 0.05, 0.62, 0.34,
          -0.10, 1.01, 0.06, 0.00, 0.07,
          0.00, -0.02, 0.11) 

# CI upper bound from forest plot
hi.B = c(2.20, 0.51, 2.01, 1.28, 0.71, 
         0.94, 0.59, 1.05, 0.40, 0.26, 
         1.50, -0.02, 2.82, 1.11, 0.22, 
         0.56, 1.27, 1.09, 2.63, 1.93,
         1.51, 0.19, 1.17, 0.69, 1.75, 
         2.08, -0.04, 0.32, 1.26, 1.12,
         0.17, 1.71, 0.33, 0.27, 0.61,
         0.45, 0.43, 0.46)

# labels
authors = c( "Stoll (1999)",
             "Fenton (2001)",
             "Nemets (2002)",
             "Peet (2002a)",
             "Peet (2002b)",
             "Peet (2002c)",
             "Peet (2002d)",
             "Peet (2002e)",
             "Peet (2002f)",
             "Llorente (2003)",
             "Marangell (2003)",
             "Ness (2003)",
             "Su (2003)",
             "Zanarini (2003)",
             "Silvers (2005)",
             "Fontani (2005)",
             "Frangou (2006a)",
             "Frangou (2006b)",
             "Nemets (2006)",
             "Buydens-Branchey (2006)",
             "Frangou (2007)",
             "Grenyer (2007)",
             "Hallahan (2007)",
             "Chiu (2008)",
             "da Silva (2008a)",
             "da Silva (2008b)",
             "Freeman (2008)",
             "Freund-Levi (2008)",
             "Jazayeri (2008)",
             "Rees (2008)",
             "Rogers (2008)",
             "Su (2008)",
             "van de Rest (2008a)",
             "van de Rest (2008b)",
             "Antypa (2009)",
             "Doornbos (2009a)",
             "Doornbos (2009b)",
             "Lucas (2009)"
)


# effect sizes
ES.B = scrape_meta( est = SMD.B, hi = hi.B, type="raw" )

# put in dataset
d = data.frame( study = authors,
                yi = ES.B$yi,
                vi = ES.B$vyi )

# calculate approximate p-values for each study
d$pval = 2 * ( 1 - pnorm( abs( d$yi/sqrt(d$vi) ) ) )

# order studies in descending order of point estimate
# this is just for forest plot prettiness later
d = d[ order(-d$yi), ]


# ANALYZE ---------------------------------------------------------------

# ~ Simple but wrong ---------------------------------------------------------------

# simple average
# 0.38
mean(d$yi)

# vote-counting based on p-values
mean( d$pval < 0.05 & d$yi > 0 ); sum( d$pval < 0.05 & d$yi > 0 )
mean( d$pval < 0.05 & d$yi < 0 ); sum( d$pval < 0.05 & d$yi < 0 )


# ~ Fixed-effects ---------------------------------------------------------------

( modFE = rma.uni( yi = d$yi,
                   vi = d$vi,
                   method="FE",
                   slab = d$study ) )


# ~ Random-effects ---------------------------------------------------------------
( modRE = rma.uni( yi = d$yi,
                   vi = d$vi,
                   method = "REML",
                   knha = TRUE,  # **note use of KNHA adjustment for SEs
                   
                   # these last 2 arguments are only to help forest()
                   #  called below to label things nicely
                   measure = "SMD",
                   slab = d$study ) )

# show how to extract the good stuff
modRE$b  # pooled point estimate
modRE$ci.lb; modRE$ci.ub  # and its CI
modRE$pval  # and its p-value
sqrt(modRE$vb) # and its standard error
sqrt(modRE$tau2)  # tau (SD of population effects)
modRE$se.tau2  # SE of tau

# CI for tau estimate
tau_CI(modRE)


# # simpler call for slides:
# library(metafor); library(MetaUtility)
# ( modRE = rma.uni( yi = d$yi, vi = d$vi, method = "REML", knha = TRUE ) )
# MetaUtility::tau_CI(modRE)

# prediction interval
pred_int(Mhat = modRE$b,
         Mhat.var = modRE$se^2,
         t2 = modRE$tau2,
         k = nrow(d) )

# proportion of effects SMD > 0.2
# bootstrapping takes a minute or so
prop_stronger( q = 0.2,
               tail = "above",
               dat = d,
               yi.name = "yi",
               vi.name = "vi" )

# proportion of effects SMD < -0.2
prop_stronger( q = -0.2,
               tail = "below",
               dat = d,
               yi.name = "yi",
               vi.name = "vi" )

# ~ Forest plot ---------------------------------------------------------------

forest(modRE,
       showweights = TRUE,
       header = TRUE)


my_baseplot_save( name = "omega_forest.pdf",
                  plot.call = 'forest(modRE,
                                showweights = TRUE,
                                header = TRUE)',
                  width = 7,
                  height = 8)


# ~ Distribution plot ---------------------------------------------------------------

# red line: fitted density of *population* effects from meta-analysis
# gray line: just a dumb density estimate from the *point estimates*
ggplot(data = data.frame(x = c(-2, 2)),
       aes(x)) +
  
  geom_vline(xintercept = 0,
             lwd = 1,
             color = "gray"
  ) +
  
  geom_vline(xintercept = modRE$b,
             lty = 2,
             lwd = 1,
             color = "red"
  ) +
  
  # estimated density from meta-analysis
  stat_function( fun = dnorm,
                 n = 101,
                 args = list( mean = modRE$b,
                              sd = sqrt(modRE$tau2) ),
                 lwd = 1.2,
                 color = "red") +
  
  # estimated density of estimates
  geom_density( data = d,
                aes(x = yi),
                adjust = 1.2 ) +
  
  ylab("") +
  scale_x_continuous( breaks = seq(-2, 2, 0.5)) +
  xlab("Standardized mean difference") +
  theme_minimal() +
  scale_y_continuous(breaks = NULL) +
  theme(text = element_text(size=20),
        axis.text.x = element_text(size=20))


my_ggsave( "omega_density.pdf",
           width = 8,
           height = 6)


# ~ Funnel plot ---------------------------------------------------------------

funnel(modRE)

my_baseplot_save( name = "omega_funnel.pdf",
                  plot.call = 'funnel(modRE)',
                  width = 6,
                  height = 5)


# significance funnel plot showing worst-case estimate
# as sensitivity analysis
significance_funnel(yi = d$yi,
                    vi = d$vi,
                    favor.positive = TRUE,
                    plot.pooled = TRUE)