baci-fry.r

# BACI-fry

# This example is based (loosely) on a consulting project from an 
# Independent Power Producer who was interested in monitoring the 
# effects of an in-stream hydroelectric project. 

# The response variable for the project was the minnow density at 
# different locations in the stream.

# The  monitoring design has the river divided into six segments of 
# which three are upstream of the diversion
# and three are downstream of the diversion. In each segment, several sites 
# have been located where minnow fry 
# congregate. In each of the sites, minnow traps are set for various lengths 
# of time. 

# At the end of the soaking period, the traps are removed and the number of 
# minnows are counted and classified by species.
# The counts are standardized to a common soaking time to adjust for the 
# different soak-time.  
# [This could be done directly in the analysis by using the soak-time as a 
# covariate, but the soak-time data was not available.]

# An initial pre-project monitoring study was run in 2000 to 2002. The 
# project became operational in late 2002, and
# post-project monitoring continued in 2003 and 2004.


# A nice slide presentation on lmer() is found at 
#  http://www.stat.wisc.edu/~bates/PotsdamGLMM/LMMD.pdf


library(brms) # for the bayesian fit

## Loading required package: Rcpp

## Loading 'brms' package (version 2.23.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').

## 
## Attaching package: 'brms'

## The following object is masked from 'package:stats':
## 
##     ar

library(ggplot2)
library(lmerTest)

## Loading required package: lme4

## Loading required package: Matrix

## 
## Attaching package: 'lme4'

## The following object is masked from 'package:brms':
## 
##     ngrps

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(emmeans)

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

library(plyr)

source("../../schwarz.functions.r")
#source('http://www.stat.sfu.ca/~cschwarz/Stat-Ecology-Datasets/schwarz.functions.r')
# Get the BACI power program
source("../baci-power.r")


# Read in the data
##***part001b;
# sink('baci-fry-R-001.txt', split=TRUE)
fry <- read.csv('baci-fry.csv', header=TRUE, as.is=TRUE, strip.white=TRUE)

fry$logfry       <- log(fry$Fry)

fry$Period       <- factor(fry$Period)
fry$Site         <- factor(fry$Site)
fry$SiteClass    <- factor(fry$SiteClass)
fry$Period       <- factor(fry$Period)
fry$YearF        <- factor(fry$Year)

fry[1:12,]

##     SiteClass Site Sample Year Period Fry   logfry YearF
## 1  Downstream    D      1 2000 Before 185 5.220356  2000
## 2  Downstream    E      1 2000 Before 258 5.552960  2000
## 3  Downstream    E      2 2000 Before 630 6.445720  2000
## 4  Downstream    E      3 2000 Before 116 4.753590  2000
## 5  Downstream    F      1 2000 Before 208 5.337538  2000
## 6  Downstream    F      2 2000 Before 261 5.564520  2000
## 7  Downstream    D      1 2001 Before 321 5.771441  2001
## 8  Downstream    E      1 2001 Before  65 4.174387  2001
## 9  Downstream    E      2 2001 Before 755 6.626718  2001
## 10 Downstream    F      1 2001 Before 476 6.165418  2001
## 11 Downstream    F      2 2001 Before 521 6.255750  2001
## 12 Downstream    E      1 2002 Before 198 5.288267  2002

##***part001e;
# sink()

str(fry)

## 'data.frame':    52 obs. of  8 variables:
##  $ SiteClass: Factor w/ 2 levels "Downstream","Upstream": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Site     : Factor w/ 6 levels "A","B","C","D",..: 4 5 5 5 6 6 4 5 5 6 ...
##  $ Sample   : int  1 1 2 3 1 2 1 1 2 1 ...
##  $ Year     : int  2000 2000 2000 2000 2000 2000 2001 2001 2001 2001 ...
##  $ Period   : Factor w/ 2 levels "After","Before": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Fry      : int  185 258 630 116 208 261 321 65 755 476 ...
##  $ logfry   : num  5.22 5.55 6.45 4.75 5.34 ...
##  $ YearF    : Factor w/ 5 levels "2000","2001",..: 1 1 1 1 1 1 2 2 2 2 ...

# look at the std dev to mean plot for the original and log() data 
# Compute the averages etc
# sink('baci-fry-R-020.txt', split=TRUE)
##***part020b;
mean.fry <- plyr::ddply(fry, c("Year","YearF","Site","SiteClass","Period"), function(x){
  n <- nrow(x)
  nmiss <- sum(is.na(x$Fry))
  mean.fry <- mean(x$Fry, na.rm=TRUE)
  sd.fry   <- sd(  x$Fry, na.rm=TRUE)
  mean.logfry <- mean(x$logfry, na.rm=TRUE)
  sd.logfry   <- sd  (x$logfry, na.rm=TRUE)
  res<- data.frame(n, nmiss, mean.fry, sd.fry, mean.logfry, sd.logfry)
  })
mean.fry[1:5,]

##   Year YearF Site  SiteClass Period n nmiss mean.fry    sd.fry mean.logfry
## 1 2000  2000    A   Upstream Before 2     0  25.5000  30.40559    2.618221
## 2 2000  2000    B   Upstream Before 2     0 306.5000 167.58431    5.644266
## 3 2000  2000    C   Upstream Before 2     0 124.5000 105.35891    4.602664
## 4 2000  2000    D Downstream Before 1     0 185.0000        NA    5.220356
## 5 2000  2000    E Downstream Before 3     0 334.6667 265.43800    5.584090
##   sd.logfry
## 1 1.7422073
## 2 0.5767497
## 3 0.9767137
## 4        NA
## 5 0.8464942

##***part020e;
# sink()


# Plot the std to mean ratios
##***part021b;
sdmeanplot <- ggplot(data=mean.fry, aes(x=mean.fry, y=sd.fry))+
  ggtitle("SD vs Mean - original data")+
  geom_point()
sdmeanplot

## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

##***part021e;
# ggsave(plot=sdmeanplot, file='baci-fry-R-300-diagnostic.png', h=4, w=6, units="in", dpi=300)


# Plot the std to mean ratios

##***part021logb;
sdmeanplotlog <- ggplot(data=mean.fry, aes(x=mean.logfry, y=sd.logfry))+
  ggtitle("SD vs Mean - logged data")+
  geom_point()
sdmeanplotlog

## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

##***part021loge;
# ggsave(plot=sdmeanplot, file='baci-fry-R-021b.png', h=4, w=6, units="in", dpi=300)


# Plot the mean log-fry over time by site
##***part025b;
meanplot <- ggplot(data=mean.fry, aes(x=Year, y=mean.logfry,
                  group=Site, color=SiteClass, shape=Site))+
  ggtitle("Changes in mean over time")+
  geom_point(position=position_dodge(w=0.2))+
  geom_line(position=position_dodge(w=0.2))+
  geom_vline(xintercept=-0.5+min(mean.fry$Year[as.character(mean.fry$Period) == "After"]))
meanplot

##***part025e;
# ggsave(plot=meanplot, file="baci-fry-R-025.png", h=4, w=6, units="in", dpi=300)


#####################################################################################
# The analysis of the mean(log(Fry))

# sink('baci-fry-R-200-type3.txt', split=TRUE)
##***part200b;
# Notice that we use the YearF rather than Year in the random statements
# Because we are analyzing the mean, the Site:Year term is the residual error
result.mean.lmer <- lmerTest::lmer(mean.logfry ~ Period + SiteClass + 
                      Period:SiteClass+
                      (1|YearF)+(1|Site),
                  data=mean.fry) 

##***part200e;
# sink()

##***part200anovab;
anova(result.mean.lmer, ddf="Kenward-Roger")

## Type III Analysis of Variance Table with Kenward-Roger's method
##                    Sum Sq  Mean Sq NumDF   DenDF F value Pr(>F)
## Period           0.043595 0.043595     1  2.9797  0.2987 0.6230
## SiteClass        0.232904 0.232904     1  4.0194  1.5957 0.2748
## Period:SiteClass 0.019698 0.019698     1 17.1098  0.1350 0.7179

##***part200anovae;



summary(result.mean.lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mean.logfry ~ Period + SiteClass + Period:SiteClass + (1 | YearF) +  
##     (1 | Site)
##    Data: mean.fry
## 
## REML criterion at convergence: 45.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.80986 -0.68657  0.04109  0.61043  1.58328 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 0.6239   0.7899  
##  YearF    (Intercept) 0.0606   0.2462  
##  Residual             0.1460   0.3820  
## Number of obs: 28, groups:  Site, 6; YearF, 5
## 
## Fixed effects:
##                                Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)                      5.4217     0.5188  5.7813  10.451 5.71e-05 ***
## PeriodBefore                     0.2023     0.3141  5.5010   0.644    0.545    
## SiteClassUpstream               -0.7823     0.6864  4.6649  -1.140    0.309    
## PeriodBefore:SiteClassUpstream  -0.1097     0.2979 17.2160  -0.368    0.717    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) PrdBfr StClsU
## PeriodBefor -0.370              
## StClssUpstr -0.671  0.139       
## PrdBfr:StCU  0.194 -0.515 -0.266

# sink('baci-fry-R-200-vc.txt', split=TRUE)
##***part200vcb;
# Extract the variance components
vc <- VarCorr(result.mean.lmer)
vc

##  Groups   Name        Std.Dev.
##  Site     (Intercept) 0.78987 
##  YearF    (Intercept) 0.24618 
##  Residual             0.38205

##***part200vce;
# sink()

# To extract the variance components for use in R, create a data frame
vc.df <- as.data.frame(vc)
YearSigma    <- vc.df[grep("YearF"   ,vc.df$grp),"sdcor"]
SiteSigma    <- vc.df[grep("Site"    ,vc.df$grp),"sdcor"]
ResidualSigma<- vc.df[grep("Residual",vc.df$grp),"sdcor"] 





# emmeans after a lm() fit
# sink('baci-fry-R-200LSM-SiteClass.txt', split=TRUE)
##***parts200LSM-SiteClassb;
result.mean.lmer.emmo.S <- emmeans::emmeans(result.mean.lmer, ~SiteClass)

## NOTE: Results may be misleading due to involvement in interactions

cat("\nEstimated marginal means for SiteClass \n\n")

## 
## Estimated marginal means for SiteClass

summary(result.mean.lmer.emmo.S)

##  SiteClass  emmean    SE  df lower.CL upper.CL
##  Downstream   5.52 0.483 4.5     4.24     6.81
##  Upstream     4.69 0.480 4.4     3.40     5.97
## 
## Results are averaged over the levels of: Period 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

##***parts200LSM-SiteClasse;
# sink()

# sink('baci-fry-R-200LSM-Period.txt', split=TRUE)
##***part200LSM-Periodb;
result.mean.lmer.emmo.P <- emmeans::emmeans(result.mean.lmer, ~Period)

## NOTE: Results may be misleading due to involvement in interactions

cat("\nEstimated marginal means for Period \n\n")

## 
## Estimated marginal means for Period

summary(result.mean.lmer.emmo.P)

##  Period emmean    SE   df lower.CL upper.CL
##  After    5.03 0.385 6.03     4.09     5.97
##  Before   5.18 0.365 5.39     4.26     6.10
## 
## Results are averaged over the levels of: SiteClass 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

##***part200LSM-Periode;
# sink()

# sink('baci-fry-R-200LSM-int.txt', split=TRUE)
##***part200LSM-intb;
result.mean.lmer.emmo.SP <- emmeans::emmeans(result.mean.lmer, ~SiteClass:Period)
cat("\nEstimated marginal means \n\n")

## 
## Estimated marginal means

summary(result.mean.lmer.emmo.SP)

##  SiteClass  Period emmean    SE   df lower.CL upper.CL
##  Downstream After    5.42 0.519 5.73     4.14     6.71
##  Upstream   After    4.64 0.512 5.47     3.36     5.92
##  Downstream Before   5.62 0.497 4.98     4.34     6.90
##  Upstream   Before   4.73 0.494 4.87     3.45     6.01
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

##***part200LSM-inte;
# sink()

# Estimate the BACI contrast
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.mean.lmer)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]

##   Estimate Std. Error         df    t value   Pr(>|t|) 
## -0.1096870  0.2978614 17.2159813 -0.3682485  0.7171768

# sink("baci-fry-R-200baci.txt", split=TRUE)
##***part200bacib; 
# Estimate the BACI contrast along with a se
summary(contrast(result.mean.lmer.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)

##  contrast estimate    SE   df lower.CL upper.CL t.ratio p.value
##  baci        -0.11 0.299 17.1   -0.739     0.52  -0.367  0.7179
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

##***part200bacie;
# sink()




# Check the residuals etc
##***part200diagnosticb;
diagplot <- sf.autoplot.lmer(result.mean.lmer)

## Loading required package: broom.mixed

## Loading required package: grid

## Loading required package: gridExtra

## Loading required package: lattice

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

plot(diagplot)

##***part200diagnostice;
# ggsave(plot=diagplot, file='baci-fry-R-200-diagnostic.png', h=4, w=6, units="in", dpi=300)





#################################################################################
# The analysis of the individual values
# Results will differ slightly from answers on mean because design is unbalanced

# sink('baci-fry-R-300-type3.txt', split=TRUE)
##***part300b;
# Notice that we use the YearF variable in the random effects
result.lmer <- lmerTest::lmer(logfry ~ Period + SiteClass + Period:SiteClass +
                      (1|YearF)+ (1|Site)+(1|YearF:Site), data=fry)

## boundary (singular) fit: see help('isSingular')

##***part300e;
# sink()


##***part300anovab;
anova(result.lmer, ddf="Kenward-Roger")

## Type III Analysis of Variance Table with Kenward-Roger's method
##                   Sum Sq Mean Sq NumDF   DenDF F value Pr(>F)
## Period           0.10071 0.10071     1  3.0855  0.1924 0.6898
## SiteClass        0.90997 0.90997     1  4.0254  1.7386 0.2573
## Period:SiteClass 0.00662 0.00662     1 16.1019  0.0126 0.9119

##***part300anovae;


summary(result.lmer)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: logfry ~ Period + SiteClass + Period:SiteClass + (1 | YearF) +  
##     (1 | Site) + (1 | YearF:Site)
##    Data: fry
## 
## REML criterion at convergence: 126.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.86112 -0.53960  0.06963  0.55065  1.80479 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  YearF:Site (Intercept) 0.00000  0.0000  
##  Site       (Intercept) 0.58824  0.7670  
##  YearF      (Intercept) 0.04586  0.2142  
##  Residual               0.52340  0.7235  
## Number of obs: 52, groups:  YearF:Site, 28; Site, 6; YearF, 5
## 
## Fixed effects:
##                                Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)                     5.47529    0.53445  6.52426  10.245 2.93e-05
## PeriodBefore                    0.15025    0.36306  7.78865   0.414    0.690
## SiteClassUpstream              -0.85822    0.71615  5.67438  -1.198    0.278
## PeriodBefore:SiteClassUpstream -0.04761    0.41842 41.41635  -0.114    0.910
##                                   
## (Intercept)                    ***
## PeriodBefore                      
## SiteClassUpstream                 
## PeriodBefore:SiteClassUpstream    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) PrdBfr StClsU
## PeriodBefor -0.423              
## StClssUpstr -0.686  0.228       
## PrdBfr:StCU  0.265 -0.616 -0.373
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

# sink('baci-fry-R-300-vc.txt', split=TRUE)
##***part300vcb;
# Extract the variance components
vc <- VarCorr(result.lmer)
vc

##  Groups     Name        Std.Dev.
##  YearF:Site (Intercept) 0.00000 
##  Site       (Intercept) 0.76697 
##  YearF      (Intercept) 0.21415 
##  Residual               0.72347

##***part300vce;
# sink()


# emmeans after a lm() fit
# sink('baci-fry-R-300LSM-SiteClass.txt', split=TRUE)
##***parts300LSM-SiteClassb;
result.lmer.emmo.S <- emmeans::emmeans(result.lmer, ~SiteClass)

## NOTE: Results may be misleading due to involvement in interactions

cat("\nEstimated marginal means for SiteClass \n\n")

## 
## Estimated marginal means for SiteClass

summary(result.lmer.emmo.S)

##  SiteClass  emmean    SE   df lower.CL upper.CL
##  Downstream   5.55 0.489 4.47     4.25     6.85
##  Upstream     4.67 0.477 4.19     3.37     5.97
## 
## Results are averaged over the levels of: Period 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

##***parts300LSM-SiteClasse;
# sink()

# sink('baci-fry-R-300LSM-Period.txt', split=TRUE)
##***part300LSM-Periodb;
result.lmer.emmo.P <- emmeans::emmeans(result.lmer, ~Period)

## NOTE: Results may be misleading due to involvement in interactions

cat("\nEstimated marginal means for Period \n\n")

## 
## Estimated marginal means for Period

summary(result.lmer.emmo.P)

##  Period emmean    SE   df lower.CL upper.CL
##  After    5.05 0.390 5.91     4.09     6.00
##  Before   5.17 0.364 5.04     4.24     6.11
## 
## Results are averaged over the levels of: SiteClass 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

##***part300LSM-Periode;
# sink()

# sink('baci-fry-R-300LSM-int.txt', split=TRUE)
##***part300LSM-intb;
result.lmer.emmo.SP <- emmeans::emmeans(result.lmer, ~SiteClass:Period)
cat("\nEstimated marginal means for SiteClass:Period \n\n")

## 
## Estimated marginal means for SiteClass:Period

summary(result.lmer.emmo.SP)

##  SiteClass  Period emmean    SE   df lower.CL upper.CL
##  Downstream After    5.48 0.537 6.19     4.17     6.78
##  Upstream   After    4.62 0.524 5.74     3.32     5.91
##  Downstream Before   5.63 0.506 5.05     4.33     6.92
##  Upstream   Before   4.72 0.493 4.65     3.42     6.02
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

##***part300LSM-inte;
# sink()

# Estimate the BACI contrast
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.lmer)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]

##   Estimate Std. Error         df    t value   Pr(>|t|) 
## -0.0476087  0.4184179 41.4163500 -0.1137826  0.9099599

# sink("baci-fry-R-300baci.txt", split=TRUE)
##***part300bacib; 
# Estimate the BACI contrast along with a se
summary(contrast(result.lmer.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)

##  contrast estimate    SE   df lower.CL upper.CL t.ratio p.value
##  baci      -0.0476 0.423 16.1   -0.945     0.85  -0.112  0.9119
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

##***part300bacie;
# sink()




# Check the residuals etc
##***part300diagnosticb;
diagplot <- sf.autoplot.lmer(result.lmer)

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

plot(diagplot)

##***part300diagnostice;
# ggsave(plot=diagplot, file='baci-fry-R-300-diagnostic.png', h=4, w=6, units="in", dpi=300)


###################################################################################################
###################################################################################################
###################################################################################################

# Power of the BACI-fry example

# This illustrates the power computations for a BACI design with multiple treatment
# and control sites, multiple years measured before and after treatment is applied, and
# sub-sampling at each year-site combination.

# The proposed experimental design is a BACI experiment with 
# the  monitoring design has the river divided into sites of which 
# some are upstream of the diversion and some are downstream of the diversion. 
# In each site, several minnow traps are set for various lengths of time. 
#
# At the end of the soaking period, the traps are removed and the number of 
# minnows are counted and classified by species.
# The counts are standardized to a common period of time to adjust for the 
# different soak-times. 
#
# This is run for several years before the project starts and after the project starts

# The analysis was originally done on the log-scale from which
# the variance components were extracted (see baci-fry.r code)
# 
# We want to do a power analysis looking at various aspects of the design
#
# The information we need is:
#     Alpha level (usually .05 or .10)
#     Variance components (these were obtained from the baci-fry.r analysis of the previous data)
#        std_site  -> site to site STANDARD DEVIATION
#        std_year  -> year to year STANDARD DEVIATION
#        std_site_year -> site-year interaction STANDARD DEVIATION
#        std_subsample -> within site sub-sampling STANDARD DEVIATION
#     Sub-sampling sample sizes, i.e. the number of minnow traps (sub-samples) set at each site
#       We allow for different sample sizes in the treatment-time combinations
#        but all sites in that combination must have the same number of minnow traps.
#        It is possible to generalize this; contact me for details.
#        n_IA   -> number of subsamples in Treatment-After  combination
#        n_IB   -> number of subsamples in Treatment-Before combination
#        n_CA   -> number of subsamples in Control-After    combination
#        n_CB   -> number of subsamples in Control-Before   combination
#     Number of segments(sites)
#        We allow for a different number of sites for Treatment and Control areaa
#        ns_I     -> number of treatment sites (i.e. downstram of the project)
#        ns_C     -> number of control sites (i.e. upstream of the project)
#     Number of years of monitoring before/after impact
#        ny_B     -> number of years monitoring before project starts
#        ny_A     -> number of years monitoring after  project starts
#     Marginal Means
#        These are used to form the BACI contrast of interest
#        mu_IA, mu_IB, mu_CA, mu_CB (i.e mean of Treatment-After,
#        Treatment-Before, Control-After, Control_Before)
#        These are chosen based on the size of impact that may be biologically important

# This code was originally created by 
#    Tony Booth
#    Department of Ichthyology and Fisheries Science, 
#    Rhodes University, 
#    PO Box 94, 
#    Grahamstown 6140 SOUTH AFRICA
#    t.booth@ru.ac.za

# The computations are based on the 
#    Stroup, W. W. (1999)
#    Mixed model procedures to assess power, precision, and sample size in the design of experiments.
# paper where "dummy" data is generated and "analyzed" and the resulting F-statistics etc
# provide information needed to compute the power


#-----------------------------------------------



# An illustration of how to find the power for one scenario
baci.power(n_IA=3, n_IB=3, n_CA=3, n_CB=3,
           ns_I=3, ns_C=3, 
           ny_B=3, ny_A=2, 
           mu_IA=5.0, mu_IB=5.5, mu_CA=4.5, mu_CB=4.5, 
           sdYear=0.25, sdSite=0.75, sdSiteYear=0.1, sdResid=0.75)

##   alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB n_CA n_CB ns_I ns_C ny_B
## 1  0.05   0.75   0.25        0.1    0.75    3    3    3    3    3    3    3
##   ny_A mu_IA mu_IB mu_CA mu_CB baci   baci.se dfdenom      ncp   Fcrit
## 1    2     5   5.5   4.5   4.5 -0.5 0.3312434      19 2.278481 4.38075
##       power    Tcrit os.power1    os.power2
## 1 0.2996488 1.729133 0.4249189 0.0009589156

vc <- as.data.frame(VarCorr(result.lmer))
vc

##          grp        var1 var2       vcov     sdcor
## 1 YearF:Site (Intercept) <NA> 0.00000000 0.0000000
## 2       Site (Intercept) <NA> 0.58823981 0.7669679
## 3      YearF (Intercept) <NA> 0.04586215 0.2141545
## 4   Residual        <NA> <NA> 0.52340167 0.7234650

#find the power under various scenarios
##***part500b;

sdSiteYear= vc[ vc$grp=="YearF:Site",    "sdcor"]
sdResid   = vc[ vc$grp=="Residual",      "sdcor"]
sdYear    = vc[ vc$grp=="YearF",         "sdcor"]
sdSite    = vc[ vc$grp=="Site",          "sdcor"]

cat("Estimated variance components are \n",
        "sdSiteYear ", round(sdSiteYear,2), 
    ";\n sdSite ",     round(sdSite,2),
    ";\n sdYear ",     round(sdYear,2),
    "; \nsdResid ",    round(sdResid,2),"\n")

## Estimated variance components are 
##  sdSiteYear  0 ;
##  sdSite  0.77 ;
##  sdYear  0.21 ; 
## sdResid  0.72

cat("\n")

scenarios <- expand.grid(n_quad=seq(3,9,3),
                         ns_I = 3, ns_C=3,
                         ny_B =3,
                         ny_A =c(2,3,4),
                         baci_effect=seq(0,0.8,.1),
                         sdYear = sdYear, sdSite=sdSite, sdSiteYear=sdSiteYear, sdResid=sdResid)
head(scenarios)

##   n_quad ns_I ns_C ny_B ny_A baci_effect    sdYear    sdSite sdSiteYear
## 1      3    3    3    3    2           0 0.2141545 0.7669679          0
## 2      6    3    3    3    2           0 0.2141545 0.7669679          0
## 3      9    3    3    3    2           0 0.2141545 0.7669679          0
## 4      3    3    3    3    3           0 0.2141545 0.7669679          0
## 5      6    3    3    3    3           0 0.2141545 0.7669679          0
## 6      9    3    3    3    3           0 0.2141545 0.7669679          0
##    sdResid
## 1 0.723465
## 2 0.723465
## 3 0.723465
## 4 0.723465
## 5 0.723465
## 6 0.723465

power <- plyr::adply(scenarios,1,function(x){
  #browser()
  power <- baci.power(
    n_IA=x$n_quad, n_IB=x$n_quad, n_CA=x$n_quad, n_CB=x$n_quad, 
    ns_I=x$ns_I, ns_C=x$ns_C, 
    ny_B=x$ny_B, ny_A=x$ny_A, 
    mu_IA=x$baci_effect, mu_IB=0, mu_CA=0, mu_CB=0, 
    sdYear=x$sdYear, sdSite=x$sdSite, sdSiteYear=x$sdSiteYear, sdResid=x$sdResid)
  power
})

head(power)

##   n_quad baci_effect alpha    sdSite    sdYear sdSiteYear  sdResid n_IA n_IB
## 1      3           0  0.05 0.7669679 0.2141545          0 0.723465    3    3
## 2      6           0  0.05 0.7669679 0.2141545          0 0.723465    6    6
## 3      9           0  0.05 0.7669679 0.2141545          0 0.723465    9    9
## 4      3           0  0.05 0.7669679 0.2141545          0 0.723465    3    3
## 5      6           0  0.05 0.7669679 0.2141545          0 0.723465    6    6
## 6      9           0  0.05 0.7669679 0.2141545          0 0.723465    9    9
##   n_CA n_CB ns_I ns_C ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci   baci.se dfdenom
## 1    3    3    3    3    3    2     0     0     0     0    0 0.3113298      19
## 2    6    6    3    3    3    2     0     0     0     0    0 0.2201434      19
## 3    9    9    3    3    3    2     0     0     0     0    0 0.1797463      19
## 4    3    3    3    3    3    3     0     0     0     0    0 0.2784618      24
## 5    6    6    3    3    3    3     0     0     0     0    0 0.1969022      24
## 6    9    9    3    3    3    3     0     0     0     0    0 0.1607700      24
##   ncp    Fcrit power    Tcrit os.power1 os.power2
## 1   0 4.380750  0.05 1.729133      0.05      0.05
## 2   0 4.380750  0.05 1.729133      0.05      0.05
## 3   0 4.380750  0.05 1.729133      0.05      0.05
## 4   0 4.259677  0.05 1.710882      0.05      0.05
## 5   0 4.259677  0.05 1.710882      0.05      0.05
## 6   0 4.259677  0.05 1.710882      0.05      0.05

cat("\n")

head(power[,c("alpha","n_IA","n_IB","n_CA","n_CB","ny_B","ny_A","ns_I","ns_C","baci","power")])

##   alpha n_IA n_IB n_CA n_CB ny_B ny_A ns_I ns_C baci power
## 1  0.05    3    3    3    3    3    2    3    3    0  0.05
## 2  0.05    6    6    6    6    3    2    3    3    0  0.05
## 3  0.05    9    9    9    9    3    2    3    3    0  0.05
## 4  0.05    3    3    3    3    3    3    3    3    0  0.05
## 5  0.05    6    6    6    6    3    3    3    3    0  0.05
## 6  0.05    9    9    9    9    3    3    3    3    0  0.05

##***part500e;


# sink("baci-fry-power-R-500.txt", split=TRUE)
power[,c("alpha","n_IA","n_IB","n_CA","n_CB","ny_B","ny_A","ns_I","ns_C","baci","power")]

##    alpha n_IA n_IB n_CA n_CB ny_B ny_A ns_I ns_C baci      power
## 1   0.05    3    3    3    3    3    2    3    3  0.0 0.05000000
## 2   0.05    6    6    6    6    3    2    3    3  0.0 0.05000000
## 3   0.05    9    9    9    9    3    2    3    3  0.0 0.05000000
## 4   0.05    3    3    3    3    3    3    3    3  0.0 0.05000000
## 5   0.05    6    6    6    6    3    3    3    3  0.0 0.05000000
## 6   0.05    9    9    9    9    3    3    3    3  0.0 0.05000000
## 7   0.05    3    3    3    3    3    4    3    3  0.0 0.05000000
## 8   0.05    6    6    6    6    3    4    3    3  0.0 0.05000000
## 9   0.05    9    9    9    9    3    4    3    3  0.0 0.05000000
## 10  0.05    3    3    3    3    3    2    3    3  0.1 0.06074423
## 11  0.05    6    6    6    6    3    2    3    3  0.1 0.07161264
## 12  0.05    9    9    9    9    3    2    3    3  0.1 0.08259260
## 13  0.05    3    3    3    3    3    3    3    3  0.1 0.06374111
## 14  0.05    6    6    6    6    3    3    3    3  0.1 0.07768032
## 15  0.05    9    9    9    9    3    3    3    3  0.1 0.09179169
## 16  0.05    3    3    3    3    3    4    3    3  0.1 0.06594293
## 17  0.05    6    6    6    6    3    4    3    3  0.1 0.08214726
## 18  0.05    9    9    9    9    3    4    3    3  0.1 0.09857302
## 19  0.05    3    3    3    3    3    2    3    3  0.2 0.09367195
## 20  0.05    6    6    6    6    3    2    3    3  0.2 0.13875526
## 21  0.05    9    9    9    9    3    2    3    3  0.2 0.18456714
## 22  0.05    3    3    3    3    3    3    3    3  0.2 0.10605047
## 23  0.05    6    6    6    6    3    3    3    3  0.2 0.16410459
## 24  0.05    9    9    9    9    3    3    3    3  0.2 0.22282587
## 25  0.05    3    3    3    3    3    4    3    3  0.2 0.11518237
## 26  0.05    6    6    6    6    3    4    3    3  0.2 0.18276938
## 27  0.05    9    9    9    9    3    4    3    3  0.2 0.25077554
## 28  0.05    3    3    3    3    3    2    3    3  0.3 0.15016428
## 29  0.05    6    6    6    6    3    2    3    3  0.3 0.25340675
## 30  0.05    9    9    9    9    3    2    3    3  0.3 0.35399473
## 31  0.05    3    3    3    3    3    3    3    3  0.3 0.17876866
## 32  0.05    6    6    6    6    3    3    3    3  0.3 0.30986296
## 33  0.05    9    9    9    9    3    3    3    3  0.3 0.43311300
## 34  0.05    3    3    3    3    3    4    3    3  0.3 0.19979884
## 35  0.05    6    6    6    6    3    4    3    3  0.3 0.35031464
## 36  0.05    9    9    9    9    3    4    3    3  0.3 0.48755704
## 37  0.05    3    3    3    3    3    2    3    3  0.4 0.23052703
## 38  0.05    6    6    6    6    3    2    3    3  0.4 0.40730209
## 39  0.05    9    9    9    9    3    2    3    3  0.4 0.56050779
## 40  0.05    3    3    3    3    3    3    3    3  0.4 0.28113601
## 41  0.05    6    6    6    6    3    3    3    3  0.4 0.49604376
## 42  0.05    9    9    9    9    3    3    3    3  0.4 0.66554057
## 43  0.05    3    3    3    3    3    4    3    3  0.4 0.31766352
## 44  0.05    6    6    6    6    3    4    3    3  0.4 0.55551225
## 45  0.05    9    9    9    9    3    4    3    3  0.4 0.72937801
## 46  0.05    3    3    3    3    3    2    3    3  0.5 0.33208787
## 47  0.05    6    6    6    6    3    2    3    3  0.5 0.57758374
## 48  0.05    9    9    9    9    3    2    3    3  0.5 0.75142330
## 49  0.05    3    3    3    3    3    3    3    3  0.5 0.40674095
## 50  0.05    6    6    6    6    3    3    3    3  0.5 0.68324578
## 51  0.05    9    9    9    9    3    3    3    3  0.5 0.84696468
## 52  0.05    3    3    3    3    3    4    3    3  0.5 0.45862275
## 53  0.05    6    6    6    6    3    4    3    3  0.5 0.74662003
## 54  0.05    9    9    9    9    3    4    3    3  0.5 0.89487013
## 55  0.05    3    3    3    3    3    2    3    3  0.6 0.44823669
## 56  0.05    6    6    6    6    3    2    3    3  0.6 0.73427217
## 57  0.05    9    9    9    9    3    2    3    3  0.6 0.88594369
## 58  0.05    3    3    3    3    3    3    3    3  0.6 0.54307490
## 59  0.05    6    6    6    6    3    3    3    3  0.6 0.83230745
## 60  0.05    9    9    9    9    3    3    3    3  0.6 0.94727488
## 61  0.05    3    3    3    3    3    4    3    3  0.6 0.60519703
## 62  0.05    6    6    6    6    3    4    3    3  0.6 0.88258691
## 63  0.05    9    9    9    9    3    4    3    3  0.6 0.97091809
## 64  0.05    3    3    3    3    3    2    3    3  0.7 0.56910556
## 65  0.05    6    6    6    6    3    2    3    3  0.7 0.85441346
## 66  0.05    9    9    9    9    3    2    3    3  0.7 0.95827396
## 67  0.05    3    3    3    3    3    3    3    3  0.7 0.67448764
## 68  0.05    6    6    6    6    3    3    3    3  0.7 0.92641453
## 69  0.05    9    9    9    9    3    3    3    3  0.7 0.98658814
## 70  0.05    3    3    3    3    3    4    3    3  0.7 0.73811583
## 71  0.05    6    6    6    6    3    4    3    3  0.7 0.95654613
## 72  0.05    9    9    9    9    3    4    3    3  0.7 0.99438976
## 73  0.05    3    3    3    3    3    2    3    3  0.8 0.68376032
## 74  0.05    6    6    6    6    3    2    3    3  0.8 0.93122658
## 75  0.05    9    9    9    9    3    2    3    3  0.8 0.98797298
## 76  0.05    3    3    3    3    3    3    3    3  0.8 0.78710050
## 77  0.05    6    6    6    6    3    3    3    3  0.8 0.97354200
## 78  0.05    9    9    9    9    3    3    3    3  0.8 0.99751340
## 79  0.05    3    3    3    3    3    4    3    3  0.8 0.84331812
## 80  0.05    6    6    6    6    3    4    3    3  0.8 0.98731270
## 81  0.05    9    9    9    9    3    4    3    3  0.8 0.99925529

# sink()         



##***part510b;
power.plot <- ggplot(data=power, aes(x=baci_effect, y=power, color=as.factor(n_quad)))+
  ggtitle("Estimated power",
          subtitle=paste("alpha: ", power$alpha[1]))+
  geom_point()+
  geom_line()+
  ylim(0,1)+
  geom_hline(yintercept=0.8, color="blue")+
  xlab("BACI effect size (log scale)")+
  scale_color_discrete(name="# quadrats")+
  facet_wrap(~ny_A, ncol=2, labeller=label_both)
power.plot

##***part510e;
##*


###########################################################################
###########################################################################
###########################################################################

# What to do when number of sub-samples is very large and the design matrix is large
# which makes the power program is very small


sdSiteYear= vc[ vc$grp=="YearF:Site",    "sdcor"]
sdResid   = vc[ vc$grp=="Residual",      "sdcor"]
sdYear    = vc[ vc$grp=="YearF",         "sdcor"]
sdSite    = vc[ vc$grp=="Site",          "sdcor"]

##***part900b;
cat("Estimated variance components are \n",
    "sdSiteYear ", round(sdSiteYear,2), 
    ";\n sdSite ",     round(sdSite,2),
    ";\n sdYear ",     round(sdYear,2),
    "; \nsdResid ",    round(sdResid,2),"\n")

## Estimated variance components are 
##  sdSiteYear  0 ;
##  sdSite  0.77 ;
##  sdYear  0.21 ; 
## sdResid  0.72

cat("\n")

# We can make an equivalent baci design with 1 observations/site-year (the average)
# and revised sdSiteYear and sdResid

# for example consider a baci design with n_quad=10 in all site.years

cat("\nPower computed using individual observations\n")

## 
## Power computed using individual observations

power.indiv <- baci.power(n_IA=10, n_IB=10, n_CA=10, n_CB=10,
           ns_I=3, ns_C=3, 
           ny_B=3, ny_A=2, 
           mu_IA=.4, mu_IB=0, mu_CA=0, mu_CB=0, 
           sdYear=sdYear, sdSite=sdSite, sdSiteYear=sdSiteYear, sdResid=sdResid)
power.indiv

##   alpha    sdSite    sdYear sdSiteYear  sdResid n_IA n_IB n_CA n_CB ns_I ns_C
## 1  0.05 0.7669679 0.2141545          0 0.723465   10   10   10   10    3    3
##   ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci   baci.se dfdenom      ncp   Fcrit
## 1    3    2   0.4     0     0     0  0.4 0.1705223      19 5.502466 4.38075
##       power    Tcrit os.power1    os.power2
## 1 0.6049787 1.729133 0.7310784 4.589899e-05

# this has equivalent power when you analyze the "averages" and create revised sdSiteYear and sdResid
sdSiteYear.avg <- sqrt(sdSiteYear^2 + sdResid^2/10)
sdResid.avg    <- 0

cat("\nPower computed using averages\n")

## 
## Power computed using averages

power.avg <- baci.power(n_IA=1, n_IB=1, n_CA=1, n_CB=1,
                          ns_I=3, ns_C=3, 
                          ny_B=3, ny_A=2, 
                          mu_IA=.4, mu_IB=0, mu_CA=0, mu_CB=0, 
                          sdYear=sdYear, sdSite=sdSite, sdSiteYear=sdSiteYear.avg, sdResid=sdResid.avg)
power.avg

##   alpha    sdSite    sdYear sdSiteYear sdResid n_IA n_IB n_CA n_CB ns_I ns_C
## 1  0.05 0.7669679 0.2141545  0.2287797       0    1    1    1    1    3    3
##   ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci   baci.se dfdenom      ncp   Fcrit
## 1    3    2   0.4     0     0     0  0.4 0.1705223      19 5.502466 4.38075
##       power    Tcrit os.power1    os.power2
## 1 0.6049787 1.729133 0.7310784 4.589899e-05

##***part900e;


##***part901b;
scenarios <- expand.grid(n_quad=seq(3,9,3),
                         ns_I = 3, ns_C=3,
                         ny_B =3,
                         ny_A =c(2,3,4),
                         baci_effect=seq(0,0.8,.1),
                         sdYear = sdYear, sdSite=sdSite, sdSiteYear=sdSiteYear, sdResid=sdResid)
cat("Part of the scenarios dataset\n")

## Part of the scenarios dataset

head(scenarios)

##   n_quad ns_I ns_C ny_B ny_A baci_effect    sdYear    sdSite sdSiteYear
## 1      3    3    3    3    2           0 0.2141545 0.7669679          0
## 2      6    3    3    3    2           0 0.2141545 0.7669679          0
## 3      9    3    3    3    2           0 0.2141545 0.7669679          0
## 4      3    3    3    3    3           0 0.2141545 0.7669679          0
## 5      6    3    3    3    3           0 0.2141545 0.7669679          0
## 6      9    3    3    3    3           0 0.2141545 0.7669679          0
##    sdResid
## 1 0.723465
## 2 0.723465
## 3 0.723465
## 4 0.723465
## 5 0.723465
## 6 0.723465

power.avg <- plyr::adply(scenarios,1,function(x){
  #browser()
  # do the short cut
  x$sdSiteYear.avg <- sqrt(x$sdSiteYear^2 + x$sdResid^2/x$n_quad) # Note change here
  x$sdResid.avg    <- 0
  
  power <- baci.power(
    n_IA=1, n_IB=1, n_CA=1, n_CB=1, # notice change here
    ns_I=x$ns_I, ns_C=x$ns_C, 
    ny_B=x$ny_B, ny_A=x$ny_A, 
    mu_IA=x$baci_effect, mu_IB=0, mu_CA=0, mu_CB=0, 
    sdYear=x$sdYear, sdSite=x$sdSite, 
    sdSiteYear=x$sdSiteYear.avg, sdResid=x$sdResid.avg) # note change here
  power
})

cat("\nPart of the power dataset computed on averages\n")

## 
## Part of the power dataset computed on averages

head(power.avg)

##   n_quad baci_effect alpha    sdSite    sdYear sdSiteYear sdResid n_IA n_IB
## 1      3           0  0.05 0.7669679 0.2141545  0.4176927       0    1    1
## 2      6           0  0.05 0.7669679 0.2141545  0.2953534       0    1    1
## 3      9           0  0.05 0.7669679 0.2141545  0.2411550       0    1    1
## 4      3           0  0.05 0.7669679 0.2141545  0.4176927       0    1    1
## 5      6           0  0.05 0.7669679 0.2141545  0.2953534       0    1    1
## 6      9           0  0.05 0.7669679 0.2141545  0.2411550       0    1    1
##   n_CA n_CB ns_I ns_C ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci   baci.se dfdenom
## 1    1    1    3    3    3    2     0     0     0     0    0 0.3113298      19
## 2    1    1    3    3    3    2     0     0     0     0    0 0.2201434      19
## 3    1    1    3    3    3    2     0     0     0     0    0 0.1797463      19
## 4    1    1    3    3    3    3     0     0     0     0    0 0.2784618      24
## 5    1    1    3    3    3    3     0     0     0     0    0 0.1969022      24
## 6    1    1    3    3    3    3     0     0     0     0    0 0.1607700      24
##   ncp    Fcrit power    Tcrit os.power1 os.power2
## 1   0 4.380750  0.05 1.729133      0.05      0.05
## 2   0 4.380750  0.05 1.729133      0.05      0.05
## 3   0 4.380750  0.05 1.729133      0.05      0.05
## 4   0 4.259677  0.05 1.710882      0.05      0.05
## 5   0 4.259677  0.05 1.710882      0.05      0.05
## 6   0 4.259677  0.05 1.710882      0.05      0.05

##***part901e;

# the power plot on the "averages" is the same as before

##***part910b;
power.plot <- ggplot(data=power.avg, aes(x=baci_effect, y=power, color=as.factor(n_quad)))+
  ggtitle("Estimated power computed using short cut",
          subtitle=paste("alpha: ", power$alpha[1]))+
  geom_point()+
  geom_line()+
  ylim(0,1)+
  geom_hline(yintercept=0.8, color="blue")+
  xlab("BACI effect size (log scale)")+
  scale_color_discrete(name="# quadrats")+
  facet_wrap(~ny_A, ncol=2, labeller=label_both)
power.plot

##***part910e;




##*###################################################################################################
##*###################################################################################################
##*###################################################################################################
##*###################################################################################################

### Bayesian analysis

baci.bayesian <- brms::brm(logfry ~ Period + SiteClass + Period:SiteClass +
                             (1|YearF)+ (1|Site)+(1|YearF:Site), 
                           data=fry
                           ,sample_prior=TRUE
                           ,seed=23432432)

## Compiling Stan program...
## Trying to compile a simple C file

## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.6.3.2)’
## using SDK: ‘NA’
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Users/cschwarz/Rlibs/Rcpp/include/"  -I"/Users/cschwarz/Rlibs/RcppEigen/include/"  -I"/Users/cschwarz/Rlibs/RcppEigen/include/unsupported"  -I"/Users/cschwarz/Rlibs/BH/include" -I"/Users/cschwarz/Rlibs/StanHeaders/include/src/"  -I"/Users/cschwarz/Rlibs/StanHeaders/include/"  -I"/Users/cschwarz/Rlibs/RcppParallel/include/"  -I"/Users/cschwarz/Rlibs/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Users/cschwarz/Rlibs/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/x86_64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Users/cschwarz/Rlibs/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Users/cschwarz/Rlibs/RcppEigen/include/Eigen/Dense:1:
## In file included from /Users/cschwarz/Rlibs/RcppEigen/include/Eigen/Core:19:
## /Users/cschwarz/Rlibs/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
##   679 | #include <cmath>
##       |          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Warning message:
## In system2("xcrun", "--show-sdk-version", TRUE, TRUE) :
##   running command ''xcrun' --show-sdk-version 2>&1' had status 1

## Start sampling

## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 4.4e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.44 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.458 seconds (Warm-up)
## Chain 1:                0.327 seconds (Sampling)
## Chain 1:                0.785 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 3.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.31 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.476 seconds (Warm-up)
## Chain 2:                0.326 seconds (Sampling)
## Chain 2:                0.802 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1.5e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.456 seconds (Warm-up)
## Chain 3:                0.441 seconds (Sampling)
## Chain 3:                0.897 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 1.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.402 seconds (Warm-up)
## Chain 4:                0.331 seconds (Sampling)
## Chain 4:                0.733 seconds (Total)
## Chain 4:

## Warning: There were 4 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.

## Warning: Examine the pairs() plot to diagnose sampling problems

vc.priorsamps.wide<-brms::prior_samples(baci.bayesian)

## Warning: 'prior_samples' is deprecated. Please use 'prior_draws' instead.

vc.priorsamps.long <- tidyr::pivot_longer(vc.priorsamps.wide,
                                          cols=names(vc.priorsamps.wide)[-1],
                                          names_to="vc",
                                          values_to="prior")

ggplot(data=vc.priorsamps.long, aes(x=prior, y=after_stat(density)))+
  ggtitle("Prior distribution for the SD in the BACI mode")+
  geom_histogram(alpha=0.5)+
  geom_density()+
  facet_wrap(~vc, ncol=2,scales="free")+
  xlim(0,20)

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

## Warning: Removed 57 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 57 rows containing non-finite outside the scale range
## (`stat_density()`).

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_bar()`).

# We will generate a flat prior with a mean of 0 and an sd of 10 for each fixed parameter
vc.priorsamp.fixed <- plyr::ldply(c("Intercept","SiteClass","Period","SiteClass:Period"), function(x){
  data.frame(fixed=x, prior=rnorm(50000,mean=0, sd=50))
})


ggplot(data=vc.priorsamp.fixed, aes(x=prior, y=after_stat(density)))+
  ggtitle(" Prior distribution for the fixed effects in the BACI model")+
  geom_histogram(alpha=0.5)+
  geom_density()+
  facet_wrap(~fixed, ncol=2,scales="free")

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Get the posterior for the BACI contrast
  
post.baci <- brms::as_draws_df(baci.bayesian, variable="b_PeriodBefore:SiteClassUpstream")
post.baci <- plyr::rename(post.baci, c("b_PeriodBefore:SiteClassUpstream"="BACI"))  
head(post.baci)

## # A draws_df: 6 iterations, 1 chains, and 1 variables
##     BACI
## 1 -0.358
## 2 -0.044
## 3  0.235
## 4 -0.136
## 5  0.018
## 6 -0.089
## # ... hidden reserved variables {'.chain', '.iteration', '.draw'}

#post.baci <- data.frame(post.baci=unlist(brms::posterior_samples(baci.bayesian, pars="b_PeriodBefore:SiteClassUpstream")))

ggplot(data=post.baci, aes(x=BACI, y=after_stat(density)))+
  ggtitle(#"Posterior distribution of BACI contrast"
    " ", subtitle="Red lines indicate 95% credible interval")+
  geom_histogram(alpha=0.5)+
  geom_density()+
  geom_vline(xintercept=0)+
  geom_vline(xintercept=quantile(post.baci$BACI, prob=c(0.025,0.975)), color="red")

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# The posterior-belief distribution appears to be slightly peaked just below 0 (the value of 0 indicates no project effect). 
# The red lines indicate the 95% credible interval for the BACI effect, i.e. the bracket a region that contains
# 95% of our belief.
# 
# The wide distribution indicates that the posterior belief is quite uncertain.
# We can summarize the posterior belief distribution using summary statistics (mean, sd, and 95% credible interval) are:
  
c(mean=mean(post.baci$BACI),
  sd  =sd  (post.baci$BACI),
  lci =quantile(post.baci$BACI, prob=0.025),
  uci =quantile(post.baci$BACI, prob=0.975))

##        mean          sd    lci.2.5%   uci.97.5% 
## -0.04670305  0.47413064 -0.96217880  0.89319357

# We see that the 
# 
# - mean of the posterior belief distribution is close to the estimate of the BACI contrast from
# the standard linear model fit, 
# - the SD is close to the baci fit's SE, and 
# - the upper and lower credible intervals are comparable to the 95% confidence intervals.
# 
# This is no accident and is an artefact of the use of noninformative uniform priors and the fairly large dataset
# where the data "overwhelms" the prior.
# 
# So the Bayesian analysis, so far, has not presented anything new. However, we can now made statements about
# our prior belief of an effect by looking at the previous histogram and computing the proportion of posterior samples in various ways.
# 
# For example, we can now say that there is `r round(mean(abs(post.baci$BACI) > .5),2)` posterior belief that the absolute(BACI effect) is
# 0.5 (a 50% differential in the means) or greater (@fig-post-belief1).


post.baci$BACI.g5 <- abs(post.baci$BACI) >= .5
belief <- mean(abs(post.baci$BACI) > .5)

ggplot(data=post.baci, aes(x=BACI))+
  ggtitle("Posterior belief that abs(baci contrast)> .5 (corresponding to large effect)",
       subtitle="Blue are is posterior belief that abs(baci contrast) > .5")+
  geom_histogram(alpha=0.5, aes(fill=as.factor(BACI.g5)), breaks=seq(-2,2,.1))+
  #geom_density()+
  geom_vline(xintercept=0)+
  geom_vline(xintercept=c(-.5,.5), color="red")+
  scale_fill_discrete(name="abs(baci) > .5")+
  annotate("text", label=paste0("Posterior belief abs(baci)>.5: ", round(belief,2)),
             x=-Inf, y=Inf, hjust=0, vjust=1.5)

# Similarly, there is a `r round(mean(abs(post.baci$BACI) <= .1),2)` posterior belief that the absolute(BACI effect) is 0.1 or less
# (@fig-post-belief2).


post.baci$BACI.l1 <- abs(post.baci$BACI) <= .1
belief <- mean(abs(post.baci$BACI) <= .1)

ggplot(data=post.baci, aes(x=BACI))+
  ggtitle("Posterior belief that abs(baci contrast) < .1 (corresponding to small effect)",
       subtitle="Blue are is posterior belief that abs(baci contrast) < .1")+
  geom_histogram(alpha=0.5, aes(fill=as.factor(BACI.l1)), breaks=seq(-2,2,.1))+
  #geom_density()+
  geom_vline(xintercept=0)+
  geom_vline(xintercept=c(-.1,.1), color="red")+
  scale_fill_discrete(name="abs(baci) < .1")+
  annotate("text", label=paste0("Posterior belief abs(baci)<.1: ", round(belief,2)),
            x=-Inf, y=Inf, hjust=0, vjust=1.5)

# **So unlike the classical BACI analysis, you can quantify the belief of a large impact or small impact in a natural way.**
# This is the prime advantage of a Bayesian analysis. 
# 
# Confidence intervals cannot be used in this way. As indicated by Kruschke and Liddell (2018):
# 
# > Highest density interval vs. confidence interval Here we
# > reiterate some essential differences between a Bayesian
# > credible interval (HDI) and a frequentist confidence interval (CI).
# > An important quality of the posterior 95% HDI is that it really does indicate the 95%
# > most probable values of the parameter, given the data. The
# > posterior distribution depends only on the actually observed
# > data (and the prior), and does not depend on the stopping
# > or testing intentions of the analyst. The frequentist CI is
# > often misinterpreted as if it were a posterior distribution,
# > because what analysts intuitively want from their analysis is
# > the Bayesian posterior distribution, as we discuss more later.
# 
# The Bayesian credible interval does not depend on how many hypothesis tests are being done
# (i.e., there are no corrections for multiple testing such as Tukey's method).
# 
# However, it makes no sense to ask "What is posterior belief of no-effect?" because this would ask for the probability that the
# baci effect = 0 exactly which is 0. Similarly, it makes no sense to ask "What is the posterior belief of an effect?"
# because this would ask for the probability that the baci effect $\ne$ 0 which is 1. Only posterior beliefs of intervals
# are sensible.
# 
# We can get a complete summary of a Bayesian fit for all parameters of the model (not shown) in similar ways.


# summarize the bayesian fit
temp <- summary(baci.bayesian)

## Warning: There were 4 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup

temp

##  Family: gaussian 
##   Links: mu = identity 
## Formula: logfry ~ Period + SiteClass + Period:SiteClass + (1 | YearF) + (1 | Site) + (1 | YearF:Site) 
##    Data: fry (Number of observations: 52) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Multilevel Hyperparameters:
## ~Site (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.07      0.52     0.45     2.42 1.00     1598     1774
## 
## ~YearF (Number of levels: 5) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.41      0.38     0.02     1.45 1.00     1200     1682
## 
## ~YearF:Site (Number of levels: 28) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.15      0.12     0.01     0.43 1.00     2003     1554
## 
## Regression Coefficients:
##                                Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept                          5.44      0.80     3.76     7.01 1.00
## PeriodBefore                       0.17      0.58    -0.93     1.41 1.00
## SiteClassUpstream                 -0.85      1.03    -2.82     1.39 1.00
## PeriodBefore:SiteClassUpstream    -0.05      0.47    -0.96     0.89 1.00
##                                Bulk_ESS Tail_ESS
## Intercept                          1497     1395
## PeriodBefore                       2094     1136
## SiteClassUpstream                  1506     1736
## PeriodBefore:SiteClassUpstream     3380     2239
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.75      0.09     0.60     0.94 1.00     3617     2980
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

# #### Variance components
# 
# We can also get the posterior belief distributions of the variance effects and summarize
# the posterior-beliefs in @tbl-vc-bayesian.


vc.bayesian.1 <- plyr::ldply(temp$random, function(x){
  x
})
vc.bayesian.2 <- temp$spec_pars
vc.bayesian.2$.id <- "Residual"

vc.bayesian <- plyr::rbind.fill(vc.bayesian.1, vc.bayesian.2)
#vc.bayesian

vc.bayesian

##          .id  Estimate  Est.Error    l-95% CI  u-95% CI     Rhat Bulk_ESS
## 1       Site 1.0693147 0.51880207 0.445696895 2.4166136 1.002305 1598.029
## 2      YearF 0.4132569 0.38441194 0.016838186 1.4495008 1.001389 1200.395
## 3 YearF:Site 0.1473127 0.11667138 0.005708658 0.4259360 1.000029 2002.670
## 4   Residual 0.7499756 0.08605112 0.602435688 0.9368301 1.001326 3617.338
##   Tail_ESS
## 1 1774.168
## 2 1682.208
## 3 1553.777
## 4 2979.773

# The mean of the posteriors fairly different from the classical baci fit. This is due, in part, to the influence
# of the prior distribution for the standard deviations seen earlier. In particular, notice how the
# estimate variance component for the year.site interaction has been pulled away from 0.
# 
# Fortunately, the Site and Year variance components "cancel" when estimating the BACI contrast and so have no impact
# on the result of interest.
# 
# A fuller Bayesian analysis would investigate the sensitivity of the results to different choices of prior distributions
# 
# The uncertainty of the variance components (the Est.Error in the above table) is NOT available from a classical baci fit (but 
# could be obtained with some work).

baci-fry.r

cschwarz

2025-12-14