# BACI-fry
# This example is based (loosely) on a consulting project from an
# Independent Power Producer who was interested in monitoring the
# effects of an in-stream hydroelectric project.
# The response variable for the project was the minnow density at
# different locations in the stream.
# The monitoring design has the river divided into six segments of
# which three are upstream of the diversion
# and three are downstream of the diversion. In each segment, several sites
# have been located where minnow fry
# congregate. In each of the sites, minnow traps are set for various lengths
# of time.
# At the end of the soaking period, the traps are removed and the number of
# minnows are counted and classified by species.
# The counts are standardized to a common soaking time to adjust for the
# different soak-time.
# [This could be done directly in the analysis by using the soak-time as a
# covariate, but the soak-time data was not available.]
# An initial pre-project monitoring study was run in 2000 to 2002. The
# project became operational in late 2002, and
# post-project monitoring continued in 2003 and 2004.
# A nice slide presentation on lmer() is found at
# http://www.stat.wisc.edu/~bates/PotsdamGLMM/LMMD.pdf
library(brms) # for the bayesian fit
## Loading required package: Rcpp
## Loading 'brms' package (version 2.23.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:stats':
##
## ar
library(ggplot2)
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:brms':
##
## ngrps
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(plyr)
source("../../schwarz.functions.r")
#source('http://www.stat.sfu.ca/~cschwarz/Stat-Ecology-Datasets/schwarz.functions.r')
# Get the BACI power program
source("../baci-power.r")
# Read in the data
##***part001b;
# sink('baci-fry-R-001.txt', split=TRUE)
fry <- read.csv('baci-fry.csv', header=TRUE, as.is=TRUE, strip.white=TRUE)
fry$logfry <- log(fry$Fry)
fry$Period <- factor(fry$Period)
fry$Site <- factor(fry$Site)
fry$SiteClass <- factor(fry$SiteClass)
fry$Period <- factor(fry$Period)
fry$YearF <- factor(fry$Year)
fry[1:12,]
## SiteClass Site Sample Year Period Fry logfry YearF
## 1 Downstream D 1 2000 Before 185 5.220356 2000
## 2 Downstream E 1 2000 Before 258 5.552960 2000
## 3 Downstream E 2 2000 Before 630 6.445720 2000
## 4 Downstream E 3 2000 Before 116 4.753590 2000
## 5 Downstream F 1 2000 Before 208 5.337538 2000
## 6 Downstream F 2 2000 Before 261 5.564520 2000
## 7 Downstream D 1 2001 Before 321 5.771441 2001
## 8 Downstream E 1 2001 Before 65 4.174387 2001
## 9 Downstream E 2 2001 Before 755 6.626718 2001
## 10 Downstream F 1 2001 Before 476 6.165418 2001
## 11 Downstream F 2 2001 Before 521 6.255750 2001
## 12 Downstream E 1 2002 Before 198 5.288267 2002
##***part001e;
# sink()
str(fry)
## 'data.frame': 52 obs. of 8 variables:
## $ SiteClass: Factor w/ 2 levels "Downstream","Upstream": 1 1 1 1 1 1 1 1 1 1 ...
## $ Site : Factor w/ 6 levels "A","B","C","D",..: 4 5 5 5 6 6 4 5 5 6 ...
## $ Sample : int 1 1 2 3 1 2 1 1 2 1 ...
## $ Year : int 2000 2000 2000 2000 2000 2000 2001 2001 2001 2001 ...
## $ Period : Factor w/ 2 levels "After","Before": 2 2 2 2 2 2 2 2 2 2 ...
## $ Fry : int 185 258 630 116 208 261 321 65 755 476 ...
## $ logfry : num 5.22 5.55 6.45 4.75 5.34 ...
## $ YearF : Factor w/ 5 levels "2000","2001",..: 1 1 1 1 1 1 2 2 2 2 ...
# look at the std dev to mean plot for the original and log() data
# Compute the averages etc
# sink('baci-fry-R-020.txt', split=TRUE)
##***part020b;
mean.fry <- plyr::ddply(fry, c("Year","YearF","Site","SiteClass","Period"), function(x){
n <- nrow(x)
nmiss <- sum(is.na(x$Fry))
mean.fry <- mean(x$Fry, na.rm=TRUE)
sd.fry <- sd( x$Fry, na.rm=TRUE)
mean.logfry <- mean(x$logfry, na.rm=TRUE)
sd.logfry <- sd (x$logfry, na.rm=TRUE)
res<- data.frame(n, nmiss, mean.fry, sd.fry, mean.logfry, sd.logfry)
})
mean.fry[1:5,]
## Year YearF Site SiteClass Period n nmiss mean.fry sd.fry mean.logfry
## 1 2000 2000 A Upstream Before 2 0 25.5000 30.40559 2.618221
## 2 2000 2000 B Upstream Before 2 0 306.5000 167.58431 5.644266
## 3 2000 2000 C Upstream Before 2 0 124.5000 105.35891 4.602664
## 4 2000 2000 D Downstream Before 1 0 185.0000 NA 5.220356
## 5 2000 2000 E Downstream Before 3 0 334.6667 265.43800 5.584090
## sd.logfry
## 1 1.7422073
## 2 0.5767497
## 3 0.9767137
## 4 NA
## 5 0.8464942
##***part020e;
# sink()
# Plot the std to mean ratios
##***part021b;
sdmeanplot <- ggplot(data=mean.fry, aes(x=mean.fry, y=sd.fry))+
ggtitle("SD vs Mean - original data")+
geom_point()
sdmeanplot
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

##***part021e;
# ggsave(plot=sdmeanplot, file='baci-fry-R-300-diagnostic.png', h=4, w=6, units="in", dpi=300)
# Plot the std to mean ratios
##***part021logb;
sdmeanplotlog <- ggplot(data=mean.fry, aes(x=mean.logfry, y=sd.logfry))+
ggtitle("SD vs Mean - logged data")+
geom_point()
sdmeanplotlog
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).

##***part021loge;
# ggsave(plot=sdmeanplot, file='baci-fry-R-021b.png', h=4, w=6, units="in", dpi=300)
# Plot the mean log-fry over time by site
##***part025b;
meanplot <- ggplot(data=mean.fry, aes(x=Year, y=mean.logfry,
group=Site, color=SiteClass, shape=Site))+
ggtitle("Changes in mean over time")+
geom_point(position=position_dodge(w=0.2))+
geom_line(position=position_dodge(w=0.2))+
geom_vline(xintercept=-0.5+min(mean.fry$Year[as.character(mean.fry$Period) == "After"]))
meanplot

##***part025e;
# ggsave(plot=meanplot, file="baci-fry-R-025.png", h=4, w=6, units="in", dpi=300)
#####################################################################################
# The analysis of the mean(log(Fry))
# sink('baci-fry-R-200-type3.txt', split=TRUE)
##***part200b;
# Notice that we use the YearF rather than Year in the random statements
# Because we are analyzing the mean, the Site:Year term is the residual error
result.mean.lmer <- lmerTest::lmer(mean.logfry ~ Period + SiteClass +
Period:SiteClass+
(1|YearF)+(1|Site),
data=mean.fry)
##***part200e;
# sink()
##***part200anovab;
anova(result.mean.lmer, ddf="Kenward-Roger")
## Type III Analysis of Variance Table with Kenward-Roger's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Period 0.043595 0.043595 1 2.9797 0.2987 0.6230
## SiteClass 0.232904 0.232904 1 4.0194 1.5957 0.2748
## Period:SiteClass 0.019698 0.019698 1 17.1098 0.1350 0.7179
##***part200anovae;
summary(result.mean.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: mean.logfry ~ Period + SiteClass + Period:SiteClass + (1 | YearF) +
## (1 | Site)
## Data: mean.fry
##
## REML criterion at convergence: 45.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.80986 -0.68657 0.04109 0.61043 1.58328
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site (Intercept) 0.6239 0.7899
## YearF (Intercept) 0.0606 0.2462
## Residual 0.1460 0.3820
## Number of obs: 28, groups: Site, 6; YearF, 5
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.4217 0.5188 5.7813 10.451 5.71e-05 ***
## PeriodBefore 0.2023 0.3141 5.5010 0.644 0.545
## SiteClassUpstream -0.7823 0.6864 4.6649 -1.140 0.309
## PeriodBefore:SiteClassUpstream -0.1097 0.2979 17.2160 -0.368 0.717
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PrdBfr StClsU
## PeriodBefor -0.370
## StClssUpstr -0.671 0.139
## PrdBfr:StCU 0.194 -0.515 -0.266
# sink('baci-fry-R-200-vc.txt', split=TRUE)
##***part200vcb;
# Extract the variance components
vc <- VarCorr(result.mean.lmer)
vc
## Groups Name Std.Dev.
## Site (Intercept) 0.78987
## YearF (Intercept) 0.24618
## Residual 0.38205
##***part200vce;
# sink()
# To extract the variance components for use in R, create a data frame
vc.df <- as.data.frame(vc)
YearSigma <- vc.df[grep("YearF" ,vc.df$grp),"sdcor"]
SiteSigma <- vc.df[grep("Site" ,vc.df$grp),"sdcor"]
ResidualSigma<- vc.df[grep("Residual",vc.df$grp),"sdcor"]
# emmeans after a lm() fit
# sink('baci-fry-R-200LSM-SiteClass.txt', split=TRUE)
##***parts200LSM-SiteClassb;
result.mean.lmer.emmo.S <- emmeans::emmeans(result.mean.lmer, ~SiteClass)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means for SiteClass \n\n")
##
## Estimated marginal means for SiteClass
summary(result.mean.lmer.emmo.S)
## SiteClass emmean SE df lower.CL upper.CL
## Downstream 5.52 0.483 4.5 4.24 6.81
## Upstream 4.69 0.480 4.4 3.40 5.97
##
## Results are averaged over the levels of: Period
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***parts200LSM-SiteClasse;
# sink()
# sink('baci-fry-R-200LSM-Period.txt', split=TRUE)
##***part200LSM-Periodb;
result.mean.lmer.emmo.P <- emmeans::emmeans(result.mean.lmer, ~Period)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means for Period \n\n")
##
## Estimated marginal means for Period
summary(result.mean.lmer.emmo.P)
## Period emmean SE df lower.CL upper.CL
## After 5.03 0.385 6.03 4.09 5.97
## Before 5.18 0.365 5.39 4.26 6.10
##
## Results are averaged over the levels of: SiteClass
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***part200LSM-Periode;
# sink()
# sink('baci-fry-R-200LSM-int.txt', split=TRUE)
##***part200LSM-intb;
result.mean.lmer.emmo.SP <- emmeans::emmeans(result.mean.lmer, ~SiteClass:Period)
cat("\nEstimated marginal means \n\n")
##
## Estimated marginal means
summary(result.mean.lmer.emmo.SP)
## SiteClass Period emmean SE df lower.CL upper.CL
## Downstream After 5.42 0.519 5.73 4.14 6.71
## Upstream After 4.64 0.512 5.47 3.36 5.92
## Downstream Before 5.62 0.497 4.98 4.34 6.90
## Upstream Before 4.73 0.494 4.87 3.45 6.01
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***part200LSM-inte;
# sink()
# Estimate the BACI contrast
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.mean.lmer)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]
## Estimate Std. Error df t value Pr(>|t|)
## -0.1096870 0.2978614 17.2159813 -0.3682485 0.7171768
# sink("baci-fry-R-200baci.txt", split=TRUE)
##***part200bacib;
# Estimate the BACI contrast along with a se
summary(contrast(result.mean.lmer.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)
## contrast estimate SE df lower.CL upper.CL t.ratio p.value
## baci -0.11 0.299 17.1 -0.739 0.52 -0.367 0.7179
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***part200bacie;
# sink()
# Check the residuals etc
##***part200diagnosticb;
diagplot <- sf.autoplot.lmer(result.mean.lmer)
## Loading required package: broom.mixed
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: lattice
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
plot(diagplot)

##***part200diagnostice;
# ggsave(plot=diagplot, file='baci-fry-R-200-diagnostic.png', h=4, w=6, units="in", dpi=300)
#################################################################################
# The analysis of the individual values
# Results will differ slightly from answers on mean because design is unbalanced
# sink('baci-fry-R-300-type3.txt', split=TRUE)
##***part300b;
# Notice that we use the YearF variable in the random effects
result.lmer <- lmerTest::lmer(logfry ~ Period + SiteClass + Period:SiteClass +
(1|YearF)+ (1|Site)+(1|YearF:Site), data=fry)
## boundary (singular) fit: see help('isSingular')
##***part300e;
# sink()
##***part300anovab;
anova(result.lmer, ddf="Kenward-Roger")
## Type III Analysis of Variance Table with Kenward-Roger's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Period 0.10071 0.10071 1 3.0855 0.1924 0.6898
## SiteClass 0.90997 0.90997 1 4.0254 1.7386 0.2573
## Period:SiteClass 0.00662 0.00662 1 16.1019 0.0126 0.9119
##***part300anovae;
summary(result.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: logfry ~ Period + SiteClass + Period:SiteClass + (1 | YearF) +
## (1 | Site) + (1 | YearF:Site)
## Data: fry
##
## REML criterion at convergence: 126.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.86112 -0.53960 0.06963 0.55065 1.80479
##
## Random effects:
## Groups Name Variance Std.Dev.
## YearF:Site (Intercept) 0.00000 0.0000
## Site (Intercept) 0.58824 0.7670
## YearF (Intercept) 0.04586 0.2142
## Residual 0.52340 0.7235
## Number of obs: 52, groups: YearF:Site, 28; Site, 6; YearF, 5
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.47529 0.53445 6.52426 10.245 2.93e-05
## PeriodBefore 0.15025 0.36306 7.78865 0.414 0.690
## SiteClassUpstream -0.85822 0.71615 5.67438 -1.198 0.278
## PeriodBefore:SiteClassUpstream -0.04761 0.41842 41.41635 -0.114 0.910
##
## (Intercept) ***
## PeriodBefore
## SiteClassUpstream
## PeriodBefore:SiteClassUpstream
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PrdBfr StClsU
## PeriodBefor -0.423
## StClssUpstr -0.686 0.228
## PrdBfr:StCU 0.265 -0.616 -0.373
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# sink('baci-fry-R-300-vc.txt', split=TRUE)
##***part300vcb;
# Extract the variance components
vc <- VarCorr(result.lmer)
vc
## Groups Name Std.Dev.
## YearF:Site (Intercept) 0.00000
## Site (Intercept) 0.76697
## YearF (Intercept) 0.21415
## Residual 0.72347
##***part300vce;
# sink()
# emmeans after a lm() fit
# sink('baci-fry-R-300LSM-SiteClass.txt', split=TRUE)
##***parts300LSM-SiteClassb;
result.lmer.emmo.S <- emmeans::emmeans(result.lmer, ~SiteClass)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means for SiteClass \n\n")
##
## Estimated marginal means for SiteClass
summary(result.lmer.emmo.S)
## SiteClass emmean SE df lower.CL upper.CL
## Downstream 5.55 0.489 4.47 4.25 6.85
## Upstream 4.67 0.477 4.19 3.37 5.97
##
## Results are averaged over the levels of: Period
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***parts300LSM-SiteClasse;
# sink()
# sink('baci-fry-R-300LSM-Period.txt', split=TRUE)
##***part300LSM-Periodb;
result.lmer.emmo.P <- emmeans::emmeans(result.lmer, ~Period)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means for Period \n\n")
##
## Estimated marginal means for Period
summary(result.lmer.emmo.P)
## Period emmean SE df lower.CL upper.CL
## After 5.05 0.390 5.91 4.09 6.00
## Before 5.17 0.364 5.04 4.24 6.11
##
## Results are averaged over the levels of: SiteClass
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***part300LSM-Periode;
# sink()
# sink('baci-fry-R-300LSM-int.txt', split=TRUE)
##***part300LSM-intb;
result.lmer.emmo.SP <- emmeans::emmeans(result.lmer, ~SiteClass:Period)
cat("\nEstimated marginal means for SiteClass:Period \n\n")
##
## Estimated marginal means for SiteClass:Period
summary(result.lmer.emmo.SP)
## SiteClass Period emmean SE df lower.CL upper.CL
## Downstream After 5.48 0.537 6.19 4.17 6.78
## Upstream After 4.62 0.524 5.74 3.32 5.91
## Downstream Before 5.63 0.506 5.05 4.33 6.92
## Upstream Before 4.72 0.493 4.65 3.42 6.02
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***part300LSM-inte;
# sink()
# Estimate the BACI contrast
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.lmer)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]
## Estimate Std. Error df t value Pr(>|t|)
## -0.0476087 0.4184179 41.4163500 -0.1137826 0.9099599
# sink("baci-fry-R-300baci.txt", split=TRUE)
##***part300bacib;
# Estimate the BACI contrast along with a se
summary(contrast(result.lmer.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)
## contrast estimate SE df lower.CL upper.CL t.ratio p.value
## baci -0.0476 0.423 16.1 -0.945 0.85 -0.112 0.9119
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***part300bacie;
# sink()
# Check the residuals etc
##***part300diagnosticb;
diagplot <- sf.autoplot.lmer(result.lmer)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
plot(diagplot)

##***part300diagnostice;
# ggsave(plot=diagplot, file='baci-fry-R-300-diagnostic.png', h=4, w=6, units="in", dpi=300)
###################################################################################################
###################################################################################################
###################################################################################################
# Power of the BACI-fry example
# This illustrates the power computations for a BACI design with multiple treatment
# and control sites, multiple years measured before and after treatment is applied, and
# sub-sampling at each year-site combination.
# The proposed experimental design is a BACI experiment with
# the monitoring design has the river divided into sites of which
# some are upstream of the diversion and some are downstream of the diversion.
# In each site, several minnow traps are set for various lengths of time.
#
# At the end of the soaking period, the traps are removed and the number of
# minnows are counted and classified by species.
# The counts are standardized to a common period of time to adjust for the
# different soak-times.
#
# This is run for several years before the project starts and after the project starts
# The analysis was originally done on the log-scale from which
# the variance components were extracted (see baci-fry.r code)
#
# We want to do a power analysis looking at various aspects of the design
#
# The information we need is:
# Alpha level (usually .05 or .10)
# Variance components (these were obtained from the baci-fry.r analysis of the previous data)
# std_site -> site to site STANDARD DEVIATION
# std_year -> year to year STANDARD DEVIATION
# std_site_year -> site-year interaction STANDARD DEVIATION
# std_subsample -> within site sub-sampling STANDARD DEVIATION
# Sub-sampling sample sizes, i.e. the number of minnow traps (sub-samples) set at each site
# We allow for different sample sizes in the treatment-time combinations
# but all sites in that combination must have the same number of minnow traps.
# It is possible to generalize this; contact me for details.
# n_IA -> number of subsamples in Treatment-After combination
# n_IB -> number of subsamples in Treatment-Before combination
# n_CA -> number of subsamples in Control-After combination
# n_CB -> number of subsamples in Control-Before combination
# Number of segments(sites)
# We allow for a different number of sites for Treatment and Control areaa
# ns_I -> number of treatment sites (i.e. downstram of the project)
# ns_C -> number of control sites (i.e. upstream of the project)
# Number of years of monitoring before/after impact
# ny_B -> number of years monitoring before project starts
# ny_A -> number of years monitoring after project starts
# Marginal Means
# These are used to form the BACI contrast of interest
# mu_IA, mu_IB, mu_CA, mu_CB (i.e mean of Treatment-After,
# Treatment-Before, Control-After, Control_Before)
# These are chosen based on the size of impact that may be biologically important
# This code was originally created by
# Tony Booth
# Department of Ichthyology and Fisheries Science,
# Rhodes University,
# PO Box 94,
# Grahamstown 6140 SOUTH AFRICA
# t.booth@ru.ac.za
# The computations are based on the
# Stroup, W. W. (1999)
# Mixed model procedures to assess power, precision, and sample size in the design of experiments.
# paper where "dummy" data is generated and "analyzed" and the resulting F-statistics etc
# provide information needed to compute the power
#-----------------------------------------------
# An illustration of how to find the power for one scenario
baci.power(n_IA=3, n_IB=3, n_CA=3, n_CB=3,
ns_I=3, ns_C=3,
ny_B=3, ny_A=2,
mu_IA=5.0, mu_IB=5.5, mu_CA=4.5, mu_CB=4.5,
sdYear=0.25, sdSite=0.75, sdSiteYear=0.1, sdResid=0.75)
## alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB n_CA n_CB ns_I ns_C ny_B
## 1 0.05 0.75 0.25 0.1 0.75 3 3 3 3 3 3 3
## ny_A mu_IA mu_IB mu_CA mu_CB baci baci.se dfdenom ncp Fcrit
## 1 2 5 5.5 4.5 4.5 -0.5 0.3312434 19 2.278481 4.38075
## power Tcrit os.power1 os.power2
## 1 0.2996488 1.729133 0.4249189 0.0009589156
vc <- as.data.frame(VarCorr(result.lmer))
vc
## grp var1 var2 vcov sdcor
## 1 YearF:Site (Intercept) <NA> 0.00000000 0.0000000
## 2 Site (Intercept) <NA> 0.58823981 0.7669679
## 3 YearF (Intercept) <NA> 0.04586215 0.2141545
## 4 Residual <NA> <NA> 0.52340167 0.7234650
#find the power under various scenarios
##***part500b;
sdSiteYear= vc[ vc$grp=="YearF:Site", "sdcor"]
sdResid = vc[ vc$grp=="Residual", "sdcor"]
sdYear = vc[ vc$grp=="YearF", "sdcor"]
sdSite = vc[ vc$grp=="Site", "sdcor"]
cat("Estimated variance components are \n",
"sdSiteYear ", round(sdSiteYear,2),
";\n sdSite ", round(sdSite,2),
";\n sdYear ", round(sdYear,2),
"; \nsdResid ", round(sdResid,2),"\n")
## Estimated variance components are
## sdSiteYear 0 ;
## sdSite 0.77 ;
## sdYear 0.21 ;
## sdResid 0.72
cat("\n")
scenarios <- expand.grid(n_quad=seq(3,9,3),
ns_I = 3, ns_C=3,
ny_B =3,
ny_A =c(2,3,4),
baci_effect=seq(0,0.8,.1),
sdYear = sdYear, sdSite=sdSite, sdSiteYear=sdSiteYear, sdResid=sdResid)
head(scenarios)
## n_quad ns_I ns_C ny_B ny_A baci_effect sdYear sdSite sdSiteYear
## 1 3 3 3 3 2 0 0.2141545 0.7669679 0
## 2 6 3 3 3 2 0 0.2141545 0.7669679 0
## 3 9 3 3 3 2 0 0.2141545 0.7669679 0
## 4 3 3 3 3 3 0 0.2141545 0.7669679 0
## 5 6 3 3 3 3 0 0.2141545 0.7669679 0
## 6 9 3 3 3 3 0 0.2141545 0.7669679 0
## sdResid
## 1 0.723465
## 2 0.723465
## 3 0.723465
## 4 0.723465
## 5 0.723465
## 6 0.723465
power <- plyr::adply(scenarios,1,function(x){
#browser()
power <- baci.power(
n_IA=x$n_quad, n_IB=x$n_quad, n_CA=x$n_quad, n_CB=x$n_quad,
ns_I=x$ns_I, ns_C=x$ns_C,
ny_B=x$ny_B, ny_A=x$ny_A,
mu_IA=x$baci_effect, mu_IB=0, mu_CA=0, mu_CB=0,
sdYear=x$sdYear, sdSite=x$sdSite, sdSiteYear=x$sdSiteYear, sdResid=x$sdResid)
power
})
head(power)
## n_quad baci_effect alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB
## 1 3 0 0.05 0.7669679 0.2141545 0 0.723465 3 3
## 2 6 0 0.05 0.7669679 0.2141545 0 0.723465 6 6
## 3 9 0 0.05 0.7669679 0.2141545 0 0.723465 9 9
## 4 3 0 0.05 0.7669679 0.2141545 0 0.723465 3 3
## 5 6 0 0.05 0.7669679 0.2141545 0 0.723465 6 6
## 6 9 0 0.05 0.7669679 0.2141545 0 0.723465 9 9
## n_CA n_CB ns_I ns_C ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci baci.se dfdenom
## 1 3 3 3 3 3 2 0 0 0 0 0 0.3113298 19
## 2 6 6 3 3 3 2 0 0 0 0 0 0.2201434 19
## 3 9 9 3 3 3 2 0 0 0 0 0 0.1797463 19
## 4 3 3 3 3 3 3 0 0 0 0 0 0.2784618 24
## 5 6 6 3 3 3 3 0 0 0 0 0 0.1969022 24
## 6 9 9 3 3 3 3 0 0 0 0 0 0.1607700 24
## ncp Fcrit power Tcrit os.power1 os.power2
## 1 0 4.380750 0.05 1.729133 0.05 0.05
## 2 0 4.380750 0.05 1.729133 0.05 0.05
## 3 0 4.380750 0.05 1.729133 0.05 0.05
## 4 0 4.259677 0.05 1.710882 0.05 0.05
## 5 0 4.259677 0.05 1.710882 0.05 0.05
## 6 0 4.259677 0.05 1.710882 0.05 0.05
cat("\n")
head(power[,c("alpha","n_IA","n_IB","n_CA","n_CB","ny_B","ny_A","ns_I","ns_C","baci","power")])
## alpha n_IA n_IB n_CA n_CB ny_B ny_A ns_I ns_C baci power
## 1 0.05 3 3 3 3 3 2 3 3 0 0.05
## 2 0.05 6 6 6 6 3 2 3 3 0 0.05
## 3 0.05 9 9 9 9 3 2 3 3 0 0.05
## 4 0.05 3 3 3 3 3 3 3 3 0 0.05
## 5 0.05 6 6 6 6 3 3 3 3 0 0.05
## 6 0.05 9 9 9 9 3 3 3 3 0 0.05
##***part500e;
# sink("baci-fry-power-R-500.txt", split=TRUE)
power[,c("alpha","n_IA","n_IB","n_CA","n_CB","ny_B","ny_A","ns_I","ns_C","baci","power")]
## alpha n_IA n_IB n_CA n_CB ny_B ny_A ns_I ns_C baci power
## 1 0.05 3 3 3 3 3 2 3 3 0.0 0.05000000
## 2 0.05 6 6 6 6 3 2 3 3 0.0 0.05000000
## 3 0.05 9 9 9 9 3 2 3 3 0.0 0.05000000
## 4 0.05 3 3 3 3 3 3 3 3 0.0 0.05000000
## 5 0.05 6 6 6 6 3 3 3 3 0.0 0.05000000
## 6 0.05 9 9 9 9 3 3 3 3 0.0 0.05000000
## 7 0.05 3 3 3 3 3 4 3 3 0.0 0.05000000
## 8 0.05 6 6 6 6 3 4 3 3 0.0 0.05000000
## 9 0.05 9 9 9 9 3 4 3 3 0.0 0.05000000
## 10 0.05 3 3 3 3 3 2 3 3 0.1 0.06074423
## 11 0.05 6 6 6 6 3 2 3 3 0.1 0.07161264
## 12 0.05 9 9 9 9 3 2 3 3 0.1 0.08259260
## 13 0.05 3 3 3 3 3 3 3 3 0.1 0.06374111
## 14 0.05 6 6 6 6 3 3 3 3 0.1 0.07768032
## 15 0.05 9 9 9 9 3 3 3 3 0.1 0.09179169
## 16 0.05 3 3 3 3 3 4 3 3 0.1 0.06594293
## 17 0.05 6 6 6 6 3 4 3 3 0.1 0.08214726
## 18 0.05 9 9 9 9 3 4 3 3 0.1 0.09857302
## 19 0.05 3 3 3 3 3 2 3 3 0.2 0.09367195
## 20 0.05 6 6 6 6 3 2 3 3 0.2 0.13875526
## 21 0.05 9 9 9 9 3 2 3 3 0.2 0.18456714
## 22 0.05 3 3 3 3 3 3 3 3 0.2 0.10605047
## 23 0.05 6 6 6 6 3 3 3 3 0.2 0.16410459
## 24 0.05 9 9 9 9 3 3 3 3 0.2 0.22282587
## 25 0.05 3 3 3 3 3 4 3 3 0.2 0.11518237
## 26 0.05 6 6 6 6 3 4 3 3 0.2 0.18276938
## 27 0.05 9 9 9 9 3 4 3 3 0.2 0.25077554
## 28 0.05 3 3 3 3 3 2 3 3 0.3 0.15016428
## 29 0.05 6 6 6 6 3 2 3 3 0.3 0.25340675
## 30 0.05 9 9 9 9 3 2 3 3 0.3 0.35399473
## 31 0.05 3 3 3 3 3 3 3 3 0.3 0.17876866
## 32 0.05 6 6 6 6 3 3 3 3 0.3 0.30986296
## 33 0.05 9 9 9 9 3 3 3 3 0.3 0.43311300
## 34 0.05 3 3 3 3 3 4 3 3 0.3 0.19979884
## 35 0.05 6 6 6 6 3 4 3 3 0.3 0.35031464
## 36 0.05 9 9 9 9 3 4 3 3 0.3 0.48755704
## 37 0.05 3 3 3 3 3 2 3 3 0.4 0.23052703
## 38 0.05 6 6 6 6 3 2 3 3 0.4 0.40730209
## 39 0.05 9 9 9 9 3 2 3 3 0.4 0.56050779
## 40 0.05 3 3 3 3 3 3 3 3 0.4 0.28113601
## 41 0.05 6 6 6 6 3 3 3 3 0.4 0.49604376
## 42 0.05 9 9 9 9 3 3 3 3 0.4 0.66554057
## 43 0.05 3 3 3 3 3 4 3 3 0.4 0.31766352
## 44 0.05 6 6 6 6 3 4 3 3 0.4 0.55551225
## 45 0.05 9 9 9 9 3 4 3 3 0.4 0.72937801
## 46 0.05 3 3 3 3 3 2 3 3 0.5 0.33208787
## 47 0.05 6 6 6 6 3 2 3 3 0.5 0.57758374
## 48 0.05 9 9 9 9 3 2 3 3 0.5 0.75142330
## 49 0.05 3 3 3 3 3 3 3 3 0.5 0.40674095
## 50 0.05 6 6 6 6 3 3 3 3 0.5 0.68324578
## 51 0.05 9 9 9 9 3 3 3 3 0.5 0.84696468
## 52 0.05 3 3 3 3 3 4 3 3 0.5 0.45862275
## 53 0.05 6 6 6 6 3 4 3 3 0.5 0.74662003
## 54 0.05 9 9 9 9 3 4 3 3 0.5 0.89487013
## 55 0.05 3 3 3 3 3 2 3 3 0.6 0.44823669
## 56 0.05 6 6 6 6 3 2 3 3 0.6 0.73427217
## 57 0.05 9 9 9 9 3 2 3 3 0.6 0.88594369
## 58 0.05 3 3 3 3 3 3 3 3 0.6 0.54307490
## 59 0.05 6 6 6 6 3 3 3 3 0.6 0.83230745
## 60 0.05 9 9 9 9 3 3 3 3 0.6 0.94727488
## 61 0.05 3 3 3 3 3 4 3 3 0.6 0.60519703
## 62 0.05 6 6 6 6 3 4 3 3 0.6 0.88258691
## 63 0.05 9 9 9 9 3 4 3 3 0.6 0.97091809
## 64 0.05 3 3 3 3 3 2 3 3 0.7 0.56910556
## 65 0.05 6 6 6 6 3 2 3 3 0.7 0.85441346
## 66 0.05 9 9 9 9 3 2 3 3 0.7 0.95827396
## 67 0.05 3 3 3 3 3 3 3 3 0.7 0.67448764
## 68 0.05 6 6 6 6 3 3 3 3 0.7 0.92641453
## 69 0.05 9 9 9 9 3 3 3 3 0.7 0.98658814
## 70 0.05 3 3 3 3 3 4 3 3 0.7 0.73811583
## 71 0.05 6 6 6 6 3 4 3 3 0.7 0.95654613
## 72 0.05 9 9 9 9 3 4 3 3 0.7 0.99438976
## 73 0.05 3 3 3 3 3 2 3 3 0.8 0.68376032
## 74 0.05 6 6 6 6 3 2 3 3 0.8 0.93122658
## 75 0.05 9 9 9 9 3 2 3 3 0.8 0.98797298
## 76 0.05 3 3 3 3 3 3 3 3 0.8 0.78710050
## 77 0.05 6 6 6 6 3 3 3 3 0.8 0.97354200
## 78 0.05 9 9 9 9 3 3 3 3 0.8 0.99751340
## 79 0.05 3 3 3 3 3 4 3 3 0.8 0.84331812
## 80 0.05 6 6 6 6 3 4 3 3 0.8 0.98731270
## 81 0.05 9 9 9 9 3 4 3 3 0.8 0.99925529
# sink()
##***part510b;
power.plot <- ggplot(data=power, aes(x=baci_effect, y=power, color=as.factor(n_quad)))+
ggtitle("Estimated power",
subtitle=paste("alpha: ", power$alpha[1]))+
geom_point()+
geom_line()+
ylim(0,1)+
geom_hline(yintercept=0.8, color="blue")+
xlab("BACI effect size (log scale)")+
scale_color_discrete(name="# quadrats")+
facet_wrap(~ny_A, ncol=2, labeller=label_both)
power.plot

##***part510e;
##*
###########################################################################
###########################################################################
###########################################################################
# What to do when number of sub-samples is very large and the design matrix is large
# which makes the power program is very small
sdSiteYear= vc[ vc$grp=="YearF:Site", "sdcor"]
sdResid = vc[ vc$grp=="Residual", "sdcor"]
sdYear = vc[ vc$grp=="YearF", "sdcor"]
sdSite = vc[ vc$grp=="Site", "sdcor"]
##***part900b;
cat("Estimated variance components are \n",
"sdSiteYear ", round(sdSiteYear,2),
";\n sdSite ", round(sdSite,2),
";\n sdYear ", round(sdYear,2),
"; \nsdResid ", round(sdResid,2),"\n")
## Estimated variance components are
## sdSiteYear 0 ;
## sdSite 0.77 ;
## sdYear 0.21 ;
## sdResid 0.72
cat("\n")
# We can make an equivalent baci design with 1 observations/site-year (the average)
# and revised sdSiteYear and sdResid
# for example consider a baci design with n_quad=10 in all site.years
cat("\nPower computed using individual observations\n")
##
## Power computed using individual observations
power.indiv <- baci.power(n_IA=10, n_IB=10, n_CA=10, n_CB=10,
ns_I=3, ns_C=3,
ny_B=3, ny_A=2,
mu_IA=.4, mu_IB=0, mu_CA=0, mu_CB=0,
sdYear=sdYear, sdSite=sdSite, sdSiteYear=sdSiteYear, sdResid=sdResid)
power.indiv
## alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB n_CA n_CB ns_I ns_C
## 1 0.05 0.7669679 0.2141545 0 0.723465 10 10 10 10 3 3
## ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci baci.se dfdenom ncp Fcrit
## 1 3 2 0.4 0 0 0 0.4 0.1705223 19 5.502466 4.38075
## power Tcrit os.power1 os.power2
## 1 0.6049787 1.729133 0.7310784 4.589899e-05
# this has equivalent power when you analyze the "averages" and create revised sdSiteYear and sdResid
sdSiteYear.avg <- sqrt(sdSiteYear^2 + sdResid^2/10)
sdResid.avg <- 0
cat("\nPower computed using averages\n")
##
## Power computed using averages
power.avg <- baci.power(n_IA=1, n_IB=1, n_CA=1, n_CB=1,
ns_I=3, ns_C=3,
ny_B=3, ny_A=2,
mu_IA=.4, mu_IB=0, mu_CA=0, mu_CB=0,
sdYear=sdYear, sdSite=sdSite, sdSiteYear=sdSiteYear.avg, sdResid=sdResid.avg)
power.avg
## alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB n_CA n_CB ns_I ns_C
## 1 0.05 0.7669679 0.2141545 0.2287797 0 1 1 1 1 3 3
## ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci baci.se dfdenom ncp Fcrit
## 1 3 2 0.4 0 0 0 0.4 0.1705223 19 5.502466 4.38075
## power Tcrit os.power1 os.power2
## 1 0.6049787 1.729133 0.7310784 4.589899e-05
##***part900e;
##***part901b;
scenarios <- expand.grid(n_quad=seq(3,9,3),
ns_I = 3, ns_C=3,
ny_B =3,
ny_A =c(2,3,4),
baci_effect=seq(0,0.8,.1),
sdYear = sdYear, sdSite=sdSite, sdSiteYear=sdSiteYear, sdResid=sdResid)
cat("Part of the scenarios dataset\n")
## Part of the scenarios dataset
head(scenarios)
## n_quad ns_I ns_C ny_B ny_A baci_effect sdYear sdSite sdSiteYear
## 1 3 3 3 3 2 0 0.2141545 0.7669679 0
## 2 6 3 3 3 2 0 0.2141545 0.7669679 0
## 3 9 3 3 3 2 0 0.2141545 0.7669679 0
## 4 3 3 3 3 3 0 0.2141545 0.7669679 0
## 5 6 3 3 3 3 0 0.2141545 0.7669679 0
## 6 9 3 3 3 3 0 0.2141545 0.7669679 0
## sdResid
## 1 0.723465
## 2 0.723465
## 3 0.723465
## 4 0.723465
## 5 0.723465
## 6 0.723465
power.avg <- plyr::adply(scenarios,1,function(x){
#browser()
# do the short cut
x$sdSiteYear.avg <- sqrt(x$sdSiteYear^2 + x$sdResid^2/x$n_quad) # Note change here
x$sdResid.avg <- 0
power <- baci.power(
n_IA=1, n_IB=1, n_CA=1, n_CB=1, # notice change here
ns_I=x$ns_I, ns_C=x$ns_C,
ny_B=x$ny_B, ny_A=x$ny_A,
mu_IA=x$baci_effect, mu_IB=0, mu_CA=0, mu_CB=0,
sdYear=x$sdYear, sdSite=x$sdSite,
sdSiteYear=x$sdSiteYear.avg, sdResid=x$sdResid.avg) # note change here
power
})
cat("\nPart of the power dataset computed on averages\n")
##
## Part of the power dataset computed on averages
head(power.avg)
## n_quad baci_effect alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB
## 1 3 0 0.05 0.7669679 0.2141545 0.4176927 0 1 1
## 2 6 0 0.05 0.7669679 0.2141545 0.2953534 0 1 1
## 3 9 0 0.05 0.7669679 0.2141545 0.2411550 0 1 1
## 4 3 0 0.05 0.7669679 0.2141545 0.4176927 0 1 1
## 5 6 0 0.05 0.7669679 0.2141545 0.2953534 0 1 1
## 6 9 0 0.05 0.7669679 0.2141545 0.2411550 0 1 1
## n_CA n_CB ns_I ns_C ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci baci.se dfdenom
## 1 1 1 3 3 3 2 0 0 0 0 0 0.3113298 19
## 2 1 1 3 3 3 2 0 0 0 0 0 0.2201434 19
## 3 1 1 3 3 3 2 0 0 0 0 0 0.1797463 19
## 4 1 1 3 3 3 3 0 0 0 0 0 0.2784618 24
## 5 1 1 3 3 3 3 0 0 0 0 0 0.1969022 24
## 6 1 1 3 3 3 3 0 0 0 0 0 0.1607700 24
## ncp Fcrit power Tcrit os.power1 os.power2
## 1 0 4.380750 0.05 1.729133 0.05 0.05
## 2 0 4.380750 0.05 1.729133 0.05 0.05
## 3 0 4.380750 0.05 1.729133 0.05 0.05
## 4 0 4.259677 0.05 1.710882 0.05 0.05
## 5 0 4.259677 0.05 1.710882 0.05 0.05
## 6 0 4.259677 0.05 1.710882 0.05 0.05
##***part901e;
# the power plot on the "averages" is the same as before
##***part910b;
power.plot <- ggplot(data=power.avg, aes(x=baci_effect, y=power, color=as.factor(n_quad)))+
ggtitle("Estimated power computed using short cut",
subtitle=paste("alpha: ", power$alpha[1]))+
geom_point()+
geom_line()+
ylim(0,1)+
geom_hline(yintercept=0.8, color="blue")+
xlab("BACI effect size (log scale)")+
scale_color_discrete(name="# quadrats")+
facet_wrap(~ny_A, ncol=2, labeller=label_both)
power.plot

##***part910e;
##*###################################################################################################
##*###################################################################################################
##*###################################################################################################
##*###################################################################################################
### Bayesian analysis
baci.bayesian <- brms::brm(logfry ~ Period + SiteClass + Period:SiteClass +
(1|YearF)+ (1|Site)+(1|YearF:Site),
data=fry
,sample_prior=TRUE
,seed=23432432)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 17.0.0 (clang-1700.6.3.2)’
## using SDK: ‘NA’
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Users/cschwarz/Rlibs/Rcpp/include/" -I"/Users/cschwarz/Rlibs/RcppEigen/include/" -I"/Users/cschwarz/Rlibs/RcppEigen/include/unsupported" -I"/Users/cschwarz/Rlibs/BH/include" -I"/Users/cschwarz/Rlibs/StanHeaders/include/src/" -I"/Users/cschwarz/Rlibs/StanHeaders/include/" -I"/Users/cschwarz/Rlibs/RcppParallel/include/" -I"/Users/cschwarz/Rlibs/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Users/cschwarz/Rlibs/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/x86_64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Users/cschwarz/Rlibs/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Users/cschwarz/Rlibs/RcppEigen/include/Eigen/Dense:1:
## In file included from /Users/cschwarz/Rlibs/RcppEigen/include/Eigen/Core:19:
## /Users/cschwarz/Rlibs/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## 679 | #include <cmath>
## | ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Warning message:
## In system2("xcrun", "--show-sdk-version", TRUE, TRUE) :
## running command ''xcrun' --show-sdk-version 2>&1' had status 1
## Start sampling
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 4.4e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.44 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.458 seconds (Warm-up)
## Chain 1: 0.327 seconds (Sampling)
## Chain 1: 0.785 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 3.1e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.31 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.476 seconds (Warm-up)
## Chain 2: 0.326 seconds (Sampling)
## Chain 2: 0.802 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 1.5e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.456 seconds (Warm-up)
## Chain 3: 0.441 seconds (Sampling)
## Chain 3: 0.897 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 1.5e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.15 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.402 seconds (Warm-up)
## Chain 4: 0.331 seconds (Sampling)
## Chain 4: 0.733 seconds (Total)
## Chain 4:
## Warning: There were 4 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
vc.priorsamps.wide<-brms::prior_samples(baci.bayesian)
## Warning: 'prior_samples' is deprecated. Please use 'prior_draws' instead.
vc.priorsamps.long <- tidyr::pivot_longer(vc.priorsamps.wide,
cols=names(vc.priorsamps.wide)[-1],
names_to="vc",
values_to="prior")
ggplot(data=vc.priorsamps.long, aes(x=prior, y=after_stat(density)))+
ggtitle("Prior distribution for the SD in the BACI mode")+
geom_histogram(alpha=0.5)+
geom_density()+
facet_wrap(~vc, ncol=2,scales="free")+
xlim(0,20)
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## Warning: Removed 57 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 57 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_bar()`).

# We will generate a flat prior with a mean of 0 and an sd of 10 for each fixed parameter
vc.priorsamp.fixed <- plyr::ldply(c("Intercept","SiteClass","Period","SiteClass:Period"), function(x){
data.frame(fixed=x, prior=rnorm(50000,mean=0, sd=50))
})
ggplot(data=vc.priorsamp.fixed, aes(x=prior, y=after_stat(density)))+
ggtitle(" Prior distribution for the fixed effects in the BACI model")+
geom_histogram(alpha=0.5)+
geom_density()+
facet_wrap(~fixed, ncol=2,scales="free")
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Get the posterior for the BACI contrast
post.baci <- brms::as_draws_df(baci.bayesian, variable="b_PeriodBefore:SiteClassUpstream")
post.baci <- plyr::rename(post.baci, c("b_PeriodBefore:SiteClassUpstream"="BACI"))
head(post.baci)
## # A draws_df: 6 iterations, 1 chains, and 1 variables
## BACI
## 1 -0.358
## 2 -0.044
## 3 0.235
## 4 -0.136
## 5 0.018
## 6 -0.089
## # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
#post.baci <- data.frame(post.baci=unlist(brms::posterior_samples(baci.bayesian, pars="b_PeriodBefore:SiteClassUpstream")))
ggplot(data=post.baci, aes(x=BACI, y=after_stat(density)))+
ggtitle(#"Posterior distribution of BACI contrast"
" ", subtitle="Red lines indicate 95% credible interval")+
geom_histogram(alpha=0.5)+
geom_density()+
geom_vline(xintercept=0)+
geom_vline(xintercept=quantile(post.baci$BACI, prob=c(0.025,0.975)), color="red")
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# The posterior-belief distribution appears to be slightly peaked just below 0 (the value of 0 indicates no project effect).
# The red lines indicate the 95% credible interval for the BACI effect, i.e. the bracket a region that contains
# 95% of our belief.
#
# The wide distribution indicates that the posterior belief is quite uncertain.
# We can summarize the posterior belief distribution using summary statistics (mean, sd, and 95% credible interval) are:
c(mean=mean(post.baci$BACI),
sd =sd (post.baci$BACI),
lci =quantile(post.baci$BACI, prob=0.025),
uci =quantile(post.baci$BACI, prob=0.975))
## mean sd lci.2.5% uci.97.5%
## -0.04670305 0.47413064 -0.96217880 0.89319357
# We see that the
#
# - mean of the posterior belief distribution is close to the estimate of the BACI contrast from
# the standard linear model fit,
# - the SD is close to the baci fit's SE, and
# - the upper and lower credible intervals are comparable to the 95% confidence intervals.
#
# This is no accident and is an artefact of the use of noninformative uniform priors and the fairly large dataset
# where the data "overwhelms" the prior.
#
# So the Bayesian analysis, so far, has not presented anything new. However, we can now made statements about
# our prior belief of an effect by looking at the previous histogram and computing the proportion of posterior samples in various ways.
#
# For example, we can now say that there is `r round(mean(abs(post.baci$BACI) > .5),2)` posterior belief that the absolute(BACI effect) is
# 0.5 (a 50% differential in the means) or greater (@fig-post-belief1).
post.baci$BACI.g5 <- abs(post.baci$BACI) >= .5
belief <- mean(abs(post.baci$BACI) > .5)
ggplot(data=post.baci, aes(x=BACI))+
ggtitle("Posterior belief that abs(baci contrast)> .5 (corresponding to large effect)",
subtitle="Blue are is posterior belief that abs(baci contrast) > .5")+
geom_histogram(alpha=0.5, aes(fill=as.factor(BACI.g5)), breaks=seq(-2,2,.1))+
#geom_density()+
geom_vline(xintercept=0)+
geom_vline(xintercept=c(-.5,.5), color="red")+
scale_fill_discrete(name="abs(baci) > .5")+
annotate("text", label=paste0("Posterior belief abs(baci)>.5: ", round(belief,2)),
x=-Inf, y=Inf, hjust=0, vjust=1.5)

# Similarly, there is a `r round(mean(abs(post.baci$BACI) <= .1),2)` posterior belief that the absolute(BACI effect) is 0.1 or less
# (@fig-post-belief2).
post.baci$BACI.l1 <- abs(post.baci$BACI) <= .1
belief <- mean(abs(post.baci$BACI) <= .1)
ggplot(data=post.baci, aes(x=BACI))+
ggtitle("Posterior belief that abs(baci contrast) < .1 (corresponding to small effect)",
subtitle="Blue are is posterior belief that abs(baci contrast) < .1")+
geom_histogram(alpha=0.5, aes(fill=as.factor(BACI.l1)), breaks=seq(-2,2,.1))+
#geom_density()+
geom_vline(xintercept=0)+
geom_vline(xintercept=c(-.1,.1), color="red")+
scale_fill_discrete(name="abs(baci) < .1")+
annotate("text", label=paste0("Posterior belief abs(baci)<.1: ", round(belief,2)),
x=-Inf, y=Inf, hjust=0, vjust=1.5)

# **So unlike the classical BACI analysis, you can quantify the belief of a large impact or small impact in a natural way.**
# This is the prime advantage of a Bayesian analysis.
#
# Confidence intervals cannot be used in this way. As indicated by Kruschke and Liddell (2018):
#
# > Highest density interval vs. confidence interval Here we
# > reiterate some essential differences between a Bayesian
# > credible interval (HDI) and a frequentist confidence interval (CI).
# > An important quality of the posterior 95% HDI is that it really does indicate the 95%
# > most probable values of the parameter, given the data. The
# > posterior distribution depends only on the actually observed
# > data (and the prior), and does not depend on the stopping
# > or testing intentions of the analyst. The frequentist CI is
# > often misinterpreted as if it were a posterior distribution,
# > because what analysts intuitively want from their analysis is
# > the Bayesian posterior distribution, as we discuss more later.
#
# The Bayesian credible interval does not depend on how many hypothesis tests are being done
# (i.e., there are no corrections for multiple testing such as Tukey's method).
#
# However, it makes no sense to ask "What is posterior belief of no-effect?" because this would ask for the probability that the
# baci effect = 0 exactly which is 0. Similarly, it makes no sense to ask "What is the posterior belief of an effect?"
# because this would ask for the probability that the baci effect $\ne$ 0 which is 1. Only posterior beliefs of intervals
# are sensible.
#
# We can get a complete summary of a Bayesian fit for all parameters of the model (not shown) in similar ways.
# summarize the bayesian fit
temp <- summary(baci.bayesian)
## Warning: There were 4 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
temp
## Family: gaussian
## Links: mu = identity
## Formula: logfry ~ Period + SiteClass + Period:SiteClass + (1 | YearF) + (1 | Site) + (1 | YearF:Site)
## Data: fry (Number of observations: 52)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Multilevel Hyperparameters:
## ~Site (Number of levels: 6)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.07 0.52 0.45 2.42 1.00 1598 1774
##
## ~YearF (Number of levels: 5)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.41 0.38 0.02 1.45 1.00 1200 1682
##
## ~YearF:Site (Number of levels: 28)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.15 0.12 0.01 0.43 1.00 2003 1554
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept 5.44 0.80 3.76 7.01 1.00
## PeriodBefore 0.17 0.58 -0.93 1.41 1.00
## SiteClassUpstream -0.85 1.03 -2.82 1.39 1.00
## PeriodBefore:SiteClassUpstream -0.05 0.47 -0.96 0.89 1.00
## Bulk_ESS Tail_ESS
## Intercept 1497 1395
## PeriodBefore 2094 1136
## SiteClassUpstream 1506 1736
## PeriodBefore:SiteClassUpstream 3380 2239
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.75 0.09 0.60 0.94 1.00 3617 2980
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# #### Variance components
#
# We can also get the posterior belief distributions of the variance effects and summarize
# the posterior-beliefs in @tbl-vc-bayesian.
vc.bayesian.1 <- plyr::ldply(temp$random, function(x){
x
})
vc.bayesian.2 <- temp$spec_pars
vc.bayesian.2$.id <- "Residual"
vc.bayesian <- plyr::rbind.fill(vc.bayesian.1, vc.bayesian.2)
#vc.bayesian
vc.bayesian
## .id Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## 1 Site 1.0693147 0.51880207 0.445696895 2.4166136 1.002305 1598.029
## 2 YearF 0.4132569 0.38441194 0.016838186 1.4495008 1.001389 1200.395
## 3 YearF:Site 0.1473127 0.11667138 0.005708658 0.4259360 1.000029 2002.670
## 4 Residual 0.7499756 0.08605112 0.602435688 0.9368301 1.001326 3617.338
## Tail_ESS
## 1 1774.168
## 2 1682.208
## 3 1553.777
## 4 2979.773
# The mean of the posteriors fairly different from the classical baci fit. This is due, in part, to the influence
# of the prior distribution for the standard deviations seen earlier. In particular, notice how the
# estimate variance component for the year.site interaction has been pulled away from 0.
#
# Fortunately, the Site and Year variance components "cancel" when estimating the BACI contrast and so have no impact
# on the result of interest.
#
# A fuller Bayesian analysis would investigate the sensitivity of the results to different choices of prior distributions
#
# The uncertainty of the variance components (the Est.Error in the above table) is NOT available from a classical baci fit (but
# could be obtained with some work).