# BACI-fish
# This is the example that appears in Smith (2002, Table 6).
# Samples of fish were taken for a period
# of 12 months before and 13 months after a nuclear
# power plant began operations.
# The power plant is cooled by water that is drawn from a river.
# When the water exits the plant, its temperature is
# elevated.
# The concern is that the warmed water will adversely affect the abundance
# and composition of fish below the plant.
# [It wasn't clear from Smith (2002) what the response measure is,
# so I'll arbitrarily assume that it is Counts of fish]
library(ggplot2)
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(plyr)
source("../../schwarz.functions.r")
# Get the BACI power program
source("../baci-power.r")
# Read in the actual data
# sink("baci-fish-R-001.txt",split=TRUE)
##***part001b;
fish <- read.csv("baci-fish.csv", header=TRUE, as.is=TRUE, strip.white=TRUE)
fish$trt <- interaction(fish$SiteClass, fish$Site, fish$Period)
fish$Period <- factor(fish$Period)
fish$SiteClass <- factor(fish$SiteClass)
fish$Site <- factor(fish$Site)
fish$SamplingTimeF <- factor(fish$SamplingTime)
fish$trt <- factor(fish$trt)
fish$logCount <- log(fish$Count+ .5) # add 1/2 of smallest postive count
fish[1:10,]
## SamplingTime Year Period SiteClass Site Count trt
## 1 1 75 Before Control C1 1 Control.C1.Before
## 2 1 75 Before Impact I1 1 Impact.I1.Before
## 3 2 75 Before Control C1 0 Control.C1.Before
## 4 2 75 Before Impact I1 1 Impact.I1.Before
## 5 3 75 Before Control C1 0 Control.C1.Before
## 6 3 75 Before Impact I1 0 Impact.I1.Before
## 7 4 75 Before Control C1 2 Control.C1.Before
## 8 4 75 Before Impact I1 6 Impact.I1.Before
## 9 5 75 Before Control C1 47 Control.C1.Before
## 10 5 75 Before Impact I1 103 Impact.I1.Before
## SamplingTimeF logCount
## 1 1 0.4054651
## 2 1 0.4054651
## 3 2 -0.6931472
## 4 2 0.4054651
## 5 3 -0.6931472
## 6 3 -0.6931472
## 7 4 0.9162907
## 8 4 1.8718022
## 9 5 3.8607297
## 10 5 4.6395716
##***part001e;
# sink()
str(fish)
## 'data.frame': 50 obs. of 9 variables:
## $ SamplingTime : int 1 1 2 2 3 3 4 4 5 5 ...
## $ Year : int 75 75 75 75 75 75 75 75 75 75 ...
## $ Period : Factor w/ 2 levels "After","Before": 2 2 2 2 2 2 2 2 2 2 ...
## $ SiteClass : Factor w/ 2 levels "Control","Impact": 1 2 1 2 1 2 1 2 1 2 ...
## $ Site : Factor w/ 2 levels "C1","I1": 1 2 1 2 1 2 1 2 1 2 ...
## $ Count : int 1 1 0 1 0 0 2 6 47 103 ...
## $ trt : Factor w/ 4 levels "Control.C1.After",..: 3 4 3 4 3 4 3 4 3 4 ...
## $ SamplingTimeF: Factor w/ 25 levels "1","2","3","4",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ logCount : num 0.405 0.405 -0.693 0.405 -0.693 ...
# Preliminary plot
# Get plot of series over time
##***part010b;
prelimplot <- ggplot(data=fish, aes(x=SamplingTime, y=logCount,
group=Site, color=SiteClass, shape=Site))+
ggtitle("Fish counts over time")+
geom_point()+
geom_line()+
geom_vline(xintercept=-0.5+min(fish$SamplingTime[as.character(fish$Period) == "After"]))
prelimplot

##***part010e;
#ggsave(plot=prelimplot, file="baci-fish-R-010.png",
# h=4, w=6, units="in", dpi=300)
# There are several ways in which this BACI design can be analyzed.
###############################################################################
# Do a t-test on the differeces of the averages at each sampling time for each site
# There is only one observtion at each sampling time, so we don't have compute
# the averages here
# sink('baci-fish-R-101.txt',split=TRUE)
##***part101b;
# Convert file from long- to wide-format
# to get the Control and Impact measurements on the same line
# and compute the difference
fish.month <- tidyr::pivot_wider(fish,
id_cols=c("SamplingTime","Period"),
names_from="SiteClass",
values_from="logCount")
fish.month$diff <- fish.month$Impact - fish.month$Control
fish.month[1:10,]
## # A tibble: 10 × 5
## SamplingTime Period Control Impact diff
## <int> <fct> <dbl> <dbl> <dbl>
## 1 1 Before 0.405 0.405 0
## 2 2 Before -0.693 0.405 1.10
## 3 3 Before -0.693 -0.693 0
## 4 4 Before 0.916 1.87 0.956
## 5 5 Before 3.86 4.64 0.779
## 6 6 Before 4.15 3.60 -0.554
## 7 7 Before 4.36 1.87 -2.49
## 8 8 Before 2.80 4.97 2.16
## 9 9 Before 4.97 4.98 0.0138
## 10 10 Before 3.35 2.25 -1.10
##***part101e;
# sink()
# Plot the difference over time
##***part102b;
plotdiff <- ggplot(data=fish.month, aes(x=SamplingTime, y=diff))+
ggtitle("Plot of differences over time")+
ylab("Difference (Impact-Control)")+
geom_point()+
geom_line()+
geom_vline(xintercept=-0.5+min(fish$SamplingTime[as.character(fish$Period) == "After"]))
plotdiff

##***part102e;
#ggsave(plot=plotdiff, file="baci-fish-R-102.png", h=4, w=6, units="in", dpi=300)
# do the two sample t-test not assuming equal variances
# sink('baci-fish-R-104.txt',split=TRUE)
##***part104b;
result <- try(t.test(diff ~ Period, data=fish.month),silent=TRUE)
if(class(result)=="try-error")
{cat("Unable to do unequal variance t-test because of small sample size\n")} else
{
result$diff.in.means <- sum(result$estimate*c(1,-1))
names(result$diff.in.means)<- "diff.in.means"
print(result)
cat("Mean difference in log(Count) :,",result$diff.in.means," (SE ", result$stderr,")\n")
}
##
## Welch Two Sample t-test
##
## data: diff by Period
## t = 0.28689, df = 20.742, p-value = 0.777
## alternative hypothesis: true difference in means between group After and group Before is not equal to 0
## 95 percent confidence interval:
## -0.8284787 1.0934134
## sample estimates:
## mean in group After mean in group Before
## 0.4418295 0.3093621
##
## Mean difference in log(Count) :, 0.1324674 (SE 0.4617297 )
##***part104e;
# sink()
# do the two sample t-test assuming equal variances
# sink('baci-fish-R-105.txt',split=TRUE)
##***part105b;
result <- t.test(diff ~ Period, data=fish.month, var.equal=TRUE)
result$diff.in.means <- sum(result$estimate*c(1,-1))
names(result$diff.in.means)<- "diff.in.means"
result
##
## Two Sample t-test
##
## data: diff by Period
## t = 0.28989, df = 23, p-value = 0.7745
## alternative hypothesis: true difference in means between group After and group Before is not equal to 0
## 95 percent confidence interval:
## -0.8128144 1.0777491
## sample estimates:
## mean in group After mean in group Before
## 0.4418295 0.3093621
cat("Mean difference in log(Count) :,",result$diff.in.means," (SE ", result$stderr,")\n")
## Mean difference in log(Count) :, 0.1324674 (SE 0.4569542 )
##***part105e;
# sink()
# do the two sample Wilcoxon test
# sink('baci-fish-R-107.txt',split=TRUE)
##***part107b;
result <- wilcox.test(diff ~ Period, data=fish.month, conf.int=TRUE)
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact confidence intervals with ties
result
##
## Wilcoxon rank sum test with continuity correction
##
## data: diff by Period
## W = 81, p-value = 0.8916
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -0.8611464 1.0985632
## sample estimates:
## difference in location
## 0.02032091
##***part107e;
# sink()
###############################################################################
# Do a Mixed effect linear model on the individual values
# sink('baci-fish-R-300-type3.txt',split=TRUE)
##***part300b;
# Because there is ONLY one measurement per year, the SamplingTime*Site and
# residual variance are total confounded and cannot be separated. This is
# the residual term.
result.lmer <- lmerTest::lmer(log(Count+.5) ~ SiteClass+Period+SiteClass:Period +
(1|SamplingTimeF),
data=fish)
##***part300e;
##***part301b;
anova(result.lmer, ddf="Kenward-Roger")
## Type III Analysis of Variance Table with Kenward-Roger's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## SiteClass 1.76058 1.76058 1 23 2.7024 0.1138
## Period 0.11746 0.11746 1 23 0.1803 0.6751
## SiteClass:Period 0.05475 0.05475 1 23 0.0840 0.7745
##***part301e;
# sink()
summary(result.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Count + 0.5) ~ SiteClass + Period + SiteClass:Period + (1 |
## SamplingTimeF)
## Data: fish
##
## REML criterion at convergence: 174.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.62164 -0.51054 -0.01742 0.50000 1.84821
##
## Random effects:
## Groups Name Variance Std.Dev.
## SamplingTimeF (Intercept) 3.0325 1.7414
## Residual 0.6515 0.8071
## Number of obs: 50, groups: SamplingTimeF, 25
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.6423 0.5323 27.4202 3.085 0.00461 **
## SiteClassImpact 0.4418 0.3166 23.0000 1.396 0.17616
## PeriodBefore 0.3777 0.7684 27.4202 0.492 0.62690
## SiteClassImpact:PeriodBefore -0.1325 0.4570 23.0000 -0.290 0.77450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) StClsI PrdBfr
## StClssImpct -0.297
## PeriodBefor -0.693 0.206
## StClssIm:PB 0.206 -0.693 -0.297
# sink('baci-fish-R-300-vc.txt',split=TRUE)
##***part300vcb;
# Extract the variance components
vc <- VarCorr(result.lmer)
vc
## Groups Name Std.Dev.
## SamplingTimeF (Intercept) 1.74142
## Residual 0.80714
##***part300vce;
# sink()
# emmeans after a lm() fit
# sink('baci-fish-R-s300LSM-SiteClass.txt',split=TRUE)
##***parts300LSM-SiteClassb;
result.lmer.emmo.S <- emmeans::emmeans(result.lmer, ~SiteClass)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means for SiteClass \n\n")
##
## Estimated marginal means for SiteClass
summary(result.lmer.emmo.S)
## SiteClass emmean SE df lower.CL upper.CL
## Control 1.83 0.384 27.4 1.04 2.62
## Impact 2.21 0.384 27.4 1.42 2.99
##
## Results are averaged over the levels of: Period
## Degrees-of-freedom method: kenward-roger
## Results are given on the log(mu + 0.5) (not the response) scale.
## Confidence level used: 0.95
##***parts300LSM-SiteClasse;
# sink()
# sink('baci-fish-R-300LSM-Period.txt',split=TRUE)
##***part300LSM-Periodb;
result.lmer.emmo.P <- emmeans::emmeans(result.lmer, ~Period)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means \n\n")
##
## Estimated marginal means
summary(result.lmer.emmo.P)
## Period emmean SE df lower.CL upper.CL
## After 1.86 0.508 23 0.812 2.91
## Before 2.17 0.529 23 1.080 3.27
##
## Results are averaged over the levels of: SiteClass
## Degrees-of-freedom method: kenward-roger
## Results are given on the log(mu + 0.5) (not the response) scale.
## Confidence level used: 0.95
##***part300LSM-Periode;
# sink()
# sink('baci-fish-R-300LSM-int.txt',split=TRUE)
##***part300LSM-intb;
result.lmer.emmo.SP <- emmeans::emmeans(result.lmer, ~SiteClass:Period)
cat("\nEstimated marginal means \n\n")
##
## Estimated marginal means
summary(result.lmer.emmo.SP)
## SiteClass Period emmean SE df lower.CL upper.CL
## Control After 1.64 0.532 27.4 0.551 2.73
## Impact After 2.08 0.532 27.4 0.993 3.18
## Control Before 2.02 0.554 27.4 0.884 3.16
## Impact Before 2.33 0.554 27.4 1.193 3.47
##
## Degrees-of-freedom method: kenward-roger
## Results are given on the log(mu + 0.5) (not the response) scale.
## Confidence level used: 0.95
##***part300LSM-inte;
# sink()
# Estimate the BACI contrast
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.lmer)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]
## Estimate Std. Error df t value Pr(>|t|)
## -0.1324674 0.4569542 23.0000004 -0.2898920 0.7744963
# sink("baci-fish-R-300baci.txt",split=TRUE)
##***part300bacib;
# Estimate the BACI contrast along with a se
summary(contrast(result.lmer.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)
## contrast estimate SE df lower.CL upper.CL t.ratio p.value
## baci -0.132 0.457 23 -1.08 0.813 -0.290 0.7745
##
## Note: contrasts are still on the log(mu + 0.5) scale. Consider using
## regrid() if you want contrasts of back-transformed estimates.
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***part300bacie;
# sink()
# Check the residuals etc
##***part300diagnosticb;
diagplot <- sf.autoplot.lmer(result.lmer)
## Loading required package: broom.mixed
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: lattice
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
plot(diagplot)

##***part300diagnostice;
#ggsave.ggmultiplot(plotdiag, # see https://github.com/sinhrks/ggfortify/issues/98 for bug in autoplot
# file='baci-fish-R-300-diagnostic.png', h=4, w=6, units="in", dpi=300)
#######################################################################################
#######################################################################################
#######################################################################################
# Power analysis for BACI design with a single control and single impact site, but
# with multiple years before/after impact
# Example of of the fish analysis.
# Fish counts are quite variable and a BACI difference of .5 on the log scale is important
# We use mu_IA=20, mu_IB=30, mu_CB=30, mu_cA=30
#
# We have the year-to-year variation (sdYear)
# The site-to-site variation can be set to any arbitrary value (e.g. 0)
# The residual and site-year variance components are confounded to set
# the sdYearSite and the sdResidual to 0
# Also set the n_IA=1 etc.
#
# We examine various scenarios for the number of sites and number subsamples
# Illustrate how to get computations for a single scenario
baci.power(n_IA=1, n_IB=1, n_CA=1, n_CB=1,
ns_I=1, ns_C=1,
ny_B=12, ny_A=13,
mu_IA=.2, mu_IB=0, mu_CA=0, mu_CB=0,
sdYear=31.71, sdSite=99, sdSiteYear=27.07, sdResid=0)
## alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB n_CA n_CB ns_I ns_C ny_B
## 1 0.05 99 31.71 27.07 0 1 1 1 1 1 1 12
## ny_A mu_IA mu_IB mu_CA mu_CB baci baci.se dfdenom ncp Fcrit
## 1 13 0.2 0 0 0 0.2 15.32537 23 0.0001703092 4.279344
## power Tcrit os.power1 os.power2
## 1 0.05001794 1.713872 0.05132061 0.04870662
vc <- as.data.frame(VarCorr(result.lmer))
##***part500b;
# Note that because there is only 1 site, there is NO variance component
# associated with site. Use any value here. In general, the site variance
# component is not important because when you meaure the same sites over time
# the site effects cancel out (like blocking) and so site-to-site variability
# is not important. The site-year variation really is what drives the BACI power.
sdResid = vc[ vc$grp=="Residual", "sdcor"]
sdYear = vc[ vc$grp=="SamplingTimeF", "sdcor"]
scenarios <- expand.grid(n_quad=1,
n.months_B =seq(12,20,2),
n.months_A =seq(13,20,1),
baci_effect=0.5,
sdYear = sdYear, sdResid=sdResid)
head(scenarios)
## n_quad n.months_B n.months_A baci_effect sdYear sdResid
## 1 1 12 13 0.5 1.741419 0.807142
## 2 1 14 13 0.5 1.741419 0.807142
## 3 1 16 13 0.5 1.741419 0.807142
## 4 1 18 13 0.5 1.741419 0.807142
## 5 1 20 13 0.5 1.741419 0.807142
## 6 1 12 14 0.5 1.741419 0.807142
power <- plyr::adply(scenarios,1,function(x){
#browser()
power <- baci.power(
n_IA=x$n_quad, n_IB=x$n_quad, n_CA=x$n_quad, n_CB=x$n_quad,
ns_I=1, ns_C=1,
ny_B=x$n.months_B, ny_A=x$n.months_A,
mu_IA=x$baci_effect, mu_IB=0, mu_CA=0, mu_CB=0,
sdYear=x$sdYear, sdSite=0, sdSiteYear=x$sdResid, sdResid=0)
power
})
head(power)
## n_quad n.months_B n.months_A baci_effect alpha sdSite sdYear sdSiteYear
## 1 1 12 13 0.5 0.05 0 1.741419 0.807142
## 2 1 14 13 0.5 0.05 0 1.741419 0.807142
## 3 1 16 13 0.5 0.05 0 1.741419 0.807142
## 4 1 18 13 0.5 0.05 0 1.741419 0.807142
## 5 1 20 13 0.5 0.05 0 1.741419 0.807142
## 6 1 12 14 0.5 0.05 0 1.741419 0.807142
## sdResid n_IA n_IB n_CA n_CB ns_I ns_C ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci
## 1 0 1 1 1 1 1 1 12 13 0.5 0 0 0 0.5
## 2 0 1 1 1 1 1 1 14 13 0.5 0 0 0 0.5
## 3 0 1 1 1 1 1 1 16 13 0.5 0 0 0 0.5
## 4 0 1 1 1 1 1 1 18 13 0.5 0 0 0 0.5
## 5 0 1 1 1 1 1 1 20 13 0.5 0 0 0 0.5
## 6 0 1 1 1 1 1 1 12 14 0.5 0 0 0 0.5
## baci.se dfdenom ncp Fcrit power Tcrit os.power1 os.power2
## 1 0.4569542 23 1.197277 4.279344 0.1824510 1.713872 0.2800207 0.003387034
## 2 0.4396541 25 1.293355 4.241699 0.1943614 1.708141 0.2951721 0.002960494
## 3 0.4262185 27 1.376181 4.210008 0.2047466 1.703288 0.3081427 0.002641309
## 4 0.4154683 29 1.448319 4.182964 0.2138748 1.699127 0.3193705 0.002394624
## 5 0.4066636 31 1.511714 4.159615 0.2219566 1.695519 0.3291837 0.002198952
## 6 0.4490524 24 1.239784 4.259677 0.1877758 1.710882 0.2868063 0.003188366
##***part500e;
##***part510b;
power.plot <- ggplot(data=power, aes(x=ny_A, y=power, color=as.factor(ny_B)))+
ggtitle("Estimated power",
subtitle=paste("alpha: ", power$alpha[1],"; baci_effect:", power$baci_effect[1]))+
geom_point()+
geom_line()+
ylim(0,1)+
geom_hline(yintercept=0.8, color="blue")+
xlab("Number of months of monitoring after impact")+
scale_color_discrete(name="# months\nbefore")
power.plot

##***part510e;