# BACI-Chironomid
# Taken from Krebs, Ecological Methodology, 2nd Edition. Box 10.3.
# Estimates of chironomid abundance in sediments were taken at one station
# above and below a pulp mill outflow pipe for 3 years before plant #operation
# and for 6 years after plant operation.
library(ggplot2)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(plyr)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
source("../../schwarz.functions.r")
# Read in the actual data
# sink("baci-chironomid-R-001.txt", split=TRUE)
##***part001b;
cat(" BACI design measuring chironomid counts with multiple (paired) yearly measurements before/after \n\n")
## BACI design measuring chironomid counts with multiple (paired) yearly measurements before/after
chironomid <- read.csv("baci-chironomid.csv", header=TRUE, as.is=TRUE, strip.white=TRUE)
cat("Listing of part of the raw data \n")
## Listing of part of the raw data
head(chironomid)
## Year Control.Site Impact.Site Period
## 1 1988 14 17 Before
## 2 1989 12 14 Before
## 3 1990 15 17 Before
## 4 1991 16 21 After
## 5 1992 13 24 After
## 6 1993 12 20 After
##***part001e;
# sink()
# The data is NOT in the usual format where there is only one column
# for the response and a separate column indicating if it is a control or
# impact site. We need to restructure the data
# sink('baci-chironomid-R-301.txt', split=TRUE)
##***part002b;
# We change the data from wide to long format
chironomid.long <- tidyr::pivot_longer(chironomid,
cols=c("Control.Site","Impact.Site"),
names_to = "SiteClass",
values_to="Count")
chironomid.long$SiteClass <- factor(chironomid.long$SiteClass)
chironomid.long$YearF <- factor(chironomid.long$Year)
chironomid.long$Period <- factor(chironomid.long$Period)
chironomid.long$logCount <- log(chironomid.long$Count + .5)
head(chironomid.long)
## # A tibble: 6 × 6
## Year Period SiteClass Count YearF logCount
## <int> <fct> <fct> <int> <fct> <dbl>
## 1 1988 Before Control.Site 14 1988 2.67
## 2 1988 Before Impact.Site 17 1988 2.86
## 3 1989 Before Control.Site 12 1989 2.53
## 4 1989 Before Impact.Site 14 1989 2.67
## 5 1990 Before Control.Site 15 1990 2.74
## 6 1990 Before Impact.Site 17 1990 2.86
##***part002e;
# sink()
str(chironomid.long)
## tibble [18 × 6] (S3: tbl_df/tbl/data.frame)
## $ Year : int [1:18] 1988 1988 1989 1989 1990 1990 1991 1991 1992 1992 ...
## $ Period : Factor w/ 2 levels "After","Before": 2 2 2 2 2 2 1 1 1 1 ...
## $ SiteClass: Factor w/ 2 levels "Control.Site",..: 1 2 1 2 1 2 1 2 1 2 ...
## $ Count : int [1:18] 14 17 12 14 15 17 16 21 13 24 ...
## $ YearF : Factor w/ 9 levels "1988","1989",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ logCount : num [1:18] 2.67 2.86 2.53 2.67 2.74 ...
# Get plot of series over time
##***part010b;
prelimplot <- ggplot(data=chironomid.long,
aes(x=Year, y=logCount, group=SiteClass, color=SiteClass))+
ggtitle("Fish counts over time")+
geom_point()+
geom_line()+
geom_vline(xintercept=-0.5+min(chironomid.long$Year[as.character(chironomid.long$Period) == "After"]))
prelimplot

##***part010e;
#ggsave(plot=prelimplot, file="baci-chironomid-R-010.png", h=4, w=6, units="in", dpi=300)
# There are several ways in which this BACI design can be analyzed.
###########################################################################
# Do a t-test on the differces of the averages for each year
# Because only one measurement was taken at each site in each year, we
# don't have to first average. We can use the wide format data.
# sink('baci-chironomid-R-101.txt', split=TRUE)
##***part101b;
chironomid$diff <- log(chironomid$Impact.Site+.5) - log(chironomid$Control.Site+.5)
head(chironomid)
## Year Control.Site Impact.Site Period diff
## 1 1988 14 17 Before 0.1880522
## 2 1989 12 14 Before 0.1484200
## 3 1990 15 17 Before 0.1213609
## 4 1991 16 21 After 0.2646926
## 5 1992 13 24 After 0.5959834
## 6 1993 12 20 After 0.4946962
##***part101e;
# sink()
# Plot the difference over time
##***part102b;
plotdiff <- ggplot(data=chironomid, aes(x=Year, y=diff))+
ggtitle("Plot of differences over time")+
ylab("Difference (Impact-Control)")+
geom_point()+
geom_line()+
geom_vline(xintercept=-0.5+min(chironomid$Year[as.character(chironomid$Period) == "After"]))
plotdiff

##***part102e;
#ggsave(plot=plotdiff, file="baci-chironomid-R-102.png", h=4, w=6, units="in", dpi=300)
# do the two sample t-test not assuming equal variances
# sink('baci-chironomid-R-104.txt', split=TRUE)
##***part104b;
result <- try(t.test(diff ~ Period, data=chironomid),silent=TRUE)
if(class(result)=="try-error")
{cat("Unable to do unequal variance t-test because of small sample size\n")} else
{
result$diff.in.means <- sum(result$estimate*c(1,-1))
names(result$diff.in.means)<- "diff.in.means"
print(result)
cat("Estimated difference in means :",result$diff.in.means, " (SE ",result$stderr, ")\n")
}
##
## Welch Two Sample t-test
##
## data: diff by Period
## t = 5.2465, df = 6.2914, p-value = 0.001664
## alternative hypothesis: true difference in means between group After and group Before is not equal to 0
## 95 percent confidence interval:
## 0.1494910 0.4054139
## sample estimates:
## mean in group After mean in group Before
## 0.4300635 0.1526110
##
## Estimated difference in means : 0.2774524 (SE 0.05288317 )
##***part104e;
# sink()
# do the two sample t-test assuming equal variances
# sink('baci-chironomid-R-105.txt', split=TRUE)
##***part105b;
result.ev <- t.test(diff ~ Period, data=chironomid, var.equal=TRUE)
result.ev$diff.in.means <- sum(result$estimate*c(1,-1))
names(result.ev$diff.in.means)<- "diff.in.means"
result.ev
##
## Two Sample t-test
##
## data: diff by Period
## t = 3.7933, df = 7, p-value = 0.006774
## alternative hypothesis: true difference in means between group After and group Before is not equal to 0
## 95 percent confidence interval:
## 0.1044973 0.4504075
## sample estimates:
## mean in group After mean in group Before
## 0.4300635 0.1526110
cat("Estimated difference in means on log scale:",result.ev$diff.in.means, " (SE ",result.ev$stderr, "\n")
## Estimated difference in means on log scale: 0.2774524 (SE 0.07314273
##***part105e;
# sink()
# do the two sample Wilcoxon test
# sink('baci-chironomid-R-107.txt', split=TRUE)
##***part107b;
result <- wilcox.test(diff ~ Period, data=chironomid, conf.int=TRUE)
result
##
## Wilcoxon rank sum exact test
##
## data: diff by Period
## W = 18, p-value = 0.02381
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## 0.1162725 0.4475634
## sample estimates:
## difference in location
## 0.2941913
##***part107e;
# sink()
##################################################################
# Do a Mixed effect linear model on the individual values
# Because there is ONLY one measurement per year, the SamplingTime*Site and
# residual variance are total confounded and cannot be separated. This is
# the residual term.
# sink('baci-chironomid-R-300-type3.txt', split=TRUE)
##***part300b;
result.lmer <- lmerTest::lmer(log(Count+.5) ~ SiteClass+Period+SiteClass:Period
+ (1|YearF),
data=chironomid.long)
##***part300e;
##***part301b;
anova(result.lmer, ddf="Kenward-Roger")
## Type III Analysis of Variance Table with Kenward-Roger's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## SiteClass 0.33951 0.33951 1 7 63.4614 9.364e-05 ***
## Period 0.04107 0.04107 1 7 7.6773 0.027662 *
## SiteClass:Period 0.07698 0.07698 1 7 14.3891 0.006774 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##***part301e;
# sink()
summary(result.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Count + 0.5) ~ SiteClass + Period + SiteClass:Period + (1 |
## YearF)
## Data: chironomid.long
##
## REML criterion at convergence: -18.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.19374 -0.47740 0.00359 0.49883 1.20252
##
## Random effects:
## Groups Name Variance Std.Dev.
## YearF (Intercept) 0.00706 0.08403
## Residual 0.00535 0.07314
## Number of obs: 18, groups: YearF, 9
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.70149 0.04548 10.57667 59.400 1.07e-14
## SiteClassImpact.Site 0.43006 0.04223 7.00000 10.184 1.90e-05
## PeriodBefore -0.05459 0.07877 10.57667 -0.693 0.50325
## SiteClassImpact.Site:PeriodBefore -0.27745 0.07314 7.00000 -3.793 0.00677
##
## (Intercept) ***
## SiteClassImpact.Site ***
## PeriodBefore
## SiteClassImpact.Site:PeriodBefore **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) StCI.S PrdBfr
## StClssImp.S -0.464
## PeriodBefor -0.577 0.268
## StClsI.S:PB 0.268 -0.577 -0.464
# sink('baci-chironomid-R-300-vc.txt', split=TRUE)
##***part300vcb;
# Extract the variance components
vc <- VarCorr(result.lmer)
vc
## Groups Name Std.Dev.
## YearF (Intercept) 0.084026
## Residual 0.073143
##***part300vce;
# sink()
# emmeans after a lm() fit
# sink('baci-chironomid-R-s300LSM-SiteClass.txt', split=TRUE)
##***parts300LSM-SiteClassb;
result.lmer.emmo.S <- emmeans::emmeans(result.lmer, ~SiteClass)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means for SiteClass \n\n")
##
## Estimated marginal means for SiteClass
summary(result.lmer.emmo.S)
## SiteClass emmean SE df lower.CL upper.CL
## Control.Site 2.67 0.0394 10.6 2.59 2.76
## Impact.Site 2.97 0.0394 10.6 2.88 3.05
##
## Results are averaged over the levels of: Period
## Degrees-of-freedom method: kenward-roger
## Results are given on the log(mu + 0.5) (not the response) scale.
## Confidence level used: 0.95
##***parts300LSM-SiteClasse;
# sink()
# sink('baci-chironomid-R-300LSM-Period.txt', split=TRUE)
##***part300LSM-Periodb;
result.lmer.emmo.P <- emmeans::emmeans(result.lmer, ~Period)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means \n\n")
##
## Estimated marginal means
summary(result.lmer.emmo.P)
## Period emmean SE df lower.CL upper.CL
## After 2.92 0.0403 7 2.82 3.01
## Before 2.72 0.0570 7 2.59 2.86
##
## Results are averaged over the levels of: SiteClass
## Degrees-of-freedom method: kenward-roger
## Results are given on the log(mu + 0.5) (not the response) scale.
## Confidence level used: 0.95
##***part300LSM-Periode;
# sink()
# sink('baci-chironomid-R-300LSM-int.txt', split=TRUE)
##***part300LSM-intb;
result.lmer.emmo.SP <- emmeans::emmeans(result.lmer, ~SiteClass:Period)
cat("\nEstimated marginal means \n\n")
##
## Estimated marginal means
summary(result.lmer.emmo.SP)
## SiteClass Period emmean SE df lower.CL upper.CL
## Control.Site After 2.70 0.0455 10.6 2.60 2.80
## Impact.Site After 3.13 0.0455 10.6 3.03 3.23
## Control.Site Before 2.65 0.0643 10.6 2.50 2.79
## Impact.Site Before 2.80 0.0643 10.6 2.66 2.94
##
## Degrees-of-freedom method: kenward-roger
## Results are given on the log(mu + 0.5) (not the response) scale.
## Confidence level used: 0.95
##***part300LSM-inte;
# sink()
# Estimate the BACI contrast
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.lmer)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]
## Estimate Std. Error df t value Pr(>|t|)
## -0.277452429 0.073142731 7.000000153 -3.793301451 0.006773671
# sink("baci-chironomid-R-300baci.txt", split=TRUE)
##***part300bacib;
# Estimate the BACI contrast along with a se
summary(contrast(result.lmer.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)
## contrast estimate SE df lower.CL upper.CL t.ratio p.value
## baci -0.277 0.0731 7 -0.45 -0.104 -3.793 0.0068
##
## Note: contrasts are still on the log(mu + 0.5) scale. Consider using
## regrid() if you want contrasts of back-transformed estimates.
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##***part300bacie;
# sink()
# Check the residuals etc
##***part300diagnosticb;
diagplot <- sf.autoplot.lmer(result.lmer)
## Loading required package: broom.mixed
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: lattice
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
plot(diagplot)

##***part300diagnostice;
#ggsave.ggmultiplot(plotdiag, # see https://github.com/sinhrks/ggfortify/issues/98 for bug in autoplot
# file='baci-chironomid-R-300-diagnostic.png',
# h=6, w=6, units="in", dpi=300)