# BACI-Chironomid

# Taken from Krebs, Ecological Methodology, 2nd Edition. Box 10.3.

# Estimates of chironomid abundance in sediments were taken at one station 
# above and below a pulp mill outflow pipe for 3 years before plant #operation 
# and for 6 years after plant operation.


library(ggplot2)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(plyr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack
source("../../schwarz.functions.r")

# Read in the actual data
# sink("baci-chironomid-R-001.txt", split=TRUE)
##***part001b;
cat(" BACI design measuring chironomid counts with multiple (paired) yearly measurements before/after \n\n")
##  BACI design measuring chironomid counts with multiple (paired) yearly measurements before/after
chironomid <- read.csv("baci-chironomid.csv", header=TRUE, as.is=TRUE, strip.white=TRUE)
cat("Listing of part of the raw data \n")
## Listing of part of the raw data
head(chironomid)
##   Year Control.Site Impact.Site Period
## 1 1988           14          17 Before
## 2 1989           12          14 Before
## 3 1990           15          17 Before
## 4 1991           16          21  After
## 5 1992           13          24  After
## 6 1993           12          20  After
##***part001e;
# sink()

# The data is NOT in the usual format where there is only one column
# for the response and a separate column indicating if it is a control or 
# impact site. We need to restructure the data
# sink('baci-chironomid-R-301.txt', split=TRUE)
##***part002b;
# We  change the data from wide to long format
chironomid.long <- tidyr::pivot_longer(chironomid, 
                                       cols=c("Control.Site","Impact.Site"),
                                       names_to = "SiteClass",
                                       values_to="Count")
chironomid.long$SiteClass     <-  factor(chironomid.long$SiteClass)
chironomid.long$YearF         <-  factor(chironomid.long$Year)
chironomid.long$Period        <-  factor(chironomid.long$Period)

chironomid.long$logCount      <- log(chironomid.long$Count + .5)

head(chironomid.long)
## # A tibble: 6 × 6
##    Year Period SiteClass    Count YearF logCount
##   <int> <fct>  <fct>        <int> <fct>    <dbl>
## 1  1988 Before Control.Site    14 1988      2.67
## 2  1988 Before Impact.Site     17 1988      2.86
## 3  1989 Before Control.Site    12 1989      2.53
## 4  1989 Before Impact.Site     14 1989      2.67
## 5  1990 Before Control.Site    15 1990      2.74
## 6  1990 Before Impact.Site     17 1990      2.86
##***part002e;
# sink()
str(chironomid.long)
## tibble [18 × 6] (S3: tbl_df/tbl/data.frame)
##  $ Year     : int [1:18] 1988 1988 1989 1989 1990 1990 1991 1991 1992 1992 ...
##  $ Period   : Factor w/ 2 levels "After","Before": 2 2 2 2 2 2 1 1 1 1 ...
##  $ SiteClass: Factor w/ 2 levels "Control.Site",..: 1 2 1 2 1 2 1 2 1 2 ...
##  $ Count    : int [1:18] 14 17 12 14 15 17 16 21 13 24 ...
##  $ YearF    : Factor w/ 9 levels "1988","1989",..: 1 1 2 2 3 3 4 4 5 5 ...
##  $ logCount : num [1:18] 2.67 2.86 2.53 2.67 2.74 ...
# Get plot of series over time
##***part010b;
prelimplot <- ggplot(data=chironomid.long,
                     aes(x=Year, y=logCount, group=SiteClass, color=SiteClass))+
  ggtitle("Fish counts over time")+
  geom_point()+
  geom_line()+
  geom_vline(xintercept=-0.5+min(chironomid.long$Year[as.character(chironomid.long$Period) == "After"]))
prelimplot

##***part010e;
#ggsave(plot=prelimplot, file="baci-chironomid-R-010.png", h=4, w=6, units="in", dpi=300)



# There are several ways in which this BACI design can be analyzed.

###########################################################################
# Do a t-test on the differces of the averages for each year
# Because only one measurement was taken at each site in each year, we
# don't have to first average. We can use the wide format data.


# sink('baci-chironomid-R-101.txt', split=TRUE)
##***part101b;
chironomid$diff <- log(chironomid$Impact.Site+.5) - log(chironomid$Control.Site+.5)
head(chironomid)
##   Year Control.Site Impact.Site Period      diff
## 1 1988           14          17 Before 0.1880522
## 2 1989           12          14 Before 0.1484200
## 3 1990           15          17 Before 0.1213609
## 4 1991           16          21  After 0.2646926
## 5 1992           13          24  After 0.5959834
## 6 1993           12          20  After 0.4946962
##***part101e;
# sink()


# Plot the difference over time
##***part102b;
plotdiff <- ggplot(data=chironomid, aes(x=Year, y=diff))+
  ggtitle("Plot of differences over time")+
  ylab("Difference (Impact-Control)")+
  geom_point()+
  geom_line()+
  geom_vline(xintercept=-0.5+min(chironomid$Year[as.character(chironomid$Period) == "After"]))
plotdiff

##***part102e;
#ggsave(plot=plotdiff, file="baci-chironomid-R-102.png", h=4, w=6, units="in", dpi=300)



# do the two sample t-test not assuming equal variances
# sink('baci-chironomid-R-104.txt', split=TRUE)
##***part104b;
result <- try(t.test(diff ~ Period, data=chironomid),silent=TRUE)
if(class(result)=="try-error")
     {cat("Unable to do unequal variance t-test because of small sample size\n")} else
     { 
   result$diff.in.means <- sum(result$estimate*c(1,-1))
   names(result$diff.in.means)<- "diff.in.means"
   print(result)
   cat("Estimated difference in means :",result$diff.in.means, " (SE ",result$stderr, ")\n")
     }
## 
##  Welch Two Sample t-test
## 
## data:  diff by Period
## t = 5.2465, df = 6.2914, p-value = 0.001664
## alternative hypothesis: true difference in means between group After and group Before is not equal to 0
## 95 percent confidence interval:
##  0.1494910 0.4054139
## sample estimates:
##  mean in group After mean in group Before 
##            0.4300635            0.1526110 
## 
## Estimated difference in means : 0.2774524  (SE  0.05288317 )
##***part104e;
# sink()


# do the two sample t-test  assuming equal variances
# sink('baci-chironomid-R-105.txt', split=TRUE)
##***part105b;
result.ev <- t.test(diff ~ Period, data=chironomid, var.equal=TRUE)
result.ev$diff.in.means <- sum(result$estimate*c(1,-1))
names(result.ev$diff.in.means)<- "diff.in.means"
result.ev
## 
##  Two Sample t-test
## 
## data:  diff by Period
## t = 3.7933, df = 7, p-value = 0.006774
## alternative hypothesis: true difference in means between group After and group Before is not equal to 0
## 95 percent confidence interval:
##  0.1044973 0.4504075
## sample estimates:
##  mean in group After mean in group Before 
##            0.4300635            0.1526110
cat("Estimated difference in means on log scale:",result.ev$diff.in.means, " (SE ",result.ev$stderr, "\n")
## Estimated difference in means on log scale: 0.2774524  (SE  0.07314273
##***part105e;
# sink()




# do the two sample Wilcoxon test
# sink('baci-chironomid-R-107.txt', split=TRUE)
##***part107b;
result <- wilcox.test(diff ~ Period, data=chironomid, conf.int=TRUE)
result
## 
##  Wilcoxon rank sum exact test
## 
## data:  diff by Period
## W = 18, p-value = 0.02381
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  0.1162725 0.4475634
## sample estimates:
## difference in location 
##              0.2941913
##***part107e;
# sink()





##################################################################
# Do a Mixed effect linear model on the individual values



# Because there is ONLY one measurement per year, the SamplingTime*Site and
# residual variance are total confounded and cannot be separated. This is 
# the residual term.
# sink('baci-chironomid-R-300-type3.txt', split=TRUE)
##***part300b;
result.lmer <- lmerTest::lmer(log(Count+.5) ~ SiteClass+Period+SiteClass:Period 
                              + (1|YearF),
                data=chironomid.long)
##***part300e;

##***part301b;
anova(result.lmer, ddf="Kenward-Roger")
## Type III Analysis of Variance Table with Kenward-Roger's method
##                   Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## SiteClass        0.33951 0.33951     1     7 63.4614 9.364e-05 ***
## Period           0.04107 0.04107     1     7  7.6773  0.027662 *  
## SiteClass:Period 0.07698 0.07698     1     7 14.3891  0.006774 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##***part301e;
# sink()

summary(result.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Count + 0.5) ~ SiteClass + Period + SiteClass:Period + (1 |  
##     YearF)
##    Data: chironomid.long
## 
## REML criterion at convergence: -18.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.19374 -0.47740  0.00359  0.49883  1.20252 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  YearF    (Intercept) 0.00706  0.08403 
##  Residual             0.00535  0.07314 
## Number of obs: 18, groups:  YearF, 9
## 
## Fixed effects:
##                                   Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)                        2.70149    0.04548 10.57667  59.400 1.07e-14
## SiteClassImpact.Site               0.43006    0.04223  7.00000  10.184 1.90e-05
## PeriodBefore                      -0.05459    0.07877 10.57667  -0.693  0.50325
## SiteClassImpact.Site:PeriodBefore -0.27745    0.07314  7.00000  -3.793  0.00677
##                                      
## (Intercept)                       ***
## SiteClassImpact.Site              ***
## PeriodBefore                         
## SiteClassImpact.Site:PeriodBefore ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) StCI.S PrdBfr
## StClssImp.S -0.464              
## PeriodBefor -0.577  0.268       
## StClsI.S:PB  0.268 -0.577 -0.464
# sink('baci-chironomid-R-300-vc.txt', split=TRUE)
##***part300vcb;
# Extract the variance components
vc <- VarCorr(result.lmer)
vc
##  Groups   Name        Std.Dev.
##  YearF    (Intercept) 0.084026
##  Residual             0.073143
##***part300vce;
# sink()


# emmeans after a lm() fit
# sink('baci-chironomid-R-s300LSM-SiteClass.txt', split=TRUE)
##***parts300LSM-SiteClassb;
result.lmer.emmo.S <- emmeans::emmeans(result.lmer, ~SiteClass)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means for SiteClass \n\n")
## 
## Estimated marginal means for SiteClass
summary(result.lmer.emmo.S)
##  SiteClass    emmean     SE   df lower.CL upper.CL
##  Control.Site   2.67 0.0394 10.6     2.59     2.76
##  Impact.Site    2.97 0.0394 10.6     2.88     3.05
## 
## Results are averaged over the levels of: Period 
## Degrees-of-freedom method: kenward-roger 
## Results are given on the log(mu + 0.5) (not the response) scale. 
## Confidence level used: 0.95
##***parts300LSM-SiteClasse;
# sink()

# sink('baci-chironomid-R-300LSM-Period.txt', split=TRUE)
##***part300LSM-Periodb;
result.lmer.emmo.P <- emmeans::emmeans(result.lmer, ~Period)
## NOTE: Results may be misleading due to involvement in interactions
cat("\nEstimated marginal means \n\n")
## 
## Estimated marginal means
summary(result.lmer.emmo.P)
##  Period emmean     SE df lower.CL upper.CL
##  After    2.92 0.0403  7     2.82     3.01
##  Before   2.72 0.0570  7     2.59     2.86
## 
## Results are averaged over the levels of: SiteClass 
## Degrees-of-freedom method: kenward-roger 
## Results are given on the log(mu + 0.5) (not the response) scale. 
## Confidence level used: 0.95
##***part300LSM-Periode;
# sink()

# sink('baci-chironomid-R-300LSM-int.txt', split=TRUE)
##***part300LSM-intb;
result.lmer.emmo.SP <- emmeans::emmeans(result.lmer, ~SiteClass:Period)
cat("\nEstimated marginal means \n\n")
## 
## Estimated marginal means
summary(result.lmer.emmo.SP)
##  SiteClass    Period emmean     SE   df lower.CL upper.CL
##  Control.Site After    2.70 0.0455 10.6     2.60     2.80
##  Impact.Site  After    3.13 0.0455 10.6     3.03     3.23
##  Control.Site Before   2.65 0.0643 10.6     2.50     2.79
##  Impact.Site  Before   2.80 0.0643 10.6     2.66     2.94
## 
## Degrees-of-freedom method: kenward-roger 
## Results are given on the log(mu + 0.5) (not the response) scale. 
## Confidence level used: 0.95
##***part300LSM-inte;
# sink()


# Estimate the BACI contrast
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.lmer)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]
##     Estimate   Std. Error           df      t value     Pr(>|t|) 
## -0.277452429  0.073142731  7.000000153 -3.793301451  0.006773671
# sink("baci-chironomid-R-300baci.txt", split=TRUE)
##***part300bacib; 
# Estimate the BACI contrast along with a se
summary(contrast(result.lmer.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)
##  contrast estimate     SE df lower.CL upper.CL t.ratio p.value
##  baci       -0.277 0.0731  7    -0.45   -0.104  -3.793  0.0068
## 
## Note: contrasts are still on the log(mu + 0.5) scale. Consider using
##       regrid() if you want contrasts of back-transformed estimates. 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
##***part300bacie;
# sink()




# Check the residuals etc

##***part300diagnosticb;
diagplot <- sf.autoplot.lmer(result.lmer)
## Loading required package: broom.mixed
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: lattice
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
plot(diagplot)

##***part300diagnostice;
#ggsave.ggmultiplot(plotdiag, # see https://github.com/sinhrks/ggfortify/issues/98 for bug in autoplot  
#       file='baci-chironomid-R-300-diagnostic.png',
#       h=6, w=6, units="in", dpi=300)