# BACI-crabs

# A simple BACI design was used to assess the impact of cooling water 
# discharge on the Density of shore crabs. 
# The beach near the outlet of the cooling water was sampled 
# using several quadrats before and after the plant started operation, 
# as was a control beach on the other side of the body of water.

# As explained in the, notes, analyze on the log() scale.

library(ggfortify)
## Loading required package: ggplot2
library(ggplot2)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(plyr)

source("../../schwarz.functions.r")
source("../baci-power.r")
## Loading required package: Matrix
# Read in the actual data and define the factors of interest
# sink("baci-crabs-R-001.txt")
##***part001b;
crabs <- read.csv("baci-crabs.csv", header=TRUE, as.is=TRUE, strip.white=TRUE)

crabs$trt <- interaction(crabs$SiteClass, crabs$Period)
crabs$SiteClass <- factor(crabs$SiteClass)
crabs$Period    <- factor(crabs$Period, levels=c("Before","After"), ordered=TRUE) # sort correctly
crabs$trt       <- factor(crabs$trt)

crabs$logDensity <- log(crabs$Density) # we will wan to analyze on the log scale

cat("Listing of part of the raw data \n")
## Listing of part of the raw data
crabs[1:10,]
##    SiteClass Site Period Quadrat Density            trt logDensity
## 1     Impact   I1 Before      Q1      23  Impact.Before   3.135494
## 2     Impact   I1 Before      Q2      30  Impact.Before   3.401197
## 3     Impact   I1 Before      Q3      27  Impact.Before   3.295837
## 4     Impact   I1 Before      Q4      25  Impact.Before   3.218876
## 5     Impact   I1  After      Q5      17   Impact.After   2.833213
## 6     Impact   I1  After      Q6      19   Impact.After   2.944439
## 7     Impact   I1  After      Q7      23   Impact.After   3.135494
## 8     Impact   I1  After      Q8      25   Impact.After   3.218876
## 9     Impact   I1  After      Q9      17   Impact.After   2.833213
## 10   Control   C1 Before     Q10      32 Control.Before   3.465736
##***part001e;
# sink()

str(crabs)
## 'data.frame':    19 obs. of  7 variables:
##  $ SiteClass : Factor w/ 2 levels "Control","Impact": 2 2 2 2 2 2 2 2 2 1 ...
##  $ Site      : chr  "I1" "I1" "I1" "I1" ...
##  $ Period    : Ord.factor w/ 2 levels "Before"<"After": 1 1 1 1 2 2 2 2 2 1 ...
##  $ Quadrat   : chr  "Q1" "Q2" "Q3" "Q4" ...
##  $ Density   : int  23 30 27 25 17 19 23 25 17 32 ...
##  $ trt       : Factor w/ 4 levels "Control.After",..: 4 4 4 4 2 2 2 2 2 3 ...
##  $ logDensity: num  3.14 3.4 3.3 3.22 2.83 ...
# Preliminary plot

##***part010b;
prelimplot <- ggplot(data=crabs, aes(x=trt, y=logDensity))+
  ggtitle("Preliminary plot to look for outliers etc")+
  geom_point(position=position_jitter(w=0.1))+
  geom_boxplot(alpha=0.1)
prelimplot

##***part010e;

#ggsave(plot=prelimplot, file="baci-crabs-R-010.png", h=4, w=6, units="in", dpi=300)

 

# Get some simple summary statistics
# sink('baci-crabs-R-020.txt', split=TRUE)
##***part020b;
report <- plyr::ddply(crabs, c("Period","SiteClass"), function(x){
   res <- sf.simple.summary(x, "logDensity", crd=TRUE)
   return(res)
})
report
##   Period SiteClass n nmiss     mean        sd         se      lcl      ucl
## 1 Before   Control 6     0 3.412531 0.1119179 0.04569030 3.295081 3.529982
## 2 Before    Impact 4     0 3.262851 0.1131096 0.05655481 3.082868 3.442834
## 3  After   Control 4     0 3.309636 0.1087065 0.05435326 3.136660 3.482613
## 4  After    Impact 5     0 2.993047 0.1765971 0.07897664 2.773773 3.212321
##***part020e;
# sink()

# Draw a profile plot includeing the raw data
##***part030b;
profileplot <- ggplot(data=report, aes(x=Period, y=mean, group=SiteClass, color=SiteClass))+
  ggtitle("Profile plot of crab density")+
  ylab("logDensity with mean and 95% ci")+
  geom_point(position=position_dodge(w=0.1))+
  geom_line(position=position_dodge(w=0.1))+
  geom_errorbar(aes(ymax=ucl, ymin=lcl), width=0.1, position=position_dodge(w=0.1))+
  geom_point(data=crabs, aes(y=logDensity), position=position_dodge(w=0.2))
profileplot

##***part030e;

#ggsave(plot=profileplot, file="baci-crabs-R-030.png", h=4, w=6, units="in", dpi=300) 

# Fit the linear model, get the anova table, and the usual stuff
# CAUTION!!! Because the design is unbalanced, the default model
# fit by aov gives the WRONG sum of squares and F-tests.
# The default tests are "sequential tests" where terms are added
# in the order specified. You want the marginal tests 
# (which are reported in JMP or SAS)
#
# Read the entry at 
#  http://r-eco-evo.blogspot.com/2007/10/infamous-type-iii-ss-before-i-started_20.html
#
##***part100b;
cat("The sum of squares and F-tests from the anova() below are INCORRECT\n")
## The sum of squares and F-tests from the anova() below are INCORRECT
cat("in unbalanced data because they are sequential and only adjust for effects\n")
## in unbalanced data because they are sequential and only adjust for effects
cat("that enter the model prior to the term in question.")
## that enter the model prior to the term in question.
result.lm <- lm( log(Density) ~ SiteClass + Period + SiteClass:Period, data=crabs)
cat("\nAnalysis of variance -- this is NOT CORRECT because design is unbalanced \n")
## 
## Analysis of variance -- this is NOT CORRECT because design is unbalanced
anova(result.lm)
## Analysis of Variance Table
## 
## Response: log(Density)
##                  Df  Sum Sq Mean Sq F value   Pr(>F)    
## SiteClass         1 0.31631 0.31631 18.1646 0.000682 ***
## Period            1 0.15503 0.15503  8.9027 0.009275 ** 
## SiteClass:Period  1 0.03214 0.03214  1.8459 0.194340    
## Residuals        15 0.26121 0.01741                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n\nUse the Type III tests from the Anova() function from the car package")
## 
## 
## Use the Type III tests from the Anova() function from the car package
cat("\nbut you need to set the treatment contrasts to sum rather than treatment")
## 
## but you need to set the treatment contrasts to sum rather than treatment
cat("\nSee http://r.789695.n4.nabble.com/Type-I-v-s-Type-III-Sum-Of-Squares-in-ANOVA-td1573657.html\n")
## 
## See http://r.789695.n4.nabble.com/Type-I-v-s-Type-III-Sum-Of-Squares-in-ANOVA-td1573657.html
result.lm2 <- lm( log(Density) ~ SiteClass + Period + SiteClass:Period, data=crabs,
                  contrasts=list(SiteClass="contr.sum", Period='contr.sum'))
##***part100e;

##***part100effectb;
car::Anova(result.lm2,type=3)
## Anova Table (Type III tests)
## 
## Response: log(Density)
##                   Sum Sq Df    F value    Pr(>F)    
## (Intercept)      194.343  1 11160.2604 < 2.2e-16 ***
## SiteClass          0.251  1    14.4055  0.001759 ** 
## Period             0.160  1     9.2039  0.008376 ** 
## SiteClass:Period   0.032  1     1.8459  0.194340    
## Residuals          0.261 15                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##***part100effecte;



# Check the residuals etc

##***part100diagnosticb;
diagplot <- autoplot(result.lm2)
## Warning: `fortify(<lm>)` was deprecated in ggplot2 4.0.0.
## ℹ Please use `broom::augment(<lm>)` instead.
## ℹ The deprecated feature was likely used in the ggfortify package.
##   Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggfortify package.
##   Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggfortify package.
##   Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
diagplot

##***part100diagnostice;
#ggsave.ggmultiplot(plotdiag, # see https://github.com/sinhrks/ggfortify/issues/98 for bug in autoplot   
#       file='baci-crabs-R-100-diagnostic.png', h=4, w=6, units="in", dpi=300)


# Estimate the marginal means and the various effects
# sink('baci-crabs-R-100LSM-SiteClass.txt', split=TRUE)
##***part100LSM-SiteClassb;
result.emmo.S <- emmeans::emmeans(result.lm2, ~SiteClass)
## NOTE: Results may be misleading due to involvement in interactions
cat("\n\n Estimated marginal means for SiteClass \n\n")
## 
## 
##  Estimated marginal means for SiteClass
summary(result.emmo.S, infer=TRUE)
##  SiteClass emmean     SE df lower.CL upper.CL t.ratio p.value
##  Control     3.36 0.0426 15     3.27     3.45  78.917  <.0001
##  Impact      3.13 0.0443 15     3.03     3.22  70.670  <.0001
## 
## Results are averaged over the levels of: Period 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95
##***part100LSM-SiteClasse;
# sink()

# sink('baci-crabs-R-100LSM-Period.txt', split=TRUE)
##***part100LSM-Periodb;
result.emmo.P <- emmeans::emmeans(result.lm2, ~Period)
## NOTE: Results may be misleading due to involvement in interactions
cat("\n\n Estimated marginal means for Period \n\n")
## 
## 
##  Estimated marginal means for Period
summary(result.emmo.P)
##  Period emmean     SE df lower.CL upper.CL
##  Before   3.34 0.0426 15     3.25     3.43
##  After    3.15 0.0443 15     3.06     3.25
## 
## Results are averaged over the levels of: SiteClass 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95
##***part100LSM-Periode;
# sink()

# sink('baci-crabs-R-100LSM-int.txt', split=TRUE)
##***part100LSM-intb;
result.emmo.SP <- emmeans::emmeans(result.lm2, ~SiteClass:Period)
cat("Estimated marginal means \n\n")
## Estimated marginal means
summary(result.emmo.SP, infer=TRUE)
##  SiteClass Period emmean     SE df lower.CL upper.CL t.ratio p.value
##  Control   Before   3.41 0.0539 15     3.30     3.53  63.344  <.0001
##  Impact    Before   3.26 0.0660 15     3.12     3.40  49.452  <.0001
##  Control   After    3.31 0.0660 15     3.17     3.45  50.161  <.0001
##  Impact    After    2.99 0.0590 15     2.87     3.12  50.717  <.0001
## 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95
##***part100LSM-inte;
# sink()

# Estimate the BACI contrast along with a se
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.lm)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]
##   Estimate Std. Error    t value   Pr(>|t|) 
## -0.1180224  0.0868676 -1.3586464  0.1943404
# sink("baci-crabs-R-100baci.txt", split=TRUE)
##***part100bacib;
summary(contrast(result.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)
##  contrast estimate    SE df lower.CL upper.CL t.ratio p.value
##  baci       -0.167 0.123 15   -0.429   0.0949  -1.359  0.1943
## 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95
##***part100bacie;
# sink()




###------------------------------------------------------------------------------------
###------------------------------------------------------------------------------------
###------------------------------------------------------------------------------------

# Power analysis for simple BACI analysis

# For a simple BACI there is 
#   1 site in each of treatment and control -> ns_I=1, ns_C=1
#   1 year before/after                     -> ny_B=1, ny_A=1
# We cannot separate the site-year interaction from the means so we hope
# for the best and suppose that sdSiteYear=0

# Example of crabs.
# We think that a 20 percentage difference in the differential change is important.
# We arbitarily choose 4 mu values that give us this change
#  E.g. mu_IB =0, mu_IS=.20, mu_CB=0, mu_CA=0
#
# We don't have to worry about the sdSite or the sdYear because these "cancel"
# because every site is measured every year.
# The residual standard deviation is about 5
#
# We examine various scenarios for the number of subsamples at each of the 4 site-year combinations


# Get the BACI power program


# illustration of how the power program work for a specific scenario
sdResid <- summary(result.lm2)$sigma
baci.power(n_IA=5, n_IB=5, n_CA=5, n_CB=5, 
           ns_I=1, ns_C=1, 
           ny_B=1, ny_A=1, 
           mu_IA=.2, mu_IB=0, mu_CA=0, mu_CB=0, 
           sdYear=0, sdSite=0, sdSiteYear=0, sdResid=sdResid)
##   alpha sdSite sdYear sdSiteYear   sdResid n_IA n_IB n_CA n_CB ns_I ns_C ny_B
## 1  0.05      0      0          0 0.1319614    5    5    5    5    1    1    1
##   ny_A mu_IA mu_IB mu_CA mu_CB baci   baci.se dfdenom      ncp    Fcrit
## 1    1   0.2     0     0     0  0.2 0.1180298      16 2.871286 4.493998
##       power    Tcrit os.power1    os.power2
## 1 0.3570512 1.745884 0.4910198 0.0005353837
##***part500b;
scenarios <- expand.grid(n_quad=seq(5,20,2),
                         baci_effect=0.2,
                         sdResid=sdResid)
scenarios
##   n_quad baci_effect   sdResid
## 1      5         0.2 0.1319614
## 2      7         0.2 0.1319614
## 3      9         0.2 0.1319614
## 4     11         0.2 0.1319614
## 5     13         0.2 0.1319614
## 6     15         0.2 0.1319614
## 7     17         0.2 0.1319614
## 8     19         0.2 0.1319614
power <- plyr::adply(scenarios,1,function(x){
   #browser()
   power <- baci.power(
            n_IA=x$n_quad, n_IB=x$n_quad, n_CA=x$n_quad, n_CB=x$n_quad, 
            ns_I=1, ns_C=1, 
            ny_B=1, ny_A=1, 
            mu_IA=x$baci_effect, mu_IB=0, mu_CA=0, mu_CB=0, 
            sdYear=0, sdSite=0, sdSiteYear=0, sdResid=x$sdResid)
   power
})

power
##   n_quad baci_effect alpha sdSite sdYear sdSiteYear   sdResid n_IA n_IB n_CA
## 1      5         0.2  0.05      0      0          0 0.1319614    5    5    5
## 2      7         0.2  0.05      0      0          0 0.1319614    7    7    7
## 3      9         0.2  0.05      0      0          0 0.1319614    9    9    9
## 4     11         0.2  0.05      0      0          0 0.1319614   11   11   11
## 5     13         0.2  0.05      0      0          0 0.1319614   13   13   13
## 6     15         0.2  0.05      0      0          0 0.1319614   15   15   15
## 7     17         0.2  0.05      0      0          0 0.1319614   17   17   17
## 8     19         0.2  0.05      0      0          0 0.1319614   19   19   19
##   n_CB ns_I ns_C ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci    baci.se dfdenom
## 1    5    1    1    1    1   0.2     0     0     0  0.2 0.11802982      16
## 2    7    1    1    1    1   0.2     0     0     0  0.2 0.09975341      24
## 3    9    1    1    1    1   0.2     0     0     0  0.2 0.08797424      32
## 4   11    1    1    1    1   0.2     0     0     0  0.2 0.07957569      40
## 5   13    1    1    1    1   0.2     0     0     0  0.2 0.07319899      48
## 6   15    1    1    1    1   0.2     0     0     0  0.2 0.06814455      56
## 7   17    1    1    1    1   0.2     0     0     0  0.2 0.06401066      64
## 8   19    1    1    1    1   0.2     0     0     0  0.2 0.06054802      72
##         ncp    Fcrit     power    Tcrit os.power1    os.power2
## 1  2.871286 4.493998 0.3570512 1.745884 0.4910198 5.353837e-04
## 2  4.019801 4.259677 0.4859071 1.710882 0.6190894 1.619837e-04
## 3  5.168315 4.149097 0.5967341 1.693889 0.7190601 5.406011e-05
## 4  6.316830 4.084746 0.6888782 1.683851 0.7954833 1.921239e-05
## 5  7.465344 4.042652 0.7634033 1.677224 0.8528155 7.140875e-06
## 6  8.613858 4.012973 0.8223342 1.672522 0.8951393 2.746100e-06
## 7  9.762373 3.990924 0.8680675 1.669013 0.9259577 1.084973e-06
## 8 10.910887 3.973897 0.9029987 1.666294 0.9481344 4.382558e-07
cat("\n")
power[,c("alpha","n_IA","n_IB","n_CA","n_CB","baci","power")]
##   alpha n_IA n_IB n_CA n_CB baci     power
## 1  0.05    5    5    5    5  0.2 0.3570512
## 2  0.05    7    7    7    7  0.2 0.4859071
## 3  0.05    9    9    9    9  0.2 0.5967341
## 4  0.05   11   11   11   11  0.2 0.6888782
## 5  0.05   13   13   13   13  0.2 0.7634033
## 6  0.05   15   15   15   15  0.2 0.8223342
## 7  0.05   17   17   17   17  0.2 0.8680675
## 8  0.05   19   19   19   19  0.2 0.9029987
##***part500e;



##***part510b;
power.plot <- ggplot(data=power, aes(x=n_quad, y=power))+
   ggtitle("Estimated power",
            subtitle=paste("alpha: ", power$alpha[1]))+
   geom_point()+
   geom_line()+
   ylim(0,1)+
   geom_hline(yintercept=0.8, color="blue")+
   xlab("Number of quadrats at each site:year")
power.plot

##***part510e;