# BACI-crabs
# A simple BACI design was used to assess the impact of cooling water
# discharge on the Density of shore crabs.
# The beach near the outlet of the cooling water was sampled
# using several quadrats before and after the plant started operation,
# as was a control beach on the other side of the body of water.
# As explained in the, notes, analyze on the log() scale.
library(ggfortify)
## Loading required package: ggplot2
library(ggplot2)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(plyr)
source("../../schwarz.functions.r")
source("../baci-power.r")
## Loading required package: Matrix
# Read in the actual data and define the factors of interest
# sink("baci-crabs-R-001.txt")
##***part001b;
crabs <- read.csv("baci-crabs.csv", header=TRUE, as.is=TRUE, strip.white=TRUE)
crabs$trt <- interaction(crabs$SiteClass, crabs$Period)
crabs$SiteClass <- factor(crabs$SiteClass)
crabs$Period <- factor(crabs$Period, levels=c("Before","After"), ordered=TRUE) # sort correctly
crabs$trt <- factor(crabs$trt)
crabs$logDensity <- log(crabs$Density) # we will wan to analyze on the log scale
cat("Listing of part of the raw data \n")
## Listing of part of the raw data
crabs[1:10,]
## SiteClass Site Period Quadrat Density trt logDensity
## 1 Impact I1 Before Q1 23 Impact.Before 3.135494
## 2 Impact I1 Before Q2 30 Impact.Before 3.401197
## 3 Impact I1 Before Q3 27 Impact.Before 3.295837
## 4 Impact I1 Before Q4 25 Impact.Before 3.218876
## 5 Impact I1 After Q5 17 Impact.After 2.833213
## 6 Impact I1 After Q6 19 Impact.After 2.944439
## 7 Impact I1 After Q7 23 Impact.After 3.135494
## 8 Impact I1 After Q8 25 Impact.After 3.218876
## 9 Impact I1 After Q9 17 Impact.After 2.833213
## 10 Control C1 Before Q10 32 Control.Before 3.465736
##***part001e;
# sink()
str(crabs)
## 'data.frame': 19 obs. of 7 variables:
## $ SiteClass : Factor w/ 2 levels "Control","Impact": 2 2 2 2 2 2 2 2 2 1 ...
## $ Site : chr "I1" "I1" "I1" "I1" ...
## $ Period : Ord.factor w/ 2 levels "Before"<"After": 1 1 1 1 2 2 2 2 2 1 ...
## $ Quadrat : chr "Q1" "Q2" "Q3" "Q4" ...
## $ Density : int 23 30 27 25 17 19 23 25 17 32 ...
## $ trt : Factor w/ 4 levels "Control.After",..: 4 4 4 4 2 2 2 2 2 3 ...
## $ logDensity: num 3.14 3.4 3.3 3.22 2.83 ...
# Preliminary plot
##***part010b;
prelimplot <- ggplot(data=crabs, aes(x=trt, y=logDensity))+
ggtitle("Preliminary plot to look for outliers etc")+
geom_point(position=position_jitter(w=0.1))+
geom_boxplot(alpha=0.1)
prelimplot

##***part010e;
#ggsave(plot=prelimplot, file="baci-crabs-R-010.png", h=4, w=6, units="in", dpi=300)
# Get some simple summary statistics
# sink('baci-crabs-R-020.txt', split=TRUE)
##***part020b;
report <- plyr::ddply(crabs, c("Period","SiteClass"), function(x){
res <- sf.simple.summary(x, "logDensity", crd=TRUE)
return(res)
})
report
## Period SiteClass n nmiss mean sd se lcl ucl
## 1 Before Control 6 0 3.412531 0.1119179 0.04569030 3.295081 3.529982
## 2 Before Impact 4 0 3.262851 0.1131096 0.05655481 3.082868 3.442834
## 3 After Control 4 0 3.309636 0.1087065 0.05435326 3.136660 3.482613
## 4 After Impact 5 0 2.993047 0.1765971 0.07897664 2.773773 3.212321
##***part020e;
# sink()
# Draw a profile plot includeing the raw data
##***part030b;
profileplot <- ggplot(data=report, aes(x=Period, y=mean, group=SiteClass, color=SiteClass))+
ggtitle("Profile plot of crab density")+
ylab("logDensity with mean and 95% ci")+
geom_point(position=position_dodge(w=0.1))+
geom_line(position=position_dodge(w=0.1))+
geom_errorbar(aes(ymax=ucl, ymin=lcl), width=0.1, position=position_dodge(w=0.1))+
geom_point(data=crabs, aes(y=logDensity), position=position_dodge(w=0.2))
profileplot

##***part030e;
#ggsave(plot=profileplot, file="baci-crabs-R-030.png", h=4, w=6, units="in", dpi=300)
# Fit the linear model, get the anova table, and the usual stuff
# CAUTION!!! Because the design is unbalanced, the default model
# fit by aov gives the WRONG sum of squares and F-tests.
# The default tests are "sequential tests" where terms are added
# in the order specified. You want the marginal tests
# (which are reported in JMP or SAS)
#
# Read the entry at
# http://r-eco-evo.blogspot.com/2007/10/infamous-type-iii-ss-before-i-started_20.html
#
##***part100b;
cat("The sum of squares and F-tests from the anova() below are INCORRECT\n")
## The sum of squares and F-tests from the anova() below are INCORRECT
cat("in unbalanced data because they are sequential and only adjust for effects\n")
## in unbalanced data because they are sequential and only adjust for effects
cat("that enter the model prior to the term in question.")
## that enter the model prior to the term in question.
result.lm <- lm( log(Density) ~ SiteClass + Period + SiteClass:Period, data=crabs)
cat("\nAnalysis of variance -- this is NOT CORRECT because design is unbalanced \n")
##
## Analysis of variance -- this is NOT CORRECT because design is unbalanced
anova(result.lm)
## Analysis of Variance Table
##
## Response: log(Density)
## Df Sum Sq Mean Sq F value Pr(>F)
## SiteClass 1 0.31631 0.31631 18.1646 0.000682 ***
## Period 1 0.15503 0.15503 8.9027 0.009275 **
## SiteClass:Period 1 0.03214 0.03214 1.8459 0.194340
## Residuals 15 0.26121 0.01741
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n\nUse the Type III tests from the Anova() function from the car package")
##
##
## Use the Type III tests from the Anova() function from the car package
cat("\nbut you need to set the treatment contrasts to sum rather than treatment")
##
## but you need to set the treatment contrasts to sum rather than treatment
cat("\nSee http://r.789695.n4.nabble.com/Type-I-v-s-Type-III-Sum-Of-Squares-in-ANOVA-td1573657.html\n")
##
## See http://r.789695.n4.nabble.com/Type-I-v-s-Type-III-Sum-Of-Squares-in-ANOVA-td1573657.html
result.lm2 <- lm( log(Density) ~ SiteClass + Period + SiteClass:Period, data=crabs,
contrasts=list(SiteClass="contr.sum", Period='contr.sum'))
##***part100e;
##***part100effectb;
car::Anova(result.lm2,type=3)
## Anova Table (Type III tests)
##
## Response: log(Density)
## Sum Sq Df F value Pr(>F)
## (Intercept) 194.343 1 11160.2604 < 2.2e-16 ***
## SiteClass 0.251 1 14.4055 0.001759 **
## Period 0.160 1 9.2039 0.008376 **
## SiteClass:Period 0.032 1 1.8459 0.194340
## Residuals 0.261 15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##***part100effecte;
# Check the residuals etc
##***part100diagnosticb;
diagplot <- autoplot(result.lm2)
## Warning: `fortify(<lm>)` was deprecated in ggplot2 4.0.0.
## ℹ Please use `broom::augment(<lm>)` instead.
## ℹ The deprecated feature was likely used in the ggfortify package.
## Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the ggfortify package.
## Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggfortify package.
## Please report the issue at <https://github.com/sinhrks/ggfortify/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
diagplot

##***part100diagnostice;
#ggsave.ggmultiplot(plotdiag, # see https://github.com/sinhrks/ggfortify/issues/98 for bug in autoplot
# file='baci-crabs-R-100-diagnostic.png', h=4, w=6, units="in", dpi=300)
# Estimate the marginal means and the various effects
# sink('baci-crabs-R-100LSM-SiteClass.txt', split=TRUE)
##***part100LSM-SiteClassb;
result.emmo.S <- emmeans::emmeans(result.lm2, ~SiteClass)
## NOTE: Results may be misleading due to involvement in interactions
cat("\n\n Estimated marginal means for SiteClass \n\n")
##
##
## Estimated marginal means for SiteClass
summary(result.emmo.S, infer=TRUE)
## SiteClass emmean SE df lower.CL upper.CL t.ratio p.value
## Control 3.36 0.0426 15 3.27 3.45 78.917 <.0001
## Impact 3.13 0.0443 15 3.03 3.22 70.670 <.0001
##
## Results are averaged over the levels of: Period
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##***part100LSM-SiteClasse;
# sink()
# sink('baci-crabs-R-100LSM-Period.txt', split=TRUE)
##***part100LSM-Periodb;
result.emmo.P <- emmeans::emmeans(result.lm2, ~Period)
## NOTE: Results may be misleading due to involvement in interactions
cat("\n\n Estimated marginal means for Period \n\n")
##
##
## Estimated marginal means for Period
summary(result.emmo.P)
## Period emmean SE df lower.CL upper.CL
## Before 3.34 0.0426 15 3.25 3.43
## After 3.15 0.0443 15 3.06 3.25
##
## Results are averaged over the levels of: SiteClass
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##***part100LSM-Periode;
# sink()
# sink('baci-crabs-R-100LSM-int.txt', split=TRUE)
##***part100LSM-intb;
result.emmo.SP <- emmeans::emmeans(result.lm2, ~SiteClass:Period)
cat("Estimated marginal means \n\n")
## Estimated marginal means
summary(result.emmo.SP, infer=TRUE)
## SiteClass Period emmean SE df lower.CL upper.CL t.ratio p.value
## Control Before 3.41 0.0539 15 3.30 3.53 63.344 <.0001
## Impact Before 3.26 0.0660 15 3.12 3.40 49.452 <.0001
## Control After 3.31 0.0660 15 3.17 3.45 50.161 <.0001
## Impact After 2.99 0.0590 15 2.87 3.12 50.717 <.0001
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##***part100LSM-inte;
# sink()
# Estimate the BACI contrast along with a se
# You could look at the entry in the summary table from the model fit, but
# this is dangerous as these entries depend on the contrast matrix.
# It is far safer to the contrast function applied to an emmeans object
temp <- summary(result.lm)$coefficients # get all the coefficients
temp[grepl("SiteClass",rownames(temp)) & grepl("Period", rownames(temp)),]
## Estimate Std. Error t value Pr(>|t|)
## -0.1180224 0.0868676 -1.3586464 0.1943404
# sink("baci-crabs-R-100baci.txt", split=TRUE)
##***part100bacib;
summary(contrast(result.emmo.SP, list(baci=c(1,-1,-1,1))), infer=TRUE)
## contrast estimate SE df lower.CL upper.CL t.ratio p.value
## baci -0.167 0.123 15 -0.429 0.0949 -1.359 0.1943
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
##***part100bacie;
# sink()
###------------------------------------------------------------------------------------
###------------------------------------------------------------------------------------
###------------------------------------------------------------------------------------
# Power analysis for simple BACI analysis
# For a simple BACI there is
# 1 site in each of treatment and control -> ns_I=1, ns_C=1
# 1 year before/after -> ny_B=1, ny_A=1
# We cannot separate the site-year interaction from the means so we hope
# for the best and suppose that sdSiteYear=0
# Example of crabs.
# We think that a 20 percentage difference in the differential change is important.
# We arbitarily choose 4 mu values that give us this change
# E.g. mu_IB =0, mu_IS=.20, mu_CB=0, mu_CA=0
#
# We don't have to worry about the sdSite or the sdYear because these "cancel"
# because every site is measured every year.
# The residual standard deviation is about 5
#
# We examine various scenarios for the number of subsamples at each of the 4 site-year combinations
# Get the BACI power program
# illustration of how the power program work for a specific scenario
sdResid <- summary(result.lm2)$sigma
baci.power(n_IA=5, n_IB=5, n_CA=5, n_CB=5,
ns_I=1, ns_C=1,
ny_B=1, ny_A=1,
mu_IA=.2, mu_IB=0, mu_CA=0, mu_CB=0,
sdYear=0, sdSite=0, sdSiteYear=0, sdResid=sdResid)
## alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB n_CA n_CB ns_I ns_C ny_B
## 1 0.05 0 0 0 0.1319614 5 5 5 5 1 1 1
## ny_A mu_IA mu_IB mu_CA mu_CB baci baci.se dfdenom ncp Fcrit
## 1 1 0.2 0 0 0 0.2 0.1180298 16 2.871286 4.493998
## power Tcrit os.power1 os.power2
## 1 0.3570512 1.745884 0.4910198 0.0005353837
##***part500b;
scenarios <- expand.grid(n_quad=seq(5,20,2),
baci_effect=0.2,
sdResid=sdResid)
scenarios
## n_quad baci_effect sdResid
## 1 5 0.2 0.1319614
## 2 7 0.2 0.1319614
## 3 9 0.2 0.1319614
## 4 11 0.2 0.1319614
## 5 13 0.2 0.1319614
## 6 15 0.2 0.1319614
## 7 17 0.2 0.1319614
## 8 19 0.2 0.1319614
power <- plyr::adply(scenarios,1,function(x){
#browser()
power <- baci.power(
n_IA=x$n_quad, n_IB=x$n_quad, n_CA=x$n_quad, n_CB=x$n_quad,
ns_I=1, ns_C=1,
ny_B=1, ny_A=1,
mu_IA=x$baci_effect, mu_IB=0, mu_CA=0, mu_CB=0,
sdYear=0, sdSite=0, sdSiteYear=0, sdResid=x$sdResid)
power
})
power
## n_quad baci_effect alpha sdSite sdYear sdSiteYear sdResid n_IA n_IB n_CA
## 1 5 0.2 0.05 0 0 0 0.1319614 5 5 5
## 2 7 0.2 0.05 0 0 0 0.1319614 7 7 7
## 3 9 0.2 0.05 0 0 0 0.1319614 9 9 9
## 4 11 0.2 0.05 0 0 0 0.1319614 11 11 11
## 5 13 0.2 0.05 0 0 0 0.1319614 13 13 13
## 6 15 0.2 0.05 0 0 0 0.1319614 15 15 15
## 7 17 0.2 0.05 0 0 0 0.1319614 17 17 17
## 8 19 0.2 0.05 0 0 0 0.1319614 19 19 19
## n_CB ns_I ns_C ny_B ny_A mu_IA mu_IB mu_CA mu_CB baci baci.se dfdenom
## 1 5 1 1 1 1 0.2 0 0 0 0.2 0.11802982 16
## 2 7 1 1 1 1 0.2 0 0 0 0.2 0.09975341 24
## 3 9 1 1 1 1 0.2 0 0 0 0.2 0.08797424 32
## 4 11 1 1 1 1 0.2 0 0 0 0.2 0.07957569 40
## 5 13 1 1 1 1 0.2 0 0 0 0.2 0.07319899 48
## 6 15 1 1 1 1 0.2 0 0 0 0.2 0.06814455 56
## 7 17 1 1 1 1 0.2 0 0 0 0.2 0.06401066 64
## 8 19 1 1 1 1 0.2 0 0 0 0.2 0.06054802 72
## ncp Fcrit power Tcrit os.power1 os.power2
## 1 2.871286 4.493998 0.3570512 1.745884 0.4910198 5.353837e-04
## 2 4.019801 4.259677 0.4859071 1.710882 0.6190894 1.619837e-04
## 3 5.168315 4.149097 0.5967341 1.693889 0.7190601 5.406011e-05
## 4 6.316830 4.084746 0.6888782 1.683851 0.7954833 1.921239e-05
## 5 7.465344 4.042652 0.7634033 1.677224 0.8528155 7.140875e-06
## 6 8.613858 4.012973 0.8223342 1.672522 0.8951393 2.746100e-06
## 7 9.762373 3.990924 0.8680675 1.669013 0.9259577 1.084973e-06
## 8 10.910887 3.973897 0.9029987 1.666294 0.9481344 4.382558e-07
cat("\n")
power[,c("alpha","n_IA","n_IB","n_CA","n_CB","baci","power")]
## alpha n_IA n_IB n_CA n_CB baci power
## 1 0.05 5 5 5 5 0.2 0.3570512
## 2 0.05 7 7 7 7 0.2 0.4859071
## 3 0.05 9 9 9 9 0.2 0.5967341
## 4 0.05 11 11 11 11 0.2 0.6888782
## 5 0.05 13 13 13 13 0.2 0.7634033
## 6 0.05 15 15 15 15 0.2 0.8223342
## 7 0.05 17 17 17 17 0.2 0.8680675
## 8 0.05 19 19 19 19 0.2 0.9029987
##***part500e;
##***part510b;
power.plot <- ggplot(data=power, aes(x=n_quad, y=power))+
ggtitle("Estimated power",
subtitle=paste("alpha: ", power$alpha[1]))+
geom_point()+
geom_line()+
ylim(0,1)+
geom_hline(yintercept=0.8, color="blue")+
xlab("Number of quadrats at each site:year")
power.plot

##***part510e;