# Brook Trout
# This is from MARK demo files on occupancy.
# Collected via electrofishing three 50 m sections of streams at 77 sites
# in the Upper Chattachochee River basin.
# 77 streams 3 occasions, 4 covariates: elevation, cross sectional area each occasion.
# Single Species Single Season Occupancy
# Fitting models using RMark
# 2018-08-15 Code contributed by Carl James Schwarz (cschwarz.stat.sfu.cs@gmail.com)
library(readxl)
library(RMark)
## This is RMark 2.2.5
## Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)
# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.
input.data <- readxl::read_excel("../BrookTrout.xls",
sheet="DetectionHistory",
na="-",
col_names=FALSE) # notice no column names in row 1 of data file.
head(input.data)
## # A tibble: 6 x 3
## X__1 X__2 X__3
## <dbl> <dbl> <dbl>
## 1 0 0 0
## 2 1.00 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 1.00 0
# Extract the history records
# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
ch=apply(input.data,1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)
## freq ch
## 1 1 000
## 2 1 100
## 3 1 000
## 4 1 000
## 5 1 000
## 6 1 010
# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
## freq ch
## 1 1 000
## 2 1 100
## 3 1 000
## 4 1 000
## 5 1 000
## 6 1 010
# do some basic checks on your data
# e.g. check number of sites; number of visits etc
nrow(input.history)
## [1] 77
# Get the pond information
# Get the elevation information
elevation.data <- readxl::read_excel("../BrookTrout.xls",
sheet="Elevation",
na="-",
col_names=TRUE)
input.history$Elevation <- elevation.data$Elevation/1000 # standardized it a bit
head(input.history)
## freq ch Elevation
## 1 1 000 4.26614
## 2 1 100 4.03882
## 3 1 000 2.03158
## 4 1 000 3.43917
## 5 1 000 3.03650
## 6 1 010 2.05014
# Get the cross sectional width
cross.data <- readxl::read_excel("../BrookTrout.xls",
sheet="CrossSectionWidth",
na="-",
col_names=TRUE)
head(cross.data)
## # A tibble: 6 x 3
## Cross1 Cross2 Cross3
## <dbl> <dbl> <dbl>
## 1 2.87 2.61 2.73
## 2 0.880 2.54 1.16
## 3 1.50 0.960 1.52
## 4 0.260 1.50 1.13
## 5 0.890 2.59 2.69
## 6 0.670 2.32 1.58
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made
# so do not set these to missing as in RPresence
input.history <- cbind(input.history, cross.data)
head(input.history)
## freq ch Elevation Cross1 Cross2 Cross3
## 1 1 000 4.26614 2.87 2.61 2.73
## 2 1 100 4.03882 0.88 2.54 1.16
## 3 1 000 2.03158 1.50 0.96 1.52
## 4 1 000 3.43917 0.26 1.50 1.13
## 5 1 000 3.03650 0.89 2.59 2.69
## 6 1 010 2.05014 0.67 2.32 1.58
# Create the data structure
trout.data <- process.data(data=input.history,
model="Occupancy")
summary(trout.data)
## Length Class Mode
## data 6 data.frame list
## model 1 -none- character
## mixtures 1 -none- numeric
## freq 77 -none- numeric
## nocc 1 -none- numeric
## nocc.secondary 0 -none- NULL
## time.intervals 3 -none- numeric
## begin.time 1 -none- numeric
## age.unit 1 -none- numeric
## initial.ages 1 -none- numeric
## group.covariates 0 -none- NULL
## nstrata 1 -none- numeric
## strata.labels 0 -none- NULL
## counts 0 -none- NULL
## reverse 1 -none- logical
## areas 0 -none- NULL
## events 0 -none- NULL
# If survey covariates are present, modify the ddl.
trout.ddl <- make.design.data(trout.data)
trout.ddl
## $p
## par.index model.index group age time Age Time
## 1 1 1 1 0 1 0 0
## 2 2 2 1 1 2 1 1
## 3 3 3 1 2 3 2 2
##
## $Psi
## par.index model.index group age time Age Time
## 1 1 4 1 0 1 0 0
##
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
##
##
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)
## [1] "p" "Psi"
# define the list of models to fit
# Notice the commas between the column and the placement of the quotes
model.list.csv <- textConnection("
p, Psi
~1, ~1
~1, ~Elevation
~Cross, ~1
~Cross, ~Elevation")
model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list
## p Psi model.number
## 1 ~1 ~1 1
## 2 ~1 ~Elevation 2
## 3 ~Cross ~1 3
## 4 ~Cross ~Elevation 4
# fit the models
myobs <- ls()
myobs <- myobs[ grepl("m...",myobs,fixed=TRUE)]
cat("Removing ", myobs, "\n")
## Removing
rm(list=myobs)
model.fits <- plyr::dlply(model.list, "model.number", function(x,input.data, input.ddl){
cat("\n\n***** Starting ", unlist(x), "\n")
#browser()
#fit <- myoccMod(model=list(as.formula(paste("psi",x$psi)),
fit <- RMark::mark(input.data, ddl=input.ddl,
model="Occupancy",
model.parameters=list(
Psi =list(formula=as.formula(eval(x$Psi))),
p =list(formula=as.formula(eval(x$p)))
)
#,brief=TRUE,output=FALSE, delete=TRUE
#,invisible=TRUE,output=TRUE # set for debugging
)
mnumber <- paste("m...",formatC(x$model.number, width = 3, format = "d", flag = "0"),sep="")
assign( mnumber, fit, envir=.GlobalEnv)
#browser()
fit
},input.data=trout.data, input.ddl=trout.ddl)
##
##
## ***** Starting ~1 ~1 1
##
## Output summary for Occupancy model
## Name : p(~1)Psi(~1)
##
## Npar : 2
## -2lnL: 228.7886
## AICc : 232.9507
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.1340176 0.2478215 -0.3517125 0.6197476
## Psi:(Intercept) -0.1500513 0.2649242 -0.6693028 0.3692002
##
##
## Real Parameter p
## 1 2 3
## 0.5334543 0.5334543 0.5334543
##
##
## Real Parameter Psi
## 1
## 0.4625574
##
##
## ***** Starting ~1 ~Elevation 2
##
## Output summary for Occupancy model
## Name : p(~1)Psi(~Elevation)
##
## Npar : 3
## -2lnL: 204.1568
## AICc : 210.4856
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.1458534 0.2427291 -0.3298956 0.6216023
## Psi:(Intercept) -4.2010319 1.0930922 -6.3434927 -2.0585712
## Psi:Elevation 1.3709710 0.3596842 0.6659901 2.0759520
##
##
## Real Parameter p
## 1 2 3
## 0.5363988 0.5363988 0.5363988
##
##
## Real Parameter Psi
## 1
## 0.4307754
##
##
## ***** Starting ~Cross ~1 3
##
## Output summary for Occupancy model
## Name : p(~Cross)Psi(~1)
##
## Npar : 3
## -2lnL: 220.1193
## AICc : 226.448
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 1.2958769 0.4968140 0.3221215 2.2696324
## p:Cross -0.7776221 0.2737404 -1.3141533 -0.2410909
## Psi:(Intercept) -0.1166741 0.2698332 -0.6455471 0.4121990
##
##
## Real Parameter p
## 1 2 3
## 0.5303794 0.5034525 0.5182143
##
##
## Real Parameter Psi
## 1
## 0.4708645
##
##
## ***** Starting ~Cross ~Elevation 4
##
## Output summary for Occupancy model
## Name : p(~Cross)Psi(~Elevation)
##
## Npar : 4
## -2lnL: 193.7903
## AICc : 202.3458
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 1.3988191 0.4897724 0.4388651 2.3587731
## p:Cross -0.8516879 0.2732420 -1.3872422 -0.3161335
## Psi:(Intercept) -4.4804496 1.1653269 -6.7644903 -2.1964089
## Psi:Elevation 1.4996932 0.3918882 0.7315924 2.2677940
##
##
## Real Parameter p
## 1 2 3
## 0.5281628 0.4986601 0.5148322
##
##
## Real Parameter Psi
## 1
## 0.452684
# examine individula model results
model.number <-2
summary(model.fits[[model.number]])
## Output summary for Occupancy model
## Name : p(~1)Psi(~Elevation)
##
## Npar : 3
## -2lnL: 204.1568
## AICc : 210.4856
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.1458534 0.2427291 -0.3298956 0.6216023
## Psi:(Intercept) -4.2010319 1.0930922 -6.3434927 -2.0585712
## Psi:Elevation 1.3709710 0.3596842 0.6659901 2.0759520
##
##
## Real Parameter p
## 1 2 3
## 0.5363988 0.5363988 0.5363988
##
##
## Real Parameter Psi
## 1
## 0.4307754
model.fits[[model.number]]$results$real
## estimate se lcl ucl fixed note
## p g1 a0 t1 0.5363988 0.0603607 0.4182660 0.6505829
## Psi g1 a0 t1 0.4307754 0.0823867 0.2814617 0.5938361
model.fits[[model.number]]$results$beta
## estimate se lcl ucl
## p:(Intercept) 0.1458534 0.2427291 -0.3298956 0.6216023
## Psi:(Intercept) -4.2010319 1.0930922 -6.3434927 -2.0585712
## Psi:Elevation 1.3709710 0.3596842 0.6659901 2.0759520
model.fits[[model.number]]$results$derived
## $Occupancy
## estimate se lcl ucl
## 1 0.4307754 0.08238668 0.2814617 0.5938361
get.real(model.fits[[model.number]], "Psi", se=TRUE)
## all.diff.index par.index estimate se lcl
## Psi g1 a0 t1 4 2 0.4307754 0.0823867 0.2814617
## ucl fixed note group age time Age Time
## Psi g1 a0 t1 0.5938361 1 0 1 0 0
get.real(model.fits[[model.number]], "p", se=TRUE)
## all.diff.index par.index estimate se lcl ucl
## p g1 a0 t1 1 1 0.5363988 0.0603607 0.418266 0.6505829
## p g1 a1 t2 2 1 0.5363988 0.0603607 0.418266 0.6505829
## p g1 a2 t3 3 1 0.5363988 0.0603607 0.418266 0.6505829
## fixed note group age time Age Time
## p g1 a0 t1 1 0 1 0 0
## p g1 a1 t2 1 1 2 1 1
## p g1 a2 t3 1 2 3 2 2
# collect models and make AICc table
model.set <- RMark::collect.models( type="Occupancy")
model.set
## model npar AICc DeltaAICc weight Deviance
## 4 p(~Cross)Psi(~Elevation) 4 202.3458 0.000000 9.832014e-01 193.790260
## 2 p(~1)Psi(~Elevation) 3 210.4856 8.139742 1.679268e-02 204.156790
## 3 p(~Cross)Psi(~1) 3 226.4480 24.102222 5.739996e-06 220.119270
## 1 p(~1)Psi(~1) 2 232.9507 30.604907 2.222652e-07 3.324035
names(model.set)
## [1] "m...001" "m...002" "m...003" "m...004" "model.table"
model.set$model.table
## p Psi model npar AICc DeltaAICc
## 4 ~Cross ~Elevation p(~Cross)Psi(~Elevation) 4 202.3458 0.000000
## 2 ~1 ~Elevation p(~1)Psi(~Elevation) 3 210.4856 8.139742
## 3 ~Cross ~1 p(~Cross)Psi(~1) 3 226.4480 24.102222
## 1 ~1 ~1 p(~1)Psi(~1) 2 232.9507 30.604907
## weight Deviance
## 4 9.832014e-01 193.790260
## 2 1.679268e-02 204.156790
## 3 5.739996e-06 220.119270
## 1 2.222652e-07 3.324035
# model averaged values
get.real(model.set[[1]], "Psi", se=TRUE)
## all.diff.index par.index estimate se lcl
## Psi g1 a0 t1 4 2 0.4625574 0.0658596 0.338653
## ucl fixed note group age time Age Time
## Psi g1 a0 t1 0.5912657 1 0 1 0 0
get.real(model.set[[2]], "Psi", se=TRUE)
## all.diff.index par.index estimate se lcl
## Psi g1 a0 t1 4 2 0.4307754 0.0823867 0.2814617
## ucl fixed note group age time Age Time
## Psi g1 a0 t1 0.5938361 1 0 1 0 0
get.real(model.set[[3]], "Psi", se=TRUE)
## all.diff.index par.index estimate se lcl
## Psi g1 a0 t1 4 4 0.4708645 0.0672292 0.3439937
## ucl fixed note group age time Age Time
## Psi g1 a0 t1 0.601615 1 0 1 0 0
Psi.ma <- RMark::model.average(model.set, param="Psi")
Psi.ma
## par.index estimate se fixed note group age time
## Psi g1 a0 t1 4 0.4523162 0.08764107 1 0 1
## Age Time
## Psi g1 a0 t1 0 0
# get the psi-values as function of the covariates
trout.ddl$Psi # see the index numbers
## par.index model.index group age time Age Time
## 1 1 4 1 0 1 0 0
Elev.df <-data.frame(Elevation=seq(min(input.history$Elevation,na.rm=TRUE),
max(input.history$Elevation, na.rm=TRUE),.1))
pred.psi <- covariate.predictions(model.set, indices=4, data=Elev.df)
head(pred.psi$estimates)
## vcv.index model.index par.index covdata estimate se
## 1 1 2 4 0.91107 0.04265042 0.03397415
## 2 2 2 4 1.01107 0.04919944 0.03726829
## 3 3 2 4 1.11107 0.05669477 0.04073741
## 4 4 2 4 1.21107 0.06525388 0.04435689
## 5 5 2 4 1.31107 0.07500271 0.04809324
## 6 6 2 4 1.41107 0.08607420 0.05190357
## lcl ucl fixed
## 1 0.008646044 0.1853831
## 2 0.010740485 0.1978307
## 3 0.013325126 0.2110302
## 4 0.016507014 0.2250190
## 5 0.020412907 0.2398377
## 6 0.025191105 0.2555307
ggplot(data=pred.psi$estimates, aes(x=covdata, y=estimate))+
ggtitle("Occupancy probability as a function of elevation")+
geom_point()+
geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
ylim(0,1)+
ylab("Estimated probability of occupancy")+
xlab("Elevation (km)")

# make a plot of the probability of detection as a function of cross section
Cross.df <- data.frame(Cross1=seq(min(cross.data,na.rm=TRUE),max(cross.data, na.rm=TRUE),.1))
trout.ddl$p # see the index numbers
## par.index model.index group age time Age Time
## 1 1 1 1 0 1 0 0
## 2 2 2 1 1 2 1 1
## 3 3 3 1 2 3 2 2
pred.p <- covariate.predictions(model.set, indices=1, data=Cross.df)
head(pred.p$estimates)
## vcv.index model.index par.index covdata estimate se lcl
## 1 1 1 1 0.25 0.7621506 0.08262720 0.5673694
## 2 2 1 1 0.35 0.7468028 0.08130679 0.5594291
## 3 3 1 1 0.45 0.7307866 0.07971666 0.5509325
## 4 4 1 1 0.55 0.7141191 0.07788009 0.5418457
## 5 5 1 1 0.65 0.6968238 0.07583376 0.5321205
## 6 6 1 1 0.75 0.6789308 0.07362947 0.5216923
## ucl fixed
## 1 0.8867421
## 2 0.8726305
## 3 0.8572696
## 4 0.8406635
## 5 0.8228501
## 6 0.8039081
ggplot(data=pred.p$estimates, aes(x=covdata, y=estimate))+
ggtitle("Detection probability as a function of cross sectional width")+
geom_point()+
geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
ylim(0,1)

# It is often convenient to estimate occupancy for each site using the
# values of the covariates specific for that site.
# This is similar to RPresence which gives estimates at each site.
# We create a data frame with the covariates etc as measure on the input.history dataframe.
# But we also need to add the appropriate index number for psi to the covariate data frame to account
# for the groups definition
site.covariates <- input.history
site.covariates$freq <- NULL
site.covariates$ch <- NULL
trout.ddl$Psi
## par.index model.index group age time Age Time
## 1 1 4 1 0 1 0 0
site.covariates <- merge(trout.ddl$Psi, site.covariates)
head(site.covariates)
## par.index model.index group age time Age Time Elevation Cross1 Cross2
## 1 1 4 1 0 1 0 0 4.26614 2.87 2.61
## 2 1 4 1 0 1 0 0 4.03882 0.88 2.54
## 3 1 4 1 0 1 0 0 2.03158 1.50 0.96
## 4 1 4 1 0 1 0 0 3.43917 0.26 1.50
## 5 1 4 1 0 1 0 0 3.03650 0.89 2.59
## 6 1 4 1 0 1 0 0 2.05014 0.67 2.32
## Cross3
## 1 2.73
## 2 1.16
## 3 1.52
## 4 1.13
## 5 2.69
## 6 1.58
# we only want a prediction for that particular model.index.
# we need to create a variable "index" that has the model.index
site.covariates$index <- site.covariates$model.index
site.pred <- covariate.predictions(model.set,
data=site.covariates)
ggplot(site.pred$estimates, aes(x=Elevation, y=estimate))+
ggtitle("Model averaged predictions of occupancy for each site")+
geom_point()+
geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=0.2)

# cleanup
cleanup(ask=FALSE)