# Brook Trout
# This is from MARK demo files on occupancy.
# Collected via electrofishing three 50 m sections of streams at 77 sites
# in the Upper Chattachochee River basin.
# 77 streams 3 occasions, 4 covariates: elevation, cross sectional area each occasion.
# Single Species Single Season Occupancy
# Fitting models using RMark
# 2018-11-09 Code contributed by Neil Faught
library(readxl)
library(RMark)
## This is RMark 2.2.5
## Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)
# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.
input.data <- readxl::read_excel("../BrookTrout.xls",
sheet="DetectionHistory",
na="-",
col_names=FALSE) # notice no column names in row 1 of data file.
head(input.data)
## # A tibble: 6 x 3
## X__1 X__2 X__3
## <dbl> <dbl> <dbl>
## 1 0 0 0
## 2 1.00 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 1.00 0
# Extract the history records
# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
ch=apply(input.data,1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)
## freq ch
## 1 1 000
## 2 1 100
## 3 1 000
## 4 1 000
## 5 1 000
## 6 1 010
# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
## freq ch
## 1 1 000
## 2 1 100
## 3 1 000
## 4 1 000
## 5 1 000
## 6 1 010
# do some basic checks on your data
# e.g. check number of sites; number of visits etc
nrow(input.history)
## [1] 77
# Get the pond information
# Get the elevation information
elevation.data <- readxl::read_excel("../BrookTrout.xls",
sheet="Elevation",
na="-",
col_names=TRUE)
input.history$Elevation <- elevation.data$Elevation/1000 # standardized it a bit
head(input.history)
## freq ch Elevation
## 1 1 000 4.26614
## 2 1 100 4.03882
## 3 1 000 2.03158
## 4 1 000 3.43917
## 5 1 000 3.03650
## 6 1 010 2.05014
# Get the cross sectional width
cross.data <- readxl::read_excel("../BrookTrout.xls",
sheet="CrossSectionWidth",
na="-",
col_names=TRUE)
head(cross.data)
## # A tibble: 6 x 3
## Cross1 Cross2 Cross3
## <dbl> <dbl> <dbl>
## 1 2.87 2.61 2.73
## 2 0.880 2.54 1.16
## 3 1.50 0.960 1.52
## 4 0.260 1.50 1.13
## 5 0.890 2.59 2.69
## 6 0.670 2.32 1.58
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made
# so do not set these to missing as in RPresence
input.history <- cbind(input.history, cross.data)
head(input.history)
## freq ch Elevation Cross1 Cross2 Cross3
## 1 1 000 4.26614 2.87 2.61 2.73
## 2 1 100 4.03882 0.88 2.54 1.16
## 3 1 000 2.03158 1.50 0.96 1.52
## 4 1 000 3.43917 0.26 1.50 1.13
## 5 1 000 3.03650 0.89 2.59 2.69
## 6 1 010 2.05014 0.67 2.32 1.58
# Create the data structure
trout.data <- process.data(data=input.history,
model="Occupancy")
summary(trout.data)
## Length Class Mode
## data 6 data.frame list
## model 1 -none- character
## mixtures 1 -none- numeric
## freq 77 -none- numeric
## nocc 1 -none- numeric
## nocc.secondary 0 -none- NULL
## time.intervals 3 -none- numeric
## begin.time 1 -none- numeric
## age.unit 1 -none- numeric
## initial.ages 1 -none- numeric
## group.covariates 0 -none- NULL
## nstrata 1 -none- numeric
## strata.labels 0 -none- NULL
## counts 0 -none- NULL
## reverse 1 -none- logical
## areas 0 -none- NULL
## events 0 -none- NULL
# If survey covariates are present, modify the ddl.
trout.ddl <- make.design.data(trout.data)
trout.ddl
## $p
## par.index model.index group age time Age Time
## 1 1 1 1 0 1 0 0
## 2 2 2 1 1 2 1 1
## 3 3 3 1 2 3 2 2
##
## $Psi
## par.index model.index group age time Age Time
## 1 1 4 1 0 1 0 0
##
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
##
##
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)
## [1] "p" "Psi"
# Fit a model
# Models where detection probability (p) varies with temperature (Cross), and
# Occupancy (Psi) varies with Elevation
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made
mod.fit <- RMark::mark(trout.data, ddl = trout.ddl,
model="Occupancy",
model.parameters=list(
Psi =list(formula=~Elevation),
p =list(formula=~Cross)
)
)
##
## Output summary for Occupancy model
## Name : p(~Cross)Psi(~Elevation)
##
## Npar : 4
## -2lnL: 193.7903
## AICc : 202.3458
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 1.3988191 0.4897724 0.4388651 2.3587731
## p:Cross -0.8516878 0.2732420 -1.3872422 -0.3161335
## Psi:(Intercept) -4.4804500 1.1653271 -6.7644911 -2.1964090
## Psi:Elevation 1.4996933 0.3918882 0.7315924 2.2677943
##
##
## Real Parameter p
## 1 2 3
## 0.5281628 0.4986601 0.5148322
##
##
## Real Parameter Psi
## 1
## 0.452684
summary(mod.fit)
## Output summary for Occupancy model
## Name : p(~Cross)Psi(~Elevation)
##
## Npar : 4
## -2lnL: 193.7903
## AICc : 202.3458
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 1.3988191 0.4897724 0.4388651 2.3587731
## p:Cross -0.8516878 0.2732420 -1.3872422 -0.3161335
## Psi:(Intercept) -4.4804500 1.1653271 -6.7644911 -2.1964090
## Psi:Elevation 1.4996933 0.3918882 0.7315924 2.2677943
##
##
## Real Parameter p
## 1 2 3
## 0.5281628 0.4986601 0.5148322
##
##
## Real Parameter Psi
## 1
## 0.452684
# Look the objects returned in more details
names(mod.fit)
## [1] "data" "model" "title"
## [4] "model.name" "links" "mixtures"
## [7] "call" "parameters" "time.intervals"
## [10] "number.of.groups" "group.labels" "nocc"
## [13] "begin.time" "covariates" "fixed"
## [16] "design.matrix" "pims" "design.data"
## [19] "strata.labels" "mlogit.list" "profile.int"
## [22] "simplify" "model.parameters" "results"
## [25] "output"
# look at estimates on beta and original scale
mod.fit$results$beta # on the logit scale
## estimate se lcl ucl
## p:(Intercept) 1.3988191 0.4897724 0.4388651 2.3587731
## p:Cross -0.8516878 0.2732420 -1.3872422 -0.3161335
## Psi:(Intercept) -4.4804500 1.1653271 -6.7644911 -2.1964090
## Psi:Elevation 1.4996933 0.3918882 0.7315924 2.2677943
mod.fit$results$real# on the regular 0-1 scale for each site
## estimate se lcl ucl fixed note
## p g1 a0 t1 0.5281628 0.0612429 0.4088064 0.6443838
## p g1 a1 t2 0.4986601 0.0617351 0.3800429 0.6174283
## p g1 a2 t3 0.5148322 0.0613377 0.3960466 0.6319654
## Psi g1 a0 t1 0.4526840 0.0876822 0.2924587 0.6233539
# derived variables is the occupancy probability
names(mod.fit$results$derived)
## [1] "Occupancy"
mod.fit$results$derived$Occupancy
## estimate se lcl ucl
## 1 0.452684 0.08768222 0.2924587 0.6233539
# get the psi-values as function of the covariates
trout.ddl$Psi # see the index numbers
## par.index model.index group age time Age Time
## 1 1 4 1 0 1 0 0
Elev.df <-data.frame(Elevation=seq(min(input.history$Elevation,na.rm=TRUE),
max(input.history$Elevation, na.rm=TRUE),.1))
pred.psi <- covariate.predictions(mod.fit, indices=4, data=Elev.df)
head(pred.psi$estimates)
## vcv.index model.index par.index covdata estimate se
## 1 1 4 4 0.91107 0.04252838 0.03388788
## 2 2 4 4 1.01107 0.04907175 0.03718745
## 3 3 4 4 1.11107 0.05656240 0.04066300
## 4 4 4 4 1.21107 0.06511818 0.04429000
## 5 5 4 4 1.31107 0.07486542 0.04803500
## 6 6 4 4 1.41107 0.08593757 0.05185508
## lcl ucl fixed
## 1 0.008617708 0.1849796
## 2 0.010706642 0.1974698
## 3 0.013285367 0.2107095
## 4 0.016461027 0.2247370
## 5 0.020360543 0.2395937
## 6 0.025132462 0.2553243
ggplot(data=pred.psi$estimates, aes(x=covdata, y=estimate))+
ggtitle("Occupancy probability as a function of elevation")+
geom_point()+
geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
ylim(0,1)+
ylab("Estimated probability of occupancy")+
xlab("Elevation (km)")

# make a plot of the probability of detection as a function of Cross sectional width
Cross.df <- data.frame(Cross1=seq(min(cross.data,na.rm=TRUE),max(cross.data, na.rm=TRUE),0.1))
trout.ddl$p # see the index numbers
## par.index model.index group age time Age Time
## 1 1 1 1 0 1 0 0
## 2 2 2 1 1 2 1 1
## 3 3 3 1 2 3 2 2
pred.p <- covariate.predictions(mod.fit, indices=1, data=Cross.df)
head(pred.p$estimates)
## vcv.index model.index par.index covdata estimate se lcl
## 1 1 1 1 0.25 0.7660065 0.07743585 0.5839802
## 2 2 1 1 0.35 0.7503966 0.07676222 0.5738208
## 3 3 1 1 0.45 0.7341068 0.07579342 0.5632828
## 4 4 1 1 0.55 0.7171546 0.07455201 0.5523139
## 5 5 1 1 0.65 0.6995639 0.07307307 0.5408518
## 6 6 1 1 0.75 0.6813653 0.07140556 0.5288222
## ucl fixed
## 1 0.8841838
## 2 0.8703435
## 3 0.8552794
## 4 0.8389934
## 5 0.8215189
## 6 0.8029271
plotdata <- cbind(Cross.df, pred.p$estimates)
head(plotdata)
## Cross1 vcv.index model.index par.index covdata estimate se
## 1 0.25 1 1 1 0.25 0.7660065 0.07743585
## 2 0.35 2 1 1 0.35 0.7503966 0.07676222
## 3 0.45 3 1 1 0.45 0.7341068 0.07579342
## 4 0.55 4 1 1 0.55 0.7171546 0.07455201
## 5 0.65 5 1 1 0.65 0.6995639 0.07307307
## 6 0.75 6 1 1 0.75 0.6813653 0.07140556
## lcl ucl fixed
## 1 0.5839802 0.8841838
## 2 0.5738208 0.8703435
## 3 0.5632828 0.8552794
## 4 0.5523139 0.8389934
## 5 0.5408518 0.8215189
## 6 0.5288222 0.8029271
ggplot(data=plotdata, aes(x=Cross1, y=estimate))+
ggtitle("Detection probability as a function of cross sectional width")+
geom_point()+
geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
ylim(0,1)

# It is often convenient to estimate occupancy for each site using the
# values of the covariates specific for that site.
# This is similar to RPresence which gives estimates at each site.
# We create a data frame with the covariates etc as measure on the input.history dataframe.
# But we also need to add the appropriate index number for psi to the covariate data frame to account
# for the groups definition
site.covariates <- input.history
site.covariates$freq <- NULL
site.covariates$ch <- NULL
trout.ddl$Psi
## par.index model.index group age time Age Time
## 1 1 4 1 0 1 0 0
site.covariates <- merge(trout.ddl$Psi, site.covariates)
head(site.covariates)
## par.index model.index group age time Age Time Elevation Cross1 Cross2
## 1 1 4 1 0 1 0 0 4.26614 2.87 2.61
## 2 1 4 1 0 1 0 0 4.03882 0.88 2.54
## 3 1 4 1 0 1 0 0 2.03158 1.50 0.96
## 4 1 4 1 0 1 0 0 3.43917 0.26 1.50
## 5 1 4 1 0 1 0 0 3.03650 0.89 2.59
## 6 1 4 1 0 1 0 0 2.05014 0.67 2.32
## Cross3
## 1 2.73
## 2 1.16
## 3 1.52
## 4 1.13
## 5 2.69
## 6 1.58
# we only want a prediction for that particular model.index.
# we need to create a variable "index" that has the model.index
site.covariates$index <- site.covariates$model.index
site.pred <- covariate.predictions(mod.fit,
data=site.covariates)
ggplot(site.pred$estimates, aes(x=Elevation, y=estimate))+
ggtitle("Model predictions of occupancy for each site")+
geom_point()+
geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=0.2)

# cleanup
cleanup(ask=FALSE)