# Single Species Single Season Occupancy
# Yellow-bellied toad
# Single Season Single Season occupancy
# RMark package
# 2018-11-15 Code contributed by Carl James Schwarz (cschwarz.stat.sfu.cs@gmail.com)
library(readxl)
library(RMark)
## This is RMark 2.2.5
## Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)
# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.
input.detect <- readxl::read_excel(file.path("..","YellowBelliedToad.xlsx"),
sheet="detections")
# do some basic checks on your data
# e.g. check number of sites; number of visits etc
nrow(input.detect)
## [1] 572
ncol(input.detect)
## [1] 2
range(input.detect, na.rm=TRUE)
## [1] 0 1
sum(is.na(input.detect))
## [1] 0
head(input.detect)
## # A tibble: 6 x 2
## V1 V2
## <dbl> <dbl>
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 1.00 0
# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
ch=apply(input.detect[,1:2],1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)
## freq ch
## 1 1 00
## 2 1 00
## 3 1 00
## 4 1 00
## 5 1 00
## 6 1 10
# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
## freq ch
## 1 1 00
## 2 1 00
## 3 1 00
## 4 1 00
## 5 1 00
## 6 1 10
# Get the site level covariates - none.
# Get the site x visit (sampling) covariates.
jdate <- readxl::read_excel(file.path("..","YellowBelliedToad.xlsx"),
sheet="JulianDate")
range(jdate)
## [1] 121 179
# standarize jdate
jdateS <- (jdate - 150)/10
names(jdate ) <- c('jdate1' ,'jdate2')
names(jdateS) <- c('jdateS1' ,'jdateS2')
# Sampling covariates must be added to the input.history as multiple columns
# We also need to create the quadratic terms in advance - this is a pain
jdateSQ <- jdateS
names(jdateSQ)<- c('jdateSQ1' , 'jdateSQ2')
jdateSQ <- jdateSQ^2
input.history <- cbind(input.history, jdate, jdateS, jdateSQ)
head(input.history)
## freq ch jdate1 jdate2 jdateS1 jdateS2 jdateSQ1 jdateSQ2
## 1 1 00 139 173 -1.1 2.3 1.21 5.29
## 2 1 00 128 148 -2.2 -0.2 4.84 0.04
## 3 1 00 130 147 -2.0 -0.3 4.00 0.09
## 4 1 00 130 144 -2.0 -0.6 4.00 0.36
## 5 1 00 128 148 -2.2 -0.2 4.84 0.04
## 6 1 10 145 175 -0.5 2.5 0.25 6.25
ybf.data <- process.data(data=input.history,
model="Occupancy")
summary(ybf.data)
## Length Class Mode
## data 8 data.frame list
## model 1 -none- character
## mixtures 1 -none- numeric
## freq 572 -none- numeric
## nocc 1 -none- numeric
## nocc.secondary 0 -none- NULL
## time.intervals 2 -none- numeric
## begin.time 1 -none- numeric
## age.unit 1 -none- numeric
## initial.ages 1 -none- numeric
## group.covariates 0 -none- NULL
## nstrata 1 -none- numeric
## strata.labels 0 -none- NULL
## counts 0 -none- NULL
## reverse 1 -none- logical
## areas 0 -none- NULL
## events 0 -none- NULL
# Set up survey covariates in the ddl (in this case none)
ybf.ddl <- make.design.data(ybf.data)
ybf.ddl
## $p
## par.index model.index group age time Age Time
## 1 1 1 1 0 1 0 0
## 2 2 2 1 1 2 1 1
##
## $Psi
## par.index model.index group age time Age Time
## 1 1 3 1 0 1 0 0
##
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
##
##
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)
## [1] "p" "Psi"
####################################################################
# Fit a model
# Note that formula HAVE AN = SIGN
mod.fit <- RMark::mark(ybf.data, ddl=ybf.ddl,
model="Occupancy",
model.parameters=list(
Psi =list(formula=~1),
p =list(formula=~1)
)
)
##
## Output summary for Occupancy model
## Name : p(~1)Psi(~1)
##
## Npar : 2
## -2lnL: 866.5516
## AICc : 870.5727
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.6613984 0.1781966 0.3121331 1.0106638
## Psi:(Intercept) -1.1031290 0.1125195 -1.3236673 -0.8825908
##
##
## Real Parameter p
## 1 2
## 0.6595745 0.6595745
##
##
## Real Parameter Psi
## 1
## 0.2491541
summary(mod.fit)
## Output summary for Occupancy model
## Name : p(~1)Psi(~1)
##
## Npar : 2
## -2lnL: 866.5516
## AICc : 870.5727
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.6613984 0.1781966 0.3121331 1.0106638
## Psi:(Intercept) -1.1031290 0.1125195 -1.3236673 -0.8825908
##
##
## Real Parameter p
## 1 2
## 0.6595745 0.6595745
##
##
## Real Parameter Psi
## 1
## 0.2491541
# Look the objects returned in more details
names(mod.fit)
## [1] "data" "model" "title"
## [4] "model.name" "links" "mixtures"
## [7] "call" "parameters" "time.intervals"
## [10] "number.of.groups" "group.labels" "nocc"
## [13] "begin.time" "covariates" "fixed"
## [16] "design.matrix" "pims" "design.data"
## [19] "strata.labels" "mlogit.list" "profile.int"
## [22] "simplify" "model.parameters" "results"
## [25] "output"
names(mod.fit$results)
## [1] "lnl" "deviance" "deviance.df"
## [4] "npar" "n" "AICc"
## [7] "beta" "real" "beta.vcv"
## [10] "derived" "derived.vcv" "covariate.values"
## [13] "singular" "real.vcv"
# look at estimates on beta and original scale
mod.fit$results$beta # on the logit scale
## estimate se lcl ucl
## p:(Intercept) 0.6613984 0.1781966 0.3121331 1.0106638
## Psi:(Intercept) -1.1031290 0.1125195 -1.3236673 -0.8825908
mod.fit$results$real# on the regular 0-1 scale for each site
## estimate se lcl ucl fixed note
## p g1 a0 t1 0.6595745 0.0400116 0.5774058 0.7331500
## Psi g1 a0 t1 0.2491541 0.0210497 0.2102088 0.2926412
# derived variabldes is the occupancy probability
names(mod.fit$results$derived)
## [1] "Occupancy"
mod.fit$results$derived$Occupancy
## estimate se lcl ucl
## 1 0.2491541 0.02104974 0.2102088 0.2926412
# alternatively
get.real(mod.fit, "Psi", se=TRUE)
## all.diff.index par.index estimate se lcl
## Psi g1 a0 t1 3 2 0.2491541 0.0210497 0.2102088
## ucl fixed note group age time Age Time
## Psi g1 a0 t1 0.2926412 1 0 1 0 0
get.real(mod.fit, "Psi", pim=TRUE)
## 1
## 0.2491541
get.real(mod.fit, "p", se=TRUE)
## all.diff.index par.index estimate se lcl ucl
## p g1 a0 t1 1 1 0.6595745 0.0400116 0.5774058 0.73315
## p g1 a1 t2 2 1 0.6595745 0.0400116 0.5774058 0.73315
## fixed note group age time Age Time
## p g1 a0 t1 1 0 1 0 0
## p g1 a1 t2 1 1 2 1 1
#######################################
# Fit a model with p varying over time
# Note the use of the keyword "time" from the ddl which is a factor
ybf.ddl$p
## par.index model.index group age time Age Time
## 1 1 1 1 0 1 0 0
## 2 2 2 1 1 2 1 1
str(ybf.ddl$p)
## 'data.frame': 2 obs. of 7 variables:
## $ par.index : int 1 2
## $ model.index: num 1 2
## $ group : Factor w/ 1 level "1": 1 1
## $ age : Factor w/ 2 levels "0","1": 1 2
## $ time : Factor w/ 2 levels "1","2": 1 2
## $ Age : num 0 1
## $ Time : num 0 1
mod.fit2 <- RMark::mark(ybf.data, ddl=ybf.ddl,
model="Occupancy",
model.parameters=list(
Psi =list(formula=~1),
p =list(formula=~time)
)
)
##
## Output summary for Occupancy model
## Name : p(~time)Psi(~1)
##
## Npar : 3
## -2lnL: 866.3015
## AICc : 872.3437
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.6007738 0.2134029 0.1825042 1.0190435
## p:time2 0.1251632 0.2504897 -0.3657966 0.6161229
## Psi:(Intercept) -1.1037320 0.1124768 -1.3241865 -0.8832776
##
##
## Real Parameter p
## 1 2
## 0.6458333 0.673913
##
##
## Real Parameter Psi
## 1
## 0.2490413
summary(mod.fit2)
## Output summary for Occupancy model
## Name : p(~time)Psi(~1)
##
## Npar : 3
## -2lnL: 866.3015
## AICc : 872.3437
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.6007738 0.2134029 0.1825042 1.0190435
## p:time2 0.1251632 0.2504897 -0.3657966 0.6161229
## Psi:(Intercept) -1.1037320 0.1124768 -1.3241865 -0.8832776
##
##
## Real Parameter p
## 1 2
## 0.6458333 0.673913
##
##
## Real Parameter Psi
## 1
## 0.2490413
get.real(mod.fit2, "p", se=TRUE)
## all.diff.index par.index estimate se lcl
## p g1 a0 t1 1 1 0.6458333 0.0488122 0.5454998
## p g1 a1 t2 2 2 0.6739130 0.0488736 0.5720036
## ucl fixed note group age time Age Time
## p g1 a0 t1 0.7347862 1 0 1 0 0
## p g1 a1 t2 0.7616676 1 1 2 1 1
####################################################
# fit a model with p as a function of julian date (linear)
#
mod.fit3 <- RMark::mark(ybf.data, ddl=ybf.ddl,
model="Occupancy",
model.parameters=list(
Psi =list(formula=~1),
p =list(formula=~jdateS)
)
)
##
## Output summary for Occupancy model
## Name : p(~jdateS)Psi(~1)
##
## Npar : 3
## -2lnL: 862.8199
## AICc : 868.8622
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.5014852 0.1920270 0.1251123 0.8778581
## p:jdateS -0.1849634 0.0958034 -0.3727380 0.0028112
## Psi:(Intercept) -1.1057833 0.1121357 -1.3255694 -0.8859972
##
##
## Real Parameter p
## 1 2
## 0.7041981 0.6188501
##
##
## Real Parameter Psi
## 1
## 0.2486578
summary(mod.fit3)
## Output summary for Occupancy model
## Name : p(~jdateS)Psi(~1)
##
## Npar : 3
## -2lnL: 862.8199
## AICc : 868.8622
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 0.5014852 0.1920270 0.1251123 0.8778581
## p:jdateS -0.1849634 0.0958034 -0.3727380 0.0028112
## Psi:(Intercept) -1.1057833 0.1121357 -1.3255694 -0.8859972
##
##
## Real Parameter p
## 1 2
## 0.7041981 0.6188501
##
##
## Real Parameter Psi
## 1
## 0.2486578
# estimate of p at the "average" julian dates
get.real(mod.fit3, "p", se=TRUE)
## all.diff.index par.index estimate se lcl
## p g1 a0 t1 1 1 0.7041981 0.0446813 0.6099131
## p g1 a1 t2 2 2 0.6188501 0.0460897 0.5254001
## ucl fixed note group age time Age Time
## p g1 a0 t1 0.7837734 1 0 1 0 0
## p g1 a1 t2 0.7042571 1 1 2 1 1
####################################################
# fit a model with p as a function of julian date (quadratinc)
#
mod.fit4 <- RMark::mark(ybf.data, ddl=ybf.ddl,
model="Occupancy",
model.parameters=list(
Psi =list(formula=~1),
p =list(formula=~jdateS+jdateSQ)
)
)
##
## Output summary for Occupancy model
## Name : p(~jdateS + jdateSQ)Psi(~1)
##
## Npar : 4
## -2lnL: 813.0628
## AICc : 821.1333
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 1.5096022 0.2945507 0.9322828 2.0869215
## p:jdateS -0.7217598 0.1984786 -1.1107779 -0.3327417
## p:jdateSQ -0.4799866 0.0929934 -0.6622536 -0.2977195
## Psi:(Intercept) -1.1609321 0.1071446 -1.3709354 -0.9509287
##
##
## Real Parameter p
## 1 2
## 0.693501 0.7152739
##
##
## Real Parameter Psi
## 1
## 0.238498
summary(mod.fit4)
## Output summary for Occupancy model
## Name : p(~jdateS + jdateSQ)Psi(~1)
##
## Npar : 4
## -2lnL: 813.0628
## AICc : 821.1333
##
## Beta
## estimate se lcl ucl
## p:(Intercept) 1.5096022 0.2945507 0.9322828 2.0869215
## p:jdateS -0.7217598 0.1984786 -1.1107779 -0.3327417
## p:jdateSQ -0.4799866 0.0929934 -0.6622536 -0.2977195
## Psi:(Intercept) -1.1609321 0.1071446 -1.3709354 -0.9509287
##
##
## Real Parameter p
## 1 2
## 0.693501 0.7152739
##
##
## Real Parameter Psi
## 1
## 0.238498
# estimate of p at the "average" julian dates and average of the quadratic term - this is silly
get.real(mod.fit4, "p", se=TRUE)
## all.diff.index par.index estimate se lcl
## p g1 a0 t1 1 1 0.6935010 0.0427796 0.6039775
## p g1 a1 t2 2 2 0.7152739 0.0554979 0.5955663
## ucl fixed note group age time Age Time
## p g1 a0 t1 0.7704778 1 0 1 0 0
## p g1 a1 t2 0.8108052 1 1 2 1 1
##############################################
# Collect models
model.set <- RMark::collect.models( type="Occupancy")
model.set
## model npar AICc DeltaAICc weight
## 4 p(~jdateS + jdateSQ)Psi(~1) 4 821.1333 0.00000 1.000000e+00
## 3 p(~jdateS)Psi(~1) 3 868.8622 47.72888 4.323208e-11
## 1 p(~1)Psi(~1) 2 870.5727 49.43942 1.838086e-11
## 2 p(~time)Psi(~1) 3 872.3437 51.21043 7.582233e-12
## Deviance
## 4 8.130628e+02
## 3 8.628199e+02
## 1 2.501630e-01
## 2 -2.273737e-13
names(model.set)
## [1] "mod.fit" "mod.fit2" "mod.fit3" "mod.fit4" "model.table"
model.set$model.table
## p Psi model npar AICc
## 4 ~jdateS + jdateSQ ~1 p(~jdateS + jdateSQ)Psi(~1) 4 821.1333
## 3 ~jdateS ~1 p(~jdateS)Psi(~1) 3 868.8622
## 1 ~1 ~1 p(~1)Psi(~1) 2 870.5727
## 2 ~time ~1 p(~time)Psi(~1) 3 872.3437
## DeltaAICc weight Deviance
## 4 0.00000 1.000000e+00 8.130628e+02
## 3 47.72888 4.323208e-11 8.628199e+02
## 1 49.43942 1.838086e-11 2.501630e-01
## 2 51.21043 7.582233e-12 -2.273737e-13
# model averaged values
get.real(mod.fit , "Psi", se=TRUE)
## all.diff.index par.index estimate se lcl
## Psi g1 a0 t1 3 2 0.2491541 0.0210497 0.2102088
## ucl fixed note group age time Age Time
## Psi g1 a0 t1 0.2926412 1 0 1 0 0
get.real(mod.fit2, "Psi", se=TRUE)
## all.diff.index par.index estimate se lcl
## Psi g1 a0 t1 3 3 0.2490413 0.0210354 0.2101226
## ucl fixed note group age time Age Time
## Psi g1 a0 t1 0.292499 1 0 1 0 0
get.real(mod.fit3, "Psi", se=TRUE)
## all.diff.index par.index estimate se lcl
## Psi g1 a0 t1 3 3 0.2486578 0.02095 0.2098932
## ucl fixed note group age time Age Time
## Psi g1 a0 t1 0.2919366 1 0 1 0 0
Psi.ma <- RMark::model.average(model.set, param="Psi")
Psi.ma
## par.index estimate se fixed note group age time
## Psi g1 a0 t1 3 0.238498 0.01945924 1 0 1
## Age Time
## Psi g1 a0 t1 0 0
# It is often convenient to estimate parameters (such as p) for each site using the
# values of the covariates specific for that site.
# This is similar to RPresence which gives estimates at each site.
# We create a data frame with the covariates etc as measured on the input.history dataframe.
# But we also need to add the appropriate index number for psi to the covariate data frame to account
# for the groups definition
p.covariates <- input.history
p.covariates$freq <- NULL
p.covariates$ch <- NULL
ybf.ddl$p
## par.index model.index group age time Age Time
## 1 1 1 1 0 1 0 0
## 2 2 2 1 1 2 1 1
p.covariates <- merge(ybf.ddl$p, p.covariates)
head(p.covariates)
## par.index model.index group age time Age Time jdate1 jdate2 jdateS1
## 1 1 1 1 0 1 0 0 139 173 -1.1
## 2 2 2 1 1 2 1 1 139 173 -1.1
## 3 1 1 1 0 1 0 0 128 148 -2.2
## 4 2 2 1 1 2 1 1 128 148 -2.2
## 5 1 1 1 0 1 0 0 130 147 -2.0
## 6 2 2 1 1 2 1 1 130 147 -2.0
## jdateS2 jdateSQ1 jdateSQ2
## 1 2.3 1.21 5.29
## 2 2.3 1.21 5.29
## 3 -0.2 4.84 0.04
## 4 -0.2 4.84 0.04
## 5 -0.3 4.00 0.09
## 6 -0.3 4.00 0.09
# we only want a prediction for that particular model.index.
# we need to create a variable "index" that has the model.index
p.covariates$index <- p.covariates$model.index
p.pred <- covariate.predictions(model.set,
data=p.covariates)
# Notice that there is problem in that if model.index =1, it used jdate1 as the predictor
# and if model.index==2, it used jdate2 as the predictor. There is no easy around thi!
head(p.pred$estimates)
## vcv.index model.index par.index par.index.1 model.index.1 group age time
## 1 1 1 1 1 1 1 0 1
## 2 1 1 2 2 2 1 1 2
## 3 1 1 1 1 1 1 0 1
## 4 1 1 2 2 2 1 1 2
## 5 1 1 1 1 1 1 0 1
## 6 1 1 2 2 2 1 1 2
## Age Time jdate1 jdate2 jdateS1 jdateS2 jdateSQ1 jdateSQ2 index
## 1 0 0 139 173 -1.1 2.3 1.21 5.29 1
## 2 1 1 139 173 -1.1 2.3 1.21 5.29 2
## 3 0 0 128 148 -2.2 -0.2 4.84 0.04 1
## 4 1 1 128 148 -2.2 -0.2 4.84 0.04 2
## 5 0 0 130 147 -2.0 -0.3 4.00 0.09 1
## 6 1 1 130 147 -2.0 -0.3 4.00 0.09 2
## estimate se lcl ucl fixed
## 1 0.84848325 0.03509381 0.76633279 0.9053203
## 2 0.06359135 0.05233820 0.01198217 0.2755053
## 3 0.68445868 0.04540351 0.58959613 0.7660932
## 4 0.83681994 0.04011722 0.74248664 0.9011952
## 5 0.73753105 0.04107208 0.64960830 0.8098496
## 6 0.84329230 0.03878904 0.75168669 0.9053585
ggplot(data=NULL, aes( y=estimate))+
ggtitle("Model averaged predictions of detection for each site")+
geom_point(data=p.pred$estimates[ p.pred$estimates$index==2,], aes(x=jdate2))+
geom_point(data=p.pred$estimates[ p.pred$estimates$index==1,], aes(x=jdate1))+
geom_ribbon(data=p.pred$estimates[ p.pred$estimates$index==1,],aes(x=jdate1, ymin=lcl, ymax=ucl), alpha=0.2)+
geom_ribbon(data=p.pred$estimates[ p.pred$estimates$index==2,],aes(x=jdate2, ymin=lcl, ymax=ucl), alpha=0.2)

# cleanup
cleanup(ask=FALSE)