ybf-ma.R

# Single Species Single Season Occupancy 

# Yellow-bellied toad
# Single Season Single Season occupancy 

#  RMark package

# 2018-12-02 Code contributed by Neil Faught

library(readxl)
library(RMark)

## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip

library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.detect <- readxl::read_excel(file.path("..","YellowBelliedToad.xlsx"),
                                   sheet="detections")

# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.detect)

## [1] 572

ncol(input.detect)

## [1] 2

range(input.detect, na.rm=TRUE)

## [1] 0 1

sum(is.na(input.detect))

## [1] 0

head(input.detect)

## # A tibble: 6 x 2
##      V1    V2
##   <dbl> <dbl>
## 1  0        0
## 2  0        0
## 3  0        0
## 4  0        0
## 5  0        0
## 6  1.00     0

# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
                            ch=apply(input.detect[,1:2],1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)

##   freq ch
## 1    1 00
## 2    1 00
## 3    1 00
## 4    1 00
## 5    1 00
## 6    1 10

# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)

##   freq ch
## 1    1 00
## 2    1 00
## 3    1 00
## 4    1 00
## 5    1 00
## 6    1 10

# Get the site level covariates - none.



# Get the site x visit (sampling) covariates. 
jdate <- readxl::read_excel(file.path("..","YellowBelliedToad.xlsx"),
                            sheet="JulianDate")
range(jdate)

## [1] 121 179

# standarize jdate
jdateS <- (jdate - 150)/10

names(jdate ) <- c('jdate1'   ,'jdate2')
names(jdateS) <- c('jdateS1'  ,'jdateS2')

# Sampling covariates must be added to the input.history as multiple columns
# We also need to create the quadratic terms in advance - this is a pain
jdateSQ <- jdateS
names(jdateSQ)<- c('jdateSQ1' , 'jdateSQ2')
jdateSQ <- jdateSQ^2


input.history <- cbind(input.history, jdate, jdateS, jdateSQ)
head(input.history)

##   freq ch jdate1 jdate2 jdateS1 jdateS2 jdateSQ1 jdateSQ2
## 1    1 00    139    173    -1.1     2.3     1.21     5.29
## 2    1 00    128    148    -2.2    -0.2     4.84     0.04
## 3    1 00    130    147    -2.0    -0.3     4.00     0.09
## 4    1 00    130    144    -2.0    -0.6     4.00     0.36
## 5    1 00    128    148    -2.2    -0.2     4.84     0.04
## 6    1 10    145    175    -0.5     2.5     0.25     6.25

ybf.data <- process.data(data=input.history,
                         model="Occupancy")
summary(ybf.data)

##                  Length Class      Mode     
## data               8    data.frame list     
## model              1    -none-     character
## mixtures           1    -none-     numeric  
## freq             572    -none-     numeric  
## nocc               1    -none-     numeric  
## nocc.secondary     0    -none-     NULL     
## time.intervals     2    -none-     numeric  
## begin.time         1    -none-     numeric  
## age.unit           1    -none-     numeric  
## initial.ages       1    -none-     numeric  
## group.covariates   0    -none-     NULL     
## nstrata            1    -none-     numeric  
## strata.labels      0    -none-     NULL     
## counts             0    -none-     NULL     
## reverse            1    -none-     logical  
## areas              0    -none-     NULL     
## events             0    -none-     NULL

# Set up survey covariates in the ddl (in this case none)
ybf.ddl <- make.design.data(ybf.data)
ybf.ddl

## $p
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 
## $Psi
##   par.index model.index group age time Age Time
## 1         1           3     1   0    1   0    0
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"

# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)

## [1] "p"   "Psi"

# define the list of models to fit
# Notice the commas between the column and the placement of the quotes
model.list.csv <- textConnection("
                                 p,         Psi
                                 ~1,              ~1
                                 ~time,           ~1
                                 ~jdateS,         ~1
                                 ~jdateS+jdateSQ, ~1")


model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list

##                 p Psi model.number
## 1              ~1  ~1            1
## 2           ~time  ~1            2
## 3         ~jdateS  ~1            3
## 4 ~jdateS+jdateSQ  ~1            4

# fit the models
myobs <- ls()
myobs <- myobs[ grepl("m...",myobs,fixed=TRUE)]
cat("Removing ", myobs, "\n")

## Removing

rm(list=myobs)

model.fits <- plyr::dlply(model.list, "model.number", function(x,input.data, input.ddl){
  cat("\n\n***** Starting ", unlist(x), "\n")
  #browser()
  
  #fit <- myoccMod(model=list(as.formula(paste("psi",x$psi)),
  fit <- RMark::mark(input.data, ddl=input.ddl,
                     model="Occupancy",
                     model.parameters=list(
                       Psi   =list(formula=as.formula(eval(x$Psi))),
                       p     =list(formula=as.formula(eval(x$p)))
                     )
                     #,brief=TRUE,output=FALSE, delete=TRUE
                     #,invisible=TRUE,output=TRUE  # set for debugging
  )
  mnumber <- paste("m...",formatC(x$model.number, width = 3, format = "d", flag = "0"),sep="")
  assign( mnumber, fit, envir=.GlobalEnv)
  #browser()
  fit
  
},input.data=ybf.data, input.ddl=ybf.ddl)

## 
## 
## ***** Starting  ~1 ~1 1 
## 
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  866.5516
## AICc :  870.5727
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.6613984 0.1781966  0.3121331  1.0106638
## Psi:(Intercept) -1.1031290 0.1125195 -1.3236673 -0.8825908
## 
## 
## Real Parameter p
##          1         2
##  0.6595745 0.6595745
## 
## 
## Real Parameter Psi
##          1
##  0.2491541
## 
## 
## ***** Starting  ~time ~1 2 
## 
## Output summary for Occupancy model
## Name : p(~time)Psi(~1) 
## 
## Npar :  3
## -2lnL:  866.3015
## AICc :  872.3437
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.6007739 0.2134029  0.1825042  1.0190436
## p:time2          0.1251631 0.2504897 -0.3657968  0.6161231
## Psi:(Intercept) -1.1037320 0.1124768 -1.3241865 -0.8832776
## 
## 
## Real Parameter p
##          1        2
##  0.6458333 0.673913
## 
## 
## Real Parameter Psi
##          1
##  0.2490413
## 
## 
## ***** Starting  ~jdateS ~1 3 
## 
## Output summary for Occupancy model
## Name : p(~jdateS)Psi(~1) 
## 
## Npar :  3
## -2lnL:  862.8199
## AICc :  868.8622
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.5014851 0.1920270  0.1251123  0.8778580
## p:jdateS        -0.1849635 0.0958033 -0.3727380  0.0028110
## Psi:(Intercept) -1.1057834 0.1121358 -1.3255694 -0.8859973
## 
## 
## Real Parameter p
##          1         2
##  0.7041982 0.6188501
## 
## 
## Real Parameter Psi
##          1
##  0.2486578
## 
## 
## ***** Starting  ~jdateS+jdateSQ ~1 4 
## 
## Output summary for Occupancy model
## Name : p(~jdateS + jdateSQ)Psi(~1) 
## 
## Npar :  4
## -2lnL:  813.0628
## AICc :  821.1333
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.5096024 0.2945507  0.9322831  2.0869217
## p:jdateS        -0.7217606 0.1984787 -1.1107789 -0.3327423
## p:jdateSQ       -0.4799869 0.0929934 -0.6622541 -0.2977198
## Psi:(Intercept) -1.1609321 0.1071446 -1.3709355 -0.9509288
## 
## 
## Real Parameter p
##          1         2
##  0.6935011 0.7152739
## 
## 
## Real Parameter Psi
##         1
##  0.238498

# examine individula model results
model.number <-4

summary(model.fits[[model.number]])

## Output summary for Occupancy model
## Name : p(~jdateS + jdateSQ)Psi(~1) 
## 
## Npar :  4
## -2lnL:  813.0628
## AICc :  821.1333
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.5096024 0.2945507  0.9322831  2.0869217
## p:jdateS        -0.7217606 0.1984787 -1.1107789 -0.3327423
## p:jdateSQ       -0.4799869 0.0929934 -0.6622541 -0.2977198
## Psi:(Intercept) -1.1609321 0.1071446 -1.3709355 -0.9509288
## 
## 
## Real Parameter p
##          1         2
##  0.6935011 0.7152739
## 
## 
## Real Parameter Psi
##         1
##  0.238498

model.fits[[model.number]]$results$real

##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.6935011 0.0427796 0.6039776 0.7704778              
## p g1 a1 t2   0.7152739 0.0554979 0.5955663 0.8108052              
## Psi g1 a0 t1 0.2384980 0.0194592 0.2024688 0.2786981

model.fits[[model.number]]$results$beta

##                   estimate        se        lcl        ucl
## p:(Intercept)    1.5096024 0.2945507  0.9322831  2.0869217
## p:jdateS        -0.7217606 0.1984787 -1.1107789 -0.3327423
## p:jdateSQ       -0.4799869 0.0929934 -0.6622541 -0.2977198
## Psi:(Intercept) -1.1609321 0.1071446 -1.3709355 -0.9509288

model.fits[[model.number]]$results$derived

## $Occupancy
##   estimate         se       lcl       ucl
## 1 0.238498 0.01945924 0.2024688 0.2786981

get.real(model.fits[[model.number]], "Psi", se=TRUE)

##              all.diff.index par.index estimate        se       lcl
## Psi g1 a0 t1              3         3 0.238498 0.0194592 0.2024688
##                    ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.2786981                   1   0    1   0    0

get.real(model.fits[[model.number]], "p",    se=TRUE)

##            all.diff.index par.index  estimate        se       lcl
## p g1 a0 t1              1         1 0.6935011 0.0427796 0.6039776
## p g1 a1 t2              2         2 0.7152739 0.0554979 0.5955663
##                  ucl fixed    note group age time Age Time
## p g1 a0 t1 0.7704778                   1   0    1   0    0
## p g1 a1 t2 0.8108052                   1   1    2   1    1

##############################################
# Collect models
model.set <- RMark::collect.models( type="Occupancy")
model.set

##                         model npar     AICc DeltaAICc       weight
## 4 p(~jdateS + jdateSQ)Psi(~1)    4 821.1333   0.00000 1.000000e+00
## 3           p(~jdateS)Psi(~1)    3 868.8622  47.72888 4.323208e-11
## 1                p(~1)Psi(~1)    2 870.5727  49.43942 1.838086e-11
## 2             p(~time)Psi(~1)    3 872.3437  51.21043 7.582233e-12
##        Deviance
## 4  8.130628e+02
## 3  8.628199e+02
## 1  2.501630e-01
## 2 -1.136868e-13

names(model.set)

## [1] "m...001"     "m...002"     "m...003"     "m...004"     "model.table"

model.set$model.table

##                   p Psi                       model npar     AICc
## 4 ~jdateS + jdateSQ  ~1 p(~jdateS + jdateSQ)Psi(~1)    4 821.1333
## 3           ~jdateS  ~1           p(~jdateS)Psi(~1)    3 868.8622
## 1                ~1  ~1                p(~1)Psi(~1)    2 870.5727
## 2             ~time  ~1             p(~time)Psi(~1)    3 872.3437
##   DeltaAICc       weight      Deviance
## 4   0.00000 1.000000e+00  8.130628e+02
## 3  47.72888 4.323208e-11  8.628199e+02
## 1  49.43942 1.838086e-11  2.501630e-01
## 2  51.21043 7.582233e-12 -1.136868e-13

# Get model averaged values
Psi.ma <- RMark::model.average(model.set, param="Psi")
Psi.ma

##              par.index estimate         se fixed    note group age time
## Psi g1 a0 t1         3 0.238498 0.01945924                   1   0    1
##              Age Time
## Psi g1 a0 t1   0    0

# It is often convenient to estimate parameters (such as p) for each site using the
# values of the covariates specific for that site.
# This is similar to RPresence which gives estimates at each site.

# We create a data frame with the covariates etc as measured on the input.history dataframe.
# But we also need to add the appropriate index number for psi to the covariate data frame to account
# for the groups definition

p.covariates <- input.history
p.covariates$freq <- NULL
p.covariates$ch   <- NULL

ybf.ddl$p

##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1

p.covariates <- merge(ybf.ddl$p, p.covariates)
head(p.covariates)

##   par.index model.index group age time Age Time jdate1 jdate2 jdateS1
## 1         1           1     1   0    1   0    0    139    173    -1.1
## 2         2           2     1   1    2   1    1    139    173    -1.1
## 3         1           1     1   0    1   0    0    128    148    -2.2
## 4         2           2     1   1    2   1    1    128    148    -2.2
## 5         1           1     1   0    1   0    0    130    147    -2.0
## 6         2           2     1   1    2   1    1    130    147    -2.0
##   jdateS2 jdateSQ1 jdateSQ2
## 1     2.3     1.21     5.29
## 2     2.3     1.21     5.29
## 3    -0.2     4.84     0.04
## 4    -0.2     4.84     0.04
## 5    -0.3     4.00     0.09
## 6    -0.3     4.00     0.09

# we only want a prediction for that particular model.index.
# we need to create a variable "index" that has the model.index
p.covariates$index <- p.covariates$model.index

p.pred <- covariate.predictions(model.set,
                                data=p.covariates)

# Notice that there is problem in that if model.index =1, it used jdate1 as the predictor
# and if model.index==2, it used jdate2 as the predictor. There is no easy around thi!
head(p.pred$estimates)

##   vcv.index model.index par.index par.index.1 model.index.1 group age time
## 1         1           1         1           1             1     1   0    1
## 2         1           1         2           2             2     1   1    2
## 3         1           1         1           1             1     1   0    1
## 4         1           1         2           2             2     1   1    2
## 5         1           1         1           1             1     1   0    1
## 6         1           1         2           2             2     1   1    2
##   Age Time jdate1 jdate2 jdateS1 jdateS2 jdateSQ1 jdateSQ2 index
## 1   0    0    139    173    -1.1     2.3     1.21     5.29     1
## 2   1    1    139    173    -1.1     2.3     1.21     5.29     2
## 3   0    0    128    148    -2.2    -0.2     4.84     0.04     1
## 4   1    1    128    148    -2.2    -0.2     4.84     0.04     2
## 5   0    0    130    147    -2.0    -0.3     4.00     0.09     1
## 6   1    1    130    147    -2.0    -0.3     4.00     0.09     2
##     estimate         se        lcl       ucl fixed
## 1 0.84848335 0.03509380 0.76633291 0.9053204      
## 2 0.06359116 0.05233808 0.01198212 0.2755048      
## 3 0.68445878 0.04540351 0.58959625 0.7660933      
## 4 0.83681999 0.04011721 0.74248671 0.9011953      
## 5 0.73753117 0.04107207 0.64960843 0.8098497      
## 6 0.84329235 0.03878903 0.75168677 0.9053585

ggplot(data=NULL, aes( y=estimate))+
  ggtitle("Model averaged predictions of detection for each site")+
  geom_point(data=p.pred$estimates[ p.pred$estimates$index==2,], aes(x=jdate2))+
  geom_point(data=p.pred$estimates[ p.pred$estimates$index==1,], aes(x=jdate1))+
  geom_ribbon(data=p.pred$estimates[ p.pred$estimates$index==1,],aes(x=jdate1, ymin=lcl, ymax=ucl), alpha=0.2)+
  geom_ribbon(data=p.pred$estimates[ p.pred$estimates$index==2,],aes(x=jdate2, ymin=lcl, ymax=ucl), alpha=0.2)

# cleanup
cleanup(ask=FALSE)

ybf-ma.R

Neil

Sun Dec 02 21:11:11 2018