ybf.R

# Single Species Single Season Occupancy 

# Yellow-bellied toad
# Single Season Single Season occupancy 

#  RMark package

# 2018-11-15 Code contributed by Carl James Schwarz (cschwarz.stat.sfu.cs@gmail.com)

library(readxl)
library(RMark)

## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip

library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.detect <- readxl::read_excel(file.path("..","YellowBelliedToad.xlsx"),
                                   sheet="detections")

# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.detect)

## [1] 572

ncol(input.detect)

## [1] 2

range(input.detect, na.rm=TRUE)

## [1] 0 1

sum(is.na(input.detect))

## [1] 0

head(input.detect)

## # A tibble: 6 x 2
##      V1    V2
##   <dbl> <dbl>
## 1  0        0
## 2  0        0
## 3  0        0
## 4  0        0
## 5  0        0
## 6  1.00     0

# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
                            ch=apply(input.detect[,1:2],1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)

##   freq ch
## 1    1 00
## 2    1 00
## 3    1 00
## 4    1 00
## 5    1 00
## 6    1 10

# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)

##   freq ch
## 1    1 00
## 2    1 00
## 3    1 00
## 4    1 00
## 5    1 00
## 6    1 10

# Get the site level covariates - none.



# Get the site x visit (sampling) covariates. 
jdate <- readxl::read_excel(file.path("..","YellowBelliedToad.xlsx"),
                            sheet="JulianDate")
range(jdate)

## [1] 121 179

# standarize jdate
jdateS <- (jdate - 150)/10

names(jdate ) <- c('jdate1'   ,'jdate2')
names(jdateS) <- c('jdateS1'  ,'jdateS2')

# Sampling covariates must be added to the input.history as multiple columns
# We also need to create the quadratic terms in advance - this is a pain
jdateSQ <- jdateS
names(jdateSQ)<- c('jdateSQ1' , 'jdateSQ2')
jdateSQ <- jdateSQ^2


input.history <- cbind(input.history, jdate, jdateS, jdateSQ)
head(input.history)

##   freq ch jdate1 jdate2 jdateS1 jdateS2 jdateSQ1 jdateSQ2
## 1    1 00    139    173    -1.1     2.3     1.21     5.29
## 2    1 00    128    148    -2.2    -0.2     4.84     0.04
## 3    1 00    130    147    -2.0    -0.3     4.00     0.09
## 4    1 00    130    144    -2.0    -0.6     4.00     0.36
## 5    1 00    128    148    -2.2    -0.2     4.84     0.04
## 6    1 10    145    175    -0.5     2.5     0.25     6.25

ybf.data <- process.data(data=input.history,
                         model="Occupancy")
summary(ybf.data)

##                  Length Class      Mode     
## data               8    data.frame list     
## model              1    -none-     character
## mixtures           1    -none-     numeric  
## freq             572    -none-     numeric  
## nocc               1    -none-     numeric  
## nocc.secondary     0    -none-     NULL     
## time.intervals     2    -none-     numeric  
## begin.time         1    -none-     numeric  
## age.unit           1    -none-     numeric  
## initial.ages       1    -none-     numeric  
## group.covariates   0    -none-     NULL     
## nstrata            1    -none-     numeric  
## strata.labels      0    -none-     NULL     
## counts             0    -none-     NULL     
## reverse            1    -none-     logical  
## areas              0    -none-     NULL     
## events             0    -none-     NULL

# Set up survey covariates in the ddl (in this case none)
ybf.ddl <- make.design.data(ybf.data)
ybf.ddl

## $p
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 
## $Psi
##   par.index model.index group age time Age Time
## 1         1           3     1   0    1   0    0
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"

# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)

## [1] "p"   "Psi"

####################################################################
# Fit a model
# Note that formula HAVE AN = SIGN
mod.fit <-  RMark::mark(ybf.data, ddl=ybf.ddl, 
                        model="Occupancy",
                        model.parameters=list(
                          Psi   =list(formula=~1),
                          p     =list(formula=~1)
                        )
)

## 
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  866.5516
## AICc :  870.5727
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.6613984 0.1781966  0.3121331  1.0106638
## Psi:(Intercept) -1.1031290 0.1125195 -1.3236673 -0.8825908
## 
## 
## Real Parameter p
##          1         2
##  0.6595745 0.6595745
## 
## 
## Real Parameter Psi
##          1
##  0.2491541

summary(mod.fit)

## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  866.5516
## AICc :  870.5727
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.6613984 0.1781966  0.3121331  1.0106638
## Psi:(Intercept) -1.1031290 0.1125195 -1.3236673 -0.8825908
## 
## 
## Real Parameter p
##          1         2
##  0.6595745 0.6595745
## 
## 
## Real Parameter Psi
##          1
##  0.2491541

# Look the objects returned in more details
names(mod.fit)

##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"

names(mod.fit$results)

##  [1] "lnl"              "deviance"         "deviance.df"     
##  [4] "npar"             "n"                "AICc"            
##  [7] "beta"             "real"             "beta.vcv"        
## [10] "derived"          "derived.vcv"      "covariate.values"
## [13] "singular"         "real.vcv"

# look at estimates on beta and original scale
mod.fit$results$beta  # on the logit scale

##                   estimate        se        lcl        ucl
## p:(Intercept)    0.6613984 0.1781966  0.3121331  1.0106638
## Psi:(Intercept) -1.1031290 0.1125195 -1.3236673 -0.8825908

mod.fit$results$real# on the regular 0-1 scale for each site

##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.6595745 0.0400116 0.5774058 0.7331500              
## Psi g1 a0 t1 0.2491541 0.0210497 0.2102088 0.2926412

# derived variabldes is the occupancy probability 
names(mod.fit$results$derived)

## [1] "Occupancy"

mod.fit$results$derived$Occupancy

##    estimate         se       lcl       ucl
## 1 0.2491541 0.02104974 0.2102088 0.2926412

# alternatively
get.real(mod.fit, "Psi", se=TRUE)

##              all.diff.index par.index  estimate        se       lcl
## Psi g1 a0 t1              3         2 0.2491541 0.0210497 0.2102088
##                    ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.2926412                   1   0    1   0    0

get.real(mod.fit, "Psi", pim=TRUE)

##          1
##  0.2491541

get.real(mod.fit, "p", se=TRUE)

##            all.diff.index par.index  estimate        se       lcl     ucl
## p g1 a0 t1              1         1 0.6595745 0.0400116 0.5774058 0.73315
## p g1 a1 t2              2         1 0.6595745 0.0400116 0.5774058 0.73315
##            fixed    note group age time Age Time
## p g1 a0 t1                   1   0    1   0    0
## p g1 a1 t2                   1   1    2   1    1

#######################################
# Fit a model with p varying over time
# Note the use of the keyword "time" from the ddl which is a factor

ybf.ddl$p

##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1

str(ybf.ddl$p)

## 'data.frame':    2 obs. of  7 variables:
##  $ par.index  : int  1 2
##  $ model.index: num  1 2
##  $ group      : Factor w/ 1 level "1": 1 1
##  $ age        : Factor w/ 2 levels "0","1": 1 2
##  $ time       : Factor w/ 2 levels "1","2": 1 2
##  $ Age        : num  0 1
##  $ Time       : num  0 1

mod.fit2 <-  RMark::mark(ybf.data,  ddl=ybf.ddl,
                         model="Occupancy",
                         model.parameters=list(
                           Psi   =list(formula=~1),
                           p     =list(formula=~time)
                         )
)

## 
## Output summary for Occupancy model
## Name : p(~time)Psi(~1) 
## 
## Npar :  3
## -2lnL:  866.3015
## AICc :  872.3437
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.6007738 0.2134029  0.1825042  1.0190435
## p:time2          0.1251632 0.2504897 -0.3657966  0.6161229
## Psi:(Intercept) -1.1037320 0.1124768 -1.3241865 -0.8832776
## 
## 
## Real Parameter p
##          1        2
##  0.6458333 0.673913
## 
## 
## Real Parameter Psi
##          1
##  0.2490413

summary(mod.fit2)

## Output summary for Occupancy model
## Name : p(~time)Psi(~1) 
## 
## Npar :  3
## -2lnL:  866.3015
## AICc :  872.3437
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.6007738 0.2134029  0.1825042  1.0190435
## p:time2          0.1251632 0.2504897 -0.3657966  0.6161229
## Psi:(Intercept) -1.1037320 0.1124768 -1.3241865 -0.8832776
## 
## 
## Real Parameter p
##          1        2
##  0.6458333 0.673913
## 
## 
## Real Parameter Psi
##          1
##  0.2490413

get.real(mod.fit2, "p", se=TRUE)

##            all.diff.index par.index  estimate        se       lcl
## p g1 a0 t1              1         1 0.6458333 0.0488122 0.5454998
## p g1 a1 t2              2         2 0.6739130 0.0488736 0.5720036
##                  ucl fixed    note group age time Age Time
## p g1 a0 t1 0.7347862                   1   0    1   0    0
## p g1 a1 t2 0.7616676                   1   1    2   1    1

####################################################
# fit a model with p as a function of julian date (linear)
#

mod.fit3 <-  RMark::mark(ybf.data,  ddl=ybf.ddl,
                         model="Occupancy",
                         model.parameters=list(
                           Psi   =list(formula=~1),
                           p     =list(formula=~jdateS)
                         )
)

## 
## Output summary for Occupancy model
## Name : p(~jdateS)Psi(~1) 
## 
## Npar :  3
## -2lnL:  862.8199
## AICc :  868.8622
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.5014852 0.1920270  0.1251123  0.8778581
## p:jdateS        -0.1849634 0.0958034 -0.3727380  0.0028112
## Psi:(Intercept) -1.1057833 0.1121357 -1.3255694 -0.8859972
## 
## 
## Real Parameter p
##          1         2
##  0.7041981 0.6188501
## 
## 
## Real Parameter Psi
##          1
##  0.2486578

summary(mod.fit3)

## Output summary for Occupancy model
## Name : p(~jdateS)Psi(~1) 
## 
## Npar :  3
## -2lnL:  862.8199
## AICc :  868.8622
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.5014852 0.1920270  0.1251123  0.8778581
## p:jdateS        -0.1849634 0.0958034 -0.3727380  0.0028112
## Psi:(Intercept) -1.1057833 0.1121357 -1.3255694 -0.8859972
## 
## 
## Real Parameter p
##          1         2
##  0.7041981 0.6188501
## 
## 
## Real Parameter Psi
##          1
##  0.2486578

# estimate of p at the "average" julian dates
get.real(mod.fit3, "p", se=TRUE)

##            all.diff.index par.index  estimate        se       lcl
## p g1 a0 t1              1         1 0.7041981 0.0446813 0.6099131
## p g1 a1 t2              2         2 0.6188501 0.0460897 0.5254001
##                  ucl fixed    note group age time Age Time
## p g1 a0 t1 0.7837734                   1   0    1   0    0
## p g1 a1 t2 0.7042571                   1   1    2   1    1

####################################################
# fit a model with p as a function of julian date (quadratinc)
#


mod.fit4 <-  RMark::mark(ybf.data,  ddl=ybf.ddl,
                         model="Occupancy",
                         model.parameters=list(
                           Psi   =list(formula=~1),
                           p     =list(formula=~jdateS+jdateSQ)
                         )
)

## 
## Output summary for Occupancy model
## Name : p(~jdateS + jdateSQ)Psi(~1) 
## 
## Npar :  4
## -2lnL:  813.0628
## AICc :  821.1333
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.5096022 0.2945507  0.9322828  2.0869215
## p:jdateS        -0.7217598 0.1984786 -1.1107779 -0.3327417
## p:jdateSQ       -0.4799866 0.0929934 -0.6622536 -0.2977195
## Psi:(Intercept) -1.1609321 0.1071446 -1.3709354 -0.9509287
## 
## 
## Real Parameter p
##         1         2
##  0.693501 0.7152739
## 
## 
## Real Parameter Psi
##         1
##  0.238498

summary(mod.fit4)

## Output summary for Occupancy model
## Name : p(~jdateS + jdateSQ)Psi(~1) 
## 
## Npar :  4
## -2lnL:  813.0628
## AICc :  821.1333
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.5096022 0.2945507  0.9322828  2.0869215
## p:jdateS        -0.7217598 0.1984786 -1.1107779 -0.3327417
## p:jdateSQ       -0.4799866 0.0929934 -0.6622536 -0.2977195
## Psi:(Intercept) -1.1609321 0.1071446 -1.3709354 -0.9509287
## 
## 
## Real Parameter p
##         1         2
##  0.693501 0.7152739
## 
## 
## Real Parameter Psi
##         1
##  0.238498

# estimate of p at the "average" julian dates and average of the quadratic term - this is silly
get.real(mod.fit4, "p", se=TRUE)

##            all.diff.index par.index  estimate        se       lcl
## p g1 a0 t1              1         1 0.6935010 0.0427796 0.6039775
## p g1 a1 t2              2         2 0.7152739 0.0554979 0.5955663
##                  ucl fixed    note group age time Age Time
## p g1 a0 t1 0.7704778                   1   0    1   0    0
## p g1 a1 t2 0.8108052                   1   1    2   1    1

##############################################
# Collect models
model.set <- RMark::collect.models( type="Occupancy")
model.set

##                         model npar     AICc DeltaAICc       weight
## 4 p(~jdateS + jdateSQ)Psi(~1)    4 821.1333   0.00000 1.000000e+00
## 3           p(~jdateS)Psi(~1)    3 868.8622  47.72888 4.323208e-11
## 1                p(~1)Psi(~1)    2 870.5727  49.43942 1.838086e-11
## 2             p(~time)Psi(~1)    3 872.3437  51.21043 7.582233e-12
##        Deviance
## 4  8.130628e+02
## 3  8.628199e+02
## 1  2.501630e-01
## 2 -2.273737e-13

names(model.set)

## [1] "mod.fit"     "mod.fit2"    "mod.fit3"    "mod.fit4"    "model.table"

model.set$model.table

##                   p Psi                       model npar     AICc
## 4 ~jdateS + jdateSQ  ~1 p(~jdateS + jdateSQ)Psi(~1)    4 821.1333
## 3           ~jdateS  ~1           p(~jdateS)Psi(~1)    3 868.8622
## 1                ~1  ~1                p(~1)Psi(~1)    2 870.5727
## 2             ~time  ~1             p(~time)Psi(~1)    3 872.3437
##   DeltaAICc       weight      Deviance
## 4   0.00000 1.000000e+00  8.130628e+02
## 3  47.72888 4.323208e-11  8.628199e+02
## 1  49.43942 1.838086e-11  2.501630e-01
## 2  51.21043 7.582233e-12 -2.273737e-13

# model averaged values
get.real(mod.fit , "Psi", se=TRUE)

##              all.diff.index par.index  estimate        se       lcl
## Psi g1 a0 t1              3         2 0.2491541 0.0210497 0.2102088
##                    ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.2926412                   1   0    1   0    0

get.real(mod.fit2, "Psi", se=TRUE)

##              all.diff.index par.index  estimate        se       lcl
## Psi g1 a0 t1              3         3 0.2490413 0.0210354 0.2101226
##                   ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.292499                   1   0    1   0    0

get.real(mod.fit3, "Psi", se=TRUE)

##              all.diff.index par.index  estimate      se       lcl
## Psi g1 a0 t1              3         3 0.2486578 0.02095 0.2098932
##                    ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.2919366                   1   0    1   0    0

Psi.ma <- RMark::model.average(model.set, param="Psi")
Psi.ma

##              par.index estimate         se fixed    note group age time
## Psi g1 a0 t1         3 0.238498 0.01945924                   1   0    1
##              Age Time
## Psi g1 a0 t1   0    0

# It is often convenient to estimate parameters (such as p) for each site using the
# values of the covariates specific for that site.
# This is similar to RPresence which gives estimates at each site.

# We create a data frame with the covariates etc as measured on the input.history dataframe.
# But we also need to add the appropriate index number for psi to the covariate data frame to account
# for the groups definition

p.covariates <- input.history
p.covariates$freq <- NULL
p.covariates$ch   <- NULL

ybf.ddl$p

##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1

p.covariates <- merge(ybf.ddl$p, p.covariates)
head(p.covariates)

##   par.index model.index group age time Age Time jdate1 jdate2 jdateS1
## 1         1           1     1   0    1   0    0    139    173    -1.1
## 2         2           2     1   1    2   1    1    139    173    -1.1
## 3         1           1     1   0    1   0    0    128    148    -2.2
## 4         2           2     1   1    2   1    1    128    148    -2.2
## 5         1           1     1   0    1   0    0    130    147    -2.0
## 6         2           2     1   1    2   1    1    130    147    -2.0
##   jdateS2 jdateSQ1 jdateSQ2
## 1     2.3     1.21     5.29
## 2     2.3     1.21     5.29
## 3    -0.2     4.84     0.04
## 4    -0.2     4.84     0.04
## 5    -0.3     4.00     0.09
## 6    -0.3     4.00     0.09

# we only want a prediction for that particular model.index.
# we need to create a variable "index" that has the model.index
p.covariates$index <- p.covariates$model.index

p.pred <- covariate.predictions(model.set,
                                   data=p.covariates)

# Notice that there is problem in that if model.index =1, it used jdate1 as the predictor
# and if model.index==2, it used jdate2 as the predictor. There is no easy around thi!
head(p.pred$estimates)

##   vcv.index model.index par.index par.index.1 model.index.1 group age time
## 1         1           1         1           1             1     1   0    1
## 2         1           1         2           2             2     1   1    2
## 3         1           1         1           1             1     1   0    1
## 4         1           1         2           2             2     1   1    2
## 5         1           1         1           1             1     1   0    1
## 6         1           1         2           2             2     1   1    2
##   Age Time jdate1 jdate2 jdateS1 jdateS2 jdateSQ1 jdateSQ2 index
## 1   0    0    139    173    -1.1     2.3     1.21     5.29     1
## 2   1    1    139    173    -1.1     2.3     1.21     5.29     2
## 3   0    0    128    148    -2.2    -0.2     4.84     0.04     1
## 4   1    1    128    148    -2.2    -0.2     4.84     0.04     2
## 5   0    0    130    147    -2.0    -0.3     4.00     0.09     1
## 6   1    1    130    147    -2.0    -0.3     4.00     0.09     2
##     estimate         se        lcl       ucl fixed
## 1 0.84848325 0.03509381 0.76633279 0.9053203      
## 2 0.06359135 0.05233820 0.01198217 0.2755053      
## 3 0.68445868 0.04540351 0.58959613 0.7660932      
## 4 0.83681994 0.04011722 0.74248664 0.9011952      
## 5 0.73753105 0.04107208 0.64960830 0.8098496      
## 6 0.84329230 0.03878904 0.75168669 0.9053585

ggplot(data=NULL, aes( y=estimate))+
  ggtitle("Model averaged predictions of detection for each site")+
  geom_point(data=p.pred$estimates[ p.pred$estimates$index==2,], aes(x=jdate2))+
  geom_point(data=p.pred$estimates[ p.pred$estimates$index==1,], aes(x=jdate1))+
  geom_ribbon(data=p.pred$estimates[ p.pred$estimates$index==1,],aes(x=jdate1, ymin=lcl, ymax=ucl), alpha=0.2)+
  geom_ribbon(data=p.pred$estimates[ p.pred$estimates$index==2,],aes(x=jdate2, ymin=lcl, ymax=ucl), alpha=0.2)

# cleanup
cleanup(ask=FALSE)

ybf.R

Neil

Sun Dec 02 21:06:51 2018