# Single Species Single Season Occupancy models using RMark 

# Blue Gross Beaks.
#Downloaded from https://sites.google.com/site/asrworkshop/home/schedule/r-occupancy-1

#An occupancy study was made on Blue Grosbeaks (Guiraca caerulea) 
# on 41 old fields planted to longleaf pines (Pinus palustris) 
# in southern Georgia, USA. 

# Surveys were 500 m transects across each field 
# and were completed three times during the breeding season in 2001.

# Columns in the file are:
#    field - field number
#    v1, v2, v3 -  detection histories for each site on each of 3 visit during the 2001 breeding season.    
#    field.size - size of the files
#    bqi - Enrollment in bobwihte quail initiative; does occupancy increase if field belongs to this initiative?
#    crop.hist - crop history
#    crop1, crop2 - indicator variables for the crop history
#    count1, count2, count3 - are actual counts of birds detected in each visit


# 2018-08-15 Code contributed by Carl James Schwarz (cschwarz.stat.sfu.cs@gmail.com)
#  RMark package
library(readxl)
library(RMark)
## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)
# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.data <- read.csv(file.path("..","blgr.csv"), 
                       header=TRUE, as.is=TRUE, strip.white=TRUE) 
head(input.data)
##   field v1 v2 v3 field.size bqi crop.hist crop1 crop2 count1 count2 count3
## 1     1  1  1  1       14.0   y      crop     1     0      1      2      2
## 2     2  1  1  0       12.7   y      crop     1     0      2      2      0
## 3     3  0  0  0       15.7   n     mixed     0     1      0      0      0
## 4     4  0  1  0       19.5   n     mixed     0     1      0      2      0
## 5     5  1  0  1       13.5   n      crop     1     0      1      0      1
## 6     6  0  0  1        9.6   n     mixed     0     1      0      0      2
##    X     logFS
## 1 NA 1.1461280
## 2 NA 1.1038037
## 3 NA 1.1958997
## 4 NA 1.2900346
## 5 NA 1.1303338
## 6 NA 0.9822712
# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.data)
## [1] 41
range(input.data[, c("v1","v2","v3")], na.rm=TRUE)
## [1] 0 1
sum(is.na(input.data[, c("v1","v2","v3")]))
## [1] 0
# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
                            ch=apply(input.data[,c("v1","v2","v3")],1,paste, collapse=""), 
                            stringsAsFactors=FALSE)
head(input.history)
##   freq  ch
## 1    1 111
## 2    1 110
## 3    1 000
## 4    1 010
## 5    1 101
## 6    1 001
# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
##   freq  ch
## 1    1 111
## 2    1 110
## 3    1 000
## 4    1 010
## 5    1 101
## 6    1 001
#Add remaining columns (field.size, crop.hist, etc) to capture history
#These rows are contained in the last 10 columns
input.history = cbind(input.history, input.data[,5:14])
#Remove empty column
input.history$X <- NULL

#Create the data structure
grossbeak.data <- process.data(data = input.history,
                               model = "Occupancy")
summary(grossbeak.data)
##                  Length Class      Mode     
## data             11     data.frame list     
## model             1     -none-     character
## mixtures          1     -none-     numeric  
## freq             41     -none-     numeric  
## nocc              1     -none-     numeric  
## nocc.secondary    0     -none-     NULL     
## time.intervals    3     -none-     numeric  
## begin.time        1     -none-     numeric  
## age.unit          1     -none-     numeric  
## initial.ages      1     -none-     numeric  
## group.covariates  0     -none-     NULL     
## nstrata           1     -none-     numeric  
## strata.labels     0     -none-     NULL     
## counts            0     -none-     NULL     
## reverse           1     -none-     logical  
## areas             0     -none-     NULL     
## events            0     -none-     NULL
# If survey covariates are present, modify the ddl
grossbeak.ddl <- make.design.data(grossbeak.data)
grossbeak.ddl
## $p
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 3         3           3     1   2    3   2    2
## 
## $Psi
##   par.index model.index group age time Age Time
## 1         1           4     1   0    1   0    0
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)
## [1] "p"   "Psi"
# Fit a model
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made
mod.fit <-  RMark::mark(grossbeak.data, ddl = grossbeak.ddl,
                        model="Occupancy",
                        model.parameters=list(
                          Psi   =list(formula=~1),
                          p     =list(formula=~1) 
                        )
)
## 
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  168.1898
## AICc :  172.5055
## 
## Beta
##                  estimate        se        lcl       ucl
## p:(Intercept)   0.2059922 0.2420584 -0.2684423 0.6804266
## Psi:(Intercept) 2.0386894 0.7514126  0.5659206 3.5114581
## 
## 
## Real Parameter p
##          1         2         3
##  0.5513167 0.5513167 0.5513167
## 
## 
## Real Parameter Psi
##          1
##  0.8847997
summary(mod.fit)
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  168.1898
## AICc :  172.5055
## 
## Beta
##                  estimate        se        lcl       ucl
## p:(Intercept)   0.2059922 0.2420584 -0.2684423 0.6804266
## Psi:(Intercept) 2.0386894 0.7514126  0.5659206 3.5114581
## 
## 
## Real Parameter p
##          1         2         3
##  0.5513167 0.5513167 0.5513167
## 
## 
## Real Parameter Psi
##          1
##  0.8847997
# Look the objects returned in more details
names(mod.fit)
##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"
# look at estimates on beta and original scale
mod.fit$results$beta  # on the logit scale
##                  estimate        se        lcl       ucl
## p:(Intercept)   0.2059922 0.2420584 -0.2684423 0.6804266
## Psi:(Intercept) 2.0386894 0.7514126  0.5659206 3.5114581
mod.fit$results$real# on the regular 0-1 scale for each site
##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.5513167 0.0598772 0.4332895 0.6638339              
## Psi g1 a0 t1 0.8847997 0.0765909 0.6378213 0.9710120
# derived variables is the occupancy probability 
names(mod.fit$results$derived)
## [1] "Occupancy"
mod.fit$results$derived$Occupancy
##    estimate         se       lcl      ucl
## 1 0.8847997 0.07659085 0.6378213 0.971012
# Model where p(t) varies across survey occasions
# 

mod.fit.pt <-  RMark::mark(grossbeak.data, ddl = grossbeak.ddl,
                           model="Occupancy",
                           model.parameters=list(
                             Psi   =list(formula=~1),
                             p     =list(formula=~time) 
                           )
)
## 
## Output summary for Occupancy model
## Name : p(~time)Psi(~1) 
## 
## Npar :  4
## -2lnL:  167.295
## AICc :  176.4061
## 
## Beta
##                   estimate        se        lcl       ucl
## p:(Intercept)   -0.0105659 0.3566283 -0.7095573 0.6884255
## p:time2          0.4489868 0.4772540 -0.4864311 1.3844047
## p:time3          0.2218307 0.4717451 -0.7027898 1.1464512
## Psi:(Intercept)  2.0183670 0.7358610  0.5760795 3.4606545
## 
## 
## Real Parameter p
##          1         2         3
##  0.4973586 0.6078827 0.5526206
## 
## 
## Real Parameter Psi
##          1
##  0.8827121
summary(mod.fit.pt)
## Output summary for Occupancy model
## Name : p(~time)Psi(~1) 
## 
## Npar :  4
## -2lnL:  167.295
## AICc :  176.4061
## 
## Beta
##                   estimate        se        lcl       ucl
## p:(Intercept)   -0.0105659 0.3566283 -0.7095573 0.6884255
## p:time2          0.4489868 0.4772540 -0.4864311 1.3844047
## p:time3          0.2218307 0.4717451 -0.7027898 1.1464512
## Psi:(Intercept)  2.0183670 0.7358610  0.5760795 3.4606545
## 
## 
## Real Parameter p
##          1         2         3
##  0.4973586 0.6078827 0.5526206
## 
## 
## Real Parameter Psi
##          1
##  0.8827121
# Look the objects returned in more details
names(mod.fit.pt)
##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"
# look at estimates on beta and original scale
mod.fit.pt$results$beta  # on the logit scale
##                   estimate        se        lcl       ucl
## p:(Intercept)   -0.0105659 0.3566283 -0.7095573 0.6884255
## p:time2          0.4489868 0.4772540 -0.4864311 1.3844047
## p:time3          0.2218307 0.4717451 -0.7027898 1.1464512
## Psi:(Intercept)  2.0183670 0.7358610  0.5760795 3.4606545
mod.fit.pt$results$real# on the regular 0-1 scale for each site
##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.4973586 0.0891546 0.3296967 0.6656166              
## p g1 a1 t2   0.6078827 0.0902286 0.4246993 0.7650113              
## p g1 a2 t3   0.5526206 0.0900910 0.3768454 0.7161592              
## Psi g1 a0 t1 0.8827121 0.0761848 0.6401648 0.9695473
# derived variables is the occupancy probability 
names(mod.fit.pt$results$derived)
## [1] "Occupancy"
mod.fit.pt$results$derived$Occupancy
##    estimate         se       lcl       ucl
## 1 0.8827121 0.07618478 0.6401648 0.9695473
# Model averaging

# collect models and make AICc table

model.set <- RMark::collect.models( type="Occupancy")
model.set
##             model npar     AICc DeltaAICc    weight Deviance
## 1    p(~1)Psi(~1)    2 172.5055  0.000000 0.8754794 4.363463
## 2 p(~time)Psi(~1)    4 176.4061  3.900602 0.1245206 3.468741
names(model.set)
## [1] "mod.fit"     "mod.fit.pt"  "model.table"
model.set$model.table
##       p Psi           model npar     AICc DeltaAICc    weight Deviance
## 1    ~1  ~1    p(~1)Psi(~1)    2 172.5055  0.000000 0.8754794 4.363463
## 2 ~time  ~1 p(~time)Psi(~1)    4 176.4061  3.900602 0.1245206 3.468741
# model averaged values
Psi.ma <- RMark::model.average(model.set, param="Psi")
Psi.ma
##              par.index  estimate         se fixed    note group age time
## Psi g1 a0 t1         4 0.8845398 0.07654351                   1   0    1
##              Age Time
## Psi g1 a0 t1   0    0
p.ma <- RMark::model.average(model.set, param="p")
p.ma
##            par.index  estimate         se fixed    note group age time Age
## p g1 a0 t1         1 0.5445978 0.06667823                   1   0    1   0
## p g1 a1 t2         2 0.5583603 0.06709246                   1   1    2   1
## p g1 a2 t3         3 0.5514791 0.06441799                   1   2    3   2
##            Time
## p g1 a0 t1    0
## p g1 a1 t2    1
## p g1 a2 t3    2
#cleanup
cleanup(ask=FALSE)