# Single Species Single Season Occupancy 

# Salamander Example with model averaging with individual model fits
library(readxl)
library(RMark)
## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.data <- readxl::read_excel("../salamander.xls",
                                 sheet="CompleteData",
                                 col_names=FALSE)  # notice no column names in row 1 of data file. 

head(input.data)
## # A tibble: 6 x 5
##    X__1  X__2  X__3  X__4  X__5
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  0     0     0     1.00  1.00
## 2  0     1.00  0     0     0   
## 3  0     1.00  0     0     0   
## 4  1.00  1.00  1.00  1.00  0   
## 5  0     0     1.00  0     0   
## 6  0     0     1.00  0     0
# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
                            ch=apply(input.data[,1:5],1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)
##   freq    ch
## 1    1 00011
## 2    1 01000
## 3    1 01000
## 4    1 11110
## 5    1 00100
## 6    1 00100
# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
##   freq    ch
## 1    1 00011
## 2    1 01000
## 3    1 01000
## 4    1 11110
## 5    1 00100
## 6    1 00100
# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.history)
## [1] 39
sal.data <- process.data(data=input.history,
                          model="Occupancy")

summary(sal.data)
##                  Length Class      Mode     
## data              2     data.frame list     
## model             1     -none-     character
## mixtures          1     -none-     numeric  
## freq             39     -none-     numeric  
## nocc              1     -none-     numeric  
## nocc.secondary    0     -none-     NULL     
## time.intervals    5     -none-     numeric  
## begin.time        1     -none-     numeric  
## age.unit          1     -none-     numeric  
## initial.ages      1     -none-     numeric  
## group.covariates  0     -none-     NULL     
## nstrata           1     -none-     numeric  
## strata.labels     0     -none-     NULL     
## counts            0     -none-     NULL     
## reverse           1     -none-     logical  
## areas             0     -none-     NULL     
## events            0     -none-     NULL
# Fit some models.
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made
mod.fit <-  RMark::mark(sal.data,
                        model="Occupancy",
                        model.parameters=list(
                          Psi   =list(formula=~1),
                          p     =list(formula=~1) 
                        )
)
## 
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  161.7586
## AICc :  166.0919
## 
## Beta
##                   estimate        se       lcl       ucl
## p:(Intercept)   -1.0525846 0.3008626 -1.642275 -0.462894
## Psi:(Intercept)  0.3831083 0.5086093 -0.613766  1.379982
## 
## 
## Real Parameter p
##          1         2         3         4         5
##  0.2587291 0.2587291 0.2587291 0.2587291 0.2587291
## 
## 
## Real Parameter Psi
##          1
##  0.5946226
summary(mod.fit)
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  161.7586
## AICc :  166.0919
## 
## Beta
##                   estimate        se       lcl       ucl
## p:(Intercept)   -1.0525846 0.3008626 -1.642275 -0.462894
## Psi:(Intercept)  0.3831083 0.5086093 -0.613766  1.379982
## 
## 
## Real Parameter p
##          1         2         3         4         5
##  0.2587291 0.2587291 0.2587291 0.2587291 0.2587291
## 
## 
## Real Parameter Psi
##          1
##  0.5946226
# Look the objects returned in more details
names(mod.fit)
##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"
# look at estimates on beta and original scale
mod.fit$results$beta  # on the logit scale
##                   estimate        se       lcl       ucl
## p:(Intercept)   -1.0525846 0.3008626 -1.642275 -0.462894
## Psi:(Intercept)  0.3831083 0.5086093 -0.613766  1.379982
mod.fit$results$real# on the regular 0-1 scale for each site
##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.2587291 0.0577019 0.1621557 0.3862995              
## Psi g1 a0 t1 0.5946226 0.1225985 0.3512006 0.7989882
# derived variables is the occupancy probability 
names(mod.fit$results$derived)
## [1] "Occupancy"
mod.fit$results$derived$Occupancy
##    estimate        se       lcl       ucl
## 1 0.5946226 0.1225985 0.3512006 0.7989882
# Model where p(t) varies across survey occasions
# 
mod.fit.pt <-  RMark::mark(sal.data,
                        model="Occupancy",
                        model.parameters=list(
                          Psi   =list(formula=~1),
                          p     =list(formula=~time) 
                        )
)
## 
## Output summary for Occupancy model
## Name : p(~time)Psi(~1) 
## 
## Npar :  6
## -2lnL:  155.7144
## AICc :  170.3394
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)   -1.5376878 0.5804906 -2.6754493 -0.3999263
## p:time2         -0.3400079 0.8295295 -1.9658857  1.2858698
## p:time3          1.1237205 0.7019696 -0.2521400  2.4995810
## p:time4          0.9350626 0.7068460 -0.4503556  2.3204809
## p:time5          0.5191253 0.7287340 -0.9091933  1.9474439
## Psi:(Intercept)  0.3222752 0.4825848 -0.6235910  1.2681413
## 
## 
## Real Parameter p
##          1         2         3         4         5
##  0.1768717 0.1326538 0.3979612 0.3537433 0.2653075
## 
## 
## Real Parameter Psi
##          1
##  0.5798786
summary(mod.fit.pt)
## Output summary for Occupancy model
## Name : p(~time)Psi(~1) 
## 
## Npar :  6
## -2lnL:  155.7144
## AICc :  170.3394
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)   -1.5376878 0.5804906 -2.6754493 -0.3999263
## p:time2         -0.3400079 0.8295295 -1.9658857  1.2858698
## p:time3          1.1237205 0.7019696 -0.2521400  2.4995810
## p:time4          0.9350626 0.7068460 -0.4503556  2.3204809
## p:time5          0.5191253 0.7287340 -0.9091933  1.9474439
## Psi:(Intercept)  0.3222752 0.4825848 -0.6235910  1.2681413
## 
## 
## Real Parameter p
##          1         2         3         4         5
##  0.1768717 0.1326538 0.3979612 0.3537433 0.2653075
## 
## 
## Real Parameter Psi
##          1
##  0.5798786
# Look the objects returned in more details
names(mod.fit.pt)
##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"
# look at estimates on beta and original scale
mod.fit.pt$results$beta  # on the logit scale
##                   estimate        se        lcl        ucl
## p:(Intercept)   -1.5376878 0.5804906 -2.6754493 -0.3999263
## p:time2         -0.3400079 0.8295295 -1.9658857  1.2858698
## p:time3          1.1237205 0.7019696 -0.2521400  2.4995810
## p:time4          0.9350626 0.7068460 -0.4503556  2.3204809
## p:time5          0.5191253 0.7287340 -0.9091933  1.9474439
## Psi:(Intercept)  0.3222752 0.4825848 -0.6235910  1.2681413
mod.fit.pt$results$real# on the regular 0-1 scale for each site
##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.1768717 0.0845125 0.0644377 0.4013301              
## p g1 a1 t2   0.1326538 0.0740540 0.0415185 0.3506507              
## p g1 a2 t3   0.3979612 0.1190041 0.1998064 0.6363531              
## p g1 a3 t4   0.3537433 0.1136999 0.1711581 0.5919885              
## p g1 a4 t5   0.2653075 0.1010181 0.1156440 0.4993046              
## Psi g1 a0 t1 0.5798786 0.1175670 0.3489652 0.7804244
# derived variables is the occupancy probability 
names(mod.fit.pt$results$derived)
## [1] "Occupancy"
mod.fit.pt$results$derived$Occupancy
##    estimate       se       lcl       ucl
## 1 0.5798786 0.117567 0.3489652 0.7804244
# Fit a model with the detection probability equal in first 2 occasions and last 3 occasions.
# We need to define a survey covariate that has two levels 
# add a survey level covariate. by adding a column to the design matrix
sal.ddl <- make.design.data(sal.data)
sal.ddl
## $p
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 3         3           3     1   2    3   2    2
## 4         4           4     1   3    4   3    3
## 5         5           5     1   4    5   4    4
## 
## $Psi
##   par.index model.index group age time Age Time
## 1         1           6     1   0    1   0    0
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
sal.ddl$p$Effort <- factor(c("d1","d1","d2","d2","d2"))
sal.ddl
## $p
##   par.index model.index group age time Age Time Effort
## 1         1           1     1   0    1   0    0     d1
## 2         2           2     1   1    2   1    1     d1
## 3         3           3     1   2    3   2    2     d2
## 4         4           4     1   3    4   3    3     d2
## 5         5           5     1   4    5   4    4     d2
## 
## $Psi
##   par.index model.index group age time Age Time
## 1         1           6     1   0    1   0    0
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
mod.fit.pcustom <-  RMark::mark(sal.data, ddl = sal.ddl,
                                model="Occupancy",
                                model.parameters=list(
                                  Psi   =list(formula=~1),
                                  p     =list(formula=~Effort) 
                                )
)
## 
## Output summary for Occupancy model
## Name : p(~Effort)Psi(~1) 
## 
## Npar :  3
## -2lnL:  156.8176
## AICc :  163.5033
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)   -1.7043682 0.4484045 -2.5832411 -0.8254953
## p:Effortd2       1.0281447 0.4867145  0.0741843  1.9821051
## Psi:(Intercept)  0.3357009 0.4881035 -0.6209820  1.2923838
## 
## 
## Real Parameter p
##          1         2         3         4         5
##  0.1538956 0.1538956 0.3371047 0.3371047 0.3371047
## 
## 
## Real Parameter Psi
##          1
##  0.5831458
summary(mod.fit.pcustom)
## Output summary for Occupancy model
## Name : p(~Effort)Psi(~1) 
## 
## Npar :  3
## -2lnL:  156.8176
## AICc :  163.5033
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)   -1.7043682 0.4484045 -2.5832411 -0.8254953
## p:Effortd2       1.0281447 0.4867145  0.0741843  1.9821051
## Psi:(Intercept)  0.3357009 0.4881035 -0.6209820  1.2923838
## 
## 
## Real Parameter p
##          1         2         3         4         5
##  0.1538956 0.1538956 0.3371047 0.3371047 0.3371047
## 
## 
## Real Parameter Psi
##          1
##  0.5831458
# Look the objects returned in more details
names(mod.fit.pcustom)
##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"
# look at estimates on beta and original scale
mod.fit.pcustom$results$beta  # on the logit scale
##                   estimate        se        lcl        ucl
## p:(Intercept)   -1.7043682 0.4484045 -2.5832411 -0.8254953
## p:Effortd2       1.0281447 0.4867145  0.0741843  1.9821051
## Psi:(Intercept)  0.3357009 0.4881035 -0.6209820  1.2923838
mod.fit.pcustom$results$real# on the regular 0-1 scale for each site
##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.1538956 0.0583875 0.0702248 0.3045984              
## p g1 a2 t3   0.3371047 0.0767915 0.2059100 0.4993277              
## Psi g1 a0 t1 0.5831458 0.1186515 0.3495581 0.7845504
# derived variables is the occupancy probability 
names(mod.fit.pcustom$results$derived)
## [1] "Occupancy"
mod.fit.pcustom$results$derived$Occupancy
##    estimate        se       lcl       ucl
## 1 0.5831458 0.1186515 0.3495581 0.7845504
# Model averaging
model.set <- RMark::collect.models( type="Occupancy")
model.set
##               model npar     AICc DeltaAICc    weight Deviance
## 2 p(~Effort)Psi(~1)    3 163.5033  0.000000 0.7651935 26.11443
## 1      p(~1)Psi(~1)    2 166.0919  2.588649 0.2097265 31.05546
## 3   p(~time)Psi(~1)    6 170.3394  6.836116 0.0250800 25.01126
Psi.ma <- RMark::model.average(model.set, param="Psi")
Psi.ma
##              par.index  estimate        se fixed    note group age time
## Psi g1 a0 t1         6 0.5854709 0.1195573                   1   0    1
##              Age Time
## Psi g1 a0 t1   0    0
# cleanup
cleanup(ask=FALSE)