# Blue Ridge Salamander with some missing values
# Notice how we need to change any NA to . when creating the capture history

# Single Species Single Season Occupancy

# Fitting a single model

# 2018-08-15 Code contributed by Carl James Schwarz (cschwarz.stat.sfu.cs@gmail.com)
library(readxl)
library(RMark)
## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.data <- readxl::read_excel("../salamander.xls",
                                 sheet="MissingData",
                                 na="-",
                                 col_names=FALSE)  # notice no column names in row 1 of data file. 

head(input.data)
## # A tibble: 6 x 5
##    X__1  X__2  X__3  X__4  X__5
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  0     0        0  1.00  1.00
## 2  0     1.00    NA  0     0   
## 3  0    NA       NA  0     0   
## 4  1.00 NA       NA  1.00  0   
## 5  0    NA       NA  0     0   
## 6  0     0       NA  0     0
# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
                            ch=apply(input.data[,1:5],1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)
##   freq      ch
## 1    1   00011
## 2    1  01NA00
## 3    1 0NANA00
## 4    1 1NANA10
## 5    1 0NANA00
## 6    1  00NA00
# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
##   freq    ch
## 1    1 00011
## 2    1 01.00
## 3    1 0..00
## 4    1 1..10
## 5    1 0..00
## 6    1 00.00
# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.history)
## [1] 39
sal.data <- process.data(data=input.history,
                          model="Occupancy")
summary(sal.data)
##                  Length Class      Mode     
## data              2     data.frame list     
## model             1     -none-     character
## mixtures          1     -none-     numeric  
## freq             39     -none-     numeric  
## nocc              1     -none-     numeric  
## nocc.secondary    0     -none-     NULL     
## time.intervals    5     -none-     numeric  
## begin.time        1     -none-     numeric  
## age.unit          1     -none-     numeric  
## initial.ages      1     -none-     numeric  
## group.covariates  0     -none-     NULL     
## nstrata           1     -none-     numeric  
## strata.labels     0     -none-     NULL     
## counts            0     -none-     NULL     
## reverse           1     -none-     logical  
## areas             0     -none-     NULL     
## events            0     -none-     NULL
# modify the ddl if needed (e.g. for site level covariates)
sal.ddl <- make.design.data(sal.data)
sal.ddl
## $p
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 3         3           3     1   2    3   2    2
## 4         4           4     1   3    4   3    3
## 5         5           5     1   4    5   4    4
## 
## $Psi
##   par.index model.index group age time Age Time
## 1         1           6     1   0    1   0    0
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)
## [1] "p"   "Psi"
# Fit a model
# Note that formula DO NOT HAVE AN = SIGN
mod.fit <-  RMark::mark(sal.data, ddl=sal.ddl,
                        model="Occupancy",
                        model.parameters=list(
                          Psi   =list(formula=~1),
                          p     =list(formula=~1)
                        )
                     )
## 
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  137.613
## AICc :  141.9463
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)   -0.9444715 0.3316785 -1.5945614 -0.2943816
## Psi:(Intercept) -0.0226466 0.4636927 -0.9314843  0.8861910
## 
## 
## Real Parameter p
##         1        2        3        4        5
##  0.279998 0.279998 0.279998 0.279998 0.279998
## 
## 
## Real Parameter Psi
##          1
##  0.4943386
summary(mod.fit)
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  137.613
## AICc :  141.9463
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)   -0.9444715 0.3316785 -1.5945614 -0.2943816
## Psi:(Intercept) -0.0226466 0.4636927 -0.9314843  0.8861910
## 
## 
## Real Parameter p
##         1        2        3        4        5
##  0.279998 0.279998 0.279998 0.279998 0.279998
## 
## 
## Real Parameter Psi
##          1
##  0.4943386
# Look the objects returned in more details
names(mod.fit)
##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"
names(mod.fit$results)
##  [1] "lnl"              "deviance"         "deviance.df"     
##  [4] "npar"             "n"                "AICc"            
##  [7] "beta"             "real"             "beta.vcv"        
## [10] "derived"          "derived.vcv"      "covariate.values"
## [13] "singular"         "real.vcv"
# look at estimates on beta and original scale
mod.fit$results$beta  # on the logit scale
##                   estimate        se        lcl        ucl
## p:(Intercept)   -0.9444715 0.3316785 -1.5945614 -0.2943816
## Psi:(Intercept) -0.0226466 0.4636927 -0.9314843  0.8861910
mod.fit$results$real# on the regular 0-1 scale for each site
##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.2799980 0.0668661 0.1687431 0.4269315              
## Psi g1 a0 t1 0.4943386 0.1159083 0.2826237 0.7081035
# derived variabldes is the occupancy probability 
names(mod.fit$results$derived)
## [1] "Occupancy"
mod.fit$results$derived$Occupancy
##    estimate        se       lcl       ucl
## 1 0.4943386 0.1159083 0.2826237 0.7081035
# alternatively
get.real(mod.fit, "Psi", se=TRUE)
##              all.diff.index par.index  estimate        se       lcl
## Psi g1 a0 t1              6         2 0.4943386 0.1159083 0.2826237
##                    ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.7081035                   1   0    1   0    0
get.real(mod.fit, "Psi", pim=TRUE)
##          1
##  0.4943386
get.real(mod.fit, "p", se=TRUE)
##            all.diff.index par.index estimate        se       lcl       ucl
## p g1 a0 t1              1         1 0.279998 0.0668661 0.1687431 0.4269315
## p g1 a1 t2              2         1 0.279998 0.0668661 0.1687431 0.4269315
## p g1 a2 t3              3         1 0.279998 0.0668661 0.1687431 0.4269315
## p g1 a3 t4              4         1 0.279998 0.0668661 0.1687431 0.4269315
## p g1 a4 t5              5         1 0.279998 0.0668661 0.1687431 0.4269315
##            fixed    note group age time Age Time
## p g1 a0 t1                   1   0    1   0    0
## p g1 a1 t2                   1   1    2   1    1
## p g1 a2 t3                   1   2    3   2    2
## p g1 a3 t4                   1   3    4   3    3
## p g1 a4 t5                   1   4    5   4    4
# cleanup
cleanup()
## Delete file mark001.inp (y/n)[y]?
## Delete file mark001.out (y/n)[y]?
## Delete file mark001.res (y/n)[y]?
## Delete file mark001.vcv (y/n)[y]?