# Single Species Single Season Occupancy models using unmarked packages
# Missing data - notice how the missing data are read in.
#  unmarked package

library(readxl)
library(unmarked)
## Loading required package: reshape
## Loading required package: lattice
## Loading required package: parallel
## Loading required package: Rcpp
library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.data <- readxl::read_excel("../salamander.xls",
                                 sheet="MissingData",
                                 na="-",
                                 col_names=FALSE)  # notice no column names in row 1 of data file. 

head(input.data)
## # A tibble: 6 x 5
##    X__1  X__2  X__3  X__4  X__5
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     0     0     0     1     1
## 2     0     1    NA     0     0
## 3     0    NA    NA     0     0
## 4     1    NA    NA     1     0
## 5     0    NA    NA     0     0
## 6     0     0    NA     0     0
# Extract the history records
input.history <- input.data[, 1:5] # the history extracted
head(input.history)
## # A tibble: 6 x 5
##    X__1  X__2  X__3  X__4  X__5
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     0     0     0     1     1
## 2     0     1    NA     0     0
## 3     0    NA    NA     0     0
## 4     1    NA    NA     1     0
## 5     0    NA    NA     0     0
## 6     0     0    NA     0     0
# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.history)
## [1] 39
ncol(input.history)
## [1] 5
range(input.history, na.rm=TRUE) # check that all values are either 0 or 1
## [1] 0 1
sum(is.na(input.history))    # are there any missing values?
## [1] 14
# Create the data structore for the occupancy model
# unmarked does not have any reserved keywords like RPresence so 
# we need to construct a covariate data frame with the covariates
# again stacked. The ordering is different in unmarked.
# Here you want for site 1, the visit specific covariates, then
# for site 2, etc.
# This needs to be added to the unmarked dataframe.

# We also want to set up covariate values where we
# set the detection probability equal in first 2 occasions and last 3 occasions.
# We need to define a survey covariate that has two levels 
obs.covar <- data.frame( Site=rep(1:nrow(input.history), each=ncol(input.history)),
                         Visit=rep(1:ncol(input.history), nrow(input.history)),
                         Time =rep(c("T1","T2","T3","T4","T5"), nrow(input.history)),
                         D =rep(c("D1","D1","D2","D2","D2"), nrow(input.history)))
obs.covar[1:10,]
##    Site Visit Time  D
## 1     1     1   T1 D1
## 2     1     2   T2 D1
## 3     1     3   T3 D2
## 4     1     4   T4 D2
## 5     1     5   T5 D2
## 6     2     1   T1 D1
## 7     2     2   T2 D1
## 8     2     3   T3 D2
## 9     2     4   T4 D2
## 10    2     5   T5 D2
salamander.UMF <- unmarked::unmarkedFrameOccu(
                     y = input.history,
                     obsCovs=obs.covar) 
summary(salamander.UMF)
## unmarkedFrame Object
## 
## 39 sites
## Maximum number of observations per site: 5 
## Mean number of observations per site: 4.64 
## Sites with at least one detection: 15 
## 
## Tabulation of y observations:
##    0    1 <NA> 
##  156   25   14 
## 
## Observation-level covariates:
##       Site        Visit   Time     D      
##  Min.   : 1   Min.   :1   T1:39   D1: 78  
##  1st Qu.:10   1st Qu.:2   T2:39   D2:117  
##  Median :20   Median :3   T3:39           
##  Mean   :20   Mean   :3   T4:39           
##  3rd Qu.:30   3rd Qu.:4   T5:39           
##  Max.   :39   Max.   :5
plot(salamander.UMF)

# Fit some models.
# Note that formula DO NOT HAVE AN = SIGN
# The two formula are for detecton and occupancy in that order
mod.pdot.u <- unmarked::occu(~1 ~1 , data=salamander.UMF, se=TRUE)

# This creates a complicated S4 object with SLOTS that can be accessed using various
# functions. This is more complcated than with RPresence
slotNames(mod.pdot.u)
##  [1] "knownOcc"         "fitType"          "call"            
##  [4] "formula"          "data"             "sitesRemoved"    
##  [7] "estimates"        "AIC"              "opt"             
## [10] "negLogLike"       "nllFun"           "bootstrapSamples"
## [13] "covMatBS"
# These functions return estimates on the LOGIT scale - not very useful for most people
coef(mod.pdot.u, type="state")
##    psi(Int) 
## -0.02264653
SE(mod.pdot.u, type="state")
##  psi(Int) 
## 0.4636931
confint(mod.pdot.u, type="state")
##               0.025     0.975
## psi(Int) -0.9314683 0.8861753
# It is possible to use backTransform() but only if NO covariates
backTransform(mod.pdot.u, type='state')
## Backtransformed linear combination(s) of Occupancy estimate(s)
## 
##  Estimate    SE LinComb (Intercept)
##     0.494 0.116 -0.0226           1
## 
## Transformation: logistic
# better to use predict() to do the estimation.
# Because there are no covariates here, the data.frame is empty
newdata <- data.frame(factor=1:1)
predict(mod.pdot.u, type='state', newdata=newdata)
##   Predicted        SE     lower     upper
## 1 0.4943386 0.1159084 0.2826269 0.7081003
# Ditto for the detection probability
coef(mod.pdot.u,    type="det")
##     p(Int) 
## -0.9444735
SE(mod.pdot.u,      type="det")
##    p(Int) 
## 0.3316786
confint(mod.pdot.u, type="det")
##            0.025      0.975
## p(Int) -1.594552 -0.2943953
backTransform(mod.pdot.u, type="det")
## Backtransformed linear combination(s) of Detection estimate(s)
## 
##  Estimate     SE LinComb (Intercept)
##      0.28 0.0669  -0.944           1
## 
## Transformation: logistic
predict(mod.pdot.u, type='det', newdata=newdata)
##   Predicted         SE     lower     upper
## 1 0.2799976 0.06686607 0.1687445 0.4269282
# Look at the posterior probability of detection
# The best unbiased predictor of the posterior probability of detection
bup(ranef(mod.pdot.u))
##  [1] 1.0000000 1.0000000 0.2673423 1.0000000 0.2673423 0.2080616 1.0000000
##  [8] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [15] 0.2080616 0.1590719 0.1590719 0.1590719 0.1590719 0.1590719 0.2080616
## [22] 0.1590719 0.1590719 0.1590719 0.1590719 0.2080616 0.1590719 0.1590719
## [29] 0.1590719 0.1590719 0.1590719 0.2080616 0.1590719 0.1590719 0.1590719
## [36] 1.0000000 1.0000000 1.0000000 1.0000000
# Model where p(t) varies across survey occasions
# 

# Note that formula DO NOT HAVE AN = SIGN
# The two formula are for detecton and occupancy in that order
mod.pt.u <- unmarked::occu(~Time ~1 , data=salamander.UMF, se=TRUE)

# This creates a complicated S4 object with SLOTS that can be accessed using various
# functions. This is more complcated than with RPresence

# These functions return estimates on the LOGIT scale - not very useful for most people
# better to use predict() to do the estimation.
# Because there are no covariates here, the data.frame is empty
newdata <- data.frame(Time=c("T1","T2","T3","T4","T5"))
predict(mod.pt.u, type='state', newdata=newdata)[1,]
##   Predicted        SE     lower     upper
## 1 0.4751652 0.1086698 0.2781848 0.6801893
# Ditto for the detection probability
cbind(newdata, predict(mod.pt.u, type='det', newdata=newdata))
##   Time  Predicted        SE       lower     upper
## 1   T1 0.21585341 0.1012475 0.078536661 0.4706350
## 2   T2 0.05841593 0.0573941 0.007961663 0.3241366
## 3   T3 0.40747291 0.1443568 0.175622901 0.6894276
## 4   T4 0.43652187 0.1331344 0.211460634 0.6911633
## 5   T5 0.34951631 0.1270821 0.152295821 0.6164187
# Look at the posterior probability of detection
# The best unbiased predictor of the posterior probability of detection
bup(ranef(mod.pt.u))
##  [1] 1.0000000 1.0000000 0.2064848 1.0000000 0.2064848 0.1967965 1.0000000
##  [8] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [15] 0.1824617 0.1267731 0.1267731 0.1267731 0.1267731 0.1267731 0.1967965
## [22] 0.1267731 0.1267731 0.1267731 0.1267731 0.2048636 0.1267731 0.1267731
## [29] 0.1267731 0.1267731 0.1267731 0.1824617 0.1267731 0.1267731 0.1267731
## [36] 1.0000000 1.0000000 1.0000000 1.0000000
sum(bup(ranef(mod.pt.u)))
## [1] 18.53149
# Note that formula DO NOT HAVE AN = SIGN
# The two formula are for detecton and occupancy in that order
mod.pcustom.u <- unmarked::occu(~D ~1 , data=salamander.UMF, se=TRUE)

# This creates a complicated S4 object with SLOTS that can be accessed using various
# functions. This is more complcated than with RPresence

# These functions return estimates on the LOGIT scale - not very useful for most people
# better to use predict() to do the estimation.
# Because there are no covariates here, the data.frame is empty
newdata <- data.frame(D=c("D1","D1","D2","D2","D2"))
predict(mod.pcustom.u, type='state', newdata=newdata)[1,]
##   Predicted        SE     lower     upper
## 1 0.4782833 0.1097203 0.2791387 0.6845789
# Ditto for the detection probability
cbind(newdata, predict(mod.pcustom.u, type='det', newdata=newdata))
##    D Predicted         SE      lower     upper
## 1 D1 0.1394318 0.06178666 0.05576335 0.3077255
## 2 D1 0.1394318 0.06178666 0.05576335 0.3077255
## 3 D2 0.3959021 0.09213979 0.23547288 0.5823732
## 4 D2 0.3959021 0.09213979 0.23547288 0.5823732
## 5 D2 0.3959021 0.09213979 0.23547288 0.5823732
# Look at the posterior probability of detection
# The best unbiased predictor of the posterior probability of detection
bup(ranef(mod.pcustom.u))
##  [1] 1.0000000 1.0000000 0.2235457 1.0000000 0.2235457 0.1985655 1.0000000
##  [8] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [15] 0.1985655 0.1301873 0.1301873 0.1301873 0.1301873 0.1301873 0.1985655
## [22] 0.1301873 0.1301873 0.1301873 0.1301873 0.1985655 0.1301873 0.1301873
## [29] 0.1301873 0.1301873 0.1301873 0.1985655 0.1301873 0.1301873 0.1301873
## [36] 1.0000000 1.0000000 1.0000000 1.0000000
sum(bup(ranef(mod.pcustom.u)))
## [1] 18.6531
# Model averaging
models.u <-unmarked::fitList(mod.pdot.u, mod.pt.u, mod.pcustom.u)
## Warning in unmarked::fitList(mod.pdot.u, mod.pt.u, mod.pcustom.u): Your
## list was unnamed, so model names were added as object names
aic.table.u <- unmarked::modSel(models.u)
aic.table.u
##               nPars    AIC delta AICwt cumltvWt
## mod.pcustom.u     3 136.53  0.00 0.812     0.81
## mod.pt.u          6 140.29  3.76 0.124     0.94
## mod.pdot.u        2 141.61  5.08 0.064     1.00
# Get model averaged estimates of occupancy
predict(models.u, type="state")[1,]
##   Predicted        SE     lower    upper
## 1  0.478925 0.1100612 0.2792439 0.685541
# Get model averaged estimates of detection. 
# Notice these are one big list of nsites x nvisits long
# with the detection probabilities for each site
cbind(obs.covar, predict(models.u, type="det"))[1:10,]
##    Site Visit Time  D Predicted         SE      lower     upper
## 1     1     1   T1 D1 0.1578838 0.07572345 0.06581161 0.3355123
## 2     1     2   T2 D1 0.1384026 0.07239700 0.05707871 0.3173846
## 3     1     3   T3 D2 0.3899165 0.10122755 0.22379675 0.5856722
## 4     1     4   T4 D2 0.3935110 0.10061047 0.22823129 0.5858870
## 5     1     5   T5 D2 0.3827450 0.09970000 0.22091027 0.5766382
## 6     2     1   T1 D1 0.1578838 0.07572345 0.06581161 0.3355123
## 7     2     2   T2 D1 0.1384026 0.07239700 0.05707871 0.3173846
## 8     2     3   T3 D2 0.3899165 0.10122755 0.22379675 0.5856722
## 9     2     4   T4 D2 0.3935110 0.10061047 0.22823129 0.5858870
## 10    2     5   T5 D2 0.3827450 0.09970000 0.22091027 0.5766382