# Example using model averaging on the pronghorn dataset
# 256 sites, two sampling occasions per site
# Covariates used in this example:
# 1. sagebrush (continuos) - Sagebrush density
# 2. aspect (Categorical) - Compass direction slope faces (N,S,E,W)

# Code contributed by Neil Faught - 26/11/2018

#  RMark package

library(readxl)
library(RMark)
## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.data<-read.csv(file.path("..","pronghorn.csv"), header=TRUE, as.is=TRUE, strip.white=TRUE)
head(input.data)
##   Plot Survey.1 Survey.2 sagebrush slope  DW aspect
## 1    1        0        0       9.0     0  25      W
## 2    2        0        1      18.0     5 150      S
## 3    3        0        0       8.4    45 150      W
## 4    4        0        0       3.2    65 375      E
## 5    5        0        1      12.0     5 375      S
## 6    6        1        1       7.8     5 150      S
# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.data)
## [1] 256
range(input.data[, c("Survey.1","Survey.2")], na.rm=TRUE)
## [1] 0 1
sum(is.na(input.data[, c("Survey.1","Survey.2")]))
## [1] 0
input.history <- input.data[, c("Survey.1","Survey.2")]
head(input.history)
##   Survey.1 Survey.2
## 1        0        0
## 2        0        1
## 3        0        0
## 4        0        0
## 5        0        1
## 6        1        1
site.covar <- input.data[, c("sagebrush","aspect")]
head(site.covar)
##   sagebrush aspect
## 1       9.0      W
## 2      18.0      S
## 3       8.4      W
## 4       3.2      E
## 5      12.0      S
## 6       7.8      S
#Convert aspect to a factor
site.covar$aspect = as.factor(site.covar$aspect)

# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
                            ch=apply(input.history,1,paste, collapse=""), 
                            stringsAsFactors=FALSE)
head(input.history)
##   freq ch
## 1    1 00
## 2    1 01
## 3    1 00
## 4    1 00
## 5    1 01
## 6    1 11
# Change any NA to . in the capture history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
##   freq ch
## 1    1 00
## 2    1 01
## 3    1 00
## 4    1 00
## 5    1 01
## 6    1 11
#Add aspect and sagebrush data to the capture history
input.history = cbind(input.history, site.covar)
head(input.history)
##   freq ch sagebrush aspect
## 1    1 00       9.0      W
## 2    1 01      18.0      S
## 3    1 00       8.4      W
## 4    1 00       3.2      E
## 5    1 01      12.0      S
## 6    1 11       7.8      S
#Create the data structure
pronghorn.data <- process.data(data = input.history,
                               group="aspect",
                               model = "Occupancy")
summary(pronghorn.data)
##                  Length Class      Mode     
## data             5      data.frame list     
## model            1      -none-     character
## mixtures         1      -none-     numeric  
## freq             4      data.frame list     
## nocc             1      -none-     numeric  
## nocc.secondary   0      -none-     NULL     
## time.intervals   2      -none-     numeric  
## begin.time       1      -none-     numeric  
## age.unit         1      -none-     numeric  
## initial.ages     4      -none-     numeric  
## group.covariates 1      data.frame list     
## nstrata          1      -none-     numeric  
## strata.labels    0      -none-     NULL     
## counts           0      -none-     NULL     
## reverse          1      -none-     logical  
## areas            0      -none-     NULL     
## events           0      -none-     NULL
# modify the ddl if needed (e.g. for site level covariates)
pronghorn.ddl <- make.design.data(pronghorn.data)
pronghorn.ddl
## $p
##   par.index model.index group age time Age Time aspect
## 1         1           1     E   0    1   0    0      E
## 2         2           2     E   1    2   1    1      E
## 3         3           3     N   0    1   0    0      N
## 4         4           4     N   1    2   1    1      N
## 5         5           5     S   0    1   0    0      S
## 6         6           6     S   1    2   1    1      S
## 7         7           7     W   0    1   0    0      W
## 8         8           8     W   1    2   1    1      W
## 
## $Psi
##   par.index model.index group age time Age Time aspect
## 1         1           9     E   0    1   0    0      E
## 2         2          10     N   0    1   0    0      N
## 3         3          11     S   0    1   0    0      S
## 4         4          12     W   0    1   0    0      W
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)
## [1] "p"   "Psi"
# Fit a model
# Model where detection probability (p) varies with aspect, and
# Occupancy (Psi) varies with sagebrush
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made

mod.fit <-  RMark::mark(pronghorn.data, ddl = pronghorn.ddl,
                        model="Occupancy",
                        model.parameters=list(
                          Psi   =list(formula=~sagebrush),
                          p     =list(formula=~aspect) 
                        )
)
## 
## Output summary for Occupancy model
## Name : p(~aspect)Psi(~sagebrush) 
## 
## Npar :  6
## -2lnL:  627.7315
## AICc :  640.0689
## 
## Beta
##                   estimate        se        lcl       ucl
## p:(Intercept)   -0.5326105 0.3995735 -1.3157746 0.2505535
## p:aspectN        1.2035107 0.5514201  0.1227274 2.2842941
## p:aspectS        0.3587193 0.4692243 -0.5609604 1.2783990
## p:aspectW        0.1960692 0.3988148 -0.5856078 0.9777461
## Psi:(Intercept)  0.8282348 0.4045104  0.0353943 1.6210752
## Psi:sagebrush    0.0392903 0.0575420 -0.0734920 0.1520726
## 
## 
## Real Parameter p
##                       1         2
## Group:aspectE 0.3699082 0.3699082
## Group:aspectN 0.6617047 0.6617047
## Group:aspectS 0.4566364 0.4566364
## Group:aspectW 0.4166499 0.4166499
## 
## 
## Real Parameter Psi
##                       1
## Group:aspectE 0.7294364
## Group:aspectN 0.7294364
## Group:aspectS 0.7294364
## Group:aspectW 0.7294364
summary(mod.fit)
## Output summary for Occupancy model
## Name : p(~aspect)Psi(~sagebrush) 
## 
## Npar :  6
## -2lnL:  627.7315
## AICc :  640.0689
## 
## Beta
##                   estimate        se        lcl       ucl
## p:(Intercept)   -0.5326105 0.3995735 -1.3157746 0.2505535
## p:aspectN        1.2035107 0.5514201  0.1227274 2.2842941
## p:aspectS        0.3587193 0.4692243 -0.5609604 1.2783990
## p:aspectW        0.1960692 0.3988148 -0.5856078 0.9777461
## Psi:(Intercept)  0.8282348 0.4045104  0.0353943 1.6210752
## Psi:sagebrush    0.0392903 0.0575420 -0.0734920 0.1520726
## 
## 
## Real Parameter p
##                       1         2
## Group:aspectE 0.3699082 0.3699082
## Group:aspectN 0.6617047 0.6617047
## Group:aspectS 0.4566364 0.4566364
## Group:aspectW 0.4166499 0.4166499
## 
## 
## Real Parameter Psi
##                       1
## Group:aspectE 0.7294364
## Group:aspectN 0.7294364
## Group:aspectS 0.7294364
## Group:aspectW 0.7294364
# Look at the objects returned in more details
names(mod.fit)
##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"
# look at estimates on beta and original scale
mod.fit$results$beta  # on the logit scale
##                   estimate        se        lcl       ucl
## p:(Intercept)   -0.5326105 0.3995735 -1.3157746 0.2505535
## p:aspectN        1.2035107 0.5514201  0.1227274 2.2842941
## p:aspectS        0.3587193 0.4692243 -0.5609604 1.2783990
## p:aspectW        0.1960692 0.3988148 -0.5856078 0.9777461
## Psi:(Intercept)  0.8282348 0.4045104  0.0353943 1.6210752
## Psi:sagebrush    0.0392903 0.0575420 -0.0734920 0.1520726
mod.fit$results$real# on the regular 0-1 scale for each site
##               estimate        se       lcl       ucl fixed    note
## p gE a0 t1   0.3699082 0.0931310 0.2115221 0.5623127              
## p gN a0 t1   0.6617047 0.0971485 0.4551924 0.8207611              
## p gS a0 t1   0.4566364 0.0808420 0.3073589 0.6141317              
## p gW a0 t1   0.4166499 0.0499630 0.3231249 0.5165852              
## Psi gE a0 t1 0.7294364 0.0680171 0.5784167 0.8412093
# derived variables is the occupancy probability 
names(mod.fit$results$derived)
## [1] "Occupancy"
mod.fit$results$derived$Occupancy
##    estimate         se       lcl       ucl
## 1 0.7294364 0.06801714 0.5784167 0.8412093
## 2 0.7294364 0.06801714 0.5784167 0.8412093
## 3 0.7294364 0.06801714 0.5784167 0.8412093
## 4 0.7294364 0.06801714 0.5784167 0.8412093
# get the psi-values as function of the covariates
pronghorn.ddl$Psi # see the index numbers
##   par.index model.index group age time Age Time aspect
## 1         1           9     E   0    1   0    0      E
## 2         2          10     N   0    1   0    0      N
## 3         3          11     S   0    1   0    0      S
## 4         4          12     W   0    1   0    0      W
sage.df <-data.frame(sagebrush=seq(min(input.history$sagebrush,na.rm=TRUE),
                                   max(input.history$sagebrush, na.rm=TRUE),.5)) 
pred.psi <- covariate.predictions(mod.fit, indices=9, data=sage.df)
head(pred.psi$estimates)
##   vcv.index model.index par.index covdata  estimate         se       lcl
## 1         1           5         9     0.0 0.6959816 0.08559086 0.5088477
## 2         2           5         9     0.5 0.7001222 0.08190179 0.5208093
## 3         3           5         9     1.0 0.7042305 0.07859513 0.5319434
## 4         4           5         9     1.5 0.7083059 0.07569940 0.5421610
## 5         5           5         9     2.0 0.7123481 0.07324068 0.5513804
## 6         6           5         9     2.5 0.7163567 0.07124063 0.5595319
##         ucl fixed
## 1 0.8349433      
## 2 0.8337547      
## 3 0.8330086      
## 4 0.8327571      
## 5 0.8330475      
## 6 0.8339188
ggplot(data=pred.psi$estimates, aes(x=covdata, y=estimate))+
  ggtitle("Occupancy probability as a function of sagebrush density")+
  geom_point()+
  geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
  ylim(0,1)+
  ylab("Estimated probability of occupancy")+
  xlab("Sagebrush")

# It is often convenient to estimate occupancy for each site using the
# values of the covariates specific for that site.
# This is similar to RPresence which gives estimates at each site.

# We create a data frame with the covariates etc as measure on the input.history dataframe.
# But we also need to add the appropriate index number for psi to the covariate data frame to account
# for the groups definition
pronghorn.ddl$Psi
##   par.index model.index group age time Age Time aspect
## 1         1           9     E   0    1   0    0      E
## 2         2          10     N   0    1   0    0      N
## 3         3          11     S   0    1   0    0      S
## 4         4          12     W   0    1   0    0      W
site.covar <- merge(pronghorn.ddl$Psi, site.covar)
head(site.covar)
##   aspect par.index model.index group age time Age Time sagebrush
## 1      E         1           9     E   0    1   0    0       6.1
## 2      E         1           9     E   0    1   0    0       4.8
## 3      E         1           9     E   0    1   0    0       3.0
## 4      E         1           9     E   0    1   0    0       3.2
## 5      E         1           9     E   0    1   0    0       3.2
## 6      E         1           9     E   0    1   0    0       1.8
# we only want a prediction for that particular model.index.
# we need to create a variable "index" that has the model.index
site.covar$index <- site.covar$model.index

site.pred <- covariate.predictions(mod.fit,
                                   data=site.covar)
ggplot(site.pred$estimates, aes(x=sagebrush, y=estimate))+
  ggtitle("Model predictions of occupancy for each site")+
  geom_point()+
  geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=0.2)

#cleanup
cleanup(ask=FALSE)