# Brook Trout

# This is from MARK demo files on occupancy.
# Collected via electrofishing three 50 m sections of streams at 77 sites 
# in the Upper Chattachochee River basin. 
#       77 streams 3 occasions, 4 covariates: elevation, cross sectional area each occasion.

# Single Species Single Season Occupancy


# Fitting models using RMark

# 2018-11-09 Code contributed by Neil Faught
library(readxl)
library(RMark)
## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.data <- readxl::read_excel("../BrookTrout.xls",
                                 sheet="DetectionHistory",
                                 na="-",
                                 col_names=FALSE)  # notice no column names in row 1 of data file. 

head(input.data)
## # A tibble: 6 x 3
##    X__1  X__2  X__3
##   <dbl> <dbl> <dbl>
## 1  0     0        0
## 2  1.00  0        0
## 3  0     0        0
## 4  0     0        0
## 5  0     0        0
## 6  0     1.00     0
# Extract the history records

# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
                            ch=apply(input.data,1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)
##   freq  ch
## 1    1 000
## 2    1 100
## 3    1 000
## 4    1 000
## 5    1 000
## 6    1 010
# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
##   freq  ch
## 1    1 000
## 2    1 100
## 3    1 000
## 4    1 000
## 5    1 000
## 6    1 010
# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.history)
## [1] 77
# Get the pond information
# Get the elevation information
elevation.data <- readxl::read_excel("../BrookTrout.xls",
                                     sheet="Elevation",
                                     na="-",
                                     col_names=TRUE)  
input.history$Elevation <- elevation.data$Elevation/1000  # standardized it a bit
head(input.history)
##   freq  ch Elevation
## 1    1 000   4.26614
## 2    1 100   4.03882
## 3    1 000   2.03158
## 4    1 000   3.43917
## 5    1 000   3.03650
## 6    1 010   2.05014
# Get the cross sectional width
cross.data <- readxl::read_excel("../BrookTrout.xls",
                                 sheet="CrossSectionWidth",
                                 na="-",
                                 col_names=TRUE)  
head(cross.data)
## # A tibble: 6 x 3
##   Cross1 Cross2 Cross3
##    <dbl>  <dbl>  <dbl>
## 1  2.87   2.61    2.73
## 2  0.880  2.54    1.16
## 3  1.50   0.960   1.52
## 4  0.260  1.50    1.13
## 5  0.890  2.59    2.69
## 6  0.670  2.32    1.58
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made
# so do not set these to missing as in RPresence

input.history <- cbind(input.history, cross.data)
head(input.history)
##   freq  ch Elevation Cross1 Cross2 Cross3
## 1    1 000   4.26614   2.87   2.61   2.73
## 2    1 100   4.03882   0.88   2.54   1.16
## 3    1 000   2.03158   1.50   0.96   1.52
## 4    1 000   3.43917   0.26   1.50   1.13
## 5    1 000   3.03650   0.89   2.59   2.69
## 6    1 010   2.05014   0.67   2.32   1.58
# Create the data structure
trout.data <- process.data(data=input.history,
                           model="Occupancy")
summary(trout.data)
##                  Length Class      Mode     
## data              6     data.frame list     
## model             1     -none-     character
## mixtures          1     -none-     numeric  
## freq             77     -none-     numeric  
## nocc              1     -none-     numeric  
## nocc.secondary    0     -none-     NULL     
## time.intervals    3     -none-     numeric  
## begin.time        1     -none-     numeric  
## age.unit          1     -none-     numeric  
## initial.ages      1     -none-     numeric  
## group.covariates  0     -none-     NULL     
## nstrata           1     -none-     numeric  
## strata.labels     0     -none-     NULL     
## counts            0     -none-     NULL     
## reverse           1     -none-     logical  
## areas             0     -none-     NULL     
## events            0     -none-     NULL
# If survey covariates are present, modify the ddl.
trout.ddl <- make.design.data(trout.data)
trout.ddl
## $p
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 3         3           3     1   2    3   2    2
## 
## $Psi
##   par.index model.index group age time Age Time
## 1         1           4     1   0    1   0    0
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)
## [1] "p"   "Psi"
# Fit a model
# Models where detection probability (p) varies with temperature (Cross), and
# Occupancy (Psi) varies with Elevation
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made
mod.fit <-  RMark::mark(trout.data, ddl = trout.ddl,
                        model="Occupancy",
                        model.parameters=list(
                          Psi   =list(formula=~Elevation),
                          p     =list(formula=~Cross) 
                        )
)
## 
## Output summary for Occupancy model
## Name : p(~Cross)Psi(~Elevation) 
## 
## Npar :  4
## -2lnL:  193.7903
## AICc :  202.3458
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.3988191 0.4897724  0.4388651  2.3587731
## p:Cross         -0.8516878 0.2732420 -1.3872422 -0.3161335
## Psi:(Intercept) -4.4804500 1.1653271 -6.7644911 -2.1964090
## Psi:Elevation    1.4996933 0.3918882  0.7315924  2.2677943
## 
## 
## Real Parameter p
##          1         2         3
##  0.5281628 0.4986601 0.5148322
## 
## 
## Real Parameter Psi
##         1
##  0.452684
summary(mod.fit)
## Output summary for Occupancy model
## Name : p(~Cross)Psi(~Elevation) 
## 
## Npar :  4
## -2lnL:  193.7903
## AICc :  202.3458
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.3988191 0.4897724  0.4388651  2.3587731
## p:Cross         -0.8516878 0.2732420 -1.3872422 -0.3161335
## Psi:(Intercept) -4.4804500 1.1653271 -6.7644911 -2.1964090
## Psi:Elevation    1.4996933 0.3918882  0.7315924  2.2677943
## 
## 
## Real Parameter p
##          1         2         3
##  0.5281628 0.4986601 0.5148322
## 
## 
## Real Parameter Psi
##         1
##  0.452684
# Look the objects returned in more details
names(mod.fit)
##  [1] "data"             "model"            "title"           
##  [4] "model.name"       "links"            "mixtures"        
##  [7] "call"             "parameters"       "time.intervals"  
## [10] "number.of.groups" "group.labels"     "nocc"            
## [13] "begin.time"       "covariates"       "fixed"           
## [16] "design.matrix"    "pims"             "design.data"     
## [19] "strata.labels"    "mlogit.list"      "profile.int"     
## [22] "simplify"         "model.parameters" "results"         
## [25] "output"
# look at estimates on beta and original scale
mod.fit$results$beta  # on the logit scale
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.3988191 0.4897724  0.4388651  2.3587731
## p:Cross         -0.8516878 0.2732420 -1.3872422 -0.3161335
## Psi:(Intercept) -4.4804500 1.1653271 -6.7644911 -2.1964090
## Psi:Elevation    1.4996933 0.3918882  0.7315924  2.2677943
mod.fit$results$real# on the regular 0-1 scale for each site
##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.5281628 0.0612429 0.4088064 0.6443838              
## p g1 a1 t2   0.4986601 0.0617351 0.3800429 0.6174283              
## p g1 a2 t3   0.5148322 0.0613377 0.3960466 0.6319654              
## Psi g1 a0 t1 0.4526840 0.0876822 0.2924587 0.6233539
# derived variables is the occupancy probability 
names(mod.fit$results$derived)
## [1] "Occupancy"
mod.fit$results$derived$Occupancy
##   estimate         se       lcl       ucl
## 1 0.452684 0.08768222 0.2924587 0.6233539
# get the psi-values as function of the covariates
trout.ddl$Psi # see the index numbers
##   par.index model.index group age time Age Time
## 1         1           4     1   0    1   0    0
Elev.df <-data.frame(Elevation=seq(min(input.history$Elevation,na.rm=TRUE),
                                   max(input.history$Elevation, na.rm=TRUE),.1)) 
pred.psi <- covariate.predictions(mod.fit, indices=4, data=Elev.df)
head(pred.psi$estimates)
##   vcv.index model.index par.index covdata   estimate         se
## 1         1           4         4 0.91107 0.04252838 0.03388788
## 2         2           4         4 1.01107 0.04907175 0.03718745
## 3         3           4         4 1.11107 0.05656240 0.04066300
## 4         4           4         4 1.21107 0.06511818 0.04429000
## 5         5           4         4 1.31107 0.07486542 0.04803500
## 6         6           4         4 1.41107 0.08593757 0.05185508
##           lcl       ucl fixed
## 1 0.008617708 0.1849796      
## 2 0.010706642 0.1974698      
## 3 0.013285367 0.2107095      
## 4 0.016461027 0.2247370      
## 5 0.020360543 0.2395937      
## 6 0.025132462 0.2553243
ggplot(data=pred.psi$estimates, aes(x=covdata, y=estimate))+
  ggtitle("Occupancy probability as a function of elevation")+
  geom_point()+
  geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
  ylim(0,1)+
  ylab("Estimated probability of occupancy")+
  xlab("Elevation (km)")

# make a plot of the probability of detection as a function of Cross sectional width
Cross.df <- data.frame(Cross1=seq(min(cross.data,na.rm=TRUE),max(cross.data, na.rm=TRUE),0.1))

trout.ddl$p # see the index numbers
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 3         3           3     1   2    3   2    2
pred.p <- covariate.predictions(mod.fit, indices=1, data=Cross.df)
head(pred.p$estimates)
##   vcv.index model.index par.index covdata  estimate         se       lcl
## 1         1           1         1    0.25 0.7660065 0.07743585 0.5839802
## 2         2           1         1    0.35 0.7503966 0.07676222 0.5738208
## 3         3           1         1    0.45 0.7341068 0.07579342 0.5632828
## 4         4           1         1    0.55 0.7171546 0.07455201 0.5523139
## 5         5           1         1    0.65 0.6995639 0.07307307 0.5408518
## 6         6           1         1    0.75 0.6813653 0.07140556 0.5288222
##         ucl fixed
## 1 0.8841838      
## 2 0.8703435      
## 3 0.8552794      
## 4 0.8389934      
## 5 0.8215189      
## 6 0.8029271
plotdata <- cbind(Cross.df, pred.p$estimates)
head(plotdata)
##   Cross1 vcv.index model.index par.index covdata  estimate         se
## 1   0.25         1           1         1    0.25 0.7660065 0.07743585
## 2   0.35         2           1         1    0.35 0.7503966 0.07676222
## 3   0.45         3           1         1    0.45 0.7341068 0.07579342
## 4   0.55         4           1         1    0.55 0.7171546 0.07455201
## 5   0.65         5           1         1    0.65 0.6995639 0.07307307
## 6   0.75         6           1         1    0.75 0.6813653 0.07140556
##         lcl       ucl fixed
## 1 0.5839802 0.8841838      
## 2 0.5738208 0.8703435      
## 3 0.5632828 0.8552794      
## 4 0.5523139 0.8389934      
## 5 0.5408518 0.8215189      
## 6 0.5288222 0.8029271
ggplot(data=plotdata, aes(x=Cross1, y=estimate))+
  ggtitle("Detection probability as a function of cross sectional width")+
  geom_point()+
  geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
  ylim(0,1)

# It is often convenient to estimate occupancy for each site using the
# values of the covariates specific for that site.
# This is similar to RPresence which gives estimates at each site.

# We create a data frame with the covariates etc as measure on the input.history dataframe.
# But we also need to add the appropriate index number for psi to the covariate data frame to account
# for the groups definition

site.covariates <- input.history
site.covariates$freq <- NULL
site.covariates$ch   <- NULL

trout.ddl$Psi
##   par.index model.index group age time Age Time
## 1         1           4     1   0    1   0    0
site.covariates <- merge(trout.ddl$Psi, site.covariates)
head(site.covariates)
##   par.index model.index group age time Age Time Elevation Cross1 Cross2
## 1         1           4     1   0    1   0    0   4.26614   2.87   2.61
## 2         1           4     1   0    1   0    0   4.03882   0.88   2.54
## 3         1           4     1   0    1   0    0   2.03158   1.50   0.96
## 4         1           4     1   0    1   0    0   3.43917   0.26   1.50
## 5         1           4     1   0    1   0    0   3.03650   0.89   2.59
## 6         1           4     1   0    1   0    0   2.05014   0.67   2.32
##   Cross3
## 1   2.73
## 2   1.16
## 3   1.52
## 4   1.13
## 5   2.69
## 6   1.58
# we only want a prediction for that particular model.index.
# we need to create a variable "index" that has the model.index
site.covariates$index <- site.covariates$model.index

site.pred <- covariate.predictions(mod.fit,
                                   data=site.covariates)
ggplot(site.pred$estimates, aes(x=Elevation, y=estimate))+
  ggtitle("Model predictions of occupancy for each site")+
  geom_point()+
  geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=0.2)

# cleanup
cleanup(ask=FALSE)