# Brook Trout

# This is from MARK demo files on occupancy.
# Collected via electrofishing three 50 m sections of streams at 77 sites 
# in the Upper Chattachochee River basin. 
#       77 streams 3 occasions, 4 covariates: elevation, cross sectional area each occasion.

# Single Species Single Season Occupancy


# Fitting models using RMark

# 2018-08-15 Code contributed by Carl James Schwarz (cschwarz.stat.sfu.cs@gmail.com)
library(readxl)
library(RMark)
## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.data <- readxl::read_excel("../BrookTrout.xls",
                                 sheet="DetectionHistory",
                                 na="-",
                                 col_names=FALSE)  # notice no column names in row 1 of data file. 

head(input.data)
## # A tibble: 6 x 3
##    X__1  X__2  X__3
##   <dbl> <dbl> <dbl>
## 1  0     0        0
## 2  1.00  0        0
## 3  0     0        0
## 4  0     0        0
## 5  0     0        0
## 6  0     1.00     0
# Extract the history records

# Extract the history records and create a capture history
input.history <- data.frame(freq=1,
                            ch=apply(input.data,1,paste, collapse=""), stringsAsFactors=FALSE)
head(input.history)
##   freq  ch
## 1    1 000
## 2    1 100
## 3    1 000
## 4    1 000
## 5    1 000
## 6    1 010
# Change any NA to . in the chapter history
input.history$ch <- gsub("NA",".", input.history$ch, fixed=TRUE)
head(input.history)
##   freq  ch
## 1    1 000
## 2    1 100
## 3    1 000
## 4    1 000
## 5    1 000
## 6    1 010
# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.history)
## [1] 77
# Get the pond information
# Get the elevation information
elevation.data <- readxl::read_excel("../BrookTrout.xls",
                                     sheet="Elevation",
                                     na="-",
                                     col_names=TRUE)  
input.history$Elevation <- elevation.data$Elevation/1000  # standardized it a bit
head(input.history)
##   freq  ch Elevation
## 1    1 000   4.26614
## 2    1 100   4.03882
## 3    1 000   2.03158
## 4    1 000   3.43917
## 5    1 000   3.03650
## 6    1 010   2.05014
# Get the cross sectional width
cross.data <- readxl::read_excel("../BrookTrout.xls",
                                 sheet="CrossSectionWidth",
                                 na="-",
                                 col_names=TRUE)  
head(cross.data)
## # A tibble: 6 x 3
##   Cross1 Cross2 Cross3
##    <dbl>  <dbl>  <dbl>
## 1  2.87   2.61    2.73
## 2  0.880  2.54    1.16
## 3  1.50   0.960   1.52
## 4  0.260  1.50    1.13
## 5  0.890  2.59    2.69
## 6  0.670  2.32    1.58
# Notice that RMark does not allow missing values in time-varying covariates, even when visits are not made
# so do not set these to missing as in RPresence

input.history <- cbind(input.history, cross.data)
head(input.history)
##   freq  ch Elevation Cross1 Cross2 Cross3
## 1    1 000   4.26614   2.87   2.61   2.73
## 2    1 100   4.03882   0.88   2.54   1.16
## 3    1 000   2.03158   1.50   0.96   1.52
## 4    1 000   3.43917   0.26   1.50   1.13
## 5    1 000   3.03650   0.89   2.59   2.69
## 6    1 010   2.05014   0.67   2.32   1.58
# Create the data structure
trout.data <- process.data(data=input.history,
                         model="Occupancy")
summary(trout.data)
##                  Length Class      Mode     
## data              6     data.frame list     
## model             1     -none-     character
## mixtures          1     -none-     numeric  
## freq             77     -none-     numeric  
## nocc              1     -none-     numeric  
## nocc.secondary    0     -none-     NULL     
## time.intervals    3     -none-     numeric  
## begin.time        1     -none-     numeric  
## age.unit          1     -none-     numeric  
## initial.ages      1     -none-     numeric  
## group.covariates  0     -none-     NULL     
## nstrata           1     -none-     numeric  
## strata.labels     0     -none-     NULL     
## counts            0     -none-     NULL     
## reverse           1     -none-     logical  
## areas             0     -none-     NULL     
## events            0     -none-     NULL
# If survey covariates are present, modify the ddl.
trout.ddl <- make.design.data(trout.data)
trout.ddl
## $p
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 3         3           3     1   2    3   2    2
## 
## $Psi
##   par.index model.index group age time Age Time
## 1         1           4     1   0    1   0    0
## 
## $pimtypes
## $pimtypes$p
## $pimtypes$p$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi
## $pimtypes$Psi$pim.type
## [1] "all"
# What are the parameter names for Single Season Single Species models
setup.parameters("Occupancy", check=TRUE)
## [1] "p"   "Psi"
# define the list of models to fit
# Notice the commas between the column and the placement of the quotes
model.list.csv <- textConnection("
p,         Psi
~1,              ~1
~1,              ~Elevation
~Cross,          ~1
~Cross,         ~Elevation")


model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list
##        p        Psi model.number
## 1     ~1         ~1            1
## 2     ~1 ~Elevation            2
## 3 ~Cross         ~1            3
## 4 ~Cross ~Elevation            4
# fit the models
myobs <- ls()
myobs <- myobs[ grepl("m...",myobs,fixed=TRUE)]
cat("Removing ", myobs, "\n")
## Removing
rm(list=myobs)

model.fits <- plyr::dlply(model.list, "model.number", function(x,input.data, input.ddl){
  cat("\n\n***** Starting ", unlist(x), "\n")
  #browser()
  
  #fit <- myoccMod(model=list(as.formula(paste("psi",x$psi)),
  fit <- RMark::mark(input.data, ddl=input.ddl,
                     model="Occupancy",
                     model.parameters=list(
                       Psi   =list(formula=as.formula(eval(x$Psi))),
                       p     =list(formula=as.formula(eval(x$p)))
                     )
                     #,brief=TRUE,output=FALSE, delete=TRUE
                     #,invisible=TRUE,output=TRUE  # set for debugging
  )
  mnumber <- paste("m...",formatC(x$model.number, width = 3, format = "d", flag = "0"),sep="")
  assign( mnumber, fit, envir=.GlobalEnv)
  #browser()
  fit
  
},input.data=trout.data, input.ddl=trout.ddl)
## 
## 
## ***** Starting  ~1 ~1 1 
## 
## Output summary for Occupancy model
## Name : p(~1)Psi(~1) 
## 
## Npar :  2
## -2lnL:  228.7886
## AICc :  232.9507
## 
## Beta
##                   estimate        se        lcl       ucl
## p:(Intercept)    0.1340176 0.2478215 -0.3517125 0.6197476
## Psi:(Intercept) -0.1500513 0.2649242 -0.6693028 0.3692002
## 
## 
## Real Parameter p
##          1         2         3
##  0.5334543 0.5334543 0.5334543
## 
## 
## Real Parameter Psi
##          1
##  0.4625574
## 
## 
## ***** Starting  ~1 ~Elevation 2 
## 
## Output summary for Occupancy model
## Name : p(~1)Psi(~Elevation) 
## 
## Npar :  3
## -2lnL:  204.1568
## AICc :  210.4856
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.1458534 0.2427291 -0.3298956  0.6216023
## Psi:(Intercept) -4.2010319 1.0930922 -6.3434927 -2.0585712
## Psi:Elevation    1.3709710 0.3596842  0.6659901  2.0759520
## 
## 
## Real Parameter p
##          1         2         3
##  0.5363988 0.5363988 0.5363988
## 
## 
## Real Parameter Psi
##          1
##  0.4307754
## 
## 
## ***** Starting  ~Cross ~1 3 
## 
## Output summary for Occupancy model
## Name : p(~Cross)Psi(~1) 
## 
## Npar :  3
## -2lnL:  220.1193
## AICc :  226.448
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.2958769 0.4968140  0.3221215  2.2696324
## p:Cross         -0.7776221 0.2737404 -1.3141533 -0.2410909
## Psi:(Intercept) -0.1166741 0.2698332 -0.6455471  0.4121990
## 
## 
## Real Parameter p
##          1         2         3
##  0.5303794 0.5034525 0.5182143
## 
## 
## Real Parameter Psi
##          1
##  0.4708645
## 
## 
## ***** Starting  ~Cross ~Elevation 4 
## 
## Output summary for Occupancy model
## Name : p(~Cross)Psi(~Elevation) 
## 
## Npar :  4
## -2lnL:  193.7903
## AICc :  202.3458
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    1.3988191 0.4897724  0.4388651  2.3587731
## p:Cross         -0.8516879 0.2732420 -1.3872422 -0.3161335
## Psi:(Intercept) -4.4804496 1.1653269 -6.7644903 -2.1964089
## Psi:Elevation    1.4996932 0.3918882  0.7315924  2.2677940
## 
## 
## Real Parameter p
##          1         2         3
##  0.5281628 0.4986601 0.5148322
## 
## 
## Real Parameter Psi
##         1
##  0.452684
# examine individula model results
model.number <-2

summary(model.fits[[model.number]])
## Output summary for Occupancy model
## Name : p(~1)Psi(~Elevation) 
## 
## Npar :  3
## -2lnL:  204.1568
## AICc :  210.4856
## 
## Beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.1458534 0.2427291 -0.3298956  0.6216023
## Psi:(Intercept) -4.2010319 1.0930922 -6.3434927 -2.0585712
## Psi:Elevation    1.3709710 0.3596842  0.6659901  2.0759520
## 
## 
## Real Parameter p
##          1         2         3
##  0.5363988 0.5363988 0.5363988
## 
## 
## Real Parameter Psi
##          1
##  0.4307754
model.fits[[model.number]]$results$real
##               estimate        se       lcl       ucl fixed    note
## p g1 a0 t1   0.5363988 0.0603607 0.4182660 0.6505829              
## Psi g1 a0 t1 0.4307754 0.0823867 0.2814617 0.5938361
model.fits[[model.number]]$results$beta
##                   estimate        se        lcl        ucl
## p:(Intercept)    0.1458534 0.2427291 -0.3298956  0.6216023
## Psi:(Intercept) -4.2010319 1.0930922 -6.3434927 -2.0585712
## Psi:Elevation    1.3709710 0.3596842  0.6659901  2.0759520
model.fits[[model.number]]$results$derived
## $Occupancy
##    estimate         se       lcl       ucl
## 1 0.4307754 0.08238668 0.2814617 0.5938361
get.real(model.fits[[model.number]], "Psi", se=TRUE)
##              all.diff.index par.index  estimate        se       lcl
## Psi g1 a0 t1              4         2 0.4307754 0.0823867 0.2814617
##                    ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.5938361                   1   0    1   0    0
get.real(model.fits[[model.number]], "p",    se=TRUE)
##            all.diff.index par.index  estimate        se      lcl       ucl
## p g1 a0 t1              1         1 0.5363988 0.0603607 0.418266 0.6505829
## p g1 a1 t2              2         1 0.5363988 0.0603607 0.418266 0.6505829
## p g1 a2 t3              3         1 0.5363988 0.0603607 0.418266 0.6505829
##            fixed    note group age time Age Time
## p g1 a0 t1                   1   0    1   0    0
## p g1 a1 t2                   1   1    2   1    1
## p g1 a2 t3                   1   2    3   2    2
# collect models and make AICc table

model.set <- RMark::collect.models( type="Occupancy")
model.set
##                      model npar     AICc DeltaAICc       weight   Deviance
## 4 p(~Cross)Psi(~Elevation)    4 202.3458  0.000000 9.832014e-01 193.790260
## 2     p(~1)Psi(~Elevation)    3 210.4856  8.139742 1.679268e-02 204.156790
## 3         p(~Cross)Psi(~1)    3 226.4480 24.102222 5.739996e-06 220.119270
## 1             p(~1)Psi(~1)    2 232.9507 30.604907 2.222652e-07   3.324035
names(model.set)
## [1] "m...001"     "m...002"     "m...003"     "m...004"     "model.table"
model.set$model.table
##        p        Psi                    model npar     AICc DeltaAICc
## 4 ~Cross ~Elevation p(~Cross)Psi(~Elevation)    4 202.3458  0.000000
## 2     ~1 ~Elevation     p(~1)Psi(~Elevation)    3 210.4856  8.139742
## 3 ~Cross         ~1         p(~Cross)Psi(~1)    3 226.4480 24.102222
## 1     ~1         ~1             p(~1)Psi(~1)    2 232.9507 30.604907
##         weight   Deviance
## 4 9.832014e-01 193.790260
## 2 1.679268e-02 204.156790
## 3 5.739996e-06 220.119270
## 1 2.222652e-07   3.324035
# model averaged values
get.real(model.set[[1]], "Psi", se=TRUE)
##              all.diff.index par.index  estimate        se      lcl
## Psi g1 a0 t1              4         2 0.4625574 0.0658596 0.338653
##                    ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.5912657                   1   0    1   0    0
get.real(model.set[[2]], "Psi", se=TRUE)
##              all.diff.index par.index  estimate        se       lcl
## Psi g1 a0 t1              4         2 0.4307754 0.0823867 0.2814617
##                    ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.5938361                   1   0    1   0    0
get.real(model.set[[3]], "Psi", se=TRUE)
##              all.diff.index par.index  estimate        se       lcl
## Psi g1 a0 t1              4         4 0.4708645 0.0672292 0.3439937
##                   ucl fixed    note group age time Age Time
## Psi g1 a0 t1 0.601615                   1   0    1   0    0
Psi.ma <- RMark::model.average(model.set, param="Psi")
Psi.ma
##              par.index  estimate         se fixed    note group age time
## Psi g1 a0 t1         4 0.4523162 0.08764107                   1   0    1
##              Age Time
## Psi g1 a0 t1   0    0
# get the psi-values as function of the covariates
trout.ddl$Psi # see the index numbers
##   par.index model.index group age time Age Time
## 1         1           4     1   0    1   0    0
Elev.df <-data.frame(Elevation=seq(min(input.history$Elevation,na.rm=TRUE),
                                   max(input.history$Elevation, na.rm=TRUE),.1)) 
pred.psi <- covariate.predictions(model.set, indices=4, data=Elev.df)
head(pred.psi$estimates)
##   vcv.index model.index par.index covdata   estimate         se
## 1         1           2         4 0.91107 0.04265042 0.03397415
## 2         2           2         4 1.01107 0.04919944 0.03726829
## 3         3           2         4 1.11107 0.05669477 0.04073741
## 4         4           2         4 1.21107 0.06525388 0.04435689
## 5         5           2         4 1.31107 0.07500271 0.04809324
## 6         6           2         4 1.41107 0.08607420 0.05190357
##           lcl       ucl fixed
## 1 0.008646044 0.1853831      
## 2 0.010740485 0.1978307      
## 3 0.013325126 0.2110302      
## 4 0.016507014 0.2250190      
## 5 0.020412907 0.2398377      
## 6 0.025191105 0.2555307
ggplot(data=pred.psi$estimates, aes(x=covdata, y=estimate))+
  ggtitle("Occupancy probability as a function of elevation")+
  geom_point()+
  geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
  ylim(0,1)+
  ylab("Estimated probability of occupancy")+
  xlab("Elevation (km)")

# make a plot of the probability of detection as a function of cross section
Cross.df <- data.frame(Cross1=seq(min(cross.data,na.rm=TRUE),max(cross.data, na.rm=TRUE),.1))

trout.ddl$p # see the index numbers
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 2         2           2     1   1    2   1    1
## 3         3           3     1   2    3   2    2
pred.p <- covariate.predictions(model.set, indices=1, data=Cross.df)
head(pred.p$estimates)
##   vcv.index model.index par.index covdata  estimate         se       lcl
## 1         1           1         1    0.25 0.7621506 0.08262720 0.5673694
## 2         2           1         1    0.35 0.7468028 0.08130679 0.5594291
## 3         3           1         1    0.45 0.7307866 0.07971666 0.5509325
## 4         4           1         1    0.55 0.7141191 0.07788009 0.5418457
## 5         5           1         1    0.65 0.6968238 0.07583376 0.5321205
## 6         6           1         1    0.75 0.6789308 0.07362947 0.5216923
##         ucl fixed
## 1 0.8867421      
## 2 0.8726305      
## 3 0.8572696      
## 4 0.8406635      
## 5 0.8228501      
## 6 0.8039081
ggplot(data=pred.p$estimates, aes(x=covdata, y=estimate))+
      ggtitle("Detection probability as a function of cross sectional width")+
      geom_point()+
      geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.2)+
      ylim(0,1)

# It is often convenient to estimate occupancy for each site using the
# values of the covariates specific for that site.
# This is similar to RPresence which gives estimates at each site.

# We create a data frame with the covariates etc as measure on the input.history dataframe.
# But we also need to add the appropriate index number for psi to the covariate data frame to account
# for the groups definition

site.covariates <- input.history
site.covariates$freq <- NULL
site.covariates$ch   <- NULL

trout.ddl$Psi
##   par.index model.index group age time Age Time
## 1         1           4     1   0    1   0    0
site.covariates <- merge(trout.ddl$Psi, site.covariates)
head(site.covariates)
##   par.index model.index group age time Age Time Elevation Cross1 Cross2
## 1         1           4     1   0    1   0    0   4.26614   2.87   2.61
## 2         1           4     1   0    1   0    0   4.03882   0.88   2.54
## 3         1           4     1   0    1   0    0   2.03158   1.50   0.96
## 4         1           4     1   0    1   0    0   3.43917   0.26   1.50
## 5         1           4     1   0    1   0    0   3.03650   0.89   2.59
## 6         1           4     1   0    1   0    0   2.05014   0.67   2.32
##   Cross3
## 1   2.73
## 2   1.16
## 3   1.52
## 4   1.13
## 5   2.69
## 6   1.58
# we only want a prediction for that particular model.index.
# we need to create a variable "index" that has the model.index
site.covariates$index <- site.covariates$model.index

site.pred <- covariate.predictions(model.set,
                                   data=site.covariates)

ggplot(site.pred$estimates, aes(x=Elevation, y=estimate))+
  ggtitle("Model averaged predictions of occupancy for each site")+
  geom_point()+
  geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=0.2)

# cleanup
cleanup(ask=FALSE)