# Multi state single season
# Coosa Bass collected via electrofishing from four 50-80m sections in streams
# at 54 sites in the Upper Coosa River basin.

# Objective was to evaluate the effect of stream flow variability on Coosa
# Bass reproduction. States are:
#    Species not detected (state = 0)
#    Adult Detected (state = 1)
#    YOY present (state = 2)
# 2 covariates: stream link magnitude (standardized), and CV of stream flow
# during summer.

# Analysis in RMark

# 2018-11-27 code submitted by Neil Faught

# General model averaging framework
library(ggplot2)
library(plyr)
library(readxl)
library(reshape2)
library(RMark)
## This is RMark 2.2.5
##  Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
# get the data
input.data <- readxl::read_excel(file.path("..","CoosaBass.xls"),
                               sheet="Sheet1",
                               skip=13, na='.')
head(input.data)
## # A tibble: 6 x 8
##    Site   `1`   `2`   `3`   `4` X__1   Link  Flow
##   <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <dbl> <dbl>
## 1     1     1     1     1     1 NA     0.05  2.73
## 2     2     1     1     1     2 NA     1.97  0.8 
## 3     3     0     0     0     0 NA    -0.98  1.4 
## 4     4     2     2     0     0 NA     1.12  1.94
## 5     5     0     1     1     2 NA     0.41  1.85
## 6     6     0     1     1     1 NA     1.85  0.51
# create the capture history
input.history <- data.frame(ch  =apply(input.data[,-c(1,6:8)],1,paste,sep="", collapse=""),
                            freq=1)
input.history[1:5,]
##     ch freq
## 1 1111    1
## 2 1112    1
## 3 0000    1
## 4 2200    1
## 5 0112    1
# need to deal with missing values
input.history$ch <- gsub("NA",".",input.history$ch)
head(input.history)
##     ch freq
## 1 1111    1
## 2 1112    1
## 3 0000    1
## 4 2200    1
## 5 0112    1
## 6 0111    1
# Add site level covariates to input history
site.covar = input.data[,7:8]
nrow(site.covar)
## [1] 54
head(site.covar)
## # A tibble: 6 x 2
##    Link  Flow
##   <dbl> <dbl>
## 1  0.05  2.73
## 2  1.97  0.8 
## 3 -0.98  1.4 
## 4  1.12  1.94
## 5  0.41  1.85
## 6  1.85  0.51
names(site.covar) = c("L","CV")
input.history = cbind(input.history,site.covar)
head(input.history)
##     ch freq     L   CV
## 1 1111    1  0.05 2.73
## 2 1112    1  1.97 0.80
## 3 0000    1 -0.98 1.40
## 4 2200    1  1.12 1.94
## 5 0112    1  0.41 1.85
## 6 0111    1  1.85 0.51
# create the Rmark data file

bass.data <- process.data(data=input.history,
                         model="MSOccupancy")  # this parameterization is more stable

summary(bass.data)
##                  Length Class      Mode     
## data              4     data.frame list     
## model             1     -none-     character
## mixtures          1     -none-     numeric  
## freq             54     -none-     numeric  
## nocc              1     -none-     numeric  
## nocc.secondary    0     -none-     NULL     
## time.intervals    3     -none-     numeric  
## begin.time        1     -none-     numeric  
## age.unit          1     -none-     numeric  
## initial.ages      1     -none-     numeric  
## group.covariates  0     -none-     NULL     
## nstrata           1     -none-     numeric  
## strata.labels     0     -none-     NULL     
## counts            0     -none-     NULL     
## reverse           1     -none-     logical  
## areas             0     -none-     NULL     
## events            0     -none-     NULL
# time-specific covariates are added to the ddl's
# We need to modify the design data to create this design matrix
# See the dipper example in Section C.6 of the RMark manual
bass.ddl=make.design.data(bass.data)
bass.ddl
## $Psi1
##   par.index model.index group age time Age Time
## 1         1           1     1   0    1   0    0
## 
## $Psi2
##   par.index model.index group age time Age Time
## 1         1           2     1   0    1   0    0
## 
## $p1
##   par.index model.index group age time Age Time
## 1         1           3     1   0    1   0    0
## 2         2           4     1   1    2   1    1
## 3         3           5     1   2    3   2    2
## 4         4           6     1   3    4   3    3
## 
## $p2
##   par.index model.index group age time Age Time
## 1         1           7     1   0    1   0    0
## 2         2           8     1   1    2   1    1
## 3         3           9     1   2    3   2    2
## 4         4          10     1   3    4   3    3
## 
## $Delta
##   par.index model.index group age time Age Time
## 1         1          11     1   0    1   0    0
## 2         2          12     1   1    2   1    1
## 3         3          13     1   2    3   2    2
## 4         4          14     1   3    4   3    3
## 
## $pimtypes
## $pimtypes$Psi1
## $pimtypes$Psi1$pim.type
## [1] "all"
## 
## 
## $pimtypes$Psi2
## $pimtypes$Psi2$pim.type
## [1] "all"
## 
## 
## $pimtypes$p1
## $pimtypes$p1$pim.type
## [1] "all"
## 
## 
## $pimtypes$p2
## $pimtypes$p2$pim.type
## [1] "all"
## 
## 
## $pimtypes$Delta
## $pimtypes$Delta$pim.type
## [1] "all"
setup.parameters("MSOccupancy", check=TRUE)
## [1] "Psi1"  "Psi2"  "p1"    "p2"    "Delta"
# Use the multi-state model, but only for a single season 
# The real parameters are
#    Psi1(i) = probability that a site i is occupied regardless 
#              of reproductive state, Pr(true state = 1 or 2);
#    Psi2(i) = probability that young occurred, given that the site i is occupied,
#              Pr(true state = 2 | true state = 1 or 2);
#
#
#    p1(i, t) = probability that occupancy is detected for site i, 
#               period t, given that true state = 1, Pr(detection | true state = 1);
#    p2(i, t) = probability that occupancy is detected for site i, 
#               period t, given that true state = 2, Pr(detection | true state = 2);
#    Delta(i, t) = probability that evidence of successful reproduction is found, 
#                given detection of occupancy at site i, period t, with successful 
#                reproduction, Pr(classified state 2|true state = 2).

#   Thus, psi1 and psi2 are single parameters relating to a site, 
#   whereas p1, p2, and delta are time-specific parameters, 
#   and thus the number of values in the PIM for each of these parameters equals the number of occasions.
#
#   The product psi1*psi2 is the unconditional probability that a site is 
#   occupied by successful breeders, and is provided as a derived parameter.


# define the list of models to fit
# Notice the commas between the column and the placement of the quotes
# Notice the difference between Time and time in the model terms
model.list.csv <- textConnection("
Psi1,  Psi2,       p1,      p2,    Delta
~1,     ~1,       ~1,      ~SHARE,   ~1
~L,     ~L,       ~L,      ~SHARE,   ~L
~CV,    ~CV,      ~CV,     ~SHARE,   ~CV
")

model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list
##   Psi1 Psi2  p1     p2 Delta
## 1   ~1   ~1  ~1 ~SHARE    ~1
## 2   ~L   ~L  ~L ~SHARE    ~L
## 3  ~CV  ~CV ~CV ~SHARE   ~CV
model.list$model.number <- 1:nrow(model.list)
model.list
##   Psi1 Psi2  p1     p2 Delta model.number
## 1   ~1   ~1  ~1 ~SHARE    ~1            1
## 2   ~L   ~L  ~L ~SHARE    ~L            2
## 3  ~CV  ~CV ~CV ~SHARE   ~CV            3
# fit the models
myobs <- ls()
myobs <- myobs[ grepl("m...",myobs,fixed=TRUE)]
cat("Removing ", myobs, "\n")
## Removing
rm(list=myobs)


# fit the models
model.fits <- plyr::dlply(model.list, "model.number", function(x,input.data, input.ddl){
  cat("\n\n***** Starting ", unlist(x), "\n")
  #browser()
  model.parameters=list(
    Psi1   =list(formula=as.formula(eval(x$Psi1 ))),
    Psi2   =list(formula=as.formula(eval(x$Psi2))),
    p1     =list(formula=as.formula(eval(x$p1 ))),
    p2     =list(formula=as.formula(eval(x$p2 ))),
    Delta  =list(formula=as.formula(eval(x$Delta)))
  )
  if(x$p2 == '~SHARE'){
    model.parameters$p1  =list(formula=as.formula(eval(x$p1 )),share=TRUE)
    model.parameters$p2 = NULL
  }
  
  fit <- RMark::mark(input.data, ddl=input.ddl,
                     model="MSOccupancy",
                     model.parameters=model.parameters
                     #,brief=TRUE,output=FALSE, delete=TRUE
                     #,invisible=TRUE,output=TRUE  # set for debugging
  )

  mnumber <- paste("m...",formatC(x$model.number, width = 3, format = "d", flag = "0"),sep="")
  assign( mnumber, fit, envir=.GlobalEnv)
  #browser()
  fit
  
},input.data=bass.data, input.ddl=bass.ddl)
## 
## 
## ***** Starting  ~1 ~1 ~1 ~SHARE ~1 1 
## 
## Output summary for MSOccupancy model
## Name : Psi1(~1)Psi2(~1)p1(~1)p2()Delta(~1) 
## 
## Npar :  4
## -2lnL:  381.1956
## AICc :  390.0119
## 
## Beta
##                     estimate        se        lcl       ucl
## Psi1:(Intercept)   1.2596547 0.3291162  0.6145869 1.9047226
## Psi2:(Intercept)   1.1248675 0.6653546 -0.1792276 2.4289625
## p1:(Intercept)     1.4010154 0.1960365  1.0167839 1.7852469
## Delta:(Intercept) -0.5227751 0.2940718 -1.0991558 0.0536056
## 
## 
## Real Parameter Psi1
##          1
##  0.7789667
## 
## 
## Real Parameter Psi2
##          1
##  0.7548905
## 
## 
## Real Parameter p1
##         1        2        3        4
##  0.802345 0.802345 0.802345 0.802345
## 
## 
## Real Parameter p2
##         1        2        3        4
##  0.802345 0.802345 0.802345 0.802345
## 
## 
## Real Parameter Delta
##          1         2         3         4
##  0.3722036 0.3722036 0.3722036 0.3722036
## 
## 
## ***** Starting  ~L ~L ~L ~SHARE ~L 2 
## 
## Output summary for MSOccupancy model
## Name : Psi1(~L)Psi2(~L)p1(~L)p2()Delta(~L) 
## 
## Npar :  8
## -2lnL:  344.7405
## AICc :  363.9404
## 
## Beta
##                     estimate        se        lcl       ucl
## Psi1:(Intercept)   3.7267937 1.4143065  0.9547528 6.4988345
## Psi1:L             3.5339916 1.2414516  1.1007463 5.9672368
## Psi2:(Intercept)   1.4197246 0.9709157 -0.4832701 3.3227193
## Psi2:L            -0.5372995 0.7437800 -1.9951083 0.9205093
## p1:(Intercept)     1.5582875 0.2434489  1.0811278 2.0354473
## p1:L              -0.2513002 0.2129820 -0.6687450 0.1661445
## Delta:(Intercept) -0.5722785 0.3185629 -1.1966619 0.0521048
## Delta:L            0.1457851 0.2874047 -0.4175282 0.7090984
## 
## 
## Real Parameter Psi1
##          1
##  0.9842012
## 
## 
## Real Parameter Psi2
##          1
##  0.7954563
## 
## 
## Real Parameter p1
##          1         2         3         4
##  0.8219304 0.8219304 0.8219304 0.8219304
## 
## 
## Real Parameter p2
##          1         2         3         4
##  0.8219304 0.8219304 0.8219304 0.8219304
## 
## 
## Real Parameter Delta
##          1         2         3         4
##  0.3645737 0.3645737 0.3645737 0.3645737
## 
## 
## ***** Starting  ~CV ~CV ~CV ~SHARE ~CV 3 
## 
## Output summary for MSOccupancy model
## Name : Psi1(~CV)Psi2(~CV)p1(~CV)p2()Delta(~CV) 
## 
## Npar :  8
## -2lnL:  364.0208
## AICc :  383.2208
## 
## Beta
##                     estimate         se         lcl        ucl
## Psi1:(Intercept)   1.3355797  0.7184988  -0.0726780  2.7438374
## Psi1:CV           -0.0507153  0.4272957  -0.8882149  0.7867842
## Psi2:(Intercept)  16.9613190 16.5864900 -15.5482030 49.4708410
## Psi2:CV           -8.1589219  7.6673076 -23.1868450  6.8690014
## p1:(Intercept)     1.1470511  0.4285943   0.3070062  1.9870959
## p1:CV              0.1759806  0.2686260  -0.3505265  0.7024877
## Delta:(Intercept) -0.7138861  0.5116714  -1.7167622  0.2889899
## Delta:CV           0.1816607  0.4620385  -0.7239348  1.0872563
## 
## 
## Real Parameter Psi1
##          1
##  0.7791675
## 
## 
## Real Parameter Psi2
##          1
##  0.9928571
## 
## 
## Real Parameter p1
##          1         2         3         4
##  0.8032069 0.8032069 0.8032069 0.8032069
## 
## 
## Real Parameter p2
##          1         2         3         4
##  0.8032069 0.8032069 0.8032069 0.8032069
## 
## 
## Real Parameter Delta
##          1         2         3         4
##  0.3902873 0.3902873 0.3902873 0.3902873
# examine individual model results
model.number <-2

summary(model.fits[[model.number]])
## Output summary for MSOccupancy model
## Name : Psi1(~L)Psi2(~L)p1(~L)p2()Delta(~L) 
## 
## Npar :  8
## -2lnL:  344.7405
## AICc :  363.9404
## 
## Beta
##                     estimate        se        lcl       ucl
## Psi1:(Intercept)   3.7267937 1.4143065  0.9547528 6.4988345
## Psi1:L             3.5339916 1.2414516  1.1007463 5.9672368
## Psi2:(Intercept)   1.4197246 0.9709157 -0.4832701 3.3227193
## Psi2:L            -0.5372995 0.7437800 -1.9951083 0.9205093
## p1:(Intercept)     1.5582875 0.2434489  1.0811278 2.0354473
## p1:L              -0.2513002 0.2129820 -0.6687450 0.1661445
## Delta:(Intercept) -0.5722785 0.3185629 -1.1966619 0.0521048
## Delta:L            0.1457851 0.2874047 -0.4175282 0.7090984
## 
## 
## Real Parameter Psi1
##          1
##  0.9842012
## 
## 
## Real Parameter Psi2
##          1
##  0.7954563
## 
## 
## Real Parameter p1
##          1         2         3         4
##  0.8219304 0.8219304 0.8219304 0.8219304
## 
## 
## Real Parameter p2
##          1         2         3         4
##  0.8219304 0.8219304 0.8219304 0.8219304
## 
## 
## Real Parameter Delta
##          1         2         3         4
##  0.3645737 0.3645737 0.3645737 0.3645737
model.fits[[model.number]]$results$real
##                 estimate        se       lcl       ucl fixed    note
## Psi1 g1 a0 t1  0.9842012 0.0240583 0.7501327 0.9992270              
## Psi2 g1 a0 t1  0.7954563 0.1476975 0.3962598 0.9584071              
## p1 g1 a0 t1    0.8219304 0.0336287 0.7463313 0.8786621              
## Delta g1 a0 t1 0.3645737 0.0711546 0.2391066 0.5116103
model.fits[[model.number]]$results$beta
##                     estimate        se        lcl       ucl
## Psi1:(Intercept)   3.7267937 1.4143065  0.9547528 6.4988345
## Psi1:L             3.5339916 1.2414516  1.1007463 5.9672368
## Psi2:(Intercept)   1.4197246 0.9709157 -0.4832701 3.3227193
## Psi2:L            -0.5372995 0.7437800 -1.9951083 0.9205093
## p1:(Intercept)     1.5582875 0.2434489  1.0811278 2.0354473
## p1:L              -0.2513002 0.2129820 -0.6687450 0.1661445
## Delta:(Intercept) -0.5722785 0.3185629 -1.1966619 0.0521048
## Delta:L            0.1457851 0.2874047 -0.4175282 0.7090984
model.fits[[model.number]]$results$derived
## $`psi1*psi2`
##   estimate        se       lcl       ucl
## 1 0.782889 0.1466183 0.3993674 0.9513516
get.real(model.fits[[model.number]], "Psi1", se=TRUE)
##               all.diff.index par.index  estimate        se       lcl
## Psi1 g1 a0 t1              1         1 0.9842012 0.0240583 0.7501327
##                    ucl fixed    note group age time Age Time
## Psi1 g1 a0 t1 0.999227                   1   0    1   0    0
get.real(model.fits[[model.number]], "Psi2", se=TRUE)
##               all.diff.index par.index  estimate        se       lcl
## Psi2 g1 a0 t1              2         2 0.7954563 0.1476975 0.3962598
##                     ucl fixed    note group age time Age Time
## Psi2 g1 a0 t1 0.9584071                   1   0    1   0    0
get.real(model.fits[[model.number]], "p1",    se=TRUE)
##             all.diff.index par.index  estimate        se       lcl
## p1 g1 a0 t1              3         3 0.8219304 0.0336287 0.7463313
## p1 g1 a1 t2              4         3 0.8219304 0.0336287 0.7463313
## p1 g1 a2 t3              5         3 0.8219304 0.0336287 0.7463313
## p1 g1 a3 t4              6         3 0.8219304 0.0336287 0.7463313
##                   ucl fixed    note group age time Age Time
## p1 g1 a0 t1 0.8786621                   1   0    1   0    0
## p1 g1 a1 t2 0.8786621                   1   1    2   1    1
## p1 g1 a2 t3 0.8786621                   1   2    3   2    2
## p1 g1 a3 t4 0.8786621                   1   3    4   3    3
get.real(model.fits[[model.number]], "p2",    se=TRUE)
##             all.diff.index par.index  estimate        se       lcl
## p2 g1 a0 t1              7         3 0.8219304 0.0336287 0.7463313
## p2 g1 a1 t2              8         3 0.8219304 0.0336287 0.7463313
## p2 g1 a2 t3              9         3 0.8219304 0.0336287 0.7463313
## p2 g1 a3 t4             10         3 0.8219304 0.0336287 0.7463313
##                   ucl fixed    note group age time Age Time
## p2 g1 a0 t1 0.8786621                   1   0    1   0    0
## p2 g1 a1 t2 0.8786621                   1   1    2   1    1
## p2 g1 a2 t3 0.8786621                   1   2    3   2    2
## p2 g1 a3 t4 0.8786621                   1   3    4   3    3
get.real(model.fits[[model.number]], "Delta", se=TRUE)
##                all.diff.index par.index  estimate        se       lcl
## Delta g1 a0 t1             11         4 0.3645737 0.0711546 0.2391066
## Delta g1 a1 t2             12         4 0.3645737 0.0711546 0.2391066
## Delta g1 a2 t3             13         4 0.3645737 0.0711546 0.2391066
## Delta g1 a3 t4             14         4 0.3645737 0.0711546 0.2391066
##                      ucl fixed    note group age time Age Time
## Delta g1 a0 t1 0.5116103                   1   0    1   0    0
## Delta g1 a1 t2 0.5116103                   1   1    2   1    1
## Delta g1 a2 t3 0.5116103                   1   2    3   2    2
## Delta g1 a3 t4 0.5116103                   1   3    4   3    3
# collect models and make AICc table

model.set <- RMark::collect.models( type="MSOccupancy")
model.set
##                                     model npar     AICc DeltaAICc
## 2     Psi1(~L)Psi2(~L)p1(~L)p2()Delta(~L)    8 363.9404   0.00000
## 3 Psi1(~CV)Psi2(~CV)p1(~CV)p2()Delta(~CV)    8 383.2208  19.28032
## 1     Psi1(~1)Psi2(~1)p1(~1)p2()Delta(~1)    4 390.0119  26.07146
##         weight  Deviance
## 2 9.999328e-01 344.74045
## 3 6.505827e-05 364.02077
## 1 2.180851e-06  65.84174
# No need to do model averaging as nearly 100% of AIC weight is in one model

cleanup(ask=FALSE)