# Nest survival model using JAGS 
# Fit a nest level categorical variable (habitat) plus DBH 
# with a random effect of Year (rather than the fixed effect used in the paper)

# Analysis of Shelly redstart data using a Bayesian model

# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Journal of Avian Biology 46(6): 559-569. https://doi.org/10.1111/jav.00536

# Data from
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Data from: Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Dryad Digital Repository. https://doi.org/10.5061/dryad.73870

#
# 2019-06-28 CHJS First Edition
#

library("R2jags")  # used for call to JAGS
## Loading required package: rjags
## Loading required package: coda
## Linked to JAGS 4.3.0
## Loaded modules: basemod,bugs
## 
## Attaching package: 'R2jags'
## The following object is masked from 'package:coda':
## 
##     traceplot
library(coda)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(reshape2)

options(width=200)

source(file.path("..","..","jags-nest-survival-random-effects.r"))


# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch an
#    Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#

nestdata <- readxl::read_excel(file.path("..","Sherry.xlsx"), 
                              sheet="NestData")
head(nestdata)
## # A tibble: 6 x 11
##   NestId       FirstFound LastPresent LastChecked  Fate  Freq Height BaffleStatus   DBH AgeDay1 Year 
##   <chr>             <dbl>       <dbl>       <dbl> <dbl> <dbl>  <dbl> <chr>        <dbl>   <dbl> <chr>
## 1 /*1983-175*/         11          11          18     1     1   16.9 N             39        -8 1983 
## 2 /*1983-176*/         13          22          23     1     1   10.5 N             10.7     -10 1983 
## 3 /*1983-178*/          7          26          26     0     1    4.8 N              8.8      -4 1983 
## 4 /*1983-180*/         12          14          20     1     1    3.7 N              5.1      -9 1983 
## 5 /*1983-181*/          6          20          25     1     1   10.7 N             15.6      -3 1983 
## 6 /*1983-182*/         11          19          20     1     1   17.8 N             46.2      -8 1983
# as noted in the paper, we remove nests from years with no baffeling
dim(nestdata)
## [1] 537  11
nestdata <- nestdata[ !(nestdata$BaffleStatus=="N" & nestdata$Year %in% c(1983, 1984, 1990)),]
dim(nestdata)
## [1] 414  11
# create factor variable for year because it numeric
nestdata$YearF <- factor(nestdata$Year)


# Unfortunately, JAGS cannot deal with alpha numeric code and 
# so we need to convert the alphanumberic NestID to numeric codes
# by declaring NestId as a factor and extracting the level values
nestdata$NestId.num <- as.numeric(factor(nestdata$NestId))

# We must create a file with every combination of next x day nest was "active"
# being every day from FirstCound to LastChecked-1

nesttime <- plyr::adply(nestdata, 1, function(x){
     nesttime <- expand.grid(NestId.num=x$NestId.num, 
                             Day=x$FirstFound:(x$LastChecked-1),
                             Survive=1-x$Fate,
                             stringsAsFactors=FALSE)
     nesttime
})


# Extract the nest level covariates (including AgeNest1)
# The next level covariates should be indexed using NestId
# If AgeNest1 variable is present then the age of the nest is computed
#
nest.covariates <- NULL

if( !is.null(nest.covariates)){
   nesttime <- merge(nesttime, nest.covariates, by="NestId")
}


# Extract any survey time covariates such as time, time^2, early/late
# weather covariates ect.
# All of these covariates will affect all nests simultaneouls

# none added here

# if there is a AgeDay1 variable, we compute the nest age for each time for each nest
if( !is.null(nesttime$AgeDay1)){
   nesttime$NestAge <- nesttime$AgeDay1 + nesttime$Day -1
}
head(nesttime)
##         NestId FirstFound LastPresent LastChecked Fate Freq Height BaffleStatus DBH AgeDay1 Year YearF NestId.num Day Survive NestAge
## 1 /*1985-434*/          7          27          27    0    1    2.6            N 3.3      -4 1985  1985          1   7       1       2
## 2 /*1985-434*/          7          27          27    0    1    2.6            N 3.3      -4 1985  1985          1   8       1       3
## 3 /*1985-434*/          7          27          27    0    1    2.6            N 3.3      -4 1985  1985          1   9       1       4
## 4 /*1985-434*/          7          27          27    0    1    2.6            N 3.3      -4 1985  1985          1  10       1       5
## 5 /*1985-434*/          7          27          27    0    1    2.6            N 3.3      -4 1985  1985          1  11       1       6
## 6 /*1985-434*/          7          27          27    0    1    2.6            N 3.3      -4 1985  1985          1  12       1       7
# Add any next x day survey covariates to the nesttime data
#

# there is nothing here for this example


# Set up the design matrix for the fixed effects
fe.design <- model.matrix(  ~ DBH+BaffleStatus, data=nesttime)

head(fe.design)
##   (Intercept) DBH BaffleStatusY
## 1           1 3.3             0
## 2           1 3.3             0
## 3           1 3.3             0
## 4           1 3.3             0
## 5           1 3.3             0
## 6           1 3.3             0
# Set up the design matrix for the random effects.
# Use a "no interept" model so that each random effect is represented by a separate column
re.z1.design <- model.matrix(  ~ -1 + YearF, data=nesttime)
head(re.z1.design)
##   YearF1985 YearF1986 YearF1987 YearF1988 YearF1989 YearF1991 YearF1992 YearF1993 YearF1994 YearF1995
## 1         1         0         0         0         0         0         0         0         0         0
## 2         1         0         0         0         0         0         0         0         0         0
## 3         1         0         0         0         0         0         0         0         0         0
## 4         1         0         0         0         0         0         0         0         0         0
## 5         1         0         0         0         0         0         0         0         0         0
## 6         1         0         0         0         0         0         0         0         0         0
# load and run the JAGS model
fitted.model <- jags.nest.survival.random.effects(
         nestdata=nestdata,    # nest data
         nesttime=nesttime,    # daily nest values with nest, time, nest x time covariates
         fe.design=fe.design,  # fixed effects design matrix
         re.z1.design=re.z1.design, # random effects design matrix
         init.seed=12321312)   # initial seed)  
## module glm loaded
## Compiling data graph
##    Declaring variables
##    Resolving undeclared variables
##    Allocating nodes
##    Initializing
##    Reading data back into data table
## Compiling model graph
##    Declaring variables
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 561
##    Unobserved stochastic nodes: 14
##    Total graph size: 93122
## 
## Initializing model
# the nesttime dataframe has the estimated DSR for every combination of NestId.num and Day

# the results list has lots of other stuff
results <- fitted.model$results



results$BUGSoutput$summary[ grepl("z1",   rownames(results$BUGSoutput$summary)),]
##               mean        sd        2.5%         25%         50%         75%       97.5%     Rhat n.eff
## z1[1]  -0.05321827 0.2988639 -0.64201570 -0.24087651 -0.04981226  0.13315340  0.54784523 1.003497   710
## z1[2]   0.48193093 0.3112683 -0.08212826  0.26643843  0.46476715  0.68399231  1.13236493 1.002190  1300
## z1[3]   0.22595224 0.3198712 -0.36739311  0.01093077  0.21013011  0.42489638  0.90086581 1.003241   780
## z1[4]   0.57114801 0.3433482 -0.04857496  0.33723081  0.55222831  0.77964513  1.31255283 1.001036  4500
## z1[5]  -0.11840080 0.3046760 -0.72531147 -0.32376259 -0.12086071  0.07945038  0.48015342 1.005816   390
## z1[6]   0.40121495 0.3042617 -0.14926656  0.19794902  0.38788160  0.58858689  1.04807151 1.001532  2300
## z1[7]  -0.41081971 0.2507078 -0.89910034 -0.57071742 -0.41331655 -0.24653289  0.07270387 1.007004   360
## z1[8]  -0.77958817 0.2621312 -1.30315051 -0.94932942 -0.76960969 -0.60749685 -0.27436544 1.010133   230
## z1[9]   0.03251032 0.3315956 -0.60311661 -0.18940301  0.02711324  0.24177884  0.70593527 1.005536   460
## z1[10] -0.27344906 0.3297544 -0.94547830 -0.49558671 -0.26331284 -0.05768986  0.35226694 1.004876   480
## z1sd    0.56365417 0.1887773  0.29194542  0.43097935  0.53247987  0.66188639  1.01939431 1.005547   420
results$BUGSoutput$summary[ grepl("beta", rownames(results$BUGSoutput$summary)),]
##               mean          sd       2.5%        25%        50%        75%      97.5%     Rhat n.eff
## beta[1] 3.00845955 0.223688663 2.55834907 2.87186432 3.01026594 3.14776804 3.45432573 1.006797   430
## beta[2] 0.02235649 0.006142708 0.01082913 0.01812101 0.02213662 0.02657579 0.03449535 1.003934   610
## beta[3] 0.98876294 0.252989056 0.50771275 0.81538741 0.98125347 1.15715254 1.49677290 1.001501  2400
# the nesttime dataframe has the estimated DSR for every combination of NestId.num and Day
# evaluated at the covariates for that nest and time

# This is problematic as DBH will vary among the nests.
# We want the DSR at the mean DBH for each baffle status x year combination
# Consequently, we need to create a design matrix for every baffle status x year and
# the mean DBH and then multiply by the estimated betas 

pred.df <- unique(fitted.model$nesttime[, c("BaffleStatus","YearF")])
pred.df$DBH <- mean(nestdata$DBH)

# Notice that the model must MATCH exactly in the order of terms etc as used to generate
# the fe.design matrix seen earlier
pred.fe.design <- model.matrix( ~ DBH+BaffleStatus, data=pred.df)
pred.re.design <- model.matrix( ~ -1 +YearF, data=pred.df) # notice no intercept for random effects
head(fe.design)
##   (Intercept) DBH BaffleStatusY
## 1           1 3.3             0
## 2           1 3.3             0
## 3           1 3.3             0
## 4           1 3.3             0
## 5           1 3.3             0
## 6           1 3.3             0
head(pred.fe.design)
##     (Intercept)      DBH BaffleStatusY
## 1             1 16.12215             0
## 21            1 16.12215             1
## 43            1 16.12215             0
## 108           1 16.12215             1
## 225           1 16.12215             0
## 297           1 16.12215             1
head(re.z1.design)
##   YearF1985 YearF1986 YearF1987 YearF1988 YearF1989 YearF1991 YearF1992 YearF1993 YearF1994 YearF1995
## 1         1         0         0         0         0         0         0         0         0         0
## 2         1         0         0         0         0         0         0         0         0         0
## 3         1         0         0         0         0         0         0         0         0         0
## 4         1         0         0         0         0         0         0         0         0         0
## 5         1         0         0         0         0         0         0         0         0         0
## 6         1         0         0         0         0         0         0         0         0         0
head(pred.re.design)
##     YearF1985 YearF1986 YearF1987 YearF1988 YearF1989 YearF1991 YearF1992 YearF1993 YearF1994 YearF1995
## 1           1         0         0         0         0         0         0         0         0         0
## 21          1         0         0         0         0         0         0         0         0         0
## 43          0         0         1         0         0         0         0         0         0         0
## 108         0         0         1         0         0         0         0         0         0         0
## 225         0         0         0         1         0         0         0         0         0         0
## 297         0         0         0         1         0         0         0         0         0         0
# extract the posterior sample of the beta as a matrix
# the results$BUGSoutput$sims.matrix is a 2-d object [iterations, variables] with chains stacked
# on top of each other
dim(  results$BUGSoutput$sims.matrix)
## [1] 4500 5238
select <- grepl("^beta", colnames(results$BUGSoutput$sims.matrix))
colnames(results$BUGSoutput$sims.matrix)[select]
## [1] "beta[1]" "beta[2]" "beta[3]"
beta.matrix <- results$BUGSoutput$sims.matrix[,select]
dim(beta.matrix)
## [1] 4500    3
# extract the posterior sample of the z1 values as a matrix in a similar fashion
dim(  results$BUGSoutput$sims.matrix)
## [1] 4500 5238
select <- grepl("z1[", colnames(results$BUGSoutput$sims.matrix), fixed=TRUE)
colnames(results$BUGSoutput$sims.matrix)[select]
##  [1] "z1[1]"  "z1[2]"  "z1[3]"  "z1[4]"  "z1[5]"  "z1[6]"  "z1[7]"  "z1[8]"  "z1[9]"  "z1[10]"
z1.matrix <- results$BUGSoutput$sims.matrix[,select]
dim(z1.matrix)
## [1] 4500   10
# create a violin plot of the z1 values
plotdata <- reshape2::melt(z1.matrix, value.name="z.value")
head(plotdata)
##   Var1  Var2    z.value
## 1    1 z1[1] -0.4961646
## 2    2 z1[1] -0.1777880
## 3    3 z1[1]  0.3940033
## 4    4 z1[1]  0.3838399
## 5    5 z1[1]  0.1870228
## 6    6 z1[1] -0.1940508
ggplot(data=plotdata, aes(x=Var2, y=z.value))+
  ggtitle("Violin plot of estimated (random) year effects")+
  geom_violin()+
  geom_hline(yintercept=0)

# create posterior sample of DSR (first on the logit scale) and then back transformed
DSR <- beta.matrix %*% t(pred.fe.design) + z1.matrix %*% t(pred.re.design)
DSR <- 1/(1+exp(-DSR))
dim(DSR)
## [1] 4500   20
pred.df$mean <- apply(DSR,2,mean)
pred.df$sd   <- apply(DSR,2,sd)
pred.df$lcl  <- apply(DSR,2, quantile, prob=.025)
pred.df$ucl  <- apply(DSR,2,  quantile, prob=.975)

# now we are ready to plot
pred.df$Year <- as.numeric(as.character(pred.df$YearF))
# in this case, we fit a S~Habitat model, so all rows of nesttime have the same estimated DSR

ggplot(data=pred.df, aes(x=Year, y=mean, color=BaffleStatus))+
  ggtitle("Estimated DSR by Baffle Status and Year",
          subtitle="Evaluated at the mean DBH and year as a random effect")+
  geom_line( position=position_dodge(w=0.2))+
  geom_errorbar(aes(ymin=lcl, ymax=ucl), width=0.1, position=position_dodge(w=0.2))+
  ylab("DSR (95% ci)")+
  theme(legend.position=c(0,0),legend.justification=c(0,0))