# Analysis of the Sherry redstart data using proportional hazards models
# 2019-05-01 CJS Initial code

# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Journal of Avian Biology 46(6): 559-569. https://doi.org/10.1111/jav.00536

# Data from
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Data from: Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Dryad Digital Repository. https://doi.org/10.5061/dryad.73870

library(AICcmodavg)
library(ggplot2)
library(plyr)
library(readxl)
library(survival)

source(file.path("..","..","logistic-exposure-model.R"))

# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch and 1=failure (e.g. depredation)
#    Freq: number of nests with this history
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multiple records per nest
# as shown in the mallard example.
#
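# For illustration only (hypothetical values, not part of the Sherry data),
# a minimal single-record-per-nest data frame has this shape:
toy.nests <- data.frame(
   FirstFound  = c( 5, 12),   # day each nest was first found
   LastPresent = c(15, 14),   # last day a chick was seen in each nest
   LastChecked = c(15, 20),   # last day each nest was checked
   Fate        = c( 0,  1),   # first nest hatched; second failed between days 14 and 20
   Freq        = c( 1,  1))   # one nest per record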

reddata <- readxl::read_excel(file.path("..","Sherry.xlsx"), 
                               sheet="NestData")
head(reddata)
## # A tibble: 6 x 11
##   NestId FirstFound LastPresent LastChecked  Fate  Freq Height BaffleStatus
##   <chr>       <dbl>       <dbl>       <dbl> <dbl> <dbl>  <dbl> <chr>       
## 1 /*198…         11          11          18     1     1   16.9 N           
## 2 /*198…         13          22          23     1     1   10.5 N           
## 3 /*198…          7          26          26     0     1    4.8 N           
## 4 /*198…         12          14          20     1     1    3.7 N           
## 5 /*198…          6          20          25     1     1   10.7 N           
## 6 /*198…         11          19          20     1     1   17.8 N           
## # … with 3 more variables: DBH <dbl>, AgeDay1 <dbl>, Year <chr>
# We expand the data to one record per interval each nest was at risk;
# this generates the effective sample size for the analysis
reddata2 <- expand.nest.data.ph(reddata)
head(reddata2)
##         NestId FirstFound LastPresent LastChecked Fate Freq Height
## 1 /*1983-175*/         11          11          18    1    1   16.9
## 2 /*1983-176*/         13          22          23    1    1   10.5
## 3 /*1983-176*/         13          22          23    1    1   10.5
## 4 /*1983-176*/         13          22          23    1    1   10.5
## 5 /*1983-176*/         13          22          23    1    1   10.5
## 6 /*1983-176*/         13          22          23    1    1   10.5
##   BaffleStatus  DBH AgeDay1 Year Fail Start End NestAge     Surv
## 1            N 39.0      -8 1983    1    11  18       2  (11,18]
## 2            N 10.7     -10 1983    0    13  14       2 (13,14+]
## 3            N 10.7     -10 1983    0    14  15       3 (14,15+]
## 4            N 10.7     -10 1983    0    15  16       4 (15,16+]
## 5            N 10.7     -10 1983    0    16  17       5 (16,17+]
## 6            N 10.7     -10 1983    0    17  18       6 (17,18+]
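# The expansion puts each nest into counting-process form for coxph():
# one row per daily (Start, End] interval while the nest was known alive
# (Fail=0; daily rows allow time-varying covariates such as NestAge), plus
# one terminal interval from LastPresent to LastChecked carrying the nest's
# Fate. A hypothetical sketch of the idea follows -- the actual work is done
# by expand.nest.data.ph() in the sourced helper; this is an illustration,
# not that function's code:
expand.one.nest.sketch <- function(nest){
   rows <- NULL
   # daily intervals over the period the nest was known to be alive
   if(nest$LastPresent > nest$FirstFound){
      start <- nest$FirstFound:(nest$LastPresent - 1)
      rows  <- data.frame(Start=start, End=start + 1, Fail=0)
   }
   # terminal interval; Fail=1 only if the nest failed by LastChecked
   if(nest$LastChecked > nest$LastPresent){
      rows <- rbind(rows, data.frame(Start=nest$LastPresent,
                                     End  =nest$LastChecked,
                                     Fail =nest$Fate))
   }
   rows   # Surv(Start, End, Fail) is then built from these columns
}
# reproduces the single row shown above for nest /*1983-175*/
expand.one.nest.sketch(data.frame(FirstFound=11, LastPresent=11, LastChecked=18, Fate=1))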
rm(reddata)


# as noted in the paper, we remove nests from the years with no baffling
dim(reddata2)
## [1] 6492   16
reddata2 <- reddata2[ !(reddata2$BaffleStatus=="N" & reddata2$Year %in% c(1983, 1984, 1990)),]
dim(reddata2)
## [1] 4977   16
# create factor variable for year
reddata2$YearF <- factor(reddata2$Year)

# create factor for baffle
reddata2$BaffleStatus <- factor(reddata2$BaffleStatus)
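# optional sanity check on the subsetting above: cross-tabulate the
# expanded records by year and baffle status
xtabs(~ YearF + BaffleStatus, data=reddata2)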
#  Set up the set of models to fit; each entry is the right-hand side
#  of a formula, and the Surv() response is pasted on when fitting below.
model.list.csv <- textConnection(
" S
 ~DBH
 ~DBH+YearF
 ~DBH+YearF+BaffleStatus
")

model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list
##                         S model.number
## 1                    ~DBH            1
## 2              ~DBH+YearF            2
## 3 ~DBH+YearF+BaffleStatus            3
# fit each candidate model; the Surv() response is pasted onto the stored right-hand side
model.fits <- plyr::dlply(model.list, c("model.number","S"), function(x, input.data){
  cat("\n\n***** Starting ", unlist(x), "\n")
  
  fit <- coxph(formula=as.formula(paste("Surv", eval(x$S))),data=input.data)
  fit
  
},input.data=reddata2)
## 
## 
## ***** Starting  ~DBH 1 
## 
## 
## ***** Starting  ~DBH+YearF 2 
## 
## 
## ***** Starting  ~DBH+YearF+BaffleStatus 3
# examine results for a particular model
model.number <- 3
names(model.fits[[model.number]])
##  [1] "coefficients"      "var"               "loglik"           
##  [4] "score"             "iter"              "linear.predictors"
##  [7] "residuals"         "means"             "first"            
## [10] "info"              "method"            "n"                
## [13] "nevent"            "terms"             "assign"           
## [16] "wald.test"         "concordance"       "y"                
## [19] "formula"           "xlevels"           "contrasts"        
## [22] "call"
model.fits[[model.number]]$formula
## Surv ~ DBH + YearF + BaffleStatus
## <environment: 0x7fe756d3c720>
summary(model.fits[[model.number]])
## Call:
## coxph(formula = as.formula(paste("Surv", eval(x$S))), data = input.data)
## 
##   n= 4977, number of events= 167 
## 
##                    coef exp(coef)  se(coef)      z Pr(>|z|)    
## DBH           -0.022067  0.978175  0.006133 -3.598 0.000321 ***
## YearF1986     -0.564970  0.568377  0.406094 -1.391 0.164155    
## YearF1987     -0.254334  0.775432  0.417990 -0.608 0.542876    
## YearF1988     -0.899607  0.406730  0.466222 -1.930 0.053660 .  
## YearF1989      0.106887  1.112808  0.377127  0.283 0.776852    
## YearF1991     -0.428874  0.651242  0.387547 -1.107 0.268451    
## YearF1992      0.357927  1.430361  0.315109  1.136 0.256006    
## YearF1993      0.827784  2.288242  0.316193  2.618 0.008845 ** 
## YearF1994     -0.129945  0.878144  0.428593 -0.303 0.761745    
## YearF1995      0.248231  1.281756  0.416935  0.595 0.551596    
## BaffleStatusY -0.915535  0.400302  0.249458 -3.670 0.000242 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## DBH              0.9782     1.0223    0.9665    0.9900
## YearF1986        0.5684     1.7594    0.2564    1.2598
## YearF1987        0.7754     1.2896    0.3418    1.7593
## YearF1988        0.4067     2.4586    0.1631    1.0143
## YearF1989        1.1128     0.8986    0.5314    2.3304
## YearF1991        0.6512     1.5355    0.3047    1.3919
## YearF1992        1.4304     0.6991    0.7713    2.6526
## YearF1993        2.2882     0.4370    1.2313    4.2525
## YearF1994        0.8781     1.1388    0.3791    2.0342
## YearF1995        1.2818     0.7802    0.5661    2.9020
## BaffleStatusY    0.4003     2.4981    0.2455    0.6527
## 
## Concordance= 0.69  (se = 0.021 )
## Likelihood ratio test= 70.21  on 11 df,   p=1e-10
## Wald test            = 64.23  on 11 df,   p=2e-09
## Score (logrank) test = 69.12  on 11 df,   p=2e-10
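# The exp(coef) column above is the hazard ratio; e.g. exp(-0.9155) = 0.40,
# so baffled nests experience roughly 40% of the daily failure hazard of
# unbaffled nests. The ratios and their 95% confidence limits can also be
# extracted directly from the fitted object:
exp(coef(model.fits[[model.number]]))     # hazard ratios
exp(confint(model.fits[[model.number]]))  # 95% confidence limits for the ratios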
# Model comparison
# collect the fitted models and make an AICc table
AICcmodavg::aictab(model.fits)
## 
## Model selection based on AICc:
## 
##                            K    AICc Delta_AICc AICcWt Cum.Wt      LL
## 3.~DBH+YearF+BaffleStatus 11 1566.57       0.00      1      1 -772.26
## 2.~DBH+YearF              10 1581.10      14.54      0      1 -780.53
## 1.~DBH                     1 1602.46      35.89      0      1 -800.23
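# Hand check of the AICc arithmetic for the top model, using
# AICc = -2*LL + 2*K + 2*K*(K+1)/(n-K-1) with K=11, LL=-772.26, and
# (apparently) n = 4977 expanded records:
-2*(-772.26) + 2*11 + 2*11*(11+1)/(4977 - 11 - 1)
## [1] 1566.573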
# look at impact of baffle status (averaged over all years)
fit.emmo <- emmeans::emmeans(model.fits[[3]], ~BaffleStatus)
pairs(fit.emmo)
##  contrast estimate    SE df z.ratio p.value
##  N - Y       0.916 0.249 NA 3.670   0.0002 
## 
## Results are averaged over the levels of: YearF 
## Results are given on the log (not the response) scale.
summary(pairs(fit.emmo, type="response"), infer=TRUE)
##  contrast ratio    SE df asymp.LCL asymp.UCL z.ratio p.value
##  N / Y      2.5 0.623 NA      1.53      4.07 3.670   0.0002 
## 
## Results are averaged over the levels of: YearF 
## Confidence level used: 0.95 
## Intervals are back-transformed from the log scale 
## Tests are performed on the log scale
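# A follow-up diagnostic (not part of the original analysis) is to check the
# proportional-hazards assumption using scaled Schoenfeld residuals:
cox.zph(model.fits[[3]])   # one test per model term plus a global test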