# Analysis of Redstart data illustrating Cox PH models
# Reproduce Table 1b.

# 2019-05-01 CJS Initial code

# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Journal of Avian Biology 46(6): 559-569. https://doi.org/10.1111/jav.00536

# Data from
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Data from: Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Dryad Digital Repository. https://doi.org/10.5061/dryad.73870


library(AICcmodavg)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(plyr)
library(readxl)
library(survival)
library(survminer)
## Loading required package: ggpubr
## Loading required package: magrittr
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
source(file.path("..","..","CoxPH-additional-functions.R"))


# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch an
#    Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#

reddata <- readxl::read_excel(file.path("..","Sherry.xlsx"), 
                               sheet="NestData")
head(reddata)
## # A tibble: 6 x 11
##   NestId FirstFound LastPresent LastChecked  Fate  Freq Height BaffleStatus
##   <chr>       <dbl>       <dbl>       <dbl> <dbl> <dbl>  <dbl> <chr>       
## 1 /*198…         11          11          18     1     1   16.9 N           
## 2 /*198…         13          22          23     1     1   10.5 N           
## 3 /*198…          7          26          26     0     1    4.8 N           
## 4 /*198…         12          14          20     1     1    3.7 N           
## 5 /*198…          6          20          25     1     1   10.7 N           
## 6 /*198…         11          19          20     1     1   17.8 N           
## # … with 3 more variables: DBH <dbl>, AgeDay1 <dbl>, Year <chr>
reddata <- as.data.frame(reddata)


# get the yearly date and merge with the nest data
annual.data <- readxl::read_excel(file.path("..","Sherry.xlsx"),
                                  sheet="AnnualCovariates", skip=9)
head(annual.data)
## # A tibble: 6 x 7
##    Year Density Predators MayTemp JuneTemp MayRain JuneRain
##   <dbl>   <dbl>     <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1  1983   0.867       2.4    8.89     17.1   194.      35.3
## 2  1984   0.467       0.5    9.06     16.2   312.     180. 
## 3  1985   0.589       3.4   10.6      13.2   107.     130  
## 4  1986   0.439       6     12.2      13.4   128.      89.6
## 5  1987   0.372       1.7   10.9      15.4    91.6    200. 
## 6  1988   0.361       0.3   12        14.4    97.8     70.2
dim(reddata)
## [1] 537  11
reddata <- merge(reddata, annual.data, all.x=TRUE)
dim(reddata)
## [1] 537  17
# any missing data?
sum(!complete.cases(reddata))
## [1] 0
# as noted in the paper, we remove nests with baffeling
dim(reddata)
## [1] 537  17
reddata <- reddata[ !(reddata$BaffleStatus=="Y"),]
dim(reddata)
## [1] 466  17
# expand the data
reddata2 <- expand.nest.data.ph(reddata)
rm(reddata)

# create factor variable for year
reddata2$YearF <- factor(reddata2$Year)
# Set up the set of model to fit
model.list.csv <- textConnection(
  " S
  ~DBH
  ~DBH+Predators+ MayTemp                   +JuneRain  +NestAge
  ~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge
  ~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge
  ~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density
  ~DBH+Predators+ MayTemp                   +JuneRain  
  ~DBH+Predators+ MayTemp          +MayRain            +NestAge 
  ~DBH+Predators+ MayTemp                              +NestAge 
  ~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density
  ")

model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list
##                                                                         S
## 1                                                                    ~DBH
## 2           ~DBH+Predators+ MayTemp                   +JuneRain  +NestAge
## 3           ~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge
## 4           ~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge
## 5 ~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density
## 6                     ~DBH+Predators+ MayTemp                   +JuneRain
## 7           ~DBH+Predators+ MayTemp          +MayRain            +NestAge
## 8           ~DBH+Predators+ MayTemp                              +NestAge
## 9 ~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density
##   model.number
## 1            1
## 2            2
## 3            3
## 4            4
## 5            5
## 6            6
## 7            7
## 8            8
## 9            9
model.fits <- plyr::dlply(model.list, c("model.number","S"), function(x,input.data, input.ddl){
  cat("\n\n***** Starting ", unlist(x), "\n")
  
  fit <- coxph(formula=as.formula(paste("Surv", eval(x$S))),
            data=input.data)
  fit
  
},input.data=reddata2)
## 
## 
## ***** Starting  ~DBH 1 
## 
## 
## ***** Starting  ~DBH+Predators+ MayTemp                   +JuneRain  +NestAge 2 
## 
## 
## ***** Starting  ~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge 3 
## 
## 
## ***** Starting  ~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge 4 
## 
## 
## ***** Starting  ~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density 5 
## 
## 
## ***** Starting  ~DBH+Predators+ MayTemp                   +JuneRain 6 
## 
## 
## ***** Starting  ~DBH+Predators+ MayTemp          +MayRain            +NestAge 7 
## 
## 
## ***** Starting  ~DBH+Predators+ MayTemp                              +NestAge 8 
## 
## 
## ***** Starting  ~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density 9
# examine individual model results
model.number <-1
summary(model.fits[[model.number]])
## Call:
## coxph(formula = as.formula(paste("Surv", eval(x$S))), data = input.data)
## 
##   n= 5374, number of events= 208 
## 
##          coef exp(coef)  se(coef)     z Pr(>|z|)    
## DBH -0.022144  0.978100  0.005427 -4.08  4.5e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##     exp(coef) exp(-coef) lower .95 upper .95
## DBH    0.9781      1.022    0.9678    0.9886
## 
## Concordance= 0.568  (se = 0.019 )
## Likelihood ratio test= 19.1  on 1 df,   p=1e-05
## Wald test            = 16.65  on 1 df,   p=4e-05
## Score (logrank) test = 17.11  on 1 df,   p=4e-05
# Model comparision and averaging
# collect models and make AICc table
AICcmodavg::aictab(model.fits)
## 
## Model selection based on AICc:
## 
##                                                                           K
## 2.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge           5
## 6.~DBH+Predators+ MayTemp                   +JuneRain                     4
## 3.~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge           6
## 4.~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge           6
## 5.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density 6
## 7.~DBH+Predators+ MayTemp          +MayRain            +NestAge           5
## 8.~DBH+Predators+ MayTemp                              +NestAge           4
## 9.~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density 6
## 1.~DBH                                                                    1
##                                                                              AICc
## 2.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge           2010.66
## 6.~DBH+Predators+ MayTemp                   +JuneRain                     2010.75
## 3.~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge           2011.49
## 4.~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge           2012.33
## 5.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density 2012.48
## 7.~DBH+Predators+ MayTemp          +MayRain            +NestAge           2013.88
## 8.~DBH+Predators+ MayTemp                              +NestAge           2014.20
## 9.~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density 2015.19
## 1.~DBH                                                                    2026.34
##                                                                           Delta_AICc
## 2.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge                 0.00
## 6.~DBH+Predators+ MayTemp                   +JuneRain                           0.10
## 3.~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge                 0.83
## 4.~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge                 1.67
## 5.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density       1.82
## 7.~DBH+Predators+ MayTemp          +MayRain            +NestAge                 3.22
## 8.~DBH+Predators+ MayTemp                              +NestAge                 3.54
## 9.~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density       4.53
## 1.~DBH                                                                         15.69
##                                                                           AICcWt
## 2.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge             0.25
## 6.~DBH+Predators+ MayTemp                   +JuneRain                       0.24
## 3.~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge             0.17
## 4.~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge             0.11
## 5.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density   0.10
## 7.~DBH+Predators+ MayTemp          +MayRain            +NestAge             0.05
## 8.~DBH+Predators+ MayTemp                              +NestAge             0.04
## 9.~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density   0.03
## 1.~DBH                                                                      0.00
##                                                                           Cum.Wt
## 2.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge             0.25
## 6.~DBH+Predators+ MayTemp                   +JuneRain                       0.50
## 3.~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge             0.67
## 4.~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge             0.78
## 5.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density   0.88
## 7.~DBH+Predators+ MayTemp          +MayRain            +NestAge             0.93
## 8.~DBH+Predators+ MayTemp                              +NestAge             0.97
## 9.~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density   1.00
## 1.~DBH                                                                      1.00
##                                                                                 LL
## 2.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge           -1000.32
## 6.~DBH+Predators+ MayTemp                   +JuneRain                     -1001.37
## 3.~DBH+Predators+ MayTemp          +MayRain +JuneRain  +NestAge            -999.73
## 4.~DBH+Predators+ MayTemp+JuneTemp          +JuneRain  +NestAge           -1000.16
## 5.~DBH+Predators+ MayTemp                   +JuneRain  +NestAge + Density -1000.23
## 7.~DBH+Predators+ MayTemp          +MayRain            +NestAge           -1001.93
## 8.~DBH+Predators+ MayTemp                              +NestAge           -1003.10
## 9.~DBH+Predators+ MayTemp          +MayRain            +NestAge + Density -1001.59
## 1.~DBH                                                                    -1012.17
# Estimates of beta from the top model
fit.best <- model.fits[[2]]
summary(fit.best)
## Call:
## coxph(formula = as.formula(paste("Surv", eval(x$S))), data = input.data)
## 
##   n= 5374, number of events= 208 
## 
##                coef exp(coef)  se(coef)      z Pr(>|z|)    
## DBH       -0.021668  0.978565  0.005399 -4.013 5.99e-05 ***
## Predators  0.080875  1.084236  0.031459  2.571  0.01015 *  
## MayTemp   -0.186446  0.829904  0.063393 -2.941  0.00327 ** 
## JuneRain  -0.003219  0.996786  0.001353 -2.380  0.01732 *  
## NestAge   -0.019749  0.980445  0.013571 -1.455  0.14560    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## DBH          0.9786     1.0219    0.9683    0.9890
## Predators    1.0842     0.9223    1.0194    1.1532
## MayTemp      0.8299     1.2050    0.7329    0.9397
## JuneRain     0.9968     1.0032    0.9941    0.9994
## NestAge      0.9804     1.0199    0.9547    1.0069
## 
## Concordance= 0.645  (se = 0.021 )
## Likelihood ratio test= 42.8  on 5 df,   p=4e-08
## Wald test            = 40.8  on 5 df,   p=1e-07
## Score (logrank) test = 41.89  on 5 df,   p=6e-08
# baseline cumulative hazard
cumhaz <- basehaz(fit.best)
# estimate change in cumulative hazard and plot
cumhaz$deltaHaz <- c(NA,diff(cumhaz$hazard))
basehaz.plot <- ggplot(data=cumhaz, aes(x=time, y=deltaHaz))+
  ggtitle("Estimated baseline hazard function over time")+
  geom_point()+
  geom_smooth(se=FALSE)
basehaz.plot
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(basehaz.plot,
    file=file.path("..","..","..","..","MyStuff","Images","sherry-ph-basehazard.png"), h=4, w=6, units="in", dpi=300)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).
# test the assumption of proportionality
# look for approximate parallelism of the curveys
cox.zph(fit.best)
##               rho chisq     p
## DBH       -0.0604 0.876 0.349
## Predators  0.0291 0.164 0.686
## MayTemp   -0.0764 1.099 0.294
## JuneRain   0.0657 0.841 0.359
## NestAge    0.0968 1.768 0.184
## GLOBAL         NA 5.173 0.395
ggcoxzph(cox.zph(fit.best))

png(file=file.path("..","..","..","..","MyStuff","Images","sherry-ph-best-testprop.png"), h=4, w=6, units="in", res=300)
ggcoxzph(cox.zph(fit.best))
dev.off()
## quartz_off_screen 
##                 2