# Analysis of Redstart data illustrating Cox PH models
# Reproduce Table 1b.
# 2019-05-01 CJS Initial code
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015)
# Impacts of nest predators and weather on reproductive success and
# population limitation in a long-distance migratory songbird.
# Journal of Avian Biology 46(6): 559-569. https://doi.org/10.1111/jav.00536
# Data from
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015)
# Data from: Impacts of nest predators and weather on reproductive success and
# population limitation in a long-distance migratory songbird.
# Dryad Digital Repository. https://doi.org/10.5061/dryad.73870
library(AICcmodavg)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(plyr)
library(readxl)
library(survival)
library(survminer)
## Loading required package: ggpubr
## Loading required package: magrittr
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
source(file.path("..","..","CoxPH-additional-functions.R"))
# The dataframe must contain the following fields with the following names
#
# FirstFound: day the nest was first found
# LastPresent: last day that a chick was present in the nest
# LastChecked: last day the nest was checked
# Fate: fate of the nest; 0=hatch an
# Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#
reddata <- readxl::read_excel(file.path("..","Sherry.xlsx"),
sheet="NestData")
head(reddata)
## # A tibble: 6 x 11
## NestId FirstFound LastPresent LastChecked Fate Freq Height BaffleStatus
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 /*198… 11 11 18 1 1 16.9 N
## 2 /*198… 13 22 23 1 1 10.5 N
## 3 /*198… 7 26 26 0 1 4.8 N
## 4 /*198… 12 14 20 1 1 3.7 N
## 5 /*198… 6 20 25 1 1 10.7 N
## 6 /*198… 11 19 20 1 1 17.8 N
## # … with 3 more variables: DBH <dbl>, AgeDay1 <dbl>, Year <chr>
reddata <- as.data.frame(reddata)
# get the yearly date and merge with the nest data
annual.data <- readxl::read_excel(file.path("..","Sherry.xlsx"),
sheet="AnnualCovariates", skip=9)
head(annual.data)
## # A tibble: 6 x 7
## Year Density Predators MayTemp JuneTemp MayRain JuneRain
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1983 0.867 2.4 8.89 17.1 194. 35.3
## 2 1984 0.467 0.5 9.06 16.2 312. 180.
## 3 1985 0.589 3.4 10.6 13.2 107. 130
## 4 1986 0.439 6 12.2 13.4 128. 89.6
## 5 1987 0.372 1.7 10.9 15.4 91.6 200.
## 6 1988 0.361 0.3 12 14.4 97.8 70.2
dim(reddata)
## [1] 537 11
reddata <- merge(reddata, annual.data, all.x=TRUE)
dim(reddata)
## [1] 537 17
# any missing data?
sum(!complete.cases(reddata))
## [1] 0
# as noted in the paper, we remove nests with baffeling
dim(reddata)
## [1] 537 17
reddata <- reddata[ !(reddata$BaffleStatus=="Y"),]
dim(reddata)
## [1] 466 17
# expand the data
reddata2 <- expand.nest.data.ph(reddata)
rm(reddata)
# create factor variable for year
reddata2$YearF <- factor(reddata2$Year)
# Set up the set of model to fit
model.list.csv <- textConnection(
" S
~DBH
~DBH+Predators+ MayTemp +JuneRain +NestAge
~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge
~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge
~DBH+Predators+ MayTemp +JuneRain +NestAge + Density
~DBH+Predators+ MayTemp +JuneRain
~DBH+Predators+ MayTemp +MayRain +NestAge
~DBH+Predators+ MayTemp +NestAge
~DBH+Predators+ MayTemp +MayRain +NestAge + Density
")
model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list
## S
## 1 ~DBH
## 2 ~DBH+Predators+ MayTemp +JuneRain +NestAge
## 3 ~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge
## 4 ~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge
## 5 ~DBH+Predators+ MayTemp +JuneRain +NestAge + Density
## 6 ~DBH+Predators+ MayTemp +JuneRain
## 7 ~DBH+Predators+ MayTemp +MayRain +NestAge
## 8 ~DBH+Predators+ MayTemp +NestAge
## 9 ~DBH+Predators+ MayTemp +MayRain +NestAge + Density
## model.number
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
model.fits <- plyr::dlply(model.list, c("model.number","S"), function(x,input.data, input.ddl){
cat("\n\n***** Starting ", unlist(x), "\n")
fit <- coxph(formula=as.formula(paste("Surv", eval(x$S))),
data=input.data)
fit
},input.data=reddata2)
##
##
## ***** Starting ~DBH 1
##
##
## ***** Starting ~DBH+Predators+ MayTemp +JuneRain +NestAge 2
##
##
## ***** Starting ~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge 3
##
##
## ***** Starting ~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge 4
##
##
## ***** Starting ~DBH+Predators+ MayTemp +JuneRain +NestAge + Density 5
##
##
## ***** Starting ~DBH+Predators+ MayTemp +JuneRain 6
##
##
## ***** Starting ~DBH+Predators+ MayTemp +MayRain +NestAge 7
##
##
## ***** Starting ~DBH+Predators+ MayTemp +NestAge 8
##
##
## ***** Starting ~DBH+Predators+ MayTemp +MayRain +NestAge + Density 9
# examine individual model results
model.number <-1
summary(model.fits[[model.number]])
## Call:
## coxph(formula = as.formula(paste("Surv", eval(x$S))), data = input.data)
##
## n= 5374, number of events= 208
##
## coef exp(coef) se(coef) z Pr(>|z|)
## DBH -0.022144 0.978100 0.005427 -4.08 4.5e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## DBH 0.9781 1.022 0.9678 0.9886
##
## Concordance= 0.568 (se = 0.019 )
## Likelihood ratio test= 19.1 on 1 df, p=1e-05
## Wald test = 16.65 on 1 df, p=4e-05
## Score (logrank) test = 17.11 on 1 df, p=4e-05
# Model comparision and averaging
# collect models and make AICc table
AICcmodavg::aictab(model.fits)
##
## Model selection based on AICc:
##
## K
## 2.~DBH+Predators+ MayTemp +JuneRain +NestAge 5
## 6.~DBH+Predators+ MayTemp +JuneRain 4
## 3.~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge 6
## 4.~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge 6
## 5.~DBH+Predators+ MayTemp +JuneRain +NestAge + Density 6
## 7.~DBH+Predators+ MayTemp +MayRain +NestAge 5
## 8.~DBH+Predators+ MayTemp +NestAge 4
## 9.~DBH+Predators+ MayTemp +MayRain +NestAge + Density 6
## 1.~DBH 1
## AICc
## 2.~DBH+Predators+ MayTemp +JuneRain +NestAge 2010.66
## 6.~DBH+Predators+ MayTemp +JuneRain 2010.75
## 3.~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge 2011.49
## 4.~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge 2012.33
## 5.~DBH+Predators+ MayTemp +JuneRain +NestAge + Density 2012.48
## 7.~DBH+Predators+ MayTemp +MayRain +NestAge 2013.88
## 8.~DBH+Predators+ MayTemp +NestAge 2014.20
## 9.~DBH+Predators+ MayTemp +MayRain +NestAge + Density 2015.19
## 1.~DBH 2026.34
## Delta_AICc
## 2.~DBH+Predators+ MayTemp +JuneRain +NestAge 0.00
## 6.~DBH+Predators+ MayTemp +JuneRain 0.10
## 3.~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge 0.83
## 4.~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge 1.67
## 5.~DBH+Predators+ MayTemp +JuneRain +NestAge + Density 1.82
## 7.~DBH+Predators+ MayTemp +MayRain +NestAge 3.22
## 8.~DBH+Predators+ MayTemp +NestAge 3.54
## 9.~DBH+Predators+ MayTemp +MayRain +NestAge + Density 4.53
## 1.~DBH 15.69
## AICcWt
## 2.~DBH+Predators+ MayTemp +JuneRain +NestAge 0.25
## 6.~DBH+Predators+ MayTemp +JuneRain 0.24
## 3.~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge 0.17
## 4.~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge 0.11
## 5.~DBH+Predators+ MayTemp +JuneRain +NestAge + Density 0.10
## 7.~DBH+Predators+ MayTemp +MayRain +NestAge 0.05
## 8.~DBH+Predators+ MayTemp +NestAge 0.04
## 9.~DBH+Predators+ MayTemp +MayRain +NestAge + Density 0.03
## 1.~DBH 0.00
## Cum.Wt
## 2.~DBH+Predators+ MayTemp +JuneRain +NestAge 0.25
## 6.~DBH+Predators+ MayTemp +JuneRain 0.50
## 3.~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge 0.67
## 4.~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge 0.78
## 5.~DBH+Predators+ MayTemp +JuneRain +NestAge + Density 0.88
## 7.~DBH+Predators+ MayTemp +MayRain +NestAge 0.93
## 8.~DBH+Predators+ MayTemp +NestAge 0.97
## 9.~DBH+Predators+ MayTemp +MayRain +NestAge + Density 1.00
## 1.~DBH 1.00
## LL
## 2.~DBH+Predators+ MayTemp +JuneRain +NestAge -1000.32
## 6.~DBH+Predators+ MayTemp +JuneRain -1001.37
## 3.~DBH+Predators+ MayTemp +MayRain +JuneRain +NestAge -999.73
## 4.~DBH+Predators+ MayTemp+JuneTemp +JuneRain +NestAge -1000.16
## 5.~DBH+Predators+ MayTemp +JuneRain +NestAge + Density -1000.23
## 7.~DBH+Predators+ MayTemp +MayRain +NestAge -1001.93
## 8.~DBH+Predators+ MayTemp +NestAge -1003.10
## 9.~DBH+Predators+ MayTemp +MayRain +NestAge + Density -1001.59
## 1.~DBH -1012.17
# Estimates of beta from the top model
fit.best <- model.fits[[2]]
summary(fit.best)
## Call:
## coxph(formula = as.formula(paste("Surv", eval(x$S))), data = input.data)
##
## n= 5374, number of events= 208
##
## coef exp(coef) se(coef) z Pr(>|z|)
## DBH -0.021668 0.978565 0.005399 -4.013 5.99e-05 ***
## Predators 0.080875 1.084236 0.031459 2.571 0.01015 *
## MayTemp -0.186446 0.829904 0.063393 -2.941 0.00327 **
## JuneRain -0.003219 0.996786 0.001353 -2.380 0.01732 *
## NestAge -0.019749 0.980445 0.013571 -1.455 0.14560
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## DBH 0.9786 1.0219 0.9683 0.9890
## Predators 1.0842 0.9223 1.0194 1.1532
## MayTemp 0.8299 1.2050 0.7329 0.9397
## JuneRain 0.9968 1.0032 0.9941 0.9994
## NestAge 0.9804 1.0199 0.9547 1.0069
##
## Concordance= 0.645 (se = 0.021 )
## Likelihood ratio test= 42.8 on 5 df, p=4e-08
## Wald test = 40.8 on 5 df, p=1e-07
## Score (logrank) test = 41.89 on 5 df, p=6e-08
# baseline cumulative hazard
cumhaz <- basehaz(fit.best)
# estimate change in cumulative hazard and plot
cumhaz$deltaHaz <- c(NA,diff(cumhaz$hazard))
basehaz.plot <- ggplot(data=cumhaz, aes(x=time, y=deltaHaz))+
ggtitle("Estimated baseline hazard function over time")+
geom_point()+
geom_smooth(se=FALSE)
basehaz.plot
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(basehaz.plot,
file=file.path("..","..","..","..","MyStuff","Images","sherry-ph-basehazard.png"), h=4, w=6, units="in", dpi=300)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# test the assumption of proportionality
# look for approximate parallelism of the curveys
cox.zph(fit.best)
## rho chisq p
## DBH -0.0604 0.876 0.349
## Predators 0.0291 0.164 0.686
## MayTemp -0.0764 1.099 0.294
## JuneRain 0.0657 0.841 0.359
## NestAge 0.0968 1.768 0.184
## GLOBAL NA 5.173 0.395
ggcoxzph(cox.zph(fit.best))

png(file=file.path("..","..","..","..","MyStuff","Images","sherry-ph-best-testprop.png"), h=4, w=6, units="in", res=300)
ggcoxzph(cox.zph(fit.best))
dev.off()
## quartz_off_screen
## 2