# Analysis of mallard data illustrating proportional hazard model and compare to logistic exposure model
# How to fit a nest level categorical variable (habitat)
# 2019-05-01 CJS Initial code
# This is the mallard data that ships with RMark
library(AICcmodavg)
library(emmeans)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(plyr)
library(readxl)
library(survival)
library(survminer)
## Loading required package: ggpubr
## Loading required package: magrittr
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
source(file.path("..","..","logistic-exposure-model.R"))
# The dataframe must contain the following fields with the following names
#
# FirstFound: day the nest was first found
# LastPresent: last day that a chick was present in the nest
# LastChecked: last day the nest was checked
# Fate: fate of the nest; 0=hatch an
# Freq: number of nests with this data
#
# Also contains the following fields
# Robel - Measurement of Robel pole of visibility of nest
# PpnGrass - proportion of grassland cover on the 10.4 km2 study site t
# AgeFound - Age of nest when found
# AgeDay1 - Age of nest on day 1 of study (can be negative)
# Habitat - N=Native; P=Planted; W=Wetland; R=roadside right of way
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#
malldata <- readxl::read_excel(file.path("..","mallard.xlsx"),
sheet="mallard")
head(malldata)
## # A tibble: 6 x 10
## FirstFound LastPresent LastChecked Fate Freq Robel PpnGrass AgeFound
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 73 89 89 0 1 6 0.800 13
## 2 63 90 90 0 1 3.75 0.668 5
## 3 70 70 76 1 1 3.75 0.800 13
## 4 63 81 85 1 1 3.12 0.668 6
## 5 61 61 66 1 1 4.5 0.668 4
## 6 57 57 61 1 1 4.25 0.668 1
## # … with 2 more variables: AgeDay1 <dbl>, Habitat <chr>
malldata2 <- expand.nest.data(malldata)
malldata2$Habitat <- factor(malldata2$Habitat) # Convert categorical variable to be factors
# Fit a particular model
# This is a model with S varying by habitat type
mod.hab <- glm(Survive~Habitat,
family=binomial(link=logexp(malldata2$Exposure)),
data=malldata2)
summary(mod.hab)
##
## Call:
## glm(formula = Survive ~ Habitat, family = binomial(link = logexp(malldata2$Exposure)),
## data = malldata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5021 0.2990 0.2990 0.3333 0.3333
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.86293 0.09508 30.111 <2e-16 ***
## HabitatP 0.22268 0.12234 1.820 0.0687 .
## HabitatR 0.21111 0.22719 0.929 0.3528
## HabitatW 0.09291 0.23509 0.395 0.6927
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2494.8 on 6125 degrees of freedom
## Residual deviance: 1564.0 on 6122 degrees of freedom
## AIC: 1572
##
## Number of Fisher Scoring iterations: 7
# fit a null models
mod.null <- glm(Survive~1,
family=binomial(link=logexp(malldata2$Exposure)),
data=malldata2)
summary(mod.null)
##
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(malldata2$Exposure)),
## data = malldata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4714 0.3109 0.3109 0.3109 0.3109
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.00565 0.05551 54.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2494.8 on 6125 degrees of freedom
## Residual deviance: 1567.1 on 6125 degrees of freedom
## AIC: 1569.1
##
## Number of Fisher Scoring iterations: 7
AICcmodavg::aictab( list(mod.hab, mod.null))
## Warning in aictab.AICglm.lm(list(mod.hab, mod.null)):
## Model names have been supplied automatically in the table
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## Mod2 1 1569.12 0.00 0.81 0.81 -783.56
## Mod1 4 1571.96 2.84 0.19 1.00 -781.98
### [proportional hazard model
malldata3 <- expand.nest.data.ph(malldata)
head(malldata3[,c("FirstFound","LastPresent","LastChecked","Fate","Start","End","Fail","Surv")], n=100)
## FirstFound LastPresent LastChecked Fate Start End Fail Surv
## 1 73 89 89 0 73 74 0 (73,74+]
## 2 73 89 89 0 74 75 0 (74,75+]
## 3 73 89 89 0 75 76 0 (75,76+]
## 4 73 89 89 0 76 77 0 (76,77+]
## 5 73 89 89 0 77 78 0 (77,78+]
## 6 73 89 89 0 78 79 0 (78,79+]
## 7 73 89 89 0 79 80 0 (79,80+]
## 8 73 89 89 0 80 81 0 (80,81+]
## 9 73 89 89 0 81 82 0 (81,82+]
## 10 73 89 89 0 82 83 0 (82,83+]
## 11 73 89 89 0 83 84 0 (83,84+]
## 12 73 89 89 0 84 85 0 (84,85+]
## 13 73 89 89 0 85 86 0 (85,86+]
## 14 73 89 89 0 86 87 0 (86,87+]
## 15 73 89 89 0 87 88 0 (87,88+]
## 16 73 89 89 0 88 89 0 (88,89+]
## 17 63 90 90 0 63 64 0 (63,64+]
## 18 63 90 90 0 64 65 0 (64,65+]
## 19 63 90 90 0 65 66 0 (65,66+]
## 20 63 90 90 0 66 67 0 (66,67+]
## 21 63 90 90 0 67 68 0 (67,68+]
## 22 63 90 90 0 68 69 0 (68,69+]
## 23 63 90 90 0 69 70 0 (69,70+]
## 24 63 90 90 0 70 71 0 (70,71+]
## 25 63 90 90 0 71 72 0 (71,72+]
## 26 63 90 90 0 72 73 0 (72,73+]
## 27 63 90 90 0 73 74 0 (73,74+]
## 28 63 90 90 0 74 75 0 (74,75+]
## 29 63 90 90 0 75 76 0 (75,76+]
## 30 63 90 90 0 76 77 0 (76,77+]
## 31 63 90 90 0 77 78 0 (77,78+]
## 32 63 90 90 0 78 79 0 (78,79+]
## 33 63 90 90 0 79 80 0 (79,80+]
## 34 63 90 90 0 80 81 0 (80,81+]
## 35 63 90 90 0 81 82 0 (81,82+]
## 36 63 90 90 0 82 83 0 (82,83+]
## 37 63 90 90 0 83 84 0 (83,84+]
## 38 63 90 90 0 84 85 0 (84,85+]
## 39 63 90 90 0 85 86 0 (85,86+]
## 40 63 90 90 0 86 87 0 (86,87+]
## 41 63 90 90 0 87 88 0 (87,88+]
## 42 63 90 90 0 88 89 0 (88,89+]
## 43 63 90 90 0 89 90 0 (89,90+]
## 44 70 70 76 1 70 76 1 (70,76]
## 45 63 81 85 1 63 64 0 (63,64+]
## 46 63 81 85 1 64 65 0 (64,65+]
## 47 63 81 85 1 65 66 0 (65,66+]
## 48 63 81 85 1 66 67 0 (66,67+]
## 49 63 81 85 1 67 68 0 (67,68+]
## 50 63 81 85 1 68 69 0 (68,69+]
## 51 63 81 85 1 69 70 0 (69,70+]
## 52 63 81 85 1 70 71 0 (70,71+]
## 53 63 81 85 1 71 72 0 (71,72+]
## 54 63 81 85 1 72 73 0 (72,73+]
## 55 63 81 85 1 73 74 0 (73,74+]
## 56 63 81 85 1 74 75 0 (74,75+]
## 57 63 81 85 1 75 76 0 (75,76+]
## 58 63 81 85 1 76 77 0 (76,77+]
## 59 63 81 85 1 77 78 0 (77,78+]
## 60 63 81 85 1 78 79 0 (78,79+]
## 61 63 81 85 1 79 80 0 (79,80+]
## 62 63 81 85 1 80 81 0 (80,81+]
## 63 63 81 85 1 81 85 1 (81,85]
## 64 61 61 66 1 61 66 1 (61,66]
## 65 57 57 61 1 57 61 1 (57,61]
## 66 67 88 88 0 67 68 0 (67,68+]
## 67 67 88 88 0 68 69 0 (68,69+]
## 68 67 88 88 0 69 70 0 (69,70+]
## 69 67 88 88 0 70 71 0 (70,71+]
## 70 67 88 88 0 71 72 0 (71,72+]
## 71 67 88 88 0 72 73 0 (72,73+]
## 72 67 88 88 0 73 74 0 (73,74+]
## 73 67 88 88 0 74 75 0 (74,75+]
## 74 67 88 88 0 75 76 0 (75,76+]
## 75 67 88 88 0 76 77 0 (76,77+]
## 76 67 88 88 0 77 78 0 (77,78+]
## 77 67 88 88 0 78 79 0 (78,79+]
## 78 67 88 88 0 79 80 0 (79,80+]
## 79 67 88 88 0 80 81 0 (80,81+]
## 80 67 88 88 0 81 82 0 (81,82+]
## 81 67 88 88 0 82 83 0 (82,83+]
## 82 67 88 88 0 83 84 0 (83,84+]
## 83 67 88 88 0 84 85 0 (84,85+]
## 84 67 88 88 0 85 86 0 (85,86+]
## 85 67 88 88 0 86 87 0 (86,87+]
## 86 67 88 88 0 87 88 0 (87,88+]
## 87 58 87 87 0 58 59 0 (58,59+]
## 88 58 87 87 0 59 60 0 (59,60+]
## 89 58 87 87 0 60 61 0 (60,61+]
## 90 58 87 87 0 61 62 0 (61,62+]
## 91 58 87 87 0 62 63 0 (62,63+]
## 92 58 87 87 0 63 64 0 (63,64+]
## 93 58 87 87 0 64 65 0 (64,65+]
## 94 58 87 87 0 65 66 0 (65,66+]
## 95 58 87 87 0 66 67 0 (66,67+]
## 96 58 87 87 0 67 68 0 (67,68+]
## 97 58 87 87 0 68 69 0 (68,69+]
## 98 58 87 87 0 69 70 0 (69,70+]
## 99 58 87 87 0 70 71 0 (70,71+]
## 100 58 87 87 0 71 72 0 (71,72+]
malldata3$Habitat <- factor(malldata3$Habitat) # Convert categorical variable to be factors
mod.hab.ph <- coxph(Surv~Habitat,data=malldata3)
summary(mod.hab.ph)
## Call:
## coxph(formula = Surv ~ Habitat, data = malldata3)
##
## n= 6126, number of events= 317
##
## coef exp(coef) se(coef) z Pr(>|z|)
## HabitatP -0.1996 0.8190 0.1249 -1.598 0.110
## HabitatR -0.2059 0.8139 0.2300 -0.895 0.371
## HabitatW -0.0697 0.9327 0.2403 -0.290 0.772
##
## exp(coef) exp(-coef) lower .95 upper .95
## HabitatP 0.8190 1.221 0.6412 1.046
## HabitatR 0.8139 1.229 0.5185 1.278
## HabitatW 0.9327 1.072 0.5824 1.494
##
## Concordance= 0.518 (se = 0.014 )
## Likelihood ratio test= 2.73 on 3 df, p=0.4
## Wald test = 2.78 on 3 df, p=0.4
## Score (logrank) test = 2.79 on 3 df, p=0.4
car::Anova(mod.hab.ph, type=3)
## Analysis of Deviance Table
## Cox model: response is Surv
## Terms added sequentially (first to last)
##
## loglik Chisq Df Pr(>|Chi|)
## NULL -1481.3
## Habitat -1480.0 2.7347 3 0.4344
mod.hab.ph.emmo <- emmeans::emmeans(mod.hab.ph, ~Habitat)
summary(pairs(mod.hab.ph.emmo), infer=TRUE)
## contrast estimate SE df asymp.LCL asymp.UCL z.ratio p.value
## N - P 0.19964 0.125 NA -0.121 0.521 1.598 0.3795
## N - R 0.20588 0.230 NA -0.385 0.797 0.895 0.8075
## N - W 0.06970 0.240 NA -0.548 0.687 0.290 0.9915
## P - R 0.00624 0.224 NA -0.568 0.581 0.028 1.0000
## P - W -0.12993 0.234 NA -0.731 0.471 -0.556 0.9450
## R - W -0.13618 0.304 NA -0.917 0.645 -0.448 0.9700
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 4 estimates
## P value adjustment: tukey method for comparing a family of 4 estimates
summary(pairs(mod.hab.ph.emmo, type="response"), infer=TRUE)
## contrast ratio SE df asymp.LCL asymp.UCL z.ratio p.value
## N / P 1.221 0.153 NA 0.886 1.68 1.598 0.3795
## N / R 1.229 0.283 NA 0.680 2.22 0.895 0.8075
## N / W 1.072 0.258 NA 0.578 1.99 0.290 0.9915
## P / R 1.006 0.225 NA 0.567 1.79 0.028 1.0000
## P / W 0.878 0.205 NA 0.482 1.60 -0.556 0.9450
## R / W 0.873 0.265 NA 0.400 1.91 -0.448 0.9700
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 4 estimates
## Intervals are back-transformed from the log scale
## P value adjustment: tukey method for comparing a family of 4 estimates
## Tests are performed on the log scale
# baseline cumulative hazard
cumhaz <- basehaz(mod.hab.ph)
# estimate change in cumulative hazard and plot
cumhaz$deltaHaz <- c(NA,diff(cumhaz$hazard))
basehaz.plot <- ggplot(data=cumhaz, aes(x=time, y=deltaHaz))+
ggtitle("Estimated baseline hazard function over time")+
geom_point()+
geom_smooth(se=FALSE)
basehaz.plot
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(basehaz.plot,
file=file.path("..","..","..","..","MyStuff","Images","mallard-ph-basehazard.png"), h=4, w=6, units="in", dpi=300)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# test the assumption of proportionality
# look for approximate parallelism of the curveys
cox.zph(mod.hab.ph)
## rho chisq p
## HabitatP -0.09736 3.05475 0.0805
## HabitatR 0.00341 0.00368 0.9516
## HabitatW -0.08343 2.19603 0.1384
## GLOBAL NA 4.56614 0.2065
ggcoxzph(cox.zph(mod.hab.ph))

png(file=file.path("..","..","..","..","MyStuff","Images","mallard-ph-hab-testprop.png"), h=4, w=6, units="in", res=300)
ggcoxzph(cox.zph(mod.hab.ph))
dev.off()
## quartz_off_screen
## 2
# estimated survival curves for the two habitats
pred.data <- expand.grid( Habitat=unique(malldata3$Habitat))
pred.survival <- survfit(mod.hab.ph, newdata=pred.data, se.fit=TRUE)
plot(pred.survival)

mod.null.ph <- coxph(Surv~1, data=malldata3)
AICcmodavg::aictab( list(mod.hab.ph, mod.null.ph))
## Warning in aictab.AICcoxph(list(mod.hab.ph, mod.null.ph)):
## Model names have been supplied automatically in the table
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## Mod2 0 2962.64 0.00 0.84 0.84 -1481.32
## Mod1 3 2965.91 3.27 0.16 1.00 -1479.95