# Analysis of mallard data using logistic exposure model vs proportional hazard model
# How to fit a nest level continous covariate (Robel height
# 2019-05-01 CJS Initial code
# This is the mallard data that ships with RMark
library(AICcmodavg)
library(emmeans)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(plyr)
library(readxl)
library(survival)
library(survminer)
## Loading required package: ggpubr
## Loading required package: magrittr
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
source(file.path("..","..","logistic-exposure-model.R"))
# The dataframe must contain the following fields with the following names
#
# FirstFound: day the nest was first found
# LastPresent: last day that a chick was present in the nest
# LastChecked: last day the nest was checked
# Fate: fate of the nest; 0=hatch an
# Freq: number of nests with this data
#
# Also contains the following fields
# Robel - Measurement of Robel pole of visibility of nest
# PpnGrass - proportion of grassland cover on the 10.4 km2 study site t
# AgeFound - Age of nest when found
# AgeDay1 - Age of nest on day 1 of study (can be negative)
# Habitat - N=Native; P=Planted; W=Wetland; R=roadside right of way
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#
malldata <- readxl::read_excel(file.path("..","mallard.xlsx"),
sheet="mallard")
head(malldata)
## # A tibble: 6 x 10
## FirstFound LastPresent LastChecked Fate Freq Robel PpnGrass AgeFound
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 73 89 89 0 1 6 0.800 13
## 2 63 90 90 0 1 3.75 0.668 5
## 3 70 70 76 1 1 3.75 0.800 13
## 4 63 81 85 1 1 3.12 0.668 6
## 5 61 61 66 1 1 4.5 0.668 4
## 6 57 57 61 1 1 4.25 0.668 1
## # … with 2 more variables: AgeDay1 <dbl>, Habitat <chr>
malldata2 <- expand.nest.data(malldata)
malldata2$Habitat <- factor(malldata2$Habitat) # RMark wants categorical variable to be factors
# Fit a particular model
# This is a model with S varying by robel height
mod.rob <- glm(Survive~Robel,
family=binomial(link=logexp(malldata2$Exposure)),
data=malldata2)
summary(mod.rob)
##
## Call:
## glm(formula = Survive ~ Robel, family = binomial(link = logexp(malldata2$Exposure)),
## data = malldata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4814 0.3070 0.3106 0.3142 0.3232
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.90884 0.16827 17.287 <2e-16 ***
## Robel 0.02727 0.04499 0.606 0.544
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2494.8 on 6125 degrees of freedom
## Residual deviance: 1566.8 on 6124 degrees of freedom
## AIC: 1570.8
##
## Number of Fisher Scoring iterations: 7
# fit a null models
mod.null <- glm(Survive~1,
family=binomial(link=logexp(malldata2$Exposure)),
data=malldata2)
summary(mod.null)
##
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(malldata2$Exposure)),
## data = malldata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4714 0.3109 0.3109 0.3109 0.3109
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.00565 0.05551 54.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2494.8 on 6125 degrees of freedom
## Residual deviance: 1567.1 on 6125 degrees of freedom
## AIC: 1569.1
##
## Number of Fisher Scoring iterations: 7
AICcmodavg::aictab( list(mod.rob, mod.null))
## Warning in aictab.AICglm.lm(list(mod.rob, mod.null)):
## Model names have been supplied automatically in the table
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## Mod2 1 1569.12 0.00 0.7 0.7 -783.56
## Mod1 2 1570.77 1.66 0.3 1.0 -783.39
malldata3 <- expand.nest.data.ph(malldata)
mod.rob.ph <- coxph(Surv~Robel,data=malldata3)
summary(mod.rob.ph)
## Call:
## coxph(formula = Surv ~ Robel, data = malldata3)
##
## n= 6126, number of events= 317
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Robel -0.03877 0.96197 0.04888 -0.793 0.428
##
## exp(coef) exp(-coef) lower .95 upper .95
## Robel 0.962 1.04 0.8741 1.059
##
## Concordance= 0.512 (se = 0.014 )
## Likelihood ratio test= 0.63 on 1 df, p=0.4
## Wald test = 0.63 on 1 df, p=0.4
## Score (logrank) test = 0.63 on 1 df, p=0.4
# test the assumption of proportionality
# look for approximate parallelism of the curveys
cox.zph(mod.rob.ph)
## rho chisq p
## Robel -0.0829 2.02 0.155
ggcoxzph(cox.zph(mod.rob.ph))

png(file=file.path("..","..","..","..","MyStuff","Images","mallard-ph-rob-testprop.png"), h=4, w=6, units="in", res=300)
ggcoxzph(cox.zph(mod.rob.ph))
dev.off()
## quartz_off_screen
## 2
# baseline cumulative hazard
cumhaz <- basehaz(mod.rob.ph)
# estimate change in cumulative hazard and plot
cumhaz$deltaHaz <- c(NA,diff(cumhaz$hazard))
ggplot(data=cumhaz, aes(x=time, y=deltaHaz))+
ggtitle("Estimated baseline hazard function over time")+
geom_point()+
geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# estimated hazard curves at three levels of the Robel
pred.data <- expand.grid( Robel=seq(min(malldata3$Robel),max(malldata3$Robel), length.out=3))
pred.survival <- survfit(mod.rob.ph, newdata=pred.data, se.fit=TRUE)
plot(pred.survival)

mod.null.ph <- coxph(Surv~1, data=malldata3)
AICcmodavg::aictab( list(mod.rob.ph, mod.null.ph))
## Warning in aictab.AICcoxph(list(mod.rob.ph, mod.null.ph)):
## Model names have been supplied automatically in the table
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## Mod2 0 2962.64 0.00 0.66 0.66 -1481.32
## Mod1 1 2964.01 1.37 0.34 1.00 -1481.00