# Analysis of mallard data using logistic exposure model vs proportional hazard model
# How to fit a nest level continous covariate (Robel height

# 2019-05-01 CJS Initial code

# This is the mallard data that ships with RMark

library(AICcmodavg)
library(emmeans)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(plyr)
library(readxl)
library(survival)
library(survminer)
## Loading required package: ggpubr
## Loading required package: magrittr
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
source(file.path("..","..","logistic-exposure-model.R"))

# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch an
#    Freq: number of nests with this data
#   
# Also contains the following fields
#   Robel    -  Measurement of Robel pole of visibility of nest
#   PpnGrass - proportion of grassland cover on the 10.4 km2 study site t
#   AgeFound - Age of nest when found
#   AgeDay1  - Age of nest on day 1 of study (can be negative)
#   Habitat - N=Native; P=Planted; W=Wetland; R=roadside right of way
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#

malldata <- readxl::read_excel(file.path("..","mallard.xlsx"), 
                               sheet="mallard")
head(malldata)
## # A tibble: 6 x 10
##   FirstFound LastPresent LastChecked  Fate  Freq Robel PpnGrass AgeFound
##        <dbl>       <dbl>       <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1         73          89          89     0     1  6       0.800       13
## 2         63          90          90     0     1  3.75    0.668        5
## 3         70          70          76     1     1  3.75    0.800       13
## 4         63          81          85     1     1  3.12    0.668        6
## 5         61          61          66     1     1  4.5     0.668        4
## 6         57          57          61     1     1  4.25    0.668        1
## # … with 2 more variables: AgeDay1 <dbl>, Habitat <chr>
malldata2 <- expand.nest.data(malldata)

malldata2$Habitat <- factor(malldata2$Habitat)  # RMark wants categorical variable to be factors



# Fit a particular model
# This is a model with S varying by robel height
mod.rob <- glm(Survive~Robel,
         family=binomial(link=logexp(malldata2$Exposure)),
         data=malldata2)
summary(mod.rob)
## 
## Call:
## glm(formula = Survive ~ Robel, family = binomial(link = logexp(malldata2$Exposure)), 
##     data = malldata2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4814   0.3070   0.3106   0.3142   0.3232  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.90884    0.16827  17.287   <2e-16 ***
## Robel        0.02727    0.04499   0.606    0.544    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2494.8  on 6125  degrees of freedom
## Residual deviance: 1566.8  on 6124  degrees of freedom
## AIC: 1570.8
## 
## Number of Fisher Scoring iterations: 7
# fit a null models
mod.null <-  glm(Survive~1,
         family=binomial(link=logexp(malldata2$Exposure)),
         data=malldata2)
summary(mod.null)
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(malldata2$Exposure)), 
##     data = malldata2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4714   0.3109   0.3109   0.3109   0.3109  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.00565    0.05551   54.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2494.8  on 6125  degrees of freedom
## Residual deviance: 1567.1  on 6125  degrees of freedom
## AIC: 1569.1
## 
## Number of Fisher Scoring iterations: 7
AICcmodavg::aictab( list(mod.rob, mod.null))
## Warning in aictab.AICglm.lm(list(mod.rob, mod.null)): 
## Model names have been supplied automatically in the table
## 
## Model selection based on AICc:
## 
##      K    AICc Delta_AICc AICcWt Cum.Wt      LL
## Mod2 1 1569.12       0.00    0.7    0.7 -783.56
## Mod1 2 1570.77       1.66    0.3    1.0 -783.39
malldata3 <- expand.nest.data.ph(malldata)


mod.rob.ph <-  coxph(Surv~Robel,data=malldata3)
summary(mod.rob.ph)
## Call:
## coxph(formula = Surv ~ Robel, data = malldata3)
## 
##   n= 6126, number of events= 317 
## 
##           coef exp(coef) se(coef)      z Pr(>|z|)
## Robel -0.03877   0.96197  0.04888 -0.793    0.428
## 
##       exp(coef) exp(-coef) lower .95 upper .95
## Robel     0.962       1.04    0.8741     1.059
## 
## Concordance= 0.512  (se = 0.014 )
## Likelihood ratio test= 0.63  on 1 df,   p=0.4
## Wald test            = 0.63  on 1 df,   p=0.4
## Score (logrank) test = 0.63  on 1 df,   p=0.4
# test the assumption of proportionality
# look for approximate parallelism of the curveys
cox.zph(mod.rob.ph)
##           rho chisq     p
## Robel -0.0829  2.02 0.155
ggcoxzph(cox.zph(mod.rob.ph))

png(file=file.path("..","..","..","..","MyStuff","Images","mallard-ph-rob-testprop.png"), h=4, w=6, units="in", res=300)
ggcoxzph(cox.zph(mod.rob.ph))
dev.off()
## quartz_off_screen 
##                 2
# baseline cumulative hazard
cumhaz <- basehaz(mod.rob.ph)
# estimate change in cumulative hazard and plot
cumhaz$deltaHaz <- c(NA,diff(cumhaz$hazard))
ggplot(data=cumhaz, aes(x=time, y=deltaHaz))+
  ggtitle("Estimated baseline hazard function over time")+
  geom_point()+
  geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# estimated hazard curves at three levels of the Robel
pred.data <- expand.grid( Robel=seq(min(malldata3$Robel),max(malldata3$Robel), length.out=3))
pred.survival <- survfit(mod.rob.ph, newdata=pred.data, se.fit=TRUE)
plot(pred.survival)

mod.null.ph <- coxph(Surv~1, data=malldata3)


AICcmodavg::aictab( list(mod.rob.ph, mod.null.ph))
## Warning in aictab.AICcoxph(list(mod.rob.ph, mod.null.ph)): 
## Model names have been supplied automatically in the table
## 
## Model selection based on AICc:
## 
##      K    AICc Delta_AICc AICcWt Cum.Wt       LL
## Mod2 0 2962.64       0.00   0.66   0.66 -1481.32
## Mod1 1 2964.01       1.37   0.34   1.00 -1481.00