# Analysis of mallard data illustrating proportional hazard model and compare to logistic exposure model
# How to fit a nest level categorical variable (habitat)

# 2019-05-01 CJS Initial code

# This is the mallard data that ships with RMark

library(AICcmodavg)
library(emmeans)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(plyr)
library(readxl)
library(survival)
library(survminer)
## Loading required package: ggpubr
## Loading required package: magrittr
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
source(file.path("..","..","logistic-exposure-model.R"))

# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch an
#    Freq: number of nests with this data
#   
# Also contains the following fields
#   Robel    -  Measurement of Robel pole of visibility of nest
#   PpnGrass - proportion of grassland cover on the 10.4 km2 study site t
#   AgeFound - Age of nest when found
#   AgeDay1  - Age of nest on day 1 of study (can be negative)
#   Habitat - N=Native; P=Planted; W=Wetland; R=roadside right of way
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#

malldata <- readxl::read_excel(file.path("..","mallard.xlsx"), 
                               sheet="mallard")
head(malldata)
## # A tibble: 6 x 10
##   FirstFound LastPresent LastChecked  Fate  Freq Robel PpnGrass AgeFound
##        <dbl>       <dbl>       <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1         73          89          89     0     1  6       0.800       13
## 2         63          90          90     0     1  3.75    0.668        5
## 3         70          70          76     1     1  3.75    0.800       13
## 4         63          81          85     1     1  3.12    0.668        6
## 5         61          61          66     1     1  4.5     0.668        4
## 6         57          57          61     1     1  4.25    0.668        1
## # … with 2 more variables: AgeDay1 <dbl>, Habitat <chr>
malldata2 <- expand.nest.data(malldata)

malldata2$Habitat <- factor(malldata2$Habitat)  # Convert categorical variable to be factors

# Fit a particular model
# This is a model with S varying by habitat type
mod.hab <-  glm(Survive~Habitat,
         family=binomial(link=logexp(malldata2$Exposure)),
         data=malldata2)
summary(mod.hab)
## 
## Call:
## glm(formula = Survive ~ Habitat, family = binomial(link = logexp(malldata2$Exposure)), 
##     data = malldata2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5021   0.2990   0.2990   0.3333   0.3333  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.86293    0.09508  30.111   <2e-16 ***
## HabitatP     0.22268    0.12234   1.820   0.0687 .  
## HabitatR     0.21111    0.22719   0.929   0.3528    
## HabitatW     0.09291    0.23509   0.395   0.6927    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2494.8  on 6125  degrees of freedom
## Residual deviance: 1564.0  on 6122  degrees of freedom
## AIC: 1572
## 
## Number of Fisher Scoring iterations: 7
# fit a null models
mod.null <-  glm(Survive~1,
         family=binomial(link=logexp(malldata2$Exposure)),
         data=malldata2)
summary(mod.null)
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(malldata2$Exposure)), 
##     data = malldata2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4714   0.3109   0.3109   0.3109   0.3109  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.00565    0.05551   54.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2494.8  on 6125  degrees of freedom
## Residual deviance: 1567.1  on 6125  degrees of freedom
## AIC: 1569.1
## 
## Number of Fisher Scoring iterations: 7
AICcmodavg::aictab( list(mod.hab, mod.null))
## Warning in aictab.AICglm.lm(list(mod.hab, mod.null)): 
## Model names have been supplied automatically in the table
## 
## Model selection based on AICc:
## 
##      K    AICc Delta_AICc AICcWt Cum.Wt      LL
## Mod2 1 1569.12       0.00   0.81   0.81 -783.56
## Mod1 4 1571.96       2.84   0.19   1.00 -781.98
### [proportional hazard model

malldata3 <- expand.nest.data.ph(malldata)
head(malldata3[,c("FirstFound","LastPresent","LastChecked","Fate","Start","End","Fail","Surv")], n=100)
##     FirstFound LastPresent LastChecked Fate Start End Fail     Surv
## 1           73          89          89    0    73  74    0 (73,74+]
## 2           73          89          89    0    74  75    0 (74,75+]
## 3           73          89          89    0    75  76    0 (75,76+]
## 4           73          89          89    0    76  77    0 (76,77+]
## 5           73          89          89    0    77  78    0 (77,78+]
## 6           73          89          89    0    78  79    0 (78,79+]
## 7           73          89          89    0    79  80    0 (79,80+]
## 8           73          89          89    0    80  81    0 (80,81+]
## 9           73          89          89    0    81  82    0 (81,82+]
## 10          73          89          89    0    82  83    0 (82,83+]
## 11          73          89          89    0    83  84    0 (83,84+]
## 12          73          89          89    0    84  85    0 (84,85+]
## 13          73          89          89    0    85  86    0 (85,86+]
## 14          73          89          89    0    86  87    0 (86,87+]
## 15          73          89          89    0    87  88    0 (87,88+]
## 16          73          89          89    0    88  89    0 (88,89+]
## 17          63          90          90    0    63  64    0 (63,64+]
## 18          63          90          90    0    64  65    0 (64,65+]
## 19          63          90          90    0    65  66    0 (65,66+]
## 20          63          90          90    0    66  67    0 (66,67+]
## 21          63          90          90    0    67  68    0 (67,68+]
## 22          63          90          90    0    68  69    0 (68,69+]
## 23          63          90          90    0    69  70    0 (69,70+]
## 24          63          90          90    0    70  71    0 (70,71+]
## 25          63          90          90    0    71  72    0 (71,72+]
## 26          63          90          90    0    72  73    0 (72,73+]
## 27          63          90          90    0    73  74    0 (73,74+]
## 28          63          90          90    0    74  75    0 (74,75+]
## 29          63          90          90    0    75  76    0 (75,76+]
## 30          63          90          90    0    76  77    0 (76,77+]
## 31          63          90          90    0    77  78    0 (77,78+]
## 32          63          90          90    0    78  79    0 (78,79+]
## 33          63          90          90    0    79  80    0 (79,80+]
## 34          63          90          90    0    80  81    0 (80,81+]
## 35          63          90          90    0    81  82    0 (81,82+]
## 36          63          90          90    0    82  83    0 (82,83+]
## 37          63          90          90    0    83  84    0 (83,84+]
## 38          63          90          90    0    84  85    0 (84,85+]
## 39          63          90          90    0    85  86    0 (85,86+]
## 40          63          90          90    0    86  87    0 (86,87+]
## 41          63          90          90    0    87  88    0 (87,88+]
## 42          63          90          90    0    88  89    0 (88,89+]
## 43          63          90          90    0    89  90    0 (89,90+]
## 44          70          70          76    1    70  76    1  (70,76]
## 45          63          81          85    1    63  64    0 (63,64+]
## 46          63          81          85    1    64  65    0 (64,65+]
## 47          63          81          85    1    65  66    0 (65,66+]
## 48          63          81          85    1    66  67    0 (66,67+]
## 49          63          81          85    1    67  68    0 (67,68+]
## 50          63          81          85    1    68  69    0 (68,69+]
## 51          63          81          85    1    69  70    0 (69,70+]
## 52          63          81          85    1    70  71    0 (70,71+]
## 53          63          81          85    1    71  72    0 (71,72+]
## 54          63          81          85    1    72  73    0 (72,73+]
## 55          63          81          85    1    73  74    0 (73,74+]
## 56          63          81          85    1    74  75    0 (74,75+]
## 57          63          81          85    1    75  76    0 (75,76+]
## 58          63          81          85    1    76  77    0 (76,77+]
## 59          63          81          85    1    77  78    0 (77,78+]
## 60          63          81          85    1    78  79    0 (78,79+]
## 61          63          81          85    1    79  80    0 (79,80+]
## 62          63          81          85    1    80  81    0 (80,81+]
## 63          63          81          85    1    81  85    1  (81,85]
## 64          61          61          66    1    61  66    1  (61,66]
## 65          57          57          61    1    57  61    1  (57,61]
## 66          67          88          88    0    67  68    0 (67,68+]
## 67          67          88          88    0    68  69    0 (68,69+]
## 68          67          88          88    0    69  70    0 (69,70+]
## 69          67          88          88    0    70  71    0 (70,71+]
## 70          67          88          88    0    71  72    0 (71,72+]
## 71          67          88          88    0    72  73    0 (72,73+]
## 72          67          88          88    0    73  74    0 (73,74+]
## 73          67          88          88    0    74  75    0 (74,75+]
## 74          67          88          88    0    75  76    0 (75,76+]
## 75          67          88          88    0    76  77    0 (76,77+]
## 76          67          88          88    0    77  78    0 (77,78+]
## 77          67          88          88    0    78  79    0 (78,79+]
## 78          67          88          88    0    79  80    0 (79,80+]
## 79          67          88          88    0    80  81    0 (80,81+]
## 80          67          88          88    0    81  82    0 (81,82+]
## 81          67          88          88    0    82  83    0 (82,83+]
## 82          67          88          88    0    83  84    0 (83,84+]
## 83          67          88          88    0    84  85    0 (84,85+]
## 84          67          88          88    0    85  86    0 (85,86+]
## 85          67          88          88    0    86  87    0 (86,87+]
## 86          67          88          88    0    87  88    0 (87,88+]
## 87          58          87          87    0    58  59    0 (58,59+]
## 88          58          87          87    0    59  60    0 (59,60+]
## 89          58          87          87    0    60  61    0 (60,61+]
## 90          58          87          87    0    61  62    0 (61,62+]
## 91          58          87          87    0    62  63    0 (62,63+]
## 92          58          87          87    0    63  64    0 (63,64+]
## 93          58          87          87    0    64  65    0 (64,65+]
## 94          58          87          87    0    65  66    0 (65,66+]
## 95          58          87          87    0    66  67    0 (66,67+]
## 96          58          87          87    0    67  68    0 (67,68+]
## 97          58          87          87    0    68  69    0 (68,69+]
## 98          58          87          87    0    69  70    0 (69,70+]
## 99          58          87          87    0    70  71    0 (70,71+]
## 100         58          87          87    0    71  72    0 (71,72+]
malldata3$Habitat <- factor(malldata3$Habitat)  # Convert categorical variable to be factors

mod.hab.ph <-  coxph(Surv~Habitat,data=malldata3)
summary(mod.hab.ph)
## Call:
## coxph(formula = Surv ~ Habitat, data = malldata3)
## 
##   n= 6126, number of events= 317 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)
## HabitatP -0.1996    0.8190   0.1249 -1.598    0.110
## HabitatR -0.2059    0.8139   0.2300 -0.895    0.371
## HabitatW -0.0697    0.9327   0.2403 -0.290    0.772
## 
##          exp(coef) exp(-coef) lower .95 upper .95
## HabitatP    0.8190      1.221    0.6412     1.046
## HabitatR    0.8139      1.229    0.5185     1.278
## HabitatW    0.9327      1.072    0.5824     1.494
## 
## Concordance= 0.518  (se = 0.014 )
## Likelihood ratio test= 2.73  on 3 df,   p=0.4
## Wald test            = 2.78  on 3 df,   p=0.4
## Score (logrank) test = 2.79  on 3 df,   p=0.4
car::Anova(mod.hab.ph, type=3)
## Analysis of Deviance Table
##  Cox model: response is Surv
## Terms added sequentially (first to last)
## 
##          loglik  Chisq Df Pr(>|Chi|)
## NULL    -1481.3                     
## Habitat -1480.0 2.7347  3     0.4344
mod.hab.ph.emmo <- emmeans::emmeans(mod.hab.ph, ~Habitat)
summary(pairs(mod.hab.ph.emmo), infer=TRUE)
##  contrast estimate    SE df asymp.LCL asymp.UCL z.ratio p.value
##  N - P     0.19964 0.125 NA    -0.121     0.521  1.598  0.3795 
##  N - R     0.20588 0.230 NA    -0.385     0.797  0.895  0.8075 
##  N - W     0.06970 0.240 NA    -0.548     0.687  0.290  0.9915 
##  P - R     0.00624 0.224 NA    -0.568     0.581  0.028  1.0000 
##  P - W    -0.12993 0.234 NA    -0.731     0.471 -0.556  0.9450 
##  R - W    -0.13618 0.304 NA    -0.917     0.645 -0.448  0.9700 
## 
## Results are given on the log (not the response) scale. 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 4 estimates 
## P value adjustment: tukey method for comparing a family of 4 estimates
summary(pairs(mod.hab.ph.emmo, type="response"), infer=TRUE)
##  contrast ratio    SE df asymp.LCL asymp.UCL z.ratio p.value
##  N / P    1.221 0.153 NA     0.886      1.68  1.598  0.3795 
##  N / R    1.229 0.283 NA     0.680      2.22  0.895  0.8075 
##  N / W    1.072 0.258 NA     0.578      1.99  0.290  0.9915 
##  P / R    1.006 0.225 NA     0.567      1.79  0.028  1.0000 
##  P / W    0.878 0.205 NA     0.482      1.60 -0.556  0.9450 
##  R / W    0.873 0.265 NA     0.400      1.91 -0.448  0.9700 
## 
## Confidence level used: 0.95 
## Conf-level adjustment: tukey method for comparing a family of 4 estimates 
## Intervals are back-transformed from the log scale 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## Tests are performed on the log scale
# baseline cumulative hazard
cumhaz <- basehaz(mod.hab.ph)
# estimate change in cumulative hazard and plot
cumhaz$deltaHaz <- c(NA,diff(cumhaz$hazard))
basehaz.plot <- ggplot(data=cumhaz, aes(x=time, y=deltaHaz))+
  ggtitle("Estimated baseline hazard function over time")+
  geom_point()+
  geom_smooth(se=FALSE)
basehaz.plot
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(basehaz.plot,
    file=file.path("..","..","..","..","MyStuff","Images","mallard-ph-basehazard.png"), h=4, w=6, units="in", dpi=300)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).
# test the assumption of proportionality
# look for approximate parallelism of the curveys
cox.zph(mod.hab.ph)
##               rho   chisq      p
## HabitatP -0.09736 3.05475 0.0805
## HabitatR  0.00341 0.00368 0.9516
## HabitatW -0.08343 2.19603 0.1384
## GLOBAL         NA 4.56614 0.2065
ggcoxzph(cox.zph(mod.hab.ph))

png(file=file.path("..","..","..","..","MyStuff","Images","mallard-ph-hab-testprop.png"), h=4, w=6, units="in", res=300)
ggcoxzph(cox.zph(mod.hab.ph))
dev.off()
## quartz_off_screen 
##                 2
# estimated survival curves for the two habitats
pred.data <- expand.grid( Habitat=unique(malldata3$Habitat))
pred.survival <- survfit(mod.hab.ph, newdata=pred.data, se.fit=TRUE)
plot(pred.survival)

mod.null.ph <- coxph(Surv~1, data=malldata3)


AICcmodavg::aictab( list(mod.hab.ph, mod.null.ph))
## Warning in aictab.AICcoxph(list(mod.hab.ph, mod.null.ph)): 
## Model names have been supplied automatically in the table
## 
## Model selection based on AICc:
## 
##      K    AICc Delta_AICc AICcWt Cum.Wt       LL
## Mod2 0 2962.64       0.00   0.84   0.84 -1481.32
## Mod1 3 2965.91       3.27   0.16   1.00 -1479.95