# Analysis of mallard data illustrating logistic exposure models
# How to fit a nest level categorical variable (habitat)
# 2019-05-01 CJS Initial code
# This is the mallard data that ships with RMark
library(AICcmodavg)
library(emmeans)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(plyr)
library(readxl)
source(file.path("..","..","logistic-exposure-model.R"))
# The dataframe must contain the following fields with the following names
#
# FirstFound: day the nest was first found
# LastPresent: last day that a chick was present in the nest
# LastChecked: last day the nest was checked
# Fate: fate of the nest; 0=hatch an
# Freq: number of nests with this data
#
# Also contains the following fields
# Robel - Measurement of Robel pole of visibility of nest
# PpnGrass - proportion of grassland cover on the 10.4 km2 study site t
# AgeFound - Age of nest when found
# AgeDay1 - Age of nest on day 1 of study (can be negative)
# Habitat - N=Native; P=Planted; W=Wetland; R=roadside right of way
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#
malldata <- readxl::read_excel(file.path("..","mallard.xlsx"),
sheet="mallard")
head(malldata)
## # A tibble: 6 x 10
## FirstFound LastPresent LastChecked Fate Freq Robel PpnGrass AgeFound
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 73 89 89 0 1 6 0.800 13
## 2 63 90 90 0 1 3.75 0.668 5
## 3 70 70 76 1 1 3.75 0.800 13
## 4 63 81 85 1 1 3.12 0.668 6
## 5 61 61 66 1 1 4.5 0.668 4
## 6 57 57 61 1 1 4.25 0.668 1
## # … with 2 more variables: AgeDay1 <dbl>, Habitat <chr>
malldata2 <- expand.nest.data(malldata)
rm(malldata)
malldata2$Habitat <- factor(malldata2$Habitat) # RMark wants categorical variable to be factors
# Fit a particular model
# This is a model with S varying by habitat type
mod.hab <- glm(Survive~Habitat,
family=binomial(link=logexp(malldata2$Exposure)),
data=malldata2)
summary(mod.hab)
##
## Call:
## glm(formula = Survive ~ Habitat, family = binomial(link = logexp(malldata2$Exposure)),
## data = malldata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5021 0.2990 0.2990 0.3333 0.3333
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.86293 0.09508 30.111 <2e-16 ***
## HabitatP 0.22268 0.12234 1.820 0.0687 .
## HabitatR 0.21111 0.22719 0.929 0.3528
## HabitatW 0.09291 0.23509 0.395 0.6927
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2494.8 on 6125 degrees of freedom
## Residual deviance: 1564.0 on 6122 degrees of freedom
## AIC: 1572
##
## Number of Fisher Scoring iterations: 7
# You can make predictions just as before
pred.data <- data.frame(Habitat=unique(malldata2$Habitat))
logit.dsr.pred.hab <- predict(mod.hab, newdata=pred.data, se.fit=TRUE)
# put these together in a data frame
dsr.hab <- cbind(pred.data, logit.dsr=logit.dsr.pred.hab$fit, logit.dsr.se=logit.dsr.pred.hab$se.fit)
dsr.hab$dsr <- expit(dsr.hab$logit.dsr)
dsr.hab$dsr.se <- dsr.hab$logit.dsr.se* dsr.hab$dsr * (1-dsr.hab$dsr)
dsr.hab
## Habitat logit.dsr logit.dsr.se dsr dsr.se
## 1 R 3.074044 0.20633908 0.9558093 0.008715326
## 2 P 3.085609 0.07698050 0.9562952 0.003217376
## 3 N 2.862929 0.09507968 0.9459832 0.004858478
## 4 W 2.955842 0.21500819 0.9505389 0.010108552
# extract the logit(DSR) for each habitat using emmeans
mod.hab.emmo <- emmeans::emmeans(mod.hab, ~Habitat)
dsr.logit <- CLD(mod.hab.emmo)
dsr.logit
## Habitat emmean SE df asymp.LCL asymp.UCL .group
## N 2.86 0.0951 Inf 2.68 3.05 1
## W 2.96 0.2150 Inf 2.53 3.38 1
## R 3.07 0.2063 Inf 2.67 3.48 1
## P 3.09 0.0770 Inf 2.93 3.24 1
##
## Results are given on the logexp(malldata2$Exposure) (not the response) scale.
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 4 estimates
## significance level used: alpha = 0.05
# Compute the multiple comparison on the ordinary scale
# We need to update the transformation so we can get answers on the original scale as well
mod.hab.rg <- update(ref_grid(mod.hab, at=list(exposure=1)), tran = logexp())
mod.hab.emmo2 <- emmeans::emmeans(mod.hab.rg, ~Habitat)
CLD(mod.hab.emmo2)
## Habitat emmean SE df asymp.LCL asymp.UCL .group
## N 2.86 0.0951 Inf 2.68 3.05 1
## W 2.96 0.2150 Inf 2.53 3.38 1
## R 3.07 0.2063 Inf 2.67 3.48 1
## P 3.09 0.0770 Inf 2.93 3.24 1
##
## Results are given on the logexp(1) (not the response) scale.
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 4 estimates
## significance level used: alpha = 0.05
dsr <- CLD(mod.hab.emmo2, type="response") # on DSR scale
dsr
## Habitat prob SE df asymp.LCL asymp.UCL .group
## N 0.946 0.00486 Inf 0.936 0.955 1
## W 0.951 0.01011 Inf 0.927 0.967 1
## R 0.956 0.00872 Inf 0.935 0.970 1
## P 0.956 0.00322 Inf 0.950 0.962 1
##
## Confidence level used: 0.95
## Intervals are back-transformed from the logexp(1) scale
## P value adjustment: tukey method for comparing a family of 4 estimates
## significance level used: alpha = 0.05
# pairwise effects on logit scale
summary(pairs(mod.hab.emmo2),infer=TRUE)
## contrast estimate SE df asymp.LCL asymp.UCL z.ratio p.value
## N - P -0.2227 0.122 Inf -0.537 0.0916 -1.820 0.2637
## N - R -0.2111 0.227 Inf -0.795 0.3725 -0.929 0.7892
## N - W -0.0929 0.235 Inf -0.697 0.5110 -0.395 0.9791
## P - R 0.0116 0.220 Inf -0.554 0.5773 0.053 0.9999
## P - W 0.1298 0.228 Inf -0.457 0.7165 0.568 0.9415
## R - W 0.1182 0.298 Inf -0.647 0.8838 0.397 0.9789
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 4 estimates
## P value adjustment: tukey method for comparing a family of 4 estimates
# pairwise effects on the DSR scale
mod.hab.emmo3 <- regrid(emmeans(mod.hab, ~Habitat), transform="log")
summary(pairs(mod.hab.emmo3, type="response"), infer=TRUE)
## contrast ratio SE df asymp.LCL asymp.UCL z.ratio p.value
## N / P 0.928 0.0385 Inf 0.834 1.03 -1.803 0.2717
## N / R 0.931 0.0697 Inf 0.768 1.13 -0.950 0.7777
## N / W 0.969 0.0774 Inf 0.789 1.19 -0.399 0.9784
## P / R 1.004 0.0719 Inf 0.835 1.21 0.052 0.9999
## P / W 1.044 0.0803 Inf 0.857 1.27 0.559 0.9442
## R / W 1.040 0.1029 Inf 0.806 1.34 0.396 0.9789
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 4 estimates
## Intervals are back-transformed from the log scale
## P value adjustment: tukey method for comparing a family of 4 estimates
## Tests are performed on the log scale
dsr.hab.plot <- ggplot(data=dsr, aes(x=Habitat, y=prob))+
ggtitle("DSR in different habitats for mallards", subtitle="Logistic exposure model")+
geom_point()+
geom_errorbar(aes(ymin=asymp.LCL, ymax=asymp.UCL), width=.1)+
ylab("DSR (95% ci)")
dsr.hab.plot

ggsave(dsr.hab.plot,
file=file.path("..","..","..","..","MyStuff","Images","mallard-dsr-habitat-le.png"), h=4, w=6, units="in", dpi=300)
# you can get the nest survival probability in the usual way by using the estimated DSRs.
# we want to do this for each habitat
ns <- plyr::ddply(dsr, "Habitat", function(x, ndays){
#browser()
ns <- x$prob^ndays
ns.se <- x$SE* ndays * x$prob^(ndays-1)
data.frame(ns=ns, ns.se=ns.se)
}, ndays=20)
ns
## Habitat ns ns.se
## 1 N 0.3293580 0.03383101
## 2 P 0.4091095 0.02752830
## 3 R 0.4049720 0.07385288
## 4 W 0.3625748 0.07711638
# fit a null models
mod.null <- glm(Survive~1,
family=binomial(link=logexp(malldata2$Exposure)),
data=malldata2)
summary(mod.null)
##
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(malldata2$Exposure)),
## data = malldata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4714 0.3109 0.3109 0.3109 0.3109
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.00565 0.05551 54.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2494.8 on 6125 degrees of freedom
## Residual deviance: 1567.1 on 6125 degrees of freedom
## AIC: 1569.1
##
## Number of Fisher Scoring iterations: 7
AICcmodavg::aictab( list(mod.hab, mod.null))
## Warning in aictab.AICglm.lm(list(mod.hab, mod.null)):
## Model names have been supplied automatically in the table
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## Mod2 1 1569.12 0.00 0.81 0.81 -783.56
## Mod1 4 1571.96 2.84 0.19 1.00 -781.98