# Analysis of killdeer data using the logistic exposure model
# 2019-05-01 CJS Initial code
# This is the killdeer data that ships with RMark
library(AICcmodavg)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(plyr)
library(readxl)
source(file.path("..","..","logistic-exposure-model.R"))
# The dataframe must contain the following fields with the following names
#
# FirstFound: day the nest was first found
# LastPresent: last day that a chick was present in the nest
# LastChecked: last day the nest was checked
# Fate: fate of the nest; 0=hatch an
# Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#
killdata <- readxl::read_excel(file.path("..","Killdeer.xlsx"),
sheet="killdeer")
head(killdata)
## # A tibble: 6 x 6
## id FirstFound LastPresent LastChecked Fate Freq
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 /*A*/ 1 9 9 0 1
## 2 /*B*/ 5 5 9 1 1
## 3 /*C*/ 5 40 40 0 1
## 4 /*D*/ 9 32 32 0 1
## 5 /*E*/ 7 8 8 0 1
## 6 /*F*/ 3 15 15 0 1
# We expand the data to generate the effective sample size
killdata2 <- expand.nest.data(killdata)
head(killdata2)
## id FirstFound LastPresent LastChecked Fate Freq Day Exposure Survive
## 1 /*A*/ 1 9 9 0 1 1 1 1
## 2 /*A*/ 1 9 9 0 1 2 1 1
## 3 /*A*/ 1 9 9 0 1 3 1 1
## 4 /*A*/ 1 9 9 0 1 4 1 1
## 5 /*A*/ 1 9 9 0 1 5 1 1
## 6 /*A*/ 1 9 9 0 1 6 1 1
# Now to fit the logistic exposure model
fit.Sdot <- glm(Survive~1,
family=binomial(link=logexp(killdata2$Exposure)),
data=killdata2)
summary(fit.Sdot)
##
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(killdata2$Exposure)),
## data = killdata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6777 0.2372 0.2372 0.2372 0.2372
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.5570 0.4085 8.708 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 54.491 on 209 degrees of freedom
## Residual deviance: 42.510 on 209 degrees of freedom
## AIC: 44.51
##
## Number of Fisher Scoring iterations: 7
-2*logLik(fit.Sdot)
## 'log Lik.' 42.51028 (df=1)
# Convert the logit(DSR) to DSR
DSR <- expit(coef(fit.Sdot))
DSR.se <- arm::se.coef(fit.Sdot)*DSR*(1-DSR)
cat("DSR ", DSR, "(SE ", DSR.se, ")\n")
## DSR 0.9722669 (SE 0.01101397 )
# Find confidence intervals by taking expit of confit of coefficients
expit(confint(fit.Sdot))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## 0.9445889 0.9888876
# make a data frame for model averaging
dsr.dot <- data.frame(Day=1:39, logit.dsr=coef(fit.Sdot), logit.dsr.sr=arm::se.coef(fit.Sdot),
dsr=DSR, dsr.se=DSR.se)
## Warning in data.frame(Day = 1:39, logit.dsr = coef(fit.Sdot), logit.dsr.sr
## = arm::se.coef(fit.Sdot), : row names were found from a short variable and
## have been discarded
# Compute the nest survival
days <- 39
NS <- DSR^days
NS.se <- DSR.se * days * DSR^(days-1)
cat("NS ", days," days ", NS, "(SE ", NS.se, ")\n")
## NS 39 days 0.3339134 (SE 0.147522 )
# Linear in date
# This is a close approximation as the final interval uses the midpoint of the interval
# rather than the individual daily survival probabilities computed with
# the actual date
head(killdata2)
## id FirstFound LastPresent LastChecked Fate Freq Day Exposure Survive
## 1 /*A*/ 1 9 9 0 1 1 1 1
## 2 /*A*/ 1 9 9 0 1 2 1 1
## 3 /*A*/ 1 9 9 0 1 3 1 1
## 4 /*A*/ 1 9 9 0 1 4 1 1
## 5 /*A*/ 1 9 9 0 1 5 1 1
## 6 /*A*/ 1 9 9 0 1 6 1 1
fit.linear <- glm(Survive~Day,
family=binomial(link=logexp(killdata2$Exposure)),
data=killdata2)
summary(fit.linear)
##
## Call:
## glm(formula = Survive ~ Day, family = binomial(link = logexp(killdata2$Exposure)),
## data = killdata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6334 0.1677 0.2185 0.2679 0.4007
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.17135 1.39604 3.704 0.000212 ***
## Day -0.06896 0.05177 -1.332 0.182842
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 54.491 on 209 degrees of freedom
## Residual deviance: 40.636 on 208 degrees of freedom
## AIC: 44.636
##
## Number of Fisher Scoring iterations: 7
-2*logLik(fit.linear)
## 'log Lik.' 40.6357 (df=2)
# We now want to predict the DSR for days 1..39
# In this case predict() will work on the logit scale because the exposure=1 default is ok,
# but we need to expit the results
pred.data <- data.frame(Day=1:39)
logit.dsr.pred.linear <- predict(fit.linear, newdata=pred.data, se.fit=TRUE)
# put these together in a data frame
dsr.linear <- cbind(pred.data, logit.dsr=logit.dsr.pred.linear$fit, logit.dsr.se=logit.dsr.pred.linear$se.fit)
head(dsr.linear)
## Day logit.dsr logit.dsr.se
## 1 1 5.102386 1.346623
## 2 2 5.033424 1.297391
## 3 3 4.964463 1.248365
## 4 4 4.895501 1.199570
## 5 5 4.826539 1.151034
## 6 6 4.757578 1.102793
dsr.linear$dsr <- expit(dsr.linear$logit.dsr)
dsr.linear$dsr.se <- dsr.linear$logit.dsr.se* dsr.linear$dsr * (1-dsr.linear$dsr)
head(dsr.linear)
## Day logit.dsr logit.dsr.se dsr dsr.se
## 1 1 5.102386 1.346623 0.9939546 0.008091722
## 2 2 5.033424 1.297391 0.9935257 0.008345278
## 3 3 4.964463 1.248365 0.9930667 0.008595274
## 4 4 4.895501 1.199570 0.9925754 0.008840225
## 5 5 4.826539 1.151034 0.9920495 0.009078528
## 6 6 4.757578 1.102793 0.9914867 0.009308462
# plot this in the usual way
killdeer.linear.le <- ggplot(data=dsr.linear, aes(x=Day, y=dsr, group=1))+
ggtitle("Estimated linear fit on logit scale", subtitle="Logistic exposure model")+
geom_line()+
geom_ribbon(aes(ymin=expit(logit.dsr-1.96*logit.dsr.se),
ymax=expit(logit.dsr+1.96*logit.dsr.se)), alpha=.1)+
ylim(0.5, 1)
killdeer.linear.le

ggsave(killdeer.linear.le,
file=file.path("..","..","..","..","MyStuff","Images","killdeer-linear-S-le.png"),h=4, w=6, units="in", dpi=300)
# Quadratic in date
# This is a close approximation as the final interval uses the midpoint of the interval
# rather than the individual daily survival probabilities computed with
# the actual date
killdata2$Day2 <- (killdata2$Day-20)^2
fit.quad <- glm(Survive~Day+Day2,
family=binomial(link=logexp(killdata2$Exposure)),
data=killdata2)
summary(fit.quad)
##
## Call:
## glm(formula = Survive ~ Day + Day2, family = binomial(link = logexp(killdata2$Exposure)),
## data = killdata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7486 0.1553 0.1815 0.2335 0.7316
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.166929 1.080126 4.784 1.72e-06 ***
## Day -0.035588 0.039617 -0.898 0.3690
## Day2 -0.007196 0.003861 -1.864 0.0623 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 54.491 on 209 degrees of freedom
## Residual deviance: 37.762 on 207 degrees of freedom
## AIC: 43.762
##
## Number of Fisher Scoring iterations: 7
-2*logLik(fit.quad)
## 'log Lik.' 37.76182 (df=3)
# We now want to predict the DSR for days 1..39
# In this case predict() will work on the logit scale because the exposure=1 default is ok,
# but we need to expit the results
pred.data <- data.frame(Day=1:39)
pred.data$Day2 <- (pred.data$Day-20)^2 # we need to match coding above
logit.dsr.pred.quad <- predict(fit.quad, newdata=pred.data, se.fit=TRUE)
# put these together in a data frame
dsr.quad <- cbind(pred.data, logit.dsr=logit.dsr.pred.quad$fit, logit.dsr.se=logit.dsr.pred.quad$se.fit)
head(dsr.quad )
## Day Day2 logit.dsr logit.dsr.se
## 1 1 361 2.533762 1.5006084
## 2 2 324 2.764408 1.3653107
## 3 3 289 2.980664 1.2407822
## 4 4 256 3.182528 1.1275686
## 5 5 225 3.370001 1.0262679
## 6 6 196 3.543083 0.9374879
dsr.quad $dsr <- expit(dsr.quad $logit.dsr)
dsr.quad $dsr.se <- dsr.quad $logit.dsr.se* dsr.quad$dsr * (1-dsr.quad$dsr)
head(dsr.quad)
## Day Day2 logit.dsr logit.dsr.se dsr dsr.se
## 1 1 361 2.533762 1.5006084 0.9264750 0.10222003
## 2 2 324 2.764408 1.3653107 0.9407219 0.07613543
## 3 3 289 2.980664 1.2407822 0.9516929 0.05704315
## 4 4 256 3.182528 1.1275686 0.9601715 0.04312075
## 5 5 225 3.370001 1.0262679 0.9667537 0.03298523
## 6 6 196 3.543083 0.9374879 0.9718891 0.02561283
# plot this in the usual way
killdeer.quad.le <- ggplot(data=dsr.quad, aes(x=Day, y=dsr, group=1))+
ggtitle("Estimated quadratic fit on logit scale", subtitle="Logistic exposure model")+
geom_line()+
geom_ribbon(aes(ymin=expit(logit.dsr-1.96*logit.dsr.se),
ymax=expit(logit.dsr+1.96*logit.dsr.se)), alpha=.1)+
ylim(0.5, 1)
killdeer.quad.le

ggsave(killdeer.quad.le,
file=file.path("..","..","..","..","MyStuff","Images","killdeer-quad-S-le.png"),h=4, w=6, units="in", dpi=300)
# DSR varies in the first and second half of the study
# need to define the part of the study
# some ambiguity if an interval straddles the breakpoint
killdata2$studyhalf <- car::recode(killdata2$Day,
" lo:20='1st'; 21:hi='2nd'")
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
head(killdata2)
## id FirstFound LastPresent LastChecked Fate Freq Day Exposure Survive
## 1 /*A*/ 1 9 9 0 1 1 1 1
## 2 /*A*/ 1 9 9 0 1 2 1 1
## 3 /*A*/ 1 9 9 0 1 3 1 1
## 4 /*A*/ 1 9 9 0 1 4 1 1
## 5 /*A*/ 1 9 9 0 1 5 1 1
## 6 /*A*/ 1 9 9 0 1 6 1 1
## Day2 studyhalf
## 1 361 1st
## 2 324 1st
## 3 289 1st
## 4 256 1st
## 5 225 1st
## 6 196 1st
fit.2part <- glm(Survive~studyhalf,
family=binomial(link=logexp(killdata2$Exposure)),
data=killdata2)
summary(fit.2part)
##
## Call:
## glm(formula = Survive ~ studyhalf, family = binomial(link = logexp(killdata2$Exposure)),
## data = killdata2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6061 0.2042 0.2611 0.2611 0.2611
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.8604 0.7077 5.455 4.89e-08 ***
## studyhalf2nd -0.4986 0.8666 -0.575 0.565
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 54.491 on 209 degrees of freedom
## Residual deviance: 42.173 on 208 degrees of freedom
## AIC: 46.173
##
## Number of Fisher Scoring iterations: 7
-2*logLik(fit.2part)
## 'log Lik.' 42.17273 (df=2)
# We now want to predict the DSR for days 1..39
# In this case predict() will work on the logit scale because the exposure=1 default is ok,
# but we need to expit the results
pred.data <- data.frame(Day=1:39)
pred.data$studyhalf <- car::recode(pred.data$Day,
" lo:20='1st'; 21:hi='2nd'")
logit.dsr.pred.2part <- predict(fit.2part, newdata=pred.data, se.fit=TRUE)
# put these together in a data frame
dsr.2part <- cbind(pred.data, logit.dsr=logit.dsr.pred.2part$fit, logit.dsr.se=logit.dsr.pred.2part$se.fit)
head(dsr.2part)
## Day studyhalf logit.dsr logit.dsr.se
## 1 1 1st 3.8604 0.7076596
## 2 2 1st 3.8604 0.7076596
## 3 3 1st 3.8604 0.7076596
## 4 4 1st 3.8604 0.7076596
## 5 5 1st 3.8604 0.7076596
## 6 6 1st 3.8604 0.7076596
dsr.2part$dsr <- expit(dsr.2part$logit.dsr)
dsr.2part$dsr.se <- dsr.2part$logit.dsr.se* dsr.2part$dsr * (1-dsr.2part$dsr)
head(dsr.2part)
## Day studyhalf logit.dsr logit.dsr.se dsr dsr.se
## 1 1 1st 3.8604 0.7076596 0.9793748 0.01429459
## 2 2 1st 3.8604 0.7076596 0.9793748 0.01429459
## 3 3 1st 3.8604 0.7076596 0.9793748 0.01429459
## 4 4 1st 3.8604 0.7076596 0.9793748 0.01429459
## 5 5 1st 3.8604 0.7076596 0.9793748 0.01429459
## 6 6 1st 3.8604 0.7076596 0.9793748 0.01429459
tail(dsr.2part)
## Day studyhalf logit.dsr logit.dsr.se dsr dsr.se
## 34 34 2nd 3.36182 0.50029 0.9664898 0.01620304
## 35 35 2nd 3.36182 0.50029 0.9664898 0.01620304
## 36 36 2nd 3.36182 0.50029 0.9664898 0.01620304
## 37 37 2nd 3.36182 0.50029 0.9664898 0.01620304
## 38 38 2nd 3.36182 0.50029 0.9664898 0.01620304
## 39 39 2nd 3.36182 0.50029 0.9664898 0.01620304
# plot this in the usual way
killdeer.2part.le <- ggplot(data=dsr.2part, aes(x=Day, y=dsr, group=1))+
ggtitle("Estimated 2part fit on logit scale", subtitle="Logistic exposure model")+
geom_line()+
geom_ribbon(aes(ymin=expit(logit.dsr-1.96*logit.dsr.se),
ymax=expit(logit.dsr+1.96*logit.dsr.se)), alpha=.1)+
ylim(0.5, 1)
killdeer.2part.le

ggsave(killdeer.2part.le,
file=file.path("..","..","..","..","MyStuff","Images","killdeer-2part-S-le.png"),h=4, w=6, units="in", dpi=300)