# Analysis of killdeer data illustrating model averaging
# and logistic exposure models
# 2019-05-01 CJS Initial version
# This is the killdeer data that ships with RMark
library(AICcmodavg)
library(ggplot2)
library(plyr)
library(readxl)
library(RMark)
## This is RMark 2.2.6
## Documentation available at http://www.phidot.org/software/mark/rmark/RMarkDocumentation.zip
## Warning: Software mark not found in path.
## If you have mark executable, you will need to set MarkPath object to its location (e.g. MarkPath="C:/Users/Jeff Laake/Desktop")
# (MARK itself is not needed for this example; all the models below are fit with glm())
source(file.path("..","..","logistic-exposure-model.R"))
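# The sourced file is assumed to supply logexp(), the custom logistic-exposure
# link of Shaffer (2004), and expit(), the inverse logit; both are used below.
# As a minimal sketch (named logexp.sketch here so that the sourced definition
# above is the one actually used), such a link can be built as:
logexp.sketch <- function(exposure = 1) {
   # linkfun: eta = logit(mu^(1/t)), where t is the exposure in days
   linkfun <- function(mu) qlogis(mu^(1/exposure))
   # linkinv: mu = expit(eta)^t, i.e. daily survival raised to the exposure
   linkinv <- function(eta) plogis(eta)^exposure
   # mu.eta: d mu / d eta, by the chain rule
   mu.eta  <- function(eta) exposure * plogis(eta)^(exposure-1) * binomial()$mu.eta(eta)
   valideta <- function(eta) TRUE
   structure(list(linkfun=linkfun, linkinv=linkinv, mu.eta=mu.eta,
                  valideta=valideta, name="logexp"),
             class="link-glm")
}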
# The dataframe must contain the following fields with the following names
#
# FirstFound: day the nest was first found
# LastPresent: last day that a chick was present in the nest
# LastChecked: last day the nest was checked
# Fate: fate of the nest; 0=hatch and 1=depredated
# Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multiple records per nest
# as shown in the mallard example.
#
killdata <- readxl::read_excel(file.path("..","Killdeer.xlsx"),
                               sheet="killdeer")
head(killdata)
## # A tibble: 6 x 6
## id FirstFound LastPresent LastChecked Fate Freq
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 /*A*/ 1 9 9 0 1
## 2 /*B*/ 5 5 9 1 1
## 3 /*C*/ 5 40 40 0 1
## 4 /*D*/ 9 32 32 0 1
## 5 /*E*/ 7 8 8 0 1
## 6 /*F*/ 3 15 15 0 1
# We expand the data to one record for each day a nest was under observation;
# the number of expanded records is the effective sample size
killdata2 <- expand.nest.data(killdata)
head(killdata2)
## id FirstFound LastPresent LastChecked Fate Freq Day Exposure Survive
## 1 /*A*/ 1 9 9 0 1 1 1 1
## 2 /*A*/ 1 9 9 0 1 2 1 1
## 3 /*A*/ 1 9 9 0 1 3 1 1
## 4 /*A*/ 1 9 9 0 1 4 1 1
## 5 /*A*/ 1 9 9 0 1 5 1 1
## 6 /*A*/ 1 9 9 0 1 6 1 1
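# The effective sample size for AICc is the number of expanded records
# (one record per exposure day), not the number of nests:
nrow(killdata2)   # 210 records, the nobs value in the model.info table below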
# Add the Day2 term and the first/second half variables
killdata2$Day2      <- (killdata2$Day-20)^2   # quadratic in Day, centered at day 20
killdata2$studyhalf <- car::recode(killdata2$Day,
                                   " lo:20='1st'; 21:hi='2nd'")
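# quick check on the recoding of Day into the two study halves
table(killdata2$studyhalf)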
# Set up the set of models to fit
model.list.csv <- textConnection(
" S
~1
~Day
~Day+Day2
~studyhalf
")
model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list
## S model.number
## 1 ~1 1
## 2 ~Day 2
## 3 ~Day+Day2 3
## 4 ~studyhalf 4
model.fits <- plyr::dlply(model.list, c("S","model.number"), function(x, input.data){
   cat("\n\n***** Starting ", unlist(x), "\n")
   # paste() turns the model string (e.g. "~Day") into the formula "Survive ~Day";
   # the logexp link accounts for each record's exposure (days at risk)
   fit <- glm(formula=as.formula(paste("Survive", eval(x$S))),
              family=binomial(link=logexp(input.data$Exposure)),
              data=input.data)
   fit
}, input.data=killdata2)
##
##
## ***** Starting ~1 1
##
##
## ***** Starting ~Day 2
##
##
## ***** Starting ~Day+Day2 3
##
##
## ***** Starting ~studyhalf 4
# examine individual model results
model.number <- 1
names(model.fits[[model.number]])
## [1] "coefficients" "residuals" "fitted.values"
## [4] "effects" "R" "rank"
## [7] "qr" "family" "linear.predictors"
## [10] "deviance" "aic" "null.deviance"
## [13] "iter" "weights" "prior.weights"
## [16] "df.residual" "df.null" "y"
## [19] "converged" "boundary" "model"
## [22] "call" "formula" "terms"
## [25] "data" "offset" "control"
## [28] "method" "contrasts" "xlevels"
model.fits[[model.number]]$formula
## Survive ~ 1
## <environment: 0x7fa15003c050>
summary(model.fits[[model.number]])
##
## Call:
## glm(formula = as.formula(paste("Survive", eval(x$S))), family = binomial(link = logexp(input.data$Exposure)),
## data = input.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6777 0.2372 0.2372 0.2372 0.2372
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.5570 0.4085 8.708 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 54.491 on 209 degrees of freedom
## Residual deviance: 42.510 on 209 degrees of freedom
## AIC: 44.51
##
## Number of Fisher Scoring iterations: 7
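# Back-transform the intercept to the constant daily survival rate (DSR);
# expit() (the inverse logit) is assumed to come from the sourced
# logistic-exposure-model.R file, as it is used again below.
expit(coef(model.fits[[model.number]]))   # about 0.972, matching the ~1 model's dsr below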
# Model comparison and averaging
# collect models and make AICc table
AICcmodavg::aictab(model.fits)
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## ~Day+Day2.3 3 43.88 0.00 0.37 0.37 -18.88
## ~1.1 1 44.53 0.65 0.27 0.64 -21.26
## ~Day.2 2 44.69 0.82 0.25 0.89 -20.32
## ~studyhalf.4 2 46.23 2.35 0.11 1.00 -21.09
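# All four models are within about 2.4 AICc units of each other, so no single
# model is clearly best and model averaging the DSR is sensible.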
# compute the DSR and model average the daily values
# we need to set up the prediction matrix with variables that match the models
pred.data <- data.frame(Day=1:39)
pred.data$Day2 <- (pred.data$Day-20)^2 # we need to match coding above
pred.data$studyhalf <- car::recode(pred.data$Day,
                                   " lo:20='1st'; 21:hi='2nd'")
dsr.indiv <- plyr::ldply(model.fits, function(x, pred.data){
   # get the predictions on the logit (link) scale and then back-transform;
   # an exposure of one day gives the daily survival rate (DSR)
   logit.dsr.pred <- predict(x, newdata=pred.data, se.fit=TRUE)
   # put these together in a data frame
   dsr <- cbind(pred.data, logit.dsr=logit.dsr.pred$fit, logit.dsr.se=logit.dsr.pred$se.fit)
   dsr$dsr    <- expit(dsr$logit.dsr)
   # delta-method SE on the probability scale: se(p) = se(logit) * p * (1-p)
   dsr$dsr.se <- dsr$logit.dsr.se * dsr$dsr * (1-dsr$dsr)
   dsr
}, pred.data=pred.data)
plotdata.indiv <- dsr.indiv
head(plotdata.indiv)
## S model.number Day Day2 studyhalf logit.dsr logit.dsr.se dsr
## 1 ~1 1 1 361 1st 3.557002 0.4084693 0.9722669
## 2 ~1 1 2 324 1st 3.557002 0.4084693 0.9722669
## 3 ~1 1 3 289 1st 3.557002 0.4084693 0.9722669
## 4 ~1 1 4 256 1st 3.557002 0.4084693 0.9722669
## 5 ~1 1 5 225 1st 3.557002 0.4084693 0.9722669
## 6 ~1 1 6 196 1st 3.557002 0.4084693 0.9722669
## dsr.se
## 1 0.01101397
## 2 0.01101397
## 3 0.01101397
## 4 0.01101397
## 5 0.01101397
## 6 0.01101397
# do the model averaging for each day's DSR
# Extract the logL and K number of parameters for each model in the model set.
# The model number is used to index the values
model.info <- plyr::ldply(model.fits, function(x){
   logL <- logLik(x)
   K    <- length(coef(x))
   nobs <- nrow(x$data)   # effective sample size = number of expanded records
   data.frame(logL=logL, K=K, nobs=nobs)
})
model.info
## S model.number logL K nobs
## 1 ~1 1 -21.25514 1 210
## 2 ~Day 2 -20.31785 2 210
## 3 ~Day+Day2 3 -18.88091 3 210
## 4 ~studyhalf 4 -21.08636 2 210
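# Sanity check: reproduce the AICc arithmetic by hand for the intercept-only
# model using AICc = -2*logL + 2*K + 2*K*(K+1)/(n-K-1):
with(model.info[1,], -2*logL + 2*K + 2*K*(K+1)/(nobs-K-1))
# gives about 44.53, matching the aictab() value for the ~1 model above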
dsr.ma <- plyr::ddply(dsr.indiv, c("Day"), function(x, model.info){
   # merge the model information (logL, K, nobs) with the estimates
   x <- merge(x, model.info)
   # model average this day's DSR estimates using AICc weights
   ma <- AICcmodavg::modavgCustom(x$logL, x$K, modnames=x$S, nobs=x$nobs,
                                  estimate=x$dsr, se=x$dsr.se)
   data.frame(dsr=ma$Mod.avg.est, dsr.se=ma$Uncond.SE, dsr.lcl=ma$Lower.CL, dsr.ucl=ma$Upper.CL)
}, model.info=model.info)
head(dsr.ma)
## Day dsr dsr.se dsr.lcl dsr.ucl
## 1 1 0.9614451 0.06878004 0.8266387 1.096251
## 2 2 0.9666242 0.05174868 0.8651987 1.068050
## 3 3 0.9705807 0.03934304 0.8934697 1.047692
## 4 4 0.9736046 0.03041289 0.9139964 1.033213
## 5 5 0.9759166 0.02408579 0.9287093 1.023124
## 6 6 0.9776827 0.01970441 0.9390627 1.016303
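# Note: the model-averaged upper confidence limits can exceed 1 because
# modavgCustom() forms normal-theory intervals directly on the probability
# scale; averaging on the logit scale and back-transforming would keep the
# limits inside [0, 1].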
plotdata.ma <- dsr.ma
plotdata.ma$S <- "Model avg"
# plot each model's predicted DSR along with the model-averaged values
final.fit <- ggplot(data=plotdata.indiv, aes(x=Day, y=dsr, color=S))+
   ggtitle("Comparing the model fits", subtitle="Fit using logistic exposure models")+
   geom_line(aes(group=S))+
   geom_line(data=plotdata.ma, color='black', size=2, group=1)+  # thick black line = model average
   theme(legend.justification=c(0,0), legend.position=c(0,0))
final.fit

ggsave(final.fit,
       file=file.path("..","..","..","..","MyStuff","Images","killdeer-modavg-le.png"),
       h=4, w=6, units="in", dpi=300)