# Analysis of Shelly redstart data using the logistic exposure model
# 2019-05-01 CJS Initial code
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015)
# Impacts of nest predators and weather on reproductive success and
# population limitation in a long-distance migratory songbird.
# Journal of Avian Biology 46(6): 559-569. https://doi.org/10.1111/jav.00536
# Data from
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015)
# Data from: Impacts of nest predators and weather on reproductive success and
# population limitation in a long-distance migratory songbird.
# Dryad Digital Repository. https://doi.org/10.5061/dryad.73870
library(AICcmodavg)
library(ggplot2)
library(plyr)
library(readxl)
source(file.path("..","..","logistic-exposure-model.R"))
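# The sourced file is assumed to supply logexp(), the logistic-exposure link
# of Shaffer (2004), and expit(), the inverse logit. As a rough illustration
# only (named logexp.sketch so it does not mask the sourced version, whose
# details may differ), such a link can be built as:
logexp.sketch <- function(exposure = 1){
   # interval survival = (daily survival)^exposure, modelled on the logit scale
   linkfun  <- function(mu)  qlogis(mu^(1/exposure))
   linkinv  <- function(eta) plogis(eta)^exposure
   mu.eta   <- function(eta) exposure * plogis(eta)^(exposure-1) * binomial()$mu.eta(eta)
   valideta <- function(eta) TRUE
   structure(list(linkfun=linkfun, linkinv=linkinv, mu.eta=mu.eta,
                  valideta=valideta, name="logexp"), class="link-glm")
}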
# The dataframe must contain the following fields with the following names
#
# FirstFound: day the nest was first found
# LastPresent: last day that a chick was present in the nest
# LastChecked: last day the nest was checked
# Fate: fate of the nest; 0=hatched (success); 1=failed (e.g. depredated)
# Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multiple records per nest
# as shown in the mallard example.
#
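# For illustration only, a hypothetical single-nest record in this format
# (the real data, which carry extra covariates such as Height and DBH, are
# read from the spreadsheet below):
toy.nest <- data.frame(FirstFound=10, LastPresent=18, LastChecked=20, Fate=1, Freq=1)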
reddata <- readxl::read_excel(file.path("..","Sherry.xlsx"),
                              sheet="NestData")
head(reddata)
## # A tibble: 6 x 11
## NestId FirstFound LastPresent LastChecked Fate Freq Height BaffleStatus
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 /*198… 11 11 18 1 1 16.9 N
## 2 /*198… 13 22 23 1 1 10.5 N
## 3 /*198… 7 26 26 0 1 4.8 N
## 4 /*198… 12 14 20 1 1 3.7 N
## 5 /*198… 6 20 25 1 1 10.7 N
## 6 /*198… 11 19 20 1 1 17.8 N
## # … with 3 more variables: DBH <dbl>, AgeDay1 <dbl>, Year <chr>
# We expand each nest history into one record per observation interval to
# generate the effective sample size. Exposure is the interval length in days,
# Survive indicates whether the nest survived the interval, and Day and
# NestAge give the date and nest age for the interval.
reddata2 <- expand.nest.data(reddata)
head(reddata2)
## NestId FirstFound LastPresent LastChecked Fate Freq Height
## 1 /*1983-175*/ 11 11 18 1 1 16.9
## 2 /*1983-176*/ 13 22 23 1 1 10.5
## 3 /*1983-176*/ 13 22 23 1 1 10.5
## 4 /*1983-176*/ 13 22 23 1 1 10.5
## 5 /*1983-176*/ 13 22 23 1 1 10.5
## 6 /*1983-176*/ 13 22 23 1 1 10.5
## BaffleStatus DBH AgeDay1 Year Exposure Survive Day NestAge
## 1 N 39.0 -8 1983 7 0 14.5 5.5
## 2 N 10.7 -10 1983 1 1 13.0 2.0
## 3 N 10.7 -10 1983 1 1 14.0 3.0
## 4 N 10.7 -10 1983 1 1 15.0 4.0
## 5 N 10.7 -10 1983 1 1 16.0 5.0
## 6 N 10.7 -10 1983 1 1 17.0 6.0
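# A quick check on the expanded data: the number of interval records is the
# sample size seen by glm() below, and summing Exposure gives the total
# nest-days of observation.
nrow(reddata2)           # number of observation intervals
sum(reddata2$Exposure)   # total exposure in nest-days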
rm(reddata)
# As noted in the paper, we remove the unbaffled nests from the years with no baffling (1983, 1984, 1990)
dim(reddata2)
## [1] 6492 15
reddata2 <- reddata2[ !(reddata2$BaffleStatus=="N" & reddata2$Year %in% c(1983, 1984, 1990)),]
dim(reddata2)
## [1] 4977 15
# create factor variable for year
reddata2$YearF <- factor(reddata2$Year)
# create factor for baffle
reddata2$BaffleStatus <- factor(reddata2$BaffleStatus)
# Set up the set of models to fit
model.list.csv <- textConnection(
" S
~DBH
~DBH+YearF
~DBH+YearF+BaffleStatus
")
model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list
## S model.number
## 1 ~DBH 1
## 2 ~DBH+YearF 2
## 3 ~DBH+YearF+BaffleStatus 3
model.fits <- plyr::dlply(model.list, c("S","model.number"), function(x, input.data){
   cat("\n\n***** Starting ", unlist(x), "\n")
   fit <- glm(formula=as.formula(paste("Survive", eval(x$S))),
              family=binomial(link=logexp(input.data$Exposure)),
              data=input.data)
   fit
}, input.data=reddata2)
##
##
## ***** Starting ~DBH 1
##
##
## ***** Starting ~DBH+YearF 2
##
##
## ***** Starting ~DBH+YearF+BaffleStatus 3
# examine results for a particular model
model.number <- 1
names(model.fits[[model.number]])
## [1] "coefficients" "residuals" "fitted.values"
## [4] "effects" "R" "rank"
## [7] "qr" "family" "linear.predictors"
## [10] "deviance" "aic" "null.deviance"
## [13] "iter" "weights" "prior.weights"
## [16] "df.residual" "df.null" "y"
## [19] "converged" "boundary" "model"
## [22] "call" "formula" "terms"
## [25] "data" "offset" "control"
## [28] "method" "contrasts" "xlevels"
model.fits[[model.number]]$formula
## Survive ~ DBH
## <environment: 0x7f92c7fbe170>
summary(model.fits[[model.number]])
##
## Call:
## glm(formula = as.formula(paste("Survive", eval(x$S))), family = binomial(link = logexp(input.data$Exposure)),
## data = input.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8465 0.2206 0.2726 0.2822 0.2942
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.097917 0.109130 28.387 < 2e-16 ***
## DBH 0.020253 0.006176 3.279 0.00104 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1462.1 on 4976 degrees of freedom
## Residual deviance: 1215.1 on 4975 degrees of freedom
## AIC: 1219.1
##
## Number of Fisher Scoring iterations: 6
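# The coefficients are on the logit scale for the *daily* survival rate (DSR).
# For example, the estimated DSR under model 1 at a hypothetical DBH of 20 can
# be recovered with the inverse logit (expit() from the sourced file, or
# equivalently plogis()):
beta <- coef(model.fits[[model.number]])
dsr.dbh20 <- plogis(beta["(Intercept)"] + beta["DBH"]*20)
dsr.dbh20
# Survival over an illustrative 24-day nesting period would then be dsr.dbh20^24.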
# Model comparison and averaging
# collect models and make AICc table
AICcmodavg::aictab(model.fits)
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## ~DBH+YearF+BaffleStatus.3 12 1177.63 0.00 1 1 -576.78
## ~DBH+YearF.2 11 1194.11 16.48 0 1 -586.03
## ~DBH.1 2 1219.07 41.44 0 1 -607.53
# compute the DSR and model average the daily values
# we need to set up the prediction matrix with variables that match the models
mean.dbh <- mean(reddata2$DBH)
mean.dbh
## [1] 16.21035
pred.data <- expand.grid(DBH=mean.dbh,
                         YearF=as.factor(unique(as.numeric(as.character(reddata2$YearF)))),
                         BaffleStatus=unique(as.character(reddata2$BaffleStatus)))
pred.data
## DBH YearF BaffleStatus
## 1 16.21035 1985 N
## 2 16.21035 1986 N
## 3 16.21035 1987 N
## 4 16.21035 1988 N
## 5 16.21035 1989 N
## 6 16.21035 1991 N
## 7 16.21035 1992 N
## 8 16.21035 1993 N
## 9 16.21035 1994 N
## 10 16.21035 1995 N
## 11 16.21035 1985 Y
## 12 16.21035 1986 Y
## 13 16.21035 1987 Y
## 14 16.21035 1988 Y
## 15 16.21035 1989 Y
## 16 16.21035 1991 Y
## 17 16.21035 1992 Y
## 18 16.21035 1993 Y
## 19 16.21035 1994 Y
## 20 16.21035 1995 Y
dsr.indiv <- plyr::ldply(model.fits, function(x, pred.data){
   # get the predictions on the logit scale and then back transform
   logit.dsr.pred <- predict(x, newdata=pred.data, se.fit=TRUE)
   # put these together in a data frame
   dsr <- cbind(pred.data, logit.dsr=logit.dsr.pred$fit, logit.dsr.se=logit.dsr.pred$se.fit)
   dsr$dsr    <- expit(dsr$logit.dsr)
   # delta-method SE on the probability scale: d expit(x)/dx = p(1-p)
   dsr$dsr.se <- dsr$logit.dsr.se * dsr$dsr * (1-dsr$dsr)
   dsr
}, pred.data=pred.data)
plotdata.indiv <- dsr.indiv
head(plotdata.indiv)
## S model.number DBH YearF BaffleStatus logit.dsr logit.dsr.se
## 1 ~DBH 1 16.21035 1985 N 3.426224 0.08140163
## 2 ~DBH 1 16.21035 1986 N 3.426224 0.08140163
## 3 ~DBH 1 16.21035 1987 N 3.426224 0.08140163
## 4 ~DBH 1 16.21035 1988 N 3.426224 0.08140163
## 5 ~DBH 1 16.21035 1989 N 3.426224 0.08140163
## 6 ~DBH 1 16.21035 1991 N 3.426224 0.08140163
## dsr dsr.se
## 1 0.9685141 0.002482303
## 2 0.9685141 0.002482303
## 3 0.9685141 0.002482303
## 4 0.9685141 0.002482303
## 5 0.9685141 0.002482303
## 6 0.9685141 0.002482303
# Model average the DSR for each Year x BaffleStatus combination.
# Extract the logL and K number of parameters for each model in the model set.
# The model number is used to index the values
model.info <- plyr::ldply(model.fits, function(x){
   logL <- logLik(x)
   K    <- length(coef(x))
   nobs <- nrow(x$data)
   data.frame(logL=logL, K=K, nobs=nobs)
})
model.info
## S model.number logL K nobs
## 1 ~DBH 1 -607.5314 2 4977
## 2 ~DBH+YearF 2 -586.0276 11 4977
## 3 ~DBH+YearF+BaffleStatus 3 -576.7813 12 4977
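# modavgCustom() below builds AICc weights from these logL, K, and nobs values.
# As a check against the aictab() table above, the AICc and Akaike weights can
# be computed by hand (aicc.check is just a scratch copy for this check):
aicc.check       <- transform(model.info, AICc = -2*logL + 2*K + 2*K*(K+1)/(nobs-K-1))
aicc.check$delta <- aicc.check$AICc - min(aicc.check$AICc)
aicc.check$wt    <- exp(-aicc.check$delta/2)/sum(exp(-aicc.check$delta/2))
aicc.check[, c("S","AICc","delta","wt")]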
dsr.ma <- plyr::ddply(dsr.indiv, c("YearF","BaffleStatus"), function(x, model.info){
   # merge the model information with the estimates
   x <- merge(x, model.info)
   # get the model averaged values
   ma <- AICcmodavg::modavgCustom(x$logL, x$K, modnames=x$S, nobs=x$nobs,
                                  estimate=x$dsr, se=x$dsr.se)
   data.frame(dsr=ma$Mod.avg.est, dsr.se=ma$Uncond.SE, dsr.lcl=ma$Lower.CL, dsr.ucl=ma$Upper.CL)
}, model.info=model.info)
head(dsr.ma)
## YearF BaffleStatus dsr dsr.se dsr.lcl dsr.ucl
## 1 1985 N 0.9631886 0.009742288 0.9440940 0.9822831
## 2 1985 Y 0.9858384 0.004841395 0.9763495 0.9953274
## 3 1986 N 0.9814279 0.005534292 0.9705809 0.9922749
## 4 1986 Y 0.9929373 0.002657199 0.9877293 0.9981453
## 5 1987 N 0.9744129 0.007934658 0.9588613 0.9899646
## 6 1987 Y 0.9902261 0.003851598 0.9826771 0.9977751
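# Each model-averaged DSR above is the AICc-weight weighted mean of the
# model-specific estimates. A minimal sketch of that calculation (using one
# common form of the unconditional SE from Burnham & Anderson 2002; the
# function name modavg.sketch is just for illustration):
modavg.sketch <- function(est, se, wt){
   est.ma <- sum(wt*est)
   se.ma  <- sqrt(sum(wt*(se^2 + (est - est.ma)^2)))
   c(est=est.ma, se=se.ma)
}
# modavgCustom() computes these quantities (and the confidence limits) for us.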
plotdata.ma <- dsr.ma
plotdata.ma$S <- "Model avg"
# plot of DSR by baffle status and year (model-averaged estimates)
final.est <- ggplot(data=plotdata.ma, aes(x=as.numeric(as.character(YearF)), y=dsr,
                                          color=BaffleStatus, shape=BaffleStatus))+
   ggtitle("Effects of baffle status by year", subtitle="Logistic exposure model")+
   geom_point(position=position_dodge(width=0.2))+
   geom_line(position=position_dodge(width=0.2))+
   geom_errorbar(aes(ymin=dsr.lcl, ymax=dsr.ucl), width=.1, position=position_dodge(width=0.2))+
   ylab("DSR (95% ci)")+xlab("Year")+
   scale_x_continuous(breaks=1980:2020)+
   theme(legend.position=c(0,0), legend.justification=c(0,0))
final.est

ggsave(final.est,
       file=file.path("..","..","..","..","MyStuff","Images","sherry-baffle-year-le.png"),
       height=4, width=6, units="in", dpi=300)
# DSR by DBH for 1985 N
pred.data <- expand.grid(DBH=seq(min(reddata2$DBH), max(reddata2$DBH), length.out=50),
                         YearF=as.factor(1985),
                         BaffleStatus="N")
pred.data
## DBH YearF BaffleStatus
## 1 1.000000 1985 N
## 2 2.479592 1985 N
## 3 3.959184 1985 N
## 4 5.438776 1985 N
## 5 6.918367 1985 N
## 6 8.397959 1985 N
## 7 9.877551 1985 N
## 8 11.357143 1985 N
## 9 12.836735 1985 N
## 10 14.316327 1985 N
## 11 15.795918 1985 N
## 12 17.275510 1985 N
## 13 18.755102 1985 N
## 14 20.234694 1985 N
## 15 21.714286 1985 N
## 16 23.193878 1985 N
## 17 24.673469 1985 N
## 18 26.153061 1985 N
## 19 27.632653 1985 N
## 20 29.112245 1985 N
## 21 30.591837 1985 N
## 22 32.071429 1985 N
## 23 33.551020 1985 N
## 24 35.030612 1985 N
## 25 36.510204 1985 N
## 26 37.989796 1985 N
## 27 39.469388 1985 N
## 28 40.948980 1985 N
## 29 42.428571 1985 N
## 30 43.908163 1985 N
## 31 45.387755 1985 N
## 32 46.867347 1985 N
## 33 48.346939 1985 N
## 34 49.826531 1985 N
## 35 51.306122 1985 N
## 36 52.785714 1985 N
## 37 54.265306 1985 N
## 38 55.744898 1985 N
## 39 57.224490 1985 N
## 40 58.704082 1985 N
## 41 60.183673 1985 N
## 42 61.663265 1985 N
## 43 63.142857 1985 N
## 44 64.622449 1985 N
## 45 66.102041 1985 N
## 46 67.581633 1985 N
## 47 69.061224 1985 N
## 48 70.540816 1985 N
## 49 72.020408 1985 N
## 50 73.500000 1985 N
dsr.indiv <- plyr::ldply(model.fits, function(x, pred.data){
   # get the predictions on the logit scale and then back transform
   logit.dsr.pred <- predict(x, newdata=pred.data, se.fit=TRUE)
   # put these together in a data frame
   dsr <- cbind(pred.data, logit.dsr=logit.dsr.pred$fit, logit.dsr.se=logit.dsr.pred$se.fit)
   dsr$dsr    <- expit(dsr$logit.dsr)
   # delta-method SE on the probability scale: d expit(x)/dx = p(1-p)
   dsr$dsr.se <- dsr$logit.dsr.se * dsr$dsr * (1-dsr$dsr)
   dsr
}, pred.data=pred.data)
plotdata.indiv <- dsr.indiv
head(plotdata.indiv)
## S model.number DBH YearF BaffleStatus logit.dsr logit.dsr.se
## 1 ~DBH 1 1.000000 1985 N 3.118170 0.10489637
## 2 ~DBH 1 2.479592 1985 N 3.148136 0.09900755
## 3 ~DBH 1 3.959184 1985 N 3.178102 0.09364166
## 4 ~DBH 1 5.438776 1985 N 3.208068 0.08889345
## 5 ~DBH 1 6.918367 1985 N 3.238034 0.08486666
## 6 ~DBH 1 8.397959 1985 N 3.268000 0.08166806
## dsr dsr.se
## 1 0.9576361 0.004255566
## 2 0.9588352 0.003907852
## 3 0.9600019 0.003595680
## 4 0.9611368 0.003320426
## 5 0.9622408 0.003083501
## 6 0.9633146 0.002886117
dsr.ma <- plyr::ddply(dsr.indiv, c("DBH"), function(x, model.info){
   # merge the model information with the estimates
   x <- merge(x, model.info)
   # get the model averaged values
   ma <- AICcmodavg::modavgCustom(x$logL, x$K, modnames=x$S, nobs=x$nobs,
                                  estimate=x$dsr, se=x$dsr.se)
   data.frame(dsr=ma$Mod.avg.est, dsr.se=ma$Uncond.SE, dsr.lcl=ma$Lower.CL, dsr.ucl=ma$Upper.CL)
}, model.info=model.info)
head(dsr.ma)
## DBH dsr dsr.se dsr.lcl dsr.ucl
## 1 1.000000 0.9494191 0.01328658 0.9233779 0.9754603
## 2 2.479592 0.9509485 0.01283265 0.9257969 0.9761000
## 3 3.959184 0.9524339 0.01240580 0.9281190 0.9767488
## 4 5.438776 0.9538765 0.01200471 0.9303477 0.9774053
## 5 6.918367 0.9552774 0.01162809 0.9324867 0.9780680
## 6 8.397959 0.9566377 0.01127467 0.9345397 0.9787356
dbh.effect <- ggplot(data=dsr.ma, aes(x=DBH, y=dsr))+
   ggtitle("Effect of DBH for 1985 N", subtitle="Logistic exposure model")+
   geom_line()+
   geom_ribbon(aes(ymin=dsr.lcl, ymax=dsr.ucl), alpha=0.2)
dbh.effect

ggsave(dbh.effect,
       file=file.path("..","..","..","..","MyStuff","Images","sherry-dbh-le.png"),
       height=4, width=6, units="in", dpi=300)