# Analysis of Redstart data using logisitic exposure models

# 2019-05-01 CJS Initial code

# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Journal of Avian Biology 46(6): 559-569. https://doi.org/10.1111/jav.00536

# Data from
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Data from: Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Dryad Digital Repository. https://doi.org/10.5061/dryad.73870


library(AICcmodavg)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(plyr)
library(readxl)

source(file.path("..","..","logistic-exposure-model.R"))


# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch an
#    Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#

reddata <- readxl::read_excel(file.path("..","Sherry.xlsx"), 
                               sheet="NestData")
head(reddata)
## # A tibble: 6 x 11
##   NestId FirstFound LastPresent LastChecked  Fate  Freq Height BaffleStatus
##   <chr>       <dbl>       <dbl>       <dbl> <dbl> <dbl>  <dbl> <chr>       
## 1 /*198…         11          11          18     1     1   16.9 N           
## 2 /*198…         13          22          23     1     1   10.5 N           
## 3 /*198…          7          26          26     0     1    4.8 N           
## 4 /*198…         12          14          20     1     1    3.7 N           
## 5 /*198…          6          20          25     1     1   10.7 N           
## 6 /*198…         11          19          20     1     1   17.8 N           
## # … with 3 more variables: DBH <dbl>, AgeDay1 <dbl>, Year <chr>
# We expand the data to generate the effective sample size
reddata2 <- expand.nest.data(reddata)
head(reddata2)
##         NestId FirstFound LastPresent LastChecked Fate Freq Height
## 1 /*1983-175*/         11          11          18    1    1   16.9
## 2 /*1983-176*/         13          22          23    1    1   10.5
## 3 /*1983-176*/         13          22          23    1    1   10.5
## 4 /*1983-176*/         13          22          23    1    1   10.5
## 5 /*1983-176*/         13          22          23    1    1   10.5
## 6 /*1983-176*/         13          22          23    1    1   10.5
##   BaffleStatus  DBH AgeDay1 Year Exposure Survive  Day NestAge
## 1            N 39.0      -8 1983        7       0 14.5     5.5
## 2            N 10.7     -10 1983        1       1 13.0     2.0
## 3            N 10.7     -10 1983        1       1 14.0     3.0
## 4            N 10.7     -10 1983        1       1 15.0     4.0
## 5            N 10.7     -10 1983        1       1 16.0     5.0
## 6            N 10.7     -10 1983        1       1 17.0     6.0
rm(reddata)
# Baffled nests only

dim(reddata2)
## [1] 6492   15
reddata2.baf <- reddata2[ reddata2$BaffleStatus=="Y",]
dim(reddata2.baf)
## [1] 1118   15
# Fit a particular model
# This is a model with S constant over time (closest to Mayfield method)
mod.baf <-  glm(Survive~1,
         family=binomial(link=logexp(reddata2.baf$Exposure)),
         data=reddata2.baf)
summary(mod.baf)
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(reddata2.baf$Exposure)), 
##     data = reddata2.baf)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8583   0.1842   0.1842   0.1842   0.1842  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   4.0680     0.2303   17.67   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 192.52  on 1117  degrees of freedom
## Residual deviance: 168.64  on 1117  degrees of freedom
## AIC: 170.64
## 
## Number of Fisher Scoring iterations: 7
# Convert the logit(DSR) to DSR
DSR <- expit(coef(mod.baf))
DSR.se <- arm::se.coef(mod.baf)*DSR*(1-DSR)
cat("DSR ", DSR, "(SE ", DSR.se, ")\n")
## DSR  0.9831763 (SE  0.003808499 )
# Find confidence intervals by taking expit of confit of coefficients
expit(confint(mod.baf))
## Waiting for profiling to be done...
##     2.5 %    97.5 % 
## 0.9745497 0.9896193
# make a data frame for model averaging
dsr.dot <- data.frame(Day=1:39, logit.dsr=coef(mod.baf), logit.dsr.sr=arm::se.coef(mod.baf),
                                dsr=DSR, dsr.se=DSR.se)
## Warning in data.frame(Day = 1:39, logit.dsr = coef(mod.baf), logit.dsr.sr
## = arm::se.coef(mod.baf), : row names were found from a short variable and
## have been discarded
# Compute the nest survival for 20 days
days <- 20
NS <- DSR^days
NS.se <- DSR.se * days * DSR^(days-1)
cat("NS ", days," days ", NS, "(SE ", NS.se, ")\n")
## NS  20  days  0.7122429 (SE  0.05517985 )
# control nests only

dim(reddata2)
## [1] 6492   15
reddata2.cntl <- reddata2[ reddata2$BaffleStatus=="N" & !reddata2$Year %in% c(1983, 1984, 1990),]
dim(reddata2.cntl)
## [1] 3859   15
# 3. Fit a particular model
# This is a model with S constant over time (closest to Mayfield method)
mod.cntl <-  glm(Survive~1,
         family=binomial(link=logexp(reddata2.cntl$Exposure)),
         data=reddata2.cntl)
summary(mod.cntl)
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(reddata2.cntl$Exposure)), 
##     data = reddata2.cntl)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5646   0.2758   0.2758   0.2758   0.2758  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.25052    0.08275   39.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1255.5  on 3858  degrees of freedom
## Residual deviance: 1045.0  on 3858  degrees of freedom
## AIC: 1047
## 
## Number of Fisher Scoring iterations: 6
# Convert the logit(DSR) to DSR
DSR <- expit(coef(mod.cntl))
DSR.se <- arm::se.coef(mod.cntl)*DSR*(1-DSR)
cat("DSR ", DSR, "(SE ", DSR.se, ")\n")
## DSR  0.9626919 (SE  0.002972216 )
# Find confidence intervals by taking expit of confit of coefficients
expit(confint(mod.cntl))
## Waiting for profiling to be done...
##     2.5 %    97.5 % 
## 0.9564906 0.9682955
# Compute the nest survival for 20 days
days <- 20
NS <- DSR^days
NS.se <- DSR.se * days * DSR^(days-1)
cat("NS ", days," days ", NS, "(SE ", NS.se, ")\n")
## NS  20  days  0.4674621 (SE  0.02886486 )
# linear effect of date (relative to 27 May) 

dim(reddata2)
## [1] 6492   15
# 3. Fit a particular model
# This is a model with linear effect of date (relative to 27 May)
mod.T <-  glm(Survive~Day,
            family=binomial(link=logexp(reddata2$Exposure)),
            data=reddata2)
summary(mod.T)
## 
## Call:
## glm(formula = Survive ~ Day, family = binomial(link = logexp(reddata2$Exposure)), 
##     data = reddata2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7155   0.2464   0.2591   0.2724   0.3420  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.697939   0.170707  21.663   <2e-16 ***
## Day         -0.014561   0.006295  -2.313   0.0207 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1968.4  on 6491  degrees of freedom
## Residual deviance: 1637.6  on 6490  degrees of freedom
## AIC: 1641.6
## 
## Number of Fisher Scoring iterations: 6
# predict the survival probability as a function of time since 21 May
pred.data <- data.frame(Day=1:60)
logit.dsr.pred.linear <- predict(mod.T, newdata=pred.data, se.fit=TRUE)  

# put these together in a data frame
dsr.linear <- cbind(pred.data, logit.dsr=logit.dsr.pred.linear$fit, logit.dsr.se=logit.dsr.pred.linear$se.fit)
head(dsr.linear)
##   Day logit.dsr logit.dsr.se
## 1   1  3.683379    0.1649319
## 2   2  3.668818    0.1591966
## 3   3  3.654257    0.1535052
## 4   4  3.639697    0.1478627
## 5   5  3.625136    0.1422749
## 6   6  3.610576    0.1367486
dsr.linear$dsr <- expit(dsr.linear$logit.dsr)
dsr.linear$dsr.se <- dsr.linear$logit.dsr.se* dsr.linear$dsr * (1-dsr.linear$dsr)
head(dsr.linear)
##   Day logit.dsr logit.dsr.se       dsr      dsr.se
## 1   1  3.683379    0.1649319 0.9754785 0.003945200
## 2   2  3.668818    0.1591966 0.9751278 0.003861087
## 3   3  3.654257    0.1535052 0.9747722 0.003774901
## 4   4  3.639697    0.1478627 0.9744117 0.003686747
## 5   5  3.625136    0.1422749 0.9740461 0.003596754
## 6   6  3.610576    0.1367486 0.9736754 0.003505082
plotdata <- dsr.linear
# add approximate ci on dsr
plotdata$dsr.lcl <- expit(plotdata$logit.dsr - 1.96*plotdata$logit.dsr.se)
plotdata$dsr.ucl <- expit(plotdata$logit.dsr + 1.96*plotdata$logit.dsr.se)

ggplot(data=plotdata, aes(x=Day, y=dsr, group=1))+
  ggtitle("Linear effect on DSR")+
  geom_line()+
  geom_ribbon(aes(ymin=dsr.lcl, ymax=dsr.ucl), alpha=0.2)+
  ylim(0.5,1)

# To compute the nest survival, you also need the covariance between all of the estimates
# This is NOT produced by the predict function and so we need to do this by hand (groan)!

# for what values do you want predictions?
days=1:20
pred.matrix <- data.frame(intecept=1, data=days)
head(pred.matrix)
##   intecept data
## 1        1    1
## 2        1    2
## 3        1    3
## 4        1    4
## 5        1    5
## 6        1    6
# compute the predictions and variance covariance matrix
pred.logit <- as.vector( as.matrix(pred.matrix) %*% coef(mod.T))
pred.logit.vcov <- as.matrix(pred.matrix) %*% vcov(mod.T) %*% t(as.matrix(pred.matrix))

# use msm package to get estimates and vcov after expit()
# create a list of functions to back transform
library(msm)
funs <- paste("~1/(1+exp(-x",days,"))",sep="")
funs <- lapply(funs, as.formula)
head(funs)
## [[1]]
## ~1/(1 + exp(-x1))
## <environment: 0x7f810606b050>
## 
## [[2]]
## ~1/(1 + exp(-x2))
## <environment: 0x7f810606b050>
## 
## [[3]]
## ~1/(1 + exp(-x3))
## <environment: 0x7f810606b050>
## 
## [[4]]
## ~1/(1 + exp(-x4))
## <environment: 0x7f810606b050>
## 
## [[5]]
## ~1/(1 + exp(-x5))
## <environment: 0x7f810606b050>
## 
## [[6]]
## ~1/(1 + exp(-x6))
## <environment: 0x7f810606b050>
# find the inverse predictions and vcov
pred.dsr     <- expit(pred.logit)
pred.dsr
##  [1] 0.9754785 0.9751278 0.9747722 0.9744117 0.9740461 0.9736754 0.9732996
##  [8] 0.9729186 0.9725323 0.9721407 0.9717436 0.9713411 0.9709329 0.9705192
## [15] 0.9700997 0.9696744 0.9692433 0.9688063 0.9683633 0.9679141
pred.dsr.vcov <- msm::deltamethod(funs, mean=pred.logit, cov=pred.logit.vcov, ses=FALSE)

# now find the product of the dsr over the 20 time periods
funs <- as.formula( paste("~", paste(paste("x",days,sep=""),collapse=" * "), collapse=""))
funs
## ~x1 * x2 * x3 * x4 * x5 * x6 * x7 * x8 * x9 * x10 * x11 * x12 * 
##     x13 * x14 * x15 * x16 * x17 * x18 * x19 * x20
ns <- prod(pred.dsr)
ns.se <- msm::deltamethod(funs, mean=pred.dsr, cov=pred.dsr.vcov, ses=TRUE)
cat("NS ", min(days)," to ",max(days),"days:", NS, "(SE ", NS.se, ")\n")
## NS  1  to  20 days: 0.4674621 (SE  0.02886486 )