# Analysis of Shelly redstart data using the logistic exposure model
# 2019-05-01 CJS Initial code

# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Journal of Avian Biology 46(6): 559-569. https://doi.org/10.1111/jav.00536

# Data from
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Data from: Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Dryad Digital Repository. https://doi.org/10.5061/dryad.73870

library(AICcmodavg)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(plyr)
library(readxl)

source(file.path("..","..","logistic-exposure-model.R"))

# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch an
#    Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#

reddata <- readxl::read_excel(file.path("..","Sherry.xlsx"), 
                               sheet="NestData")
head(reddata)
## # A tibble: 6 x 11
##   NestId FirstFound LastPresent LastChecked  Fate  Freq Height BaffleStatus
##   <chr>       <dbl>       <dbl>       <dbl> <dbl> <dbl>  <dbl> <chr>       
## 1 /*198…         11          11          18     1     1   16.9 N           
## 2 /*198…         13          22          23     1     1   10.5 N           
## 3 /*198…          7          26          26     0     1    4.8 N           
## 4 /*198…         12          14          20     1     1    3.7 N           
## 5 /*198…          6          20          25     1     1   10.7 N           
## 6 /*198…         11          19          20     1     1   17.8 N           
## # … with 3 more variables: DBH <dbl>, AgeDay1 <dbl>, Year <chr>
# We expand the data to generate the effective sample size
reddata2 <- expand.nest.data(reddata)
head(reddata2)
##         NestId FirstFound LastPresent LastChecked Fate Freq Height
## 1 /*1983-175*/         11          11          18    1    1   16.9
## 2 /*1983-176*/         13          22          23    1    1   10.5
## 3 /*1983-176*/         13          22          23    1    1   10.5
## 4 /*1983-176*/         13          22          23    1    1   10.5
## 5 /*1983-176*/         13          22          23    1    1   10.5
## 6 /*1983-176*/         13          22          23    1    1   10.5
##   BaffleStatus  DBH AgeDay1 Year Exposure Survive  Day NestAge
## 1            N 39.0      -8 1983        7       0 14.5     5.5
## 2            N 10.7     -10 1983        1       1 13.0     2.0
## 3            N 10.7     -10 1983        1       1 14.0     3.0
## 4            N 10.7     -10 1983        1       1 15.0     4.0
## 5            N 10.7     -10 1983        1       1 16.0     5.0
## 6            N 10.7     -10 1983        1       1 17.0     6.0
rm(reddata)


# as noted in the paper, we remove nests from years with no baffeling
dim(reddata2)
## [1] 6492   15
reddata2 <- reddata2[ !(reddata2$BaffleStatus=="N" & reddata2$Year %in% c(1983, 1984, 1990)),]
dim(reddata2)
## [1] 4977   15
# create factor variable for year
reddata2$YearF <- factor(reddata2$Year)

# create factor for baffle
reddata2$BaffleStatus <- factor(reddata2$BaffleStatus)
#  Set up the set of model to fit
model.list.csv <- textConnection(
" S
 ~DBH
 ~DBH+YearF
 ~DBH+YearF+BaffleStatus
")

model.list <- read.csv(model.list.csv, header=TRUE, as.is=TRUE, strip.white=TRUE)
model.list$model.number <- 1:nrow(model.list)
model.list
##                         S model.number
## 1                    ~DBH            1
## 2              ~DBH+YearF            2
## 3 ~DBH+YearF+BaffleStatus            3
model.fits <- plyr::dlply(model.list, c("S","model.number"), function(x,input.data, input.ddl){
  cat("\n\n***** Starting ", unlist(x), "\n")
  
  fit <- glm(formula=as.formula(paste("Survive", eval(x$S))),
            family=binomial(link=logexp(input.data$Exposure)),
            data=input.data)
  fit
  
},input.data=reddata2)
## 
## 
## ***** Starting  ~DBH 1 
## 
## 
## ***** Starting  ~DBH+YearF 2 
## 
## 
## ***** Starting  ~DBH+YearF+BaffleStatus 3
# examine results for a particular model
model.number <-1
names(model.fits[[model.number]])
##  [1] "coefficients"      "residuals"         "fitted.values"    
##  [4] "effects"           "R"                 "rank"             
##  [7] "qr"                "family"            "linear.predictors"
## [10] "deviance"          "aic"               "null.deviance"    
## [13] "iter"              "weights"           "prior.weights"    
## [16] "df.residual"       "df.null"           "y"                
## [19] "converged"         "boundary"          "model"            
## [22] "call"              "formula"           "terms"            
## [25] "data"              "offset"            "control"          
## [28] "method"            "contrasts"         "xlevels"
model.fits[[model.number]]$formula
## Survive ~ DBH
## <environment: 0x7f92c7fbe170>
summary(model.fits[[model.number]])
## 
## Call:
## glm(formula = as.formula(paste("Survive", eval(x$S))), family = binomial(link = logexp(input.data$Exposure)), 
##     data = input.data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8465   0.2206   0.2726   0.2822   0.2942  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 3.097917   0.109130  28.387  < 2e-16 ***
## DBH         0.020253   0.006176   3.279  0.00104 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1462.1  on 4976  degrees of freedom
## Residual deviance: 1215.1  on 4975  degrees of freedom
## AIC: 1219.1
## 
## Number of Fisher Scoring iterations: 6
# Model comparision and averaging
# collect models and make AICc table
AICcmodavg::aictab(model.fits)
## 
## Model selection based on AICc:
## 
##                            K    AICc Delta_AICc AICcWt Cum.Wt      LL
## ~DBH+YearF+BaffleStatus.3 12 1177.63       0.00      1      1 -576.78
## ~DBH+YearF.2              11 1194.11      16.48      0      1 -586.03
## ~DBH.1                     2 1219.07      41.44      0      1 -607.53
# compute the DSR and model average the daily values
# we need to set up the prediction matrix with variables that match the models
mean.dbh= mean(reddata2$DBH)
mean.dbh
## [1] 16.21035
pred.data      <- expand.grid(DBH=mean.dbh, 
                             YearF=as.factor(unique(as.numeric(as.character(reddata2$YearF)))),
                             BaffleStatus=unique(as.character(reddata2$BaffleStatus)))
pred.data                            
##         DBH YearF BaffleStatus
## 1  16.21035  1985            N
## 2  16.21035  1986            N
## 3  16.21035  1987            N
## 4  16.21035  1988            N
## 5  16.21035  1989            N
## 6  16.21035  1991            N
## 7  16.21035  1992            N
## 8  16.21035  1993            N
## 9  16.21035  1994            N
## 10 16.21035  1995            N
## 11 16.21035  1985            Y
## 12 16.21035  1986            Y
## 13 16.21035  1987            Y
## 14 16.21035  1988            Y
## 15 16.21035  1989            Y
## 16 16.21035  1991            Y
## 17 16.21035  1992            Y
## 18 16.21035  1993            Y
## 19 16.21035  1994            Y
## 20 16.21035  1995            Y
dsr.indiv <- plyr::ldply(model.fits, function(x,pred.data){
   # get the predictions on the logit scale and then back transform
    logit.dsr.pred <- predict(x, newdata=pred.data, se.fit=TRUE)  

    # put these together in a data frame
    dsr <- cbind(pred.data, logit.dsr=logit.dsr.pred$fit, logit.dsr.se=logit.dsr.pred$se.fit)
    dsr$dsr   <- expit(dsr$logit.dsr)
    dsr$dsr.se <- dsr$logit.dsr.se* dsr$dsr * (1-dsr$dsr)
    dsr
},pred.data=pred.data)

plotdata.indiv <- dsr.indiv
head(plotdata.indiv)
##      S model.number      DBH YearF BaffleStatus logit.dsr logit.dsr.se
## 1 ~DBH            1 16.21035  1985            N  3.426224   0.08140163
## 2 ~DBH            1 16.21035  1986            N  3.426224   0.08140163
## 3 ~DBH            1 16.21035  1987            N  3.426224   0.08140163
## 4 ~DBH            1 16.21035  1988            N  3.426224   0.08140163
## 5 ~DBH            1 16.21035  1989            N  3.426224   0.08140163
## 6 ~DBH            1 16.21035  1991            N  3.426224   0.08140163
##         dsr      dsr.se
## 1 0.9685141 0.002482303
## 2 0.9685141 0.002482303
## 3 0.9685141 0.002482303
## 4 0.9685141 0.002482303
## 5 0.9685141 0.002482303
## 6 0.9685141 0.002482303
# do the model averaging for each day's DSR
# Extract the logL and K number of parameters for each model in the model set.
# The model number is used to index the values
model.info <- plyr::ldply(model.fits, function(x){
    #browser()
    logL <- logLik(x)
    K    <- length(coef(x))
    nobs <- nrow(x$data)
    data.frame(logL=logL, K=K, nobs=nobs)
})
model.info
##                         S model.number      logL  K nobs
## 1                    ~DBH            1 -607.5314  2 4977
## 2              ~DBH+YearF            2 -586.0276 11 4977
## 3 ~DBH+YearF+BaffleStatus            3 -576.7813 12 4977
dsr.ma <- plyr::ddply(dsr.indiv, c("YearF","BaffleStatus"), function(x, model.info){
   # merge the model information with the estimates
   x <-merge(x, model.info)
   # get the model averaged values
   #browser()
   ma <- AICcmodavg::modavgCustom(x$logL, x$K, modnames=x$S, nobs=x$nobs,
                                  estimate=x$dsr, se=x$dsr.se)
   data.frame(dsr=ma$Mod.avg.est, dsr.se=ma$Uncond.SE, dsr.lcl=ma$Lower.CL, dsr.ucl=ma$Upper.CL)
},model.info=model.info)
head(dsr.ma)
##   YearF BaffleStatus       dsr      dsr.se   dsr.lcl   dsr.ucl
## 1  1985            N 0.9631886 0.009742288 0.9440940 0.9822831
## 2  1985            Y 0.9858384 0.004841395 0.9763495 0.9953274
## 3  1986            N 0.9814279 0.005534292 0.9705809 0.9922749
## 4  1986            Y 0.9929373 0.002657199 0.9877293 0.9981453
## 5  1987            N 0.9744129 0.007934658 0.9588613 0.9899646
## 6  1987            Y 0.9902261 0.003851598 0.9826771 0.9977751
plotdata.ma  <- dsr.ma
plotdata.ma$S <- "Model avg"


# plot of bafflestatus by year (model averages())
final.est <- ggplot(data=plotdata.ma, aes(x=as.numeric(as.character(YearF)), y=dsr, color=BaffleStatus, shape=BaffleStatus))+
  ggtitle("Effects of baffle status by year", subtitle="Logistic exposure model")+
  geom_point(position=position_dodge(w=0.2))+
  geom_line(position=position_dodge(w=0.2))+
  geom_errorbar(aes(ymin=dsr.lcl, ymax=dsr.ucl),width=.1, position=position_dodge(w=0.2))+
  ylab("DSR (95% ci)")+xlab("Year")+
  scale_x_continuous(breaks=1980:2020)+
  theme(legend.position=c(0,0), legend.justification=c(0,0))
final.est

ggsave(final.est,
       file=file.path("..","..","..","..","MyStuff","Images","sherry-baffle-year-le.png"), h=4, w=6, units="in", dpi=300)
# DSR by DBH for 1995 N
pred.data      <- expand.grid(DBH=seq(min(reddata2$DBH),max(reddata2$DBH), length.out=50), 
                             YearF=as.factor(1985),
                             BaffleStatus="N")
pred.data                            
##          DBH YearF BaffleStatus
## 1   1.000000  1985            N
## 2   2.479592  1985            N
## 3   3.959184  1985            N
## 4   5.438776  1985            N
## 5   6.918367  1985            N
## 6   8.397959  1985            N
## 7   9.877551  1985            N
## 8  11.357143  1985            N
## 9  12.836735  1985            N
## 10 14.316327  1985            N
## 11 15.795918  1985            N
## 12 17.275510  1985            N
## 13 18.755102  1985            N
## 14 20.234694  1985            N
## 15 21.714286  1985            N
## 16 23.193878  1985            N
## 17 24.673469  1985            N
## 18 26.153061  1985            N
## 19 27.632653  1985            N
## 20 29.112245  1985            N
## 21 30.591837  1985            N
## 22 32.071429  1985            N
## 23 33.551020  1985            N
## 24 35.030612  1985            N
## 25 36.510204  1985            N
## 26 37.989796  1985            N
## 27 39.469388  1985            N
## 28 40.948980  1985            N
## 29 42.428571  1985            N
## 30 43.908163  1985            N
## 31 45.387755  1985            N
## 32 46.867347  1985            N
## 33 48.346939  1985            N
## 34 49.826531  1985            N
## 35 51.306122  1985            N
## 36 52.785714  1985            N
## 37 54.265306  1985            N
## 38 55.744898  1985            N
## 39 57.224490  1985            N
## 40 58.704082  1985            N
## 41 60.183673  1985            N
## 42 61.663265  1985            N
## 43 63.142857  1985            N
## 44 64.622449  1985            N
## 45 66.102041  1985            N
## 46 67.581633  1985            N
## 47 69.061224  1985            N
## 48 70.540816  1985            N
## 49 72.020408  1985            N
## 50 73.500000  1985            N
dsr.indiv <- plyr::ldply(model.fits, function(x,pred.data){
   # get the predictions on the logit scale and then back transform
    logit.dsr.pred <- predict(x, newdata=pred.data, se.fit=TRUE)  

    # put these together in a data frame
    dsr <- cbind(pred.data, logit.dsr=logit.dsr.pred$fit, logit.dsr.se=logit.dsr.pred$se.fit)
    dsr$dsr   <- expit(dsr$logit.dsr)
    dsr$dsr.se <- dsr$logit.dsr.se* dsr$dsr * (1-dsr$dsr)
    dsr
},pred.data=pred.data)

plotdata.indiv <- dsr.indiv
head(plotdata.indiv)
##      S model.number      DBH YearF BaffleStatus logit.dsr logit.dsr.se
## 1 ~DBH            1 1.000000  1985            N  3.118170   0.10489637
## 2 ~DBH            1 2.479592  1985            N  3.148136   0.09900755
## 3 ~DBH            1 3.959184  1985            N  3.178102   0.09364166
## 4 ~DBH            1 5.438776  1985            N  3.208068   0.08889345
## 5 ~DBH            1 6.918367  1985            N  3.238034   0.08486666
## 6 ~DBH            1 8.397959  1985            N  3.268000   0.08166806
##         dsr      dsr.se
## 1 0.9576361 0.004255566
## 2 0.9588352 0.003907852
## 3 0.9600019 0.003595680
## 4 0.9611368 0.003320426
## 5 0.9622408 0.003083501
## 6 0.9633146 0.002886117
dsr.ma <- plyr::ddply(dsr.indiv, c("DBH"), function(x, model.info){
   # merge the model information with the estimates
   x <-merge(x, model.info)
   # get the model averaged values
   #browser()
   ma <- AICcmodavg::modavgCustom(x$logL, x$K, modnames=x$S, nobs=x$nobs,
                                  estimate=x$dsr, se=x$dsr.se)
   data.frame(dsr=ma$Mod.avg.est, dsr.se=ma$Uncond.SE, dsr.lcl=ma$Lower.CL, dsr.ucl=ma$Upper.CL)
},model.info=model.info)
head(dsr.ma)
##        DBH       dsr     dsr.se   dsr.lcl   dsr.ucl
## 1 1.000000 0.9494191 0.01328658 0.9233779 0.9754603
## 2 2.479592 0.9509485 0.01283265 0.9257969 0.9761000
## 3 3.959184 0.9524339 0.01240580 0.9281190 0.9767488
## 4 5.438776 0.9538765 0.01200471 0.9303477 0.9774053
## 5 6.918367 0.9552774 0.01162809 0.9324867 0.9780680
## 6 8.397959 0.9566377 0.01127467 0.9345397 0.9787356
dbh.effect <- ggplot(data=dsr.ma, aes(x=DBH, y=dsr))+
  ggtitle("Effect of DBH for 1985 N", subtitle="Logistic exposure model")+
  geom_line()+
  geom_ribbon(aes(ymin=dsr.lcl, ymax=dsr.ucl), alpha=0.2)

dbh.effect

ggsave(dbh.effect, 
       file=file.path("..","..","..","..","MyStuff","Images","sherry-dbh-le.png"), h=4, w=6, units="in", dpi=300)