# Analysis of Redstart data illustrating basic RMark features
# Reproduce Figure 2 where the Nest survival is computed for each year.

# 2019-05-01 CJS Initial code

# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Journal of Avian Biology 46(6): 559-569. https://doi.org/10.1111/jav.00536

# Data from
# Sherry TW, Wilson S, Hunter S, Holmes RT (2015) 
# Data from: Impacts of nest predators and weather on reproductive success and 
# population limitation in a long-distance migratory songbird. 
# Dryad Digital Repository. https://doi.org/10.5061/dryad.73870

library(AICcmodavg)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(plyr)
library(readxl)

source(file.path("..","..","logistic-exposure-model.R"))


# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch an
#    Freq: number of nests with this data
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#

reddata <- readxl::read_excel(file.path("..","Sherry.xlsx"), 
                               sheet="NestData")
head(reddata)
## # A tibble: 6 x 11
##   NestId FirstFound LastPresent LastChecked  Fate  Freq Height BaffleStatus
##   <chr>       <dbl>       <dbl>       <dbl> <dbl> <dbl>  <dbl> <chr>       
## 1 /*198…         11          11          18     1     1   16.9 N           
## 2 /*198…         13          22          23     1     1   10.5 N           
## 3 /*198…          7          26          26     0     1    4.8 N           
## 4 /*198…         12          14          20     1     1    3.7 N           
## 5 /*198…          6          20          25     1     1   10.7 N           
## 6 /*198…         11          19          20     1     1   17.8 N           
## # … with 3 more variables: DBH <dbl>, AgeDay1 <dbl>, Year <chr>
reddata <- as.data.frame(reddata)


# get the yearly date and merge with the nest data
annual.data <- readxl::read_excel(file.path("..","Sherry.xlsx"),
                                  sheet="AnnualCovariates", skip=9)
head(annual.data)
## # A tibble: 6 x 7
##    Year Density Predators MayTemp JuneTemp MayRain JuneRain
##   <dbl>   <dbl>     <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1  1983   0.867       2.4    8.89     17.1   194.      35.3
## 2  1984   0.467       0.5    9.06     16.2   312.     180. 
## 3  1985   0.589       3.4   10.6      13.2   107.     130  
## 4  1986   0.439       6     12.2      13.4   128.      89.6
## 5  1987   0.372       1.7   10.9      15.4    91.6    200. 
## 6  1988   0.361       0.3   12        14.4    97.8     70.2
dim(reddata)
## [1] 537  11
reddata <- merge(reddata, annual.data, all.x=TRUE)
dim(reddata)
## [1] 537  17
# any missing data?
sum(!complete.cases(reddata))
## [1] 0
# as noted in the paper, we remove nests with baffeling
dim(reddata)
## [1] 537  17
reddata <- reddata[ !(reddata$BaffleStatus=="Y"),]
dim(reddata)
## [1] 466  17
# expand the data
reddata2 <- expand.nest.data(reddata)
rm(reddata)

# create factor variable for year
reddata2$YearF <- factor(reddata2$Year)



# Now we look through each year of the study to compute the DSR using a S(.) model
# and compute the nest survival probability
fits <- plyr::dlply(reddata2, c("Year","MayTemp"), function(x){
   cat("\n\n\n*****Fitting ", x$Year[1], x$MayTemp[1], "\n")
  
   # 3. Fit the dot model 
   fit <-glm(Survive~1,
            family=binomial(link=logexp(x$Exposure)),
            data=x)
   print(summary(fit))
   
   # Convert the logit(DSR) to DSR
   DSR <- expit(coef(fit))
   DSR.se <- arm::se.coef(fit)*DSR*(1-DSR)
   cat("DSR ", DSR, "(SE ", DSR.se, ")\n")

   # Compute the nest survival for 20 days
   days <- 20
   NS <- DSR^days
   NS.se <- DSR.se * days * DSR^(days-1)
   cat("NS ", days," days ", NS, "(SE ", NS.se, ")\n")

   list(fit=fit, dsr=DSR, ns=NS, ns.se=NS.se)
  
})
## 
## 
## 
## *****Fitting  1983 8.89 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.300   0.384   0.384   0.384   0.384  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.5701     0.2066   12.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 169.70  on 314  degrees of freedom
## Residual deviance: 133.51  on 314  degrees of freedom
## AIC: 135.51
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9289139 (SE  0.01363976 )
## NS  20  days  0.2288278 (SE  0.06720011 )
## 
## 
## 
## *****Fitting  1984 9.06 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5801   0.2702   0.2702   0.2702   0.2702  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    3.292      0.245   13.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.95  on 465  degrees of freedom
## Residual deviance: 128.88  on 465  degrees of freedom
## AIC: 130.88
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9641542 (SE  0.008468915 )
## NS  20  days  0.4818699 (SE  0.08465275 )
## 
## 
## 
## *****Fitting  1985 10.6 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5034   0.2984   0.2984   0.2984   0.2984  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.0891     0.2905   10.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 97.724  on 264  degrees of freedom
## Residual deviance: 80.165  on 264  degrees of freedom
## AIC: 82.165
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9564409 (SE  0.0121009 )
## NS  20  days  0.410358 (SE  0.1038371 )
## 
## 
## 
## *****Fitting  1986 12.16 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7935   0.2021   0.2021   0.2021   0.2021  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.8814     0.3029   12.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 107.353  on 537  degrees of freedom
## Residual deviance:  93.884  on 537  degrees of freedom
## AIC: 95.884
## 
## Number of Fisher Scoring iterations: 7
## 
## DSR  0.9797943 (SE  0.005996695 )
## NS  20  days  0.6648103 (SE  0.08137759 )
## 
## 
## 
## *****Fitting  1987 10.92 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7435   0.2167   0.2167   0.2167   0.2167  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.7399     0.3349   11.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 85.254  on 381  degrees of freedom
## Residual deviance: 75.149  on 381  degrees of freedom
## AIC: 77.149
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9767958 (SE  0.007590209 )
## NS  20  days  0.6252823 (SE  0.09717533 )
## 
## 
## 
## *****Fitting  1988 12 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9642   0.1577   0.1577   0.1577   0.1577  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   4.3807     0.4487   9.764   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 53.808  on 401  degrees of freedom
## Residual deviance: 48.325  on 401  degrees of freedom
## AIC: 50.325
## 
## Number of Fisher Scoring iterations: 7
## 
## DSR  0.9876384 (SE  0.005477564 )
## NS  20  days  0.779757 (SE  0.08649256 )
## 
## 
## 
## *****Fitting  1989 11.89 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6057   0.2612   0.2612   0.2612   0.2612  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.3608     0.3054   11.01   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 96.114  on 324  degrees of freedom
## Residual deviance: 88.575  on 324  degrees of freedom
## AIC: 90.575
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9664576 (SE  0.009899781 )
## NS  20  days  0.5054242 (SE  0.1035449 )
## 
## 
## 
## *****Fitting  1990 9.15 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7144   0.2256   0.2256   0.2256   0.2256  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.6587     0.2293   15.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 176.36  on 733  degrees of freedom
## Residual deviance: 140.15  on 733  degrees of freedom
## AIC: 142.15
## 
## Number of Fisher Scoring iterations: 7
## 
## DSR  0.9748802 (SE  0.005614341 )
## NS  20  days  0.6012089 (SE  0.06924731 )
## 
## 
## 
## *****Fitting  1991 12.34 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.720   0.224   0.224   0.224   0.224  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.6729     0.2784   13.19   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 121.33  on 514  degrees of freedom
## Residual deviance: 103.03  on 514  degrees of freedom
## AIC: 105.03
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9752273 (SE  0.00672594 )
## NS  20  days  0.6055044 (SE  0.08352076 )
## 
## 
## 
## *****Fitting  1992 11.56 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.429   0.328   0.328   0.328   0.328  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.8956     0.1665    17.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 287.71  on 682  degrees of freedom
## Residual deviance: 241.11  on 682  degrees of freedom
## AIC: 243.11
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9476265 (SE  0.0082612 )
## NS  20  days  0.3409921 (SE  0.05945389 )
## 
## 
## 
## *****Fitting  1993 10.56 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2447   0.4097   0.4097   0.4097   0.4097  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.4354     0.1771   13.75   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 225.23  on 384  degrees of freedom
## Residual deviance: 173.54  on 384  degrees of freedom
## AIC: 175.54
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9194899 (SE  0.01310999 )
## NS  20  days  0.1866119 (SE  0.05321386 )
## 
## 
## 
## *****Fitting  1994 10.16 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2869   0.2757   0.2757   0.2757   0.2757  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.2513     0.3519    9.24   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 67.421  on 202  degrees of freedom
## Residual deviance: 46.866  on 202  degrees of freedom
## AIC: 48.866
## 
## Number of Fisher Scoring iterations: 7
## 
## DSR  0.9627193 (SE  0.01262925 )
## NS  20  days  0.4677286 (SE  0.1227161 )
## 
## 
## 
## *****Fitting  1995 9.63 
## 
## Call:
## glm(formula = Survive ~ 1, family = binomial(link = logexp(x$Exposure)), 
##     data = x)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4243   0.3298   0.3298   0.3298   0.3298  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    2.884      0.335    8.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 69.402  on 160  degrees of freedom
## Residual deviance: 53.096  on 160  degrees of freedom
## AIC: 55.096
## 
## Number of Fisher Scoring iterations: 6
## 
## DSR  0.9470664 (SE  0.01679438 )
## NS  20  days  0.3369832 (SE  0.1195149 )
# Extract the nest success and create the plot
ns<- plyr::ldply(fits, function(x){
  data.frame(ns=x$ns, ns.se=x$ns.se)
})
ns$lcl <- ns$ns - 1.95*ns$ns.se
ns$ucl <- ns$ns + 1.96*ns$ns.se
ns
##    Year MayTemp        ns      ns.se        lcl       ucl
## 1  1983    8.89 0.2288278 0.06720011 0.09778759 0.3605400
## 2  1984    9.06 0.4818699 0.08465275 0.31679704 0.6477893
## 3  1985   10.60 0.4103580 0.10383706 0.20787572 0.6138786
## 4  1986   12.16 0.6648103 0.08137759 0.50612405 0.8243104
## 5  1987   10.92 0.6252823 0.09717533 0.43579038 0.8157459
## 6  1988   12.00 0.7797570 0.08649256 0.61109652 0.9492824
## 7  1989   11.89 0.5054242 0.10354493 0.30351161 0.7083723
## 8  1990    9.15 0.6012089 0.06924731 0.46617661 0.7369336
## 9  1991   12.34 0.6055044 0.08352076 0.44263895 0.7692051
## 10 1992   11.56 0.3409921 0.05945389 0.22505705 0.4575218
## 11 1993   10.56 0.1866119 0.05321386 0.08284490 0.2909111
## 12 1994   10.16 0.4677286 0.12271613 0.22843218 0.7082523
## 13 1995    9.63 0.3369832 0.11951487 0.10392924 0.5712324
ns.vs.temp <- ggplot(data=ns, aes(x=MayTemp, y=ns))+
  ggtitle("Nest success vs. May temp", subtitle="S(.) model fit to each year separately")+
  geom_point()+
  geom_errorbar(aes(ymin=lcl, ymax=ucl), width=0.01)+
  geom_smooth(method="lm")+
  ylim(0,1)
ns.vs.temp

ggsave(ns.vs.temp,
       file=file.path("..","..","..","..","MyStuff","Images","sherry-ns-vs-temp-le.png"),  h=4, w=6, units="in", dpi=300)