# Analysis of sparrpw data illustrating logistic exposure models
# Incorportating an age variable

# This was taken from 
# https://www.researchgate.net/post/Does_anybody_have_code_for_running_a_logistic_exposure_nest_survival_model_in_R
# which appears to be the data set used by Ben Bolker

# 2019-05-01 CJS Initial code

library(AICcmodavg)
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
library(plyr)
library(readxl)

source(file.path("..","..","logistic-exposure-model.R"))


# The dataframe must contain the following fields with the following names
#
#    FirstFound: day the nest was first found
#    LastPresent: last day that a chick was present in the nest
#    LastChecked: last day the nest was checked
#    Fate: fate of the nest; 0=hatch an
#    Freq: number of nests with this data
#    AgeDay1: age of nest at day 1 of study (can be negative)
#
# In this example, the multiple visits to a nest have been collapsed
# to a single record for each nest.
# In more complex examples, you may have multple records per nest
# as shown in the mallard example.
#

spardata <- read.csv(file.path("..","sparrow-summary.csv"), header=TRUE, as.is=TRUE, strip.white=TRUE)

spardata <- as.data.frame(spardata)  # sometimes doesn't play nice with tibbles
head(spardata)
##   nest FirstFound LastPresent LastChecked Fate Freq AgeDay1 treeht sageht
## 1    1        151         166         166    0    1    -138    0.0   64.0
## 2   12        168         181         181    0    1    -155   44.0   41.0
## 3   13        143         150         153    1    1    -139   51.5   36.0
## 4   14        156         158         163    1    1    -139   37.0   80.3
## 5   15        183         190         190    0    1    -162   56.5   22.2
## 6   16        185         192         192    0    1    -167   72.8   22.3
##   grassht
## 1    24.0
## 2    17.9
## 3    17.6
## 4    11.7
## 5    19.3
## 6    26.7
spardata2 <- expand.nest.data(spardata)
head(spardata2)
##   nest FirstFound LastPresent LastChecked Fate Freq AgeDay1 treeht sageht
## 1    1        151         166         166    0    1    -138      0     64
## 2    1        151         166         166    0    1    -138      0     64
## 3    1        151         166         166    0    1    -138      0     64
## 4    1        151         166         166    0    1    -138      0     64
## 5    1        151         166         166    0    1    -138      0     64
## 6    1        151         166         166    0    1    -138      0     64
##   grassht Day Exposure Survive NestAge
## 1      24 151        1       1      12
## 2      24 152        1       1      13
## 3      24 153        1       1      14
## 4      24 154        1       1      15
## 5      24 155        1       1      16
## 6      24 156        1       1      17
# Fit a particular model
# This is a model with S a function of treeheight
mod.treeht <-  glm(Survive~treeht,
         family=binomial(link=logexp(spardata2$Exposure)),
         data=spardata2)
summary(mod.treeht)
## 
## Call:
## glm(formula = Survive ~ treeht, family = binomial(link = logexp(spardata2$Exposure)), 
##     data = spardata2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1666   0.2343   0.2552   0.2614   0.2698  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.581807   1.010448   3.545 0.000393 ***
## treeht      -0.003938   0.022902  -0.172 0.863469    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26.440  on 91  degrees of freedom
## Residual deviance: 18.094  on 90  degrees of freedom
## AIC: 22.094
## 
## Number of Fisher Scoring iterations: 7
# This is a model with S a function of nest age
# This will differ from the original results because
# we are using the individual day points with individual nest ages
# rather than the nest age at the start of the interval
mod.age.quad <-  glm(Survive~NestAge+I(NestAge^2),
         family=binomial(link=logexp(spardata2$Exposure)),
         data=spardata2)
summary(mod.age.quad)
## 
## Call:
## glm(formula = Survive ~ NestAge + I(NestAge^2), family = binomial(link = logexp(spardata2$Exposure)), 
##     data = spardata2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1629   0.2040   0.2624   0.2912   0.3011  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)   6.50835    5.99644   1.085    0.278
## NestAge      -0.42118    0.72455  -0.581    0.561
## I(NestAge^2)  0.01290    0.02129   0.606    0.545
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 26.440  on 91  degrees of freedom
## Residual deviance: 17.638  on 89  degrees of freedom
## AIC: 23.638
## 
## Number of Fisher Scoring iterations: 7