blgr-gof.R

# Single Species Single Season Occupancy models 

# Goodness of fit for the gross beak example

#  unmarked package

library(readxl)
library(unmarked)

## Loading required package: reshape

## Loading required package: lattice

## Loading required package: parallel

## Loading required package: Rcpp

library(ggplot2)

# get the data read in
# Data for detections should be a data frame with rows corresponding to sites
# and columns to visits.
# The usual 1=detected; 0=not detected; NA=not visited is used.

input.data <- read.csv(file.path("..","blgr.csv"),
                       header=TRUE, as.is=TRUE, strip.white=TRUE) 
head(input.data)

##   field v1 v2 v3 field.size bqi crop.hist crop1 crop2 count1 count2 count3
## 1     1  1  1  1       14.0   1      crop     1     0      1      2      2
## 2     2  1  1  0       12.7   1      crop     1     0      2      2      0
## 3     3  0  0  0       15.7   0     grass     0     1      0      0      0
## 4     4  0  1  0       19.5   0     grass     0     1      0      2      0
## 5     5  1  0  1       13.5   0      crop     1     0      1      0      1
## 6     6  0  0  1        9.6   0     mixed     0     1      0      0      2
##    X     logFS
## 1 NA 1.1461280
## 2 NA 1.1038037
## 3 NA 1.1958997
## 4 NA 1.2900346
## 5 NA 1.1303338
## 6 NA 0.9822712

# do some basic checks on your data 
# e.g. check number of sites; number of visits etc
nrow(input.data)

## [1] 41

range(input.data[, c("v1","v2","v3")], na.rm=TRUE)

## [1] 0 1

sum(is.na(input.data[, c("v1","v2","v3")]))

## [1] 0

input.history <- input.data[, c("v1","v2","v3")]
head(input.history)

##   v1 v2 v3
## 1  1  1  1
## 2  1  1  0
## 3  0  0  0
## 4  0  1  0
## 5  1  0  1
## 6  0  0  1

# Create the data structore for the occupancy model
# unmarked does not have any reserved keywords like RPresence so 
# we need to construct a covariate data frame with the covariates
# again stacked. The ordering is different in unmarked.
# Here you want for site 1, the visit specific covariates, then
# for site 2, etc.
# This needs to be added to the unmarked dataframe.

# We also want to set up covariate values where we
# set the detection probability equal in first 2 occasions and last 3 occasions.
# We need to define a survey covariate that has two levels 
obs.covar <- data.frame( Site=rep(1:nrow(input.history), each=ncol(input.history)),
                         Visit=rep(1:ncol(input.history), nrow(input.history)),
                         Time =rep(c("T1","T2","T3"), nrow(input.history)))
obs.covar[1:10,]

##    Site Visit Time
## 1     1     1   T1
## 2     1     2   T2
## 3     1     3   T3
## 4     2     1   T1
## 5     2     2   T2
## 6     2     3   T3
## 7     3     1   T1
## 8     3     2   T2
## 9     3     3   T3
## 10    4     1   T1

grossbeak.UMF <- unmarked::unmarkedFrameOccu(
                     y = input.history,
                     obsCovs=obs.covar) 
summary(grossbeak.UMF)

## unmarkedFrame Object
## 
## 41 sites
## Maximum number of observations per site: 3 
## Mean number of observations per site: 3 
## Sites with at least one detection: 33 
## 
## Tabulation of y observations:
##  0  1 
## 63 60 
## 
## Observation-level covariates:
##       Site        Visit   Time   
##  Min.   : 1   Min.   :1   T1:41  
##  1st Qu.:11   1st Qu.:1   T2:41  
##  Median :21   Median :2   T3:41  
##  Mean   :21   Mean   :2          
##  3rd Qu.:31   3rd Qu.:3          
##  Max.   :41   Max.   :3

plot(grossbeak.UMF)

# Fit some models.
# Note that formula DO NOT HAVE AN = SIGN
# The two formula are for detecton and occupancy in that order
mod.pdot.u <- unmarked::occu(~1 ~1 , data=grossbeak.UMF, se=TRUE)

# This creates a complicated S4 object with SLOTS that can be accessed using various
# functions. This is more complcated than with RPresence
slotNames(mod.pdot.u)

##  [1] "knownOcc"         "fitType"          "call"            
##  [4] "formula"          "data"             "sitesRemoved"    
##  [7] "estimates"        "AIC"              "opt"             
## [10] "negLogLike"       "nllFun"           "bootstrapSamples"
## [13] "covMatBS"

# These functions return estimates on the LOGIT scale - not very useful for most people
coef(mod.pdot.u, type="state")

## psi(Int) 
## 2.038689

SE(mod.pdot.u, type="state")

##  psi(Int) 
## 0.7514124

confint(mod.pdot.u, type="state")

##              0.025    0.975
## psi(Int) 0.5659481 3.511431

# It is possible to use backTransform() but only if NO covariates
backTransform(mod.pdot.u, type='state')

## Backtransformed linear combination(s) of Occupancy estimate(s)
## 
##  Estimate     SE LinComb (Intercept)
##     0.885 0.0766    2.04           1
## 
## Transformation: logistic

# better to use predict() to do the estimation.
# Because there are no covariates here, the data.frame is empty
newdata <- data.frame(factor=1:1)
predict(mod.pdot.u, type='state', newdata=newdata)

##   Predicted         SE     lower     upper
## 1 0.8847998 0.07659083 0.6378277 0.9710113

# Ditto for the detection probability
coef(mod.pdot.u,    type="det")

##    p(Int) 
## 0.2059922

SE(mod.pdot.u,      type="det")

##    p(Int) 
## 0.2420584

confint(mod.pdot.u, type="det")

##             0.025    0.975
## p(Int) -0.2684335 0.680418

backTransform(mod.pdot.u, type="det")

## Backtransformed linear combination(s) of Detection estimate(s)
## 
##  Estimate     SE LinComb (Intercept)
##     0.551 0.0599   0.206           1
## 
## Transformation: logistic

predict(mod.pdot.u, type='det', newdata=newdata)

##   Predicted         SE     lower    upper
## 1 0.5513167 0.05987716 0.4332917 0.663832

# Look at the posterior probability of detection
# The best unbiased predictor of the posterior probability of detection
bup(ranef(mod.pdot.u))

##  [1] 1.0000000 1.0000000 0.4095987 1.0000000 1.0000000 1.0000000 1.0000000
##  [8] 1.0000000 1.0000000 1.0000000 1.0000000 0.4095987 0.4095987 1.0000000
## [15] 1.0000000 1.0000000 1.0000000 0.4095987 1.0000000 0.4095987 1.0000000
## [22] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [29] 1.0000000 1.0000000 0.4095987 1.0000000 1.0000000 1.0000000 0.4095987
## [36] 0.4095987 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000

# Model where p(t) varies across survey occasions
# 

# Note that formula DO NOT HAVE AN = SIGN
# The two formula are for detecton and occupancy in that order
mod.pt.u <- unmarked::occu(~Time ~1 , data=grossbeak.UMF, se=TRUE)

# This creates a complicated S4 object with SLOTS that can be accessed using various
# functions. This is more complcated than with RPresence

# These functions return estimates on the LOGIT scale - not very useful for most people
# better to use predict() to do the estimation.
# Because there are no covariates here, the data.frame is empty
newdata <- data.frame(Time=c("T1","T2","T3"), stringsAsFactors=FALSE)
predict(mod.pt.u, type='state', newdata=newdata)[1,]

##   Predicted         SE    lower     upper
## 1 0.8827113 0.07618458 0.640172 0.9695459

# Ditto for the detection probability
cbind(newdata, predict(mod.pt.u, type='det', newdata=newdata))

##   Time Predicted         SE     lower     upper
## 1   T1 0.4973591 0.08915451 0.3297001 0.6656141
## 2   T2 0.6078831 0.09022850 0.4247031 0.7650092
## 3   T3 0.5526210 0.09009099 0.3768490 0.7161568

# Look at the posterior probability of detection
# The best unbiased predictor of the posterior probability of detection
bup(ranef(mod.pt.u))

##  [1] 1.0000000 1.0000000 0.3988967 1.0000000 1.0000000 1.0000000 1.0000000
##  [8] 1.0000000 1.0000000 1.0000000 1.0000000 0.3988967 0.3988967 1.0000000
## [15] 1.0000000 1.0000000 1.0000000 0.3988967 1.0000000 0.3988967 1.0000000
## [22] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [29] 1.0000000 1.0000000 0.3988967 1.0000000 1.0000000 1.0000000 0.3988967
## [36] 0.3988967 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000

sum(bup(ranef(mod.pt.u)))

## [1] 36.19117

# Model averaging
models.u <-unmarked::fitList(mod.pdot.u, mod.pt.u)

## Warning in unmarked::fitList(mod.pdot.u, mod.pt.u): Your list was unnamed,
## so model names were added as object names

aic.table.u <- unmarked::modSel(models.u)
aic.table.u

##            nPars    AIC delta AICwt cumltvWt
## mod.pdot.u     2 172.19  0.00  0.83     0.83
## mod.pt.u       4 175.30  3.11  0.17     1.00

# Get model averaged estimates of occupancy
predict(models.u, type="state")[1,]

##   Predicted         SE     lower     upper
## 1 0.8844349 0.07652398 0.6382373 0.9707553

# Get model averaged estimates of detection. 
# Notice these are one big list of nsites x nvisits long
# with the detection probabilities for each site
cbind(obs.covar, predict(models.u, type="det"))[1:10,]

##    Site Visit Time Predicted         SE     lower     upper
## 1     1     1   T1 0.5418900 0.06743558 0.4151937 0.6641433
## 2     1     2   T2 0.5611992 0.06783319 0.4317912 0.6815082
## 3     1     3   T3 0.5515446 0.06515716 0.4234308 0.6729734
## 4     2     1   T1 0.5418900 0.06743558 0.4151937 0.6641433
## 5     2     2   T2 0.5611992 0.06783319 0.4317912 0.6815082
## 6     2     3   T3 0.5515446 0.06515716 0.4234308 0.6729734
## 7     3     1   T1 0.5418900 0.06743558 0.4151937 0.6641433
## 8     3     2   T2 0.5611992 0.06783319 0.4317912 0.6815082
## 9     3     3   T3 0.5515446 0.06515716 0.4234308 0.6729734
## 10    4     1   T1 0.5418900 0.06743558 0.4151937 0.6641433

# Do we need to worry about lack of it

# See https://groups.google.com/forum/#!searchin/unmarked/goodness$20of$20fit%7Csort:date/unmarked/3wvnDyLlxok/ct1U723vCAAJ
# https://www.rdocumentation.org/packages/AICcmodavg/versions/2.1-1/topics/mb.gof.test
# MacKenzie, D. I., Bailey, L. L. (2004) Assessing the fit of site-occupancy models. Journal of Agricultural, Biological, and Environmental Statistics 9, 300--318.

library(AICcmodavg)

#  Mackenzie Bailey Goodness of fit test
#  compute chi-square for the top model
mod.pdot.u.chi   = mb.chisq(mod.pdot.u)
mod.pdot.u.chi

## 
## MacKenzie and Bailey goodness-of-fit for single-season occupancy model
## 
## Pearson chi-square table:
## 
##     Cohort Observed Expected Chi-square
## 000      0        8     8.00       0.00
## 001      0        6     4.03       0.97
## 010      0        5     4.03       0.24
## 011      0        4     4.95       0.18
## 100      0        3     4.03       0.26
## 101      0        2     4.95       1.76
## 110      0        5     4.95       0.00
## 111      0        8     6.08       0.61
## 
## Chi-square statistic = 4.0094

mod.pdot.u.boot = mb.gof.test(mod.pdot.u, nsim = 100) # should do about 1000 sims

print(mod.pdot.u.boot, digit.vals=4, digits.chisq=4)

## 
## MacKenzie and Bailey goodness-of-fit for single-season occupancy model
## 
## Pearson chi-square table:
## 
##     Cohort Observed Expected Chi-square
## 000      0        8     8.00       0.00
## 001      0        6     4.03       0.97
## 010      0        5     4.03       0.24
## 011      0        4     4.95       0.18
## 100      0        3     4.03       0.26
## 101      0        2     4.95       1.76
## 110      0        5     4.95       0.00
## 111      0        8     6.08       0.61
## 
## Chi-square statistic = 4.0094 
## Number of bootstrap samples = 100
## P-value = 0.6
## 
## Quantiles of bootstrapped statistics:
##    0%   25%   50%   75%  100% 
##  0.68  3.11  4.63  7.01 17.32 
## 
## Estimate of c-hat = 0.74

# estimating chat
names(mod.pdot.u.boot)

## [1] "model.type"  "chisq.table" "chi.square"  "t.star"      "p.value"    
## [6] "c.hat.est"   "nsim"

chat <- mod.pdot.u.boot$c.hat.est
chat

## [1] 0.7407513

# Not possible to insert chat in the unmarked::modSel (groan) so we use te aictab from AICmodavg
AICcmodavg::aictab(list(mod.pdot.u, mod.pt.u, mod.pdot.u) )

## Warning in aictab.AICunmarkedFitOccu(list(mod.pdot.u, mod.pt.u, mod.pdot.u)): 
## Model names have been supplied automatically in the table

## Warning in aictab.AICunmarkedFitOccu(list(mod.pdot.u, mod.pt.u, mod.pdot.u)): 
## Check model structure carefully as some models may be redundant

## 
## Model selection based on AICc:
## 
##      K   AICc Delta_AICc AICcWt Cum.Wt     LL
## Mod3 2 172.51        0.0   0.47   0.47 -84.09
## Mod1 2 172.51        0.0   0.47   0.93 -84.09
## Mod2 4 176.41        3.9   0.07   1.00 -83.65

AICcmodavg::aictab(list(mod.pdot.u, mod.pt.u, mod.pdot.u), c.hat=max(1,chat) )

## Warning in aictab.AICunmarkedFitOccu(list(mod.pdot.u, mod.pt.u, mod.pdot.u), : 
## Model names have been supplied automatically in the table

## Warning in aictab.AICunmarkedFitOccu(list(mod.pdot.u, mod.pt.u, mod.pdot.u), : 
## Check model structure carefully as some models may be redundant

## 
## Model selection based on AICc:
## 
##      K   AICc Delta_AICc AICcWt Cum.Wt     LL
## Mod3 2 172.51        0.0   0.47   0.47 -84.09
## Mod1 2 172.51        0.0   0.47   0.93 -84.09
## Mod2 4 176.41        3.9   0.07   1.00 -83.65

# The bootstrap goodness of fit can also be done directly in umarked by defining
# our own statistics.
# Function returning three fit-statistics.
fitstats <- function(fm) {
    observed <- getY(fm@data)
    expected <- fitted(fm)
    resids <- residuals(fm)
    sse <- sum(resids^2)
    chisq <- sum((observed - expected)^2 / expected)
    freeTuke <- sum((sqrt(observed) - sqrt(expected))^2)
    out <- c(SSE=sse, Chisq=chisq, freemanTukey=freeTuke)
    return(out)
    }

mod.pdot.u.pboot <- unmarked::parboot(mod.pdot.u, fitstats, nsim=99, report=100)

## t0 = 30.73171 63 36.18837

mod.pdot.u.pboot@t0

##          SSE        Chisq freemanTukey 
##     30.73171     63.00000     36.18837

head(mod.pdot.u.pboot@t.star)

##           SSE    Chisq freemanTukey
## [1,] 30.74797 60.99979     35.96312
## [2,] 28.53659 45.00001     31.77204
## [3,] 30.40650 68.00089     36.44319
## [4,] 30.50407 67.01427     36.42437
## [5,] 30.58537 57.00024     35.30730
## [6,] 27.96748 79.99719     35.15189

# How extreme is t0 relative to the bootstrap values.
# Small probabilities indicate lack of fit
colMeans(mod.pdot.u.pboot@t.star > matrix(mod.pdot.u.pboot@t0, byrow=TRUE,
                                    ncol=length(mod.pdot.u.pboot@t0),
                                    nrow=nrow(mod.pdot.u.pboot@t.star)))

##          SSE        Chisq freemanTukey 
##    0.2323232    0.4444444    0.3838384

# No easy way to automatically inflate the se and ci using c.hat in the unmarked package

blgr-gof.R

cschwarz

Sun Aug 12 16:57:40 2018