### Name: boot
### Title: Bootstrap Resampling
### Aliases: boot boot.return
### Keywords: nonparametric htest

### ** Examples

# usual bootstrap of the ratio of means using the city data
ratio <- function(d, w)
     sum(d$x * w)/sum(d$u * w)
boot(city, ratio, R=999, stype="w")

# Stratified resampling for the difference of means.  In this
# example we will look at the difference of means between the final
# two series in the gravity data.
diff.means <- function(d, f)
{    n <- nrow(d)
     gp1 <- 1:table(as.numeric(d$series))[1]
     m1 <- sum(d[gp1,1] * f[gp1])/sum(f[gp1])
     m2 <- sum(d[-gp1,1] * f[-gp1])/sum(f[-gp1])
     ss1 <- sum(d[gp1,1]^2 * f[gp1]) - 
            (m1 *  m1 * sum(f[gp1]))
     ss2 <- sum(d[-gp1,1]^2 * f[-gp1]) - 
            (m2 *  m2 * sum(f[-gp1]))
     c(m1-m2, (ss1+ss2)/(sum(f)-2))
}
grav1 <- gravity[as.numeric(gravity[,2])>=7,]
boot(grav1, diff.means, R=999, stype="f", strata=grav1[,2])

#  In this example we show the use of boot in a prediction from 
#  regression based on the nuclear data.  This example is taken 
#  from Example 6.8 of Davison and Hinkley (1997).  Notice also 
#  that two extra arguments to statistic are passed through boot.
nuke <- nuclear[,c(1,2,5,7,8,10,11)]
nuke.lm <- glm(log(cost)~date+log(cap)+ne+ ct+log(cum.n)+pt, data=nuke)
nuke.diag <- glm.diag(nuke.lm)
nuke.res <- nuke.diag$res*nuke.diag$sd
nuke.res <- nuke.res-mean(nuke.res)

#  We set up a new data frame with the data, the standardized 
#  residuals and the fitted values for use in the bootstrap.
nuke.data <- data.frame(nuke,resid=nuke.res,fit=fitted(nuke.lm))

#  Now we want a prediction of plant number 32 but at date 73.00
new.data <- data.frame(cost=1, date=73.00, cap=886, ne=0,
                       ct=0, cum.n=11, pt=1)
new.fit <- predict(nuke.lm, new.data)

nuke.fun <- function(dat, inds, i.pred, fit.pred, x.pred)
{
     assign(".inds", inds, envir=.GlobalEnv)
     lm.b <- glm(fit+resid[.inds] ~date+log(cap)+ne+ct+
          log(cum.n)+pt, data=dat)
     pred.b <- predict(lm.b,x.pred)
     remove(".inds", envir=.GlobalEnv)
     c(coef(lm.b), pred.b-(fit.pred+dat$resid[i.pred]))
}

nuke.boot <- boot(nuke.data, nuke.fun, R=999, m=1, 
     fit.pred=new.fit, x.pred=new.data)
#  The bootstrap prediction error would then be found by
mean(nuke.boot$t[,8]^2)
#  Basic bootstrap prediction limits would be
new.fit-sort(nuke.boot$t[,8])[c(975,25)]



#  Finally a parametric bootstrap.  For this example we shall look 
#  at the air-conditioning data.  In this example our aim is to test 
#  the hypothesis that the true value of the index is 1 (i.e. that 
#  the data come from an exponential distribution) against the 
#  alternative that the data come from a gamma distribution with
#  index not equal to 1.
air.fun <- function(data)
{    ybar <- mean(data$hours)
     para <- c(log(ybar),mean(log(data$hours)))
     ll <- function(k) {
          if (k <= 0) out <- 1e200 # not NA
          else out <- lgamma(k)-k*(log(k)-1-para[1]+para[2])
         out
     }
     khat <- nlm(ll,ybar^2/var(data$hours))$estimate
     c(ybar, khat)
}

air.rg <- function(data, mle)
#  Function to generate random exponential variates.  mle will contain 
#  the mean of the original data
{    out <- data
     out$hours <- rexp(nrow(out), 1/mle)
     out
}

air.boot <- boot(aircondit, air.fun, R=999, sim="parametric",
     ran.gen=air.rg, mle=mean(aircondit$hours))

# The bootstrap p-value can then be approximated by
sum(abs(air.boot$t[,2]-1) > abs(air.boot$t0[2]-1))/(1+air.boot$R)