### Encoding: latin1

### Name: crimtab
### Title: Student's 3000 Criminals Data
### Aliases: crimtab
### Keywords: datasets

### ** Examples

# Example script for the 'crimtab' contingency table. It inspects the table,
# then re-runs Student's sampling experiment: rebuild the individual heights
# from the column totals, shuffle them, cut them into samples of four, and
# plot the distribution of the resulting z statistics.

library(stats)

dim(crimtab)
utils::str(crimtab)

## For nicer printing: keep only characters 2-3 of each column label and
## show zero counts as blanks. local() keeps the modified copy out of the
## calling environment.
local({
  cT <- crimtab
  colnames(cT) <- substring(colnames(cT), 2, 3)
  print(cT, zero.print = " ")
})

## Repeat Student's experiment:

# 1) Reconstitute the raw data for heights in inches, rounded to the
#    nearest integer as in Student's paper. The column labels are divided
#    by 2.54 (cm -> inches); each height is repeated by its column total.
#    The outer parentheses print the converted heights.
(heIn <- round(as.numeric(colnames(crimtab)) / 2.54))
d.hei <- data.frame(height = rep(heIn, colSums(crimtab)))

# 2) Shuffle the data. The seed is fixed so the example is reproducible;
#    sample(n) permutes 1..n, so the row count is taken from the data
#    instead of being hard-coded.
set.seed(1)
d.hei <- d.hei[sample(nrow(d.hei)), , drop = FALSE]

# 3) Make samples of size 4 (750 samples for the 3000 observations):
sample_size <- 4L
n_samples <- nrow(d.hei) %/% sample_size
d.hei$sample <- as.factor(rep(seq_len(n_samples), each = sample_size))

# 4) Compute the means and standard deviations (n) for the samples.
#    sd() divides by n - 1, so rescale by sqrt((n - 1) / n) to obtain the
#    divisor-n standard deviation.
h.mean <- tapply(d.hei$height, d.hei$sample, FUN = mean)
h.sd <- tapply(d.hei$height, d.hei$sample, FUN = sd) *
  sqrt((sample_size - 1) / sample_size)

# 5) Compute the difference between the mean of each sample and
#    the mean of the population and then divide by the
#    standard deviation of the sample:
zobs <- (h.mean - mean(d.hei$height)) / h.sd

# 6) Replace infinite values by +/- 6 as in Student's paper. Infinite
#    values arise when a sample's standard deviation is zero. The
#    offending values are printed before being replaced; how many there
#    are depends on the shuffle.
infZ <- is.infinite(zobs)
zobs[infZ]
zobs[infZ] <- 6 * sign(zobs[infZ])

# 7) Plot the distribution:
library(grDevices)
library(graphics)
hist(
  x = zobs, probability = TRUE, xlab = "Student's z",
  col = grey(0.8), border = grey(0.5),
  main = "Distribution of Student's z score for 'crimtab' data"
)