############################################################ # # Simulation to investigate size and power of the t-test # ############################################################ # function to view the first k lines of a data frame view <- function(dat,k){ message <- paste("First",k,"rows") krows <- dat[1:k,] cat(message,"\n","\n") print(krows) } # function to generate S data sets of size n from normal # distribution with mean mu and variance sigma^2 generate.normal <- function(S,n,mu,sigma){ dat <- matrix(rnorm(n*S,mu,sigma),ncol=n,byrow=T) # Note: for this very simple data generation, we can get the data # in one step like this, which requires no looping. In more complex # statistical models, looping is often required to set up each # data set, because the scenario is much more complicated. Here is # a loop to get the same data as above; try running the program and see # how much longer it takes! # dat <- NULL # # for (i in 1:S){ # # Y <- rnorm(n,mu,sigma) # dat <- rbind(dat,Y) # # } out <- list(dat=dat) return(out) } # function to generate S data sets of size n from gamma # distribution with mean mu, variance sigma^2 mu^2 generate.gamma <- function(S,n,mu,sigma){ a <- 1/(sigma^2) s <- mu/a dat <- matrix(rgamma(n*S,shape=a,scale=s),ncol=n,byrow=T) # Alternative loop # dat <- NULL # # for (i in 1:S){ # # Y <- rgamma(n,shape=a,scale=s) # dat <- rbind(dat,Y) # # } out <- list(dat=dat) return(out) } # function to generate S data sets of size n from a t distribution # with df degrees of freedom centered at the value mu (a t distribution # has mean 0 and variance df/(df-2) for df>2) generate.t <- function(S,n,mu,df){ dat <- matrix(mu + rt(n*S,df),ncol=n,byrow=T) # Alternative loop # dat <- NULL # # for (i in 1:S){ # # Y <- mu + rt(n,df) # dat <- rbind(dat,Y) # # } out <- list(dat=dat) return(out) } # set the seed for the simulation set.seed(3) # set number of simulated data sets and sample size S <- 10000 n <- 15 # generate data --Distribution choices are normal with mu,sigma # (rnorm), gamma (rgamma) and student t (rt) # mu0 is the value of mu under the null hypothesis # mu is the actual value for the true distribution of the data # if mu0=mu, then we are investigating size of the test # if mu0 is different from mu, then we are investigating power # of the test at the departure mu from the null hypothesis value mu0 mu0 <- 1 mu <- 1.75 sigma <- sqrt(5/3) out <- generate.normal(S,n,mu,sigma) # generate normal samples # out <- generate.gamma(S,n,mu,sigma) # generate gamma samples # out <- generate.t(S,n,mu,5) # generate t_5 samples # get the sample means from each data set outsampmean <- apply(out$dat,1,mean) # get the estimated standard errors from each data set sampmean.ses <- sqrt(apply(out$dat,1,var)/n) # form the t-statistics for each data set under the null ttests <- (outsampmean-mu0)/sampmean.ses # critical value for test with 0.05 significance level t05 <- qt(0.975,n-1) power <- sum(abs(ttests)>t05)/S