#this function is a helper function for gen.density.plot
d_limit <- function(x, alpha, test, dist, n) {
if(dist == 'z'){
y = dnorm(x)
q_a = qnorm(1-alpha)
}else{
y = dt(x, df = n-1)
q_a = qt(1-alpha, df = n-1)
}
if(test == 'two.tailed'){
y[which(x>-q_a & x<q_a)] <- NA
return(y)
}else if (test == 'upper.tail'){
y[which(x < q_a)] <- NA
return(y)
}else{
y[which(x > -q_a)] <- NA
return(y)
}
}
# this function plots a density curve for the z and t distributions and allows for shading the
# the rejection regions on the plot
gen.density.plot=function(bbox = c(-4,4), n = 10, dist = c('z','t'), alpha = 0.05, obs = 0.5,
test = c('two.tail','lower.tail', 'upper.tail'), shade = TRUE,
txt.sz = 5){
p = ggplot(data.frame(x = bbox), aes(x = x))+
theme_bw()
if(shade == TRUE){
p = p + stat_function(fun = d_limit,
args = list(alpha = alpha,
test = test,
dist = dist,
n = n),
geom = "area",
fill = "cyan3",
alpha = 0.4)
}
if(dist == 'z'){
q_a = qnorm(1-alpha)
d_obs = dnorm(obs)
d_a = dnorm(q_a)
func = dnorm
args = list()
xlabel = "z"
}else{
q_a = qt(1-alpha, df = n-1)
d_obs = dt(obs, df = n-1)
d_a = dt(q_a, df = n-1)
func = dt
args = list(df = n-1)
xlabel = 't'
}
if(test == 'two.tailed'){
xcoords = c(-q_a, q_a, obs)
ycoords = c(0,0,0)
xendcoords = c(-q_a, q_a, obs)
yendcoords = c(d_a, d_a, d_obs)
if(dist == 'z'){
lbl1 = c(TeX("$z_{\\alpha/2}$"),TeX("$z_{1-\\alpha/2}$"),TeX("$z_{obs}$"))
}else{
lbl1 = c(TeX("$t_{\\alpha/2}$"),TeX("$t_{1-\\alpha/2}$"),TeX("$t_{obs}$"))
}
}else if (test == 'upper.tail'){
xcoords = c(q_a, obs)
ycoords = c(0,0)
xendcoords = c(q_a, obs)
yendcoords = c(d_a, d_obs)
if(dist == 'z'){
lbl1 = c(TeX("$z_{1-\\alpha}$"),TeX("$z_{obs}$"))
}else{
lbl1 = c(TeX("$t_{1-\\alpha}$"),TeX("$t_{obs}$"))
}
}else{
xcoords = c(-q_a, obs)
ycoords = c(0,0)
xendcoords = c(-q_a, obs)
yendcoords = c(d_a, d_obs)
if(dist == 'z'){
lbl1 = c(TeX("$z_{\\alpha}$"), TeX("$z_{obs}$"))
}else{
lbl1 = c(TeX("$t_{\\alpha}$") ,TeX("$t_{obs}$"))
}
}
if(test == 'two.tailed'){
final = p+stat_function(fun = func,
args = args,
size = 1)+
geom_segment(aes(x=xcoords[1:2],
y=ycoords[1:2],
xend=xendcoords[1:2],
yend=yendcoords[1:2]),
size = 0.9,
linetype = 'dashed')+
geom_segment(aes(x=xcoords[3],
y=ycoords[3],
xend=xendcoords[3],
yend=yendcoords[3]),
size = 0.9,
linetype = 'dashed')+
geom_point(aes(x = xcoords[1:2], y = ycoords[1:2]), size = 3)+
geom_label(aes(x = xcoords[1:2], y = ycoords[1:2]-0.025), label = lbl1[1:2], size = txt.sz)+
geom_point(aes(x = xcoords[3], y = ycoords[3]), size = 3)+
geom_label(aes(x = xcoords[3], y = ycoords[3]-0.025), label = lbl1[3], size = txt.sz)+
geom_hline(yintercept = 0, size = 1)+
xlab(xlabel)+
ylab('Probability Density')+
ggtitle(TeX('Distribution under $H_0$'))+
theme(axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14))
}else{
final = p+stat_function(fun = func,
args = args,
size = 1)+
geom_segment(aes(x=xcoords,
y=ycoords,
xend=xendcoords,
yend=yendcoords),
size = rep(0.9, length(xcoords)),
linetype = rep('dashed',length(xcoords)))+
geom_point(aes(x = xcoords, y = ycoords), size = 3)+
geom_label(aes(x = xcoords, y = ycoords-0.025), label = lbl1, size = txt.sz)+
geom_hline(yintercept = 0, size = 1)+
xlab(xlabel)+
ylab('Probability Density')+
ggtitle(TeX('Distribution under $H_0$'))+
theme(axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12))
}
return(final)
}
# standard error for a sample proportion
one.sample.prop.SE = function(phat, n){
SE = sqrt((phat*(1-phat))/n)
return(SE)
}
# confidence interval for a population proportion
one.sample.prop.CI = function(phat, n, alpha = 0.05, verbose = FALSE){
SE = one.sample.prop.SE(phat, n)
standard.score = qnorm(1-(alpha/2))
MOE = standard.score*SE
lower.bound = phat-MOE
upper.bound = phat+MOE
if(isTRUE(verbose)){
print(noquote(paste0(rep('-', 50), collapse = '')))
print(noquote(paste0('Estimate = ', round(phat, 4))))
print(noquote(paste0('Critical Value = ', round(standard.score, 4))))
print(noquote(paste0('Estimated Standard Error = ', round(SE, 4))))
print(noquote(paste0('Margin of error = ', round(MOE, 4))))
print(noquote(paste0((1-alpha)*100, ' % CI = ', paste0('[', round(max(lower.bound,0),4), ',',
round(min(upper.bound, 1), 4), ']',
collapse = ''))))
print(noquote(paste0(rep('-', 50), collapse = '')))
}
return(c(lower.bound, upper.bound))
}
# test for a population proportion
one.sample.prop.test = function(p0, phat, n, alpha = 0.05, test = c('lower.tail','upper.tail','two.tail'),
verbose = TRUE){
estimate.SE = one.sample.prop.SE(p0, n)
Zobs = (phat - p0)/estimate.SE
if(test == 'two.tail'){
critical.value = qnorm((1-(alpha/2)))
alt.hyp = 'p != '
pvalue = 2*(1-pnorm(abs(Zobs)))
}else if(test == 'lower.tail'){
critical.value = qnorm(alpha)
alt.hyp = 'p < '
pvalue = pnorm(Zobs)
}else{
critical.value = qnorm(1-alpha)
alt.hyp = 'p > '
pvalue = 1-pnorm(Zobs)
}
CI = one.sample.prop.CI(phat, n, alpha)
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('test type = ', test)))
print(noquote(paste0('H0: p0 = ', p0)))
print(noquote(paste0('HA: ', alt.hyp, p0)))
print(noquote(paste0('estimate = ', round(phat,4))))
print(noquote(paste0('Estimated Standard Error = ', round(estimate.SE,4))))
print(noquote(paste0('Critical Value = ', round(critical.value, 4))))
print(noquote(paste0((1-alpha)*100, '% CI = ', paste0('[', max(round(CI[1],4), 0),',',
round(CI[2],4),']',
collapse = ''))))
print(noquote(paste0('Test statistic = ', round(Zobs, 4))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}
#standard error for a sample mean
one.sample.t.SE = function(s, n){
SE = s/sqrt(n)
return(SE)
}
#confidence interval for a population mean
one.sample.t.CI = function(xbar, s, n, alpha = 0.05, verbose = FALSE){
SE = one.sample.t.SE(s, n)
t.score = qt(1-(alpha/2), df = n-1)
MOE = t.score*SE
lower.bound = xbar-MOE
upper.bound = xbar+MOE
if(isTRUE(verbose)){
print(noquote(paste0(rep('-', 50), collapse = '')))
print(noquote(paste0('Estimate = ', round(xbar, 4))))
print(noquote(paste0('Critical Value = ', round(t.score, 4))))
print(noquote(paste0('Estimated Standard Error = ', round(SE, 4))))
print(noquote(paste0('Margin of error = ', round(MOE, 4))))
print(noquote(paste0((1-alpha)*100, ' % CI = ', paste0('[', round(lower.bound,4),',',
round(upper.bound,4), ']',
collapse = ''))))
print(noquote(paste0(rep('-', 50), collapse = '')))
}
return(c(lower.bound, upper.bound))
}
# one sample t test
one.sample.t.test = function(m0, xbar, s, n, alpha = 0.05, test = c('lower.tail','upper.tail','two.tail'),
verbose = TRUE){
df = n - 1
estimate.SE = one.sample.t.SE(s, n)
tobs = (xbar - m0)/estimate.SE
if(test == 'two.tail'){
critical.value = qt((1-(alpha/2)), df = df)
alt.hyp = 'm != '
pvalue = 2*(1-pt(abs(tobs), df = df))
}else if(test == 'lower.tail'){
critical.value = qt(alpha, df = df)
alt.hyp = 'm < '
pvalue = pt(tobs, df = df)
}else{
critical.value = qt(1-alpha, df = df)
alt.hyp = 'm > '
pvalue = 1-pt(tobs, df = df)
}
CI = one.sample.t.CI(xbar, s, n, alpha)
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('test type = ', test)))
print(noquote(paste0('H0: m0 = ', m0)))
print(noquote(paste0('HA: ', alt.hyp, m0)))
print(noquote(paste0('Estimate = ', round(xbar,4))))
print(noquote(paste0('Estimated Standard Error = ', round(estimate.SE,4))))
print(noquote(paste0('Critical Value = ', round(critical.value, 4))))
print(noquote(paste0((1-alpha)*100, '% CI = ', paste0('[', max(round(CI[1],4), 0),',',
round(CI[2],4),']',
collapse = ''))))
print(noquote(paste0('Test statistic = ', round(tobs, 4))))
print(noquote(paste0('Degrees of Freedom = ', round(df, 4))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}
#unpooled standard error for difference in two proportions
two.sample.prop.SE = function(p1,p2,n1,n2){
SE = sqrt((p1*(1-p1)/n1)+(p2*(1-p2)/n2))
return(SE)
}
#computing confidence intervals for the difference between two population proportions
two.sample.prop.CI = function(p1,p2,n1,n2,tail = 'two.tail',alpha = 0.05, verbose = FALSE){
diff = p1-p2
bounds=c(0,0)
if(tail != 'two.tail'){
Z = qnorm(1-alpha)
}else{
Z = qnorm(1-(alpha/2))
}
SE = two.sample.prop.SE(p1,p2,n1,n2)
MOE = Z*SE
bounds[1] = diff - MOE
bounds[2] = diff + MOE
bounds = sort(bounds)
if(isTRUE(verbose)){
print(noquote(paste0(rep('-', 50), collapse = '')))
print(noquote(paste0('Estimate = ', round(diff, 4))))
print(noquote(paste0('Critical Value = ', round(Z, 4))))
print(noquote(paste0('Estimated Standard Error = ', round(SE, 4))))
print(noquote(paste0('Margin of error = ', round(MOE, 4))))
print(noquote(paste0((1-alpha)*100, ' % CI = ', paste0('[',round(bounds[1], 4), ',',
round(bounds[2], 4), ']'))))
print(noquote(paste0(rep('-', 50), collapse = '')))
}
return(list(estimate = diff, interval = bounds, margin.of.error = MOE, critical.value = Z))
}
#two sample proportion test
two.sample.prop.test = function(p0,x1,x2,n1,n2,alpha = 0.05, test = c('lower.tail','upper.tail','two.tail'),
pooled = TRUE, verbose = TRUE){
p1 = x1/n1
p2 = x2/n2
estimate = p1 - p2
estimate.SE = two.sample.prop.SE(p1, p2, n1, n2)
if(isTRUE(pooled)){
calc = 'pooled'
p.pooled = (x1+x2)/(n1+n2)
test.SE = sqrt( (p.pooled*(1-p.pooled))*(1/n1 + 1/n2) )
}else{
calc = 'unpooled'
test.SE = two.sample.prop.SE(p1,p2,n1,n2)
}
Zobs = (estimate - p0)/test.SE
if(test == 'two.tail'){
critical.value = qnorm((1-(alpha/2)))
alt.hyp = 'p1 - p2 != '
pvalue = 2*(1-pnorm(abs(Zobs)))
}else if(test == 'lower.tail'){
critical.value = qnorm(alpha)
alt.hyp = 'p1 - p2 < '
pvalue = pnorm(Zobs)
}else{
critical.value = qnorm(1-alpha)
alt.hyp = 'p1 - p2 > '
pvalue = 1-pnorm(Zobs)
}
CI = sort(c(estimate - critical.value * estimate.SE, estimate + critical.value * estimate.SE))
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('test type = ', test)))
print(noquote(paste0('H0: p1 - p2 = ', p0)))
print(noquote(paste0('HA: ', alt.hyp, p0)))
print(noquote(paste0('estimate = ', round(estimate,4))))
print(noquote(paste0(calc,' SE = ', round(test.SE,4))))
print(noquote(paste0((1-alpha)*100, '% CI = ', paste0('[', max(round(CI[1],4), 0),',',
round(CI[2],4),']',
collapse = ''))))
print(noquote(paste0('Critical Value = ', round(critical.value,4))))
print(noquote(paste0('Test statistic = ', round(Zobs, 4))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}
#two sample unpooled standard error for means
two.sample.t.SE = function(s1,s2,n1,n2){
SE = sqrt((s1^2/n1)+(s2^2/n2))
return(SE)
}
#confidence intervals for a difference between two means
two.sample.t.CI = function(x1,x2,s1,s2,n1,n2, tail = 'two.tail',alpha = 0.05, verbose = FALSE){
diff = x1-x2
bounds=c(0,0)
df = min(n1-1, n2-1)
if(tail != 'two.tail'){
t.crit = qt(1-alpha, df = df)
}else{
t.crit = qt(1-(alpha/2), df = df)
}
SE = two.sample.t.SE(s1,s2,n1,n2)
MOE = t.crit*SE
bounds[1] = diff - MOE
bounds[2] = diff + MOE
bounds = sort(bounds)
if(isTRUE(verbose)){
print(noquote(paste0(rep('-', 50), collapse = '')))
print(noquote(paste0('Estimate = ', round(diff, 4))))
print(noquote(paste0('Critical Value = ', round(t.crit, 4))))
print(noquote(paste0('Estimated Standard Error = ', round(SE, 4))))
print(noquote(paste0('Margin of error = ', round(MOE, 4))))
print(noquote(paste0((1-alpha)*100, ' % CI = ', paste0('[',round(bounds[1], 4), ',',
round(bounds[2], 4), ']'))))
print(noquote(paste0(rep('-', 50), collapse = '')))
}
return(list(estimate = diff, interval = bounds, margin.of.error = MOE, critical.value = t.crit))
}
#two sample t test
two.sample.t.test = function(m0,x1,x2,s1,s2,n1,n2,alpha = 0.05, test = c('lower.tail','upper.tail','two.tail'),
pooling = c('pooled', 'unpooled','approx.unpooled'), verbose = TRUE){
estimate = x1 - x2
estimate.SE = two.sample.t.SE(s1, s2, n1, n2)
if(pooling == 'pooled'){
calc = 'pooled'
df = n1 + n2 - 2
s.pooled = sqrt( ((n1-1)*s1^2 + (n2-1)*s2^2)/(n1+n2-2))
test.SE = s.pooled * sqrt((1/n1)+(1/n2))
}else if (pooling == 'unpooled'){
calc = 'unpooled'
df = (((s1^2/n1)+(s2^2/n2))^2)/(((s1^2/n1)^2/(n1-1)) + ((s2^2/n2)^2/(n2-1)))
test.SE = two.sample.t.SE(s1,s2,n1,n2)
}else{
calc = 'approx.unpooled'
df = min(n1-1, n2-1)
test.SE = two.sample.t.SE(s1,s2,n1,n2)
}
tobs = (estimate - m0)/test.SE
if(test == 'two.tail'){
critical.value = qt((1-(alpha/2)), df = df)
alt.hyp = 'm1 - m2 != '
pvalue = 2*(1-pt(abs(tobs), df = df))
}else if(test == 'lower.tail'){
critical.value = qt(alpha, df = df)
alt.hyp = 'm1 - m2 < '
pvalue = pnorm(tobs)
}else{
critical.value = qt(1-alpha, df = df)
alt.hyp = 'm1 - m2 > '
pvalue = 1-pnorm(tobs)
}
CI = sort(c(estimate - critical.value * estimate.SE, estimate + critical.value * estimate.SE))
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('test type = ', test)))
print(noquote(paste0('H0: m1 - m2 = ', m0)))
print(noquote(paste0('HA: ', alt.hyp, m0)))
print(noquote(paste0('estimate = ', round(estimate,4))))
print(noquote(paste0(calc,' SE = ', round(test.SE,4))))
print(noquote(paste0((1-alpha)*100, '% CI = ', paste0('[',round(CI[1],4),',',
round(CI[2],4),']',
collapse = ''))))
print(noquote(paste0('Critical Value = ', round(critical.value,4))))
print(noquote(paste0('Test statistic = ', round(tobs, 4))))
print(noquote(paste0('degrees of freedom = ', round(df, 4))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}
# the classic sign test
sign.test = function(p0, x1, x2, alpha = 0.05, test = c('lower.tail','upper.tail','two.tail'),
verbose = TRUE){
diffs = x1 - x2
total.signs = sum(diffs>0)+sum(diffs<0)
positive.signs = sum(diffs>0)
phat = positive.signs/total.signs
SE = sqrt(phat*(1-phat)/total.signs)
if(test == 'two.tail'){
upper.bound = qbinom(1-(alpha/2), total.signs, phat)
lower.bound = qbinom(alpha/2, total.signs, phat)
alt.hyp = 'p != '
pvalue = 2*(1-pbinom(positive.signs-1, total.signs, p0))
if(pvalue > 1){
pvalue = 1
}
}else if(test == 'lower.tail'){
upper.bound = qbinom(1-alpha/2, total.signs, phat)
lower.bound = qbinom(alpha/2, total.signs, phat)
alt.hyp = 'p < '
pvalue = pbinom(positive.signs, total.signs, p0)
}else{
upper.bound = qbinom(1-(alpha/2), total.signs, phat)
lower.bound = qbinom(alpha/2, total.signs, phat)
alt.hyp = 'p > '
pvalue = 1-pbinom(positive.signs-1, total.signs, p0)
}
CI = c(lower.bound/total.signs, upper.bound/total.signs)
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('test type = ', test)))
print(noquote(paste0('H0: p0 = ', p0)))
print(noquote(paste0('HA: ', alt.hyp, p0)))
print(noquote(paste0('# of positive signs = ', positive.signs)))
print(noquote(paste0('# of total signs = ', total.signs)))
print(noquote(paste0('Estimated P(+ sign) = ', round(phat, 4))))
print(noquote(paste0('Estimated Standard Error P(+ sign) = ', round(SE,4))))
print(noquote(paste0((1-alpha)*100, '% CI P(+ sign) = ', paste0('[', round(CI[1],4),',',
round(CI[2],4),']',
collapse = ''))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}
rank.sum.stat=function(X, Y){
D = sort(c(X, Y), decreasing = FALSE)
ranks = match(X,D)
S = sum(ranks)
return(S)
}
# the classic Wilcoxon sum-rank test for two indepdent samples
Wilcoxon.rank.sum.test = function(m0, X=NULL, Y=NULL, S=NULL, n1=NULL, n2=NULL, alpha = 0.05,
test = c('lower.tail','upper.tail','two.tail'), verbose = TRUE){
if(is.null(S)){
S = rank.sum.stat(X,Y)
n1 = length(X)
n2 = length(Y)
}
U = S - (n1*(n1+1))/2
if(test == 'two.tail'){
upper.bound = qwilcox(1-(alpha/2), n1, n2)
lower.bound = qwilcox(alpha/2, n1, n2)
alt.hyp = 'true location shift != '
pvalue = 2*pwilcox(U, n1, n2)
if(pvalue > 1){
pvalue = 1
}
}else if(test == 'lower.tail'){
upper.bound = qwilcox(1-(alpha/2), n1, n2)
lower.bound = qwilcox(alpha/2, n1, n2)
alt.hyp = 'true location shift is < '
pvalue = pwilcox(U, n1, n2)
}else{
upper.bound = qwilcox(1-(alpha/2), n1, n2)
lower.bound = qwilcox(alpha/2, n1, n2)
alt.hyp = 'true location shift is > '
pvalue = 1-pwilcox(U, n1, n2)
}
CI = c(lower.bound, upper.bound)
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('test type = ', test)))
print(noquote(paste0('H0: the true location shift = ', m0)))
print(noquote(paste0('HA: ', alt.hyp, m0)))
print(noquote(paste0('Sign rank statistic = ', S)))
print(noquote(paste0('Mann-Whitney U = ', round(U, 4))))
print(noquote(paste0('E[S] = ', round((n1*(n1+n2+1))/2, 4 ))))
print(noquote(paste0('SE(S) =', round(sqrt( (n1*n2*(n1+n2+1))/12 ), 4 ))))
print(noquote(paste0((1-alpha)*100, '% CI for S = ', paste0('[',max(round(CI[1],4),0),',',
round(CI[2],4),']',
collapse = ''))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}
#A simple function to compute the signed-rank statistic
sign.rank.stat = function(X,Y, ...){
diffs = X-Y
if(sum(diffs == 0)>0){
omit = which(diffs == 0)
abs.diffs = abs(diffs[-omit])
}else{
abs.diffs = abs(diffs)
}
negative = which(diffs[-omit] < 0)
positive = which(diffs[-omit] > 0)
ranks = rank(abs.diffs, ...)
signed.ranks = ranks
signed.ranks[negative] = -1*ranks[negative]
W = sum(signed.ranks)
return(list(W = W, differences = diffs, absolute.diff = abs.diffs, ranks = ranks, signed.ranks = signed.ranks))
}
# the classic Wilcoxon sign-rank test for two dependent samples
Wilcoxon.sign.rank.test = function(m0, X=NULL, Y=NULL, W=NULL, n=NULL, alpha = 0.05,
test = c('lower.tail','upper.tail','two.tail'), verbose = TRUE,
...){
if(is.null(W)){
W = sign.rank.stat(X,Y,...)$W
if(length(X)==length(Y)){
n = length(X)
EW = m0
VW = (n*(n+1)*(2*n +1))/6
Z = (W-m0)/sqrt(VW)
}else{
stop('length(X) != length(Y)!...stopping')
}
}
if(test == 'two.tail'){
crit = qnorm(1-alpha/2, EW, sqrt(VW))
alt.hyp = 'true location shift != '
pvalue = 2*(1-pnorm(W, EW, sqrt(VW)))
if(pvalue > 1){
pvalue = 1
}
}else if(test == 'lower.tail'){
crit = qnorm(alpha, EW, sqrt(VW))
alt.hyp = 'true location shift < '
pvalue = pnorm(W, EW, sqrt(VW))
}else{
crit = qnorm(1-alpha, EW, sqrt(VW))
alt.hyp = 'true location shift > '
pvalue = 1-pnorm(W, EW, sqrt(VW))
}
upper.bound = qnorm(1-alpha/2, EW, sqrt(VW))*sqrt(VW)+EW
lower.bound = qnorm(alpha/2, EW, sqrt(VW))*sqrt(VW)+EW
CI = c(lower.bound, upper.bound)
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('test type = ', test)))
print(noquote(paste0('H0: true location shift = ', m0)))
print(noquote(paste0('HA: ', alt.hyp, m0)))
print(noquote(paste0('Rank-Sum statistic = ', W)))
print(noquote(paste0('E[W] = ', round( EW, 4 ))))
print(noquote(paste0('SE(W) =', round(sqrt( VW ), 4 ))))
print(noquote(paste0('Approximate Z-statistic = ', round(Z, 4))))
print(noquote(paste0((1-alpha)*100, '% CI for W = ', paste0('[',round(CI[1],2),',',
round(CI[2],2),']',
collapse = ''))))
print(noquote(paste0('critical value = ', round(crit, 4))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}
# a function to conduct the chi-square goodness of fit test
chi.squared.GOF.test = function(observed.ct=NULL, expected.freq=NULL, expected.ct = NULL, alpha = 0.05,
verbose = TRUE){
n = sum(observed.ct)
if(is.null(expected.ct)){
if(is.null(expected.freq)){
stop('missing one of expected.freq or expected.ct')
}else{
expected.ct = n*expected.freq
}
}else{
expect.freq = expected.ct/n
}
df = length(observed.ct)-1
chi.dist = (observed.ct - expected.ct)^2 / expected.ct
chi.obs = sum(chi.dist)
tabres = cbind.data.frame(Observed = observed.ct, Expected = round(expected.ct, 3),
Distance = round(chi.dist, 3))
row.names(tabres) = paste0('category ', 1:length(observed.ct))
null.hyp = paste0(paste0('P(category ', 1:length(observed.ct), ') = '), round(expected.freq, 4))
alt.hyp ='The probabilities are different than those stated in H0'
crit = qchisq(1-alpha, df)
pvalue = 1-pchisq(chi.obs, df)
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('H0: ', null.hyp)))
print(noquote(paste0('HA: ', alt.hyp)))
print(noquote(paste0('Degrees of freedom = ', df)))
print(paste0(rep('-', 54), collapse = ''))
print(tabres)
print(paste0(rep('-', 54), collapse = ''))
print(noquote(paste0('test statistic = ', round(chi.obs, 4))))
if(df == 1){
print(noquote(paste0('Z-statistic = ', round(sqrt(chi.obs), 4))))
}
print(noquote(paste0('critical value = ', round(crit, 4))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}
# chi square test of independence and homogeneity
chi.squared.test = function(cont.table, alpha = 0.05, test = c('homogeneity','independence'),
verbose = TRUE,...){
r.t = rowSums(cont.table)
c.t = colSums(cont.table)
n = sum(cont.table)
exp.cts = outer(r.t, c.t)/n
ct.dists = (cont.table - exp.cts)^2/exp.cts
chi.obs = sum(ct.dists)
df = prod(dim(cont.table)-1)
final.observed = cbind(rbind(cont.table, c.t), c(r.t, n))
colnames(final.observed) = c(paste0('Var A: category ', 1:dim(cont.table)[2]),'Row Total')
row.names(final.observed) = c(paste0('Var B: category ', 1:dim(cont.table)[1]),'Column Total')
colnames(exp.cts) = paste0('Var B: category ', 1:dim(cont.table)[2])
row.names(exp.cts) = paste0('Var A: category ', 1:dim(cont.table)[1])
colnames(ct.dists) = paste0('Var A: category ', 1:dim(cont.table)[2])
row.names(ct.dists) = paste0('Var B: category ', 1:dim(cont.table)[1])
crit = qchisq(1-alpha, df)
if(test == 'homogeneity'){
null.hyp = 'The conditional distributions of the rows are homogeneous'
alt.hyp = 'The conditional distributions of the rows are not homogeneous'
}else{
null.hyp = 'The row variable and column variable are independent'
alt.hyp = 'The row variable and column variable are dependent'
}
pvalue = 1-pchisq(chi.obs, df)
decision = ifelse(pvalue<alpha, 'reject H0', 'fail to reject H0')
if(isTRUE(verbose)){
print(noquote(paste0(paste0(rep('=', 20), collapse = ''), ' test results ', paste0(rep('=', 20), collapse = ''))))
print(noquote(paste0('test type = ', test)))
print(noquote(paste0('H0: ', null.hyp)))
print(noquote(paste0('HA: ', alt.hyp)))
print(noquote(paste0('Degrees of freedom = ', df)))
print(noquote(paste0(rep('-', 54), collapse = '')))
print('Observed Counts')
print(round(final.observed, 4))
print('Expected Counts: (R * C)/n')
print(round(exp.cts, 4))
print('Distances: (observed - expected)^2 / expected')
print(round(ct.dists, 4))
print(noquote(paste0(rep('-', 54), collapse = '')))
print(noquote(paste0('test statistic = ', round(chi.obs, 4))))
if(df == 1){
print(noquote(paste0('Z-statistic = ', round(sqrt(chi.obs), 4))))
}
print(noquote(paste0('critical value = ', round(crit, 4))))
print(noquote(paste0('Pvalue = ', round(pvalue, 4))))
print(noquote(paste0('Decision: ', decision)))
print(noquote(paste0(rep("=", 54), collapse = '')))
}
}