F检验与Tukey HSD检验的备择假设方向图解

F检验与Tukey HSD检验的备择假设方向图解

《统计软件应用》第12周课堂练习

lixiaoxu@gmail.com
November 29, 2018


本例演示虚无假设 \mu_1=\mu_2=\mu_3 下的10000次独立重复模拟,每次重复得到独立三组均值的 T 统计量 T_1,T_2,T_3 在三维空间中的点,从 \left[ \begin{align} & {{T}_{1}} \\ & {{T}_{2}} \\ & {{T}_{3}} \\ \end{align} \right] = \left[ \begin{align} & 1 \\ & 1 \\ & 1 \\ \end{align} \right] 向量指向的视角窥视。题图中,三个 T 轴分别画为红、绿、蓝。在视角下,所见平面的 x 轴表征 T_{M_1-\left( M_2+M_3 \right)/2} ,与三维空间的红轴在该视角下重叠。与 x 轴垂直的平面 y 轴表征 T_{M_2-M_3}

F 统计量的等值线是同心圆;Tukey统计量的等值线是正六边形。方差分析 F 检验与事后Tukey HSD检验的虚无假设相同。两个检验的备择假设在K-1维(本例即2维平面)方向不同。本文最后两图展示方差分析与Tukey检验可以互有出入,达到0.05显著阈值的模拟点画为空心圆。

n=1e4;df=20

library(latex2exp)

Ts <- matrix(ncol=3,rnorm(3*n) )/ cbind((rchisq(n,df=df)/df)^.5)%*%rbind(c(1,1,1))

## 独立三组,均衡设计,模拟生成三组均值的T统计量


{{T}_{j}}=\frac{\sqrt{n}\left( {{M}_{j}}-{{\mu }_{Nul}} \right)}{\sqrt{MSe}};j=1,2,3

F_t <- apply(Ts,1,function(ys) sum((ys-mean(ys))^2)/2)


\frac{SS\left( \left. {{T}_{j}} \right|j=1,2,3 \right)}{3-1}=\frac{S{{S}_{between}}/\left( 3-1 \right)}{MSe}={{F}_{3-1,d{{f}_{e}}}}


Tukey_t <- apply(Ts,1,function(ys) max(ys)-min(ys))


Range\left( \left. {{T}_{j}} \right|j=1,2,3 \right)=\frac{\sqrt{n}\times Range\left( \left. {{M}_{j}} \right|j=1,2,3 \right)}{\sqrt{MSe}}=Tuke{{y}_{3,d{{f}_{e}}}}


qqplot(F_t,x = qf(ppoints(F_t),df1 = 2,df2 = df),asp=1,xlab = 'Expected F',ylab = 'Observed F')
abline(c(0,1),lty=2)



qqplot(qtukey(ppoints(Tukey_t),nmeans = 3,df = df),Tukey_t,asp=1,xlab = 'Expected Tukey',ylab = 'Observed Tukey')
abline(c(0,1),lty=2)



plot(pf(F_t,2,df),ptukey(Tukey_t,nmeans = 3,df = df),asp=1,pch='.',xlim = c(0,1),ylim = c(0,1),xlab = 'Cumulative Probability of F',ylab = 'Cumulative Probability of Tukey')
abline(c(0,1),lty=2)
abline(v=1-.05,h=1-.05,lty=3)



P_mt <- cbind(c(1,1,1)/ 3^.5,
               c(1,-1/2,-1/2)/ 1.5^.5,
               c(0,1,-1)/ 2^.5)
round(P_mt %*% t(P_mt),digits = 14)
##      [,1] [,2] [,3]
## [1,]    1    0    0
## [2,]    0    1    0
## [3,]    0    0    1
xyz <- function(Ts){
  Ts %*% P_mt[,2:3]
}

xy <- xyz(Ts)
col_F <- floor(pf(F_t,df1 = 2,df2 = df)*df)+1
col_Tukey <- floor(ptukey(Tukey_t,nmeans = 3,df = df)*df)+1 
  
col10<-c(rbind(grey(seq(0.7,0.3,length.out = 5)),rainbow(5)))
col10<-col10[c(10:1,10:1)]

plot(xy,asp=1,pch=ifelse(col_F<20,'.','o'),col=col10[col_F],
     main = 'F Test',
     xlab=TeX("$T_{M_1 - (M_2 + M_3)/2}$"),
     ylab=TeX("T_{M_2-M_3}"))
points(xyz(cbind(seq(0,max(Ts),length.out = n),0,0)),pch='.',col='red')
points(xyz(cbind(0,seq(0,max(Ts),length.out = n),0)),pch='.',col='green')
points(xyz(cbind(0,0,seq(0,max(Ts),length.out = n))),pch='.',col='blue')
for (p in seq(.05,.95,.05)){
  r_p <-xyz(c(0,t2 <- (qf(p,2,df) )^.5, -t2))[2];
  lines(r_p*cos(theta<-seq(0,6,length.out = 1e3) * pi/3),r_p*sin(theta),col=ifelse(p<.95,'grey','black'))
}
r_p <-xyz(c(0,t2 <- qtukey(p,nmeans = 3,df) /2, -t2))[2] / (3^.5 /2);
lines(r_p*cos(theta<-(0:6) * pi/3),r_p*sin(theta),lty=2)



plot(xy,asp=1,pch=ifelse(col_Tukey<20,'.','o'),col=col10[col_Tukey],
     main = "Tukey Test",
     xlab=TeX("$T_{M_1 - (M_2 + M_3)/2}$"),
     ylab=TeX("T_{M_2-M_3}"))
points(xyz(cbind(seq(0,max(Ts),length.out = n),0,0)),pch='.',col='red')
points(xyz(cbind(0,seq(0,max(Ts),length.out = n),0)),pch='.',col='green')
points(xyz(cbind(0,0,seq(0,max(Ts),length.out = n))),pch='.',col='blue')

for (p in seq(.05,.95,.05)){
  r_p <-xyz(c(0,t2 <- qtukey(p,nmeans = 3,df) /2, -t2))[2] / (3^.5 /2);
  lines(r_p*cos(theta<-(0:6) * pi/3),r_p*sin(theta),col=ifelse(p<.95,'grey','black'))
}
r_p <-xyz(c(0,t2 <- (qf(p,2,df) )^.5, -t2))[2];
lines(r_p*cos(theta<-seq(0,6,length.out = 1e3) * pi/3),r_p*sin(theta),lty=2)


编辑于 2020-05-05 17:06