Tukey HSD检验法/W法

最新推荐文章于 2024-01-22 01:00:00 发布

VIP文章 weixin_34102807

最新推荐文章于 2024-01-22 01:00:00 发布

阅读量9.9k

点赞数

文章标签： python r语言人工智能

sklearn实战-乳腺癌细胞数据挖掘（博主亲自录视频）

https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share

医药统计项目联系QQ：231469242

python 2.7

# -*- coding: utf-8 -*-
from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                         MultiComparison)
                                         
# Import standard packages
import numpy as np
from scipy import stats
import pandas as pd      
import variance_check

                                                                    
# Driver script: run a Tukey HSD multiple comparison of StressReduction
# across the Treatment groups stored in an Excel sheet.

# Name of the Excel workbook holding the data
excel = "sample.xlsx"
# Load the data
df = pd.read_excel(excel)
# Collect each treatment group's StressReduction values as plain lists
group_mentaln = list(df.StressReduction[(df.Treatment == "mental")])
group_physical = list(df.StressReduction[(df.Treatment == "physical")])
group_medical = list(df.StressReduction[(df.Treatment == "medical")])
list_groups = [group_mentaln, group_physical, group_medical]
list_total = group_mentaln + group_physical + group_medical

# Function-call form of print works on both Python 2 and Python 3
# (the bare `print"..."` statement is a SyntaxError on Python 3).
print("equal test-----------------------------------------------------")
# Original author's note: check whether the groups have equal sample sizes;
# if they do not, Tukey-style methods are not appropriate.
# NOTE(review): Equal_lenth lives in variance_check and is not visible here;
# presumably it returns False when group sizes differ -- confirm.
equal_lenth = variance_check.Equal_lenth(list_groups)
if equal_lenth == False:
    print("the length of groups are not equal")

multiComp = MultiComparison(df['StressReduction'], df['Treatment'])
tukey = multiComp.tukeyhsd()
# Reuse the result computed above instead of running the test a second time.
summary = tukey.summary()
print(summary)

q = tukey.q_crit
# Single formatted string: prints "q values: <q>" on Python 2 and 3 alike
# (print("a", b) would print a tuple repr on Python 2).
print("q values: %s" % q)
'''
q值
Out[41]: 3.5057698487864877
'''

'''
Multiple Comparison of Means - Tukey HSD,FWER=0.05
===============================================
 group1  group2  meandiff  lower  upper  reject
-----------------------------------------------
medical  mental    1.5     0.3217 2.6783  True 
medical physical   1.0    -0.1783 2.1783 False 
 mental physical   -0.5   -1.6783 0.6783 False 
-----------------------------------------------
'''

# Raw table rows (header first) behind the printed summary.
print("data details: %s" % (summary.data,))
'''
[['group1', 'group2', 'meandiff', 'lower', 'upper', 'reject'], 
[u'medical', u'mental', 1.5, 0.32169999999999999, 2.6783000000000001, True], 
[u'medical', u'physical', 1.0, -0.17829999999999999, 2.1783000000000001, False],
[u'mental', u'physical', -0.5, -1.6782999999999999, 0.67830000000000001, False]]
'''

variance_check.py

# -*- coding: utf-8 -*-
'''
用于方差齐性检验
正态性检验
配对相等检验
'''
import scipy,math
from scipy.stats import f
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
# additional packages
from statsmodels.stats.diagnostic import lillifors
#多重比较
from statsmodels.sandbox.stats.multicomp import multipletests
#用于排列组合
import itertools
'''
#测试数据
group1=[2,3,7,2,6]
group2=[10,8,7,5,10]
group3=[10,13,14,13,15]
list_groups=[group1,group2,group3]
list_total=group1+group2+group3
'''
a=0.05

#正态分布测试
def check_normality(testData, alpha=0.05):
    """Test whether a sample is consistent with a normal distribution.

    The test is chosen from the sample size ``n = len(testData)``:

    * ``20 < n < 50``    -- ``stats.normaltest``
    * ``n <= 20``        -- ``stats.shapiro`` (Shapiro-Wilk)
    * ``50 <= n <= 300`` -- ``lillifors`` (imported from statsmodels)
    * ``n > 300``        -- ``stats.kstest`` against a normal distribution
      whose mean and standard deviation are estimated from the sample

    The name of the test used and the verdict are printed.

    Parameters:
        testData: sequence of numeric observations.
        alpha: significance level; normality is rejected when the test's
            p-value is below it. Defaults to 0.05.

    Returns:
        True if normality is not rejected (p-value >= alpha), else False.
    """
    size = len(testData)

    if 20 < size < 50:
        label = "use normaltest"
        p_value = stats.normaltest(testData)[1]
    elif size < 50:
        # Only reached for size <= 20: larger samples below 50 took the
        # normaltest branch above.
        label = "use shapiro:"
        p_value = stats.shapiro(testData)[1]
    elif size <= 300:
        label = "use lillifors:"
        p_value = lillifors(testData)[1]
    else:
        label = "use kstest:"
        # Compare against N(sample mean, sample std). A bare
        # kstest(data, 'norm') tests against the *standard* normal N(0, 1)
        # and therefore rejects any normal data that is not already
        # standardized. With estimated parameters the p-value is only
        # approximate (it errs toward not rejecting).
        location = np.mean(testData)
        scale = np.std(testData, ddof=1)
        p_value = stats.kstest(testData, 'norm', args=(location, scale))[1]

    # print(...) with a single string behaves identically on Python 2 and 3.
    print(label)
    if p_value < alpha:
        print("data are not normal distributed")
        return False
    print("data are normal distributed")
    return True
 
 
#对所有样本组进行正态性检验
def NormalTest(list_groups):
    for group in list_groups:
        #正态性检验
        status=check_normality(group)
        if st

最低0.47元/天解锁文章

weixin_34102807

关注

0
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
Tukey HSD检验法/W法

sklearn实战-乳腺癌细胞数据挖掘（博主亲自录视频）https://study.163.com/course/introduction.htm?courseId=1005269003&amp;utm_campaign=commission&amp;utm_source=cp-400000000398149&amp;utm_medium=share 医药统计项目联系QQ：2314...
复制链接

扫一扫