首先讲讲根据行列索引的查询
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(50, 100, size=(4, 4)),
columns=pd.MultiIndex.from_product(
[['math', 'physics'], ['term1', 'term2']]),
index=pd.MultiIndex.from_tuples(
[('class1', 'LiLei'), ('class2', 'HanMeiMei'),
('class2', 'LiLei'), ('class2', 'HanMeiMei')]))
df.index.names = ['class', 'name']
df
# 输出:
math physics
term1 term2 term1 term2
class name
class1 LiLei 54 71 54 96
class2 HanMeiMei 93 77 86 66
LiLei 53 68 98 89
HanMeiMei 58 61 57 89
1. 根据行索引查询:
# 取外层索引为'class1'的数据
df.loc['class1']
# 输出:
math physics
term1 term2 term1 term2
name
LiLei 54 71 54 96
# 同时根据多个索引筛选取值,法一:
df.loc[('class2', 'HanMeiMei')]
# 输出:
math physics
term1 term2 term1 term2
class name
class2 HanMeiMei 93 77 86 66
HanMeiMei 58 61 57 89
# 同时根据多个索引筛选取值,法二:(这个方法不会带上外层索引)
df.loc['class2'].loc['HanMeiMei']
# 输出:
math physics
term1 term2 term1 term2
name
HanMeiMei 93 77 86 66
HanMeiMei 58 61 57 89
# 根据内层索引取值,先交换内外层索引位置
df.swaplevel()
# 输出:
math physics
term1 term2 term1 term2
name class
LiLei class1 54 71 54 96
HanMeiMei class2 93 77 86 66
LiLei class2 53 68 98 89
HanMeiMei class2 58 61 57 89
# 再通过取外层索引的方法取值
df.swaplevel().loc['HanMeiMei']
# 输出:
math physics
term1 term2 term1 term2
class
class2 93 77 86 66
class2 58 61 57 89
2. 根据columns查询:
df
# 输出:
math physics
term1 term2 term1 term2
class name
class1 LiLei 54 71 54 96
class2 HanMeiMei 93 77 86 66
LiLei 53 68 98 89
HanMeiMei 58 61 57 89
# 根据外层column取值:
df['math']
# 输出
term1 term2
class name
class1 LiLei 54 71
class2 HanMeiMei 93 77
LiLei 53 68
HanMeiMei 58 61
# 根据多层column联合取值:
# 以下4句代码取到的数据相同(第3句是链式索引,返回的Series的Name为term1而不是(math, term1)):
df['math','term1']
df.loc[:, ('math','term1')]
df['math']['term1']
df[('math','term1')]
# 输出
class name
class1 LiLei 54
class2 HanMeiMei 93
LiLei 53
HanMeiMei 58
Name: (math, term1), dtype: int32
# 与行索引类似,取内层索引先交换轴
df.swaplevel(axis=1)
# 输出
term1 term2 term1 term2
math math physics physics
class name
class1 LiLei 54 71 54 96
class2 HanMeiMei 93 77 86 66
LiLei 53 68 98 89
HanMeiMei 58 61 57 89
# 交换轴后取外层列索引即可
df.swaplevel(axis=1)['term1']
# 输出
math physics
class name
class1 LiLei 54 54
class2 HanMeiMei 93 86
LiLei 53 98
HanMeiMei 58 57
以上内容参考:
https://www.cnblogs.com/jaysonteng/p/13475618.html
接下来讲解行列索引的转换:
set_index(): 指定列为索引
reset_index(): 将索引转化为列
1.列转化为索引
df1=pd.DataFrame({'X':range(5),'Y':range(5),'S':list("aaabb"),'Z':[1,1,2,2,2]})
df1
# 输出(使用默认索引)
X Y S Z
0 0 0 a 1
1 1 1 a 1
2 2 2 a 2
3 3 3 b 2
4 4 4 b 2
# 指定某一列为索引
df1.set_index('S')
# 输出
X Y Z
S
a 0 0 1
a 1 1 1
a 2 2 2
b 3 3 2
b 4 4 2
# 也可以保留索引列
df1.set_index('S', drop=False)
# 输出
X Y S Z
S
a 0 0 a 1
a 1 1 a 1
a 2 2 a 2
b 3 3 b 2
b 4 4 b 2
# 指定多列作为索引
df1.set_index(['S', 'Z'], drop=False)
# 输出
X Y S Z
S Z
a 1 0 0 a 1
1 1 1 a 1
2 2 2 a 2
b 2 3 3 b 2
2 4 4 b 2
2.索引转化为列
df2=df1.set_index(['S','Z'])
df2
# 输出
X Y
S Z
a 1 0 0
1 1 1
2 2 2
b 2 3 3
2 4 4
# 将单个索引作为DataFrame对象的列
df2.reset_index('Z')
# 输出
Z X Y
S
a 1 0 0
a 1 1 1
a 2 2 2
b 2 3 3
b 2 4 4
# 将多级索引作为列
df2.reset_index()
# 输出
S Z X Y
0 a 1 0 0
1 a 1 1 1
2 a 2 2 2
3 b 2 3 3
4 b 2 4 4
# 就地重置索引:将所有索引层级转为列,并直接修改df2本身(inplace=True)
df2.reset_index(inplace=True)
df2
# 输出
S Z X Y
0 a 1 0 0
1 a 1 1 1
2 a 2 2 2
3 b 2 3 3
4 b 2 4 4
# 不带inplace参数的操作都不会直接修改原DataFrame,若要直接修改原DataFrame,需加上参数inplace=True。
# 注意:df2在上一步已被就地重置为默认整数索引,这里再次重置会把该整数索引也转成名为index的列
df2.reset_index(inplace=True)
df2
# 输出
index S Z X Y
0 0 a 1 0 0
1 1 a 1 1 1
2 2 a 2 2 2
3 3 b 2 3 3
4 4 b 2 4 4
以上内容参考:
https://blog.csdn.net/weixin_38168620/article/details/80100014