DataFrame 删除一列或某几列
因项目需要,在网上查阅了很久,看了各种回答,但都不如自己动手操作一遍来得记忆深刻
1、删除一列
使用 del 语句(del 是 Python 的关键字,不是函数)
# Each `del` statement removes the named column from `df` itself (in place);
# nothing is returned. Assumes `df` is a pandas DataFrame that currently has
# columns 'A', 'B' and 'C' -- a missing column would raise KeyError.
del df['A']
del df['B']
del df['C']
2、删除某几列
使用 drop 方法:axis=1 表示按列删除,axis=0 表示按行删除。
# Labels of the columns to remove, collected in one list for readability.
columns_to_drop = [
    'vehicle_energyConsumed',
    'vehicle_lane',
    'vehicle_chargingStationId',
    'vehicle_energyCharged',
    'vehicle_energyChargedInTransit',
    'vehicle_maximumBatteryCapacity',
    'vehicle_id',
    'vehicle_energyChargedStopped',
]
# axis=1 makes drop() interpret the labels as column names; the result is
# bound to df2.
df2 = df1.drop(columns_to_drop, axis=1)
3、代码应用
def xgboost(path):
    """Train an XGBoost regressor on one vehicle's energy-consumption rows.

    Reads the semicolon-separated CSV at ``path``, discards rows whose
    ``vehicle_energyConsumed`` is 0, keeps only the rows whose ``vehicle_id``
    equals ``'0.110'``, and fits a boosted-tree model that predicts
    ``vehicle_energyConsumed`` from the columns left after the non-feature
    columns are dropped. The model is then used to predict a 20% hold-out
    split, and its feature importance is plotted.

    Relies on ``pd``, ``xgb``, ``train_test_split`` and ``plot_importance``
    being imported at module level (the imports are not part of this block).

    Args:
        path: Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns:
        None. The trained model and the predictions stay local.
    """
    # Read vehicle_id as text: if every id looks numeric, pandas would infer
    # float64 and the string comparison against '0.110' would match nothing.
    df = pd.read_csv(path, sep=";", dtype={'vehicle_id': str})
    df = df[df['vehicle_energyConsumed'] != 0]
    df1 = df[df['vehicle_id'] == '0.110']

    # Target column.
    y = df1['vehicle_energyConsumed']

    # Everything except the target and these non-feature columns is a feature.
    non_feature_columns = [
        'vehicle_energyConsumed',
        'vehicle_lane',
        'vehicle_chargingStationId',
        'vehicle_energyCharged',
        'vehicle_energyChargedInTransit',
        'vehicle_maximumBatteryCapacity',
        'vehicle_id',
        'vehicle_energyChargedStopped',
    ]
    X = df1.drop(non_feature_columns, axis=1)

    # XGBoost training
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0
    )
    params = {
        'booster': 'gbtree',
        'objective': 'reg:squarederror',
        'gamma': 0.1,
        'max_depth': 5,
        'lambda': 3,
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'min_child_weight': 3,
        # 'silent' is deprecated in XGBoost; 'verbosity': 0 is the
        # replacement for silent mode.
        'verbosity': 0,
        'eta': 0.1,
        'seed': 1000,
        'nthread': 4,
    }
    dtrain = xgb.DMatrix(X_train, label=y_train)
    num_rounds = 300
    # xgb.train accepts the parameter dict directly.
    model = xgb.train(params, dtrain, num_rounds)

    # Predict on the held-out test split. expected_y / predicted_y are kept
    # so an error metric can be computed from them; they are not returned.
    expected_y = y_test
    dtest = xgb.DMatrix(X_test)
    predicted_y = model.predict(dtest)

    # Show feature importance
    plot_importance(model)
编辑于 2021-07-19 19:55