机器学习模型部署:使用Flask 库的 Python Web 框架将XGBoost模型部署在服务器上(简单小模型)从模型训练到部署再到本地调用

06-01 1115阅读

1.XGBoost模型训练预测风速模型

2.保存训练好的模型

3.服务器端部署及运行

4.本地PyCharm调用

5.一些报错问题及注意

一、XGBoost模型训练预测风速模型

这里不逐行解释代码(后续文章会详细说明);模型使用贝叶斯优化(Optuna)寻找最佳超参数组合。

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor  # 导入 XGBoost 回归模型
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import optuna
import joblib
import pickle
# 1. Load the raw data (expects columns: date_time, wind_obs, ec).
data1 = pd.read_csv(r'ID01.csv')
# 2. Make sure the date_time column is a real datetime type.
data1['date_time'] = pd.to_datetime(data1['date_time'], format='%Y/%m/%d %H:%M')
# 3. Add lag features of the observed wind speed.
lags = 9  # lag depth is 9 steps (the original comment said 12, but the code uses 9)
for lag in range(1, lags + 1):
    data1[f'wind_obs_lag_{lag}'] = data1['wind_obs'].shift(lag)
lags1 = 9 # same lag depth for the 'ec' (NWP forecast) column
for lag in range(1, lags1 + 1):
    data1[f'ec_lag_{lag}'] = data1['ec'].shift(lag)
# 4. Add rolling-window features. shift(1) excludes the current row, so each
#    statistic is computed only from past values (avoids target leakage).
window_size = 12  # rolling window size for wind_obs
data1['wind_obs_rolling_mean'] = data1['wind_obs'].shift(1).rolling(window=window_size, min_periods=1).mean()  # rolling mean
data1['wind_obs_rolling_std'] = data1['wind_obs'].shift(1).rolling(window=window_size, min_periods=1).std()    # rolling std
data1['wind_obs_rolling_max'] = data1['wind_obs'].shift(1).rolling(window=window_size, min_periods=1).max()  # rolling max
data1['wind_obs_rolling_min'] = data1['wind_obs'].shift(1).rolling(window=window_size, min_periods=1).min()  # rolling min
window_size1 = 12 # rolling window size for 'ec'
data1['ec_mean'] = data1['ec'].shift(1).rolling(window=window_size1, min_periods=1).mean()  # rolling mean
data1['ec_std'] = data1['ec'].shift(1).rolling(window=window_size1, min_periods=1).std()    # rolling std
data1['ec_max'] = data1['ec'].shift(1).rolling(window=window_size1, min_periods=1).max()  # rolling max
data1['ec_min'] = data1['ec'].shift(1).rolling(window=window_size1, min_periods=1).min()  # rolling min
# 5. Restrict to the study period.
start_time = '2023-06-01 00:00:00'
end_time = '2024-06-30 18:00:00'
# NOTE(review): the original lines here were garbled by the web extraction
# (the '<=' comparisons and the train/test boundary dates were lost). The
# filter, the chronological split and the calendar features below are a
# reconstruction — confirm the boundary dates against the original script.
data1 = data1[(data1['date_time'] >= start_time) & (data1['date_time'] <= end_time)]

# 6. Chronological train/test split: train on the earlier period, test on the
#    most recent months. TODO(review): confirm these split dates.
train_start, train_end = '2023-06-01 00:00:00', '2024-03-31 18:00:00'
test_start, test_end = '2024-04-01 00:00:00', '2024-06-30 18:00:00'
train_data = data1[(data1['date_time'] >= train_start) & (data1['date_time'] <= train_end)].copy()
test_data = data1[(data1['date_time'] >= test_start) & (data1['date_time'] <= test_end)].copy()

# 7. Calendar features plus an integer season encoding (1=Spring .. 4=Winter).
season_mapping = {'Spring': 1, 'Summer': 2, 'Autumn': 3, 'Winter': 4}
month_to_season = {
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Autumn', 10: 'Autumn', 11: 'Autumn',
    12: 'Winter', 1: 'Winter', 2: 'Winter',
}
for data in (train_data, test_data):
    data.loc[:, 'hour'] = data['date_time'].dt.hour
    data.loc[:, 'day'] = data['date_time'].dt.day
    data.loc[:, 'month'] = data['date_time'].dt.month
    data.loc[:, 'year'] = data['date_time'].dt.year
    data.loc[:, 'season'] = data['date_time'].dt.month.map(month_to_season)
    data.loc[:, 'season'] = data['season'].map(season_mapping)
# 8. Assemble the feature matrix and target column for both splits.
# The exact same column order is used for train and test.
lag_cols = [f'wind_obs_lag_{lag}' for lag in range(1, lags + 1)]
ec_lag_cols = [f'ec_lag_{lag}' for lag in range(1, lags + 1)]
rolling_cols = ['wind_obs_rolling_mean', 'wind_obs_rolling_std', 'wind_obs_rolling_max', 'wind_obs_rolling_min']
ec_rolling_cols = ['ec_mean', 'ec_std', 'ec_max', 'ec_min']
feature_cols = ['ec', 'hour', 'day', 'month', 'year', 'season'] + lag_cols + ec_lag_cols + rolling_cols + ec_rolling_cols
X_train = train_data[feature_cols]
y_train = train_data['wind_obs']
X_test = test_data[feature_cols]
y_test = test_data['wind_obs']
# 9. Min-max normalisation. The target gets its own scaler so that
#    predictions can be inverse-transformed back to wind speed later.
scaler = MinMaxScaler()
y_scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
y_train_scaled = y_scaler.fit_transform(y_train.values.reshape(-1, 1)).flatten()
y_test_scaled = y_scaler.transform(y_test.values.reshape(-1, 1)).flatten()
# 10. Optuna objective: fit an XGBoost regressor with one sampled
# hyper-parameter configuration and score it by test-set RMSE.
# NOTE(review): tuning directly against the test set leaks information into
# model selection; a separate validation split would be sounder.
def objective(trial):
    """Return the scaled-target RMSE for one sampled configuration."""
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 700),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'max_depth': trial.suggest_int('max_depth', 3, 30),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
    }
    model = XGBRegressor(random_state=42, n_jobs=-1, **params)
    model.fit(X_train_scaled, y_train_scaled)
    predictions = model.predict(X_test_scaled)
    return np.sqrt(mean_squared_error(y_test_scaled, predictions))
# 11. Run the Optuna search: 500 trials, minimising the objective's RMSE.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=500)
best_params = study.best_params
print("最佳超参数:", best_params)
# 12. Refit a final model on the training set using the best configuration.
#    (best_params holds exactly the six tuned keys, so ** unpacking matches
#    the original explicit keyword list; n_jobs left at its default as before.)
best_model = XGBRegressor(random_state=42, **best_params)
best_model.fit(X_train_scaled, y_train_scaled)
# 13. Predict on both splits and invert the target scaling back to wind speed.
y_train_pred = y_scaler.inverse_transform(
    best_model.predict(X_train_scaled).reshape(-1, 1)
).flatten()
y_test_pred = y_scaler.inverse_transform(
    best_model.predict(X_test_scaled).reshape(-1, 1)
).flatten()
# 14. Evaluation metrics.
def evaluate(y_true, y_pred, dataset_name):
    """Print RMSE/MAE/MBE/R2 for one dataset and return them as a dict.

    Improvements over the original: the metrics are returned (not only
    printed) so callers can use them programmatically, they are computed with
    plain NumPy (numerically identical to the sklearn defaults used before),
    and a zero-variance target no longer divides by zero in R2.

    Args:
        y_true: observed values (any 1-D array-like of numbers).
        y_pred: predicted values, same length as y_true.
        dataset_name: label used as the prefix of each printed line.

    Returns:
        dict with keys 'rmse', 'mae', 'mbe', 'r2' (floats).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    errors = y_pred - y_true
    rmse = float(np.sqrt(np.mean(errors ** 2)))
    mae = float(np.mean(np.abs(errors)))
    mbe = float(np.mean(errors))  # Mean Bias Error: positive = over-forecast
    ss_res = float(np.sum(errors ** 2))
    ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
    # R2 is undefined for a constant target; report NaN instead of crashing.
    r2 = 1.0 - ss_res / ss_tot if ss_tot > 0 else float('nan')
    print(f"{dataset_name} RMSE: {rmse:.4f}")
    print(f"{dataset_name} MAE: {mae:.4f}")
    print(f"{dataset_name} MBE: {mbe:.4f}")
    print(f"{dataset_name} R2 Score: {r2:.4f}")
    return {'rmse': rmse, 'mae': mae, 'mbe': mbe, 'r2': r2}
# 15. Report metrics for both splits.
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_test, y_test_pred, "Test Set")

# 16. Scatter plot of observed vs predicted for both splits, with the
#     y = x "ideal" reference line spanning the test-set range.
plt.figure(figsize=(12, 6))
plt.scatter(y_test, y_test_pred, alpha=0.5, label='Test Set')
plt.scatter(y_train, y_train_pred, alpha=0.5, label='Training Set')
ideal = [min(y_test), max(y_test)]
plt.plot(ideal, ideal, color='red', linestyle='--', label='Ideal Line')
plt.xlabel('Observed Wind Speed')
plt.ylabel('Predicted Wind Speed')
plt.title('Observed vs Predicted Wind Speed')
plt.legend()
plt.grid(True)
plt.show()

# 17. Time-series overlay of observed and predicted values, test set first
#     then training set (same order as the original script).
for observed, predicted, title in (
    (y_test, y_test_pred, 'Observed vs Predicted Wind Speed (Test Set)'),
    (y_train, y_train_pred, 'Observed vs Predicted Wind Speed (Training Set)'),
):
    plt.figure(figsize=(12, 6))
    plt.plot(observed, label='Observed', color='blue')
    plt.plot(predicted, label='Predicted', color='red')
    plt.legend()
    plt.title(title)
    plt.xlabel('Time')
    plt.ylabel('Wind Speed')
    plt.grid(True)
    plt.show()
# 18. Feature importances from the fitted model, most important first.
importance_df = pd.DataFrame({
    'Feature': X_train.columns,
    'Importance': best_model.feature_importances_,
}).sort_values(by='Importance', ascending=False)
print("特征重要性:")
print(importance_df)
# Horizontal bar chart of the same table.
plt.figure(figsize=(10, 6))
plt.barh(importance_df['Feature'], importance_df['Importance'], color='skyblue')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importance')
plt.show()
# 19. Persist the model (in two formats) and both scalers so the serving
#     script can reproduce the exact training-time preprocessing.
with open('xgb_wind_speed_model.pkl', 'wb') as fh:
    pickle.dump(best_model, fh)  # pickle copy of the model
for obj, path in (
    (best_model, 'xgb_wind_speed_model.joblib'),  # joblib copy of the model
    (scaler, 'scaler.joblib'),                    # feature scaler
    (y_scaler, 'y_scaler.joblib'),                # target scaler
):
    joblib.dump(obj, path)
print("模型和预处理对象已保存")
# ---------------------------------------------------------------------------
# Server side (Flask). NOTE(review): the web extraction dropped the top of
# this script (Flask imports, app creation, artifact loading, and the header
# of prepare_features); the preamble below is a reconstruction — confirm it
# against the original deployment script.
# ---------------------------------------------------------------------------
from flask import Flask, jsonify, request

app = Flask(__name__)

# Artifacts saved by the training script.
model = joblib.load('xgb_wind_speed_model.joblib')
scaler = joblib.load('scaler.joblib')
y_scaler = joblib.load('y_scaler.joblib')

lags = 9  # must match the lag depth used at training time


def prepare_features(data):
    """Build the scaled feature row the model expects from one JSON payload.

    Args:
        data: decoded request JSON for a single observation. All lag and
            rolling-window features must already be computed by the client.

    Returns:
        2-D array of scaled features (one row) ready for model.predict.

    Raises:
        ValueError: when any required feature field is missing.
    """
    data = pd.DataFrame([data])
    # Accept either season names or the integer codes 1-4. The original
    # .map(season_mapping) turned already-encoded integer input (e.g. the
    # sample payload's "season": 2) into NaN; .replace leaves unmatched
    # values untouched, fixing that.
    season_mapping = {'Spring': 1, 'Summer': 2, 'Autumn': 3, 'Winter': 4}
    data['season'] = data['season'].replace(season_mapping)
    # Lag features - these values must be provided by the client.
    for lag in range(1, lags + 1):
        if f'wind_obs_lag_{lag}' not in data.columns:
            raise ValueError(f"Missing required feature: wind_obs_lag_{lag}")
        if f'ec_lag_{lag}' not in data.columns:
            raise ValueError(f"Missing required feature: ec_lag_{lag}")
    # Rolling-window features - computed client-side as well.
    window_features = [
        'wind_obs_rolling_mean', 'wind_obs_rolling_std',
        'wind_obs_rolling_max', 'wind_obs_rolling_min',
        'ec_mean', 'ec_std', 'ec_max', 'ec_min'
    ]
    for feature in window_features:
        if feature not in data.columns:
            raise ValueError(f"Missing required window feature: {feature}")
    # Full feature list in the exact training-time column order.
    features = [
                   'ec', 'hour', 'day', 'month', 'year', 'season'
               ] + [f'wind_obs_lag_{lag}' for lag in range(1, lags + 1)] \
               + [f'ec_lag_{lag}' for lag in range(1, lags + 1)] \
               + window_features
    missing_features = [f for f in features if f not in data.columns]
    if missing_features:
        raise ValueError(f"Missing features: {missing_features}")
    # Apply the same min-max scaling that was fitted at training time.
    return scaler.transform(data[features])
@app.route('/predict', methods=['POST'])
def predict():
    """Prediction endpoint: JSON payload in, JSON {prediction, status} out."""
    try:
        payload = request.get_json()
        feature_matrix = prepare_features(payload)
        scaled_pred = model.predict(feature_matrix)
        # Undo the target scaling; a single row goes in, so take the scalar.
        unscaled = y_scaler.inverse_transform(scaled_pred.reshape(-1, 1))[0][0]
        return jsonify({
            'prediction': float(unscaled),  # native float so it serialises
            'status': 'success'
        })
    except ValueError as exc:
        # Missing feature fields reported by prepare_features -> 400.
        return jsonify({
            'error': str(exc),
            'status': 'error',
            'message': '缺少必要的特征字段,请确保提供所有滞后特征和滑动窗口特征'
        }), 400
    except Exception as exc:
        # Anything else is an internal server error -> 500.
        return jsonify({
            'error': str(exc),
            'status': 'error'
        }), 500
if __name__ == '__main__':
    # NOTE(review): debug=True enables Werkzeug's interactive debugger, which
    # allows arbitrary code execution if the port is reachable; combined with
    # host='0.0.0.0' (binds all interfaces) this must never run on a publicly
    # exposed server. Use a production WSGI server (gunicorn/uwsgi) instead.
    app.run(host='0.0.0.0', port=5000, debug=True)

    "date_time": "2024-06-01 12:00:00",
    "ec": 5.3,
    "hour": 12,
    "day": 1,
    "month": 6,
    "year": 2024,
    "season": 2,
    # 滞后特征
    "wind_obs_lag_1": 4.8,
    "wind_obs_lag_2": 4.5,
    "wind_obs_lag_3": 4.2,
    "wind_obs_lag_4": 4.0,
    "wind_obs_lag_5": 3.9,
    "wind_obs_lag_6": 3.8,
    "wind_obs_lag_7": 3.7,
    "wind_obs_lag_8": 3.6,
    "wind_obs_lag_9": 3.5,
    "ec_lag_1": 5.1,
    "ec_lag_2": 5.0,
    "ec_lag_3": 4.9,
    "ec_lag_4": 4.8,
    "ec_lag_5": 4.7,
    "ec_lag_6": 4.6,
    "ec_lag_7": 4.5,
    "ec_lag_8": 4.4,
    "ec_lag_9": 4.3,
    # 滑动窗口特征
    "wind_obs_rolling_mean": 4.2,
    "wind_obs_rolling_std": 0.5,
    "wind_obs_rolling_max": 5.0,
    "wind_obs_rolling_min": 3.5,
    "ec_mean": 4.8,
    "ec_std": 0.3,
    "ec_max": 5.2,
    "ec_min": 4.5
}
response = requests.post(
    "http://123.45.67.8:5000/predict",%这里看你服务器给的URL是多少
    headers={"Content-Type": "application/json"},
    data=json.dumps(sample_data)
)
print(response.json())
免责声明:我们致力于保护作者版权,注重分享,被刊用文章因无法核实真实出处,未能及时与作者取得联系,或有版权异议的,请联系管理员,我们会立即处理! 部分文章是来自自研大数据AI进行生成,内容摘自(百度百科,百度知道,头条百科,中国民法典,刑法,牛津词典,新华词典,汉语词典,国家院校,科普平台)等数据,内容仅供学习参考,不准确地方联系删除处理! 图片声明:本站部分配图来自人工智能系统AI生成,觅知网授权图片,PxHere摄影无版权图库和百度,360,搜狗等多加搜索引擎自动关键词搜索配图,如有侵权的图片,请第一时间联系我们。

相关阅读

目录[+]

取消
微信二维码
微信二维码
支付宝二维码