import mlflow
import optuna
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold, cross_val_score
from xgboost import XGBClassifier
def _take(data, idx):
    # Positional row selection that works for both numpy arrays and
    # pandas objects (pandas needs .iloc for positional indexing).
    return data.iloc[idx] if hasattr(data, "iloc") else data[idx]
def objective(trial):
    """Optuna objective: 5-fold cross-validated weighted F1 of an XGBoost classifier.

    Samples hyperparameters, evaluates them fold-by-fold — reporting the
    running mean to the trial so the study's MedianPruner can stop
    unpromising trials early — and logs params/metric to a nested MLflow run.

    NOTE(review): reads module-level X_train / y_train; confirm they are
    defined before the study runs.

    Returns:
        Mean weighted-F1 score across the 5 validation folds.

    Raises:
        optuna.TrialPruned: when the pruner decides the trial is unpromising.
    """
    params = {
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "n_estimators": trial.suggest_int("n_estimators", 100, 500, step=50),
        # log=True: learning rates are best searched on a multiplicative scale
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "min_child_weight": trial.suggest_float("min_child_weight", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "gamma": trial.suggest_float("gamma", 0, 5),
    }
    with mlflow.start_run(nested=True):
        mlflow.log_params(params)
        # StratifiedKFold(5) matches cross_val_score's default CV for
        # classifiers. Running the folds manually (instead of one
        # cross_val_score call) lets us report intermediate scores, which
        # is required for the study's MedianPruner to actually prune.
        cv = StratifiedKFold(n_splits=5)
        fold_scores = []
        for step, (train_idx, val_idx) in enumerate(cv.split(X_train, y_train)):
            # use_label_encoder is deprecated in recent XGBoost and only
            # emits a warning there; kept for backward compatibility.
            model = XGBClassifier(
                **params, use_label_encoder=False, eval_metric="logloss"
            )
            model.fit(_take(X_train, train_idx), _take(y_train, train_idx))
            preds = model.predict(_take(X_train, val_idx))
            fold_scores.append(
                f1_score(_take(y_train, val_idx), preds, average="weighted")
            )
            # Report the running mean so the pruner compares trials on the
            # same statistic at every step.
            running_mean = sum(fold_scores) / len(fold_scores)
            trial.report(running_mean, step)
            if trial.should_prune():
                # Record the partial score before abandoning the trial.
                mlflow.log_metric("cv_f1_score", running_mean)
                raise optuna.TrialPruned()
        avg_score = sum(fold_scores) / len(fold_scores)
        mlflow.log_metric("cv_f1_score", avg_score)
    return avg_score
# Create the Optuna study and run the optimization under one parent MLflow run.
with mlflow.start_run(run_name="optuna-tuning"):
    sampler = optuna.samplers.TPESampler(seed=42)  # seeded for reproducibility
    pruner = optuna.pruners.MedianPruner()  # stop clearly under-performing trials early
    study = optuna.create_study(direction="maximize", sampler=sampler, pruner=pruner)
    study.optimize(objective, n_trials=100)

    print(f"Best params: {study.best_params}")
    print(f"Best F1: {study.best_value:.4f}")

    # Record the winning configuration on the parent run.
    mlflow.log_params(study.best_params)
    mlflow.log_metric("best_f1_score", study.best_value)