import numpy as np
from hyperopt import fmin, tpe, hp, rand, SparkTrials
# Stage 1: coarse random search over a deliberately wide range.
# Random sampling covers a broad space cheaply before the focused pass.
coarse_space = dict(
    max_depth=hp.choice("max_depth", range(2, 20)),
    learning_rate=hp.loguniform("lr", np.log(0.001), np.log(1.0)),
    n_estimators=hp.choice("n_est", range(50, 1000, 50)),
    min_child_weight=hp.uniform("mcw", 0.1, 20),
    subsample=hp.uniform("ss", 0.3, 1.0),
)
# High parallelism is fine here: random search has no sequential dependency.
coarse_trials = SparkTrials(parallelism=8)
coarse_best = fmin(
    fn=objective,
    space=coarse_space,
    algo=rand.suggest,  # pure random search
    max_evals=80,
    trials=coarse_trials,
)
# Analyze Stage 1: extract the parameter ranges of the top ~10% of trials.
# hyperopt only guarantees a 'loss' entry for results with status 'ok';
# failed/errored trials would raise KeyError in the sort, so filter first.
ok_results = [r for r in coarse_trials.results if r.get("status") == "ok"]
# Top 10% of *successful* evals (the original hard-coded [:8], which
# silently assumed all 80 evaluations succeeded).
top_k = max(1, len(ok_results) // 10)
top_results = sorted(ok_results, key=lambda r: r["loss"])[:top_k]
# → e.g. max_depth: 5~9, lr: 0.01~0.1, n_estimators: 200~500 ...
# Stage 2: fine-grained Bayesian (TPE) search on the narrowed ranges
# identified by the coarse pass.
fine_space = dict(
    max_depth=hp.choice("max_depth", [5, 6, 7, 8, 9]),
    learning_rate=hp.loguniform("lr", np.log(0.01), np.log(0.1)),
    n_estimators=hp.choice("n_est", [200, 300, 400, 500]),
    min_child_weight=hp.uniform("mcw", 1, 8),
    subsample=hp.uniform("ss", 0.6, 0.95),
)
# Lower parallelism so TPE sees more completed results before proposing
# new points, maximizing the benefit of the sequential model.
fine_trials = SparkTrials(parallelism=4)
fine_best = fmin(
    fn=objective,
    space=fine_space,
    algo=tpe.suggest,  # Bayesian optimization (TPE)
    max_evals=150,
    trials=fine_trials,
)