Skip to content

Commit

Permalink
add hyperparameter tuning time limit, pipeline number, and documentat…
Browse files Browse the repository at this point in the history
…ions
EdenWuyifan committed Jul 13, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent f9ef217 commit 35f771c
Showing 5 changed files with 1,574 additions and 1,480 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -57,3 +57,5 @@ desktop.ini
tmp
lightning_logs
**/lightning_logs
smac3_output
**/smac3_output
23 changes: 14 additions & 9 deletions alpha_automl/automl_api.py
Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ class BaseAutoML():

def __init__(self, output_folder, time_bound=15, metric=None, split_strategy='holdout', time_bound_run=5, task=None,
score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None, start_mode='auto',
verbose=False, optimizing=False):
verbose=False, optimizing=False, optimizing_number=10):
"""
Create/instantiate a BaseAutoML object.
@@ -40,6 +40,8 @@ def __init__(self, output_folder, time_bound=15, metric=None, split_strategy='ho
:param split_strategy_kwargs: Additional arguments for splitting_strategy.
:param start_mode: The mode to start the multiprocessing library. It could be `auto`, `fork` or `spawn`.
:param verbose: Whether or not to show additional logs
:param optimizing: Whether or not to tune the top pipelines using the SMAC3 optimizer
:param optimizing_number: The number of top pipelines to be optimized
"""

self.output_folder = output_folder
@@ -57,7 +59,7 @@ def __init__(self, output_folder, time_bound=15, metric=None, split_strategy='ho
self.X = None
self.y = None
self.leaderboard = None
self.automl_manager = AutoMLManager(output_folder, time_bound, time_bound_run, task, verbose)
self.automl_manager = AutoMLManager(output_folder, time_bound*0.8, time_bound_run, task, verbose)

if not verbose:
hide_logs()
@@ -69,7 +71,8 @@ def __init__(self, output_folder, time_bound=15, metric=None, split_strategy='ho
check_input_for_multiprocessing(self._start_method, self.splitter, 'split strategy')

self.optimizing = optimizing
self.optimizing_number = 10
self.optimizing_number = optimizing_number
self.optimizing_timelimit = time_bound*0.2

def fit(self, X, y):
"""
@@ -107,7 +110,8 @@ def fit(self, X, y):

# [SMAC] When `optimizing` is enabled, tune the top-ranked pipelines with SmacOptimizer
if self.optimizing:
optimizer = SmacOptimizer(X=X, y=y, splitter=self.splitter, scorer=self.scorer, n_trials=200)
optimizer = SmacOptimizer(X=X, y=y, splitter=self.splitter, scorer=self.scorer,
n_trials=200, time_limit=self.optimizing_timelimit)
for index, pipeline in enumerate(sorted_pipelines, start=1):
pipeline_id = PIPELINE_PREFIX + str(index)
if index <= self.optimizing_number:
@@ -297,7 +301,7 @@ class AutoMLClassifier(BaseAutoML):

def __init__(self, output_folder, time_bound=15, metric='accuracy_score', split_strategy='holdout',
time_bound_run=5, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None,
start_mode='auto', verbose=False, optimizing=False):
start_mode='auto', verbose=False, optimizing=False, optimizing_number=10):
"""
Create/instantiate an AutoMLClassifier object.
@@ -318,7 +322,7 @@ def __init__(self, output_folder, time_bound=15, metric='accuracy_score', split_
self.label_enconder = LabelEncoder()
task = 'CLASSIFICATION'
super().__init__(output_folder, time_bound, metric, split_strategy, time_bound_run, task, score_sorting,
metric_kwargs, split_strategy_kwargs, start_mode, verbose, optimizing)
metric_kwargs, split_strategy_kwargs, start_mode, verbose, optimizing, optimizing_number)

def fit(self, X, y):
y = self.label_enconder.fit_transform(y)
@@ -352,7 +356,7 @@ class AutoMLRegressor(BaseAutoML):

def __init__(self, output_folder, time_bound=15, metric='mean_squared_error', split_strategy='holdout',
time_bound_run=5, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None,
start_mode='auto', verbose=False, optimizing=False):
start_mode='auto', verbose=False, optimizing=False, optimizing_number=10):
"""
Create/instantiate an AutoMLRegressor object.
@@ -372,12 +376,13 @@ def __init__(self, output_folder, time_bound=15, metric='mean_squared_error', sp

task = 'REGRESSION'
super().__init__(output_folder, time_bound, metric, split_strategy, time_bound_run, task, score_sorting,
metric_kwargs, split_strategy_kwargs, start_mode, verbose, optimizing)
metric_kwargs, split_strategy_kwargs, start_mode, verbose, optimizing, optimizing_number)


class AutoMLTimeSeries(BaseAutoML):
def __init__(self, output_folder, time_bound=15, metric='mean_squared_error', split_strategy='timeseries',
time_bound_run=5, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None, verbose=False, date_column=None, target_column=None):
time_bound_run=5, score_sorting='auto', metric_kwargs=None, split_strategy_kwargs=None,
verbose=False, date_column=None, target_column=None):
"""
Create/instantiate an AutoMLTimeSeries object.
128 changes: 83 additions & 45 deletions alpha_automl/hyperparameter_tuning/smac_parameters.json
Original file line number Diff line number Diff line change
@@ -57,7 +57,7 @@
"n_estimators": {
"type": "Integer",
"value": [
200,
100,
2000
],
"default": 400
@@ -91,7 +91,7 @@
"n_estimators": {
"type": "Integer",
"value": [
200,
100,
2000
],
"default": 400
@@ -107,9 +107,12 @@
},
"sklearn.ensemble.GradientBoostingClassifier": {
"n_estimators": {
"type": "Categorical",
"value": [1, 2, 4, 8, 16, 32, 64, 100, 200],
"default": 32
"type": "Integer",
"value": [
100,
2000
],
"default": 400
},
"min_samples_split": {
"type": "Float",
@@ -124,7 +127,7 @@
"n_estimators": {
"type": "Integer",
"value": [
200,
100,
2000
],
"default": 400
@@ -291,9 +294,12 @@
"sklearn.tree.DecisionTreeClassifier": {},
"xgboost.XGBClassifier": {
"n_estimators": {
"type": "Categorical",
"value": [100, 200, 500],
"default": 100
"type": "Integer",
"value": [
100,
2000
],
"default": 400
},
"learning_rate": {
"type": "Categorical",
@@ -330,16 +336,16 @@
"n_estimators": {
"type": "Integer",
"value": [
200,
100,
2000
],
"default": 400
},
"max_depth": {
"type": "Integer",
"value": [
3,
20
1,
120
],
"default": 3
},
@@ -362,8 +368,8 @@
"max_depth": {
"type": "Integer",
"value": [
3,
20
1,
120
],
"default": 3
},
@@ -393,53 +399,74 @@
"n_estimators": {
"type": "Integer",
"value": [
200,
100,
2000
],
"default": 400
},
"max_features": {
"type": "Float",
"type": "Categorical",
"value": ["auto", "log2","sqrt"],
"default": "auto"
},
"max_depth": {
"type": "Integer",
"value": [
0,
1
1,
120
],
"default": 0.5
"default": 3
},
"min_samples_split": {
"type": "Integer",
"value": [
1,
100
],
"default": 3
},
"min_samples_leaf": {
"type": "Integer",
"value": [
1,
1000
100
],
"default": 1
"default": 3
}
},
"sklearn.gaussian_process.GaussianProcessRegressor": {},
"sklearn.ensemble.GradientBoostingRegressor": {
"n_estimators": {
"type": "Integer",
"value": [
200,
100,
2000
],
"default": 400
},
"max_leaf_nodes": {
"max_depth": {
"type": "Integer",
"value": [
1,
10000
120
],
"default": 100
"default": 3
},
"min_samples_split": {
"type": "Float",
"type": "Integer",
"value": [
0,
1
1,
100
],
"default": 0.8
"default": 3
},
"min_samples_leaf": {
"type": "Integer",
"value": [
1,
100
],
"default": 3
}
},
"sklearn.neighbors.KNeighborsRegressor": {
@@ -541,7 +568,7 @@
"n_estimators": {
"type": "Integer",
"value": [
200,
100,
2000
],
"default": 400
@@ -554,18 +581,31 @@
"max_depth": {
"type": "Integer",
"value": [
3,
50
1,
120
],
"default": 3
},
"max_leaf_nodes": {
"min_samples_split": {
"type": "Integer",
"value": [
1,
10000
100
],
"default": 3
},
"min_samples_leaf": {
"type": "Integer",
"value": [
1,
100
],
"default": 3
},
"bootstrap": {
"type": "Categorical",
"value": [true, false],
"default": true
}
},
"sklearn.linear_model.Ridge": {
@@ -673,9 +713,12 @@
"sklearn.linear_model.TheilSenRegressor": {},
"xgboost.XGBRegressor": {
"n_estimators": {
"type": "Categorical",
"value": [100, 200, 500],
"default": 100
"type": "Integer",
"value": [
100,
2000
],
"default": 400
},
"learning_rate": {
"type": "Categorical",
@@ -687,11 +730,6 @@
"value": ["gbtree", "gblinear"],
"default": "gbtree"
},
"gamma": {
"type": "Categorical",
"value": [0, 0.5, 1],
"default": 0
},
"reg_alpha": {
"type": "Categorical",
"value": [0, 0.5, 1],
@@ -712,16 +750,16 @@
"n_estimators": {
"type": "Integer",
"value": [
200,
100,
2000
],
"default": 400
},
"max_depth": {
"type": "Integer",
"value": [
3,
20
1,
120
],
"default": 3
},
1,738 changes: 891 additions & 847 deletions examples/smac_tabular_classification.ipynb

Large diffs are not rendered by default.

1,163 changes: 584 additions & 579 deletions examples/smac_tabular_regression.ipynb

Large diffs are not rendered by default.

0 comments on commit 35f771c

Please sign in to comment.