From 5b5dfeefbc7eb9dcbd9923544005c5d281262c03 Mon Sep 17 00:00:00 2001 From: Way2Learn <118058822+Xisen-Wang@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:25:14 +0800 Subject: [PATCH] fix: Update prompts.yaml to constrain only one model type (#341) * Update prompts.yaml * Update prompts.yaml * fix a bug --------- Co-authored-by: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com> Co-authored-by: WinstonLiye <1957922024@qq.com> --- rdagent/scenarios/kaggle/developer/runner.py | 4 ++-- .../model/model_randomforest.py | 2 +- .../playground-series-s4e8_template/model/model_xgboost.py | 2 +- rdagent/scenarios/kaggle/prompts.yaml | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rdagent/scenarios/kaggle/developer/runner.py b/rdagent/scenarios/kaggle/developer/runner.py index 87c4e64d..0785c937 100644 --- a/rdagent/scenarios/kaggle/developer/runner.py +++ b/rdagent/scenarios/kaggle/developer/runner.py @@ -97,8 +97,8 @@ def develop(self, exp: KGModelExperiment) -> KGModelExperiment: self.build_from_SOTA(exp) sub_ws = exp.sub_workspace_list[0] - # TODO: There's a possibility of generating a hybrid model (lightgbm + xgboost), which results in having two items in the model_type list. Hardcoded now. - model_type = sub_ws.target_task.model_type[0] + # TODO: There's a possibility of generating a hybrid model (lightgbm + xgboost), which results in having two items in the model_type list. 
+ model_type = sub_ws.target_task.model_type if sub_ws.code_dict == {}: raise ModelEmptyError("No model is implemented.") diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_randomforest.py b/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_randomforest.py index 3c64a094..377683b9 100644 --- a/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_randomforest.py +++ b/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_randomforest.py @@ -51,4 +51,4 @@ def predict(model, X): y_pred_prob = model.predict_proba(X_selected)[:, 1] # Apply threshold to get boolean predictions - return y_pred_prob + return y_pred_prob.reshape(-1, 1) diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_xgboost.py b/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_xgboost.py index a70fa680..b25e87d9 100644 --- a/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_xgboost.py +++ b/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_xgboost.py @@ -37,4 +37,4 @@ def predict(model, X): X = select(X) dtest = xgb.DMatrix(X) y_pred_prob = model.predict(dtest) - return y_pred_prob + return y_pred_prob.reshape(-1, 1) diff --git a/rdagent/scenarios/kaggle/prompts.yaml b/rdagent/scenarios/kaggle/prompts.yaml index 5820294b..c4250602 100644 --- a/rdagent/scenarios/kaggle/prompts.yaml +++ b/rdagent/scenarios/kaggle/prompts.yaml @@ -95,7 +95,7 @@ feature_experiment_output_format: |- model_experiment_output_format: |- According to the hypothesis, please help user design one model task. - Since we only build one model from four model types: ["XGBoost", "RandomForest", "LightGBM", "NN"]. + We only build one model from four main model types: ["XGBoost", "RandomForest", "LightGBM", "NN"]. The output should follow JSON format. 
The schema is as follows: { "model_name": "model_name", @@ -106,7 +106,7 @@ model_experiment_output_format: |- "hyperparameter_name_2": "value of hyperparameter 2", "hyperparameter_name_3": "value of hyperparameter 3" }, - "model_type": "model type" + "model_type": "Select only one model type: XGBoost, RandomForest, LightGBM, or NN. The primary model must be unique, but you may use auxiliary models for support if you think this will improve the result, e.g. choosing model A as the main model, with model B used for auxiliary support or for optimization of specific details." } Usually, a larger model works better than a smaller one. Hence, the parameters should be larger.