fix: Update prompts.yaml to constrain only one model type (#341)

* Update prompts.yaml
* Update prompts.yaml
* fix a bug

---------

Co-authored-by: WinstonLiyt <[email protected]>
Co-authored-by: WinstonLiye <[email protected]>
microsoft · Sep 25, 2024 · 5b5dfee · 5b5dfee
1 parent f12ce72
commit 5b5dfee
Show file tree

Hide file tree

Showing 4 changed files with 6 additions and 6 deletions.
diff --git a/rdagent/scenarios/kaggle/developer/runner.py b/rdagent/scenarios/kaggle/developer/runner.py
@@ -97,8 +97,8 @@ def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
         self.build_from_SOTA(exp)
 
         sub_ws = exp.sub_workspace_list[0]
-        # TODO: There's a possibility of generating a hybrid model (lightgbm + xgboost), which results in having two items in the model_type list. Hardcoded now.
-        model_type = sub_ws.target_task.model_type[0]
+        # TODO: There's a possibility of generating a hybrid model (lightgbm + xgboost), which results in having two items in the model_type list.
+        model_type = sub_ws.target_task.model_type
 
         if sub_ws.code_dict == {}:
             raise ModelEmptyError("No model is implemented.")

diff --git a/...t/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_randomforest.py b/...t/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_randomforest.py
@@ -51,4 +51,4 @@ def predict(model, X):
     y_pred_prob = model.predict_proba(X_selected)[:, 1]
 
     # Apply threshold to get boolean predictions
-    return y_pred_prob
+    return y_pred_prob.reshape(-1, 1)
diff --git a/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_xgboost.py b/rdagent/scenarios/kaggle/experiment/playground-series-s4e8_template/model/model_xgboost.py
@@ -37,4 +37,4 @@ def predict(model, X):
     X = select(X)
     dtest = xgb.DMatrix(X)
     y_pred_prob = model.predict(dtest)
-    return y_pred_prob
+    return y_pred_prob.reshape(-1, 1)
diff --git a/rdagent/scenarios/kaggle/prompts.yaml b/rdagent/scenarios/kaggle/prompts.yaml
@@ -95,7 +95,7 @@ feature_experiment_output_format: |-
 
 model_experiment_output_format: |-
   According to the hypothesis, please help user design one model task.
-  Since we only build one model from four model types: ["XGBoost", "RandomForest", "LightGBM", "NN"].  
+  We only build one model from four main model types: ["XGBoost", "RandomForest", "LightGBM", "NN"].
   The output should follow JSON format. The schema is as follows: 
   {
       "model_name": "model_name",
@@ -106,7 +106,7 @@ model_experiment_output_format: |-
           "hyperparameter_name_2": "value of hyperparameter 2",
           "hyperparameter_name_3": "value of hyperparameter 3"
       },
-      "model_type": "model type"
+      "model_type": "Select only one model type: XGBoost, RandomForest, LightGBM, or NN. The primary model must be unique, but you may use auxiliary models for support if you think it can have a good result like choosing A model as the main model, with B Model used for auxiliary support or optimization on specific details."
   }
   Usually, a larger model works better than a smaller one. Hence, the parameters should be larger.