From 984bc8ea9bbe6ddb24adb41fa67f56a517613586 Mon Sep 17 00:00:00 2001
From: "a.cherkaoui"
Date: Thu, 2 Jan 2025 07:25:07 +0100
Subject: [PATCH] Renamed xgboost_training_summary.py to summary.py and
 _XGBoostTrainingSummary to XGBoostTrainingSummary

---
 python-package/xgboost/spark/core.py                   | 10 ++++------
 .../spark/{xgboost_training_summary.py => summary.py}  |  6 +++---
 2 files changed, 7 insertions(+), 9 deletions(-)
 rename python-package/xgboost/spark/{xgboost_training_summary.py => summary.py} (92%)

diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py
index 1e06ef9c514c..df9a57ba8428 100644
--- a/python-package/xgboost/spark/core.py
+++ b/python-package/xgboost/spark/core.py
@@ -82,6 +82,7 @@
     HasFeaturesCols,
     HasQueryIdCol,
 )
+from .summary import XGBoostTrainingSummary
 from .utils import (
     CommunicatorContext,
     _get_default_params_from_func,
@@ -100,7 +101,6 @@
     serialize_booster,
     use_cuda,
 )
-from .xgboost_training_summary import _XGBoostTrainingSummary
 
 # Put pyspark specific params here, they won't be passed to XGBoost.
 # like `validationIndicatorCol`, `base_margin_col`
@@ -706,7 +706,7 @@ def _pyspark_model_cls(cls) -> Type["_SparkXGBModel"]:
         raise NotImplementedError()
 
     def _create_pyspark_model(
-        self, xgb_model: XGBModel, training_summary: _XGBoostTrainingSummary
+        self, xgb_model: XGBModel, training_summary: XGBoostTrainingSummary
     ) -> "_SparkXGBModel":
         return self._pyspark_model_cls()(xgb_model, training_summary)
@@ -1202,9 +1202,7 @@ def _run_job() -> Tuple[str, str, str]:
         result_xgb_model = self._convert_to_sklearn_model(
             bytearray(booster, "utf-8"), config
         )
-        training_summary = _XGBoostTrainingSummary.from_metrics(
-            json.loads(evals_result)
-        )
+        training_summary = XGBoostTrainingSummary.from_metrics(json.loads(evals_result))
         spark_model = self._create_pyspark_model(result_xgb_model, training_summary)
         # According to pyspark ML convention, the model uid should be the same
         # with estimator uid.
@@ -1229,7 +1227,7 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
     def __init__(
         self,
         xgb_sklearn_model: Optional[XGBModel] = None,
-        training_summary: Optional[_XGBoostTrainingSummary] = None,
+        training_summary: Optional[XGBoostTrainingSummary] = None,
     ) -> None:
         super().__init__()
         self._xgb_sklearn_model = xgb_sklearn_model
diff --git a/python-package/xgboost/spark/xgboost_training_summary.py b/python-package/xgboost/spark/summary.py
similarity index 92%
rename from python-package/xgboost/spark/xgboost_training_summary.py
rename to python-package/xgboost/spark/summary.py
index 7c3c6f5093d5..eca5f6b128b7 100644
--- a/python-package/xgboost/spark/xgboost_training_summary.py
+++ b/python-package/xgboost/spark/summary.py
@@ -5,7 +5,7 @@
 
 
 @dataclass
-class _XGBoostTrainingSummary:
+class XGBoostTrainingSummary:
     """
     A class that holds the training and validation objective history
     of an XGBoost model during its training process.
@@ -17,7 +17,7 @@ class _XGBoostTrainingSummary:
     @staticmethod
     def from_metrics(
         metrics: Dict[str, Dict[str, List[float]]]
-    ) -> "_XGBoostTrainingSummary":
+    ) -> "XGBoostTrainingSummary":
         """
         Create an XGBoostTrainingSummary instance from a nested
         dictionary of metrics.
@@ -38,6 +38,6 @@
         """
         train_objective_history = metrics.get("training", {})
         validation_objective_history = metrics.get("validation", {})
-        return _XGBoostTrainingSummary(
+        return XGBoostTrainingSummary(
             train_objective_history, validation_objective_history
         )