From 2622c1b02c501e8881b6ddc9ce4c493f006c501e Mon Sep 17 00:00:00 2001 From: DanSava Date: Wed, 6 Nov 2024 14:29:44 +0200 Subject: [PATCH] Limit the use of EverestConfig in function signatures in everest csv exporting --- src/everest/__init__.py | 4 +- src/everest/bin/everexport_script.py | 14 +- src/everest/bin/everload_script.py | 3 +- src/everest/bin/utils.py | 22 +-- src/everest/config/everest_config.py | 48 ++++++ src/everest/config/export_config.py | 69 ++++++++ src/everest/detached/__init__.py | 10 +- src/everest/detached/jobs/everserver.py | 22 ++- src/everest/export.py | 162 ++++-------------- tests/everest/entry_points/test_everexport.py | 56 ++---- tests/everest/test_egg_simulation.py | 7 +- tests/everest/test_everserver.py | 6 +- tests/everest/test_export.py | 62 +++++-- tests/everest/test_math_func.py | 7 +- .../everest/unit/everest/bin/test_everload.py | 12 +- 15 files changed, 255 insertions(+), 249 deletions(-) diff --git a/src/everest/__init__.py b/src/everest/__init__.py index 188f73299a8..f6aa2421ce2 100644 --- a/src/everest/__init__.py +++ b/src/everest/__init__.py @@ -15,7 +15,7 @@ from everest import detached, docs, jobs, templates, util from everest.bin.utils import export_to_csv, export_with_progress from everest.config_keys import ConfigKeys -from everest.export import MetaDataColumnNames, export, filter_data, validate_export +from everest.export import MetaDataColumnNames, filter_data __author__ = "Equinor ASA and TNO" __all__ = [ @@ -23,7 +23,6 @@ "MetaDataColumnNames", "detached", "docs", - "export", "export_to_csv", "export_with_progress", "filter_data", @@ -31,5 +30,4 @@ "load", "templates", "util", - "validate_export", ] diff --git a/src/everest/bin/everexport_script.py b/src/everest/bin/everexport_script.py index 814201b951d..b7ba0bd62f6 100755 --- a/src/everest/bin/everexport_script.py +++ b/src/everest/bin/everexport_script.py @@ -4,7 +4,7 @@ import logging from functools import partial -from everest import export_to_csv, validate_export +from everest import export_to_csv, export_with_progress from everest.config import EverestConfig from everest.config.export_config import ExportConfig from everest.strings import EVEREST @@ -30,10 +30,18 @@ def everexport_entry(args=None): batch_list = [int(item) for item in options.batches] config.export.batches = batch_list - err_msgs, export_ecl = validate_export(config) + err_msgs, export_ecl = config.export.check_for_errors( + optimization_output_path=config.optimization_output_dir, + storage_path=config.storage_dir, + data_file_path=config.model.data_file, + ) for msg in err_msgs: logger.warning(msg) - export_to_csv(config, export_ecl=export_ecl) + + export_to_csv( + data_frame=export_with_progress(config, export_ecl), + export_path=config.export_path, + ) def _build_args_parser(): diff --git a/src/everest/bin/everload_script.py b/src/everest/bin/everload_script.py index 129779626e8..d6602efd245 100755 --- a/src/everest/bin/everload_script.py +++ b/src/everest/bin/everload_script.py @@ -11,7 +11,6 @@ from ert.config import ErtConfig from ert.storage import open_storage from everest import MetaDataColumnNames as MDCN -from everest import export from everest.config import EverestConfig from everest.config.export_config import ExportConfig from everest.simulator.everest_to_ert import _everest_to_ert_config_dict @@ -162,7 +161,7 @@ def reload_data(ever_config: EverestConfig, backup_path=None): ert_config = ErtConfig.with_plugins().from_dict(config_dict=ert_config_dict) # load information about batches from previous run - df = export(ever_config, export_ecl=False) + df = ever_config.export_data(export_ecl=False) groups = df.groupby(by=MDCN.BATCH) # backup or delete the previous internal storage diff --git a/src/everest/bin/utils.py b/src/everest/bin/utils.py index c02034fc634..9b1392e9952 100644 --- a/src/everest/bin/utils.py +++ b/src/everest/bin/utils.py @@ -8,6 +8,7 @@ import colorama from colorama import Fore +from pandas import DataFrame from ert.resources import all_shell_script_fm_steps from ert.simulator.batch_simulator_context import Status @@ -20,7 +21,6 @@ get_opt_status, start_monitor, ) -from everest.export import export from everest.simulator import JOB_FAILURE, JOB_RUNNING, JOB_SUCCESS from everest.strings import EVEREST @@ -35,24 +35,14 @@ def export_with_progress(config, export_ecl=True): if ProgressBar is not None: widgets = [Percentage(), " ", Bar(), " ", Timer(), " ", AdaptiveETA()] with ProgressBar(max_value=1, widgets=widgets) as bar: - export_data = export( - config=config, export_ecl=export_ecl, progress_callback=bar.update + return config.export_data( + export_ecl=export_ecl, progress_callback=bar.update ) - else: - export_data = export(config=config, export_ecl=export_ecl) - - return export_data - - -def export_to_csv(config: EverestConfig, data_frame=None, export_ecl=True): - if data_frame is None: - data_frame = export_with_progress(config, export_ecl) + return config.export_data(export_ecl=export_ecl) - export_path = config.export_path - output_folder = os.path.dirname(export_path) - if not os.path.exists(output_folder): - os.makedirs(output_folder) +def export_to_csv(data_frame: DataFrame, export_path: str) -> None: + os.makedirs(os.path.dirname(export_path), exist_ok=True) data_frame.to_csv(export_path, sep=";", index=False) logging.getLogger(EVEREST).info("Data exported to {}".format(export_path)) diff --git a/src/everest/config/everest_config.py b/src/everest/config/everest_config.py index f8c30b74797..f4a06d2d6b7 100644 --- a/src/everest/config/everest_config.py +++ b/src/everest/config/everest_config.py @@ -15,6 +15,7 @@ no_type_check, ) +import pandas as pd from pydantic import ( AfterValidator, BaseModel, @@ -41,6 +42,12 @@ unique_items, validate_forward_model_configs, ) +from everest.export import ( + MetaDataColumnNames, + export_metadata, + filter_data, + load_simulation_data, +) from everest.jobs import script_names from everest.util.forward_models import collect_forward_models @@ -828,3 +835,44 @@ def dump(self, fname: Optional[str] = None) -> Optional[str]: yaml.dump(stripped_conf, out) return None + + def export_data(self, export_ecl=True, progress_callback=lambda _: None): + """Export everest data into a pandas dataframe. If the config specifies + a data_file and @export_ecl is True, simulation data is included. When + exporting simulation data, only keywords matching elements in @ecl_keywords + are exported. Note that wildcards are allowed. + + @progress_callback will be called with a number between 0 and 1 indicating + the fraction of batches that has been loaded. + """ + + ecl_keywords = None + # If user exports with a config file that has the SKIP_EXPORT + # set to true export nothing + if self.export is not None: + if self.export.skip_export or self.export.batches == []: + return pd.DataFrame([]) + + ecl_keywords = self.export.keywords + + metadata = export_metadata(self.export, self.optimization_output_dir) + data_file = self.model.data_file + if data_file is None or not export_ecl: + return pd.DataFrame(metadata) + + data = load_simulation_data( + output_path=self.output_dir, + metadata=metadata, + progress_callback=progress_callback, + ) + + if ecl_keywords is not None: + keywords = tuple(ecl_keywords) + # NOTE: Some of these keywords are necessary to export successfully, + # we should not leave this to the user + keywords += tuple(pd.DataFrame(metadata).columns) + keywords += tuple(MetaDataColumnNames.get_all()) + keywords_set = set(keywords) + data = filter_data(data, keywords_set) + + return data diff --git a/src/everest/config/export_config.py b/src/everest/config/export_config.py index 60f612b4b54..4a31c72c5ec 100644 --- a/src/everest/config/export_config.py +++ b/src/everest/config/export_config.py @@ -3,6 +3,7 @@ from pydantic import BaseModel, Field, field_validator from everest.config.validation_utils import check_writable_filepath +from everest.export import available_batches, get_internalized_keys class ExportConfig(BaseModel, extra="forbid"): # type: ignore @@ -41,3 +42,71 @@ class ExportConfig(BaseModel, extra="forbid"): # type: ignore def validate_output_file_writable(cls, csv_output_filepath): # pylint:disable=E0213 check_writable_filepath(csv_output_filepath) return csv_output_filepath + + def check_for_errors( + self, + optimization_output_path: str, + storage_path: str, + data_file_path: Optional[str], + ): + """ + Checks for possible errors when attempting to export current optimization + case. + """ + export_ecl = True + export_errors: List[str] = [] + + if self.batches: + _available_batches = available_batches(optimization_output_path) + for batch in set(self.batches).difference(_available_batches): + export_errors.append( + "Batch {} not found in optimization " + "results. Skipping for current export." + "".format(batch) + ) + self.batches = list(set(self.batches).intersection(_available_batches)) + + if self.batches == []: + export_errors.append( + "No batches selected for export. " + "Only optimization data will be exported." + ) + return export_errors, False + + if not data_file_path: + export_ecl = False + export_errors.append( + "No data file found in config." + "Only optimization data will be exported." + ) + + # If no user defined keywords are present it is no longer possible to check + # availability in internal storage + if self.keywords is None: + return export_errors, export_ecl + + if not self.keywords: + export_ecl = False + export_errors.append( + "No eclipse keywords selected for export. Only" + " optimization data will be exported." + ) + + internal_keys = get_internalized_keys( + config=self, + storage_path=storage_path, + optimization_output_path=optimization_output_path, + batch_ids=set(self.batches) if self.batches else None, + ) + + extra_keys = set(self.keywords).difference(set(internal_keys)) + if extra_keys: + export_ecl = False + export_errors.append( + f"Non-internalized ecl keys selected for export '{' '.join(extra_keys)}'." + " in order to internalize missing keywords " + f"run 'everest load '. " + "Only optimization data will be exported." + ) + + return export_errors, export_ecl diff --git a/src/everest/detached/__init__.py b/src/everest/detached/__init__.py index beb26313f73..c882fef63c4 100644 --- a/src/everest/detached/__init__.py +++ b/src/everest/detached/__init__.py @@ -86,7 +86,8 @@ def start_server(config: EverestConfig, ert_config: ErtConfig, storage): ) try: - _save_running_config(config) + save_config_path = os.path.join(config.output_dir, config.config_file) + config.dump(save_config_path) except (OSError, LookupError) as e: logging.getLogger(EVEREST).error( "Failed to save optimization config: {}".format(e) @@ -121,13 +122,6 @@ def start_server(config: EverestConfig, ert_config: ErtConfig, storage): return _context -def _save_running_config(config: EverestConfig): - assert config.output_dir is not None - assert config.config_file is not None - save_config_path = os.path.join(config.output_dir, config.config_file) - config.dump(save_config_path) - - def context_stop_and_wait(): global _context # noqa: PLW0602 if _context: diff --git a/src/everest/detached/jobs/everserver.py b/src/everest/detached/jobs/everserver.py index aee3bf8fdff..e1be83087ad 100755 --- a/src/everest/detached/jobs/everserver.py +++ b/src/everest/detached/jobs/everserver.py @@ -22,7 +22,7 @@ from ert.config import QueueSystem from ert.ensemble_evaluator import EvaluatorServerConfig from ert.run_models.everest_run_model import EverestRunModel -from everest import export_to_csv, validate_export +from everest import export_to_csv, export_with_progress from everest.config import EverestConfig from everest.detached import ServerStatus, get_opt_status, update_everserver_status from everest.simulator import JOB_FAILURE @@ -304,10 +304,22 @@ def main(): try: # Exporting data update_everserver_status(config, ServerStatus.exporting_to_csv) - err_msgs, export_ecl = validate_export(config) - for msg in err_msgs: - logging.getLogger(EVEREST).warning(msg) - export_to_csv(config, export_ecl=export_ecl) + + if config.export is not None: + err_msgs, export_ecl = config.export.check_for_errors( + optimization_output_path=config.optimization_output_dir, + storage_path=config.storage_dir, + data_file_path=config.model.data_file, + ) + for msg in err_msgs: + logging.getLogger(EVEREST).warning(msg) + else: + export_ecl = True + + export_to_csv( + data_frame=export_with_progress(config, export_ecl), + export_path=config.export_path, + ) except: update_everserver_status( config, ServerStatus.failed, message=traceback.format_exc() diff --git a/src/everest/export.py b/src/everest/export.py index 0a0642652f1..c885f816253 100644 --- a/src/everest/export.py +++ b/src/everest/export.py @@ -1,16 +1,18 @@ import os import re import sys -from typing import Any, Dict, List, Optional, Set +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set import pandas as pd from pandas import DataFrame from seba_sqlite.snapshot import SebaSnapshot from ert.storage import open_storage -from everest.config import EverestConfig from everest.strings import STORAGE_DIR +if TYPE_CHECKING: + from everest.config import ExportConfig + if sys.version_info < (3, 11): from enum import Enum @@ -66,35 +68,33 @@ def filter_data(data: DataFrame, keyword_filters: Set[str]): return data[filtered_columns] -def _valid_batches(batches: List[int], config: EverestConfig): - snapshot = SebaSnapshot(config.optimization_output_dir).get_snapshot( +def available_batches(optimization_output_dir: str) -> Set[int]: + snapshot = SebaSnapshot(optimization_output_dir).get_snapshot( filter_out_gradient=False, batches=None ) - available_batches = {data.batch for data in snapshot.simulation_data} - valid_batches = [batch for batch in batches if batch in available_batches] - return valid_batches + return {data.batch for data in snapshot.simulation_data} -def _metadata(config: EverestConfig): +def export_metadata(config: Optional["ExportConfig"], optimization_output_dir: str): discard_gradient = True discard_rejected = True batches = None - if config.export is not None: - if config.export.discard_gradient is not None: - discard_gradient = config.export.discard_gradient + if config: + if config.discard_gradient is not None: + discard_gradient = config.discard_gradient - if config.export.discard_rejected is not None: - discard_rejected = config.export.discard_rejected + if config.discard_rejected is not None: + discard_rejected = config.discard_rejected - if config.export.batches: + if config.batches: # If user defined batches to export in the conf file, ignore previously # discard gradient and discard rejected flags if defined and true discard_rejected = False discard_gradient = False - batches = config.export.batches + batches = config.batches - snapshot = SebaSnapshot(config.optimization_output_dir).get_snapshot( + snapshot = SebaSnapshot(optimization_output_dir).get_snapshot( filter_out_gradient=discard_gradient, batches=batches, ) @@ -154,12 +154,17 @@ def _metadata(config: EverestConfig): return metadata -def get_internalized_keys(config: EverestConfig, batch_ids: Optional[Set[int]] = None): +def get_internalized_keys( + config: "ExportConfig", + storage_path: str, + optimization_output_path: str, + batch_ids: Optional[Set[int]] = None, +): if batch_ids is None: - metadata = _metadata(config) + metadata = export_metadata(config, optimization_output_path) batch_ids = {data[MetaDataColumnNames.BATCH] for data in metadata} internal_keys: Set = set() - with open_storage(config.storage_dir, "r") as storage: + with open_storage(storage_path, "r") as storage: for batch_id in batch_ids: case_name = f"batch_{batch_id}" experiments = [*storage.experiments] @@ -177,120 +182,12 @@ def get_internalized_keys(config: EverestConfig, batch_ids: Optional[Set[int]] = return internal_keys -def validate_export(config: EverestConfig): - """ - Checks for possible errors when attempting to export current optimization - case. - :param config: Everest config - :return: List of error messages - """ - export_ecl = True - export_errors: List[str] = [] - if config.export is None: - return export_errors, export_ecl - - # TODO turn into attr accessor when ExplicitNone & everlint is phased out - user_def_batches = config.export.batches - if user_def_batches: - valid_batches = _valid_batches(user_def_batches, config) - for batch in user_def_batches: - if batch not in valid_batches: - export_errors.append( - "Batch {} not found in optimization " - "results. Skipping for current export." - "".format(batch) - ) - user_def_batches = valid_batches - config.export.batches = user_def_batches - - if user_def_batches == []: - export_ecl = False - export_errors.append( - "No batches selected for export. " - "Only optimization data will be exported." - ) - return export_errors, export_ecl - - if not config.model.data_file: - export_ecl = False - export_errors.append( - "No data file found in config." "Only optimization data will be exported." - ) - - user_def_kw = config.export.keywords - if user_def_kw == []: - export_ecl = False - export_errors.append( - "No eclipse keywords selected for export. Only" - " optimization data will be exported." - ) - - # If no user defined keywords are present it is no longer possible to check - # availability in internal storage - if user_def_kw is None: - return export_errors, export_ecl - - internal_keys = get_internalized_keys( - config, set(user_def_batches) if user_def_batches else None - ) - - extra_keys = set(user_def_kw).difference(set(internal_keys)) - if extra_keys: - export_ecl = False - export_errors.append( - f"Non-internalized ecl keys selected for export '{' '.join(extra_keys)}'." - " in order to internalize missing keywords " - f"run 'everest load {config.config_file}'. " - "Only optimization data will be exported." - ) - - return export_errors, export_ecl - - -def export(config: EverestConfig, export_ecl=True, progress_callback=lambda _: None): - """Export everest data into a pandas dataframe. If the config specifies - a data_file and @export_ecl is True, simulation data is included. When - exporting simulation data, only keywords matching elements in @ecl_keywords - are exported. Note that wildcards are allowed. - - @progress_callback will be called with a number between 0 and 1 indicating - the fraction of batches that has been loaded. - """ - - ecl_keywords = None - # If user exports with a config file that has the SKIP_EXPORT - # set to true export nothing - if config.export is not None: - if config.export.skip_export or config.export.batches == []: - return pd.DataFrame([]) - - ecl_keywords = config.export.keywords - - metadata = _metadata(config) - data_file = config.model.data_file - if data_file is None or not export_ecl: - return pd.DataFrame(metadata) - - data = _load_simulation_data(config, metadata, progress_callback=progress_callback) - - if ecl_keywords is not None: - keywords = tuple(ecl_keywords) - # NOTE: Some of these keywords are necessary for a success full export, - # should not leave this to the user.. - keywords += tuple(pd.DataFrame(metadata).columns) - keywords += tuple(MetaDataColumnNames.get_all()) - keywords_set = set(keywords) - data = filter_data(data, keywords_set) - - return data - - -def _load_simulation_data( - config: EverestConfig, metadata: List[dict], progress_callback=lambda _: None +def load_simulation_data( + output_path: str, metadata: List[dict], progress_callback=lambda _: None ): """Export simulations to a pandas DataFrame - @config the case configuration - @tags is a one ora a list of dictionaries. Keys from the dictionary become + @output_path optimization output folder path. + @metadata is a one ora a list of dictionaries. Keys from the dictionary become columns in the resulting dataframe. The values from the dictionary are assigned to those columns for the corresponding simulation. If a column is defined for some simulations but not for others, the value @@ -310,8 +207,7 @@ def _load_simulation_data( 4 2 pi True sim_2_row_0... 5 2 pi True sim_3_row_0... """ - assert config.output_dir is not None # avoiding mypy error - ens_path = os.path.join(config.output_dir, STORAGE_DIR) + ens_path = os.path.join(output_path, STORAGE_DIR) with open_storage(ens_path, "r") as storage: # pylint: disable=unnecessary-lambda-assignment def load_batch_by_id(): diff --git a/tests/everest/entry_points/test_everexport.py b/tests/everest/entry_points/test_everexport.py index 383d50c5c5a..30d66ee2fb7 100644 --- a/tests/everest/entry_points/test_everexport.py +++ b/tests/everest/entry_points/test_everexport.py @@ -13,7 +13,6 @@ from everest.config import EverestConfig from tests.everest.utils import ( create_cached_mocked_test_case, - satisfy, satisfy_callable, ) @@ -58,7 +57,7 @@ def empty_mock(config, export_ecl=True, progress_callback=lambda _: None): return pd.DataFrame() -def validate_export_mock(config): +def validate_export_mock(**_): return ([], True) @@ -67,7 +66,7 @@ def cache_dir(request, monkeypatch): return create_cached_mocked_test_case(request, monkeypatch) -@patch("everest.bin.utils.export_with_progress", side_effect=export_mock) +@patch("everest.bin.everexport_script.export_with_progress", side_effect=export_mock) def test_everexport_entry_run(mocked_func, copy_math_func_test_data_to_tmp): """Test running everexport with not flags""" # NOTE: there is probably a bug concerning output folders. Everexport @@ -83,7 +82,7 @@ def test_everexport_entry_run(mocked_func, copy_math_func_test_data_to_tmp): assert df.equals(TEST_DATA) -@patch("everest.bin.utils.export_with_progress", side_effect=empty_mock) +@patch("everest.bin.everexport_script.export_with_progress", side_effect=empty_mock) def test_everexport_entry_empty(mocked_func, copy_math_func_test_data_to_tmp): """Test running everexport with no data""" # NOTE: When there is no data (ie, the optimization has not yet run) @@ -103,10 +102,10 @@ def test_everexport_entry_empty(mocked_func, copy_math_func_test_data_to_tmp): @patch( - "everest.bin.everexport_script.validate_export", + "everest.config.export_config.ExportConfig.check_for_errors", side_effect=validate_export_mock, ) -@patch("everest.bin.utils.export") +@patch("everest.config.everest_config.EverestConfig.export_data") @pytest.mark.fails_on_macos_github_workflow def test_everexport_entry_batches( mocked_func, validate_export_mock, copy_math_func_test_data_to_tmp @@ -122,12 +121,11 @@ def check_export_batches(config: EverestConfig): if ProgressBar: # different calls if ProgressBar available or not mocked_func.assert_called_once_with( - config=satisfy(check_export_batches), export_ecl=True, progress_callback=satisfy_callable(), ) else: - mocked_func.assert_called_once_with(config=satisfy(check_export_batches)) + mocked_func.assert_called_once() @patch("everest.bin.everexport_script.export_to_csv") @@ -164,7 +162,7 @@ def test_everexport_entry_empty_export(mocked_func, copy_math_func_test_data_to_ mocked_func.assert_called_once() -@patch("everest.bin.utils.export") +@patch("everest.config.everest_config.EverestConfig.export_data") @pytest.mark.fails_on_macos_github_workflow def test_everexport_entry_no_usr_def_ecl_keys( mocked_func, copy_mocked_test_data_to_tmp @@ -183,22 +181,16 @@ def test_everexport_entry_no_usr_def_ecl_keys( everexport_entry([CONFIG_FILE_MOCKED_TEST_CASE]) - def condition(config: EverestConfig): - batches = config.export.batches if config.export is not None else None - keys = config.export.keywords if config.export is not None else None - return batches is None and keys is None - if ProgressBar: mocked_func.assert_called_once_with( - config=satisfy(condition), export_ecl=True, progress_callback=satisfy_callable(), ) else: - mocked_func.assert_called_once_with(config=satisfy(condition), export_ecl=True) + mocked_func.assert_called_once_with(export_ecl=True) -@patch("everest.bin.utils.export") +@patch("everest.config.everest_config.EverestConfig.export_data") @pytest.mark.fails_on_macos_github_workflow def test_everexport_entry_internalized_usr_def_ecl_keys( mocked_func, cache_dir, copy_mocked_test_data_to_tmp @@ -225,23 +217,16 @@ def test_everexport_entry_internalized_usr_def_ecl_keys( everexport_entry([CONFIG_FILE_MOCKED_TEST_CASE]) - def condition(config: EverestConfig): - batches = config.export.batches if config.export is not None else None - keys = config.export.keywords if config.export is not None else None - - return batches is None and keys == user_def_keys - if ProgressBar: mocked_func.assert_called_once_with( - config=satisfy(condition), export_ecl=True, progress_callback=satisfy_callable(), ) else: - mocked_func.assert_called_once_with(config=satisfy(condition), export_ecl=True) + mocked_func.assert_called_once_with(export_ecl=True) -@patch("everest.bin.utils.export") +@patch("everest.config.everest_config.EverestConfig.export_data") @pytest.mark.fails_on_macos_github_workflow def test_everexport_entry_non_int_usr_def_ecl_keys( mocked_func, cache_dir, caplog, copy_mocked_test_data_to_tmp @@ -274,23 +259,16 @@ def test_everexport_entry_non_int_usr_def_ecl_keys( in "\n".join(caplog.messages) ) - def condition(config: EverestConfig): - batches = config.export.batches if config.export is not None else None - keys = config.export.keywords if config.export is not None else None - - return batches is None and keys == user_def_keys - if ProgressBar: mocked_func.assert_called_once_with( - config=satisfy(condition), export_ecl=False, progress_callback=satisfy_callable(), ) else: - mocked_func.assert_called_once_with(config=satisfy(condition), export_ecl=False) + mocked_func.assert_called_once_with(export_ecl=False) -@patch("everest.bin.utils.export") +@patch("everest.config.everest_config.EverestConfig.export_data") @pytest.mark.fails_on_macos_github_workflow def test_everexport_entry_not_available_batches( mocked_func, cache_dir, caplog, copy_mocked_test_data_to_tmp @@ -324,16 +302,10 @@ def test_everexport_entry_not_available_batches( f" Skipping for current export" in "\n".join(caplog.messages) ) - def condition(config: EverestConfig): - batches = config.export.batches if config.export is not None else None - keys = config.export.keywords if config.export is not None else None - return batches == [0] and keys is None - if ProgressBar: mocked_func.assert_called_once_with( - config=satisfy(condition), export_ecl=True, progress_callback=satisfy_callable(), ) else: - mocked_func.assert_called_once_with(config=satisfy(condition), export_ecl=True) + mocked_func.assert_called_once_with(export_ecl=True) diff --git a/tests/everest/test_egg_simulation.py b/tests/everest/test_egg_simulation.py index d1b995abbeb..dfc021a108a 100644 --- a/tests/everest/test_egg_simulation.py +++ b/tests/everest/test_egg_simulation.py @@ -3,7 +3,6 @@ import pytest -import everest from ert.config import ErtConfig, QueueSystem from ert.config.parsing import ConfigKeys as ErtConfigKeys from ert.ensemble_evaluator import EvaluatorServerConfig @@ -715,7 +714,7 @@ def sweetcallbackofmine(self, *args, **kwargs): # self.assertAlmostEqual(result.total_objective, 0.851423, delta=0.5) # Test conversion to pandas DataFrame - df = everest.export(config) + df = config.export_data() # Check meta data export for meta_key in MetaDataColumnNames.get_all(): @@ -777,7 +776,7 @@ def sweetcallbackofmine(self, *args, **kwargs): # Check export filter config.export = ExportConfig(keywords=["*OPT*"]) - filtered_df = everest.export(config) + filtered_df = config.export_data() exp_keywords += MetaDataColumnNames.get_all() columns = sorted(set(filtered_df.columns)) @@ -831,7 +830,7 @@ def sweetcallbackofmine(self, *args, **kwargs): assert cbtracker.called snapshot.assert_match( - everest.export(config) + config.export_data() .drop(columns=["TCPUDAY", "start_time", "end_time"], axis=1) .round(6) .to_csv(), diff --git a/tests/everest/test_everserver.py b/tests/everest/test_everserver.py index 08e1328ca97..14fc79da8cb 100644 --- a/tests/everest/test_everserver.py +++ b/tests/everest/test_everserver.py @@ -114,11 +114,11 @@ def test_everserver_status_failure(_1, copy_math_func_test_data_to_tmp): self.everest_config, status=ServerStatus.running ), ) -@patch("everest.detached.jobs.everserver.validate_export", return_value=([], False)) @patch( - "everest.detached.jobs.everserver.export_to_csv", - side_effect=partial(check_status, status=ServerStatus.exporting_to_csv), + "everest.config.export_config.ExportConfig.check_for_errors", + return_value=([], False), ) +@patch("everest.detached.jobs.everserver.export_to_csv") def test_everserver_status_running_complete( _1, _2, _3, _4, _5, _6, _7, _8, _9, copy_math_func_test_data_to_tmp ): diff --git a/tests/everest/test_export.py b/tests/everest/test_export.py index 3652c668ff2..b55448af879 100644 --- a/tests/everest/test_export.py +++ b/tests/everest/test_export.py @@ -8,7 +8,6 @@ from everest.bin.utils import export_with_progress from everest.config import EverestConfig from everest.config.export_config import ExportConfig -from everest.export import export, validate_export from tests.everest.utils import create_cached_mocked_test_case, relpath CONFIG_FILE_MOCKED_TEST_CASE = "mocked_multi_batch.yml" @@ -74,7 +73,7 @@ def test_export_only_non_gradient_with_increased_merit(copy_math_func_test_data_ ) # Default export functionality when no export section is defined - df = export(config) + df = config.export_data() # Test that the default export functionality generated data frame # contains only non gradient simulations @@ -98,7 +97,7 @@ def test_export_only_non_gradient(copy_math_func_test_data_to_tmp): # Add export section to config config.export = ExportConfig(discard_rejected=False) - df = export(config) + df = config.export_data() # Check if only discard rejected key is set to False in the export # section the export will contain only non-gradient simulations @@ -121,7 +120,7 @@ def test_export_only_increased_merit(copy_math_func_test_data_to_tmp): # Add export section to config config.export = ExportConfig(discard_gradient=False) - df = export(config) + df = config.export_data() # Check the export contains both gradient and non-gradient simulation # when discard gradient key is set to False @@ -144,7 +143,7 @@ def test_export_all_batches(copy_math_func_test_data_to_tmp): # Add export section to config config.export = ExportConfig(discard_gradient=False, discard_rejected=False) - df = export(config) + df = config.export_data() # Check the export contains both gradient and non-gradient simulation assert 1 in df["is_gradient"].values @@ -166,7 +165,7 @@ def test_export_only_give_batches(copy_math_func_test_data_to_tmp): # Add export section to config config.export = ExportConfig(discard_gradient=True, batches=[2]) - df = export(config) + df = config.export_data() # Check only simulations from given batches are present in export for id in df["batch"].values: assert id == 2 @@ -203,7 +202,7 @@ def test_export_nothing_for_empty_batch_list(copy_math_func_test_data_to_tmp): config.export = ExportConfig( discard_gradient=True, discard_rejected=True, batches=[] ) - df = export(config) + df = config.export_data() # Check export returns empty data frame assert df.empty @@ -221,7 +220,7 @@ def test_export_nothing(copy_math_func_test_data_to_tmp): config.export = ExportConfig( skip_export=True, discard_gradient=True, discard_rejected=True, batches=[3] ) - df = export(config) + df = config.export_data() # Check export returns empty data frame assert df.empty @@ -290,31 +289,51 @@ def check_error(expected_error, reported_errors): assert found assert expected_export_ecl == export_ecl - # Test export validator outputs no errors when the config file contains - # an empty export section - config.export = None - check_error(("", True), validate_export(config)) - # Test error when user defines an empty list for the eclipse keywords config.export = ExportConfig() config.export.keywords = [] + errors, export_ecl = config.export.check_for_errors( + optimization_output_path=config.optimization_output_dir, + storage_path=config.storage_dir, + data_file_path=config.model.data_file, + ) check_error( - ("No eclipse keywords selected for export", False), validate_export(config) + expected_error=("No eclipse keywords selected for export", False), + reported_errors=(errors, export_ecl), ) # Test error when user defines an empty list for the eclipse keywords # and empty list of for batches to export config.export.batches = [] - check_error(("No batches selected for export.", False), validate_export(config)) + errors, export_ecl = config.export.check_for_errors( + optimization_output_path=config.optimization_output_dir, + storage_path=config.storage_dir, + data_file_path=config.model.data_file, + ) + check_error( + expected_error=("No batches selected for export.", False), + reported_errors=(errors, export_ecl), + ) # Test export validator outputs no errors when the config file contains # only keywords that represent a subset of already internalized keys config.export.keywords = ["FOPT"] config.export.batches = None - check_error(("", True), validate_export(config)) + errors, export_ecl = config.export.check_for_errors( + optimization_output_path=config.optimization_output_dir, + storage_path=config.storage_dir, + data_file_path=config.model.data_file, + ) + check_error(expected_error=("", True), reported_errors=(errors, export_ecl)) non_int_key = "STANGE_KEY" config.export.keywords = [non_int_key, "FOPT"] + errors, export_ecl = config.export.check_for_errors( + optimization_output_path=config.optimization_output_dir, + storage_path=config.storage_dir, + data_file_path=config.model.data_file, + ) + check_error( ( "Non-internalized ecl keys selected for export '{keys}'." "".format( @@ -322,20 +341,25 @@ def check_error(expected_error, reported_errors): ), False, ), - validate_export(config), + (errors, export_ecl), ) # Test that validating the export spots non-valid batches and removes # them from the list of batches selected for export. non_valid_batch = 42 config.export = ExportConfig(batches=[0, non_valid_batch]) + errors, export_ecl = config.export.check_for_errors( + optimization_output_path=config.optimization_output_dir, + storage_path=config.storage_dir, + data_file_path=config.model.data_file, + ) check_error( ( "Batch {} not found in optimization results. Skipping for" " current export".format(non_valid_batch), True, ), - validate_export(config), + (errors, export_ecl), ) assert config.export.batches == [0] @@ -348,7 +372,7 @@ def test_export_gradients(copy_math_func_test_data_to_tmp): os.path.join(config.optimization_output_dir, "seba.db"), ) - df = export(config) + df = config.export_data() for function in config.objective_functions: for control in config.controls: diff --git a/tests/everest/test_math_func.py b/tests/everest/test_math_func.py index 418cc23217e..ef3a5948ea0 100644 --- a/tests/everest/test_math_func.py +++ b/tests/everest/test_math_func.py @@ -9,7 +9,6 @@ from everest import ConfigKeys as CK from everest.config import EverestConfig from everest.config.export_config import ExportConfig -from everest.export import export from everest.util import makedirs_if_needed CONFIG_FILE_MULTIOBJ = "config_multiobj.yml" @@ -49,7 +48,7 @@ def test_math_func_multiobj( if config.export is None: config.export = ExportConfig(discard_rejected=False) - df = export(config) + df = config.export_data() ok_evals = df[(df["is_gradient"] == 0) & (df["success"] == 1)] # Three points in this case are increasing the merit @@ -136,7 +135,7 @@ def test_math_func_advanced( assert expected_opt == pytest.approx(run_model.result.total_objective, abs=0.001) # Test conversion to pandas DataFrame - df = export(config) + df = config.export_data() ok_evals = df[(df["is_gradient"] == 0) & (df["success"] == 1)] ok_evals_0 = ok_evals[ok_evals["realization"] == 0] @@ -169,7 +168,7 @@ def test_math_func_advanced( batches_list = [0, 2] config.export.batches = batches_list - batch_filtered_df = export(config) + batch_filtered_df = config.export_data() n_unique_batches = batch_filtered_df["batch"].nunique() unique_batches = np.sort(batch_filtered_df["batch"].unique()).tolist() diff --git a/tests/everest/unit/everest/bin/test_everload.py b/tests/everest/unit/everest/bin/test_everload.py index 38c9b5da349..305c0a5e405 100644 --- a/tests/everest/unit/everest/bin/test_everload.py +++ b/tests/everest/unit/everest/bin/test_everload.py @@ -11,7 +11,6 @@ from ert.config import ErtConfig from everest import MetaDataColumnNames as MDCN -from everest import export from everest.bin.everload_script import everload_entry from everest.config import EverestConfig from everest.strings import STORAGE_DIR @@ -66,8 +65,7 @@ def test_everload_entry_run( """Test running everload on an optimization case""" config = get_config(cache_dir) everload_entry([CONFIG_FILE, "-s"]) - - df = export(config, export_ecl=False) + df = config.export_data(export_ecl=False) batch_ids = set(df[MDCN.BATCH]) assertInternalizeCalls(batch_ids, mocked_internalize) assertBackup(config) @@ -109,7 +107,7 @@ def test_everload_entry_batches( """Test running everload with a selection of batches""" config = get_config(cache_dir) # pick every second batch (assume there are at least 2) - df = export(config, export_ecl=False) + df = config.export_data(export_ecl=False) batch_ids = list(set(df[MDCN.BATCH])) assert len(batch_ids) > 1 batch_ids = batch_ids[::2] @@ -146,7 +144,7 @@ def test_everload_entry_overwrite( config = get_config(cache_dir) everload_entry([CONFIG_FILE, "-s", "--overwrite"]) - df = export(config, export_ecl=False) + df = config.export_data(export_ecl=False) batch_ids = set(df[MDCN.BATCH]) assertInternalizeCalls(batch_ids, mocked_internalize) @@ -183,11 +181,11 @@ def test_everload_entry_not_silent( with patch("everest.bin.everload_script.input", side_effect=yes): everload_entry([CONFIG_FILE]) assert len(stdout.getvalue()) > 0 - df = export(config, export_ecl=False) + df = config.export_data(export_ecl=False) batch_ids = set(df[MDCN.BATCH]) assertInternalizeCalls(batch_ids, mocked_internalize) - df = export(config, export_ecl=False) + df = config.export_data(export_ecl=False) batch_ids = set(df[MDCN.BATCH]) assertInternalizeCalls(batch_ids, mocked_internalize) assertBackup(config)