From 6cba81b8a619f4457939c4e6062e5dbd82ed23ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Mon, 21 Jun 2021 13:16:13 +0200 Subject: [PATCH] Ensure regions and zones are always strings in dataframe (#143) --- src/fmu/tools/rms/volumetrics.py | 15 +++++++++------ tests/rms/test_rmsvolumetrics.py | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/fmu/tools/rms/volumetrics.py b/src/fmu/tools/rms/volumetrics.py index 57c9a858..888c2c21 100644 --- a/src/fmu/tools/rms/volumetrics.py +++ b/src/fmu/tools/rms/volumetrics.py @@ -140,14 +140,17 @@ def myregionrenamer(s): if zonerenamer: vol_df["ZONE"] = vol_df["ZONE"].apply(zonerenamer) - # Remove the Totals rows in case they are present. - # (todo: do this for all columns that are not not of numeric type) - checkfortotals = ["ZONE", "REGION", "LICENSE", "FACIES"] + index_columns = ["ZONE", "REGION", "LICENSE", "FACIES"] + present_index_columns = list(set(index_columns).intersection(vol_df.columns)) + # Index columns should always be of string datatype: + vol_df[present_index_columns] = vol_df[present_index_columns].astype(str) + + # Remove the Totals rows in case they are present, signified by the + # magic value "Totals" in any of the index columns: totalsrows = pd.Series([False] * len(vol_df)) - for col in checkfortotals: - if col in vol_df.columns: - totalsrows = totalsrows | (vol_df[col] == "Totals") + for col in present_index_columns: + totalsrows = totalsrows | (vol_df[col] == "Totals") vol_df = vol_df[~totalsrows].reset_index(drop=True) if outfile: diff --git a/tests/rms/test_rmsvolumetrics.py b/tests/rms/test_rmsvolumetrics.py index a7dbcb52..8e194cec 100755 --- a/tests/rms/test_rmsvolumetrics.py +++ b/tests/rms/test_rmsvolumetrics.py @@ -43,6 +43,25 @@ # This "fails" as there was not a double space between Zone and Bulk pd.DataFrame([{"Zone Bulk": "Upper 1"}]), ), + ( + # Integer zone, returned as string datatype + "Zone Bulk\n1 1.0", + "oil.txt", + pd.DataFrame([{"ZONE": "1", "BULK_OIL": 1.0}]), + ), + ( + # Integer Region, returned as string datatype + "Region index Bulk\n1 1.0", + "oil.txt", + pd.DataFrame([{"REGION": "1", "BULK_OIL": 1.0}]), + ), + ( + # Floating point Region, returned as string datatype. + # User is probably up for trouble if this is used. + "Region index Bulk\n1.0 1.0", + "oil.txt", + pd.DataFrame([{"REGION": "1.0", "BULK_OIL": 1.0}]), + ), ( # Two spaces: "Zone Bulk\nUpper 1.0",