Skip to content

Commit

Permalink
Strip spaces in parameter names, solve Excel user errors (#38)
Browse files Browse the repository at this point in the history
* Strip spaces in parameter names, solve Excel user errors

* Preserve NaNs

* Flake8 code
  • Loading branch information
berland authored May 18, 2020
1 parent 8432dbe commit 3eb2cec
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 15 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ universal = 1
[flake8]
exclude = docs,
tests/data
max-line-length = 88

[aliases]
test = pytest
Expand Down
2 changes: 1 addition & 1 deletion src/fmu/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from .sensitivities import find_combinations # noqa
from .sensitivities import excel2dict_design # noqa

from .qcforward import qcforward
from .qcforward import qcforward # noqa

try:
from .sensitivities import add_webviz_tornadoplots # noqa
Expand Down
9 changes: 4 additions & 5 deletions src/fmu/tools/sensitivities/_add_webviz_tornado_onebyone.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
import pandas as pd
import yaml

from fmu.tools.sensitivities import summarize_design
from fmu.tools.sensitivities import calc_tornadoinput, find_combinations
from fmu import ensemble

try:
from webviz import SubMenu, Page
from webviz.page_elements import TornadoPlot
Expand All @@ -27,11 +31,6 @@
"""


from fmu.tools.sensitivities import summarize_design
from fmu.tools.sensitivities import calc_tornadoinput, find_combinations
from fmu import ensemble


def yconfig(inputfile):
"""Read from YAML file."""
with open(inputfile, "r") as stream:
Expand Down
26 changes: 20 additions & 6 deletions src/fmu/tools/sensitivities/_excel2dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import pandas as pd
import yaml
import six

SEEDS_DEPRECATION_WARNING = """
The keyword "seeds" in the "general_input" sheet is changed
Expand Down Expand Up @@ -170,8 +171,8 @@ def _check_designinput(dsgn_input):
if _has_value(row.sensname):
if row.sensname in sensitivity_names:
raise ValueError(
"sensname '{}' was found on more than one row in designinput sheet. "
"Two sensitivities can not share the same sensname. "
"sensname '{}' was found on more than one row in designinput "
"sheet. Two sensitivities can not share the same sensname. "
"Please correct this and rerun".format(row.sensname)
)
else:
Expand All @@ -186,10 +187,9 @@ def _check_for_mixed_sensitivities(sens_name, sens_group):
if len(types) > 1:
raise ValueError(
"The sensitivity with sensname '{}' in designinput sheet contains more "
"than one sensitivity type. For each sensname all parameters must be specified "
"using the same type (seed, scenario, dist, ref, background, extern)".format(
sens_name
)
"than one sensitivity type. For each sensname all parameters must be "
"specified using the same type (seed, scenario, dist, ref, background, "
"extern)".format(sens_name)
)


Expand Down Expand Up @@ -278,6 +278,14 @@ def _excel2dict_onebyone(input_filename, sheetnames=None):
# Read input for sensitivities
inputdict["sensitivities"] = OrderedDict()
designinput = pd.read_excel(input_filename, design_inp_sheet)

# The first column (sensname) should have surrounding spaces stripped,
# but we need to preserve NaNs:
not_nan_sensnames = ~designinput["sensname"].isnull()
designinput.loc[not_nan_sensnames, "sensname"] = (
designinput.loc[not_nan_sensnames, "sensname"].astype(str).str.strip()
)

_check_designinput(designinput)

designinput["sensname"].fillna(method="ffill", inplace=True)
Expand Down Expand Up @@ -366,6 +374,12 @@ def _read_defaultvalues(filename, sheetname):
"""
default_dict = OrderedDict()
default_df = pd.read_excel(filename, sheetname, header=0, index_col=0)
# Strip spaces before and after parameter names; if present, they are
# probably invisible user errors in Excel.
default_df.index = [
paramname.strip() if isinstance(paramname, six.string_types) else paramname
for paramname in default_df.index
]
for row in default_df.itertuples():
if str(row[0]) in default_dict.keys():
print(
Expand Down
2 changes: 1 addition & 1 deletion src/fmu/tools/sensitivities/create_design.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def generate(self, realnums, seedname, seedvalues, parameters):
distributions or values.
"""
self.sensvalues = pd.DataFrame(index=realnums)
self.sensvalues[seedname] = seedvalues[0 : len(realnums)]
self.sensvalues[seedname] = seedvalues[0:len(realnums)]

if parameters is not None:
for key in parameters.keys():
Expand Down
41 changes: 39 additions & 2 deletions tests/test_excel2dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,44 @@ def test_duplicate_sensname_exception(tmpdir):
with pytest.raises(
ValueError, match="Two sensitivities can not share the same sensname"
):
dict_design = excel2dict_design("designinput3.xlsx")
dict_design = excel2dict_design("designinput3.xlsx") # noqa


def test_strip_spaces(tmpdir):
    """Check that surrounding spaces in sensnames and parameter names are stripped.

    Spaces before and after names are probably invisible user errors in
    Excel sheets, so the parsed design dictionary should not contain them.
    """
    mock_spacious_designinput = pd.DataFrame(
        data=[
            ["sensname", "numreal", "type", "param_name"],
            # The trailing space in the sensname is the user error under test.
            ["rms_seed ", "", "seed"],
        ]
    )
    defaultvalues_spacious = pd.DataFrame(
        data=[
            ["parametername", "value"],
            # One name with a leading space and one with a trailing space.
            [" spacious_multiplier", 1.2],
            ["spacious2 ", 3.3],
        ]
    )
    tmpdir.chdir()
    # Use the writer as a context manager so the workbook is written and the
    # file handle released on exit, instead of relying on an explicit save().
    with pd.ExcelWriter("designinput_spaces.xlsx") as writer:
        MOCK_GENERAL_INPUT.to_excel(
            writer, sheet_name="general_input", index=False, header=None
        )
        mock_spacious_designinput.to_excel(
            writer, sheet_name="designinput", index=False, header=None
        )
        defaultvalues_spacious.to_excel(
            writer, sheet_name="defaultvalues", index=False, header=None
        )

    dict_design = excel2dict_design("designinput_spaces.xlsx")
    assert list(dict_design["sensitivities"].keys())[0] == "rms_seed"

    # Check default values parameter names:
    def_params = list(dict_design["defaultvalues"].keys())
    assert [par.strip() for par in def_params] == def_params


def test_mixed_senstype_exception(tmpdir):
Expand All @@ -129,4 +166,4 @@ def test_mixed_senstype_exception(tmpdir):
writer.save()

with pytest.raises(ValueError, match="contains more than one sensitivity type"):
dict_design = excel2dict_design("designinput4.xlsx")
dict_design = excel2dict_design("designinput4.xlsx") # noqa

0 comments on commit 3eb2cec

Please sign in to comment.