diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..3f1720f --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,16 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +build: + image: latest + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py + + +python: + version: 3.8 + +requirements_file: requirements.txt \ No newline at end of file diff --git a/data/erp/l10_gbalo.xlsx b/data/erp/l10_gbalo.xlsx new file mode 100644 index 0000000..0ac01bf Binary files /dev/null and b/data/erp/l10_gbalo.xlsx differ diff --git a/data/erp/l11_gbalo.xlsx b/data/erp/l11_gbalo.xlsx new file mode 100644 index 0000000..a2b79d4 Binary files /dev/null and b/data/erp/l11_gbalo.xlsx differ diff --git a/data/erp/l2_gbalo.xlsx b/data/erp/l2_gbalo.xlsx new file mode 100644 index 0000000..7ada83a Binary files /dev/null and b/data/erp/l2_gbalo.xlsx differ diff --git a/data/erp/test_anomaly.xlsx b/data/erp/test_anomaly.xlsx new file mode 100644 index 0000000..01e7688 Binary files /dev/null and b/data/erp/test_anomaly.xlsx differ diff --git a/data/erp/testsafedata.csv b/data/erp/testsafedata.csv new file mode 100644 index 0000000..87c8dd4 --- /dev/null +++ b/data/erp/testsafedata.csv @@ -0,0 +1,46 @@ +pk,x,y,rho +0,790752,1092750,1101 +10,790747,1092758,1147 +20,790743,1092763,1345 +30,790738,1092770,1369 +40,790733,1092776.5,1406 +50,790729,1092783,1543 +60,790724,1092789.5,1480 +70,790720,1092796,1517 +80,790715,1092802.5,1754 +90,790711,1092809,1591 +100,790284,1093124,1260 +110,790281,1093118,1200 +120,790277,1093110,1160 +130,790270,1093104,1280 +140,790265,1093097,1100 +150,790260,1093092,1410 +160,790254,1093086,1680 +170,790248,1093079,1580 +180,790243,1093073,950 +190,790237,1093067,1750 +200,790231,1093061,1320 +210,790224,1093054,1370 +220,790218,1093049,1390 +230,790211,1093043,1700 +240,790206,1093037,1230 +250,790200,1093031,1480 +260,790194,1093027,1400 +270,790187,1093022,800 +280,790181,1093016,1660 +290,790175,1093011,1930 +300,790210,1093010,1680 +310,790214,1093016,1300 +320,790218,1093026,930 +330,790221,1093033,1460 +340,790224,1093040,1450 +350,790228,1093049,950 +360,790232,1093057,500 +370,790234,1093063,1300 +380,790237,1093071,1630 +390,790240,1093078,1400 +400,790244,1093085,1670 +410,790246,1093094,1540 +420,790250,1093101,930 +430,790254,1093108,1130 +440,790255,1093116,1380 diff --git a/data/erp/testsafedata.xlsx b/data/erp/testsafedata.xlsx new file mode 100644 index 0000000..20f99fa Binary files /dev/null and b/data/erp/testsafedata.xlsx differ diff --git a/data/erp/testunsafedata.csv b/data/erp/testunsafedata.csv new file mode 100644 index 0000000..b7142cb --- /dev/null +++ b/data/erp/testunsafedata.csv @@ -0,0 +1,46 @@ +x,stations,resapprho,NORTH +790752,40,1101,1092750 +790747,70,1147,1092758 +790743,100,1345,1092763 +790738,130,1369,1092770 +790733,160,1406,1092776.5 +790729,190,1543,1092783 +790724,220,1480,1092789.5 +790720,250,1517,1092796 +790715,280,1754,1092802.5 +790711,310,1591,1092809 +790284,340,1260,1093124 +790281,370,1200,1093118 +790277,400,1160,1093110 +790270,430,1280,1093104 +790265,460,1100,1093097 +790260,490,1410,1093092 +790254,520,1680,1093086 +790248,550,1580,1093079 +790243,580,950,1093073 +790237,610,1750,1093067 +790231,640,1320,1093061 +790224,670,1370,1093054 +790218,700,1390,1093049 +790211,730,1700,1093043 +790206,760,1230,1093037 +790200,790,1480,1093031 +790194,820,1400,1093027 
+790187,850,800,1093022 +790181,880,1660,1093016 +790175,910,1930,1093011 +790210,940,1680,1093010 +790214,970,1300,1093016 +790218,1000,930,1093026 +790221,1030,1460,1093033 +790224,1060,1450,1093040 +790228,1090,950,1093049 +790232,1120,500,1093057 +790234,1150,1300,1093063 +790237,1180,1630,1093071 +790240,1210,1400,1093078 +790244,1240,1670,1093085 +790246,1270,1540,1093094 +790250,1300,930,1093101 +790254,1330,1130,1093108 +790255,1360,1380,1093116 diff --git a/data/erp/testunsafedata.xlsx b/data/erp/testunsafedata.xlsx new file mode 100644 index 0000000..e844fcf Binary files /dev/null and b/data/erp/testunsafedata.xlsx differ diff --git a/data/erp/testunsafedata_extra.csv b/data/erp/testunsafedata_extra.csv new file mode 100644 index 0000000..a427633 --- /dev/null +++ b/data/erp/testunsafedata_extra.csv @@ -0,0 +1,46 @@ +x,stations,resapprho,NORTH,oppp ,add1 +790752,40,1101,1092750,12,0.25 +790747,70,1147,1092758,15,0.89 +790743,100,1345,1092763,18,0.1 +790738,130,1369,1092770,21,0.263333333 +790733,160,1406,1092776.5,24,0.188333333 +790729,190,1543,1092783,27,0.113333333 +790724,220,1480,1092789.5,30,0.038333333 +790720,250,1517,1092796,33,-0.036666667 +790715,280,1754,1092802.5,36,-0.111666667 +790711,310,1591,1092809,39,-0.186666667 +790284,340,1260,1093124,42,-0.261666667 +790281,370,1200,1093118,45,-0.336666667 +790277,400,1160,1093110,48,-0.411666667 +790270,430,1280,1093104,51,-0.486666667 +790265,460,1100,1093097,54,-0.561666667 +790260,490,1410,1093092,57,-0.636666667 +790254,520,1680,1093086,60,-0.711666667 +790248,550,1580,1093079,63,-0.786666667 +790243,580,950,1093073,66,-0.861666667 +790237,610,1750,1093067,69,-0.936666667 +790231,640,1320,1093061,72,-1.011666667 +790224,670,1370,1093054,75,-1.086666667 +790218,700,1390,1093049,78,-1.161666667 +790211,730,1700,1093043,81,-1.236666667 +790206,760,1230,1093037,84,-1.311666667 +790200,790,1480,1093031,87,-1.386666667 +790194,820,1400,1093027,90,-1.461666667 +790187,850,800,1093022,93,-1.536666667 +790181,880,1660,1093016,96,-1.611666667 +790175,910,1930,1093011,99,-1.686666667 +790210,940,1680,1093010,102,-1.761666667 +790214,970,1300,1093016,105,-1.836666667 +790218,1000,930,1093026,108,-1.911666667 +790221,1030,1460,1093033,111,-1.986666667 +790224,1060,1450,1093040,114,-2.061666667 +790228,1090,950,1093049,117,-2.136666667 +790232,1120,500,1093057,120,-2.211666667 +790234,1150,1300,1093063,123,-2.286666667 +790237,1180,1630,1093071,126,-2.361666667 +790240,1210,1400,1093078,129,-2.436666667 +790244,1240,1670,1093085,132,-2.511666667 +790246,1270,1540,1093094,135,-2.586666667 +790250,1300,930,1093101,138,-2.661666667 +790254,1330,1130,1093108,141,-2.736666667 +790255,1360,1380,1093116,144,-2.811666667 diff --git a/data/ves/ves_gbalo.csv b/data/ves/ves_gbalo.csv new file mode 100644 index 0000000..d7d6674 --- /dev/null +++ b/data/ves/ves_gbalo.csv @@ -0,0 +1,33 @@ +AB/2,MN/2,SE1,SE2,SE3,SE4 +1,0.4,943,294,1125,457 +2,0.4,1179,502,1345,582 +3,0.4,1103,572,1213,558 +4,0.4,1062,641,1007,421 +3,1,1121,453,1130,407 +4,1,1102,517,1077,387 +5,1,1380,485,927,351 +6,1,706,454,902,223 +8,1,484,327,878,213 +10,1,420,259,731,203 +12,1,357,170,540,181 +14,1,300,123,451,175 +16,1,258,89,407,162 +18,1,211,61,331,152 +20,1,159,45,321,122 +24,1,100,31,230,86 +20,5,168,97,206,72 +24,5,103,63,188,70 +28,5,75,46,102,63 +32,5,63,45,98,51 +36,5,60,43,87,46 +40,5,68,44,81,35 +45,5,77,44,78,48 +50,5,82,48,65,59 +55,5,94,46,54,66 +60,5,100,62,67,72 +55,10,71,56,79,83 +60,10,80,70,88,85 +70,10,95,74,101,92 +80,10,108,82,112,96 
+90,10,119,103,120,101
+100,10,142,95,156,110
diff --git a/data/ves/ves_gbalo.xlsx b/data/ves/ves_gbalo.xlsx
new file mode 100644
index 0000000..ce01167
Binary files /dev/null and b/data/ves/ves_gbalo.xlsx differ
diff --git a/data/ves/ves_gbalo_unique.xlsx b/data/ves/ves_gbalo_unique.xlsx
new file mode 100644
index 0000000..48aaefa
Binary files /dev/null and b/data/ves/ves_gbalo_unique.xlsx differ
diff --git a/kalfeat/__init__.py b/kalfeat/__init__.py
new file mode 100644
index 0000000..1c8d708
--- /dev/null
+++ b/kalfeat/__init__.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+# author-email:
+# Licence: GPL-3.0
+"""
+A package for fast detection of geo-electrical features
+========================================================
+
+`kalfeat`_ (short for `Kouadio et al.`_ feature detection) is designed to
+detect the geo-electrical features that are known as the foremost criteria
+for selecting the right location before any drilling, and to support
+predicting the groundwater flow rate from geology and DC resistivity data.
+The aim of kalfeat is twofold:
+
+   1. to minimize the rate of unsuccessful drillings after the geological
+      survey during CDWS and save money for geophysical and drilling
+      companies;
+   2. to maximize the number of boreholes intended for the population and
+      encourage partners to indirectly solve the problem of water scarcity.
+
+.. _Kouadio et al.: https://doi.org/10.1029/2021wr031623
+.. _kalfeat: https://github.com/WEgeophysics/kalfeat/
+
+"""
+import os
+import sys
+
+__version__ = '0.1.0'
+__author__ = 'Kouadio Laurent'
+
+from . import (
+    _kalfeatlog,
+    methods,
+    tools,
+    decorators,
+    documentation,
+    exceptions,
+    property,
+    sklearn,
+    typing,
+    __main__,
+    )
+
+if __name__ == '__main__' or __package__ is None:
+    sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+    sys.path.insert(0, os.path.dirname(__file__))
+    __package__ = 'kalfeat'
diff --git a/kalfeat/__main__.py b/kalfeat/__main__.py
new file mode 100644
index 0000000..2898ed4
--- /dev/null
+++ b/kalfeat/__main__.py
@@ -0,0 +1,12 @@
+import os
+import sys
+
+if __name__ == '__main__' or __package__ is None:
+    sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+    sys.path.insert(0, os.path.dirname(__file__))
+    __package__ = 'kalfeat'
+    # another way is to extend sys.path so as to force the relative import:
+    # sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+    # root_folder = r'{}'.format(pathlib.Path(
+    #     pathlib.Path(__file__).parent.absolute().parent))
diff --git a/kalfeat/_kalfeatlog.py b/kalfeat/_kalfeatlog.py
new file mode 100644
index 0000000..d99a9d9
--- /dev/null
+++ b/kalfeat/_kalfeatlog.py
@@ -0,0 +1,162 @@
+# -*- coding: utf-8 -*-
+# author: KLaurent
+# Licence: GPL-3.0
+
+"""
+`kalfeat`_ Logger
+=================
+
+Module to track bugs and issues, and also deal with all exceptions in
+:mod:`~.exceptions`.
+
+.. _kalfeat: https://github.com/WEgeophysics/kalfeat/
+
+"""
+
+import os
+import yaml
+import logging
+import logging.config
+import inspect
+
+class kalfeatlog:
+    """
+    Helper class to log the `kalfeat`_ module files and to track all
+    exceptions.
+
+    """
+
+    @staticmethod
+    def load_configure (path2configure =None, OwnloggerBaseConf=False) :
+        """
+        Configure/set up the logging according to the input config file.
+
+        :param path2configure: .yml, .ini, .conf, .json or .yaml file.
+            Its default is the klog.yml located in the same directory as
+            this module. It can be modified to use env variables to
+            search for a log config file.
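+
+        Example
+        -------
+        A minimal usage sketch (``klog.yml`` is the config shipped in the
+        package directory above; adjust the name if your copy differs)::
+
+            >>> from kalfeat._kalfeatlog import kalfeatlog
+            >>> kalfeatlog.load_configure('klog.yml')          # doctest: +SKIP
+            >>> logging.getLogger(__name__).info('configured')  # doctest: +SKIP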
+        """
+
+        configfile = path2configure
+
+        if configfile is None or configfile == "":
+            if not OwnloggerBaseConf:
+                logging.basicConfig()
+            else:
+                kalfeatlog().set_logger_output()
+
+        elif configfile.endswith(".yaml") or configfile.endswith(".yml"):
+            this_module_path = os.path.abspath(__file__)
+            logging.info("this module is : %s", this_module_path)
+
+            yaml_path = os.path.join(os.path.dirname(this_module_path),
+                                     configfile)
+            logging.info('Effective yaml configuration file %s', yaml_path)
+
+            if os.path.exists(yaml_path):
+                with open(yaml_path, "rt") as f:
+                    config = yaml.safe_load(f.read())
+                logging.config.dictConfig(config)
+            else:
+                logging.error(
+                    "the config yaml file %s does not exist", yaml_path)
+
+        elif configfile.endswith(".conf") or configfile.endswith(".ini"):
+            logging.config.fileConfig(configfile,
+                                      disable_existing_loggers=False)
+
+        elif configfile.endswith(".json"):
+            pass
+        else:
+            logging.error("logging configuration file %s is not supported",
+                          configfile)
+
+    @staticmethod
+    def get_kalfeat_logger(loggername=''):
+        """
+        Create a named logger (one per name/key in this Python
+        interpreter).
+
+        :param loggername: the name (key) of the logger object.
+        :return: a :class:`logging.Logger` instance.
+        """
+        logger = logging.getLogger(loggername)
+        kalfeatlog.load_configure()  # set configuration
+
+        return logger
+
+    @staticmethod
+    def load_configure_set_logfile (path2configfile=None):
+        """
+        Configure/set up the logging according to the input .yaml config
+        file.
+
+        :param path2configfile: .yml file, or your own user config file
+            (*.yml). Its default is the klog.yml located in the package
+            folder. It can be modified to use env variables to search
+            for a log config file.
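+
+        Example
+        -------
+        A hedged sketch, assuming a ``.yml`` config sits next to the
+        calling script (note that the call changes the current working
+        directory to the folder holding the config file)::
+
+            >>> kalfeatlog.load_configure_set_logfile(
+            ...     path2configfile='./klog.yml')   # doctest: +SKIP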
+
+        """
+
+        ownUserLogger = "klog.yml"
+        if path2configfile is None:
+            env_var = os.environ['kalfeat']
+            # klog.yml ships in the package folder (see kalfeat/klog.yml)
+            path2configfile = os.path.join(env_var, 'kalfeat',
+                                           ownUserLogger)
+
+        elif path2configfile is not None:
+            if os.path.isdir(os.path.dirname(path2configfile)):
+                if path2configfile.endswith('.yml') or \
+                        path2configfile.endswith('.yaml'):
+                    logging.info('Effective yaml configuration file :%s',
+                                 path2configfile)
+                else:
+                    logging.error(
+                        'File provided {%s} is not a .yaml config file!',
+                        os.path.basename(path2configfile))
+            else:
+                logging.error('Wrong path to .yaml config file.')
+
+        yaml_path = path2configfile
+        os.chdir(os.path.dirname(yaml_path))
+        if os.path.exists(yaml_path):
+            with open(yaml_path, "rt") as f:
+                config = yaml.safe_load(f.read())
+            logging.config.dictConfig(config)
+        else:
+            logging.error(
+                "the config yaml file %s does not exist", yaml_path)
+
+def test_yaml_configfile(yamlfile="klog.yml"):
+
+    this_func_name = inspect.getframeinfo(inspect.currentframe())[2]
+
+    UsersOwnConfigFile = yamlfile
+    kalfeatlog.load_configure(UsersOwnConfigFile)
+    logger = kalfeatlog.get_kalfeat_logger(__name__)
+
+    print((logger, id(logger), logger.name, logger.level, logger.handlers))
+
+    # use the named logger
+    logger.debug(this_func_name + ' __debug message')
+    logger.info(this_func_name + ' __info message')
+    logger.warning(this_func_name + ' __warn message')
+    logger.error(this_func_name + ' __error message')
+    logger.critical(this_func_name + ' __critical message')
+
+    print(("End of: ", this_func_name))
+
+if __name__ == '__main__':
+    kalfeatlog.load_configure(path2configure='klog.yml')
+
+    kalfeatlog.get_kalfeat_logger(__name__).error('First pseudo test error')
+    kalfeatlog.get_kalfeat_logger(__name__).info('Just a logging test')
+
\ No newline at end of file
diff --git a/kalfeat/decorators.py b/kalfeat/decorators.py
new file mode 100644
index 0000000..57aab10
--- /dev/null
+++ b/kalfeat/decorators.py
@@ -0,0 +1,929 @@
+# -*- coding: utf-8 -*-
+# author: KLaurent
+# Licence: GPL-3.0
+
+from __future__ import print_function
+
+import functools
+import inspect
+import os
+import copy
+import shutil
+import warnings
+
+import datetime
+import numpy as np
+# import pandas as pd
+import matplotlib.pyplot as plt
+
+from .typing import (
+    Iterable,
+    Optional,
+    Callable,
+    T, F
+)
+from ._kalfeatlog import kalfeatlog
+
+_logger = kalfeatlog.get_kalfeat_logger(__name__)
+
+__docformat__ = 'restructuredtext'
+
+class refAppender (object):
+    """ Append the module docstring with reStructuredText references.
+
+    When a `func` is decorated, the reStructuredText references are
+    appended to its docstring, so Sphinx can auto-resolve the replacing
+    values found inline from :doc:`kalfeat.documentation`.
+
+    .. |VES| replace:: Vertical Electrical Sounding
+    .. |ERP| replace:: Electrical Resistivity Profiling
+
+    Parameters
+    ----------
+    docref: str
+        Reference documentation to append.
+
+    Examples
+    --------
+    >>> from kalfeat.documentation import __doc__
+    >>> from kalfeat import decorators
+    >>> def donothing():
+    ...     ''' Im here to just replace the `|VES|` and `|ERP|` values by
+    ...     their real meanings.'''
+    ...     pass
+    >>> decorated_donothing = decorators.refAppender(__doc__)(donothing)
+    >>> decorated_donothing.__doc__
+    ... # new docstring appended; `|VES|` and `|ERP|` are replaced by
+    ... # Vertical Electrical Sounding and Electrical Resistivity Profiling
+    ... 
#during compilation in ReadTheDocs. + + """ + + def __init__(self, docref= None ): + self.docref = docref + + def __call__(self, cls_or_func): + return self.nfunc (cls_or_func) + def nfunc (self, f): + f.__doc__ += "\n" + self.docref or '' + setattr(f , '__doc__', f.__doc__) + return f + + + +class deprecated(object): + """ + Used to mark functions, methods and classes deprecated, and prints + warning message when it called + decorators based on https://stackoverflow.com/a/40301488 . + + Author: YingzhiGou + Date: 20/06/2017 + """ + def __init__(self, reason): # pragma: no cover + if inspect.isclass(reason) or inspect.isfunction(reason): + raise TypeError("Reason for deprecation must be supplied") + self.reason = reason + + def __call__(self, cls_or_func): # pragma: no cover + if inspect.isfunction(cls_or_func): + if hasattr(cls_or_func, 'func_code'): + _code = cls_or_func.__code__ + else: + _code = cls_or_func.__code__ + fmt = "Call to deprecated function or method {name} ({reason})." + filename = _code.co_filename + lineno = _code.co_firstlineno + 1 + + elif inspect.isclass(cls_or_func): + fmt = "Call to deprecated class {name} ({reason})." + filename = cls_or_func.__module__ + lineno = 1 + + else: + raise TypeError(type(cls_or_func)) + + msg = fmt.format(name=cls_or_func.__name__, reason=self.reason) + + @functools.wraps(cls_or_func) + def new_func(*args, **kwargs): # pragma: no cover + import warnings + warnings.simplefilter('always', DeprecationWarning) # turn off filter + warnings.warn_explicit(msg, category=DeprecationWarning, + filename=filename, lineno=lineno) + warnings.simplefilter('default', DeprecationWarning) # reset filter + return cls_or_func(*args, **kwargs) + + return new_func + + +class gdal_data_check(object): + + _has_checked = False + _gdal_data_found = False + _gdal_data_variable_resources = 'https://trac.osgeo.org/gdal/wiki/FAQInstallationAndBuilding#HowtosetGDAL_DATAvariable ' + _gdal_wheel_resources ='https://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal' + _gdal_installation_guide = 'https://opensourceoptions.com/blog/how-to-install-gdal-for-python-with-pip-on-windows/' + + + def __init__(self, func, raise_error=False): + """ + The decorator should only be used for the function that requires + gdal and gdal-data correctly. + + GDAL standas for Geospatial Data Abstraction Library. + It is a translator library for raster geospatial data formats. + Its distribution includes a complete GDAL installation + It will check whether the GDAL_DATA is set and the path + in GDAL_DATA exists. If GDAL_DATA is not set, then try to + use external program "gdal-config --datadir" to + findout where the data files are installed. + + If failed to find the data file, then ImportError will be raised. + + :param func: function to be decorated + + """ + + self._func = func + if not self._has_checked: + self._gdal_data_found = self._check_gdal_data() + self._has_checked = True + if not self._gdal_data_found: + if(raise_error): + raise ImportError( + "GDAL is NOT installed correctly. " + f"GDAL wheel can be downloaded from {self._gdal_wheel_resources}" + " and use `pip install `" + "for installing. Get more details here: " + f" {self._gdal_installation_guide}." + ) + else: + warnings.warn( + "Ignore GDAL as it is not working. Will use `pyproj` " + f"OR download the GDAL wheel from {self._gdal_wheel_resources}" + " and use `pip install ` " + "for GDAL installation. 
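+                # Editor's note: a brief usage sketch for the two decorators
+                # above (names are illustrative, not part of the package):
+                #
+                #     @deprecated("use the new plotting API instead")
+                #     def old_plot(*args, **kwargs): ...
+                #
+                #     @gdal_data_check
+                #     def reproject(shapefile): ...
+                #
+                # `deprecated` requires a reason string (a bare function
+                # raises TypeError), and each call to the wrapped function
+                # then emits a DeprecationWarning pointing at its definition.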
Get furher details via " + f"{self._gdal_installation_guide}" + ) + + def __call__(self, *args, **kwargs): # pragma: no cover + return self._func(*args, **kwargs) + + def _check_gdal_data(self): + if 'GDAL_DATA' not in os.environ: + # gdal data not defined, try to define + from subprocess import Popen, PIPE + _logger.warning("GDAL_DATA environment variable is not set " + f" Please see {self._gdal_data_variable_resources}") + try: + # try to find out gdal_data path using gdal-config + _logger.info("Trying to find gdal-data path ...") + process = Popen(['gdal-config', '--datadir'], stdout=PIPE) + (output, err) = process.communicate() + exit_code = process.wait() + output = output.strip() + if exit_code == 0 and os.path.exists(output): + os.environ['GDAL_DATA'] = output + _logger.info("Found gdal-data path: {}".format(output)) + return True + else: + _logger.error( + "\tCannot find gdal-data path. Please find the" + " gdal-data path of your installation and set it to" + "\"GDAL_DATA\" environment variable. Please see " + f"{self._gdal_data_variable_resources} for " + "more information.") + return False + except Exception: + return False + else: + if os.path.exists(os.environ['GDAL_DATA']): + _logger.info("GDAL_DATA is set to: {}". + format(os.environ['GDAL_DATA'])) + + try: + from osgeo import osr + from osgeo.ogr import OGRERR_NONE + except: + _logger.error("Failed to load module osgeo; " + "looks like GDAL is NOT working") + # print ("Failed to load module osgeo !!! ") + + return False + # end try + + return True + else: + _logger.error("GDAL_DATA is set to: {}," + " but the path does not exist.". + format(os.environ['GDAL_DATA'])) + return False + +class redirect_cls_or_func(object) : + """Used to redirected functions or classes. Deprecated functions or class + can call others use functions or classes. + + Use new function or class to replace old function method or class with + multiple parameters. + + Author: @Daniel03 + Date: 18/10/2020 + + """ + def __init__(self, *args, **kwargs) : + """ + self.new_func_or_cls is just a message of deprecating + warning . It could be a name of new function to let user + tracking its code everytime he needs . + + """ + + self._reason=[func_or_reason for func_or_reason in args + if type(func_or_reason)==str][0] + if self._reason is None : + + raise TypeError(" Redirected reason must be supplied") + + + self._new_func_or_cls = [func_or_reason for func_or_reason in + args if type(func_or_reason)!=str][0] + + if self._new_func_or_cls is None: + raise Exception( + " At least one argument must be a func_method_or class." + "\but it's %s."%type(self._new_func_or_cls)) + _logger.warn("\t first input argument argument must" + " be a func_method_or class." + "\but it's %s."%type(self._new_func_or_cls)) + + + def __call__(self, cls_or_func) : #pragma :no cover + + if inspect.isfunction(self._new_func_or_cls) : + if hasattr(self._new_func_or_cls, 'func_code'): + _code =self._new_func_or_cls.__code__ + lineno=_code.co_firstlineno+1 + else : + # do it once the method is decorated method like staticmethods + try: + _code =self._new_func_or_cls.__code__ + except : + pass + + lineno=self._new_func_or_cls.__code__.co_firstlineno + + fmt="redirected decorated func/methods .<{reason}> "\ + "see line {lineno}." + + elif inspect.isclass(self._new_func_or_cls): + _code=self._new_func_or_cls.__module__ + # filename=os.path.basename(_code.co_filename) + lineno= 1 + + fmt="redirected decorated class :<{reason}> "\ + "see line {lineno}." 
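+        # Editor's note: a usage sketch for `redirect_cls_or_func` -- it
+        # expects both a replacement callable and a reason string
+        # (hypothetical names):
+        #
+        #     @redirect_cls_or_func(new_compute, 'old_compute was renamed')
+        #     def old_compute(*args, **kwargs): ...
+        #
+        # The decorator logs the redirection message and returns
+        # `new_compute`, so calls to `old_compute` are served by the new
+        # implementation.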
+ else : + # lineno=cls_or_func.__code__.co_firstlineno + lineno= inspect.getframeinfo(inspect.currentframe())[1] + fmt="redirected decorated method :<{reason}> "\ + "see line {lineno}." + + msg=fmt.format(reason = self._reason, lineno=lineno) + # print(msg) + _logger.info(msg) + #count variables : func.__code__.co_argscounts + #find variables in function : func.__code__.co_varnames + @functools.wraps(cls_or_func) + def new_func (*args, **kwargs): + + return cls_or_func(*args, **kwargs) + return self._new_func_or_cls + + +class writef(object): + """ + Used to redirected functions or classes. Deprecated functions or class can + call others use functions or classes. + + Decorate function or class to replace old function method or class with + multiple parameters and export files into many other format. `.xlsx` , + `.csv` or regular format. Decorator mainly focus to export data to other + files. Exported file can `regular` file or excel sheets. + + :param reason: + Explain the "What to do?". Can be `write` or `convert`. + + :param from_: + Can be ``df`` or ``regular``. If ``df``, `func` is called and collect + its input argguments and write to appropriate extension. If `from_`is + ``regular``, Can be a simple data put on list of string ready + to output file into other format. + :type from_: str ``df`` or ``regular`` + + :param to_: + Exported file extension. Can be excel sheeet (`.xlsx`, `csv`) + or other kind of format. + + :param savepath: + Give the path to save the new file written. + + *Author: @Daniel03* + *Date: 09/07/2021* + + """ + + def __init__(self, reason:Optional[str]=None, from_:Optional[str]=None, + to:Optional[str]=None, savepath:Optional[str] =None, **kws): + self._logging =kalfeatlog().get_kalfeat_logger(self.__class__.__name__) + + self.reason = reason + self.from_=from_ + self.to= to + + self.refout =kws.pop('refout', None) + self.writedfIndex =kws.pop('writeindex', False) + + self.savepath =savepath + + + for key in list(kws.keys()): + setattr(self, key, kws[key]) + + def __call__(self, func): + """ Call function and return new function decorated""" + + @functools.wraps(func) + def decorated_func(*args, **kwargs): + """ + New decorated function and holds `func` args and kwargs arguments. + :params args: positional arguments of `func` + :param kwargs: keywords arguments of `func`. + + """ + self._logging.info('Func <{}> decorated !'.format(func.__name__)) + + cfw = 0 # write file type + + for addf in ['savepath', 'filename']: + if not hasattr(self, addf): + setattr(self, addf, None) + + erp_time = '{0}_{1}'.format(datetime.datetime.now().date(), + datetime.datetime.now().time()) + + if self.refout is None : + self.refout = 'w-{0}'.format( + erp_time ) + + if self.reason is None : + print('--> No reason is set. What do you want to do?' 
+ ' `write` file or `convert` file into other format?.') + return func(*args, **kwargs) + + if self.reason is not None : + if self.reason.lower().find('write')>=0 : + cfw = 1 + if self.from_=='df': + self.df , to_, refout_, savepath_, windex = func(*args, + **kwargs) + fromdf =True + self.writedfIndex = windex + + if fromdf is True and cfw ==1 : + if to_ is not None : + self.to= '.'+ to_.replace('.','') + + else: + self.to = '.csv' + if refout_ is not None : + self.refout =refout_ + + self.refout = self.refout.replace(':','-') + self.to + + if savepath_ is not None: + self.savepath =savepath_ + if self.to =='.csv': + self.df.to_csv(self.refout, header=True, + index =self.writedfIndex) + elif self.to =='.xlsx': + + self.df.to_excel(self.refout , sheet_name='{0}'.format( + self.refout[: int(len(self.refout)/2)]), + index=self.writedfIndex) + + + # savepath + generatedfile = '_kalfeat{}_'.format( + datetime.datetime.now().time()).replace(':', '.') + if self.savepath is None : + self.savepath = savepath_(generatedfile) + if self.savepath is not None : + if not os.path.isdir(self.savepath): + self.savepath = savepath_(generatedfile) + try : + shutil.move(os.path.join(os.getcwd(),self.refout) , + os.path.join(self.savepath , self.refout)) + except : + self.logging.debug("We don't find any path to save file.") + else: + print( + '--> reference output file <{0}> is well exported to {1}'. + format(self.refout, self.savepath)) + + return func(*args, **kwargs) + return decorated_func + + +class docstring: + """ Generate new doctring of a function or class by appending the doctring + of another function from the words considered as the startpoint `start` + to endpoint `end`. + + Sometimes two functions inherit the same parameters. Repeat the writing + of the same parameters is redundancy. So the most easier part is to + collect the doctring of the inherited function and paste to the new + function from the `startpoint`. + + Parameters + ----------- + func0: callable, + function to use its doctring + + start: str + Value from which the new docstring should be start. + + end: str + endpoint Value of the doctring. Stop considering point. + + + Examples + -------- + + .. In the followings examples let try to append the `writedf` function + from ``param reason`` (start) to `param to_` (end) to the + dostring to `predPlot` class. `predPlot` class class will holds new + doctring with writedf.__doc__ appended from `param reason` to + `param to_`. + + >>> from kalfeat.decorators import writedf , predPlot, docstring + >>> docs = doctring(writedf, start ='param reason', end='param to_')(predPlot) + >>> docs.__doc__ + >>> predPlot.__doc__ # doc modified and holds the writedf docstring too. + + *Author: @Daniel03* + *Date: 18/09/2021* + """ + def __init__(self, func0, start='Parameters', end=None ): + + self.func0 = func0 + self.start =start + self.end =end + + def __call__(self, func): + self._func =func + return self._decorator(self._func ) + + def _decorator(self, func): + """ Collect the doctring of `func0` from `start` to `end` and + add to a new doctring of wrapper`. 
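+
+        Example
+        -------
+        A rough sketch with toy functions (exact whitespace handling
+        depends on the implementation below)::
+
+            >>> def donor():
+            ...     '''Parameters
+            ...     ----------
+            ...     x: int, the input value
+            ...     Returns
+            ...     -------
+            ...     None'''
+            >>> @docstring(donor, start='Parameters', end='Returns')
+            ... def target(x):
+            ...     '''Parameters
+            ...     ----------
+            ...     x: int, same meaning as in `donor`
+            ...     '''
+            >>> # target.__doc__ now embeds the Parameters block of donor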
+ """ + func0_dstr = self.func0.__doc__ + # keet the only part you need + if self.start is None: + start_ix =0 + else: + start_ix = func0_dstr.find(self.start) # index of start point + + if self.end is not None: + end_ix = func0_dstr.find(self.end) + # remain_end_substring = func0_dstr[end_ix:] + substring = func0_dstr[start_ix :end_ix] + else : + substring = func0_dstr[start_ix :] + end_ix = -1 + + if start_ix <0 : + warnings.warn(f'`{self.start}` not find in the given ' + f'{self.func0.__name__!r} doctring` function will ' + f'append all the doctring of {self.func0.__name__!r}' + ' by default.') + start_ix =0 + + if end_ix <0 : + warnings.warn(f'`{self.end} not found in the given' + f' {self.func0.__name__!r} doctring` function will ' + f'append all the doctring of {self.func0.__name__!r}' + ' thin the end by default.') + + if self.start is not None: + try: + param_ix = func.__doc__.find(self.start) + except AttributeError: + if inspect.isclass(func): + fname = func.__class__.__name__ + else: fname = func.__name__ + # mean there is no doctrings. + # but silent the warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + warnings.warn(" Object `%s` has none doctrings!`NoneType`" + " object has no attribute `find`."%fname) + return func + # find end _ix and remove + if func.__doc__.find(self.end)>=0: + example_ix = func.__doc__.find(self.end) + + str_betw_param_example = func.__doc__[ + param_ix:example_ix] + else : + str_betw_param_example= func.__doc__[param_ix:] + example_ix =None + # remove --- `start`value and `\n` at the end of + # in func substring + str_betw_param_example = str_betw_param_example.replace( + self.start +'\n', '').replace('-\n', '').replace('-', '') + # now remove start point in + for i, item in enumerate(str_betw_param_example): + if item !=' ': + str_betw_param_example= str_betw_param_example[i:] + break + # in the concat string to new docstring of func. + func.__doc__ = func.__doc__[:param_ix] + f'{substring}'+\ + str_betw_param_example + + if example_ix is not None: + func.__doc__+= func.__doc__[example_ix:] + # set new_attributes + setattr(func, '__doc__', func.__doc__) + + return func + + +class docAppender: + """ + Decorator to generate a new doctring from appending the other class docstrings. + + Indeed from the startpoint <`from_`> and the endpoint<`to`>, one can select + the part of the any function or class doctrings to append to the existing + doctring for a new doctring creation. This trip is useful to avoid + redundancing parameters definitions everywhere in the scripts. + + Parameters + ----------- + func0: callable, + Function or class to collect the doctring from. + from_: str + Reference word or expression to start the collection of the + necessary doctring from the `func0`. It is the startpoint. The + *default* is ``Parameters``. + + to: str + Reference word to end the collection of the necessary part of the + docstring of `func0`. It is the endpoint. The *default* is ``Returns``. + + insert: str, + Reference word or expression to insert the collected doctring from + the `func0` and append of the index of the `insert` word in `func`. + If not found in the `func` doctring, it should retun None so nothing + should be appended. The *default* is ``Parameters``. + + Examples + --------- + >>> from kalfeat.decorators import docAppender + >>> def func0 (*args, **kwargs): + ... '''Im here so share my doctring. + ... + ... Parameters + ... ----------- + ... * args: list, + ... Collection of the positional arguments + ... 
** kwargs: dict + ... Collection of keywords arguments + ... Returns + ... ------- + ... None: nothing + ... ''' + ... pass + >>> def func(s, k=0): + ... ''' Im here to append the docstring from func0 + ... Parameters + ... ---------- + ... s: str , + ... Any string value + ... k: dict, + ... first keyword arguments + ... + ... Returns + ... -------- + ... None, I return nothing + ... ''' + >>> deco = docAppender(func0 , from_='Parameters', + ... to='Returns', insert ='---\\n')(func) + >>> deco.__doc__ + ... + + Warnings + -------- + Be sure to append two doctrings with the same format. One may choose + either the sphinx or the numpy doc formats. Not Mixing the both. + + """ + insert_=('parameters', + 'returns', + 'raises', + 'examples', + 'notes', + 'references', + 'see also', + 'warnings' + ) + + def __init__ (self, + func0: Callable[[F], F] , + from_: str ='Parameters', + to: str ='Returns', + insertfrom: str = 'Parameters', + remove =True ): + self.func0 = func0 + self.from_=from_ + self.to=to + self.remove= remove + self.insert = insertfrom + + def __call__(self, func): + self._func = copy.deepcopy(func ) + return self.make_newdoc (self._func) + + def make_newdoc(self, func): + """ make a new docs from the given class of function """ + + def sanitize_docstring ( strv): + """Sanitize string values and force the string to be + on the same level for parameters and the arguments of the + parameters. + :param strv: str + + return a new string sanitized that match the correct spaces for + the sphinx documentation. + + """ + if isinstance(strv, str): + strv = strv.split('\n') + # remove the '' in the first string + if strv[0].strip() =='':strv=strv[1:] + # get the first occurence for parameters definitions + ix_ = 0 ; + for ix , value in enumerate (strv): + if (value.lower().find(':param') >=0) or (value.lower( + ).find('parameters')>=0): + ix_ = ix ; break + # Put all explanations in the same level + # before the parameters + for k in range(ix_ +1): + strv[k]= strv[k].strip() + + for ii, initem in enumerate (strv): + for v in self.insert_: + if initem.lower().find(v)>=0: + initem= initem.strip() + strv[ii]= initem + break + + if '--' in initem or (':' in initem and len(initem) < 50) : + strv[ii]= initem.strip() + elif (initem.lower() not in self.insert_) and ii > ix_: + strv[ii]=' ' + initem.strip() + + return '\n'.join(strv) + + # get the doctring from the main func0 + func0_dstr = self.func0.__doc__ + # select the first occurence and remove '----' if exists + if self.from_ is None: + warnings.warn('Argument `from_` is missing. Should be the first' + f' word of {self.func0.__name__!r} doctring.') + self.from_ = func0_dstr.split()[0] + + from_ix = func0_dstr.find(self.from_) + func0_dstr = func0_dstr [from_ix:] + # remove the first occurence of the from_ value and --- under if exists. 
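+        # editor's note: `func0_dstr` was truncated just above to start at
+        # the `from_` marker; the `remove` branch below strips that marker
+        # (plus any '-' underline), and the `to` lookup afterwards bounds
+        # the extracted section before it is spliced in at `insert`.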
+ # in the case where from =':param' remove can be set to False + if self.remove: + func0_dstr = func0_dstr.replace(self.from_, '', 1).replace('-', '') + # get the index of 'to' or set None if not given + # now we are selected the part and append to the + # existing doc func where do you want to insert + to_ix = func0_dstr.find (self.to ) if self.to is not None else None + func0_dstr= func0_dstr [:to_ix if to_ix >=0 else None] + + if self.insert.lower() not in (self.insert_): + warnings.warn(f"It's seems the given {self.insert!r} for docstring" + f" insertion is missing to {self.insert_} list") + + in_ix = self._func.__doc__.lower().find(self.insert.lower()) + # assert whether the given value insert from exists . + if in_ix < 0 : + warnings.warn(f"Insert {self.insert!r} value is not found in the " + "{'class' if inspect.isclass(self._func) else 'function'") + # split the string with `\n` + # and loop to find the first occurence + # by default skip the next item which could be '----' + # and insert to the list next point + func0_dstr = func0_dstr.split('\n') + finalstr = self._func.__doc__.split('\n') + + rpop(func0_dstr) + func0_dstr = '\n'.join(func0_dstr) + for ii, oc in enumerate(finalstr) : + if oc.lower().find(self.insert.lower()) >=0 : + finalstr.insert (ii+2, func0_dstr) + finalstr = '\n'.join(finalstr);break + + setattr(func, '__doc__', sanitize_docstring (finalstr)) + + return func + +class docSanitizer: + """Decorator to clean the doctring and set all values of sections to + the same level. + + It sanitizes the doctring for the use of sphinx documentation. + + Examples + -------- + >>> from kalfeat.decorators import docSanitizer + >>> def messdocfunc(): + ... '''My doctring is mess. I need to be polished and well arranged. + ... + ... Im here to sanitize the mess doctring. + ... + ... Parameters + ... ---------- + ... * args: list, + ... Collection of the positional arguments + ... ** kwargs: dict + ... Collection of keywords arguments + ... + ... * kwargs: list, + ... Collection of the keyword arguments + ... + ... Warnings + ... -------- + ... Let check for warnings string ... + ... + ... ''' + ... pass + >>> cleandocfunc = docSanitizer()(messfocfunc) + >>> print(cleandocfunc.__doc__) + ... ''' + ... My doctring is mess. I need to be polished and well arranged. + ... + ... Parameters + ... ---------- + ... * args: list, + ... Collection of the positional arguments + ... ** kwargs: dict + ... Collection of keywords arguments + ... * kwargs: list, + ... Collection of the keyword arguments + ... 
''' + + """ + + insert_= ('parameters','returns','raises', 'examples','notes', + 'references', 'see also', 'warnings', ':param', ':rtype', + ) + + def __call__(self, func): + + func =copy.deepcopy(func) + docstring = copy.deepcopy(func.__doc__) + + if isinstance(docstring , str): + docstring = docstring .split('\n') + # remove the '' in the first string + if docstring [0].strip() =='':docstring =docstring [1:] + # get the first occurence for parameters definitions + # and separate the doctring into two parts: descriptions + #and corpus doctring as the remainings + + ix_ = 0 + for ix , value in enumerate (docstring ): + if (value.lower().find(':param') >=0) or (value.lower( + ).find('parameters')>=0): + ix_ = ix ; break + + #--> sanitize the descriptions part + description =docstring [: ix_] ; + # before the parameters + for k in range(len(description)): + description [k]= description [k].strip() + # remove at the end of description the blanck space '\n' + description = description[:-1] if description[-1].strip( + )== '' else description + + # --> work with the corpus docstrings + # get indexes for other sections and removes spaces + docstring = docstring [ix_:] + rpop (docstring) + ixb = len(docstring) + for ind , values in enumerate (docstring): + if values.lower().strip() in ( + 'examples', 'see also', 'warnings', + 'notes', 'references'): + ixb = ind ; break + # all values in same level + for k in range(ixb, len(docstring)): + docstring [k]= docstring [k].strip() + for ii, initem in enumerate (docstring ): + for v in self.insert_: + if initem.lower().find(v)>=0: + initem= initem.strip() + docstring [ii]= initem + break + if '--' in initem or ( + ':' in initem and len(initem) < 50 + ) or ix_>=ixb : + docstring [ii]= initem.strip() + elif (initem.lower() not in self.insert_ + ) and ix_< ii < ixb: + docstring [ii]=' ' + initem.strip() + # add blanck line from indexes list ixs + ixs=list() + for k, item in enumerate (docstring): + for param in self.insert_[:-2]: + if item.lower().strip() == param: + ixs.append(k) + break + ki =0 + for k in ixs : + docstring.insert (k+ki, '') + ki+=1 # add number of insertions + + # --> combine the descriptions and docstring and set attributes + setattr(func, '__doc__' , '\n'.join(description + docstring )) + + return func + +############################################################################### +# decorators utilities +def rpop(listitem): + """ remove all blank line in the item list. + :param listitem: list- list of the items and pop all + the existing blanck lines. """ + # now pop all the index for blanck line + isblanck = False + for ii, item in enumerate (listitem) : + if item.strip()=='': + listitem.pop(ii) + isblanck =True + return rpop(listitem) if isblanck else False + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kalfeat/documentation.py b/kalfeat/documentation.py new file mode 100644 index 0000000..c4f96a5 --- /dev/null +++ b/kalfeat/documentation.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# author: KLaurent +# Licence: GPL-3.0 +""" +.. |ohmS| replace:: Pseudo-area of fractured zone +.. |sfi| replace:: Pseudo-fracturing index +.. |VES| replace:: Vertical Electrical Sounding +.. |ERP| replace:: Electrical Resistivity Profiling + +.. _Bagoue region: https://en.wikipedia.org/wiki/Bagou%C3%A9 + +.. _Dieng et al: http://documents.irevues.inist.fr/bitstream/handle/2042/36362/2IE_2004_12_21.pdf?sequence=1 + +.. 
_FlowRatePredictionUsingSVMs: https://agupubs.onlinelibrary.wiley.com/doi/epdf/10.1029/2021WR031623 + +.. _GeekforGeeks: https://www.geeksforgeeks.org/style-plots-using-matplotlib/#:~:text=Matplotlib%20is%20the%20most%20popular,without%20using%20any%20other%20GUIs + +.. _IUPAC nommenclature: https://en.wikipedia.org/wiki/IUPAC_nomenclature_of_inorganic_chemistry + +.. _Matplotlib scatter: https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.pyplot.scatter.html +.. _Matplotlib plot: https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.pyplot.plot.html +.. _Matplotlib pyplot: https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.pyplot.plot.html +.. _Matplotlib figure: https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.pyplot.figure.html +.. _Matplotlib figsuptitle: https://matplotlib.org/3.5.0/api/_as_gen/matplotlib.pyplot.suptitle.html + +.. _Properties of water: https://en.wikipedia.org/wiki/Properties_of_water#Electrical_conductivity +.. _pandas DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html +.. _pandas Series: https://pandas.pydata.org/docs/reference/api/pandas.Series.html + +.. _scipy.optimize.curve_fit: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html + +.. _Water concept: https://en.wikipedia.org/wiki/Water +.. _Water triple point: https://en.wikipedia.org/wiki/Properties_of_water#/media/File:Phase_diagram_of_water.svg +.. _kalfeat: https://github.com/WEgeophysics/kalfeat/ + +""" + diff --git a/kalfeat/exceptions.py b/kalfeat/exceptions.py new file mode 100644 index 0000000..8cd3174 --- /dev/null +++ b/kalfeat/exceptions.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- + +class ArgumentError(Exception): + pass +class SiteError(Exception): + pass + +class DatasetError(Exception): + pass + +class EDIError(Exception): + pass + +class HeaderError(Exception): + pass + +class ConfigurationError(Exception): + pass + +class FileHandlingError(Exception): + pass + +class TipError(Exception): + pass + +class PlotError(Exception): + pass + +class ParameterNumberError(Exception): + pass + +class ProcessingError(Exception): + pass + +class ResistivityError(Exception): + pass + +class StationError(Exception): + pass + +class FeatureError(Exception): + pass + +class EstimatorError(Exception): + pass + +class GeoArgumentError(Exception): + pass + +class HintError(Exception): + pass + +class ERPError(Exception): + pass + +class VESError(Exception): + pass + +class ExtractionError(Exception): + pass + + +class CoordinateError (Exception): + pass + +class TopModuleError (Exception): + pass +class FitError (Exception): + pass + +class GISError (Exception): + pass \ No newline at end of file diff --git a/kalfeat/klog.yml b/kalfeat/klog.yml new file mode 100644 index 0000000..c22a487 --- /dev/null +++ b/kalfeat/klog.yml @@ -0,0 +1,47 @@ +version: 1 +disable_existing_loggers: False +formatters: + simple: + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + datefmt: "%Y-%m-%dT %H:%M:%S %p" + +handlers: + console: # screen display print out + class: logging.StreamHandler + level: DEBUG #INFO + formatter: simple + stream: ext://sys.stdout + + info_file_handler: + class: logging.handlers.RotatingFileHandler + level: DEBUG + formatter: simple + filename: kalfeat/klogfiles/infos.log + maxBytes: 1000000 # 1MB + backupCount: 2 + encoding: utf8 + + error_file_handler: + class: logging.handlers.RotatingFileHandler + level: ERROR + formatter: simple + filename: kalfeat/klogfiles/errors.log + maxBytes: 1000000 # 1MB + backupCount: 2 + encoding: 
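+# editor's note: a hedged sketch of loading this config by hand (this is
+# essentially what kalfeatlog.load_configure does for .yml files); it
+# assumes the kalfeat/klogfiles/ directory referenced above exists:
+#
+#     import yaml, logging.config
+#     with open('kalfeat/klog.yml') as f:
+#         logging.config.dictConfig(yaml.safe_load(f))
+#     logging.getLogger('my_module').info('handlers ready')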
utf8 + +loggers: + my_module: + level: DEBUG + handlers: [console,info_file_handler, error_file_handler] + propagate: no + + __main__: + level: DEBUG + handlers: [console,info_file_handler, error_file_handler] + propagate: no + +root: + level: DEBUG + handlers: [console, info_file_handler, error_file_handler] + diff --git a/kalfeat/methods/__init__.py b/kalfeat/methods/__init__.py new file mode 100644 index 0000000..b1a208c --- /dev/null +++ b/kalfeat/methods/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +from .dc import ( + ResistivityProfiling , + VerticalSounding +) + diff --git a/kalfeat/methods/dc.py b/kalfeat/methods/dc.py new file mode 100644 index 0000000..4fec40b --- /dev/null +++ b/kalfeat/methods/dc.py @@ -0,0 +1,763 @@ +# -*- coding: utf-8 -*- +# author: KLaurent +# Licence: GPL-3.0 + +from __future__ import annotations + +import os +import copy +import warnings +import numpy as np +import pandas as pd + +from ..documentation import __doc__ +from ..tools.funcutils import ( + repr_callable_obj, + smart_format, + smart_strobj_recognition + ) +from ..tools.coreutils import ( + _assert_station_positions, + defineConductiveZone, + fill_coordinates, + erpSelector, + vesSelector, + +) +from ..tools.exmath import ( + shape, + type_, + power, + magnitude, + sfi, + ohmicArea, + invertVES, + ) +from ..decorators import refAppender +from ..typing import ( + List, + Optional, + NDArray, + Series , + DataFrame, + + ) +from ..property import( + ElectricalMethods + ) +from .._kalfeatlog import kalfeatlog +from ..exceptions import ( + FitError, + VESError + ) + + +@refAppender(__doc__) +class ResistivityProfiling(ElectricalMethods): + """ Class deals with the Electrical Resistivity Profiling (ERP). + + The electrical resistivity profiling is one of the cheap geophysical + subsurface imaging method. It is most preferred to find groundwater during + the campaigns of drinking water supply, especially in developing countries. + Commonly, it is used in combinaision with the the vertical electrical + sounding |VES| to speculated about the layer thickesses and the existence of + the fracture zone. + + Arguments + ---------- + + **station**: str + Station name where the drilling is expected to be located. The + station should numbered from 1 not 0. So if ``S00` is given, the + station name should be set to ``S01``. Moreover, if `dipole` value + is set as keyword argument,i.e. the station is named according + to the value of the dipole. For instance for `dipole` equals to + ``10m``, the first station should be ``S00``, the second ``S10`` , + the third ``S20`` and so on. However, it is recommend to name the + station using counting numbers rather than using the dipole + position. + + **dipole**: float + The dipole length used during the exploration area. + + **auto**: bool + Auto dectect the best conductive zone. If ``True``, the station + position should be the position `station` of the lower + resistivity value in |ERP|. + + **kws**: dict + Additional |ERP| keywords arguments + + Examples + -------- + >>> from kalfeat.methods.dc import ResistivityProfiling + >>> rObj = ResistivityProfiling(AB= 200, MN= 20,station ='S7') + >>> rObj.fit('data/erp/testunsafedata.csv') + >>> rObj.sfi_ + ... array([0.03592814]) + >>> rObj.power_, robj.position_zone_ + ... 90, array([ 0, 30, 60, 90]) + >>> rObj.magnitude_, rObj.conductive_zone_ + ... 268, array([1101, 1147, 1345, 1369], dtype=int64) + >>> rObj.dipole + ... 
30 + + """ + + def __init__ (self, + station: str | None = None, + dipole: float = 10., + auto: bool = False, + **kws): + super().__init__(**kws) + + self._logging = kalfeatlog.get_kalfeat_logger(self.__class__.__name__) + self.dipole=dipole + self.station=station + self.auto=auto + + for key in list( kws.keys()): + setattr(self, key, kws[key]) + + + + def fit(self, data : str | NDArray | Series | DataFrame , + columns: str | List [str] = None, + **kws + ) -> object: + """ Fitting the :class:`~.ResistivityProfiling` + and populate the class attributes. + + Parameters + ---------- + **data**: Path-like obj, Array, Series, Dataframe. + Data containing the the collected resistivity values in + survey area. + + **columns**: list, + Only necessary if the `data` is given as an array. No need to + to explicitly defin when `data` is a dataframe or a Pathlike + object. + + **kws**: dict, + Additional keyword arguments; e.g. to force the station to + match at least the best minimal resistivity value in the + whole data collected in the survey area. + + Returns + ------- + object instanciated for chaining methods. + + Notes + ------ + The station should numbered from 1 not 0. So if ``S00` is given, the + station name should be set to ``S01``. Moreover, if `dipole` value is + set as keyword argument, i.e. the station is named according to the + value of the dipole. For instance for `dipole` equals to ``10m``, + the first station should be ``S00``, the second ``S10``, the third + ``S20`` and so on. However, it is recommend to name the station using + counting numbers rather than using the dipole position. + + """ + + self._logging.info('`Fit` method from {self.__class__.__name__!r}' + ' is triggered ') + if isinstance(data, str): + if not os.path.isfile (data): + raise TypeError ( f'{data!r} object should be a file,' + f' got {type(data).__name__!r}' + ) + + data = erpSelector(data, columns) + self.data_ = copy.deepcopy(data) + + self.data_, self.utm_zone = fill_coordinates( + self.data_, utm_zone= self.utm_zone, + datum = self.datum , epsg= self.epsg ) + self.resistivity_ = self.data_.resistivity + # convert app.rho to the concrete value + # if log10 rho are provided. + if self.fromlog10: + self.resistivity_ = np.power( + 10, self.resistivity_) + if self.verbose > 7 : + print("Resistivity profiling data should be overwritten to " + " take the concrete values rather than log10(ohm.meters)" + ) + self.data_['resistivity'] = self.resistivity_ + + self._logging.info(f'Retrieving the {self.__class__.__name__!r} ' + ' components and recompute the coordinate values...') + + self.position_ = self.data_.station + self.lat_ = self.data_.latitude + self.lon_= self.data_.longitude + self.east_ = self.data_.easting + self.north_ = self.data_.northing + + if self.verbose > 7: + print(f'Compute {self.__class__.__name__!r} parameter numbers.' ) + + self._logging.info(f'Assert the station {self.station!r} if given' + 'or auto-detected otherwise.') + + # assert station and use the automatic station detection + ########################################################## + if self.auto and self.station is not None: + warnings.warn ( + f"Station {self.station!r} is given while 'auto' is 'True'." + " Only the auto-detection is used instead...", UserWarning) + + self.station = None + + if self.station is None: + if not self.auto: + warnings.warn("Station number is missing! 
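+            # Editor's note: a quick numeric check of the `fromlog10`
+            # branch above -- resistivities recorded as log10(ohm.m) are
+            # mapped back to ohm.m with np.power(10, r), e.g.
+            # [3.04, 3.06] -> [~1096.5, ~1148.2] ohm.m.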
By default the " + "automatic-detection should be triggered.") + self.auto = True + + if self.station is not None: + if self.verbose > 7 : + print("Assert the given station and recomputed the array position." + ) + self._logging.warn( + f'Station value {self.station!r} in the given data ' + 'should be overwritten...') + + # recompute the position and dipolelength + self.position_, self.dipole = _assert_station_positions( + df = self.data_, **kws) + self.data_['station'] = self.position_ + + ############################################################ + # Define the selected anomaly (conductive_zone ) + # ix: s the index of drilling point in the selected + # conductive zone while + # pos: is the index of drilling point in the whole + # survey position + self.conductive_zone_, self.position_zone_, ix, pos, =\ + defineConductiveZone( + self.resistivity_, + s= self.station, + auto = self.auto, + #keep Python numbering index (from 0 ->...), + keepindex = True, + + # No need to implement the dipole computation + # for detecting the sation position if the + # station is given + # dipole =self.dipole if self.station is None else None, + + p = self.position_ + ) + + if self.verbose >7 : + print('Compute the property values at the station location ' + ' expecting for drilling location <`sves`> at' + f' position {str(pos+1)!r}') + + # Note that `sves` is the station location expecting to + # hold the drilling operations at this point. It is considered + # as the select point of the conductive zone. + self.sves_ = f'S{pos:03}' + + self._logging.info ('Loading main params value from the expecting' + f' drilling location: {self.sves_!r}') + + self.sves_lat_ = self.lat_[pos] + self.sves_lon_= self.lon_[pos] + self.sves_east_ = self.east_[pos] + self.sves_north_= self.north_[pos] + self.sves_resistivity_= self.resistivity_[pos] + + # Compute the predictor parameters + self.power_ = power(self.position_zone_) + self.shape_ = shape(self.conductive_zone_ , + s= ix , + p= self.position_) + self.magnitude_ = magnitude(self.conductive_zone_) + self.type_ = type_ (self.resistivity_) + self.sfi_ = sfi(cz = self.conductive_zone_, + p = self.position_zone_, + s = ix, + dipolelength= self.dipole + ) + + if self.verbose > 7 : + pn = ('type', 'shape', 'magnitude', 'power' , 'sfi') + print(f"Parameter numbers {smart_format(pn)}" + " were successfully computed.") + + return self + + def summary(self, keeponlyparams: bool = False) -> DataFrame : + """ Summarize the most import parameters for prediction purpose. + + If `keeponlyparams` is set to ``True``. Method should output only + the main important params for prediction purpose... 
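+
+        Example
+        -------
+        A sketch reusing the class example above (the file ships in this
+        repository's ``data/erp`` folder)::
+
+            >>> rObj = ResistivityProfiling(station='S7')
+            >>> rObj.fit('data/erp/testunsafedata.csv')   # doctest: +SKIP
+            >>> rObj.summary(keeponlyparams=True)         # doctest: +SKIP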
+ + """ + + try: + getattr(self, 'type_'); getattr(self, 'sfi_') + except FitError: + raise FitError( + "Can't call the method 'summary' without fitting the" + f" {self.__class__.__name__!r} object first.") + + usefulparams = ( + 'station','dipole', 'sves_lon_', 'sves_lat_','sves_east_', + 'sves_north_', 'sves_resistivity_', 'power_', 'magnitude_', + 'shape_','type_','sfi_') + + table_= pd.DataFrame ( + {f"{k[:-1] if k.endswith('_') else k }": getattr(self, k , np.nan ) + for k in usefulparams}, index=range(1) + ) + table_.station = self.sves_ + table_.set_index ('station', inplace =True) + table_.rename (columns= {'sves_lat':'latitude', 'sves_lon':'longitude', + 'sves_east':'easting', 'sves_north':'northing'}, + inplace =True) + if keeponlyparams: + table_.reset_index(inplace =True ) + table_.drop( + ['station', 'dipole', 'sves_resistivity', + 'latitude', 'longitude', 'easting', 'northing'], + axis='columns', inplace =True ) + + return table_ + + + def __repr__(self): + """ Pretty format for programmer guidance following the API... """ + return repr_callable_obj (self) + + + def __getattr__(self, name): + if name.endswith ('_'): + if name not in self.__dict__.keys(): + if name in ('data_', 'resistivity_', 'lat_', 'lon_', + 'easting_', 'northing_', 'sves_lon_', 'sves_lat_', + 'sves_east_', 'sves_north_', 'sves_resistivity_', + 'power_', 'magnitude_','shape_','type_','sfi_'): + raise FitError ( + f'Fit the {self.__class__.__name__!r} object first' + ) + + rv = smart_strobj_recognition(name, self.__dict__, deep =True) + appender = "" if rv is None else f'. Do you mean {rv!r}' + + raise AttributeError ( + f'{self.__class__.__name__!r} object has no attribute {name!r}' + f'{appender}{"" if rv is None else "?"}' + ) + + +@refAppender(__doc__) +class VerticalSounding (ElectricalMethods): + """ + Vertical Electrical Sounding (VES) class; inherits of ElectricalMethods + base class. + + The VES is carried out to speculate about the existence of a fracture zone + and the layer thicknesses. Commonly, it comes as supplement methods to |ERP| + after selecting the best conductive zone when survey is made on + one-dimensional. + + Arguments + ----------- + + **fromS**: float + The depth in meters from which one expects to find a fracture zone + outside of pollutions. Indeed, the `fromS` parameter is used to + speculate about the expected groundwater in the fractured rocks + under the average level of water inrush in a specific area. For + instance in `Bagoue region`_ , the average depth of water inrush + is around ``45m``.So the `fromS` can be specified via the water inrush + average value. + + **rho0**: float + Value of the starting resistivity model. If ``None``, `rho0` should be + the half minumm value of the apparent resistivity collected. Units is + in Ω.m not log10(Ω.m) + + **h0**: float + Thickness in meter of the first layers in meters.If ``None``, it + should be the minimum thickess as possible ``1.m`` . + + **strategy**: str + Type of inversion scheme. The defaut is Hybrid Monte Carlo (HMC) known + as ``HMCMC``. Another scheme is Bayesian neural network approach (``BNN``). + + **vesorder**: int + The index to retrieve the resistivity data of a specific sounding point. + Sometimes the sounding data are composed of the different sounding + values collected in the same survey area into different |ERP| line. 
For instance:
+
+        +------+------+----+----+----+----+----+
+        | AB/2 | MN/2 |SE1 | SE2| SE3| ...|SEn |
+        +------+------+----+----+----+----+----+
+
+        Where `SE` are the electrical sounding data values and `n` is the
+        number of sounding points selected. `SE1`, `SE2` and `SE3` are
+        three points selected for the |VES|, i.e. 3 sounding points carried
+        out either in the same |ERP| or somewhere else. These sounding data
+        are the resistivity data with specific numbers. Commonly the
+        numbers are randomly chosen and do not refer to the expected best
+        fracture zone selected after the prior interpretation. After
+        transformation via the function
+        :func:`~kalfeat.tools.coreutils.vesSelector`, the header of the
+        data should hold the `resistivity`. For instance, referring to the
+        table above, the data should be:
+
+        +----+----+-------------+-------------+-------------+-----+
+        | AB | MN |resistivity | resistivity | resistivity | ... |
+        +----+----+-------------+-------------+-------------+-----+
+
+        Therefore, `vesorder` is used to select the specific resistivity
+        values, i.e. the corresponding sounding number of the |VES|
+        expected to locate the drilling operations or used for computation.
+        For example, ``vesorder=1`` should figure out:
+
+        +------+------+----+--------+----+----+------------+
+        | AB/2 | MN/2 |SE2 | -->    | AB | MN |resistivity |
+        +------+------+----+--------+----+----+------------+
+
+        If `vesorder` is ``None`` and there is more than one sounding
+        curve, the first sounding curve is selected by default, i.e.
+        `vesorder` equals ``0``.
+
+    **typeofop**: str
+        Type of operation to apply to the resistivity values `rhoa` of the
+        duplicated spacing points `AB`. The *default* operation is
+        ``mean``. Sometimes at the potential electrodes (`MN`), the
+        measurements at `AB` are collected twice after slightly modifying
+        the distance `MN`. At this point, two or more resistivity values
+        are mapped to the same distance `AB` (`AB` remains unchanged while
+        `MN` is changed). So the operation consists of either averaging
+        (``mean``) the resistivity values, taking the ``median``, or
+        applying ``leaveOneOut`` (i.e. keeping one resistivity value among
+        the different values collected at the same spacing `AB`). Note that
+        for ``leaveOneOut``, the selected resistivity value is randomly
+        chosen.
+
+    **objective**: str
+        Type of output. By default, the function outputs the value of the
+        pseudo-area in ``Ω.m^2``. However, for plotting purposes, setting
+        the argument to ``view`` alternatively outputs X and Y, recomputed
+        and projected, as well as the X and Y values of the expected
+        fractured zone. Here X is the AB dipole spacing when imaging in
+        depth, and Y is the computed apparent resistivity.
+
+    **kws**: dict
+        Additional keyword arguments for the |VES| data operations.
+        See :func:`kalfeat.tools.exmath.vesDataOperator` for further
+        details.
+
+    See also
+    ---------
+    `Kouadio et al 2022 `_
+
+    References
+    ----------
+    *Koefoed, O. (1970)*. A fast method for determining the layer
+    distribution from the raised kernel function in geoelectrical sounding.
+    Geophysical Prospecting, 18(4), 564–570.
+    https://doi.org/10.1111/j.1365-2478.1970.tb02129.x .
+
+    *Koefoed, O. (1976)*. Progress in the Direct Interpretation of
+    Resistivity Soundings: an Algorithm. Geophysical Prospecting, 24(2),
+    233–240. https://doi.org/10.1111/j.1365-2478.1976.tb00921.x .
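+
+    A rough intuition for the pseudo-area (the exact computation is
+    delegated to :func:`~kalfeat.tools.exmath.ohmicArea`): it behaves like
+    the integral of the apparent-resistivity curve over the AB/2 spacings
+    beyond `fromS`. A minimal numpy sketch, with values rounded from the
+    example below (illustrative only, not the package implementation)::
+
+        >>> import numpy as np
+        >>> AB = np.array([45., 50., 55., 60.])        # AB/2 spacings (m)
+        >>> rhoa = np.array([57.7, 61.2, 64.7, 68.3])  # apparent resistivity (Ω.m)
+        >>> np.trapz(rhoa, AB)                         # pseudo-area, in Ω.m^2
+        944.5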
+
+
+    Examples
+    --------
+    >>> from kalfeat.methods import VerticalSounding
+    >>> from kalfeat.tools import vesSelector
+    >>> vobj = VerticalSounding(fromS= 45, vesorder= 3)
+    >>> vobj.fit('data/ves/ves_gbalo.xlsx')
+    >>> vobj.ohmic_area_ # in Ω.m^2
+    ... 349.6432550517697
+    >>> vobj.nareas_ # number of areas computed
+    ... 2
+    >>> vobj.area1_, vobj.area2_ # value of each area in Ω.m^2
+    ... (254.28891096053943, 95.35434409123027)
+    >>> vobj.roots_ # different boundaries in pairs
+    ... [array([45.        , 57.55255255]), array([ 96.91691692, 100.        ])]
+    >>> data = vesSelector ('data/ves/ves_gbalo.csv', index_rhoa=3)
+    >>> vObj = VerticalSounding().fit(data)
+    >>> vObj.fractured_zone_ # AB/2 position from 45 to 100 m depth.
+    ... array([ 45.,  50.,  55.,  60.,  70.,  80.,  90., 100.])
+    >>> vObj.fractured_zone_resistivity_
+    ... array([57.67588974, 61.21142365, 64.74695755, 68.28249146, 75.35355927,
+    ...        82.42462708, 89.4956949 , 96.56676271])
+    >>> vObj.nareas_
+    ... 2
+    >>> vObj.ohmic_area_
+    ... 349.6432550517697
+
+    """
+
+    def __init__(self,
+                 fromS: float = 45.,
+                 rho0: float = None,
+                 h0: float = 1.,
+                 strategy: str = 'HMCMC',
+                 vesorder: int = None,
+                 typeofop: str = 'mean',
+                 objective: Optional[str] = 'coverall',
+                 **kws) -> None:
+        super().__init__(**kws)
+
+        self._logging = kalfeatlog.get_kalfeat_logger(self.__class__.__name__)
+        self.fromS=fromS
+        self.vesorder=vesorder
+        self.typeofop=typeofop
+        self.objective=objective
+        self.rho0=rho0
+        self.h0=h0
+        self.strategy=strategy
+
+        for key in list(kws.keys()):
+            setattr(self, key, kws[key])
+
+    def fit(self, data: str | DataFrame, **kwd):
+        """ Fit the sounding |VES| curves, compute the ohmic-area, and set
+        all the features for demarcating the fractured zone from the
+        selected anomaly.
+
+        Parameters
+        -----------
+        data: Path-like object, DataFrame
+            The string argument is a path-like object. It must be a valid
+            file which encompasses the data collected in the field. It
+            should be composed of the spacing values `AB` and the apparent
+            resistivity values `rhoa`. By convention `AB` is half-space
+            data, i.e. `AB/2`. So, if `data` is given, the params `AB` and
+            `rhoa` should be kept to ``None``. If `AB` and `rhoa` are given
+            instead, the user must set `data` to ``None`` for API
+            consistency; otherwise an error is raised. The recommended way
+            is to use the :func:`kalfeat.tools.vesSelector` tool to build
+            the |VES| data before feeding it to the algorithm. See the
+            example below.
+
+        AB: array-like
+            The spacing of the current electrodes when exploring deeper.
+            The unit is meters. Note that `AB` is by convention equal to
+            `AB/2`. It is taken as the half-space of the investigation
+            depth.
+
+        MN: array-like
+            Potential electrode distances at each investigation depth. Note
+            that by convention the values are half-space and equal to
+            `MN/2`.
+
+        rhoa: array-like
+            Apparent resistivity values collected when imaging in depth.
+            The unit is Ω.m, not log10(Ω.m).
+
+        readableformats: tuple
+            Specific readable files. The default readable formats are
+            ``xlsx`` and ``csv``. Other formats may be added in a future
+            release.
+
+        Returns
+        -------
+        object:
+            Useful for chaining methods.
+
+        .. |VES| replace:: Vertical Electrical Sounding
+
+        """
+
+        def prettyprinter(n, r, v):
+            """ Display some details when verbosity is higher...
+
+            :param n: int - number of areas
+            :param r: array-like - pair values of the integral bounds
+                (-inf, +inf)
+            :param v: array of floats - values of the computed pseudo-areas.
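+
+            The table layout follows the format strings below; e.g. (the
+            values and exact column padding are illustrative)::
+
+                |     N-area      |    lb:-AB/2 (m) |     ub:-AB/2(m) |    ohmS (Ω.m^2) |
+                |        1        |              45 |              58 |         254.289 |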
""" + print('=' * 73 ) + print('| {0:^15} | {1:>15} | {2:>15} | {3:>15} |'.format( + 'N-area', 'lb:-AB/2 (m)','ub:-AB/2(m)', 'ohmS (Ω.m^2)' )) + print('=' * 73 ) + for ii in range (n): + print('| {0:^15} | {1:>15} | {2:>15} | {3:>15} |'.format( + ii+1, round(r[ii][0]), round(r[ii][1]), round(v[ii], 3))) + print('-'*73) + + self._logging.info (f'`Fit` method from {self.__class__.__name__!r}' + ' is triggered') + + if self.verbose >= 7 : + print(f'Range {str(self.vesorder)!r} of resistivity data of the ' + 'sshould be selected as the main sounding data. ') + self.data_ = vesSelector( + data = data, index_rhoa= self.vesorder, **kwd ) + self.max_depth_ = self.data_.AB.max() + + if self.fromlog10: + self.resistivity_ = np.power( + 10, self.resistivity_) + if self.verbose > 7 : + print("Sounding resistivity data should be converted to " + "the concrete resistivity values (ohm.meters)" + ) + self.data_['resistivity'] = self.resistivity_ + + if self.fromS >= self.max_depth_ : + raise VESError( + " Process of the depth monitoring is aborted! The searching" + f" point of param 'fromS'<{self.fromS}m> ' is expected to " + f" be less than the maximum depth <{self.max_depth_}m>.") + + if self.verbose >= 3 : + print("Pseudo-area should be computed from AB/2 ={str(self.fromS)}" + f" to {self.max_depth_} meters. " + ) + r = ohmicArea( data = self.data_ , sum = False, ohmSkey = self.fromS, + objective = self.objective , typeofop = self.typeofop, + ) + self._logging.info(f'Populating {self.__class__.__name__!r} property' + ' attributes.') + oc, gc = r + + ohmS, self.err_, self.roots_ = list(oc) + self.nareas_ = len(ohmS) + + self._logging.info(f'Setting the {self.nareas_} pseudo-areas calculated.') + for ii in range(self.nareas_): + self.__setattr__(f"area{ii+1}_", ohmS[ii]) + + self.roots_ = np.split(self.roots_, len(self.roots_)//2 ) if len( + self.roots_) > 2 else [np.array(self.roots_)] + + if self.verbose >= 7 : + prettyprinter(n= self.nareas_, r= self.roots_, v= ohmS) + + self.ohmic_area_= sum(ohmS) # sum the different spaces + + self.XY_ , _, self.XYarea_ = list(gc) + self.AB_ = self.XY_[:, 0] + self.resistivity_ = self.XY_[:, 1] + self.fractured_zone_= self.XYarea_[:, 0] + self.fractured_zone_resistivity_ = self.XYarea_[:, 1] + + if self.verbose > 7 : + print("The Parameter numbers were successfully computed.") + return self + + def summary(self, keeponlyparams: bool = False) -> DataFrame : + """ Summarize the most import features for prediction purpose. + + If `keeponlyparams` is set to ``True``. Method should output only + the main important params for prediction purpose... 
+
+        """
+
+        try:
+            getattr(self, 'ohmic_area_'); getattr(self, 'fractured_zone_')
+        except FitError:
+            raise FitError(
+                "Can't call the method 'summary' without fitting the"
+                f" {self.__class__.__name__!r} object first.")
+
+        usefulparams = (
+            'area', 'AB', 'MN', 'arrangement', 'utm_zone', 'objective',
+            'rho0', 'h0', 'fromS', 'max_depth_', 'ohmic_area_', 'nareas_')
+
+        table_ = pd.DataFrame(
+            {f"{k}": getattr(self, k, np.nan) for k in usefulparams},
+            index=range(1)
+            )
+        table_.area = self.area
+        table_.set_index('area', inplace=True)
+        table_.rename(columns={
+            'max_depth_': 'max_depth',
+            'ohmic_area_': 'ohmic_area',
+            'nareas_': 'nareas'
+            },
+            inplace=True)
+        if keeponlyparams:
+            table_.reset_index(inplace=True)
+            table_.drop(
+                [el for el in list(table_.columns) if el != 'ohmic_area'],
+                axis='columns', inplace=True
+                )
+
+        return table_
+
+    def invert(self, data: str | DataFrame, strategy=None, **kwd):
+        """ Invert the 1D |VES| data collected in the exploration area.
+
+        :param data: pandas DataFrame - contains the depth measurement AB
+            from the current electrodes, the potential electrodes MN, and
+            the collected apparent resistivities.
+
+        :param rho0: float - Value of the starting resistivity model. If
+            ``None``, `rho0` is set to half of the minimum apparent
+            resistivity collected. The unit is Ω.m, not log10(Ω.m).
+
+        :param h0: float - Thickness of the first layer in meters. If
+            ``None``, it is set to the minimum possible thickness, ``1.`` m.
+
+        :param strategy: str - Type of inversion scheme. The default is the
+            Hybrid Monte Carlo (HMC) scheme, known as ``HMCMC``. Another
+            scheme is the Bayesian neural network approach (``BNN``).
+
+        :param kwd: dict - Additional keyword arguments for the |VES| data
+            operations. See :func:`kalfeat.tools.exmath.vesDataOperator`
+            for further details.
+
+        .. |VES| replace:: Vertical Electrical Sounding
+
+        """
+        self.data_ = getattr(self, 'data_', None)
+        if self.data_ is None:
+            raise FitError(f'Fit the {self.__class__.__name__!r} object first')
+
+        # invert data
+        # XXX TODO
+        if strategy is not None:
+            self.strategy = strategy
+
+        invertVES(data=self.data_, h0=self.h0, rho0=self.rho0,
+                  typeof=self.strategy, **kwd)
+
+        return self
+
+    def __repr__(self):
+        """ Pretty format for programmers following the API... """
+        return repr_callable_obj(self)
+
+    def __getattr__(self, name):
+        if name.endswith('_'):
+            if name not in self.__dict__.keys():
+                if name in ('data_', 'resistivity_', 'ohmic_area_', 'err_',
+                            'roots_', 'XY_', 'XYarea_', 'AB_',
+                            'fractured_zone_', 'fractured_zone_resistivity_'):
+                    raise FitError(
+                        f'Fit the {self.__class__.__name__!r} object first'
+                    )
+
+        rv = smart_strobj_recognition(name, self.__dict__, deep=True)
+        appender = "" if rv is None else f'. Do you mean {rv!r}'
+
+        raise AttributeError(
+            f'{self.__class__.__name__!r} object has no attribute {name!r}'
+            f'{appender}{"" if rv is None else "?"}'
+        )
\ No newline at end of file
diff --git a/kalfeat/property.py b/kalfeat/property.py
new file mode 100644
index 0000000..ba69add
--- /dev/null
+++ b/kalfeat/property.py
@@ -0,0 +1,836 @@
+# -*- coding: utf-8 -*-
+# author: KLaurent
+# Licence: GPL-3.0
+
+
+"""
+`kalfeat`_ property
+=====================
+
+**Water**: Base class module. It contains all the water properties useful
+    for writing pure hydrogeological modules. Instantiating the class
+    should raise an error; however, its special attributes can be used by
+    child class objects.
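+
+    Since ``Water`` is abstract, instantiating it directly raises a
+    ``TypeError``, while a subclass that implements ``__init__`` works; a
+    minimal sketch (the ``FreshWater`` name is hypothetical)::
+
+        >>> from kalfeat.property import Water
+        >>> class FreshWater(Water):          # hypothetical subclass
+        ...     def __init__(self, **kws):
+        ...         super().__init__(**kws)
+        >>> FreshWater().pH
+        7.0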
+
+**BasePlot**: The base class of all plots. It can be the parent class of
+    all other plotting classes. The module :mod:`~.view.plot` uses the
+    `BasePlot` class for `Matplotlib plot`_.
+
+**P**: A property class that handles the |ERP| and |VES| attributes. Along
+    with :mod:`~.methods.electrical`, it deals with the electrical dipole
+    arrangements and the data classification, and asserts whether the data
+    can be read by the scripts. It is a kind of "asserter": it accepts or
+    rejects the data provided by the user and indicates how to sanitize it
+    before feeding it to the algorithm::
+
+        >>> from kalfeat.property import P
+        >>> pObj = P()
+        >>> P.idicttags
+        ... <property object at 0x...>
+        >>> pObj.idicttags
+        ... {'station': ['pk', 'sta', 'pos'],
+        ...  'resistivity': ['rho', 'app', 'res', 'se', 'sounding.values'],
+        ...  'longitude': ['long', 'lon'],
+        ...  'latitude': ['lat'],
+        ...  'easting': ['east', 'x'],
+        ...  'northing': ['north', 'y']}
+        >>> rphead = ['res', 'x', 'y', '']
+        >>> pObj (rphead) # sanitize the given resistivity profiling head data.
+        ... ['resistivity', 'easting', 'northing']
+        >>> rphead = ['lat', 'x', 'rho', '']
+        >>> pObj (rphead)
+        ... ['latitude', 'easting', 'resistivity']
+        >>> rphead= ['pos', 'x', 'lon', 'north', 'latitud', 'app.res' ]
+        >>> pObj (rphead)
+        ... ['station', 'easting', 'longitude', 'northing', 'latitude', 'resistivity']
+        >>> # --> for sounding head assertion
+        >>> vshead=['ab', 's', 'rho', 'potential']
+        >>> pObj (vshead, kind ='ves')
+        ... ['AB', 'resistivity'] # in the list of vshead, only the 'AB'
+        ... # and 'resistivity' columns are recognized.
+
+**ElectricalMethods**: Another base class of :mod:`~.methods.dc`,
+    especially the :class:`~.methods.dc.ResistivityProfiling` and
+    :class:`~.methods.electrical.VerticalSounding` classes. It is composed
+    of the details of the geolocalisation of the survey area and the array
+    configuration. It is expected to hold other attributes as the
+    development is still ongoing.
+
+.. _kalfeat: https://github.com/WEgeophysics/kalfeat/
+.. |ERP| replace:: Electrical resistivity profiling
+.. |VES| replace:: Vertical Electrical Sounding
+
+"""
+# import warnings
+from __future__ import annotations
+
+from abc import (
+    ABC,
+    abstractmethod,
+    )
+
+from .decorators import refAppender
+from .documentation import __doc__
+
+
+__all__ = [
+    "Water",
+    "P",
+    "ElectricalMethods",
+    "assert_arrangement"
+]
+
+array_configuration = {
+    1: (['Schlumberger', 'AB>> MN', 'slbg'], 'S'),
+    2: (['Wenner', 'AB=MN'], 'W'),
+    3: (['Dipole-dipole', 'dd', 'ABMN', 'MNAB'], 'DD'),
+    4: (['Gradient-rectangular', '[AB]MN', 'MN[AB]', '[AB]'], 'GR')
+    }
+
+
+utm_zone_designator = {
+    'X': [72, 84],
+    'W': [64, 72],
+    'V': [56, 64],
+    'U': [48, 56],
+    'T': [40, 48],
+    'S': [32, 40],
+    'R': [24, 32],
+    'Q': [16, 24],
+    'P': [8, 16],
+    'N': [0, 8],
+    'M': [-8, 0],
+    'L': [-16, -8],
+    'K': [-24, -16],
+    'J': [-32, -24],
+    'H': [-40, -32],
+    'G': [-48, -40],
+    'F': [-56, -48],
+    'E': [-64, -56],
+    'D': [-72, -64],
+    'C': [-80, -72],
+    'Z': [-80, 84]
+}
+
+
+@refAppender(__doc__)
+class Water (ABC):
+    r""" A superclass for method classes dealing with water properties and
+    components. Instantiating the class should raise an error.
+
+    Water (H2O) is a polar inorganic compound that is at room temperature a
+    tasteless and odorless liquid, which is nearly colorless apart from an
+    inherent hint of blue. It is by far the most studied chemical compound
+    and is described as the "universal solvent" and the "solvent of life".
+    It is the most abundant substance on the surface of Earth and the only
+    common substance to exist as a solid, liquid, and gas on Earth's
+    surface. It is also the third most abundant molecule in the universe
+    (behind molecular hydrogen and carbon monoxide).
+
+    The base class initializes arguments for the different methods such as
+    |ERP| and |VES|. The `Water` class should set the attributes and check
+    whether they are suitable for what the specific class expects.
+
+    Holds some property information:
+
+    ================= =======================================================
+    Property          Description
+    ================= =======================================================
+    state             official name for the chemical compound :math:`H_2O`.
+                      It can be a matter of ``solid``, ``ice``, ``gaseous``,
+                      ``water vapor`` or ``steam``. The *default* is ``None``.
+    taste             water from ordinary sources, including bottled mineral
+                      water, usually has many dissolved substances that may
+                      give it varying tastes and odors. Humans and other
+                      animals have developed senses that enable them to
+                      evaluate the potability of water in order to avoid
+                      water that is too ``salty`` or ``putrid``.
+                      The *default* is ``potable``.
+    odor              pure water is usually described as tasteless and
+                      odorless, although humans have specific sensors that
+                      can feel the presence of water in their mouths, and
+                      frogs are known to be able to smell it. The *default*
+                      is ``pure``.
+    color             the color can be easily observed in a glass of
+                      tap-water placed against a pure white background, in
+                      daylight. The *default* is ``pure white background``.
+    appearance        pure water is visibly blue due to absorption of light
+                      in the region ca. 600 nm – 800 nm. The *default* is
+                      ``visible``.
+    density           water differs from most liquids in that it becomes
+                      less dense as it freezes. At 1 atm pressure, it
+                      reaches its maximum density of ``1000 kg/m3``
+                      (62.43 lb/cu ft) at 3.98 °C (39.16 °F). The *default*
+                      unit and value are ``kg/m3`` and ``1.``.
+    magnetism         water is a diamagnetic material. Though the
+                      interaction is weak, with superconducting magnets it
+                      can attain a notable interaction. The *default* value
+                      is :math:`-0.91 \chi m`. Note that the magnetic
+                      susceptibility has no unit.
+    capacity          stands for `heat capacity`. In thermodynamics, the
+                      specific heat capacity (symbol cp) of a substance is
+                      the heat capacity of a sample of the substance
+                      divided by the mass of the sample. Water has a very
+                      high specific heat capacity of 4184 J/(kg·K) at 20 °C
+                      (4182 J/(kg·K) at 25 °C). The *default* is ``4184``.
+    vaporization      stands for `heat of vaporization`. Indeed, the
+                      enthalpy of vaporization (symbol
+                      :math:`\Delta H_{vap}`), also known as the (latent)
+                      heat of vaporization or heat of evaporation, is the
+                      amount of energy (enthalpy) that must be added to a
+                      liquid substance to transform a quantity of that
+                      substance into a gas. Water has a high heat of
+                      vaporization (i.e. 40.65 kJ/mol or 2257 kJ/kg at the
+                      normal boiling point), both of which are a result of
+                      the extensive hydrogen bonding between its molecules.
+                      The *default* is ``2257 kJ/kg``.
+    fusion            stands for the `enthalpy of fusion`, more commonly
+                      known as the latent heat, which for water is
+                      333.55 kJ/kg at 0 °C. The *default* is ``33.55``.
+    miscibility       water is miscible with many liquids, including
+                      ethanol in all proportions. Water and most oils are
+                      immiscible, usually forming layers according to
+                      increasing density from the top. The *default* is
+                      ``True``.
+    condensation      as a gas, water vapor is completely miscible with
+                      air. If the vapor's partial pressure is 2% of
+                      atmospheric pressure and the air is cooled from
+                      25 °C, then starting at about 22 °C water will start
+                      to condense, defining the dew point and creating fog
+                      or dew. The *default* is the degree of condensation
+                      set to ``22 °C``.
+    pressure          stands for the `vapour pressure` of water. It is the
+                      pressure exerted by molecules of water vapor in
+                      gaseous form, whether pure or in a mixture with other
+                      gases such as air. The vapor pressure is given as a
+                      list from the temperature T, 0 °C (0.6113 kPa) to
+                      100 °C (101.3200 kPa). The *default* is
+                      ``(0.611, ..., 101.32)``.
+    compressibility   the compressibility of water is a function of
+                      pressure and temperature. At 0 °C, at the limit of
+                      zero pressure, the compressibility is
+                      ``5.1x10^-10 Pa^-1``. The *default* value is the
+                      value at 0 °C.
+    triple            stands for the `triple point`. The temperature and
+                      pressure at which ordinary solid, liquid, and gaseous
+                      water coexist in equilibrium is the triple point of
+                      water. The `triple point` pairs are set to
+                      (.001 °C, 611.657 Pa) and (100 °C, 101.325 kPa) for
+                      the freezing (0 °C) and boiling (100 °C) points. In
+                      addition, the `triple point` can be set as
+                      ``(20., 101.325 kPa)`` for 20 °C. By *default*, the
+                      solid/liquid/vapour `triple point` is set to
+                      ``(.001, 611.657 Pa)``.
+    melting           stands for the `melting point`. Water can remain in a
+                      fluid state down to its homogeneous nucleation point
+                      of about 231 K (−42 °C; −44 °F). The melting point of
+                      ordinary hexagonal ice falls slightly under
+                      moderately high pressures, by 0.0073 °C
+                      (0.0131 °F)/atm or about ``0.5 °C`` (0.90 °F)/70 atm,
+                      considered as the *default* value.
+    conductivity      in pure water, sensitive equipment can detect a very
+                      slight electrical conductivity of 0.05501 ± 0.0001
+                      μS/cm at 25.00 °C. The *default* is ``.05501``.
+    polarity          an important feature of water is its polar nature.
+                      The structure has a bent molecular geometry for the
+                      two hydrogens from the oxygen vertex. The *default*
+                      is ``bent molecular geometry`` or
+                      ``angular or V-shaped``. Other possibilities are
+                      ``covalent bonds`` and the ``VSEPR theory`` (Valence
+                      Shell Electron Pair Repulsion).
+    cohesion          stands for the collective action of hydrogen bonds
+                      between water molecules. The *default* is
+                      ``coherent``, for the water molecules staying close
+                      to each other. In addition, `cohesion` refers to the
+                      tendency of similar or identical particles/surfaces
+                      to cling to one another.
+    adhesion          stands for the tendency of dissimilar particles or
+                      surfaces to cling to one another. It can be
+                      ``chemical adhesion``, ``dispersive adhesion``,
+                      ``diffusive adhesion`` or ``disambiguation``.
+                      The *default* is ``disambiguation``.
+    tension           stands for the tendency of liquid surfaces at rest to
+                      shrink into the minimum surface area possible. Water
+                      has an unusually high surface tension of 71.99 mN/m
+                      at 25 °C, which is caused by the strength of the
+                      hydrogen bonding between water molecules. This allows
+                      insects to walk on water. The *default* value is
+                      ``71.99 mN/m at 25 °C``.
+    action            stands for `capillary action`. Because water has
+                      strong cohesive and adhesive forces, it exhibits
+                      capillary action. Strong cohesion from hydrogen
+                      bonding and adhesion allows trees to transport water
+                      more than 100 m upward. So the *default* value is set
+                      to ``100.`` meters.
+    issolvent         water is an excellent solvent due to its high
+                      dielectric constant.
+                      Substances that mix well and dissolve in water are
+                      known as hydrophilic ("water-loving") substances,
+                      while those that do not mix well with water are known
+                      as hydrophobic ("water-fearing") substances.
+    tunnelling        stands for `quantum tunneling`. It is a quantum
+                      mechanical phenomenon whereby a wavefunction can
+                      propagate through a potential barrier. It can be
+                      ``monomers``, for the motions which destroy and
+                      regenerate the weak hydrogen bond by internal
+                      rotations, or ``hexamer``, involving the concerted
+                      breaking of two hydrogen bonds. The *default* is
+                      ``hexamer``, discovered on 18 March 2016.
+    reaction          stands for `acid-base reactions`. Water is
+                      ``amphoteric``, i.e. it has the ability to act as
+                      either an acid or a base in chemical reactions.
+    ionisation        in liquid water there is some self-ionization giving
+                      ``hydronium`` ions and ``hydroxide`` ions. The
+                      *default* is ``hydroxide``.
+    earthmass         stands for the earth mass ratio in the "ppm" unit.
+                      Water is the most abundant substance on Earth and
+                      also the third most abundant molecule in the universe
+                      after :math:`H_2 \quad \text{and} \quad CO`. The
+                      *default* value is ``0.23`` ppm of the earth's mass.
+    occurence         stands for the occurrence of the molecule on Earth.
+                      Water found in the oceans represents ``97.39%`` of
+                      the global water volume of 1.38x10^9 km^3,
+                      considered as the *default* value.
+    pH                stands for the `potential of hydrogen`. It also shows
+                      the acidic nature of water. For instance, rain is
+                      generally mildly acidic, with a pH between 5.2 and
+                      5.8, if not having any acid stronger than carbon
+                      dioxide. At neutral pH, the concentration of the
+                      hydroxide ion (:math:`OH^{-}`) equals that of the
+                      (solvated) hydrogen ion (:math:`H^{+}`), with a value
+                      close to ``10^-7 mol L^-1`` at 25 °C. The *default*
+                      is ``7.``, ``neutral``, or the name of any substance
+                      with a close `pH`.
+    nomenclature      the accepted IUPAC name of water is ``oxidane`` or
+                      simply ``water``. ``Oxidane`` is only intended to be
+                      used as the name of the mononuclear parent hydride
+                      used for naming derivatives of water by substituent
+                      nomenclature. The *default* name is ``oxidane``.
+    ================= =======================================================
+
+
+    See also
+    ----------
+    Water (chemical formula H2O) is an inorganic, transparent, tasteless,
+    odorless, and nearly colorless chemical substance, which is the main
+    constituent of Earth's hydrosphere and the fluids of all known living
+    organisms (in which it acts as a solvent). It is vital for all known
+    forms of life, even though it provides neither food, energy, nor
+    organic micronutrients. Its chemical formula, H2O, indicates that each
+    of its molecules contains one oxygen and two hydrogen atoms, connected
+    by covalent bonds. The hydrogen atoms are attached to the oxygen atom
+    at an angle of 104.45°. "Water" is the name of the liquid state of H2O
+    at standard temperature and pressure.
+
+    """
+
+    @abstractmethod
+    def __init__(self,
+                 state: str = None,
+                 taste: str = 'potable',
+                 odor: int | str = 'pure',
+                 appearance: str = 'visible',
+                 color: str = 'pure white background',
+                 capacity: float = 4184.,
+                 vaporization: float = 2257.,
+                 fusion: float = 33.55,
+                 density: float = 1.,
+                 magnetism: float = -.91,
+                 miscibility: bool = True,
+                 condensation: float = 22,
+                 pressure: tuple = (.6113, ..., 101.32),
+                 compressibility: float = 5.1e-10,
+                 triple: tuple = (.001, 611.657),
+                 conductivity: float = .05501,
+                 melting: float = .5,
+                 polarity: str = 'bent molecular geometry',
+                 cohesion: str = 'coherent',
+                 adhesion: str = 'disambiguation',
+                 tension: float = 71.99,
+                 action: float = 1.e2,
+                 issolvent: bool = True,
+                 reaction: str = 'amphoteric',
+                 ionisation: str = "hydroxide",
+                 tunneling: str = 'hexamer',
+                 nomenclature: str = 'oxidane',
+                 earthmass: float = .23,
+                 occurence: float = .9739,
+                 pH: float | str = 7.,
+                 ):
+
+        self.state=state
+        self.taste=taste
+        self.odor=odor
+        self.appearance=appearance
+        self.color=color
+        self.capacity=capacity
+        self.vaporization=vaporization
+        self.fusion=fusion
+        self.density=density
+        self.magnetism=magnetism
+        self.miscibility=miscibility
+        self.condensation=condensation
+        self.pressure=pressure
+        self.compressibility=compressibility
+        self.triple=triple
+        self.conductivity=conductivity
+        self.melting=melting
+        self.polarity=polarity
+        self.cohesion=cohesion
+        self.adhesion=adhesion
+        self.tension=tension
+        self.action=action
+        self.issolvent=issolvent
+        self.reaction=reaction
+        self.ionisation=ionisation
+        self.tunneling=tunneling
+        self.nomenclature=nomenclature
+        self.earthmass=earthmass
+        self.occurence=occurence
+        self.pH=pH
+
+
+class ElectricalMethods (ABC):
+    """ Base class of the geophysical electrical methods.
+
+    The electrical geophysical methods are used to determine the electrical
+    resistivity of the earth's subsurface. Thus, electrical methods are
+    employed for those applications in which a knowledge of resistivity
+    or the resistivity distribution will solve or shed light on the problem
+    at hand. The resolution, depth, and areal extent of investigation are
+    functions of the particular electrical method employed. Once
+    resistivity data have been acquired, the resistivity distribution of
+    the subsurface can be interpreted in terms of soil characteristics
+    and/or rock type and geological structure. Resistivity data are usually
+    integrated with other geophysical results and with surface and
+    subsurface geological data to arrive at an interpretation. Get more
+    info by consulting this `link `_ .
+
+    The :class:`kalfeat.methods.electrical.ElectricalMethods` class
+    composes the base class of all the geophysical methods that image the
+    underground using resistivity values.
+
+    Holds other optional info passed through the ``kws`` arguments:
+
+    ====================== ============== ===================================
+    Attributes             Type           Description
+    ====================== ============== ===================================
+    AB                     float, array   Distance between the current
+                                          electrodes in meters. `A` and `B`
+                                          are used as the first and second
+                                          current electrodes by convention.
+                                          Note that `AB` is considered as an
+                                          array of depth measurements when
+                                          using the vertical electrical
+                                          sounding |VES| method, i.e. the
+                                          AB/2 half-space. Default is
+                                          ``200`` meters.
+    MN                     float, array   Distance between the potential
+                                          electrodes in meters. `M` and `N`
+                                          are used as the first and second
+                                          potential electrodes by
+                                          convention. Note that `MN` is
+                                          considered as an array of
+                                          potential electrode spacings when
+                                          collecting data using the vertical
+                                          electrical sounding |VES| method,
+                                          i.e. the MN/2 half-space. Default
+                                          is ``20.`` meters.
+    arrangement            str            Type of dipole `AB` and `MN`
+                                          arrangements.
+                                          Can be *Schlumberger*,
+                                          *Wenner-alpha / Wenner-beta*,
+                                          *Gradient-rectangular* or
+                                          *dipole-dipole*. Default is
+                                          *Schlumberger*.
+    area                   str            The name of the survey location or
+                                          the exploration area.
+    fromlog10              bool           Set to ``True`` if the given
+                                          resistivity values were collected
+                                          on a base-10 logarithmic scale.
+    utm_zone               str            string (##N or ##S). UTM zone in
+                                          the form of a number and the North
+                                          or South hemisphere, e.g. 10S or
+                                          03N.
+    datum                  str            well known datum, e.g. WGS84,
+                                          NAD27, etc.
+    projection             str            projected point in lat and lon in
+                                          Datum `latlon`, as decimal degrees
+                                          or 'UTM'.
+    epsg                   int            EPSG number defining the
+                                          projection (see
+                                          http://spatialreference.org/ref/
+                                          for more info). Overrides utm_zone
+                                          if both are provided.
+    ====================== ============== ===================================
+
+
+    Notes
+    -------
+    `ElectricalMethods` considers the given resistivity values as plain
+    values, not base-10 logarithms. So if log10 values are given, set the
+    `fromlog10` argument to ``True``.
+
+    .. |VES| replace:: Vertical Electrical Sounding
+
+    """
+
+    @abstractmethod
+    def __init__(self,
+                 AB: float = 200.,
+                 MN: float = 20.,
+                 arrangement: str = 'schlumberger',
+                 area: str = None,
+                 projection: str = 'UTM',
+                 datum: str = 'WGS84',
+                 epsg: int = None,
+                 utm_zone: str = None,
+                 fromlog10: bool = False,
+                 verbose: int = 0,
+                 ) -> None:
+
+        self.AB=AB
+        self.MN=MN
+        self.arrangement=assert_arrangement(arrangement)
+        self.utm_zone=utm_zone
+        self.projection=projection
+        self.datum=datum
+        self.epsg=epsg
+        self.area=area
+        self.fromlog10=fromlog10
+        self.verbose=verbose
+
+
+class P:
+    """
+    Data properties are values that are hidden to avoid modification across
+    the package. They are used for assertion, comparison, et cetera, and
+    are enumerated below as property objects.
+
+    .. |ERP| replace:: Electrical resistivity profiling
+
+    Parameters
+    -----------
+
+    **frcolortags**: Stands for flow rate color tags. Values are
+        '#CED9EF', '#9EB3DD', '#3B70F2', '#0A4CEF'.
+
+    **idicttags**: Stands for the dictionary which encompasses the keys and
+        values of all the different prefixes.
+
+    **istation**: List of prefixes used for indexing the stations in the
+        |ERP|.
+
+    **ieasting**: List of prefixes used for indexing the easting
+        coordinates array.
+
+    **inorthing**: List of prefixes used to index the northing coordinates.
+
+    **iresistivity**: List of prefixes used for indexing the apparent
+        resistivity values in the |ERP| data collected during the survey.
+
+    **isren**: The list of head columns used during data collection. Any
+        |ERP| data header provided should be converted into the following
+        arrangement:
+
+        +----------+-------------+-----------+-----------+
+        |station   | resistivity | easting   | northing  |
+        +----------+-------------+-----------+-----------+
+
+    **isrll**: The list of head columns used during data collection. Any
+        |ERP| data header provided should be converted into the following
+        arrangement:
+
+        +----------+-------------+-------------+----------+
+        |station   | resistivity | longitude   | latitude |
+        +----------+-------------+-------------+----------+
+
+    **P**: Typing class for fetching the properties.
+
+    Examples
+    ---------
+    >>> from kalfeat.property import P
+    >>> P.idicttags
+    ... <property object at 0x...>
+    >>> P().idicttags
+    ... {'station': ['pk', 'sta', 'pos'],
+    ...  'resistivity': ['rho', 'app', 'res', 'se', 'sounding.values'],
+    ...  'longitude': ['long', 'lon'],
+    ...  'latitude': ['lat'],
+    ...  'easting': ['east', 'x'],
+    ...  'northing': ['north', 'y']}
+    >>> {k: v for k, v in P.__dict__.items() if '__' not in k}
+    ...
+    ... {'_station': ['pk', 'sta', 'pos'],
+    ...  '_resistivity': ['rho', 'app', 'res', 'se', 'sounding.values'],
+    ...  '_longitude': ['long', 'lon'],
+    ...  '_latitude': ['lat'],
+    ...  '_easting': ['east', 'x'],
+    ...  '_northing': ['north', 'y'],
+    ...  'frcolortags': <property object at 0x...>,
+    ...  'idicttags': <property object at 0x...>,
+    ...  'istation': <property object at 0x...>,
+    ...  'ieasting': <property object at 0x...>,
+    ...  'inorthing': <property object at 0x...>,
+    ...  'iresistivity': <property object at 0x...>,
+    ...  'isren': <property object at 0x...>}
+    >>> P().isrll
+    ... ['station', 'resistivity', 'longitude', 'latitude']
+
+    """
+
+    station_prefix = [
+        'pk', 'sta', 'pos'
+    ]
+    easting_prefix = [
+        'east', 'x',
+    ]
+    northing_prefix = [
+        'north', 'y',
+    ]
+    lon_prefix = [
+        'long', 'lon'
+    ]
+
+    lat_prefix = [
+        'lat'
+    ]
+
+    resistivity_prefix = [
+        'rho', 'app', 'res', 'se', 'sounding.values'
+    ]
+    erp_headll = [
+        'station', 'resistivity', 'longitude', 'latitude',
+    ]
+    erp_headen = [
+        'station', 'resistivity', 'easting', 'northing',
+    ]
+    ves_head = ['AB', 'MN', 'rhoa']
+
+    param_options = [
+        ['bore', 'for'],
+        ['x', 'east'],
+        ['y', 'north'],
+        ['pow', 'puiss', 'pa'],
+        ['magn', 'amp', 'ma'],
+        ['shape', 'form'],
+        ['type'],
+        ['sfi', 'if'],
+        ['lat'],
+        ['lon'],
+        ['lwi', 'wi'],
+        ['ohms', 'surf'],
+        ['geol'],
+        ['flow', 'deb']
+    ]
+    param_ids = [
+        'id',
+        'east',
+        'north',
+        'power',
+        'magnitude',
+        'shape',
+        'type',
+        'sfi',
+        'lat',
+        'lon',
+        'lwi',
+        'ohmS',
+        'geol',
+        'flow'
+    ]
+
+    ves_props = dict(_AB=['ab', 'ab/2', 'current.electrodes',
+                          'depth', 'thickness'],
+                     _MN=['mn', 'mn/2', 'potential.electrodes', 'mnspacing'],
+                     )
+
+    # map each ERP property name to its prefix list; the VES
+    # properties are merged afterwards.
+    all_prefixes = {f'_{k}': v for k, v in zip(
+        erp_headll + erp_headen[2:], [
+            station_prefix,
+            resistivity_prefix,
+            lon_prefix,
+            lat_prefix,
+            easting_prefix,
+            northing_prefix,
+        ]
+    )}
+    all_prefixes = {**all_prefixes, **ves_props}
+
+    def __init__(self, hl=None):
+        self.hl = hl
+        for key, value in self.all_prefixes.items():
+            self.__setattr__(key, value)
+
+    def _check_header_item(self, it, kind='erp'):
+        """ Check whether the item exists in the property dictionary.
+        Use the param `kind` to select the type of header the data must
+        hold:
+
+            - `kind` = ``erp`` -> for Electrical Resistivity Profiling
+            - `kind` = ``ves`` -> for Vertical Electrical Sounding
+        """
+        dict_ = self.idictcpr if kind == 'ves' else self.idicttags
+        for k, val in dict_.items():
+            for s in val:
+                if str(it).lower().find(s) >= 0:
+                    return k
+        return
+
+    def __call__(self, hl: list = None, kind: str = 'erp'):
+        """ Rename the given header to hold the property header values.
+
+        The call may return ``None`` if the given header items in `hl` do
+        not match any item in the property headers.
+
+        :param hl: list or array,
+            list of the given headers.
+        :param kind: str
+            Type of data fed into the algorithm. Can be ``ves`` for
+            Vertical Electrical Sounding and ``erp`` for Electrical
+            Resistivity Profiling.
+
+        :Example:
+            >>> from kalfeat.property import P
+            >>> test_v= ['pos', 'easting', 'north', 'rhoa', 'lat', 'longitud']
+            >>> pobj = P(test_v)
+            >>> pobj ()
+            ... ['station', 'easting', 'northing', 'resistivity',
+            ...  'latitude', 'longitude']
+            >>> test_v2 = test_v + ['straa', 'nourmai', 'opirn']
+            >>> pobj (test_v2)
+            ...
+            ... ['station', 'easting', 'northing', 'resistivity',
+            ...  'latitude', 'longitude']
+        """
+
+        v_ = list()
+
+        self.hl = hl or self.hl
+        if self.hl is not None:
+            self.hl = [self.hl] if isinstance(self.hl, str) else self.hl
+            if hasattr(self.hl, '__iter__'):
+                for item in self.hl:
+                    v_.append(self._check_header_item(item, kind))
+                v_ = list(filter((None).__ne__, v_))
+        return None if len(v_) == 0 else v_
+
+    @property
+    def frcolortags(self):
+        """ Set the flow-rate color dictionary. """
+        return dict((f'fr{k}', f'#{v}') for k, v in zip(
+            range(4), ('CED9EF', '9EB3DD', '3B70F2', '0A4CEF')
+            )
+        )
+
+    @property
+    def idicttags(self):
+        """ The collection of data properties. """
+        return dict((k, v) for k, v in zip(
+            self.isrll + self.isren[2:],
+            [self.istation, self.iresistivity, self.ilon,
+             self.ilat, self.ieasting, self.inorthing])
+        )
+
+    @property
+    def istation(self):
+        """ Use the prefix to identify the station location positions. """
+        return self._station
+
+    @property
+    def ilon(self):
+        """ Use the prefix to identify the longitude coordinates if given
+        in the dataset. """
+        return self._longitude
+
+    @property
+    def ilat(self):
+        """ Use the prefix to identify the latitude coordinates if given
+        in the dataset. """
+        return self._latitude
+
+    @property
+    def ieasting(self):
+        """ Use the prefix to identify the easting coordinates if given
+        in the dataset. """
+        return self._easting
+
+    @property
+    def inorthing(self):
+        """ Use the prefix to identify the northing coordinates if given
+        in the dataset. """
+        return self._northing
+
+    @property
+    def iresistivity(self):
+        """ Use the prefix to identify the resistivity values in the
+        dataset. """
+        return self._resistivity
+
+    @property
+    def isrll(self):
+        """ ``SRLL`` is the abbreviation of ``S`` for stations, ``R`` for
+        resistivity, ``L`` for longitude and ``L`` for latitude. ``SRLL``
+        is the expected set of columns in electrical resistivity
+        profiling. Indeed, it follows the traditional collection sheets
+        used during the survey. """
+        return self.erp_headll
+
+    @property
+    def isren(self):
+        """ ``SREN`` is the abbreviation of ``S`` for stations, ``R`` for
+        resistivity, ``E`` for easting and ``N`` for northing. ``SREN``
+        is the expected set of columns in electrical resistivity
+        profiling. Indeed, it follows the traditional collection sheets
+        used during the survey. """
+        return self.erp_headen
+
+    @property
+    def icpr(self):
+        """ Keep only the Vertical Electrical Sounding header data... """
+        return [k.replace('_', '')
+                for k in self.ves_props.keys()] + ['resistivity']
+
+    @property
+    def idictcpr(self):
+        """ `cpr` stands for current-potential and resistivity. They
+        compose the main property values when collecting the vertical
+        electrical sounding data. """
+        return {f'{k.replace("_", "")}': v for k, v in {
+            **self.ves_props, **{'resistivity': self.iresistivity}}.items()}
+
+
+def assert_arrangement(a: int | str):
+    """ Assert whether the given arrangement is correct.
+
+    :param a: int, float, str - Type of the given electrical arrangement.
+
+    :returns:
+        - The correct arrangement name, or
+        - ``0``, which means ``False``, for a wrong given arrangement.
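+
+    The lookup accepts the configuration key, an alias, or the short
+    code; a brief illustration (outputs checked against
+    `array_configuration` above):
+
+    :Example:
+        >>> from kalfeat.property import assert_arrangement
+        >>> assert_arrangement ('AB=MN')    # a Wenner alias
+        ... 'wenner'
+        >>> assert_arrangement (1)          # lookup by key
+        ... 'schlumberger'
+        >>> assert_arrangement ('unknown')  # unrecognized
+        ... 0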
+ """ + + for k, v in array_configuration.items(): + if a == k or str(a).lower().strip() in ','.join ( + v[0]).lower() or a ==v[1]: + return v[0][0].lower() + + return 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kalfeat/tools/__init__.py b/kalfeat/tools/__init__.py new file mode 100644 index 0000000..1f59e76 --- /dev/null +++ b/kalfeat/tools/__init__.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +import os +import re +import numpy as np + +from .coreutils import ( + plotAnomaly, + vesSelector, + erpSelector, + defineConductiveZone, + ) +from .exmath import ( + type_, + shape, + power, + magnitude, + sfi, + ohmicArea, + invertVES, + vesDataOperator, + scalePosition, + ) +from ..decorators import gdal_data_check + +HAS_GDAL = gdal_data_check(None)._gdal_data_found +NEW_GDAL = False + +if (not HAS_GDAL): + try: + import pyproj + except ImportError: + raise RuntimeError("Either GDAL or PyProj must be installed") +else: + import osgeo + if hasattr(osgeo, '__version__') and int(osgeo.__version__[0]) >= 3: + NEW_GDAL = True + + diff --git a/kalfeat/tools/coreutils.py b/kalfeat/tools/coreutils.py new file mode 100644 index 0000000..a81ad1c --- /dev/null +++ b/kalfeat/tools/coreutils.py @@ -0,0 +1,1459 @@ +# -*- coding: utf-8 -*- +# author: KLaurent +# Licence: GPL-3.0 + +""" +`kalfeat`_ core functionalities +=============================== +Encompasses the main functionalities for class and methods + +.. _kalfeat: https://github.com/WEgeophysics/kalfeat/ + +""" +from __future__ import annotations +import os +import warnings +import copy + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +from ..documentation import __doc__ +from ..decorators import ( + refAppender, + docSanitizer + ) +from ..property import P +from ..typing import ( + Any, + List , + Union, + Tuple, + Dict, + Optional, + NDArray, + DataFrame, + Series, + Array, + DType, + Sub, + SP +) +from ..exceptions import ( + StationError, + HeaderError, + ResistivityError, + ERPError, + VESError +) +from .funcutils import ( + smart_format as smft, + _isin , + _assert_all_types, + accept_types, + read_from_excelsheets + ) +from .gistools import ( + project_point_ll2utm, + project_point_utm2ll + ) + +def _is_readable ( + f:str, + readableformats : Tuple[str] = ('.csv', '.xlsx'), + **kws + ) -> DataFrame: + """ Specific files that can be read file throughout the packages + :param f: Path-like object -Should be a readable files. + :param readableformats: tuple -Specific readable files + + :return: dataframe - A dataframe with head contents... + + """ + if not os.path.isfile: + raise TypeError ( + f'Expected a Path-like object, got : {type(f).__name__!r}') + + if os.path.splitext(f)[1].lower() not in readableformats: + raise TypeError(f'Can only parse the {smft(readableformats)} files' + ) + + if f.endswith ('.csv'): + f = pd.read_csv (f,**kws) + elif f.endswith ('.xlsx'): + f = pd.read_excel(f, **kws ) + + return f + +@refAppender(__doc__) +def vesSelector( + data:str | DataFrame[DType[float|int]] = None, + *, + rhoa: Array |Series | List [float] = None, + AB :Array |Series = None, + MN: Array|Series | List[float] =None, + index_rhoa: Optional[int] = None, + **kws +) -> DataFrame : + """ Assert the validity of |VES| data and return a sanitize dataframe. + + :param rhoa: array-like - Apparent resistivities collected during the + sounding. + + :param AB: array-like - Investigation distance between the current + electrodes. 
+        Note that `AB` is by convention equal to `AB/2`. It is taken as
+        the half-space of the investigation depth.
+
+    :param MN: array-like - Potential electrode distances at each
+        investigation depth. Note that by convention the values are
+        half-space and equal to `MN/2`.
+
+    :param data: Path-like object or sounding dataframe. If given, the
+        other parameters can keep their ``None`` values.
+
+    :param index_rhoa: int - The index to retrieve the resistivity data of
+        a specific sounding point. Sometimes the sounding data are composed
+        of different sounding values collected in the same survey area
+        along different |ERP| lines. For instance:
+
+        +------+------+----+----+----+----+----+
+        | AB/2 | MN/2 |SE1 | SE2| SE3| ...|SEn |
+        +------+------+----+----+----+----+----+
+
+        Where `SE` are the electrical sounding data values and `n` is the
+        number of sounding points selected. `SE1`, `SE2` and `SE3` are
+        three points selected for the |VES|, i.e. 3 sounding points carried
+        out either in the same |ERP| or somewhere else. These sounding data
+        are the resistivity data with specific numbers. Commonly the
+        numbers are randomly chosen and do not refer to the expected best
+        fracture zone selected after the prior interpretation. After
+        transformation via the function `vesSelector`, the header of the
+        data should hold the `resistivity`. For instance, referring to the
+        table above, the data should be:
+
+        +----+----+-------------+-------------+-------------+-----+
+        | AB | MN |resistivity | resistivity | resistivity | ... |
+        +----+----+-------------+-------------+-------------+-----+
+
+        Therefore, the `index_rhoa` is used to select the specific
+        resistivity values, i.e. the corresponding sounding number of the
+        |VES| expected to locate the drilling operations or used for
+        computation. For example, ``index_rhoa=1`` should figure out:
+
+        +------+------+----+--------+-----+----+------------+
+        | AB/2 | MN/2 |SE2 | -->    | AB  | MN |resistivity |
+        +------+------+----+--------+-----+----+------------+
+
+        If `index_rhoa` is ``None`` and there is more than one sounding
+        curve, the first sounding curve is selected by default, i.e.
+        `index_rhoa` equals ``0``.
+
+    :param kws: dict - Additional keyword arguments for reading the
+        dataframe with pandas.
+
+    :return: dataframe - Sanitized |VES| dataframe with `AB`, `MN` and
+        `resistivity` as the column headers.
+
+    :Example:
+
+        >>> from kalfeat.tools.coreutils import vesSelector
+        >>> df = vesSelector (data='data/ves/ves_gbalo.csv')
+        >>> df.head(3)
+        ...    AB   MN  resistivity
+        0   1  0.4          943
+        1   2  0.4         1179
+        2   3  0.4         1103
+        >>> df = vesSelector ('data/ves/ves_gbalo.csv', index_rhoa=3 )
+        >>> df.head(3)
+        ...    AB   MN  resistivity
+        0   1  0.4          457
+        1   2  0.4          582
+        2   3  0.4          558
+    """
+
+    for arr in (AB, MN, rhoa):
+        if arr is not None:
+            _assert_all_types(arr, list, tuple, np.ndarray, pd.Series)
+
+    try:
+        index_rhoa = index_rhoa if index_rhoa is None else int(index_rhoa)
+    except (TypeError, ValueError):
+        raise TypeError(
+            f'Index should be an integer, not {type(index_rhoa).__name__!r}')
+
+    if data is not None:
+        if isinstance(data, str):
+            try:
+                data = _is_readable(data, **kws)
+            except TypeError as typError:
+                raise VESError(str(typError))
+
+        data = _assert_all_types(data, pd.DataFrame)
+
+        # sanitize the dataframe
+        pObj = P()
+        ncols = pObj(hl=list(data.columns), kind='ves')
+        if ncols is None:
+            raise HeaderError(f"Columns {smft(pObj.icpr)} are missing in"
+                              " the given dataset.")
+        data.columns = ncols
+        try:
+            rhoa = data.resistivity
+        except AttributeError:
+            raise ResistivityError(
+                "Data validation aborted! Missing resistivity values.")
+        else:
+            # In this case, we get multiple resistivity values
+            # corresponding to the different sounding curves
+            if rhoa.ndim > 1:
+                if index_rhoa is None:
+                    index_rhoa = 0
+                elif index_rhoa >= len(rhoa.columns):
+                    warnings.warn(f'The index `{index_rhoa}` is out of the'
+                                  f' range `{len(rhoa.columns)-1}` for'
+                                  ' selecting the specific resistivity'
+                                  ' data. By default, we only keep the'
+                                  ' data at index 0.')
+                    index_rhoa = 0
+
+            rhoa = rhoa.iloc[:, index_rhoa] if rhoa.ndim > 1 else rhoa
+
+        if 'MN' in data.columns:
+            MN = data.MN
+        try:
+            AB = data.AB
+        except AttributeError:
+            raise VESError("Data validation aborted! Current electrode"
+                           " values are missing. Specify the depth"
+                           " measurement!")
+
+    if rhoa is None:
+        raise ResistivityError(
+            "Data validation aborted! Missing resistivity values.")
+    if AB is None:
+        raise VESError("Data validation aborted! Current electrode values"
+                       " are missing. Specify the depth measurement!")
+
+    AB = np.array(AB)
+    MN = np.array(MN)
+    rhoa = np.array(rhoa)
+
+    if len(AB) != len(rhoa):
+        raise VESError("The depth measurement from the current electrodes"
+                       " `AB` and the resistivity values `rhoa` must have"
+                       f" the same length. But `{len(AB)}` and"
+                       f" `{len(rhoa)}` were given.")
+
+    sdata = pd.DataFrame(
+        {'AB': AB, 'MN': MN, 'resistivity': rhoa}, index=range(len(AB)))
+
+    return sdata
+
+
+@docSanitizer()
+def fill_coordinates(
+    data: DataFrame = None,
+    lon: Array = None,
+    lat: Array = None,
+    east: Array = None,
+    north: Array = None,
+    epsg: Optional[int] = None,
+    utm_zone: Optional[str] = None,
+    datum: str = 'WGS84'
+) -> Tuple[DataFrame, str]:
+    """ Recompute coordinate values.
+
+    Compute the couples (easting, northing) or (longitude, latitude) and
+    set the newly calculated values into a dataframe.
+
+    Parameters
+    -----------
+    data : dataframe,
+        Dataframe containing the `lat`, `lon` or `east` and `north`
+        columns. Not all data need to be provided. If both (`lat`, `lon`)
+        and (`east`, `north`) are given, (`easting`, `northing`) will be
+        overwritten.
+
+    lat: array-like float or string (DD:MM:SS.ms)
+        Values composing the `latitude` of the points.
+
+    lon: array-like float or string (DD:MM:SS.ms)
+        Values composing the `longitude` of the points.
+
+    east : array-like float
+        Values composing the easting coordinates in meters.
+
+    north : array-like float
+        Values composing the northing coordinates in meters.
+
+    datum: string
+        well known datum, e.g. WGS84, NAD27, etc.
+
+    epsg: int
+        EPSG number defining the projection (see
+        http://spatialreference.org/ref/ for more info). Overrides
+        `utm_zone` if both are provided.
+
+    utm_zone : string
+        zone number and 'S' or 'N', e.g. '55S'. Defaults to the centre
+        point of the provided points.
+
+    Returns
+    -------
+    - `data`: Dataframe with the newly computed coordinate values.
+    - `utm_zone`: zone number and 'S' or 'N'.
+
+    """
+    def _get_coordcomps(str_, df):
+        """ Retrieve the coordinate values and assert whether they are
+        given. If ``True``, returns the array of the given item and a
+        validity flag. Note that if the data equal ``0``, we assume the
+        values are not provided.
+
+        :param str_: str - item in the `df` columns
+        :param df: DataFrame - dataframe expected to contain the `str_`
+            item.
+        """
+        if str_ in df.columns:
+            return df[str_], np.all(df[str_]) != 0
+        return None, None
+
+    def _set_coordinate_values(x, y, *, func):
+        """ Iterate over `x` and `y` and output the new coordinate values
+        computed from `func`.
+
+        :param x: iterable values
+        :param y: iterable values
+        :param func: function; can be:
+            - ``project_point_utm2ll`` for `UTM` to `latlon`, or
+            - ``project_point_ll2utm`` for `latlon` to `UTM`
+        :returns:
+            - xx, newly calculated
+            - yy, newly calculated
+            - utm zone
+        """
+        xx = np.zeros_like(x)
+        yy = np.zeros_like(xx)
+        for ii, (la, lo) in enumerate(zip(x, y)):
+            e, n, uz = func(
+                la, lo, utm_zone=utm_zone, datum=datum, epsg=epsg
+            )
+            xx[ii] = e
+            yy[ii] = n
+
+        return xx, yy, uz
+
+    if data is None:
+        data = pd.DataFrame(
+            dict(
+                longitude=lon,
+                latitude=lat,
+                easting=east,
+                northing=north
+            ),
+            # pass an index if using all scalar values
+            index=range(4)
+        )
+
+    if data is not None:
+        data = _assert_all_types(data, pd.DataFrame)
+
+    lon, lon_isvalid = _get_coordcomps('longitude', data)
+    lat, lat_isvalid = _get_coordcomps('latitude', data)
+    east, e_isvalid = _get_coordcomps('easting', data)
+    north, n_isvalid = _get_coordcomps('northing', data)
+
+    if lon_isvalid and lat_isvalid:
+        try:
+            east, north, uz = _set_coordinate_values(
+                lat.values, lon.values,
+                func=project_point_ll2utm,
+            )
+        except:  # pass if an error occurs
+            pass
+        else:
+            data['easting'] = east
+            data['northing'] = north
+
+    elif e_isvalid and n_isvalid:
+        if utm_zone is None:
+            warnings.warn(
+                'Should provide the UTM zone for the `latitude` and'
+                ' `longitude` calculation. `NoneType` can not be used as'
+                ' the UTM zone number. Refer to the documentation.')
+        try:
+            lat, lon, utm_zone = _set_coordinate_values(
+                east.values, north.values,
+                func=project_point_utm2ll,
+            )
+        except:
+            pass
+        else:
+            data['longitude'] = lon
+            data['latitude'] = lat
+
+    return data, utm_zone
+
+
+def _assert_data(data: DataFrame):
+    """ Assert the data and return the property dataframe. """
+    data = _assert_all_types(
+        data, list, tuple, np.ndarray, pd.Series, pd.DataFrame)
+
+    if isinstance(data, pd.DataFrame):
+        cold, ixc = list(), list()
+        for i, ckey in enumerate(data.columns):
+            for kp in P().isrll:
+                if ckey.lower().find(kp) >= 0:
+                    cold.append(kp)
+                    ixc.append(i)
+                    break
+
+        if len(cold) == 0:
+            raise ValueError(f'Expected {smft(P().isrll)}'
+                             ' columns, but they were not found in the'
+                             ' given dataframe.')
+
+        dup = cold.copy()
+        # filter and remove the duplicated columns one by one.
+        list(filter(lambda x: dup.remove(x), set(cold)))
+        dup = set(dup)
+        if len(dup) != 0:
+            raise HeaderError(
+                f'Duplicate column{"s" if len(dup) > 1 else ""}'
+                f' {smft(dup)} found. Please provide the right column'
+                ' names in the dataset.')
+        data_ = data[cold]
+
+        col = list(data_.columns)
+        for i, vc in enumerate(col):
+            for k in P().isrll:
+                if vc.lower().find(k) >= 0:
+                    col[i] = k
+                    break
+        # apply the sanitized property names
+        data_.columns = col
+
+    return data_
+
+
+def is_erp_series(
+        data: Series,
+        dipolelength: Optional[float] = None
+) -> DataFrame:
+    """ Validate the series.
+
+    The `data` should be the resistivity values with one of the following
+    property index names: ``resistivity`` or ``rho``. An error is raised
+    if none is detected. The station positions are computed and filled
+    using the `dipolelength` value.
+
+    Parameters
+    -----------
+    data : pandas Series object
+        Object of resistivity values.
+
+    dipolelength: float
+        Distance of the dipole along the whole survey line. If it is not
+        given, the station locations are computed and filled using the
+        default value of the dipole. The *default* value is set to
+        ``10 meters``.
+
+    Returns
+    --------
+    A dataframe of the property indexes such as
+    ``['station', 'easting', 'northing', 'resistivity']``.
+
+    Raises
+    ------
+    ResistivityError
+        If the name does not match the `resistivity` column name.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from kalfeat.tools.coreutils import is_erp_series
+    >>> data = pd.Series (np.abs (np.random.rand (42)), name ='res')
+    >>> data = is_erp_series (data)
+    >>> data.columns
+    ... Index(['station', 'easting', 'northing', 'resistivity'], dtype='object')
+    >>> data = pd.Series (np.abs (np.random.rand (42)), name ='NAN')
+    >>> data = is_erp_series (data)
+    ... ResistivityError: Unable to detect the resistivity column: 'NAN'.
+
+    """
+
+    data = _assert_all_types(data, pd.Series)
+    is_valid = False
+    for p in P().iresistivity:
+        if data.name.lower().find(p) >= 0:
+            data.name = p
+            is_valid = True
+            break
+
+    if not is_valid:
+        raise ResistivityError(
+            f"Unable to detect the resistivity column: {data.name!r}."
+        )
+
+    if is_valid:
+        df = is_erp_dataframe(
+            pd.DataFrame(
+                {
+                    data.name: data,
+                    'NAN': np.zeros_like(data)
+                }
+            ),
+            dipolelength=dipolelength,
+        )
+    return df
+
+
+def is_erp_dataframe(
+        data: DataFrame,
+        dipolelength: Optional[float] = None
+) -> DataFrame:
+    """ Check whether the dataframe contains the electrical resistivity
+    profiling (ERP) index properties.
+
+    The dataframe is reordered to fit the order of the index properties,
+    and is filled with ``0.`` where a property is missing. However, if the
+    `station` property is not given, it is set using the `dipolelength`
+    default value of ``10.``.
+
+    Parameters
+    ----------
+    data : Dataframe object
+        Dataframe object. The dataframe columns should match the ERP
+        property objects such as
+        ``['station', 'resistivity', 'longitude', 'latitude']`` or
+        ``['station', 'resistivity', 'easting', 'northing']``.
+
+    dipolelength: float
+        Distance of the dipole along the whole survey line. If the station
+        is not given in the `data` columns, the station locations are
+        computed and fill the station column using the default value of
+        the dipole. The *default* value is set to ``10 meters``.
+
+    Returns
+    --------
+    A new dataframe with the index properties.
+
+    Raises
+    ------
+    HeaderError
+        - If none of the columns match the property indexes.
+        - If duplicated column names are found in the given data header.
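+
+    Notes
+    ------
+    When the station column is absent from `data`, it is filled with zeros
+    and the station positions are then generated from `dipolelength`
+    (``10.`` m by default), i.e.
+    ``np.arange(0, nrows * dipolelength, dipolelength)``.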
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from kalfeat.tools.coreutils import is_erp_dataframe
+    >>> df = pd.read_csv ('data/erp/testunsafedata.csv')
+    >>> df.columns
+    ... Index(['x', 'stations', 'resapprho', 'NORTH'], dtype='object')
+    >>> df = is_erp_dataframe (df)
+    >>> df.columns
+    ... Index(['station', 'easting', 'northing', 'resistivity'], dtype='object')
+
+    """
+
+    data = _assert_all_types(data, pd.DataFrame)
+    datac = data.copy()
+
+    def _is_in_properties(h):
+        """ Check whether the item header `h` is in the property values.
+        Return `h` and its corresponding `key` in the property values. """
+        for key, values in P().idicttags.items():
+            for v in values:
+                if h.lower().find(v) >= 0:
+                    return h, key
+        return None, None
+
+    def _check_correspondence(pl, dl):
+        """ Collect the duplicated names in the data columns. """
+        return [l for l in pl for d in dl if d.lower().find(l) >= 0]
+
+    cold, c = list(), list()
+    # create the property object
+    pObj = P(data.columns)
+    for i, ckey in enumerate(list(datac.columns)):
+        h, k = _is_in_properties(ckey)
+        if h is not None:
+            cold.append(h)
+        if k is not None:
+            c.append(k)
+
+    if len(cold) == 0:
+        raise HeaderError(
+            f'Wrong column headers {list(data.columns)}.'
+            f' Unable to find the expected {smft(pObj.isrll)}'
+            ' column properties.'
+        )
+
+    dup = cold.copy()
+    # filter and remove the duplicated columns one by one.
+    list(filter(lambda x: dup.remove(x), set(cold)))
+
+    dup = set(dup)
+    ress = _check_correspondence(pObj() or pObj.idicttags.keys(), dup)
+
+    if len(dup) != 0:
+        raise HeaderError(
+            f'Duplicate column{"s" if len(dup) > 1 else ""}'
+            f' {smft(dup)} {"are" if len(dup) > 1 else "is"}'
+            f' found. It seems to correspond to {smft(ress)}.'
+            ' Please check your data column names.'
+        )
+
+    # fetch the property column names, reorder the columns to match
+    # ['station', 'resistivity', 'longitude', 'latitude', ...] and
+    # replace the missing columns with 0.
+    data_ = data[cold]
+    data_.columns = c
+    data_ = data_.reindex(columns=pObj.idicttags.keys(), fill_value=0.)
+    dipolelength = _assert_all_types(
+        dipolelength, float, int) if dipolelength is not None else None
+
+    if np.all(data_.station == 0.):
+        # the station column was missing: generate the positions
+        # from the dipole length (default ``10.`` m)
+        if dipolelength is None:
+            dipolelength = 10.
+        data_.station = np.arange(
+            0, data_.shape[0] * dipolelength, dipolelength)
+
+    return data_
+
+
+def erpSelector(
+    f: str | NDArray | Series | DataFrame,
+    columns: str | List[str] = ...,
+    **kws: Any
+) -> DataFrame:
+    """ Read and sanitize the data collected from the survey.
+
+    `f` should be an array, a dataframe, a series, or data arranged in
+    ``.csv`` or ``.xlsx`` format. Be sure to provide the header of each
+    column in the worksheet. If a file is given, the header columns should
+    be arranged as ``['station', 'resistivity', 'longitude', 'latitude']``.
+    Note that the coordinate columns (`longitude` and `latitude`) are not
+    compulsory.
+
+    Parameters
+    ----------
+    f: Path-like object, ndarray, Series or Dataframe,
+        If a path-like object is given, only the `.csv` and `.xlsx` file
+        formats can be parsed. However, if an ndarray is given and its
+        shape along axis 1 is greater than 4, the ndarray is shrunk.
+
+    columns: list
+        list of the valuable columns. It can be used to fix the specific
+        values along axis 1 of the array. It should contain the prefix or
+        the whole name of each item in
+        ``['station', 'resistivity', 'longitude', 'latitude']``.
+
+    kws: dict
+        Additional keyword arguments for the pandas `pd.read_csv` and
+        `pd.read_excel` methods.
Be sure to provide arguments that match the file format
+        being read: for instance, passing ``sep=','`` while reading an
+        ``.xlsx`` file will raise an error, since the `sep` parameter is
+        only acceptable for parsing the `.csv` file format.
+
+
+    Returns
+    -------
+    DataFrame with valuable column(s).
+
+    Notes
+    ------
+    The number of acceptable columns is ``4``. If there are more than
+    `4` columns, the data is shrunk to match the expected columns.
+    Furthermore, if the header is not specified in `f`, the default column
+    arrangement is used. In that case, the second column is
+    considered as the ``resistivity`` column.
+
+    Examples
+    ---------
+    >>> import numpy as np
+    >>> from kalfeat.tools.coreutils import erpSelector
+    >>> df = erpSelector ('data/erp/testsafedata.csv')
+    >>> df.shape
+    ... (45, 4)
+    >>> list(df.columns)
+    ... ['station','resistivity', 'longitude', 'latitude']
+    >>> df = erpSelector('data/erp/testunsafedata.xlsx')
+    >>> list(df.columns)
+    ... ['easting', 'station', 'resistivity', 'northing']
+    >>> df = erpSelector(np.random.randn(7, 7))
+    >>> df.shape
+    ... (7, 4)
+    >>> list(df.columns)
+    ... ['station', 'resistivity', 'longitude', 'latitude']
+
+    """
+
+    if columns is ...: columns=None
+    if columns is not None:
+        if isinstance(columns, str):
+            columns =columns.replace(':', ',').replace(';', ',')
+            if ',' in columns: columns =columns.split(',')
+
+    if isinstance(f, str):
+        if os.path.isfile(f):
+            try :
+                f = _is_readable(f, **kws)
+            except TypeError as typError:
+                raise ERPError (str(typError))
+
+    if isinstance( f, np.ndarray):
+        name = copy.deepcopy(columns)
+        columns = P().isrll if columns is None else columns
+        colnum = 1 if f.ndim ==1 else f.shape[1]
+
+        if colnum==1:
+            if isinstance (name, list) :
+                if len(name) ==1: name = name[0]
+            f = is_erp_series (
+                pd.Series (f, name = name or columns[1]
+                           )
+                )
+
+        elif colnum==2 :
+            f= pd.DataFrame (f, columns = columns
+                             if columns is None
+                             else columns[:2]
+                             )
+
+        elif colnum==3:
+            warnings.warn("Missing one coordinate column"
+                          " (`longitude|latitude`). If the `longitude` and"
+                          " `latitude` data are not available, use the"
+                          f" {smft(P().isrll[:2])} columns instead.",
+                          UserWarning)
+            columns = name or columns [:colnum]
+            f= pd.DataFrame (f[:, :len(columns)],
+                             columns =columns )
+
+        elif colnum==4:
+            f =pd.DataFrame (f, columns =columns
+                )
+        elif colnum > 4:
+            # add 'none' columns for the remaining columns.
+            f =pd.DataFrame (
+                f, columns = columns + [
+                    'none' for i in range(colnum-4)]
+                )
+
+    if isinstance(f, pd.DataFrame):
+        f = is_erp_dataframe( f)
+    elif isinstance(f , pd.Series ):
+        f = is_erp_series(f)
+    else :
+        amsg = smft(accept_types (
+            pd.Series, pd.DataFrame, np.ndarray)
+            + ['*.xls', '*.csv'])
+        raise ValueError (f"Unacceptable data. Accept only {amsg}."
+                          )
+    # use an explicit element-wise comparison: raise only when the
+    # resistivity values are all zero (i.e. missing).
+    if np.all(f.resistivity ==0):
+        raise ResistivityError('Resistivity values need to be supplied.')
+
+    return f
+
+def _fetch_prefix_index (
+    arr:NDArray [DType[float]] = None,
+    col: List[str] = None,
+    df : DataFrame = None,
+    prefixs: List [str ] =None
+) -> Tuple [int, NDArray]:
+    """ Retrieve the index of a specific column.
+
+    Use the given station positions collected on the field to
+    compute the dipole length during the whole survey.
+
+    :param arr: array. Ndarray of data where one column must contain the
+            position values.
+    :param col: list. The list should be considered as the head of the array.
+        Each position in the list should fit the column data in the array.
It raises + an error if the number of item in the list is different to the size + of array in axis=1. + :param df: dataframe. When supply, the `arr` and `col` is not + compulsory. + + :param prefixs: list. Contains specific column prefixs to + fetch the corresponding data. For instance:: + + - Station prefix : ['pk','sta','pos'] + - Easting prefix : ['east', 'x', 'long'] + - Northing prefix: ['north', 'y', 'lat'] + :returns: + - index of the position columns in the data + - station position array-like. + + :Example: + >>> from numpy as np + >>> from kalfeat.tools.coreutils import _assert_positions + >>> array1 = np.c_[np.arange(0, 70, 10), np.random.randn (7,3)] + >>> col = ['pk', 'x', 'y', 'rho'] + >>> index, = _fetch_prefix_index (array1 , col = ['pk', 'x', 'y', 'rho'], + ... prefixs = EASTPREFIX) + ... 1 + >>> index, _fetch_prefix_index (array1 , col = ['pk', 'x', 'y', 'rho'], + ... prefixs = NOTHPREFIX ) + ... 2 + """ + if prefixs is None: + raise ValueError('Please specify the list of items to compose the ' + 'prefix to fetch the columns data. For instance' + f' `station prefix` can be `{P().istation}`.') + + if arr is None and df is None : + raise TypeError ( 'Expected and array or a dataframe not' + ' a Nonetype object.' + ) + elif df is None and col is None: + raise StationError( 'Column list is missing.' + ' Could not detect the position index.') + + if isinstance( df, pd.DataFrame): + # collect the resistivity from the index + # if a dataFrame is given + arr, col = df.values, df.columns + + if arr.ndim ==1 : + # Here return 0 as colIndex + return 0, arr + if isinstance(col, str): col =[col] + if len(col) != arr.shape[1]: + raise ValueError ( + f'Column should match the array shape in axis =1 <{arr.shape[1]}>.' + f' But {"was" if len(col)==1 else "were"} given') + + # convert item in column in lowercase + comsg = col.copy() + col = list(map(lambda x: x.lower(), col)) + colIndex = [col.index (item) for item in col + for pp in prefixs if item.find(pp) >=0] + + if len(colIndex) is None or len(colIndex) ==0: + raise ValueError (f'Unable to detect the position in `{smft(comsg)}`' + ' columns. Columns must contain at least' + f' `{smft(prefixs)}`.') + + return colIndex[0], arr + + +def _assert_station_positions( + arr: SP = None, + prefixs: List [str] =..., + **kws +) -> Tuple [int, float]: + """ Assert positions and compute dipole length. + + Use the given station positions collected on the field to + detect the dipole length during the whole survey. + + :param arr: array. Ndarray of data where one column must the + positions values. + :param col: list. The list should be considered as the head of array. Each + position in the list sould fit the column data in the array. It raises + an error if the number of item in the list is different to the size + of array in axis=1. + :param df: dataframe. When supply, the `arr` and `col` are not needed. + + :param prefixs: list. Contains all the station column names prefixs to + fetch the corresponding data. + :returns: + - positions: new positions numbering from station `S00` to ... + - dipolelength: recomputed dipole value + :Example: + + >>> from numpy as np + >>> from kalfeat.tools.coreutils import _assert_station_positions + >>> array1 = np.c_[np.arange(0, 70, 10), np.random.randn (7,3)] + >>> col = ['pk', 'x', 'y', 'rho'] + >>> _assert_positions(array1, col) + ... (array([ 0, 10, 20, 30, 40, 50, 60]), 10) + >>> array1 = np.c_[np.arange(30, 240, 30), np.random.randn (7,3)] + ... 
(array([ 0, 30, 60, 90, 120, 150, 180]), 30)
+
+    """
+    # ``prefixs is (None or ...)`` only tested for Ellipsis, since
+    # ``None or ...`` evaluates to ``...``; test both sentinels explicitly.
+    if prefixs is None or prefixs is ...: prefixs = P().istation
+
+    colIndex, arr =_fetch_prefix_index( arr=arr, prefixs = prefixs, **kws )
+    positions = arr[:, colIndex]
+    # assert the positions are arranged from lower to higher,
+    # otherwise the numbering is wrong.
+    fsta = np.argmin(positions)
+    lsta = np.argmax (positions)
+    if int(fsta) !=0 or int(lsta) != len(positions)-1:
+        raise StationError(
+            'Wrong numbering! Please number the positions from the first '
+            'station to the last station. Check your position numbering.')
+
+    dipoleLength = int(np.abs (positions.min() - positions.max ()
+                           ) / (len(positions)-1))
+    # renumber the positions
+    positions = np.arange(0 , len(positions) *dipoleLength ,
+                          dipoleLength )
+
+    return positions, dipoleLength
+
+@refAppender(__doc__)
+def plotAnomaly(
+    erp: Array | List[float],
+    cz: Optional [Sub[Array], List[float]] = None,
+    s: Optional [str] = None,
+    figsize: Tuple [int, int] = (10, 4),
+    fig_dpi: int = 300 ,
+    savefig: str | None = None,
+    show_fig_title: bool = True,
+    style: str = 'seaborn',
+    fig_title_kws: Dict[str, str|Any] = ...,
+    czkws: Dict [str , str|Any] = ... ,
+    legkws: Dict [Any , str|Any] = ... ,
+    **kws,
+) -> None:
+
+    """ Plot the whole |ERP| line and the selected conductive zone.
+
+    The conductive zone can be supplied manually as a subset of the `erp`
+    or by specifying the station expected for the drilling location, for
+    instance ``S07`` for the seventh station. Furthermore, for automatic
+    detection, one should set the station argument `s` to ``auto``. However,
+    it is recommended to provide the `cz` or the `s` to have full control.
+    The conductive zone is juxtaposed to the whole |ERP| survey. One can
+    customize the `cz` plot by passing `Matplotlib pyplot`_ additional
+    keyword arguments through the keyword argument `czkws`.
+
+    :param erp: array_like - the |ERP| survey line. The line is an array of
+        resistivity values.
+
+    :param cz: array_like - the selected conductive zone. If ``None``, only
+        the `erp` is displayed. Note that `cz` is a subset of the `erp`
+        array.
+
+    :param s: str - The station location given as a string (e.g. ``s= "S10"``)
+        or as a station number (indexing; e.g ``s =10``). If the value is set
+        to ``"auto"``, `s` is found automatically and `cz` is fetched as well.
+
+    :param figsize: tuple - Tuple value of the figure size. Refer to the
+        web resources `Matplotlib figure`_.
+
+    :param fig_dpi: int - figure resolution "dot per inch". Refer to
+        `Matplotlib figure`_.
+
+    :param savefig: str - save figure. Refer to `Matplotlib figure`_.
+
+    :param show_fig_title: bool - display the title of the figure.
+
+    :param fig_title_kws: dict - Keyword arguments of the figure suptitle.
+        Refer to `Matplotlib figsuptitle`_.
+
+    :param style: str - the style for customizing the visualization. For
+        instance, to get the first seven available styles in pyplot, one can
+        run the script below::
+
+            plt.style.available[:7]
+
+        Further details can be found in the web resources below or on
+        `GeekforGeeks`_.
+
+    :param czkws: dict - `Matplotlib pyplot`_ additional keyword arguments
+        to customize the `cz` plot.
+
+    :param legkws: dict - Matplotlib legend additional keyword
+        arguments.
+
+    :param kws: dict - additional keyword arguments for `Matplotlib pyplot`_
+        to customize the `erp` plot.
+
+
+    :Example:
+    >>> import numpy as np
+    >>> from kalfeat.tools.coreutils import (
plotAnomaly, defineConductiveZone)
+    >>> test_array = np.random.randn (10)
+    >>> selected_cz ,*_ = defineConductiveZone(test_array, 7)
+    >>> plotAnomaly(test_array, selected_cz )
+    >>> plotAnomaly(test_array, selected_cz , s= 5)
+    >>> plotAnomaly(test_array, s= 's02')
+    >>> plotAnomaly(test_array)
+
+    .. note::
+
+        If `cz` is given, one does not need to worry about the station `s`;
+        `s` can keep its default value ``None``.
+
+
+    References
+    -----------
+    See Matplotlib Axes: https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.tick_params.html
+    GeekforGeeks: https://www.geeksforgeeks.org/style-plots-using-matplotlib/#:~:text=Matplotlib%20is%20the%20most%20popular,without%20using%20any%20other%20GUIs.
+
+    """
+
+    def format_thicks (value, tick_number):
+        """ Format the tick labels with 'FuncFormatter(func)'
+        rather than using::
+
+            axi.xaxis.set_major_locator (plt.MaxNLocator(3))
+
+            ax.xaxis.set_major_formatter (plt.FuncFormatter(format_thicks))
+        """
+        if value % 7 ==0:
+            return 'S{:02}'.format(int(value)+ 1)
+        return None
+
+
+    erp = _assert_all_types(
+        erp, tuple, list , np.ndarray , pd.Series)
+    if cz is not None:
+        cz = _assert_all_types(
+            cz, tuple, list , np.ndarray , pd.Series)
+        cz = np.array (cz)
+
+    erp =np.array (erp)
+
+    plt.style.use (style)
+
+    kws =dict (
+        color=P().frcolortags.get('fr1') if kws.get(
+            'color') is None else kws.get('color'),
+        linestyle='-' if kws.get('ls') is None else kws.get('ls'),
+        linewidth=2. if kws.get('lw') is None else kws.get('lw'),
+        label = 'Electrical resistivity profiling' if kws.get(
+            'label') is None else kws.get('label')
+        )
+
+    # ``czkws is (None or ...)`` only caught the Ellipsis default;
+    # test both sentinels explicitly.
+    if czkws is None or czkws is ... :
+        czkws =dict (color=P().frcolortags.get('fr3'),
+                     linestyle='-',
+                     linewidth=3,
+                     label = 'Conductive zone'
+                     )
+
+    if czkws.get('color') is None:
+        # fall back to the default conductive-zone color; the former
+        # ``frcolortags.get(czkws['color'])`` looked up a ``None`` key.
+        czkws['color']= P().frcolortags.get('fr3')
+
+    if (xlabel := kws.get('xlabel')) is not None :
+        del kws['xlabel']
+    if (ylabel := kws.get('ylabel')) is not None :
+        del kws['ylabel']
+
+    if (rotate:= kws.get ('rotate')) is not None:
+        del kws ['rotate']
+
+    fig, ax = plt.subplots(1,1, figsize =figsize)
+
+    leg =[]
+
+    zl, = ax.plot(np.arange(len(erp)), erp,
+                  **kws
+                  )
+    leg.append(zl)
+
+    if s =='' : s= None # for consistency
+    if s is not None:
+        auto =False ; keepindex =True
+        if isinstance (s , str):
+            auto = s.lower()=='auto'
+            # ``if 's' or 'pk' in s.upper()`` was always true; check
+            # the prefixes explicitly instead.
+            if 's' in s.lower() or 'pk' in s.lower():
+                # the station is provided.
+                keepindex =False
+        cz , _ , _, ix = defineConductiveZone(
+            erp, s = s , auto = auto, keepindex=keepindex
+            )
+
+        s = "S{:02}".format(ix +1) if s is not None else s
+
+    if cz is not None:
+        # check with np.isin whether `cz` is a subset of `erp`.
+        if not _isin (erp, cz ):
+            raise ValueError ('Expected a conductive zone to be a subset of'
+                              ' the resistivity profiling line.')
+        # `cz` is a subset array
+        z = np.ma.masked_values (erp, np.isin(erp, cz ))
+        # a masked array is constructed, so we need to read the
+        # attribute fill_value as a mask. However, we need np.invert or
+        # the tilde operator to specify that every value except the `cz`
+        # values must be masked. Note that the dtype must be boolean.
+        sample_masked = np.ma.array(
+            erp, mask = ~z.fill_value.astype('bool') )
+
+        czl, = ax.plot(
+            np.arange(len(erp)), sample_masked, 'o',
+            **czkws)
+        leg.append(czl)
+
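+    # note: station ticks are labelled 'S01', 'S02', ...; for long
+    # profiles (>= 14 stations) a FuncFormatter keeps only every
+    # seventh label so the x-axis stays readable.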
+    ax.tick_params (labelrotation = 0. if rotate is None else rotate)
+    ax.set_xticks(range(len(erp)),
+                  )
+
+    if len(erp ) >= 14 :
+        ax.xaxis.set_major_formatter (plt.FuncFormatter(format_thicks))
+    else :
+
+        ax.set_xticklabels(
+            ['S{:02}'.format(int(i)+1) for i in range(len(erp))],
+            rotation =0. if rotate is None else rotate )
+
+
+    # ``legkws is (None or ...)`` only caught the Ellipsis default;
+    # test both sentinels explicitly.
+    if legkws is None or legkws is ...:
+        legkws =dict()
+
+    ax.set_xlabel ('Stations') if xlabel is None else ax.set_xlabel (xlabel)
+    ax.set_ylabel ('Resistivity (Ω.m)'
+                ) if ylabel is None else ax.set_ylabel (ylabel)
+
+    ax.legend( handles = leg,
+              **legkws )
+
+
+    if show_fig_title:
+        title = 'Plot ERP line with SVES = {0}'.format(s if s is not None else '')
+        if fig_title_kws is None or fig_title_kws is ...:
+            fig_title_kws = dict (
+                t = title if s is not None else title.replace (
+                    'with SVES =', ''),
+                style ='italic',
+                bbox =dict(boxstyle='round',facecolor ='lightgrey'))
+
+        plt.tight_layout()
+        fig.suptitle(**fig_title_kws,
+                      )
+    if savefig is not None :
+        plt.savefig(savefig,
+                dpi=fig_dpi,
+                )
+
+    plt.show()
+
+
+def defineConductiveZone(
+    erp:Array| pd.Series | List[float] ,
+    s: Optional [str , int] = None,
+    p: SP = None,
+    auto: bool = False,
+    **kws,
+) -> Tuple [Array, Array, int, int] :
+    """ Define the conductive zone as a subset of the erp line.
+
+    Indeed the conductive zone is a specific zone expected to hold the
+    drilling location `s`. If the drilling location is not provided, it is
+    by default the zone of the lowest resistivity value in the `erp` line.
+
+
+    :param erp: array_like, the array containing the apparent resistivity values
+    :param s: str or int, the station position.
+    :param auto: bool. If ``True``, the station position is taken as the
+        position of the lowest resistivity value in the |ERP|.
+
+    :returns:
+        - conductive zone of resistivity values
+        - conductive zone positioning
+        - station position index in the conductive zone
+        - station position index in the whole |ERP| line
+
+    :Example:
+        >>> import numpy as np
+        >>> from kalfeat.tools.coreutils import defineConductiveZone
+        >>> test_array = np.random.randn (10)
+        >>> selected_cz ,*_ = defineConductiveZone(test_array, 's20')
+        >>> shortPlot(test_array, selected_cz )
+    """
+    if isinstance(erp, pd.Series): erp = erp.values
+
+    # conductive zone positioning
+    pcz : Optional [Array] = None
+
+    if s is None and auto is False:
+        raise TypeError ('Expected the station position. NoneType is given.')
+    elif s is None and auto:
+        # use argmin so several equal minima cannot break the unpacking
+        s = int(np.argmin (erp))
+    s, pos = _assert_stations(s, **kws )
+    # take the last position if the position is outside
+    # the number of stations.
+    pos = len(erp) -1 if pos >= len(erp) else pos
+    # frame the `sves` (drilling position) and define the conductive zone
+    ir = erp[:pos][-3:] ; il = erp[pos:pos +3 +1 ]
+    cz = np.concatenate((ir, il))
+
+    if p is not None:
+        if len(p) != len(erp):
+            raise StationError (
+                'Array of positions and erp line must have the same '
+                f'length: `{len(p)}` and `{len(erp)}` were given.')
+
+        sr = p[:pos][-3:] ; sl = p[pos:pos +3 +1 ]
+        pcz = np.concatenate((sr, sl))
+
+    # get the new position in the selected conductive zone from the
+    # position in the whole erp; take the first match.
+    pix = int(np.where (cz == erp[pos])[0][0])
+
+    return cz , pcz, pix, pos
+
+def _assert_stations(
+    s:Any ,
+    dipole:Any = None,
+    keepindex:bool = False
+) -> Tuple[str, int]:
+    """ Sanitize the station and return the station name and index.
+
+    ``pk`` and ``S`` can be used as prefixes to define the station `s`. For
+    instance ``S01`` and ``PK01`` mean the first station.
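+    As a quick sketch, ``'pk05'``, ``'S05'`` and ``5`` all normalize to
+    ``('S05', 4)`` with the default arguments (see the examples below).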
+ + :param s: Station name + :type s: str, int + + :param dipole: dipole_length in meters. + :type dipole: float + + :param keepindex: bool - Stands for keeping the Python indexing. If set to + ``True`` so the station should start by `S00` and so on. + + :returns: + - station name + - index of the station. + + .. note:: + + The defaut station numbering is from 1. SO if ``S00` is given, and + the argument `keepindex` is still on its default value i.e ``False``, + the station name should be set to ``S01``. Moreover, if `dipole` + value is given, the station should named according to the + value of the dipole. For instance for `dipole` equals to ``10m``, + the first station should be ``S00``, the second ``S10`` , + the third ``S30`` and so on. However, it is recommend to name the + station using counting numbers rather than using the dipole + position. + + :Example: + >>> from kalfeat.tools.coreutils import _assert_stations + >>> _assert_stations('pk01') + ... ('S01', 0) + >>> _assert_stations('S1') + ... ('S01', 0) + >>> _assert_stations('S1', keepindex =True) + ... ('S01', 1) # station here starts from 0 i.e `S00` + >>> _assert_stations('S00') + ... ('S00', 0) + >>> _assert_stations('S1000',dipole ='1km') + ... ('S02', 1) # by default it does not keep the Python indexing + >>> _assert_stations('S10', dipole ='10m') + ... ('S02', 1) + >>> _assert_stations(1000,dipole =1000) + ... ('S02', 1) + """ + # in the case s is string: eg. "00", "pk01", "S001" + ix = 0 + + s = _assert_all_types(s, str, int, float) + + if isinstance(s, str): + s =s.lower().replace('pk', '').replace('s', '').replace('ta', '') + try : + s = int(s ) + except : + raise TypeError ('Unable to convert str to float.') + else : + # set index to 0 , is station `S00` is found for instance. + if s ==0 : + keepindex =True + + st = copy.deepcopy(s) + + if isinstance(s, int): + msg = 'Station numbering must start'\ + ' from {0!r} or set `keepindex` argument to {1!r}.' + msg = msg.format('0', 'False') if keepindex else msg.format( + '1', 'True') + if not keepindex: # station starts from 1 + if s <=0: + raise ValueError (msg ) + s , ix = "S{:02}".format(s), s - 1 + + elif keepindex: + + if s < 0: raise ValueError (msg) # for consistency + s, ix = "S{:02}".format(s ), s + # Recompute the station position if the dipole value are given + if dipole is not None: + if isinstance(dipole, str): #'10m' + if dipole.find('km')>=0: + + dipole = dipole.lower().replace('km', '000') + + dipole = dipole.lower().replace('m', '') + try : + dipole = float(dipole) + except : + raise StationError( 'Invalid literal value for' + f' dipole : {dipole!r}') + # since the renamed from dipole starts at 0 + # e.g. 0(S1)---10(S2)---20(S3) ---30(S4)etc .. + ix = int(st//dipole) ; s= "S{:02}".format(ix +1) + + return s, ix + +def _parse_args ( + args:Union[List | str ] +)-> Tuple [ pd.DataFrame, List[str|Any]]: + """ `Parse_args` function returns array of rho and coordinates + values (X, Y). + + Arguments can be a list of data, a dataframe or a Path like object. If + a Path-like object is set, it should be the priority of reading. + + :param args: arguments + + :return: ndarray or array-like arranged with apparent + resistivity at the first index + + .. note:: If a list of arrays is given or numpy.ndarray is given, + we assume that the columns at the first index fits the + apparent resistivity values. 
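+
+    A small sketch of the list behaviour (no file in the list, values
+    are illustrative):
+
+    >>> import numpy as np
+    >>> vals, col = _parse_args ([[1101., 1147., 1345.], [0., 10., 20.]])
+    >>> vals.shape, col
+    ... ((3, 2), None)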
+ + :Example: + >>> import numpy as np + >>> from kalfeat.tools.coreutils import _parse_args + >>> a, b = np.arange (1, 10 , 0.5), np.random.randn(9).reshape(3, 3) + >>> _parse_args ([a, 'data/erp/l2_gbalo.xlsx', b]) + ... array([[1.1010000e+03, 0.0000000e+00, 7.9075200e+05, 1.0927500e+06], + [1.1470000e+03, 1.0000000e+01, 7.9074700e+05, 1.0927580e+06], + [1.3450000e+03, 2.0000000e+01, 7.9074300e+05, 1.0927630e+06], + [1.3690000e+03, 3.0000000e+01, 7.9073800e+05, 1.0927700e+06], + [1.4060000e+03, 4.0000000e+01, 7.9073300e+05, 1.0927765e+06], + [1.5430000e+03, 5.0000000e+01, 7.9072900e+05, 1.0927830e+06], + [1.4800000e+03, 6.0000000e+01, 7.9072400e+05, 1.0927895e+06], + [1.5170000e+03, 7.0000000e+01, 7.9072000e+05, 1.0927960e+06], + [1.7540000e+03, 8.0000000e+01, 7.9071500e+05, 1.0928025e+06], + [1.5910000e+03, 9.0000000e+01, 7.9071100e+05, 1.0928090e+06]]) + + """ + + keys= ['res', 'rho', 'app.res', 'appres', 'rhoa'] + + col=None + if isinstance(args, list): + args, isfile = _assert_file(args) # file to datafame + if not isfile: # list of values + # _assert _list of array_length + args = np.array(args, dtype =np.float64).T + + if isinstance(args, pd.DataFrame): + # firt drop all untitled items + # if data is from xlsx sheets + args.drop([ c for c in args.columns if c.find('untitle')>=0 ], + axis =1, inplace =True) + + # get the index of items `resistivity` + ixs = [ii for ii, name in enumerate(args.columns ) + for item in keys if name.lower().find(item)>=0] + if len(set(ixs))==0: + raise ValueError( + f"Column name `resistivity` not found in {list(args.columns)}" + " Please provide the resistivity column.") + elif len(set(ixs))>1: + raise ValueError ( + f"Expected 1 but got {len(ixs)} resistivity columns " + f"{tuple([list(args.columns)[i] for i in ixs])}.") + + rc= args.pop(args.columns[ixs[0]]) + args.insert(0, 'app.res', rc) + col =list(args.columns ) + args = args.values + + if isinstance(args, pd.Series): + col =args.name + args = args.values + + return args, col + +def _assert_file ( + args: List[str, Any] +)-> Tuple [List [str , pd.DataFrame] | Any , bool]: + """ Check whether the data is gathering into a Excel sheet workbook file. + + If the workbook is detected, will read the data and grab all into a + dataframe. + + :param args: argument into a list + :returns: + - dataframe + - assert whether workbook was successful read. + + :Example: + >>> import numpy as np + >>> from kalfeat.tools.coreutils import _assert_file + >>> a, b = np.arange (1, 10 , 0.5), np.random.randn(9).reshape(3, 3) + >>> data = [a, 'data/erp/l2_gbalo', b] # collection of 03 objects + >>> # but read only the Path-Like object + >>> _assert_file([a, 'data/erp/l2_gbalo.xlsx', b]) + ... + ['l2_gbalo', + pk x y rho + 0 0 790752 1092750.0 1101 + 1 10 790747 1092758.0 1147 + 2 20 790743 1092763.0 1345 + 3 30 790738 1092770.0 1369 + 4 40 790733 1092776.5 1406 + 5 50 790729 1092783.0 1543 + 6 60 790724 1092789.5 1480 + 7 70 790720 1092796.0 1517 + 8 80 790715 1092802.5 1754 + 9 90 790711 1092809.0 1591] + """ + + isfile =False + file = [ item for item in args if isinstance(item, str) + if os.path.isfile (item)] + + if len(file) > 1: + raise ValueError ( + f"Expected a single file but got {len(file)}. 
" + "Please select the right file expected to contain the data.") + if len(file) ==1 : + _, args = read_from_excelsheets(file[0]) + isfile =True + + return args , isfile + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kalfeat/tools/exmath.py b/kalfeat/tools/exmath.py new file mode 100644 index 0000000..4a5cb12 --- /dev/null +++ b/kalfeat/tools/exmath.py @@ -0,0 +1,1960 @@ +# -*- coding: utf-8 -*- +# author: KLaurent +# Licence: GPL-3.0 + +from __future__ import annotations + +import copy +import inspect +import warnings + +from scipy.signal import argrelextrema +import scipy.integrate as integrate +from scipy.optimize import curve_fit +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +from .._kalfeatlog import kalfeatlog +from ..documentation import __doc__ +from ..decorators import ( + deprecated, + refAppender, + docSanitizer +) +from .. import exceptions as Wex +from ..property import P +from ..typing import ( + T, + F, + List, + Tuple, + Union, + Array, + DType, + Optional, + Sub, + SP, + Series, + DataFrame, +) + +from .funcutils import ( + _assert_all_types, + smart_format, + +) +_logger =kalfeatlog.get_kalfeat_logger(__name__) + +_msg= ''.join([ + 'Note: need scipy version 0.14.0 or higher or interpolation,', + ' might not work.'] +) +_msg0 = ''.join([ + 'Could not find scipy.interpolate, cannot use method interpolate' + 'check installation you can get scipy from scipy.org.'] +) + +try: + import scipy + scipy_version = [int(ss) for ss in scipy.__version__.split('.')] + if scipy_version[0] == 0: + if scipy_version[1] < 14: + warnings.warn(_msg, ImportWarning) + _logger.warning(_msg) + + import scipy.interpolate as spi + + interp_import = True + # pragma: no cover +except ImportError: + + warnings.warn(_msg0) + _logger.warning(_msg0) + + interp_import = False + + +def dummy_basement_curve( + func: F , + ks: float , + slope: float | int = 45, +)-> Tuple[F, float]: + """ Compute the pseudodepth from the search zone. + + :param f: callable - Polyfit1D function + :param mz: array-zone - Expected Zone for groundwater search + :param ks: float - The depth from which the expected fracture + zone must starting looking for groundwater. + :param slope: float - Degree angle for slope in linear function + of the dummy curve + :returns: + - lambda function of basement curve `func45` + - beta is intercept value compute for keysearch `ks` + """ + # Use kesearch (ks) to compute the beta value from the function f + beta = func(ks) + # note that 45 degree is used as the slope of the + # imaginary basement curve + # fdummy (x) = slope (45degree) * x + intercept (beta) + slope = np.sin(np.deg2rad(slope)) + func45 = lambda x: slope * x + beta + + return func45, beta + + +def find_limit_for_integration( + ix_arr: Array[DType[int]], + b0: List[T] =[] +)-> List[T]: + r""" Use the roots between f curve and basement curves to + detect the limit of integration. + + :param ix_arr: array-like - Indexes array from masked array where + the value are true i.e. where :math:` b-f > 0 \Rightarrow b> f` . + + :param b0: list - Empy list to hold the limit during entire loop + + .. note:: + :math:`b > f \Longrightarrow` Curve b (basement) is above the fitting + curve :math:`f` . :math:`b < f` otherwise. The pseudoarea is the area + where :math:` b > f` . 
+ + :return: list - integration bounds + + """ + + s = ix_arr.min() - 1 # 0 -1 =-1 + oc = ix_arr.min() + for jj, v in enumerate(ix_arr): + s = v - s + if s !=1: + b0.append(oc); b0.append(ix_arr[jj-1]) + oc= v + s= v + if v ==ix_arr[-1]: + b0.append(oc); b0.append(v) + + return b0 + + +def find_bound_for_integration( + ix_arr: Array[DType[int]], + b0: List[T] =[] +)-> List[T]: + r""" Recursive function. Use the roots between f curve and basement + curves to detect the integration bounds. The function use entirely + numpy for seaching integration bound. Since it is much faster than + `find_limit_for_integration` although both did the same tasks. + + :param ix_arr: array-like - Indexes array from masked array where + the value are true i.e. where :math:`b-f > 0 \Rightarrow b > f` . + + :param b0: list - Empy list to hold the limit during entire loop + + :return: list - integration bounds + + .. note:: + :math:`b > f \Longrightarrow` Curve b (basement) is above the fitting curve + :math:`f` . :math:`b < f` otherwise. The pseudoarea is the area where + :math:`b > f` . + + """ + + # get the first index and arange this thin the end + psdiff = np.arange(ix_arr.min(), len(ix_arr) + ix_arr.min(), 1) + # make the difference to find the zeros values + diff = ix_arr - psdiff + index, = np.where(diff ==0) ; + # take the min index and max index + b0.append(min(ix_arr[index])) + b0.append(max(ix_arr[index])) + #now take the max index and add +1 and start by this part + # retreived the values + array_init = ix_arr[int(max(index)) +1:] + return b0 if len( + array_init)==0 else find_bound_for_integration(array_init, b0) + + +def fitfunc( + x: Array[T], + y: Array[T], + deg: float | int =None, + sample: int =1000 +)-> Tuple[F, Array[T]]: + """ Create polyfit function from a specifc sample data points. + + :param x: array-like of x-axis. + + :param y: array-like of y-axis. + + :param deg: polynomial degree. If ``None`` should compute using the + length of extrema (local + global). + + :param sample: int - Number of data points should use for fitting + function. Default is ``1000``. + + :returns: + - Polynomial function `f` + - new axis `x_new` generated from the samples. + - projected sample values got from `f`. + """ + + # generate a sample of values to cover the fit function + # thus compute ynew (yn) from the poly function f + minl, = argrelextrema(y, np.less) + maxl, = argrelextrema(y,np.greater) + # get the number of degrees + degree = len(minl) + len(maxl) + + coeff = np.polyfit(x, y, deg if deg is not None else degree + 1 ) + f = np.poly1d(coeff) + xn = np.linspace(min(x), max(x), sample) + yp = f(xn) + + return f, xn, yp + +def vesDataOperator( + AB : Array = None, + rhoa: Array= None , + data: DataFrame =None, + typeofop: str = None, + outdf: bool = False, +)-> Tuple[Array] | DataFrame : + """ Check the data in the given deep measurement and set the suitable + operations for duplicated spacing distance of current electrodes `AB`. + + Sometimes at the potential electrodes (`MN`), the measurement of `AB` are + collected twice after modifying the distance of `MN` a bit. At this point, + two or many resistivity values are targetted to the same distance `AB` + (`AB` still remains unchangeable while while `MN` is changed). So the + operation consists whether to average (``mean``) the resistiviy values or + to take the ``median`` values or to ``leaveOneOut`` (i.e. keep one value + of resistivity among the different values collected at the same point`AB`) + at the same spacing `AB`. 
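+    A minimal sketch of the default ``mean`` behaviour (values are
+    illustrative):
+
+    >>> import numpy as np
+    >>> vesDataOperator (AB =np.array([1., 2., 2., 3.]),
+    ...                  rhoa =np.array([10., 12., 14., 9.]))
+    ... (array([1., 2., 3.]), array([10., 13., 9.]))
+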
Note that for ``leaveOneOut``, the selected
+    resistivity value is chosen randomly.
+
+    :param AB: array-like - Spacing of the current electrodes when exploring
+        deeper. Units are in meters.
+
+    :param rhoa: array-like - Apparent resistivity values collected when
+        imaging in depth. Units are in :math:`\Omega {.m}` not
+        :math:`log10(\Omega {.m})`.
+
+    :param data: DataFrame - It is composed of the spacing values `AB` and the
+        apparent resistivity values `rhoa`. If `data` is given, the params
+        `AB` and `rhoa` should be kept to ``None``.
+
+    :param typeofop: str - Type of operation to apply to the resistivity
+        values `rhoa` of the duplicated spacing points `AB`. The default
+        operation is ``mean``.
+
+    :param outdf: bool - Output a new dataframe composed of the renewed `AB`
+        and `rhoa` data.
+
+    :returns:
+        - Tuple of (AB, rhoa): New values computed from `typeofop`
+        - DataFrame: New dataframe output only if ``outdf`` is ``True``.
+
+    :note:
+        By convention `AB` and `MN` are half-space dipole lengths which
+        correspond to `AB/2` and `MN/2` respectively.
+
+    :Example:
+
+        >>> from kalfeat.tools.exmath import vesDataOperator
+        >>> from kalfeat.tools.coreutils import vesSelector
+        >>> data = vesSelector (f= 'data/ves/ves_gbalo.xlsx')
+        >>> len(data)
+        ... (32, 3) # includes the potential electrode values `MN`
+        >>> df= vesDataOperator(data.AB, data.resistivity,
+                                typeofop='leaveOneOut', outdf =True)
+        >>> df.shape
+        ... (26, 2) # excludes `MN` values and reduces (-6) the duplicated values.
+    """
+    op = copy.deepcopy(typeofop)
+    typeofop= str(typeofop).lower()
+    if typeofop not in ('none', 'mean', 'median', 'leaveoneout'):
+        raise ValueError(
+            f'Unacceptable argument {op!r}. Use one of the following '
+            f'arguments {smart_format([None,"mean", "median", "leaveOneOut"])}'
+            ' instead.')
+
+    typeofop ='mean' if typeofop =='none' else typeofop
+
+    if data is not None:
+        data = _assert_all_types(data, pd.DataFrame)
+        rhoa = np.array(data.resistivity )
+        AB= np.array(data.AB)
+
+    AB= np.array( _assert_all_types(
+        AB, np.ndarray, list, tuple, pd.Series))
+    rhoa = np.array( _assert_all_types(
+        rhoa, np.ndarray, list, tuple, pd.Series))
+
+    if len(AB)!= len(rhoa):
+        raise Wex.VESError(
+            'Deep measurement `AB` must have the same size as'
+            ' the collected apparent resistivity `rhoa`.'
+            f' {len(AB)} and {len(rhoa)} were given.')
+
+    #----> When exploring deeper, after changing the MN distance,
+    # measurements are repeated at the same AB points. So, we
+    # select these points and take the mean values of the resistivity.
+
+    # make copies
+    AB_ = AB.copy() ; rhoa_= rhoa.copy()
+    # find the duplicated values
+    mask = np.zeros_like (AB_, dtype =bool)
+    mask[np.unique(AB_, return_index =True)[1]]=True
+    dup_values = AB_[~mask]
+
+    # make a copy of the unique values and fill the duplicated
+    # values with their corresponding mean resistivity values
+    X, rindex = np.unique (AB_, return_index=True); Y = rhoa_[rindex]
+    d0= np.zeros_like(dup_values)
+    for ii, d in enumerate(dup_values):
+        index, = np.where (AB_==d)
+        if typeofop =='mean':
+            d0[ii] = rhoa_[index].mean()
+        elif typeofop =='median':
+            d0[ii] = np.median(rhoa_[index])
+        elif typeofop =='leaveoneout':
+            d0[ii] = np.random.permutation(rhoa_[index])[0]
+
+    maskr = np.isin(X, dup_values, assume_unique=True)
+    Y[maskr] = d0
+
+    return (X, Y) if not outdf else pd.DataFrame (
+        {'AB': X,'resistivity':Y}, index =range(len(X)))
+
+# XXXTODO
+def invertVES (data: DataFrame[DType[float|int]] = None,
+               rho0: float = None ,
+               h0 : float = None,
+               typeof : str = 'HMCMC',
+
+               **kwd)->Tuple [Array]:
+    """ Invert the |VES| data collected in the exploration area.
+
+    :param data: pandas DataFrame - contains the depth measurements AB from
+        the current electrodes, the potential electrodes MN and the collected
+        apparent resistivities.
+
+    :param rho0: float - Value of the starting resistivity model. If ``None``,
+        `rho0` should be half the minimum value of the collected apparent
+        resistivity. Units are in Ω.m not log10(Ω.m).
+
+    :param h0: float - Thickness in meters of the first layers.
+        If ``None``, it should be the minimum possible thickness, ``1.`` m.
+
+    :param typeof: str - Type of inversion scheme. The default is Hybrid Monte
+        Carlo (HMC), known as ``HMCMC``. Another scheme is the Bayesian neural
+        network approach (``BNN``).
+
+    :param kwd: dict - Additional keyword arguments for the |VES| data
+        operations. See :func:`kalfeat.tools.exmath.vesDataOperator` for
+        further details.
+
+    """
+
+    X, Y = vesDataOperator(data =data, **kwd)
+
+    pass
+
+
+def ohmicArea(
+        data: DataFrame[DType[float|int]] = None,
+        ohmSkey: float = 45.,
+        sum : bool = False,
+        objective: str = 'ohmS',
+        **kws
+) -> float:
+    r"""
+    Compute the ohmic-area from the |VES| data collected in the exploration
+    area.
+
+    Parameters
+    -----------
+    * data: pandas DataFrame - contains the depth measurements AB from the
+        current electrodes, the potential electrodes MN and the collected
+        apparent resistivities.
+
+    * ohmSkey: float - The depth in meters from which one expects to find a
+        fracture zone outside of pollutions. Indeed, the `ohmSkey` parameter is
+        used to speculate about the expected groundwater in the fractured rocks
+        under the average level of water inrush in a specific area. For instance
+        in the `Bagoue region`_ , the average depth of water inrush
+        is around ``45m``, so the `ohmSkey` can be specified via the water
+        inrush average value.
+
+    * objective: str - Type of operation to output. By default, the function
+        outputs the value of the pseudo-area in :math:`\Omega .m^2`. However,
+        for plotting purposes, by setting the argument to ``view``, it
+        alternatively outputs X and Y, recomputed and projected, as well as
+        the X and Y values of the expected fractured zone.
Where X is the AB dipole + spacing when imaging to the depth and Y is the apparent resistivity computed + + kws: dict - Additionnal keywords arguments from |VES| data operations. + See :func:`kalfeat.tools.exmath.vesDataOperator` for futher details. + + Returns + -------- + List of twice tuples: + + - Tuple(ohmS, error, roots): + - `ohmS`is the pseudo-area computed expected to be a fractured zone + - `error` is the integration error + - `roots` is the integration boundaries of the expected fractured + zone where the basement rocks is located above the resistivity + transform function. At these points both curves values equal + to null. + - Tuple `(XY, fit XY,XYohmSarea)`: + - `XY` is the ndarray(nvalues, 2) of the operated of `AB` dipole + spacing and resistivity `rhoa` values. + - `fit XY` is the fitting ndarray(nvalues, 2) uses to redraw the + dummy resistivity transform function. + - `XYohmSarea` is `ndarray(nvalues, 2)` of the dipole spacing and + resistiviy values of the expected fracture zone. + + Raises + ------- + VESError + If the `ohmSkey` is greater or equal to the maximum investigation + depth in meters. + + Examples + --------- + >>> from kalfeat.tools.exmath import ohmicArea + >>> from kalfeat.tools.coreutils import vesSelector + >>> data = vesSelector (f= 'data/ves/ves_gbalo.xlsx') + >>> (ohmS, err, roots), *_ = ohmicArea(data = data, ohmSkey =45, sum =True ) + ... (13.46012197818152, array([5.8131967e-12]), array([45. , 98.07307307])) + # pseudo-area is computed between the spacing point AB =[45, 98] depth. + >>> _, (XY.shape, XYfit.shape, XYohms_area.shape) = ohmicArea( + AB= data.AB, rhoa =data.resistivity, ohmSkey =45, + objective ='plot') + ... ((26, 2), (1000, 2), (8, 2)) + + + See also + --------- + + The `ohmS` value calculated from `pseudo-area` is a fully data-driven + parameter and is used to evaluate a pseudo-area of the fracture zone + from the depth where the basement rock is supposed to start. Usually, + when exploring deeper using the |VES|, we are looking for groundwater + in thefractured rock that is outside the anthropic pollution (Biemi, 1992). + Since the VES is an indirect method, we cannot ascertain whether the + presumed fractured rock contains water inside. However, we assume that + the fracture zone could exist and should contain groundwater. Mathematically, + based on the VES1D model proposed by `Koefoed, O. (1976)`_ , we consider + a function :math:`$ \rho_T(l)$`, a set of reducing resistivity transform + function to lower the boundary plane at half the current electrode + spacing :math:`$(l)$`. From the sounding curve :math:`$\rho_T(l)$`, + curve an imaginary basement rock :math:`$b_r (l)$` of slope equal to ``45°`` + with the horizontal :math:`$h(l)$` was created. A pseudo-area :math:`$S(l)$` + should be defined by extending from :math:`$h(l)$` the :math:`$b_r (l)$` + curve when the sounding curve :math:`$\rho_T(l)$` is below :math:`$b_r(l)$`, + otherwise :math:`$S(l)$` is equal to null. The computed area is called the + ohmic-area :math:`$ohmS$` expressed in :math:`$\Omega .m^2$` and constitutes + the expected *fractured zone*. Thus :math:`$ohmS$` ≠ :math:`0` confirms the + existence of the fracture zone while of :math:`$Ohms=0$` raises doubts. + The equation to determine the parameter is given as: + + .. math:: + + ohmS & = &\int_{ l_i}^{l_{i+1}} S(l)dl \quad {s.t.} + + S(l) & = & b_r (l) - \rho_T (l) \quad \text{if} \quad b_r (l) > \rho_T (l) \\ + & = & 0. 
\quad \text{if} \quad b_r (l) \leq \rho_T (l)
+
+        b_r(l) & = & l + h(l) \quad ; \quad h(l) = \beta
+
+        \rho_T(l) & = & l^2 \int_{0}^{\infty} T_i( \lambda ) h_1( \lambda l) \lambda d\lambda
+
+    where :math:`l_i \quad \text{and} \quad l_{i+1}` solve the equation
+    :math:`S(l)=0`; :math:`l` is half the current electrode spacing
+    :math:`AB/2`, and :math:`h_1` denotes the first-order Bessel function of
+    the first kind, :math:`\beta` is the y-axis coordinate of the
+    intercept term of :math:`b_r(l)` and :math:`h(l)`, :math:`T_i(\lambda)`
+    is the resistivity transform function, :math:`\lambda` denotes the
+    integral variable, where n denotes the number of layers, and
+    :math:`\rho_i` and :math:`h_i` are the resistivity and thickness of
+    the :math:`i`-th layer, respectively.
+    More explanations and a clearer derivation of the formula are given in
+    the paper of `Kouadio et al 2022`_.
+
+    References
+    ----------
+    *Kouadio, K.L., Nicolas, K.L., Binbin, M., Déguine, G.S.P. & Serge,*
+        *K.K. (2021, October)*. Bagoue dataset - Cote d'Ivoire: Electrical profiling,
+        electrical sounding and boreholes data, Zenodo. doi:10.5281/zenodo.5560937
+
+    *Koefoed, O. (1970)*. A fast method for determining the layer distribution
+        from the raised kernel function in geoelectrical sounding. Geophysical
+        Prospecting, 18(4), 564–570. https://doi.org/10.1111/j.1365-2478.1970.tb02129.x
+
+    *Koefoed, O. (1976)*. Progress in the Direct Interpretation of Resistivity
+        Soundings: an Algorithm. Geophysical Prospecting, 24(2), 233–240.
+        https://doi.org/10.1111/j.1365-2478.1976.tb00921.x
+
+    *Biemi, J. (1992)*. Contribution à l'étude géologique, hydrogéologique et par télédétection
+        de bassins versants subsaheliens du socle précambrien d'Afrique de l'Ouest:
+        hydrostructurale hydrodynamique, hydrochimie et isotopie des aquifères discontinus
+        de sillons et aires gran. In Thèse de Doctorat (IOS journa, p. 493). Abidjan, Cote d'Ivoire
+
+    .. _Kouadio et al 2022: https://doi.org/10.1029/2021WR031623
+    .. _Koefoed, O. (1970): https://doi.org/10.1111/j.1365-2478.1970.tb02129.x
+    .. _Koefoed, O. (1976): https://doi.org/10.1111/j.1365-2478.1976.tb00921.x
+    .. _Bagoue region: https://en.wikipedia.org/wiki/Bagou%C3%A9
+    .. |VES| replace:: Vertical Electrical Sounding
+
+    """
+
+    objkeys = ( 'ohms','none','eval', 'area', 'ohmic','true',
+                'plot', 'mpl', 'false', 'graph','visual', 'view')
+
+    objr = copy.deepcopy(objective)
+    objective = str(objective).lower()
+    compout, viewout = np.split(np.array(objkeys), 2)
+    for oo, pp in zip(compout, viewout):
+        if objective.find(oo)>=0 :
+            objective ='ohms'; break
+        elif objective.find(pp)>=0:
+            objective ='graph'; break
+
+    if objective not in list(objkeys)+ ['full', 'coverall']:
+        raise ValueError(f"Unacceptable argument {str(objr)!r}. Objective"
+                         " argument can only be 'ohmS' for pseudo-area"
+                         " evaluation or 'graph' for visualization outputs."
+ ) + + bound0=[] + X, Y = vesDataOperator(data =data, **kws) + + try : + ohmSkey = str(ohmSkey).lower().replace('m', '') + if ohmSkey.find('none')>=0 : + ohmSkey = X.max()/2 + ohmSkey = float(ohmSkey) + except: + raise ValueError (f'Could not convert value {ohmSkey!r} to float') + + if ohmSkey >= X.max(): + raise Wex.VESError(f"The startpoint 'ohmSkey={ohmSkey}m'is expected " + f"to be less than the 'maxdepth={X.max()}m'.") + + #-------> construct the fitting curves for 1000 points + # create the polyfit function fitting raw(f) from coefficents + # (coefs) of the initial function + f_rhotl, x_new, y_projected = fitfunc (X, Y) + + # Finding the intercepts between the fitting curve and the dummy + # basement curves + #--> e. g. start from 20m (oix) --> ... searching and find the index + oIx = np.argmin (np.abs(X - ohmSkey)) + # from this index (oIx) , read the remain depth. + oB = X[int(oIx):] # from O-> end [OB] + #--< construct the basement curve from the index of ohmSkey + f_brl, beta = dummy_basement_curve( f_rhotl, ohmSkey) + # 1000 points from OB (xx) + xx = np.linspace(oB.min(), oB.max(), 1000) + b45_projected= f_brl(xx) + + # create a fit function for b45 and find the limits + # find the intersection between the b45_projected values and + # fpartial projected values are the solution of equations f45 -fpartials + diff_arr = b45_projected - f_rhotl(xx) #ypartial_projected + + # # if f-y < 0 => f< y so fitting curve is under the basement curve + # # we keep the limit indexes for integral computation + # # we want to keep the + array_masked = np.ma.masked_where (diff_arr < 0 , diff_arr , copy =True) + # get indexes of valid values + indexes, = array_masked.nonzero() + + try : + ib_indexes = find_bound_for_integration(indexes, b0=bound0) + except : + bound0=[] #initialize the bounds lists + ib_indexes =find_limit_for_integration(indexes, b0= bound0) + + roots = xx[ib_indexes] + f45, *_ = fitfunc(oB, Y[oIx:]) + ff = f45 - f_rhotl + pairwise_r = np.split(roots, len(roots)//2 ) if len( + roots) > 2 else [np.array(roots)] + ohmS = np.zeros((len(pairwise_r,))) + err_ohmS = np.zeros((len(pairwise_r,))) + for ii, (inf, sup) in enumerate(pairwise_r): + values, err = integrate.quad(ff, a = inf, b = sup) + ohmS[ii] = np.zeros((1,)) if values < 0 else values + err_ohmS[ii] = err + + + if sum: + ohmS = ohmS.sum() + + rv =[ + (ohmS, err_ohmS, roots), + ( np.hstack((X[:, np.newaxis], Y[:, np.newaxis]) ), + np.hstack((x_new[:, np.newaxis], y_projected[:, np.newaxis])), + np.hstack((oB[:, np.newaxis], f_brl(oB)[:, np.newaxis]) ) + ) + ] + + for ii, ( obj , ix) in enumerate( zip(('ohms', 'graph'), [1, -1])): + if objective ==obj : + rv[ii + ix ]= (None, None, None) + break + + return rv + + +def _type_mechanism ( + cz: Array |List[float], + dipolelength : float =10. +) -> Tuple[str, float]: + """ Using the type mechanism helps to not repeat several time the same + process during the `type` definition. + + :param cz: array-like - conductive zone; is a subset of the whole |ERP| + survey line. + + .. note:: + Here, the position absolutely refer to the global minimum + resistivity value. + + :Example: + >>> import numpy as np + >>> from kalfeat.tools.exmath import _type_mechanism + >>> rang = random.RandomState(42) + >>> test_array2 = rang.randn (7) + >>> _type_mechanism(np.abs(test_array2)) + ... 
('yes', 60.0)
+
+    """
+    s_index = np.argmin(cz)
+    lc , rc = cz[:s_index +1] , cz[s_index :]
+    lm , rm = lc.max() , rc.max()
+    # get the indexes of the different values
+    ixl, = np.where (lc ==lm) ; ixr, = np.where (rc ==rm)
+    # take the farthest value if there is more than one index
+    ixl = ixl[0] if len(ixl) > 1 else ixl
+    ixr =ixr [-1] + s_index if len(ixr) > 1 else ixr + s_index
+
+    wcz = dipolelength * abs (int(ixl) - int(ixr))
+    status = 'yes' if wcz > 4 * dipolelength else 'no'
+
+    return status, wcz
+
+def type_ (erp: Array[DType[float]] ) -> str:
+    """ Compute the type of anomaly.
+
+    The type parameter is defined by the African Hydraulic Study
+    Committee report (CIEH, 2001). Later it was implemented by authors such as
+    (Adam et al., 2020; Michel et al., 2013; Nikiema, 2012). `Type` helps to
+    differentiate two or several anomalies with the same `shape`.
+    For instance, two anomalies with the same shape ``W`` will differ
+    by the order of priority of their types. The `type` depends on the lateral
+    resistivity distribution of the subsurface (resulting from the pace of the
+    apparent resistivity curve) along the whole |ERP| survey line. Indeed,
+    four types of anomalies were emphasized:
+
+        **"EC"**, **"CB2P"**, **"NC"** and **"CP"**.
+
+    For more details refer to the references.
+
+    :param erp: array-like - Array of the |ERP| line composed of apparent
+        resistivity values.
+
+    :return: str - The `type` of anomaly.
+
+    :Example:
+
+        >>> import numpy as np
+        >>> from kalfeat.tools.exmath import type_
+        >>> rang = np.random.RandomState(42)
+        >>> test_array2 = rang.randn (7)
+        >>> type_(np.abs(test_array2))
+        ... 'EC'
+        >>> long_array = np.abs (rang.randn(71))
+        >>> type_(long_array)
+        ... 'PC'
+
+
+    References
+    -----------
+
+    *Adam, B. M., Abubakar, A. H., Dalibi, J. H., Khalil Mustapha, M., & Abubakar,*
+        *A. H. (2020)*. Assessment of Gaseous Emissions and Socio-Economic Impacts
+        from Diesel Generators used in GSM BTS in Kano Metropolis. African Journal
+        of Earth and Environmental Sciences, 2(1), 517–523. https://doi.org/10.11113/ajees.v3.n1.104
+
+    *CIEH. (2001)*. L'utilisation des méthodes géophysiques pour la recherche
+        d'eaux dans les aquifères discontinus. Série Hydrogéologie, 169.
+
+    *Michel, K. A., Drissa, C., Blaise, K. Y., & Jean, B. (2013)*. Application
+        de méthodes géophysiques à l'étude de la productivité des forages
+        d'eau en milieu cristallin : cas de la région de Toumodi
+        (Centre de la Côte d'Ivoire). International Journal of Innovation
+        and Applied Studies, 2(3), 324–334.
+
+    *Nikiema, D. G. C. (2012)*. Essai d'optimisation de l'implantation géophysique
+        des forages en zone de socle : Cas de la province de Séno, Nord Est
+        du Burkina Faso (IRD). (I. / I. Ile-de-France, Ed.). IST / IRD
+        Ile-de-France, Ouagadougou, Burkina Faso, West-africa. Retrieved
+        from http://documentation.2ie-edu.org/cdi2ie/opac_css/doc_num.php?explnum_id=148
+
+    ..
|ERP| replace:: Electrical Resistivity Profiling + + """ + # split array + type_ ='PC' # initialize type + + erp = _assert_all_types(erp, tuple, list, np.ndarray, pd.Series) + erp = np.array (erp) + + try : + ssets = np.split(erp, len(erp)//7) + except ValueError: + # get_indices + if len(erp) < 7: ssets =[erp ] + else : + remains = len(erp) % 7 + indices = np.arange(7 , len(erp) - remains , 7) + ssets = np.split(erp , indices ) + + status =list() + for czx in ssets : + sta , _ = _type_mechanism(czx) + status.append(sta) + + if len(set (status)) ==1: + if status [0] =='yes': + type_= 'EC' + elif status [0] =='no': + type_ ='NC' + elif len(set(status)) ==2: + yes_ix , = np.where (np.array(status) =='yes') + # take the remain index + no_ix = np.array (status)[len(yes_ix):] + + # check whether all indexes are sorted + sort_ix_yes = all(yes_ix[i] < yes_ix[i+1] + for i in range(len(yes_ix) - 1)) + sort_ix_no = all(no_ix[i] < no_ix[i+1] + for i in range(len(no_ix) - 1)) + + # check whether their difference is 1 even sorted + if sort_ix_no == sort_ix_yes == True: + yes = set ([abs(yes_ix[i] -yes_ix[i+1]) + for i in range(len(yes_ix)-1)]) + no = set ([abs(no_ix[i] -no_ix[i+1]) + for i in range(len(no_ix)-1)]) + if yes == no == {1}: + type_= 'CB2P' + + return type_ + +def shape ( + cz : Array | List [float], + s : Optional [str, int] = ..., + p: SP = ..., +) -> str: + """ Compute the shape of anomaly. + + The `shape` parameter is mostly used in the basement medium to depict the + better conductive zone for the drilling location. According to Sombo et + al. (2011; 2012), various shapes of anomalies can be described such as: + + **"V"**, **"U"**, **"W"**, **"M"**, **"K"**, **"C"**, and **"H"** + + The `shape` consists to feed the algorithm with the |ERP| resistivity + values by specifying the station :math:`$(S_{VES})$`. Indeed, + mostly, :math:`$S_{VES}$` is the station with a very low resistivity value + expected to be the drilling location. + + :param cz: array-like - Conductive zone resistivity values + :param s: int, str - Station position index or name. + :param p: Array-like - Should be the position of the conductive zone. + + .. note:: + If `s` is given, `p` should be provided. If `p` is missing an + error will raises. + + :return: str - the shape of anomaly. + + :Example: + >>> import numpy as np + >>> rang = random.RandomState(42) + >>> from kalfeat.tools.exmath import shape_ + >>> test_array1 = np.arange(10) + >>> shape_ (test_array1) + ... 'C' + >>> test_array2 = rang.randn (7) + >>> _shape(test_array2) + ... 'K' + >>> test_array3 = np.power(10, test_array2 , dtype =np.float32) + >>> _shape (test_array3) + ... 'K' # does not change whatever the resistivity values. + + References + ---------- + + *Sombo, P. A., Williams, F., Loukou, K. N., & Kouassi, E. G. (2011)*. + Contribution de la Prospection Électrique à L’identification et à la + Caractérisation des Aquifères de Socle du Département de Sikensi + (Sud de la Côte d’Ivoire). European Journal of Scientific Research, + 64(2), 206–219. + + *Sombo, P. A. (2012)*. Application des methodes de resistivites electriques + dans la determination et la caracterisation des aquiferes de socle + en Cote d’Ivoire. Cas des departements de Sikensi et de Tiassale + (Sud de la Cote d’Ivoire). Universite Felix Houphouet Boigny. + + + """ + shape = 'V' # initialize the shape with the most common + + cz = _assert_all_types( cz , tuple, list, np.ndarray, pd.Series) + cz = np.array(cz) + # detect the staion position index + if s is (None or ... 
):
+        # ``(None or ...)`` collapses to Ellipsis, so the test above only
+        # catches the Ellipsis default; ``None`` is handled just below.
+        s_index = np.argmin(cz)
+    elif s is None:
+        s_index = np.argmin(cz)
+    elif s is not None:
+        if isinstance(s, str):
+            try:
+                s= int(s.lower().replace('s', ''))
+            except:
+                if p is None or p is ...:
+                    raise Wex.StationError(
+                        "Need the positions `p` of the conductive zone "
+                        "to be supplied. 'NoneType' is given.")
+
+                s_index,*_ = detect_station_position(s,p)
+            else : s_index = s
+        else :
+            s_index= _assert_all_types(s, int)
+
+    if s_index >= len(cz):
+        raise Wex.StationError(
+            f"Position should be less than '{len(cz)}': got '{s_index}'")
+    lbound , rbound = cz[:s_index +1] , cz[s_index :]
+    ls , rs = lbound[0] , rbound [-1] # left side and right side (s)
+    lminls, = argrelextrema(lbound, np.less)
+    lminrs, = argrelextrema(rbound, np.less)
+    lmaxls, = argrelextrema(lbound, np.greater)
+    lmaxrs, = argrelextrema(rbound, np.greater)
+    # the median helps to keep the same shape whatever
+    # the resistivity values
+    med = np.median(cz)
+
+    if (ls >= med and rs < med ) or (ls < med and rs >= med ):
+        if len(lminls) == 0 and len(lminrs) ==0 :
+            shape = 'C'
+        elif (len(lminls) ==0 and len(lminrs) !=0) or (
+                len(lminls) !=0 and len(lminrs)==0) :
+            shape = 'K'
+
+    # ``(ls and rs) > med`` only compared one side to the median;
+    # compare both sides explicitly.
+    elif ls > med and rs > med :
+        if len(lminls) ==0 and len(lminrs) ==0 :
+            shape = 'U'
+        elif (len(lminls) ==0 and len(lminrs) ==1 ) or (
+                len(lminrs) ==0 and len(lminls) ==1):
+            shape = 'H'
+        elif len(lminls) >=1 and len(lminrs) >= 1 :
+            shape = 'W'
+    elif ls < med and rs < med :
+        if (len(lmaxls) >=1 and len(lmaxrs) >= 0 ) or (
+                len(lmaxls) <=0 and len(lmaxrs) >=1):
+            shape = 'M'
+
+    return shape
+
+@refAppender(__doc__)
+@docSanitizer()
+def scalePosition(
+        ydata: Array | SP | Series | DataFrame ,
+        xdata: Array| Series = None,
+        func : Optional [F] = None ,
+        c_order: Optional[int|str] = 0,
+        show: bool =False,
+        **kws):
+    """ Correct the data location or position and return the new corrected
+    location.
+
+    Parameters
+    ----------
+    ydata: array_like, series or dataframe
+        The dependent data, a length M array - nominally ``f(xdata, ...)``.
+
+    xdata: array_like or object
+        The independent variable where the data is measured. Should usually
+        be an M-length sequence or an (k,M)-shaped array for functions with
+        k predictors, but can actually be any object. If ``None``, `xdata` is
+        generated by default using the length of the given `ydata`.
+
+    func: callable
+        The model function, ``f(x, ...)``. It must take the independent
+        variable as the first argument and the parameters to fit as separate
+        remaining arguments. The default `func` is a ``linear`` function,
+        i.e. ``f(x) = ax + b`` where `a` is the slope and `b` is the
+        intercept value. Setting your own function for better fitting is
+        recommended.
+
+    c_order: int or str
+        The index, or the column name if ``ydata`` is given as a dataframe,
+        used to select the right column for scaling.
+    show: bool
+        Quick visualization of the data distribution.
+
+    kws: dict
+        Additional keyword arguments from the `scipy.optimize.curve_fit`
+        parameters. Refer to `scipy.optimize.curve_fit`_.
+
+    Returns
+    --------
+    - ydata - array-like - Data scaled
+    - popt - array-like. Optimal values for the parameters so that the sum of
+        the squared residuals of ``f(xdata, *popt) - ydata`` is minimized.
+    - pcov - array-like. The estimated covariance of popt. The diagonals
+        provide the variance of the parameter estimate. To compute one
+        standard deviation errors on the parameters use
+        ``perr = np.sqrt(np.diag(pcov))``. How the sigma parameter affects
+        the estimated covariance depends on the absolute_sigma
+        argument, as described above.
If the Jacobian matrix at the solution + doesn’t have a full rank, then ‘lm’ method returns a matrix filled with + np.inf, on the other hand 'trf' and 'dogbox' methods use Moore-Penrose + pseudoinverse to compute the covariance matrix. + + Examples + -------- + >>> from kalfeat.tools import erpSelector, scalePosition + >>> df = erpSelector('data/erp/l10_gbalo.xlsx') + >>> df.columns + ... Index(['station', 'resistivity', 'longitude', 'latitude', 'easting', + 'northing'], + dtype='object') + >>> # correcting northing coordinates from easting data + >>> northing_corrected, popt, pcov = scalePosition(ydata =df.northing , + xdata = df.easting, show=True) + >>> len(df.northing.values) , len(northing_corrected) + ... (20, 20) + >>> popt # by default popt =(slope:a ,intercept: b) + ... array([1.01151734e+00, 2.93731377e+05]) + >>> # corrected easting coordinates using the default x. + >>> easting_corrected, *_= scalePosition(ydata =df.easting , show=True) + >>> df.easting.values + ... array([790284, 790281, 790277, 790270, 790265, 790260, 790254, 790248, + ... 790243, 790237, 790231, 790224, 790218, 790211, 790206, 790200, + ... 790194, 790187, 790181, 790175], dtype=int64) + >>> easting_corrected + ... array([790288.18571705, 790282.30300999, 790276.42030293, 790270.53759587, + ... 790264.6548888 , 790258.77218174, 790252.88947468, 790247.00676762, + ... 790241.12406056, 790235.2413535 , 790229.35864644, 790223.47593938, + ... 790217.59323232, 790211.71052526, 790205.8278182 , 790199.94511114, + ... 790194.06240407, 790188.17969701, 790182.29698995, 790176.41428289]) + + """ + def linfunc (x, a, b): + """ Set the simple linear function""" + return a * x + b + + if str(func).lower() in ('none' , 'linear'): + func = linfunc + elif not hasattr(func, '__call__') or not inspect.isfunction (func): + raise TypeError( + f'`func` argument is a callable not {type(func).__name__!r}') + + ydata = _assert_all_types(ydata, list, tuple, np.ndarray, + pd.Series, pd.DataFrame ) + c_order = _assert_all_types(c_order, int, float, str) + try : c_order = int(c_order) + except: pass + + if isinstance(ydata, pd.DataFrame): + if c_order ==0: + warnings.warn("The first column of the data should be considered" + " as the `y` target.") + if c_order is None: + raise TypeError('Dataframe is given. The `c_order` argument should ' + 'be defined for column selection. Use column name' + ' instead') + if isinstance(c_order, str): + # check whether the value is on the column name + if c_order.lower() not in list(map( + lambda x :x.lower(), ydata.columns)): + raise ValueError ( + f'c_order {c_order!r} not found in {list(ydata.columns)}' + ' Use the index instead.') + # if c_order exists find the index and get the + # right column name + ix_c = list(map( lambda x :x.lower(), ydata.columns) + ).index(c_order.lower()) + ydata = ydata.iloc [:, ix_c] # series + elif isinstance (c_order, (int, float)): + c_order =int(c_order) + if c_order >= len(ydata.columns): + raise ValueError( + f"`c_order`'{c_order}' should be less than the number of " + f"given columns '{len(ydata.columns)}'. Use column name instead.") + ydata= ydata.iloc[:, c_order] + + ydata = np.array(ydata) + if xdata is None: + xdata = np.linspace(0, 4, len(ydata)) + if len(xdata) != len(ydata): + raise ValueError(" `x` and `y` arrays must have the same length." 
+                         f"'{len(xdata)}' and '{len(ydata)}' are given.")
+
+    popt, pcov = curve_fit(func, xdata, ydata, **kws)
+    ydata_new = func(xdata, *popt)
+
+    if show:
+        plt.plot(xdata, ydata, 'b-', label='data')
+        plt.plot(xdata, func(xdata, *popt), 'r-',
+                 label='fit: a=%5.3f, b=%5.3f' % tuple(popt))
+        plt.xlabel('x')
+        plt.ylabel('y')
+        plt.legend()
+        plt.show()
+
+    return ydata_new, popt, pcov
+
+
+def __sves__(
+        s_index: int,
+        cz: Array | List[float],
+) -> Tuple[Array, Array]:
+    """ Divide the conductive zone into a left zone and a right zone from
+    the drilling location index.
+
+    :param s_index: station location index expected for the drilling
+        location. It refers to the position of |VES|.
+
+    :param cz: array-like - Conductive zone.
+
+    :returns:
+        - <--Sves: Left side of the conductive zone from the |VES| location.
+        - --> Sves: Right side of the conductive zone from the |VES| location.
+
+    .. note:: Both sides include the |VES| `Sves` position.
+    .. |VES| replace:: Vertical Electrical Sounding
+    """
+    try: s_index = int(s_index)
+    except (TypeError, ValueError): raise TypeError(
+        f'Expected an integer value, not {type(s_index).__name__}')
+
+    s_index = _assert_all_types(s_index, int)
+    cz = _assert_all_types(cz, np.ndarray, pd.Series, list, tuple)
+
+    rmax_ls, rmax_rs = max(cz[:s_index + 1]), max(cz[s_index:])
+    # detect the value of rho max (rmax_...)
+    # from the lower side bound of the anomaly.
+    rho_ls = rmax_ls if rmax_ls < rmax_rs else rmax_rs
+
+    # the side that holds the lower of the two maxima
+    side = 'leftside' if rmax_ls < rmax_rs else 'rightside'
+
+    return (rho_ls, side), (rmax_ls, rmax_rs)
+
+
+def detect_station_position(
+        s: Union[str, int],
+        p: SP,
+) -> Tuple[int, float]:
+    """ Detect the station position and return its index in the position
+    array.
+
+    :param s: str, int - Station location in the position array. It should
+        be the positioning of the drilling location. If the given value is
+        of type string, it should match the exact position used to locate
+        the drilling. Otherwise, if the given value is a float or an
+        integer, it should match an index of the position array.
+
+    :param p: Array-like - Should be the conductive zone as an array of
+        station location values.
+
+    :returns:
+        - `s_index` - the position index in the conductive zone.
+        - `s` - the station position in distance.
+
+    :Example:
+
+        >>> import numpy as np
+        >>> from kalfeat.tools.exmath import detect_station_position
+        >>> pos = np.arange(0, 50, 10)
+        >>> detect_station_position(s='S30', p=pos)
+        ... (3, 30.0)
+        >>> detect_station_position(s='40', p=pos)
+        ... (4, 40.0)
+        >>> detect_station_position(s=2, p=pos)
+        ... (2, 20)
+        >>> detect_station_position(s='sta200', p=pos)
+        ...
kalfeatError_station: Station sta200 \
+            is out of the range; max position = 40
+    """
+    s = _assert_all_types(s, float, int, str)
+    p = _assert_all_types(p, tuple, list, np.ndarray, pd.Series)
+
+    S = copy.deepcopy(s)
+    if isinstance(s, str):
+        s = s.lower().replace('s', '').replace('pk', '').replace('ta', '')
+        try:
+            s = int(s)
+        except ValueError:
+            raise ValueError(f'could not convert string to int: {S!r}')
+
+    p = np.array(p, dtype=np.int32)
+    dl = (p.max() - p.min()) / (len(p) - 1)
+    if isinstance(s, (int, float)):
+        if s > len(p):  # consider this as a position along the line:
+            # check whether the given value is a multiple of the
+            # station spacing
+            if s % dl != 0:
+                raise Wex.kalfeatError_station(
+                    f'Unable to detect the station position {S}')
+            elif s % dl == 0 and s <= p.max():
+                # take the index
+                s_index = s // dl
+                return int(s_index), s_index * dl
+            else:
+                raise Wex.StationError(
+                    f'Station {S} is out of the range; max position = {max(p)}'
+                )
+        else:
+            if s >= len(p):
+                raise Wex.kalfeatError_station(
+                    'Location index must be less than the number of'
+                    f' stations = {len(p)}; got {s}.')
+            # consider it as an integer index
+            return s, p[s]
+
+    # check whether the value of s is one of the positions in p
+    if True in np.isin(p, s):
+        s_index, = np.where(p == s)
+        return int(s_index), p[int(s_index)]
+
+    raise Wex.StationError(f'Unable to detect the station position {S}')
+
+def sfi(
+        cz: Sub[Array[T, DType[T]]] | List[float],
+        p: Sub[SP[Array, DType[int]]] | List[int] = None,
+        s: Optional[str] = None,
+        dipolelength: Optional[float] = None,
+        plot: bool = False,
+        raw: bool = False,
+        **plotkws
+) -> float:
+    r"""
+    Compute the pseudo-fracturing index known as *sfi*.
+
+    The sfi parameter does not indicate the degree of rock fracturing in
+    the subsurface; rather, it is used to speculate about the apparent
+    resistivity dispersion ratio around the cumulated sum of the resistivity
+    values of the selected anomaly. It uses an approach similar to the IF
+    parameter proposed by `Dieng et al`_ (2004). Furthermore, its threshold
+    is set to :math:`\sqrt{2}` for a symmetrical anomaly characterized by a
+    perfect distribution of resistivity in a homogeneous medium. The
+    formula is given by:
+
+    .. math::
+
+        sfi = \sqrt{(P_a^{*}/P_a)^2 + (M_a^{*}/M_a)^2}
+
+    where :math:`P_a` and :math:`M_a` are the anomaly power and magnitude
+    respectively, and :math:`P_a^{*}` and :math:`M_a^{*}` are the projected
+    power and magnitude of the lower point of the selected anomaly.
+
+    :param cz: array-like. Selected conductive zone.
+    :param p: array-like. Station positions of the conductive zone.
+    :param s: str, int. Station position around which the index is
+        computed. If ``None``, the station of minimum resistivity in
+        ``cz`` is used.
+    :param dipolelength: float. If `p` is not given, it will be set
+        automatically using the default value to match the ``cz`` size.
+        The **default** value is ``10.``.
+    :param plot: bool. Visualize the fitting curve. *Default* is ``False``.
+    :param raw: bool. Overlay the raw curve from `cz` on the fitting curve.
+    :param plotkws: dict. `Matplotlib plot`_ keyword arguments.
+
+
+    :Example:
+
+        >>> import numpy as np
+        >>> from kalfeat.properties import P
+        >>> from kalfeat.tools.exmath import sfi
+        >>> rang = np.random.RandomState(42)
+        >>> condzone = np.abs(rang.randn(7))
+        >>> # no visualization; default `s` is the global minimum of rho
+        >>> pfi = sfi(condzone)
+        ... 3.35110143
+        >>> # visualize the fitting curve
+        >>> plotkws = dict(rlabel='Conductive zone (cz)',
+                           label='fitting model',
+                           color=f'{P().frcolortags.get("fr3")}',
+                           )
+        >>> sfi(condzone, plot=True, s=5, figsize=(7, 7),
+                **plotkws)
+        ...
Out[598]: (array([ 0., 10., 20., 30.]), 1) + + References + ---------- + - See `Numpy Polyfit `_ + - See `Stackoverflow `_ + the answer of AkaRem edited by Tobu and Migilson. + - See `Numpy Errorstate `_ and + how to implement the context manager. + + """ + + # Determine the number of curve inflection + # to find the number of degree to compose + # cz fonction + if p is None : + dipolelength = 10. if dipolelength is None else dipolelength + p = np.arange (0, len(cz) * dipolelength, dipolelength) + + if len(p) != len(cz): + raise Wex.StationError ( + 'Array of position and conductive zone must have the same length:' + f' `{len(p)}` and `{len(cz)}` were given.') + + minl, = argrelextrema(cz, np.less) + maxl, = argrelextrema(cz,np.greater) + ixf = len(minl) + len(maxl) + + # create the polyfit function f from coefficents (coefs) + coefs = np.polyfit(x=p, y=cz, deg =ixf + 1 ) + f = np.poly1d(coefs ) + # generate a sample of values to cover the fit function + # for degree 2: eq => f(x) =ax2 +bx + c or c + bx + ax2 as + # the coefs are aranged. + # coefs are ranged for index0 =c, index1 =b and index 2=a + # for instance for degree =2 + # model (f)= [coefs[2] + coefs[1] * x + coefs [0]* x**2 for x in xmod] + # where x_new(xn ) = 1000 points generated + # thus compute ynew (yn) from the poly function f + xn = np.linspace (min(p), max(p), 1000) + yn = f(xn) + + # solve the system to find the different root + # from the min resistivity value bound. + # -> Get from each anomaly bounds (leftside and right side ) + # the maximum resistivity and selected the minumum + # value to project to the other side in order to get + # its positions on the station location p. + if s is not None : + # explicity giving s + s_ix , spos = detect_station_position(s , p ) + (rho_side, side ), (rho_ls_max , rho_rs_max) = __sves__(s_ix , cz ) + + elif s is None: + # take the index of min value of cz + s_ix = np.argmin(cz) ; spos = p[s_ix] + (rho_side, side ), (rho_ls_max , rho_rs_max) = __sves__(s_ix , cz ) + + # find the roots from rhoa_side: + # f(x) =y => f (x) = rho_side + fn = f - rho_side + roots = np.abs(fn.r ) + # detect the rho_side positions + ppow = roots [np.where (roots > spos )] if side =='leftside' else roots[ + np.where (roots < spos)] + ppow = ppow [0] if len (ppow) > 1 else ppow + + # compute sfi + pw = power(p) + ma= magnitude(cz) + pw_star = np.abs (p.min() - ppow) + ma_star = np.abs(cz.min() - rho_side) + + with np.errstate(all='ignore'): + # $\sqrt2# is the threshold + sfi = np.sqrt ( (pw_star/pw)**2 + (ma_star / ma )**2 ) % np.sqrt(2) + if sfi == np.inf : + sfi = np.sqrt ( (pw/pw_star)**2 + (ma / ma_star )**2 ) % np.sqrt(2) + + if plot: + plot_(p,cz,'-ok', xn, yn, raw = raw , **plotkws) + + + return sfi + +@refAppender(__doc__) +def plot_ ( + *args : List [Union [str, Array, ...]], + figsize: Tuple[int] = None, + raw : bool = False, + style : str = 'seaborn', + dtype: str ='erp', + kind: Optional[str] = None , + **kws + ) -> None : + """ Quick visualization for fitting model, |ERP| and |VES| curves. + + :param x: array-like - array of data for x-axis representation + :param y: array-like - array of data for plot y-axis representation + :param figsize: tuple - Mtplotlib (MPL) figure size; should be a tuple + value of integers e.g. `figsize =(10, 5)`. + :param raw: bool- Originally the `plot_` function is intended for the + fitting |ERP| model i.e. the correct value of |ERP| data. 
However,
+        when `raw` is set to ``True``, it plots both curves: the
+        fitting model as well as the uncorrected one, so the two curves are
+        overlain (superposed).
+    :param style: str - Pyplot style. Default is ``seaborn``.
+    :param dtype: str - Kind of data provided. Can be |ERP| data or |VES|
+        data. When |ERP| data are provided, the common plot is sufficient to
+        visualize all the data insight, i.e. the default value of `kind` is
+        kept to ``None``. However, when the collected data are |VES| data,
+        the convenient plot for visualization is ``loglog`` for the
+        parameter `kind`, while `dtype` can be set to ``ves`` to specify the
+        labels on the x-axis. The default value of `dtype` is ``erp`` for
+        common visualization.
+    :param kind: str - Used to specify the kind of plot. See the
+        explanation of the `dtype` parameter. By default `kind` is set to
+        ``None``, i.e. it keeps the normal plot. It can be ``loglog``,
+        ``semilogx`` or ``semilogy``.
+
+    :param kws: dict - Additional `Matplotlib plot`_ keyword arguments. To
+        customize the plot, one can provide a dictionary of additional MPL
+        keyword arguments like in the example below.
+
+    :Example:
+        >>> import numpy as np
+        >>> from kalfeat.tools.exmath import plot_
+        >>> x, y = np.arange(0, 60, 10), np.abs(np.random.randn(6))
+        >>> KWS = dict(xlabel='Stations positions', ylabel='resistivity(ohm.m)',
+                       rlabel='raw curve', rotate=45)
+        >>> plot_(x, y, '-ok', raw=True, style='seaborn-whitegrid',
+                  figsize=(7, 7), **KWS)
+
+    """
+    plt.style.use(style)
+    # retrieve all the aggregated data from the keyword arguments
+    if (rlabel := kws.get('rlabel')) is not None:
+        del kws['rlabel']
+    if (xlabel := kws.get('xlabel')) is not None:
+        del kws['xlabel']
+    if (ylabel := kws.get('ylabel')) is not None:
+        del kws['ylabel']
+    if (rotate := kws.get('rotate')) is not None:
+        del kws['rotate']
+
+    if (title := kws.get('title')) is not None:
+        del kws['title']
+    x, y, *args = args
+    fig = plt.figure(1, figsize=figsize)
+    plt.plot(x, y, *args,
+             **kws)
+    if raw:
+        kind = kind.lower(
+            ) if isinstance(kind, str) else kind
+        if kind == 'semilogx':
+            plt.semilogx(x, y,
+                         color='{}'.format(P().frcolortags.get("fr1")),
+                         label=rlabel,
+                         )
+        elif kind == 'semilogy':
+            plt.semilogy(x, y,
+                         color='{}'.format(P().frcolortags.get("fr1")),
+                         label=rlabel,
+                         )
+        elif kind == 'loglog':
+            plt.loglog(x, y,
+                       color='{}'.format(P().frcolortags.get("fr1")),
+                       label=rlabel,
+                       )
+        else:
+            plt.plot(x, y,
+                     color='{}'.format(P().frcolortags.get("fr1")),
+                     label=rlabel,
+                     )
+
+    dtype = dtype.lower() if isinstance(dtype, str) else dtype
+
+    if dtype is None:
+        dtype = 'erp'
+    if dtype not in ('erp', 'ves'): dtype = 'erp'
+
+    if dtype == 'erp':
+        plt.xticks(x,
+                   labels=['S{:02}'.format(int(i)) for i in x],
+                   rotation=0. if rotate is None else rotate
+                   )
+    elif dtype == 'ves':
+        plt.xticks(x,
+                   rotation=0.
if rotate is None else rotate + ) + + plt.xlabel ('Stations') if xlabel is None else plt.xlabel (xlabel) + plt.ylabel ('Resistivity (Ω.m)' + ) if ylabel is None else plt.ylabel (ylabel) + + fig_title_kws = dict ( + t = 'Plot fit model' if dtype =='erp' else title, + style ='italic', + bbox =dict(boxstyle='round',facecolor ='lightgrey')) + + plt.tight_layout() + fig.suptitle(**fig_title_kws) + plt.legend () + plt.show () + + +def quickplot (arr: Array | List[float], dl:float =10)-> None: + """Quick plot to see the anomaly""" + + plt.plot(np.arange(0, len(arr) * dl, dl), arr , ls ='-', c='k') + plt.show() + + + +def magnitude (cz:Sub[Array[float, DType[float]]] ) -> float: + r""" + Compute the magnitude of selected conductive zone. + + The magnitude parameter is the absolute resistivity value between + the minimum :math:`\min \rho_a` and maximum :math:`\max \rho_a` + value of selected anomaly: + + .. math:: + + magnitude=|\min\rho_a -\max\rho_a| + + :param cz: array-like. Array of apparent resistivity values composing + the conductive zone. + + :return: Absolute value of anomaly magnitude in ohm.meters. + """ + return np.abs (cz.max()- cz.min()) + +def power (p:Sub[SP[Array, DType [int]]] | List[int] ) -> float : + """ + Compute the power of the selected conductive zone. Anomaly `power` + is closely referred to the width of the conductive zone. + + The power parameter implicitly defines the width of the conductive zone + and is evaluated from the difference between the abscissa + :math:`$X_{LB}$` and the end :math:`$X_{UB}$` points of + the selected anomaly: + + .. math:: + + power=|X_{LB} - X_{UB} | + + :param p: array-like. Station position of conductive zone. + + :return: Absolute value of the width of conductive zone in meters. + + """ + return np.abs(p.min()- p.max()) + + +def _find_cz_bound_indexes ( + erp: Union[Array[float, DType[float]], List[float], pd.Series], + cz: Union [Sub[Array], List[float]] +)-> Tuple[int, int]: + """ + Fetch the limits 'LB' and 'UB' of the selected conductive zone. + + Indeed the 'LB' and 'UB' fit the lower and upper boundaries of the + conductive zone respectively. + + :param erp: array-like. Apparent resistivities collected during the survey. + :param cz: array-like. Array of apparent resistivies composing the + conductive zone. + + :return: The index of boundaries 'LB' and 'UB'. + + .. note:: + + `cz` must be self-containing of `erp`. If ``False`` should raise and error. + + """ + # assert whether cz is a subset of erp. + if isinstance( erp, pd.Series): erp = erp.values + + if not np.isin(True, (np.isin (erp, cz))): + raise ValueError ('Expected the conductive zone array being a ' + 'subset of the resistivity array.') + # find the indexes using np.argwhere + cz_indexes = np.argwhere(np.isin(erp, cz)).ravel() + + return cz_indexes [0] , cz_indexes [-1] + + +def convert_distance_to_m( + value:T , + converter:float =1e3, + unit:str ='km' +)-> float: + """ Convert distance from `km` to `m` or vice versa even a string + value is given. + + :param value: value to convert. + :paramm converter: Equivalent if given in ``km`` rather than ``m``. + :param unit: unit to convert to. + + """ + + if isinstance(value, str): + try: + value = float(value.replace(unit, '') + )*converter if value.find( + 'km')>=0 else float(value.replace('m', '')) + except: + raise TypeError(f"Expected float not {type(value)!r}." 
+ ) + + return value + + +def get_station_number ( + dipole:float, + distance:float , + from0:bool = False, + **kws +)-> float: + """ Get the station number from dipole length and + the distance to the station. + + :param distance: Is the distance from the first station to `s` in + meter (m). If value is given, please specify the dipole length in + the same unit as `distance`. + :param dipole: Is the distance of the dipole measurement. + By default the dipole length is in meter. + :param kws: :func:`convert_distance_to_m` additional arguments + + """ + + dipole=convert_distance_to_m(dipole, **kws) + distance =convert_distance_to_m(distance, **kws) + + return distance/dipole if from0 else distance/dipole + 1 + +@deprecated('Function is going to be removed for the next release ...') +def define_conductive_zone ( + erp: Array | List[float], + stn: Optional [int] = None, + sres:Optional [float] = None, + *, + distance:float | None = None , + dipole_length:float | None = None, + extent:int =7): + """ Detect the conductive zone from `s`ves point. + + :param erp: Resistivity values of electrical resistivity profiling(ERP). + + :param stn: Station number expected for VES and/or drilling location. + + :param sres: Resistivity value at station number `stn`. + If `sres` is given, the auto-search will be triggered to + find the station number that fits the resistivity value. + + :param distance: Distance from the first station to `stn`. If given, + be sure to provide the `dipole_length` + :param dipole_length: Length of the dipole. Comonly the distante between + two close stations. Since we use config AB/2 + :param extent: Is the width to depict the anomaly. If provide, need to be + consistent along all ERP line. Should keep unchanged for other + parameters definitions. Default is ``7``. + :returns: + - CZ:Conductive zone including the station position + - sres: Resistivity value of the station number + - ix_stn: Station position in the CZ + + .. note:: + If many stations got the same `sres` value, the first station + is flagged. This may not correspond to the station number that is + searching. Use `sres` only if you are sure that the + resistivity value is unique on the whole ERP. Otherwise it's + not recommended. + + :Example: + >>> import numpy as np + >>> from kalfeat.tools.exmath import define_conductive_zone + >>> sample = np.random.randn(9) + >>> cz, stn_res = define_conductive_zone(sample, 4, extent = 7) + ... (array([ 0.32208638, 1.48349508, 0.6871188 , -0.96007639, + -1.08735204,0.79811492, -0.31216716]), + -0.9600763919368086, + 3) + """ + try : + iter(erp) + except : raise Wex.ArgumentError ( + f'`erp` must be a sequence of values not {type(erp)!r}') + finally: erp = np.array(erp) + + # check the distance + if stn is None: + if (dipole_length and distance) is not None: + stn = get_station_number(dipole_length, distance) + elif sres is not None: + snix, = np.where(erp==sres) + if len(snix)==0: + raise Wex.ParameterNumberError( + "Could not find the resistivity value of the VES " + "station. Please provide the right value instead.") + + elif len(snix)==2: + stn = int(snix[0]) + 1 + else : + raise Wex.ArgumentError ( + '`stn` is needed or at least provide the survey ' + 'dipole length and the distance from the first ' + 'station to the VES station. ') + + if erp.size < stn : + raise Wex.StationError( + f"Wrong station number =`{stn}`. 
Is larger than the " + f" number of ERP stations = `{erp.size}` ") + + # now defined the anomaly boundaries from sn + stn = 1 if stn == 0 else stn + stn -=1 # start counting from 0. + if extent %2 ==0: + if len(erp[:stn]) > len(erp[stn:])-1: + ub = erp[stn:][:extent//2 +1] + lb = erp[:stn][len(ub)-int(extent):] + elif len(erp[:stn]) < len(erp[stn:])-1: + lb = erp[:stn][stn-extent//2 +1:stn] + ub= erp[stn:][:int(extent)- len(lb)] + + else : + lb = erp[:stn][-extent//2:] + ub = erp[stn:][:int(extent//2)+ 1] + + # read this part if extent anomaly is not reached + if len(ub) +len(lb) < extent: + if len(erp[:stn]) > len(erp[stn:])-1: + add = abs(len(ub)-len(lb)) # remain value to add + lb = erp[:stn][-add -len(lb) - 1:] + elif len(erp[:stn]) < len(erp[stn:])-1: + add = abs(len(ub)-len(lb)) # remain value to add + ub = erp[stn:][:len(ub)+ add -1] + + conductive_zone = np.concatenate((lb, ub)) + # get the index of station number from the conductive zone. + ix_stn, = np.where (conductive_zone == conductive_zone[stn]) + ix_stn = int(ix_stn[0]) if len(ix_stn)> 1 else int(ix_stn) + + return conductive_zone, conductive_zone[stn], ix_stn + + +def shortPlot (sample, cz=None): + """ + Quick plot to visualize the `sample` line as well as the selected + conductive zone if given. + + :param sample: array_like, the electrical profiling array + :param cz: array_like, the selected conductive zone. If ``None``, `cz` + should be plotted. + + :Example: + >>> import numpy as np + >>> from kalfeat.tools.exmath import shortPlot, define_conductive_zone + >>> test_array = np.random.randn (10) + >>> selected_cz ,*_ = define_conductive_zone(test_array, 7) + >>> shortPlot(test_array, selected_cz ) + + """ + import matplotlib.pyplot as plt + fig, ax = plt.subplots(1,1, figsize =(10, 4)) + leg =[] + ax.scatter (np.arange(len(sample)), sample, marker ='.', c='b') + zl, = ax.plot(np.arange(len(sample)), sample, + c='r', + label ='Electrical resistivity profiling') + leg.append(zl) + if cz is not None: + # construct a mask array with np.isin to check whether + # `cz` is subset array + z = np.ma.masked_values (sample, np.isin(sample, cz )) + # a masked value is constructed so we need + # to get the attribute fill_value as a mask + # However, we need to use np.invert or tilde operator + # to specify that other value except the `CZ` values mus be + # masked. Note that the dtype must be changed to boolean + sample_masked = np.ma.array( + sample, mask = ~z.fill_value.astype('bool') ) + + czl, = ax.plot( + np.arange(len(sample)), sample_masked, + ls='-', + c='#0A4CEE', + lw =2, + label ='Conductive zone') + leg.append(czl) + + ax.set_xticks(range(len(sample))) + ax.set_xticklabels( + ['S{0:02}'.format(i+1) for i in range(len(sample))]) + + ax.set_xlabel('Stations') + ax.set_ylabel('app.resistivity (ohm.m)') + ax.legend( handles = leg, + loc ='best') + + plt.show() + + +def compute_anr ( + sfi: float , + rhoa_array: Array | List[float], + pos_bound_indexes: Array[DType[int]] | List[int] + )-> float: + r""" + Compute the select anomaly ratio (ANR) along with the whole profile from + SFI. The standardized resistivity values`rhoa` of is averaged from + :math:`X_{begin}` to :math:`X_{end}`. The ANR is a positive value and the + equation is given as: + + .. math:: + + ANR= sfi * \frac{1}{N} * | \sum_{i=1}^{N} \frac{ + \rho_{a_i} - \bar \rho_a}{\sigma_{\rho_a}} | + + + where :math:`$\sigma_{rho_a}$` and :math:`\bar \rho_a` are the standard + deviation and the mean of the resistivity values composing the selected + anomaly. 
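+
+    A minimal numeric sketch of this formula (plain NumPy, independent of
+    this function's signature; the array values and ``sfi=1.2`` are
+    illustrative assumptions only):
+
+    >>> import numpy as np
+    >>> rhoa = np.array([60., 70., 65., 40., 30., 50.])
+    >>> stand = (rhoa - rhoa.mean()) / np.std(rhoa)
+    >>> anr = 1.2 * np.abs(stand[2:5].mean())  # bound indexes (2, 4) assumed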
+ + :param sfi: + Is standard fracturation index. please refer to :func:`compute_sfi`. + + :param rhoa_array: Resistivity values of Electrical Resistivity Profiling + line + :type rhoa_array: array_like + + :param pos_bound_indexes: + Select anomaly station location boundaries indexes. Refer to + :func:`compute_power` of ``pos_bounds``. + + :return: Anomaly ratio + :rtype: float + + :Example: + + >>> from kalfeat.tools.exmath import compute_anr + >>> import pandas as pd + >>> anr = compute_anr(sfi=sfi, + ... rhoa_array=data = pd.read_excel( + ... 'data/l10_gbalo.xlsx').to_numpy()[:, -1], + ... pk_bound_indexes = [9, 13]) + >>> anr + """ + + stand = (rhoa_array - rhoa_array.mean())/np.std(rhoa_array) + try: + stand_rhoa =stand[int(min(pos_bound_indexes)): + int(max(pos_bound_indexes))+1] + except: + stand_rhoa = np.array([0]) + + return sfi * np.abs(stand_rhoa.mean()) + + +@deprecated('Deprecated function to `:func:`kalfeat.methods.erp.get_type`' + ' more efficient using median and index computation. It will ' + 'probably deprecate soon for neural network pattern recognition.') +def get_type ( + erp_array: Array | List [float], + posMinMax:Tuple[int] | List[int], + pk: float | int, + pos_array: SP[DType[float]], + dl: float + )-> str: + """ + Find anomaly type from app. resistivity values and positions locations + + :param erp_array: App.resistivty values of all `erp` lines + :type erp_array: array_like + + :param posMinMax: Selected anomaly positions from startpoint and endpoint + :type posMinMax: list or tuple or nd.array(1,2) + + :param pk: Position of selected anomaly in meters + :type pk: float or int + + :param pos_array: Stations locations or measurements positions + :type pos_array: array_like + + :param dl: + + Distance between two receiver electrodes measurement. The same + as dipole length in meters. + + :returns: + - ``EC`` for Extensive conductive. + - ``NC`` for narrow conductive. + - ``CP`` for conductive plane + - ``CB2P`` for contact between two planes. + + :Example: + + >>> from kalfeat.utils.exmath import get_type + >>> x = [60, 61, 62, 63, 68, 65, 80, 90, 100, 80, 100, 80] + >>> pos= np.arange(0, len(x)*10, 10) + >>> ano_type= get_type(erp_array= np.array(x), + ... posMinMax=(10,90), pk=50, pos_array=pos, dl=10) + >>> ano_type + ...CB2P + + """ + + # Get position index + anom_type ='CP' + index_pos = int(np.where(pos_array ==pk)[0]) + # if erp_array [:index_pos +1].mean() < np.median(erp_array) or\ + # erp_array[index_pos:].mean() < np.median(erp_array) : + # anom_type ='CB2P' + if erp_array [:index_pos+1].mean() < np.median(erp_array) and \ + erp_array[index_pos:].mean() < np.median(erp_array) : + anom_type ='CB2P' + + elif erp_array [:index_pos +1].mean() >= np.median(erp_array) and \ + erp_array[index_pos:].mean() >= np.median(erp_array) : + + if dl <= (max(posMinMax)- min(posMinMax)) <= 5* dl: + anom_type = 'NC' + + elif (max(posMinMax)- min(posMinMax))> 5 *dl: + anom_type = 'EC' + + return anom_type + +@deprecated('`Deprecated function. Replaced by :meth:~core.erp.get_shape` ' + 'more convenient to recognize anomaly shape using ``median line``' + 'rather than ``mean line`` below.') +def get_shape ( + rhoa_range: Array | List [float] + )-> str : + """ + Find anomaly `shape` from apparent resistivity values framed to + the best points using the mean line. 
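+
+    The classification keys off the local minima of the curve relative to
+    its mean line; a minimal sketch of that first test (plain NumPy/SciPy,
+    illustrative values only):
+
+    >>> import numpy as np
+    >>> from scipy.signal import argrelextrema
+    >>> xs = np.array([60., 70., 65., 40., 30., 31., 34., 40., 45., 50.])
+    >>> argrelextrema(xs, np.less)[0]  # indices of the local minima
+    array([4])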
+ + :param rhoa_range: The apparent resistivity from selected anomaly bounds + :attr:`~core.erp.ERP.anom_boundaries` + :type rhoa_range: array_like or list + + :returns: + - V + - W + - K + - C + - M + - U + + :Example: + + >>> from kalfeat.tools.exmath import get_shape + >>> x = [60, 70, 65, 40, 30, 31, 34, 40, 38, 50, 61, 90] + >>> shape = get_shape (rhoa_range= np.array(x)) + ... U + + """ + minlocals = argrelextrema(rhoa_range, np.less) + shape ='V' + average_curve = rhoa_range.mean() + if len (minlocals[0]) >1 : + shape ='W' + average_curve = rhoa_range.mean() + minlocals_slices = rhoa_range[ + int(minlocals[0][0]):int(minlocals[0][-1])+1] + average_minlocals_slices = minlocals_slices .mean() + + if average_curve >= 1.2 * average_minlocals_slices: + shape = 'U' + if rhoa_range [-1] < average_curve and\ + rhoa_range [-1]> minlocals_slices[ + int(argrelextrema(minlocals_slices, np.greater)[0][0])]: + shape ='K' + elif rhoa_range [0] < average_curve and \ + rhoa_range [-1] < average_curve : + shape ='M' + elif len (minlocals[0]) ==1 : + if rhoa_range [0] < average_curve and \ + rhoa_range [-1] < average_curve : + shape ='M' + elif rhoa_range [-1] <= average_curve : + shape = 'C' + + return shape + +def compute_power ( + posMinMax:float =None, + pk_min: Optional[float]=None , + pk_max: Optional[float]=None, + )-> float: + """ + Compute the power Pa of anomaly. + + :param pk_min: + Min boundary value of anomaly. `pk_min` is min value (lower) + of measurement point. It's the position of the site in meter. + :type pk_min: float + + :param pk_max: + Max boundary of the select anomaly. `pk_max` is the maximum value + the measurement point in meter. It's the upper boundary position of + the anomaly in the site in m. + :type pk_max: float + + :return: The absolute value between the `pk_min` and `pk_max`. + :rtype: float + + :Example: + + >>> from kalfeat.tools.exmath import compute_power + >>> power= compute_power(80, 130) + + + """ + if posMinMax is not None: + pk_min = np.array(posMinMax).min() + pk_max= np.array(posMinMax).max() + + if posMinMax is None and (pk_min is None or pk_max is None) : + raise Wex.kalfeatError_parameter_number( + 'Could not compute the anomaly power. Provide at least' + 'the anomaly position boundaries or the left(`pk_min`) ' + 'and the right(`pk_max`) boundaries.') + + return np.abs(pk_max - pk_min) + +def compute_magnitude( + rhoa_max: float =None , + rhoa_min: Optional[float]=None, + rhoaMinMax:Optional [float] =None + )-> float: + """ + Compute the magnitude `Ma` of selected anomaly expressed in Ω.m. + ano + :param rhoa_min: resistivity value of selected anomaly + :type rhoa_min: float + + :param rhoa_max: Max boundary of the resistivity value of select anomaly. + :type rhoa_max: float + + :return: The absolute value between the `rhoa_min` and `rhoa_max`. + :rtype: float + + :Example: + + >>> from kalfeat.utils.exmath import compute_power + >>> power= compute_power(80, 130) + + """ + if rhoaMinMax is not None : + rhoa_min = np.array(rhoaMinMax).min() + rhoa_max= np.array(rhoaMinMax).max() + + if rhoaMinMax is None and (rhoa_min is None or rhoa_min is None) : + raise Wex.ParameterNumberError( + 'Could not compute the anomaly magnitude. 
Provide at least' + 'the anomaly resistivy value boundaries or the buttom(`rhoa_min`)' + 'and the top(`rhoa_max`) boundaries.') + + return np.abs(rhoa_max -rhoa_min) + diff --git a/kalfeat/tools/funcutils.py b/kalfeat/tools/funcutils.py new file mode 100644 index 0000000..b3954e0 --- /dev/null +++ b/kalfeat/tools/funcutils.py @@ -0,0 +1,586 @@ +# -*- coding: utf-8 -*- +# author: KLaurent +# Licence: GPL-3.0 + +from __future__ import annotations + +import os +import sys +import inspect +import subprocess +import warnings + +import numpy as np +import pandas as pd + +from .._kalfeatlog import kalfeatlog +from ..typing import ( + Tuple, + Dict, + Any, + Array, + F, + T, + List , + DataFrame, + Sub, + ) + +_logger = kalfeatlog.get_kalfeat_logger(__name__) + +def smart_strobj_recognition( + name: str , + container: List | Tuple | Dict[Any, Any ], + stripitems: str | List | Tuple = '_', + deep: bool = False, +) -> str : + """ Find the likelihood word in the whole containers and + returns the value. + + :param name: str - Value of to search. I can not match the exact word in + the `container` + :param container: list, tuple, dict- container of the many string words. + :param stripitems: str - 'str' items values to sanitize the content + element of the dummy containers. if different items are provided, they + can be separated by ``:``, ``,`` and ``;``. The items separators + aforementioned can not be used as a component in the `name`. For + isntance:: + + name= 'dipole_'; stripitems='_' -> means remove the '_' + under the ``dipole_`` + name= '+dipole__'; stripitems ='+;__'-> means remove the '+' and + '__' under the value `name`. + + :param deep: bool - Kind of research. Go deeper by looping each items + for find the initials that can fit the name. Note that, if given, + the first occurence should be consider as the best name... + + :return: Likelihood object from `container` or Nonetype if none object is + detected. + + :Example: + >>> from kalfeat.tools.funcutils import smart_strobj_recognition + >>> from kalfeat.methods import ResistivityProfiling + >>> rObj = ResistivityProfiling(AB= 200, MN= 20,) + >>> smart_strobj_recognition ('dip', robj.__dict__)) + ... None + >>> smart_strobj_recognition ('dipole_', robj.__dict__)) + ... dipole + >>> smart_strobj_recognition ('dip', robj.__dict__,deep=True ) + ... dipole + >>> smart_strobj_recognition ( + '+_dipole___', robj.__dict__,deep=True , stripitems ='+;_') + ... 'dipole' + + """ + + stripitems =_assert_all_types(stripitems , str, list, tuple) + container = _assert_all_types(container, list, tuple, dict) + ix , rv = None , None + + if isinstance (stripitems , str): + for sep in (':', ",", ";"): # when strip ='a,b,c' seperated object + if sep in stripitems: + stripitems = stripitems.strip().split(sep) ; break + if isinstance(stripitems, str): + stripitems =[stripitems] + + # sanitize the name. + for s in stripitems : + name = name.strip(s) + + if isinstance(container, dict) : + #get only the key values and lower them + container_ = list(map (lambda x :x.lower(), container.keys())) + else : + # for consistency put on list if values are in tuple. + container_ = list(container) + + # sanitize our dummny container item ... + #container_ = [it.strip(s) for it in container_ for s in stripitems ] + if name.lower() in container_: + ix = container_.index (name) + + if deep and ix is None: + # go deeper in the search... 
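+        # substring scan: the first container item whose lowered form
+        # contains `name` is taken as the likely match (first hit wins).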
+ for ii, n in enumerate (container_) : + if n.find(name.lower())>=0 : + ix =ii ; break + + if ix is not None: + if isinstance(container, dict): + rv= list(container.keys())[ix] + else : rv= container[ix] + + return rv + +def repr_callable_obj(obj: F ): + """ Represent callable objects. + + Format class, function and instances objects. + + :param obj: class, func or instances + object to format. + :Raises: TypeError - If object is not a callable or instanciated. + + :Examples: + >>> from kalfeat.tools.funcutils import repr_callable_obj + >>> from kalfeat.methods.dc import ( + ElectricalMethods, ResistivityProfiling) + >>> callable_format(ElectricalMethods) + ... 'ElectricalMethods(AB= None, arrangement= schlumberger, + area= None, MN= None, projection= UTM, datum= WGS84, + epsg= None, utm_zone= None, fromlog10= False)' + >>> callable_format(ResistivityProfiling) + ... 'ResistivityProfiling(station= None, dipole= 10.0, + auto_station= False, kws= None)' + >>> robj= ResistivityProfiling (AB=200, MN=20, station ='S07') + >>> repr_callable_obj(robj) + ... 'ResistivityProfiling(AB= 200, MN= 20, arrangememt= schlumberger, + utm_zone= None, projection= UTM, datum= WGS84, epsg= None, + area= None, fromlog10= False, dipole= 10.0, station= S07)' + >>> repr_callable_obj(robj.fit) + ... 'fit(data= None, kws= None)' + """ + + # inspect.formatargspec(*inspect.getfullargspec(cls_or_func)) + if not hasattr (obj, '__call__') and not hasattr(obj, '__dict__'): + raise TypeError ( + f'Format only callabe objects: {type (obj).__name__!r}') + + if hasattr (obj, '__call__'): + cls_or_func_signature = inspect.signature(obj) + objname = obj.__name__ + PARAMS_VALUES = {k: None if v.default is (inspect.Parameter.empty + or ...) else v.default + for k, v in cls_or_func_signature.parameters.items() + # if v.default is not inspect.Parameter.empty + } + elif hasattr(obj, '__dict__'): + objname=obj.__class__.__name__ + PARAMS_VALUES = {k:v for k, v in obj.__dict__.items() + if not (k.endswith('_') or k.startswith('_'))} + + return str (objname) + '(' + str (PARAMS_VALUES).replace ( + '{', '').replace('}', '').replace( + ':', '=').replace("'", '') + ')' + +def accept_types (*objtypes: list , + format: bool = False + ) -> List[str] | str : + """ List the type format that can be accepted by a function. + + :param objtypes: List of object types. + + :param format: bool - format the list of the name of objects. + + :return: list of object type names or str of object names. + + :Example: + >>> import numpy as np; import pandas as pd + >>> from kalfeat.tools.funcutils import accept_types + >>> accept_types (pd.Series, pd.DataFrame, tuple, list, str) + ... "'Series','DataFrame','tuple','list' and 'str'" + >>> atypes= accept_types ( + pd.Series, pd.DataFrame,np.ndarray, format=True ) + ..."'Series','DataFrame' and 'ndarray'" + """ + return smart_format( + [f'{o.__name__}' for o in objtypes] + ) if format else [f'{o.__name__}' for o in objtypes] + +def read_from_excelsheets(erp_file: str = None ) -> List[DataFrame]: + + """ Read all Excelsheets and build a list of dataframe of all sheets. + + :param erp_file: + Excell workbooks containing `erp` profile data. + + :return: A list composed of the name of `erp_file` at index =0 and the + datataframes. 
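+
+    :Example:
+        >>> # a sketch using one of the bundled workbooks; one dataframe
+        >>> # is appended per sheet after the basename kept at index 0
+        >>> out = read_from_excelsheets('data/erp/l10_gbalo.xlsx')
+        >>> out[0]
+        'l10_gbalo'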
+ + """ + + allfls:Dict [str, Dict [T, List[T]] ] = pd.read_excel( + erp_file, sheet_name=None) + + list_of_df =[os.path.basename(os.path.splitext(erp_file)[0])] + for sheets , values in allfls.items(): + list_of_df.append(pd.DataFrame(values)) + + return list_of_df + +def check_dimensionality(obj, data, z, x): + """ Check dimensionality of data and fix it. + + :param obj: Object, can be a class logged or else. + + :param data: 2D grid data of ndarray (z, x) dimensions. + + :param z: array-like should be reduced along the row axis. + + :param x: arraylike should be reduced along the columns axis. + + """ + def reduce_shape(Xshape, x, axis_name=None): + """ Reduce shape to keep the same shape""" + mess ="`{0}` shape({1}) {2} than the data shape `{0}` = ({3})." + ox = len(x) + dsh = Xshape + if len(x) > Xshape : + x = x[: int (Xshape)] + obj._logging.debug(''.join([ + f"Resize {axis_name!r}={ox!r} to {Xshape!r}.", + mess.format(axis_name, len(x),'more',Xshape)])) + + elif len(x) < Xshape: + Xshape = len(x) + obj._logging.debug(''.join([ + f"Resize {axis_name!r}={dsh!r} to {Xshape!r}.", + mess.format(axis_name, len(x),'less', Xshape)])) + return int(Xshape), x + + sz0, z = reduce_shape(data.shape[0], + x=z, axis_name ='Z') + sx0, x =reduce_shape (data.shape[1], + x=x, axis_name ='X') + data = data [:sz0, :sx0] + + return data , z, x + +def is_installing (module, upgrade=True , DEVNULL=False, + action=True, verbose =0, **subpkws): + """ Install or uninstall a module using the subprocess under the hood. + + :param module: str, module name. + + :param upgrade:bool, install the lastest version. + + :param verbose:output a message. + + :param DEVNULL: decline the stdoutput the message in the console. + + :param action: str, install or uninstall a module. + + :param subpkws: additional subprocess keywords arguments. + + :Example: + >>> from pycsamt.tools.funcutils import is_installing + >>> is_installing( + 'tqdm', action ='install', DEVNULL=True, verbose =1) + >>> is_installing( + 'tqdm', action ='uninstall', verbose =1) + """ + #implement pip as subprocess + # refer to https://pythongeeks.org/subprocess-in-python/ + if not action: + if verbose > 0 : + print("---> No action `install`or `uninstall`" + f" of the module {module!r} performed.") + return action # DO NOTHING + + MOD_IMP=False + + action_msg ='uninstallation' if action =='uninstall' else 'installation' + + if action in ('install', 'uninstall', True) and verbose > 0: + print(f'---> Module {module!r} {action_msg} will take a while,' + ' please be patient...') + + cmdg =f' | '\ + if action in (True, 'install') else ''.join([ + f' or .']) + + upgrade ='--upgrade' if upgrade else '' + + if action == 'uninstall': + upgrade= '-y' # Don't ask for confirmation of uninstall deletions. 
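+        # note: the `upgrade` slot is reused to carry pip's extra flag:
+        # '--upgrade' for installs, '-y' (assume yes) for uninstalls.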
+ elif action in ('install', True): + action = 'install' + + cmd = ['-m', 'pip', f'{action}', f'{module}', f'{upgrade}'] + + try: + STDOUT = subprocess.DEVNULL if DEVNULL else None + STDERR= subprocess.STDOUT if DEVNULL else None + + subprocess.check_call( + [sys.executable] + cmd, stdout= STDOUT, stderr=STDERR, + **subpkws) + if action in (True, 'install'): + # freeze the dependancies + reqs = subprocess.check_output( + [sys.executable,'-m', 'pip','freeze']) + [r.decode().split('==')[0] for r in reqs.split()] + _logger.info( f"{action_msg.capitalize()} of `{module}` " + "and dependancies was successfully done!") + MOD_IMP=True + + except: + _logger.error(f"Failed to {action} the module =`{module}`.") + + if verbose > 0 : + print(f'---> Module {module!r} {action_msg} failed. Please use' + f' the following command: {cmdg} to manually do it.') + else : + if verbose > 0: + print(f"{action_msg.capitalize()} of `{module}` " + "and dependancies was successfully done!") + + return MOD_IMP + + +def smart_format(iter_obj): + """ Smart format iterable ob. + + :param iter_obj: iterable obj + + :Example: + >>> from kalfeat.tools.funcutils import smart_format + >>> smart_format(['model', 'iter', 'mesh', 'data']) + ... 'model','iter','mesh' and 'data' + """ + str_litteral ='' + try: + iter(iter_obj) + except: return f"{iter_obj}" + + iter_obj = [str(obj) for obj in iter_obj] + if len(iter_obj) ==1: + str_litteral= ','.join([f"{i!r}" for i in iter_obj ]) + elif len(iter_obj)>1: + str_litteral = ','.join([f"{i!r}" for i in iter_obj[:-1]]) + str_litteral += f" and {iter_obj[-1]!r}" + return str_litteral + +def make_introspection(Obj: object , subObj: Sub[object])->None: + """ Make introspection by using the attributes of instance created to + populate the new classes created. + + :param Obj: callable + New object to fully inherits of `subObject` attributes. + + :param subObj: Callable + Instance created. + """ + # make introspection and set the all attributes to self object. + # if Obj attribute has the same name with subObj attribute, then + # Obj attributes get the priority. + for key, value in subObj.__dict__.items(): + if not hasattr(Obj, key) and key != ''.join(['__', str(key), '__']): + setattr(Obj, key, value) + +def cpath (savepath=None , dpath=None): + """ Control the existing path and create one of it does not exist. + + :param savepath: Pathlike obj, str + :param dpath: str, default pathlike obj + + """ + if dpath is None: + file, _= os.path.splitext(os.path.basename(__file__)) + dpath = ''.join(['_', file, + '_']) #.replace('.py', '') + if savepath is None : + savepath = os.path.join(os.getcwd(), dpath) + try:os.mkdir(savepath) + except: pass + if savepath is not None: + try : + if not os.path.isdir(savepath): + os.mkdir(savepath)# mode =0o666) + except : pass + return savepath + + + +def sPath (name_of_path:str): + """ Savepath func. Create a path with `name_of_path` if path not exists. + + :param name_of_path: str, Path-like object. If path does not exist, + `name_of_path` should be created. + """ + + try : + savepath = os.path.join(os.getcwd(), name_of_path) + if not os.path.isdir(savepath): + os.mkdir(name_of_path)# mode =0o666) + except : + warnings.warn("The path seems to be existed!") + return + return savepath + + +def format_notes(text:str , cover_str: str ='~', inline=70, **kws): + """ Format note + :param text: Text to be formated. + + :param cover_str: type of ``str`` to surround the text. + + :param inline: Nomber of character before going in liine. 
+ + :param margin_space: Must be <1 and expressed in %. The empty distance + between the first index to the inline text + :Example: + + >>> from kalfeat.tools import funcutils as func + >>> text ='Automatic Option is set to ``True``.'\ + ' Composite estimator building is triggered.' + >>> func.format_notes(text= text , + ... inline = 70, margin_space = 0.05) + + """ + + headnotes =kws.pop('headernotes', 'notes') + margin_ratio = kws.pop('margin_space', 0.2 ) + margin = int(margin_ratio * inline) + init_=0 + new_textList= [] + if len(text) <= (inline - margin): + new_textList = text + else : + for kk, char in enumerate (text): + if kk % (inline - margin)==0 and kk !=0: + new_textList.append(text[init_:kk]) + init_ =kk + if kk == len(text)-1: + new_textList.append(text[init_:]) + + print('!', headnotes.upper(), ':') + print('{}'.format(cover_str * inline)) + for k in new_textList: + fmtin_str ='{'+ '0:>{}'.format(margin) +'}' + print('{0}{1:>2}{2:<51}'.format(fmtin_str.format(cover_str), '', k)) + + print('{0}{1:>51}'.format(' '* (margin -1), cover_str * (inline -margin+1 ))) + + + +def round_dipole_length(value, round_value =5.): + """ + small function to graduate dipole length 5 to 5. Goes to be reality and + simple computation . + + :param value: value of dipole length + :type value: float + + :returns: value of dipole length rounded 5 to 5 + :rtype: float + """ + mm = value % round_value + if mm < 3 :return np.around(value - mm) + elif mm >= 3 and mm < 7 :return np.around(value -mm +round_value) + else:return np.around(value - mm +10.) + + +def _isin ( + arr: Array | List [float] , + subarr: Sub [Array] |Sub[List[float]] | float +) -> bool : + """ Check whether the subset array `subcz` is in `cz` array. + + :param arr: Array-like - Array of item elements + :param subarr: Array-like, float - Subset array containing a subset items. + :return: True if items in test array `subarr` are in array `arr`. + + """ + arr = np.array (arr ); subarr = np.array(subarr ) + + return True if True in np.isin (arr, subarr) else False + +def _assert_all_types ( + obj: object , + *expected_objtype: type + ) -> object: + """ Quick assertion of object type. Raise an `TypeError` if + wrong type is given.""" + # if np.issubdtype(a1.dtype, np.integer): + if not isinstance( obj, expected_objtype): + raise TypeError ( + f'Expected {smart_format(tuple (o.__name__ for o in expected_objtype))}' + f' type{"s" if len(expected_objtype)>1 else ""} ' + f'but `{type(obj).__name__}` is given.') + + return obj + + +def savepath_ (nameOfPath): + """ + Shortcut to create a folder + :param nameOfPath: Path name to save file + :type nameOfPath: str + + :return: + New folder created. 
If the `nameOfPath` already exists, ``None`` is returned.
+    :rtype: str
+
+    """
+
+    try:
+        savepath = os.path.join(os.getcwd(), nameOfPath)
+        if not os.path.isdir(savepath):
+            os.mkdir(nameOfPath)  # mode=0o666
+    except:
+        warnings.warn("The path already exists!")
+        return
+    return savepath
+
\ No newline at end of file
diff --git a/kalfeat/tools/gistools.py b/kalfeat/tools/gistools.py
new file mode 100644
index 0000000..7ba4d6c
--- /dev/null
+++ b/kalfeat/tools/gistools.py
@@ -0,0 +1,981 @@
+# -*- coding: utf-8 -*-
+# Created on Fri Apr 14 14:47:48 2017
+
+import numpy as np
+
+from .._kalfeatlog import kalfeatlog
+from ..decorators import (
+    deprecated,
+    gdal_data_check
+    )
+from ..exceptions import GISError
+try:
+    from osgeo import osr
+    from osgeo.ogr import OGRERR_NONE
+    HAS_GDAL = True
+except ImportError:
+    HAS_GDAL = False
+    import pyproj
+
+_logger = kalfeatlog.get_kalfeat_logger(__name__)
+
+# Make sure lat and lon are in decimal degrees
+def _assert_minutes(minutes):
+    assert 0 <= minutes < 60., \
+        'minutes needs to be >=0 and <60, currently {0:.0f}'.format(minutes)
+
+    return minutes
+
+
+def _assert_seconds(seconds):
+    assert 0 <= seconds < 60., \
+        'seconds needs to be >=0 and <60, currently {0:.3f}'.format(seconds)
+    return seconds
+
+
+def _resume_count(deg_or_min, value):
+    """ Resume the count and carry one unit over when seconds or minutes
+    reach 60.
+
+    Seconds and minutes should not end at 60; otherwise one minute or one
+    hour is topped up and the seconds or minutes restart counting. This is
+    useful to skip the assertion error when seconds or minutes equal 60:
+    a conversion is done instead.
+    """
+    return (deg_or_min + value // 60, value % 60) if float(value) >= 60. else (
+        deg_or_min, value)
+
+
+def convert_position_str2float(position_str):
+    """
+    Convert a position string in the format of DD:MM:SS to decimal degrees.
+
+    Arguments
+    -------------
+        **position_str** : string ('DD:MM:SS.ms')
+                           degrees of latitude or longitude
+
+    Returns
+    --------------
+        **position** : float
+                       latitude or longitude in decimal degrees
+
+    Example
+    -------------
+        >>> from kalfeat.tools import gistools
+        >>> gistools.convert_position_str2float('-118:34:56.3')
+    """
+
+    if position_str in [None, 'None']:
+        return None
+
+    p_list = position_str.split(':')
+    if len(p_list) != 3:
+        raise ValueError(
+            f'{position_str} is not the correct format, should be DD:MM:SS')
+
+    deg = float(p_list[0])
+    minutes = float(p_list[1])
+    sec = float(p_list[2])
+
+    minutes, sec = _resume_count(minutes, sec)
+    deg, minutes = _resume_count(deg, minutes)
+
+    # get the sign of the position so that when all are added together the
+    # position is in the correct place
+    sign = 1
+    if deg < 0:
+        sign = -1
+
+    position_value = sign * (abs(deg) + minutes / 60. + sec / 3600.)
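+    # e.g. '-118:34:56.3' -> -(118 + 34/60 + 56.3/3600) ≈ -118.582306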
+ + return position_value + + +def assert_lat_value(latitude): + """ + make sure latitude is in decimal degrees + """ + # if latitude in [None, 'None']: + # return None + try: + lat_value = float(latitude) + + except TypeError: + return None + + except ValueError: + lat_value = convert_position_str2float(latitude) + + if abs(lat_value) >= 90: + print("==> The lat_value =", lat_value) + raise ValueError('|Latitude| > 90, unacceptable!') + + return lat_value + + +def assert_lon_value(longitude): + """ + make sure longitude is in decimal degrees + """ + if longitude in [None, 'None']: + return None + try: + lon_value = float(longitude) + + except TypeError: + return None + + except ValueError: + lon_value = convert_position_str2float(longitude) + + if abs(lon_value) >= 180: + raise ValueError('|Longitude| > 180, unacceptable!') + + return lon_value + + +def assert_elevation_value(elevation): + """ + make sure elevation is a floating point number + """ + + try: + elev_value = float(elevation) + except (ValueError, TypeError): + elev_value = 0.0 + _logger.warn('{0} is not a number, setting elevation to 0'.format(elevation)) + + return elev_value + + +def convert_position_float2str(position): + """ + convert position float to a string in the format of DD:MM:SS + + Arguments + ------------- + **position** : float + decimal degrees of latitude or longitude + + Returns + -------------- + **position_str** : string + latitude or longitude in format of DD:MM:SS.ms + + Example + ------------- + >>> import mtpy.utils.gis_tools as gis_tools + >>> gis_tools.convert_position_float2str(-118.34563) + + """ + + assert type(position) is float, 'Given value is not a float' + + deg = int(position) + sign = 1 + if deg < 0: + sign = -1 + + deg = abs(deg) + minutes = (abs(position) - deg) * 60. + # need to round seconds to 4 decimal places otherwise machine precision + # keeps the 60 second roll over and the string is incorrect. + sec = np.round((minutes - int(minutes)) * 60., 4) + if sec >= 60.: + minutes += 1 + sec = 0 + + if int(minutes) == 60: + deg += 1 + minutes = 0 + + position_str = '{0}:{1:02.0f}:{2:05.2f}'.format(sign * int(deg), + int(minutes), + sec) + + return position_str + + +# ============================================================================== +# Project a point +# ============================================================================== +@deprecated("NATO UTM zone is used in other part of mtpy," + " this function is for Standard UTM") +def get_utm_string_from_sr(spatialreference): + """ + return utm zone string from spatial reference instance + """ + zone_number = spatialreference.GetUTMZone() + if zone_number > 0: + return str(zone_number) + 'N' + elif zone_number < 0: + return str(abs(zone_number)) + 'S' + else: + return str(zone_number) + + +def get_utm_zone(latitude, longitude): + """ + Get utm zone from a given latitude and longitude + """ + zone_number = (int(1 + (longitude + 180.0) / 6.0)) + ## why is this needed here, GDAL only needs N-orth or S-outh + ## n_str = _utm_letter_designator(latitude) + is_northern = bool(latitude >= 0) + + # if latitude < 0.0: + # n_str = 'S' + # else: + # n_str = 'N' + n_str = _utm_letter_designator(latitude) + + return zone_number, is_northern, '{0:02.0f}{1}'.format(zone_number, n_str) + +def project_point_ll2utm(lat, lon, datum='WGS84', utm_zone=None, epsg=None): + """ + Project a point that is in Lat, Lon (will be converted to decimal degrees) + into UTM coordinates. 
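+
+    A minimal usage sketch (the exact easting/northing values depend on
+    the GDAL or pyproj backend available at runtime, so no output is shown):
+
+    >>> point = project_point_ll2utm(-34.5, 138.6, datum='WGS84')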
+
+    Arguments:
+    ---------------
+        **lat** : float or string (DD:MM:SS.ms)
+                  latitude of point
+
+        **lon** : float or string (DD:MM:SS.ms)
+                  longitude of point
+
+        **datum** : string
+                    well known datum ex. WGS84, NAD27, NAD83, etc.
+
+        **utm_zone** : string
+                       zone number and 'S' or 'N' e.g. '55S'
+
+        **epsg** : int
+                   epsg number defining projection (see
+                   http://spatialreference.org/ref/ for more info)
+                   Overrides utm_zone if both are provided
+
+    Returns:
+    --------------
+        **proj_point**: tuple(easting, northing, zone)
+                        projected point in UTM in Datum
+
+    """
+    if lat is None or lon is None:
+        return None, None, None
+
+    # make sure the lat and lon are in decimal degrees
+    if np.iterable(lon) and np.iterable(lat):
+        lat = np.array([assert_lat_value(lat_value) for lat_value in lat])
+        lon = np.array([assert_lon_value(lon_value) for lon_value in lon])
+        assert lat.size == lon.size
+    else:
+        lat = np.array([assert_lat_value(lat)])
+        lon = np.array([assert_lon_value(lon)])
+
+    if HAS_GDAL:
+        # set lat lon coordinate system
+        ll_cs = osr.SpatialReference()
+        if isinstance(datum, int):
+            ogrerr = ll_cs.ImportFromEPSG(datum)
+            if ogrerr != OGRERR_NONE:
+                raise GISError("GDAL/osgeo ogr error code: {}".format(ogrerr))
+        elif isinstance(datum, str):
+            ogrerr = ll_cs.SetWellKnownGeogCS(datum)
+            if ogrerr != OGRERR_NONE:
+                raise GISError("GDAL/osgeo ogr error code: {}".format(ogrerr))
+        else:
+            raise GISError("""datum {0} not understood, needs to be EPSG as int
+                           or a well known datum as a string""".format(datum))
+
+        # set utm coordinate system
+        utm_cs = osr.SpatialReference()
+    # end if
+
+    # project point on to EPSG coordinate system if given
+    if isinstance(epsg, int):
+        if HAS_GDAL:
+            ogrerr = utm_cs.ImportFromEPSG(epsg)
+            if ogrerr != OGRERR_NONE:
+                raise GISError("GDAL/osgeo ogr error code: {}".format(ogrerr))
+        else:
+            import pyproj
+            pp = pyproj.Proj('+init=EPSG:%d'%(epsg))
+        # end if
+    # otherwise project onto given datum
+    elif epsg is None:
+        if HAS_GDAL:
+            ogrerr = utm_cs.CopyGeogCSFrom(ll_cs)
+            if ogrerr != OGRERR_NONE:
+                raise GISError("GDAL/osgeo ogr error code: {}".format(ogrerr))
+        # end if
+        if utm_zone is None or not isinstance(utm_zone, str) \
+                or utm_zone.lower() == 'none':
+            # get the UTM zone in the datum coordinate system, otherwise
+            zone_number, is_northern, utm_zone = get_utm_zone(lat.mean(),
+                                                              lon.mean())
+        else:
+            # get zone number and is_northern from utm_zone string
+            zone_number = int(utm_zone[0:-1])
+            is_northern = True if utm_zone[-1].lower() >= 'n' else False
+
+        if HAS_GDAL:
+            utm_cs.SetUTM(zone_number, is_northern)
+        else:
+            projstring = '+proj=utm +zone=%d +%s +datum=%s' % \
+                         (zone_number, 'north' if is_northern else 'south', datum)
+            pp = pyproj.Proj(projstring)
+        # end if
+    # end if
+
+    # return different results depending on if lat/lon are iterable
+    projected_point = np.zeros_like(lat, dtype=[('easting', float),
+                                                ('northing', float),
+                                                ('elev', float),
+                                                ('utm_zone', 'U4')])
+
+    if HAS_GDAL:
+        ll2utm = osr.CoordinateTransformation(ll_cs, utm_cs).TransformPoint
+    else:
+        ll2utm = pp
+    # end if
+
+    for ii in range(lat.size):
+        point = ll2utm(lon[ii], lat[ii])
+        projected_point['easting'][ii] = point[0]
+        projected_point['northing'][ii] = point[1]
+        if HAS_GDAL: projected_point['elev'][ii] = point[2]
+        projected_point['utm_zone'][ii] = utm_zone if utm_zone is not None else get_utm_zone(lat[ii], lon[ii])[2]
+    # end for
+
+    # if just projecting one point, then return as a tuple so as not to break
+    # anything.
In the future we should adapt to just return a record array + if len(projected_point) == 1: + return (projected_point['easting'][0], + projected_point['northing'][0], + projected_point['utm_zone'][0]) + else: + return np.rec.array(projected_point) + +#espg = 3149 +def project_point_utm2ll(easting, northing, utm_zone, datum='WGS84', epsg=None): + """ + Project a point that is in Lat, Lon (will be converted to decimal degrees) + into UTM coordinates. + + Arguments: + --------------- + **easting** : float + easting coordinate in meters + + **northing** : float + northing coordinate in meters + + **utm_zone** : string (##N or ##S) + utm zone in the form of number and North or South + hemisphere, 10S or 03N + + **datum** : string + well known datum ex. WGS84, NAD27, etc. + + Returns: + -------------- + **proj_point**: tuple(lat, lon) + projected point in lat and lon in Datum, as decimal + degrees. + + """ + try: + easting = float(easting) + except ValueError: + raise GISError("easting is not a float") + try: + northing = float(northing) + except ValueError: + raise GISError("northing is not a float") + + if HAS_GDAL: + # set utm coordinate system + utm_cs = osr.SpatialReference() + utm_cs.SetWellKnownGeogCS(datum) + # end if + + if epsg is not None: + if HAS_GDAL: + ogrerr = utm_cs.ImportFromEPSG(epsg) + if ogrerr != OGRERR_NONE: + raise Exception("GDAL/osgeo ogr error code: {}".format(ogrerr)) + else: + import pyproj + pp = pyproj.Proj('+init=EPSG:%d'%(epsg)) + # end if + elif isinstance(utm_zone, str) or isinstance(utm_zone, np.bytes_): + # the isinstance(utm_zone, str) could be False in python3 due to numpy datatype change. + # So FZ added isinstance(utm_zone, np.bytes_) and convert the utm_zone into string + if isinstance(utm_zone, np.bytes_): + utm_zone = utm_zone.decode('UTF-8') # b'54J' + try: + zone_number = int(utm_zone[0:-1]) #b'54J' + zone_letter = utm_zone[-1] + except ValueError: + raise ValueError('Zone number {0} is not a number'.format(utm_zone[0:-1])) + is_northern = True if zone_letter.lower() >= 'n' else False + elif isinstance(utm_zone, int): + # std UTM code returned by gdal + is_northern = False if utm_zone < 0 else True + zone_number = abs(utm_zone) + else: + print("epsg and utm_zone", str(epsg), str(utm_zone)) + + raise NotImplementedError( + "utm_zone type (%s, %s) not supported"%(type(utm_zone), str(utm_zone))) + + if epsg is None: + if HAS_GDAL: + utm_cs.SetUTM(zone_number, is_northern) + else: + import pyproj + projstring = '+proj=utm +zone=%d +%s +datum=%s' % \ + (zone_number, 'north' if is_northern else 'south', datum) + pp = pyproj.Proj(projstring) + # end if + # end if + + if HAS_GDAL: + ll_cs = utm_cs.CloneGeogCS() + utm2ll = osr.CoordinateTransformation(utm_cs, ll_cs).TransformPoint + ll_point = list(utm2ll(easting, northing, 0.)) + else: + ll_point = pp(easting, northing, inverse=True) + # end if + + # be sure to round out the numbers to remove computing with floats + return round(ll_point[1], 6), round(ll_point[0], 6), utm_zone + + +def project_points_ll2utm(lat, lon, datum='WGS84', utm_zone=None, epsg=None): + """ + Project a list of points that is in Lat, Lon (will be converted to decimal + degrees) into UTM coordinates. + + Arguments: + --------------- + **lat** : float or string (DD:MM:SS.ms) + latitude of point + + **lon** : float or string (DD:MM:SS.ms) + longitude of point + + **datum** : string + well known datum ex. WGS84, NAD27, NAD83, etc. + + **utm_zone** : string + zone number and 'S' or 'N' e.g. '55S'. 
+                       centre point of the provided points
+
+        **epsg** : int
+                   epsg number defining projection (see
+                   http://spatialreference.org/ref/ for more info)
+                   Overrides utm_zone if both are provided
+
+    Returns:
+    --------------
+        **proj_point**: tuple(easting, northing, zone)
+                        projected point in UTM in Datum
+
+    """
+    # bail out early when no coordinates are provided
+    if lat is None or lon is None:
+        return None, None, None
+
+    lat = np.array(lat)
+    lon = np.array(lon)
+
+    # check shapes of arrays
+    if np.shape(lat) != np.shape(lon):
+        raise ValueError("latitude and longitude arrays are of different lengths")
+
+    # flatten multi-dimensional input, if necessary
+    flattened = False
+    llshape = np.shape(lat)
+    if len(llshape) > 1:
+        flattened = True
+        lat = lat.flatten()
+        lon = lon.flatten()
+
+    '''
+    # check lat/lon values
+    # this is incredibly slow; disabling for the time being
+    for ii in range(len(lat)):
+        lat[ii] = assert_lat_value(lat[ii])
+        lon[ii] = assert_lon_value(lon[ii])
+    '''
+
+    if HAS_GDAL:
+        # set utm coordinate system
+        utm_cs = osr.SpatialReference()
+        utm_cs.SetWellKnownGeogCS(datum)
+
+        # set lat, lon coordinate system
+        ll_cs = utm_cs.CloneGeogCS()
+        ll_cs.ExportToPrettyWkt()
+    # end if
+
+    # get zone number, north and zone name
+    if epsg is not None:
+        if HAS_GDAL:
+            # set projection info
+            ogrerr = utm_cs.ImportFromEPSG(epsg)
+            if ogrerr != OGRERR_NONE:
+                raise GISError("GDAL/osgeo ogr error code: {}".format(ogrerr))
+            # get utm zone (for information) if applicable
+            utm_zone = utm_cs.GetUTMZone()
+
+            # Whilst some projections e.g. Geoscience Australia Lambert
+            # (epsg 3112) do not yield UTM zones, they provide eastings and
+            # northings for the whole Australian region. We therefore set
+            # UTM zones only when a valid UTM zone is available. Note that
+            # GetUTMZone() returns 0 for non-UTM projections and a negative
+            # zone number for the southern hemisphere.
+            if utm_zone != 0:
+                # set projection info
+                utm_cs.SetUTM(abs(utm_zone), utm_zone > 0)
+        else:
+            pp = pyproj.Proj('+init=EPSG:%d' % (epsg))
+        # end if
+    else:
+        if utm_zone is not None:
+            # get zone number and is_northern from the utm_zone string
+            zone_number = int(utm_zone[0:-1])
+            is_northern = True if utm_zone[-1].lower() >= 'n' else False
+        else:
+            # get centre point and get zone from that
+            latc = (np.nanmax(lat) + np.nanmin(lat)) / 2.
+            lonc = (np.nanmax(lon) + np.nanmin(lon)) / 2.
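+            # (latc, lonc) is the midpoint of the coordinate extents; the
+            # get_utm_zone() call just below picks the UTM zone containing it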
+            zone_number, is_northern, utm_zone = get_utm_zone(latc, lonc)
+        # set projection info
+
+        if HAS_GDAL:
+            utm_cs.SetUTM(zone_number, is_northern)
+        else:
+            projstring = '+proj=utm +zone=%d +%s +datum=%s' % \
+                (zone_number, 'north' if is_northern else 'south', datum)
+            pp = pyproj.Proj(projstring)
+        # end if
+    # end if
+
+    if HAS_GDAL:
+        ll2utm = osr.CoordinateTransformation(ll_cs, utm_cs).TransformPoints
+        easting, northing, elev = np.array(ll2utm(np.array([lon, lat]).T)).T
+    else:
+        ll2utm = pp
+        easting, northing = ll2utm(lon, lat)
+    # end if
+
+    # reshape the projected coordinates back into the original input shape
+    if flattened:
+        easting = np.array(easting).reshape(llshape)
+        northing = np.array(northing).reshape(llshape)
+
+    projected_point = (easting, northing, utm_zone)
+
+    return projected_point
+# end func
+
+# =================================
+# functions from latlon_utm_conversion.py
+
+
+_deg2rad = np.pi / 180.0
+_rad2deg = 180.0 / np.pi
+_equatorial_radius = 2
+_eccentricity_squared = 3
+
+_ellipsoid = [
+    # id, Ellipsoid name, Equatorial Radius, square of eccentricity
+    # the first one is a placeholder only, to allow array indices to match
+    # the id numbers
+    [-1, "Placeholder", 0, 0],
+    [1, "Airy", 6377563, 0.00667054],
+    [2, "Australian National", 6378160, 0.006694542],
+    [3, "Bessel 1841", 6377397, 0.006674372],
+    [4, "Bessel 1841 (Namibia)", 6377484, 0.006674372],
+    [5, "Clarke 1866", 6378206, 0.006768658],
+    [6, "Clarke 1880", 6378249, 0.006803511],
+    [7, "Everest", 6377276, 0.006637847],
+    [8, "Fischer 1960 (Mercury)", 6378166, 0.006693422],
+    [9, "Fischer 1968", 6378150, 0.006693422],
+    [10, "GRS 1967", 6378160, 0.006694605],
+    [11, "GRS 1980", 6378137, 0.00669438],
+    [12, "Helmert 1906", 6378200, 0.006693422],
+    [13, "Hough", 6378270, 0.00672267],
+    [14, "International", 6378388, 0.00672267],
+    [15, "Krassovsky", 6378245, 0.006693422],
+    [16, "Modified Airy", 6377340, 0.00667054],
+    [17, "Modified Everest", 6377304, 0.006637847],
+    [18, "Modified Fischer 1960", 6378155, 0.006693422],
+    [19, "South American 1969", 6378160, 0.006694542],
+    [20, "WGS 60", 6378165, 0.006693422],
+    [21, "WGS 66", 6378145, 0.006694542],
+    [22, "WGS-72", 6378135, 0.006694318],
+    [23, "WGS-84", 6378137, 0.00669438]
+]
+
+
+# Reference ellipsoids derived from Peter H. Dana's website-
+# http://www.utexas.edu/depts/grg/gcraft/notes/datum/elist.html
+# Department of Geography, University of Texas at Austin
+# Internet: pdana@mail.utexas.edu
+# 3/22/95
+
+# Source
+# Defense Mapping Agency. 1987b. DMA Technical Report: Supplement to
+# Department of Defense World Geodetic System 1984 Technical Report.
+# Part I and II. Washington, DC: Defense Mapping Agency
+
+# @deprecated("This function may be removed in later release."
+#             " kalfeat.utils.gistools.project_point_ll2utm() should be "
+#             "used instead.")
+def ll_to_utm(reference_ellipsoid, lat, lon):
+    """
+    converts lat/long to UTM coords. Equations from USGS Bulletin 1532
+    East Longitudes are positive, West longitudes are negative.
+    North latitudes are positive, South latitudes are negative.
+    Lat and Long are in decimal degrees.
+    Written by Chuck Gantz- chuck.gantz@globalstar.com
+
+    Outputs:
+        UTMzone, easting, northing
+    """
+
+    a = _ellipsoid[reference_ellipsoid][_equatorial_radius]
+    ecc_squared = _ellipsoid[reference_ellipsoid][_eccentricity_squared]
+    k0 = 0.9996
+
+    # Make sure the longitude is between -180.00 .. 179.9
+    long_temp = (lon + 180) - int((lon + 180) / 360) * 360 - 180  # -180.00 ..
179.9 + + lat_rad = lat * _deg2rad + long_rad = long_temp * _deg2rad + + zone_number = int((long_temp + 180) / 6) + 1 + + if 56.0 <= lat < 64.0 and 3.0 <= long_temp < 12.0: + zone_number = 32 + + # Special zones for Svalbard + if 72.0 <= lat < 84.0: + if 0.0 <= long_temp < 9.0: + zone_number = 31 + elif 9.0 <= long_temp < 21.0: + zone_number = 33 + elif 21.0 <= long_temp < 33.0: + zone_number = 35 + elif 33.0 <= long_temp < 42.0: + zone_number = 37 + + long_origin = (zone_number - 1) * 6 - 180 + 3 # +3 puts origin in middle of zone + long_origin_rad = long_origin * _deg2rad + + # compute the UTM Zone from the latitude and longitude + utm_zone = "%d%c" % (zone_number, _utm_letter_designator(lat)) + + ecc_prime_squared = ecc_squared / (1 - ecc_squared) + N = a / np.sqrt(1 - ecc_squared * np.sin(lat_rad) ** 2) + T = np.tan(lat_rad) ** 2 + C = ecc_prime_squared * np.cos(lat_rad) ** 2 + A = np.cos(lat_rad) * (long_rad - long_origin_rad) + + M = a * ( + (1 + - ecc_squared / 4 + - 3 * ecc_squared ** 2 / 64 + - 5 * ecc_squared ** 3 / 256) * lat_rad + - (3 * ecc_squared / 8 + + 3 * ecc_squared ** 2 / 32 + + 45 * ecc_squared ** 3 / 1024) * np.sin(2 * lat_rad) + + (15 * ecc_squared ** 2 / 256 + + 45 * ecc_squared ** 3 / 1024) * np.sin(4 * lat_rad) + - (35 * ecc_squared ** 3 / 3072) * np.sin(6 * lat_rad)) + + utm_easting = (k0 * N * (A + + (1 - T + C) * A ** 3 / 6 + + (5 - 18 * T + + T ** 2 + + 72 * C + - 58 * ecc_prime_squared) * A ** 5 / 120) + + 500000.0) + + utm_northing = (k0 * (M + + N * np.tan(lat_rad) * (A ** 2 / 2 + + (5 + - T + + 9 * C + + 4 * C ** 2) * A ** 4 / 24 + + (61 + - 58 * T + + T ** 2 + + 600 * C + - 330 * ecc_prime_squared + ) * A ** 6 / 720))) + + if lat < 0: + utm_northing = utm_northing + 10000000.0 # 10000000 meter offset for southern hemisphere + return utm_zone, utm_easting, utm_northing + + +def _utm_letter_designator(lat): + # This routine determines the correct UTM letter designator for the given latitude + # returns 'Z' if latitude is outside the UTM limits of 84N to 80S + # Written by Chuck Gantz- chuck.gantz@globalstar.com + + if 84 >= lat >= 72: + return 'X' + elif 72 > lat >= 64: + return 'W' + elif 64 > lat >= 56: + return 'V' + elif 56 > lat >= 48: + return 'U' + elif 48 > lat >= 40: + return 'T' + elif 40 > lat >= 32: + return 'S' + elif 32 > lat >= 24: + return 'R' + elif 24 > lat >= 16: + return 'Q' + elif 16 > lat >= 8: + return 'P' + elif 8 > lat >= 0: + return 'N' + elif 0 > lat >= -8: + return 'M' + elif -8 > lat >= -16: + return 'L' + elif -16 > lat >= -24: + return 'K' + elif -24 > lat >= -32: + return 'J' + elif -32 > lat >= -40: + return 'H' + elif -40 > lat >= -48: + return 'G' + elif -48 > lat >= -56: + return 'F' + elif -56 > lat >= -64: + return 'E' + elif -64 > lat >= -72: + return 'D' + elif -72 > lat >= -80: + return 'C' + else: + return 'Z' # if the Latitude is outside the UTM limits + + +def utm_to_ll(reference_ellipsoid, northing, easting, zone): + """ + converts UTM coords to lat/long. Equations from USGS Bulletin 1532 + East Longitudes are positive, West longitudes are negative. + North latitudes are positive, South latitudes are negative + Lat and Long are in decimal degrees. 
+ Written by Chuck Gantz- chuck.gantz@globalstar.com + Converted to Python by Russ Nelson + + Outputs: + Lat,Lon + """ + + k0 = 0.9996 + a = _ellipsoid[reference_ellipsoid][_equatorial_radius] + ecc_squared = _ellipsoid[reference_ellipsoid][_eccentricity_squared] + e1 = (1 - np.sqrt(1 - ecc_squared)) / (1 + np.sqrt(1 - ecc_squared)) + # NorthernHemisphere; //1 for northern hemispher, 0 for southern + + x = easting - 500000.0 # remove 500,000 meter offset for longitude + y = northing + + zone_letter = zone[-1] + zone_number = int(zone[:-1]) + if zone_letter >= 'N': + NorthernHemisphere = 1 # point is in northern hemisphere + else: + NorthernHemisphere = 0 # point is in southern hemisphere + y -= 10000000.0 # remove 10,000,000 meter offset used for southern hemisphere + + # +3 puts origin in middle of zone + long_origin = (zone_number - 1) * 6 - 180 + 3 + + ecc_prime_squared = ecc_squared / (1 - ecc_squared) + + M = y / k0 + mu = M / (a * (1 - ecc_squared / 4 - 3 * ecc_squared ** 2 / + 64 - 5 * ecc_squared ** 3 / 256)) + + phi1_rad = (mu + (3 * e1 / 2 - 27 * e1 ** 3 / 32) * np.sin(2 * mu) + + (21 * e1 ** 2 / 16 - 55 * e1 ** 4 / 32) * np.sin(4 * mu) + + (151 * e1 ** 3 / 96) * np.sin(6 * mu)) + phi1 = phi1_rad * _rad2deg + + n1 = a / np.sqrt(1 - ecc_squared * np.sin(phi1_rad) ** 2) + t1 = np.tan(phi1_rad) ** 2 + c1 = ecc_prime_squared * np.cos(phi1_rad) ** 2 + r1 = a * (1 - ecc_squared) / np.power(1 - ecc_squared * + np.sin(phi1_rad) ** 2, 1.5) + d = x / (n1 * k0) + + lat = phi1_rad - (n1 * np.tan(phi1_rad) / r1) * ( + d ** 2 / 2 - (5 + 3 * t1 + 10 * c1 - 4 * c1 ** 2 - 9 * ecc_prime_squared) * d ** 4 / 24 + + ( + 61 + 90 * t1 + 298 * c1 + 45 * t1 ** 2 - 252 * ecc_prime_squared - 3 * c1 ** 2) * d ** 6 / 720) + lat = lat * _rad2deg + + lon = (d - (1 + 2 * t1 + c1) * d ** 3 / 6 + ( + 5 - 2 * c1 + 28 * t1 - 3 * c1 ** 2 + 8 * ecc_prime_squared + 24 * t1 ** 2) + * d ** 5 / 120) / np.cos(phi1_rad) + lon = long_origin + lon * _rad2deg + return lat, lon + +# http://spatialreference.org/ref/epsg/28350/proj4/ +epsg_dict = {28350: ['+proj=utm +zone=50 +south +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', 50], + 28351: ['+proj=utm +zone=51 +south +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', 51], + 28352: ['+proj=utm +zone=52 +south +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', 52], + 28353: ['+proj=utm +zone=53 +south +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', 53], + 28354: ['+proj=utm +zone=54 +south +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', 54], + 28355: ['+proj=utm +zone=55 +south +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', 55], + 28356: ['+proj=utm +zone=56 +south +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', 56], + 3112: [ + '+proj=lcc +lat_1=-18 +lat_2=-36 +lat_0=0 +lon_0=134 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs', + 0], + 4326: ['+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs', 0], + 32609: ['+proj=utm +zone=9 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 9], + 32610: ['+proj=utm +zone=10 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 10], + 32611: ['+proj=utm +zone=11 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 11], + 32612: ['+proj=utm +zone=12 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 12], + 32613: ['+proj=utm +zone=13 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 13], + 32614: ['+proj=utm +zone=14 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 14], + 32615: ['+proj=utm +zone=15 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 15], + 32616: ['+proj=utm +zone=16 +ellps=WGS84 +datum=WGS84 
+units=m +no_defs', 16],
+             32617: ['+proj=utm +zone=17 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 17],
+             32618: ['+proj=utm +zone=18 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 18],
+             32619: ['+proj=utm +zone=19 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', 19]
+             }
+
+def utm_wgs84_conv(lat, lon):
+    """
+    Bidirectional UTM-WGS84 converter, based on
+    https://github.com/Turbo87/utm/blob/master/utm/conversion.py
+    :param lat:
+    :param lon:
+    :return: tuple(e, n, zone, lett)
+    """
+
+    import utm  # pip install utm
+    tup = utm.from_latlon(lat, lon)
+
+    (new_lat, new_lon) = utm.to_latlon(tup[0], tup[1], tup[2], tup[3])
+    # print(new_lat, new_lon)  # should be the same as the input params
+
+    # checking correctness: the round trip should reproduce the input
+    # coordinates to within a 1.0e-10 degree tolerance
+    if abs(lat - new_lat) > 1.0e-10:
+        print("Warning: lat and new_lat should be equal!")
+
+    if abs(lon - new_lon) > 1.0e-10:
+        print("Warning: lon and new_lon should be equal!")
+
+    return tup
+
+
+@gdal_data_check
+@deprecated("This function may be removed in later release."
+            " kalfeat.utils.gistools.project_point_utm2ll() should be "
+            "used instead.")
+def transform_utm_to_ll(easting, northing, zone,
+                        reference_ellipsoid='WGS84'):
+    utm_coordinate_system = osr.SpatialReference()
+    # Set geographic coordinate system to handle lat/lon
+    utm_coordinate_system.SetWellKnownGeogCS(reference_ellipsoid)
+
+    try:
+        zone_number = int(zone[0:-1])
+        zone_letter = zone[-1]
+    except ValueError:
+        raise ValueError('Zone number {0} is not a number'.format(zone[0:-1]))
+    is_northern = True if zone_letter.lower() >= 'n' else False
+
+    utm_coordinate_system.SetUTM(zone_number, is_northern)
+
+    # Clone ONLY the geographic coordinate system
+    ll_coordinate_system = utm_coordinate_system.CloneGeogCS()
+
+    # create transform component
+    utm_to_ll_geo_transform = osr.CoordinateTransformation(utm_coordinate_system,
+                                                           ll_coordinate_system)
+    # returns lon, lat, altitude
+    return utm_to_ll_geo_transform.TransformPoint(easting, northing, 0)
+
+
+@gdal_data_check
+@deprecated("This function may be removed in later release. "
+            "kalfeat.utils.gistools.project_point_ll2utm() should be "
+            "used instead.")
+def transform_ll_to_utm(lon, lat, reference_ellipsoid='WGS84'):
+    """
+    transform a (lon, lat) point to a UTM coordinate.
+    The UTM zone number is determined by the longitude; the hemisphere is
+    determined by the latitude.
+    :param lon: degree
+    :param lat: degree
+    :param reference_ellipsoid:
+    :return: utm_coordinate_system, utm_point
+    """
+
+    def get_utm_zone(longitude):
+        return (int(1 + (longitude + 180.0) / 6.0))
+
+    def is_northern(latitude):
+        """
+        Determine whether the given latitude is in the northern hemisphere
+        for UTM purposes
+        """
+        # if (latitude < 0.0):
+        #     return 0
+        # else:
+        #     return 1
+        return latitude >= 0
+
+    utm_coordinate_system = osr.SpatialReference()
+    # Set geographic coordinate system to handle lat/lon
+    utm_coordinate_system.SetWellKnownGeogCS(reference_ellipsoid)
+    utm_coordinate_system.SetUTM(get_utm_zone(lon), is_northern(lat))
+
+    # Clone ONLY the geographic coordinate system
+    ll_coordinate_system = utm_coordinate_system.CloneGeogCS()
+    # create transform component
+    ll_to_utm_geo_transform = osr.CoordinateTransformation(ll_coordinate_system,
+                                                           utm_coordinate_system)
+
+    utm_point = ll_to_utm_geo_transform.TransformPoint(lon, lat, 0)
+
+    # returns easting, northing, altitude
+    return utm_coordinate_system, utm_point
+
+#################################################################
+# Example usages of this script/module
+# python gis_tools.py
+#=================================================================
+if __name__ == "__main__":
+
+    mylat = -35.0
+    mylon = 149.5
+    utm = project_point_ll2utm(mylat, mylon)
+    print("project_point_ll2utm(mylat, mylon) =: ", utm)
+
+    if HAS_GDAL:
+        utm2 = transform_ll_to_utm(mylon, mylat)
+        print("The transform_ll_to_utm(mylon, mylat) results lat, long, elev =: ",
+              utm2[1])
+
+        spref_obj = utm2[0]
+        print("The spatial ref string =:", str(spref_obj))
+        print("The spatial ref WKT format =:", spref_obj.ExportToWkt())
+        print("********* Details of the spatial reference object ***************")
+        print("spref.GetAttrValue('projcs')", spref_obj.GetAttrValue('projcs'))
+        print("spref.GetUTMZone()", spref_obj.GetUTMZone())
+    # end if
+
+
diff --git a/kalfeat/typing.py b/kalfeat/typing.py
new file mode 100644
index 0000000..ee0f623
--- /dev/null
+++ b/kalfeat/typing.py
@@ -0,0 +1,411 @@
+# -*- coding: utf-8 -*-
+# author: KLaurent
+# Licence: GPL-3.0
+
+
+"""
+`kalfeat`_ Type variables
+===========================
+
+.. |ERP| replace:: Electrical resistivity profiling
+
+.. _kalfeat: https://github.com/WEgeophysics/kalfeat/
+.. _pandas DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
+.. _Series: https://pandas.pydata.org/docs/reference/api/pandas.Series.html
+
+Some customized type variables used throughout the package need to be
+explained for easy understanding. Indeed, customized type hints are used to
+define the types of arguments.
+
+**M**: The integer type variable `IntVar` denoting the number of rows in an
+    ``Array``.
+
+**N**: Like ``M``, *N* denotes the number of columns in an ``Array``. It is
+    bound to an integer variable.
+
+**T**: The usual generic type variable standing for `Any` type of variable.
+    We keep it unchanged.
+
+**U**: Unlike `T`, `U` stands for nothing and is used to specify a
+    one-dimensional array. For instance::
+
+        >>> import numpy as np
+        >>> array = np.arange(4).shape
+        ... (4, )
+
+**S**: Indicates the `Shape` status. It is bound by `M`, `U`, `N`. `U` stands
+    for nothing for a one-dimensional array. While the common shape expects
+    one- or two-dimensional arrays, it is possible to extend the array to
+    more dimensions: the class object :class:`AddShape` is created to gather
+    the remaining integer values of the shape.
+
+**D**: Stands for dtype object. It is bound with :class:`DType`.
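+    As a quick sketch (reusing the module's own ``D`` bound declared further
+    below), ``DType`` is meant to be passed as the second parameter of
+    ``Array``::
+
+        >>> import numpy as np
+        >>> from kalfeat.typing import TypeVar, Array, DType
+        >>> D = TypeVar ('D', bound ='DType')
+        >>> intarr: Array[int, DType[int]] = np.arange(7)  # dtype ='int'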
+
+**Array**: Defined for a one-dimensional array, and the `DType` can be
+    specified. For instance, we generate two arrays (`arr1` and `arr2`) of
+    different types::
+
+        >>> import numpy as np
+        >>> from kalfeat.typing import TypeVar, Array, DType
+        >>> T = TypeVar ('T', float)
+        >>> A = TypeVar ('A', str, bytes )
+        >>> arr1: Array[T, DType[T]] = np.arange(21)  # dtype ='float'
+        >>> arr2: Array[A, DType[A]] = arr1.astype ('str')  # dtype ='str'
+
+**NDArray**: Stands for multi-dimensional arrays, i.e. two dimensions or
+    more. The difference from the one-dimensional ``Array`` is that, while
+    the latter accepts ``DType`` as its second parameter, ``NDArray`` takes
+    the ``Array`` itself as the first argument and the ``DType`` as the
+    second argument, like this::
+
+        >>> import numpy as np
+        >>> from kalfeat.typing import TypeVar, Array, NDArray, DType
+        >>> T = TypeVar ('T', int)
+        >>> U = TypeVar ('U')
+        >>> multidarray = np.arange(7 * 7).reshape(7, 7).astype (np.int32)
+        >>> def accept_multid(
+                arrays: NDArray[Array[T, U], DType [T]] = multidarray
+                ):
+            ''' asserted with MyPy and works fine.'''
+            ...
+
+**Sub**: Stands for subset. Indeed, the class is created to define the
+    conductive zone, which is a subset ``Sub`` of ``Array``. For example, we
+    first build an array, then extract the conductive zone from the |ERP|
+    line, and finally check the type hint to assert whether the extracted
+    zone is a subset of the whole |ERP| line. The demo is given below::
+
+        >>> import numpy as np
+        >>> from kalfeat.typing import TypeVar, DType, Array, Sub
+        >>> from kalfeat.tools.exmath import _define_conductive_zone
+        >>> T = TypeVar ('T', float)
+        >>> erp_array: Array[T, DType[T]] = np.random.randn (21)  # whole line
+        >>> select_zone, _ = _define_conductive_zone (erp = erp_array, auto =True)
+        >>> select_zone: Array[T, DType[T]]
+        >>> def check_cz (select_zone: Sub[Array]):
+                ''' asserted with MyPy and returns ``True`` as it works fine. '''
+                ...
+
+**SP**: Stands for Station positions. The unit of position may vary; however,
+    for :mod:`kalfeat.method.electrical.ElectricalResistivityProfiling` we
+    keep the default unit in ``meters``, starting at position 0. Typically,
+    positions are recorded according to the dipole length. For example, we
+    can generate position values for ``121 stations`` with a dipole length
+    equal to ``50 m``, i.e. a survey line of ``6 km``. Here we go:
+
+    * Import the required modules and generate the whole survey line::
+
+        >>> import numpy as np
+        >>> from kalfeat.typing import TypeVar, DType, SP, Sub
+        >>> T = TypeVar ('T', bound =int)
+        >>> surveyL: SP = np.arange(0, 50 * 121, 50.).astype (np.int32)
+        ... (works fine with MyPy)
+
+    * Let's verify whether the data extracted from `surveyL` is also a
+      subset of the station positions:
+
+        - We use the following function to extract the specific part of
+          the whole survey line `surveyL`::
+
+            >>> from kalfeat.tools.exmath import define_conductive_zone
+            >>> subpos, _ = define_conductive_zone (surveyL, s='S10')
+
+        - Now, we check the instance value `subpos` as a subset array of
+          `SP`. Note that the station 'S10' is included in the extracted
+          locations and is extended over seven points. For further
+          details, refer to `define_conductive_zone.__doc__`::
+
+            >>> def checksup_type (sp: Sub[SP[T, DType[T]]] = subpos ):
+                ''' `SP` is an array of positions; the argument `sp`
+                should be asserted as a subset of the whole line.'''
+                ...
+            ... (test verified.
subpos is a subset of `SP`) + +**Series**: Stands for `pandas Series`_ object rather than using the specific + ``pandas.Series`` everywhere in the package. + +**DataFrame**: Likewise the ``Series`` generic type hint, it stands for + ``pandas DataFrame`_ object. It used to replace ``pandas.DataFrame`` object + to identify the callable arguments in the whole packages. + Both can be instanciated as below:: + + >>> import numpy as np + >>> import pandas pd + >>> from kalfeat.typing import TypeVar , Any, DType , Series, DataFrame + >>> T =TypeVar('T') + >>> seriesStr = pd.Series ([f'obx{s}' for s in range(21)], + name ='stringobj') + >>> seriesFloat = pd.Series (np.arange(7).astype(np.float32), + name =floatobj) + >>> SERs = Series [DType[str]] # pass + >>> SERf =Series [DType [float]] # pass + + .. + + >>> dfStr= pd.DataFrame {'ser1':seriesStr , + 'obj2': [f'none' for i in range (21)]} + >>> dfFloat= pd.DataFrame {'ser1':seriesFloat , + 'obj2': np.linspace (3, 28 , 7)} + >>> dfAny= pd.DataFrame {'ser1':seriesStr, + 'ser2':seriesFloat} + >>> DFs = DataFrame [SERs] | DataFrame [DType[str]] + >>> DFf = DataFrame [SERf] | DataFrame [DType[float]] + >>> DFa = DataFrame [Series[Any]] | DataFrame [DType[T]] + +--- + +Additional definition for common arguments +=========================================== + +To better construct a hugue API, an explanation of some argument is useful +to let the user aware when meeting such argument in a callable function. + +**erp** : Stand for Electrical Resistivity Profiling. Typically, the type hint + for |ERP| is ``Array[float, DType [float]]`` or ``List[float]``. Its + array is supposed to hold the apparent resistivy values collected + during the survey. + +**p**: Typically mean position but by preference means station location + positions. The type hint used to defined the `p` is `` + ``Array[int, DType [int]]`` or ``List[int]``. Indeed, the position + supposed to be on integer array and the given values enven in float + should be casted to integers. + +**cz**: Stands for Conductive Zone. It is a subset of |ERP| so they share the + same type hint. However, for better demarcation, ``Sub`` is convenient to + use to avoid any confusion about the full |ERP| and the extracted + conductive as demontrated in the example above in ``Sub`` type hint + definition. + +""" + +from typing import ( + TypeVar, + List, + Tuple, + Sequence, + Dict, + Iterable, + Callable, + Union, + Any , + Generic, + Optional, + Union, + Type , + Mapping, + Text, + +) + +T = TypeVar('T') +V = TypeVar('V') +K = TypeVar('K') +M =TypeVar ('M', bound= int ) +N= TypeVar('N', bound =int ) +U= TypeVar('U') +D =TypeVar ('D', bound ='DType') +S = TypeVar('S', bound='Shape') + +class AddShape (Generic [S]): + """ Suppose to be an extra bound to top the `Shape` for dimensional + more than two. + + Example + ------- + >>> import numpy as np + >>> np.random.randn(7, 3, 3) + >>> def check_valid_type ( + array: NDArray [Array[float], Shape[M, AddShape[N]]]): + ... + + """ +class Shape (Generic[M, S], AddShape[S]): + """ Generic to construct a tuple shape for NDarray. `Shape` has is + written wait for two dimensional arrays with M-row and N-columns. 
However + for three dimensional,`Optional` Type could be: + + :Example: + >>> import numpy as np + >>> # For 1D array + >>> np + >>> np.random.rand(7) + >>> def check_array1d( + array: Array[float, Shape[M, None]]) + >>> np.random.rand (7, 7).astype('>U12'): + >>> def check_array2d_type ( + array: NDArray[Array[str], Shape [M, N], DType ['>U12']]) + + """ + def __getitem__ (self, M, N) -> S: + """ Get the type of rown and type of columns + and return Tuple of ``M`` and ``N``. """ + ... + +class DType (Generic [T]): + """ DType can be Any Type so it holds 'T' type variable. """ + def __getitem__ (self, T) -> T: + """ Get Generic Type object and return Type Variable""" + ... + +class Array(Generic[T, D]): + """ Arry Type here means the 1D array i.e singular column. """ + + def __getitem__ (self, T) -> Union ['Array', T]: + """ Return Type of the given Type variable. """ + ... + + +class NDArray(Array[T, DType [T]], Generic [T, D ]) : + """ NDarray has ``M``rows, ``N`` -columns, `Shape` and `DType` object. + and Dtype. `Shape` is unbound for this class since it does not make since + to sepecify more integers. However, `DType` seems useful to provide. + + :Example: + >>> import numpy as np + >>> T= TypeVar (T, str , float) # Dtype here is gone to be "str" + >>> array = np.c_[np.arange(7), np.arange(7).astype ('str')] + >>> def test_array (array: NDArray[T, DType [T]]):... + """ + def __getitem__ (self,T ) -> T: + """ Return type variable. Truly the ``NDArray``""" + ... + +class F (Generic [T]): + """ Generic class dedicated for functions, methods and class and + return the given types i.e callable object with arguments or `Any`. + + :Example: + >>> import functools + >>> def decorator (appender ='get only the documention and pass.'): + @functools.wraps(func): + def wrapper(*args, **kwds) + func.__doc__ = appender + func.__doc__ + return func (*args, **kwds) + return wrapper + >>> @decorator # do_nothing = decorator (anyway) + def anyway(*args, **kwds): + ''' Im here to ''' + ... + >>> def check_F(anyway:F): + pass + """ + def __getitem__ (self, item: Callable [...,T] + ) -> Union ['F', Callable[..., T], T, Any]: + """ Accept any type of variable supposing to be a callable object + functions, methods or even classes and return the given type + object or another callable object with its own or different specific + parameters or itself or Any.""" + return self + +class Sub (Generic [T]): + """ Return subset of whatever Array""" + ... + +class SP(Generic [T, D]): + """ Station position arrays hold integer values of the survey location. + Most likely, the station position is given according to the dipole length. + Assume the dipole length is ``10 meters`` and survey is carried out on + 21 stations. The station position array should be an array of interger + values from 0. to 200 meters. as like:: + + >>> import numpy as np + >>> positions: SP = np.arange(0, 21 * 10, 10. + ).astype (np.int32) # integer values + """ + ... + +class Series (DType[T], Generic [T]): + """ To reference the pandas `Series`_ object. + + .. _Series: https://pandas.pydata.org/docs/reference/api/pandas.Series.html + + :Example: + >>> import numpy as np + >>> import pandas as pd + >>> from kalfeat.typing import DType, Series + >>> ser = pd.Series (np.arange (21), name ='nothing') + + .. code: Python + + def check_type (serObj:Series): + ''' pass anyway''' + ... 
+ check_type (seObj: Series[DType[str]]=ser ) + + """ + def __getitem__ (self, item: T) -> 'Series': + """ Get the type variable of item T and return `Series`_ object.""" + return self + + + +class DataFrame (Series[T], Generic[T]): + """ Type hint variable to illutsrate the `pandas DataFrame`_ object. + + .. _pandas DataFrame: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html + .. _Series: https://pandas.pydata.org/docs/reference/api/pandas.Series.html + + Indeed, `pandas DataFrame`_ can be considered as an aggregation of `Series`_, + thus, the generic type hint variable is supposed to hold a `Series`_ + object. + + :Example: + + >>> import numpy as np + >>> import pandas as pd + >>> from kalfeat.typing import DType, DataFrame + + .. code: Python + + df =pd.DataFrame ({serie1: np.arange(7), + serie2: np.linspace (0, 1000, 7), + serie3: [f'0b{i} for i in range(7)] + }) + def check_type (dfObj:DataFrame): + ... + ckeck_type (dfObj: DataFrame [DType [object]] =df) + + """ + + def __getitem__(self, item: T)->'DataFrame': + """ Get the type hint variable of `pandas DataFrame`_ and return the + object type variable.""" + + return self + +if __name__=='__main__': + def test (array:Sub[SP[Array[int, DType[int]], DType [int]]]):... + def test2 (array:Sub[SP[Array, DType [int]]]):... + + DFSTR = DataFrame [Series[DType[str]]] + DF = DataFrame [DType [object]] + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ce199d0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +sphinx >=3.5 +matplotlib>=3.3.0 +numpy +scipy +numpydoc >=1.0.0 +pytest +pyyaml +pyproj>=1.9.6 +pandas +python-coveralls +sklearn +joblib +seaborn +tqdm +autoapi diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..819f56c --- /dev/null +++ b/setup.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python + +import kalfeat +import os + +# Check for setuptools package: + +try: + from setuptools import setup +except ImportError: + setuptools = False + from distutils.core import setup +else: + setuptools = True + + +with open(os.path.join(os.path.abspath('.'), + 'README.md'), 'r') as fm: + + LONG_DESCRIPTION =fm.read() + +setup_kwargs = {} +setup_kwargs['entry_points'] = { + 'console_scripts':[ + 'occam2d_build_in = kalfeat.gui.oc2d_bdin:main', + 'write_avg2edi= kalfeat.gui.wa2edi:main', + + ] + } + + +# But many people will not have setuptools installed, so we need to handle +# the default Python installation, which only has Distutils: + +if setuptools is False: + # Different script specification style for ordinary Distutils: + setup_kwargs['scripts'] = [ + s.split(' = ')[1].replace('.', '/').split(':')[0] + '.py' for s in + setup_kwargs['entry_points']['console_scripts']] + del setup_kwargs['entry_points'] + + # "You must explicitly list all packages in packages: the Distutils will not + # recursively scan your source tree looking for any directory with an + # __init__.py file" + +setup_kwargs['packages'] = [ + 'kalfeat', + 'kalfeat.method', + 'kalfeat.tools.core', + ] +# force install kalfeat. Once kalfeat is installed , pyyaml and pyproj +# should already installed too. 
+ +setup_kwargs['install_requires'] = ['numpy>=1.8.1', + 'scipy>=0.14.0', + 'matplotlib', + 'pyyaml', + 'pyproj', + 'configparser', + 'tqdm'] + +setup_kwargs['python_requires'] ='>=3.7' + +authors =["Kouadio Laurent"] +authors_emails =['etanoyau@gmail.com'] +setup( + name="kalfeat", + version=kalfeat.__version__, + author=' '.join([aa for aa in authors]), + author_email='etanoyau@gmail.com', + maintainer="Kouadio K. Laurent", + maintainer_email='etanoyau@gmail.com', + description="A light package for fast detecting the geo-electrical features", + long_description=LONG_DESCRIPTION, + long_description_content_type="text/markdown", + url="https://github.com/WEgeophysics/kalfeat", + project_urls={ + "API Documentation" : "https://kalfeat.readthedocs.io/en/master/", + "Home page" : "https://github.com/WEgeophysics/kalfeat/wiki", + "Bugs tracker": "https://github.com/WEgeophysics/kalfeat/issues", + # "Installation guide" : "https://github.com/WEgeophysics/kalfeat/wiki/kalfeat-installation-guide-for-Windows--and-Linux", + # "User guide" : "https://github.com/WEgeophysics/kalfeat/blob/develop/docs/kalfeat%20User%20Guide.pdf", + }, + #data_files=[('', ['kalfeat/utils/epsg.npy',]),], #this will install datafiles in wearied palce such as ~/.local/ + include_package_data=True, + license="GNU LESSER GENERAL PUBLIC LICENSE v3", + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + # "Topic :: Software Development :: Build Tools", + #"License :: OSI Approved :: GNU License", + 'Topic :: Scientific/Engineering :: Geophysics', + 'Topic :: Scientific/Engineering :: Geosciences', + + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + "Operating System :: OS Independent", + ], + keywords="hydrogeophysic, groundwater, exploration, csamt", + #package_dir={"": "kalfeat"}, # Optional + package_data={'kalfeat': [ + 'utils/p.configlog.yml', + 'utils/espg.npy', + '_loggerfiles/*.txt', + + ], + "":[ + 'data/ves/*', + 'data/erp/*', + ] + }, + + **setup_kwargs +) + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..10faf39 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,54 @@ + +import os +import shutil +# import sys + +from kalfeat._kalfeatlog import kalfeatlog +# sys.path.insert(0, os.path.abspath('..')) + +TEST_KALFEAT_ROOT = os.path.normpath( + os.path.abspath( + os.path.dirname( + os.path.dirname(__file__) + ) + ) +) # assume tests is on the root level of pyCSAMT goes one step backward + +TEST_DIR = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) + + +TEST_TEMP_DIR = os.path.normpath(os.path.join(TEST_DIR, "temp")) + +if not os.path.isdir(TEST_TEMP_DIR): + os.mkdir(TEST_TEMP_DIR) + + +def make_temp_dir(dir_name, base_dir=TEST_TEMP_DIR): + _temp_dir = os.path.normpath(os.path.join(base_dir, dir_name)) + if os.path.isdir(_temp_dir): + shutil.rmtree(_temp_dir) # clean the existing directory + os.mkdir(_temp_dir) # make a new director to collect temp files + return _temp_dir + +# declare some main data directories for test samples + +ERP_DATA_DIR = os.path.normpath( + os.path.join(TEST_KALFEAT_ROOT , 'data/erp')) +VES_DATA_DIR = os.path.normpath( + os.path.join(TEST_KALFEAT_ROOT , 'data/ves')) + +ves_test_location_name = 'ves_gbalo.xlsx' +PREFIX = ['station', 'resistivity', 'longitude', 'latitude', 'easting', 'northing'] + +DATA_UNSAFE= os.path.join(ERP_DATA_DIR, 'testunsafedata.csv') +DATA_SAFE = os.path.join(ERP_DATA_DIR, 'testsafedata.csv') + 
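+# the *_XLS constants below point at the Excel copies of the same ERP
+# fixtures shipped in data/erp, so both file readers get exercised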
+DATA_UNSAFE_XLS = os.path.join(ERP_DATA_DIR, 'testunsafedata.xlsx')
+DATA_SAFE_XLS = os.path.join(ERP_DATA_DIR, 'testsafedata.xlsx')
+
+DATA_EXTRA = os.path.join(ERP_DATA_DIR, 'testunsafedata_extra.csv')
+DATA_VES = os.path.join(VES_DATA_DIR, ves_test_location_name)
+# set up the test logging configuration
+kalfeatlog.load_configure(
+    os.path.join(os.path.abspath('./kalfeat'), 'tools',
+                 "klog.yml"))
diff --git a/tests/methods/__init__.py b/tests/methods/__init__.py
new file mode 100644
index 0000000..8084c4c
--- /dev/null
+++ b/tests/methods/__init__.py
@@ -0,0 +1,77 @@
+
+from __future__ import print_function
+
+import os
+import sys
+from difflib import unified_diff
+
+import matplotlib
+
+from kalfeat._kalfeatlog import kalfeatlog
+
+if os.name == "posix" and 'DISPLAY' not in os.environ:
+
+    print("MATPLOTLIB: No Display found, using non-interactive svg backend",
+          file=sys.stderr)
+    matplotlib.use('svg')
+    import matplotlib.pyplot as plt
+
+    kalfeat_TEST_HAS_DISPLAY = False
+else:
+    # matplotlib.use('svg')
+    import matplotlib.pyplot as plt
+    kalfeat_TEST_HAS_DISPLAY = True
+    plt.ion()
+
+kalfeatlog.get_kalfeat_logger(__name__).info(
+    "Testing using matplotlib backend {}".format(matplotlib.rcParams['backend']))
+
+def diff_files(after, before, ignores=None):
+    """
+    compare two files using difflib.unified_diff
+    :param ignores: list of terms; lines containing them are skipped
+    :param before: baseline file
+    :param after: file to compare against the baseline
+    :return: (is_identical, msg) where is_identical is True when no
+        differing lines were found
+    """
+
+    with open(before) as f2p:
+        before_lines = f2p.readlines()
+    with open(after) as f1p:
+        after_lines = f1p.readlines()
+
+    before_lines = [line.strip() for line in before_lines]
+    after_lines = [line.strip() for line in after_lines]
+
+    if ignores:
+        for ignored_term in ignores:
+            before_lines = [line for line in before_lines if ignored_term not in line]
+            # filter the *after* lines, not a second copy of the before lines
+            after_lines = [line for line in after_lines if ignored_term not in line]
+
+    msg = "Comparing {} and {}:\n".format(before, after)
+
+    lines = [line for line in unified_diff(
+        before_lines,
+        after_lines,
+        fromfile="baseline ({})".format(before),
+        tofile="test ({})".format(after),
+        n=0)]
+
+    if lines:
+        msg += " Found differences:\n\t" + "\n\t".join(lines)
+        is_identical = False
+    else:
+        msg += " NO differences found."
+        is_identical = True
+
+    return is_identical, msg
+
+def reset_matplotlib():
+
+    interactive = matplotlib.rcParams['interactive']
+    backend = matplotlib.rcParams['backend']
+    matplotlib.rcdefaults()  # reset the rcparams to default
+    matplotlib.rcParams['backend'] = backend
+    matplotlib.rcParams['interactive'] = interactive
+    # call the helper on the class itself, as elsewhere in this module
+    logger = kalfeatlog.get_kalfeat_logger(__name__)
+
+    logger.info("Testing using matplotlib backend {}".format(matplotlib.rcParams['backend']))
\ No newline at end of file
diff --git a/tests/methods/test_methods.py b/tests/methods/test_methods.py
new file mode 100644
index 0000000..28c09b1
--- /dev/null
+++ b/tests/methods/test_methods.py
@@ -0,0 +1,175 @@
+# -*- coding: utf-8 -*-
+
+
+"""
+Test module methods
+
+"""
+import os
+import numpy as np
+# import datetime
+import unittest
+import pytest
+from kalfeat.methods import (
+    ResistivityProfiling,
+    VerticalSounding
+    )
+from tests import (
+    DATA_UNSAFE,
+    TEST_TEMP_DIR,
+    make_temp_dir,
+    DATA_VES
+    )
+from tests.methods.__init__ import reset_matplotlib, kalfeatlog, diff_files
+
+class TestERP(unittest.TestCase):
+    """
+    Test the electrical resistivity profile and compute the geo-electrical
+    features as follows:
+        - type
+        - shape
+        - sfi
+        - power
+        - magnitude
+        - anr
+        - select_best_point
+        - select_best_value
+    """
+    dipole_length = 10.
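+    # NOTE: 10 m matches the electrode spacing used to build the ERP test
+    # arrays in tests/utilities/__init__.py (dipoleLength = 10.)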
+
+    @classmethod
+    def setUpClass(cls):
+        """
+        Reset matplotlib and generate a temporary directory for input files
+
+        """
+        reset_matplotlib()
+        cls._temp_dir = make_temp_dir(cls.__name__)
+
+    def setUp(self):
+
+        if not os.path.isdir(TEST_TEMP_DIR):
+            print('--> outdir does not exist, set to None!')
+            kalfeatlog.get_kalfeat_logger().error('Outdir does not exist !')
+
+    @pytest.mark.skip(reason='Test succeeded on a Windows env. with Python '
+                      '3.7 but requires the latest version of the pandas '
+                      'library on a Linux env.')
+    def fit_erp(self):
+        """
+        Test geo-electrical feature computations from ERP
+
+        Examples
+        --------
+        >>> from kalfeat.methods.electrical import ResistivityProfiling
+        >>> rObj = ResistivityProfiling(AB=200, MN=20, station='S7')
+        >>> rObj.fit('data/erp/testunsafedata.csv')
+        >>> rObj.sfi_
+        ... array([0.03592814])
+        >>> rObj.power_, rObj.position_zone_
+        ... 90, array([ 0, 30, 60, 90])
+        >>> rObj.magnitude_, rObj.conductive_zone_
+        ... 268, array([1101, 1147, 1345, 1369], dtype=int64)
+        >>> rObj.dipole
+        ... 30
+
+        """
+
+        rObj = ResistivityProfiling(AB=200, MN=20, station='S7')
+        rObj.fit(DATA_UNSAFE)
+        # expected values are taken from the docstring example above
+        self.assertAlmostEqual(float(rObj.sfi_), 0.03592814)
+        self.assertIsInstance(rObj.power_, float)
+        self.assertIsInstance(rObj.position_zone_, np.ndarray)
+        self.assertIsInstance(rObj.magnitude_, float)
+        self.assertAlmostEqual(rObj.magnitude_, 268)
+        self.assertIsInstance(rObj.conductive_zone_, np.ndarray)
+        self.assertAlmostEqual(rObj.dipole, 30)
+
+    def fit_ves(self):
+        """
+        Test geo-electrical feature computations from VES
+
+        Examples
+        --------
+        >>> from kalfeat.methods import VerticalSounding
+        >>> from kalfeat.tools import vesSelector
+        >>> vobj = VerticalSounding(fromS=45, vesorder=3)
+        >>> vobj.fit('data/ves/ves_gbalo.xlsx')
+        >>> vobj.ohmic_area_  # in ohm.m^2
+        ... 349.6432550517697
+        >>> vobj.nareas_  # number of areas computed
+        ... 2
+        >>> vobj.area1_, vobj.area2_  # value of each area in ohm.m^2
+        ... (254.28891096053943, 95.35434409123027)
+        >>> vobj.roots_  # different boundaries in pairs
+        ... [array([45. , 57.55255255]), array([ 96.91691692, 100. ])]
+        >>> data = vesSelector ('data/ves/ves_gbalo.csv', index_rhoa=3)
+        >>> vobj = VerticalSounding().fit(data)
+        >>> vobj.fractured_zone_  # AB/2 position from 45 to 100 m depth.
+        ... array([ 45.,  50.,  55.,  60.,  70.,  80.,  90., 100.])
+        >>> vobj.fractured_zone_resistivity_
+        ... array([57.67588974, 61.21142365, 64.74695755, 68.28249146, 75.35355927,
+                   82.42462708, 89.4956949 , 96.56676271])
+        >>> vobj.nareas_
+        ... 2
+        >>> vobj.ohmic_area_
+        ... 349.6432550517697
+
+        """
+        vobj = VerticalSounding(fromS=45, vesorder=3)
+        vobj.fit(DATA_VES)
+
+        # expected values are taken from the docstring example above
+        self.assertAlmostEqual(vobj.ohmic_area_, 349.6432550517697)
+        self.assertAlmostEqual(vobj.area1_, 254.28891096053943)
+        self.assertAlmostEqual(vobj.area2_, 95.35434409123027)
+        self.assertIsInstance(vobj.roots_, list)  # pairs of boundary arrays
+        self.assertIsInstance(vobj.fractured_zone_, np.ndarray)
+        self.assertIsInstance(vobj.fractured_zone_resistivity_, np.ndarray)
+        self.assertAlmostEqual(vobj.nareas_, 2)
+
+def compare_diff_files(refout, refexp):
+    """
+    Compare the output files generated after running the scripts with the
+    recreated expected (baseline) files.
+
+    :param refout: list of reference output files generated after running
+        the scripts
+    :type refout: list
+
+    :param refexp: recreated expected files for comparison
+    :type refexp: list
+
+    """
+    for outfile, expfile in zip(sorted(refout),
+                                sorted(refexp)):
+        # plain asserts are used because this is a module-level helper,
+        # not a TestCase method
+        assert os.path.isfile(outfile), (
+            "Ref output data file does not exist, nothing to compare with")
+
+        print("Comparing", outfile, "and", expfile)
+
+        is_identical, msg = diff_files(outfile, expfile, ignores=['Date/Time:'])
+        print(msg)
+        assert is_identical, (
+            "The output file is not the same with the baseline file.")
+
+
+
+if __name__ == '__main__':
+
+    unittest.main()
\ No newline at end of file
diff --git a/tests/utilities/__init__.py b/tests/utilities/__init__.py
new file mode 100644
index 0000000..2de9972
--- /dev/null
+++ b/tests/utilities/__init__.py
@@ -0,0 +1,19 @@
+
+import numpy as np
+
+rangn = np.random.RandomState(42)
+array1D = np.abs(rangn.randn(7))  # for res values
+# for [stations, easting, northing, resistivity]
+# 10 m is used as the dipole length value
+array2D = np.abs(rangn.randn(21, 4))
+dipoleLength = 10.
+array2D[:, 0] = np.arange(0, array2D.shape[0] * dipoleLength, dipoleLength)
+# make a copy of array2D whose positions start at 150 m;
+# the dipole length is 50 m
+dipoleLengthX = 50.
+array2DX = array2D.copy()
+array2DX[:, 0] = np.arange(
+    3 * dipoleLengthX, array2DX.shape[0] * dipoleLengthX + 3 * dipoleLengthX,
+    dipoleLengthX)
+# extra data columns
+extraarray2D = np.abs(rangn.randn(21, 7))
+extraarray2D[:, 0] = array2DX[:, 0]
diff --git a/tests/utilities/test_tools.py b/tests/utilities/test_tools.py
new file mode 100644
index 0000000..294cdf8
--- /dev/null
+++ b/tests/utilities/test_tools.py
@@ -0,0 +1,163 @@
+# -*- coding: utf-8 -*-
+"""
+    Test utilities
+    ^^^^^^^^^^^^^^
+
+"""
+import os
+# import datetime
+import unittest
+import pytest
+import numpy as np
+import pandas as pd
+
+from tests import (
+    ERP_DATA_DIR,
+    TEST_TEMP_DIR,
+    DATA_UNSAFE_XLS,
+    DATA_UNSAFE,
+    DATA_SAFE,
+    DATA_SAFE_XLS,
+    DATA_EXTRA,
+    PREFIX
+)
+from tests.utilities.__init__ import (
+    dipoleLength,
+    dipoleLengthX,
+    array1D,
+    array2D,
+    array2DX,
+    extraarray2D,
+)
+from tests import make_temp_dir
+from tests.methods.__init__ import (reset_matplotlib,
+                                    kalfeatlog,
+                                    diff_files)
+from kalfeat.tools.coreutils import erpSelector
+
+from kalfeat.tools.exmath import (
+    power,
+    magnitude,
+    _find_cz_bound_indexes
+
+)
+class TestUtils(unittest.TestCase):
+    """
+    Test the electrical resistivity profile and compute the geo-electrical
+    features as follows:
+        - type
+        - shape
+        - sfi
+        - power
+        - magnitude
+        - anr
+        - select_best_point
+        - select_best_value
+    """
+    data_collections = (DATA_UNSAFE_XLS, DATA_UNSAFE, DATA_SAFE,
+                        DATA_SAFE_XLS, DATA_EXTRA,
+                        array1D, array2D, array2D[:, :2], array2DX,
+                        extraarray2D
+                        )
+
+    @classmethod
+    def setUpClass(cls):
+        """
+        Reset matplotlib and generate a temporary directory for input files
+
+        """
+        reset_matplotlib()
+        cls._temp_dir = make_temp_dir(cls.__name__)
+
+    def setUp(self):
+        if not os.path.isdir(TEST_TEMP_DIR):
+            print('--> outdir does not exist, set to None!')
+            kalfeatlog.get_kalfeat_logger().error('Outdir does not exist !')
+
+    def test_find_cz_bound_indexes(self):
+        pass
+
+    def test_assert_station_positions(self):
+        pass
+
+    def test_sanitize_collected_data(self):
+        """ Test the capability of the function to read and fetch data
+        directly from `csv` and `xlsx` files and sanitize the data to fit
+        the corresponding ``PREFIX``. """
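+
+        # the collection mixes csv/xlsx file paths with in-memory numpy
+        # arrays; erpSelector is expected to hand back a pandas DataFrame
+        # or Series whose columns follow PREFIX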
+        for i, f in enumerate(self.data_collections):
+            print('i=', i)
+            df = erpSelector(f)
+            # a DataFrame exposes .columns; a Series only carries .name
+            col = list(df.columns) if isinstance(df, pd.DataFrame) else [df.name]
+            # only string entries can be file paths; array entries fall
+            # through to the isinstance checks below
+            if isinstance(f, str) and os.path.isfile(f):
+                print(os.path.basename(os.path.splitext(f)[0].lower()))
+                if os.path.basename(os.path.splitext(f)[0].lower()) in (
+                        'testunsafedata', 'testunsafedata_extra'):
+                    print('PASSED')
+                    print('col ==', col)
+                    self.assertListEqual(col, ['station', 'resistivity',
+                                               'longitude', 'latitude',
+                                               'easting', 'northing'])
+
+                elif os.path.basename(
+                        os.path.splitext(f)[0].lower()) == 'testsafedata':
+                    self.assertEqual(len(col), len(PREFIX),
+                                     f'The length of data columns={col} is '
+                                     f'different from the expected length ={len(PREFIX)}.')
+
+            elif isinstance(f, pd.Series):
+                self.assertListEqual(col, ['resistivity'],
+                                     'Expected a Series of "resistivity" but got '
+                                     f'{f.name}')
+            elif isinstance(f, pd.DataFrame):
+                self.assertListEqual(col, ['station', 'resistivity'],
+                                     'Expected columns [station, resistivity] but got '
+                                     f'{col}')
+
+if __name__ == '__main__':
+    unittest.main()