From 2446574e3b72d83cf66328b1f64a5ab64f7eddfb Mon Sep 17 00:00:00 2001 From: Sylvain Haupert Date: Wed, 30 Nov 2022 22:36:58 +0100 Subject: [PATCH] Update A_VIRER__TEST_full_process_test.py, birdnet_test.py, and 16 more files... --- article/A_VIRER__TEST_full_process_test.py | 169 ----------------- article/birdnet_test.py | 9 +- article/birdnet_train.py | 10 +- article/birdnet_validation.py | 8 +- article/clustering_only_test.py | 8 +- article/clustering_only_train.py | 10 +- article/clustering_only_validation.py | 6 +- article/config_article.yaml | 4 +- article/full_process_test.py | 9 +- article/full_process_validation.py | 7 +- bambird/__init__.py | 31 ++-- bambird/cluster.py | 174 +++++++++++++++++- bambird/config.py | 98 ++++++++-- bambird/dataset.py | 3 +- bambird/features.py | 5 +- bambird/segmentation.py | 16 +- bambird/segmentation_extract_rois_core.py | 2 +- bambird/segmentation_extract_rois_full_sig.py | 25 ++- 18 files changed, 327 insertions(+), 267 deletions(-) delete mode 100644 article/A_VIRER__TEST_full_process_test.py diff --git a/article/A_VIRER__TEST_full_process_test.py b/article/A_VIRER__TEST_full_process_test.py deleted file mode 100644 index 1c9fa5b..0000000 --- a/article/A_VIRER__TEST_full_process_test.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Updated 19 October 2022 - -Authors : Felix Michaud and Sylvain Haupert - -""" - -from IPython import get_ipython -print(__doc__) -# Clear all the variables -get_ipython().magic('reset -sf') - -import yaml -from pathlib import Path -import matplotlib.pyplot as plt -plt.close("all") -import pandas as pd - -import bambird - -# %% -# Define constants -# ---------------- - -RANDOM_SEED = 1979 - -# Choose the path to store the mp3 -DIR_DATA = Path('../../temporary_data') -# Choose the name of the dataset -DATASET_NAME = Path('A_VIRER_DATASET_PALEARTIC_PART2') -# Choose the name of the ROIs dataset -ROIS_NAME = Path(str(DATASET_NAME) +'_ROIS_TEST') -# Select the csv file with the metadata collected from xeno-canto (=> links to the mp3 to download) -XC_CSV_FILE = Path('./data/test') / 'xc_metadata.csv' -# Select the annotation file corresponding to the ROIs dataset -ANNOT_CSV_FILE = Path('./data/test') / 'manual_annotations.csv' -# Select the configuration file to segment, compute features and get the clusters -CONFIG_FILE = 'config_article.yaml' - -# %% -if __name__ == '__main__': - - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) - - # load the inital dataset with the metadata stored from XC - df_dataset = pd.read_csv(XC_CSV_FILE, sep=';') - - #======================================== - #======================================== - df_dataset = df_dataset[0:10] - #======================================== - #======================================== - -#%% - - # Download audio Xeno-Canto - # ------------------------- - - df_xc, csv_xc = bambird.download_xc ( - df_dataset = df_dataset, - rootdir = DIR_DATA, - dataset_name = DATASET_NAME, - overwrite = True, - verbose = True - ) - - - -#%% - - # Extract ROIS - # ------------------------------- - - # extract ROIS - df_rois, csv_rois = bambird.multicpu_extract_rois( - dataset =df_xc, - fun =params['PARAMS_EXTRACT']['FUNC'], - params =params['PARAMS_EXTRACT'], - save_path =DIR_DATA / ROIS_NAME, - overwrite =True, - verbose =True - ) - -#%% - - # process all the ROIS - # --------------------------------------------------------------------- - - # compute features - df_features, csv_features = bambird.multicpu_compute_features( - dataset =df_rois, - params =params['PARAMS_FEATURES'], - save_path =DIR_DATA / ROIS_NAME, - overwrite =True, - verbose =True - ) - -#%% - # Cluster ROIS - # --------------------------------------------------------------------- - - # with dataframe or csv file - df_cluster = bambird.find_cluster( - dataset =df_features, - params =params['PARAMS_CLUSTER'], - display =False, - verbose =True - ) - - # Evaluation of the clustering : precision and recall - # --------------------------------------------------------------------- - - df_scores, p, r, f, markers = bambird.cluster_eval( - df_cluster, - path_to_csv_with_gt = ANNOT_CSV_FILE, - colname_label_gt ='manual_label', - verbose =True - ) - -#%% - # Display the ROIS - # --------------------------------------------------------------------- - filename = bambird.overlay_rois( - cluster =df_cluster, - params =params['PARAMS_EXTRACT'], - filename =None, - random_seed =None, - verbose =True - ) - - # Display the ROIS with TP=1 TN = 2 FN = 3 FP = 4 - # --------------------------------------------------------------------- - - bambird.overlay_rois( - cluster =df_cluster, - markers =markers, - params =params['PARAMS_EXTRACT'], - column_labels ='marker', - unique_labels =['FP', 'TP', 'FN', 'TN'], - filename =filename, - random_seed =None, - verbose =True - ) - -#%% - # if 'mark_rois' in PROCESS : - - # # Mark the ROIS with the prefix TN FN TP FP according to the clustering - # # --------------------------------------------------------------------- - - # df_rois, flag = bambird.mark_rois( - # markers, - # dataset_csv =csv_rois, - # verbose =True - # ) - -#%% - # if 'unmark_rois' in PROCESS : - - # # Unmark ROIS with the prefix TN FN TP FP - # # --------------------------------------------------------------------- - - # df_rois, flag = bambird.unmark_rois( - # dataset_csv =csv_rois, - # verbose =True - # ) \ No newline at end of file diff --git a/article/birdnet_test.py b/article/birdnet_test.py index ab50c98..113299b 100644 --- a/article/birdnet_test.py +++ b/article/birdnet_test.py @@ -12,7 +12,6 @@ # Clear all the variables get_ipython().magic('reset -sf') -import yaml from pathlib import Path import pandas as pd import matplotlib.pyplot as plt @@ -28,15 +27,9 @@ DIR_DATA = Path('./data/test') ANNOT_CSV_FILE = 'manual_annotations.csv' BIRDNET_CSV_FILE= "birdnet_annotations.csv" -CONFIG_FILE = 'config_article.yaml' # %% -if __name__ == '__main__': - - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) - -# %% +if __name__ == '__main__': # BirdNET on ROIS # --------------------------------------------------------------------- diff --git a/article/birdnet_train.py b/article/birdnet_train.py index 9ad274b..27a62ff 100644 --- a/article/birdnet_train.py +++ b/article/birdnet_train.py @@ -12,12 +12,12 @@ # Clear all the variables get_ipython().magic('reset -sf') -import yaml from pathlib import Path import pandas as pd import matplotlib.pyplot as plt plt.close("all") + import bambird # %% @@ -28,16 +28,10 @@ DIR_DATA = Path('./data/train') ANNOT_CSV_FILE = 'manual_annotations.csv' BIRDNET_CSV_FILE= "birdnet_annotations.csv" -CONFIG_FILE = 'config_article.yaml' # %% if __name__ == '__main__': - - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) - -# %% - + # BirdNET on ROIS # --------------------------------------------------------------------- # Load the dataframe with the result from BirdNET diff --git a/article/birdnet_validation.py b/article/birdnet_validation.py index cf74a9d..0a559fd 100644 --- a/article/birdnet_validation.py +++ b/article/birdnet_validation.py @@ -28,16 +28,10 @@ DIR_DATA = Path('./data/validation') ANNOT_CSV_FILE = 'manual_annotations.csv' BIRDNET_CSV_FILE= "birdnet_annotations.csv" -CONFIG_FILE = 'config_article.yaml' # %% if __name__ == '__main__': - - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) - -# %% - + # BirdNET on ROIS # --------------------------------------------------------------------- # Load the dataframe with the result from BirdNET diff --git a/article/clustering_only_test.py b/article/clustering_only_test.py index 75494c6..df487ac 100644 --- a/article/clustering_only_test.py +++ b/article/clustering_only_test.py @@ -12,13 +12,13 @@ # Clear all the variables get_ipython().magic('reset -sf') -import yaml from pathlib import Path -import pandas as pd import matplotlib.pyplot as plt plt.close("all") import bambird +import bambird.config as cfg + # %% # Define constants # ---------------- @@ -32,8 +32,8 @@ # %% if __name__ == '__main__': - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) + # Load the configuration file + params = cfg.load_config(CONFIG_FILE) # Name of the csv file with feaetures FEATURES_CSV_FILE = ( diff --git a/article/clustering_only_train.py b/article/clustering_only_train.py index 4219cd2..e083d13 100644 --- a/article/clustering_only_train.py +++ b/article/clustering_only_train.py @@ -17,10 +17,8 @@ import matplotlib.pyplot as plt plt.close("all") -bambird_path = Path('/home/haupert/DATA/mes_projets/Z_THESE_FELIX_MICHAUD/CHAPITRE_1/bambird.git') -import os -os.sys.path.append(bambird_path.as_posix()) import bambird +import bambird.config as cfg # %% # Define constants @@ -34,9 +32,9 @@ # %% if __name__ == '__main__': - - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) + + # Load the configuration file + params = cfg.load_config(CONFIG_FILE) # Name of the csv file with feaetures FEATURES_CSV_FILE = ( diff --git a/article/clustering_only_validation.py b/article/clustering_only_validation.py index bf17e9c..070cbd0 100644 --- a/article/clustering_only_validation.py +++ b/article/clustering_only_validation.py @@ -18,6 +18,8 @@ plt.close("all") import bambird +import bambird.config as cfg + # %% # Define constants # ---------------- @@ -31,8 +33,8 @@ # %% if __name__ == '__main__': - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) + # Load the configuration file + params = cfg.load_config(CONFIG_FILE) # Name of the csv file with feaetures FEATURES_CSV_FILE = ( diff --git a/article/config_article.yaml b/article/config_article.yaml index d5a1ed6..d2d5c2a 100644 --- a/article/config_article.yaml +++ b/article/config_article.yaml @@ -17,14 +17,14 @@ PARAMS_XC: - len:30-60 - q:">C" - type:song - #CSV_XC_FILE: 'dataset_xc_embcit.csv' # The file will contain all the metadata collected from Xeno-canto + CSV_XC_FILE: 'bam_dataset_xc.csv' # The file will contain all the metadata collected from Xeno-canto ################################ # ROIS EXTRACTION PARAMETERS ################################ PARAMS_EXTRACT: # function - FUNC: !fun bambird.extract_rois_core # select the function used to extract the rois {bambird.extract_rois_core, bambird.extract_rois_full_sig} + FUNC: !FUNC bambird.extract_rois_core # select the function used to extract the rois {bambird.extract_rois_core, bambird.extract_rois_full_sig} # Extract Audio resampling SAMPLE_RATE: 44100 # Sampling frequency in Hz # Audio preprocess diff --git a/article/full_process_test.py b/article/full_process_test.py index 49f4dd6..e5efccb 100644 --- a/article/full_process_test.py +++ b/article/full_process_test.py @@ -12,13 +12,13 @@ # Clear all the variables get_ipython().magic('reset -sf') -import yaml from pathlib import Path import matplotlib.pyplot as plt plt.close("all") import pandas as pd import bambird +import bambird.config as cfg # %% # Define constants @@ -42,8 +42,8 @@ # %% if __name__ == '__main__': - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) + # Load the configuration file + params = cfg.load_config(CONFIG_FILE) # load the inital dataset with the metadata stored from XC df_dataset = pd.read_csv(XC_CSV_FILE, sep=';') @@ -69,7 +69,6 @@ # extract ROIS df_rois, csv_rois = bambird.multicpu_extract_rois( dataset =df_xc, - fun =params['PARAMS_EXTRACT']['FUNC'], params =params['PARAMS_EXTRACT'], save_path =DIR_DATA / ROIS_NAME, overwrite =True, @@ -98,7 +97,7 @@ df_cluster, csv_cluster = bambird.find_cluster( dataset =df_features, params =params['PARAMS_CLUSTER'], - save_path =DIR_DATA / ROIS_NAME, + save_path =DIR_DATA / ROIS_NAME, display =False, verbose =True ) diff --git a/article/full_process_validation.py b/article/full_process_validation.py index 3c3b89e..f74ea8d 100644 --- a/article/full_process_validation.py +++ b/article/full_process_validation.py @@ -12,13 +12,13 @@ # Clear all the variables get_ipython().magic('reset -sf') -import yaml from pathlib import Path import matplotlib.pyplot as plt plt.close("all") import pandas as pd import bambird +import bambird.config as cfg # %% # Define constants @@ -42,8 +42,8 @@ # %% if __name__ == '__main__': - with open(CONFIG_FILE) as f: - params = yaml.load(f, Loader=bambird.get_loader()) + # Load the configuration file + params = cfg.load_config(CONFIG_FILE) # load the inital dataset with the metadata stored from XC df_dataset = pd.read_csv(XC_CSV_FILE, sep=';') @@ -69,7 +69,6 @@ # extract ROIS df_rois, csv_rois = bambird.multicpu_extract_rois( dataset =df_xc, - fun =params['PARAMS_EXTRACT']['FUNC'], params =params['PARAMS_EXTRACT'], save_path =DIR_DATA / ROIS_NAME, overwrite =True, diff --git a/bambird/__init__.py b/bambird/__init__.py index 56843b7..ef99273 100644 --- a/bambird/__init__.py +++ b/bambird/__init__.py @@ -15,7 +15,7 @@ ----------- .. autosummary:: :toctree: generated/ - get_loader + load_config Dataset ------- @@ -39,7 +39,6 @@ ------------- .. autosummary:: :toctree: generated/ - compute_features multicpu_compute_features @@ -47,8 +46,6 @@ --------------- .. autosummary:: :toctree: generated/ - - prepare_features find_cluster cluster_eval overlay_rois @@ -57,8 +54,20 @@ """ +from .segmentation_extract_rois_full_sig import( + extract_rois_full_sig, + ) + +from .segmentation_extract_rois_core import( + extract_rois_core, + ) + +from .segmentation_extract_rois_in_soundscape import( + extract_rois_in_soundscape, + ) + from .config import ( - get_loader + load_config ) from .dataset import( @@ -73,13 +82,6 @@ multicpu_extract_rois, ) -from .segmentation_extract_rois_full_sig import( - extract_rois_full_sig, - ) - -from .segmentation_extract_rois_core import( - extract_rois_core, - ) from .features import( compute_features, @@ -87,7 +89,6 @@ ) from .cluster import ( - prepare_features, find_cluster, cluster_eval, overlay_rois, @@ -98,7 +99,7 @@ __all__ = [ # config.py - 'get_loader', + 'load_config', # dataset.py 'grab_audio_to_df', 'change_path', @@ -108,12 +109,12 @@ 'extract_rois_core', 'extract_rois_full_sig', 'single_file_extract_rois', + 'extract_rois_in_soundscape', 'multicpu_extract_rois', # features.py 'compute_features', 'multicpu_compute_features', # cluster.py - 'prepare_features', 'find_cluster', 'cluster_eval', 'overlay_rois', diff --git a/bambird/cluster.py b/bambird/cluster.py index a238d58..38ef0b4 100644 --- a/bambird/cluster.py +++ b/bambird/cluster.py @@ -49,6 +49,7 @@ import maad from bambird import config as cfg +# cfg.get_config() warnings.filterwarnings("ignore", module="librosa") warnings.filterwarnings("ignore", module="maad") @@ -141,7 +142,7 @@ def _prepare_features(df_features, ############################################################################### def find_cluster( dataset, - params=cfg.DEFAULT_PARAMS_CLUSTER, + params=cfg.PARAMS['PARAMS_CLUSTER'], save_path=None, save_csv_filename=None, display=False, @@ -509,12 +510,177 @@ def find_cluster( return df_cluster, csv_fullfilename + ############################################################################### def cluster_eval(df_cluster, path_to_csv_with_gt, colname_label = 'auto_label' , colname_label_gt = 'manual_label', verbose=False): + + fp_initial = [] + tp_initial = [] + precision_initial = [] + precision = [] + recall = [] + tp = [] + fp = [] + tn = [] + fn = [] + number_rois_initial = [] + number_rois_final = [] + + df = df_cluster.copy() + + #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # HACK to DELETE in the future. For compliance with data of the article + # The column categories does not exit + if ('categories' in df.columns) == False : + df["categories"] = df["species"] + #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + try : + # load all annotations + df_labels = pd.read_csv(path_to_csv_with_gt, sep=';') + try : + df_labels.drop('species', axis=1, inplace=True) + except: + pass + try : + df_labels.drop('code', axis=1, inplace=True) + except: + pass + df_labels.set_index('filename_ts', inplace=True) + df_labels.loc[df_labels[colname_label_gt] == '0', colname_label_gt] = 0 + df_labels.loc[df_labels[colname_label_gt] == '1', colname_label_gt] = 1 + + # join df_label and df then drop rows with NaN + if 'filename_ts' in df : + df.set_index('filename_ts', inplace=True) + df = df.join(df_labels[colname_label_gt]) + df = df.dropna(axis=0) + + except : + raise Exception("WARNING: path_to_csv_with_gt must be a valid path to a csv ") + + # Create a new column 'marker' with tp, tn, fp, fn + df['marker'] = None + #TP + df.loc[(df[colname_label]==1) * (df[colname_label_gt]==1), 'marker'] = 'TP' + #TN + df.loc[(df[colname_label]==0) * (df[colname_label_gt]==0), 'marker'] = 'TN' + #FP + df.loc[(df[colname_label]==1) * (df[colname_label_gt]==0), 'marker'] = 'FP' + #FN + df.loc[(df[colname_label]==0) * (df[colname_label_gt]==1), 'marker'] = 'FN' + + # select Rois that belongs to the species depending on the clustering + + for categories in np.sort(df.categories.unique()): + + number_rois_initial += [len(df[df["categories"] == categories])] + number_rois_final += [np.sum((df["categories"] == categories) & (df[colname_label] == 1))] + + fp_initial += [np.sum(df[df["categories"] == categories][colname_label_gt] == 0)] + tp_initial += [np.sum( df[df["categories"] == categories][colname_label_gt] == 1)] + + precision_initial += [round(tp_initial[-1] / (tp_initial[-1] + fp_initial[-1]) * 100)] + + _tn, _fp, _fn, _tp = confusion_matrix( + df.dropna()[df["categories"] == categories][colname_label_gt].to_list(), + df.dropna()[df["categories"] == categories][colname_label].to_list()).ravel() + + tp += [_tp] + fp += [_fp] + tn += [_tn] + fn += [_fn] + + if (_tp + _fp) > 0: + precision += [round(_tp / (_tp + _fp) * 100)] + else: + precision += [0] + if (_tp + _fn) > 0: + recall += [round(_tp / (_tp + _fn) * 100)] + else: + recall += [0] + + if verbose: + print( + "Initial number of ROIs is {} / Final number of ROIs is {} => {}% reduction / noise {}% => {}% / recall {}% ({})".format( + number_rois_initial[-1], + number_rois_final[-1], + round(100 - number_rois_final[-1] / + number_rois_initial[-1] * 100, 1), + 100 - precision_initial[-1], + 100 - precision[-1], + recall[-1], + categories, + ) + ) + + # dataframe with scores + df_scores = pd.DataFrame(list(zip(np.sort(df.categories.unique()), + number_rois_initial, + number_rois_final, + tp_initial, + fp_initial, + tp, + fp, + tn, + fn, + precision_initial, + precision, + recall)), + columns=['species', + 'number_rois_initial', + 'number_rois_final', + 'tp_initial', + 'fp_initial', + 'tp', + 'fp', + 'tn', + 'fn', + 'precision_initial', + 'precision', + 'recall']) + # set species as index + df_scores.set_index('species', inplace = True) + + if verbose: + print("------------------------------------------------------") + print("------->Median initial noise {:.1f}%".format( + 100-np.percentile(precision_initial, 50))) + print("Lower outlier Initial noise {:.1f}%".format( + 100-np.percentile(precision_initial, 95))) + print("Higher outlier Initial noise {:.1f}%".format( + 100-np.percentile(precision_initial, 5))) + print("-------> Median Final noise {:.1f}%".format( + 100-np.percentile(precision, 50))) + print("Lower outlier Final noise {:.1f}%".format( + 100-np.percentile(precision,95))) + print("Higher outlier Final noise {:.1f}%".format( + 100-np.percentile(precision,5))) + print("------------------------------------------------------") + # calculate the F1-SCORE (macro and micro) + y_true = (df.categories * df[colname_label_gt].apply(np.int64)) + y_pred = (df.categories * df[colname_label].apply(np.int64)) + print("******************************************************") + print("avg intial noise {:2.1f}% >>> avg final noise {:2.1f}%".format(100-np.mean(precision_initial), + 100-np.mean(precision))) + p, r, f, _ = precision_recall_fscore_support( y_true, + y_pred, + average='macro') + print("MACRO precision {:.2f} | recall {:.2f} | F {:.2f}".format(p,r,f)) + print("******************************************************") + + return df_scores, p, r, f, df.marker + +############################################################################### +def cluster_eval_(df_cluster, + path_to_csv_with_gt, + colname_label = 'auto_label' , + colname_label_gt = 'manual_label', + verbose=False): """ Evalation of the clustering (requires annotations or any other files to @@ -571,11 +737,11 @@ def cluster_eval(df_cluster, # load all annotations df_labels = pd.read_csv(path_to_csv_with_gt, sep=';') try : - df_labels.drop('categories', axis=1, inplace=True) + df_labels.drop('species', axis=1, inplace=True) except: pass try : - df_labels.drop('categories', axis=1, inplace=True) + df_labels.drop('code', axis=1, inplace=True) except: pass df_labels.set_index('filename_ts', inplace=True) @@ -713,7 +879,7 @@ def overlay_rois (cluster, 'tab:purple','tab:pink','tab:brown','tab:olive', 'tab:cyan','tab:gray','yellow'], textbox_label=True, - params=cfg.DEFAULT_PARAMS_EXTRACT, + params=cfg.PARAMS['PARAMS_EXTRACT'], filename=None, random_seed=None, verbose=False, diff --git a/bambird/config.py b/bambird/config.py index b062c07..c9801e4 100644 --- a/bambird/config.py +++ b/bambird/config.py @@ -12,20 +12,25 @@ #%% # general packages import sys +import os # basic packages import yaml +import bambird + #%% RANDOM_SEED = 1979 # Fix the random seed to be able to repeat the results -DEFAULT_PARAMS_XC = { - 'PARAM_XC_LIST': ['len:"20-180"', 'q:">C"'], - 'NUM_FILES': 10 +PARAMS_XC = { + 'PARAM_XC_LIST': ['len:"20-60"', 'q:">C"', 'type:"song"'], + 'NUM_FILES': 20, + 'CSV_XC_FILE': 'xc_metadata.csv' } -DEFAULT_PARAMS_EXTRACT = { +PARAMS_EXTRACT = { + "FUNC" : bambird.extract_rois_full_sig, # Extract Audio resampling "SAMPLE_RATE": 48000, # Sampling frequency in Hz # Audio preprocess @@ -34,10 +39,10 @@ # butterworth filter order to select the bandwidth corresponding to the ROI "BUTTER_ORDER": 1, # Max duration of the audio files that we will use to compute the features - "AUDIO_DURATION": 60, + "AUDIO_DURATION": 30, # Split the audio signal of chunk with duration = SIGNAL LENGTH (in second) - "SIGNAL_LENGTH": 10, - "OVLP": 0.5, # Define the overlap ratio between each chunk + "CHUNK_DURATION": 10, + "OVLP": 0, # Define the overlap ratio between each chunk # Spectrogram # Mode to compute the remove_background ('mean', 'median') "MODE_RMBCKG": "median", @@ -61,7 +66,7 @@ "FILTER_ORDER": 5, } -DEFAULT_PARAMS_FEATURES = { +PARAMS_FEATURES = { # Extract Audio resampling "SAMPLE_RATE": 48000, # Sampling frequency in Hz # Audio preprocess @@ -75,14 +80,22 @@ "SHAPE_RES": "high", } -DEFAULT_PARAMS_CLUSTER = { - "PERCENTAGE_PTS": 5, # in % - "METHOD": "DBSCAN", # HDBSCAN or DBSCAN - "SCALER": "MINMAXSCALER", # STANDARDSCALER or ROBUSTSCALER or MINMAXSCALER - "KEEP": "BIGGEST", # ALL or BIGGEST - "EPS": "auto" # set the maximum distance between elements in a single clusters {a number or 'auto'} +PARAMS_CLUSTER = { + "FEATURES": ['shp', 'centroid_f'], # choose the features used to cluster {'shp', 'centroid_f', 'peak_f', 'duration_t', 'bandwidth_f', 'bandwidth_min_f', 'bandwidth_max_f', 'min_f', 'max_f' } + "PERCENTAGE_PTS": 5, # minimum number of ROIs to form a cluster (in % of the total number of ROIs) {number between 0 and 1 or blank} + "MIN_PTS": None, # minimum number of ROIs to form a cluster {integer or blank} + "METHOD": "DBSCAN", # HDBSCAN or DBSCAN + "SCALER": "MINMAXSCALER", # STANDARDSCALER or ROBUSTSCALER or MINMAXSCALER + "KEEP": "BIGGEST", # ALL or BIGGEST + "EPS": "auto" # set the maximum distance between elements in a single clusters {a number or 'auto'} } +PARAMS = { + 'PARAMS_XC' : PARAMS_XC, + 'PARAMS_EXTRACT' : PARAMS_EXTRACT, + 'PARAMS_FEATURES' : PARAMS_FEATURES, + 'PARAMS_CLUSTER' : PARAMS_CLUSTER + } #%% @@ -113,14 +126,61 @@ def _fun_constructor(loader, node): print(val) return _fun_call_by_name(val) +def _get_loader(): + """Add constructors to PyYAML loader.""" + loader = yaml.SafeLoader + loader.add_constructor("!FUNC", _fun_constructor) + return loader + """ =========================================================================== Public function ============================================================================""" -def get_loader(): - """Add constructors to PyYAML loader.""" - loader = yaml.SafeLoader - loader.add_constructor("!fun", _fun_constructor) - return loader +def load_config(fullfilename = None): + """ + Load the configuration file to set all the parameters of bambird + + Parameters + ---------- + fullfilename : string, optional + Path to the configuration file. + if no valid configuration file is given, the parameters are set to the + default values. + + Returns + ------- + PARAMS : dictionary + Dictionary with all the parameters that are required for the bambird's + functions + """ + + global PARAMS + global PARAMS_XC + global PARAMS_EXTRACT + global PARAMS_FEATURES + global PARAMS_CLUSTER + + if os.path.isfile(str(fullfilename)): + with open(fullfilename) as f: + PARAMS = yaml.load(f, Loader=_get_loader()) + PARAMS_XC = PARAMS['PARAMS_XC'] + PARAMS_EXTRACT = PARAMS['PARAMS_EXTRACT'] + PARAMS_FEATURES = PARAMS['PARAMS_FEATURES'] + PARAMS_CLUSTER = PARAMS['PARAMS_CLUSTER'] + else : + print("The config file {} could not be loaded. Default parameters are loaded".format(fullfilename)) + + return PARAMS + +def get_config() : + PARAMS = { + 'PARAMS_XC' : PARAMS_XC, + 'PARAMS_EXTRACT' : PARAMS_EXTRACT, + 'PARAMS_FEATURES' : PARAMS_FEATURES, + 'PARAMS_CLUSTER' : PARAMS_CLUSTER + } + return PARAMS + + diff --git a/bambird/dataset.py b/bambird/dataset.py index bf27a21..7dc49d7 100644 --- a/bambird/dataset.py +++ b/bambird/dataset.py @@ -24,12 +24,13 @@ # from bambird import config as cfg +# cfg.get_config() #%% ############################################################################### def query_xc (species_list, - params=cfg.DEFAULT_PARAMS_XC, + params=cfg.PARAMS['PARAMS_XC'], format_time=False, format_date=False, random_seed=cfg.RANDOM_SEED, diff --git a/bambird/features.py b/bambird/features.py index 71f3d26..d99048a 100644 --- a/bambird/features.py +++ b/bambird/features.py @@ -33,6 +33,7 @@ # from bambird import config as cfg +# cfg.get_config() from bambird import grab_audio_to_df @@ -41,7 +42,7 @@ ############################################################################### def compute_features( audio_path, - params=cfg.DEFAULT_PARAMS_FEATURES, + params=cfg.PARAMS['PARAMS_FEATURES'], display=False, verbose=False): """ @@ -243,7 +244,7 @@ def compute_features( ############################################################################### def multicpu_compute_features( dataset, - params=cfg.DEFAULT_PARAMS_FEATURES, + params=cfg.PARAMS['PARAMS_FEATURES'], save_path=None, save_csv_filename=None, nb_cpu=None, diff --git a/bambird/segmentation.py b/bambird/segmentation.py index 97ccdb4..cf5fbb9 100644 --- a/bambird/segmentation.py +++ b/bambird/segmentation.py @@ -166,7 +166,7 @@ def _save_rois( def single_file_extract_rois( audio_path, fun, - params=cfg.DEFAULT_PARAMS_EXTRACT, + params=cfg.PARAMS['PARAMS_EXTRACT'], save_path=None, display=False, verbose=False): @@ -316,8 +316,7 @@ def single_file_extract_rois( ############################################################################### def multicpu_extract_rois( dataset, - fun, - params=cfg.DEFAULT_PARAMS_EXTRACT, + params=cfg.PARAMS['PARAMS_EXTRACT'], save_path=None, save_csv_filename='rois.csv', overwrite=False, @@ -337,8 +336,6 @@ def multicpu_extract_rois( "filename" and a column "fullfilename" with the full path to the audio files to process. This dataframe can be obtained by called the function grab_audio_to_df - fun : function - name of the function that is called to segment the rois params : dictionnary, optioanl contains all the parameters to extract the rois save_path : string, default is None @@ -467,10 +464,15 @@ def multicpu_extract_rois( nb_cpu = os.cpu_count() # define a new function with fixed parameters to give to the multicpu pool - #------------------------------------------------------------------------- + #------------------------------------------------------------------------- + + # Print the characteristics of the function used to segment the files + if verbose : + print(params['FUNC']) + multicpu_func = partial( single_file_extract_rois, - fun=fun, + fun=params['FUNC'], params=params, save_path=save_path, display=False, diff --git a/bambird/segmentation_extract_rois_core.py b/bambird/segmentation_extract_rois_core.py index e8f26eb..5342dcf 100644 --- a/bambird/segmentation_extract_rois_core.py +++ b/bambird/segmentation_extract_rois_core.py @@ -162,7 +162,7 @@ def _merge_bbox(df_rois, margins): ############################################################################### def extract_rois_core( sig, - params=cfg.DEFAULT_PARAMS_EXTRACT, + params=cfg.PARAMS_EXTRACT, display=False, verbose=False, **kwargs): diff --git a/bambird/segmentation_extract_rois_full_sig.py b/bambird/segmentation_extract_rois_full_sig.py index 8fec607..f0205c1 100644 --- a/bambird/segmentation_extract_rois_full_sig.py +++ b/bambird/segmentation_extract_rois_full_sig.py @@ -24,8 +24,27 @@ # Scikit-Maad (ecoacoustics functions) package import maad -# import bamx -from bambird import config as cfg +PARAMS_EXTRACT = {'SAMPLE_RATE': 48000, + 'LOW_FREQ': 250, + 'HIGH_FREQ': 12000, + 'BUTTER_ORDER': 1, + 'AUDIO_DURATION': 30, + 'CHUNK_DURATION': 10, + 'OVLP': 0, + 'MODE_RMBCKG': 'median', + 'N_RUNNING_MEAN': 10, + 'NFFT': 1024, + 'MASK_PARAM1': 26, + 'MASK_PARAM2': 10, + 'MAX_RATIO_YX': 7, + 'MIN_DURATION': 0.1, + 'MARGIN_T_LEFT': 0.2, + 'MARGIN_T_RIGHT': 0.2, + 'MARGIN_F_TOP': 250, + 'MARGIN_F_BOTTOM': 250, + 'MARGIN_T': 0.1, + 'MARGIN_F': 250, + 'FILTER_ORDER': 5} #%% @@ -235,7 +254,7 @@ def _select_rois(im_bin, ############################################################################### def extract_rois_full_sig( sig, - params=cfg.DEFAULT_PARAMS_EXTRACT, + params=PARAMS_EXTRACT, display=False, verbose=False, **kwargs):