-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprofile_filters.py
223 lines (176 loc) · 7.66 KB
/
profile_filters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"""
"""
import numpy as np
import logging
import os
from ooidac import processing
from configuration import DATA_CONFIG_LIST, TIMESENSOR
# Module-level logger. The logger name is os.path.basename(__name__), i.e. the
# final path component of the module name (unchanged when the name contains no
# path separator).
logger = logging.getLogger(os.path.basename(__name__))
def filter_no_data(profile_data):
    """Flag a profile for removal when it carries no science data.

    Every sensor named in DATA_CONFIG_LIST is checked.  The profile is
    flagged as soon as 'sci_water_pressure' is found to be entirely NaN;
    otherwise it is flagged only if every checked sensor is entirely NaN.
    A single sensor with any non-NaN value is enough to keep the profile.

    Note: a profile not removed by this filter might still be removed by
    another active filter.

    :param profile_data: object exposing ``getdata(sensor_name)``
    :return: truthy if the profile should be removed, falsy otherwise
    """
    sensor_is_empty = []
    for sensor_name in DATA_CONFIG_LIST:
        values = profile_data.getdata(sensor_name)
        all_missing = np.all(np.isnan(values))
        # without any CTD pressure the profile is useless, whatever the
        # other sensors hold
        if all_missing and sensor_name == 'sci_water_pressure':
            return True
        sensor_is_empty.append(all_missing)

    return np.all(sensor_is_empty)
def filter_small_data_ratio(profile_data, threshold=.1, data_pts_threshold=4):
    """Flag a profile for removal when every science sensor is too sparse.

    For each sensor named in DATA_CONFIG_LIST the time covered by finite
    samples (large gaps excluded, see `cum_data_time_sum`) is divided by the
    total time span of the profile.  The profile is flagged only if that
    ratio is below `threshold` for ALL sensors; one sensor at or above the
    threshold keeps it.

    Note: a profile not removed by this filter might still be removed by
    another active filter.

    :param profile_data: object exposing ``getdata(sensor_name)``
    :param threshold: minimum acceptable ratio of data time to profile time
    :param data_pts_threshold: a sensor with fewer finite samples than this
        is treated as having zero data time
    :return: True if the profile should be removed, False otherwise
    """
    timestamps = profile_data.getdata(TIMESENSOR)
    total_profile_time = timestamps[-1] - timestamps[0]

    # A zero (or negative) span makes the ratio undefined: dividing would
    # yield NaN/inf, which never compares as "< threshold" and would keep a
    # profile that has no usable duration at all.
    if total_profile_time <= 0:
        return True

    data_ratios_too_small = []
    for scidata_sensor in DATA_CONFIG_LIST:
        data = profile_data.getdata(scidata_sensor)
        # data_ratio uses ratio of data record time vs total profile time
        finites = np.flatnonzero(np.isfinite(data))
        if len(finites) >= data_pts_threshold:
            good_data_length = cum_data_time_sum(timestamps[finites])
        else:
            good_data_length = 0
        data_ratio = good_data_length / total_profile_time
        data_ratios_too_small.append(data_ratio < threshold)

    return bool(np.all(data_ratios_too_small))
def filter_time_lessthan(profile_data, threshold=1):
    """Flag a profile for removal when it is shorter than `threshold` minutes.

    The duration is the difference between the last and the first value of
    the TIMESENSOR record, divided by 60.

    Note: a profile not removed by this filter might still be removed by
    another active filter.

    :param profile_data: object exposing ``getdata(sensor_name)``
    :param threshold: minimum profile duration, in minutes
    :return: True if the profile should be removed, False otherwise
    """
    profile_times = profile_data.getdata(TIMESENSOR)
    elapsed_seconds = profile_times[-1] - profile_times[0]
    return bool(elapsed_seconds / 60. < threshold)
def filter_datatime_lessthan(profile_data, threshold=1, data_pts_threshold=4):
    """Flag a profile for removal when its science data spans too little time.

    The data time is the gap-excluded cumulative sample time (see
    `cum_data_time_sum`) of the records returned by
    ``processing.all_sci_indices``.  The profile is flagged when there are
    fewer than `data_pts_threshold` such records, or when their cumulative
    time is less than `threshold` minutes.

    Note: a profile not removed by this filter might still be removed by
    another active filter.

    :param profile_data: object exposing ``getdata(sensor_name)``
    :param threshold: minimum amount of data time, in minutes
    :param data_pts_threshold: minimum number of science data records
    :return: True if the profile should be removed, False otherwise
    """
    timestamps = profile_data.getdata(TIMESENSOR)
    data_indices = processing.all_sci_indices(profile_data)

    # The decision is already made here; returning now avoids indexing and
    # differencing a tiny (possibly empty) selection, which would only
    # produce empty-slice warnings or an indexing error.
    if len(data_indices) < data_pts_threshold:
        return True

    sci_time = timestamps[data_indices]
    minutes_of_data = cum_data_time_sum(sci_time) / 60.
    return bool(minutes_of_data < threshold)
def filter_no_data_at_profile_start(profile_data, threshold=1):
    """Flag a profile for removal when its start holds no science data.

    The "start" is the first 10% of the records, or the first `threshold`
    minutes when that 10% covers less time than `threshold` minutes.  The
    profile is flagged when that window contains no finite
    'sci_water_pressure' sample, or no index returned by
    ``processing.all_sci_indices``.  This tries to eliminate the case where
    a short profile that should have no data turns on sampling just before
    the inflection point and collects enough data to pass the other filters;
    the rationale being that if there is no data at the start of a profile,
    it was not intended to be sampled.

    Note: a profile not removed by this filter might still be removed by
    another active filter.

    :param profile_data: object exposing ``source_file`` and
        ``getdata(sensor_name)``
    :param threshold: The minimum number of minutes at the start of the
        profile that requires data to occur in.
    :return: bool value if the profile is to be removed or not.
    """
    # The filter is skipped for sources whose file name contains 'rtime'
    # NOTE(review): presumably real-time files -- confirm the naming scheme
    if 'rtime' in profile_data.source_file:
        return False

    timestamps = profile_data.getdata(TIMESENSOR)
    # A profile without any records cannot have data at its start.
    if len(timestamps) == 0:
        return True

    # ToDo: change explicit pressure here to a PRESSURESENSOR variable
    pres = profile_data.getdata('sci_water_pressure')

    n_first = len(timestamps) // 10
    first_portion_of_dive = list(range(n_first))
    if n_first > 0:
        time_len = timestamps[n_first - 1] - timestamps[0]
    else:
        # Fewer than 10 records: the 10% window is empty, so it spans no
        # time and the time-based window below is used instead.
        time_len = 0.

    # use the amount of time that is greater, the first 10% of the dive,
    # or at least `threshold` minutes
    if time_len / 60. < threshold:
        first_portion_of_dive = np.flatnonzero(
            timestamps < timestamps[0] + 60 * threshold)

    data_indices = processing.all_sci_indices(profile_data)
    pressure_ii = np.flatnonzero(np.isfinite(pres))

    no_pressure_at_start = (
        len(np.intersect1d(pressure_ii, first_portion_of_dive)) == 0)
    no_science_at_start = (
        len(np.intersect1d(data_indices, first_portion_of_dive)) == 0)
    return no_pressure_at_start or no_science_at_start
def filter_small_data_depth_ratio(
        profile_data, threshold=.1, data_pts_threshold=4):
    """Flag a profile for removal when pressure data covers too little depth.

    The gap-excluded cumulative pressure change of the 'llat_pressure'
    record (see `cum_depth_sum`) is compared with the full depth range of
    the profile (``nanmax - nanmin`` of ``profile_data.depth``).  The profile
    is flagged when that ratio is below `threshold`.  It is also flagged
    when the record does not hold more than `data_pts_threshold` finite
    samples, or when the depth range is not greater than zero.

    Note: a profile not removed by this filter might still be removed by
    another active filter.

    :param profile_data: object exposing ``depth`` and
        ``getdata(sensor_name)``
    :param threshold: minimum acceptable ratio of sampled depth to full depth
    :param data_pts_threshold: number of finite pressure samples that must
        be exceeded
    :return: True if the profile should be removed, False otherwise
    """
    profile_depth = profile_data.depth
    depth_span = np.nanmax(profile_depth) - np.nanmin(profile_depth)
    pressure = profile_data.getdata('llat_pressure')

    finite_count = np.count_nonzero(np.isfinite(pressure))
    # not enough samples, or no measurable depth range (including NaN)
    if finite_count <= data_pts_threshold or not depth_span > 0:
        return True

    sampled_depth = abs(cum_depth_sum(pressure))
    return bool(sampled_depth / depth_span < threshold)
def cum_data_time_sum(sci_timestamps):
    """Return the total sampled time, ignoring large gaps between samples.

    To eliminate the case where a small amount of science data points are at
    the beginning of a profile, and a small amount exists at the end of a
    profile, with a large non-data gap in between, this function sums the
    time steps between consecutive timestamps while skipping every step that
    is not smaller than 3 times the median time step.

    :param sci_timestamps: timestamps of non-nan science data records
    :return: The cumulative sum of data sample time excluding large gaps;
        0.0 when fewer than two timestamps are given
    """
    sci_dt = np.diff(sci_timestamps)
    # With fewer than two timestamps there is no time step at all; return
    # early so the median is never taken over an empty array (which only
    # yields NaN plus a RuntimeWarning).
    if len(sci_dt) == 0:
        return 0.0

    sci_dt_median = np.nanmedian(sci_dt)
    no_gaps_ii = sci_dt < 3 * sci_dt_median
    return np.sum(sci_dt[no_gaps_ii])
def cum_depth_sum(pressure):
    """Return the signed cumulative pressure change, ignoring large jumps.

    Non-finite samples are dropped, consecutive differences are taken and
    zero differences discarded.  A difference counts as a gap, and is left
    out of the sum, when its magnitude exceeds ``abs(mean) + 3 * std`` of
    the remaining differences.

    :param pressure: array of pressure values, may contain NaN
    :return: sum of the retained pressure differences (negative when the
        pressure decreases); 0.0 when there is no non-zero difference
    """
    pres = pressure[np.isfinite(pressure)]
    diff_pres = np.diff(pres)
    diff_pres = diff_pres[abs(diff_pres) > 0.0]
    # Nothing to sum: avoids mean/std of an empty array (NaN + warning).
    if len(diff_pres) == 0:
        return 0.0

    mean = np.mean(diff_pres)
    std = np.std(diff_pres)
    # exclude any large jumps in depth which are considered gaps; the bound
    # is inclusive so that perfectly uniform steps (std == 0) are kept
    # instead of all being rejected
    no_gaps = abs(diff_pres) <= abs(mean) + 3 * std
    return np.sum(diff_pres[no_gaps])