Skip to content

Commit

Permalink
fix: Avoid computing top_values and quantiles when not necessary
Browse files: browse the repository at this point in the history
  • Loading branch information
rantolin committed Dec 4, 2024
1 parent 3ee2263 commit d2369f7
Showing 1 changed file with 23 additions and 21 deletions.
44 changes: 23 additions & 21 deletions raster_loader/io/common.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -498,15 +498,15 @@ def raster_band_approx_stats(
_sum = int(np.sum(samples_band))
sum_squares = int(np.sum(np.array(samples_band) ** 2))

quantiles = compute_quantiles(samples_band, int)

most_common = dict()
if not band_is_float(raster_dataset, band):
most_common = most_common_approx(samples_band)

if omit_stats:
quantiles = None
most_common = None
else:
quantiles = compute_quantiles(samples_band, int)

most_common = dict()
if not band_is_float(raster_dataset, band):
most_common = most_common_approx(samples_band)

return {
"min": stats.min,
Expand Down Expand Up @@ -588,24 +588,26 @@ def raster_band_stats(
print("Removing masked data...")
qdata = raster_band.compressed()

casting_function = (
int if np.issubdtype(raster_band.dtype, np.integer) else float
)

quantiles = compute_quantiles(qdata, casting_function)

print("Computing most commons values...")
warnings.warn(
"Most common values are meant for categorical data. "
"Computing them for float bands can be meaningless."
)
most_common = Counter(qdata).most_common(100)
most_common.sort(key=lambda x: x[1], reverse=True)
most_common = dict([(casting_function(x[0]), x[1]) for x in most_common])

if omit_stats:
quantiles = None
most_common = None
else:
casting_function = (
int if np.issubdtype(raster_band.dtype, np.integer) else float
)

quantiles = compute_quantiles(qdata, casting_function)

print("Computing most commons values...")
if casting_function == float:
warnings.warn(
"Most common values are meant for categorical data. "
"Computing them for float bands can be meaningless.\n"
"Please, consider to use the --omit_stats option.",
)
most_common = Counter(qdata).most_common(100)
most_common.sort(key=lambda x: x[1], reverse=True)
most_common = dict([(casting_function(x[0]), x[1]) for x in most_common])

version = ".".join(__version__.split(".")[:3])

Expand Down

0 comments on commit d2369f7

Please sign in to comment.