Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto chunking for HDF5 #222

Merged
merged 8 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions kwave/kWaveSimulation_helper/save_to_disk_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from kwave.kmedium import kWaveMedium
from kwave.kgrid import kWaveGrid
from kwave.options.simulation_options import SimulationOptions
from kwave.options.simulation_execution_options import SimulationExecutionOptions
from kwave.utils.data import scale_time
from kwave.utils.dotdictionary import dotdict
from kwave.utils.io import write_attributes, write_matrix
Expand All @@ -16,7 +17,8 @@

def save_to_disk_func(
kgrid: kWaveGrid, medium: kWaveMedium, source,
opt: SimulationOptions, values: dotdict, flags: dotdict):
opt: SimulationOptions, exec_opt: SimulationExecutionOptions,
values: dotdict, flags: dotdict):
# update command line status
logging.log(logging.INFO, ' precomputation completed in ', scale_time(TicToc.toc()))
TicToc.tic()
Expand Down Expand Up @@ -56,7 +58,8 @@ def save_to_disk_func(
# =========================================================================

remove_z_dimension(float_variables, kgrid.dim)
save_file(opt.input_filename, integer_variables, float_variables, opt.hdf_compression_level)
save_file(opt.input_filename, integer_variables, float_variables, opt.hdf_compression_level,
exec_opt.auto_chunking)

# update command line status
logging.log(logging.INFO, ' completed in ', scale_time(TicToc.toc()))
Expand Down Expand Up @@ -445,12 +448,12 @@ def enforce_filename_standards(filepath):
return filepath, filename_ext


def save_file(filepath, integer_variables, float_variables, hdf_compression_level):
def save_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk):
filepath, filename_ext = enforce_filename_standards(filepath)

# save file
if filename_ext == '.h5':
save_h5_file(filepath, integer_variables, float_variables, hdf_compression_level)
save_h5_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk)

elif filename_ext == '.mat':
save_mat_file(filepath, integer_variables, float_variables)
Expand All @@ -459,7 +462,7 @@ def save_file(filepath, integer_variables, float_variables, hdf_compression_leve
raise NotImplementedError('unknown file extension for ''save_to_disk'' filename')


def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_level):
def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_level, auto_chunk):
# ----------------
# SAVE HDF5 FILE
# ----------------
Expand All @@ -474,15 +477,15 @@ def save_h5_file(filepath, integer_variables, float_variables, hdf_compression_l
for key, value in float_variables.items():
# cast matrix to single precision
value = np.array(value, dtype=np.float32)
write_matrix(filepath, value, key, hdf_compression_level)
write_matrix(filepath, value, key, hdf_compression_level, auto_chunk)
del value

# change all the index variables to be in 64-bit unsigned integers
# (long in C++), then add to HDF5 file
for key, value in integer_variables.items():
# cast matrix to 64-bit unsigned integer
value = np.array(value, dtype=np.uint64)
write_matrix(filepath, value, key, hdf_compression_level)
write_matrix(filepath, value, key, hdf_compression_level, auto_chunk)
del value

# set additional file attributes
Expand Down
2 changes: 1 addition & 1 deletion kwave/kspaceFirstOrder2D.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def kspaceFirstOrder2D(
retract_size = [[options.pml_x_size, options.pml_y_size, options.pml_z_size]]

# run subscript to save files to disk
save_to_disk_func(k_sim.kgrid, k_sim.medium, k_sim.source, k_sim.options,
save_to_disk_func(k_sim.kgrid, k_sim.medium, k_sim.source, k_sim.options, execution_options,
dotdict({
'ddx_k_shift_pos': k_sim.ddx_k_shift_pos,
'ddx_k_shift_neg': k_sim.ddx_k_shift_neg,
Expand Down
2 changes: 1 addition & 1 deletion kwave/kspaceFirstOrder3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def kspaceFirstOrder3D(
retract_size = [[options.pml_x_size, options.pml_y_size, options.pml_z_size]]

# run subscript to save files to disk
save_to_disk_func(k_sim.kgrid, k_sim.medium, k_sim.source, k_sim.options,
save_to_disk_func(k_sim.kgrid, k_sim.medium, k_sim.source, k_sim.options, execution_options,
dotdict({
'ddx_k_shift_pos': k_sim.ddx_k_shift_pos,
'ddx_k_shift_neg': k_sim.ddx_k_shift_neg,
Expand Down
2 changes: 1 addition & 1 deletion kwave/kspaceFirstOrderAS.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def kspaceFirstOrderAS(
retract_size = [[options.pml_x_size, options.pml_y_size, options.pml_z_size]]

# run subscript to save files to disk
save_to_disk_func(k_sim.kgrid, k_sim.medium, k_sim.source, k_sim.options,
save_to_disk_func(k_sim.kgrid, k_sim.medium, k_sim.source, k_sim.options, execution_options,
dotdict({
'ddx_k_shift_pos': k_sim.ddx_k_shift_pos,
'ddx_k_shift_neg': k_sim.ddx_k_shift_neg,
Expand Down
3 changes: 3 additions & 0 deletions kwave/options/simulation_execution_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ class SimulationExecutionOptions:
system_call: Optional[str] = None
verbose_level: int = 0

# whether HDF5 chunk sizes are chosen automatically (the default) or set manually
auto_chunking: Optional[bool] = True

# show simulation log
show_sim_log: bool = True

Expand Down
78 changes: 47 additions & 31 deletions kwave/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def get_h5_literals():
return literals


def write_matrix(filename, matrix: np.ndarray, matrix_name, compression_level=None):
def write_matrix(filename, matrix: np.ndarray, matrix_name, compression_level=None, auto_chunk=True):
# get literals
h5_literals = get_h5_literals()

Expand All @@ -78,36 +78,45 @@ def write_matrix(filename, matrix: np.ndarray, matrix_name, compression_level=No

# check size of matrix and set chunk size and compression level
if dims == 3:
# set chunk size to Nx * Ny
chunk_size = [Nx, Ny, 1]
if (auto_chunk):
chunk_size = True
else:
# set chunk size to Nx * Ny
chunk_size = [Nx, Ny, 1]
elif dims == 2:
# set chunk size to Nx
chunk_size = [Nx, 1, 1]
if (auto_chunk):
chunk_size = True
else:
# set chunk size to Nx
chunk_size = [Nx, 1, 1]
elif dims <= 1:
# check that the matrix size is greater than 1 MB
one_mb = (1024 ** 2) / 8
if matrix.size > one_mb:
# set chunk size to 1 MB
if Nx > Ny:
chunk_size = [one_mb, 1, 1]
elif Ny > Nz:
chunk_size = [1, one_mb, 1]
else:
chunk_size = [1, 1, one_mb]
if (auto_chunk):
chunk_size = True
waltsims marked this conversation as resolved.
Show resolved Hide resolved
else:

# set no compression
compression_level = 0

# set chunk size to grid size
if matrix.size == 1:
chunk_size = (1, 1, 1)
elif Nx > Ny:
chunk_size = (Nx, 1, 1)
elif Ny > Nz:
chunk_size = (1, Ny, 1)
# check whether the matrix holds more than 1 MB of data (counted at 8 bytes per element)
one_mb = (1024 ** 2) / 8
if matrix.size > one_mb:
# set chunk size to 1 MB
if Nx > Ny:
chunk_size = [one_mb, 1, 1]
elif Ny > Nz:
chunk_size = [1, one_mb, 1]
else:
chunk_size = [1, 1, one_mb]
else:
chunk_size = (1, 1, Nz)

# set no compression
compression_level = 0

# set chunk size to grid size
if matrix.size == 1:
chunk_size = (1, 1, 1)
elif Nx > Ny:
chunk_size = (Nx, 1, 1)
elif Ny > Nz:
chunk_size = (1, Ny, 1)
else:
chunk_size = (1, 1, Nz)
else:
# throw error for unknown matrix size
raise ValueError('Input matrix must have 1, 2 or 3 dimensions.')
Expand Down Expand Up @@ -179,10 +188,17 @@ def write_matrix(filename, matrix: np.ndarray, matrix_name, compression_level=No
raise NotImplementedError('Currently there is no support for saving 2D complex matrices.')

# allocate a holder for the new matrix within the file
opts = {
'dtype': data_type_matlab,
'chunks': tuple(chunk_size)
}
if (isinstance(chunk_size, bool) and (chunk_size is True)):
opts = {
'dtype': data_type_matlab,
'chunks': chunk_size
}
else:
waltsims marked this conversation as resolved.
Show resolved Hide resolved
opts = {
'dtype': data_type_matlab,
'chunks': tuple(chunk_size)
}

if compression_level != 0:
# use compression
opts['compression'] = compression_level
Expand Down
Loading