-
Notifications
You must be signed in to change notification settings - Fork 664
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
'MDAnalysis.analysis.density' parallelization #4729
Changes from 11 commits
d95a13a
3731096
129cf62
d335131
a15d9e9
9a0b3b2
1f1cf8d
53a5d74
18245b8
87d2103
746a6d3
506597d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -169,7 +169,7 @@ | |
from ..lib import distances | ||
from MDAnalysis.lib.log import ProgressBar | ||
|
||
from .base import AnalysisBase | ||
from .base import AnalysisBase, ResultsGroup | ||
|
||
import logging | ||
|
||
|
@@ -395,8 +395,16 @@ class DensityAnalysis(AnalysisBase): | |
:func:`_set_user_grid` is now a method of :class:`DensityAnalysis`. | ||
:class:`Density` results are now stored in a | ||
:class:`MDAnalysis.analysis.base.Results` instance. | ||
|
||
.. versionchanged:: 2.9.0 | ||
Introduced :meth:`get_supported_backends` allowing | ||
for parallel execution on :mod:`multiprocessing` | ||
and :mod:`dask` backends. | ||
""" | ||
_analysis_algorithm_is_parallelizable = True | ||
|
||
@classmethod | ||
def get_supported_backends(cls): | ||
return ('serial', 'multiprocessing', 'dask') | ||
|
||
def __init__(self, atomgroup, delta=1.0, | ||
metadata=None, padding=2.0, | ||
|
@@ -412,7 +420,6 @@ def __init__(self, atomgroup, delta=1.0, | |
self._ydim = ydim | ||
self._zdim = zdim | ||
|
||
def _prepare(self): | ||
coord = self._atomgroup.positions | ||
if (self._gridcenter is not None or | ||
any([self._xdim, self._ydim, self._zdim])): | ||
|
@@ -465,7 +472,7 @@ def _prepare(self): | |
grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, | ||
range=arange, density=False) | ||
grid *= 0.0 | ||
self._grid = grid | ||
self.results._grid = grid | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The overall grid shape needs to be determined in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah okay, yes that makes sense. I think there was also an additional case, where I encountered this problem with the parallelization; Other than that I added the Changelog and the docs, so should be ready to go :) |
||
self._edges = edges | ||
self._arange = arange | ||
self._bins = bins | ||
|
@@ -474,21 +481,22 @@ def _single_frame(self): | |
h, _ = np.histogramdd(self._atomgroup.positions, | ||
bins=self._bins, range=self._arange, | ||
density=False) | ||
# reduce (proposed change #2542 to match the parallel version in pmda.density) | ||
# return self._reduce(self._grid, h) | ||
# | ||
# serial code can simply do | ||
self._grid += h | ||
self.results._grid += h | ||
|
||
def _conclude(self): | ||
# average: | ||
self._grid /= float(self.n_frames) | ||
density = Density(grid=self._grid, edges=self._edges, | ||
self.results._grid /= float(self.n_frames) | ||
density = Density(grid=self.results._grid, edges=self._edges, | ||
units={'length': "Angstrom"}, | ||
parameters={'isDensity': False}) | ||
density.make_density() | ||
self.results.density = density | ||
|
||
def _get_aggregator(self): | ||
return ResultsGroup(lookup={ | ||
'_grid': ResultsGroup.ndarray_sum} | ||
) | ||
|
||
@property | ||
def density(self): | ||
wmsg = ("The `density` attribute was deprecated in MDAnalysis 2.0.0 " | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -230,12 +230,20 @@ def universe(self): | |
|
||
|
||
class TestDensityAnalysis(DensityParameters): | ||
def check_DensityAnalysis(self, ag, ref_meandensity, | ||
tmpdir, runargs=None, **kwargs): | ||
def check_DensityAnalysis( | ||
self, | ||
ag, | ||
ref_meandensity, | ||
tmpdir, | ||
client_DensityAnalysis, | ||
runargs=None, | ||
**kwargs | ||
): | ||
runargs = runargs if runargs else {} | ||
with tmpdir.as_cwd(): | ||
D = density.DensityAnalysis( | ||
ag, delta=self.delta, **kwargs).run(**runargs) | ||
D = density.DensityAnalysis(ag, delta=self.delta, **kwargs).run( | ||
**runargs, **client_DensityAnalysis | ||
) | ||
assert_almost_equal(D.results.density.grid.mean(), ref_meandensity, | ||
err_msg="mean density does not match") | ||
D.results.density.export(self.outfile) | ||
|
@@ -247,125 +255,176 @@ def check_DensityAnalysis(self, ag, ref_meandensity, | |
) | ||
|
||
@pytest.mark.parametrize("mode", ("static", "dynamic")) | ||
def test_run(self, mode, universe, tmpdir): | ||
def test_run(self, mode, universe, tmpdir, client_DensityAnalysis): | ||
updating = (mode == "dynamic") | ||
self.check_DensityAnalysis( | ||
universe.select_atoms(self.selections[mode], updating=updating), | ||
self.references[mode]['meandensity'], | ||
tmpdir=tmpdir | ||
tmpdir=tmpdir, | ||
client_DensityAnalysis=client_DensityAnalysis, | ||
) | ||
|
||
def test_sliced(self, universe, tmpdir): | ||
def test_sliced(self, universe, tmpdir, client_DensityAnalysis): | ||
self.check_DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
self.references['static_sliced']['meandensity'], | ||
tmpdir=tmpdir, | ||
client_DensityAnalysis=client_DensityAnalysis, | ||
runargs=dict(start=1, stop=-1, step=2), | ||
) | ||
|
||
def test_userdefn_eqbox(self, universe, tmpdir): | ||
def test_userdefn_eqbox(self, universe, tmpdir, client_DensityAnalysis): | ||
with warnings.catch_warnings(): | ||
# Do not need to see UserWarning that box is too small | ||
warnings.simplefilter("ignore") | ||
self.check_DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
self.references['static_defined']['meandensity'], | ||
tmpdir=tmpdir, | ||
client_DensityAnalysis=client_DensityAnalysis, | ||
gridcenter=self.gridcenters['static_defined'], | ||
xdim=10.0, | ||
ydim=10.0, | ||
zdim=10.0, | ||
) | ||
|
||
def test_userdefn_neqbox(self, universe, tmpdir): | ||
def test_userdefn_neqbox(self, universe, tmpdir, client_DensityAnalysis): | ||
self.check_DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
self.references['static_defined_unequal']['meandensity'], | ||
tmpdir=tmpdir, | ||
client_DensityAnalysis=client_DensityAnalysis, | ||
gridcenter=self.gridcenters['static_defined'], | ||
xdim=10.0, | ||
ydim=15.0, | ||
zdim=20.0, | ||
) | ||
|
||
def test_userdefn_boxshape(self, universe): | ||
def test_userdefn_boxshape(self, universe, client_DensityAnalysis): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
delta=1.0, xdim=8.0, ydim=12.0, zdim=17.0, | ||
gridcenter=self.gridcenters['static_defined']).run() | ||
universe.select_atoms(self.selections["static"]), | ||
delta=1.0, | ||
xdim=8.0, | ||
ydim=12.0, | ||
zdim=17.0, | ||
gridcenter=self.gridcenters["static_defined"], | ||
).run(**client_DensityAnalysis) | ||
assert D.results.density.grid.shape == (8, 12, 17) | ||
|
||
def test_warn_userdefn_padding(self, universe): | ||
def test_warn_userdefn_padding(self, universe, client_DensityAnalysis): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not 100% sure if we need to test warnings for all parallel modalities, too, but it's the safe approach and you've already converted all tests so let's leave it in. Just something to think about in the future if our test run times are too long (again...). |
||
regex = (r"Box padding \(currently set at 1\.0\) is not used " | ||
r"in user defined grids\.") | ||
with pytest.warns(UserWarning, match=regex): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
delta=self.delta, xdim=100.0, ydim=100.0, zdim=100.0, padding=1.0, | ||
gridcenter=self.gridcenters['static_defined']).run(step=5) | ||
|
||
def test_warn_userdefn_smallgrid(self, universe): | ||
universe.select_atoms(self.selections["static"]), | ||
delta=self.delta, | ||
xdim=100.0, | ||
ydim=100.0, | ||
zdim=100.0, | ||
padding=1.0, | ||
gridcenter=self.gridcenters["static_defined"], | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_warn_userdefn_smallgrid(self, universe, client_DensityAnalysis): | ||
regex = ("Atom selection does not fit grid --- " | ||
"you may want to define a larger box") | ||
with pytest.warns(UserWarning, match=regex): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0, | ||
gridcenter=self.gridcenters['static_defined']).run(step=5) | ||
|
||
def test_ValueError_userdefn_gridcenter_shape(self, universe): | ||
universe.select_atoms(self.selections["static"]), | ||
delta=self.delta, | ||
xdim=1.0, | ||
ydim=2.0, | ||
zdim=2.0, | ||
padding=0.0, | ||
gridcenter=self.gridcenters["static_defined"], | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_ValueError_userdefn_gridcenter_shape( | ||
self, universe, client_DensityAnalysis | ||
): | ||
# Test len(gridcenter) != 3 | ||
with pytest.raises(ValueError, match="Gridcenter must be a 3D coordinate"): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0, | ||
gridcenter=self.gridcenters['error1']).run(step=5) | ||
universe.select_atoms(self.selections["static"]), | ||
delta=self.delta, | ||
xdim=10.0, | ||
ydim=10.0, | ||
zdim=10.0, | ||
gridcenter=self.gridcenters["error1"], | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_ValueError_userdefn_gridcenter_type(self, universe): | ||
def test_ValueError_userdefn_gridcenter_type( | ||
self, universe, client_DensityAnalysis | ||
): | ||
# Test gridcenter includes non-numeric strings | ||
with pytest.raises(ValueError, match="Gridcenter must be a 3D coordinate"): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0, | ||
gridcenter=self.gridcenters['error2']).run(step=5) | ||
universe.select_atoms(self.selections["static"]), | ||
delta=self.delta, | ||
xdim=10.0, | ||
ydim=10.0, | ||
zdim=10.0, | ||
gridcenter=self.gridcenters["error2"], | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_ValueError_userdefn_gridcenter_missing(self, universe): | ||
def test_ValueError_userdefn_gridcenter_missing( | ||
self, universe, client_DensityAnalysis | ||
): | ||
# Test no gridcenter provided when grid dimensions are given | ||
regex = ("Gridcenter or grid dimensions are not provided") | ||
with pytest.raises(ValueError, match=regex): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0).run(step=5) | ||
universe.select_atoms(self.selections["static"]), | ||
delta=self.delta, | ||
xdim=10.0, | ||
ydim=10.0, | ||
zdim=10.0, | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_ValueError_userdefn_xdim_type(self, universe): | ||
def test_ValueError_userdefn_xdim_type(self, universe, | ||
client_DensityAnalysis): | ||
# Test xdim != int or float | ||
with pytest.raises(ValueError, match="xdim, ydim, and zdim must be numbers"): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
delta=self.delta, xdim="MDAnalysis", ydim=10.0, zdim=10.0, | ||
gridcenter=self.gridcenters['static_defined']).run(step=5) | ||
universe.select_atoms(self.selections["static"]), | ||
delta=self.delta, | ||
xdim="MDAnalysis", | ||
ydim=10.0, | ||
zdim=10.0, | ||
gridcenter=self.gridcenters["static_defined"], | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_ValueError_userdefn_xdim_nanvalue(self, universe): | ||
def test_ValueError_userdefn_xdim_nanvalue(self, universe, | ||
client_DensityAnalysis): | ||
# Test xdim set to NaN value | ||
regex = ("Gridcenter or grid dimensions have NaN element") | ||
with pytest.raises(ValueError, match=regex): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static']), | ||
delta=self.delta, xdim=np.nan, ydim=10.0, zdim=10.0, | ||
gridcenter=self.gridcenters['static_defined']).run(step=5) | ||
universe.select_atoms(self.selections["static"]), | ||
delta=self.delta, | ||
xdim=np.nan, | ||
ydim=10.0, | ||
zdim=10.0, | ||
gridcenter=self.gridcenters["static_defined"], | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_warn_noatomgroup(self, universe): | ||
def test_warn_noatomgroup(self, universe, client_DensityAnalysis): | ||
regex = ("No atoms in AtomGroup at input time frame. " | ||
"This may be intended; please ensure that " | ||
"your grid selection covers the atomic " | ||
"positions you wish to capture.") | ||
with pytest.warns(UserWarning, match=regex): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['none']), | ||
delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0, | ||
gridcenter=self.gridcenters['static_defined']).run(step=5) | ||
|
||
def test_ValueError_noatomgroup(self, universe): | ||
universe.select_atoms(self.selections["none"]), | ||
delta=self.delta, | ||
xdim=1.0, | ||
ydim=2.0, | ||
zdim=2.0, | ||
padding=0.0, | ||
gridcenter=self.gridcenters["static_defined"], | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_ValueError_noatomgroup(self, universe, client_DensityAnalysis): | ||
with pytest.raises(ValueError, match="No atoms in AtomGroup at input" | ||
" time frame. Grid for density" | ||
" could not be automatically" | ||
|
@@ -374,12 +433,13 @@ def test_ValueError_noatomgroup(self, universe): | |
" defined grid will " | ||
"need to be provided instead."): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['none'])).run(step=5) | ||
universe.select_atoms(self.selections["none"]) | ||
).run(step=5, **client_DensityAnalysis) | ||
|
||
def test_warn_results_deprecated(self, universe): | ||
def test_warn_results_deprecated(self, universe, client_DensityAnalysis): | ||
D = density.DensityAnalysis( | ||
universe.select_atoms(self.selections['static'])) | ||
D.run(stop=1) | ||
D.run(stop=1, **client_DensityAnalysis) | ||
wmsg = "The `density` attribute was deprecated in MDAnalysis 2.0.0" | ||
with pytest.warns(DeprecationWarning, match=wmsg): | ||
assert_equal(D.density.grid, D.results.density.grid) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like the right change.
I am noticing that the necessary changes for parallelization really shift a lot of what
_prepare()
was doing back into__init__
. I guess that's ok and it's better than introducing yet another method like_prepare_init()
... We probably need to review the docs for writing analysis code with AnalysisBase.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a comment so that future readers know why we are doing everything here
and not in
_prepare()
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
manually added to the file, suggestions don't work on deleted lines