Skip to content

Commit

Permalink
'MDAnalysis.analysis.density' parallelization (#4729)
Browse files Browse the repository at this point in the history
* Fixes #4677
* parallelize DensityAnalysis
* Changes made in this Pull Request:
    - added backends and aggregators to DensityAnalysis in analysis.density
    - added client_DensityAnalysis in conftest.py
    - added client_DensityAnalysis to the tests in test_density.py
* Update CHANGELOG

---------

Co-authored-by: Yuxuan Zhuang <[email protected]>
Co-authored-by: Oliver Beckstein <[email protected]>
  • Loading branch information
3 people authored Dec 18, 2024
1 parent a3672f2 commit ac145ec
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 62 deletions.
1 change: 1 addition & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Fixes
the function to prevent shared state. (Issue #4655)

Enhancements
* Enables parallelization for analysis.density.DensityAnalysis (Issue #4677, PR #4729)
* Enables parallelization for analysis.contacts.Contacts (Issue #4660)
* Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670)
* Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)
Expand Down
36 changes: 25 additions & 11 deletions package/MDAnalysis/analysis/density.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@
from ..lib import distances
from MDAnalysis.lib.log import ProgressBar

from .base import AnalysisBase
from .base import AnalysisBase, ResultsGroup

import logging

Expand Down Expand Up @@ -395,8 +395,16 @@ class DensityAnalysis(AnalysisBase):
:func:`_set_user_grid` is now a method of :class:`DensityAnalysis`.
:class:`Density` results are now stored in a
:class:`MDAnalysis.analysis.base.Results` instance.
.. versionchanged:: 2.9.0
Introduced :meth:`get_supported_backends` allowing
for parallel execution on :mod:`multiprocessing`
and :mod:`dask` backends.
"""
_analysis_algorithm_is_parallelizable = True

@classmethod
def get_supported_backends(cls):
return ('serial', 'multiprocessing', 'dask')

def __init__(self, atomgroup, delta=1.0,
metadata=None, padding=2.0,
Expand All @@ -412,7 +420,12 @@ def __init__(self, atomgroup, delta=1.0,
self._ydim = ydim
self._zdim = zdim

def _prepare(self):
# The grid with its dimensions has to be set up in __init__
# so that parallel analysis works correctly: each process
# needs to have a results._grid of the same size and the
# same self._bins and self._arange (so this cannot happen
# in _prepare(), which is executed in parallel on different
# parts of the trajectory).
coord = self._atomgroup.positions
if (self._gridcenter is not None or
any([self._xdim, self._ydim, self._zdim])):
Expand Down Expand Up @@ -465,7 +478,7 @@ def _prepare(self):
grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins,
range=arange, density=False)
grid *= 0.0
self._grid = grid
self.results._grid = grid
self._edges = edges
self._arange = arange
self._bins = bins
Expand All @@ -474,21 +487,22 @@ def _single_frame(self):
h, _ = np.histogramdd(self._atomgroup.positions,
bins=self._bins, range=self._arange,
density=False)
# reduce (proposed change #2542 to match the parallel version in pmda.density)
# return self._reduce(self._grid, h)
#
# serial code can simply do
self._grid += h
self.results._grid += h

def _conclude(self):
# average:
self._grid /= float(self.n_frames)
density = Density(grid=self._grid, edges=self._edges,
self.results._grid /= float(self.n_frames)
density = Density(grid=self.results._grid, edges=self._edges,
units={'length': "Angstrom"},
parameters={'isDensity': False})
density.make_density()
self.results.density = density

def _get_aggregator(self):
return ResultsGroup(lookup={
'_grid': ResultsGroup.ndarray_sum}
)

@property
def density(self):
wmsg = ("The `density` attribute was deprecated in MDAnalysis 2.0.0 "
Expand Down
8 changes: 8 additions & 0 deletions testsuite/MDAnalysisTests/analysis/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
)
from MDAnalysis.analysis.nucleicacids import NucPairDist
from MDAnalysis.analysis.contacts import Contacts
from MDAnalysis.analysis.density import DensityAnalysis
from MDAnalysis.lib.util import is_installed


Expand Down Expand Up @@ -157,3 +158,10 @@ def client_NucPairDist(request):
@pytest.fixture(scope="module", params=params_for_cls(Contacts))
def client_Contacts(request):
return request.param


# MDAnalysis.analysis.density

@pytest.fixture(scope='module', params=params_for_cls(DensityAnalysis))
def client_DensityAnalysis(request):
return request.param
162 changes: 111 additions & 51 deletions testsuite/MDAnalysisTests/analysis/test_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,12 +230,20 @@ def universe(self):


class TestDensityAnalysis(DensityParameters):
def check_DensityAnalysis(self, ag, ref_meandensity,
tmpdir, runargs=None, **kwargs):
def check_DensityAnalysis(
self,
ag,
ref_meandensity,
tmpdir,
client_DensityAnalysis,
runargs=None,
**kwargs
):
runargs = runargs if runargs else {}
with tmpdir.as_cwd():
D = density.DensityAnalysis(
ag, delta=self.delta, **kwargs).run(**runargs)
D = density.DensityAnalysis(ag, delta=self.delta, **kwargs).run(
**runargs, **client_DensityAnalysis
)
assert_almost_equal(D.results.density.grid.mean(), ref_meandensity,
err_msg="mean density does not match")
D.results.density.export(self.outfile)
Expand All @@ -247,125 +255,176 @@ def check_DensityAnalysis(self, ag, ref_meandensity,
)

@pytest.mark.parametrize("mode", ("static", "dynamic"))
def test_run(self, mode, universe, tmpdir):
def test_run(self, mode, universe, tmpdir, client_DensityAnalysis):
updating = (mode == "dynamic")
self.check_DensityAnalysis(
universe.select_atoms(self.selections[mode], updating=updating),
self.references[mode]['meandensity'],
tmpdir=tmpdir
tmpdir=tmpdir,
client_DensityAnalysis=client_DensityAnalysis,
)

def test_sliced(self, universe, tmpdir):
def test_sliced(self, universe, tmpdir, client_DensityAnalysis):
self.check_DensityAnalysis(
universe.select_atoms(self.selections['static']),
self.references['static_sliced']['meandensity'],
tmpdir=tmpdir,
client_DensityAnalysis=client_DensityAnalysis,
runargs=dict(start=1, stop=-1, step=2),
)

def test_userdefn_eqbox(self, universe, tmpdir):
def test_userdefn_eqbox(self, universe, tmpdir, client_DensityAnalysis):
with warnings.catch_warnings():
# Do not need to see UserWarning that box is too small
warnings.simplefilter("ignore")
self.check_DensityAnalysis(
universe.select_atoms(self.selections['static']),
self.references['static_defined']['meandensity'],
tmpdir=tmpdir,
client_DensityAnalysis=client_DensityAnalysis,
gridcenter=self.gridcenters['static_defined'],
xdim=10.0,
ydim=10.0,
zdim=10.0,
)

def test_userdefn_neqbox(self, universe, tmpdir):
def test_userdefn_neqbox(self, universe, tmpdir, client_DensityAnalysis):
self.check_DensityAnalysis(
universe.select_atoms(self.selections['static']),
self.references['static_defined_unequal']['meandensity'],
tmpdir=tmpdir,
client_DensityAnalysis=client_DensityAnalysis,
gridcenter=self.gridcenters['static_defined'],
xdim=10.0,
ydim=15.0,
zdim=20.0,
)

def test_userdefn_boxshape(self, universe):
def test_userdefn_boxshape(self, universe, client_DensityAnalysis):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=1.0, xdim=8.0, ydim=12.0, zdim=17.0,
gridcenter=self.gridcenters['static_defined']).run()
universe.select_atoms(self.selections["static"]),
delta=1.0,
xdim=8.0,
ydim=12.0,
zdim=17.0,
gridcenter=self.gridcenters["static_defined"],
).run(**client_DensityAnalysis)
assert D.results.density.grid.shape == (8, 12, 17)

def test_warn_userdefn_padding(self, universe):
def test_warn_userdefn_padding(self, universe, client_DensityAnalysis):
regex = (r"Box padding \(currently set at 1\.0\) is not used "
r"in user defined grids\.")
with pytest.warns(UserWarning, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=100.0, ydim=100.0, zdim=100.0, padding=1.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)

def test_warn_userdefn_smallgrid(self, universe):
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=100.0,
ydim=100.0,
zdim=100.0,
padding=1.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_warn_userdefn_smallgrid(self, universe, client_DensityAnalysis):
regex = ("Atom selection does not fit grid --- "
"you may want to define a larger box")
with pytest.warns(UserWarning, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)

def test_ValueError_userdefn_gridcenter_shape(self, universe):
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=1.0,
ydim=2.0,
zdim=2.0,
padding=0.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_gridcenter_shape(
self, universe, client_DensityAnalysis
):
# Test len(gridcenter) != 3
with pytest.raises(ValueError, match="Gridcenter must be a 3D coordinate"):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0,
gridcenter=self.gridcenters['error1']).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=10.0,
ydim=10.0,
zdim=10.0,
gridcenter=self.gridcenters["error1"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_gridcenter_type(self, universe):
def test_ValueError_userdefn_gridcenter_type(
self, universe, client_DensityAnalysis
):
# Test gridcenter includes non-numeric strings
with pytest.raises(ValueError, match="Gridcenter must be a 3D coordinate"):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0,
gridcenter=self.gridcenters['error2']).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=10.0,
ydim=10.0,
zdim=10.0,
gridcenter=self.gridcenters["error2"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_gridcenter_missing(self, universe):
def test_ValueError_userdefn_gridcenter_missing(
self, universe, client_DensityAnalysis
):
# Test no gridcenter provided when grid dimensions are given
regex = ("Gridcenter or grid dimensions are not provided")
with pytest.raises(ValueError, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=10.0,
ydim=10.0,
zdim=10.0,
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_xdim_type(self, universe):
def test_ValueError_userdefn_xdim_type(self, universe,
client_DensityAnalysis):
# Test xdim != int or float
with pytest.raises(ValueError, match="xdim, ydim, and zdim must be numbers"):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim="MDAnalysis", ydim=10.0, zdim=10.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim="MDAnalysis",
ydim=10.0,
zdim=10.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_xdim_nanvalue(self, universe):
def test_ValueError_userdefn_xdim_nanvalue(self, universe,
client_DensityAnalysis):
# Test xdim set to NaN value
regex = ("Gridcenter or grid dimensions have NaN element")
with pytest.raises(ValueError, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=np.nan, ydim=10.0, zdim=10.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=np.nan,
ydim=10.0,
zdim=10.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_warn_noatomgroup(self, universe):
def test_warn_noatomgroup(self, universe, client_DensityAnalysis):
regex = ("No atoms in AtomGroup at input time frame. "
"This may be intended; please ensure that "
"your grid selection covers the atomic "
"positions you wish to capture.")
with pytest.warns(UserWarning, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['none']),
delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)

def test_ValueError_noatomgroup(self, universe):
universe.select_atoms(self.selections["none"]),
delta=self.delta,
xdim=1.0,
ydim=2.0,
zdim=2.0,
padding=0.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_noatomgroup(self, universe, client_DensityAnalysis):
with pytest.raises(ValueError, match="No atoms in AtomGroup at input"
" time frame. Grid for density"
" could not be automatically"
Expand All @@ -374,12 +433,13 @@ def test_ValueError_noatomgroup(self, universe):
" defined grid will "
"need to be provided instead."):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['none'])).run(step=5)
universe.select_atoms(self.selections["none"])
).run(step=5, **client_DensityAnalysis)

def test_warn_results_deprecated(self, universe):
def test_warn_results_deprecated(self, universe, client_DensityAnalysis):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']))
D.run(stop=1)
D.run(stop=1, **client_DensityAnalysis)
wmsg = "The `density` attribute was deprecated in MDAnalysis 2.0.0"
with pytest.warns(DeprecationWarning, match=wmsg):
assert_equal(D.density.grid, D.results.density.grid)
Expand Down

0 comments on commit ac145ec

Please sign in to comment.