From ac145ec3892a8483f1f7b71b0b8decb2ebf3a741 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:26:15 +0100 Subject: [PATCH] 'MDAnalysis.analysis.density' parallelization (#4729) * Fixes #4677 * parallelize DensityAnalysis * Changes made in this Pull Request: - added backends and aggregators to DensityAnalysis in analysis.density - added client_DensityAnalysis in conftest.py - added client_DensityAnalysis to the tests in test_density.py * Update CHANGELOG --------- Co-authored-by: Yuxuan Zhuang Co-authored-by: Oliver Beckstein --- package/CHANGELOG | 1 + package/MDAnalysis/analysis/density.py | 36 ++-- .../MDAnalysisTests/analysis/conftest.py | 8 + .../MDAnalysisTests/analysis/test_density.py | 162 ++++++++++++------ 4 files changed, 145 insertions(+), 62 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 03255eb4ad..909020a866 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -25,6 +25,7 @@ Fixes the function to prevent shared state. (Issue #4655) Enhancements + * Enables parallelization for analysis.density.DensityAnalysis (Issue #4677, PR #4729) * Enables parallelization for analysis.contacts.Contacts (Issue #4660) * Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670) * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824) diff --git a/package/MDAnalysis/analysis/density.py b/package/MDAnalysis/analysis/density.py index f5c270e7dc..eb2522531f 100644 --- a/package/MDAnalysis/analysis/density.py +++ b/package/MDAnalysis/analysis/density.py @@ -169,7 +169,7 @@ from ..lib import distances from MDAnalysis.lib.log import ProgressBar -from .base import AnalysisBase +from .base import AnalysisBase, ResultsGroup import logging @@ -395,8 +395,16 @@ class DensityAnalysis(AnalysisBase): :func:`_set_user_grid` is now a method of :class:`DensityAnalysis`. :class:`Density` results are now stored in a :class:`MDAnalysis.analysis.base.Results` instance. - + .. versionchanged:: 2.9.0 + Introduced :meth:`get_supported_backends` allowing + for parallel execution on :mod:`multiprocessing` + and :mod:`dask` backends. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask') def __init__(self, atomgroup, delta=1.0, metadata=None, padding=2.0, @@ -412,7 +420,12 @@ def __init__(self, atomgroup, delta=1.0, self._ydim = ydim self._zdim = zdim - def _prepare(self): + # The grid with its dimensions has to be set up in __init__ + # so that parallel analysis works correctly: each process + # needs to have a results._grid of the same size and the + # same self._bins and self._arange (so this cannot happen + # in _prepare(), which is executed in parallel on different + # parts of the trajectory). coord = self._atomgroup.positions if (self._gridcenter is not None or any([self._xdim, self._ydim, self._zdim])): @@ -465,7 +478,7 @@ def _prepare(self): grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, range=arange, density=False) grid *= 0.0 - self._grid = grid + self.results._grid = grid self._edges = edges self._arange = arange self._bins = bins @@ -474,21 +487,22 @@ def _single_frame(self): h, _ = np.histogramdd(self._atomgroup.positions, bins=self._bins, range=self._arange, density=False) - # reduce (proposed change #2542 to match the parallel version in pmda.density) - # return self._reduce(self._grid, h) - # - # serial code can simply do - self._grid += h + self.results._grid += h def _conclude(self): # average: - self._grid /= float(self.n_frames) - density = Density(grid=self._grid, edges=self._edges, + self.results._grid /= float(self.n_frames) + density = Density(grid=self.results._grid, edges=self._edges, units={'length': "Angstrom"}, parameters={'isDensity': False}) density.make_density() self.results.density = density + def _get_aggregator(self): + return ResultsGroup(lookup={ + '_grid': ResultsGroup.ndarray_sum} + ) + @property def density(self): wmsg = ("The `density` attribute was deprecated in MDAnalysis 2.0.0 " diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 91e9bd760b..df17c05c06 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -16,6 +16,7 @@ ) from MDAnalysis.analysis.nucleicacids import NucPairDist from MDAnalysis.analysis.contacts import Contacts +from MDAnalysis.analysis.density import DensityAnalysis from MDAnalysis.lib.util import is_installed @@ -157,3 +158,10 @@ def client_NucPairDist(request): @pytest.fixture(scope="module", params=params_for_cls(Contacts)) def client_Contacts(request): return request.param + + +# MDAnalysis.analysis.density + +@pytest.fixture(scope='module', params=params_for_cls(DensityAnalysis)) +def client_DensityAnalysis(request): + return request.param diff --git a/testsuite/MDAnalysisTests/analysis/test_density.py b/testsuite/MDAnalysisTests/analysis/test_density.py index c99b671e3d..68dbed6839 100644 --- a/testsuite/MDAnalysisTests/analysis/test_density.py +++ b/testsuite/MDAnalysisTests/analysis/test_density.py @@ -230,12 +230,20 @@ def universe(self): class TestDensityAnalysis(DensityParameters): - def check_DensityAnalysis(self, ag, ref_meandensity, - tmpdir, runargs=None, **kwargs): + def check_DensityAnalysis( + self, + ag, + ref_meandensity, + tmpdir, + client_DensityAnalysis, + runargs=None, + **kwargs + ): runargs = runargs if runargs else {} with tmpdir.as_cwd(): - D = density.DensityAnalysis( - ag, delta=self.delta, **kwargs).run(**runargs) + D = density.DensityAnalysis(ag, delta=self.delta, **kwargs).run( + **runargs, **client_DensityAnalysis + ) assert_almost_equal(D.results.density.grid.mean(), ref_meandensity, err_msg="mean density does not match") D.results.density.export(self.outfile) @@ -247,23 +255,25 @@ def check_DensityAnalysis(self, ag, ref_meandensity, ) @pytest.mark.parametrize("mode", ("static", "dynamic")) - def test_run(self, mode, universe, tmpdir): + def test_run(self, mode, universe, tmpdir, client_DensityAnalysis): updating = (mode == "dynamic") self.check_DensityAnalysis( universe.select_atoms(self.selections[mode], updating=updating), self.references[mode]['meandensity'], - tmpdir=tmpdir + tmpdir=tmpdir, + client_DensityAnalysis=client_DensityAnalysis, ) - def test_sliced(self, universe, tmpdir): + def test_sliced(self, universe, tmpdir, client_DensityAnalysis): self.check_DensityAnalysis( universe.select_atoms(self.selections['static']), self.references['static_sliced']['meandensity'], tmpdir=tmpdir, + client_DensityAnalysis=client_DensityAnalysis, runargs=dict(start=1, stop=-1, step=2), ) - def test_userdefn_eqbox(self, universe, tmpdir): + def test_userdefn_eqbox(self, universe, tmpdir, client_DensityAnalysis): with warnings.catch_warnings(): # Do not need to see UserWarning that box is too small warnings.simplefilter("ignore") @@ -271,101 +281,150 @@ def test_userdefn_eqbox(self, universe, tmpdir): universe.select_atoms(self.selections['static']), self.references['static_defined']['meandensity'], tmpdir=tmpdir, + client_DensityAnalysis=client_DensityAnalysis, gridcenter=self.gridcenters['static_defined'], xdim=10.0, ydim=10.0, zdim=10.0, ) - def test_userdefn_neqbox(self, universe, tmpdir): + def test_userdefn_neqbox(self, universe, tmpdir, client_DensityAnalysis): self.check_DensityAnalysis( universe.select_atoms(self.selections['static']), self.references['static_defined_unequal']['meandensity'], tmpdir=tmpdir, + client_DensityAnalysis=client_DensityAnalysis, gridcenter=self.gridcenters['static_defined'], xdim=10.0, ydim=15.0, zdim=20.0, ) - def test_userdefn_boxshape(self, universe): + def test_userdefn_boxshape(self, universe, client_DensityAnalysis): D = density.DensityAnalysis( - universe.select_atoms(self.selections['static']), - delta=1.0, xdim=8.0, ydim=12.0, zdim=17.0, - gridcenter=self.gridcenters['static_defined']).run() + universe.select_atoms(self.selections["static"]), + delta=1.0, + xdim=8.0, + ydim=12.0, + zdim=17.0, + gridcenter=self.gridcenters["static_defined"], + ).run(**client_DensityAnalysis) assert D.results.density.grid.shape == (8, 12, 17) - def test_warn_userdefn_padding(self, universe): + def test_warn_userdefn_padding(self, universe, client_DensityAnalysis): regex = (r"Box padding \(currently set at 1\.0\) is not used " r"in user defined grids\.") with pytest.warns(UserWarning, match=regex): D = density.DensityAnalysis( - universe.select_atoms(self.selections['static']), - delta=self.delta, xdim=100.0, ydim=100.0, zdim=100.0, padding=1.0, - gridcenter=self.gridcenters['static_defined']).run(step=5) - - def test_warn_userdefn_smallgrid(self, universe): + universe.select_atoms(self.selections["static"]), + delta=self.delta, + xdim=100.0, + ydim=100.0, + zdim=100.0, + padding=1.0, + gridcenter=self.gridcenters["static_defined"], + ).run(step=5, **client_DensityAnalysis) + + def test_warn_userdefn_smallgrid(self, universe, client_DensityAnalysis): regex = ("Atom selection does not fit grid --- " "you may want to define a larger box") with pytest.warns(UserWarning, match=regex): D = density.DensityAnalysis( - universe.select_atoms(self.selections['static']), - delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0, - gridcenter=self.gridcenters['static_defined']).run(step=5) - - def test_ValueError_userdefn_gridcenter_shape(self, universe): + universe.select_atoms(self.selections["static"]), + delta=self.delta, + xdim=1.0, + ydim=2.0, + zdim=2.0, + padding=0.0, + gridcenter=self.gridcenters["static_defined"], + ).run(step=5, **client_DensityAnalysis) + + def test_ValueError_userdefn_gridcenter_shape( + self, universe, client_DensityAnalysis + ): # Test len(gridcenter) != 3 with pytest.raises(ValueError, match="Gridcenter must be a 3D coordinate"): D = density.DensityAnalysis( - universe.select_atoms(self.selections['static']), - delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0, - gridcenter=self.gridcenters['error1']).run(step=5) + universe.select_atoms(self.selections["static"]), + delta=self.delta, + xdim=10.0, + ydim=10.0, + zdim=10.0, + gridcenter=self.gridcenters["error1"], + ).run(step=5, **client_DensityAnalysis) - def test_ValueError_userdefn_gridcenter_type(self, universe): + def test_ValueError_userdefn_gridcenter_type( + self, universe, client_DensityAnalysis + ): # Test gridcenter includes non-numeric strings with pytest.raises(ValueError, match="Gridcenter must be a 3D coordinate"): D = density.DensityAnalysis( - universe.select_atoms(self.selections['static']), - delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0, - gridcenter=self.gridcenters['error2']).run(step=5) + universe.select_atoms(self.selections["static"]), + delta=self.delta, + xdim=10.0, + ydim=10.0, + zdim=10.0, + gridcenter=self.gridcenters["error2"], + ).run(step=5, **client_DensityAnalysis) - def test_ValueError_userdefn_gridcenter_missing(self, universe): + def test_ValueError_userdefn_gridcenter_missing( + self, universe, client_DensityAnalysis + ): # Test no gridcenter provided when grid dimensions are given regex = ("Gridcenter or grid dimensions are not provided") with pytest.raises(ValueError, match=regex): D = density.DensityAnalysis( - universe.select_atoms(self.selections['static']), - delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0).run(step=5) + universe.select_atoms(self.selections["static"]), + delta=self.delta, + xdim=10.0, + ydim=10.0, + zdim=10.0, + ).run(step=5, **client_DensityAnalysis) - def test_ValueError_userdefn_xdim_type(self, universe): + def test_ValueError_userdefn_xdim_type(self, universe, + client_DensityAnalysis): # Test xdim != int or float with pytest.raises(ValueError, match="xdim, ydim, and zdim must be numbers"): D = density.DensityAnalysis( - universe.select_atoms(self.selections['static']), - delta=self.delta, xdim="MDAnalysis", ydim=10.0, zdim=10.0, - gridcenter=self.gridcenters['static_defined']).run(step=5) + universe.select_atoms(self.selections["static"]), + delta=self.delta, + xdim="MDAnalysis", + ydim=10.0, + zdim=10.0, + gridcenter=self.gridcenters["static_defined"], + ).run(step=5, **client_DensityAnalysis) - def test_ValueError_userdefn_xdim_nanvalue(self, universe): + def test_ValueError_userdefn_xdim_nanvalue(self, universe, + client_DensityAnalysis): # Test xdim set to NaN value regex = ("Gridcenter or grid dimensions have NaN element") with pytest.raises(ValueError, match=regex): D = density.DensityAnalysis( - universe.select_atoms(self.selections['static']), - delta=self.delta, xdim=np.nan, ydim=10.0, zdim=10.0, - gridcenter=self.gridcenters['static_defined']).run(step=5) + universe.select_atoms(self.selections["static"]), + delta=self.delta, + xdim=np.nan, + ydim=10.0, + zdim=10.0, + gridcenter=self.gridcenters["static_defined"], + ).run(step=5, **client_DensityAnalysis) - def test_warn_noatomgroup(self, universe): + def test_warn_noatomgroup(self, universe, client_DensityAnalysis): regex = ("No atoms in AtomGroup at input time frame. " "This may be intended; please ensure that " "your grid selection covers the atomic " "positions you wish to capture.") with pytest.warns(UserWarning, match=regex): D = density.DensityAnalysis( - universe.select_atoms(self.selections['none']), - delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0, - gridcenter=self.gridcenters['static_defined']).run(step=5) - - def test_ValueError_noatomgroup(self, universe): + universe.select_atoms(self.selections["none"]), + delta=self.delta, + xdim=1.0, + ydim=2.0, + zdim=2.0, + padding=0.0, + gridcenter=self.gridcenters["static_defined"], + ).run(step=5, **client_DensityAnalysis) + + def test_ValueError_noatomgroup(self, universe, client_DensityAnalysis): with pytest.raises(ValueError, match="No atoms in AtomGroup at input" " time frame. Grid for density" " could not be automatically" @@ -374,12 +433,13 @@ def test_ValueError_noatomgroup(self, universe): " defined grid will " "need to be provided instead."): D = density.DensityAnalysis( - universe.select_atoms(self.selections['none'])).run(step=5) + universe.select_atoms(self.selections["none"]) + ).run(step=5, **client_DensityAnalysis) - def test_warn_results_deprecated(self, universe): + def test_warn_results_deprecated(self, universe, client_DensityAnalysis): D = density.DensityAnalysis( universe.select_atoms(self.selections['static'])) - D.run(stop=1) + D.run(stop=1, **client_DensityAnalysis) wmsg = "The `density` attribute was deprecated in MDAnalysis 2.0.0" with pytest.warns(DeprecationWarning, match=wmsg): assert_equal(D.density.grid, D.results.density.grid)