Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

'MDAnalysis.analysis.density' parallelization #4729

Merged
merged 12 commits into from
Dec 18, 2024
1 change: 1 addition & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Fixes
the function to prevent shared state. (Issue #4655)

Enhancements
* Enables parallelization for analysis.density.DensityAnalysis (Issue #4677, PR #4729)
* Enables parallelization for analysis.contacts.Contacts (Issue #4660)
* Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670)
* Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)
Expand Down
30 changes: 19 additions & 11 deletions package/MDAnalysis/analysis/density.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@
from ..lib import distances
from MDAnalysis.lib.log import ProgressBar

from .base import AnalysisBase
from .base import AnalysisBase, ResultsGroup

import logging

Expand Down Expand Up @@ -395,8 +395,16 @@ class DensityAnalysis(AnalysisBase):
:func:`_set_user_grid` is now a method of :class:`DensityAnalysis`.
:class:`Density` results are now stored in a
:class:`MDAnalysis.analysis.base.Results` instance.

.. versionchanged:: 2.9.0
Introduced :meth:`get_supported_backends` allowing
for parallel execution on :mod:`multiprocessing`
and :mod:`dask` backends.
"""
_analysis_algorithm_is_parallelizable = True

@classmethod
def get_supported_backends(cls):
return ('serial', 'multiprocessing', 'dask')

def __init__(self, atomgroup, delta=1.0,
metadata=None, padding=2.0,
Expand All @@ -412,7 +420,6 @@ def __init__(self, atomgroup, delta=1.0,
self._ydim = ydim
self._zdim = zdim

def _prepare(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like the right change.

I am noticing that the necessary changes for parallelization really shift a lot of what _prepare() was doing back into __init__. I guess that's ok and it's better than introducing yet another method like _prepare_init() ... We probably need to review the docs for writing analysis code with AnalysisBase.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment so that future readers know why we are doing everything here
and not in _prepare().

Suggested change
def _prepare(self):
# The grid with its dimensions has to be set up in __init__
# so that parallel analysis works correctly: each process
# needs to have a `results._grid` of the same size and the
# same `_bins`.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

manually added to the file, suggestions don't work on deleted lines

coord = self._atomgroup.positions
if (self._gridcenter is not None or
any([self._xdim, self._ydim, self._zdim])):
Expand Down Expand Up @@ -465,7 +472,7 @@ def _prepare(self):
grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins,
range=arange, density=False)
grid *= 0.0
self._grid = grid
self.results._grid = grid
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The overall grid shape needs to be determined in __init__, then each worker can have its own.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay, yes that makes sense.

I think there was also an additional case, where I encountered this problem with the parallelization;
I will try to apply the same fix there to see if it works.

Other than that I added the Changelog and the docs, so should be ready to go :)

self._edges = edges
self._arange = arange
self._bins = bins
Expand All @@ -474,21 +481,22 @@ def _single_frame(self):
h, _ = np.histogramdd(self._atomgroup.positions,
bins=self._bins, range=self._arange,
density=False)
# reduce (proposed change #2542 to match the parallel version in pmda.density)
# return self._reduce(self._grid, h)
#
# serial code can simply do
self._grid += h
self.results._grid += h

def _conclude(self):
# average:
self._grid /= float(self.n_frames)
density = Density(grid=self._grid, edges=self._edges,
self.results._grid /= float(self.n_frames)
density = Density(grid=self.results._grid, edges=self._edges,
units={'length': "Angstrom"},
parameters={'isDensity': False})
density.make_density()
self.results.density = density

def _get_aggregator(self):
return ResultsGroup(lookup={
'_grid': ResultsGroup.ndarray_sum}
)

@property
def density(self):
wmsg = ("The `density` attribute was deprecated in MDAnalysis 2.0.0 "
Expand Down
8 changes: 8 additions & 0 deletions testsuite/MDAnalysisTests/analysis/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
)
from MDAnalysis.analysis.nucleicacids import NucPairDist
from MDAnalysis.analysis.contacts import Contacts
from MDAnalysis.analysis.density import DensityAnalysis
from MDAnalysis.lib.util import is_installed


Expand Down Expand Up @@ -157,3 +158,10 @@ def client_NucPairDist(request):
@pytest.fixture(scope="module", params=params_for_cls(Contacts))
def client_Contacts(request):
return request.param


# MDAnalysis.analysis.density

@pytest.fixture(scope='module', params=params_for_cls(DensityAnalysis))
def client_DensityAnalysis(request):
return request.param
162 changes: 111 additions & 51 deletions testsuite/MDAnalysisTests/analysis/test_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,12 +230,20 @@ def universe(self):


class TestDensityAnalysis(DensityParameters):
def check_DensityAnalysis(self, ag, ref_meandensity,
tmpdir, runargs=None, **kwargs):
def check_DensityAnalysis(
self,
ag,
ref_meandensity,
tmpdir,
client_DensityAnalysis,
runargs=None,
**kwargs
):
runargs = runargs if runargs else {}
with tmpdir.as_cwd():
D = density.DensityAnalysis(
ag, delta=self.delta, **kwargs).run(**runargs)
D = density.DensityAnalysis(ag, delta=self.delta, **kwargs).run(
**runargs, **client_DensityAnalysis
)
assert_almost_equal(D.results.density.grid.mean(), ref_meandensity,
err_msg="mean density does not match")
D.results.density.export(self.outfile)
Expand All @@ -247,125 +255,176 @@ def check_DensityAnalysis(self, ag, ref_meandensity,
)

@pytest.mark.parametrize("mode", ("static", "dynamic"))
def test_run(self, mode, universe, tmpdir):
def test_run(self, mode, universe, tmpdir, client_DensityAnalysis):
updating = (mode == "dynamic")
self.check_DensityAnalysis(
universe.select_atoms(self.selections[mode], updating=updating),
self.references[mode]['meandensity'],
tmpdir=tmpdir
tmpdir=tmpdir,
client_DensityAnalysis=client_DensityAnalysis,
)

def test_sliced(self, universe, tmpdir):
def test_sliced(self, universe, tmpdir, client_DensityAnalysis):
self.check_DensityAnalysis(
universe.select_atoms(self.selections['static']),
self.references['static_sliced']['meandensity'],
tmpdir=tmpdir,
client_DensityAnalysis=client_DensityAnalysis,
runargs=dict(start=1, stop=-1, step=2),
)

def test_userdefn_eqbox(self, universe, tmpdir):
def test_userdefn_eqbox(self, universe, tmpdir, client_DensityAnalysis):
with warnings.catch_warnings():
# Do not need to see UserWarning that box is too small
warnings.simplefilter("ignore")
self.check_DensityAnalysis(
universe.select_atoms(self.selections['static']),
self.references['static_defined']['meandensity'],
tmpdir=tmpdir,
client_DensityAnalysis=client_DensityAnalysis,
gridcenter=self.gridcenters['static_defined'],
xdim=10.0,
ydim=10.0,
zdim=10.0,
)

def test_userdefn_neqbox(self, universe, tmpdir):
def test_userdefn_neqbox(self, universe, tmpdir, client_DensityAnalysis):
self.check_DensityAnalysis(
universe.select_atoms(self.selections['static']),
self.references['static_defined_unequal']['meandensity'],
tmpdir=tmpdir,
client_DensityAnalysis=client_DensityAnalysis,
gridcenter=self.gridcenters['static_defined'],
xdim=10.0,
ydim=15.0,
zdim=20.0,
)

def test_userdefn_boxshape(self, universe):
def test_userdefn_boxshape(self, universe, client_DensityAnalysis):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=1.0, xdim=8.0, ydim=12.0, zdim=17.0,
gridcenter=self.gridcenters['static_defined']).run()
universe.select_atoms(self.selections["static"]),
delta=1.0,
xdim=8.0,
ydim=12.0,
zdim=17.0,
gridcenter=self.gridcenters["static_defined"],
).run(**client_DensityAnalysis)
assert D.results.density.grid.shape == (8, 12, 17)

def test_warn_userdefn_padding(self, universe):
def test_warn_userdefn_padding(self, universe, client_DensityAnalysis):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not 100% sure if we need to test warnings for all parallel modalities, too, but it's the safe approach and you've already converted all tests so let's leave it in. Just something to think about in the future if our test run times are too long (again...).

regex = (r"Box padding \(currently set at 1\.0\) is not used "
r"in user defined grids\.")
with pytest.warns(UserWarning, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=100.0, ydim=100.0, zdim=100.0, padding=1.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)

def test_warn_userdefn_smallgrid(self, universe):
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=100.0,
ydim=100.0,
zdim=100.0,
padding=1.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_warn_userdefn_smallgrid(self, universe, client_DensityAnalysis):
regex = ("Atom selection does not fit grid --- "
"you may want to define a larger box")
with pytest.warns(UserWarning, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)

def test_ValueError_userdefn_gridcenter_shape(self, universe):
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=1.0,
ydim=2.0,
zdim=2.0,
padding=0.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_gridcenter_shape(
self, universe, client_DensityAnalysis
):
# Test len(gridcenter) != 3
with pytest.raises(ValueError, match="Gridcenter must be a 3D coordinate"):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0,
gridcenter=self.gridcenters['error1']).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=10.0,
ydim=10.0,
zdim=10.0,
gridcenter=self.gridcenters["error1"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_gridcenter_type(self, universe):
def test_ValueError_userdefn_gridcenter_type(
self, universe, client_DensityAnalysis
):
# Test gridcenter includes non-numeric strings
with pytest.raises(ValueError, match="Gridcenter must be a 3D coordinate"):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0,
gridcenter=self.gridcenters['error2']).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=10.0,
ydim=10.0,
zdim=10.0,
gridcenter=self.gridcenters["error2"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_gridcenter_missing(self, universe):
def test_ValueError_userdefn_gridcenter_missing(
self, universe, client_DensityAnalysis
):
# Test no gridcenter provided when grid dimensions are given
regex = ("Gridcenter or grid dimensions are not provided")
with pytest.raises(ValueError, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=10.0,
ydim=10.0,
zdim=10.0,
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_xdim_type(self, universe):
def test_ValueError_userdefn_xdim_type(self, universe,
client_DensityAnalysis):
# Test xdim != int or float
with pytest.raises(ValueError, match="xdim, ydim, and zdim must be numbers"):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim="MDAnalysis", ydim=10.0, zdim=10.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim="MDAnalysis",
ydim=10.0,
zdim=10.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_userdefn_xdim_nanvalue(self, universe):
def test_ValueError_userdefn_xdim_nanvalue(self, universe,
client_DensityAnalysis):
# Test xdim set to NaN value
regex = ("Gridcenter or grid dimensions have NaN element")
with pytest.raises(ValueError, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']),
delta=self.delta, xdim=np.nan, ydim=10.0, zdim=10.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)
universe.select_atoms(self.selections["static"]),
delta=self.delta,
xdim=np.nan,
ydim=10.0,
zdim=10.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_warn_noatomgroup(self, universe):
def test_warn_noatomgroup(self, universe, client_DensityAnalysis):
regex = ("No atoms in AtomGroup at input time frame. "
"This may be intended; please ensure that "
"your grid selection covers the atomic "
"positions you wish to capture.")
with pytest.warns(UserWarning, match=regex):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['none']),
delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0,
gridcenter=self.gridcenters['static_defined']).run(step=5)

def test_ValueError_noatomgroup(self, universe):
universe.select_atoms(self.selections["none"]),
delta=self.delta,
xdim=1.0,
ydim=2.0,
zdim=2.0,
padding=0.0,
gridcenter=self.gridcenters["static_defined"],
).run(step=5, **client_DensityAnalysis)

def test_ValueError_noatomgroup(self, universe, client_DensityAnalysis):
with pytest.raises(ValueError, match="No atoms in AtomGroup at input"
" time frame. Grid for density"
" could not be automatically"
Expand All @@ -374,12 +433,13 @@ def test_ValueError_noatomgroup(self, universe):
" defined grid will "
"need to be provided instead."):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['none'])).run(step=5)
universe.select_atoms(self.selections["none"])
).run(step=5, **client_DensityAnalysis)

def test_warn_results_deprecated(self, universe):
def test_warn_results_deprecated(self, universe, client_DensityAnalysis):
D = density.DensityAnalysis(
universe.select_atoms(self.selections['static']))
D.run(stop=1)
D.run(stop=1, **client_DensityAnalysis)
wmsg = "The `density` attribute was deprecated in MDAnalysis 2.0.0"
with pytest.warns(DeprecationWarning, match=wmsg):
assert_equal(D.density.grid, D.results.density.grid)
Expand Down
Loading