From d67bccb98a1aa84fcd266fe93e3509c8dc26546a Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sat, 28 Dec 2024 20:43:10 -0700 Subject: [PATCH] ENH: add TPR position read support * Add support for reading positions from GMX `.tpr` files at `tpx` version `134`. --- package/MDAnalysis/coordinates/TPR.py | 73 +++++++++++++++++++ package/MDAnalysis/coordinates/__init__.py | 1 + package/MDAnalysis/topology/tpr/utils.py | 42 +++++++++++ .../MDAnalysisTests/coordinates/test_tpr.py | 34 +++++++++ 4 files changed, 150 insertions(+) create mode 100644 package/MDAnalysis/coordinates/TPR.py create mode 100644 testsuite/MDAnalysisTests/coordinates/test_tpr.py diff --git a/package/MDAnalysis/coordinates/TPR.py b/package/MDAnalysis/coordinates/TPR.py new file mode 100644 index 0000000000..56fcbfd79b --- /dev/null +++ b/package/MDAnalysis/coordinates/TPR.py @@ -0,0 +1,73 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 + +from . import base +from ..lib import util +from .timestep import Timestep +import MDAnalysis.topology.tpr.utils as tpr_utils +import MDAnalysis.topology.tpr.setting as S + + +import numpy as np + + +class TPRReader(base.SingleFrameReaderBase): + # TODO: reduce duplication with `TPRparser`; + # we could share some state for the position + # in the binary file to avoid re-reading topology + # or perhaps combine the topology and coordinate reading + # with some inheritance shenanigans? + format = "TPR" + units = {"length": "nm"} + _Timestep = Timestep + + def _read_first_frame(self): + # Read header/move over topology + # TODO: reduce duplication with TPRparser perhaps... + with util.openany(self.filename, mode='rb') as infile: + tprf = infile.read() + data = tpr_utils.TPXUnpacker(tprf) + try: + th = tpr_utils.read_tpxheader(data) # tpxheader + except (EOFError, ValueError): + msg = f"{self.filename}: Invalid tpr file or cannot be recognized" + logger.critical(msg) + raise IOError(msg) + + self.ts = ts = self._Timestep(th.natoms, **self._ts_kwargs) + self.n_atoms = th.natoms + + # Starting with gromacs 2020 (tpx version 119), the body of the file + # is encoded differently. We change the unpacker accordingly. + if th.fver >= S.tpxv_AddSizeField and th.fgen >= 27: + actual_body_size = len(data.get_buffer()) - data.get_position() + if actual_body_size == 4 * th.sizeOfTprBody: + # See issue #2428. + msg = ( + "TPR files produced with beta versions of gromacs 2020 " + "are not supported." + ) + logger.critical(msg) + raise IOError(msg) + data = tpr_utils.TPXUnpacker2020.from_unpacker(data) + + state_ngtc = th.ngtc # done init_state() in src/gmxlib/tpxio.c + if th.bBox: + tpr_utils.extract_box_info(data, th.fver) + + if state_ngtc > 0: + if th.fver < 69: # redundancy due to different versions + tpr_utils.ndo_real(data, state_ngtc) + tpr_utils.ndo_real(data, state_ngtc) # relevant to Berendsen tcoupl_lambda + + if th.bTop: + tpr_top = tpr_utils.do_mtop(data, th.fver, + tpr_resid_from_one=True) + else: + msg = f"{self.filename}: No topology found in tpr file" + logger.critical(msg) + raise IOError(msg) + + if th.bX: + self.ts._pos = np.asarray(tpr_utils.ndo_rvec(data, th.natoms), + dtype=np.float32) diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 602621e5ad..a8ec94526d 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -776,6 +776,7 @@ class can choose an appropriate reader automatically. from . import PDB from . import PDBQT from . import PQR +from . import TPR from . import TRC from . import TRJ from . import TRR diff --git a/package/MDAnalysis/topology/tpr/utils.py b/package/MDAnalysis/topology/tpr/utils.py index 98b4a4bb84..4a4d03bc24 100644 --- a/package/MDAnalysis/topology/tpr/utils.py +++ b/package/MDAnalysis/topology/tpr/utils.py @@ -423,6 +423,48 @@ def do_mtop(data, fver, tpr_resid_from_one=False): elements = Elements(np.array(elements, dtype=object)) top.add_TopologyAttr(elements) + # NOTE: the tpr striding code below serves the + # purpose of placing us in a suitable "seek" position + # in the binary file such that we are well placed + # for reading coords and velocities if they are present + # the order of operations is based on an analysis of + # the C++ code in do_mtop() function in the GROMACS + # source at: + # src/gromacs/fileio/tpxio.cpp + # TODO: expand tpx version support for striding to + # the coordinates + if fver == 134: + # TODO: the following value is important, and not sure + # how to access programmatically yet... + # from GMX source code: + # api/legacy/include/gromacs/topology/topology_enums.h + # worst case scenario we hard code it based on + # tpx/GMX version? + SimulationAtomGroupType_size = 10 + n_atoms = data.unpack_int() + interm = data.unpack_uchar() + ngrid = data.unpack_int() + grid_spacing = data.unpack_int() + n_elements = grid_spacing ** 2 + for i in range(ngrid): + for j in range(n_elements): + ndo_real(data, 4) + for i in range(SimulationAtomGroupType_size): + group_size = data.unpack_int() + ndo_int(data, group_size) + n_group_names = data.unpack_int() + for i in range(n_group_names): + data.unpack_int() + for i in range(SimulationAtomGroupType_size): + n_grp_numbers = data.unpack_int() + if n_grp_numbers != 0: + for i in range(n_grp_numbers): + data.unpack_uchar() + im_excl_grp_size = data.unpack_int() + ndo_int(data, im_excl_grp_size) + # TODO: why is this needed? + data.unpack_int() + return top diff --git a/testsuite/MDAnalysisTests/coordinates/test_tpr.py b/testsuite/MDAnalysisTests/coordinates/test_tpr.py new file mode 100644 index 0000000000..e3d2d2c28b --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_tpr.py @@ -0,0 +1,34 @@ +from MDAnalysisTests.datafiles import (TPR2024_4_bonded, + TPR_EXTRA_2024_4, + TPR2024_4) +import MDAnalysis as mda + + +import pytest +from numpy.testing import assert_allclose, assert_equal + + +@pytest.mark.parametrize("tpr_file, exp_first_atom, exp_last_atom, exp_shape", [ + (TPR2024_4_bonded, + [4.446, 4.659, 2.384], + [4.446, 4.659, 2.384], + (14, 3), + ), + # same coordinates, different shape + (TPR_EXTRA_2024_4, + [4.446, 4.659, 2.384], + [4.446, 4.659, 2.384], + (18, 3), + ), + # different coordinates and different shape + (TPR2024_4, + [3.25000e-01, 1.00400e+00, 1.03800e+00], + [-2.56000e-01, 1.37300e+00, 3.59800e+00], + (2263, 3), + ), +]) +def test_basic_read_tpr(tpr_file, exp_first_atom, exp_last_atom, exp_shape): + u = mda.Universe(tpr_file) + assert_allclose(u.atoms.positions[0, ...], exp_first_atom) + assert_allclose(u.atoms.positions[-1, ...], exp_last_atom) + assert_equal(u.atoms.positions.shape, exp_shape)