diff --git a/.gitattributes b/.gitattributes index 7819397f84..0f32c09dfe 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,9 @@ +# Set default behaviour, in case users don't have core.autocrlf set. +* text=auto + +# Whitespace +* whitespace=tab-in-indent,space-before-tab,trailing-space,tabwidth=2 +*.{py,pyx,pxd,pxi} whitespace=tab-in-indent,space-before-tab,trailing-space,tabwidth=4 +Makefile whitespace=space-before-tab,trailing-space,tabwidth=2 +pyop2/_version.py export-subst firedrake/_version.py export-subst diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0fff428f43..909db6990a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -96,7 +96,7 @@ jobs: . ../firedrake_venv/bin/activate echo OMP_NUM_THREADS is "$OMP_NUM_THREADS" echo OPENBLAS_NUM_THREADS is "$OPENBLAS_NUM_THREADS" - python -m pytest -v tests/test_0init.py + python -m pytest -v tests/firedrake/test_0init.py python -m pytest \ --durations=200 \ --timeout=1800 \ @@ -104,7 +104,7 @@ jobs: -o faulthandler_timeout=1860 \ -n 12 --dist worksteal \ --junit-xml=firedrake.xml \ - -sv tests + -sv tests/firedrake timeout-minutes: 120 - name: Publish Test Report uses: mikepenz/action-junit-report@v5.0.0-a02 diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml new file mode 100644 index 0000000000..c358b70141 --- /dev/null +++ b/.github/workflows/pip.yml @@ -0,0 +1,129 @@ +name: Pip install Firedrake + +on: + # Push to master or PR + push: + branches: + - master + pull_request: + +concurrency: + # Cancels jobs running if new commits are pushed + group: > + ${{ github.workflow }}- + ${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + build: + name: "Build Firedrake using pip" + runs-on: ubuntu-latest + container: + image: firedrakeproject/firedrake-env:latest + options: --user root + defaults: + run: + shell: bash + working-directory: /home/firedrake + strategy: + # Don't immediately kill real if 
complex fails and vice versa. + fail-fast: false + matrix: + include: + - scalar-type: real + petsc_arch: default + - scalar-type: complex + petsc_arch: complex + env: + # PETSC_DIR, HDF5_DIR and MPICH_DIR are set inside the docker image + FIREDRAKE_CI_TESTS: 1 + PYOP2_CI_TESTS: 1 + PETSC_ARCH: ${{ matrix.petsc_arch }} + OMP_NUM_THREADS: 1 + OPENBLAS_NUM_THREADS: 1 + RDMAV_FORK_SAFE: 1 + steps: + - name: Cleanup + if: ${{ always() }} + run: rm -rf pip_venv + + - name: Create a venv + run: | + python3 -m venv pip_venv + ln -s /__w/firedrake/firedrake/src pip_venv/ + + - uses: actions/checkout@v4 + with: + path: src/firedrake + + - name: Install libsupermesh + run: | + source pip_venv/bin/activate + python -m pip install 'rtree>=1.2' + cd pip_venv/src + git clone https://github.com/firedrakeproject/libsupermesh.git + mkdir -p libsupermesh/build + cd libsupermesh/build + cmake .. \ + -DBUILD_SHARED_LIBS=ON \ + -DCMAKE_INSTALL_PREFIX="$VIRTUAL_ENV" \ + -DMPI_C_COMPILER="$MPICH_DIR/mpicc" \ + -DMPI_CXX_COMPILER="$MPICH_DIR/mpicxx" \ + -DMPI_Fortran_COMPILER="$MPICH_DIR/mpif90" \ + -DCMAKE_Fortran_COMPILER="$MPICH_DIR/mpif90" \ + -DMPIEXEC_EXECUTABLE="$MPICH_DIR/mpiexec" + make + make install + + - name: Pip install + run: | + source pip_venv/bin/activate + cd pip_venv/src + export CC="$MPICH_DIR/mpicc" + export CXX="$MPICH_DIR/mpicxx" + export MPICC="$MPICH_DIR/mpicc" + export MPI_HOME="$PETSC_DIR/packages" + pip install \ + --log=firedrake-install.log \ + --no-binary mpi4py,h5py \ + -v -e './firedrake[test]' + + - name: Add mpiexec to the venv and install timeout + run: | + source pip_venv/bin/activate + cat << EOF > "$VIRTUAL_ENV/bin/mpiexec" + #!/bin/bash + "$MPICH_DIR"/mpiexec "\$@" + EOF + chmod +x "$VIRTUAL_ENV"/bin/mpiexec + pip install -U pytest-timeout + + - name: Run Firedrake smoke tests + run: | + source pip_venv/bin/activate + cd pip_venv/src/firedrake + pytest -v tests/firedrake/test_0init.py + pytest \ + --durations=200 \ + --timeout=1800 \ + 
--timeout-method=thread \ + -o faulthandler_timeout=1860 \ + -n 12 --dist worksteal \ + --junit-xml=firedrake.xml \ + -sv tests/firedrake/regression -k "poisson_strong or stokes_mini or dg_advection" + timeout-minutes: 120 + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v5.0.0-a02 + if: ${{ always() && ( github.ref != 'refs/heads/master') }} + with: + report_paths: '/home/firedrake/pip_venv/src/firedrake/firedrake.xml' + comment: true + check_name: "Firedrake ${{ matrix.scalar-type }}" + updateComment: true + flaky_summary: true + + - name: Cleanup + # Belt and braces: clean up before and after the run. + if: ${{ always() }} + run: rm -rf pip_venv diff --git a/.github/workflows/pyop2.yml b/.github/workflows/pyop2.yml new file mode 100644 index 0000000000..36065accf1 --- /dev/null +++ b/.github/workflows/pyop2.yml @@ -0,0 +1,119 @@ +name: PyOP2 + +# Trigger the workflow on push or pull request, +# but only for the master branch +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + test: + runs-on: ubuntu-latest + strategy: + # Don't immediately kill all if one Python version fails + fail-fast: false + matrix: + python-version: ['3.10', '3.11', '3.12', '3.13'] + env: + CC: mpicc + PETSC_DIR: ${{ github.workspace }}/petsc + PETSC_ARCH: default + RDMAV_FORK_SAFE: 1 + PYOP2_CI_TESTS: 1 + timeout-minutes: 60 + + steps: + - name: Install system dependencies + shell: bash + run: | + sudo apt update + sudo apt install build-essential mpich libmpich-dev \ + libblas-dev liblapack-dev gfortran libhwloc-dev libfabric-dev + + - name: Set correct Python version + uses: actions/setup-python@v5 + id: setup-python + with: + python-version: ${{ matrix.python-version }} + # By default setup-python pollutes the environment in such a way that virtual + # environments cannot be used. This prevents us from building libsupermesh because + # it relies on having rtree installed into a venv. 
+ # https://github.com/actions/setup-python/issues/851 + # https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md#using-update-environment-flag + update-environment: false + + - name: Create virtual environment + shell: bash + run: | + ${{ steps.setup-python.outputs.python-path }} -m venv venv + + - name: Clone PETSc + uses: actions/checkout@v4 + with: + repository: firedrakeproject/petsc + path: ${{ env.PETSC_DIR }} + + - name: Build and install PETSc + shell: bash + working-directory: ${{ env.PETSC_DIR }} + run: | + ./configure \ + --download-hdf5 \ + --with-debugging=1 \ + --with-shared-libraries=1 \ + --with-c2html=0 \ + --with-fortran-bindings=0 + make + + - name: Install libsupermesh + shell: bash + run: | + source venv/bin/activate + python -m pip install 'rtree>=1.2' + git clone https://github.com/firedrakeproject/libsupermesh.git + mkdir -p libsupermesh/build + cd libsupermesh/build + cmake .. \ + -DBUILD_SHARED_LIBS=ON \ + -DCMAKE_INSTALL_PREFIX="$VIRTUAL_ENV" \ + -DMPI_C_COMPILER=mpicc \ + -DMPI_CXX_COMPILER=mpicxx \ + -DMPI_Fortran_COMPILER=mpif90 \ + -DCMAKE_Fortran_COMPILER=mpif90 \ + -DMPIEXEC_EXECUTABLE=mpiexec + make + make install + + - name: Checkout PyOP2 + uses: actions/checkout@v4 + with: + path: PyOP2 + + - name: Install PyOP2 dependencies + shell: bash + working-directory: PyOP2 + run: | + source ../venv/bin/activate + python -m pip install -U pip + python -m pip install -U pytest-timeout + + - name: Install PyOP2 + shell: bash + working-directory: PyOP2 + run: | + source ../venv/bin/activate + python -m pip install -v ".[test]" + + - name: Run tests + shell: bash + working-directory: PyOP2 + run: | + source ../venv/bin/activate + # Running parallel test cases separately works around a bug in pytest-mpi + pytest -k "not parallel" --tb=native --timeout=480 --timeout-method=thread -o faulthandler_timeout=540 -v tests/pyop2 + mpiexec -n 3 pytest -k "parallel[3]" --tb=native --timeout=480 --timeout-method=thread -o 
faulthandler_timeout=540 -v tests/pyop2 + timeout-minutes: 10 diff --git a/.gitignore b/.gitignore index 0db0bd8d5d..86fa965e18 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,34 @@ __pycache__/ /firedrake_configuration/configuration.json /firedrake.egg-info /docs/source/element_list.csv + + +# Build +build +dist +MANIFEST +PyOP2.egg-info +*.py[cdo] + +# Extension modules +sparsity.so +sparsity.c +sparsity.cpython*.so +# Docs +pyop2.coffee.rst +pyop2.rst +pyop2.pdf +pyop2.aux +pyop2.log + +# Testing +.pytest-incremental +.tox +.vagrant + +# Meshes +*.edge +*.ele +*.msh +*.node +*.geo diff --git a/.mailmap b/.mailmap index f1e23ff294..58cb7ada9c 100644 --- a/.mailmap +++ b/.mailmap @@ -124,3 +124,22 @@ Joe Wallwork Jingmin Xia Jingmin Xia <48932582+jingminxia@users.noreply.github.com> +Gheorghe-Teodor Bercea +George Boutsioukis +David A Ham +David A Ham +Miklós Homolya +Nicolas Loriant +Nicolas Loriant +Nicolas Loriant +Nicolas Loriant +Nicolas Loriant +Fabio Luporini +Graham Markall +Graham Markall +Andrew McRae +Andrew McRae +Lawrence Mitchell +Lawrence Mitchell +Kaho Sato +Reuben W. Nixon-Hill diff --git a/LICENSE b/LICENSE index 35cd706863..105d90b465 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,4 @@ +Firedrake: Firedrake is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your @@ -11,3 +12,33 @@ License for more details. You should have received a copy of the GNU Lesser General Public License along with Firedrake. If not, see . +PyOP2: +Copyright (c) 2012, Imperial College London and others. Please see the +AUTHORS file in the main source directory for a full list of copyright +holders. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * The name of Imperial College London or that of other + contributors may not be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. 
diff --git a/MANIFEST.in b/MANIFEST.in index 501b3be8aa..1b367e86bc 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,5 @@ include versioneer.py include firedrake/_version.py +recursive-include pyop2 *.c +include pyop2/_version.py + diff --git a/Makefile b/Makefile index f2108a7b16..033504d3ef 100644 --- a/Makefile +++ b/Makefile @@ -13,12 +13,17 @@ else endif lint: - @echo " Linting firedrake codebase" + @echo " Linting firedrake" @python -m flake8 $(FLAKE8_FORMAT) firedrake - @echo " Linting firedrake test suite" - @python -m flake8 $(FLAKE8_FORMAT) tests @echo " Linting firedrake scripts" + @python -m flake8 $(FLAKE8_FORMAT) firedrake/scripts --filename=* @python -m flake8 $(FLAKE8_FORMAT) scripts --filename=* + @echo " Linting firedrake tests" + @python -m flake8 $(FLAKE8_FORMAT) tests + @echo " Linting PyOP2" + @python -m flake8 $(FLAKE8_FORMAT) pyop2 + @echo " Linting PyOP2 scripts" + @python -m flake8 $(FLAKE8_FORMAT) pyop2/scripts --filename=* actionlint: @echo " Pull latest actionlint image" @@ -65,6 +70,10 @@ clean: -@rm -f firedrake/cython/mg/impl.so > /dev/null 2>&1 @echo " RM firedrake/cython/mg/impl.c" -@rm -f firedrake/cython/mg/impl.c > /dev/null 2>&1 + @echo " RM pyop2/*.so" + -@rm -f pyop2/*.so > /dev/null 2>&1 + @echo " RM tinyasm/*.so" + -@rm -f tinyasm/*.so > /dev/null 2>&1 THREADS=1 @@ -76,15 +85,15 @@ endif test_regression: modules @echo " Running non-extruded regression tests" - @python -m pytest tests/regression $(PYTEST_ARGS) + @python -m pytest tests/firedrake/regression $(PYTEST_ARGS) test_extrusion: modules @echo " Running extruded regression tests" - @python -m pytest tests/extrusion $(PYTEST_ARGS) + @python -m pytest tests/firedrake/extrusion $(PYTEST_ARGS) test_demos: modules @echo " Running test of demos" - @python -m pytest tests/demos $(PYTEST_ARGS) + @python -m pytest tests/firedrake/demos $(PYTEST_ARGS) test: modules @echo " Running all regression tests" @@ -94,4 +103,4 @@ alltest: modules lint test shorttest: modules lint 
@echo " Running short regression tests" - @python -m pytest --short tests $(PYTEST_ARGS) + @python -m pytest --short tests/firedrake $(PYTEST_ARGS) diff --git a/demos/saddle_point_pc/saddle_point_systems.py.rst b/demos/saddle_point_pc/saddle_point_systems.py.rst index 60d7133ebd..9c8325c5fd 100644 --- a/demos/saddle_point_pc/saddle_point_systems.py.rst +++ b/demos/saddle_point_pc/saddle_point_systems.py.rst @@ -613,6 +613,5 @@ A runnable python script version of this demo is available :demo:`here .. _PETSc: https://petsc.org/ .. _hypre: https://hypre.readthedocs.io/en/latest/ -.. _PyOP2: https://github.com/OP2/PyOP2/ .. _numpy: https://www.numpy.org .. _MUMPS: https://mumps-solver.org/index.php diff --git a/docs/Makefile b/docs/Makefile index b5a682d1bf..55a8611c57 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -49,9 +49,9 @@ publishpdf: env FIREDRAKE_MANUAL_RELEASE=`date +%Y-%m` $(MAKE) latexpdf validate-bibtex: - ../scripts/firedrake-preprocess-bibtex --validate source/_static/bibliography.bib - ../scripts/firedrake-preprocess-bibtex --validate source/_static/firedrake-apps.bib - ../scripts/firedrake-preprocess-bibtex --validate source/_static/references.bib + ../firedrake/scripts/firedrake_preprocess_bibtex.py --validate source/_static/bibliography.bib + ../firedrake/scripts/firedrake_preprocess_bibtex.py --validate source/_static/firedrake-apps.bib + ../firedrake/scripts/firedrake_preprocess_bibtex.py --validate source/_static/references.bib livehtml: sphinx-autobuild -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html diff --git a/docs/source/conf.py b/docs/source/conf.py index e47118391f..bcf7580176 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -101,7 +101,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = [] +exclude_patterns = ['old_pyop2'] # The reST default role (used for this markup: `text`) to use for all documents. 
#default_role = None diff --git a/docs/source/documentation.rst b/docs/source/documentation.rst index 34a43da723..277770b435 100644 --- a/docs/source/documentation.rst +++ b/docs/source/documentation.rst @@ -19,7 +19,6 @@ * `Firedrake on GitHub `__ * `TSFC on GitHub `__ - * `PyOP2 on GitHub `__ * `FIAT on GitHub `__ * `Firedrake version of UFL on GitHub `__ diff --git a/docs/source/download.rst b/docs/source/download.rst index 4b9a2cfbd0..30c37e78b0 100644 --- a/docs/source/download.rst +++ b/docs/source/download.rst @@ -61,7 +61,7 @@ that Firedrake is fully functional. Activate the venv_ as above and then run:: cd $VIRTUAL_ENV/src/firedrake - pytest tests/regression/ -k "poisson_strong or stokes_mini or dg_advection" + pytest tests/firedrake/regression/ -k "poisson_strong or stokes_mini or dg_advection" This command will run a few of the unit tests, which exercise a good chunk of the functionality of the library. These tests should take a @@ -104,6 +104,8 @@ gives a full list of update options. For instance additional Firedrake packages can be installed into an existing Firedrake installation using ``firedrake-update``. +.. _system-requirements: + System requirements ------------------- @@ -227,6 +229,49 @@ type:: You should now be able to run ``firedrake-update``. +Installing Firedrake with pip (experimental, Linux only) +-------------------------------------------------------- + +Firedrake has experimental support for installing using ``pip``, avoiding the need for the ``firedrake-install`` script. At present only Linux is tested using this install method. + +Requirements +~~~~~~~~~~~~ + +* An activated virtual environment. +* All the system requirements listed in :ref:`system-requirements`. +* A Firedrake-compatible PETSc installation (using our `fork of PETSc `_). The set of flags passed to PETSc can be retrieved by passing the command ``--show-petsc-configure-options`` to ``firedrake-install``. 
+* `libsupermesh `_ to be installed inside the virtual environment (see `here `_ for an example of how to do this). +* The following environment variables to be set: + + * ``PETSC_DIR`` and ``PETSC_ARCH`` to point to the correct location for the PETSc installation. + * ``HDF5_DIR`` to ``$PETSC_DIR/$PETSC_ARCH``. + * ``CC`` and ``MPICC`` to point to the ``mpicc`` compiler wrapper. + * ``CXX`` to point to the ``mpicxx`` compiler wrapper. + +Installation +~~~~~~~~~~~~ + +Having set up this environment, Firedrake can now be installed with the command:: + + pip install --no-binary mpi4py,h5py git+https://github.com/firedrakeproject/firedrake.git + +Removing Firedrake +------------------ +Firedrake and its dependencies can be removed by deleting the Firedrake +install directory. This is usually the ``firedrake`` subdirectory +created after having run ``firedrake-install``. Note that this will not +undo the installation of any system packages which are Firedrake +dependencies: removing these might affect subsequently installed +packages for which these are also dependencies. + +.. _Paraview: http://www.paraview.org +.. _venv: https://docs.python.org/3/tutorial/venv.html +.. _homebrew: https://brew.sh/ +.. _PETSc: https://www.mcs.anl.gov/petsc/ +.. _discussions: https://github.com/firedrakeproject/firedrake/discussions +.. _issue: https://github.com/firedrakeproject/firedrake/issues +.. _WSL: https://github.com/firedrakeproject/firedrake/wiki/Installing-on-Windows-Subsystem-for-Linux + Visualisation software ---------------------- @@ -254,20 +299,3 @@ and can be built by executing:: This will generate the HTML documentation (this website) on your local machine. - -Removing Firedrake ------------------- -Firedrake and its dependencies can be removed by deleting the Firedrake -install directory. This is usually the ``firedrake`` subdirectory -created after having run ``firedrake-install``. 
Note that this will not -undo the installation of any system packages which are Firedrake -dependencies: removing these might affect subsequently installed -packages for which these are also dependencies. - -.. _Paraview: http://www.paraview.org -.. _venv: https://docs.python.org/3/tutorial/venv.html -.. _homebrew: https://brew.sh/ -.. _PETSc: https://www.mcs.anl.gov/petsc/ -.. _discussions: https://github.com/firedrakeproject/firedrake/discussions -.. _issue: https://github.com/firedrakeproject/firedrake/issues -.. _WSL: https://github.com/firedrakeproject/firedrake/wiki/Installing-on-Windows-Subsystem-for-Linux diff --git a/docs/source/interpolation.rst b/docs/source/interpolation.rst index c1629995bc..e604a1fbf9 100644 --- a/docs/source/interpolation.rst +++ b/docs/source/interpolation.rst @@ -25,18 +25,18 @@ where :math:`\bar{\phi}^*_i` is the :math:`i`-th dual basis function to The extension of dual basis functions to :math:`e` usually follows from the definition of the dual basis. For example, point evaluation and integral nodes can naturally be extended to any expression which is evaluatable at - the relevant points, or integrable over that domain. - + the relevant points, or integrable over that domain. + Firedrake will not impose any constraints on the expression to be interpolated beyond that its value shape matches that of the space into which it is interpolated. If the user interpolates an expression for which the nodes are not well defined (for example point evaluation at a - discontinuity), the result is implementation-dependent. + discontinuity), the result is implementation-dependent. The interpolate operator ------------------------ -.. note:: +.. note:: The semantics for interpolation in Firedrake are in the course of changing. The documentation provided here is for the new behaviour, in which the `interpolate` operator is symbolic. 
In order to access the behaviour @@ -50,7 +50,7 @@ The interpolate operator The basic syntax for interpolation is: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_interpolate_operator 1] @@ -59,7 +59,7 @@ The basic syntax for interpolation is: It is also possible to interpolate an expression directly into an existing :py:class:`~.Function`: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_interpolate_operator 3] @@ -67,7 +67,7 @@ It is also possible to interpolate an expression directly into an existing This is a numerical operation, equivalent to: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_interpolate_operator 5] @@ -93,7 +93,7 @@ including: Here is an example demonstrating some of these features: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_interpolate_operator 7] @@ -102,7 +102,7 @@ Here is an example demonstrating some of these features: This also works as expected when interpolating into a a space defined on the facets of the mesh: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_interpolate_operator 9] @@ -155,13 +155,13 @@ surface or line of interest in the domain. 
The integral itself is calculated by calling :py:func:`~.assemble` on an approriate form over the target mesh function space: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_line_integral 1] :end-before: [test_line_integral 2] -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_line_integral 3] @@ -181,13 +181,13 @@ Interpolating onto other meshes If the target mesh extends outside the source mesh domain, then cross-mesh interpolation will raise a :py:class:`~.DofNotDefinedError`. -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 1] :end-before: [test_cross_mesh 2] -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 3] @@ -196,13 +196,13 @@ interpolation will raise a :py:class:`~.DofNotDefinedError`. This can be overriden with the optional ``allow_missing_dofs`` keyword argument: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 5] :end-before: [test_cross_mesh 6] -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. 
literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 7] @@ -211,7 +211,7 @@ argument: In this case, the missing degrees of freedom (DoFs, the global basis function coefficients which could not be set) are, by default, set to zero: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 9] @@ -223,7 +223,7 @@ unmodified. We can optionally specify a value to use for our missing DoFs. Here we set them to be ``nan`` ('not a number') for easy identification: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 11] @@ -235,7 +235,7 @@ DoFs. When using :py:class:`~.Interpolator`\s, the ``allow_missing_dofs`` keyword argument is set at construction: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 13] @@ -244,7 +244,7 @@ argument is set at construction: The ``default_missing_val`` keyword argument is then set whenever we call :py:meth:`~.Interpolator.interpolate`: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 15] @@ -254,19 +254,19 @@ If we supply an output :py:class:`~.Function` and don't set ``default_missing_val`` then any missing DoFs are left as they were prior to interpolation: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. 
literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 17] :end-before: [test_cross_mesh 18] -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 19] :end-before: [test_cross_mesh 20] -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_cross_mesh 21] @@ -294,7 +294,7 @@ now. In this case, interpolation into a target function space ``V`` proceeds as follows: -.. literalinclude:: ../../tests/regression/test_interpolation_manual.py +.. literalinclude:: ../../tests/firedrake/regression/test_interpolation_manual.py :language: python3 :dedent: :start-after: [test_interpolate_external 1] diff --git a/docs/source/old_pyop2/Makefile b/docs/source/old_pyop2/Makefile new file mode 100644 index 0000000000..5d0b7b4ba9 --- /dev/null +++ b/docs/source/old_pyop2/Makefile @@ -0,0 +1,2 @@ +all: + pdflatex pyop2.tex diff --git a/docs/source/old_pyop2/pyop2.tex b/docs/source/old_pyop2/pyop2.tex new file mode 100644 index 0000000000..3e911bb1a0 --- /dev/null +++ b/docs/source/old_pyop2/pyop2.tex @@ -0,0 +1,114 @@ +\documentclass[a4paper]{article} + +\usepackage{fullpage} + +\author{Graham Markall} +\title{PyOP2 Draft Proposal} + + +\begin{document} + +\maketitle + +\section{Motivation} + +This is part of an attempt at defining an implementation of OP2 that generates code at runtime (later referred to as PyOP2, for reasons which will be explained later). Coarsely, the compile-time translator iterates over \verb|op_par_loop| calls in the source code and performs the following operations: + +\begin{itemize} +\item Generates a host stub for the kernel that is called. 
+\item Generates a wrapper around the OP2 kernel that, for example, stages data into and out of shared memory. +\item Inserts a call to the original OP2 kernel inline in the generated wrapper, but leaves the kernel untouched. +\end{itemize} + +\noindent The OP2 runtime manages: + +\begin{itemize} +\item Transfer of data to/from the device. +\item Planning parallel execution. +\item Invoking the host stubs for kernels. +\end{itemize} + +The question of which parts of the ROSE-based translator should be used arises. The position outlined in this document is that: + +\begin{itemize} +\item The code that performs the generation of the host stub should be replaced by support in the runtime that calls the plan function and executes the kernel for each colour according to the plan. +\item The plan function from OP2 should be re-used as-is. +\item Since this leaves effectively no source-to-source transformation to perform (only inserting an essentially unmodified kernel into generated code) it should be possible to avoid the use of ROSE altogether. Should transformation need to be performed on OP2 kernels in future, this functionality may be added, either by integrating ROSE or using a simpler framework, since the operations performed in a kernel are limited to a fairly restricted subset of C/CUDA. +\item In order to speed up development and to ease maintenance and integration with MCFC and Fluidity, a sensible choice of language for the re-implementation is Python (hence PyOP2). +\end{itemize} + +The remainder of this document describes the PyOP2 API, and how this API may be implemented. One may also refer to the implementation folder in the same repository as this document, for a skeleton API implementation and a complete (though non-functioning without an API implementation) version of the Airfoil code written using PyOP2. 
+ +\section{API} + +\subsection{Declaring data} + +Each data item is an instance of an object of one of the types \verb|Set|, \verb|Dat|, \verb|Mat|, \verb|Map|, \verb|Global| or \verb|Const|. Each of these objects may be constructed as follows: + +\begin{description} + \item[\texttt{Set(size, name)}] Construct a set with \verb|size| elements named \verb|name|. The name is for debugging purposes. + \item[\texttt{Dat(set, dim, type, data, name)}] Construct a dat that holds a data item of type \verb|type| and dimension \verb|dim| for each element of the set \verb|set|. The data specifies the data to initialise the dat with, and may be a list or tuple. The name is for debugging purposes. + \item[\texttt{Mat(row\_set, col\_set, dim, type, name)}] Construct a matrix which has entries that are the product of the two sets. The elements are of dimension \verb|dim| and type \verb|type|. The name is for debugging purposes. + \item[\texttt{Map(from, to, dim, values, name)}] Construct a mapping from one set to another. The \verb|dim| of the map indicates how many different relations between the two sets the map holds. \verb|values| is used to initialise the mapping, and may be a list or tuple. The name is used for debugging. + \item[\texttt{Global(name, val)}] Constructs a global value. The name is used for debugging purposes. \verb|val| is used to specify an initial value and may be a scalar, a list or a tuple. + \item[\texttt{Const(dim, type, value, name)}] Construct a constant value of dimension \verb|dim|, type \verb|type|, and value \verb|value|. The name is used for debugging purposes. +\end{description} + +\subsection{Declaring kernels} + +To construct a kernel object with name \verb|name|, that implements the code string \verb|code|: + +\begin{verbatim} +Kernel(name, code) +\end{verbatim} + +The name is used only for debugging purposes. The code is an OP2 kernel, with the same semantics as are used in the current implementations of OP2. 
+ +\subsection{Invoking a parallel loop} + +A parallel loop object is constructed with the following syntax: + +\begin{verbatim} +ParLoop(kernel, iteration_space, *args) +\end{verbatim} + +The arguments to the kernel are as follows: + +\begin{description} + \item[\texttt{kernel}] is a \verb|Kernel| object. + \item[\texttt{iteration\_space}] is an \verb|IterationSpace| object or a \verb|Set| object. + \item[\texttt{args}] is any number of \verb|Arg| objects. +\end{description} + +At the time of construction, the \verb|ParLoop| object proceeds with compiling the kernel if it is in the uncompiled state, and then checks if a plan has already been constructed for the given iteration space and access descriptors. If there is no suitable plan, then the planner is called. Once a plan has been obtained, the ParLoop object calls the kernel for each colour in the plan. + +The \verb|IterationSpace| object is used to declare an iteration space that consists of a set as well as extra indices over a local matrix or vector. For example, one may pass \verb|IterationSpace(elements, 3, 3)| when assembling a matrix over elements, or \verb|IterationSpace(elements, 3)| when assembling a vector. + +The \verb|Arg| class should not be used directly, but instead one of the subclasses of \verb|Arg| should be used: + +\begin{description} + \item[\texttt{ArgDat(dat, index, map, access)}] is used to pass a \verb|Dat| argument. The \verb|index| parameter selects which of the relations in the \verb|map| should be used to access the data indirectly. If the runtime system is to gather together all the values of the dat that are pointed to by all the different relations in the mapping, then \verb|idx_all| may be passed as the \verb|index| argument. If the dataset is to be accessed directly, then \verb|None| should be passed as the \verb|index| and \verb|map| parameters.
\verb|access| is one of \verb|read|, \verb|write|, \verb|inc| or \verb|rw|, with a meaning similar to that in the current OP2 implementation. + \item[\texttt{ArgMat(mat, row\_idx, row\_map, col\_idx, col\_map, access)}] is used to pass a \verb|Mat| argument. The index and map arguments are used similarly to those of \verb|ArgDat|, with the exception that the \verb|row_map| is used to index into the rows of the matrix and the \verb|col_map| is used to index into the columns of the matrix. The \verb|access| parameter works as for the \verb|ArgDat| case. + \item[\texttt{ArgGbl(var, access)}] is for passing a \verb|Global| argument. \verb|var| is an instance of a \verb|Global|, and \verb|access| specifies the access method in the same way as for the previous two cases. +\end{description} + +\section{Implementation considerations and issues} + +This is a list of notes for now: + +\begin{itemize} + \item All classes must be designed so that their representation uniquely describes an object with its particular state, in order for caching of compiled code to work. + \item There are several possibilities for implementing compilation and dynamic linking of code: + \begin{itemize} + \item Instant, from the FEniCS Project for compilation, caching and linking of CPU code + \item PyCUDA/PyOpenCL from Andreas Kl\"ockner for GPU/accelerator code + \item CodePy, also from Andreas Kl\"ockner for C/C++ code compilation and dynamic linking into the Python interpreter. + \end{itemize} + \item The possibilities for an interface allowing different OP2 backends to be implemented include: + \begin{itemize} + \item Each backend overrides the classes in \verb|op2.py| so that they implement the functionality required to run on their target. + \item We define a ``backend API'' that is used to implement a backend. The implementation of classes in \verb|op2.py| doesn't change, but instead it contains code to drive the backend.
This appears more preferable since I believe it will allow a cleaner separation between the user-facing API and the backend implementation. + \end{itemize} +\end{itemize} + +\end{document} diff --git a/docs/source/old_pyop2/sphinx/Makefile b/docs/source/old_pyop2/sphinx/Makefile new file mode 100644 index 0000000000..e7fc1d9eff --- /dev/null +++ b/docs/source/old_pyop2/sphinx/Makefile @@ -0,0 +1,160 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +APIDOCOPTS = -f +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean livehtml html dirhtml singlehtml pickle json htmlhelp qthelp \ +devhelp epub latex latexpdf text man changes linkcheck doctest gettext apidoc + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an 
overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +apidoc: + sphinx-apidoc ../../pyop2 -o source/ -T $(APIDOCOPTS) + +clean: + -rm -rf $(BUILDDIR)/* + +buildhtml: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + +html: apidoc buildhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: apidoc + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: apidoc + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: apidoc + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: apidoc + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: apidoc + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: apidoc + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PyOP2.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyOP2.qhc" + +devhelp: apidoc + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." 
+ @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/PyOP2" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PyOP2" + @echo "# devhelp" + +epub: apidoc + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: apidoc + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: apidoc + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: apidoc + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: apidoc + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: apidoc + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: apidoc + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: apidoc + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: apidoc + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." 
+ +linkcheck: apidoc + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: apidoc + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/docs/source/old_pyop2/sphinx/source/architecture.rst b/docs/source/old_pyop2/sphinx/source/architecture.rst new file mode 100644 index 0000000000..f14a6da10b --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/architecture.rst @@ -0,0 +1,76 @@ +.. _architecture: + +PyOP2 Architecture +================== + +As described in :ref:`concepts`, PyOP2 exposes an API that allows users to +declare the topology of unstructured meshes in the form of :class:`Sets +` and :class:`Maps ` and data in the form of +:class:`Dats `, :class:`Mats `, :class:`Globals +` and :class:`Consts `. Computations on this data +are described by :class:`Kernels ` described in :ref:`kernels` +and executed by :func:`parallel loops `. + +The API is the frontend to the PyOP2 runtime compilation architecture, which +supports the generation and just-in-time (JIT) compilation of low-level code +for a range of backends described in :doc:`backends` and the efficient +scheduling of parallel computations. A schematic overview of the PyOP2 +architecture is given below: + +.. figure:: images/pyop2_architecture.svg + :align: center + + Schematic overview of the PyOP2 architecture + +From an outside perspective, PyOP2 is a conventional Python library, with +performance critical library functions implemented in Cython_. A user's +application code makes calls to the PyOP2 API, most of which are conventional +library calls. The exception are :func:`~pyop2.par_loop` calls, which +encapsulate PyOP2's runtime core functionality performing backend-specific +code generation. 
Executing a parallel loop comprises the following steps: + +1. Compute a parallel execution plan, including information for efficient + staging of data and partitioning and colouring of the iteration set for + conflict-free parallel execution. This process is described in :doc:`plan` + and does not apply to the sequential backend. +2. Generate backend-specific code for executing the computation for a given + set of :func:`~pyop2.par_loop` arguments as detailed in :doc:`backends` + according to the execution plan computed in the previous step. +3. Pass the generated code to a backend-specific toolchain for just-in-time + compilation, producing a shared library callable as a Python module which + is dynamically loaded. This module is cached on disk to save recompilation + when the same :func:`~pyop2.par_loop` is called again for the same backend. +4. Build the backend-specific list of arguments to be passed to the generated + code, which may initiate host to device data transfer for the CUDA and + OpenCL backends. +5. Call into the generated module to perform the actual computation. For + distributed parallel computations this involves separate calls for the + regions owned by the current processor and the halo as described in + :doc:`mpi`. +6. Perform any necessary reductions for :class:`Globals `. +7. Call the backend-specific matrix assembly procedure on any + :class:`~pyop2.Mat` arguments. + +.. _backend-support: + +Multiple Backend Support +------------------------ + +The backend is selected by passing the keyword argument ``backend`` to the +:func:`~pyop2.init` function. If omitted, the ``sequential`` backend is +selected by default. This choice can be overridden by exporting the +environment variable ``PYOP2_BACKEND``, which allows switching backends +without having to touch the code. Once chosen, the backend cannot be changed +for the duration of the running Python interpreter session. 
+ +PyOP2 provides a single API to the user, regardless of which backend the +computations are running on. All classes and functions that form the public +API defined in :mod:`pyop2.op2` are interfaces, whose concrete implementations +are initialised according to the chosen backend. A metaclass takes care of +instantiating a backend-specific version of the requested class and setting +the corresponding docstrings such that this process is entirely transparent to +the user. The implementation of the PyOP2 backends is completely orthogonal to +the backend selection process and free to use established practices of +object-oriented design. + +.. _Cython: http://cython.org diff --git a/docs/source/old_pyop2/sphinx/source/backends.rst b/docs/source/old_pyop2/sphinx/source/backends.rst new file mode 100644 index 0000000000..189e4cf60e --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/backends.rst @@ -0,0 +1,457 @@ +.. _backends: + +PyOP2 Backends +============== + +PyOP2 provides a number of different backends to be able to run parallel +computations on different hardware architectures. The currently supported +backends are + +* ``sequential``: runs sequentially on a single CPU core. +* ``openmp``: runs multiple threads on an SMP CPU using OpenMP. The number of + threads is set with the environment variable ``OMP_NUM_THREADS``. +* ``cuda``: offloads computation to an NVIDIA GPU (requires :ref:`CUDA and pycuda + `) +* ``opencl``: offloads computation to an OpenCL device, either a multi-core + CPU or a GPU (requires :ref:`OpenCL and pyopencl `) + +Distributed parallel computations using MPI are supported by PyOP2 and +described in detail in :doc:`mpi`. Data structures must be partitioned among +MPI processes with overlapping regions, so-called halos. The host backends +``sequential`` and ``openmp`` have full MPI support, the device backends +``cuda`` and ``opencl`` only support parallel loops on :class:`Dats +<pyop2.Dat>`.
Hybrid parallel computations with OpenMP are possible, where +``OMP_NUM_THREADS`` threads are launched per MPI rank. + +.. _host_backends: + +Host backends +------------- + +Any computation in PyOP2 requires the generation of code at runtime specific +to each individual :func:`~pyop2.par_loop`. The host backends generate code +which is just-in-time (JIT) compiled into a shared library callable +via :mod:`ctypes`. The compilation procedure also takes care of +caching the compiled library on disk, such that the compilation cost +is not paid every time. + +.. _sequential_backend: + +Sequential backend +~~~~~~~~~~~~~~~~~~ + +Since there is no parallel computation for the sequential backend, the +generated code is a C wrapper function with a ``for`` loop calling the kernel +for the respective :func:`~pyop2.par_loop`. This wrapper also takes care of +staging in and out the data as requested by the access descriptors requested +in the parallel loop. Both the kernel and the wrapper function are +just-in-time compiled in a single compilation unit such that the kernel call +can be inlined and does not incur any function call overhead. + +Recall the :func:`~pyop2.par_loop` calling the ``midpoint`` kernel from +:doc:`kernels`: :: + + op2.par_loop(midpoint, cells, + midpoints(op2.WRITE), + coordinates(op2.READ, cell2vertex)) + +.. 
highlight:: c + :linenothreshold: 5 + +The JIT compiled code for this loop is the kernel followed by the generated +wrapper code: :: + + inline void midpoint(double p[2], double *coords[2]) { + p[0] = (coords[0][0] + coords[1][0] + coords[2][0]) / 3.0; + p[1] = (coords[0][1] + coords[1][1] + coords[2][1]) / 3.0; + } + + void wrap_midpoint__(PyObject *_start, PyObject *_end, + PyObject *_arg0_0, + PyObject *_arg1_0, PyObject *_arg1_0_map0_0) { + int start = (int)PyInt_AsLong(_start); + int end = (int)PyInt_AsLong(_end); + double *arg0_0 = (double *)(((PyArrayObject *)_arg0_0)->data); + double *arg1_0 = (double *)(((PyArrayObject *)_arg1_0)->data); + int *arg1_0_map0_0 = (int *)(((PyArrayObject *)_arg1_0_map0_0)->data); + double *arg1_0_vec[3]; + for ( int n = start; n < end; n++ ) { + int i = n; + arg1_0_vec[0] = arg1_0 + arg1_0_map0_0[i * 3 + 0] * 2; + arg1_0_vec[1] = arg1_0 + arg1_0_map0_0[i * 3 + 1] * 2; + arg1_0_vec[2] = arg1_0 + arg1_0_map0_0[i * 3 + 2] * 2; + midpoint(arg0_0 + i * 2, arg1_0_vec); + } + } + +Note that the wrapper function is called directly from Python and therefore +all arguments are plain Python objects, which first need to be unwrapped. The +arguments ``_start`` and ``_end`` define the iteration set indices to iterate +over. The remaining arguments are :class:`arrays ` +corresponding to a :class:`~pyop2.Dat` or :class:`~pyop2.Map` passed to the +:func:`~pyop2.par_loop`. Arguments are consecutively numbered to avoid name +clashes. + +The first :func:`~pyop2.par_loop` argument ``midpoints`` is direct and +therefore no corresponding :class:`~pyop2.Map` is passed to the wrapper +function and the data pointer is passed straight to the kernel with an +appropriate offset. The second argument ``coordinates`` is indirect and hence +a :class:`~pyop2.Dat`-:class:`~pyop2.Map` pair is passed. Pointers to the data +are gathered via the :class:`~pyop2.Map` of arity 3 and staged in the array +``arg1_0_vec``, which is passed to the kernel. 
The coordinate data can +therefore be accessed in the kernel via double indirection with the +:class:`~pyop2.Map` already applied. Note that for both arguments, the +pointers are to two consecutive double values, since the +:class:`~pyop2.DataSet` is of dimension two in either case. + +.. _openmp_backend: + +OpenMP backend +~~~~~~~~~~~~~~ + +In contrast to the sequential backend, the outermost ``for`` loop in the +OpenMP backend is annotated with OpenMP pragmas to execute in parallel with +multiple threads. To avoid race conditions on data access, the iteration set +is coloured and a thread safe execution plan is computed as described in +:ref:`plan-colouring`. + +The JIT compiled code for the parallel loop from above changes as follows: :: + + void wrap_midpoint__(PyObject* _boffset, + PyObject* _nblocks, + PyObject* _blkmap, + PyObject* _offset, + PyObject* _nelems, + PyObject *_arg0_0, + PyObject *_arg1_0, PyObject *_arg1_0_map0_0) { + int boffset = (int)PyInt_AsLong(_boffset); + int nblocks = (int)PyInt_AsLong(_nblocks); + int* blkmap = (int *)(((PyArrayObject *)_blkmap)->data); + int* offset = (int *)(((PyArrayObject *)_offset)->data); + int* nelems = (int *)(((PyArrayObject *)_nelems)->data); + double *arg0_0 = (double *)(((PyArrayObject *)_arg0_0)->data); + double *arg1_0 = (double *)(((PyArrayObject *)_arg1_0)->data); + int *arg1_0_map0_0 = (int *)(((PyArrayObject *)_arg1_0_map0_0)->data); + double *arg1_0_vec[32][3]; + #ifdef _OPENMP + int nthread = omp_get_max_threads(); + #else + int nthread = 1; + #endif + #pragma omp parallel shared(boffset, nblocks, nelems, blkmap) + { + int tid = omp_get_thread_num(); + #pragma omp for schedule(static) + for (int __b = boffset; __b < boffset + nblocks; __b++) + { + int bid = blkmap[__b]; + int nelem = nelems[bid]; + int efirst = offset[bid]; + for (int n = efirst; n < efirst+ nelem; n++ ) + { + int i = n; + arg1_0_vec[tid][0] = arg1_0 + arg1_0_map0_0[i * 3 + 0] * 2; + arg1_0_vec[tid][1] = arg1_0 + arg1_0_map0_0[i * 3 
+ 1] * 2; + arg1_0_vec[tid][2] = arg1_0 + arg1_0_map0_0[i * 3 + 2] * 2; + midpoint(arg0_0 + i * 2, arg1_0_vec[tid]); + } + } + } + } + +Computation is split into ``nblocks`` blocks which start at an initial offset +``boffset`` and correspond to colours that can be executed conflict free in +parallel. This loop over colours is therefore wrapped in an OpenMP parallel +region and is annotated with an ``omp for`` pragma. The block id ``bid`` for +each of these blocks is given by the block map ``blkmap`` and is the index +into the arrays ``nelems`` and ``offset`` provided as part of the execution +plan. These are the number of elements that are part of the given block and +its starting index. Note that each thread needs its own staging array +``arg1_0_vec``, which is therefore scoped by the thread id. + +.. _device_backends: + +Device backends +--------------- + +As with the host backends, the device backends have most of the implementation +in common. The PyOP2 data carriers :class:`~pyop2.Dat`, :class:`~pyop2.Global` +and :class:`~pyop2.Const` have a data array in host memory and a separate +array in device memory. Flags indicate the present state of a given data +carrier: + +* ``DEVICE_UNALLOCATED``: no data is allocated on the device +* ``HOST_UNALLOCATED``: no data is allocated on the host +* ``DEVICE``: data is up-to-date (valid) on the device, but invalid on the + host +* ``HOST``: data is up-to-date (valid) on the host, but invalid on the device +* ``BOTH``: data is up-to-date (valid) on both the host and device + +When a :func:`~pyop2.par_loop` is called, PyOP2 uses the +:ref:`access-descriptors` to determine which data needs to be allocated or +transferred from host to device prior to launching the kernel. Data is only +transferred if it is out of date at the target location and all data transfer +is triggered lazily i.e. the actual copy only occurs once the data is +requested. 
In particular there is no automatic transfer back of data from +device to host unless it is accessed on the host. + +A newly created device :class:`~pyop2.Dat` has no associated device data and +starts out in the state ``DEVICE_UNALLOCATED``. The diagram below shows all +actions that involve a state transition, which can be divided into three +groups: calling explicit data transfer functions (red), accessing data on the +host (black) and using the :class:`~pyop2.Dat` in a :func:`~pyop2.par_loop` +(blue). There is no need for users to explicitly initiate data transfers and +the transfer functions are only given for completeness. + +.. figure:: images/pyop2_device_data_state.svg + :align: center + + State transitions of a data carrier on PyOP2 device backends + +When a device :class:`~pyop2.Dat` is used in a :func:`~pyop2.par_loop` for the +first time, data is allocated on the device. If the :class:`~pyop2.Dat` is +only read, the host array is transferred to device if it was in state ``HOST`` +or ``DEVICE_UNALLOCATED`` before the :func:`~pyop2.par_loop` and the +:class:`~pyop2.Dat` is in the state ``BOTH`` afterwards, unless it was in +state ``DEVICE`` in which case it remains in that state. If the +:class:`~pyop2.Dat` is written to, data transfer before the +:func:`~pyop2.par_loop` is necessary unless the access descriptor is +:data:`~pyop2.WRITE`. In either case the host data is out of date afterwards and the +:class:`~pyop2.Dat` is in the state ``DEVICE``.
An overview of the state +transitions and necessary memory allocations and data transfers for the two +cases is given in the table below: + +====================== ============================== ================================================== +Initial state :func:`~pyop2.par_loop` read :func:`~pyop2.par_loop` written to +====================== ============================== ================================================== +``DEVICE_UNALLOCATED`` ``BOTH`` (alloc, transfer h2d) ``DEVICE`` (alloc, transfer h2d unless write-only) +``DEVICE`` ``DEVICE`` ``DEVICE`` +``HOST`` ``BOTH`` (transfer h2d) ``DEVICE`` (transfer h2d unless write-only) +``BOTH`` ``BOTH`` ``DEVICE`` +====================== ============================== ================================================== + +Accessing data on the host initiates a device to host data transfer if the +:class:`~pyop2.Dat` is in state ``DEVICE`` and leaves it in state ``HOST`` +when using the :meth:`~pyop2.Dat.data` property and ``BOTH`` when using +:meth:`~pyop2.Dat.data_ro`. + +The state transitions described above apply in the same way to a +:class:`~pyop2.Global`. A :class:`~pyop2.Const` is read-only, never modified +on device and therefore never out of date on the host. Hence there is no +state ``DEVICE`` and it is not necessary to copy back :class:`~pyop2.Const` +data from device to host. + +.. _cuda_backend: + +CUDA backend +~~~~~~~~~~~~ + +The CUDA backend makes extensive use of PyCUDA_ and its infrastructure for +just-in-time compilation of CUDA kernels and interfacing them to Python. +Linear solvers and sparse matrix data structures are implemented on top of the +`CUSP library`_ and are described in greater detail in :doc:`linear_algebra`. +Code generation uses a template based approach, where a ``__global__`` stub +routine to be called from the host is generated, which takes care of data +marshalling and calling the user kernel as an inline ``__device__`` function. 
+ +We consider the same ``midpoint`` kernel as in the previous examples, which +requires no CUDA-specific modifications and is automatically annotated with a +``__device__`` qualifier. PyCUDA_ automatically generates a host stub for the +generated kernel stub ``__midpoint_stub`` given a list of parameter types. It +takes care of translating Python objects to plain C data types and pointers, +such that a CUDA kernel can be launched straight from Python. The entire CUDA +code PyOP2 generates is as follows: :: + + __device__ void midpoint(double p[2], double *coords[2]) + { + p[0] = ((coords[0][0] + coords[1][0]) + coords[2][0]) / 3.0; + p[1] = ((coords[0][1] + coords[1][1]) + coords[2][1]) / 3.0; + } + + __global__ void __midpoint_stub(int set_size, int set_offset, + double *arg0, + double *ind_arg1, + int *ind_map, + short *loc_map, + int *ind_sizes, + int *ind_offs, + int block_offset, + int *blkmap, + int *offset, + int *nelems, + int *nthrcol, + int *thrcol, + int nblocks) { + extern __shared__ char shared[]; + __shared__ int *ind_arg1_map; + __shared__ int ind_arg1_size; + __shared__ double * ind_arg1_shared; + __shared__ int nelem, offset_b, offset_b_abs; + + double *ind_arg1_vec[3]; + + if (blockIdx.x + blockIdx.y * gridDim.x >= nblocks) return; + if (threadIdx.x == 0) { + int blockId = blkmap[blockIdx.x + blockIdx.y * gridDim.x + block_offset]; + nelem = nelems[blockId]; + offset_b_abs = offset[blockId]; + offset_b = offset_b_abs - set_offset; + + ind_arg1_size = ind_sizes[0 + blockId * 1]; + ind_arg1_map = &ind_map[0 * set_size] + ind_offs[0 + blockId * 1]; + + int nbytes = 0; + ind_arg1_shared = (double *) &shared[nbytes]; + } + + __syncthreads(); + + // Copy into shared memory + for ( int idx = threadIdx.x; idx < ind_arg1_size * 2; idx += blockDim.x ) { + ind_arg1_shared[idx] = ind_arg1[idx % 2 + ind_arg1_map[idx / 2] * 2]; + } + + __syncthreads(); + + // process set elements + for ( int idx = threadIdx.x; idx < nelem; idx += blockDim.x ) { + 
ind_arg1_vec[0] = ind_arg1_shared + loc_map[0*set_size + idx + offset_b]*2; + ind_arg1_vec[1] = ind_arg1_shared + loc_map[1*set_size + idx + offset_b]*2; + ind_arg1_vec[2] = ind_arg1_shared + loc_map[2*set_size + idx + offset_b]*2; + + midpoint(arg0 + 2 * (idx + offset_b_abs), ind_arg1_vec); + } + } + +The CUDA kernel ``__midpoint_stub`` is launched on the GPU for a specific +number of threads in parallel. Each thread is identified inside the kernel by +its thread id ``threadIdx`` within a block of threads identified by a two +dimensional block id ``blockIdx`` within a grid of blocks. + +As for OpenMP, there is the potential for data races, which are prevented by +colouring the iteration set and computing a parallel execution plan, where all +elements of the same colour can be modified simultaneously. Each colour is +computed by a block of threads in parallel. All threads of a thread block have +access to a shared memory, which is used as a shared staging area initialised +by thread 0 of each block, see lines 30-41 above. A call to +``__syncthreads()`` ensures these initial values are visible to all threads of +the block. After this barrier, all threads cooperatively gather data from the +indirectly accessed :class:`~pyop2.Dat` via the :class:`~pyop2.Map`, followed +by another synchronisation. Following that, each thread loops over the +elements in the partition with an increment of the block size. In each +iteration a thread-private array of pointers to coordinate data in shared +memory is built which is then passed to the ``midpoint`` kernel. As for other +backends, the first, directly accessed, argument, is passed as a pointer to +global device memory with a suitable offset. + +.. _opencl_backend: + +OpenCL backend +~~~~~~~~~~~~~~ + +The other device backend OpenCL is structurally very similar to the CUDA +backend. It uses PyOpenCL_ to interface to the OpenCL drivers and runtime. 
+Linear algebra operations are handled by PETSc_ as described in +:doc:`linear_algebra`. PyOP2 generates a kernel stub from a template similar +to the CUDA case. + +Consider the ``midpoint`` kernel from previous examples, whose parameters in +the kernel signature are automatically annotated with OpenCL storage +qualifiers. PyOpenCL_ provides Python wrappers for OpenCL runtime functions to +build a kernel from a code string, set its arguments and enqueue the kernel +for execution. It takes care of the necessary conversion from Python objects +to plain C data types. PyOP2 generates the following code for the ``midpoint`` +example: :: + + #define ROUND_UP(bytes) (((bytes) + 15) & ~15) + + void midpoint(__global double p[2], __local double *coords[2]); + void midpoint(__global double p[2], __local double *coords[2]) + { + p[0] = ((coords[0][0] + coords[1][0]) + coords[2][0]) / 3.0; + p[1] = ((coords[0][1] + coords[1][1]) + coords[2][1]) / 3.0; + } + + __kernel __attribute__((reqd_work_group_size(668, 1, 1))) + void __midpoint_stub( + __global double* arg0, + __global double* ind_arg1, + int set_size, + int set_offset, + __global int* p_ind_map, + __global short *p_loc_map, + __global int* p_ind_sizes, + __global int* p_ind_offsets, + __global int* p_blk_map, + __global int* p_offset, + __global int* p_nelems, + __global int* p_nthrcol, + __global int* p_thrcol, + __private int block_offset) { + __local char shared [64] __attribute__((aligned(sizeof(long)))); + __local int offset_b; + __local int offset_b_abs; + __local int active_threads_count; + + int nbytes; + int block_id; + + int i_1; + // shared indirection mappings + __global int* __local ind_arg1_map; + __local int ind_arg1_size; + __local double* __local ind_arg1_shared; + __local double* ind_arg1_vec[3]; + + if (get_local_id(0) == 0) { + block_id = p_blk_map[get_group_id(0) + block_offset]; + active_threads_count = p_nelems[block_id]; + offset_b_abs = p_offset[block_id]; + offset_b = offset_b_abs - 
set_offset;ind_arg1_size = p_ind_sizes[0 + block_id * 1]; + ind_arg1_map = &p_ind_map[0 * set_size] + p_ind_offsets[0 + block_id * 1]; + + nbytes = 0; + ind_arg1_shared = (__local double*) (&shared[nbytes]); + nbytes += ROUND_UP(ind_arg1_size * 2 * sizeof(double)); + } + barrier(CLK_LOCAL_MEM_FENCE); + + // staging in of indirect dats + for (i_1 = get_local_id(0); i_1 < ind_arg1_size * 2; i_1 += get_local_size(0)) { + ind_arg1_shared[i_1] = ind_arg1[i_1 % 2 + ind_arg1_map[i_1 / 2] * 2]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + for (i_1 = get_local_id(0); i_1 < active_threads_count; i_1 += get_local_size(0)) { + ind_arg1_vec[0] = ind_arg1_shared + p_loc_map[i_1 + 0*set_size + offset_b] * 2; + ind_arg1_vec[1] = ind_arg1_shared + p_loc_map[i_1 + 1*set_size + offset_b] * 2; + ind_arg1_vec[2] = ind_arg1_shared + p_loc_map[i_1 + 2*set_size + offset_b] * 2; + + midpoint((__global double* __private)(arg0 + (i_1 + offset_b_abs) * 2), ind_arg1_vec); + } + } + +Parallel computations in OpenCL are executed by *work items* organised into +*work groups*. OpenCL requires the annotation of all pointer arguments with +the memory region they point to: ``__global`` memory is visible to any work +item, ``__local`` memory to any work item within the same work group and +``__private`` memory is private to a work item. PyOP2 does this annotation +automatically for the user kernel if the OpenCL backend is used. Local memory +therefore corresponds to CUDA's shared memory and private memory is called +local memory in CUDA. The work item id within the work group is accessed via +the OpenCL runtime call ``get_local_id(0)``, the work group id via +``get_group_id(0)``. A barrier synchronisation across all work items of a work +group is enforced with a call to ``barrier(CLK_LOCAL_MEM_FENCE)``. Bearing +these differences in mind, the OpenCL kernel stub is structurally almost +identical to the corresponding CUDA version above. 
+ +The required local memory size per work group ``reqd_work_group_size`` is +computed as part of the execution plan. In CUDA this value is a launch +parameter to the kernel, whereas in OpenCL it needs to be hard coded as a +kernel attribute. + +.. _FEniCS project: http://fenicsproject.org +.. _PyCUDA: http://mathema.tician.de/software/pycuda/ +.. _CUSP library: http://cusplibrary.github.io +.. _PyOpenCL: http://mathema.tician.de/software/pyopencl/ +.. _PETSc: http://www.mcs.anl.gov/petsc/petsc-as/ diff --git a/docs/source/old_pyop2/sphinx/source/caching.rst b/docs/source/old_pyop2/sphinx/source/caching.rst new file mode 100644 index 0000000000..6e894ecbb2 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/caching.rst @@ -0,0 +1,112 @@ +.. _caching: + +Caching in PyOP2 +================ + +PyOP2 makes heavy use of caches to ensure performance is not adversely +affected by too many runtime computations. The caching in PyOP2 takes +a number of forms: + +1. Disk-based caching of generated code + + Since compiling a generated code module may be an expensive + operation, PyOP2 caches the generated code on disk such that + subsequent runs of the same simulation will not have to pay a + compilation cost. + +2. In memory caching of generated code function pointers + + Once code has been generated and loaded into the running PyOP2 + process, we cache the resulting callable function pointer for the + lifetime of the process, such that subsequent calls to the same + generated code are fast. + +3. In memory caching of expensive to build objects + + Some PyOP2 objects, in particular :class:`~pyop2.Sparsity` objects, + can be expensive to construct. Since a sparsity does not change if + it is built again with the same arguments, we only construct the + sparsity once for each unique set of arguments. + +The caching strategies for PyOP2 follow from two axioms: + +1. For PyOP2 :class:`~pyop2.Set`\s and :class:`~pyop2.Map`\s, equality + is identity +2. 
Caches of generated code should depend on metadata, but not data + +The first axiom implies that two :class:`~pyop2.Set`\s or +:class:`~pyop2.Map`\s compare equal if and only if they are the same +object. The second implies that generated code must be *independent* +of the absolute size of the data the :func:`~pyop2.par_loop` that +generated it executed over. For example, the size of the iteration +set should not be part of the key, but the arity of any maps and size +and type of every data item should be. + +One consequence of these rules is that there are effectively two +separate types of cache in PyOP2, object and class caches, +distinguished by where the cache itself lives. + +Class caches +------------ + +These are used to cache objects that depend on metadata, but not +object instances, such as generated code. They are implemented by +the cacheable class inheriting from :class:`~.Cached`. + +.. note:: + + There is currently no eviction strategy for class caches. Should + they grow too large, for example by executing many different parallel + loops, an out of memory error can occur. + +Object caches +------------- + +These are used to cache objects that are built on top of +:class:`~pyop2.Set`\s and :class:`~pyop2.Map`\s. They are implemented by the +cacheable class inheriting from :class:`~.ObjectCached` and the +caching instance defining a ``_cache`` attribute. + +The motivation for these caches is that the cache key for objects such as +sparsities relies on an identical sparsity being built if the +arguments are identical. So that users of the API do not have to +worry too much about carrying around "temporary" objects forever such +that they will hit caches, PyOP2 builds up a hierarchy of caches of +transient objects on top of the immutable sets and maps. + +So, for example, the user can build and throw away +:class:`~pyop2.DataSet`\s as normal in their code. Internally, however, +these instances are cached on the set they are built on top of.
Thus, +in the following snippet, we have that ``ds`` and ``ds2`` are the same +object: + +.. code-block:: python + + s = op2.Set(1) + ds = op2.DataSet(s, 10) + ds2 = op2.DataSet(s, 10) + assert ds is ds2 + +The setup of these caches is such that the lifetime of objects in the +cache is tied to the lifetime of both the caching and the cached +object. In the above example, as long as the user program holds a +reference to one of ``s``, ``ds`` or ``ds2`` all three objects will +remain live. As soon as all references are lost, all three become +candidates for garbage collection. + +.. note:: + + The cache eviction strategy for these caches relies on the Python + garbage collector, and hence on the user not holding onto + references to some of either the cached or the caching objects for + too long. Should the objects on which the caches live persist, an + out of memory error may occur. + +Debugging cache leaks +--------------------- + +To debug potential problems with the cache, PyOP2 can be instructed to +print the size of both object and class caches at program exit. This +can be done by setting the environment variable +``PYOP2_PRINT_CACHE_SIZE`` to 1 before running a PyOP2 program, or +passing the ``print_cache_size`` to :func:`~pyop2.init`. diff --git a/docs/source/old_pyop2/sphinx/source/concepts.rst b/docs/source/old_pyop2/sphinx/source/concepts.rst new file mode 100644 index 0000000000..f62ae0885b --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/concepts.rst @@ -0,0 +1,268 @@ +.. _concepts: + +PyOP2 Concepts +============== + +Many numerical algorithms and scientific computations on unstructured meshes +can be viewed as the *independent application* of a *local operation* +everywhere on a mesh. This local operation is often called a computational +*kernel* and its independent application lends itself naturally to parallel +computation. 
An unstructured mesh can be described by *sets of entities* +(vertices, edges, cells) and the connectivity between those sets forming the +topology of the mesh. + +PyOP2 is a domain-specific language (DSL) for the parallel execution of +computational kernels on unstructured meshes or graphs. + +.. _sets: + +Sets and mappings +----------------- + +A mesh is defined by :class:`sets <pyop2.Set>` of entities and +:class:`mappings <pyop2.Map>` between these sets. Sets are used to represent +entities in the mesh (nodes in the graph) or degrees of freedom of data +(fields) living "on" the mesh (graph), while maps define the connectivity +between entities (links in the graph) or degrees of freedom, for example +associating an edge with its incident vertices. Sets of mesh entities may +coincide with sets of degrees of freedom, but this is not necessarily the case +e.g. the set of degrees of freedom for a field may be defined on the vertices +of the mesh and the midpoints of edges connecting the vertices. + +.. note :: + There is a requirement for the map to be of *constant arity*, that is each + element in the source set must be associated with a constant number of + elements in the target set. There is no requirement for the map to be + injective or surjective. This restriction excludes certain kinds of mappings + e.g. a map from vertices to incident edges or cells is only possible on a + very regular mesh where the multiplicity of any vertex is constant. + +In the following we declare a :class:`~pyop2.Set` ``vertices``, a +:class:`~pyop2.Set` ``edges`` and a :class:`~pyop2.Map` ``edges2vertices`` +between them, which associates the two incident vertices with each edge: :: + + vertices = op2.Set(4) + edges = op2.Set(3) + edges2vertices = op2.Map(edges, vertices, 2, [[0, 1], [1, 2], [2, 3]]) + +.. 
_data: + +Data +---- + +PyOP2 distinguishes three kinds of user provided data: data that lives on a +set (often referred to as a field) is represented by a :class:`~pyop2.Dat`, +data that has no association with a set by a :class:`~pyop2.Global` and data +that is visible globally and referred to by a unique identifier is declared as +:class:`~pyop2.Const`. Examples of the use of these data types are given in +the :ref:`par_loops` section below. + +.. _data_dat: + +Dat +~~~ + +Since a set does not have any type but only a cardinality, data declared on a +set through a :class:`~pyop2.Dat` needs additional metadata to allow PyOP2 to +interpret the data and to specify how much memory is required to store it. This +metadata is the *datatype* and the *shape* of the data associated with any +given set element. The shape is not associated with the :class:`~pyop2.Dat` +directly, but with a :class:`~pyop2.DataSet`. One can associate a scalar with +each element of the set or a one- or higher-dimensional vector. Similar to the +restriction on maps, the shape and therefore the size of the data associated +with each element needs to be uniform. PyOP2 supports all common primitive +data types supported by `NumPy`_. Custom datatypes are supported insofar as +the user implements the serialisation and deserialisation of that type into +primitive data that can be handled by PyOP2. + +Declaring coordinate data on the ``vertices`` defined above, where two float +coordinates are associated with each vertex, is done like this: :: + + dvertices = op2.DataSet(vertices, dim=2) + coordinates = op2.Dat(dvertices, + [[0.0, 0.0], [0.0, 1.0], [1.0, 1.0], [1.0, 0.0]], + dtype=float) + +.. _data_global: + +Global +~~~~~~ + +In contrast to a :class:`~pyop2.Dat`, a :class:`~pyop2.Global` has no +association to a set and the shape and type of the data are declared directly +on the :class:`~pyop2.Global`.
A 2x2 elasticity tensor would be defined as +follows: :: + + elasticity = op2.Global((2, 2), [[1.0, 0.0], [0.0, 1.0]], dtype=float) + +.. _data_const: + +Const +~~~~~ + +Data that is globally visible and read-only to kernels is declared with a +:class:`~pyop2.Const` and needs to have a globally unique identifier. It does +not need to be declared as an argument to a :func:`~pyop2.par_loop`, but is +accessible in a kernel by name. A globally visible parameter ``eps`` would be +declared as follows: :: + + eps = op2.Const(1, 1e-14, name="eps", dtype=float) + +.. _data_mat: + +Mat +~~~ + +In a PyOP2 context, a (sparse) matrix is a linear operator from one set to +another. In other words, it is a linear function which takes a +:class:`~pyop2.Dat` on one set :math:`A` and returns the value of a +:class:`~pyop2.Dat` on another set :math:`B`. Of course, in particular, +:math:`A` may be the same set as :math:`B`. This makes the operation of at +least some matrices equivalent to the operation of a particular PyOP2 kernel. + +PyOP2 can be used to assemble :class:`matrices <pyop2.Mat>`, which are defined +on a :class:`sparsity pattern <pyop2.Sparsity>` which is built from a pair of +:class:`DataSets <pyop2.DataSet>` defining the row and column spaces the +sparsity maps between and one or more pairs of maps, one for the row and one +for the column space of the matrix respectively. The sparsity uniquely defines +the non-zero structure of the sparse matrix and can be constructed purely from +those mappings. To declare a :class:`~pyop2.Mat` on a :class:`~pyop2.Sparsity` +only the data type needs to be given. + +Since the construction of large sparsity patterns is a very expensive +operation, the decoupling of :class:`~pyop2.Mat` and :class:`~pyop2.Sparsity` +allows the reuse of sparsity patterns for a number of matrices without +recomputation.
In fact PyOP2 takes care of caching sparsity patterns on behalf +of the user, so declaring a sparsity on the same maps as a previously declared +sparsity yields the cached object instead of building another one. + +Defining a matrix of floats on a sparsity which spans from the space of +vertices to the space of vertices via the edges is done as follows: :: + + sparsity = op2.Sparsity((dvertices, dvertices), + [(edges2vertices, edges2vertices)]) + matrix = op2.Mat(sparsity, float) + +.. _par_loops: + +Parallel loops +-------------- + +Computations in PyOP2 are executed as :func:`parallel loops <pyop2.par_loop>` +of a :class:`~pyop2.Kernel` over an *iteration set*. Parallel loops are the +core construct of PyOP2 and hide most of its complexity such as parallel +scheduling, partitioning, colouring, data transfer from and to device and +staging of the data into on chip memory. Computations in a parallel loop must +be independent of the order in which they are executed over the set to allow +PyOP2 maximum flexibility to schedule the computation in the most efficient +way. Kernels are described in more detail in :doc:`kernels`. + +.. _loop-invocations: + +Loop invocations +~~~~~~~~~~~~~~~~ + +A parallel loop invocation requires as arguments, other than the iteration set +and the kernel to operate on, the data the kernel reads and/or writes. A +parallel loop argument is constructed by calling the underlying data object +(i.e. the :class:`~pyop2.Dat` or :class:`~pyop2.Global`) and passing an +*access descriptor* and the mapping to be used when accessing the data. The +mapping is required for an *indirectly accessed* :class:`~pyop2.Dat` not +declared on the same set as the iteration set of the parallel loop. In the +case of *directly accessed* data defined on the same set as the iteration set +the map is omitted and only an access descriptor is given. + +Consider a parallel loop that translates the ``coordinates`` field by a +constant offset given by the :class:`~pyop2.Const` ``offset``.
Note how the +kernel has access to the local variable ``offset`` even though it has not been +passed as an argument to the :func:`~pyop2.par_loop`. This loop is direct and +the argument ``coordinates`` is read and written: :: + + op2.Const(2, [1.0, 1.0], dtype=float, name="offset"); + + translate = op2.Kernel("""void translate(double * coords) { + coords[0] += offset[0]; + coords[1] += offset[1]; + }""", "translate") + + op2.par_loop(translate, vertices, coordinates(op2.RW)) + +.. _access-descriptors: + +Access descriptors +~~~~~~~~~~~~~~~~~~ + +Access descriptors define how the data is accessed by the kernel and give +PyOP2 crucial information as to how the data needs to be treated during +staging in before and staging out after kernel execution. They must be one of +:data:`pyop2.READ` (read-only), :data:`pyop2.WRITE` (write-only), +:data:`pyop2.RW` (read-write), :data:`pyop2.INC` (increment), +:data:`pyop2.MIN` (minimum reduction) or :data:`pyop2.MAX` (maximum +reduction). + +Not all of these descriptors apply to all PyOP2 data types. A +:class:`~pyop2.Dat` can have modes :data:`~pyop2.READ`, :data:`~pyop2.WRITE`, +:data:`~pyop2.RW` and :data:`~pyop2.INC`. For a :class:`~pyop2.Global` the +valid modes are :data:`~pyop2.READ`, :data:`~pyop2.INC`, :data:`~pyop2.MIN` and +:data:`~pyop2.MAX` and for a :class:`~pyop2.Mat` only :data:`~pyop2.WRITE` and +:data:`~pyop2.INC` are allowed. + +.. _matrix-loops: + +Loops assembling matrices +~~~~~~~~~~~~~~~~~~~~~~~~~ + +We declare a parallel loop assembling the ``matrix`` via a given ``kernel`` +which we'll assume has been defined before over the ``edges`` and with +``coordinates`` as input data. The ``matrix`` is the output argument of this +parallel loop and therefore has the access descriptor :data:`~pyop2.INC` since +the assembly accumulates contributions from different vertices via the +``edges2vertices`` mapping. 
Note that the mappings are being indexed with the +:class:`iteration indices <pyop2.base.IterationIndex>` ``op2.i[0]`` and +``op2.i[1]`` respectively. This means that PyOP2 generates a :ref:`local +iteration space <local-iteration-spaces>` of size ``arity * arity`` with the +``arity`` of the :class:`~pyop2.Map` ``edges2vertices`` for any given element +of the iteration set. This local iteration space is then iterated over using +the iteration indices on the maps. The kernel is assumed to only apply to a +single point in that local iteration space. The ``coordinates`` are accessed +via the same mapping, but are a read-only input argument to the kernel and +therefore use the access descriptor :data:`~pyop2.READ`: :: + + op2.par_loop(kernel, edges, + matrix(op2.INC, (edges2vertices[op2.i[0]], + edges2vertices[op2.i[1]])), + coordinates(op2.READ, edges2vertices)) + +You can stack up multiple successive parallel loops that add values to +a matrix. Before you use the resulting values, you must explicitly +tell PyOP2 that you want to do so by calling +:meth:`~pyop2.Mat.assemble` on the matrix. Note that executing a +:func:`~pyop2.solve` will do this automatically for you. + +.. _reduction-loops: + +Loops with global reductions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:class:`Globals <pyop2.Global>` are used primarily for reductions where a +given quantity on a field is reduced to a single number by summation or +finding the minimum or maximum. Consider a kernel computing the `L2 norm`_ of +the ``pressure`` field defined on the set of ``vertices`` as ``l2norm``. Note +that the :class:`~pyop2.Dat` constructor automatically creates an anonymous +:class:`~pyop2.DataSet` of dimension 1 if a :class:`~pyop2.Set` is passed as +the first argument. We assume ``pressure`` is the result of some prior +computation and only give the declaration for context.
:: + + pressure = op2.Dat(vertices, [...], dtype=float) + l2norm = op2.Global(dim=1, data=[0.0]) + + norm = op2.Kernel("""void norm(double * out, double * field) { + *out += field[0] * field[0]; + }""", "norm") + + op2.par_loop(norm, vertices, + l2norm(op2.INC), + pressure(op2.READ)) + +.. _NumPy: http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html +.. _L2 norm: https://en.wikipedia.org/wiki/L2_norm#Euclidean_norm diff --git a/docs/source/old_pyop2/sphinx/source/conf.py b/docs/source/old_pyop2/sphinx/source/conf.py new file mode 100644 index 0000000000..5addfee35c --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/conf.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# +# PyOP2 documentation build configuration file, created by +# sphinx-quickstart on Tue Aug 14 10:10:00 2012. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../../..')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.imgmath'] +autodoc_default_flags = ['members', 'undoc-members'] +# Both the class’ and the __init__ method’s docstring are concatenated and +# inserted into the class definition +autoclass_content = 'both' + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'PyOP2' +copyright = u'2012-2013, Imperial College et al' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '2020.0' +# The full version, including alpha/beta/rc tags. +release = version + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. 
+#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +autodoc_member_order = "bysource" + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. 
+#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'PyOP2doc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + #'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'PyOP2.tex', u'PyOP2 Documentation', + u'Imperial College et al', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. 
+#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'pyop2', u'PyOP2 Documentation', + [u'Imperial College et al'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'PyOP2', u'PyOP2 Documentation', + u'Imperial College et al', 'PyOP2', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. 
+#texinfo_show_urls = 'footnote' diff --git a/docs/source/old_pyop2/sphinx/source/images/assembly.svg b/docs/source/old_pyop2/sphinx/source/images/assembly.svg new file mode 100644 index 0000000000..5c87b8d89c --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/assembly.svg @@ -0,0 +1,3364 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xmldiff --git a/docs/source/old_pyop2/sphinx/source/images/csr.svg b/docs/source/old_pyop2/sphinx/source/images/csr.svg new file mode 100644 index 0000000000..b9e736a71c --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/csr.svg @@ -0,0 +1,1770 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 10 + 3 + 3 + 8 + 9 + 7 + 8 + 7 + 0 + -2 + 8 + 7 + 5 + 9 + 13 + Sparse Matrix + + + + + + + + + + + + + + + + + + + 10 + -2 + 3 + 9 + 7 + 8 + 7 + 3 + 8 + 7 + 5 + 8 + 9 + 13 + Values array + + + + + + + + + + + + + + + + + + + 0 + 4 + 0 + 1 + 1 + 2 + 3 + 0 + 2 + 3 + 4 + 1 + 3 + 4 + Column indices array + + + + + + + + + + + 0 + 2 + 4 + 7 + 11 + 14 + Row pointer array + + diff --git a/docs/source/old_pyop2/sphinx/source/images/direct_arg.svg b/docs/source/old_pyop2/sphinx/source/images/direct_arg.svg new file mode 100644 index 0000000000..7817f32281 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/direct_arg.svg @@ -0,0 +1,330 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + (dim 2) + + + 
argument Dat + iteration Set + i + i+1 + 2i + 2i+1 + + diff --git a/docs/source/old_pyop2/sphinx/source/images/indirect_arg.svg b/docs/source/old_pyop2/sphinx/source/images/indirect_arg.svg new file mode 100644 index 0000000000..ff737c2e90 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/indirect_arg.svg @@ -0,0 +1,833 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + argument Dat + iteration Set + i + 3i + 3i+1 + 3i+2 + 2m[i,0] + 2m[i,1] + 2m[i,2] + argument Map + (arity 3) + (dim 2) + kernel Arg + + + + + + + + + + + diff --git a/docs/source/old_pyop2/sphinx/source/images/indirect_arg_flattened.svg b/docs/source/old_pyop2/sphinx/source/images/indirect_arg_flattened.svg new file mode 100644 index 0000000000..2da6cbe8fd --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/indirect_arg_flattened.svg @@ -0,0 +1,832 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + argument Dat + iteration Set + i + 3i + 3i+1 + 3i+2 + 2m[i,0] + 2m[i,1] + 2m[i,2] + argument Map + (arity 3) + (dim 2) + kernel Arg + + + + + + + + + + + + + + + + (flattened) + + diff --git a/docs/source/old_pyop2/sphinx/source/images/iteration_spaces.svg b/docs/source/old_pyop2/sphinx/source/images/iteration_spaces.svg new file mode 100644 index 0000000000..9029c95cda --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/iteration_spaces.svg @@ -0,0 +1,5040 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unified iteration space:144 kernel output values computed by single thread + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0,0 + 0,11 + Local iteration space: 144 kernel output values computedby 144 threads (0,0) ... (0,11) (1,0) ... (1,11) ... (11,0) ... (11,11) + 0,1 + 0,2 + 0,3 + 0,4 + 0,5 + 0,6 + 0,7 + 0,8 + 0,9 + 0,10 + 1,0 + 1,11 + 1,1 + 1,2 + 1,3 + 1,4 + 1,5 + 1,6 + 1,7 + 1,8 + 1,9 + 1,10 + 2,0 + 2,11 + 2,1 + 2,2 + 2,3 + 2,4 + 2,5 + 2,6 + 2,7 + 2,8 + 2,9 + 2,10 + 3,0 + 3,11 + 3,1 + 3,2 + 3,3 + 3,4 + 3,5 + 3,6 + 3,7 + 3,8 + 3,9 + 3,10 + 4,0 + 4,11 + 4,1 + 4,2 + 4,3 + 4,4 + 4,5 + 4,6 + 4,7 + 4,8 + 4,9 + 4,10 + 5,0 + 5,11 + 5,1 + 5,2 + 5,3 + 5,4 + 5,5 + 5,6 + 5,7 + 5,8 + 5,9 + 5,10 + 6,0 + 6,11 + 6,1 + 6,2 + 6,3 + 6,4 + 6,5 + 6,6 + 6,7 + 6,8 + 6,9 + 6,10 + 7,0 + 7,11 + 7,1 + 7,2 + 7,3 + 7,4 + 7,5 + 7,6 + 7,7 + 7,8 + 7,9 + 7,10 + 8,0 + 8,11 + 8,1 + 8,2 + 8,3 + 8,4 + 8,5 + 8,6 + 8,7 + 8,8 + 8,9 + 8,10 + 9,11 + 9,1 + 9,2 + 9,3 + 9,4 + 9,5 + 9,6 + 9,7 + 9,8 + 9,9 + 9,10 + 9,0 + 10,0 + 10,11 + 10,1 + 10,2 + 10,3 + 10,4 + 10,5 + 10,6 + 10,7 + 10,8 + 10,9 + 10,10 + 11,0 + 11,11 + 11,1 + 11,2 + 11,3 + 11,4 + 11,5 + 11,6 + 11,7 + 11,8 + 11,9 + 11,10 + + diff --git a/docs/source/old_pyop2/sphinx/source/images/mixed_assembly.svg b/docs/source/old_pyop2/sphinx/source/images/mixed_assembly.svg new file mode 100644 index 
0000000000..94f08d5c08 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/mixed_assembly.svg @@ -0,0 +1,3703 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xmldiff --git a/docs/source/old_pyop2/sphinx/source/images/mixed_sparsity.svg b/docs/source/old_pyop2/sphinx/source/images/mixed_sparsity.svg new file mode 100644 index 0000000000..ae9d71e136 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/mixed_sparsity.svg @@ -0,0 +1,602 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + 0,0 + 0,1 + 1,0 + 1,1 + Mapr,0 + Mapc,1 + Mapr,0 + Mapc,0 + Mapr,1 + Mapc,0 + Mapr,1 + Mapc,1 + + + + + + + + + DataSetc,0 + DataSetc,1 + DataSetr,0 + DataSetr,1 + Setit,0 + Mapc,0 + Mapc,1 + Mapr,0 + Mapr,1 + + + + + + + + diff --git a/docs/source/old_pyop2/sphinx/source/images/mixed_sparsity2.svg b/docs/source/old_pyop2/sphinx/source/images/mixed_sparsity2.svg new file mode 100644 index 0000000000..381dc886ce --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/mixed_sparsity2.svg @@ -0,0 +1,360 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + Setit + DataSetc,0 + DataSetc,1 + DataSetr,0 + DataSetr,1 + Mapr,0 + Mapr,1 + Mapc,0 + Mapc,1 + + diff --git a/docs/source/old_pyop2/sphinx/source/images/mpi_matrix.svg b/docs/source/old_pyop2/sphinx/source/images/mpi_matrix.svg new file mode 100644 index 0000000000..a305ba41cd --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/mpi_matrix.svg @@ -0,0 +1,297 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + offdiagonal + offdiagonal + + + diagonal + diagonal + diagonal + 
off-diagonal + off-diagonal + + + 0 + 1 + 2 + + diff --git a/docs/source/old_pyop2/sphinx/source/images/pyop2_architecture.svg b/docs/source/old_pyop2/sphinx/source/images/pyop2_architecture.svg new file mode 100644 index 0000000000..eb33a5a03f --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/pyop2_architecture.svg @@ -0,0 +1,890 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + OpenCL + CUDA + + + + + + CPU compiler + PyOpenCL + PyCUDA + CPU OpenMP + CPU seq. + MPI + + + + PyOP2 Lib & Runtime Corecolouring, parallel scheduling + + + + Lin. algebraPETSc/Cusp + + + + + + + Kernels + Data + AccessDescriptors + Application code + + + + + + + + + + + + + + + + + + + + + Backends + Code generation + PyOP2 core + User code + + diff --git a/docs/source/old_pyop2/sphinx/source/images/pyop2_colouring.svg b/docs/source/old_pyop2/sphinx/source/images/pyop2_colouring.svg new file mode 100644 index 0000000000..0544909ac1 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/pyop2_colouring.svg @@ -0,0 +1,2370 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + edges + shared / stagingmemory + vertices + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/docs/source/old_pyop2/sphinx/source/images/pyop2_device_data_state.svg b/docs/source/old_pyop2/sphinx/source/images/pyop2_device_data_state.svg new file mode 100644 index 0000000000..c85170146f --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/pyop2_device_data_state.svg @@ -0,0 +1,529 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + Deviceunallocated + + Device + + Both + + Host + + + + + + + + + + + + + + allocate_device() + par_loop(write) + par_loop(write) + par_loop(write) + par_loop (read) + to_device() + access data + accessdata_ro + from_device() + accessdata + par_loop(read) + + diff --git a/docs/source/old_pyop2/sphinx/source/images/pyop2_mpi_mesh.svg b/docs/source/old_pyop2/sphinx/source/images/pyop2_mpi_mesh.svg new file mode 100644 index 0000000000..51d2636f17 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/images/pyop2_mpi_mesh.svg @@ -0,0 +1,2267 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + processor 0 + processor 1 + core + owned + exec + non-exec + core + owned + exec + non-exec + + + halos + + diff --git a/docs/source/old_pyop2/sphinx/source/index.rst b/docs/source/old_pyop2/sphinx/source/index.rst new file mode 100644 index 0000000000..50e2f8930d --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/index.rst @@ -0,0 +1,44 @@ +.. 
PyOP2 documentation master file, created by + sphinx-quickstart on Tue Aug 14 10:10:00 2012. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to PyOP2's documentation! +================================= + +.. warning:: + The prose documentation contained here is significantly out-of-date and thus + contains many inaccuracies. It is, nevertheless, quite a useful resource for + people new to PyOP2. Please read with care. + + The API documentation, however, is updated regularly and can be considered + accurate. + +Contents: + +.. toctree:: + :maxdepth: 2 + + installation + concepts + kernels + ir + architecture + backends + linear_algebra + plan + mixed + mpi + caching + profiling + user + pyop2 + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/source/old_pyop2/sphinx/source/ir.rst b/docs/source/old_pyop2/sphinx/source/ir.rst new file mode 100644 index 0000000000..9d9ea13f9a --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/ir.rst @@ -0,0 +1,324 @@ +The PyOP2 Intermediate Representation +===================================== + +The :class:`parallel loop <pyop2.base.ParLoop>` is the main construct of PyOP2. +It applies a specific :class:`~pyop2.Kernel` to all elements in the iteration +set of the parallel loop. Here, we describe how to use the PyOP2 API to build +a kernel and, also, we provide simple guidelines on how to write efficient +kernels. + +Using the Intermediate Representation +------------------------------------- + +In the :doc:`previous section <kernels>`, we described the API for +PyOP2 kernels in terms of the C code that gets executed. +Passing in a string of C code is the simplest way of creating a +:class:`~pyop2.Kernel`. Another possibility is to use PyOP2 Intermediate +Representation (IR) objects to express the :class:`~pyop2.Kernel` semantics. 
+ +An Abstract Syntax Tree of the kernel code can be manually built using IR +objects. Since PyOP2 has been primarily designed to be fed by higher layers +of abstraction, rather than by users, no C-to-AST parser is currently provided. +The advantage of providing an AST, instead of C code, is that it enables PyOP2 +to inspect and transform the kernel, which is aimed at achieving performance +portability among different architectures and, more generally, better execution +times. + +For the purposes of exposition, let us consider a simple +kernel ``init`` which initialises the members of a :class:`~pyop2.Dat` +to zero. + +.. code-block:: python + + from op2 import Kernel + + code = """void init(double* edge_weight) { + for (int i = 0; i < 3; i++) + edge_weight[i] = 0.0; + }""" + kernel = Kernel(code, "init") + +Here, we describe how we can use PyOP2 IR objects to build an AST for +this kernel. For example, the most basic AST one can come up with +is + +.. code-block:: python + + from op2 import Kernel + from ir.ast_base import * + + ast = FlatBlock("""void init(double* edge_weight) { + for (int i = 0; i < 3; i++) + edge_weight[i] = 0.0; + }""") + kernel = Kernel(ast, "init") + +The :class:`~pyop2.ir.ast_base.FlatBlock` object encapsulates a "flat" block +of code, which is not modified by the IR engine. A +:class:`~pyop2.ir.ast_base.FlatBlock` is used to represent (possibly large) +fragments of code for which we are not interested in any kind of +transformation, so it may be particularly useful to speed up code development +when writing, for example, test cases or non-expensive kernels. On the other +hand, time-demanding kernels should be properly represented using a "real" +AST. For example, a useful AST for ``init`` could be the following + +.. 
code-block:: python + + from op2 import Kernel + from ir.ast_base import * + + ast_body = [FlatBlock("...some code can go here..."), + c_for("i", 3, Assign(Symbol("edge_weight", ("i",)), c_sym("0.0")))] + ast = FunDecl("void", "init", + [Decl("double*", c_sym("edge_weight"))], + ast_body) + kernel = Kernel(ast, "init") + +In this example, we first construct the body of the kernel function. We have +an initial :class:`~pyop2.ir.ast_base.FlatBlock` that contains, for instance, +some sort of initialization code. :func:`~pyop2.ir.ast_base.c_for` is a shortcut +for building a :class:`for loop <pyop2.ir.ast_base.For>`. It takes an +iteration variable (``i``), the extent of the loop and its body. Multiple +statements in the body can be passed in as a list. +:func:`~pyop2.ir.ast_base.c_sym` is a shortcut for building :class:`symbols +<pyop2.ir.ast_base.Symbol>`. You may want to use +:func:`~pyop2.ir.ast_base.c_sym` when the symbol makes no explicit use of +iteration variables. + +We use :class:`~pyop2.ir.ast_base.Symbol` instead of +:func:`~pyop2.ir.ast_base.c_sym`, when ``edge_weight`` accesses a specific +element using the iteration variable ``i``. This is fundamental to allow the +IR engine to perform many kinds of transformations involving the kernel's +iteration space(s). Finally, the signature of the function is constructed +using the :class:`~pyop2.ir.ast_base.FunDecl`. + +Other examples on how to build ASTs can be found in the tests folder, +particularly looking into ``test_matrices.py`` and +``test_iteration_space_dats.py``. + + +Achieving Performance Portability with the IR +--------------------------------------------- + +One of the key objectives of PyOP2 is obtaining performance portability. +This means that exactly the same program can be executed on a range of +different platforms, and that the PyOP2 engine will strive to get the best +performance out of the chosen platform. PyOP2 allows users to write kernels +by completely abstracting from the underlying machine. 
This is mainly +achieved in two steps: + +* Given the AST of a kernel, PyOP2 applies a first transformation aimed at + mapping the parallelism inherent to the kernel to that available in the + backend. +* Then, PyOP2 applies optimizations to the sequential code, depending on the + underlying backend. + +To maximize the outcome of the transformation process, it is important that +kernels are written as simply as possible. That is, premature optimization, +possibly for a specific backend, might harm performance. + +A minimal language, the so-called PyOP2 Kernel Domain-Specific Language, is +used to trigger specific transformations. If we had had a parser from C +code to AST, we would have embedded this DSL in C by means of ``pragmas``. +As we directly build an AST, we achieve the same goal by decorating AST nodes +with specific attributes, added at node creation-time. An overview of the +language follows + +* ``pragma pyop2 itspace``. This is added to :class:`~pyop2.ir.ast_base.For` + nodes (i.e. written on top of for loops). It tells PyOP2 that the following + is a fully-parallel loop, that is all of its iterations can be executed in + parallel without any sort of synchronization. +* ``pragma pyop2 assembly(itvar1, itvar2)``. This is added to a statement node, + to denote that we are performing a local assembly operation along the + ``itvar1`` and ``itvar2`` dimensions. +* ``pragma pyop2 simd``. This is added on top of the kernel signature. It is + used to suggest that PyOP2 apply SIMD vectorization along the ParLoop's + iteration set dimension. This kind of vectorization is also known as + *inter-kernel vectorization*. This feature is currently not supported + by PyOP2, and will be added only in a future release. + +The ``itspace`` pragma tells PyOP2 how to extract parallelism from the kernel. +Consider again our usual example. To expose a parallel iteration space, one +must write + +.. 
code-block:: python + + from op2 import Kernel + + code = """void init(double* edge_weight) { + #pragma pyop2 itspace + for (int i = 0; i < 3; i++) + edge_weight[i] = 0.0; + }""" + kernel = Kernel(code, "init") + +The :func:`~pyop2.ir.ast_base.c_for` shortcut when creating an AST expresses +the same semantics as a for loop decorated with a ``pragma pyop2 itspace``. + +Now, imagine we are executing the ``init`` kernel on a CPU architecture. +Typically we want a single core to execute the entire kernel, because it is +very likely that the kernel's iteration space is small and its working set +fits the L1 cache, and no benefit would be gained by splitting the computation +between distinct cores. On the other hand, if the backend is a GPU or an +accelerator, a different execution model might give better performance. +There's a huge amount of parallelism available, for example, in a GPU, so +delegating the execution of an individual iteration (or a chunk of iterations) +to a single thread could pay off. If that is the case, the PyOP2 IR engine +re-structures the kernel code to exploit such parallelism. + +Optimizing kernels on CPUs +-------------------------- + +So far, some effort has been spent on optimizations for CPU platforms. Being a +DSL, PyOP2 provides specific support for those (linear algebra) operations that +are common among unstructured-mesh-based numerical methods. For example, PyOP2 +is capable of aggressively optimizing local assembly codes for applications +based on the Finite Element Method. We therefore distinguish optimizations in +two categories: + +* Generic optimizations, such as data alignment and support for autovectorization. +* Domain-specific optimizations (DSO) + +To trigger DSOs, statements must be decorated using the kernel DSL. For example, +if the kernel computes the local assembly of an element in an unstructured mesh, +then a ``pragma pyop2 assembly(itvar1, itvar2)`` should be added on top of the +corresponding statement. 
When constructing the AST of a kernel, this can be +simply achieved by + +.. code-block:: python + + from ir.ast_base import * + + s1 = Symbol("X", ("i",)) + s2 = Symbol("Y", ("j",)) + tensor = Symbol("A", ("i", "j")) + pragma = "#pragma pyop2 outerproduct(j,k)" + code = c_for("i", 3, c_for("j", 3, Incr(tensor, Prod(s1, s2), pragma))) + +That, conceptually, corresponds to + +.. code-block:: c + + #pragma pyop2 itspace + for (int i = 0; i < 3; i++) + #pragma pyop2 itspace + for (int j = 0; j < 3; j++) + #pragma pyop2 assembly(i, j) + A[i][j] += X[i]*Y[j] + +Visiting the AST, PyOP2 finds a 2-dimensional iteration space and an assembly +statement. Currently, ``#pragma pyop2 itspace`` is ignored when the backend is +a CPU. The ``#pragma pyop2 assembly(i, j)`` can trigger multiple DSOs. +PyOP2 currently lacks an autotuning system that automatically finds out the +best possible kernel implementation; that is, the optimizations that minimize +the kernel run-time. To drive the optimization process, the user (or the +higher layer) can specify which optimizations should be applied. Currently, +PyOP2 can automate: + +* Alignment and padding of data structures: for issuing aligned loads and stores. +* Loop trip count adjustment according to padding: useful for autovectorization + when the trip count is not a multiple of the vector length +* Loop-invariant code motion and autovectorization of invariant code: this is + particularly useful since trip counts are typically small, and hoisted code + can still represent a significant proportion of the execution time +* Register tiling for rectangular iteration spaces +* (DSO for pragma assembly): Outer-product vectorization + unroll-and-jam of + outer loops to improve register re-use or to mitigate register pressure + +How to select specific kernel optimizations +------------------------------------------- + +When constructing a :class:`~pyop2.Kernel`, it is possible to specify the set +of optimizations we want PyOP2 to apply. 
The IR engine will analyse the kernel +AST and will try to apply, incrementally, such optimizations. PyOP2's FFC +interface, which builds a :class:`~pyop2.Kernel` object given an AST provided +by FFC, already makes use of the available optimizations. Here, we take the +emblematic case of the FFC interface and describe how to play with the various +optimizations through a series of examples. + +.. code-block:: python + + ast = ... + opts = {'licm': False, + 'tile': None, + 'ap': False, + 'vect': None} + kernel = Kernel(ast, 'my_kernel', opts) + +In this example, we have an AST ``ast`` and we specify optimizations through +the dictionary ``opts``; then, we build the :class:`~pyop2.Kernel`, passing in +the optional argument ``opts``. No optimizations are enabled here. The +possible options are: + +* ``licm``: Loop-Invariant Code Motion. +* ``tile``: Register Tiling (of rectangular iteration spaces) +* ``ap``: Data alignment, padding. Trip count adjustment. +* ``vect``: SIMD intra-kernel vectorization. + +If we wanted to apply both loop-invariant code motion and data alignment, we +would simply write + +.. code-block:: python + + ast = ... + opts = {'licm': True, + 'ap': True} + kernel = Kernel(ast, 'my_kernel', opts) + +Now, let's assume we know the kernel has a rectangular iteration space. We want +to try register tiling, with a particular tile size. The way to get it is + +.. code-block:: python + + ast = ... + opts = {'tile': (True, 8)} + kernel = Kernel(ast, 'my_kernel', opts) + +In this case, the iteration space is sliced into tiles of size 8x8. If the +iteration space is smaller than the slice, then the transformation is not +applied. By specifying ``-1`` instead of ``8``, we leave PyOP2 free to choose +automatically a certain tile size. + +A fundamental optimization for any PyOP2 kernel is SIMD vectorization. This is +because almost always kernels fit the L1 cache and are likely to be compute- +bound. 
Backend compilers' AutoVectorization (AV) is therefore an opportunity. +By enforcing data alignment and padding, we can increase the chance AV is +successful. To try AV, one should write + +.. code-block:: python + + import ir.ast_plan as ap + + ast = ... + opts = {'ap': True, + 'vect': (ap.AUTOVECT, -1)} + kernel = Kernel(ast, 'my_kernel', opts) + +The ``vect``'s second parameter (-1) is ignored when AV is requested. +If our kernel is computing an assembly-like operation, then we can ask PyOP2 +to optimize for register locality and register pressure, by resorting to a +different vectorization technique. Early experiments show that this approach +can be particularly useful when the amount of data movement in the assembly +loops is "significant". Of course, this depends on kernel parameters (e.g. +size of assembly loop, number and size of arrays involved in the assembly) as +well as on architecture parameters (e.g. size of L1 cache, number of available +registers). This strategy takes the name of *Outer-Product Vectorization* +(OP), and can be activated in the following way (again, we suggest using it +along with data alignment and padding). + +.. code-block:: python + + import ir.ast_plan as ap + + ast = ... + opts = {'ap': True, + 'vect': (ap.V_OP_UAJ, 1)} + kernel = Kernel(ast, 'my_kernel', opts) + +``UAJ`` in ``V_OP_UAJ`` stands for ``Unroll-and-Jam``. It has been proved that +OP shows a much better performance when used in combination with unrolling the +outer assembly loop and incorporating (*jamming*) the unrolled iterations +within the inner loop. The second parameter, therefore, specifies the unroll- +and-jam factor: the higher it is, the larger is the number of iterations +unrolled. A factor 1 means that no unroll-and-jam is performed. The optimal +factor highly depends on the computational characteristics of the kernel. 
diff --git a/docs/source/old_pyop2/sphinx/source/kernels.rst b/docs/source/old_pyop2/sphinx/source/kernels.rst new file mode 100644 index 0000000000..23dcc73076 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/kernels.rst @@ -0,0 +1,234 @@ +.. _kernels: + +PyOP2 Kernels +============= + +Kernels in PyOP2 define the local operations that are to be performed for each +element of the iteration set the kernel is executed over. There must be a one +to one match between the arguments declared in the kernel signature and the +actual arguments passed to the parallel loop executing this kernel. As +described in :doc:`concepts`, data is accessed directly on the iteration set +or via mappings passed in the :func:`~pyop2.par_loop` call. + +The kernel only sees data corresponding to the current element of the +iteration set it is invoked for. Any data read by the kernel i.e. accessed as +:data:`~pyop2.READ`, :data:`~pyop2.RW` or :data:`~pyop2.INC` is automatically +gathered via the mapping relationship in the *staging in* phase and the kernel +is passed pointers to the staging memory. Similarly, after the kernel has been +invoked, any modified data i.e. accessed as :data:`~pyop2.WRITE`, +:data:`~pyop2.RW` or :data:`~pyop2.INC` is scattered back out via the +:class:`~pyop2.Map` in the *staging out* phase. It is only safe for a kernel +to manipulate data in the way declared via the access descriptor in the +parallel loop call. Any modifications to an argument accessed read-only would +not be written back since the staging out phase is skipped for this argument. +Similarly, the result of reading an argument declared as write-only is +undefined since the data has not been staged in. + +.. _kernel-api: + +Kernel API +---------- + +Consider a :func:`~pyop2.par_loop` computing the midpoint of a triangle given +the three vertex coordinates. 
Note that we make use of a convenience in the +PyOP2 syntax, which allows declaring an anonymous :class:`~pyop2.DataSet` of a +dimension greater than one by using the ``**`` operator. We omit the actual data in +the declaration of the :class:`~pyop2.Map` ``cell2vertex`` and +:class:`~pyop2.Dat` ``coordinates``. :: + + vertices = op2.Set(num_vertices) + cells = op2.Set(num_cells) + + cell2vertex = op2.Map(cells, vertices, 3, [...]) + + coordinates = op2.Dat(vertices ** 2, [...], dtype=float) + midpoints = op2.Dat(cells ** 2, dtype=float) + + op2.par_loop(midpoint, cells, + midpoints(op2.WRITE), + coordinates(op2.READ, cell2vertex)) + +Kernels are implemented in a restricted subset of C99 and are declared by +passing a *C code string* and the *kernel function name*, which must match the +name in the C kernel signature, to the :class:`~pyop2.Kernel` constructor: :: + + midpoint = op2.Kernel(""" + void midpoint(double p[2], double *coords[2]) { + p[0] = (coords[0][0] + coords[1][0] + coords[2][0]) / 3.0; + p[1] = (coords[0][1] + coords[1][1] + coords[2][1]) / 3.0; + }""", "midpoint") + +Since kernels cannot return any value, the return type is always ``void``. The +kernel argument ``p`` corresponds to the third :func:`~pyop2.par_loop` +argument ``midpoints`` and ``coords`` to the fourth argument ``coordinates`` +respectively. Argument names need not agree, the matching is by position. + +Data types of kernel arguments must match the type of data passed to the +parallel loop. The Python types :class:`float` and :class:`numpy.float64` +correspond to a C :class:`double`, :class:`numpy.float32` to a C +:class:`float`, :class:`int` or :class:`numpy.int64` to a C :class:`long` and +:class:`numpy.int32` to a C :class:`int`. 
+ +Direct :func:`~pyop2.par_loop` arguments such as ``midpoints`` are passed to +the kernel as a ``double *``, indirect arguments such as ``coordinates`` as a +``double **`` with the first indirection due to the map and the second +indirection due to the data dimension. The kernel signature above uses arrays +with explicit sizes to draw attention to the fact that these are known. We +could have interchangeably used a kernel signature with plain pointers: + +.. code-block:: c + + void midpoint(double * p, double ** coords) + +Argument creation supports an optional flag ``flatten``, which is used +for kernels which expect data to be laid out by component: :: + + midpoint = op2.Kernel(""" + void midpoint(double p[2], double *coords[1]) { + p[0] = (coords[0][0] + coords[1][0] + coords[2][0]) / 3.0; + p[1] = (coords[3][0] + coords[4][0] + coords[5][0]) / 3.0; + }""", "midpoint") + + op2.par_loop(midpoint, cells, + midpoints(op2.WRITE), + coordinates(op2.READ, cell2vertex, flatten=True)) + +.. _data-layout: + +Data layout +----------- + +Data for a :class:`~pyop2.Dat` declared on a :class:`~pyop2.Set` is +stored contiguously for all elements of the set. For each element, +this is a contiguous chunk of data of a shape given by the +:class:`~pyop2.DataSet` ``dim`` and the datatype of the +:class:`~pyop2.Dat`. The size of this chunk is the product of the +extents of the ``dim`` tuple times the size of the datatype. + +During execution of the :func:`~pyop2.par_loop`, the kernel is called +for each element of the iteration set and passed data for each of its +arguments corresponding to the current set element ``i`` only. + +For a directly accessed argument such as ``midpoints`` above, the +kernel is passed a pointer to the beginning of the chunk of data for +the element ``i`` the kernel is currently called for. In CUDA/OpenCL +``i`` is the global thread id since the kernel is launched in parallel +for all elements. + +.. 
figure:: images/direct_arg.svg + :align: center + + Data layout for a directly accessed :class:`~pyop2.Dat` argument with + ``dim`` 2 + +For an indirectly accessed argument such as ``coordinates`` above, +PyOP2 gathers pointers to the data via the :class:`~pyop2.Map` +``cell2vertex`` used for the indirection. The kernel is passed a list +of pointers of length corresponding to the *arity* of the +:class:`~pyop2.Map`, in the example above 3. Each of these points to +the data chunk for the element in the target :class:`~pyop2.Set` given +by :class:`~pyop2.Map` entries ``(i, 0)``, ``(i, 1)`` and ``(i, 2)``. + +.. figure:: images/indirect_arg.svg + :align: center + + Data layout for a :class:`~pyop2.Dat` argument with ``dim`` 2 indirectly + accessed through a :class:`~pyop2.Map` of ``arity`` 3 + +If the argument is created with the keyword argument ``flatten`` set +to ``True``, a flattened vector of pointers is passed to the kernel. +This vector is of length ``dim * arity`` (where ``dim`` is the product +of the extents of the ``dim`` tuple), which is 6 in the example above. +Each entry points to a single data value of the :class:`~pyop2.Dat`. +The ordering is by component of ``dim`` i.e. the first component of +each data item for each element in the target set pointed to by the +map followed by the second component etc. + +.. figure:: images/indirect_arg_flattened.svg + :align: center + + Data layout for a flattened :class:`~pyop2.Dat` argument with ``dim`` 2 + indirectly accessed through a :class:`~pyop2.Map` of ``arity`` 3 + +.. _local-iteration-spaces: + +Local iteration spaces +---------------------- + +PyOP2 supports complex kernels with large local working set sizes, which may +not run very efficiently on architectures with a limited amount of registers +and on-chip resources. In many cases the resource usage is proportional to the +size of the *local iteration space* the kernel operates on. 
+ +Consider a finite-element local assembly kernel for vector-valued basis +functions of second order on triangles. There are kernels more complex and +computing considerably larger local tensors commonly found in finite-element +computations, in particular for higher-order basis functions, and this kernel +only serves to illustrate the concept. For each element in the iteration set, +this kernel computes a 12x12 local tensor: + +.. code-block:: c + + void kernel(double A[12][12], ...) { + ... + // loops over the local iteration space + for (int j = 0; j < 12; j++) { + for (int k = 0; k < 12; k++) { + A[j][k] += ... + } + } + } + +PyOP2 invokes this kernel for each element in the iteration set: + +.. code-block:: c + + for (int ele = 0; ele < nele; ++ele) { + double A[12][12]; + ... + kernel(A, ...); + } + +To improve the efficiency of executing complex kernels on manycore +platforms, their operation can be distributed among several threads +which each compute a single point in this local iteration space to +increase the level of parallelism and to lower the amount of resources +required per thread. In the case of the kernel above we obtain: + +.. code-block:: c + + void kernel(double A[1][1], ..., int j, int k) { + ... + A[0][0] += ... + } + +Note how the doubly nested loop over basis functions is hoisted out of the +kernel, which receives its position in the local iteration space to compute as +additional arguments ``j`` and ``k``. PyOP2 then calls the kernel for +each element of the local iteration space for each set element: + +.. code-block:: c + + for (int ele = 0; ele < nele; ++ele) { + double A[1][1]; + ... + for (int j = 0; j < 12; j++) { + for (int k = 0; k < 12; k++) { + kernel(A, ..., j, k); + } + } + } + +On manycore platforms, the local iteration space does not translate into a +loop nest, but rather into a larger number of threads being launched to +compute each of its elements: + +.. 
figure:: images/iteration_spaces.svg + :align: center + + Local iteration space for a kernel computing a 12x12 local tensor + +PyOP2 needs to be told to loop over this local iteration space by +indexing the corresponding maps with an +:class:`~pyop2.base.IterationIndex` :data:`~pyop2.i` in the +:func:`~pyop2.par_loop` call. diff --git a/docs/source/old_pyop2/sphinx/source/linear_algebra.rst b/docs/source/old_pyop2/sphinx/source/linear_algebra.rst new file mode 100644 index 0000000000..176f15498d --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/linear_algebra.rst @@ -0,0 +1,304 @@ +.. _linear_algebra: + +PyOP2 Linear Algebra Interface +============================== + +PyOP2 supports linear algebra operations on sparse matrices using a thin +wrapper around the PETSc_ library harnessed via its petsc4py_ interface. + +As described in :doc:`concepts`, a sparse matrix is a linear operator that +maps a :class:`~pyop2.DataSet` representing its row space to a +:class:`~pyop2.DataSet` representing its column space and vice versa. These +two spaces are commonly the same, in which case the resulting matrix is +square. A sparse matrix is represented by a :class:`~pyop2.Mat`, which is +declared on a :class:`~pyop2.Sparsity`, representing its non-zero structure. + +.. _matrix_storage: + +Sparse Matrix Storage Formats +----------------------------- + +PETSc_ uses the popular Compressed Sparse Row (CSR) format to only store the +non-zero entries of a sparse matrix. In CSR, a matrix is stored as three +one-dimensional arrays of *row pointers*, *column indices* and *values*, where +the two former are of integer type and the latter of float type, usually +double. As the name suggests, non-zero entries are stored per row, where each +non-zero is defined by a pair of column index and corresponding value. The +column indices and values arrays therefore have a length equal to the total +number of non-zero entries. 
Row indices are given implicitly by the row +pointer array, which contains the starting index in the column index and +values arrays for the non-zero entries of each row. In other words, the +non-zeros for row ``i`` are at positions ``row_ptr[i]`` up to but not +including ``row_ptr[i+1]`` in the column index and values arrays. For each +row, entries are sorted by column index to allow for faster lookups using a +binary search. + +.. figure:: images/csr.svg + :align: center + + A sparse matrix and its corresponding CSR row pointer, column indices and + values arrays + +For distributed parallel storage with MPI, the rows of the matrix are +distributed evenly among the processors. Each row is then again divided into a +*diagonal* and an *off-diagonal* part, where the diagonal part comprises +columns ``i`` to ``j`` if ``i`` and ``j`` are the first and last row owned by +a given processor, and the off-diagonal part all other columns. + +.. figure:: images/mpi_matrix.svg + :align: center + + Distribution of a sparse matrix among 3 MPI processes + +.. _matrix_assembly: + +Matrix assembly +--------------- + +Sparse matrices are assembled by adding up local contributions which are +mapped to global matrix entries via a local-to-global mapping represented by a +pair of :class:`Maps <pyop2.Map>` for the row and column space. + +.. figure:: images/assembly.svg + :align: center + + Assembly of a local tensor :math:`A^K` into a global matrix :math:`A` using + the local-to-global mapping :math:`\iota_K^1` for rows and :math:`\iota_K^2` + for columns + +For each :func:`~pyop2.par_loop` that assembles a matrix, PyOP2 generates a +call to PETSc_'s MatSetValues_ function for each element of the iteration set, +adding the local contributions computed by the user kernel to the global +matrix using the given :class:`Maps <pyop2.Map>`. At the end of the +:func:`~pyop2.par_loop` PyOP2 automatically calls MatAssemblyBegin_ and +MatAssemblyEnd_ to finalise matrix assembly. 
+ +Consider assembling a :class:`~pyop2.Mat` on a :class:`~pyop2.Sparsity` built +from a :class:`~pyop2.Map` from ``elements`` to ``nodes``. The assembly is +done in a :func:`~pyop2.par_loop` over ``elements``, where the +:class:`~pyop2.Mat` ``A`` is accessed indirectly via the ``elem_node`` +:class:`~pyop2.Map` using the :class:`~pyop2.base.IterationIndex` +:data:`~pyop2.i`: + +.. code-block:: python + + nodes = op2.Set(NUM_NODES, "nodes") + elements = op2.Set(NUM_ELE, "elements") + + elem_node = op2.Map(elements, nodes, 3, ...) + + sparsity = op2.Sparsity((nodes, nodes), (elem_node, elem_node)) + A = op2.Mat(sparsity, np.float64) + + b = op2.Dat(nodes, dtype=np.float64) + + # Assemble the matrix A + op2.par_loop(mat_kernel, elements, + A(op2.INC, (elem_node[op2.i[0]], elem_node[op2.i[1]])), + ...) + + # Assemble the right-hand side vector b + op2.par_loop(rhs_kernel, elements, + b(op2.INC, elem_node[op2.i[0]]), + ...) + +The code generated for the :func:`~pyop2.par_loop` assembling the +:class:`~pyop2.Mat` for the sequential backend is similar to the following, +where initialisation and staging code described in :ref:`sequential_backend` +have been omitted for brevity. For each element of the iteration +:class:`~pyop2.Set` a buffer for the local tensor is initialised to zero and +passed to the user kernel performing the local assembly operation. The +``addto_vector`` call subsequently adds this local contribution to the global +sparse matrix. + +.. code-block:: c + + void wrap_mat_kernel__(...) { + ... + for ( int n = start; n < end; n++ ) { + int i = n; + ... + double buffer_arg0_0[3][3] = {{0}}; // local tensor initialised to 0 + mat_kernel(buffer_arg0_0, ...); // local assembly kernel + addto_vector(arg0_0_0, buffer_arg0_0, // Mat object, local tensor + 3, arg0_0_map0_0 + i * 3, // # rows, global row indices + 3, arg0_0_map1_0 + i * 3, // # cols, global column indices + 0); // mode: 0 add, 1 insert + } + } + +..
_sparsity_pattern: + +Building a sparsity pattern +--------------------------- + +The sparsity pattern of a matrix is uniquely defined by the dimensions of the +:class:`DataSets <pyop2.DataSet>` forming its row and column space, and one or +more pairs of :class:`Maps <pyop2.Map>` defining its non-zero structure. This +is exploited in PyOP2 by caching sparsity patterns with these unique +attributes as the cache key to save expensive recomputation. Whenever a +:class:`Sparsity` is initialised, an already computed pattern with the same +unique key is returned if it exists. + +For a valid sparsity, each row :class:`~pyop2.Map` must map to the set of the +row :class:`~pyop2.DataSet`, each column :class:`~pyop2.Map` to that of the +column :class:`~pyop2.DataSet` and the from sets of each pair must match. A +matrix on a sparsity pattern built from more than one pair of maps is +assembled by multiple parallel loops iterating over the corresponding +iteration set for each pair. + +Sparsity construction proceeds by iterating each :class:`~pyop2.Map` pair and +building a set of indices of the non-zero columns for each row. Each pair of +entries in the row and column maps gives the row and column index of a +non-zero entry in the matrix and therefore the column index is added to the +set of non-zero entries for that particular row. The array of non-zero entries +per row is then determined as the size of the set for each row and its +exclusive scan yields the row pointer array. The column index array is the +concatenation of all the sets.
An algorithm for the sequential case is given +below: :: + + for rowmap, colmap in maps: + for e in range(rowmap.from_size): + for i in range(rowmap.arity): + row = rowmap.values[i + e*rowmap.arity] + for d in range(colmap.arity): + diag[row].insert(colmap.values[d + e * colmap.arity]) + +For the MPI parallel case a minor modification is required, since for each row +a set of diagonal and off-diagonal column indices needs to be built as +described in :ref:`matrix_storage`: :: + + for rowmap, colmap in maps: + for e in range(rowmap.from_size): + for i in range(rowmap.arity): + row = rowmap.values[i + e*rowmap.arity] + if row < nrows: + for d in range(colmap.arity): + if col < ncols: + diag[row].insert(colmap.values[d + e*colmap.arity]) + else: + odiag[row].insert(colmap.values[d + e*colmap.arity]) + +.. _solving: + +Solving a linear system +----------------------- + +PyOP2 provides a :class:`~pyop2.Solver`, wrapping the PETSc_ KSP_ Krylov +solvers which support various iterative methods such as Conjugate Gradients +(CG), Generalized Minimal Residual (GMRES), a stabilized version of +BiConjugate Gradient Squared (BiCGStab) and others. The solvers are +complemented with a range of preconditioners from PETSc_'s PC_ collection, +which includes Jacobi, incomplete Cholesky and LU decompositions and various +multigrid based preconditioners. + +The choice of solver and preconditioner type and other parameters uses +PETSc_'s configuration mechanism documented in the `PETSc manual`_. Options +are passed to the :class:`~pyop2.Solver` via the keyword argument +``parameters`` taking a dictionary of arguments or directly via keyword +arguments. The solver type is chosen as ``ksp_type``, the preconditioner as +``pc_type`` with the defaults ``cg`` and ``jacobi``.
+ +Solving a linear system of the matrix ``A`` assembled above and the right-hand +side vector ``b`` for a solution vector ``x`` is done with a call to +:meth:`~pyop2.Solver.solve`, where solver and preconditioner are chosen as +``gmres`` and ``ilu``: :: + + x = op2.Dat(nodes, dtype=np.float64) + + solver = op2.Solver(ksp_type='gmres', pc_type='ilu') + solver.solve(A, x, b) + +.. _gpu_assembly: + +GPU matrix assembly +------------------- + +In a :func:`~pyop2.par_loop` assembling a :class:`~pyop2.Mat` on the GPU, the +local contributions are first computed for all elements of the iteration set +and stored in global memory in a structure-of-arrays (SoA) data layout such +that all threads can write the data out in a coalesced manner. For the example +above, the generated CUDA wrapper code is as follows, again omitting +initialisation and staging code described in :ref:`cuda_backend`. The user +kernel only computes a single element in the local iteration space as detailed +in :ref:`local-iteration-spaces`. + +.. code-block:: c + + __global__ void __mat_kernel_stub(..., + double *arg0, // local matrix data array + int arg0_offset, // offset into the array + ... ) { + ... // omitted initialisation and shared memory staging code + for ( int idx = threadIdx.x; idx < nelem; idx += blockDim.x ) { + ... // omitted staging code + for ( int i0 = 0; i0 < 3; ++i0 ) { + for ( int i1 = 0; i1 < 3; ++i1 ) { + mass_cell_integral_0_otherwise( + (double (*)[1])(arg0 + arg0_offset + idx * 9 + i0 * 3 + i1 * 1), + ..., i0, i1); + } + } + } + } + +A separate CUDA kernel given below is launched afterwards to compress the data +into a sparse matrix in CSR storage format. Only the values array needs to be +computed, since the row pointer and column indices have already been computed +when building the sparsity on the host and subsequently transferred to GPU +memory. Memory for the local contributions and the values array only needs to +be allocated on the GPU. + +.. 
code-block:: c + + __global__ void __lma_to_csr(double *lmadata, // local matrix data array + double *csrdata, // CSR values array + int *rowptr, // CSR row pointer array + int *colidx, // CSR column indices array + int *rowmap, // row map array + int rowmapdim, // row map arity + int *colmap, // column map array + int colmapdim, // column map arity + int nelems) { + int nentries_per_ele = rowmapdim * colmapdim; + int n = threadIdx.x + blockIdx.x * blockDim.x; + if ( n >= nelems * nentries_per_ele ) return; + + int e = n / nentries_per_ele; // set element + int i = (n - e * nentries_per_ele) / rowmapdim; // local row + int j = (n - e * nentries_per_ele - i * colmapdim); // local column + + // Compute position in values array + int offset = pos(rowmap[e * rowmapdim + i], colmap[e * colmapdim + j], + rowptr, colidx); + __atomic_add(csrdata + offset, lmadata[n]); + } + +.. _gpu_solve: + +GPU linear algebra +------------------ + +Linear algebra on the GPU with the ``cuda`` backend uses the Cusp_ library, +which does not support all solvers and preconditioners provided by PETSc_. The +interface to the user is the same as for the ``sequential`` and ``openmp`` +backends. Supported solver types are CG (``cg``), GMRES (``gmres``) and +BiCGStab (``bicgstab``), with preconditioners of types Jacobi (``jacobi``), +Bridson approximate inverse (``ainv``) and algebraic multigrid (``amg``). An +exception is raised if an unsupported solver or preconditioner type is +requested. A Cusp_ solver with the chosen parameters is automatically +generated when :func:`~pyop2.solve` is called. + +.. note :: + Distributed parallel linear algebra operations with MPI are currently not + supported by the ``cuda`` backend. + +.. _PETSc: http://www.mcs.anl.gov/petsc/ +.. _petsc4py: http://pythonhosted.org/petsc4py/ +.. _MatSetValues: http://www.mcs.anl.gov/petsc/petsc-dev/docs/manualpages/Mat/MatSetValues.html +..
_MatAssemblyBegin: http://www.mcs.anl.gov/petsc/petsc-dev/docs/manualpages/Mat/MatAssemblyBegin.html +.. _MatAssemblyEnd: http://www.mcs.anl.gov/petsc/petsc-dev/docs/manualpages/Mat/MatAssemblyEnd.html +.. _KSP: http://www.mcs.anl.gov/petsc/petsc-dev/docs/manualpages/KSP/ +.. _PC: http://www.mcs.anl.gov/petsc/petsc-dev/docs/manualpages/PC/ +.. _PETSc manual: http://www.mcs.anl.gov/petsc/petsc-dev/docs/manual.pdf +.. _Cusp: http://cusplibrary.github.io diff --git a/docs/source/old_pyop2/sphinx/source/mixed.rst b/docs/source/old_pyop2/sphinx/source/mixed.rst new file mode 100644 index 0000000000..2227dcf696 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/mixed.rst @@ -0,0 +1,144 @@ +.. _mixed: + +Mixed Types +=========== + +When solving linear systems of equations as they arise for instance in the +finite-element method (FEM), one is often interested in *coupled* solutions of +more than one quantity. In fluid dynamics, a common example is solving a +coupled system of velocity and pressure as it occurs in some formulations of +the Navier-Stokes equations. + +Mixed Set, DataSet, Map and Dat +------------------------------- + +PyOP2 provides the mixed types :class:`~pyop2.MixedSet`, +:class:`~pyop2.MixedDataSet`, :class:`~pyop2.MixedMap` and +:class:`~pyop2.MixedDat` for a :class:`~pyop2.Set`, :class:`~pyop2.DataSet`, +:class:`~pyop2.Map` and :class:`~pyop2.Dat` respectively. A mixed type is +constructed from a list or other iterable of its base type and provides the +same attributes and methods. Under most circumstances types and mixed types +behave the same way and can be treated uniformly. Mixed types allow iteration +over their constituent parts and for convenience the base types are also +iterable, yielding themselves.
+ +A :class:`~pyop2.MixedSet` is defined from a list of sets: :: + + s1, s2 = op2.Set(N), op2.Set(M) + ms = op2.MixedSet([s1, s2]) + +There are a number of equivalent ways of defining a +:class:`~pyop2.MixedDataSet`: :: + + mds = op2.MixedDataSet([s1, s2], (1, 2)) + mds = op2.MixedDataSet([s1**1, s2**2]) + mds = op2.MixedDataSet(ms, (1, 2)) + mds = ms**(1, 2) + +A :class:`~pyop2.MixedDat` with no associated data is defined in one of the +following ways: :: + + md = op2.MixedDat(mds) + md = op2.MixedDat([s1**1, s2**2]) + md = op2.MixedDat([op2.Dat(s1**1), op2.Dat(s2**2)]) + +Finally, a :class:`~pyop2.MixedMap` is defined from a list of maps, all of +which must share the same source :class:`~pyop2.Set`: :: + + it = op2.Set(S) + mm = op2.MixedMap([op2.Map(it, s1, 2), op2.Map(it, s2, 3)]) + +Block Sparsity and Mat +---------------------- + +When declaring a :class:`~pyop2.Sparsity` on pairs of mixed maps, the +resulting sparsity pattern has a square block structure with as many block +rows and columns as there are components in the :class:`~pyop2.MixedDataSet` +forming its row and column space. In the most general case a +:class:`~pyop2.Sparsity` is constructed as follows: :: + + it = op2.Set(...) # Iteration set + sr0, sr1 = op2.Set(...), op2.Set(...) # Sets for row spaces + sc0, sc1 = op2.Set(...), op2.Set(...) # Sets for column spaces + # MixedMaps for the row and column spaces + mr = op2.MixedMap([op2.Map(it, sr0, ...), op2.Map(it, sr1, ...)]) + mc = op2.MixedMap([op2.Map(it, sc0, ...), op2.Map(it, sc1, ...)]) + # MixedDataSets for the row and column spaces + dsr = op2.MixedDataSet([sr0**1, sr1**1]) + dsc = op2.MixedDataSet([sc0**1, sc1**1]) + # Blocked sparsity + sparsity = op2.Sparsity((dsr, dsc), [(mr, mc), ...]) + +The relationships of each component of the mixed maps and datasets to the +blocks of the :class:`~pyop2.Sparsity` is shown in the following diagram: + +.. 
figure:: images/mixed_sparsity.svg + :align: center + + The contribution of sets, maps and datasets to the blocked sparsity. + +Block sparsity patterns are computed separately for each block as described in +:ref:`sparsity_pattern` and the same validity rules apply. A +:class:`~pyop2.Mat` defined on a block :class:`~pyop2.Sparsity` has the same +block structure, which is implemented using a PETSc_ MATNEST_. + +Mixed Assembly +-------------- + +When assembling into a :class:`~pyop2.MixedDat` or a block +:class:`~pyop2.Mat`, the :class:`~pyop2.Kernel` produces a local tensor of the +same block structure, which is a combination of :ref:`local-iteration-spaces` +of all its subblocks. This is entirely transparent to the kernel however, +which sees the combined local iteration space. PyOP2 ensures that indirectly +accessed data is gathered and scattered via the correct maps and packed +together into a contiguous vector to be passed to the kernel. Contributions +from the local tensor are assembled into the correct blocks of the +:class:`~pyop2.MixedDat` or :class:`~pyop2.Mat`. + +Consider the following example :func:`~pyop2.par_loop` assembling a block +:class:`~pyop2.Mat`: + +.. code-block:: python + + it, cells, nodes = op2.Set(...), op2.Set(...), op2.Set(...) + mds = op2.MixedDataSet([nodes, cells]) + mmap = op2.MixedMap([op2.Map(it, nodes, 2, ...), op2.Map(it, cells, 1, ...)]) + mat = op2.Mat(op2.Sparsity(mds, mmap)) + d = op2.MixedDat(mds) + + op2.par_loop(kernel, it, + mat(op2.INC, (mmap[op2.i[0]], mmap[op2.i[1]])), + d(op2.read, mmap)) + +The ``kernel`` for this :func:`~pyop2.par_loop` assembles a 3x3 local tensor +and is passed an input vector of length 3 for each iteration set element: + +.. 
code-block:: c + + void kernel(double v[3][3] , double **d ) { + for (int i = 0; i<3; i++) + for (int j = 0; j<3; j++) + v[i][j] += d[i][0] * d[j][0]; + } + +The top-left 2x2 block of the local tensor is assembled into the (0,0) block +of the matrix, the top-right 2x1 block into (0,1), the bottom-left 1x2 block +into (1,0) and finally the bottom-right 1x1 block into (1,1). Note that for +the (0,0) block only the first component of the :class:`~pyop2.MixedDat` is +read and for the (1,1) block only the second component. For the (0,1) and +(1,0) blocks, both components of the :class:`~pyop2.MixedDat` are accessed. + +This diagram illustrates the assembly of the block :class:`~pyop2.Mat`: + +.. figure:: images/mixed_assembly.svg + :align: center + + Assembling into the blocks of a global matrix :math:`A`: block + :math:`A^{0,0}` uses maps :math:`\iota^{1,0}` and :math:`\iota^{2,0}`, + :math:`A^{0,1}` uses :math:`\iota^{1,0}` and :math:`\iota^{2,1}`, + :math:`A^{1,0}` uses :math:`\iota^{1,1}` and :math:`\iota^{2,0}` and finally + :math:`A^{1,1}` uses :math:`\iota^{1,1}` and :math:`\iota^{2,1}` for the row + and column spaces respectively. + +.. _PETSc: http://www.mcs.anl.gov/petsc/ +.. _MATNEST: http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MATNEST.html diff --git a/docs/source/old_pyop2/sphinx/source/mpi.rst b/docs/source/old_pyop2/sphinx/source/mpi.rst new file mode 100644 index 0000000000..360253cdab --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/mpi.rst @@ -0,0 +1,125 @@ +.. _mpi: + +MPI +=== + +Distributed parallel computations with MPI in PyOP2 require the mesh to be +partitioned among the processors. To be able to compute over entities on their +boundaries, partitions need to access data owned by neighboring processors. +This region, called the *halo*, needs to be kept up to date and is therefore +exchanged between the processors as required. 
+ +Local Numbering +--------------- + +The partition of each :class:`~pyop2.Set` local to each process consists of +entities *owned* by the process and the *halo*, which are entities owned by +other processes but required to compute on the boundary of the owned entities. +Each of these sections is again divided into two sections required to +efficiently overlap communication and computation and avoid communication +during matrix assembly as described below. Each locally stored +:class:`~pyop2.Set` entity therefore belongs to one of four categories: + +* **Core**: Entities owned by this processor which can be processed without + accessing halo data. +* **Owned**: Entities owned by this processor which access halo data when + processed. +* **Exec halo**: Off-processor entities which are redundantly executed over + because they touch owned entities. +* **Non-exec halo**: Off-processor entities which are not processed, but read + when computing the exec halo. + +The following diagram illustrates the four sections for a mesh distributed +among two processors: + +.. figure:: images/pyop2_mpi_mesh.svg + :align: center + + A mesh distributed among two processors with the entities of each mesh + partition divided into *core*, *owned*, *exec halo* and *non-exec halo*. + Matching halo sections are highlighted in matching colours. The owned + section of process 0 corresponds to the non-exec section of process 1. + +For data defined on the :class:`~pyop2.Set` to be stored contiguously per +section, local :class:`~pyop2.Set` entities must be numbered such that core +entities are first, followed by owned, exec halo and non-exec halo in that +order. A good partitioning maximises the size of the core section and +minimises the halo regions. We can therefore assume that the vast majority of +local :class:`~pyop2.Set` entities are in the core section.
+ +Computation-communication Overlap +--------------------------------- + +The ordering of :class:`~pyop2.Set` entities into four sections allows for a +very efficient overlap of computation and communication. Core entities that do +not access any halo data can be processed entirely without access to halo data +immediately after the halo exchange has been initiated. Execution over the +owned and exec halo regions requires up to date halo data and can only start +once the halo exchange is completed. Depending on the latency and bandwidth +of communication and the size of the core section relative to the halo, the +halo exchange may complete before the computation on the core section. + +The entire process is given below: :: + + halo_exchange_begin() # Initiate halo exchange + maybe_set_dat_dirty() # Mark Dats as modified + compute_if_not_empty(itset.core_part) # Compute core region + halo_exchange_end() # Wait for halo exchange + compute_if_not_empty(itset.owned_part) # Compute owned region + reduction_begin() # Initiate reductions + if needs_exec_halo: # Any indirect Dat not READ? + compute_if_not_empty(itset.exec_part) # Compute exec halo region + reduction_end() # Wait for reductions + maybe_set_halo_update_needed() # Mark halos as out of date + assemble() # Finalise matrix assembly + +Any reductions depend on data from the core and owned sections and are +initiated as soon as the owned section has been processed and execute +concurrently with computation on the exec halo. Similar to +`halo_exchange_begin` and `halo_exchange_end`, `reduction_begin` and +`reduction_end` do no work at all if none of the :func:`~pyop2.par_loop` +arguments requires a reduction. If the :func:`~pyop2.par_loop` assembles a +:class:`~pyop2.Mat`, the matrix assembly is finalised at the end. + +By dividing entities into sections according to their relation to the halo, +there is no need to check whether a given entity touches the halo or +not during computations on each section.
This avoids branching in kernels or +wrapper code and allows launching separate kernels for GPU execution of each +section. The :func:`~pyop2.par_loop` execution therefore has the above +structure for all backends. + +Halo exchange +------------- + +Exchanging halo data is only required if the halo data is actually read, which +is the case for :class:`~pyop2.Dat` arguments to a :func:`~pyop2.par_loop` +used in :data:`pyop2.READ` or :data:`pyop2.RW` mode. PyOP2 keeps track of +whether or not the halo region may have been modified. This is the case for +:class:`Dats <pyop2.Dat>` used in :data:`pyop2.INC`, :data:`pyop2.WRITE` or +:data:`pyop2.RW` mode or when a :class:`~pyop2.Solver` or a user requests +access to the data. A halo exchange is triggered only for halos marked as out +of date. + +Distributed Assembly +-------------------- + +For an MPI distributed matrix or vector, assembling owned entities at the +boundary can contribute to off-process degrees of freedom and vice versa. + +There are different ways of accounting for these off-process contributions. +PETSc_ supports insertion and subsequent communication of off-process matrix +and vector entries, however its implementation is not thread safe. Concurrent +insertion into PETSc_ MPI matrices *is* thread safe if off-process insertions +are not cached and concurrent writes to rows are avoided, which is done +through colouring as described in :ref:`plan-colouring`. + +PyOP2 therefore disables PETSc_'s off-process insertion feature and instead +redundantly computes over all off-process entities that touch local dofs, +which is the *exec halo* section described above. The price for this is +maintaining a larger halo, since we also need halo data, the *non-exec halo* +section, to perform the redundant computation.
Halos grow by about a factor +two, however in practice this is still small compared to the interior region +of a partition and the main cost of halo exchange is the latency, which is +independent of the exchanged data volume. + +.. _PETSc: http://www.mcs.anl.gov/petsc/ diff --git a/docs/source/old_pyop2/sphinx/source/plan.rst b/docs/source/old_pyop2/sphinx/source/plan.rst new file mode 100644 index 0000000000..613ca8ae29 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/plan.rst @@ -0,0 +1,80 @@ +.. _plan: + +Parallel Execution Plan +======================= + +For all PyOP2 backends with the exception of sequential, a parallel execution +plan is computed for each :func:`~pyop2.par_loop`. It contains information +guiding the code generator on how to partition, stage and colour the data for +efficient parallel processing. + +.. _plan-partitioning: + +Partitioning +------------ + +The iteration set is split into a number of equally sized and contiguous +mini-partitions such that the working set of each mini-partition fits into +shared memory or last level cache. This is unrelated to the partitioning +required for MPI as described in :ref:`mpi`. + +.. _plan-renumbering: + +Local Renumbering and Staging +----------------------------- + +While a mini-partition is a contiguous chunk of the iteration set, the +indirectly accessed data it references is not necessarily contiguous. For each +mini-partition and unique :class:`~pyop2.Dat`-:class:`~pyop2.Map` pair, a +mapping from local indices within the partition to global indices is +constructed as the sorted array of unique :class:`~pyop2.Map` indices accessed +by this partition. At the same time, a global-to-local mapping is constructed +as its inverse. + +Data for indirectly accessed :class:`~pyop2.Dat` arguments is staged in shared +device memory as described in :ref:`backends`. 
For each partition, the +local-to-global mapping indicates where data to be staged in is read from and +the global-to-local mapping gives the location in shared memory data has been +staged at. The amount of shared memory required is computed from the size of +the local-to-global mapping. + +.. _plan-colouring: + +Colouring +--------- + +A two-level colouring is used to avoid race conditions. Partitions are +coloured such that partitions of the same colour can be executed concurrently +and threads executing on a partition in parallel are coloured such that no two +threads indirectly reference the same data. Only :func:`~pyop2.par_loop` +arguments performing an indirect reduction or assembling a matrix require +colouring. Matrices are coloured per row. + +For each element of a :class:`~pyop2.Set` indirectly accessed in a +:func:`~pyop2.par_loop`, a bit vector is used to record which colours +indirectly reference it. To colour each thread within a partition, the +algorithm proceeds as follows: + +1. Loop over all indirectly accessed arguments and collect the colours of all + :class:`~pyop2.Set` elements referenced by the current thread in a bit mask. +2. Choose the next available colour as the colour of the current thread. +3. Loop over all :class:`~pyop2.Set` elements indirectly accessed by the + current thread again and set the new colour in their colour mask. + +Since the bit mask is a 32-bit integer, up to 32 colours can be processed in a +single pass, which is sufficient for most applications. If not all threads can +be coloured with 32 distinct colours, the mask is reset and another pass is +made, where each newly allocated colour is offset by 32. Should another pass +be required, the offset is increased to 64 and so on until all threads are +coloured. + +.. figure:: images/pyop2_colouring.svg + :align: center + + Thread colouring within a mini-partition for a :class:`~pyop2.Dat` on + vertices indirectly accessed in a computation over the edges. 
The edges are + coloured such that no two edges touch the same vertex within the partition. + +The colouring of mini-partitions is done in the same way, except that all +:class:`~pyop2.Set` elements indirectly accessed by the entire partition are +referenced, not only those accessed by a single thread. diff --git a/docs/source/old_pyop2/sphinx/source/profiling.rst b/docs/source/old_pyop2/sphinx/source/profiling.rst new file mode 100644 index 0000000000..aa7cc2baf8 --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/profiling.rst @@ -0,0 +1,170 @@ +Profiling +========= + +Profiling PyOP2 programs +------------------------ + +Profiling a PyOP2 program is as simple as profiling any other Python +code. You can profile the jacobi demo in the PyOP2 ``demo`` folder as +follows: :: + + python -m cProfile -o jacobi.dat jacobi.py + +This will run the entire program under cProfile_ and write the profiling +data to ``jacobi.dat``. Omitting ``-o`` will print a summary to stdout, +which is not very helpful in most cases. + +Creating a graph +................ + +There is a much more intuitive way of representing the profiling data +using the excellent gprof2dot_ to generate a graph. Install from `PyPI +<https://pypi.python.org/pypi/gprof2dot>`__ with :: + + sudo pip install gprof2dot + +Use as follows to create a PDF: :: + + gprof2dot -f pstats -n 1 jacobi.dat | dot -Tpdf -o jacobi.pdf + +``-f pstats`` tells ``gprof2dot`` that it is dealing with Python +cProfile_ data (and not actual *gprof* data) and ``-n 1`` ignores +everything that makes up less than 1% of the total runtime - most likely +you are not interested in that (the default is 0.5). + +Consolidating profiles from different runs +..........................................
+ +To aggregate profiling data from different runs, save the following as +``concat.py``: :: + + """Usage: concat.py PATTERN FILE""" + + import sys + from glob import glob + from pstats import Stats + + if len(sys.argv) != 3: + print __doc__ + sys.exit(1) + files = glob(sys.argv[1]) + s = Stats(files[0]) + for f in files[1:]: s.add(f) + s.dump_stats(sys.argv[2]) + +With profiles from different runs named ``<basename>.*.part``, use it +as :: + + python concat.py '<basename>.*.part' <basename>.dat + +and then call ``gprof2dot`` as before. + +Using PyOP2's internal timers +----------------------------- + +PyOP2 automatically times the execution of certain regions: + +* Sparsity building +* Plan construction +* Parallel loop kernel execution +* Halo exchange +* Reductions +* PETSc Krylov solver + +To output those timings, call :func:`~pyop2.profiling.summary` in your +PyOP2 program or run with the environment variable +``PYOP2_PRINT_SUMMARY`` set to 1. + +To query e.g. the timer for parallel loop execution programmatically, +use the :func:`~pyop2.profiling.timing` helper: :: + + from pyop2 import timing + timing("ParLoop compute") # get total time + timing("ParLoop compute", total=False) # get average time per call + +To add additional timers to your own code, you can use the +:func:`~pyop2.profiling.timed_region` and +:func:`~pyop2.profiling.timed_function` helpers: :: + + from pyop2.profiling import timed_region, timed_function + + with timed_region("my code"): + # my code + + @timed_function("my function") + def my_func(): + # my func + +Line-by-line profiling +---------------------- + +To get a line-by-line profile of a given function, install Robert Kern's +`line profiler`_ and: + +1. Import the :func:`~pyop2.profiling.profile` decorator: :: + + from pyop2.profiling import profile + +2. Decorate the function to profile with ``@profile`` +3. Run your script with ``kernprof.py -l <script.py>`` +4.
Generate an annotated source file with :: + + python -m line_profiler <script.py.lprof> + +Note that ``kernprof.py`` injects the ``@profile`` decorator into the +Python builtins namespace. PyOP2 provides a passthrough version of this +decorator which does nothing if ``profile`` is not found in +``__builtins__``. This means you can run your script regularly without +having to remove the decorators again. + +The :func:`~pyop2.profiling.profile` decorator also works with the +memory profiler (see below). PyOP2 therefore provides the +:func:`~pyop2.profiling.lineprof` decorator which is only enabled when +running with ``kernprof.py``. + +A number of PyOP2 internal functions are decorated such that running +your PyOP2 application with ``kernprof.py`` will produce a line-by-line +profile of the parallel loop computation (but not the generated code!). + +Memory profiling +---------------- + +To profile the memory usage of your application, install Fabian +Pedregosa's `memory profiler`_ and: + +1. Import the :func:`~pyop2.profiling.profile` decorator: :: + + from pyop2.profiling import profile + +2. Decorate the function to profile with ``@profile``. +3. Run your script with :: + + python -m memory_profiler <script.py> + + to get a line-by-line memory profile of your function. +4. Run your script with :: + + memprof run --python <script.py> + + to record memory usage of your program over time. +5. Generate a plot of the memory profile with ``memprof plot``. + +Note that ``memprof`` and ``python -m memory_profiler`` inject the +``@profile`` decorator into the Python builtins namespace. PyOP2 +provides a passthrough version of this decorator which does nothing if +``profile`` is not found in ``__builtins__``. This means you can run +your script regularly without having to remove the decorators again. + +The :func:`~pyop2.profiling.profile` decorator also works with the line +profiler (see above).
PyOP2 therefore provides the +:func:`~pyop2.profiling.memprof` decorator which is only enabled when +running with ``memprof``. + +A number of PyOP2 internal functions are decorated such that running +your PyOP2 application with ``memprof run`` will produce a memory +profile of the parallel loop computation (but not the generated code!). + +.. _cProfile: https://docs.python.org/2/library/profile.html#cProfile +.. _gprof2dot: https://code.google.com/p/jrfonseca/wiki/Gprof2Dot +.. _line profiler: https://pythonhosted.org/line_profiler/ +.. _memory profiler: https://github.com/fabianp/memory_profiler diff --git a/docs/source/old_pyop2/sphinx/source/user.rst b/docs/source/old_pyop2/sphinx/source/user.rst new file mode 100644 index 0000000000..c44b4d4c1f --- /dev/null +++ b/docs/source/old_pyop2/sphinx/source/user.rst @@ -0,0 +1,68 @@ +pyop2 user documentation +======================== + +:mod:`pyop2` Package +-------------------- + +.. automodule:: pyop2 + :members: + :show-inheritance: + :inherited-members: + + Initialization and finalization + ............................... + + .. autofunction:: init + .. autofunction:: exit + + Data structures + ............... + + .. autoclass:: Set + :inherited-members: + .. autoclass:: ExtrudedSet + :inherited-members: + .. autoclass:: Subset + :inherited-members: + .. autoclass:: MixedSet + :inherited-members: + .. autoclass:: DataSet + :inherited-members: + .. autoclass:: MixedDataSet + :inherited-members: + .. autoclass:: Map + :inherited-members: + .. autoclass:: MixedMap + :inherited-members: + .. autoclass:: Sparsity + :inherited-members: + + .. autoclass:: Const + :inherited-members: + .. autoclass:: Global + :inherited-members: + .. autoclass:: Dat + :inherited-members: + .. autoclass:: MixedDat + :inherited-members: + .. autoclass:: Mat + :inherited-members: + + Parallel loops, kernels and linear solves + ......................................... + + .. autofunction:: par_loop + .. autofunction:: solve + + .. 
autoclass:: Kernel + :inherited-members: + .. autoclass:: Solver + :inherited-members: + + .. autodata:: i + .. autodata:: READ + .. autodata:: WRITE + .. autodata:: RW + .. autodata:: INC + .. autodata:: MIN + .. autodata:: MAX diff --git a/docs/source/point-evaluation.rst b/docs/source/point-evaluation.rst index b14b459c69..0da44f8396 100644 --- a/docs/source/point-evaluation.rst +++ b/docs/source/point-evaluation.rst @@ -128,7 +128,7 @@ evaluation of a function :math:`f` defined in a function space :math:`V` on the parent mesh by interpolating into the P0DG space on the :func:`~.VertexOnlyMesh`. For example: -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_vertex_only_mesh_manual_example 1] @@ -223,19 +223,19 @@ tolerance ` will generate a :class:`~.VertexOnlyMeshMissingPointsError`. This can be switched to a warning or switched off entirely: -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_vom_manual_points_outside_domain 1] :end-before: [test_vom_manual_points_outside_domain 2] -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_vom_manual_points_outside_domain 3] :end-before: [test_vom_manual_points_outside_domain 4] -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. 
literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_vom_manual_points_outside_domain 5] @@ -265,7 +265,7 @@ where :math:`N` is the number of points, :math:`x_i` is the :math:`i`\th point, :math:`\delta` is a dirac delta distribition can therefore be written in Firedrake using :func:`~.VertexOnlyMesh` and :func:`~.interpolate` as -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_vom_manual_keyword_arguments 1] @@ -288,7 +288,7 @@ property: this produces another vertex-only mesh which has points in the order and MPI rank that they were specified when first creating the original vertex-only mesh. For example: -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_input_ordering_input 1] @@ -301,7 +301,7 @@ of a vertex-only mesh in a parallel-safe way. If we return to our example from :ref:`the section where we introduced vertex only meshes `, we had -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_vertex_only_mesh_manual_example 2] @@ -312,7 +312,7 @@ In parallel, this will print the values of ``f`` at the given ``points`` list values of ``f`` at the ``points`` list **before the points have been distributed** we can use :py:attr:`~.VertexOnlyMeshTopology.input_ordering` as follows: -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. 
literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_vertex_only_mesh_manual_example 4] @@ -336,7 +336,7 @@ the original vertex-only mesh. In the above example, the values would be zero at those points. To make it more obvious that those points were not found, it's a good idea to set the values to ``nan`` before the interpolation: -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_vertex_only_mesh_manual_example 6] @@ -379,7 +379,7 @@ If points are outside the mesh domain but ought to still be found a ``tolerance`` parameter can be set. The tolerance is relative to the size of the mesh cells and is a property of the mesh itself -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_mesh_tolerance 1] @@ -397,7 +397,7 @@ vertex-only mesh. This will modify the tolerance property of the parent mesh. before any point evaluations are performed, rather than making use of these keyword arguments. -.. literalinclude:: ../../tests/vertexonly/test_vertex_only_manual.py +.. 
literalinclude:: ../../tests/firedrake/vertexonly/test_vertex_only_manual.py :language: python3 :dedent: :start-after: [test_mesh_tolerance_change 1] diff --git a/firedrake/__init__.py b/firedrake/__init__.py index 0fc9aeeed6..3c89e61429 100644 --- a/firedrake/__init__.py +++ b/firedrake/__init__.py @@ -1,22 +1,9 @@ -import firedrake_configuration import os import sys -config = firedrake_configuration.get_config() -if "PETSC_DIR" in os.environ and not config["options"]["honour_petsc_dir"]: - if os.environ["PETSC_DIR"] != os.path.join(sys.prefix, "src", "petsc")\ - or os.environ["PETSC_ARCH"] != "default": - raise ImportError("PETSC_DIR is set, but you did not install with --honour-petsc-dir.\n" - "Please unset PETSC_DIR (and PETSC_ARCH) before using Firedrake.") -elif "PETSC_DIR" not in os.environ and config["options"]["honour_petsc_dir"]: - raise ImportError("Firedrake was installed with --honour-petsc-dir, but PETSC_DIR is not set.\n" - "Please set PETSC_DIR (and PETSC_ARCH) before using Firedrake.") -elif not config["options"]["honour_petsc_dir"]: # Using our own PETSC. - os.environ["PETSC_DIR"] = os.path.join(sys.prefix, "src", "petsc") - os.environ["PETSC_ARCH"] = "default" -del config +from firedrake.configuration import setup_cache_dirs # Set up the cache directories before importing PyOP2. -firedrake_configuration.setup_cache_dirs() +setup_cache_dirs() # Ensure petsc is initialised by us before anything else gets in there. 
# diff --git a/firedrake/configuration.py b/firedrake/configuration.py new file mode 100644 index 0000000000..e0a0c41d93 --- /dev/null +++ b/firedrake/configuration.py @@ -0,0 +1,12 @@ +"""Replaces functionality from the removed `firedrake_configuration` module.""" + +import os +from pathlib import Path + + +def setup_cache_dirs(): + root = Path(os.environ.get("VIRTUAL_ENV", "~")).joinpath(".cache") + if "PYOP2_CACHE_DIR" not in os.environ: + os.environ["PYOP2_CACHE_DIR"] = str(root.joinpath("pyop2")) + if 'FIREDRAKE_TSFC_KERNEL_CACHE_DIR' not in os.environ: + os.environ["FIREDRAKE_TSFC_KERNEL_CACHE_DIR"] = str(root.joinpath("tsfc")) diff --git a/firedrake/cython/dmcommon.pyx b/firedrake/cython/dmcommon.pyx index c35ff4cbd5..de0c63986e 100644 --- a/firedrake/cython/dmcommon.pyx +++ b/firedrake/cython/dmcommon.pyx @@ -255,9 +255,9 @@ def count_labelled_points(PETSc.DM dm, name, @cython.boundscheck(False) @cython.wraparound(False) def facet_numbering(PETSc.DM plex, kind, - np.ndarray[PetscInt, ndim=1, mode="c"] facets, + np.ndarray facets, PETSc.Section cell_numbering, - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closures): + np.ndarray cell_closures): """Compute the parent cell(s) and the local facet number within each parent cell for each given facet. 
@@ -271,8 +271,8 @@ def facet_numbering(PETSc.DM plex, kind, PetscInt f, fStart, fEnd, fi, cell PetscInt nfacets, nclosure, ncells, cells_per_facet const PetscInt *cells = NULL - np.ndarray[PetscInt, ndim=2, mode="c"] facet_cells - np.ndarray[PetscInt, ndim=2, mode="c"] facet_local_num + np.ndarray facet_cells + np.ndarray facet_local_num get_height_stratum(plex.dm, 1, &fStart, &fEnd) nfacets = facets.shape[0] @@ -540,7 +540,7 @@ def create_cell_closure(PETSc.DM dm, PetscInt closureSize = _closureSize, closureSize1 PetscInt *closure = NULL PetscInt *fiat_closure = NULL - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closure + np.ndarray cell_closure get_height_stratum(dm.dm, 0, &cStart, &cEnd) if cEnd == cStart: @@ -568,7 +568,7 @@ def create_cell_closure(PETSc.DM dm, def closure_ordering(PETSc.DM dm, PETSc.Section vertex_numbering, PETSc.Section cell_numbering, - np.ndarray[PetscInt, ndim=1, mode="c"] entity_per_cell): + np.ndarray entity_per_cell): """Apply Fenics local numbering to a cell closure. :arg dm: The DM object encapsulating the mesh topology @@ -595,7 +595,7 @@ def closure_ordering(PETSc.DM dm, PetscInt *face_indices = NULL const PetscInt *face_vertices = NULL PetscInt *facet_vertices = NULL - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closure + np.ndarray cell_closure dim = get_topological_dimension(dm) get_height_stratum(dm.dm, 0, &cStart, &cEnd) @@ -749,7 +749,7 @@ def closure_ordering(PETSc.DM dm, def quadrilateral_closure_ordering(PETSc.DM plex, PETSc.Section vertex_numbering, PETSc.Section cell_numbering, - np.ndarray[PetscInt, ndim=1, mode="c"] cell_orientations): + np.ndarray cell_orientations): """Cellwise orders mesh entities according to the given cell orientations. 
:arg plex: The DMPlex object encapsulating the mesh topology @@ -772,7 +772,7 @@ def quadrilateral_closure_ordering(PETSc.DM plex, PetscInt facets[4] const PetscInt *cell_cone = NULL int reverse - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closure + np.ndarray cell_closure get_height_stratum(plex.dm, 0, &cStart, &cEnd) get_height_stratum(plex.dm, 1, &fStart, &fEnd) @@ -1036,7 +1036,7 @@ cdef inline PetscInt _compute_orientation_interval_tensor_product(PetscInt *fiat cdef inline PetscInt _compute_orientation(PETSc.DM dm, - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closure, + np.ndarray cell_closure, PetscInt cell, PetscInt e, PetscInt *fiat_cone, @@ -1098,7 +1098,7 @@ cdef inline PetscInt _compute_orientation(PETSc.DM dm, @cython.wraparound(False) @cython.cdivision(True) def entity_orientations(mesh, - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closure): + np.ndarray cell_closure): """Compute entity orientations. :arg mesh: The :class:`~.MeshTopology` object encapsulating the mesh topology @@ -1120,7 +1120,7 @@ def entity_orientations(mesh, PetscInt *plex_cone_copy = NULL PetscInt *entity_cone_map = NULL PetscInt *entity_cone_map_offset = NULL - np.ndarray[PetscInt, ndim=2, mode="c"] entity_orientations + np.ndarray entity_orientations if type(mesh) is not firedrake.mesh.MeshTopology: raise TypeError(f"Unexpected mesh type: {type(mesh)}") @@ -1209,9 +1209,9 @@ def create_section(mesh, nodes_per_entity, on_base=False, block_size=1, boundary PetscInt dimension, ndof PetscInt *dof_array = NULL const PetscInt *entity_point_map - np.ndarray[PetscInt, ndim=2, mode="c"] nodes - np.ndarray[PetscInt, ndim=2, mode="c"] layer_extents - np.ndarray[PetscInt, ndim=1, mode="c"] points + np.ndarray nodes + np.ndarray layer_extents + np.ndarray points bint variable, extruded, on_base_ dm = mesh.topology_dm @@ -1343,7 +1343,7 @@ def get_cell_nodes(mesh, PETSc.Section global_numbering, entity_dofs, entity_permutations, - np.ndarray[PetscInt, ndim=1, mode="c"] offset): + np.ndarray 
offset): """ Builds the DoF mapping. @@ -1367,12 +1367,12 @@ def get_cell_nodes(mesh, PetscInt entity_permutations_size, num_orientations_size, perm_offset int *ceil_ndofs = NULL int *flat_index = NULL - np.ndarray[PetscInt, ndim=1, mode="c"] entity_permutations_c - np.ndarray[PetscInt, ndim=1, mode="c"] num_orientations_c - np.ndarray[PetscInt, ndim=2, mode="c"] cell_nodes - np.ndarray[PetscInt, ndim=2, mode="c"] layer_extents - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closures - np.ndarray[PetscInt, ndim=2, mode="c"] entity_orientations + np.ndarray entity_permutations_c + np.ndarray num_orientations_c + np.ndarray cell_nodes + np.ndarray layer_extents + np.ndarray cell_closures + np.ndarray entity_orientations bint is_swarm, variable, extruded_periodic_1_layer dm = mesh.topology_dm @@ -1457,8 +1457,8 @@ def get_cell_nodes(mesh, @cython.boundscheck(False) @cython.wraparound(False) -def get_facet_nodes(mesh, np.ndarray[PetscInt, ndim=2, mode="c"] cell_nodes, label, - np.ndarray[PetscInt, ndim=1, mode="c"] offset): +def get_facet_nodes(mesh, np.ndarray cell_nodes, label, + np.ndarray offset): """Build to DoF mapping from facets. :arg mesh: The mesh. 
@@ -1472,8 +1472,8 @@ def get_facet_nodes(mesh, np.ndarray[PetscInt, ndim=2, mode="c"] cell_nodes, lab PETSc.DM dm PETSc.Section cell_numbering DMLabel clabel = NULL - np.ndarray[PetscInt, ndim=2, mode="c"] facet_nodes - np.ndarray[PetscInt, ndim=2, mode="c"] layer_extents + np.ndarray facet_nodes + np.ndarray layer_extents PetscInt f, p, i, j, pStart, pEnd, fStart, fEnd, point PetscInt supportSize, facet, cell, ndof, dof const PetscInt *renumbering @@ -1555,8 +1555,8 @@ def facet_closure_nodes(V, sub_domain): PETSc.Section sec = V.dm.getSection() PETSc.DM dm = V.mesh().topology_dm PetscInt nnodes, p, i, dof, offset, n, j, d - np.ndarray[PetscInt, ndim=1, mode="c"] points - np.ndarray[PetscInt, ndim=1, mode="c"] nodes + np.ndarray points + np.ndarray nodes if sub_domain == "on_boundary": label = "exterior_facets" sub_domain = (1, ) @@ -1672,7 +1672,7 @@ def complete_facet_labels(PETSc.DM dm): @cython.wraparound(False) def cell_facet_labeling(PETSc.DM plex, PETSc.Section cell_numbering, - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closures): + np.ndarray cell_closures): """Computes a labeling for the facet numbers on a particular cell (interior and exterior facet labels with subdomain markers). 
The i-th local facet is represented as: @@ -1802,7 +1802,7 @@ def transform_vec_from_firedrake_to_petsc(PETSc.DM dm, const PetscScalar *firedrake_array PetscScalar *petsc_array PetscInt n, bs, petsc_n, petsc_bs, pStart, pEnd, firedrake_pStart, firedrake_pEnd, petsc_pStart, petsc_pEnd, p, firedrake_dof, petsc_dof, total_dof = 0, firedrake_offset, petsc_offset, i, j, height - np.ndarray[PetscInt, ndim=1, mode="c"] ndofs, perm, perm_offsets + np.ndarray ndofs, perm, perm_offsets n, _ = firedrake_vec.getSizes() petsc_n, _ = petsc_vec.getSizes() @@ -1912,8 +1912,8 @@ def reordered_coords(PETSc.DM dm, PETSc.Section global_numbering, shape, referen PETSc.Section dm_sec, coord_sec PetscInt v, vStart, vEnd, offset, dm_offset, c, cStart, cEnd PetscInt i, j, dim = shape[1] - np.ndarray[PetscScalar, ndim=2, mode="c"] dm_coords, coords - np.ndarray[PetscInt, ndim=1, mode="c"] ndofs, perm, perm_offsets + np.ndarray dm_coords, coords + np.ndarray ndofs, perm, perm_offsets get_depth_stratum(dm.dm, 0, &vStart, &vEnd) if isinstance(dm, PETSc.DMPlex): @@ -2118,8 +2118,8 @@ def get_entity_classes(PETSc.DM dm): :arg dm: The DM object encapsulating the mesh topology """ cdef: - np.ndarray[PetscInt, ndim=2, mode="c"] entity_class_sizes - np.ndarray[PetscInt, mode="c"] eStart, eEnd + np.ndarray entity_class_sizes + np.ndarray eStart, eEnd PetscInt depth, d, i, ci, class_size, start, end const PetscInt *indices = NULL PETSc.IS class_is @@ -2170,8 +2170,8 @@ def get_cell_markers(PETSc.DM dm, PETSc.Section cell_numbering, """ cdef: PetscInt i, j, n, offset, c, cStart, cEnd, ncells - np.ndarray[PetscInt, ndim=1, mode="c"] cells - np.ndarray[PetscInt, ndim=1, mode="c"] indices + np.ndarray cells + np.ndarray indices if not dm.hasLabel(CELL_SETS_LABEL): return np.empty(0, dtype=IntType) @@ -2225,7 +2225,7 @@ def get_facet_ordering(PETSc.DM plex, PETSc.Section facet_numbering): """ cdef: PetscInt fi, fStart, fEnd, offset - np.ndarray[PetscInt, ndim=1, mode="c"] facets + np.ndarray facets size = 
facet_numbering.getStorageSize() facets = np.empty(size, dtype=IntType) @@ -2239,7 +2239,7 @@ def get_facet_ordering(PETSc.DM plex, PETSc.Section facet_numbering): @cython.boundscheck(False) @cython.wraparound(False) def get_facets_by_class(PETSc.DM plex, label, - np.ndarray[PetscInt, ndim=1, mode="c"] ordering): + np.ndarray ordering): """Builds a list of all facets ordered according to PyOP2 entity classes and computes the respective class offsets. @@ -2254,7 +2254,7 @@ def get_facets_by_class(PETSc.DM plex, label, PETSc.IS class_is = None PetscBool has_point, is_class DMLabel lbl_facets, lbl_class - np.ndarray[PetscInt, ndim=1, mode="c"] facets + np.ndarray facets dim = get_topological_dimension(plex) get_height_stratum(plex.dm, 1, &fStart, &fEnd) @@ -2330,7 +2330,7 @@ def validate_mesh(PETSc.DM dm): @cython.wraparound(False) def plex_renumbering(PETSc.DM plex, np.ndarray entity_classes, - np.ndarray[PetscInt, ndim=1, mode="c"] reordering=None, + np.ndarray reordering=None, boundary_set=None): """ Build a global node renumbering as a permutation of Plex points. 
@@ -2358,7 +2358,7 @@ def plex_renumbering(PETSc.DM plex, cdef: PetscInt dim, cStart, cEnd, nfacets, nclosure, c, ci, l, p, f PetscInt pStart, pEnd, cell - np.ndarray[PetscInt, ndim=1, mode="c"] lidx, ncells + np.ndarray lidx, ncells PetscInt *facets = NULL PetscInt *closure = NULL PetscInt *perm = NULL @@ -2476,7 +2476,7 @@ def get_cell_remote_ranks(PETSc.DM plex): PetscInt nroots, nleaves const PetscInt *ilocal = NULL const PetscSFNode *iremote = NULL - np.ndarray[PetscInt, ndim=1, mode="c"] result + np.ndarray result get_height_stratum(plex.dm, 0, &cStart, &cEnd) ncells = cEnd - cStart @@ -2581,7 +2581,7 @@ cdef int CommFacet_cmp(const void *x_, const void *y_) noexcept nogil: @cython.wraparound(False) cdef inline void get_communication_lists( PETSc.DM plex, PETSc.Section vertex_numbering, - np.ndarray[PetscInt, ndim=1, mode="c"] cell_ranks, + np.ndarray cell_ranks, # Output parameters: PetscInt *nranks, PetscInt **ranks, PetscInt **offsets, PetscInt **facets, PetscInt **facet2index): @@ -2892,7 +2892,7 @@ cdef locally_orient_quadrilateral_plex(PETSc.DM plex, PetscInt start_facet, end_facet np.int8_t twist PetscInt i, j - np.ndarray[PetscInt, ndim=1, mode="c"] result + np.ndarray result get_height_stratum(plex.dm, 1, &fStart, &fEnd) nfacets = fEnd - fStart @@ -2973,8 +2973,8 @@ cdef locally_orient_quadrilateral_plex(PETSc.DM plex, @cython.wraparound(False) cdef inline void exchange_edge_orientation_data( PetscInt nranks, PetscInt *ranks, PetscInt *offsets, - np.ndarray[PetscInt, ndim=1, mode="c"] ours, - np.ndarray[PetscInt, ndim=1, mode="c"] theirs, + np.ndarray ours, + np.ndarray theirs, MPI.Comm comm): """Exchange edge orientation data between neighbouring MPI nodes. 
@@ -3008,7 +3008,7 @@ cdef inline void exchange_edge_orientation_data( @cython.wraparound(False) def quadrilateral_facet_orientations( PETSc.DM plex, PETSc.Section vertex_numbering, - np.ndarray[PetscInt, ndim=1, mode="c"] cell_ranks): + np.ndarray cell_ranks): """Returns globally synchronised facet orientations (edge directions) incident to locally owned quadrilateral cells. @@ -3028,8 +3028,8 @@ def quadrilateral_facet_orientations( MPI.Comm comm = plex.comm.tompi4py() PetscInt nfacets, nfacets_shared, fStart, fEnd - np.ndarray[PetscInt, ndim=1, mode="c"] affects - np.ndarray[PetscInt, ndim=1, mode="c"] ours, theirs + np.ndarray affects + np.ndarray ours, theirs PetscInt conflict, value, f, i, j PetscInt ci, size @@ -3153,7 +3153,7 @@ def quadrilateral_facet_orientations( @cython.wraparound(False) def orientations_facet2cell( PETSc.DM plex, PETSc.Section vertex_numbering, - np.ndarray[PetscInt, ndim=1, mode="c"] cell_ranks, + np.ndarray cell_ranks, np.ndarray[np.int8_t, ndim=1, mode="c"] facet_orientations, PETSc.Section cell_numbering): @@ -3174,7 +3174,7 @@ def orientations_facet2cell( np.int8_t dst_orient[4] int i, off PetscInt facet, v, V - np.ndarray[PetscInt, ndim=1, mode="c"] cell_orientations + np.ndarray cell_orientations get_height_stratum(plex.dm, 0, &cStart, &cEnd) get_height_stratum(plex.dm, 1, &fStart, &fEnd) @@ -3251,7 +3251,7 @@ def orientations_facet2cell( @cython.wraparound(False) def exchange_cell_orientations( PETSc.DM plex, PETSc.Section section, - np.ndarray[PetscInt, ndim=1, mode="c"] orientations): + np.ndarray orientations): """Halo exchange of cell orientations. 
@@ -3311,7 +3311,7 @@ def make_global_numbering(PETSc.Section lsec, PETSc.Section gsec): :arg gsec: Section describing global dof layout and numbers.""" cdef: PetscInt c, p, pStart, pEnd, dof, cdof, loff, goff - np.ndarray[PetscInt, ndim=1, mode="c"] val + np.ndarray val val = np.empty(lsec.getStorageSize(), dtype=IntType) pStart, pEnd = lsec.getChart() @@ -3465,8 +3465,8 @@ def compute_point_cone_global_sizes(PETSc.DM dm): const PetscInt *ilocal = NULL const PetscSFNode *iremote = NULL PetscInt i, p, pStart, pEnd, coneSize - np.ndarray[PetscInt, ndim=1, mode="c"] arraySizes - np.ndarray[PetscInt, ndim=1, mode="c"] out + np.ndarray arraySizes + np.ndarray out sf = dm.getPointSF() CHKERR(PetscSFGetGraph(sf.sf, NULL, &nleaves, &ilocal, NULL)) @@ -3492,7 +3492,7 @@ def compute_point_cone_global_sizes(PETSc.DM dm): def mark_points_with_function_array(PETSc.DM plex, PETSc.Section section, PetscInt height, - np.ndarray[PetscInt, ndim=1, mode="c"] array, + np.ndarray array, PETSc.DMLabel dmlabel, PetscInt label_value): @@ -3529,7 +3529,7 @@ def to_petsc_local_numbering(PETSc.Vec vec, V): cdef PetscInt dof, off cdef PETSc.Vec out cdef PETSc.Section section - cdef np.ndarray[PetscReal, mode="c", ndim=1] varray, oarray + cdef np.ndarray varray, oarray section = V.dm.getGlobalSection() out = vec.duplicate() @@ -3581,8 +3581,8 @@ def create_halo_exchange_sf(PETSc.DM dm): const PetscSFNode *iremote = NULL PETSc.Section local_sec PetscInt pStart, pEnd, p, dof, off, m, n, i, j - np.ndarray[PetscInt, ndim=1, mode="c"] local_offsets - np.ndarray[PetscInt, ndim=1, mode="c"] remote_offsets + np.ndarray local_offsets + np.ndarray remote_offsets point_sf = dm.getPointSF() local_sec = dm.getLocalSection() @@ -3682,8 +3682,8 @@ def submesh_correct_entity_classes(PETSc.DM dm, const PetscSFNode *iremote = NULL PETSc.IS subpoint_is const PetscInt *subpoint_indices = NULL - np.ndarray[PetscInt, ndim=1, mode="c"] ownership_loss - np.ndarray[PetscInt, ndim=1, mode="c"] ownership_gain + 
np.ndarray ownership_loss + np.ndarray ownership_gain DMLabel lbl_core, lbl_owned, lbl_ghost PetscBool has @@ -3812,7 +3812,7 @@ def submesh_create_cell_closure_cell_submesh(PETSc.DM subdm, PETSc.DM dm, PETSc.Section subcell_numbering, PETSc.Section cell_numbering, - np.ndarray[PetscInt, ndim=2, mode="c"] cell_closure): + np.ndarray cell_closure): """Inherit cell_closure from parent. Parameters @@ -3836,7 +3836,7 @@ def submesh_create_cell_closure_cell_submesh(PETSc.DM subdm, PetscInt subpStart, subpEnd, subp, subcStart, subcEnd, subc, subcell PetscInt pStart, pEnd, p, cStart, cEnd, c, cell PetscInt nclosure, cl - np.ndarray[PetscInt, ndim=2, mode="c"] subcell_closure + np.ndarray subcell_closure get_chart(subdm.dm, &subpStart, &subpEnd) get_height_stratum(subdm.dm, 0, &subcStart, &subcEnd) diff --git a/firedrake/cython/mgimpl.pyx b/firedrake/cython/mgimpl.pyx index 2867b5e79f..b9b41bd32f 100644 --- a/firedrake/cython/mgimpl.pyx +++ b/firedrake/cython/mgimpl.pyx @@ -29,8 +29,8 @@ def get_entity_renumbering(PETSc.DM plex, PETSc.Section section, entity_type): """ cdef: PetscInt start, end, p, ndof, entity - np.ndarray[PetscInt, ndim=1] old_to_new - np.ndarray[PetscInt, ndim=1] new_to_old + np.ndarray old_to_new + np.ndarray new_to_old if entity_type == "cell": start, end = plex.getHeightStratum(0) @@ -55,10 +55,10 @@ def get_entity_renumbering(PETSc.DM plex, PETSc.Section section, entity_type): @cython.boundscheck(False) @cython.wraparound(False) -def coarse_to_fine_nodes(Vc, Vf, np.ndarray[PetscInt, ndim=2, mode="c"] coarse_to_fine_cells): +def coarse_to_fine_nodes(Vc, Vf, np.ndarray coarse_to_fine_cells): cdef: - np.ndarray[PetscInt, ndim=2, mode="c"] fine_map, coarse_map, coarse_to_fine_map - np.ndarray[PetscInt, ndim=1, mode="c"] coarse_offset, fine_offset + np.ndarray fine_map, coarse_map, coarse_to_fine_map + np.ndarray coarse_offset, fine_offset PetscInt i, j, k, l, m, node, fine, layer PetscInt coarse_per_cell, fine_per_cell, fine_cell_per_coarse_cell, 
coarse_cells PetscInt fine_layer, fine_layers, coarse_layer, coarse_layers, ratio @@ -116,10 +116,10 @@ def coarse_to_fine_nodes(Vc, Vf, np.ndarray[PetscInt, ndim=2, mode="c"] coarse_t @cython.boundscheck(False) @cython.wraparound(False) -def fine_to_coarse_nodes(Vf, Vc, np.ndarray[PetscInt, ndim=2, mode="c"] fine_to_coarse_cells): +def fine_to_coarse_nodes(Vf, Vc, np.ndarray fine_to_coarse_cells): cdef: - np.ndarray[PetscInt, ndim=2, mode="c"] fine_map, coarse_map, fine_to_coarse_map - np.ndarray[PetscInt, ndim=1, mode="c"] coarse_offset, fine_offset + np.ndarray fine_map, coarse_map, fine_to_coarse_map + np.ndarray coarse_offset, fine_offset PetscInt i, j, k, node, fine_layer, fine_layers, coarse_layer, coarse_layers, ratio PetscInt coarse_per_cell, fine_per_cell, coarse_cell, fine_cells bint extruded @@ -247,9 +247,9 @@ def coarse_to_fine_cells(mc, mf, clgmaps, flgmaps): PETSc.DM cdm, fdm PetscInt cStart, cEnd, c, val, dim, nref, ncoarse PetscInt i, ccell, fcell, nfine - np.ndarray[PetscInt, ndim=2, mode="c"] coarse_to_fine - np.ndarray[PetscInt, ndim=2, mode="c"] fine_to_coarse - np.ndarray[PetscInt, ndim=1, mode="c"] co2n, fn2o, idx + np.ndarray coarse_to_fine + np.ndarray fine_to_coarse + np.ndarray co2n, fn2o, idx cdm = mc.topology_dm fdm = mf.topology_dm diff --git a/firedrake/cython/petschdr.pxi b/firedrake/cython/petschdr.pxi index 41c8974f83..55786e7184 100644 --- a/firedrake/cython/petschdr.pxi +++ b/firedrake/cython/petschdr.pxi @@ -5,14 +5,10 @@ cimport numpy as np cdef extern from "mpi-compat.h": pass -IF COMPLEX: - ctypedef np.complex128_t PetscScalar -ELSE: - ctypedef double PetscScalar - cdef extern from "petsc.h": ctypedef long PetscInt ctypedef double PetscReal + ctypedef double PetscScalar ctypedef enum PetscBool: PETSC_TRUE, PETSC_FALSE ctypedef enum PetscCopyMode: diff --git a/firedrake/cython/supermeshimpl.pyx b/firedrake/cython/supermeshimpl.pyx index 6ca3d557ef..ad3387cd82 100644 --- a/firedrake/cython/supermeshimpl.pyx +++ 
b/firedrake/cython/supermeshimpl.pyx @@ -39,23 +39,23 @@ cdef extern from "libsupermesh-c.h" nogil: # compute out = R_BS^T @ M_SS @ R_AS with dense matrix triple product # stuff out into relevant part of M_AB (given by outer(dofs_B, dofs_A)) def assemble_mixed_mass_matrix(V_A, V_B, candidates, - numpy.ndarray[PetscScalar, ndim=2, mode="c"] node_locations_A, - numpy.ndarray[PetscScalar, ndim=2, mode="c"] node_locations_B, - numpy.ndarray[PetscScalar, ndim=2, mode="c"] M_SS, + numpy.ndarray node_locations_A, + numpy.ndarray node_locations_B, + numpy.ndarray M_SS, lib, PETSc.Mat mat not None): cdef: numpy.ndarray[PetscInt, ndim=2, mode="c"] V_A_cell_node_map numpy.ndarray[PetscInt, ndim=2, mode="c"] V_B_cell_node_map numpy.ndarray[PetscInt, ndim=2, mode="c"] vertex_map_A, vertex_map_B - numpy.ndarray[PetscScalar, ndim=2, mode="c"] vertices_A, vertices_B - numpy.ndarray[PetscScalar, ndim=2, mode="c"] outmat + numpy.ndarray vertices_A, vertices_B + numpy.ndarray outmat PetscInt cell_A, cell_B, i, gdim, num_dof_A, num_dof_B PetscInt num_cell_B, num_cell_A, num_vertices PetscInt insert_mode = PETSc.InsertMode.ADD_VALUES const PetscInt *V_A_map const PetscInt *V_B_map - numpy.ndarray[PetscScalar, ndim=2, mode="c"] simplex_A, simplex_B - numpy.ndarray[PetscScalar, ndim=3, mode="c"] simplices_C + numpy.ndarray simplex_A, simplex_B + numpy.ndarray simplices_C compiled_call library_call = (lib)[0] num_cell_A = V_A.mesh().cell_set.size @@ -112,10 +112,10 @@ def intersection_finder(mesh_A, mesh_B): # Return the output cdef: - numpy.ndarray[long, ndim=2, mode="c"] vertex_map_A, vertex_map_B - numpy.ndarray[double, ndim=2, mode="c"] vertices_A, vertices_B + numpy.ndarray vertex_map_A, vertex_map_B + numpy.ndarray vertices_A, vertices_B long nindices - numpy.ndarray[long, ndim=1, mode="c"] indices, indptr + numpy.ndarray indices, indptr long nnodes_A, nnodes_B, ncells_A, ncells_B int dim_A, dim_B, loc_A, loc_B diff --git a/firedrake/scripts/__init__.py 
b/firedrake/scripts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/firedrake-zenodo b/firedrake/scripts/firedrake-zenodo similarity index 99% rename from scripts/firedrake-zenodo rename to firedrake/scripts/firedrake-zenodo index 68f1d44332..6194d9df1d 100755 --- a/scripts/firedrake-zenodo +++ b/firedrake/scripts/firedrake-zenodo @@ -20,7 +20,6 @@ ZENODO_URL = "https://zenodo.org/api" # And the same for slepc4py. descriptions = OrderedDict([ ("firedrake", "an automated finite element system"), - ("PyOP2", "Framework for performance-portable parallel computations on unstructured meshes"), ("tsfc", "The Two Stage Form Compiler"), ("ufl", "The Unified Form Language"), ("FInAT", "a smarter library of finite elements"), @@ -33,7 +32,6 @@ descriptions = OrderedDict([ projects = dict( [("firedrake", "firedrakeproject"), - ("PyOP2", "OP2"), ("tsfc", "firedrakeproject"), ("ufl", "firedrakeproject"), ("FInAT", "FInAT"), diff --git a/firedrake/scripts/firedrake_clean.py b/firedrake/scripts/firedrake_clean.py new file mode 100755 index 0000000000..f411d498c3 --- /dev/null +++ b/firedrake/scripts/firedrake_clean.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +import os +import shutil +from firedrake.configuration import setup_cache_dirs +from pyop2.compilation import clear_compiler_disk_cache as pyop2_clear_cache +from firedrake.tsfc_interface import clear_cache as tsfc_clear_cache +try: + import platformdirs as appdirs +except ImportError: + import appdirs + + +def main(): + print("Setup cache directories") + setup_cache_dirs() + + print(f"Removing cached TSFC kernels from {os.environ.get('FIREDRAKE_TSFC_KERNEL_CACHE_DIR', '???')}") + tsfc_clear_cache() + + print(f"Removing cached PyOP2 code from {os.environ.get('PYOP2_CACHE_DIR', '???')}") + pyop2_clear_cache() + + pytools_cache = appdirs.user_cache_dir("pytools", "pytools") + print(f"Removing cached pytools files from {pytools_cache}") + if os.path.exists(pytools_cache): + 
shutil.rmtree(pytools_cache, ignore_errors=True) + + +if __name__ == '__main__': + main() diff --git a/firedrake/scripts/firedrake_preprocess_bibtex.py b/firedrake/scripts/firedrake_preprocess_bibtex.py new file mode 100755 index 0000000000..4fd3484feb --- /dev/null +++ b/firedrake/scripts/firedrake_preprocess_bibtex.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +import io +from argparse import ArgumentParser, RawDescriptionHelpFormatter +try: + from bibtexparser.bwriter import BibTexWriter + import bibtexparser +except ImportError: + raise ImportError("Failed to import bibtexparser. Run:\n firedrake-update --documentation-dependencies") + + +def main(): + parser = ArgumentParser(description="""Ensure BibTeX entries for inclusion in the Firedrake website have a +URL or DOI, and impose clean formatting.""", + formatter_class=RawDescriptionHelpFormatter) + parser.add_argument("bibtex_file", help="The BibTeX file to process") + parser.add_argument("--validate", action='store_true', + help="Instead of rewriting the bibtex file, raise an exception if anything would have changed.") + + args = parser.parse_args() + + filename = args.bibtex_file + + parser = bibtexparser.bparser.BibTexParser() + parser.common_strings = True + parser.ignore_nonstandard_types = False + + with open(filename) as bibtex_file: + bib_database = parser.parse_file(bibtex_file) + + for entry in bib_database.entries: + if "url" not in entry and \ + "doi" not in entry: + if entry.get("archiveprefix", None) == "arXiv": + entry["url"] = "https://arxiv.org/abs/" + entry["eprint"] + else: + raise ValueError("%s in bibliograpy %s\n has no url and no DOI.\n" % (entry["ID"], filename)) + + writer = BibTexWriter() + writer.indent = ' ' # indent entries with 2 spaces instead of one + writer.align_values = True + + if args.validate: + with io.StringIO() as outbuffer: + outbuffer.write(writer.write(bib_database)) + processed = outbuffer.getvalue() + with open(filename) as bibtex_file: + inbuffer = 
bibtex_file.read() + if processed != inbuffer: + raise ValueError("%s would be changed by firedrake-preprocess-bibtex. Please preprocess it and commit the result" % filename) + + else: + with open(filename, 'w') as bibfile: + bibfile.write(writer.write(bib_database)) + + +if __name__ == "__main__": + main() diff --git a/firedrake/scripts/firedrake_status.py b/firedrake/scripts/firedrake_status.py new file mode 100755 index 0000000000..a256d70b69 --- /dev/null +++ b/firedrake/scripts/firedrake_status.py @@ -0,0 +1,144 @@ +#! /usr/bin/env python3 +from six import iteritems + +from argparse import ArgumentParser, RawDescriptionHelpFormatter +from pprint import pformat +import logging +import json +import sys +import os +import subprocess +from collections import OrderedDict, defaultdict + + +def check_output(args, env=None): + return subprocess.check_output(args, stderr=subprocess.STDOUT, env=env) + + +def quit(log, message): + log.error(message) + sys.exit(1) + + +def main(): + parser = ArgumentParser(description="""Provide information on the currently downloaded version of Firedrake and its configuration. 
+ This is particularly useful information to include when reporting bugs.""", + formatter_class=RawDescriptionHelpFormatter) + parser.add_argument("--log", action='store_true', + help="Log the output of the script to firedrake-status.log as well as to the console.") + + args = parser.parse_args() + + # Set up logging + if args.log: + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(levelname)-6s %(message)s', + filename='firedrake-status.log', + filemode='w') + console = logging.StreamHandler() + console.setLevel(logging.WARNING) + formatter = logging.Formatter('%(message)s') + console.setFormatter(formatter) + logging.getLogger().addHandler(console) + else: + logging.basicConfig(level=logging.WARNING, + format='%(message)s') + log = logging.getLogger() + + try: + firedrake_env = os.environ["VIRTUAL_ENV"] + except KeyError: + quit(log, "Unable to retrieve virtualenv name from the environment.\n Please ensure the virtualenv is active before running firedrake-update.") + + try: + with open(os.path.join(os.environ["VIRTUAL_ENV"], + ".configuration.json"), "r") as f: + config = json.load(f) + except FileNotFoundError: + config = defaultdict(dict) + + try: + config["system"] = check_output(["uname", "-a"]) + except subprocess.CalledProcessError: + log.error("Failed to retrieve system information.") + + print("Firedrake Configuration:") + if not config: + print("No configuration information found.") + else: + for key, val in iteritems(config["options"]): + print(" {}: {}".format(key, val)) + + print("Additions:") + if config["additions"]: + for a in config["additions"]: + print(" " + a) + else: + print(" None") + + for var in ["PYTHONPATH", "PETSC_ARCH", "PETSC_DIR"]: + config["environment"][var] = os.environ.get(var, None) + + print("Environment:") + for key, val in iteritems(config["environment"]): + print(" {}: {}".format(key, val)) + + status = OrderedDict() + for dir in sorted(os.listdir(firedrake_env + "/src")): + try: + os.chdir(firedrake_env + 
"/src/" + dir) + except OSError as e: + if e.errno == 20: + # Not a directory + continue + else: + raise + try: + revision = check_output(["git", "rev-parse", "--short", "HEAD"]).decode('ascii').strip() + branch = check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"]).decode('ascii').strip() + except subprocess.CalledProcessError: + log.error("Unable to retrieve git information from %s." % dir) + else: + try: + _ = check_output(["git", "diff-index", "--quiet", "HEAD"]) + modified = False + except subprocess.CalledProcessError: + modified = True + + status[dir] = {"branch": branch, + "revision": revision, + "modified": modified} + + status_string = "" + status_string += "Status of components:\n" + componentformat = "|{:20}|{:30}|{:10}|{!s:10}|\n" + header = componentformat.format("Package", "Branch", "Revision", "Modified") + line = "-" * (len(header) - 1) + "\n" + status_string += line + header + line + for dir, d in iteritems(status): + status_string += componentformat.format(dir, d["branch"], d["revision"], d["modified"]) + status_string += line + + print(status_string) + log.info("\n" + status_string) + + log.info("Firedrake configuration: ") + log.info(pformat(config)) + log.debug("\nPip packages installed:") + try: + log.debug(check_output(["pip", "freeze"])) + except subprocess.CalledProcessError: + log.error("""Failed to retrieve list of pip installed packages. Try running: + + pip freeze. 
+ + """) + log.debug("\n Full environment:") + try: + log.debug(check_output(["env"])) + except subprocess.CalledProcessError: + log.error("""Shell command env failed.""") + + +if __name__ == "__main__": + main() diff --git a/firedrake/utils.py b/firedrake/utils.py index 2dd768fb28..e8bda53b95 100644 --- a/firedrake/utils.py +++ b/firedrake/utils.py @@ -7,7 +7,8 @@ from pyop2.datatypes import IntType # noqa: F401 from pyop2.datatypes import as_ctypes # noqa: F401 from pyop2.mpi import MPI -from firedrake_configuration import get_config +from firedrake.petsc import get_petsc_variables + # MPI key value for storing a per communicator universal identifier FIREDRAKE_UID = MPI.Comm.Create_keyval() @@ -16,7 +17,7 @@ ScalarType_c = as_cstr(ScalarType) IntType_c = as_cstr(IntType) -complex_mode = get_config()["options"].get("complex", False) +complex_mode = (get_petsc_variables()["PETSC_SCALAR"].lower() == "complex") # Remove this (and update test suite) when Slate supports complex mode. SLATE_SUPPORTS_COMPLEX = False diff --git a/firedrake_configuration/__init__.py b/firedrake_configuration/__init__.py deleted file mode 100644 index b7970ab0b7..0000000000 --- a/firedrake_configuration/__init__.py +++ /dev/null @@ -1,58 +0,0 @@ -"""The :mod:`firedrake_configuration` module records the configuration -with which Firedrake was last installed or updated. It is a separate -package from Firedrake in order to ensure that `firedrake-update` can -always access the configuration, even if the :mod:`.firedrake` module -itself is broken.""" - -import json -import os -import sys -import petsc4py - -# Attempt to read configuration from file. -try: - with open(os.path.join(sys.prefix, - ".configuration.json"), "r") as f: - _config = json.load(f) - -except IOError: - # Fall back to old location. 
- try: - with open(os.path.join(os.path.dirname(__file__), - "configuration.json"), "r") as f: - _config = json.load(f) - - except IOError: - _config = {} - - -def petsc_packages(): - conf = petsc4py.get_config() - with open(os.path.join(conf["PETSC_DIR"], conf["PETSC_ARCH"], "include", "petscconf.h"), "r") as f: - *_, packages = next(line for line in f if line.startswith("#define PETSC_HAVE_PACKAGES")).split() - return set(packages[2:-2].split(":")) - - -options = _config.get("options", {}) -options["with_parmetis"] = "parmetis" in petsc_packages() -_config["options"] = options - - -def get_config(): - """Return the current configuration dictionary""" - return _config - - -def get_config_json(): - """Return a json serialisation of the current configuration. This - could be output by a Firedrake application to assist in the - reproduction of results.""" - return json.dumps(_config) - - -def setup_cache_dirs(): - config = get_config() - if "PYOP2_CACHE_DIR" not in os.environ: - os.environ["PYOP2_CACHE_DIR"] = os.path.join(config["options"]["cache_dir"], "pyop2") - if 'FIREDRAKE_TSFC_KERNEL_CACHE_DIR' not in os.environ: - os.environ["FIREDRAKE_TSFC_KERNEL_CACHE_DIR"] = os.path.join(config["options"]["cache_dir"], "tsfc") diff --git a/pyop2/__init__.py b/pyop2/__init__.py new file mode 100644 index 0000000000..e9aeadf54a --- /dev/null +++ b/pyop2/__init__.py @@ -0,0 +1,12 @@ +""" +PyOP2 is a library for parallel computations on unstructured meshes. +""" +from pyop2.op2 import * # noqa +from pyop2.version import __version_info__ # noqa: just expose + +from pyop2._version import get_versions +__version__ = get_versions()['version'] +del get_versions + +from . 
import _version +__version__ = _version.get_versions()['version'] diff --git a/pyop2/_version.py b/pyop2/_version.py new file mode 100644 index 0000000000..d9db778c32 --- /dev/null +++ b/pyop2/_version.py @@ -0,0 +1,658 @@ + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. +# Generated by versioneer-0.28 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict +import functools + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "pyop2-" + cfg.versionfile_source = "pyop2/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run 
%s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. 
+ # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. 
+ + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/pyop2/caching.py b/pyop2/caching.py new file mode 100644 index 0000000000..2948ddede7 --- /dev/null +++ b/pyop2/caching.py @@ -0,0 +1,580 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Provides common base classes for cached objects.""" +import atexit +import cachetools +import hashlib +import os +import pickle +import weakref +from collections.abc import MutableMapping +from pathlib import Path +from warnings import warn # noqa F401 +from collections import defaultdict +from itertools import count +from functools import wraps +from tempfile import mkstemp + +from pyop2.configuration import configuration +from pyop2.exceptions import CachingError, HashError # noqa: F401 +from pyop2.logger import debug +from pyop2.mpi import ( + MPI, COMM_WORLD, comm_cache_keyval, temp_internal_comm +) +from petsc4py import PETSc + + +# Caches created here are registered as a tuple of +# (creation_index, comm, comm.name, function, cache) +# in _KNOWN_CACHES +_CACHE_CIDX = count() +_KNOWN_CACHES = [] +# Flag for outputting information at the end of testing (do not abuse!) 
_running_on_ci = bool(os.environ.get('PYOP2_CI_TESTS'))


# FIXME: (Later) Remove ObjectCached
class ObjectCached(object):
    """Base class for objects that are cached on some other "owner" object.

    Subclasses provide two classmethods, :meth:`_process_args` and
    :meth:`_cache_key`.  The owner object must expose a dict-like
    ``_cache`` attribute in which instances are stored.

    .. warning::

       Python still calls the subclass ``__init__`` on an object that
       was fetched from the cache.  Instances are created with
       ``_initialized = False``; a subclass that does not want to be
       re-initialised should set that flag in ``__init__`` and return
       early when it is already set.

    """

    @classmethod
    def _process_args(cls, *args, **kwargs):
        """Canonicalise the constructor arguments.

        Must return ``(args, kwargs)``.  The first entry of the returned
        *args* is the object on which the instance is cached; it is
        stripped off before the remaining entries are handed to
        :meth:`_cache_key` and ``__init__``.  *kwargs* must be a
        (possibly empty) dict.
        """
        raise NotImplementedError("Subclass must implement _process_args")

    @classmethod
    def _cache_key(cls, *args, **kwargs):
        """Return the cache key for the canonicalised arguments.

        Returning ``None`` disables caching for this instance.

        .. note::

           The returned key **must** be hashable.

        """
        raise NotImplementedError("Subclass must implement _cache_key")

    def __new__(cls, *args, **kwargs):
        processed, kwargs = cls._process_args(*args, **kwargs)
        # Head: the object owning the cache.  Tail: what the subclass
        # constructor and the key function actually receive.
        owner, ctor_args = processed[0], processed[1:]
        key = cls._cache_key(*ctor_args, **kwargs)

        def build():
            # __init__ runs twice for a freshly built object: once here with
            # the canonicalised arguments and once more when Python calls it
            # after __new__ returns.  The second call is expected to notice
            # ``_initialized`` and return straight away.
            instance = super(ObjectCached, cls).__new__(cls)
            instance._initialized = False
            instance.__init__(*ctor_args, **kwargs)
            return instance

        # Nothing to look up when caching is switched off for this object.
        if key is None or owner is None:
            return build()

        try:
            store = owner._cache
        except AttributeError:
            raise RuntimeError("Provided caching object does not have a '_cache' attribute.")

        try:
            found = store[key]
        except KeyError:
            found = store[key] = build()
        return found
def cache_filter(comm=None, comm_name=None, alive=True, function=None, cache_type=None):
    """Filter the registered PyOP2 caches.

    :arg comm: If given, only caches on this communicator are returned
        (matched through the name of its internal communicator).
    :arg comm_name: If given, only caches whose communicator has this name.
        Overridden by *comm* when both are passed.
    :arg alive: If true, drop records whose communicator compares equal to
        ``MPI.COMM_NULL``.
    :arg function: A function object (matched by identity) or a string
        (matched as a substring of the function's qualified name).
    :arg cache_type: A string (substring of the cache class name), a cache
        class, or a cache instance.
    :returns: A list of matching :class:`_CacheRecord` objects.
    """
    caches = _KNOWN_CACHES
    if comm is not None:
        with temp_internal_comm(comm) as icomm:
            cache_collection = icomm.Get_attr(comm_cache_keyval)
            if cache_collection is None:
                print(f"Communicator {icomm.name} has no associated caches")
            comm_name = icomm.name
    if comm_name is not None:
        caches = filter(lambda c: c.comm_name == comm_name, caches)
    if alive:
        caches = filter(lambda c: c.comm != MPI.COMM_NULL, caches)
    if function is not None:
        if isinstance(function, str):
            caches = filter(lambda c: function in c.func_name, caches)
        else:
            caches = filter(lambda c: c.func is function, caches)
    if cache_type is not None:
        if isinstance(cache_type, str):
            caches = filter(lambda c: cache_type in c.cache_name, caches)
        else:
            # Accept a class as well as an instance: taking ``__class__`` of a
            # class yields ``type`` and would never match any record.
            wanted = cache_type if isinstance(cache_type, type) else type(cache_type)
            caches = filter(lambda c: c.cache_name == wanted.__qualname__, caches)
    return [*caches]


class _CacheRecord:
    """Record describing one (communicator, function, cache) combination.

    Stores identifying metadata and gives access to the cache's
    hit/miss/size/maxsize statistics.
    """
    def __init__(self, cidx, comm, func, cache):
        self.cidx = cidx
        self.comm = comm
        self.comm_name = comm.name
        self.func = func
        self.func_module = func.__module__
        self.func_name = func.__qualname__
        self.cache = weakref.ref(cache)
        # Snapshot the statistics when the cache goes away.
        # NOTE(review): ``cache`` is passed as an argument to the finalizer;
        # the ``weakref.finalize`` documentation says arguments must not
        # reference the tracked object, otherwise it is never collected.
        # Left unchanged because the snapshot needs the live object - confirm
        # whether this lifetime extension is intended.
        fin = weakref.finalize(cache, self.finalize, cache)
        fin.atexit = False
        self.cache_name = cache.__class__.__qualname__
        try:
            self.cache_loc = cache.cachedir
        except AttributeError:
            self.cache_loc = "Memory"

    def get_stats(self, cache=None):
        """Return ``(hit, miss, size, maxsize)``; unknown entries are ``-1``.

        :arg cache: Cache to inspect.  Defaults to the recorded cache; if that
            has been collected the snapshot taken by :meth:`finalize` is used.
        """
        if cache is None:
            cache = self.cache()
        if cache is None:
            # The cache is gone: nothing can be queried any more.  Report the
            # finalizer's snapshot and fall back to -1 for values that were
            # never recorded, instead of calling ``len(None)``.
            return tuple(
                getattr(self, name, -1) for name in ("hit", "miss", "size", "maxsize")
            )
        hit = miss = size = maxsize = -1
        if isinstance(cache, cachetools.Cache):
            size = cache.currsize
            maxsize = cache.maxsize
        if hasattr(cache, "instrument__"):
            hit = cache.hit
            miss = cache.miss
        if size == -1:
            try:
                size = len(cache)
            except (NotImplementedError, TypeError):
                # Length is not available for this kind of cache
                pass
        if maxsize is None:
            try:
                maxsize = cache.max_size
            except AttributeError:
                pass
        return hit, miss, size, maxsize

    def finalize(self, cache):
        """Store a snapshot of the statistics of *cache* on this record."""
        self.hit, self.miss, self.size, self.maxsize = self.get_stats(cache)


def print_cache_stats(*args, **kwargs):
    """Print a table of hit/miss/size/maxsize statistics for PyOP2 caches.

    All arguments are forwarded to :func:`cache_filter`.
    """
    # {(comm name, is active): {(cache class name, location): [row, ...]}}
    data = defaultdict(lambda: defaultdict(list))
    for entry in cache_filter(*args, **kwargs):
        active = (entry.comm != MPI.COMM_NULL)
        data[(entry.comm_name, active)][(entry.cache_name, entry.cache_loc)].append(
            (entry.cidx, entry.func_module, entry.func_name, entry.get_stats())
        )

    tab = " "
    hline = "-"*120
    col = (90, 27)
    stats_col = (6, 6, 6, 6)
    stats = ("hit", "miss", "size", "max")
    no_stats = "|".join(" "*ii for ii in stats_col)
    print(hline)
    print(f"|{'Cache':^{col[0]}}|{'Stats':^{col[1]}}|")
    subtitles = "|".join(f"{st:^{w}}" for st, w in zip(stats, stats_col))
    print("|" + " "*col[0] + f"|{subtitles:{col[1]}}|")
    print(hline)
    for ecomm, cachedict in data.items():
        active = "Active" if ecomm[1] else "Freed"
        comm_title = f"{ecomm[0]} ({active})"
        print(f"|{comm_title:{col[0]}}|{no_stats}|")
        for ecache, function_list in cachedict.items():
            cache_title = f"{tab}{ecache[0]}"
            print(f"|{cache_title:{col[0]}}|{no_stats}|")
            cache_location = f"{tab} ↳ {ecache[1]!s}"
            if len(cache_location) < col[0]:
                print(f"|{cache_location:{col[0]}}|{no_stats}|")
            else:
                # Too long for the first column: print it without stats cells
                print(f"|{cache_location:78}|")
            for entry in function_list:
                function_title = f"{tab*2}id={entry[0]} {'.'.join(entry[1:3])}"
                stats_row = "|".join(f"{s:{w}}" for s, w in zip(entry[3], stats_col))
                print(f"|{function_title:{col[0]}}|{stats_row:{col[1]}}|")
        print(hline)


# On CI the statistics are printed when the interpreter exits
if _running_on_ci:
    print_cache_stats = atexit.register(print_cache_stats)


class _CacheMiss:
    """Type of the :data:`CACHE_MISS` sentinel."""
    pass


# Sentinel meaning "not found in the cache"
CACHE_MISS = _CacheMiss()


def _as_hexdigest(*args):
    """Return the MD5 hex digest of the concatenated ``str()`` of *args*.

    :raises HashError: If any argument is an MPI communicator.
    """
    hash_ = hashlib.md5()
    for a in args:
        if isinstance(a, MPI.Comm):
            raise HashError("Communicators cannot be hashed, caching will be broken!")
        hash_.update(str(a).encode())
    return hash_.hexdigest()
class DictLikeDiskAccess(MutableMapping):
    """Dictionary-like interface for storing and retrieving objects on disk.

    Keys are indexable pairs of strings.  Deleting, iterating and taking the
    length are not supported.
    """
    def __init__(self, cachedir, extension=".pickle"):
        """

        :arg cachedir: The cache directory.
        :arg extension: Optional extension to use for written files.
        """
        self.cachedir = cachedir
        self.extension = extension

    def _filepath(self, key):
        """Return the path of the file that stores *key*.

        The first two characters of ``key[0]`` name a subdirectory of the
        cache directory; the remainder of ``key[0]`` followed by ``key[1]``
        and the extension is the file name.  The extension is appended rather
        than substituted with ``Path.with_suffix``, which would cut the name
        at its last dot and make keys such as ``(h, "A.f")`` and
        ``(h, "A.g")`` share a file.
        """
        return Path(self.cachedir, key[0][:2], key[0][2:] + key[1] + self.extension)

    def __getitem__(self, key):
        """Retrieve a value from the disk cache.

        :arg key: The cache key, a 2-tuple of strings.
        :returns: The cached object if found.
        :raises KeyError: If no file exists for *key*.
        """
        try:
            with self.open(self._filepath(key), mode="rb") as fh:
                value = self.read(fh)
        except FileNotFoundError:
            raise KeyError("File not on disk, cache miss")
        return value

    def __setitem__(self, key, value):
        """Store a new value in the disk cache.

        :arg key: The cache key, a 2-tuple of strings.
        :arg value: The new item to store in the cache.
        """
        basedir = Path(self.cachedir, key[0][:2])
        basedir.mkdir(parents=True, exist_ok=True)

        # Care must be taken here to ensure that the file is created safely as
        # the filesystem may be network based. `mkstemp` does so securely without
        # race conditions:
        # https://docs.python.org/3/library/tempfile.html#tempfile.mkstemp
        fd, tmpname = mkstemp(suffix=".tmp", prefix=key[0][2:] + key[1], dir=basedir, text=False)
        # The file is reopened by name below (so that `self.open` can be
        # redefined); release the descriptor now so it cannot leak if the
        # write fails.
        os.close(fd)
        tmppath = Path(tmpname)
        try:
            with self.open(tmppath, mode="wb") as fh:
                self.write(fh, value)
            # Renaming (moving) the file is guaranteed by any POSIX compliant
            # filesystem to be atomic. This may fail if somehow the destination is
            # on another filesystem, but that shouldn't happen here.
            tmppath.rename(self._filepath(key))
        except BaseException:
            # Do not leave a half-written temporary file behind
            tmppath.unlink(missing_ok=True)
            raise

    def __delitem__(self, key):
        raise NotImplementedError(f"Cannot remove items from {self.__class__.__name__}")

    def __iter__(self):
        raise NotImplementedError(f"Cannot iterate over keys in {self.__class__.__name__}")

    def __len__(self):
        raise NotImplementedError(f"Cannot query length of {self.__class__.__name__}")

    def __repr__(self):
        return f"{self.__class__.__name__}(cachedir={self.cachedir}, extension={self.extension})"

    def __eq__(self, other):
        # Instances are the same if they have the same cachedir and extension.
        # Objects without those attributes are simply "not equal" rather than
        # an AttributeError.
        try:
            return (self.cachedir == other.cachedir and self.extension == other.extension)
        except AttributeError:
            return NotImplemented

    def open(self, *args, **kwargs):
        """Open a cache file; arguments are passed to the builtin ``open``."""
        return open(*args, **kwargs)

    def read(self, filehandle):
        """Deserialise a value from *filehandle* using pickle."""
        # NOTE(review): unpickling executes code from the file; the cache
        # directory must only contain trusted files.
        return pickle.load(filehandle)

    def write(self, filehandle, value):
        """Serialise *value* to *filehandle* using pickle."""
        pickle.dump(value, filehandle)


def default_comm_fetcher(*args, **kwargs):
    """ A sensible default comm fetcher for use with `parallel_cache`.

    Returns the first MPI communicator found among the positional arguments
    followed by the keyword argument values.

    :raises TypeError: If no communicator is present.
    """
    comms = filter(
        lambda arg: isinstance(arg, MPI.Comm),
        args + tuple(kwargs.values())
    )
    try:
        comm = next(comms)
    except StopIteration:
        raise TypeError("No comms found in args or kwargs")
    return comm


def default_parallel_hashkey(*args, **kwargs):
    """ A sensible default hash key for use with `parallel_cache`.

    Builds a ``cachetools`` hash key from all arguments that are not MPI
    communicators.
    """
    # We now want to actively remove any comms from args and kwargs to get
    # the same disk cache key.
    hash_args = tuple(filter(
        lambda arg: not isinstance(arg, MPI.Comm),
        args
    ))
    hash_kwargs = dict(filter(
        lambda arg: not isinstance(arg[1], MPI.Comm),
        kwargs.items()
    ))
    return cachetools.keys.hashkey(*hash_args, **hash_kwargs)


def instrument(cls):
    """ Class decorator for dict-like objects for counting cache hits/misses.

    Returns a subclass of *cls* carrying ``hit`` and ``miss`` counters that
    are updated by ``get`` and ``__getitem__``.  Each lookup is counted once,
    whether or not the base class implements ``get`` via ``__getitem__``.
    """
    # Private marker so that a stored value equal to the caller's default
    # (for example ``None``) is still recognised as a hit.
    absent = object()

    @wraps(cls, updated=())
    class _wrapper(cls):
        instrument__ = True

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.hit = 0
            self.miss = 0

        def get(self, key, default=None):
            recorded = self.hit + self.miss
            value = super().get(key, absent)
            if self.hit + self.miss == recorded:
                # The base ``get`` did not go through ``__getitem__`` (e.g.
                # ``dict.get``), so the lookup still has to be counted.
                if value is absent:
                    self.miss += 1
                else:
                    self.hit += 1
            return default if value is absent else value

        def __getitem__(self, key):
            try:
                value = super().__getitem__(key)
                self.hit += 1
            except KeyError as e:
                self.miss += 1
                raise e
            return value
    return _wrapper
class DEFAULT_CACHE(dict):
    """Plain in-memory cache type used by default by the cache decorators."""
    pass


# Example of how to instrument and use different default caches:
# from functools import partial
# EXOTIC_CACHE = partial(instrument(cachetools.LRUCache), maxsize=100)

# Turn on cache measurements if printing cache info is enabled
if configuration["print_cache_info"] or _running_on_ci:
    DEFAULT_CACHE = instrument(DEFAULT_CACHE)
    DictLikeDiskAccess = instrument(DictLikeDiskAccess)


# Two variants of the decorator exist; which one is defined is decided once,
# at import time, from the "spmd_strict" configuration value.
if configuration["spmd_strict"]:
    def parallel_cache(
        hashkey=default_parallel_hashkey,
        comm_fetcher=default_comm_fetcher,
        cache_factory=lambda: DEFAULT_CACHE(),
    ):
        """Parallel cache decorator (SPMD strict-enabled).

        :arg hashkey: Callable mapping the call arguments to an iterable of
            key components.
        :arg comm_fetcher: Callable extracting the communicator from the call
            arguments.
        :arg cache_factory: Zero-argument callable creating a cache object.
        :returns: A decorator that caches the results of the wrapped function
            in a cache attached to the communicator.
        """
        def decorator(func):
            @PETSc.Log.EventDecorator("PyOP2 Cache Wrapper")
            @wraps(func)
            def wrapper(*args, **kwargs):
                """ Extract the key and then try the memory cache before falling back
                on calling the function and populating the cache. SPMD strict ensures
                that all ranks cache hit or miss to ensure that the function evaluation
                always occurs in parallel.
                """
                k = hashkey(*args, **kwargs)
                # Key is (digest of the hash key, qualified function name)
                key = _as_hexdigest(*k), func.__qualname__
                # Create a PyOP2 comm associated with the key, so it is decrefed when the wrapper exits
                with temp_internal_comm(comm_fetcher(*args, **kwargs)) as comm:
                    # Fetch the per-comm cache_collection or set it up if not present
                    # A collection is required since different types of cache can be set up on the same comm
                    cache_collection = comm.Get_attr(comm_cache_keyval)
                    if cache_collection is None:
                        cache_collection = {}
                        comm.Set_attr(comm_cache_keyval, cache_collection)
                    # If this kind of cache is already present on the
                    # cache_collection, get it, otherwise create it
                    # (the collection is keyed by the cache's class name, and
                    # a fresh cache object is built by the factory on every
                    # call even when an existing one ends up being used)
                    local_cache = cache_collection.setdefault(
                        (cf := cache_factory()).__class__.__name__,
                        cf
                    )
                    # Same object that setdefault returned just above
                    local_cache = cache_collection[cf.__class__.__name__]

                    # If this is a new cache or function add it to the list of known caches
                    if (comm, comm.name, func, local_cache) not in [(c.comm, c.comm_name, c.func, c.cache()) for c in _KNOWN_CACHES]:
                        # When a comm is freed we do not hold a reference to the cache.
                        # We attach a finalizer that extracts the stats before the cache
                        # is deleted.
                        _KNOWN_CACHES.append(_CacheRecord(next(_CACHE_CIDX), comm, func, local_cache))

                    # Grab value from all ranks cache and broadcast cache hit/miss
                    value = local_cache.get(key, CACHE_MISS)
                    debug_string = f"{COMM_WORLD.name} R{COMM_WORLD.rank}, {comm.name} R{comm.rank}: "
                    debug_string += f"key={k} in cache: {local_cache.__class__.__name__} cache "
                    if value is CACHE_MISS:
                        debug(debug_string + "miss")
                        cache_hit = False
                    else:
                        debug(debug_string + "hit")
                        cache_hit = True
                    all_present = comm.allgather(cache_hit)

                    # If not present in the cache of all ranks we force re-evaluation on all ranks
                    if not min(all_present):
                        value = CACHE_MISS

                if value is CACHE_MISS:
                    value = func(*args, **kwargs)
                # setdefault keeps an entry that is already stored under key
                return local_cache.setdefault(key, value)

            return wrapper
        return decorator
else:
    def parallel_cache(
        hashkey=default_parallel_hashkey,
        comm_fetcher=default_comm_fetcher,
        cache_factory=lambda: DEFAULT_CACHE(),
    ):
        """Parallel cache decorator.

        :arg hashkey: Callable mapping the call arguments to an iterable of
            key components.
        :arg comm_fetcher: Callable extracting the communicator from the call
            arguments.
        :arg cache_factory: Zero-argument callable creating a cache object.
        :returns: A decorator that caches the results of the wrapped function
            in a cache attached to the communicator.
        """
        def decorator(func):
            @PETSc.Log.EventDecorator("PyOP2 Cache Wrapper")
            @wraps(func)
            def wrapper(*args, **kwargs):
                """ Extract the key and then try the memory cache before falling back
                on calling the function and populating the cache.
                """
                k = hashkey(*args, **kwargs)
                # Key is (digest of the hash key, qualified function name)
                key = _as_hexdigest(*k), func.__qualname__
                # Create a PyOP2 comm associated with the key, so it is decrefed when the wrapper exits
                with temp_internal_comm(comm_fetcher(*args, **kwargs)) as comm:
                    # Fetch the per-comm cache_collection or set it up if not present
                    # A collection is required since different types of cache can be set up on the same comm
                    cache_collection = comm.Get_attr(comm_cache_keyval)
                    if cache_collection is None:
                        cache_collection = {}
                        comm.Set_attr(comm_cache_keyval, cache_collection)
                    # If this kind of cache is already present on the
                    # cache_collection, get it, otherwise create it
                    # (the collection is keyed by the cache's class name)
                    local_cache = cache_collection.setdefault(
                        (cf := cache_factory()).__class__.__name__,
                        cf
                    )
                    # Same object that setdefault returned just above
                    local_cache = cache_collection[cf.__class__.__name__]

                    # If this is a new cache or function add it to the list of known caches
                    if (comm, comm.name, func, local_cache) not in [(c.comm, c.comm_name, c.func, c.cache()) for c in _KNOWN_CACHES]:
                        # When a comm is freed we do not hold a reference to the cache.
                        # We attach a finalizer that extracts the stats before the cache
                        # is deleted.
                        _KNOWN_CACHES.append(_CacheRecord(next(_CACHE_CIDX), comm, func, local_cache))

                    value = local_cache.get(key, CACHE_MISS)

                if value is CACHE_MISS:
                    value = func(*args, **kwargs)
                # setdefault keeps an entry that is already stored under key
                return local_cache.setdefault(key, value)

            return wrapper
        return decorator
def clear_memory_cache(comm):
    """ Completely remove all PyOP2 caches on a given communicator.

    The cache collection attached to the internal communicator is replaced by
    an empty dict; nothing happens if no collection is attached.
    """
    with temp_internal_comm(comm) as icomm:
        if icomm.Get_attr(comm_cache_keyval) is not None:
            icomm.Set_attr(comm_cache_keyval, {})


# A small collection of default simple caches
memory_cache = parallel_cache


def serial_cache(hashkey, cache_factory=lambda: DEFAULT_CACHE()):
    """Return a ``cachetools.cached`` decorator keyed by *hashkey*.

    The cache object is created once, when this function is called.
    """
    return cachetools.cached(key=hashkey, cache=cache_factory())


def disk_only_cache(*args, cachedir=configuration["cache_dir"], **kwargs):
    """Return a `parallel_cache` decorator backed by files in *cachedir*.

    Remaining arguments are forwarded to `parallel_cache`.
    """
    return parallel_cache(*args, **kwargs, cache_factory=lambda: DictLikeDiskAccess(cachedir))


def memory_and_disk_cache(*args, cachedir=configuration["cache_dir"], **kwargs):
    """Return a decorator that stacks a memory cache on top of a disk cache.

    The same *args* and *kwargs* are passed to both cache layers.
    """
    def decorator(func):
        # Memory cache is consulted first; it wraps the disk-cached function
        return memory_cache(*args, **kwargs)(disk_only_cache(*args, cachedir=cachedir, **kwargs)(func))
    return decorator
diff --git a/pyop2/codegen/__init__.py b/pyop2/codegen/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/pyop2/codegen/builder.py b/pyop2/codegen/builder.py
new file mode 100644
index 0000000000..505dc5d2bf
--- /dev/null
+++ b/pyop2/codegen/builder.py
@@ -0,0 +1,989 @@
import itertools
from abc import ABCMeta, abstractmethod
from collections import OrderedDict
from functools import reduce

import numpy
from pyop2.global_kernel import (GlobalKernelArg, DatKernelArg, MixedDatKernelArg,
                                 MatKernelArg, MixedMatKernelArg, PermutedMapKernelArg, ComposedMapKernelArg, PassthroughKernelArg)
from pyop2.codegen.representation import (Accumulate, Argument, Comparison, Conditional,
                                          DummyInstruction, Extent, FixedIndex,
                                          FunctionCall, Index, Indexed,
                                          KernelInst, Literal, LogicalAnd,
                                          Materialise, Max, Min, MultiIndex,
                                          NamedLiteral, PackInst,
                                          PreUnpackInst, Product, RuntimeIndex,
                                          Sum, Symbol, UnpackInst, Variable,
                                          When, Zero)
from pyop2.datatypes import IntType, OpaqueType
from pyop2.op2 import (ALL, INC, MAX, MIN, ON_BOTTOM, ON_INTERIOR_FACETS,
                       ON_TOP, READ, RW, WRITE)
from pyop2.utils import cached_property


MatType = OpaqueType("Mat")
def _Remainder(a, b):
    # ad hoc replacement of Remainder()
    # Replace this with Remainder(a, b) once it gets fixed.
    # Builds the expression "a if a < b else a - b".
    return Conditional(Comparison("<", a, b), a, Sum(a, Product(Literal(numpy.int32(-1)), b)))


class Map(object):
    """Symbolic description of a map used to index data indirectly.

    Wraps an ``Argument`` of shape ``(None, arity)`` holding the map values,
    plus optional offset information that is applied when a layer index is
    supplied (see :meth:`indexed`).
    """

    __slots__ = ("values", "extruded_periodic", "offset", "offset_quotient", "interior_horizontal",
                 "variable", "unroll", "layer_bounds", "num_layers",
                 "prefetch", "_pmap_count")

    def __init__(self, interior_horizontal, layer_bounds, num_layers,
                 arity, dtype,
                 offset=None, offset_quotient=None, unroll=False,
                 extruded=False, extruded_periodic=False, constant_layers=False):
        self.variable = extruded and not constant_layers
        self.extruded_periodic = extruded_periodic
        self.unroll = unroll
        self.layer_bounds = layer_bounds
        self.num_layers = num_layers
        self.interior_horizontal = interior_horizontal
        # Cache of materialised index expressions, see `indexed`
        self.prefetch = {}

        shape = (None, arity)
        values = Argument(shape, dtype=dtype, pfx="map")
        if offset is not None:
            assert type(offset) == tuple
            offset = numpy.array(offset, dtype=numpy.int32)
            if len(set(offset)) == 1:
                # All entries identical: a scalar literal is enough
                offset = Literal(offset[0], casting=True)
            else:
                offset = NamedLiteral(offset, parent=values, suffix="offset")
        if offset_quotient is not None:
            assert type(offset_quotient) == tuple
            offset_quotient = numpy.array(offset_quotient, dtype=numpy.int32)
            offset_quotient = NamedLiteral(offset_quotient, parent=values, suffix="offset_quotient")

        self.values = values
        self.offset = offset
        self.offset_quotient = offset_quotient
        # Counter used by PMap to give each permutation a distinct suffix
        self._pmap_count = itertools.count()

    @property
    def shape(self):
        """Shape of the underlying map values."""
        return self.values.shape

    @property
    def dtype(self):
        """Data type of the underlying map values."""
        return self.values.dtype

    def _permute(self, x):
        """Hook for permuting the map's second index; identity here."""
        return x

    def indexed(self, multiindex, layer=None):
        """Return ``(expression, (f, i))`` indexing this map.

        :arg multiindex: A triple ``(n, i, f)`` of indices.
        :arg layer: Optional layer index; when given and the map has an
            offset, the offset-adjusted (prefetched) expression is used.
        """
        n, i, f = multiindex
        if layer is not None and self.offset is not None:
            # For extruded mesh, prefetch the indirections for each map, so that they don't
            # need to be recomputed.
            # First prefetch the base map (not dependent on layers)
            base_key = None
            if base_key not in self.prefetch:
                j = Index()
                base = Indexed(self.values, (n, self._permute(j)))
                self.prefetch[base_key] = Materialise(PackInst(), base, MultiIndex(j))

            base = self.prefetch[base_key]

            # Now prefetch the extruded part of the map (inside the layer loop).
            # This is necessary so loopy DTRT for MatSetValues
            # Different f values need to be treated separately.
            key = f.extent
            if key is None:
                key = 1
            if key not in self.prefetch:
                # See comments in "sparsity.pyx".
                bottom_layer, _ = self.layer_bounds
                k = Index(f.extent if f.extent is not None else 1)
                # offset = layer - bottom_layer + k
                offset = Sum(Sum(layer, Product(Literal(numpy.int32(-1)), bottom_layer)), k)
                j = Index()
                base = Indexed(base, (j, ))
                unit_offset = self.offset if self.offset.shape == () else Indexed(self.offset, (j,))
                if self.extruded_periodic:
                    if self.offset_quotient is None:
                        # Equivalent to offset_quotient[:] == 0.
                        # Avoid unnecessary logic below.
                        offset = _Remainder(offset, self.num_layers)
                    else:
                        effective_offset = Sum(offset, Indexed(self.offset_quotient, (j,)))
                        # The following code currently does not work: "undefined symbol: loopy_mod_int32"
                        # offset = Remainder(effective_offset, self.num_layers)
                        # Use less elegant and less robust way for now.
                        offset = Sum(_Remainder(effective_offset, self.num_layers),
                                     Product(Literal(numpy.int32(-1)),
                                             _Remainder(Indexed(self.offset_quotient, (j,)), self.num_layers)))
                # Inline map offsets where all entries are identical.
                offset = Product(unit_offset, offset)
                self.prefetch[key] = Materialise(PackInst(), Sum(base, offset), MultiIndex(k, j))
            return Indexed(self.prefetch[key], (f, i)), (f, i)
        else:
            assert f.extent == 1 or f.extent is None
            base = Indexed(self.values, (n, self._permute(i)))
            return base, (f, i)

    def indexed_vector(self, n, shape, layer=None):
        """Return ``(expression, multiindex)`` for a map expanded by *shape*.

        The map entry is multiplied by the extent of the trailing index and
        that index is added to it; the result is materialised.
        """
        shape = self.shape[1:] + shape
        if self.interior_horizontal:
            shape = (2, ) + shape
        else:
            shape = (1, ) + shape
        f, i, j = (Index(e) for e in shape)
        base, (f, i) = self.indexed((n, i, f), layer=layer)
        init = Sum(Product(base, Literal(numpy.int32(j.extent))), j)
        pack = Materialise(PackInst(), init, MultiIndex(f, i, j))
        multiindex = tuple(Index(e) for e in pack.shape)
        return Indexed(pack, multiindex), multiindex
class PMap(Map):
    """A :class:`Map` whose second index is routed through a permutation."""
    __slots__ = ("permutation",)

    def __init__(self, map_, permutation):
        # Copy over properties
        self.variable = map_.variable
        self.extruded_periodic = map_.extruded_periodic
        self.unroll = map_.unroll
        self.layer_bounds = map_.layer_bounds
        self.num_layers = map_.num_layers
        self.interior_horizontal = map_.interior_horizontal
        self.prefetch = {}
        self.values = map_.values
        self.offset = map_.offset
        offset = map_.offset
        # TODO: this is a hack, rep2loopy should be in charge of
        # generating all names!
        count = next(map_._pmap_count)
        if offset is not None:
            if offset.shape:
                # Have a named literal
                # (its values are permuted and stored under a new name)
                offset = offset.value[permutation]
                offset = NamedLiteral(offset, parent=self.values, suffix=f"permutation{count}_offset")
            else:
                offset = map_.offset
        self.offset = offset
        self.offset_quotient = map_.offset_quotient
        self.permutation = NamedLiteral(permutation, parent=self.values, suffix=f"permutation{count}")

    def _permute(self, x):
        """Look up *x* in the permutation."""
        return Indexed(self.permutation, (x,))


class CMap(Map):
    """Composition of several maps.

    Properties are copied from the first map; the other maps are applied,
    last one first, to transform the outer index (see :meth:`indexed`).
    """

    def __init__(self, *maps_):
        # Copy over properties
        self.variable = maps_[0].variable
        self.unroll = maps_[0].unroll
        self.layer_bounds = maps_[0].layer_bounds
        self.interior_horizontal = maps_[0].interior_horizontal
        self.prefetch = {}
        self.values = maps_[0].values
        self.offset = maps_[0].offset
        self.maps_ = maps_

    def indexed(self, multiindex, layer=None):
        """Return ``(expression, (f, i))`` for the composed map."""
        n, i, f = multiindex
        n_ = n
        # Push the outer index through every map except the first, starting
        # from the last one; these lookups never use the layer.
        for map_ in reversed(self.maps_):
            if map_ is not self.maps_[0]:
                n_, (_, _) = map_.indexed(MultiIndex(n_, FixedIndex(0), Index()), layer=None)
        return self.maps_[0].indexed(MultiIndex(n_, i, f), layer=layer)


class Pack(metaclass=ABCMeta):
    """Abstract interface of the pack objects defined in this module."""

    def pick_loop_indices(self, loop_index, layer_index=None, entity_index=None):
        """Override this to select the loop indices used by a pack for indexing."""
        return (loop_index, layer_index)

    @abstractmethod
    def kernel_arg(self, loop_indices=None):
        pass

    @abstractmethod
    def emit_pack_instruction(self, *, loop_indices=None):
        """Either yield an instruction, or else return an empty tuple (to indicate no instruction)"""

    @abstractmethod
    def pack(self, loop_indices=None):
        pass

    @abstractmethod
    def emit_unpack_instruction(self, *, loop_indices=None):
        """Either yield an instruction, or else return an empty tuple (to indicate no instruction)"""
class PassthroughPack(Pack):
    """Pack that hands its argument to the kernel unchanged."""
    def __init__(self, outer):
        self.outer = outer

    def kernel_arg(self, loop_indices=None):
        return self.outer

    def pack(self, loop_indices=None):
        pass

    def emit_pack_instruction(self, **kwargs):
        return ()

    def emit_unpack_instruction(self, **kwargs):
        return ()


class GlobalPack(Pack):
    """Pack for an argument that is indexed directly, without a map."""

    def __init__(self, outer, access, init_with_zero=False):
        self.outer = outer
        self.access = access
        self.init_with_zero = init_with_zero

    def kernel_arg(self, loop_indices=None):
        pack = self.pack(loop_indices)
        return Indexed(pack, (Index(e) for e in pack.shape))

    def emit_pack_instruction(self, *, loop_indices=None):
        return ()

    def pack(self, loop_indices=None):
        """Return the packed temporary, creating and caching it on first use.

        For READ access the outer argument itself is returned.
        """
        if hasattr(self, "_pack"):
            return self._pack

        shape = self.outer.shape
        if self.access is READ:
            # No packing required
            return self.outer
        # We don't need to pack for memory layout, however packing
        # globals that are written is required such that subsequent
        # vectorisation loop transformations privatise these reduction
        # variables. The extra memory movement cost is minimal.
        loop_indices = self.pick_loop_indices(*loop_indices)
        if self.init_with_zero:
            also_zero = {MIN, MAX}
        else:
            also_zero = set()
        if self.access in {INC, WRITE} | also_zero:
            # Temporary starts out as zero
            val = Zero((), self.outer.dtype)
            multiindex = MultiIndex(*(Index(e) for e in shape))
            self._pack = Materialise(PackInst(loop_indices), val, multiindex)
        elif self.access in {READ, RW, MIN, MAX} - also_zero:
            # Temporary starts out as a copy of the outer argument
            multiindex = MultiIndex(*(Index(e) for e in shape))
            expr = Indexed(self.outer, multiindex)
            self._pack = Materialise(PackInst(loop_indices), expr, multiindex)
        else:
            raise ValueError("Don't know how to initialise pack for '%s' access" % self.access)
        return self._pack

    def emit_unpack_instruction(self, *, loop_indices=None):
        """Yield the instruction writing the temporary back, if one is needed.

        INC/MIN/MAX combine the temporary with the current outer value using
        Sum/Min/Max; other non-READ accesses overwrite the outer value.
        """
        pack = self.pack(loop_indices)
        loop_indices = self.pick_loop_indices(*loop_indices)
        if pack is None:
            return ()
        elif self.access is READ:
            return ()
        elif self.access in {INC, MIN, MAX}:
            op = {INC: Sum,
                  MIN: Min,
                  MAX: Max}[self.access]
            multiindex = tuple(Index(e) for e in pack.shape)
            rvalue = Indexed(self.outer, multiindex)
            yield Accumulate(UnpackInst(loop_indices), rvalue, op(rvalue, Indexed(pack, multiindex)))
        else:
            multiindex = tuple(Index(e) for e in pack.shape)
            rvalue = Indexed(self.outer, multiindex)
            yield Accumulate(UnpackInst(loop_indices), rvalue, Indexed(pack, multiindex))
class DatPack(Pack):
    """Pack for an argument accessed directly or indirectly through a map."""
    def __init__(self, outer, access, map_=None, interior_horizontal=False,
                 view_index=None, layer_bounds=None,
                 init_with_zero=False):
        self.outer = outer
        self.map_ = map_
        self.access = access
        self.interior_horizontal = interior_horizontal
        self.view_index = view_index
        self.layer_bounds = layer_bounds
        self.init_with_zero = init_with_zero

    def _mask(self, map_):
        """Override this if the map_ needs a masking condition."""
        return None

    def _rvalue(self, multiindex, loop_indices=None):
        """Returns indexed Dat and masking condition to apply to reads/writes.

        If the masking condition is None, no mask is applied,
        otherwise the pack/unpack will be wrapped in When(mask, expr).
        This is used for the case where maps might have negative entries.
        """
        f, i, *j = multiindex
        n, layer = self.pick_loop_indices(*loop_indices)
        if self.view_index is not None:
            # Append the fixed view indices to the trailing indices
            j = tuple(j) + tuple(FixedIndex(i) for i in self.view_index)
        map_, (f, i) = self.map_.indexed((n, i, f), layer=layer)
        return Indexed(self.outer, MultiIndex(map_, *j)), self._mask(map_)

    def pack(self, loop_indices=None):
        """Return the packed temporary, or None when there is no map.

        The temporary is created on first use and cached on the instance.
        """
        if self.map_ is None:
            return None

        if hasattr(self, "_pack"):
            return self._pack

        if self.interior_horizontal:
            shape = (2, )
        else:
            shape = (1, )

        shape = shape + self.map_.shape[1:]
        if self.view_index is None:
            shape = shape + self.outer.shape[1:]

        if self.init_with_zero:
            also_zero = {MIN, MAX}
        else:
            also_zero = set()
        if self.access in {INC, WRITE} | also_zero:
            # Temporary starts out as zero
            val = Zero((), self.outer.dtype)
            multiindex = MultiIndex(*(Index(e) for e in shape))
            self._pack = Materialise(PackInst(), val, multiindex)
        elif self.access in {READ, RW, MIN, MAX} - also_zero:
            # Temporary is gathered from the outer data through the map
            multiindex = MultiIndex(*(Index(e) for e in shape))
            expr, mask = self._rvalue(multiindex, loop_indices=loop_indices)
            if mask is not None:
                expr = When(mask, expr)
            self._pack = Materialise(PackInst(), expr, multiindex)
        else:
            raise ValueError("Don't know how to initialise pack for '%s' access" % self.access)
        return self._pack

    def kernel_arg(self, loop_indices=None):
        """Return the expression passed to the kernel.

        Without a map the outer data is indexed directly by the loop index;
        otherwise the packed temporary is used.
        """
        if self.map_ is None:
            if loop_indices is None:
                raise ValueError("Need iteration index")
            n, layer = self.pick_loop_indices(*loop_indices)
            shape = self.outer.shape
            if self.view_index is None:
                multiindex = (n, ) + tuple(Index(e) for e in shape[1:])
            else:
                multiindex = (n, ) + tuple(FixedIndex(i) for i in self.view_index)
            return Indexed(self.outer, multiindex)
        else:
            pack = self.pack(loop_indices)
            shape = pack.shape
            return Indexed(pack, (Index(e) for e in shape))

    def emit_pack_instruction(self, *, loop_indices=None):
        return ()

    def emit_unpack_instruction(self, *, loop_indices=None):
        """Yield the instruction scattering the temporary back, if needed.

        INC/MIN/MAX combine with the current value via Sum/Min/Max; other
        non-READ accesses overwrite it.  A mask, when present, wraps the
        instruction in ``When``.
        """
        pack = self.pack(loop_indices)
        if pack is None:
            return ()
        elif self.access is READ:
            return ()
        elif self.access in {INC, MIN, MAX}:
            op = {INC: Sum,
                  MIN: Min,
                  MAX: Max}[self.access]
            multiindex = tuple(Index(e) for e in pack.shape)
            rvalue, mask = self._rvalue(multiindex, loop_indices=loop_indices)
            acc = Accumulate(UnpackInst(), rvalue, op(rvalue, Indexed(pack, multiindex)))
            if mask is None:
                yield acc
            else:
                yield When(mask, acc)
        else:
            multiindex = tuple(Index(e) for e in pack.shape)
            rvalue, mask = self._rvalue(multiindex, loop_indices=loop_indices)
            acc = Accumulate(UnpackInst(), rvalue, Indexed(pack, multiindex))
            if mask is None:
                yield acc
            else:
                yield When(mask, acc)


class MixedDatPack(Pack):
    """Pack that flattens several :class:`DatPack`-like packs into one
    one-dimensional temporary."""
    def __init__(self, packs, access, dtype, interior_horizontal):
        self.packs = packs
        self.access = access
        self.dtype = dtype
        self.interior_horizontal = interior_horizontal

    def pack(self, loop_indices=None):
        """Return the flat temporary, creating and caching it on first use."""
        if hasattr(self, "_pack"):
            return self._pack

        # Total number of entries contributed by all sub-packs
        flat_shape = numpy.sum(tuple(numpy.prod(p.map_.shape[1:] + p.outer.shape[1:]) for p in self.packs))

        if self.interior_horizontal:
            _shape = (2,)
            flat_shape *= 2
        else:
            _shape = (1,)

        if self.access in {INC, WRITE}:
            val = Zero((), self.dtype)
            multiindex = MultiIndex(Index(flat_shape))
            self._pack = Materialise(PackInst(), val, multiindex)
        elif self.access in {READ, RW, MIN, MAX}:
            multiindex = MultiIndex(Index(flat_shape))
            val = Zero((), self.dtype)
            # Alternating list of (expression, flat index) for each sub-pack
            expressions = []
            offset = 0
            for p in self.packs:
                shape = _shape + p.map_.shape[1:] + p.outer.shape[1:]
                mi = MultiIndex(*(Index(e) for e in shape))
                expr, mask = p._rvalue(mi, loop_indices)
                # Row-major strides of `shape`, used to flatten `mi`
                extents = [numpy.prod(shape[i+1:], dtype=numpy.int32) for i in range(len(shape))]
                index = reduce(Sum, [Product(i, Literal(IntType.type(e), casting=False)) for i, e in zip(mi, extents)], Literal(IntType.type(0), casting=False))
                indices = MultiIndex(Sum(index, Literal(IntType.type(offset), casting=False)),)
                offset += numpy.prod(shape, dtype=numpy.int32)
                if mask is not None:
                    expr = When(mask, expr)
                expressions.append(expr)
                expressions.append(indices)

            self._pack = Materialise(PackInst(), val, multiindex, *expressions)
        else:
            raise ValueError("Don't know how to initialise pack for '%s' access" % self.access)

        return self._pack

    def kernel_arg(self, loop_indices=None):
        pack = self.pack(loop_indices)
        shape = pack.shape
        return Indexed(pack, (Index(e) for e in shape))

    def emit_pack_instruction(self, *, loop_indices=None):
        return ()

    def emit_unpack_instruction(self, *, loop_indices=None):
        """Yield one write-back instruction per sub-pack (none for READ)."""
        pack = self.pack(loop_indices)
        if self.access is READ:
            return ()
        else:
            if self.interior_horizontal:
                _shape = (2,)
            else:
                _shape = (1,)
            offset = 0
            for p in self.packs:
                shape = _shape + p.map_.shape[1:] + p.outer.shape[1:]
                mi = MultiIndex(*(Index(e) for e in shape))
                rvalue, mask = p._rvalue(mi, loop_indices)
                # Row-major strides of `shape`, used to flatten `mi`
                extents = [numpy.prod(shape[i+1:], dtype=numpy.int32) for i in range(len(shape))]
                index = reduce(Sum, [Product(i, Literal(IntType.type(e), casting=False)) for i, e in zip(mi, extents)], Literal(IntType.type(0), casting=False))
                indices = MultiIndex(Sum(index, Literal(IntType.type(offset), casting=False)),)
                rhs = Indexed(pack, indices)
                offset += numpy.prod(shape, dtype=numpy.int32)

                if self.access in {INC, MIN, MAX}:
                    op = {INC: Sum,
                          MIN: Min,
                          MAX: Max}[self.access]
                    rhs = op(rvalue, rhs)

                acc = Accumulate(UnpackInst(), rvalue, rhs)
                if mask is None:
                    yield acc
                else:
                    yield When(mask, acc)


class MatPack(Pack):

    count = itertools.count()

    insertion_names = {False: "MatSetValuesBlockedLocal",
                       True: "MatSetValuesLocal"}
    """Function call name for inserting into the PETSc Mat.
The keys + are whether or not maps are "unrolled" (addressing dofs) or + blocked (addressing nodes).""" + + def __init__(self, outer, access, maps, dims, dtype, interior_horizontal=False): + self.outer = outer + self.access = access + self.maps = maps + self.dims = dims + self.dtype = dtype + self.interior_horizontal = interior_horizontal + + @cached_property + def shapes(self): + ((rdim, cdim), ), = self.dims + rmap, cmap = self.maps + if self.interior_horizontal: + shape = (2, ) + else: + shape = (1, ) + rshape = shape + rmap.shape[1:] + (rdim, ) + cshape = shape + cmap.shape[1:] + (cdim, ) + return (rshape, cshape) + + def pack(self, loop_indices=None, only_declare=False): + if hasattr(self, "_pack"): + return self._pack + shape = tuple(itertools.chain(*self.shapes)) + if only_declare: + pack = Variable(f"matpack{next(self.count)}", shape, self.dtype) + self._pack = pack + if self.access in {WRITE, INC}: + val = Zero((), self.dtype) + multiindex = MultiIndex(*(Index(e) for e in shape)) + pack = Materialise(PackInst(), val, multiindex) + self._pack = pack + else: + raise ValueError("Unexpected access type") + return self._pack + + def kernel_arg(self, loop_indices=None): + pack = self.pack(loop_indices=loop_indices) + return Indexed(pack, tuple(Index(e) for e in pack.shape)) + + def emit_pack_instruction(self, *, loop_indices=None): + return () + + def emit_unpack_instruction(self, *, loop_indices=None): + from pyop2.codegen.rep2loopy import register_petsc_function + ((rdim, cdim), ), = self.dims + rmap, cmap = self.maps + n, layer = self.pick_loop_indices(*loop_indices) + unroll = any(m.unroll for m in self.maps) + if unroll: + maps = [map_.indexed_vector(n, (dim, ), layer=layer) + for map_, dim in zip(self.maps, (rdim, cdim))] + else: + maps = [] + for map_ in self.maps: + i = Index() + if self.interior_horizontal: + f = Index(2) + else: + f = Index(1) + maps.append(map_.indexed((n, i, f), layer=layer)) + (rmap, cmap), (rindices, cindices) = zip(*maps) + + pack 
= self.pack(loop_indices=loop_indices) + name = self.insertion_names[unroll] + if unroll: + # The shape of MatPack is + # (row, cols) if it has vector BC + # (block_rows, row_cmpt, block_cols, col_cmpt) otherwise + free_indices = rindices + cindices + pack = Indexed(pack, free_indices) + else: + free_indices = rindices + (Index(), ) + cindices + (Index(), ) + pack = Indexed(pack, free_indices) + + access = Symbol({WRITE: "INSERT_VALUES", + INC: "ADD_VALUES"}[self.access]) + + rextent = Extent(MultiIndex(*rindices)) + cextent = Extent(MultiIndex(*cindices)) + + register_petsc_function(name) + + call = FunctionCall(name, + UnpackInst(), + (self.access, READ, READ, READ, READ, READ, READ), + free_indices, + self.outer, + rextent, + rmap, + cextent, + cmap, + pack, + access) + + yield call + + +class MixedMatPack(Pack): + + def __init__(self, packs, access, dtype, block_shape): + self.access = access + assert len(block_shape) == 2 + self.packs = numpy.asarray(packs).reshape(block_shape) + self.dtype = dtype + + def pack(self, loop_indices=None): + if hasattr(self, "_pack"): + return self._pack + rshape = 0 + cshape = 0 + # Need to compute row and col shape based on individual pack shapes + for p in self.packs[:, 0]: + shape, _ = p.shapes + rshape += numpy.prod(shape, dtype=int) + for p in self.packs[0, :]: + _, shape = p.shapes + cshape += numpy.prod(shape, dtype=int) + shape = (rshape, cshape) + if self.access in {WRITE, INC}: + val = Zero((), self.dtype) + multiindex = MultiIndex(*(Index(e) for e in shape)) + pack = Materialise(PackInst(), val, multiindex) + self._pack = pack + return pack + else: + raise ValueError("Unexpected access type") + + def kernel_arg(self, loop_indices=None): + pack = self.pack(loop_indices=loop_indices) + return Indexed(pack, tuple(Index(e) for e in pack.shape)) + + def emit_pack_instruction(self, *, loop_indices=None): + return () + + def emit_unpack_instruction(self, *, + loop_indices=None): + pack = self.pack(loop_indices=loop_indices) 
+ mixed_to_local = [] + local_to_global = [] + roffset = 0 + for row in self.packs: + coffset = 0 + for p in row: + rshape, cshape = p.shapes + pack_ = p.pack(loop_indices=loop_indices, only_declare=True) + rindices = tuple(Index(e) for e in rshape) + cindices = tuple(Index(e) for e in cshape) + indices = MultiIndex(*rindices, *cindices) + lvalue = Indexed(pack_, indices) + rextents = [numpy.prod(rshape[i+1:], dtype=numpy.int32) for i in range(len(rshape))] + cextents = [numpy.prod(cshape[i+1:], dtype=numpy.int32) for i in range(len(cshape))] + flat_row_index = reduce(Sum, [Product(i, Literal(IntType.type(e), casting=False)) + for i, e in zip(rindices, rextents)], + Literal(IntType.type(0), casting=False)) + flat_col_index = reduce(Sum, [Product(i, Literal(IntType.type(e), casting=False)) + for i, e in zip(cindices, cextents)], + Literal(IntType.type(0), casting=False)) + + flat_index = MultiIndex(Sum(flat_row_index, Literal(IntType.type(roffset), casting=False)), + Sum(flat_col_index, Literal(IntType.type(coffset), casting=False))) + rvalue = Indexed(pack, flat_index) + # Copy from local mixed element tensor into non-mixed + mixed_to_local.append(Accumulate(PreUnpackInst(), lvalue, rvalue)) + # And into global matrix. 
+ local_to_global.extend(p.emit_unpack_instruction(loop_indices=loop_indices)) + coffset += numpy.prod(cshape, dtype=numpy.int32) + roffset += numpy.prod(rshape, dtype=numpy.int32) + yield from iter(mixed_to_local) + yield from iter(local_to_global) + + +class WrapperBuilder(object): + + def __init__(self, *, kernel, subset, extruded, extruded_periodic, constant_layers, iteration_region=None, single_cell=False, + pass_layer_to_kernel=False, forward_arg_types=()): + self.kernel = kernel + self.local_knl_args = iter(kernel.arguments) + self.arguments = [] + self.argument_accesses = [] + self.packed_args = [] + self.indices = [] + self.maps = OrderedDict() + self.subset = subset + self.extruded = extruded + self.extruded_periodic = extruded_periodic + self.constant_layers = constant_layers + if iteration_region is None: + self.iteration_region = ALL + else: + self.iteration_region = iteration_region + self.pass_layer_to_kernel = pass_layer_to_kernel + self.single_cell = single_cell + self.forward_arguments = tuple(Argument((), fa, pfx="farg") for fa in forward_arg_types) + + @property + def requires_zeroed_output_arguments(self): + return self.kernel.requires_zeroed_output_arguments + + @cached_property + def loop_extents(self): + return (Argument((), IntType, name="start"), + Argument((), IntType, name="end")) + + @cached_property + def _loop_index(self): + start, end = self.loop_extents + return RuntimeIndex(start, end, + LogicalAnd( + Comparison("<=", Zero((), numpy.int32), start), + Comparison("<=", start, end)), + name="n") + + @cached_property + def _subset_indices(self): + return Argument(("end", ), IntType, name="subset_indices") + + @cached_property + def loop_index(self): + n = self._loop_index + if self.subset: + n = Materialise(PackInst(), Indexed(self._subset_indices, MultiIndex(n)), MultiIndex()) + return n + + @cached_property + def _layers_array(self): + if self.constant_layers: + return Argument((1, 2), IntType, name="layers") + else: + return 
Argument((None, 2), IntType, name="layers") + + @cached_property + def num_layers(self): + cellStart = Indexed(self._layers_array, (self._layer_index, FixedIndex(0))) + cellEnd = Sum(Indexed(self._layers_array, (self._layer_index, FixedIndex(1))), Literal(IntType.type(-1))) + n = Sum(cellEnd, + Product(Literal(numpy.int32(-1)), cellStart)) + return Materialise(PackInst(), n, MultiIndex()) + + @cached_property + def bottom_layer(self): + if self.iteration_region == ON_TOP: + return Materialise(PackInst(), + Indexed(self._layers_array, (self._layer_index, FixedIndex(0))), + MultiIndex()) + else: + start, _ = self.layer_extents + return start + + @cached_property + def top_layer(self): + if self.iteration_region == ON_BOTTOM: + return Materialise(PackInst(), + Sum(Indexed(self._layers_array, (self._layer_index, FixedIndex(1))), + Literal(IntType.type(-1))), + MultiIndex()) + else: + _, end = self.layer_extents + return end + + @cached_property + def layer_extents(self): + cellStart = Indexed(self._layers_array, (self._layer_index, FixedIndex(0))) + cellEnd = Sum(Indexed(self._layers_array, (self._layer_index, FixedIndex(1))), Literal(IntType.type(-1))) + if self.iteration_region == ON_BOTTOM: + start = cellStart + end = Sum(cellStart, Literal(IntType.type(1))) + elif self.iteration_region == ON_TOP: + start = Sum(cellEnd, Literal(IntType.type(-1))) + end = cellEnd + elif self.iteration_region == ON_INTERIOR_FACETS: + start = cellStart + if self.extruded_periodic: + end = cellEnd + else: + end = Sum(cellEnd, Literal(IntType.type(-1))) + elif self.iteration_region == ALL: + start = cellStart + end = cellEnd + else: + raise ValueError("Unknown iteration region") + return (Materialise(PackInst(), start, MultiIndex()), + Materialise(PackInst(), end, MultiIndex())) + + @cached_property + def _layer_index(self): + if self.constant_layers: + return FixedIndex(0) + else: + return self.loop_index + + @cached_property + def layer_index(self): + if self.extruded: + start, end = 
self.layer_extents + return RuntimeIndex(start, end, + LogicalAnd( + Comparison("<=", Zero((), numpy.int32), start), + Comparison("<=", start, end)), + name="layer") + else: + return None + + @property + def loop_indices(self): + if self.extruded: + return (self.loop_index, self.layer_index, self._loop_index) + else: + return (self.loop_index, None, self._loop_index) + + def add_argument(self, arg): + local_arg = next(self.local_knl_args) + access = local_arg.access + dtype = local_arg.dtype + interior_horizontal = self.iteration_region == ON_INTERIOR_FACETS + + if isinstance(arg, PassthroughKernelArg): + argument = Argument((), dtype, pfx="arg") + pack = PassthroughPack(argument) + self.arguments.append(argument) + + elif isinstance(arg, GlobalKernelArg): + argument = Argument(arg.dim, dtype, pfx="glob") + + pack = GlobalPack(argument, access, + init_with_zero=self.requires_zeroed_output_arguments) + self.arguments.append(argument) + elif isinstance(arg, DatKernelArg): + if arg.dim == (): + shape = (None, 1) + else: + shape = (None, *arg.dim) + argument = Argument(shape, dtype, pfx="dat") + + if arg.is_indirect: + map_ = self._add_map(arg.map_) + else: + map_ = None + pack = arg.pack(argument, access, map_=map_, + interior_horizontal=interior_horizontal, + view_index=arg.index, + init_with_zero=self.requires_zeroed_output_arguments) + self.arguments.append(argument) + elif isinstance(arg, MixedDatKernelArg): + packs = [] + for a in arg: + if a.dim == (): + shape = (None, 1) + else: + shape = (None, *a.dim) + argument = Argument(shape, dtype, pfx="mdat") + + if a.is_indirect: + map_ = self._add_map(a.map_) + else: + map_ = None + + packs.append(arg.pack(argument, access, map_, + interior_horizontal=interior_horizontal, + init_with_zero=self.requires_zeroed_output_arguments)) + self.arguments.append(argument) + pack = MixedDatPack(packs, access, dtype, + interior_horizontal=interior_horizontal) + elif isinstance(arg, MatKernelArg): + argument = Argument((), MatType, 
pfx="mat") + maps = tuple(self._add_map(m, arg.unroll) + for m in arg.maps) + pack = arg.pack(argument, access, maps, + arg.dims, dtype, + interior_horizontal=interior_horizontal) + self.arguments.append(argument) + elif isinstance(arg, MixedMatKernelArg): + packs = [] + for a in arg: + argument = Argument((), MatType, pfx="mat") + maps = tuple(self._add_map(m, a.unroll) + for m in a.maps) + + packs.append(arg.pack(argument, access, maps, + a.dims, dtype, + interior_horizontal=interior_horizontal)) + self.arguments.append(argument) + pack = MixedMatPack(packs, access, dtype, + arg.shape) + else: + raise ValueError("Unhandled argument type") + + self.packed_args.append(pack) + self.argument_accesses.append(access) + + def _add_map(self, map_, unroll=False): + if map_ is None: + return None + interior_horizontal = self.iteration_region == ON_INTERIOR_FACETS + key = map_ + try: + return self.maps[key] + except KeyError: + if isinstance(map_, PermutedMapKernelArg): + imap = self._add_map(map_.base_map, unroll) + map_ = PMap(imap, numpy.asarray(map_.permutation, dtype=IntType)) + elif isinstance(map_, ComposedMapKernelArg): + map_ = CMap(*(self._add_map(m, unroll) for m in map_.base_maps)) + else: + map_ = Map(interior_horizontal, + (self.bottom_layer, self.top_layer), + self.num_layers, + arity=map_.arity, offset=map_.offset, offset_quotient=map_.offset_quotient, dtype=IntType, + unroll=unroll, + extruded=self.extruded, + extruded_periodic=self.extruded_periodic, + constant_layers=self.constant_layers) + self.maps[key] = map_ + return map_ + + @cached_property + def loopy_argument_accesses(self): + """Loopy wants the CallInstruction to have argument access + descriptors aligned with how the callee treats the function. 
+ In the cases of TSFC kernels with WRITE access, this is not + how we treat the function, so we have to keep track of the + difference here.""" + if self.requires_zeroed_output_arguments: + mapping = {WRITE: INC} + else: + mapping = {} + return list(mapping.get(a, a) for a in self.argument_accesses) + + @property + def kernel_args(self): + return tuple(p.kernel_arg(self.loop_indices) for p in self.packed_args) + + @property + def wrapper_args(self): + # Loop extents come from here. + args = list(self.forward_arguments) + args.extend(self._loop_index.extents) + if self.extruded: + args.append(self._layers_array) + if self.subset: + args.append(self._subset_indices) + # parloop args passed "as is" + args.extend(self.arguments) + # maps are refcounted + for map_ in self.maps.values(): + # But we don't need to emit stuff for PMaps because they + # are a Map (already seen + a permutation [encoded in the + # indexing]). + # CMaps do not have their own arguments, either. + if not isinstance(map_, (PMap, CMap)): + args.append(map_.values) + return tuple(args) + + def kernel_call(self): + args = self.kernel_args + access = tuple(self.loopy_argument_accesses) + # assuming every index is free index + free_indices = set(itertools.chain.from_iterable(arg.multiindex for arg in args if isinstance(arg, Indexed))) + # remove runtime index + free_indices = tuple(i for i in free_indices if isinstance(i, Index)) + if self.pass_layer_to_kernel: + args = args + (self.layer_index, ) + access = access + (READ,) + if self.forward_arguments: + args = self.forward_arguments + args + access = tuple([WRITE] * len(self.forward_arguments)) + access + return FunctionCall(self.kernel.name, KernelInst(), access, free_indices, *args) + + def emit_instructions(self): + yield from itertools.chain(*(pack.emit_pack_instruction(loop_indices=self.loop_indices) + for pack in self.packed_args)) + # Sometimes, actual instructions do not refer to all the loop + # indices (e.g. all of them are globals).
To ensure that loopy + # knows about these indices, we emit a dummy instruction (that + # doesn't generate any code) that does depend on them. + yield DummyInstruction(PackInst(), *(x for x in self.loop_indices if x is not None)) + yield self.kernel_call() + yield from itertools.chain(*(pack.emit_unpack_instruction(loop_indices=self.loop_indices) + for pack in self.packed_args)) diff --git a/pyop2/codegen/c/inverse.c b/pyop2/codegen/c/inverse.c new file mode 100644 index 0000000000..7f445d385a --- /dev/null +++ b/pyop2/codegen/c/inverse.c @@ -0,0 +1,47 @@ +#include +#include + +#ifndef PYOP2_WORK_ARRAYS +#define PYOP2_WORK_ARRAYS +#define BUF_SIZE 30 +static PetscBLASInt ipiv_buffer[BUF_SIZE]; +static PetscScalar work_buffer[BUF_SIZE*BUF_SIZE]; +#endif + +#ifndef PYOP2_INV_LOG_EVENTS +#define PYOP2_INV_LOG_EVENTS +PetscLogEvent ID_inv_memcpy = -1; +PetscLogEvent ID_inv_getrf = -1; +PetscLogEvent ID_inv_getri = -1; +static PetscBool log_active_inv = 0; +#endif + +void inverse(PetscScalar* __restrict__ Aout, const PetscScalar* __restrict__ A, PetscBLASInt N) +{ + PetscLogIsActive(&log_active_inv); + if (log_active_inv){PetscLogEventBegin(ID_inv_memcpy,0,0,0,0);} + PetscBLASInt info; + PetscBLASInt *ipiv = N <= BUF_SIZE ? ipiv_buffer : malloc(N*sizeof(*ipiv)); + PetscScalar *Awork = N <= BUF_SIZE ? 
work_buffer : malloc(N*N*sizeof(*Awork)); + memcpy(Aout, A, N*N*sizeof(PetscScalar)); + if (log_active_inv){PetscLogEventEnd(ID_inv_memcpy,0,0,0,0);} + + if (log_active_inv){PetscLogEventBegin(ID_inv_getrf,0,0,0,0);} + LAPACKgetrf_(&N, &N, Aout, &N, ipiv, &info); + if (log_active_inv){PetscLogEventEnd(ID_inv_getrf,0,0,0,0);} + + if(info == 0){ + if (log_active_inv){PetscLogEventBegin(ID_inv_getri,0,0,0,0);} + LAPACKgetri_(&N, Aout, &N, ipiv, Awork, &N, &info); + if (log_active_inv){PetscLogEventEnd(ID_inv_getri,0,0,0,0);} + } + + if(info != 0){ + fprintf(stderr, "Getri throws nonzero info."); + abort(); + } + if ( N > BUF_SIZE ) { + free(Awork); + free(ipiv); + } +} diff --git a/pyop2/codegen/c/solve.c b/pyop2/codegen/c/solve.c new file mode 100644 index 0000000000..fbabc95885 --- /dev/null +++ b/pyop2/codegen/c/solve.c @@ -0,0 +1,51 @@ +#include +#include + +#ifndef PYOP2_WORK_ARRAYS +#define PYOP2_WORK_ARRAYS +#define BUF_SIZE 30 +static PetscBLASInt ipiv_buffer[BUF_SIZE]; +static PetscScalar work_buffer[BUF_SIZE*BUF_SIZE]; +#endif + +#ifndef PYOP2_SOLVE_LOG_EVENTS +#define PYOP2_SOLVE_LOG_EVENTS +PetscLogEvent ID_solve_memcpy = -1; +PetscLogEvent ID_solve_getrf = -1; +PetscLogEvent ID_solve_getrs = -1; +static PetscBool log_active_solve = 0; +#endif + +void solve(PetscScalar* __restrict__ out, const PetscScalar* __restrict__ A, const PetscScalar* __restrict__ B, PetscBLASInt N) +{ + PetscLogIsActive(&log_active_solve); + if (log_active_solve){PetscLogEventBegin(ID_solve_memcpy,0,0,0,0);} + PetscBLASInt info; + PetscBLASInt *ipiv = N <= BUF_SIZE ? ipiv_buffer : malloc(N*sizeof(*ipiv)); + memcpy(out,B,N*sizeof(PetscScalar)); + PetscScalar *Awork = N <= BUF_SIZE ? 
work_buffer : malloc(N*N*sizeof(*Awork)); + memcpy(Awork,A,N*N*sizeof(PetscScalar)); + if (log_active_solve){PetscLogEventEnd(ID_solve_memcpy,0,0,0,0);} + + PetscBLASInt NRHS = 1; + const char T = 'T'; + if (log_active_solve){PetscLogEventBegin(ID_solve_getrf,0,0,0,0);} + LAPACKgetrf_(&N, &N, Awork, &N, ipiv, &info); + if (log_active_solve){PetscLogEventEnd(ID_solve_getrf,0,0,0,0);} + + if(info == 0){ + if (log_active_solve){PetscLogEventBegin(ID_solve_getrs,0,0,0,0);} + LAPACKgetrs_(&T, &N, &NRHS, Awork, &N, ipiv, out, &N, &info); + if (log_active_solve){PetscLogEventEnd(ID_solve_getrs,0,0,0,0);} + } + + if(info != 0){ + fprintf(stderr, "Gesv throws nonzero info."); + abort(); + } + + if ( N > BUF_SIZE ) { + free(ipiv); + free(Awork); + } +} diff --git a/pyop2/codegen/loopycompat.py b/pyop2/codegen/loopycompat.py new file mode 100644 index 0000000000..ae3d5feffa --- /dev/null +++ b/pyop2/codegen/loopycompat.py @@ -0,0 +1,194 @@ +# Everything in this file was formerly in loopy/transform/callable.py +# but was removed in https://github.com/inducer/loopy/pull/327. It has +# been kept here for compatibility but should be phased out. + +# Note that since this code is copypasted, the linter has been turned off. + +# flake8: noqa + +from loopy.kernel.instruction import CallInstruction, MultiAssignmentBase, \ + CInstruction, _DataObliviousInstruction +from loopy.symbolic import CombineMapper, IdentityMapper +from loopy.symbolic import simplify_via_aff +from loopy.kernel.function_interface import CallableKernel +from loopy.translation_unit import TranslationUnit + + +# Tools to match caller to callee args by (guessed) automatic reshaping +# +# (This is undocumented and not recommended, but it is currently needed +# to support Firedrake.) + +class DimChanger(IdentityMapper): + """ + Mapper to change the dimensions of an argument. + .. attribute:: callee_arg_dict + A mapping from the argument name (:class:`str`) to instances of + :class:`loopy.kernel.array.ArrayBase`. + .. 
attribute:: desired_shape + A mapping from argument name (:class:`str`) to an instance of + :class:`tuple`. + """ + def __init__(self, callee_arg_dict, desired_shape): + self.callee_arg_dict = callee_arg_dict + self.desired_shape = desired_shape + super().__init__() + + def map_subscript(self, expr): + if expr.aggregate.name not in self.callee_arg_dict: + return super().map_subscript(expr) + callee_arg_dim_tags = self.callee_arg_dict[expr.aggregate.name].dim_tags + flattened_index = sum(dim_tag.stride*idx for dim_tag, idx in + zip(callee_arg_dim_tags, expr.index_tuple)) + new_indices = [] + + from operator import mul + from functools import reduce + stride = reduce(mul, self.desired_shape[expr.aggregate.name], 1) + + for length in self.desired_shape[expr.aggregate.name]: + stride /= length + ind = flattened_index // int(stride) + flattened_index -= (int(stride) * ind) + new_indices.append(simplify_via_aff(ind)) + + return expr.aggregate.index(tuple(new_indices)) + + +def _match_caller_callee_argument_dimension_for_single_kernel( + caller_knl, callee_knl): + """ + :returns: a copy of *caller_knl* with the instance of + :class:`loopy.kernel.function_interface.CallableKernel` addressed by + *callee_function_name* in the *caller_knl* aligned with the argument + dimensions required by *caller_knl*. + """ + from loopy.kernel.array import ArrayBase + from loopy.kernel.data import auto + + for insn in caller_knl.instructions: + if not isinstance(insn, CallInstruction) or ( + insn.expression.function.name != + callee_knl.name): + # Call to a callable kernel can only occur through a + # CallInstruction.
+ continue + + def _shape_1_if_empty(shape_caller, shape_callee): + assert isinstance(shape_caller, tuple) + if shape_caller == () and shape_caller!=shape_callee: + return (1,) + else: + return shape_caller + + from loopy.kernel.function_interface import ( + ArrayArgDescriptor, get_arg_descriptor_for_expression, + get_kw_pos_association) + _, pos_to_kw = get_kw_pos_association(callee_knl) + arg_id_to_shape = {} + for arg_id, arg in insn.arg_id_to_arg().items(): + arg_id = pos_to_kw[arg_id] + + arg_descr = get_arg_descriptor_for_expression(caller_knl, arg) + if isinstance(arg_descr, ArrayArgDescriptor): + arg_id_to_shape[arg_id] = arg_descr.shape + else: + arg_id_to_shape[arg_id] = (1, ) + + dim_changer = DimChanger( + callee_knl.arg_dict, + arg_id_to_shape) + + new_callee_insns = [] + for callee_insn in callee_knl.instructions: + if isinstance(callee_insn, MultiAssignmentBase): + new_callee_insns.append(callee_insn + .with_transformed_expressions(dim_changer)) + + elif isinstance(callee_insn, (CInstruction, + _DataObliviousInstruction)): + # The layout of the args to a CInstructions is not going to be matched to the caller_kernel, + # they are appended with unmatched args. + # We only use Cinstructions exceptionally, e.g. for adding profile instructions, + # without arguments that are required to be matched, so this is ok. + new_callee_insns.append(callee_insn) + else: + raise NotImplementedError("Unknown instruction %s."
% + type(insn)) + + new_args = [arg if not isinstance(arg, ArrayBase) + else arg.copy(shape=arg_id_to_shape[arg.name], + dim_tags=None, strides=auto, order="C") + for arg in callee_knl.args] + + # subkernel with instructions adjusted according to the new dimensions + new_callee_knl = callee_knl.copy(instructions=new_callee_insns, + args=new_args) + + return new_callee_knl + + +class _FunctionCalledChecker(CombineMapper): + def __init__(self, func_name): + self.func_name = func_name + super().__init__() + + def combine(self, values): + return any(values) + + def map_call(self, expr): + if expr.function.name == self.func_name: + return True + return self.combine( + tuple( + self.rec(child) for child in expr.parameters) + ) + + map_call_with_kwargs = map_call + + def map_constant(self, expr): + return False + + def map_type_cast(self, expr): + return self.rec(expr.child) + + def map_algebraic_leaf(self, expr): + return False + + def map_kernel(self, kernel): + return any(self.rec(insn.expression) for insn in kernel.instructions if + isinstance(insn, MultiAssignmentBase)) + + +def _match_caller_callee_argument_dimension_(program, callee_function_name): + """ + Returns a copy of *program* with the instance of + :class:`loopy.kernel.function_interface.CallableKernel` addressed by + *callee_function_name* in the *program* aligned with the argument + dimensions required by *caller_knl*. + .. note:: + The callee kernel addressed by *callee_function_name*, should be + called at only one location throughout the program, as multiple + invocations would demand complex renaming logic which is not + implemented yet. 
+ """ + assert isinstance(program, TranslationUnit) + assert isinstance(callee_function_name, str) + assert callee_function_name not in program.entrypoints + assert callee_function_name in program.callables_table + + is_invoking_callee = _FunctionCalledChecker( + callee_function_name).map_kernel + + caller_knl, = [in_knl_callable.subkernel for in_knl_callable in + program.callables_table.values() if isinstance(in_knl_callable, + CallableKernel) and + is_invoking_callee(in_knl_callable.subkernel)] + + from pymbolic.primitives import Call + assert len([insn for insn in caller_knl.instructions if (isinstance(insn, + CallInstruction) and isinstance(insn.expression, Call) and + insn.expression.function.name == callee_function_name)]) == 1 + new_callee_kernel = _match_caller_callee_argument_dimension_for_single_kernel( + caller_knl, program[callee_function_name]) + return program.with_kernel(new_callee_kernel) diff --git a/pyop2/codegen/node.py b/pyop2/codegen/node.py new file mode 100644 index 0000000000..1af62a635f --- /dev/null +++ b/pyop2/codegen/node.py @@ -0,0 +1,248 @@ +"""Generic abstract node class and utility functions for creating +expression DAG languages.""" + +import collections + + +class Node(object): + """Abstract node class. + + Nodes are not meant to be modified. + + A node can reference other nodes; they are called children. A node + might contain data, or reference other objects which are not + themselves nodes; they are not called children. + + Both the children (if any) and non-child data (if any) are + required to create a node, or determine the equality of two + nodes. For reconstruction, however, only the new children are + necessary. + """ + + __slots__ = ('hash_value',) + + # Non-child data as the first arguments of the constructor. + # To be (potentially) overridden by derived node classes. + __front__ = () + + # Non-child data as the last arguments of the constructor. + # To be (potentially) overridden by derived node classes. 
+ __back__ = () + + def _cons_args(self, children): + """Constructs an argument list for the constructor with + non-child data from 'self' and children from 'children'. + + Internally used utility function. + """ + front_args = [getattr(self, name) for name in self.__front__] + back_args = [getattr(self, name) for name in self.__back__] + + return tuple(front_args) + tuple(children) + tuple(back_args) + + def __reduce__(self): + # Gold version: + return type(self), self._cons_args(self.children) + + def reconstruct(self, *args): + """Reconstructs the node with new children from + 'args'. Non-child data are copied from 'self'. + + Returns a new object. + """ + return type(self)(*self._cons_args(args)) + + def __repr__(self): + cons_args = self._cons_args(self.children) + return "%s(%s)" % (type(self).__name__, ", ".join(map(repr, cons_args))) + + def __eq__(self, other): + """Provides equality testing with quick positive and negative + paths based on :func:`id` and :meth:`__hash__`. + """ + if self is other: + return True + elif hash(self) != hash(other): + return False + else: + return self.is_equal(other) + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + """Provides caching for hash values.""" + try: + return self.hash_value + except AttributeError: + self.hash_value = self.get_hash() + return self.hash_value + + def is_equal(self, other): + """Equality predicate. + + This is the method to potentially override in derived classes, + not :meth:`__eq__` or :meth:`__ne__`. + """ + if type(self) != type(other): + return False + self_consargs = self._cons_args(self.children) + other_consargs = other._cons_args(other.children) + return self_consargs == other_consargs + + def get_hash(self): + """Hash function. + + This is the method to potentially override in derived classes, + not :meth:`__hash__`. 
+ """ + return hash((type(self),) + self._cons_args(self.children)) + + +def pre_traversal(expression_dags): + """Pre-order traversal of the nodes of expression DAGs.""" + seen = set() + lifo = [] + # Some roots might be same, but they must be visited only once. + # Keep the original ordering of roots, for deterministic code + # generation. + for root in expression_dags: + if root not in seen: + seen.add(root) + lifo.append(root) + + while lifo: + node = lifo.pop() + yield node + for child in reversed(node.children): + if child not in seen: + seen.add(child) + lifo.append(child) + + +def post_traversal(expression_dags): + """Post-order traversal of the nodes of expression DAGs.""" + seen = set() + lifo = [] + # Some roots might be same, but they must be visited only once. + # Keep the original ordering of roots, for deterministic code + # generation. + for root in expression_dags: + if root not in seen: + seen.add(root) + lifo.append((root, list(root.children))) + + while lifo: + node, deps = lifo[-1] + for i, dep in enumerate(deps): + if dep is not None and dep not in seen: + lifo.append((dep, list(dep.children))) + deps[i] = None + break + else: + yield node + seen.add(node) + lifo.pop() + + +# Default to the more efficient pre-order traversal +traversal = pre_traversal + + +def collect_refcount(expression_dags): + """Collects reference counts for a multi-root expression DAG.""" + result = collections.Counter(expression_dags) + for node in traversal(expression_dags): + result.update(node.children) + return result + + +def noop_recursive(function): + """No-op wrapper for functions with overridable recursive calls. + + :arg function: a function with parameters (value, rec), where + ``rec`` is expected to be a function used for + recursive calls. 
+ :returns: a function with working recursion and nothing fancy + """ + def recursive(node): + return function(node, recursive) + return recursive + + +def noop_recursive_arg(function): + """No-op wrapper for functions with overridable recursive calls + and an argument. + + :arg function: a function with parameters (value, rec, arg), where + ``rec`` is expected to be a function used for + recursive calls. + :returns: a function with working recursion and nothing fancy + """ + def recursive(node, arg): + return function(node, recursive, arg) + return recursive + + +class Memoizer(object): + """Caching wrapper for functions with overridable recursive calls. + The lifetime of the cache is the lifetime of the object instance. + + :arg function: a function with parameters (value, rec), where + ``rec`` is expected to be a function used for + recursive calls. + :returns: a function with working recursion and caching + """ + def __init__(self, function): + self.cache = {} + self.function = function + + def __call__(self, node): + try: + return self.cache[node] + except KeyError: + result = self.function(node, self) + self.cache[node] = result + return result + + +class MemoizerArg(object): + """Caching wrapper for functions with overridable recursive calls + and an argument. The lifetime of the cache is the lifetime of the + object instance. + + :arg function: a function with parameters (value, rec, arg), where + ``rec`` is expected to be a function used for + recursive calls. 
+ :returns: a function with working recursion and caching + """ + def __init__(self, function): + self.cache = {} + self.function = function + + def __call__(self, node, arg): + cache_key = (node, arg) + try: + return self.cache[cache_key] + except KeyError: + result = self.function(node, self, arg) + self.cache[cache_key] = result + return result + + +def reuse_if_untouched(node, self): + """Reuse if untouched recipe""" + new_children = list(map(self, node.children)) + if all(nc == c for nc, c in zip(new_children, node.children)): + return node + else: + return node.reconstruct(*new_children) + + +def reuse_if_untouched_arg(node, self, arg): + """Reuse if untouched recipe propagating an extra argument""" + new_children = [self(child, arg) for child in node.children] + if all(nc == c for nc, c in zip(new_children, node.children)): + return node + else: + return node.reconstruct(*new_children) diff --git a/pyop2/codegen/optimise.py b/pyop2/codegen/optimise.py new file mode 100644 index 0000000000..f0a7b58b94 --- /dev/null +++ b/pyop2/codegen/optimise.py @@ -0,0 +1,137 @@ +from pyop2.codegen.node import traversal, reuse_if_untouched, Memoizer +from functools import singledispatch +from pyop2.codegen.representation import (Index, RuntimeIndex, Node, + FunctionCall, Variable, Argument) + + +def collect_indices(expressions): + """Collect indices in expressions. + + :arg expressions: an iterable of expressions to collect indices + from. + :returns: iterable of nodes of type :class:`Index` or + :class:`RuntimeIndex`.
@singledispatch
def replace_indices(node, self):
    """Fallback of the index-substitution dispatcher: reject unknown types."""
    raise AssertionError("Unhandled node type %r" % type(node))


# Generic nodes are rebuilt only when one of their children changed.
replace_indices.register(Node)(reuse_if_untouched)


@replace_indices.register(Index)
def replace_indices_index(node, self):
    """Return the replacement recorded in ``self.subst``, or ``node`` itself."""
    substitutions = self.subst
    return substitutions.get(node, node)


def index_merger(instructions, cache=None):
    """Merge indices across an instruction stream.

    Indices are candidates for merging if they have the same extent as
    an already seen index in the instruction stream, and appear at the
    same level of the loop nest.

    :arg instructions: Iterable of nodes to merge indices across.
    :arg cache: optional dict mapping key prefixes (runtime indices
        followed by free-index extents) to the indices chosen for them;
        a fresh dict is used when omitted.
    :returns: a memoized callable suitable for index merging.
    """
    if cache is None:
        cache = {}

    def substitutable(index):
        # Free indices of extent 1 are never substituted; runtime
        # indices always are.
        return (isinstance(index, Index) and index.extent != 1) or isinstance(index, RuntimeIndex)

    seen = {}
    pairs = []
    replacer = Memoizer(replace_indices)

    for insn in instructions:
        if isinstance(insn, FunctionCall):
            continue

        found = tuple(collect_indices([insn]))
        runtime = tuple(idx for idx in found if not isinstance(idx, Index))
        free = tuple(idx for idx in found if isinstance(idx, Index))
        ordered = runtime + free

        full_key = runtime + tuple(idx.extent for idx in free)
        # Shrink the key until a previously recorded prefix matches
        # (the empty tuple terminates the search).
        prefix = full_key
        while prefix not in cache and len(prefix):
            prefix = prefix[:-1]

        if prefix in cache:
            merged = cache[prefix] + ordered[len(prefix):]
        else:
            merged = ordered

        # Record every prefix from the matched one up to the full key.
        for stop in range(len(prefix), len(full_key) + 1):
            cache[full_key[:stop]] = merged[:stop]

        for old, new in zip(ordered, merged):
            if old in seen and substitutable(old):
                pairs.append((old, seen[old]))
            if old != new:
                if old in seen:
                    assert seen[old] == new
                seen[old] = new
                if substitutable(old):
                    pairs.append((old, new))

    replacer.subst = dict(pairs)
    return replacer


@singledispatch
def _rename_node(node, self):
    """Rename nodes

    :param node: root of expression
    :param self: function for recursive calls
    """
    raise AssertionError("cannot handle type %s" % type(node))


# Generic nodes are rebuilt only when one of their children changed.
_rename_node.register(Node)(reuse_if_untouched)


@_rename_node.register(Index)
def _rename_node_index(node, self):
    """Rebuild an index with the same extent under its new name."""
    return Index(extent=node.extent, name=self.renamer(node))


@_rename_node.register(FunctionCall)
def _rename_node_func(node, self):
    """Rebuild a function call from renamed free indices and children."""
    # Free indices are visited before children; the renamer hands out
    # names in visiting order.
    renamed_indices = tuple(self(index) for index in node.free_indices)
    renamed_children = tuple(self(child) for child in node.children)
    return FunctionCall(node.name, node.label, node.access,
                        renamed_indices, *renamed_children)


@_rename_node.register(Variable)
def _rename_node_variable(node, self):
    """Rebuild a variable with the same shape and dtype under its new name."""
    return Variable(self.renamer(node), node.shape, node.dtype)


@_rename_node.register(Argument)
def _rename_node_argument(node, self):
    """Rebuild an argument with the same shape and dtype under its new name."""
    return Argument(node.shape, node.dtype, name=self.renamer(node))
+ """ + mapper = Memoizer(_rename_node) + mapper.renamer = renamer + return list(map(mapper, instructions)) diff --git a/pyop2/codegen/rep2loopy.py b/pyop2/codegen/rep2loopy.py new file mode 100644 index 0000000000..f850411073 --- /dev/null +++ b/pyop2/codegen/rep2loopy.py @@ -0,0 +1,897 @@ +import ctypes +import numpy + +import loopy +from loopy.symbolic import SubArrayRef +from loopy.expression import dtype_to_type_context +from pymbolic.mapper.stringifier import PREC_NONE +from pymbolic import var +from loopy.types import NumpyType, OpaqueType +import abc + +import islpy as isl +import pymbolic.primitives as pym + +from collections import OrderedDict, defaultdict +from functools import singledispatch, reduce, partial +import itertools +import operator + +from pyop2.codegen.node import traversal, Node, Memoizer, reuse_if_untouched + +from pyop2.types.access import READ, WRITE +from pyop2.datatypes import as_ctypes + +from pyop2.codegen.optimise import index_merger, rename_nodes + +from pyop2.codegen.representation import (Index, FixedIndex, RuntimeIndex, + MultiIndex, Extent, Indexed, + BitShift, BitwiseNot, BitwiseAnd, BitwiseOr, + Conditional, Comparison, DummyInstruction, + LogicalNot, LogicalAnd, LogicalOr, + Materialise, Accumulate, FunctionCall, When, + Argument, Variable, Literal, NamedLiteral, + Symbol, Zero, Sum, Min, Max, Product, + Quotient, FloorDiv, Remainder) +from pyop2.codegen.representation import (PackInst, UnpackInst, KernelInst, PreUnpackInst) +from pytools import ImmutableRecord +from pyop2.codegen.loopycompat import _match_caller_callee_argument_dimension_ +from pyop2.configuration import target + +from petsc4py import PETSc + + +# Read c files for linear algebra callables in on import +import os +from pyop2.mpi import COMM_WORLD +if COMM_WORLD.rank == 0: + with open(os.path.dirname(__file__)+"/c/inverse.c", "r") as myfile: + inverse_preamble = myfile.read() + with open(os.path.dirname(__file__)+"/c/solve.c", "r") as myfile: + solve_preamble 
class Bag(object):
    """Empty attribute holder used as a mutable namespace during code generation."""
    pass


def symbol_mangler(kernel, name):
    """Give loopy a type for the ``ADD_VALUES``/``INSERT_VALUES`` symbols.

    :arg kernel: the kernel being processed (unused).
    :arg name: name of the symbol to resolve.
    :returns: ``(int32 loopy type, name)`` for the two known symbols,
        ``None`` for anything else.
    """
    if name in {"ADD_VALUES", "INSERT_VALUES"}:
        return loopy.types.to_loopy_type(numpy.int32), name
    return None


class PetscCallable(loopy.ScalarCallable):
    """Loopy callable for functions registered with :func:`register_petsc_function`."""

    def with_types(self, arg_id_to_dtype, callables_table):
        """Accept the argument dtypes as given and call the function by its own name."""
        new_arg_id_to_dtype = arg_id_to_dtype.copy()
        return (self.copy(
            name_in_target=self.name,
            arg_id_to_dtype=new_arg_id_to_dtype), callables_table)

    def with_descrs(self, arg_id_to_descr, callables_table):
        """Give every array argument fixed strides equal to the product of its trailing extents."""
        from loopy.kernel.function_interface import ArrayArgDescriptor
        from loopy.kernel.array import FixedStrideArrayDimTag
        new_arg_id_to_descr = arg_id_to_descr.copy()
        for arg_id, des in arg_id_to_descr.items():
            # petsc takes 1D arrays as arguments
            if isinstance(des, ArrayArgDescriptor):
                rank = len(des.shape)
                dim_tags = tuple(
                    FixedStrideArrayDimTag(stride=int(numpy.prod(des.shape[axis+1:])),
                                           layout_nesting_level=rank-axis-1)
                    for axis in range(rank))
                new_arg_id_to_descr[arg_id] = des.copy(dim_tags=dim_tags)

        return (self.copy(arg_id_to_descr=new_arg_id_to_descr),
                callables_table)

    def generate_preambles(self, target):
        """Yield the preamble that pulls in the PETSc header."""
        # The directive previously had no header name, which is not valid C.
        yield ("00_petsc", "#include <petsc.h>")


petsc_functions = set()


def register_petsc_function(name):
    """Record ``name`` in the module-level ``petsc_functions`` set."""
    petsc_functions.add(name)


class LACallable(loopy.ScalarCallable, metaclass=abc.ABCMeta):
    """
    The LACallable (Linear algebra callable)
    replaces loopy.CallInstructions to linear algebra functions
    like solve or inverse by LAPACK calls.

    Subclasses provide ``name`` and ``generate_preambles``.
    """
    def __init__(self, name=None, arg_id_to_dtype=None,
                 arg_id_to_descr=None, name_in_target=None):
        if name is not None:
            assert name == self.name
        super(LACallable, self).__init__(self.name,
                                         arg_id_to_dtype=arg_id_to_dtype,
                                         arg_id_to_descr=arg_id_to_descr)
        self.name_in_target = name_in_target if name_in_target else self.name

    # abc.abstractproperty is deprecated; this stacking is its
    # documented replacement.
    @property
    @abc.abstractmethod
    def name(self):
        pass

    @abc.abstractmethod
    def generate_preambles(self, target):
        pass

    def with_types(self, arg_id_to_dtype, callables_table):
        """Specialise on the argument dtypes once all of them are known.

        The entry for id ``-1`` is given the dtype of argument ``0``.
        """
        dtypes = {}
        for i in range(len(arg_id_to_dtype)):
            if arg_id_to_dtype.get(i) is None:
                # the types provided aren't mature enough to specialize the
                # callable
                return (self.copy(arg_id_to_dtype=arg_id_to_dtype),
                        callables_table)
            mat_dtype = arg_id_to_dtype[i].numpy_dtype
            dtypes[i] = NumpyType(mat_dtype)
        dtypes[-1] = NumpyType(dtypes[0].dtype)

        return (self.copy(name_in_target=self.name_in_target,
                          arg_id_to_dtype=dtypes),
                callables_table)

    def emit_call_insn(self, insn, target, expression_to_code_mapper):
        """Build the target call ``f(assignee, *parameters, n)``.

        ``n`` is ``shape[1]`` of the descriptor of argument ``0``.

        :returns: ``(call expression, False)``; the second item is
            returned as ``False`` unconditionally.
        """
        assert self.is_ready_for_codegen()
        assert isinstance(insn, loopy.CallInstruction)

        parameters = list(insn.expression.parameters)
        par_dtypes = [self.arg_id_to_dtype[i] for i, _ in enumerate(parameters)]

        # The last assignee is passed like an argument, typed as argument 0.
        parameters.append(insn.assignees[-1])
        par_dtypes.append(self.arg_id_to_dtype[0])

        mat_descr = self.arg_id_to_descr[0]
        arg_c_parameters = [
            expression_to_code_mapper(
                par,
                PREC_NONE,
                dtype_to_type_context(target, par_dtype),
                par_dtype
            ).expr
            for par, par_dtype in zip(parameters, par_dtypes)
        ]
        # Output first, then the inputs in their original order.
        c_parameters = [arg_c_parameters[-1]]
        c_parameters.extend(arg_c_parameters[:-1])
        c_parameters.append(numpy.int32(mat_descr.shape[1]))  # n
        return var(self.name_in_target)(*c_parameters), False


class INVCallable(LACallable):
    """
    The InverseCallable replaces loopy.CallInstructions to "inverse"
    functions by LAPACK getri.
    """
    name = "inverse"

    def generate_preambles(self, target):
        """Yield the C source loaded into ``inverse_preamble``."""
        yield ("inverse", inverse_preamble)


class SolveCallable(LACallable):
    """
    The SolveCallable replaces loopy.CallInstructions to "solve"
    functions by LAPACK getrs.
    """
    name = "solve"

    def generate_preambles(self, target):
        """Yield the C source loaded into ``solve_preamble``."""
        yield ("solve", solve_preamble)


class _PreambleGen(ImmutableRecord):
    """Preamble generator that always yields one fixed preamble string."""
    fields = set(("preamble", ))

    def __init__(self, preamble):
        self.preamble = preamble

    def __call__(self, preamble_info):
        yield ("0", self.preamble)
@singledispatch
def replace_materialise(node, self):
    """Fallback of the Materialise-replacement dispatcher: reject unknown types."""
    raise AssertionError("Unhandled node type %r" % type(node))


# Generic nodes are rebuilt only when one of their children changed.
replace_materialise.register(Node)(reuse_if_untouched)


@replace_materialise.register(Materialise)
def replace_materialise_materialise(node, self):
    """Swap a Materialise for a Variable and record its initialising statements.

    The processed children are read as alternating (rvalue, indices)
    pairs.  One ``Accumulate`` into the new variable is built per pair,
    wrapped in ``When`` if the rvalue was conditional, and the tuple of
    them is appended to ``self.initialisers``.

    :returns: the new :class:`Variable`.
    """
    variable = Variable(node.name, node.shape, node.dtype)
    processed = list(map(self, node.children))
    label = node.label
    statements = []
    for rvalue, indices in zip(processed[0::2], processed[1::2]):
        lvalue = Indexed(variable, indices)
        if not isinstance(rvalue, When):
            statements.append(Accumulate(label, lvalue, rvalue))
            continue
        # Hoist the condition so that it guards the whole assignment.
        condition, rvalue = rvalue.children
        statements.append(When(condition, Accumulate(label, lvalue, rvalue)))
    self.initialisers.append(tuple(statements))
    return variable


def runtime_indices(expressions):
    """Return the frozenset of names of all RuntimeIndex nodes in ``expressions``."""
    return frozenset(node.name
                     for node in traversal(expressions)
                     if isinstance(node, RuntimeIndex))


def imperatives(exprs):
    """Yield every Accumulate and FunctionCall node found by traversing ``exprs``."""
    yield from (op for op in traversal(exprs)
                if isinstance(op, (Accumulate, FunctionCall)))
def instruction_dependencies(instructions, initialisers):
    """Name every imperative statement and compute what it depends on.

    :arg instructions: iterable of nodes; every Accumulate/FunctionCall
        reachable from it gets a name ``statementN``.
    :arg initialisers: iterable of statement tuples; within a tuple each
        statement is made to depend on all statements before it.
    :returns: dict mapping each statement to ``(name, dependencies)``,
        with ``dependencies`` a frozenset of statement names.
    """
    deps = {}
    names = {}
    instructions_by_type = defaultdict(list)
    counter = itertools.count()
    for op in imperatives(instructions):
        names[op] = "statement%d" % next(counter)
        instructions_by_type[type(op.label)].append(op)
        deps[op] = frozenset()

    # read-write dependencies in packing instructions
    def variables(exprs):
        for op in traversal(exprs):
            if isinstance(op, (Argument, Variable)):
                yield op

    def bounds(exprs):
        for op in traversal(exprs):
            if isinstance(op, RuntimeIndex):
                yield from variables(op.extents)

    writers = defaultdict(list)
    for op in instructions_by_type[PackInst]:
        assert isinstance(op, Accumulate)
        lvalue, _ = op.children
        # Only writes to the outer-most variable
        writes = next(variables([lvalue]))
        if isinstance(writes, Variable):
            writers[writes].append(names[op])

    for op in instructions_by_type[PackInst]:
        _, rvalue = op.children
        deps[op] |= frozenset(itertools.chain.from_iterable(
            writers[r] for r in itertools.chain(variables([rvalue]), bounds([op]))
        ))
        # A statement must not depend on itself.  The name is wrapped in
        # a list: frozenset("statement3") would be a set of characters
        # and never remove anything.
        deps[op] -= frozenset([names[op]])

    for typ, depends_on in [(KernelInst, [PackInst]),
                            (PreUnpackInst, [KernelInst]),
                            (UnpackInst, [KernelInst, PreUnpackInst])]:
        for op in instructions_by_type[typ]:
            ops = itertools.chain.from_iterable(instructions_by_type[t] for t in depends_on)
            deps[op] |= frozenset(names[o] for o in ops)

    # add sequential instructions in the initialisers
    for inits in initialisers:
        for i, parent in enumerate(inits[1:], 1):
            earlier = frozenset(names[c] for c in imperatives(inits[:i]))
            for p in imperatives([parent]):
                # Exclude the statement's own name (the name left over
                # from the numbering loop above was subtracted before).
                deps[p] |= earlier - frozenset([names[p]])

    # add name to deps
    return {op: (names[op], dep) for op, dep in deps.items()}
builder.kernel.name + + # replace Materialise + mapper = Memoizer(replace_materialise) + mapper.initialisers = [] + instructions = list(mapper(i) for i in instructions) + + # merge indices + merger = index_merger(instructions) + instructions = list(merger(i) for i in instructions) + initialiser = list(itertools.chain(*mapper.initialisers)) + merger = index_merger(initialiser) + initialiser = list(merger(i) for i in initialiser) + instructions = instructions + initialiser + mapper.initialisers = [tuple(merger(i) for i in inits) for inits in mapper.initialisers] + + def name_generator(prefix): + yield from (f"{prefix}{i}" for i in itertools.count()) + + # rename indices and nodes (so that the counters start from zero) + node_names = {} + node_namers = dict((cls, name_generator(prefix)) + for cls, prefix in [(Index, "i"), (Variable, "t")]) + + def renamer(expr): + if isinstance(expr, Argument): + if expr._name is not None: + # Some arguments have given names + return expr._name + else: + # Otherwise generate one with their given prefix. 
+ namer = node_namers.setdefault((type(expr), expr.prefix), + name_generator(expr.prefix)) + else: + namer = node_namers[type(expr)] + try: + return node_names[expr] + except KeyError: + return node_names.setdefault(expr, next(namer)) + + instructions = rename_nodes(instructions, renamer) + mapper.initialisers = [rename_nodes(inits, renamer) + for inits in mapper.initialisers] + parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments, renamer) + s, e = rename_nodes([mapper(e) for e in builder.layer_extents], renamer) + parameters.layer_start = s.name + parameters.layer_end = e.name + + # scheduling and loop nesting + deps = instruction_dependencies(instructions, mapper.initialisers) + within_inames = loop_nesting(instructions, deps, outer_inames, parameters.kernel_name) + + # generate loopy + context = Bag() + context.parameters = parameters + context.within_inames = within_inames + context.conditions = [] + context.index_ordering = [] + context.instruction_dependencies = deps + context.kernel_parameters = {} + + statements = list(statement(insn, context) for insn in instructions) + # remove the dummy instructions (they were only used to ensure + # that the kernel knows about the outer inames). 
+ statements = list(s for s in statements if not isinstance(s, DummyInstruction)) + + domains = list(parameters.domains.values()) + if builder.single_cell: + new_domains = [] + for d in domains: + if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name: + # n = start + new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"n": 1, "start": -1}))) + else: + new_domains.append(d) + domains = new_domains + if builder.extruded: + new_domains = [] + for d in domains: + if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name: + # layer = t1 - 1 + t1 = parameters.layer_end + new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"layer": 1, t1: -1, 1: 1}))) + else: + new_domains.append(d) + domains = new_domains + + assumptions, = reduce(operator.and_, + parameters.assumptions.values()).params().get_basic_sets() + options = loopy.Options(check_dep_resolution=True, ignore_boostable_into=True) + + # sometimes masks are not used, but we still need to create the function arguments + for i, arg in enumerate(parameters.wrapper_arguments): + if parameters.kernel_data[i] is None: + arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape, + strides=loopy.auto) + parameters.kernel_data[i] = arg + + if wrapper_name is None: + wrapper_name = "wrap_%s" % builder.kernel.name + + pwaffd = isl.affs_from_space(assumptions.get_space()) + assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0]) + if builder.single_cell: + assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"]) + else: + assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"]) + if builder.extruded: + assumptions = assumptions & pwaffd[parameters.layer_start].le_set(pwaffd[parameters.layer_end]) + assumptions = reduce(operator.and_, assumptions.get_basic_sets()) + + wrapper = loopy.make_kernel(domains, + statements, + kernel_data=parameters.kernel_data, + target=target, + temporary_variables=parameters.temporaries, + 
symbol_manglers=[symbol_mangler], + options=options, + assumptions=assumptions, + lang_version=(2018, 2), + name=wrapper_name) + + # prioritize loops + for indices in context.index_ordering: + wrapper = loopy.prioritize_loops(wrapper, indices) + + # register kernel + kernel = builder.kernel + headers = set(kernel.headers) + headers = headers | set(["#include ", "#include ", "#include "]) + if PETSc.Log.isActive(): + headers = headers | set(["#include "]) + preamble = "\n".join(sorted(headers)) + + if isinstance(kernel.code, loopy.TranslationUnit): + knl = kernel.code + wrapper = loopy.merge([wrapper, knl]) + # remove the local kernel from the available entrypoints + wrapper = wrapper.copy(entrypoints=wrapper.entrypoints-{kernel.name}) + wrapper = _match_caller_callee_argument_dimension_(wrapper, kernel.name) + else: + # kernel is a string, add it to preamble + assert isinstance(kernel.code, str) + code = kernel.code + wrapper = loopy.register_callable( + wrapper, + kernel.name, + PyOP2KernelCallable(name=kernel.name, + parameters=context.kernel_parameters[kernel.name])) + preamble = preamble + "\n" + code + + wrapper = loopy.register_preamble_generators(wrapper, [_PreambleGen(preamble)]) + + # register petsc functions + for identifier in petsc_functions: + wrapper = loopy.register_callable(wrapper, identifier, PetscCallable(name=identifier)) + + return wrapper + + +def argtypes(kernel): + args = [] + for arg in kernel.args: + if isinstance(arg, loopy.ValueArg): + args.append(as_ctypes(arg.dtype)) + elif isinstance(arg, loopy.ArrayArg): + args.append(ctypes.c_voidp) + else: + raise ValueError("Unhandled arg type '%s'" % type(arg)) + return args + + +@singledispatch +def statement(expr, context): + raise AssertionError("Unhandled statement type '%s'" % type(expr)) + + +@statement.register(DummyInstruction) +def statement_dummy(expr, context): + new_children = tuple(expression(c, context.parameters) for c in expr.children) + return DummyInstruction(expr.label, 
@statement.register(When)
def statement_when(expr, context):
    """Translate the guarded statement with its condition pushed onto ``context.conditions``."""
    guard, guarded = expr.children
    predicate = expression(guard, context.parameters)
    context.conditions.append(predicate)
    lowered = statement(guarded, context)
    context.conditions.pop()
    return lowered


@statement.register(Accumulate)
def statement_assign(expr, context):
    """Translate an Accumulate node into a :class:`loopy.Assignment`."""
    destination, _ = expr.children
    if isinstance(destination, Indexed):
        # Remember the index order of the written tensor; ``generate``
        # later feeds it to loop prioritisation.
        ordering = tuple(index.name for index in destination.index_ordering())
        context.index_ordering.append(ordering)
    lvalue, rvalue = (expression(child, context.parameters) for child in expr.children)
    insn_id, depends_on = context.instruction_dependencies[expr]
    return loopy.Assignment(lvalue, rvalue,
                            within_inames=context.within_inames[expr],
                            within_inames_is_final=True,
                            predicates=frozenset(context.conditions),
                            id=insn_id,
                            depends_on=depends_on, depends_on_is_final=True)


@statement.register(FunctionCall)
def statement_functioncall(expr, context):
    """Translate a FunctionCall node into a :class:`loopy.CallInstruction`.

    Every translated argument is also appended, in call order, to
    ``context.kernel_parameters[expr.name]``.
    """
    parameters = context.parameters

    # We cannot reconstruct the correct calling convention for C-string kernels
    # without providing some additional context about the argument ordering.
    # This is processed inside the ``emit_call_insn`` method of
    # :class:`.PyOP2KernelCallable`.
    ordered_args = []
    context.kernel_parameters[expr.name] = ordered_args

    free_names = {index.name for index in expr.free_indices}
    reads = []
    writes = []
    for access, child in zip(expr.access, expr.children):
        translated = expression(child, parameters)
        if isinstance(translated, pym.Subscript):
            # tensor argument: sweep over the subscripts that are free
            # indices of the call
            swept = tuple(index for index in translated.index_tuple
                          if isinstance(index, pym.Variable) and index.name in free_names)
            arg = SubArrayRef(swept, translated)
        else:
            # scalar argument or constant
            arg = translated
        ordered_args.append(arg)

        opaque = isinstance(child, Argument) and isinstance(child.dtype, OpaqueType)
        if access is READ or opaque:
            reads.append(arg)
        elif access is WRITE:
            writes.append(arg)
        else:
            # any other access mode both reads and writes
            reads.append(arg)
            writes.append(arg)

    insn_id, depends_on = context.instruction_dependencies[expr]
    call = pym.Call(pym.Variable(expr.name), tuple(reads))

    return loopy.CallInstruction(tuple(writes), call,
                                 within_inames=context.within_inames[expr],
                                 within_inames_is_final=True,
                                 predicates=frozenset(context.conditions),
                                 id=insn_id,
                                 depends_on=depends_on, depends_on_is_final=True)


@singledispatch
def expression(expr, parameters):
    """Fallback of the expression dispatcher: reject unknown node types."""
    raise AssertionError("Unhandled expression type '%s'" % type(expr))


@expression.register(Index)
def expression_index(expr, parameters):
    """Translate a free index, registering the domain ``0 <= name < extent`` on first use."""
    name = expr.name
    if name not in parameters.domains:
        isl_vars = isl.make_zero_and_vars([name])
        lower = isl_vars[0]
        upper = lower + expr.extent
        parameters.domains[name] = isl_vars[name].ge_set(lower) & isl_vars[name].lt_set(upper)
    return pym.Variable(name)


@expression.register(FixedIndex)
def expression_fixedindex(expr, parameters):
    """A fixed index translates to its plain value."""
    return expr.value
@translate.register(Sum) + def translate_sum(expr, vars): + return operator.add(*(translate(c, vars) for c in expr.children)) + + @translate.register(Argument) + def translate_argument(expr, vars): + expr = expression(expr, parameters) + return vars[expr.name] + + @translate.register(Variable) + def translate_variable(expr, vars): + return vars[expr.name] + + @translate.register(Zero) + def translate_zero(expr, vars): + assert expr.shape == () + return vars[0] + + @translate.register(LogicalAnd) + def translate_logicaland(expr, vars): + a, b = (translate(c, vars) for c in expr.children) + return a & b + + @translate.register(Comparison) + def translate_comparison(expr, vars): + a, b = (translate(c, vars) for c in expr.children) + fn = {">": "gt_set", + ">=": "ge_set", + "==": "eq_set", + "!=": "ne_set", + "<": "lt_set", + "<=": "le_set"}[expr.operator] + return getattr(a, fn)(b) + + name = expr.name + if name not in parameters.domains: + lo, hi, constraint = expr.children + params = list(v.name for v in traversal([lo, hi]) if isinstance(v, (Argument, Variable))) + vars = isl.make_zero_and_vars([name], params) + domain = (vars[name].ge_set(translate(lo, vars)) + & vars[name].lt_set(translate(hi, vars))) + parameters.domains[name] = domain + if constraint is not None: + parameters.assumptions[name] = translate(constraint, vars) + return pym.Variable(name) + + +@expression.register(MultiIndex) +def expression_multiindex(expr, parameters): + return tuple(expression(c, parameters) for c in expr.children) + + +@expression.register(Extent) +def expression_extent(expr, parameters): + multiindex, = expr.children + # TODO: If loopy eventually gains the ability to vectorise + # functions that use this, we will need a symbolic node for the + # index extent. 
+ return int(numpy.prod(tuple(i.extent for i in multiindex))) + + +@expression.register(Symbol) +def expression_symbol(expr, parameters): + return pym.Variable(expr.name) + + +@expression.register(Argument) +def expression_argument(expr, parameters): + name = expr.name + shape = expr.shape + dtype = expr.dtype + if shape == (): + arg = loopy.ValueArg(name, dtype=dtype) + else: + arg = loopy.GlobalArg(name, + dtype=dtype, + shape=shape, + strides=loopy.auto) + idx = parameters.wrapper_arguments.index(expr) + parameters.kernel_data[idx] = arg + return pym.Variable(name) + + +@expression.register(Variable) +def expression_variable(expr, parameters): + name = expr.name + shape = expr.shape + dtype = expr.dtype + if name not in parameters.temporaries: + parameters.temporaries[name] = loopy.TemporaryVariable(name, + dtype=dtype, + shape=shape, + address_space=loopy.auto) + return pym.Variable(name) + + +@expression.register(Zero) +def expression_zero(expr, parameters): + assert expr.shape == () + return 0 + + +@expression.register(Literal) +def expression_literal(expr, parameters): + assert expr.shape == () + if expr.casting: + return loopy.symbolic.TypeCast(expr.dtype, expr.value) + return expr.value + + +@expression.register(NamedLiteral) +def expression_namedliteral(expr, parameters): + name = expr.name + val = loopy.TemporaryVariable(name, + dtype=expr.dtype, + shape=expr.shape, + address_space=loopy.AddressSpace.LOCAL, + read_only=True, + initializer=expr.value) + parameters.temporaries[name] = val + + return pym.Variable(name) + + +@expression.register(Conditional) +def expression_conditional(expr, parameters): + return pym.If(*(expression(c, parameters) for c in expr.children)) + + +@expression.register(Comparison) +def expression_comparison(expr, parameters): + l, r = (expression(c, parameters) for c in expr.children) + return pym.Comparison(l, expr.operator, r) + + +@expression.register(LogicalNot) +@expression.register(BitwiseNot) +def expression_uop(expr, 
parameters): + child, = (expression(c, parameters) for c in expr.children) + return {LogicalNot: pym.LogicalNot, + BitwiseNot: pym.BitwiseNot}[type(expr)](child) + + +@expression.register(Sum) +@expression.register(Product) +@expression.register(Quotient) +@expression.register(FloorDiv) +@expression.register(Remainder) +@expression.register(LogicalAnd) +@expression.register(LogicalOr) +@expression.register(BitwiseAnd) +@expression.register(BitwiseOr) +def expression_binop(expr, parameters): + children = tuple(expression(c, parameters) for c in expr.children) + if type(expr) in {Quotient, FloorDiv, Remainder}: + return {Quotient: pym.Quotient, + FloorDiv: pym.FloorDiv, + Remainder: pym.Remainder}[type(expr)](*children) + else: + return {Sum: pym.Sum, + Product: pym.Product, + LogicalOr: pym.LogicalOr, + LogicalAnd: pym.LogicalAnd, + BitwiseOr: pym.BitwiseOr, + BitwiseAnd: pym.BitwiseAnd}[type(expr)](children) + + +@expression.register(Min) +@expression.register(Max) +def expression_minmax(expr, parameters): + children = tuple(expression(c, parameters) for c in expr.children) + return {Min: pym.Variable("min"), + Max: pym.Variable("max")}[type(expr)](*children) + + +@expression.register(BitShift) +def expression_bitshift(expr, parameters): + children = (expression(c, parameters) for c in expr.children) + return {"<<": pym.LeftShift, + ">>": pym.RightShift}[expr.direction](*children) + + +@expression.register(Indexed) +def expression_indexed(expr, parameters): + aggregate, multiindex = (expression(c, parameters) for c in expr.children) + return pym.Subscript(aggregate, multiindex) diff --git a/pyop2/codegen/representation.py b/pyop2/codegen/representation.py new file mode 100644 index 0000000000..5277094d96 --- /dev/null +++ b/pyop2/codegen/representation.py @@ -0,0 +1,547 @@ +import numbers +import itertools +from functools import partial +from collections import defaultdict +from pyop2.utils import cached_property +import numpy +from abc import ABCMeta +from 
pyop2.codegen.node import Node as NodeBase + + +class InstructionLabel(object): + def __init__(self, within_inames=()): + self.within_inames = tuple(w for w in within_inames if isinstance(w, Node)) + + +class PackInst(InstructionLabel): + pass + + +class UnpackInst(InstructionLabel): + pass + + +class PreUnpackInst(InstructionLabel): + pass + + +class KernelInst(InstructionLabel): + pass + + +class Node(NodeBase): + + def is_equal(self, other): + """Common subexpression eliminating equality predicate. + + When two (sub)expressions are equal, the children of one + object are reassigned to the children of the other, so some + duplicated subexpressions are eliminated. + """ + result = NodeBase.is_equal(self, other) + if result: + self.children = other.children + return result + + +class Terminal(Node): + __slots__ = () + children = () + is_equal = NodeBase.is_equal + + +class Scalar(Node): + __slots__ = () + + shape = () + + +class Constant(Terminal): + __slots__ = () + + +class DTypeMixin(object): + + @cached_property + def dtype(self): + dtype, = set(c.dtype for c in self.children) + return dtype + + +class Zero(Constant): + __slots__ = ("shape", "dtype") + __front__ = ("shape", "dtype") + + def __init__(self, shape, dtype): + self.shape = shape + self.dtype = dtype + + +class IndexBase(metaclass=ABCMeta): + pass + + +class Index(Terminal, Scalar): + _count = itertools.count() + __slots__ = ("extent", "merge", "name") + __front__ = ("extent", "merge", "name") + + def __init__(self, extent=None, merge=True, name=None): + self.name = name or "i%d" % next(Index._count) + self.extent = None + self.set_extent(extent) + self.merge = merge + + def set_extent(self, value): + if self.extent is None: + if isinstance(value, numbers.Integral): + value = int(value) + self.extent = value + elif self.extent != value: + raise ValueError("Inconsistent index extents") + + dtype = numpy.int32 + + +class FixedIndex(Terminal, Scalar): + __slots__ = ("value", ) + __front__ = ("value", ) 
+ + extent = 1 + + def __init__(self, value): + assert isinstance(value, numbers.Integral) + self.value = numpy.int32(value) + + dtype = numpy.int32 + + +class RuntimeIndex(Scalar): + _count = itertools.count() + __slots__ = ("children", "name") + __back__ = ("name", ) + + def __init__(self, lo, hi, constraint, name): + assert name is not None, "runtime indices need a name" + self.name = name + self.children = lo, hi, constraint + + @cached_property + def extents(self): + return self.children[:2] + + @cached_property + def dtype(self): + a, b, c = self.children + assert a.dtype == b.dtype + return a.dtype + + +IndexBase.register(FixedIndex) +IndexBase.register(Index) +IndexBase.register(RuntimeIndex) + + +class MultiIndex(Node): + __slots__ = ("children", ) + + def __init__(self, *indices): + self.children = indices + + def __iter__(self): + return iter(self.children) + + def __len__(self): + return len(self.children) + + +class Extent(Scalar): + __slots__ = ("children", ) + + def __init__(self, multiindex): + assert all(isinstance(i, (Index, FixedIndex)) for i in multiindex.children) + self.children = multiindex, + + +class Symbol(Terminal): + __slots__ = ("name", ) + __front__ = ("name", ) + + def __init__(self, name): + self.name = name + + +class Argument(Terminal): + _count = defaultdict(partial(itertools.count)) + + __slots__ = ("shape", "dtype", "_name", "prefix", "_gen_name") + __front__ = ("shape", "dtype", "_name", "prefix") + + def __init__(self, shape, dtype, name=None, pfx=None): + self.dtype = dtype + self.shape = shape + self._name = name + pfx = pfx or "v" + self.prefix = pfx + self._gen_name = name or "%s%d" % (pfx, next(Argument._count[pfx])) + + def get_hash(self): + return hash((type(self),) + self._cons_args(self.children) + (self.name,)) + + @property + def name(self): + return self._name or self._gen_name + + +class Literal(Terminal, Scalar): + __slots__ = ("value", ) + __front__ = ("value", ) + shape = () + + def __new__(cls, value, 
class Min(Scalar):
    """Symbolic minimum of two scalar expressions."""
    __slots__ = ("children", )

    def __init__(self, a, b):
        # Both operands must be scalar (empty shape).
        assert not a.shape
        assert not b.shape
        self.children = a, b

    @cached_property
    def dtype(self):
        """Result dtype: the NumPy promotion of both operand dtypes.

        This matches ``Max``, ``Sum`` and ``Product``; previously only the
        dtype of the first operand was reported.
        """
        a, b = self.children
        return numpy.result_type(a.dtype, b.dtype)
class Indexed(Scalar):
    """Scalar obtained by indexing an aggregate with a multiindex.

    :arg aggregate: The node being indexed; its ``shape`` must have one
        entry per index.
    :arg multiindex: Iterable of indices. Plain integers are converted
        with ``int``; every ``Index`` has its extent set from the
        matching entry of ``aggregate.shape``.
    """
    __slots__ = ("children", )

    def __new__(cls, aggregate, multiindex):
        multiindex = MultiIndex(*(int(i) if isinstance(i, numbers.Integral) else i
                                  for i in multiindex))
        assert len(aggregate.shape) == len(multiindex)
        for index, extent in zip(multiindex, aggregate.shape):
            if isinstance(index, Index):
                # Raises ValueError if the index already has a different extent.
                index.set_extent(extent)

        self = super().__new__(cls)
        self.children = (aggregate, multiindex)
        return self

    def index_ordering(self):
        """Return the ``Index`` entries of the multiindex, in order."""
        return tuple(i for i in self.multiindex if isinstance(i, Index))

    @cached_property
    def dtype(self):
        return self.aggregate.dtype

    @cached_property
    def aggregate(self):
        return self.children[0]

    @cached_property
    def multiindex(self):
        return self.children[1]
for i in indices) + assert len(expressions_and_indices) % 2 == 0 + assert isinstance(label, InstructionLabel) + self.label = label + self.children = (init, indices) + tuple(expressions_and_indices) + self.name = "t%d" % next(Materialise._count) + + def reconstruct(self, *args): + new = type(self)(*self._cons_args(args)) + new.name = self.name + return new + + @cached_property + def shape(self): + indices = self.children[1] + return tuple(i.extent for i in indices) + + @cached_property + def dtype(self): + expr = self.children[0] + return expr.dtype + + +class Variable(Terminal): + __slots__ = ("name", "shape", "dtype") + __front__ = ("name", "shape", "dtype") + + def __init__(self, name, shape, dtype): + self.name = name + self.shape = shape + self.dtype = dtype + + +class DummyInstruction(Node): + __slots__ = ("children", "label") + __front__ = ("label",) + + def __init__(self, label, *children): + self.children = children + self.label = label + + +class Accumulate(Node): + __slots__ = ("children", "label") + __front__ = ("label",) + + def __init__(self, label, lvalue, rvalue): + self.children = (lvalue, rvalue) + self.label = label + + +class FunctionCall(Node): + __slots__ = ("name", "access", "free_indices", "label", "children") + __front__ = ("name", "label", "access", "free_indices") + + def __init__(self, name, label, access, free_indices, *arguments): + self.children = tuple(arguments) + self.access = tuple(access) + self.free_indices = free_indices + self.name = name + self.label = label + assert len(self.access) == len(self.children) + + +class Conditional(Scalar): + __slots__ = ("children", ) + + def __init__(self, condition, then, else_): + assert not condition.shape + assert not then.shape + assert then.shape == else_.shape + assert then.dtype == else_.dtype + self.children = condition, then, else_ + self.shape = then.shape + + @cached_property + def dtype(self): + return self.children[1].dtype + + +class Comparison(Scalar): + __slots__ = ("operator", 
"children") + __front__ = ("operator", ) + + def __init__(self, op, a, b): + assert not a.shape + assert not b.shape + if op not in {">", ">=", "==", "!=", "<", "<="}: + raise ValueError("invalid operator") + + self.operator = op + self.children = a, b + + +class LogicalNot(Scalar, DTypeMixin): + __slots__ = ("children", ) + + def __init__(self, expression): + assert not expression.shape + self.children = expression, + + +class LogicalAnd(Scalar, DTypeMixin): + __slots__ = ("children", ) + + def __init__(self, a, b): + assert not a.shape + assert not b.shape + self.children = a, b + + +class LogicalOr(Scalar, DTypeMixin): + __slots__ = ("children", ) + + def __init__(self, a, b): + assert not a.shape + assert not b.shape + self.children = a, b + + +class BitwiseNot(Scalar, DTypeMixin): + __slots__ = ("children", ) + + def __init__(self, expression): + assert not expression.shape + self.children = expression, + + +class BitwiseAnd(Scalar, DTypeMixin): + __slots__ = ("children", ) + + def __init__(self, a, b): + assert not a.shape + assert not b.shape + self.children = a, b + + +class BitwiseOr(Scalar, DTypeMixin): + __slots__ = ("children", ) + + def __init__(self, a, b): + assert not a.shape + assert not b.shape + self.children = a, b + + +class BitShift(Scalar, DTypeMixin): + __slots__ = ("direction", "children", ) + __front__ = ("direction", ) + + def __init__(self, direction, expr, shift): + assert direction in {"<<", ">>"} + self.direction = direction + self.children = expr, shift diff --git a/pyop2/compilation.py b/pyop2/compilation.py new file mode 100644 index 0000000000..76ccbb38a7 --- /dev/null +++ b/pyop2/compilation.py @@ -0,0 +1,701 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +from abc import ABC +import os +import platform +import shutil +import subprocess +import sys +import ctypes +import shlex +from hashlib import md5 +from packaging.version import Version, InvalidVersion +from textwrap import dedent +from functools import partial +from pathlib import Path +from contextlib import contextmanager +from tempfile import gettempdir, mkstemp +from random import randint + + +from pyop2 import mpi +from pyop2.caching import parallel_cache, memory_cache, default_parallel_hashkey, _as_hexdigest, DictLikeDiskAccess +from pyop2.configuration import configuration +from pyop2.logger import warning, debug, progress, INFO +from pyop2.exceptions import CompilationError +from pyop2.utils import get_petsc_variables +import pyop2.global_kernel +from petsc4py import PETSc + + +def _check_hashes(x, y, datatype): + """MPI reduction op to check if code hashes differ across ranks.""" + if x == y: + return x + return False + + +_check_op = mpi.MPI.Op.Create(_check_hashes, commute=True) +_compiler = None +# Directory must be unique per VENV for multiple installs +# _and_ per user for shared machines +_EXE_HASH = md5(sys.executable.encode()).hexdigest()[-6:] +MEM_TMP_DIR = Path(gettempdir()).joinpath(f"pyop2-tempcache-uid{os.getuid()}").joinpath(_EXE_HASH) +# PETSc Configuration +petsc_variables = get_petsc_variables() + + +def set_default_compiler(compiler): + """Set the PyOP2 default compiler, globally over COMM_WORLD. 
def sniff_compiler(exe, comm=mpi.COMM_WORLD):
    """Obtain the correct compiler class by calling the compiler executable.

    Only rank 0 runs the executable; the result is broadcast over ``comm``.

    :arg exe: String with name or path to compiler executable
    :arg comm: Comm over which we want to determine the compiler type
    :returns: A ``functools.partial`` of the selected compiler class with
        the sniffed ``version`` already bound.
    """
    compiler = None
    if comm.rank == 0:
        # Note:
        # Sniffing compiler for very large numbers of MPI ranks is
        # expensive so we do this on one rank and broadcast
        try:
            output = subprocess.run(
                [exe, "--version"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
                encoding="utf-8"
            ).stdout
        except (subprocess.CalledProcessError, UnicodeDecodeError):
            output = ""

        # Find the name of the compiler family
        if output.startswith(("gcc", "g++")):
            name = "GNU"
        elif output.startswith(("clang", "Apple LLVM", "Apple clang")):
            name = "clang"
        elif output.startswith("icc"):
            name = "Intel"
        elif "Cray" in output.split("\n")[0]:
            # Cray is more awkward eg:
            # Cray clang version 11.0.4  ()
            # gcc (GCC) 9.3.0 20200312 (Cray Inc.)
            name = "Cray"
        else:
            name = "unknown"

        # Set the compiler class based on the platform (and architecture)
        if sys.platform.startswith("linux"):
            if name == "Intel":
                compiler = LinuxIntelCompiler
            elif name == "GNU":
                compiler = LinuxGnuCompiler
            elif name == "clang":
                compiler = LinuxClangCompiler
            elif name == "Cray":
                compiler = LinuxCrayCompiler
            else:
                compiler = AnonymousCompiler
        elif sys.platform.startswith("darwin"):
            if name == "clang":
                machine = platform.uname().machine
                if machine == "arm64":
                    compiler = MacClangARMCompiler
                elif machine == "x86_64":
                    compiler = MacClangCompiler
                else:
                    # Unrecognised architecture: without this fallback
                    # ``compiler`` stays None and ``partial`` below raises
                    # TypeError.
                    compiler = AnonymousCompiler
            elif name == "GNU":
                compiler = MacGNUCompiler
            else:
                compiler = AnonymousCompiler
        else:
            compiler = AnonymousCompiler

        # Now try and get a version number.
        # NOTE(review): the version is sniffed through a plain ``Compiler()``
        # instance, i.e. from its ``cc`` property, not from ``exe`` -- confirm
        # this is intended when ``exe`` is a different executable.
        temp = Compiler()
        version = sniff_compiler_version(temp)
        compiler = partial(compiler, version=version)

    return comm.bcast(compiler, root=0)
+ """ + _name = "unknown" + + _cc = None + _cxx = None + _ld = None + + _cflags = () + _cxxflags = () + _ldflags = () + + _optflags = () + _debugflags = () + + def __init__(self, extra_compiler_flags=(), extra_linker_flags=(), version=None, debug=False): + self._extra_compiler_flags = tuple(extra_compiler_flags) + self._extra_linker_flags = tuple(extra_linker_flags) + self._version = version + self._debug = debug + + def __repr__(self): + string = f"{self.__class__.__name__}(" + string += f"extra_compiler_flags={self._extra_compiler_flags}, " + string += f"extra_linker_flags={self._extra_linker_flags}, " + string += f"version={self._version!r}, " + string += f"debug={self._debug})" + return string + + def __str__(self): + return f"<{self._name} compiler, version {self._version or 'unknown'}>" + + @property + def cc(self): + return self._cc or petsc_variables["CC"] + + @property + def cxx(self): + return self._cxx or petsc_variables["CXX"] + + @property + def ld(self): + return self._ld + + @property + def cflags(self): + cflags = self._cflags + self._extra_compiler_flags + self.bugfix_cflags + if self._debug: + cflags += self._debugflags + else: + cflags += self._optflags + cflags += tuple(shlex.split(configuration["cflags"])) + return cflags + + @property + def cxxflags(self): + cxxflags = self._cxxflags + self._extra_compiler_flags + self.bugfix_cflags + if self._debug: + cxxflags += self._debugflags + else: + cxxflags += self._optflags + cxxflags += tuple(shlex.split(configuration["cxxflags"])) + return cxxflags + + @property + def ldflags(self): + ldflags = self._ldflags + self._extra_linker_flags + ldflags += tuple(shlex.split(configuration["ldflags"])) + return ldflags + + @property + def bugfix_cflags(self): + return () + + +class MacClangCompiler(Compiler): + """A compiler for building a shared library on Mac systems.""" + _name = "Mac Clang" + + _cflags = ("-fPIC", "-Wall", "-framework", "Accelerate", "-std=gnu11") + _cxxflags = ("-fPIC", "-Wall", 
"-framework", "Accelerate") + _ldflags = ("-dynamiclib",) + + _optflags = ("-O3", "-ffast-math", "-march=native") + _debugflags = ("-O0", "-g") + + +class MacClangARMCompiler(MacClangCompiler): + """A compiler for building a shared library on ARM based Mac systems.""" + # See https://stackoverflow.com/q/65966969 + _optflags = ("-O3", "-ffast-math", "-mcpu=apple-a14") + # Need to pass -L/opt/homebrew/opt/gcc/lib/gcc/11 to prevent linker error: + # ld: file not found: @rpath/libgcc_s.1.1.dylib for architecture arm64 This + # seems to be a homebrew configuration issue somewhere. Hopefully this + # requirement will go away at some point. + _ldflags = ("-dynamiclib", "-L/opt/homebrew/opt/gcc/lib/gcc/11") + + +class MacGNUCompiler(MacClangCompiler): + """A compiler for building a shared library on Mac systems with a GNU compiler.""" + _name = "Mac GNU" + + +class LinuxGnuCompiler(Compiler): + """The GNU compiler for building a shared library on Linux systems.""" + _name = "GNU" + + _cflags = ("-fPIC", "-Wall", "-std=gnu11") + _cxxflags = ("-fPIC", "-Wall") + _ldflags = ("-shared",) + + _optflags = ("-march=native", "-O3", "-ffast-math") + _debugflags = ("-O0", "-g") + + @property + def bugfix_cflags(self): + """Flags to work around bugs in compilers.""" + ver = self._version + cflags = () + if Version("4.8.0") <= ver < Version("4.9.0"): + # GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61068 + cflags = ("-fno-ivopts",) + if Version("5.0") <= ver <= Version("5.4.0"): + cflags = ("-fno-tree-loop-vectorize",) + if Version("6.0.0") <= ver < Version("6.5.0"): + # GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79920 + cflags = ("-fno-tree-loop-vectorize",) + if Version("7.1.0") <= ver < Version("7.1.2"): + # GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81633 + cflags = ("-fno-tree-loop-vectorize",) + if Version("7.3") <= ver <= Version("7.5"): + # GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90055 + # See also 
class LinuxCrayCompiler(Compiler):
    """The Cray compiler for building a shared library on Linux systems."""
    _name = "Cray"

    _cflags = ("-fPIC", "-Wall", "-std=gnu11")
    _cxxflags = ("-fPIC", "-Wall")
    _ldflags = ("-shared",)

    _optflags = ("-march=native", "-O3", "-ffast-math")
    _debugflags = ("-O0", "-g")

    @property
    def ldflags(self):
        """Linker flags of the base class with every ``-llapack`` removed.

        NOTE(review): presumably the Cray toolchain links LAPACK itself --
        confirm.
        """
        # Zero-argument ``super()`` is required here: the one-argument form
        # ``super(LinuxCrayCompiler)`` is an unbound super object and has no
        # ``ldflags`` attribute.
        ldflags = super().ldflags
        return tuple(flag for flag in ldflags if flag != '-llapack')
def load_hashkey(*args, **kwargs):
    """Build the cache key for :func:`load` from its arguments.

    The first positional argument (source string or ``GlobalKernel``) is
    replaced by an MD5 digest; the remaining arguments are passed
    unchanged to ``default_parallel_hashkey``.

    :raises ValueError: if the first argument is neither a ``str`` nor a
        ``GlobalKernel``.
    """
    from pyop2.global_kernel import GlobalKernel
    code = args[0]
    if isinstance(code, str):
        code_hash = md5(code.encode()).hexdigest()
    elif isinstance(code, GlobalKernel):
        code_hash = md5(str(code.cache_key).encode()).hexdigest()
    else:
        # Previously this branch fell through and the ``return`` below
        # failed with UnboundLocalError. Raise the same error that ``load``
        # reports for unsupported inputs instead.
        raise ValueError("Don't know how to compile code of type %r" % type(code))
    return default_parallel_hashkey(code_hash, *args[1:], **kwargs)
def expandWl(ldflags):
    """Generator to expand the `-Wl` compiler flags for use as linker flags

    A flag of the form ``-Wl,a,b`` is replaced by its comma-separated parts
    (``a`` then ``b``). Every other flag is yielded unchanged, including
    options such as ``-Wlarger-than=...`` that only share the first three
    characters.

    :arg ldflags: linker flags for a compiler command
    """
    # Match and cut the exact prefix. ``str.lstrip('-Wl')`` would instead
    # strip any leading run of the characters '-', 'W' and 'l'.
    prefix = '-Wl,'
    for flag in ldflags:
        if flag.startswith(prefix):
            yield from flag[len(prefix):].split(',')
        else:
            yield flag
def check_source_hashes(compiler, jitmodule, extension, comm):
    """A check to see whether code generated on all ranks is identical.

    The key produced by ``_make_so_hashkey`` is hex-digested and combined
    across ranks with an allreduce using ``_check_op``. If the reduced value
    differs from the local digest, this rank writes its source to
    ``<cache_dir>/mismatching-kernels/src-rank<N>.<extension>`` and raises.

    :arg compiler: The compiler to use to create the shared library.
    :arg jitmodule: The JIT Module which can generate the code to compile.
    :arg extension: extension of the source file (c, cpp).
    :arg comm: Communicator over which to perform compilation.
    :raises CompilationError: If the reduced digest differs from the local
        one.
    """
    # Digest of the same key that make_so is cached on
    hashval = _as_hexdigest(_make_so_hashkey(compiler, jitmodule, extension, comm))
    with mpi.temp_internal_comm(comm) as icomm:
        matching = icomm.allreduce(hashval, op=_check_op)
        if matching != hashval:
            # Dump all src code to disk for debugging
            output = Path(configuration["cache_dir"]).joinpath("mismatching-kernels")
            srcfile = output.joinpath(f"src-rank{icomm.rank}.{extension}")
            if icomm.rank == 0:
                # parents=True: if the cache directory does not exist yet, a
                # plain mkdir would raise on rank 0 before the barrier that
                # the other ranks are about to wait on.
                output.mkdir(parents=True, exist_ok=True)
            # Wait until rank 0 has created the directory before writing
            icomm.barrier()
            with open(srcfile, "w") as fh:
                fh.write(jitmodule.code_to_compile)
            # Wait until every rank has written its file before raising
            icomm.barrier()
            raise CompilationError(f"Generated code differs across ranks (see output in {output})")
+ :arg jitmodule: The JIT Module which can generate the code to compile. + :arg filename: The filename of the library to create. + :arg extension: extension of the source file (c, cpp). + :arg comm: Communicator over which to perform compilation. + :arg filename: Optional + Returns a :class:`ctypes.CDLL` object of the resulting shared + library.""" + # Compilation communicators are reference counted on the PyOP2 comm + icomm = mpi.internal_comm(comm, compiler) + ccomm = mpi.compilation_comm(icomm, compiler) + + # C or C++ + if extension == "cpp": + exe = compiler.cxx + compiler_flags = compiler.cxxflags + else: + exe = compiler.cc + compiler_flags = compiler.cflags + + # Compile on compilation communicator (ccomm) rank 0 + soname = None + if ccomm.rank == 0: + if filename is None: + # Adding random 2-digit hexnum avoids using excessive filesystem inodes + tempdir = MEM_TMP_DIR.joinpath(f"{randint(0, 255):02x}") + tempdir.mkdir(parents=True, exist_ok=True) + # This path + filename should be unique + descriptor, filename = mkstemp(suffix=f".{extension}", dir=tempdir, text=True) + filename = Path(filename) + else: + filename.parent.mkdir(exist_ok=True) + + cname = filename + oname = filename.with_suffix(".o") + soname = filename.with_suffix(".so") + logfile = filename.with_suffix(".log") + errfile = filename.with_suffix(".err") + with progress(INFO, 'Compiling wrapper'): + # Write source code to disk + with open(cname, "w") as fh: + fh.write(jitmodule.code_to_compile) + os.close(descriptor) + + if not compiler.ld: + # Compile and link + cc = (exe,) + compiler_flags + ('-o', str(soname), str(cname)) + compiler.ldflags + _run(cc, logfile, errfile) + else: + # Compile + cc = (exe,) + compiler_flags + ('-c', '-o', str(oname), str(cname)) + _run(cc, logfile, errfile) + # Extract linker specific "cflags" from ldflags and link + ld = tuple(shlex.split(compiler.ld)) + ('-o', str(soname), str(oname)) + tuple(expandWl(compiler.ldflags)) + _run(ld, logfile, errfile, 
def _run(cc, logfile, errfile, step="Compilation", filemode="w"):
    """Run a compilation command and handle logging + errors.

    When ``configuration['no_fork_available']`` is set the command is run
    through the shell with :func:`os.system` and shell redirections;
    otherwise it is run with :func:`subprocess.check_call` and the command
    line is also written at the top of the log file.

    :arg cc: Tuple of command words to execute.
    :arg logfile: File receiving the command's standard output.
    :arg errfile: File receiving the command's standard error.
    :arg step: Name of the step, used in the debug message and log header.
    :arg filemode: Mode used to open the two files. ``"w"`` selects ``>``
        redirection in the shell branch, anything else selects ``>>``.
    :raises CompilationError: If the command exits with a non-zero status.
    """
    debug(f"{step} command: {' '.join(cc)}")
    try:
        if configuration['no_fork_available']:
            redirect = ">" if filemode == "w" else ">>"
            # The redirect targets are single file names, so quote them: a
            # path containing spaces or shell metacharacters would otherwise
            # be split or interpreted by the shell. The command words
            # themselves are joined unquoted.
            cc += (
                f"2{redirect}", shlex.quote(str(errfile)),
                redirect, shlex.quote(str(logfile)),
            )
            cmd = " ".join(cc)
            status = os.system(cmd)
            if status != 0:
                raise subprocess.CalledProcessError(status, cmd)
        else:
            with open(logfile, filemode) as log, open(errfile, filemode) as err:
                log.write(f"{step} command:\n")
                log.write(" ".join(cc))
                log.write("\n\n")
                subprocess.check_call(cc, stderr=err, stdout=log)
    except subprocess.CalledProcessError as e:
        # Chain explicitly so the failing command stays attached as __cause__
        raise CompilationError(dedent(f"""
            Command "{e.cmd}" return error status {e.returncode}.
            Unable to compile code
            Compile log in {logfile!s}
            Compile errors in {errfile!s}
            """)) from e
+ + :arg prompt: if ``True`` prompt before removing any files + """ + cachedirs = [configuration['cache_dir'], MEM_TMP_DIR] + + for directory in cachedirs: + if not os.path.exists(directory): + print("Cache directory could not be found") + continue + if len(os.listdir(directory)) == 0: + print("No cached libraries to remove") + continue + + remove = True + if prompt: + user = input(f"Remove cached libraries from {directory}? [Y/n]: ") + + while user.lower() not in ['', 'y', 'n']: + print("Please answer y or n.") + user = input(f"Remove cached libraries from {directory}? [Y/n]: ") + + if user.lower() == 'n': + remove = False + + if remove: + print(f"Removing cached libraries from {directory}") + shutil.rmtree(directory, ignore_errors=True) + else: + print("Not removing cached libraries") diff --git a/pyop2/configuration.py b/pyop2/configuration.py new file mode 100644 index 0000000000..34969908ac --- /dev/null +++ b/pyop2/configuration.py @@ -0,0 +1,166 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +"""PyOP2 global configuration.""" + +import os +from tempfile import gettempdir +from loopy.target.c import CWithGNULibcTarget + +from pyop2.exceptions import ConfigurationError + + +class Configuration(dict): + r"""PyOP2 configuration parameters + + :param cc: C compiler (executable name eg: `gcc` + or path eg: `/opt/gcc/bin/gcc`). + :param cxx: C++ compiler (executable name eg: `g++` + or path eg: `/opt/gcc/bin/g++`). + :param ld: Linker (executable name `ld` + or path eg: `/opt/gcc/bin/ld`). + :param cflags: extra flags to be passed to the C compiler. + :param cxxflags: extra flags to be passed to the C++ compiler. + :param ldflags: extra flags to be passed to the linker. + :param simd_width: number of doubles in SIMD instructions + (e.g. 4 for AVX2, 8 for AVX512). + :param debug: Turn on debugging for generated code (turns off + compiler optimisations). + :param type_check: Should PyOP2 type-check API-calls? (Default, + yes) + :param check_src_hashes: Should PyOP2 check that generated code is + the same on all processes? (Default, yes). Uses an allreduce. + :param cache_dir: Where should generated code be cached? 
+ :param node_local_compilation: Should generated code by compiled + "node-local" (one process for each set of processes that share + a filesystem)? You should probably arrange to set cache_dir + to a node-local filesystem too. + :param log_level: How chatty should PyOP2 be? Valid values + are "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". + :param print_cache_size: Should PyOP2 print the cache information at + program exit? + :param matnest: Should matrices on mixed maps be built as nests? (Default yes) + :param block_sparsity: Should sparsity patterns on datasets with + cdim > 1 be built as block sparsities, or dof sparsities. The + former saves memory but changes which preconditioners are + available for the resulting matrices. (Default yes) + :param spmd_strict: Enable barriers for calls marked with @collective and + for cache access. This adds considerable overhead, but is useful for + tracking down deadlocks. (Default no) + """ + # name, env variable, type, default, write once + cache_dir = os.path.join(gettempdir(), "pyop2-cache-uid%s" % os.getuid()) + DEFAULTS = { + "cflags": + ("PYOP2_CFLAGS", str, ""), + "cxxflags": + ("PYOP2_CXXFLAGS", str, ""), + "ldflags": + ("PYOP2_LDFLAGS", str, ""), + "simd_width": + ("PYOP2_SIMD_WIDTH", int, 4), + "debug": + ("PYOP2_DEBUG", bool, False), + "compute_kernel_flops": + ("PYOP2_COMPUTE_KERNEL_FLOPS", bool, False), + "type_check": + ("PYOP2_TYPE_CHECK", bool, True), + "check_src_hashes": + ("PYOP2_CHECK_SRC_HASHES", bool, True), + "log_level": + ("PYOP2_LOG_LEVEL", (str, int), "WARNING"), + "cache_dir": + ("PYOP2_CACHE_DIR", str, cache_dir), + "node_local_compilation": + ("PYOP2_NODE_LOCAL_COMPILATION", bool, True), + "no_fork_available": + ("PYOP2_NO_FORK_AVAILABLE", bool, False), + "print_cache_info": + ("PYOP2_CACHE_INFO", bool, False), + "matnest": + ("PYOP2_MATNEST", bool, True), + "block_sparsity": + ("PYOP2_BLOCK_SPARSITY", bool, True), + "spmd_strict": + ("PYOP2_SPMD_STRICT", bool, False), + } + """Default 
values for PyOP2 configuration parameters""" + + def __init__(self): + def convert(env, typ, v): + if not isinstance(typ, type): + typ = typ[0] + try: + if typ is bool: + return bool(int(os.environ.get(env, v))) + return typ(os.environ.get(env, v)) + except ValueError: + raise ValueError("Cannot convert value of environment variable %s to %r" % (env, typ)) + defaults = dict((k, convert(env, typ, v)) + for k, (env, typ, v) in Configuration.DEFAULTS.items()) + super(Configuration, self).__init__(**defaults) + self._set = set() + self._defaults = defaults + + def reset(self): + """Reset the configuration parameters to the default values.""" + self.update(self._defaults) + self._set = set() + + def reconfigure(self, **kwargs): + """Update the configuration parameters with new values.""" + for k, v in kwargs.items(): + self[k] = v + + def unsafe_reconfigure(self, **kwargs): + """"Unsafely reconfigure (just replacing the values)""" + self.update(kwargs) + + def __setitem__(self, key, value): + """Set the value of a configuration parameter. + + :arg key: The parameter to set + :arg value: The value to set it to. 
def as_ctypes(dtype):
    """Convert a numpy dtype like object to a ctypes type.

    :arg dtype: Anything accepted by :class:`numpy.dtype`.
    :returns: The ctypes type registered for the dtype's ``name``.
    :raises KeyError: If the dtype's name has no entry in the table below
        (for example ``complex128``).
    """
    # int8 maps to the signed 8-bit integer type, in line with the other
    # fixed-width entries. ``ctypes.c_char`` is a character type and does
    # not accept negative values.
    return {"bool": ctypes.c_bool,
            "int": ctypes.c_int,
            "int8": ctypes.c_int8,
            "int16": ctypes.c_int16,
            "int32": ctypes.c_int32,
            "int64": ctypes.c_int64,
            "uint8": ctypes.c_ubyte,
            "uint16": ctypes.c_uint16,
            "uint32": ctypes.c_uint32,
            "uint64": ctypes.c_uint64,
            "float32": ctypes.c_float,
            "float64": ctypes.c_double}[numpy.dtype(dtype).name]
+ + :arg dtype: A numpy datatype. + :returns: a 2-tuple of min, max + :raises ValueError: If numeric limits could not be determined. + """ + try: + info = numpy.finfo(dtype) + except ValueError: + # maybe an int? + try: + info = numpy.iinfo(dtype) + except ValueError as e: + raise ValueError("Unable to determine numeric limits from %s" % dtype) from e + return info.min, info.max + + +class OpaqueType(lp.types.OpaqueType): + def __init__(self, name): + super().__init__(name=name) + + def __repr__(self): + return self.name diff --git a/pyop2/exceptions.py b/pyop2/exceptions.py new file mode 100644 index 0000000000..eec5eedac9 --- /dev/null +++ b/pyop2/exceptions.py @@ -0,0 +1,158 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +"""OP2 exception types""" + + +class DataTypeError(TypeError): + + """Invalid type for data.""" + + +class DimTypeError(TypeError): + + """Invalid type for dimension.""" + + +class ArityTypeError(TypeError): + + """Invalid type for arity.""" + + +class IndexTypeError(TypeError): + + """Invalid type for index.""" + + +class NameTypeError(TypeError): + + """Invalid type for name.""" + + +class SetTypeError(TypeError): + + """Invalid type for :class:`pyop2.op2.Set`.""" + + +class SizeTypeError(TypeError): + + """Invalid type for size.""" + + +class SubsetIndexOutOfBounds(TypeError): + + """Out of bound index.""" + + +class SparsityTypeError(TypeError): + + """Invalid type for :class:`pyop2.op2.Sparsity`.""" + + +class MapTypeError(TypeError): + + """Invalid type for :class:`pyop2.op2.Map`.""" + + +class DataSetTypeError(TypeError): + """Invalid type for :class:`pyop2.op2.DataSet`.""" + + +class MatTypeError(TypeError): + + """Invalid type for :class:`pyop2.op2.Mat`.""" + + +class DatTypeError(TypeError): + + """Invalid type for :class:`pyop2.op2.Dat`.""" + + +class KernelTypeError(TypeError): + + """Invalid type for :class:`pyop2.op2.Kernel`.""" + + +class DataValueError(ValueError): + + """Illegal value for data.""" + + +class IndexValueError(ValueError): + + """Illegal value for index.""" + + +class ModeValueError(ValueError): + + """Illegal value for mode.""" + + +class IterateValueError(ValueError): + + """Illegal value for 
iterate.""" + + +class SetValueError(ValueError): + + """Illegal value for :class:`pyop2.op2.Set`.""" + + +class MapValueError(ValueError): + + """Illegal value for :class:`pyop2.op2.Map`.""" + + +class ConfigurationError(RuntimeError): + + """Illegal configuration value or type.""" + + +class CompilationError(RuntimeError): + + """Error during JIT compilation""" + + +class SparsityFormatError(ValueError): + + """Unable to produce a sparsity for this matrix format.""" + + +class CachingError(ValueError): + + """A caching error.""" + + +class HashError(CachingError): + + """Something is wrong with the hash.""" diff --git a/pyop2/global_kernel.py b/pyop2/global_kernel.py new file mode 100644 index 0000000000..ae13dc1c59 --- /dev/null +++ b/pyop2/global_kernel.py @@ -0,0 +1,429 @@ +import collections.abc +import ctypes +from dataclasses import dataclass +import os +from typing import Optional, Tuple +import itertools + +import loopy as lp +import numpy as np +import pytools +from petsc4py import PETSc + +from pyop2 import mpi +from pyop2.compilation import load +from pyop2.configuration import configuration +from pyop2.datatypes import IntType, as_ctypes +from pyop2.types import IterationRegion, Constant, READ +from pyop2.utils import cached_property, get_petsc_dir + + +# We set eq=False to force identity-based hashing. This is required for when +# we check whether or not we have duplicate maps getting passed to the kernel. +@dataclass(eq=False, frozen=True) +class MapKernelArg: + """Class representing a map argument to the kernel. + + :param arity: The arity of the map (how many indirect accesses are needed + for each item of the iterset). + :param offset: Tuple of integers describing the offset for each DoF in the + base mesh needed to move up the column of an extruded mesh. 
@dataclass(eq=False, init=False)
class ComposedMapKernelArg:
    """Class representing a composed map input to the kernel.

    :param base_maps: An arbitrary combination of :class:`MapKernelArg`s, :class:`PermutedMapKernelArg`s, and :class:`ComposedMapKernelArg`s.
    :raises TypeError: If any entry of ``base_maps`` is not one of those
        three classes.
    """

    def __init__(self, *base_maps):
        self.base_maps = base_maps
        # ``init=False`` suppresses the dataclass-generated ``__init__``,
        # which is the only caller of ``__post_init__``. Call it explicitly,
        # otherwise the validation below never runs.
        self.__post_init__()

    def __post_init__(self):
        """Check that every base map is a supported map kernel argument."""
        for m in self.base_maps:
            if not isinstance(m, (MapKernelArg, PermutedMapKernelArg, ComposedMapKernelArg)):
                raise TypeError("base_maps must be a combination of MapKernelArgs, PermutedMapKernelArgs, and ComposedMapKernelArgs")

    @property
    def cache_key(self):
        """The class together with the cache keys of all base maps, in order."""
        return type(self), tuple(m.cache_key for m in self.base_maps)
@dataclass(frozen=True)
class DatKernelArg:
    """Class representing a :class:`pyop2.types.Dat` being passed to the kernel.

    :param dim: The shape at each node of the dataset.
    :param map_: The map used for indirect data access. May be ``None``.
    :param index: The index if the :class:`pyop2.types.Dat` is
        a :class:`pyop2.types.DatView`.
    """

    dim: Tuple[int, ...]
    # Optional, like ``index``: the default is None and the accessors below
    # treat None as "no map".
    map_: Optional[MapKernelArg] = None
    index: Optional[Tuple[int, ...]] = None

    @property
    def pack(self):
        """The ``DatPack`` class from :mod:`pyop2.codegen.builder`."""
        # Imported on access rather than at module import time
        from pyop2.codegen.builder import DatPack
        return DatPack

    @property
    def is_direct(self):
        """Is the data getting accessed directly?"""
        return self.map_ is None

    @property
    def is_indirect(self):
        """Is the data getting accessed indirectly?"""
        return not self.is_direct

    @property
    def cache_key(self):
        """Tuple of the class, ``dim``, the map's cache key (or None) and ``index``."""
        map_key = self.map_.cache_key if self.map_ is not None else None
        return type(self), self.dim, map_key, self.index

    @property
    def maps(self):
        """One-element tuple holding the map, or an empty tuple without one."""
        if self.map_ is not None:
            return (self.map_,)
        else:
            return ()
class PassthroughKernelArg:
    """Kernel argument whose cache key is its own class and which has no maps."""

    @property
    def cache_key(self):
        """Cache key of this argument: the (sub)class of the instance."""
        return self.__class__

    @property
    def maps(self):
        """Maps used by this argument: always an empty tuple."""
        return tuple()
+ :param extruded: Are we looping over an extruded mesh? + :param extruded_periodic: Flag for periodic extrusion. + :param constant_layers: If looping over an extruded mesh, are the layers the + same for each base entity? + :param subset: Are we iterating over a subset? + :param iteration_region: :class:`IterationRegion` representing the set of + entities being iterated over. Only valid if looping over an extruded mesh. + Valid values are: + - ``ON_BOTTOM``: iterate over the bottom layer of cells. + - ``ON_TOP`` iterate over the top layer of cells. + - ``ALL`` iterate over all cells (the default if unspecified) + - ``ON_INTERIOR_FACETS`` iterate over all the layers + except the top layer, accessing data two adjacent (in + the extruded direction) cells at a time. + :param pass_layer_arg: Should the wrapper pass the current layer into the + kernel (as an `int`). Only makes sense for indirect extruded iteration. + """ + def __init__(self, local_kernel, arguments, *, + extruded=False, + extruded_periodic=False, + constant_layers=False, + subset=False, + iteration_region=None, + pass_layer_arg=False): + if not len(local_kernel.accesses) == len(arguments): + raise ValueError( + "Number of arguments passed to the local and global kernels" + " do not match" + ) + + if any( + isinstance(garg, Constant) and larg.access is not READ + for larg, garg in zip(local_kernel.arguments, arguments) + ): + raise ValueError( + "Constants can only ever be read in a parloop, not modified" + ) + + if pass_layer_arg and not extruded: + raise ValueError( + "Cannot request layer argument for non-extruded iteration" + ) + if constant_layers and not extruded: + raise ValueError( + "Cannot request constant_layers argument for non-extruded iteration" + ) + + counter = itertools.count() + seen_maps = collections.defaultdict(lambda: next(counter)) + self.cache_key = ( + local_kernel.cache_key, + *[a.cache_key for a in arguments], + *[seen_maps[m] for a in arguments for m in a.maps], + extruded, 
extruded_periodic, constant_layers, subset, + iteration_region, pass_layer_arg, configuration["simd_width"] + ) + self.local_kernel = local_kernel + self.arguments = arguments + self._extruded = extruded + self._extruded_periodic = extruded_periodic + self._constant_layers = constant_layers + self._subset = subset + self._iteration_region = iteration_region + self._pass_layer_arg = pass_layer_arg + + @mpi.collective + def __call__(self, comm, *args): + """Execute the compiled kernel. + + :arg comm: Communicator the execution is collective over. + :*args: Arguments to pass to the compiled kernel. + """ + # It is unnecessary to cache this call as it is cached in pyop2/compilation.py + func = self.compile(comm) + func(*args) + + @property + def _wrapper_name(self): + import warnings + warnings.warn("GlobalKernel._wrapper_name is a deprecated alias for GlobalKernel.name", + DeprecationWarning) + return self.name + + @cached_property + def name(self): + return f"wrap_{self.local_kernel.name}" + + @cached_property + def zipped_arguments(self): + """Iterate through arguments for the local kernel and global kernel together.""" + return tuple(zip(self.local_kernel.arguments, self.arguments)) + + @cached_property + def builder(self): + from pyop2.codegen.builder import WrapperBuilder + + builder = WrapperBuilder(kernel=self.local_kernel, + subset=self._subset, + extruded=self._extruded, + extruded_periodic=self._extruded_periodic, + constant_layers=self._constant_layers, + iteration_region=self._iteration_region, + pass_layer_to_kernel=self._pass_layer_arg) + for arg in self.arguments: + builder.add_argument(arg) + return builder + + @cached_property + def code_to_compile(self): + """Return the C/C++ source code as a string.""" + from pyop2.codegen.rep2loopy import generate + + wrapper = generate(self.builder) + code = lp.generate_code_v2(wrapper) + + if self.local_kernel.cpp: + from loopy.codegen.result import process_preambles + preamble = 
"".join(process_preambles(getattr(code, "device_preambles", []))) + device_code = "\n\n".join(str(dp.ast) for dp in code.device_programs) + return preamble + "\nextern \"C\" {\n" + device_code + "\n}\n" + return code.device_code() + + @PETSc.Log.EventDecorator() + @mpi.collective + def compile(self, comm): + """Compile the kernel. + + :arg comm: The communicator the compilation is collective over. + :returns: A ctypes function pointer for the compiled function. + """ + extension = "cpp" if self.local_kernel.cpp else "c" + cppargs = ( + tuple("-I%s/include" % d for d in get_petsc_dir()) + + tuple("-I%s" % d for d in self.local_kernel.include_dirs) + + ("-I%s" % os.path.abspath(os.path.dirname(__file__)),) + ) + ldargs = ( + tuple("-L%s/lib" % d for d in get_petsc_dir()) + + tuple("-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()) + + ("-lpetsc", "-lm") + + tuple(self.local_kernel.ldargs) + ) + + return load( + self, + extension, + self.name, + cppargs=cppargs, + ldargs=ldargs, + restype=ctypes.c_int, + comm=comm + ) + + @cached_property + def argtypes(self): + """Return the ctypes datatypes of the compiled function.""" + # The first two arguments to the global kernel are the 'start' and 'stop' + # indices. All other arguments are declared to be void pointers. 
+ dtypes = [as_ctypes(IntType)] * 2 + dtypes.extend([ctypes.c_voidp for _ in self.builder.wrapper_args[2:]]) + return tuple(dtypes) + + def num_flops(self, iterset): + """Compute the number of FLOPs done by the kernel.""" + size = 1 + if iterset._extruded: + region = self._iteration_region + layers = np.mean(iterset.layers_array[:, 1] - iterset.layers_array[:, 0]) + if region is IterationRegion.INTERIOR_FACETS: + size = layers - 2 + elif region not in {IterationRegion.TOP, IterationRegion.BOTTOM}: + size = layers - 1 + return size * self.local_kernel.num_flops diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py new file mode 100644 index 0000000000..da82f6ecad --- /dev/null +++ b/pyop2/local_kernel.py @@ -0,0 +1,229 @@ +import abc +from dataclasses import dataclass +import hashlib +from typing import Union + +import loopy as lp +from loopy.kernel import LoopKernel +from loopy.translation_unit import TranslationUnit +from loopy.tools import LoopyKeyBuilder +import numpy as np + +from pyop2 import version +from pyop2.configuration import configuration +from pyop2.datatypes import ScalarType +from pyop2.exceptions import NameTypeError +from pyop2.types import Access +from pyop2.utils import cached_property, validate_type + + +@dataclass(frozen=True) +class LocalKernelArg: + """Class representing a kernel argument. + + :param access: Access descriptor for the argument. + :param dtype: The argument's datatype. + """ + + access: Access + dtype: Union[np.dtype, str] + + +@validate_type(("name", str, NameTypeError)) +def Kernel(code, name, **kwargs): + """Construct a local kernel. + + For a description of the arguments to this function please see :class:`LocalKernel`. 
+ """ + if isinstance(code, str): + return CStringLocalKernel(code, name, **kwargs) + elif isinstance(code, (lp.LoopKernel, lp.TranslationUnit)): + return LoopyLocalKernel(code, name, **kwargs) + else: + raise TypeError("code argument is the wrong type") + + +class LocalKernel(abc.ABC): + """Class representing the kernel executed per member of the iterset. + + :arg code: Function definition (including signature). + :arg name: The kernel name. This must match the name of the kernel + function given in `code`. + :arg accesses: Optional iterable of :class:`Access` instances describing + how each argument in the function definition is accessed. + + :kwarg cpp: Is the kernel actually C++ rather than C? If yes, + then compile with the C++ compiler (kernel is wrapped in + extern C for linkage reasons). + :kwarg flop_count: The number of FLOPs performed by the kernel. + :kwarg headers: list of system headers to include when compiling the kernel + in the form ``#include <header.h>`` (optional, defaults to empty) + :kwarg include_dirs: list of additional include directories to be searched + when compiling the kernel (optional, defaults to empty) + :kwarg ldargs: A list of arguments to pass to the linker when + compiling this Kernel. + :kwarg opts: An options dictionary for declaring optimisations to apply. + :kwarg requires_zeroed_output_arguments: Does this kernel require the + output arguments to be zeroed on entry when called? (default no) + :kwarg user_code: code snippet to be executed once at the very start of + the generated kernel wrapper code (optional, defaults to + empty) + :kwarg events: Tuple of log event names which are called in the C code of the local kernels + + Consider the case of initialising a :class:`~pyop2.Dat` with seeded random + values in the interval 0 to 1. 
The corresponding :class:`~pyop2.Kernel` is + constructed as follows: :: + + op2.CStringKernel("void setrand(double *x) { x[0] = (double)random()/RAND_MAX; }", + name="setrand", + headers=["#include <stdlib.h>"], user_code="srandom(10001);") + + .. note:: + When running in parallel with MPI the generated code must be the same + on all ranks. + """ + + @validate_type(("name", str, NameTypeError)) + def __init__(self, code, name, accesses=None, *, + cpp=False, + flop_count=None, + headers=(), + include_dirs=(), + ldargs=(), + opts=None, + requires_zeroed_output_arguments=False, + user_code="", + events=()): + self.code = code + self.name = name + self.accesses = accesses + self.cpp = cpp + self.flop_count = flop_count + self.headers = headers + self.include_dirs = include_dirs + self.ldargs = ldargs + self.opts = opts or {} + self.requires_zeroed_output_arguments = requires_zeroed_output_arguments + self.user_code = user_code + self.events = events + + @property + @abc.abstractmethod + def dtypes(self): + """Return the dtypes of the arguments to the kernel.""" + + @property + def cache_key(self): + return self._immutable_cache_key, self.accesses, self.dtypes + + @cached_property + def _immutable_cache_key(self): + # We need this function because self.accesses is mutable due to legacy support + if isinstance(self.code, lp.TranslationUnit): + key_hash = hashlib.sha256() + self.code.update_persistent_hash(key_hash, LoopyKeyBuilder()) + code = key_hash.hexdigest() + else: + code = self.code + + key = (code, self.name, self.cpp, self.flop_count, + self.headers, self.include_dirs, self.ldargs, sorted(self.opts.items()), + self.requires_zeroed_output_arguments, self.user_code, version.__version__) + return hashlib.md5(str(key).encode()).hexdigest() + + @property + def _wrapper_cache_key_(self): + import warnings + warnings.warn("_wrapper_cache_key is deprecated, use cache_key instead", DeprecationWarning) + + return self.cache_key + + @property + def arguments(self): + """Return an 
iterable of :class:`LocalKernelArg` instances representing + the arguments expected by the kernel. + """ + assert len(self.accesses) == len(self.dtypes) + + return tuple(LocalKernelArg(acc, dtype) + for acc, dtype in zip(self.accesses, self.dtypes)) + + @cached_property + def num_flops(self): + """Compute the numbers of FLOPs if not already known.""" + if self.flop_count is not None: + return self.flop_count + + if not configuration["compute_kernel_flops"]: + return 0 + + if isinstance(self.code, lp.TranslationUnit): + op_map = lp.get_op_map( + self.code.copy(options=lp.Options(ignore_boostable_into=True), + silenced_warnings=['insn_count_subgroups_upper_bound', + 'get_x_map_guessing_subgroup_size', + 'summing_if_branches_ops']), + subgroup_size='guess') + return op_map.filter_by(name=['add', 'sub', 'mul', 'div'], + dtype=[ScalarType]).eval_and_sum({}) + else: + return 0 + + def __eq__(self, other): + if not isinstance(other, LocalKernel): + return NotImplemented + else: + return self.cache_key == other.cache_key + + def __hash__(self): + return hash(self.cache_key) + + def __str__(self): + return f"OP2 Kernel: {self.name}" + + def __repr__(self): + return 'Kernel("""%s""", %r)' % (self.code, self.name) + + +class CStringLocalKernel(LocalKernel): + """:class:`LocalKernel` class where `code` is a string of C code. + + :kwarg dtypes: Iterable of datatypes (either `np.dtype` or `str`) for + each kernel argument. This is not required for :class:`LoopyLocalKernel` + because it can be inferred. + + All other `__init__` parameters are the same. 
+ """ + + @validate_type(("code", str, TypeError)) + def __init__(self, code, name, accesses=None, dtypes=None, **kwargs): + super().__init__(code, name, accesses, **kwargs) + self._dtypes = dtypes + + @property + def dtypes(self): + return self._dtypes + + @dtypes.setter + def dtypes(self, dtypes): + self._dtypes = dtypes + + +class LoopyLocalKernel(LocalKernel): + """:class:`LocalKernel` class where `code` has type :class:`loopy.LoopKernel` + or :class:`loopy.TranslationUnit`. + """ + + @validate_type(("code", (LoopKernel, TranslationUnit), TypeError)) + def __init__(self, code, *args, **kwargs): + super().__init__(code, *args, **kwargs) + + @property + def dtypes(self): + return tuple(a.dtype for a in self._loopy_arguments) + + @cached_property + def _loopy_arguments(self): + """Return the loopy arguments associated with the kernel.""" + return tuple(a for a in self.code.callables_table[self.name].subkernel.args + if isinstance(a, lp.ArrayArg)) diff --git a/pyop2/logger.py b/pyop2/logger.py new file mode 100644 index 0000000000..2e58e3446c --- /dev/null +++ b/pyop2/logger.py @@ -0,0 +1,93 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +"""The PyOP2 logger, based on the Python standard library logging module.""" + +from contextlib import contextmanager +import logging + +logger = logging.getLogger('pyop2') +handler = logging.StreamHandler() +logger.addHandler(handler) + + +debug = logger.debug +info = logger.info +warning = logger.warning +error = logger.error +critical = logger.critical + +DEBUG = logging.DEBUG +INFO = logging.INFO +WARNING = logging.WARNING +ERROR = logging.ERROR +CRITICAL = logging.CRITICAL + + +def set_log_level(level): + '''Set the log level of the PyOP2 logger. + + :arg level: the log level. Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL ''' + logger.setLevel(level) + + +def log(level, msg, *args, **kwargs): + ''' Print 'msg % args' with the severity 'level'. + + :arg level: the log level. 
Valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL
+    :arg msg: the message '''
+
+    logger.log(level, msg, *args, **kwargs)
+
+
+_indent = 0
+
+
+@contextmanager
+def progress(level, msg, *args, **kwargs):
+    """A context manager to print a progress message.
+
+    The block is wrapped in ``msg...``, ``msg...done`` log messages
+    with an appropriate indent (to distinguish nested message).
+
+    The indent is kept in the module-level ``_indent`` counter. It is
+    always restored when the block exits, including when the block
+    raises; the ``msg...done`` message is only logged when the block
+    completes without an exception.
+
+    :arg level: the log level. See :func:`log` for valid values
+    :arg msg: the message.
+
+    See :func:`log` for more details.
+    """
+    global _indent
+    log(level, (' ' * _indent) + msg + '...', *args, **kwargs)
+    _indent += 2
+    try:
+        yield
+    finally:
+        # Restore the indent even if the wrapped block raised, otherwise
+        # every subsequent progress message would stay shifted right.
+        _indent -= 2
+    # Only reached when the wrapped block finished without raising.
+    log(level, (' ' * _indent) + msg + '...done', *args, **kwargs)
diff --git a/pyop2/mpi-compat.h b/pyop2/mpi-compat.h
new file mode 100644
index 0000000000..367c58a7d1
--- /dev/null
+++ b/pyop2/mpi-compat.h
@@ -0,0 +1,14 @@
+/* Author:  Lisandro Dalcin   */
+/* Contact: dalcinl@gmail.com */
+
+#ifndef MPI_COMPAT_H
+#define MPI_COMPAT_H
+
+#include <mpi.h>
+
+#if (MPI_VERSION < 3) && !defined(PyMPI_HAVE_MPI_Message)
+typedef void *PyMPI_MPI_Message;
+#define MPI_Message PyMPI_MPI_Message
+#endif
+
+#endif/*MPI_COMPAT_H*/
diff --git a/pyop2/mpi.py b/pyop2/mpi.py
new file mode 100644
index 0000000000..7e88b8dd09
--- /dev/null
+++ b/pyop2/mpi.py
@@ -0,0 +1,615 @@
+# This file is part of PyOP2
+#
+# PyOP2 is Copyright (c) 2012, Imperial College London and
+# others. Please see the AUTHORS file in the main source directory for
+# a full list of copyright holders. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +"""PyOP2 MPI communicator.""" + + +from petsc4py import PETSc +from mpi4py import MPI # noqa +from itertools import count +from functools import wraps +import atexit +import gc +import glob +import os +import tempfile +import weakref + +from pyop2.configuration import configuration +from pyop2.exceptions import CompilationError +from pyop2.logger import debug, logger, DEBUG +from pyop2.utils import trim + + +__all__ = ( + "COMM_WORLD", + "COMM_SELF", + "MPI", + "internal_comm", + "is_pyop2_comm", + "incref", + "decref", + "temp_internal_comm" +) + +# These are user-level communicators, we never send any messages on +# them inside PyOP2. 
+COMM_WORLD = PETSc.COMM_WORLD.tompi4py().Dup() +COMM_WORLD.Set_name("PYOP2_COMM_WORLD") + +COMM_SELF = PETSc.COMM_SELF.tompi4py().Dup() +COMM_SELF.Set_name("PYOP2_COMM_SELF") + +# Creation index counter +_COMM_CIDX = count() +# Dict of internal communicators, keyed by creation index, to be freed at exit. +_DUPED_COMM_DICT = {} +# Flag to indicate whether we are in cleanup (at exit) +PYOP2_FINALIZED = False +# Flag for outputting information at the end of testing (do not abuse!) +_running_on_ci = bool(os.environ.get('PYOP2_CI_TESTS')) + + +class PyOP2CommError(ValueError): + pass + +# ============ +# Exposition: +# ============ +# +# To avoid PyOP2 library messages interfering with messages that the +# user might send on communicators, we duplicate any communicator +# passed in to PyOP2 and send our messages on this internal +# communicator. This is equivalent to the way PETSc does things. +# +# To avoid unnecessarily duplicating communicators that we've already +# seen, we store information on both the inner and the outer +# communicator using MPI attributes. In addition we store the reference +# count and creation index as attributes on PyOP2 comms. +# +# The references are as follows: +# +# User Facing Comms PyOP2 Comms DUPED +# .-----------. .-------------. COMM +# | User-Comm |------>| PyOP2-Comm | DICT +# |```````````| |`````````````| .-------. +# | |<------| refcount |<------| cidx | +# | | | cidx | |```````| +# '-----------' '-------------' | | +# | ^ | | +# | | | | +# v | | | +# .-------------. | | +# | Compilation | | | +# | Comm | |.......| +# |`````````````|<------| cidx | +# | refcount | '-------' +# | cidx | +# '-------------' +# +# Creation: +# ---------- +# When we're asked for an internal communicator, we first check if it +# has a refcount (therefore it's a PyOP2 comm). In which case we +# increment the refcount and return it. 
+# +# If it's not a PyOP2 comm, we check if it has an embedded PyOP2 comm, +# pull that out, increment the refcount and return it. +# +# If we've never seen this communicator before, we MPI_Comm_dup it, +# and set up the references with an initial refcount of 2: +# - One for the returned PyOP2 comm +# - One for the reference held by the internal dictionary of created +# comms +# We also assign the comm a creation index (cidx). +# +# Something similar happens for compilation communicators. +# +# This is all handled by the user-facing functions internal_comm() and +# compilation_comm(). +# +# Destruction: +# ------------- +# Freeing communicators is tricky as the Python cyclic garbage +# collector can cause decref to be called. Unless the garbage collector +# is called simultaneously on all ranks (unlikely to happen) the +# reference count for an internal comm will not agree across all ranks. +# To avoid the situation where Free() is called on some ranks but not +# others we maintain one reference to any duplicated comm in the global +# _DUPED_COMM_DICT. +# +# The user is responsible for calling MPI_Comm_free on any user +# communicators. When a user destroys a communicator, the MPI callback delcomm_outer() +# ensures that the corresponding PyOP2 comms are properly freed. +# +# Cleanup: +# --------- +# Finally, we register an atexit handler _free_comms() to clean up any +# outstanding duplicated communicators by freeing any remaining entries +# in _DUPED_COMM_DICT. Since the interpreter is shutting down, it is +# necessary to skip some checks, this is done by setting the +# PYOP2_FINALIZED flag. + + +if configuration["spmd_strict"]: + def collective(fn): + extra = trim(""" + This function is logically collective over MPI ranks, it is an + error to call it on fewer than all the ranks in MPI communicator. + PYOP2_SPMD_STRICT=1 is in your environment and function calls will be + guarded by a barrier where possible. 
+ """) + + @wraps(fn) + def wrapper(*args, **kwargs): + comms = filter( + lambda arg: isinstance(arg, MPI.Comm), + args + tuple(kwargs.values()) + ) + try: + comm = next(comms) + except StopIteration: + if args and hasattr(args[0], "comm"): + comm = args[0].comm + else: + comm = None + + if comm is None: + debug( + "`@collective` wrapper found no communicators in args or kwargs, " + "this means that the call is implicitly collective over an " + "unknown communicator. " + f"The following call to {fn.__module__}.{fn.__qualname__} is " + "not protected by an MPI barrier." + ) + subcomm = ", UNKNOWN Comm" + else: + subcomm = f", {comm.name} R{comm.rank}" + + debug_string_pt1 = f"{COMM_WORLD.name} R{COMM_WORLD.rank}{subcomm}: " + debug_string_pt2 = f" {fn.__module__}.{fn.__qualname__}" + debug(debug_string_pt1 + "Entering" + debug_string_pt2) + if comm is not None: + comm.Barrier() + value = fn(*args, **kwargs) + debug(debug_string_pt1 + "Leaving" + debug_string_pt2) + if comm is not None: + comm.Barrier() + return value + + wrapper.__doc__ = f"{trim(fn.__doc__)}\n\n{extra}" if fn.__doc__ else extra + return wrapper +else: + def collective(fn): + extra = trim(""" + This function is logically collective over MPI ranks, it is an + error to call it on fewer than all the ranks in MPI communicator. + You can set PYOP2_SPMD_STRICT=1 in your environment to try and catch + non-collective calls. + """) + fn.__doc__ = f"{trim(fn.__doc__)}\n\n{extra}" if fn.__doc__ else extra + return fn + + +def delcomm_outer(comm, keyval, icomm): + """Deleter for internal communicator, removes reference to outer comm. + Generalised to also delete compilation communicators. + + :arg comm: Outer communicator. + :arg keyval: The MPI keyval, should be ``innercomm_keyval``. + :arg icomm: The inner communicator, should have a reference to + ``comm``. 
+ """ + # Use debug printer that is safe to use at exit time + debug = finalize_safe_debug() + if keyval not in (innercomm_keyval, compilationcomm_keyval): + raise PyOP2CommError("Unexpected keyval") + + if keyval == innercomm_keyval: + debug(f'Deleting innercomm keyval on {comm.name}') + if keyval == compilationcomm_keyval: + debug(f'Deleting compilationcomm keyval on {comm.name}') + + ocomm = icomm.Get_attr(outercomm_keyval) + if ocomm is None: + raise PyOP2CommError("Inner comm does not have expected reference to outer comm") + + if ocomm != comm: + raise PyOP2CommError("Inner comm has reference to non-matching outer comm") + icomm.Delete_attr(outercomm_keyval) + + # An inner comm may or may not hold a reference to a compilation comm + comp_comm = icomm.Get_attr(compilationcomm_keyval) + if comp_comm is not None: + debug('Removing compilation comm on inner comm') + decref(comp_comm) + icomm.Delete_attr(compilationcomm_keyval) + + # Once we have removed the reference to the inner/compilation comm we can free it + cidx = icomm.Get_attr(cidx_keyval) + cidx = cidx[0] + del _DUPED_COMM_DICT[cidx] + gc.collect() + refcount = icomm.Get_attr(refcount_keyval) + if refcount[0] > 1: + # In the case where `comm` is a custom user communicator there may be references + # to the inner comm still held and this is not an issue, but there is not an + # easy way to distinguish this case, so we just log the event. 
+        debug(
+            f"There are still {refcount[0]} references to {comm.name}, "
+            "this will cause deadlock if the communicator has been incorrectly freed"
+        )
+    icomm.Free()
+
+
+# Reference count, creation index, inner/outer/compilation communicator
+# attributes for internal communicators
+refcount_keyval = MPI.Comm.Create_keyval()
+cidx_keyval = MPI.Comm.Create_keyval()
+innercomm_keyval = MPI.Comm.Create_keyval(delete_fn=delcomm_outer)
+outercomm_keyval = MPI.Comm.Create_keyval()
+compilationcomm_keyval = MPI.Comm.Create_keyval(delete_fn=delcomm_outer)
+comm_cache_keyval = MPI.Comm.Create_keyval()
+
+
+def is_pyop2_comm(comm):
+    """Returns ``True`` if ``comm`` is a PyOP2 communicator,
+    False if `comm` another communicator.
+    Raises exception if ``comm`` is not a communicator.
+
+    :arg comm: Communicator to query
+    """
+    if isinstance(comm, PETSc.Comm):
+        ispyop2comm = False
+    elif comm == MPI.COMM_NULL:
+        raise PyOP2CommError("Communicator passed to is_pyop2_comm() is COMM_NULL")
+    elif isinstance(comm, MPI.Comm):
+        ispyop2comm = bool(comm.Get_attr(refcount_keyval))
+    else:
+        raise PyOP2CommError(f"Argument passed to is_pyop2_comm() is a {type(comm)}, which is not a recognised comm type")
+    return ispyop2comm
+
+
+def pyop2_comm_status():
+    """ Return string containing a table of the reference counts for all
+    communicators PyOP2 has duplicated.
+
+    One row is emitted per entry of ``_DUPED_COMM_DICT``. An entry that
+    compares equal to ``MPI.COMM_NULL`` is listed as ``COMM_NULL`` with a
+    count of 0; a communicator that carries no refcount attribute is
+    listed with the placeholder count -999.
+
+    :returns: The formatted table as a single string.
+    """
+    status_string = 'PYOP2 Communicator reference counts:\n'
+    status_string += '| Communicator name | Count |\n'
+    status_string += '==================================================\n'
+    for comm in _DUPED_COMM_DICT.values():
+        if comm == MPI.COMM_NULL:
+            null = 'COMM_NULL'
+            status_string += f'| {null:39}| {0:5d} |\n'
+        else:
+            # Get_attr returns None when the attribute was never set, so
+            # check for that before indexing into the refcount list.
+            refcount = comm.Get_attr(refcount_keyval)
+            refcount = -999 if refcount is None else refcount[0]
+            status_string += f'| {comm.name:39}| {refcount:5d} |\n'
+    return status_string
+
+
+class temp_internal_comm:
+    """ Use a PyOP2 internal communicator and
+    increment and decrement the internal comm.
+ :arg comm: Any communicator + """ + def __init__(self, comm): + self.user_comm = comm + self.internal_comm = internal_comm(self.user_comm, self) + + def __enter__(self): + """ Returns an internal comm that will be safely decref'd + when the context manager is destroyed + + :returns pyop2_comm: A PyOP2 internal communicator + """ + return self.internal_comm + + def __exit__(self, exc_type, exc_value, traceback): + pass + + +def internal_comm(comm, obj): + """ Creates an internal comm from the user comm. + If comm is None, create an internal communicator from COMM_WORLD + :arg comm: A communicator or None + :arg obj: The object which the comm is an attribute of + (usually `self`) + + :returns pyop2_comm: A PyOP2 internal communicator + """ + # Parse inputs + if comm is None: + # None will be the default when creating most objects + comm = COMM_WORLD + elif isinstance(comm, PETSc.Comm): + comm = comm.tompi4py() + + # Check for invalid inputs + if comm == MPI.COMM_NULL: + raise PyOP2CommError("MPI_COMM_NULL passed to internal_comm()") + elif not isinstance(comm, MPI.Comm): + raise PyOP2CommError("Don't know how to dup a %r" % type(comm)) + + # Handle a valid input + if is_pyop2_comm(comm): + incref(comm) + pyop2_comm = comm + else: + pyop2_comm = dup_comm(comm) + weakref.finalize(obj, decref, pyop2_comm) + return pyop2_comm + + +def incref(comm): + """ Increment communicator reference count + """ + assert is_pyop2_comm(comm) + refcount = comm.Get_attr(refcount_keyval) + refcount[0] += 1 + + +def decref(comm): + """ Decrement communicator reference count + """ + if comm == MPI.COMM_NULL: + # This case occurs if the the outer communicator has already been freed by + # the user + debug("Cannot decref an already freed communicator") + else: + assert is_pyop2_comm(comm) + refcount = comm.Get_attr(refcount_keyval) + refcount[0] -= 1 + # Freeing the internal comm is handled by the destruction of the user comm + if refcount[0] < 1: + raise PyOP2CommError("Reference count is 
less than 1, decref called too many times") + + +def dup_comm(comm_in): + """Given a communicator return a communicator for internal use. + + :arg comm_in: Communicator to duplicate + + :returns internal_comm: An internal (PyOP2) communicator.""" + assert not is_pyop2_comm(comm_in) + + # Check if communicator has an embedded PyOP2 comm. + internal_comm = comm_in.Get_attr(innercomm_keyval) + if internal_comm is None: + # Haven't seen this comm before, duplicate it. + internal_comm = comm_in.Dup() + comm_in.Set_attr(innercomm_keyval, internal_comm) + internal_comm.Set_attr(outercomm_keyval, comm_in) + # Name + internal_comm.Set_name(f"{comm_in.name or comm_in.py2f()}_DUP") + # Refcount + internal_comm.Set_attr(refcount_keyval, [1]) + incref(internal_comm) + # Remember we need to destroy it. + debug(f"Appending comm {internal_comm.name} to list of known comms") + cidx = next(_COMM_CIDX) + internal_comm.Set_attr(cidx_keyval, [cidx]) + _DUPED_COMM_DICT[cidx] = internal_comm + elif is_pyop2_comm(internal_comm): + # Inner comm is a PyOP2 comm, return it + incref(internal_comm) + else: + raise PyOP2CommError("Inner comm is not a PyOP2 comm") + return internal_comm + + +@collective +def create_split_comm(comm): + """ Create a split communicator based on either shared memory access + if using MPI >= 3, or shared local disk access if using MPI <= 3. 
+ Used internally for creating compilation communicators + + :arg comm: A communicator to split + + :return split_comm: A split communicator + """ + if MPI.VERSION >= 3: + debug("Creating compilation communicator using MPI_Split_type") + split_comm = comm.Split_type(MPI.COMM_TYPE_SHARED) + debug("Finished creating compilation communicator using MPI_Split_type") + else: + debug("Creating compilation communicator using MPI_Split + filesystem") + if comm.rank == 0: + if not os.path.exists(configuration["cache_dir"]): + os.makedirs(configuration["cache_dir"], exist_ok=True) + tmpname = tempfile.mkdtemp(prefix="rank-determination-", + dir=configuration["cache_dir"]) + else: + tmpname = None + tmpname = comm.bcast(tmpname, root=0) + if tmpname is None: + raise CompilationError("Cannot determine sharedness of filesystem") + # Touch file + debug("Made tmpdir %s" % tmpname) + with open(os.path.join(tmpname, str(comm.rank)), "wb"): + pass + comm.barrier() + ranks = sorted(int(os.path.basename(name)) + for name in glob.glob("%s/[0-9]*" % tmpname)) + debug("Creating compilation communicator using filesystem colors") + split_comm = comm.Split(color=min(ranks), key=comm.rank) + debug("Finished creating compilation communicator using filesystem colors") + # Name + split_comm.Set_name(f"{comm.name or comm.py2f()}_COMPILATION") + # Outer communicator + split_comm.Set_attr(outercomm_keyval, comm) + # Refcount + split_comm.Set_attr(refcount_keyval, [1]) + incref(split_comm) + return split_comm + + +def get_compilation_comm(comm): + return comm.Get_attr(compilationcomm_keyval) + + +def set_compilation_comm(comm, comp_comm): + """Stash the compilation communicator (``comp_comm``) on the + PyOP2 communicator ``comm`` + + :arg comm: A PyOP2 Communicator + :arg comp_comm: The compilation communicator + """ + if not is_pyop2_comm(comm): + raise PyOP2CommError("Compilation communicator must be stashed on a PyOP2 comm") + + # Check if the compilation communicator is already set + 
old_comp_comm = comm.Get_attr(compilationcomm_keyval) + + if not is_pyop2_comm(comp_comm): + raise PyOP2CommError( + "Communicator used for compilation communicator must be a PyOP2 communicator.\n" + "Use pyop2.mpi.dup_comm() to create a PyOP2 comm from an existing comm.") + else: + if old_comp_comm is not None: + # Clean up old_comp_comm before setting new one + if not is_pyop2_comm(old_comp_comm): + raise PyOP2CommError("Compilation communicator is not a PyOP2 comm, something is very broken!") + gc.collect() + decref(old_comp_comm) + # Stash `comp_comm` as an attribute on `comm` + comm.Set_attr(compilationcomm_keyval, comp_comm) + # NB: Set_attr calls the delete method for the + # compilationcomm_keyval freeing old_comp_comm + + +@collective +def compilation_comm(comm, obj): + """Get a communicator for compilation. + + :arg comm: The input communicator, must be a PyOP2 comm. + :arg obj: The object which the comm is an attribute of + (usually `self`) + + :returns: A communicator used for compilation (may be smaller) + """ + if not is_pyop2_comm(comm): + raise PyOP2CommError("Communicator is not a PyOP2 comm") + # Should we try and do node-local compilation? + if configuration["node_local_compilation"]: + comp_comm = get_compilation_comm(comm) + if comp_comm is not None: + debug("Found existing compilation communicator") + debug(f"{comp_comm.name}") + else: + comp_comm = create_split_comm(comm) + set_compilation_comm(comm, comp_comm) + # Add to list of known duplicated comms + debug(f"Appending compiler comm {comp_comm.name} to list of known comms") + cidx = next(_COMM_CIDX) + comp_comm.Set_attr(cidx_keyval, [cidx]) + _DUPED_COMM_DICT[cidx] = comp_comm + else: + comp_comm = comm + incref(comp_comm) + weakref.finalize(obj, decref, comp_comm) + return comp_comm + + +def finalize_safe_debug(): + ''' Return function for debug output. + + When Python is finalizing the logging module may be finalized before we have + finished writing debug information. 
In this case we fall back to using the + Python `print` function to output debugging information. + + Furthermore, we always want to see this finalization information when + running the CI tests. + ''' + global debug + if PYOP2_FINALIZED: + if logger.level > DEBUG and not _running_on_ci: + debug = lambda string: None + else: + debug = lambda string: print(string) + return debug + + +@atexit.register +def _free_comms(): + """Free all outstanding communicators.""" + global PYOP2_FINALIZED + PYOP2_FINALIZED = True + debug = finalize_safe_debug() + debug("PyOP2 Finalizing") + # Collect garbage as it may hold on to communicator references + + debug("Calling gc.collect()") + gc.collect() + debug("STATE0") + debug(pyop2_comm_status()) + + debug("Freeing PYOP2_COMM_WORLD") + COMM_WORLD.Free() + debug("STATE1") + debug(pyop2_comm_status()) + + debug("Freeing PYOP2_COMM_SELF") + COMM_SELF.Free() + debug("STATE2") + debug(pyop2_comm_status()) + debug(f"Freeing comms in list (length {len(_DUPED_COMM_DICT)})") + for key in sorted(_DUPED_COMM_DICT.keys(), reverse=True): + comm = _DUPED_COMM_DICT[key] + if comm != MPI.COMM_NULL: + refcount = comm.Get_attr(refcount_keyval) + debug(f"Freeing {comm.name}, with index {key}, which has refcount {refcount[0]}") + comm.Free() + del _DUPED_COMM_DICT[key] + for kv in [ + refcount_keyval, + innercomm_keyval, + outercomm_keyval, + compilationcomm_keyval, + comm_cache_keyval + ]: + MPI.Comm.Free_keyval(kv) + + +# Install an exception hook to MPI Abort if an exception isn't caught +# see: https://groups.google.com/d/msg/mpi4py/me2TFzHmmsQ/sSF99LE0t9QJ +if COMM_WORLD.size > 1: + import sys + except_hook = sys.excepthook + + def mpi_excepthook(typ, value, traceback): + except_hook(typ, value, traceback) + sys.stderr.flush() + COMM_WORLD.Abort(1) + sys.excepthook = mpi_excepthook diff --git a/pyop2/op2.py b/pyop2/op2.py new file mode 100644 index 0000000000..35e5649f4d --- /dev/null +++ b/pyop2/op2.py @@ -0,0 +1,121 @@ +# This file is part of 
PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 

"""The PyOP2 API specification."""

import atexit

from pyop2.configuration import configuration
from pyop2.datatypes import OpaqueType  # noqa: F401
from pyop2.logger import debug, info, warning, error, critical, set_log_level
from pyop2.mpi import MPI, COMM_WORLD, collective

from pyop2.types import (  # noqa: F401
    Set, ExtrudedSet, MixedSet, Subset, DataSet, MixedDataSet,
    Map, MixedMap, PermutedMap, ComposedMap, Sparsity, Halo,
    Global, Constant, GlobalDataSet,
    Dat, MixedDat, DatView, Mat
)
from pyop2.types import (READ, WRITE, RW, INC, MIN, MAX,
                         ON_BOTTOM, ON_TOP, ON_INTERIOR_FACETS, ALL)

from pyop2.local_kernel import CStringLocalKernel, LoopyLocalKernel, Kernel  # noqa: F401
from pyop2.global_kernel import (GlobalKernelArg, DatKernelArg, MixedDatKernelArg,  # noqa: F401
                                 MatKernelArg, MixedMatKernelArg, MapKernelArg, GlobalKernel)
from pyop2.parloop import (GlobalParloopArg, DatParloopArg, MixedDatParloopArg,  # noqa: F401
                           MatParloopArg, MixedMatParloopArg, PassthroughArg, Parloop, parloop, par_loop)
from pyop2.parloop import (GlobalLegacyArg, DatLegacyArg, MixedDatLegacyArg,  # noqa: F401
                           MatLegacyArg, MixedMatLegacyArg, LegacyParloop, ParLoop)


__all__ = ['configuration', 'READ', 'WRITE', 'RW', 'INC', 'MIN', 'MAX',
           'ON_BOTTOM', 'ON_TOP', 'ON_INTERIOR_FACETS', 'ALL',
           'debug', 'info', 'warning', 'error', 'critical', 'initialised',
           'set_log_level', 'MPI', 'init', 'exit', 'Kernel', 'Set', 'ExtrudedSet',
           'MixedSet', 'Subset', 'DataSet', 'GlobalDataSet', 'MixedDataSet',
           'Halo', 'Dat', 'MixedDat', 'Mat', 'Global', 'Map', 'MixedMap',
           'Sparsity', 'parloop', 'Parloop', 'ParLoop', 'par_loop',
           'DatView', 'PermutedMap', 'ComposedMap']


_initialised = False

# set the log level
set_log_level(configuration['log_level'])


def initialised():
    """Return ``True`` if :func:`init` has been called and :func:`exit` has not."""
    return _initialised


@collective
def init(**kwargs):
    """Initialise PyOP2 by (re)configuring it from keyword arguments.

    Every keyword argument is forwarded unchanged to
    ``configuration.reconfigure``; the log level is then re-read from the
    configuration and applied, and PyOP2 is marked as initialised.

    :arg debug: The level of debugging output.
    :arg log_level: The log level. Options: DEBUG, INFO, WARNING, ERROR, CRITICAL

    For debugging purposes, `init` accepts all keyword arguments
    accepted by the PyOP2 :class:`Configuration` object, see
    :meth:`Configuration.__init__` for details of further accepted
    options.

    .. note::
       Calling ``init`` again simply updates the configuration with the
       new keyword arguments.
    """
    global _initialised
    configuration.reconfigure(**kwargs)

    set_log_level(configuration['log_level'])
    _initialised = True


@atexit.register
@collective
def exit():
    """Exit OP2 and clean up.

    Registered with :mod:`atexit`. Optionally prints cache statistics on
    rank 0 of ``COMM_WORLD`` (when ``print_cache_info`` is configured),
    then resets the configuration and clears the initialised flag.
    """
    if configuration['print_cache_info'] and COMM_WORLD.rank == 0:
        # Imported lazily, only when the statistics are actually requested.
        from pyop2.caching import print_cache_stats
        print(f"{' PyOP2 cache sizes on rank 0 at exit ':*^120}")
        print_cache_stats(alive=False)
    configuration.reset()
    global _initialised
    _initialised = False


# diff --git a/pyop2/parloop.py b/pyop2/parloop.py
# new file mode 100644
# index 0000000000..c70f4c9fb7
# --- /dev/null
# +++ b/pyop2/parloop.py
# @@ -0,0 +1,799 @@
import abc
import itertools
import operator
from dataclasses import dataclass
from typing import Any, Optional, Tuple

import loopy as lp
import numpy as np
from petsc4py import PETSc

from pyop2 import mpi, profiling
from pyop2.configuration import configuration
from pyop2.datatypes import as_numpy_dtype
from pyop2.exceptions import KernelTypeError, MapValueError, SetTypeError
from pyop2.global_kernel import (GlobalKernelArg, DatKernelArg, MixedDatKernelArg,
                                 MatKernelArg, MixedMatKernelArg, PassthroughKernelArg, GlobalKernel)
from pyop2.local_kernel import LocalKernel, CStringLocalKernel, LoopyLocalKernel
from pyop2.types import (Access, Global, AbstractDat, Dat, DatView, MixedDat, Mat, Set,
                         MixedSet, ExtrudedSet, Subset, Map, ComposedMap, MixedMap)
from pyop2.types.data_carrier import DataCarrier
from pyop2.utils import cached_property


class ParloopArg(abc.ABC):
    """Base class for the data-carrying arguments handed to a :class:`Parloop`."""

    @staticmethod
    def check_map(m):
        """Raise :class:`MapValueError` if map ``m`` has no values.

        Only active when ``configuration["type_check"]`` is set. A
        :class:`ComposedMap` is checked by recursing into its component maps.
        """
        if configuration["type_check"]:
            if isinstance(m, ComposedMap):
                for m_ in m.maps_:
                    ParloopArg.check_map(m_)
            elif m.iterset.total_size > 0 and len(m.values_with_halo) == 0:
                raise MapValueError(f"{m} is not initialized")


@dataclass
class GlobalParloopArg(ParloopArg):
    """Class representing a :class:`Global` argument to a :class:`Parloop`."""

    data: Global

    @property
    def _kernel_args_(self):
        return self.data._kernel_args_

    @property
    def map_kernel_args(self):
        # Globals are never accessed through a map.
        return ()

    @property
    def maps(self):
        return ()


@dataclass
class DatParloopArg(ParloopArg):
    """Class representing a :class:`Dat` argument to a :class:`Parloop`."""

    data: Dat
    map_: Optional[Map] = None

    def __post_init__(self):
        if self.map_ is not None:
            self.check_map(self.map_)

    @property
    def _kernel_args_(self):
        return self.data._kernel_args_

    @property
    def map_kernel_args(self):
        return self.map_._kernel_args_ if self.map_ else ()

    @property
    def maps(self):
        if self.map_ is not None:
            return self.map_,
        else:
            return ()


@dataclass
class MixedDatParloopArg(ParloopArg):
    """Class representing a :class:`MixedDat` argument to a :class:`Parloop`."""

    data: MixedDat
    map_: MixedMap

    def __post_init__(self):
        self.check_map(self.map_)

    @property
    def _kernel_args_(self):
        return self.data._kernel_args_

    @property
    def map_kernel_args(self):
        return self.map_._kernel_args_ if self.map_ else ()

    @property
    def maps(self):
        return self.map_,


@dataclass
class MatParloopArg(ParloopArg):
    """Class representing a :class:`Mat` argument to a :class:`Parloop`."""

    data: Mat
    maps: Tuple[Map, Map]
    lgmaps: Optional[Any] = None

    def __post_init__(self):
        for m in self.maps:
            self.check_map(m)

    @property
    def _kernel_args_(self):
        return self.data._kernel_args_

    @property
    def map_kernel_args(self):
        rmap, cmap = self.maps
        return tuple(itertools.chain(rmap._kernel_args_, cmap._kernel_args_))


@dataclass
class MixedMatParloopArg(ParloopArg):
    """Class representing a mixed :class:`Mat` argument to a :class:`Parloop`."""

    data: Mat
    maps: Tuple[MixedMap, MixedMap]
    lgmaps: Any = None

    def __post_init__(self):
        for m in self.maps:
            self.check_map(m)

    @property
    def _kernel_args_(self):
        return self.data._kernel_args_

    @property
    def map_kernel_args(self):
        rmap, cmap = self.maps
        return tuple(itertools.chain(rmap._kernel_args_, cmap._kernel_args_))


@dataclass
class PassthroughParloopArg(ParloopArg):
    """Argument forwarded to the kernel as-is (``data`` is a raw pointer value)."""

    # a pointer
    data: int

    @property
    def _kernel_args_(self):
        return (self.data,)

    @property
    def map_kernel_args(self):
        return ()

    @property
    def maps(self):
        return ()


class Parloop:
    """A parallel loop invocation.

    :arg global_knl: The :class:`GlobalKernel` to be executed.
    :arg iterset: The iteration :class:`Set` over which the kernel should be executed.
    :arg arguments: Iterable of arguments to the parloop.
    """

    def __init__(self, global_knl, iterset, arguments):
        if len(global_knl.arguments) != len(arguments):
            raise ValueError("You are trying to pass in a different number of "
                             "arguments than the kernel is expecting")

        # Performing checks on dtypes is difficult for C-string kernels because PyOP2
        # will happily pass any type into a kernel with void* arguments.
        # Passthrough arguments carry a bare integer pointer with no ``dtype``,
        # so they are excluded from the comparison.
        if (isinstance(global_knl.local_kernel, LoopyLocalKernel)
                and not all(as_numpy_dtype(a.dtype) == as_numpy_dtype(b.data.dtype)
                            for a, b in zip(global_knl.local_kernel.arguments, arguments)
                            if not isinstance(b, PassthroughParloopArg))):
            raise ValueError("The argument dtypes do not match those for the local kernel")

        self.check_iterset(iterset, global_knl, arguments)
        self._check_frozen_access_modes(global_knl.local_kernel, arguments)

        self.global_kernel = global_knl
        self.iterset = iterset
        # ``self.comm`` must exist before prepare_reduced_globals, which uses it.
        self.comm = mpi.internal_comm(iterset.comm, self)
        self.arguments, self.reduced_globals = self.prepare_reduced_globals(arguments, global_knl)

    @property
    def local_kernel(self):
        return self.global_kernel.local_kernel

    @property
    def accesses(self):
        return self.local_kernel.accesses

    @property
    def arglist(self):
        """Prepare the argument list for calling generated code."""
        arglist = self.iterset._kernel_args_
        for d in self.arguments:
            arglist += d._kernel_args_

        # Collect an ordered set of maps (ignore duplicates)
        maps = {m: None for d in self.arguments for m in d.map_kernel_args}
        return arglist + tuple(maps.keys())

    @property
    def zipped_arguments(self):
        return self.zip_arguments(self.global_kernel, self.arguments)

    def replace_data(self, index, new_argument):
        """Swap the data object of the parloop argument at position ``index``."""
        self.arguments[index].data = new_argument

    def _compute_event(self):
        return profiling.timed_region(f"Parloop_{self.iterset.name}_{self.global_kernel.name}")

    @mpi.collective
    def _compute(self, part):
        """Execute the kernel over all members of a MPI-part of the iteration space.

        :arg part: The :class:`SetPartition` to compute over.
        """
        with self._compute_event():
            PETSc.Log.logFlops(part.size*self.num_flops)
            self.global_kernel(self.comm, part.offset, part.offset+part.size, *self.arglist)

    @cached_property
    def num_flops(self):
        return self.global_kernel.num_flops(self.iterset)

    @mpi.collective
    def compute(self):
        # Parloop.compute is an alias for Parloop.__call__
        self()

    @PETSc.Log.EventDecorator("ParLoopExecute")
    @mpi.collective
    def __call__(self):
        """Execute the kernel over all members of the iteration space."""
        self.increment_dat_version()
        self.zero_global_increments()
        orig_lgmaps = self.replace_lgmaps()
        # Start the halo exchange, compute on the core part while it is in
        # flight, then finish the exchange before computing the owned part.
        self.global_to_local_begin()
        self._compute(self.iterset.core_part)
        self.global_to_local_end()
        self._compute(self.iterset.owned_part)
        requests = self.reduction_begin()
        self.local_to_global_begin()
        self.update_arg_data_state()
        self.restore_lgmaps(orig_lgmaps)
        self.reduction_end(requests)
        self.finalize_global_increments()
        self.local_to_global_end()

    def increment_dat_version(self):
        """Increment dat versions of :class:`DataCarrier`s in the arguments."""
        for lk_arg, gk_arg, pl_arg in self.zipped_arguments:
            if isinstance(pl_arg, PassthroughParloopArg):
                continue
            assert isinstance(pl_arg.data, DataCarrier)
            if lk_arg.access is not Access.READ:
                if pl_arg.data in self.reduced_globals:
                    # Bump the user's original Global, not the temporary.
                    self.reduced_globals[pl_arg.data].data.increment_dat_version()
                else:
                    pl_arg.data.increment_dat_version()

    def zero_global_increments(self):
        """Zero any global increments every time the loop is executed."""
        for g in self.reduced_globals.keys():
            g._data[...] = 0

    def replace_lgmaps(self):
        """Swap out any lgmaps for any :class:`MatParloopArg` instances
        if necessary.

        :returns: A tuple with the original lgmaps of every matrix argument
            that had replacements, or ``None`` if the loop has no matrix
            arguments.
        """
        if not self._has_mats:
            return

        orig_lgmaps = []
        for i, (lk_arg, gk_arg, pl_arg) in enumerate(self.zipped_arguments):
            if isinstance(gk_arg, (MatKernelArg, MixedMatKernelArg)):
                new_state = {Access.INC: Mat.ADD_VALUES,
                             Access.WRITE: Mat.INSERT_VALUES}[lk_arg.access]
                for m in pl_arg.data:
                    m.change_assembly_state(new_state)
                pl_arg.data.change_assembly_state(new_state)

                if pl_arg.lgmaps is not None:
                    olgmaps = []
                    for m, lgmaps in zip(pl_arg.data, pl_arg.lgmaps):
                        olgmaps.append(m.handle.getLGMap())
                        m.handle.setLGMap(*lgmaps)
                    orig_lgmaps.append(olgmaps)
        return tuple(orig_lgmaps)

    def restore_lgmaps(self, orig_lgmaps):
        """Restore any swapped lgmaps."""
        if not self._has_mats:
            return

        orig_lgmaps = list(orig_lgmaps)
        # Walk the arguments backwards so that ``pop()`` returns the saved
        # lgmaps in the reverse of the order replace_lgmaps appended them.
        for arg, d in reversed(list(zip(self.global_kernel.arguments, self.arguments))):
            if isinstance(arg, (MatKernelArg, MixedMatKernelArg)) and d.lgmaps is not None:
                for m, lgmaps in zip(d.data, orig_lgmaps.pop()):
                    m.handle.setLGMap(*lgmaps)

    @cached_property
    def _has_mats(self):
        return any(isinstance(a, (MatParloopArg, MixedMatParloopArg)) for a in self.arguments)

    @mpi.collective
    def global_to_local_begin(self):
        """Start halo exchanges."""
        for idx, op in self._g2l_begin_ops:
            op(self.arguments[idx].data)

    @mpi.collective
    def global_to_local_end(self):
        """Finish halo exchanges."""
        for idx, op in self._g2l_end_ops:
            op(self.arguments[idx].data)

    @cached_property
    def _g2l_begin_ops(self):
        ops = []
        for idx in self._g2l_idxs:
            op = operator.methodcaller(
                "global_to_local_begin",
                access_mode=self.accesses[idx],
            )
            ops.append((idx, op))
        return tuple(ops)

    @cached_property
    def _g2l_end_ops(self):
        ops = []
        for idx in self._g2l_idxs:
            op = operator.methodcaller(
                "global_to_local_end",
                access_mode=self.accesses[idx],
            )
            ops.append((idx, op))
        return tuple(ops)

    @cached_property
    def _g2l_idxs(self):
        """Indices of indirectly accessed, non-WRITE Dat arguments (each data object once)."""
        seen = set()
        indices = []
        for i, (lknl_arg, gknl_arg, pl_arg) in enumerate(self.zipped_arguments):
            if (isinstance(gknl_arg, (DatKernelArg, MixedDatKernelArg)) and pl_arg.data not in seen
                    and gknl_arg.is_indirect and lknl_arg.access is not Access.WRITE):
                indices.append(i)
                seen.add(pl_arg.data)
        return tuple(indices)

    @mpi.collective
    def local_to_global_begin(self):
        """Start halo exchanges."""
        for idx, op in self._l2g_begin_ops:
            op(self.arguments[idx].data)

    @mpi.collective
    def local_to_global_end(self):
        """Finish halo exchanges (wait on irecvs)."""
        for idx, op in self._l2g_end_ops:
            op(self.arguments[idx].data)

    @cached_property
    def _l2g_begin_ops(self):
        ops = []
        for idx in self._l2g_idxs:
            op = operator.methodcaller(
                "local_to_global_begin",
                insert_mode=self.accesses[idx],
            )
            ops.append((idx, op))
        return tuple(ops)

    @cached_property
    def _l2g_end_ops(self):
        ops = []
        for idx in self._l2g_idxs:
            op = operator.methodcaller(
                "local_to_global_end",
                insert_mode=self.accesses[idx],
            )
            ops.append((idx, op))
        return tuple(ops)

    @cached_property
    def _l2g_idxs(self):
        """Indices of indirectly accessed INC/MIN/MAX Dat arguments (each data object once)."""
        seen = set()
        indices = []
        for i, (lknl_arg, gknl_arg, pl_arg) in enumerate(self.zipped_arguments):
            if (isinstance(gknl_arg, (DatKernelArg, MixedDatKernelArg)) and pl_arg.data not in seen
                    and gknl_arg.is_indirect
                    and lknl_arg.access in {Access.INC, Access.MIN, Access.MAX}):
                indices.append(i)
                seen.add(pl_arg.data)
        return tuple(indices)

    @PETSc.Log.EventDecorator("ParLoopRednBegin")
    @mpi.collective
    def reduction_begin(self):
        """Begin reductions.

        :returns: A tuple of outstanding requests; empty when the blocking
            ``Allreduce`` fallback (MPI version < 3) is used.
        """
        requests = []
        for idx in self._reduction_idxs:
            glob = self.arguments[idx].data
            mpi_op = {Access.INC: mpi.MPI.SUM,
                      Access.MIN: mpi.MPI.MIN,
                      Access.MAX: mpi.MPI.MAX}.get(self.accesses[idx])

            if mpi.MPI.VERSION >= 3:
                requests.append(self.comm.Iallreduce(glob._data, glob._buf, op=mpi_op))
            else:
                self.comm.Allreduce(glob._data, glob._buf, op=mpi_op)
        return tuple(requests)

    @PETSc.Log.EventDecorator("ParLoopRednEnd")
    @mpi.collective
    def reduction_end(self, requests):
        """Finish reductions."""
        if mpi.MPI.VERSION >= 3:
            for idx, req in zip(self._reduction_idxs, requests):
                req.Wait()
                glob = self.arguments[idx].data
                glob._data[:] = glob._buf
        else:
            assert len(requests) == 0

            for idx in self._reduction_idxs:
                glob = self.arguments[idx].data
                glob._data[:] = glob._buf

    @cached_property
    def _reduction_idxs(self):
        return tuple(i for i, arg
                     in enumerate(self.global_kernel.arguments)
                     if isinstance(arg, GlobalKernelArg)
                     and self.accesses[i] in {Access.INC, Access.MIN, Access.MAX})

    def finalize_global_increments(self):
        """Finalise global increments."""
        for tmp, glob in self.reduced_globals.items():
            glob.data._data += tmp._data

    @mpi.collective
    def update_arg_data_state(self):
        r"""Update the state of the :class:`DataCarrier`\s in the arguments to the `par_loop`.

        This marks :class:`Mat`\s that need assembly."""
        for i, (wrapper_arg, d) in enumerate(zip(self.global_kernel.arguments, self.arguments)):
            access = self.accesses[i]
            if access is Access.READ:
                continue
            if isinstance(wrapper_arg, (DatKernelArg, MixedDatKernelArg)):
                d.data.halo_valid = False
            elif isinstance(wrapper_arg, (MatKernelArg, MixedMatKernelArg)):
                state = {Access.WRITE: Mat.INSERT_VALUES,
                         Access.INC: Mat.ADD_VALUES}[access]
                d.data.assembly_state = state

    @classmethod
    def check_iterset(cls, iterset, global_knl, arguments):
        """Check that the iteration set is valid.

        For an explanation of the arguments see :class:`Parloop`.

        :raises MapValueError: If ``iterset`` does not match that of the arguments.
        :raises SetTypeError: If ``iterset`` is of the wrong type.
        """
        if not configuration["type_check"]:
            return

        if not isinstance(iterset, Set):
            raise SetTypeError("Iteration set is of the wrong type")

        if isinstance(iterset, MixedSet):
            raise SetTypeError("Cannot iterate over mixed sets")

        if isinstance(iterset, Subset):
            iterset = iterset.superset

        for i, (lk_arg, gk_arg, pl_arg) in enumerate(cls.zip_arguments(global_knl, arguments)):
            if isinstance(gk_arg, DatKernelArg) and gk_arg.is_direct:
                _iterset = iterset.parent if isinstance(iterset, ExtrudedSet) else iterset
                if pl_arg.data.dataset.set != _iterset:
                    raise MapValueError(f"Iterset of direct arg {i} does not match parloop iterset")

            for j, m in enumerate(pl_arg.maps):
                if m.iterset != iterset and m.iterset not in iterset:
                    raise MapValueError(f"Iterset of arg {i} map {j} does not match parloop iterset")

    @classmethod
    def _check_frozen_access_modes(cls, local_knl, arguments):
        """Check that any frozen :class:`Dat` are getting accessed with the right access mode."""
        for lknl_arg, pl_arg in zip(local_knl.arguments, arguments):
            if isinstance(pl_arg.data, AbstractDat):
                if any(
                    d._halo_frozen and d._frozen_access_mode != lknl_arg.access
                    for d in pl_arg.data
                ):
                    raise RuntimeError(
                        "Dats with frozen halos must always be accessed with the same access mode"
                    )

    def prepare_reduced_globals(self, arguments, global_knl):
        """Swap any :class:`GlobalParloopArg` instances that are INC'd into
        with zeroed replacements.

        This is needed to ensure that successive parloops incrementing into a
        :class:`Global` in parallel produces the right result. The same is not
        needed for MAX and MIN because they commute with the reduction.

        :returns: ``(arguments, reduced_globals)`` where ``arguments`` is a
            list with the replacements applied and ``reduced_globals`` maps
            each temporary :class:`Global` to the parloop argument it replaced.
        """
        arguments = list(arguments)
        reduced_globals = {}
        for i, (lk_arg, gk_arg, pl_arg) in enumerate(self.zip_arguments(global_knl, arguments)):
            if isinstance(gk_arg, GlobalKernelArg) and lk_arg.access == Access.INC:
                tmp = Global(gk_arg.dim, data=np.zeros_like(pl_arg.data.data_ro), dtype=lk_arg.dtype, comm=self.comm)
                reduced_globals[tmp] = pl_arg
                arguments[i] = GlobalParloopArg(tmp)

        return arguments, reduced_globals

    @staticmethod
    def zip_arguments(global_knl, arguments):
        """Utility method for iterating over the arguments for local kernel,
        global kernel and parloop arguments together.
        """
        return tuple(zip(global_knl.local_kernel.arguments, global_knl.arguments, arguments))


class LegacyArg(abc.ABC):
    """Old-style input to a :func:`parloop` where the codegen-level info is
    passed in alongside any data.
    """

    @property
    @abc.abstractmethod
    def global_kernel_arg(self):
        """Return a corresponding :class:`GlobalKernelArg`."""

    @property
    @abc.abstractmethod
    def parloop_arg(self):
        """Return a corresponding :class:`ParloopArg`."""


@dataclass
class GlobalLegacyArg(LegacyArg):
    """Legacy argument for a :class:`Global`."""

    data: Global
    access: Access

    @property
    def dtype(self):
        return self.data.dtype

    @property
    def global_kernel_arg(self):
        return GlobalKernelArg(self.data.dim)

    @property
    def parloop_arg(self):
        return GlobalParloopArg(self.data)


@dataclass
class DatLegacyArg(LegacyArg):
    """Legacy argument for a :class:`Dat`."""

    data: Dat
    map_: Optional[Map]
    access: Access

    @property
    def dtype(self):
        return self.data.dtype

    @property
    def global_kernel_arg(self):
        map_arg = self.map_._global_kernel_arg if self.map_ is not None else None
        index = self.data.index if isinstance(self.data, DatView) else None
        return DatKernelArg(self.data.dataset.dim, map_arg, index=index)

    @property
    def parloop_arg(self):
        return DatParloopArg(self.data, self.map_)

@dataclass
class MixedDatLegacyArg(LegacyArg):
    """Legacy argument for a :class:`MixedDat`."""

    data: MixedDat
    map_: MixedMap
    access: Access

    @property
    def dtype(self):
        return self.data.dtype

    @property
    def global_kernel_arg(self):
        # One DatKernelArg per component; a component map of None gives a
        # kernel argument with no map.
        args = []
        for d, m in zip(self.data, self.map_):
            map_arg = m._global_kernel_arg if m is not None else None
            args.append(DatKernelArg(d.dataset.dim, map_arg))
        return MixedDatKernelArg(tuple(args))

    @property
    def parloop_arg(self):
        return MixedDatParloopArg(self.data, self.map_)


@dataclass
class MatLegacyArg(LegacyArg):
    """Legacy argument for a :class:`Mat`."""

    data: Mat
    maps: Tuple[Map, Map]
    access: Access
    lgmaps: Optional[Tuple[Any, Any]] = None
    needs_unrolling: Optional[bool] = False

    @property
    def dtype(self):
        return self.data.dtype

    @property
    def global_kernel_arg(self):
        map_args = [m._global_kernel_arg for m in self.maps]
        return MatKernelArg(self.data.dims, tuple(map_args), unroll=self.needs_unrolling)

    @property
    def parloop_arg(self):
        return MatParloopArg(self.data, self.maps, self.lgmaps)


@dataclass
class MixedMatLegacyArg(LegacyArg):
    """Legacy argument for a mixed :class:`Mat`."""

    data: Mat
    maps: Tuple[MixedMap, MixedMap]
    access: Access
    # Annotated as optional because the default is ``None``.
    lgmaps: Optional[Tuple[Any, ...]] = None
    needs_unrolling: Optional[bool] = False

    @property
    def dtype(self):
        return self.data.dtype

    @property
    def global_kernel_arg(self):
        # Build one MatKernelArg per (row, column) block in row-major order.
        nrows, ncols = self.data.sparsity.shape
        mr, mc = self.maps
        mat_args = []
        for i in range(nrows):
            for j in range(ncols):
                mat = self.data[i, j]

                map_args = [m._global_kernel_arg for m in [mr.split[i], mc.split[j]]]
                arg = MatKernelArg(mat.dims, tuple(map_args), unroll=self.needs_unrolling)
                mat_args.append(arg)
        return MixedMatKernelArg(tuple(mat_args), shape=self.data.sparsity.shape)

    @property
    def parloop_arg(self):
        return MixedMatParloopArg(self.data, tuple(self.maps), self.lgmaps)


@dataclass
class PassthroughArg(LegacyArg):
    """Argument that is simply passed to the local kernel without packing.

    :param dtype: The datatype of the argument. This is needed for code generation.
    :param data: A pointer to the data.
    """
    # We don't know what the local kernel is doing with this argument
    access = Access.RW

    dtype: Any
    data: int

    @property
    def global_kernel_arg(self):
        return PassthroughKernelArg()

    @property
    def parloop_arg(self):
        return PassthroughParloopArg(self.data)


def ParLoop(*args, **kwargs):
    """Alias for :func:`LegacyParloop`; all arguments are forwarded unchanged."""
    return LegacyParloop(*args, **kwargs)


def LegacyParloop(local_knl, iterset, *args, **kwargs):
    """Create a :class:`Parloop` with :class:`LegacyArg` inputs.

    :arg local_knl: The :class:`LocalKernel` to be executed.
    :arg iterset: The iteration :class:`Set` over which the kernel should be executed.
    :*args: Iterable of :class:`LegacyArg` instances representing arguments to the parloop.
    :**kwargs: These will be passed to the :class:`GlobalKernel` constructor.

    :returns: An appropriate :class:`Parloop` instance.
    :raises ValueError: If any positional argument is not a :class:`LegacyArg`.
    :raises SetTypeError: If ``iterset`` is not a :class:`Set`.
    """
    if not all(isinstance(a, LegacyArg) for a in args):
        raise ValueError("LegacyParloop only expects LegacyArg arguments")

    if not isinstance(iterset, Set):
        raise SetTypeError("Iteration set is of the wrong type")

    # finish building the local kernel
    # (note: this mutates the kernel object that was passed in)
    local_knl.accesses = tuple(a.access for a in args)
    if isinstance(local_knl, CStringLocalKernel):
        local_knl.dtypes = tuple(a.dtype for a in args)

    global_knl_args = tuple(a.global_kernel_arg for a in args)
    extruded = iterset._extruded
    extruded_periodic = iterset._extruded_periodic
    constant_layers = extruded and iterset.constant_layers
    subset = isinstance(iterset, Subset)
    global_knl = GlobalKernel(local_knl, global_knl_args,
                              extruded=extruded,
                              extruded_periodic=extruded_periodic,
                              constant_layers=constant_layers,
                              subset=subset,
                              **kwargs)

    parloop_args = tuple(a.parloop_arg for a in args)
    return Parloop(global_knl, iterset, parloop_args)


def par_loop(*args, **kwargs):
    """Alias for :func:`parloop`; constructs and executes the loop."""
    parloop(*args, **kwargs)


@mpi.collective
def parloop(knl, *args, **kwargs):
    """Construct and execute a :class:`Parloop`.

    For a description of the possible arguments to this function see
    :class:`Parloop` and :func:`LegacyParloop`.

    :raises KernelTypeError: If ``knl`` is neither a :class:`GlobalKernel`
        nor a :class:`LocalKernel`.
    """
    if isinstance(knl, GlobalKernel):
        Parloop(knl, *args, **kwargs)()
    elif isinstance(knl, LocalKernel):
        LegacyParloop(knl, *args, **kwargs)()
    else:
        raise KernelTypeError(
            "parloop expects a GlobalKernel or a LocalKernel, "
            f"got {type(knl).__name__}"
        )


def generate_single_cell_wrapper(iterset, args, forward_args=(),
                                 kernel_name=None, wrapper_name=None):
    """Generates wrapper for a single cell. No iteration loop, but cellwise data is extracted.
    Cell is expected as an argument to the wrapper. For extruded, the numbering of the cells
    is columnwise continuous, bottom to top.

    :param iterset: The iteration set
    :param args: :class:`Arg`s
    :param forward_args: To forward unprocessed arguments to the kernel via the wrapper,
                         give an iterable of strings describing their C types.
    :param kernel_name: Kernel function name
    :param wrapper_name: Wrapper function name

    :return: string containing the C code for the single-cell wrapper
    """
    from pyop2.codegen.builder import WrapperBuilder
    from pyop2.codegen.rep2loopy import generate
    from loopy.types import OpaqueType

    accs = tuple(a.access for a in args)
    # Use the argument's own ``dtype`` (as LegacyParloop does) rather than
    # ``a.data.dtype``: a PassthroughArg's ``data`` is a bare integer pointer.
    dtypes = tuple(a.dtype for a in args)
    empty_knl = CStringLocalKernel("", kernel_name, accesses=accs, dtypes=dtypes)

    forward_arg_types = [OpaqueType(fa) for fa in forward_args]
    builder = WrapperBuilder(kernel=empty_knl,
                             subset=isinstance(iterset, Subset),
                             extruded=iterset._extruded,
                             extruded_periodic=iterset._extruded_periodic,
                             constant_layers=iterset._extruded and iterset.constant_layers,
                             single_cell=True,
                             forward_arg_types=forward_arg_types)
    for arg in args:
        builder.add_argument(arg.global_kernel_arg)
    wrapper = generate(builder, wrapper_name)
    code = lp.generate_code_v2(wrapper)

    return code.device_code()


# diff --git a/pyop2/profiling.py b/pyop2/profiling.py
# new file mode 100644
# index 0000000000..6a8094292f
# --- /dev/null
# +++ b/pyop2/profiling.py
# @@ -0,0 +1,61 @@
# This file is part of PyOP2
#
# PyOP2 is Copyright (c) 2012, Imperial College London and
# others. Please see the AUTHORS file in the main source directory for
# a full list of copyright holders.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The name of Imperial College London or that of other
#       contributors may not be used to endorse or promote products
#       derived from this software without specific prior written
#       permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS
# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.


from petsc4py import PETSc
from decorator import decorator


timed_stage = PETSc.Log.Stage
"""Enter a code Stage, this is a PETSc log Stage.

:arg name: The name of the stage."""


timed_region = PETSc.Log.Event
"""Time a code region, this a PETSc log Event.

:arg name: The name of the region."""


class timed_function(object):
    """Decorator factory that runs the wrapped function inside a :data:`timed_region`.

    :arg name: Name of the log event. If ``None`` (the default), the
        ``__name__`` of the decorated function is used.
    """

    def __init__(self, name=None):
        self.name = name

    def __call__(self, f):
        """Return ``f`` wrapped so that each call is timed under the event name."""
        # Resolve the event name once, per decorated function. Storing it
        # back on ``self`` at call time would make a single ``timed_function``
        # instance that decorates several functions time all of them under
        # the name of whichever one happened to be called first.
        name = f.__name__ if self.name is None else self.name

        def wrapper(f, *args, **kwargs):
            with timed_region(name):
                return f(*args, **kwargs)
        return decorator(wrapper, f)


# diff --git a/pyop2/scripts/spydump b/pyop2/scripts/spydump
# new file mode 100755
# index 0000000000..0077fe1ca1
# --- /dev/null
# +++ b/pyop2/scripts/spydump
# @@ -0,0 +1,127 @@
#!/usr/bin/env python
#
# This file is part of PyOP2
#
# PyOP2 is Copyright (c) 2012, Imperial College London and
# others. Please see the AUTHORS file in the main source directory for
# a full list of copyright holders.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The name of Imperial College London or that of other
#       contributors may not be used to endorse or promote products
#       derived from this software without specific prior written
#       permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS
# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
"""Show a spy plot from a binary PETSc matrix dump or compare two dumps as spy
plots if two input file names are given."""

import numpy as np
from scipy.sparse import csr_matrix

COOKIE = 1211216  # from petscmat.h
IntType = '>i4'  # big-endian, 4 byte integer
ScalarType = '>f8'  # big-endian, 8 byte real floating


# after http://lists.mcs.anl.gov/pipermail/petsc-users/2010-February/005935.html
def readmat(filename):
    """Read a binary PETSc matrix dump.

    :arg filename: Name of the file to read.

    :return: A pair ``((M, N), (I, J, V))`` where ``M`` and ``N`` are the
        number of rows and columns stored in the file header, ``I`` is the
        row pointer array (length ``M + 1``), ``J`` holds the column indices
        and ``V`` the values of the nonzero entries.
    :raises ValueError: If the file does not start with the PETSc matrix
        cookie, is truncated, or its row counts do not add up to the number
        of nonzeros announced in the header.
    """
    with open(filename, 'rb') as fh:
        header = np.fromfile(fh, dtype=IntType, count=4)
        # Explicit checks rather than ``assert``: assertions are stripped
        # when Python runs with -O and the input is an arbitrary user file.
        if header.size != 4 or header[0] != COOKIE:
            raise ValueError("%s is not a binary PETSc matrix dump" % filename)
        M, N, nz = (int(n) for n in header[1:])
        #
        I = np.empty(M+1, dtype=IntType)
        I[0] = 0
        rownz = np.fromfile(fh, dtype=IntType, count=M)
        if rownz.size != M:
            raise ValueError("%s is truncated: expected %d row counts, found %d"
                             % (filename, M, rownz.size))
        np.cumsum(rownz, out=I[1:])
        if I[-1] != nz:
            raise ValueError("%s is inconsistent: row counts sum to %d, header says %d"
                             % (filename, I[-1], nz))
        #
        J = np.fromfile(fh, dtype=IntType, count=nz)
        V = np.fromfile(fh, dtype=ScalarType, count=nz)
        if J.size != nz or V.size != nz:
            raise ValueError("%s is truncated: expected %d nonzero entries"
                             % (filename, nz))
    return (M, N), (I, J, V)


def dump2csr(filename):
    """Read a binary PETSc matrix dump into a SciPy CSR matrix.

    :arg filename: Name of the file to read.

    :return: A :class:`scipy.sparse.csr_matrix` of the shape recorded in the
        file header.
    """
    (M, N), (I, J, V) = readmat(filename)
    # Pass the shape explicitly: left to itself SciPy infers the number of
    # columns from the largest column index, which loses trailing empty
    # columns and can make two dumps of equally sized matrices incompatible.
    return csr_matrix((V, J, I), shape=(M, N))


def compare_dump(files, outfile=None, marker='.', markersize=.5):
    """Compare two binary PETSc matrix dumps as spy plots.

    :arg files: Sequence of one or two dump file names. With one file only
        its spy plot is drawn; with two, both spy plots, the spy plot of
        their difference and a plot of their nonzero values are drawn.
    :arg outfile: If given, save the figure to this file instead of showing
        it interactively.
    :arg marker: Marker passed to the plotting calls.
    :arg markersize: Marker size passed to the plotting calls.
    """
    # Imported here so that reading dumps does not require a plotting stack.
    import matplotlib
    import pylab

    opts = {'marker': marker, 'markersize': markersize}
    csr1 = dump2csr(files[0])

    if len(files) > 1:
        matplotlib.rc('font', size=4)
        pylab.figure(figsize=(12, 5), dpi=300)
        pylab.subplot(221)
    else:
        matplotlib.rc('font', size=10)
        pylab.figure(figsize=(5, 5), dpi=300)
    pylab.spy(csr1, **opts)
    pylab.title(files[0])

    if len(files) > 1:
        csr2 = dump2csr(files[1])
        pylab.subplot(222)
        pylab.spy(csr2, **opts)
        pylab.title(files[1])

        pylab.subplot(223)
        pylab.spy(csr1 - csr2, **opts)
        pylab.title(files[0] + ' - ' + files[1])

        pylab.subplot(224)
        pylab.plot(csr1.data, label=files[0], **opts)
        pylab.plot(csr2.data, label=files[1], **opts)
        # The entry-wise difference of the value arrays is only defined when
        # both dumps store the same number of nonzeros.
        if csr1.data.shape == csr2.data.shape:
            pylab.plot(csr1.data - csr2.data, label='Difference', **opts)
        pylab.legend()
        pylab.title('Nonzero values')

    if outfile:
        pylab.savefig(outfile)
    else:
        pylab.show()


def main():
    """Parse the command line and show or save the requested spy plot(s)."""
    import argparse
    parser = argparse.ArgumentParser(description=__doc__, add_help=True)
    parser.add_argument('files', nargs='+', help='Matrix dump files')
    parser.add_argument('--output', '-o',
                        help='Output plot to file instead of showing interactively')
    parser.add_argument('--marker', default='.', choices=['s', 'o', '.', ','],
                        help='Specify marker to use for spyplot')
    parser.add_argument('--markersize', type=float, default=.5,
                        help='Specify marker size to use for spyplot')
    args = parser.parse_args()

    compare_dump(args.files, args.output, marker=args.marker, markersize=args.markersize)


if __name__ == '__main__':
    main()
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +cimport numpy as np +import cython +cimport petsc4py.PETSc as PETSc +from petsc4py import PETSc +from pyop2.datatypes import IntType + +np.import_array() + +cdef extern from "petsc.h": + ctypedef long PetscInt + ctypedef double PetscScalar + ctypedef enum PetscBool: + PETSC_TRUE, PETSC_FALSE + ctypedef enum PetscInsertMode "InsertMode": + PETSC_INSERT_VALUES "INSERT_VALUES" + int PetscCalloc1(size_t, void*) + int PetscMalloc1(size_t, void*) + int PetscMalloc2(size_t, void*, size_t, void*) + int PetscFree(void*) + int PetscFree2(void*,void*) + int MatSetValuesBlockedLocal(PETSc.PetscMat, PetscInt, PetscInt*, PetscInt, PetscInt*, + PetscScalar*, PetscInsertMode) + int MatSetValuesLocal(PETSc.PetscMat, PetscInt, PetscInt*, PetscInt, PetscInt*, + PetscScalar*, PetscInsertMode) + int MatPreallocatorPreallocate(PETSc.PetscMat, PetscBool, PETSc.PetscMat) + int MatXAIJSetPreallocation(PETSc.PetscMat, PetscInt, const PetscInt[], const PetscInt[], + const PetscInt[], const PetscInt[]) + +cdef extern from "petsc/private/matimpl.h": + struct _p_Mat: + void *data + +ctypedef struct Mat_Preallocator: + void *ht + PetscInt *dnz + PetscInt *onz + +cdef extern from *: + void 
def get_preallocation(PETSc.Mat preallocator, PetscInt nrow):
    """Return copies of the row counts stored inside a preallocator matrix.

    :arg preallocator: The PETSc Mat whose private ``Mat_Preallocator`` data
        is read.
    :arg nrow: The number of entries to read from each counts array.

    :return: A pair ``(dnz, onz)`` of numpy arrays copied from the ``dnz``
        and ``onz`` fields (presumably the diagonal- and off-diagonal-block
        counts, following PETSc naming). A field that is ``NULL`` yields an
        empty array of dtype ``IntType``.
    """
    cdef:
        _p_Mat *A = <_p_Mat *>(preallocator.mat)
        # ``data`` is declared as ``void *``; it has to be cast explicitly to
        # the locally declared mirror of the preallocator's private struct.
        Mat_Preallocator *p = <Mat_Preallocator *>(A.data)

    if p.dnz != NULL:
        # Coerce the raw pointer to a length-``nrow`` memoryview so numpy can
        # wrap it, then copy because the buffer belongs to the preallocator.
        dnz = <PetscInt[:nrow]>p.dnz
        dnz = np.asarray(dnz).copy()
    else:
        dnz = np.zeros(0, dtype=IntType)
    if p.onz != NULL:
        onz = <PetscInt[:nrow]>p.onz
        onz = np.asarray(onz).copy()
    else:
        onz = np.zeros(0, dtype=IntType)
    return dnz, onz
def fill_with_zeros(PETSc.Mat mat not None, dims, maps, iteration_regions, set_diag=True):
    """Fill a PETSc matrix with zeros in all slots we might end up inserting into

    :arg mat: the PETSc Mat (must already be preallocated)
    :arg dims: the dimensions of the sparsity (block size)
    :arg maps: the pairs of maps defining the sparsity pattern
    :arg iteration_regions: one iterable of iteration regions
        (``op2.ON_BOTTOM``, ``op2.ON_TOP``, ``op2.ON_INTERIOR_FACETS`` or
        ``op2.ALL``) per pair in ``maps``; only consulted when the iteration
        set of the first pair is extruded
    :arg set_diag: if true, also insert a zero block on every diagonal
        position that exists in both the local rows and the local columns

    :raises RuntimeError: if an iteration region other than the four listed
        above is encountered

    You must call ``mat.assemble()`` after this call."""
    cdef:
        PetscInt rdim, cdim
        PetscScalar *values
        PetscScalar *diag_values
        int set_entry
        int set_size
        int region_selector
        bint constant_layers, extruded_periodic
        PetscInt layer_start, layer_end, num_layers, effective_offset, layer
        PetscInt[:, ::1] layers
        PetscInt i, k, irem
        PetscInt nrow, ncol
        PetscInt rarity, carity, tmp_rarity, tmp_carity
        PetscInt[:, ::1] rmap, cmap, tempmap
        PetscInt **rcomposedmaps = NULL
        PetscInt **ccomposedmaps = NULL
        PetscInt nrcomposedmaps, nccomposedmaps, rset_entry, cset_entry
        PetscInt *rvals
        PetscInt *cvals
        PetscInt *roffset
        PetscInt *coffset
        PetscInt *roffset_quotient
        PetscInt *coffset_quotient

    from pyop2 import op2
    rdim, cdim = dims
    nrow, ncol = mat.getLocalSize()
    # Insert zeros on the diagonal (only when requested)
    if set_diag:
        CHKERR(PetscCalloc1(rdim*cdim, &diag_values))
        for i in range(nrow // rdim):
            if i < ncol // cdim:
                CHKERR(MatSetValuesBlockedLocal(mat.mat, 1, &i, 1, &i, diag_values, PETSC_INSERT_VALUES))
        CHKERR(PetscFree(diag_values))
    if len(maps) == 0:
        return
    extruded = maps[0][0].iterset._extruded
    for pair, iteration_region in zip(maps, iteration_regions):
        # Iterate over row map values including value entries
        set_size = pair[0].iterset.size
        if set_size == 0:
            continue
        rflags = []
        cflags = []
        if isinstance(pair[0], op2.ComposedMap):
            m = pair[0].flattened_maps[0]
            rflags.append(set_writeable(m))
            rmap = m.values_with_halo
            nrcomposedmaps = len(pair[0].flattened_maps) - 1
        else:
            rflags.append(set_writeable(pair[0]))  # Memoryviews require writeable buffers
            rmap = pair[0].values_with_halo  # Map values
            nrcomposedmaps = 0
        if isinstance(pair[1], op2.ComposedMap):
            m = pair[1].flattened_maps[0]
            cflags.append(set_writeable(m))
            cmap = m.values_with_halo
            nccomposedmaps = len(pair[1].flattened_maps) - 1
        else:
            cflags.append(set_writeable(pair[1]))
            cmap = pair[1].values_with_halo
            nccomposedmaps = 0
        # Handle ComposedMaps
        CHKERR(PetscMalloc2(nrcomposedmaps, &rcomposedmaps, nccomposedmaps, &ccomposedmaps))
        for i in range(nrcomposedmaps):
            m = pair[0].flattened_maps[1 + i]
            rflags.append(set_writeable(m))
            tempmap = m.values_with_halo
            rcomposedmaps[i] = &tempmap[0, 0]
        for i in range(nccomposedmaps):
            m = pair[1].flattened_maps[1 + i]
            cflags.append(set_writeable(m))
            tempmap = m.values_with_halo
            ccomposedmaps[i] = &tempmap[0, 0]
        # Arity of maps
        rarity = pair[0].arity
        carity = pair[1].arity
        if not extruded:
            # The non-extruded case is easy, we just walk over the
            # rmap and cmap entries and set a block of values.
            CHKERR(PetscCalloc1(rarity*carity*rdim*cdim, &values))
            for set_entry in range(set_size):
                rset_entry = set_entry
                cset_entry = set_entry
                # Walk the composed maps from the last one to the first; a
                # negative entry means there is nothing to insert here.
                for i in range(nrcomposedmaps):
                    rset_entry = rcomposedmaps[nrcomposedmaps - 1 - i][rset_entry]
                    if rset_entry < 0:
                        break
                if rset_entry < 0:
                    continue
                for i in range(nccomposedmaps):
                    cset_entry = ccomposedmaps[nccomposedmaps - 1 - i][cset_entry]
                    if cset_entry < 0:
                        break
                if cset_entry < 0:
                    continue
                CHKERR(MatSetValuesBlockedLocal(mat.mat, rarity, &rmap[rset_entry, 0],
                                                carity, &cmap[cset_entry, 0],
                                                values, PETSC_INSERT_VALUES))
        else:
            # The extruded case needs a little more work.
            layers = pair[0].iterset.layers_array
            constant_layers = pair[0].iterset.constant_layers
            extruded_periodic = pair[0].iterset._extruded_periodic
            # We only need the *4 if we have an ON_INTERIOR_FACETS
            # iteration region, but it doesn't hurt to make them all
            # bigger, since we can special case less code below.
            CHKERR(PetscCalloc1(4*rarity*carity*rdim*cdim, &values))
            # Row values (generally only rarity of these)
            CHKERR(PetscMalloc1(2 * rarity, &rvals))
            # Col values (generally only rarity of these)
            CHKERR(PetscMalloc1(2 * carity, &cvals))
            # Offsets (for walking up the column)
            CHKERR(PetscMalloc1(rarity, &roffset))
            CHKERR(PetscMalloc1(carity, &coffset))
            # Offset quotients (for walking up the column)
            CHKERR(PetscMalloc1(rarity, &roffset_quotient))
            CHKERR(PetscMalloc1(carity, &coffset_quotient))
            # Walk over the iteration regions on this map.
            for r in iteration_region:
                region_selector = -1
                tmp_rarity = rarity
                tmp_carity = carity
                if r == op2.ON_BOTTOM:
                    region_selector = 1
                elif r == op2.ON_TOP:
                    region_selector = 2
                elif r == op2.ON_INTERIOR_FACETS:
                    region_selector = 3
                    # Double up rvals and cvals (the map is over two
                    # cells, not one)
                    tmp_rarity *= 2
                    tmp_carity *= 2
                elif r != op2.ALL:
                    # Format the message here: passing ``r`` as a second
                    # exception argument would leave the ``%s`` unexpanded.
                    # NOTE: the buffers allocated above are not released on
                    # this error path.
                    raise RuntimeError("Unhandled iteration region %s" % (r,))
                for i in range(rarity):
                    roffset[i] = pair[0].offset[i]
                for i in range(carity):
                    coffset[i] = pair[1].offset[i]
                for i in range(rarity):
                    roffset_quotient[i] = 0 if pair[0].offset_quotient is None else pair[0].offset_quotient[i]
                for i in range(carity):
                    coffset_quotient[i] = 0 if pair[1].offset_quotient is None else pair[1].offset_quotient[i]
                for set_entry in range(set_size):
                    rset_entry = set_entry
                    cset_entry = set_entry
                    for i in range(nrcomposedmaps):
                        rset_entry = rcomposedmaps[nrcomposedmaps - 1 - i][rset_entry]
                        if rset_entry < 0:
                            break
                    if rset_entry < 0:
                        continue
                    for i in range(nccomposedmaps):
                        cset_entry = ccomposedmaps[nccomposedmaps - 1 - i][cset_entry]
                        if cset_entry < 0:
                            break
                    if cset_entry < 0:
                        continue
                    if constant_layers:
                        layer_start = layers[0, 0]
                        layer_end = layers[0, 1] - 1
                    else:
                        layer_start = layers[set_entry, 0]
                        layer_end = layers[set_entry, 1] - 1
                    num_layers = layer_end - layer_start
                    if region_selector == 1:
                        # Bottom, finish after first layer
                        layer_end = layer_start + 1
                    elif region_selector == 2:
                        # Top, start on penultimate layer
                        layer_start = layer_end - 1
                    elif region_selector == 3:
                        if not extruded_periodic:
                            # interior, finish on penultimate layer
                            layer_end = layer_end - 1
                    for layer in range(layer_start, layer_end):
                        # Make sure that the following cases are covered:
                        #
                        # - extrusion type            : standard, periodic
                        # - num_layers                : 1, 2, and N (general)
                        # - integration_type          : ON_INTERIOR_FACET, ALL
                        # - {r,c}offset_quotient[irem]: 0 and 1 (for FEM)
                        #
                        # For the standard extrusion, the following reduces to
                        # the conventional logic;
                        # note that {r,c}offset_quotient[:] == 0 in that case.
                        for i in range(tmp_rarity):
                            k = i // rarity  # always 0 if not ON_INTERIOR_FACETS
                            irem = i % rarity  # always i if not ON_INTERIOR_FACETS
                            effective_offset = layer + k + roffset_quotient[irem]
                            rvals[i] = rmap[rset_entry, irem] + \
                                roffset[irem] * (effective_offset % num_layers - roffset_quotient[irem] % num_layers)
                        for i in range(tmp_carity):
                            k = i // carity
                            irem = i % carity
                            effective_offset = layer + k + coffset_quotient[irem]
                            cvals[i] = cmap[cset_entry, irem] + \
                                coffset[irem] * (effective_offset % num_layers - coffset_quotient[irem] % num_layers)
                        CHKERR(MatSetValuesBlockedLocal(mat.mat, tmp_rarity, rvals,
                                                        tmp_carity, cvals,
                                                        values, PETSC_INSERT_VALUES))
            CHKERR(PetscFree(rvals))
            CHKERR(PetscFree(cvals))
            CHKERR(PetscFree(roffset))
            CHKERR(PetscFree(coffset))
            CHKERR(PetscFree(roffset_quotient))
            CHKERR(PetscFree(coffset_quotient))
        CHKERR(PetscFree2(rcomposedmaps, ccomposedmaps))
        # Put the map buffers back into the writeable state they had on entry
        if isinstance(pair[0], op2.ComposedMap):
            for m, rflag in zip(pair[0].flattened_maps, rflags):
                restore_writeable(m, rflag)
        else:
            restore_writeable(pair[0], rflags[0])
        if isinstance(pair[1], op2.ComposedMap):
            for m, cflag in zip(pair[1].flattened_maps, cflags):
                restore_writeable(m, cflag)
        else:
            restore_writeable(pair[1], cflags[0])
        CHKERR(PetscFree(values))
OP2 is responsible for managing the write +conflicts caused.""" + +MIN = Access.MIN +"""The kernel contributes to a reduction into a :class:`Global` using a ``min`` +operation. OP2 is responsible for reducing over the different kernel +invocations.""" + +MAX = Access.MAX +"""The kernel contributes to a reduction into a :class:`Global` using a ``max`` +operation. OP2 is responsible for reducing over the different kernel +invocations.""" diff --git a/pyop2/types/dat.py b/pyop2/types/dat.py new file mode 100644 index 0000000000..fb877c1a88 --- /dev/null +++ b/pyop2/types/dat.py @@ -0,0 +1,1175 @@ +import abc +import contextlib +import ctypes +import itertools +import operator + +import loopy as lp +import numpy as np +import pytools +from petsc4py import PETSc + +from pyop2 import ( + configuration as conf, + datatypes as dtypes, + exceptions as ex, + mpi, + utils +) +from pyop2.types.access import Access +from pyop2.types.dataset import DataSet, GlobalDataSet, MixedDataSet +from pyop2.types.data_carrier import DataCarrier, EmptyDataMixin, VecAccessMixin +from pyop2.types.set import ExtrudedSet, GlobalSet, Set + + +class AbstractDat(DataCarrier, EmptyDataMixin, abc.ABC): + """OP2 vector data. A :class:`Dat` holds values on every element of a + :class:`DataSet`.o + + If a :class:`Set` is passed as the ``dataset`` argument, rather + than a :class:`DataSet`, the :class:`Dat` is created with a default + :class:`DataSet` dimension of 1. + + If a :class:`Dat` is passed as the ``dataset`` argument, a copy is + returned. + + It is permissible to pass `None` as the `data` argument. In this + case, allocation of the data buffer is postponed until it is + accessed. + + .. note:: + If the data buffer is not passed in, it is implicitly + initialised to be zero. + + When a :class:`Dat` is passed to :func:`pyop2.op2.par_loop`, the map via + which indirection occurs and the access descriptor are passed by + calling the :class:`Dat`. 
For instance, if a :class:`Dat` named ``D`` is + to be accessed for reading via a :class:`Map` named ``M``, this is + accomplished by :: + + D(pyop2.READ, M) + + The :class:`Map` through which indirection occurs can be indexed + using the index notation described in the documentation for the + :class:`Map`. Direct access to a Dat is accomplished by + omitting the path argument. + + :class:`Dat` objects support the pointwise linear algebra operations + ``+=``, ``*=``, ``-=``, ``/=``, where ``*=`` and ``/=`` also support + multiplication / division by a scalar. + """ + + _zero_kernels = {} + """Class-level cache for zero kernels.""" + + _modes = [Access.READ, Access.WRITE, Access.RW, Access.INC, Access.MIN, Access.MAX] + + @utils.validate_type(('dataset', (DataCarrier, DataSet, Set), ex.DataSetTypeError), + ('name', str, ex.NameTypeError)) + @utils.validate_dtype(('dtype', None, ex.DataTypeError)) + def __init__(self, dataset, data=None, dtype=None, name=None): + + if isinstance(dataset, Dat): + self.__init__(dataset.dataset, None, dtype=dataset.dtype, + name="copy_of_%s" % dataset.name) + dataset.copy(self) + return + if type(dataset) is Set or type(dataset) is ExtrudedSet: + # If a Set, rather than a dataset is passed in, default to + # a dataset dimension of 1. 
def __getitem__(self, idx):
    """Index this :class:`Dat`.

    Component ``0`` is the :class:`Dat` itself; asking for any other
    component raises an :class:`IndexValueError`.
    """
    if idx == 0:
        return self
    raise ex.IndexValueError("Can only extract component 0 from %r" % self)
@property
@mpi.collective
def data_ro(self):
    """Read-only Numpy view of the data values.

    The returned array has its ``write`` flag cleared, so the values
    cannot be modified through it. Use :meth:`data` if you need to
    change them.

    Only the local values are shown, use :meth:`data_ro_with_halos`
    to see the halo values as well.

    """
    expects_data = self.dataset.total_size > 0
    if expects_data and self._data.size == 0 and self.cdim > 0:
        raise RuntimeError("Illegal access: no data associated with this Dat!")
    local_values = self._data[:self.dataset.size].view()
    local_values.setflags(write=False)
    return local_values
def load(self, filename):
    """Read the data stored in file ``filename`` into a NumPy array
    and store the values in :meth:`_data`.

    :arg filename: Name of the file to read, as a string or an
        :class:`os.PathLike` object. A missing ``.npy`` extension is
        added before the file is opened.
    """
    import os

    # Accept path objects as well as strings, like ``np.save`` (used by
    # :meth:`save`) does.
    filename = os.fspath(filename)
    # The np.save method appends a .npy extension to the file name
    # if the user has not supplied it. However, np.load does not,
    # so we need to handle this ourselves here.
    if not filename.endswith(".npy"):
        filename = filename + ".npy"

    # Evaluate the ``data`` accessor only once and reuse the result.
    data = self.data
    loaded = np.load(filename)
    if isinstance(data, tuple):
        # MixedDat case
        for d, d_from_file in zip(data, loaded):
            d[:] = d_from_file[:]
    else:
        data[:] = loaded
@mpi.collective
def zero(self, subset=None):
    """Set the values of this :class:`Dat` to zero.

    :arg subset: Optional :class:`Subset`; when given, only its owned
        entries are zeroed."""
    # Zeroing counts as a modification of the data.
    self.increment_dat_version()

    if subset is None:
        # Without a subset the whole buffer, halo included, is zeroed,
        # so the halo can be flagged as valid.
        self._data[:] = 0
        self.halo_valid = True
        return

    if subset.superset != self.dataset.set:
        raise ex.MapValueError("The subset and dataset are incompatible")
    self.data[subset.owned_indices] = 0
def _check_shape(self, other):
    """Check that ``other`` has the same per-element shape as this object.

    :arg other: The object to compare against; its ``dataset.dim`` is
        compared with ``self.dataset.dim``.
    :raises ValueError: If the two ``dim`` values differ.
    """
    if other.dataset.dim != self.dataset.dim:
        # Interpolate the shapes into the message; handing them to
        # ValueError as extra arguments would leave the ``%s`` unexpanded.
        raise ValueError('Mismatched shapes in operands %s and %s'
                         % (self.dataset.dim, other.dataset.dim))
def _op(self, other, op):
    """Apply the binary operator ``op`` pointwise and return a new :class:`Dat`.

    :arg other: a scalar, or an object whose ``dataset.dim`` matches ours.
    :arg op: the binary operator used to build the kernel.
    :returns: a freshly created :class:`Dat` holding the result.
    """
    from pyop2.types.glob import Global
    from pyop2.parloop import parloop

    result = Dat(self.dataset, None, self.dtype)
    globalp = np.isscalar(other)
    if globalp:
        # Wrap the scalar so it can be passed to the parloop as an argument.
        other = Global(1, data=other, comm=self.comm)
    else:
        self._check_shape(other)
    kernel = self._op_kernel(op, globalp, other.dtype)
    parloop(kernel, self.dataset.set,
            self(Access.READ), other(Access.READ), result(Access.WRITE))
    return result
def inner(self, other):
    """Compute the l2 inner product of the flattened :class:`Dat`

    :arg other: the other :class:`Dat` to compute the inner
        product against. The complex conjugate of this is taken.
    :returns: the first entry of the accumulated result.

    """
    from pyop2.parloop import parloop
    from pyop2.types.glob import Global

    self._check_shape(other)
    # Single-entry accumulator, initialised to zero, that the kernel
    # increments.
    accumulator = Global(1, data=0, dtype=self.dtype, comm=self.comm)
    kernel = self._inner_kernel(other.dtype)
    parloop(kernel, self.dataset.set,
            self(Access.READ), other(Access.READ), accumulator(Access.INC))
    return accumulator.data_ro[0]
def __rsub__(self, other):
    """Pointwise subtraction of fields.

    self.__rsub__(other) <==> other - self."""
    # other - self is computed as (-self) + other: negate into a new object,
    # then add ``other`` to that object in place.
    return operator.iadd(-self, other)
@mpi.collective
def global_to_local_begin(self, access_mode):
    """Begin a halo exchange from global to ghosted representation.

    :kwarg access_mode: Mode with which the data will subsequently
        be accessed."""
    halo = self.dataset.halo
    if halo is None or self._halo_frozen:
        return
    if not self.halo_valid and access_mode in {Access.READ, Access.RW}:
        # The data will be read and the halo is stale: start the exchange.
        halo.global_to_local_begin(self, Access.WRITE)
        return
    if access_mode not in {Access.INC, Access.MIN, Access.MAX}:
        # WRITE, or a read with an already valid halo: nothing to do.
        return
    # Reductions: initialise the entries past ``dataset.size`` with the
    # value that leaves the reduction unchanged.
    lower, upper = dtypes.dtype_limits(self.dtype)
    if access_mode == Access.INC:
        fill = 0
    elif access_mode == Access.MAX:
        fill = lower
    else:
        fill = upper
    self._data[self.dataset.size:] = fill
@mpi.collective
def local_to_global_end(self, insert_mode):
    """End a halo exchange from ghosted to global representation.

    :kwarg insert_mode: insertion mode (an access descriptor)"""
    halo = self.dataset.halo
    # Nothing happens when there is no halo or while the halo is frozen.
    if halo is not None and not self._halo_frozen:
        halo.local_to_global_end(self, insert_mode)
        # The halo is marked as out of date once the exchange has finished.
        self.halo_valid = False
+ """ + if self._halo_frozen: + raise RuntimeError("Expected an unfrozen halo") + self._halo_frozen = True + self._frozen_access_mode = access_mode + + @mpi.collective + def unfreeze_halo(self): + """Re-enable halo exchanges.""" + if not self._halo_frozen: + raise RuntimeError("Expected a frozen halo") + self._halo_frozen = False + self._frozen_access_mode = None + + +class DatView(AbstractDat): + """An indexed view into a :class:`Dat`. + + This object can be used like a :class:`Dat` but the kernel will + only see the requested index, rather than the full data. + + :arg dat: The :class:`Dat` to create a view into. + :arg index: The component to select a view of. + """ + def __init__(self, dat, index): + index = utils.as_tuple(index) + assert len(index) == len(dat.dim) + for i, d in zip(index, dat.dim): + if not (0 <= i < d): + raise ex.IndexValueError("Can't create DatView with index %s for Dat with shape %s" % (index, dat.dim)) + self.index = index + self._idx = (slice(None), *index) + self._parent = dat + # Point at underlying data + super(DatView, self).__init__(dat.dataset, + dat._data, + dtype=dat.dtype, + name="view[%s](%s)" % (index, dat.name)) + + @utils.cached_property + def _kernel_args_(self): + return self._parent._kernel_args_ + + @utils.cached_property + def _argtypes_(self): + return self._parent._argtypes_ + + @utils.cached_property + def _wrapper_cache_key_(self): + return (type(self), self.index, self._parent._wrapper_cache_key_) + + @utils.cached_property + def cdim(self): + return 1 + + @utils.cached_property + def dim(self): + return (1, ) + + @utils.cached_property + def shape(self): + return (self.dataset.total_size, ) + + @property + def halo_valid(self): + return self._parent.halo_valid + + @halo_valid.setter + def halo_valid(self, value): + self._parent.halo_valid = value + + @property + def dat_version(self): + return self._parent.dat_version + + @property + def _data(self): + return self._parent._data[self._idx] + + @property + def 
data(self): + return self._parent.data[self._idx] + + @property + def data_ro(self): + return self._parent.data_ro[self._idx] + + @property + def data_wo(self): + return self._parent.data_wo[self._idx] + + @property + def data_with_halos(self): + return self._parent.data_with_halos[self._idx] + + @property + def data_ro_with_halos(self): + return self._parent.data_ro_with_halos[self._idx] + + @property + def data_wo_with_halos(self): + return self._parent.data_wo_with_halos[self._idx] + + +class Dat(AbstractDat, VecAccessMixin): + + def __init__(self, *args, **kwargs): + AbstractDat.__init__(self, *args, **kwargs) + # Determine if we can rely on PETSc state counter + petsc_counter = (self.dtype == PETSc.ScalarType) + VecAccessMixin.__init__(self, petsc_counter=petsc_counter) + + @utils.cached_property + def _vec(self): + assert self.dtype == PETSc.ScalarType, \ + "Can't create Vec with type %s, must be %s" % (self.dtype, PETSc.ScalarType) + # Can't duplicate layout_vec of dataset, because we then + # carry around extra unnecessary data. + # But use getSizes to save an Allreduce in computing the + # global size. + size = self.dataset.layout_vec.getSizes() + data = self._data[:size[0]] + return PETSc.Vec().createWithArray(data, size=size, bsize=self.cdim, comm=self.comm) + + @contextlib.contextmanager + def vec_context(self, access): + r"""A context manager for a :class:`PETSc.Vec` from a :class:`Dat`. + + :param access: Access descriptor: READ, WRITE, or RW.""" + yield self._vec + if access is not Access.READ: + self.halo_valid = False + + +class MixedDat(AbstractDat, VecAccessMixin): + r"""A container for a bag of :class:`Dat`\s. 
def __init__(self, mdset_or_dats):
    """Build the tuple of components making up this :class:`MixedDat`.

    :arg mdset_or_dats: either a :class:`MixedDat`, whose components are
        each passed through the matching ``Dat``/``Global`` constructor, or
        an iterable whose items are ready-made :class:`Dat`/:class:`Global`
        objects (used as they are) or set-like objects from which a new
        :class:`Dat` or :class:`Global` is constructed.
    :raises DataSetTypeError: if an item is of an unsupported type.
    :raises DataValueError: if the components do not all share one dtype.
    """
    from pyop2.types.glob import Global

    def what(x):
        # Pick the constructor that matches the kind of object we were given.
        if isinstance(x, (Global, GlobalDataSet, GlobalSet)):
            return Global
        elif isinstance(x, (Dat, DataSet, Set)):
            return Dat
        else:
            # Name the offending type so the caller can find the bad item.
            raise ex.DataSetTypeError(
                "Cannot build a MixedDat component from an object of type %r"
                % type(x).__name__)
    if isinstance(mdset_or_dats, MixedDat):
        self._dats = tuple(what(d)(d) for d in mdset_or_dats)
    else:
        self._dats = tuple(d if isinstance(d, (Dat, Global)) else what(d)(d) for d in mdset_or_dats)
    if not all(d.dtype == self._dats[0].dtype for d in self._dats):
        raise ex.DataValueError('MixedDat with different dtypes is not supported')
    # TODO: Think about different communicators on dats (c.f. MixedSet)
    self.comm = mpi.internal_comm(self._dats[0].comm, self)
halos.""" + return tuple(s.data_ro for s in self._dats) + + @property + @mpi.collective + def data_ro_with_halos(self): + """Numpy arrays with read-only data including halos.""" + return tuple(s.data_ro_with_halos for s in self._dats) + + @property + @mpi.collective + def data_wo(self): + """Numpy arrays with read-only data excluding halos.""" + return tuple(s.data_wo for s in self._dats) + + @property + @mpi.collective + def data_wo_with_halos(self): + """Numpy arrays with read-only data including halos.""" + return tuple(s.data_wo_with_halos for s in self._dats) + + @property + def halo_valid(self): + """Does this Dat have up to date halos?""" + return all(s.halo_valid for s in self) + + @halo_valid.setter + def halo_valid(self, val): + """Indictate whether this Dat requires a halo update""" + for d in self: + d.halo_valid = val + + @mpi.collective + def global_to_local_begin(self, access_mode): + for s in self: + s.global_to_local_begin(access_mode) + + @mpi.collective + def global_to_local_end(self, access_mode): + for s in self: + s.global_to_local_end(access_mode) + + @mpi.collective + def local_to_global_begin(self, insert_mode): + for s in self: + s.local_to_global_begin(insert_mode) + + @mpi.collective + def local_to_global_end(self, insert_mode): + for s in self: + s.local_to_global_end(insert_mode) + + @mpi.collective + def freeze_halo(self, access_mode): + """Disable halo exchanges.""" + for d in self: + d.freeze_halo(access_mode) + + @mpi.collective + def unfreeze_halo(self): + """Re-enable halo exchanges.""" + for d in self: + d.unfreeze_halo() + + @mpi.collective + def zero(self, subset=None): + """Zero the data associated with this :class:`MixedDat`. 
def __ne__(self, other):
    r""":class:`MixedDat`\s are unequal exactly when :meth:`__eq__` does
    not report them as equal."""
    return not self.__eq__(other)
def __rsub__(self, other):
    """Pointwise subtraction of fields.

    self.__rsub__(other) <==> other - self.

    :arg other: the left-hand operand of the subtraction.
    :returns: whatever :meth:`_op` builds from the per-component results.
    """
    # Subtraction is not commutative: swap the operands handed to ``_op`` so
    # that every component computes ``other - self`` and not ``self - other``.
    return self._op(other, lambda mine, theirs: theirs - mine)
def __truediv__(self, other):
    """Pointwise division or scaling of fields."""
    # Python 3 dispatches ``/`` to __truediv__; ``operator.div`` does not
    # exist there, so the old __div__ could only raise AttributeError.
    return self._op(other, operator.truediv)

# Keep the legacy name available for any explicit callers.
__div__ = __truediv__

def __iadd__(self, other):
    """Pointwise addition of fields."""
    return self._iop(other, operator.iadd)

def __isub__(self, other):
    """Pointwise subtraction of fields."""
    return self._iop(other, operator.isub)

def __imul__(self, other):
    """Pointwise multiplication or scaling of fields."""
    return self._iop(other, operator.imul)

def __itruediv__(self, other):
    """Pointwise division or scaling of fields."""
    # In-place counterpart of __truediv__; ``operator.idiv`` does not exist
    # in Python 3 either.
    return self._iop(other, operator.itruediv)

# Keep the legacy name available for any explicit callers.
__idiv__ = __itruediv__
class frozen_halo:
    """Context manager handling the freezing and unfreezing of halos.

    :param dat: The :class:`Dat` whose halo is to be frozen.
    :param access_mode: Mode with which the :class:`Dat` will be accessed whilst
        its halo is frozen.
    """
    def __init__(self, dat, access_mode):
        self._dat, self._access_mode = dat, access_mode

    def __enter__(self):
        dat, mode = self._dat, self._access_mode
        # Initialise the halo values (e.g. set to zero if INC'ing) before
        # exchanges are switched off.
        dat.global_to_local_begin(mode)
        dat.global_to_local_end(mode)
        dat.freeze_halo(mode)

    def __exit__(self, *args):
        dat, mode = self._dat, self._access_mode
        # Re-enable exchanges first, then do the exchange that was skipped
        # while the halo was frozen.
        dat.unfreeze_halo()
        dat.local_to_global_begin(mode)
        dat.local_to_global_end(mode)
@utils.cached_property
def dtype(self):
    """The dtype of the underlying data buffer (``self._data.dtype``)."""
    return self._data.dtype
+ """ + def __init__(self, data, dtype, shape): + if data is None: + self._dtype = np.dtype(dtype if dtype is not None else dtypes.ScalarType) + else: + self._numpy_data = utils.verify_reshape(data, dtype, shape, allow_none=True) + self._dtype = self._data.dtype + + @utils.cached_property + def _data(self): + """Return the user-provided data buffer, or a zeroed buffer of + the correct size if none was provided.""" + if not self._is_allocated: + self._numpy_data = np.zeros(self.shape, dtype=self._dtype) + return self._numpy_data + + @property + def _is_allocated(self): + """Return True if the data buffer has been allocated.""" + return hasattr(self, '_numpy_data') + + +class VecAccessMixin(abc.ABC): + + def __init__(self, petsc_counter=None): + if petsc_counter: + # Use lambda since `_vec` allocates the data buffer + # -> Dat/Global should not allocate storage until accessed + self._dat_version = lambda: self._vec.stateGet() + self.increment_dat_version = lambda: self._vec.stateIncrease() + else: + # No associated PETSc Vec if incompatible type: + # -> Equip Dat/Global with their own counter. + self._version = 0 + self._dat_version = lambda: self._version + + def _inc(): + self._version += 1 + self.increment_dat_version = _inc + + @property + def dat_version(self): + return self._dat_version() + + @abc.abstractmethod + def vec_context(self, access): + pass + + @abc.abstractproperty + def _vec(self): + pass + + @property + @mpi.collective + def vec(self): + """Context manager for a PETSc Vec appropriate for this Dat. + + You're allowed to modify the data you get back from this view.""" + return self.vec_context(access=Access.RW) + + @property + @mpi.collective + def vec_wo(self): + """Context manager for a PETSc Vec appropriate for this Dat. 
+ + You're allowed to modify the data you get back from this view, + but you cannot read from it.""" + return self.vec_context(access=Access.WRITE) + + @property + @mpi.collective + def vec_ro(self): + """Context manager for a PETSc Vec appropriate for this Dat. + + You're not allowed to modify the data you get back from this view.""" + return self.vec_context(access=Access.READ) diff --git a/pyop2/types/dataset.py b/pyop2/types/dataset.py new file mode 100644 index 0000000000..3b4f4bfd8a --- /dev/null +++ b/pyop2/types/dataset.py @@ -0,0 +1,515 @@ +import numbers + +import numpy as np +from petsc4py import PETSc + +from pyop2 import ( + caching, + datatypes as dtypes, + exceptions as ex, + mpi, + utils +) +from pyop2.types.set import ExtrudedSet, GlobalSet, MixedSet, Set, Subset + + +class DataSet(caching.ObjectCached): + """PyOP2 Data Set + + Set used in the op2.Dat structures to specify the dimension of the data. + """ + + @utils.validate_type(('iter_set', Set, ex.SetTypeError), + ('dim', (numbers.Integral, tuple, list), ex.DimTypeError), + ('name', str, ex.NameTypeError)) + def __init__(self, iter_set, dim=1, name=None): + if isinstance(iter_set, ExtrudedSet): + raise NotImplementedError("Not allowed!") + if self._initialized: + return + if isinstance(iter_set, Subset): + raise NotImplementedError("Deriving a DataSet from a Subset is unsupported") + self.comm = mpi.internal_comm(iter_set.comm, self) + self._set = iter_set + self._dim = utils.as_tuple(dim, numbers.Integral) + self._cdim = np.prod(self._dim).item() + self._name = name or "dset_#x%x" % id(self) + self._initialized = True + + @classmethod + def _process_args(cls, *args, **kwargs): + return (args[0], ) + args, kwargs + + @classmethod + def _cache_key(cls, iter_set, dim=1, name=None): + return (iter_set, utils.as_tuple(dim, numbers.Integral)) + + @utils.cached_property + def _wrapper_cache_key_(self): + return (type(self), self.dim, self._set._wrapper_cache_key_) + + def __getstate__(self): + 
"""Extract state to pickle.""" + return self.__dict__ + + def __setstate__(self, d): + """Restore from pickled state.""" + self.__dict__.update(d) + + # Look up any unspecified attributes on the _set. + def __getattr__(self, name): + """Returns a Set specific attribute.""" + value = getattr(self.set, name) + return value + + def __getitem__(self, idx): + """Allow index to return self""" + assert idx == 0 + return self + + @utils.cached_property + def dim(self): + """The shape tuple of the values for each element of the set.""" + return self._dim + + @utils.cached_property + def cdim(self): + """The scalar number of values for each member of the set. This is + the product of the dim tuple.""" + return self._cdim + + @utils.cached_property + def name(self): + """Returns the name of the data set.""" + return self._name + + @utils.cached_property + def set(self): + """Returns the parent set of the data set.""" + return self._set + + def __iter__(self): + """Yield self when iterated over.""" + yield self + + def __len__(self): + """This is not a mixed type and therefore of length 1.""" + return 1 + + def __str__(self): + return "OP2 DataSet: %s on set %s, with dim %s" % \ + (self._name, self._set, self._dim) + + def __repr__(self): + return "DataSet(%r, %r, %r)" % (self._set, self._dim, self._name) + + def __contains__(self, dat): + """Indicate whether a given Dat is compatible with this DataSet.""" + return dat.dataset == self + + @utils.cached_property + def lgmap(self): + """A PETSc LGMap mapping process-local indices to global + indices for this :class:`DataSet`. 
+ """ + lgmap = PETSc.LGMap() + if self.comm.size == 1 and self.halo is None: + lgmap.create(indices=np.arange(self.size, dtype=dtypes.IntType), + bsize=self.cdim, comm=self.comm) + else: + lgmap.create(indices=self.halo.local_to_global_numbering, + bsize=self.cdim, comm=self.comm) + return lgmap + + @utils.cached_property + def scalar_lgmap(self): + if self.cdim == 1: + return self.lgmap + indices = self.lgmap.block_indices + return PETSc.LGMap().create(indices=indices, bsize=1, comm=self.comm) + + @utils.cached_property + def unblocked_lgmap(self): + """A PETSc LGMap mapping process-local indices to global + indices for this :class:`DataSet` with a block size of 1. + """ + if self.cdim == 1: + return self.lgmap + else: + indices = self.lgmap.indices + lgmap = PETSc.LGMap().create(indices=indices, + bsize=1, comm=self.lgmap.comm) + return lgmap + + @utils.cached_property + def field_ises(self): + """A list of PETSc ISes defining the global indices for each set in + the DataSet. + + Used when extracting blocks from matrices for solvers.""" + ises = [] + nlocal_rows = 0 + for dset in self: + nlocal_rows += dset.layout_vec.local_size + offset = self.comm.scan(nlocal_rows) + offset -= nlocal_rows + for dset in self: + nrows = dset.layout_vec.local_size + iset = PETSc.IS().createStride(nrows, first=offset, step=1, + comm=self.comm) + iset.setBlockSize(dset.cdim) + ises.append(iset) + offset += nrows + return tuple(ises) + + @utils.cached_property + def local_ises(self): + """A list of PETSc ISes defining the local indices for each set in the DataSet. 
@utils.cached_property
def layout_vec(self):
    """A PETSc Vec compatible with the dof layout of this DataSet.

    The local size is ``(size - set.constrained_size) * cdim``; the global
    size is passed as ``None``. The block size is ``cdim``.
    """
    layout = PETSc.Vec().create(comm=self.comm)
    n_unconstrained = self.size - self.set.constrained_size
    layout.setSizes((n_unconstrained * self.cdim, None), bsize=self.cdim)
    layout.setUp()
    return layout
@utils.cached_property
def size(self):
    """The number of local entries in the Dataset (1 on rank 0, 0 elsewhere).

    The rank is read from ``self.comm``, the communicator stored on this
    data set by ``__init__``.
    """
    return 1 if self.comm.rank == 0 else 0
@utils.cached_property
def dm(self):
    """A PETSc DMShell on this data set's communicator whose global
    vector is :attr:`layout_vec`."""
    shell = PETSc.DMShell().create(comm=self.comm)
    shell.setGlobalVector(self.layout_vec)
    return shell
@classmethod
def _process_args(cls, arg, dims=None):
    r"""Normalise the constructor arguments to a tuple of :class:`DataSet`\s.

    :param arg: a :class:`MixedSet`, or an iterable of :class:`Set`\s
        and/or :class:`DataSet`\s.
    :param dims: ``None``, a single integer used for every set, or an
        iterable with one entry per set.
    :returns: the pair ``((first_set, dsets), {})`` where ``dsets`` is the
        tuple of data sets and ``first_set`` is ``dsets[0].set``.
    :raises ValueError: if the number of ``dims`` differs from the number
        of sets.
    """
    import numbers

    if dims is not None:
        # With explicit dims the first argument is a MixedSet or an
        # iterable of Sets.
        sets = arg.split if isinstance(arg, MixedSet) else tuple(arg)
        if isinstance(dims, numbers.Integral):
            # Accept any integral scalar (e.g. a NumPy integer) and
            # normalise it to a plain int before broadcasting.
            dims = (int(dims),) * len(sets)
        else:
            dims = tuple(dims)
        if len(sets) != len(dims):
            raise ValueError("Got MixedSet of %d Sets but %s dims" %
                             (len(sets), len(dims)))
        dsets = tuple(s ** d for s, d in zip(sets, dims))
    else:
        # Otherwise the first argument is an iterable of Sets and/or
        # DataSets; Sets are upcast to DataSets of dim 1.
        upcast = [s if isinstance(s, DataSet) else s ** 1 for s in arg]
        dsets = utils.as_tuple(upcast, type=DataSet)

    return (dsets[0].set, ) + (dsets, ), {}
@utils.cached_property
def layout_vec(self):
    """A PETSc Vec compatible with the dof layout of this MixedDataSet.

    Its local and global sizes are the sums of the corresponding sizes of
    each constituent's ``layout_vec``; the block size is 1.
    """
    combined = PETSc.Vec().create(comm=self.comm)
    # Transpose [(local, global), ...] into (locals, globals).
    local_sizes, global_sizes = zip(*[d.layout_vec.sizes for d in self])
    combined.setSizes((sum(local_sizes), sum(global_sizes)), bsize=1)
    combined.setUp()
    return combined
+ # + # We have per-field local to global numberings, to convert + # these into multi-field local to global numberings, we note + # the following: + # + # For each entry in the per-field l2g map, we first determine + # the rank that entry belongs to, call this r. + # + # We know that this must be offset by: + # 1. The sum of all field lengths with rank < r + # 2. The sum of all lower-numbered field lengths on rank r. + # + # Finally, we need to shift the field-local entry by the + # current field offset. + idx_size = sum(s.total_size*s.cdim for s in self) + indices = np.full(idx_size, -1, dtype=dtypes.IntType) + owned_sz = np.array([sum((s.size - s.constrained_size) * s.cdim for s in self)], + dtype=dtypes.IntType) + field_offset = np.empty_like(owned_sz) + self.comm.Scan(owned_sz, field_offset) + field_offset -= owned_sz + + all_field_offsets = np.empty(self.comm.size, dtype=dtypes.IntType) + self.comm.Allgather(field_offset, all_field_offsets) + + start = 0 + all_local_offsets = np.zeros(self.comm.size, dtype=dtypes.IntType) + current_offsets = np.zeros(self.comm.size + 1, dtype=dtypes.IntType) + for s in self: + idx = indices[start:start + s.total_size * s.cdim] + owned_sz[0] = (s.size - s.set.constrained_size) * s.cdim + self.comm.Scan(owned_sz, field_offset) + self.comm.Allgather(field_offset, current_offsets[1:]) + # Find the ranks each entry in the l2g belongs to + l2g = s.unblocked_lgmap.indices + tmp_indices = np.searchsorted(current_offsets, l2g, side="right") - 1 + idx[:] = l2g[:] - current_offsets[tmp_indices] + \ + all_field_offsets[tmp_indices] + all_local_offsets[tmp_indices] + self.comm.Allgather(owned_sz, current_offsets[1:]) + all_local_offsets += current_offsets[1:] + start += s.total_size * s.cdim + lgmap.create(indices=indices, bsize=1, comm=self.comm) + return lgmap + + @utils.cached_property + def unblocked_lgmap(self): + """A PETSc LGMap mapping process-local indices to global + indices for this :class:`DataSet` with a block size of 1. 
@property
def data(self):
    """Data array, handed out for modification.

    :returns: the underlying data array.
    :raises RuntimeError: if the data array has length zero.
    """
    # Validate first so that a failed access does not bump the version
    # counter; the counter only changes when the array is handed out.
    if len(self._data) == 0:
        raise RuntimeError("Illegal access: No data associated with this Global!")
    self.increment_dat_version()
    return self._data
def _op(self, other, op):
    """Apply the binary operator ``op`` and return the result as a new
    object of the same type as ``self``.

    :arg other: Another object of this type (its ``data_ro`` is used) or
        any value ``op`` accepts as a right-hand operand.
    :arg op: A two-argument callable such as ``operator.add``.
    :returns: A new ``type(self)`` instance holding the result.
    """
    # Forward a communicator only if this object has one. A Constant is
    # built without a comm and its constructor rejects a non-None comm.
    ret = type(self)(self.dim, dtype=self.dtype, name=self.name,
                     comm=getattr(self, "comm", None))
    rhs = other.data_ro if isinstance(other, type(self)) else other
    ret.data[:] = op(self.data_ro, rhs)
    return ret
def inner(self, other):
    """Return ``np.dot`` of this object's data with the complex conjugate
    of ``other``'s data.

    :arg other: An object whose type is ``type(self)`` or a subclass.
    """
    own_type = type(self)
    assert issubclass(type(other), own_type)
    lhs = self.data_ro
    rhs_conj = np.conj(other.data_ro)
    return np.dot(lhs, rhs_conj)
def __init__(self, dim, data=None, dtype=None, name=None, comm=None):
    """Create a Global, or deep-copy an existing Global or Constant.

    :arg dim: The shape of the value, or a :class:`Global` /
        :class:`Constant` to copy.
    :arg data: Initial values (not used when copying).
    :arg dtype: Data type (taken from the source when copying).
    :arg name: Name (``copy_of_<name>`` when copying).
    :arg comm: The communicator. A warning is issued if it is ``None``.
    """
    if isinstance(dim, (type(self), Constant)):
        # If g is a Global, Global(g) performs a deep copy.
        # If g is a Constant, Global(g) performs a deep copy,
        # but a comm should be provided.
        # This is for compatibility with Dat.
        self.__init__(
            dim._dim,
            None,
            dtype=dim.dtype,
            name="copy_of_%s" % dim.name,
            # A Constant is built without a communicator; fall back to
            # None (and the warning below) instead of an AttributeError.
            comm=comm or getattr(dim, "comm", None)
        )
        dim.copy(self)
    else:
        super().__init__(dim, data, dtype, name)
        if comm is None:
            # stacklevel=2 attributes the warning to the code that
            # constructed the Global rather than to this line.
            warnings.warn("PyOP2.Global has no comm, this is likely to break in parallel!",
                          stacklevel=2)
        self.comm = mpi.internal_comm(comm, self)

        # Object versioning setup
        petsc_counter = (comm and self.dtype == PETSc.ScalarType)
        VecAccessMixin.__init__(self, petsc_counter=petsc_counter)
@utils.cached_property
def _vec(self):
    """A PETSc Vec over this Global's data.

    Rank 0 of ``self.comm`` wraps the data array; every other rank wraps
    an empty array. The sizes come from the dataset's ``layout_vec``.
    """
    scalar_type = PETSc.ScalarType
    assert self.dtype == scalar_type, \
        "Can't create Vec with type %s, must be %s" % (self.dtype, scalar_type)
    # Can't duplicate layout_vec of dataset, because we then
    # carry around extra unnecessary data.
    # But use getSizes to save an Allreduce in computing the
    # global size.
    payload = self._data
    sizes = self.dataset.layout_vec.getSizes()
    if self.comm.rank != 0:
        payload = np.empty(0, dtype=self.dtype)
    return PETSc.Vec().createWithArray(payload, size=sizes,
                                       bsize=self.cdim,
                                       comm=self.comm)
class Halo(abc.ABC):

    """A description of a halo associated with a :class:`pyop2.types.set.Set`.

    The halo object describes which :class:`pyop2.types.set.Set` elements are sent
    where, and which :class:`pyop2.types.set.Set` elements are received from where.
    """

    # ``abc.abstractproperty`` is deprecated; ``property`` stacked on
    # ``abc.abstractmethod`` is the supported spelling.
    @property
    @abc.abstractmethod
    def comm(self):
        """The MPI communicator for this halo."""
        pass

    @property
    @abc.abstractmethod
    def local_to_global_numbering(self):
        """The mapping from process-local to process-global numbers for this halo."""
        pass

    @abc.abstractmethod
    def global_to_local_begin(self, dat, insert_mode):
        """Begin an exchange from global (assembled) to local (ghosted) representation.

        :arg dat: The :class:`pyop2.types.dat.Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """
        pass

    @abc.abstractmethod
    def global_to_local_end(self, dat, insert_mode):
        """Finish an exchange from global (assembled) to local (ghosted) representation.

        :arg dat: The :class:`pyop2.types.dat.Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """
        pass

    @abc.abstractmethod
    def local_to_global_begin(self, dat, insert_mode):
        """Begin an exchange from local (ghosted) to global (assembled) representation.

        :arg dat: The :class:`pyop2.types.dat.Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """
        pass

    @abc.abstractmethod
    def local_to_global_end(self, dat, insert_mode):
        """Finish an exchange from local (ghosted) to global (assembled) representation.

        :arg dat: The :class:`pyop2.types.dat.Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """
        pass
@utils.validate_type(('iterset', Set, ex.SetTypeError), ('toset', Set, ex.SetTypeError),
                     ('arity', numbers.Integral, ex.ArityTypeError), ('name', str, ex.NameTypeError))
def __init__(self, iterset, toset, arity, values=None, name=None, offset=None, offset_quotient=None):
    """Set up a map from ``iterset`` to ``toset``.

    :arg iterset: The :class:`Set` mapped from.
    :arg toset: The :class:`Set` mapped to; its communicator is used.
    :arg arity: Number of ``toset`` entries per ``iterset`` entry.
    :arg values: Map values, reshaped to ``(iterset.total_size, arity)``;
        may be ``None``.
    :arg name: Optional name; a default is generated from ``id(self)``.
    :arg offset: Optional length-``arity`` offsets; ``None`` or empty
        means no offset.
    :arg offset_quotient: Optional length-``arity`` offset quotients;
        ``None`` or empty means none.
    """
    def per_arity(entries):
        # Missing or empty input is stored as None.
        if entries is None or len(entries) == 0:
            return None
        return utils.verify_reshape(entries, dtypes.IntType, (arity, ))

    self._iterset = iterset
    self._toset = toset
    self.comm = mpi.internal_comm(toset.comm, self)
    self._arity = arity
    self._values = utils.verify_reshape(values, dtypes.IntType,
                                        (iterset.total_size, arity), allow_none=True)
    self.shape = (iterset.total_size, arity)
    self._name = name or "map_#x%x" % id(self)
    self._offset = per_arity(offset)
    self._offset_quotient = per_arity(offset_quotient)
    # A cache for objects built on top of this map
    self._cache = {}
@utils.cached_property
def _global_kernel_arg(self):
    """The ``MapKernelArg`` for this map, built from its arity, offset
    and offset quotient (each converted to a tuple unless ``None``)."""
    from pyop2.global_kernel import MapKernelArg

    def frozen(values):
        return None if values is None else tuple(values)

    return MapKernelArg(self.arity, frozen(self.offset), frozen(self.offset_quotient))
def __repr__(self):
    """Return a constructor-like representation; the values slot is
    always shown as ``None``."""
    fields = (self._iterset, self._toset, self._arity,
              self._name, self._offset, self._offset_quotient)
    return "Map(%r, %r, %r, None, %r, %r, %r)" % fields
def __init__(self, map_, permutation):
    """Wrap ``map_`` together with a permutation of its indices.

    :arg map_: The :class:`Map` to permute (not a :class:`ComposedMap`).
    :arg permutation: Indices into the map's arity; its distinct values
        must be exactly ``0, ..., map_.arity - 1``.
    :raises TypeError: if ``map_`` is not a :class:`Map`.
    :raises NotImplementedError: if ``map_`` is a :class:`ComposedMap`.
    :raises ValueError: if ``permutation`` does not cover the map indices.
    """
    if not isinstance(map_, Map):
        raise TypeError("map_ must be a Map instance")
    if isinstance(map_, ComposedMap):
        raise NotImplementedError("PermutedMap of ComposedMap not implemented: simply permute before composing")
    self.map_ = map_
    self.comm = mpi.internal_comm(map_.comm, self)
    self.permutation = np.asarray(permutation, dtype=Map.dtype)
    # np.array_equal also copes with a length mismatch, which an
    # elementwise ``==`` does not. Raising explicitly keeps the check
    # active under ``python -O``.
    expected = np.arange(map_.arity, dtype=Map.dtype)
    if not np.array_equal(np.unique(permutation), expected):
        raise ValueError("permutation must consist of the indices 0, ..., %d"
                         % (map_.arity - 1))
def __init__(self, *maps_, name=None):
    """Compose ``maps_`` so that ``maps_[0]`` is applied last.

    :arg maps_: The maps to compose. Each map's ``iterset`` must be the
        ``toset`` of the next one, all must share a communicator, and
        every map after the first must have arity 1.
    :arg name: Optional name; a default is generated from ``id(self)``.
    :raises TypeError: if any argument is not a :class:`Map`.
    :raises pyop2.exceptions.MapTypeError: if the maps do not chain.
    """
    if not all(isinstance(m, Map) for m in maps_):
        raise TypeError("All maps must be Map instances")
    for tomap, frommap in zip(maps_[:-1], maps_[1:]):
        if tomap.iterset is not frommap.toset:
            raise ex.MapTypeError("tomap.iterset must match frommap.toset")
        if tomap.comm is not frommap.comm:
            raise ex.MapTypeError("All maps needs to share a communicator")
        if frommap.arity != 1:
            raise ex.MapTypeError("frommap.arity must be 1")
    self._iterset = maps_[-1].iterset
    self._toset = maps_[0].toset
    self.comm = mpi.internal_comm(self._toset.comm, self)
    self._arity = maps_[0].arity
    # Don't call super().__init__() to avoid calling verify_reshape()
    self._values = None
    self.shape = (self._iterset.total_size, self._arity)
    self._name = name or "cmap_#x%x" % id(self)
    self._offset = maps_[0]._offset
    # Mirror ``_offset``: the inherited ``offset_quotient`` property reads
    # this attribute and would otherwise raise AttributeError.
    self._offset_quotient = getattr(maps_[0], "_offset_quotient", None)
    # A cache for objects built on top of this map
    self._cache = {}
    self.maps_ = tuple(maps_)
@utils.cached_property
def iterset(self):
    """The iteration set shared by all constituent maps.

    ``None`` entries in the underlying tuple of maps are ignored.

    :returns: the common ``iterset``, or ``None`` if every entry is ``None``.
    :raises RuntimeError: if the maps do not all share one iterset.
    """
    # Count the distinct itersets themselves; unpacking the single element
    # and taking its len() would test the wrong object.
    itersets = {m.iterset for m in self._maps if m is not None}
    if len(itersets) > 1:
        raise RuntimeError("Found multiple itersets.")
    return next(iter(itersets), None)
@utils.cached_property
def arity(self):
    """Arity of the mapping: total number of toset elements mapped to per
    iterset element.

    :raises RuntimeError: if the constituent maps do not share one iterset.
    """
    # Count the distinct itersets; testing the length of an unpacked
    # element would measure the Set itself rather than how many there are.
    itersets = {m.iterset for m in self._maps}
    if len(itersets) != 1:
        raise RuntimeError("Found multiple itersets.")
    return sum(m.arity for m in self._maps)
@utils.cached_property
def flattened_maps(self):
    """Not available on a :class:`MixedMap`; accessing it always raises.

    :raises NotImplementedError: unconditionally.
    """
    raise NotImplementedError("flattened_maps should not be necessary for MixedMap")
def __init__(self, dsets, maps_and_regions, name=None, nest=None, block_sparse=None, diagonal_block=True):
    r"""
    :param dsets: :class:`DataSet`\s for the left and right function
        spaces this :class:`Sparsity` maps between
    :param maps_and_regions: `dict` to build the :class:`Sparsity` from.
        ``maps_and_regions`` must be keyed by the block index pair (i, j).
        ``maps_and_regions[(i, j)]`` must be a list of tuples of
        ``(rmap, cmap, iteration_regions)``, where ``rmap`` and ``cmap``
        are a pair of :class:`Map`\s specifying a row map and a column map,
        and ``iteration_regions`` represent regions that select subsets
        of extruded maps to iterate over. If the matrix only has a single
        block, one can alternatively pass the value ``maps_and_regions[(0, 0)]``.
    :param string name: user-defined label (optional)
    :param nest: Should the sparsity over mixed set be built as nested blocks?
    :param block_sparse: Should the sparsity for datasets with
        cdim > 1 be built as a block sparsity?
    :param diagonal_block: Flag indicating whether this sparsity is for
        a matrix/submatrix located on the diagonal.
    :raises ValueError: if the left and right communicators differ.
    :raises SparsityFormatError: for a monolithic sparsity over a
        :class:`MixedDataSet` that contains a :class:`GlobalDataSet`.
    """
    # Protect against re-initialization when retrieved from cache
    if self._initialized:
        return
    self._dsets = dsets
    self._maps_and_regions = maps_and_regions
    self._block_sparse = block_sparse
    self._diagonal_block = diagonal_block
    self.lcomm = mpi.internal_comm(self.dsets[0].comm, self)
    self.rcomm = mpi.internal_comm(self.dsets[1].comm, self)
    if isinstance(dsets[0], GlobalDataSet) or isinstance(dsets[1], GlobalDataSet):
        # A Global row or column space: a single 1x1 block and no
        # preallocation counts.
        self._dims = (((1, 1),),)
        self._d_nnz = None
        self._o_nnz = None
    else:
        rset, cset = self.dsets
        # Diagonal entries are only relevant when both spaces coincide
        # and the caller says this block sits on the diagonal.
        self._has_diagonal = (rset == cset) and diagonal_block
        # product() yields (row cdim, col cdim) pairs in row-major order,
        # which is exactly the order in which dims[r][c] is filled below.
        tmp = itertools.product([x.cdim for x in self.dsets[0]],
                                [x.cdim for x in self.dsets[1]])
        dims = [[None for _ in range(self.shape[1])] for _ in range(self.shape[0])]
        for r in range(self.shape[0]):
            for c in range(self.shape[1]):
                dims[r][c] = next(tmp)
        self._dims = tuple(tuple(d) for d in dims)
    if self.lcomm != self.rcomm:
        raise ValueError("Haven't thought hard enough about different left and right communicators")
    self.comm = mpi.internal_comm(self.lcomm, self)
    self._name = name or "sparsity_#x%x" % id(self)
    # If the Sparsity is defined on MixedDataSets, we need to build each
    # block separately
    if (isinstance(dsets[0], MixedDataSet) or isinstance(dsets[1], MixedDataSet)) \
       and nest:
        self._nested = True
        self._blocks = []
        for i, rds in enumerate(dsets[0]):
            row = []
            for j, cds in enumerate(dsets[1]):
                # Each block is a Sparsity of its own; blocks without an
                # entry in maps_and_regions are built from an empty tuple.
                row.append(Sparsity((rds, cds), tuple(self._maps_and_regions[(i, j)]) if (i, j) in self._maps_and_regions else (),
                                    block_sparse=block_sparse,
                                    diagonal_block=(dsets[0] is dsets[1] and i == j)))
            self._blocks.append(row)
        # For a nested sparsity the counts are per-block tuples
        # (iteration over self goes by row, then by column).
        self._d_nnz = tuple(s._d_nnz for s in self)
        self._o_nnz = tuple(s._o_nnz for s in self)
    elif isinstance(dsets[0], GlobalDataSet) or isinstance(dsets[1], GlobalDataSet):
        # Where the sparsity maps either from or to a Global, we
        # don't really have any sparsity structure.
        self._blocks = [[self]]
        self._nested = False
    else:
        for dset in dsets:
            if isinstance(dset, MixedDataSet) and any([isinstance(d, GlobalDataSet) for d in dset]):
                raise ex.SparsityFormatError("Mixed monolithic matrices with Global rows or columns are not supported.")
        self._nested = False
        with profiling.timed_region("CreateSparsity"):
            nnz, onnz = sparsity.build_sparsity(self)
            self._d_nnz = nnz
            self._o_nnz = onnz
        self._blocks = [[self]]
    self._initialized = True
+ maps_and_regions = {(0, 0): maps_and_regions} + elif not isinstance(maps_and_regions, dict): + raise TypeError(f"maps_and_regions must be dict or Sequence: got {type(maps_and_regions)}") + processed_maps_and_regions = {(i, j): frozenset() for i, _ in enumerate(dsets[0]) for j, _ in enumerate(dsets[1])} + for key, val in maps_and_regions.items(): + i, j = key # block indices: (0, 0) if not mixed + if i >= len(dsets[0]) or j >= len(dsets[1]): + raise RuntimeError(f"(i, j) must be < {(len(dsets[0]), len(dsets[1]))}: got {(i, j)}") + processed_val = set() + for rmap, cmap, iteration_regions in set(val): + if not isinstance(dsets[0][i], GlobalDataSet) and not isinstance(dsets[1][j], GlobalDataSet): + for m in [rmap, cmap]: + if not isinstance(m, Map): + raise ex.MapTypeError( + "All maps must be of type map, not type %r" % type(m)) + if not isinstance(m, ComposedMap) and len(m.values_with_halo) == 0 and m.iterset.total_size > 0: + raise ex.MapValueError( + "Unpopulated map values when trying to build sparsity.") + if rmap.toset is not dsets[0][i].set or cmap.toset is not dsets[1][j].set: + raise RuntimeError("Map toset must be the same as DataSet set") + if rmap.iterset is not cmap.iterset: + raise RuntimeError("Iterset of both maps in a pair must be the same") + if iteration_regions is None: + iteration_regions = (IterationRegion.ALL, ) + else: + iteration_regions = tuple(sorted(iteration_regions)) + processed_val.update(((rmap, cmap, iteration_regions), )) + if len(processed_val) > 0: + processed_maps_and_regions[key] = frozenset(processed_val) + processed_maps_and_regions = dict(sorted(processed_maps_and_regions.items())) + # Need to return the caching object, a tuple of the processed + # arguments and a dict of kwargs. 
@classmethod
def _cache_key(cls, dsets, maps_and_regions, name, nest, block_sparse, diagonal_block, *args, **kwargs):
    """Return the key under which an equivalent :class:`Sparsity` is cached.

    ``name`` is deliberately left out: it is only a label.
    ``diagonal_block`` is part of the key because ``__init__`` derives
    ``_has_diagonal`` from it, so two sparsities that differ only in
    that flag must not share a cache entry.
    """
    return (dsets, tuple(maps_and_regions.items()), nest, block_sparse, diagonal_block)
@utils.cached_property
def nested(self):
    r"""Whether the sparsity is nested (``True``) rather than monolithic
    (``False``), even if it has a block structure.

    To elaborate, if a sparsity maps between
    :class:`MixedDataSet`\s, it can either be nested, in which
    case it consists of as many blocks as the product of the
    lengths of the datasets it maps between, or monolithic. In the
    latter case the sparsity is for the full map between the mixed
    datasets, rather than between the blocks of the non-mixed
    datasets underneath them.
    """
    return self._nested
class SparsityBlock(Sparsity):
    """A proxy class for a block in a monolithic :class:`.Sparsity`.

    :arg parent: The parent monolithic sparsity.
    :arg i: The block row.
    :arg j: The block column.

    .. warning::

        This class only implements the properties necessary to infer
        its shape. It does not provide arrays of non zero fill."""
    def __init__(self, parent, i, j):
        # Protect against re-initialization when retrieved from cache
        if self._initialized:
            return

        self._dsets = (parent.dsets[0][i], parent.dsets[1][j])
        self._maps_and_regions = {(0, 0): tuple(parent._maps_and_regions[(i, j)]) if (i, j) in parent._maps_and_regions else ()}
        self._has_diagonal = i == j and parent._has_diagonal
        self._parent = parent
        # Remember the block position: __repr__ reports it.
        self._i = i
        self._j = j
        self._dims = tuple([tuple([parent.dims[i][j]])])
        self._blocks = [[self]]
        self.lcomm = mpi.internal_comm(self.dsets[0].comm, self)
        self.rcomm = mpi.internal_comm(self.dsets[1].comm, self)
        # TODO: think about lcomm != rcomm
        self.comm = mpi.internal_comm(self.lcomm, self)
        self._initialized = True

    @classmethod
    def _process_args(cls, *args, **kwargs):
        """Pass the constructor arguments through with ``None`` as the caching object."""
        return (None, ) + args, kwargs

    @classmethod
    def _cache_key(cls, *args, **kwargs):
        """Always return ``None`` as the cache key."""
        return None

    def __repr__(self):
        return "SparsityBlock(%r, %r, %r)" % (self._parent, self._i, self._j)
block: + indices = lgmap.block_indices.copy() + bsize = lgmap.getBlockSize() + else: + indices = lgmap.indices.copy() + bsize = 1 + indices[mask] = -1 + return PETSc.LGMap().create(indices=indices, bsize=bsize, comm=lgmap.comm) + + +class AbstractMat(DataCarrier, abc.ABC): + r"""OP2 matrix data. A ``Mat`` is defined on a sparsity pattern and holds a value + for each element in the :class:`Sparsity`. + + When a ``Mat`` is passed to :func:`pyop2.op2.par_loop`, the maps via which + indirection occurs for the row and column space, and the access + descriptor are passed by `calling` the ``Mat``. For instance, if a + ``Mat`` named ``A`` is to be accessed for reading via a row :class:`Map` + named ``R`` and a column :class:`Map` named ``C``, this is accomplished by:: + + A(pyop2.READ, (R[pyop2.i[0]], C[pyop2.i[1]])) + + Notice that it is `always` necessary to index the indirection maps + for a ``Mat``. See the :class:`Mat` documentation for more + details. + + .. note :: + + After executing :func:`par_loop`\s that write to a ``Mat`` and + before using it (for example to view its values), you must call + :meth:`assemble` to finalise the writes. 
@utils.validate_type(('sparsity', Sparsity, ex.SparsityTypeError),
                     ('name', str, ex.NameTypeError))
def __init__(self, sparsity, dtype=None, name=None):
    """Record the sparsity, communicators, dtype and name of the matrix.

    :param sparsity: the :class:`Sparsity` the matrix is defined on.
    :param dtype: data type of the entries; ``dtypes.ScalarType`` is
        used when this is not given.
    :param name: user-defined label; a label derived from ``id(self)``
        is used when this is not given.
    """
    self._sparsity = sparsity
    self.lcomm = mpi.internal_comm(sparsity.lcomm, self)
    self.rcomm = mpi.internal_comm(sparsity.rcomm, self)
    self.comm = mpi.internal_comm(sparsity.comm, self)
    dtype = dtype or dtypes.ScalarType
    self._datatype = np.dtype(dtype)
    self._name = name or "mat_#x%x" % id(self)
    # ASSEMBLED is defined on this class, so read it through self
    # instead of reaching for the Mat subclass.
    self.assembly_state = self.ASSEMBLED
def addto_values(self, rows, cols, values):
    """Add a block of values to the :class:`Mat`.

    :raises NotImplementedError: always; the abstract base class does
        not implement this operation.
    """
    raise NotImplementedError(
        "Abstract Mat base class doesn't know how to add values.")
def change_assembly_state(self, new_state):
    """Switch the matrix assembly state.

    Moving to or from the assembled state just records ``new_state``.
    Moving between two different non-assembled states first calls
    :meth:`_flush_assembly`. Asking for the state the matrix is already
    in does nothing.

    :param new_state: the assembly state to switch to.
    """
    # ASSEMBLED is a class attribute of this hierarchy; reading it via
    # self keeps the base class independent of the Mat subclass.
    assembled = self.ASSEMBLED
    if new_state == assembled or self.assembly_state == assembled:
        self.assembly_state = new_state
    elif new_state != self.assembly_state:
        self._flush_assembly()
        self.assembly_state = new_state
This will be the correct size of the + data payload, but does not take into account the (presumably + small) overhead of the object and its metadata. The memory + associated with the sparsity pattern is also not recorded. + + Note that this is the process local memory usage, not the sum + over all MPI processes. + """ + if self._sparsity._block_sparse: + mult = np.sum(np.prod(self._sparsity.dims)) + else: + mult = 1 + return (self._sparsity.nz + self._sparsity.onz) \ + * self.dtype.itemsize * mult + + def __iter__(self): + """Yield self when iterated over.""" + yield self + + def __mul__(self, other): + """Multiply this :class:`Mat` with the vector ``other``.""" + raise NotImplementedError("Abstract base Mat does not implement multiplication") + + def __str__(self): + return "OP2 Mat: %s, sparsity (%s), datatype %s" \ + % (self._name, self._sparsity, self._datatype.name) + + def __repr__(self): + return "Mat(%r, %r, %r)" \ + % (self._sparsity, self._datatype, self._name) + + +class Mat(AbstractMat): + """OP2 matrix data. 
@mpi.collective
def _init(self):
    """Build the matrix handle, choosing the builder from ``mat_type``
    and from the block layout of the sparsity.

    :raises RuntimeError: if the dtype is not ``PETSc.ScalarType``.
    """
    if not self.dtype == PETSc.ScalarType:
        raise RuntimeError("Can only create a matrix of type %s, %s is not supported"
                           % (PETSc.ScalarType, self.dtype))

    # An explicit request for a dense matrix wins over everything else.
    if self.mat_type == "dense":
        self._init_dense()
        return

    # A single block needs neither a nest nor a monolithic layout.
    multi_block = self.sparsity.shape > (1, 1)
    if not multi_block:
        self._init_block()
        return

    # Several blocks: either one matrix per block or one monolithic matrix.
    if self.sparsity.nested:
        self._init_nest()
        self._nested = True
    else:
        self._init_monolithic()
+ with profiling.timed_region("MatZeroInitial"): + mat.zeroEntries() + mat.assemble() + + def _init_monolithic(self): + mat = PETSc.Mat() + rset, cset = self.sparsity.dsets + rlgmap = rset.unblocked_lgmap + clgmap = cset.unblocked_lgmap + mat.createAIJ(size=((self.nrows, None), (self.ncols, None)), + nnz=(self.sparsity.nnz, self.sparsity.onnz), + bsize=1, + comm=self.comm) + mat.setLGMap(rmap=rlgmap, cmap=clgmap) + self.handle = mat + self._blocks = [] + rows, cols = self.sparsity.shape + for i in range(rows): + row = [] + for j in range(cols): + row.append(MatBlock(self, i, j)) + self._blocks.append(row) + mat.setOption(mat.Option.IGNORE_ZERO_ENTRIES, False) + mat.setOption(mat.Option.KEEP_NONZERO_PATTERN, True) + # We completely fill the allocated matrix when zeroing the + # entries, so raise an error if we "missed" one. + mat.setOption(mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) + mat.setOption(mat.Option.IGNORE_OFF_PROC_ENTRIES, False) + mat.setOption(mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + # The first assembly (filling with zeros) sets all possible entries. + mat.setOption(mat.Option.SUBSET_OFF_PROC_ENTRIES, True) + # Put zeros in all the places we might eventually put a value. 
def _init_nest(self):
    """Create one :class:`Mat` per block of the sparsity and combine
    their handles into a single nested matrix stored in ``self.handle``."""
    mat = PETSc.Mat()
    self._blocks = []
    rows, cols = self.sparsity.shape
    rset, cset = self.sparsity.dsets
    for i in range(rows):
        row = []
        for j in range(cols):
            # Block (i, j) is named "<name>_<i>_<j>".
            row.append(Mat(self.sparsity[i, j], self.dtype,
                           '_'.join([self.name, str(i), str(j)])))
        self._blocks.append(row)
    # createNest is handed the block handles as one list per block row,
    # mirroring the layout of self._blocks.
    mat.createNest([[m.handle for m in row_] for row_ in self._blocks],
                   isrows=rset.field_ises, iscols=cset.field_ises,
                   comm=self.comm)
    self.handle = mat
+ block_sparse = False + create = mat.createAIJ + create(size=((self.nrows, None), + (self.ncols, None)), + nnz=(self.sparsity.nnz, self.sparsity.onnz), + bsize=(rdim, cdim), + comm=self.comm) + mat.setLGMap(rmap=row_lg, cmap=col_lg) + # Stash entries destined for other processors + mat.setOption(mat.Option.IGNORE_OFF_PROC_ENTRIES, False) + # Any add or insertion that would generate a new entry that has not + # been preallocated will raise an error + mat.setOption(mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + # Do not ignore zeros while we fill the initial matrix so that + # petsc doesn't compress things out. + if not block_sparse: + mat.setOption(mat.Option.IGNORE_ZERO_ENTRIES, False) + # When zeroing rows (e.g. for enforcing Dirichlet bcs), keep those in + # the nonzero structure of the matrix. Otherwise PETSc would compact + # the sparsity and render our sparsity caching useless. + mat.setOption(mat.Option.KEEP_NONZERO_PATTERN, True) + # We completely fill the allocated matrix when zeroing the + # entries, so raise an error if we "missed" one. + mat.setOption(mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) + # Put zeros in all the places we might eventually put a value. + with profiling.timed_region("MatZeroInitial"): + sparsity.fill_with_zeros(mat, self.sparsity.dims[0][0], + self.sparsity.rcmaps[(0, 0)], + self.sparsity.iteration_regions[(0, 0)], + set_diag=self.sparsity._has_diagonal) + mat.assemble() + mat.setOption(mat.Option.NEW_NONZERO_LOCATION_ERR, True) + # Now we've filled up our matrix, so the sparsity is + # "complete", we can ignore subsequent zero entries. + if not block_sparse: + mat.setOption(mat.Option.IGNORE_ZERO_ENTRIES, True) + self.handle = mat + + def _init_global_block(self): + """Initialise this block in the case where the matrix maps either + to or from a :class:`Global`""" + + if (isinstance(self.sparsity._dsets[0], GlobalDataSet) and isinstance(self.sparsity._dsets[1], GlobalDataSet)): + # In this case both row and column are a Global. 
def __call__(self, access, path, lgmaps=None, unroll_map=False):
    """Override the parent __call__ method in order to special-case global
    blocks in matrices.

    :param access: the access descriptor.
    :param path: pair of row and column maps; ``None`` in either slot
        selects the special-cased global handling.
    :param lgmaps: optional local-to-global maps. For a
        ``(None, None)`` path it may be omitted; when given it must
        hold a single entry whose members are all ``None``.
    :param unroll_map: forwarded to the parent implementation.
    """
    from pyop2.parloop import GlobalLegacyArg, DatLegacyArg

    if path == (None, None):
        # lgmaps defaults to None, so only unpack and check it when the
        # caller actually supplied one.
        if lgmaps is not None:
            lgmaps, = lgmaps
            assert all(lgmap is None for lgmap in lgmaps)
        return GlobalLegacyArg(self.handle.getPythonContext().global_, access)
    elif None in path:
        # Exactly one side is None: use whichever map is present.
        thispath = path[0] or path[1]
        return DatLegacyArg(self.handle.getPythonContext().dat, thispath, access)
    else:
        return super().__call__(access, path, lgmaps, unroll_map)
@mpi.collective
def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None):
    """Set the diagonal entry in ``rows`` to a particular value.

    :param rows: a :class:`Subset` or an iterable.
    :param diag_val: the value to insert on the diagonal (existing
        entries are overwritten, not added to).
    :param idx: for a row block size greater than one, the component
        within each block whose diagonal entry is set; when ``None``
        every component of each block is set.

    The indices in ``rows`` should index the process-local rows of
    the matrix (no mapping to global indexes is applied).
    """
    # Unwrap a Subset the same way zero_rows does before converting.
    rows = rows.indices if isinstance(rows, Subset) else rows
    rows = np.asarray(rows, dtype=dtypes.IntType)
    rbs, _ = self.dims[0][0]
    if rbs > 1:
        # Expand block-row indices into scalar row indices.
        if idx is not None:
            rows = rbs * rows + idx
        else:
            rows = np.dstack([rbs*rows + i for i in range(rbs)]).flatten()
    rows = rows.reshape(-1, 1)
    self.change_assembly_state(Mat.INSERT_VALUES)
    if len(rows) > 0:
        values = np.full(rows.shape, diag_val, dtype=dtypes.ScalarType)
        self.handle.setValuesLocalRCV(rows, rows, values,
                                      addv=PETSc.InsertMode.INSERT_VALUES)
@property
def values(self):
    """Return every entry of the assembled matrix via ``[:, :]`` indexing.

    The matrix is assembled first. Matrices with more than one million
    entries are refused with a ``ValueError``. When either data set of
    the sparsity is a ``GlobalDataSet`` the entries are read from the
    Python context of the handle, otherwise from the handle itself.
    """
    self.assemble()
    n_entries = self.nrows * self.ncols
    if n_entries > 1000000:
        raise ValueError("Printing dense matrix with more than 1 million entries not allowed.\n"
                         "Are you sure you wanted to do this?")
    has_global_dset = any(isinstance(self.sparsity._dsets[k], GlobalDataSet)
                          for k in (0, 1))
    source = self.handle.getPythonContext() if has_global_dset else self.handle
    return source[:, :]
def __init__(self, parent, i, j):
    """Create the proxy for block ``(i, j)`` of the monolithic ``parent``.

    :arg parent: The parent monolithic matrix.
    :arg i: The block row.
    :arg j: The block column.

    The PETSc handle is the local submatrix of the parent's handle
    selected by the ``local_ises`` of the parent's row and column data
    sets.
    """
    self._parent, self._i, self._j = parent, i, j
    self._sparsity = SparsityBlock(parent.sparsity, i, j)
    row_dset, col_dset = self._parent.sparsity.dsets
    row_is = row_dset.local_ises[i]
    col_is = col_dset.local_ises[j]
    self.handle = parent.handle.getLocalSubMatrix(isrow=row_is, iscol=col_is)
    self.comm = mpi.internal_comm(parent.comm, self)
    self.local_to_global_maps = self.handle.getLGMap()
def _DatMat(sparsity, dat=None):
    """A :class:`PETSc.Mat` with global size n x 1 or 1 x n implemented as a
    :class:`.Dat`

    :arg sparsity: sparsity whose ``dsets`` pair contains a
        ``GlobalDataSet`` in exactly the position that has extent one.
    :arg dat: optional data handed on to :class:`_DatMatPayload`.
    :raises ValueError: if neither data set is a ``GlobalDataSet``.
    """
    if isinstance(sparsity.dsets[0], GlobalDataSet):
        vec_dset = sparsity.dsets[1]
        local_len = vec_dset.size*vec_dset.cdim
        sizes = ((None, 1), (local_len, None))
    elif isinstance(sparsity.dsets[1], GlobalDataSet):
        vec_dset = sparsity.dsets[0]
        local_len = vec_dset.size * vec_dset.cdim
        sizes = ((local_len, None), (None, 1))
    else:
        raise ValueError("Not a DatMat")

    mat = PETSc.Mat().createPython(sizes, comm=sparsity.comm)
    mat.setPythonContext(_DatMatPayload(sparsity, dat))
    mat.setUp()
    return mat
def multTransposeAdd(self, mat, x, y, z):
    """Compute ``z = y + mat^T x``.

    :arg mat: the PETSc matrix this payload belongs to (unused).
    :arg x: input vector.
    :arg y: vector to add; may be the same vector as ``z``.
    :arg z: output vector.

    For a row matrix (``self.sizes[0][0] is None``) the product is either
    a scaling of the stored vector by the single entry of ``x`` or a
    pointwise product with ``x``; for a column matrix it is a dot product
    whose result is written on rank 0.
    """
    with self.dat.vec_ro as v:
        if self.sizes[0][0] is None:
            # Row matrix
            if y == z:
                # Last two arguments are aliased: save y *before* anything
                # is written into z, otherwise its contents are lost.
                tmp = y.duplicate()
                y.copy(tmp)
                y = tmp
            if x.sizes[1] == 1:
                v.copy(z)
                a = np.zeros(1, dtype=dtypes.ScalarType)
                if x.comm.rank == 0:
                    a[0] = x.array_r
                else:
                    # NOTE(review): presumably touched so that array
                    # access happens on every rank - confirm.
                    x.array_r
                with mpi.temp_internal_comm(x.comm) as comm:
                    # The lowercase bcast returns the broadcast object and
                    # does not fill its argument in place, so rebind.
                    a = comm.bcast(a)
                z.scale(a)
                z.axpy(1, y)
            else:
                v.pointwiseMult(x, z)
                return z.axpy(1, y)
        else:
            # Column matrix
            out = v.dot(x)
            y = y.array_r
            if z.comm.rank == 0:
                z.array[0] = out + y[0]
            else:
                z.array[...]
class _GlobalMatPayload:
    """Python context of the 1x1 matrix built by ``_GlobalMat``.

    The single entry is stored in ``self.global_``. In every operation
    only rank 0 of the vector's communicator writes a value; the other
    ranks merely touch the arrays.
    """

    def __init__(self, global_=None, comm=None):
        """Store ``global_`` or, when it is falsy, a fresh scalar ``Global``."""
        from pyop2.types.glob import Global
        if global_:
            self.global_ = global_
        else:
            self.global_ = Global(1, dtype=PETSc.ScalarType, comm=comm)

    def __getitem__(self, key):
        """Index the entry viewed as a read-only 1x1 array."""
        as_matrix = self.global_.data_ro.reshape(1, 1)
        return as_matrix[key]

    def zeroEntries(self, mat):
        """Set the stored entry to zero."""
        self.global_.data[...] = 0.0

    def getDiagonal(self, mat, result=None):
        """Write the entry into ``result`` (created if ``None``) and return it."""
        if result is None:
            result = self.global_.dataset.layout_vec.duplicate()
        if result.comm.rank != 0:
            result.array[...]
        else:
            result.array[...] = self.global_.data_ro
        return result

    def mult(self, mat, x, result):
        """Store ``entry * x`` in ``result``."""
        if result.comm.rank != 0:
            result.array[...]
        else:
            result.array[...] = self.global_.data_ro * x.array_r

    def multTransposeAdd(self, mat, x, y, z):
        """Store ``entry * x + y`` in ``z``; ``y`` and ``z`` may be aliased."""
        if z.comm.rank != 0:
            x.array_r
            y.array_r
            z.array[...]
            return
        product = self.global_.data_ro * x.array_r
        if y == z:
            z.array[...] += product
        else:
            z.array[...] = product + y.array_r

    def duplicate(self, mat, copy=True):
        """Return a new 1x1 matrix, carrying a duplicate of the entry if ``copy``."""
        if not copy:
            return _GlobalMat(comm=mat.comm)
        return _GlobalMat(self.global_.duplicate(), comm=mat.comm)
@utils.validate_type(('size', (numbers.Integral, tuple, list, np.ndarray), ex.SizeTypeError),
                     ('name', str, ex.NameTypeError))
def __init__(self, size, name=None, halo=None, comm=None, constrained_size=0):
    """Initialise the set.

    :param size: a single integer (used for all three sizes) or a
        sequence of three integers indexed by ``_CORE_SIZE``,
        ``_OWNED_SIZE`` and ``_GHOST_SIZE``; they must be non-decreasing.
    :param name: optional label; defaults to ``"set_#x<id>"``.
    :param halo: optional halo object, stored unchanged.
    :param comm: communicator handed to ``mpi.internal_comm``.
    :param constrained_size: stored unchanged.
    """
    self.comm = mpi.internal_comm(comm, self)
    if isinstance(size, numbers.Integral):
        size = [size] * 3
    size = utils.as_tuple(size, numbers.Integral, 3)
    # ``size`` is a tuple, so it must be wrapped to be formatted as one
    # value; a bare ``% size`` raises TypeError instead of the assertion.
    assert size[Set._CORE_SIZE] <= size[Set._OWNED_SIZE] <= \
        size[Set._GHOST_SIZE], "Set received invalid sizes: %s" % (size,)
    self._sizes = size
    self._name = name or "set_#x%x" % id(self)
    self._halo = halo
    self._partition_size = 1024
    self._constrained_size = constrained_size

    # A cache of objects built on top of this set
    self._cache = {}
def __eq__(self, other):
    """Two Sets are the same if they have the same sizes and names.

    Objects that do not provide ``_sizes`` and ``_name`` are not
    comparable: ``NotImplemented`` is returned so that Python falls back
    to the other operand (and finally to ``False``) instead of raising
    ``AttributeError``.
    """
    try:
        return self._sizes == other._sizes and self._name == other._name
    except AttributeError:
        return NotImplemented
def __call__(self, *indices):
    """Build a :class:`Subset` from this :class:`Set`

    :arg indices: The elements of this :class:`Set` from which the
                  :class:`Subset` should be formed. A single argument is
                  unwrapped, and wrapped in a list if it is a scalar.

    """
    if len(indices) == 1:
        (only,) = indices
        indices = [only] if np.isscalar(only) else only
    return Subset(self, indices)
def difference(self, other):
    """Return the elements of this set that are not in ``other``.

    ``other`` is validated with ``_check_operands``. Subtracting the set
    from itself gives an empty :class:`Subset`; otherwise the result is
    built with ``type(other)`` from all indices up to ``total_size``
    that are absent from ``other._indices``.
    """
    self._check_operands(other)
    if other is self:
        return Subset(self, [])
    everything = np.arange(self.total_size, dtype=dtypes.IntType)
    remaining = np.setdiff1d(everything, other._indices)
    return type(other)(self, remaining)
@utils.validate_type(('parent', Set, TypeError))
def __init__(self, parent, layers, extruded_periodic=False):
    """Initialise the extruded set.

    :param parent: the :class:`Set` this extruded set is built on.
    :param layers: either an array reshapeable to
        ``(parent.total_size, 2)`` holding ``[start, stop)`` per entity,
        or a single integer (at least 2) used for every entity.
    :param extruded_periodic: stored as ``_extruded_periodic``.
    :raises SizeTypeError: for negative layer bounds, an entity without
        at least one layer, a non-scalar value of the wrong shape, or
        fewer than two constant layers.
    """
    self._parent = parent
    self.comm = mpi.internal_comm(parent.comm, self)
    try:
        layers = utils.verify_reshape(layers, dtypes.IntType, (parent.total_size, 2))
        self.constant_layers = False
        if layers.min(initial=0) < 0:
            raise ex.SizeTypeError("Bottom of layers must be >= 0")
        if (layers[:, 1] - layers[:, 0] < 1).any():
            # The check rejects empty ranges, so the bound is one, not zero.
            raise ex.SizeTypeError("Number of layers must be >= 1")
    except ex.DataValueError:
        # Legacy, integer
        layers = np.asarray(layers, dtype=dtypes.IntType)
        if layers.shape:
            raise ex.SizeTypeError(f"Specifying layers per entity, but provided "
                                   f"{layers.shape}, needed ({parent.total_size}, 2)")
        if layers < 2:
            # Format the message here: exceptions do not interpolate
            # extra positional arguments the way logging calls do.
            raise ex.SizeTypeError("Need at least two layers, not %d" % int(layers))
        layers = np.asarray([[0, layers]], dtype=dtypes.IntType)
        self.constant_layers = True

    self._layers = layers
    self._extruded = True
    self._extruded_periodic = extruded_periodic
def __getattr__(self, name):
    """Returns a :class:`Set` specific attribute.

    Called only when normal lookup fails; the request is forwarded to
    the parent set.

    :raises AttributeError: if ``_parent`` itself is missing.
    """
    if name == "_parent":
        # We only get here when ``_parent`` is not set yet (for instance
        # on an instance created without running ``__init__``). Looking
        # it up on ``self`` again would recurse without end.
        raise AttributeError(name)
    return getattr(self._parent, name)
@utils.validate_type(('superset', Set, TypeError),
                     ('indices', (list, tuple, np.ndarray), TypeError))
def __init__(self, superset, indices):
    """Initialise the subset.

    :param superset: the set (or subset) the indices refer to. Indices
        into a :class:`Subset` are translated to its own superset.
    :param indices: elements of ``superset``; sorted and de-duplicated
        with ``np.unique``.
    :raises SubsetIndexOutOfBounds: if an index lies outside
        ``[0, superset.total_size)``.
    """
    self.comm = mpi.internal_comm(superset.comm, self)

    # sort and remove duplicates
    indices = np.unique(indices)
    if isinstance(superset, Subset):
        # Unroll indices to point to those in the parent
        indices = superset.indices[indices]
        superset = superset.superset
    assert type(superset) is Set or type(superset) is ExtrudedSet, \
        'Subset construction failed, should not happen'

    self._superset = superset
    n_indices = len(indices)
    self._indices = utils.verify_reshape(indices, dtypes.IntType, (n_indices,))

    # The indices are sorted, so only the two ends need checking.
    if len(self._indices) > 0:
        lowest, highest = self._indices[0], self._indices[-1]
        if lowest < 0 or highest >= self._superset.total_size:
            raise ex.SubsetIndexOutOfBounds(
                'Out of bounds indices in Subset construction: [%d, %d) not [0, %d)' %
                (lowest, highest, self._superset.total_size))

    n_core = (self._indices < superset.core_size).sum()
    n_owned = (self._indices < superset.size).sum()
    self._sizes = (n_core, n_owned, len(self._indices))
    self._extruded = superset._extruded
    self._extruded_periodic = superset._extruded_periodic
def __getattr__(self, name):
    """Returns a :class:`Set` specific attribute.

    Called only when normal lookup fails; the request is forwarded to
    the superset.

    :raises AttributeError: if ``_superset`` itself is missing.
    """
    if name == "_superset":
        # We only get here when ``_superset`` is not set yet (for
        # instance on an instance created without running ``__init__``).
        # Looking it up on ``self`` again would recurse without end.
        raise AttributeError(name)
    return getattr(self._superset, name)
class SetPartition:
    """Plain record describing a contiguous part of a set.

    Stores the three constructor arguments unchanged as the attributes
    ``set``, ``offset`` and ``size``.
    """

    def __init__(self, set, offset, size):
        # No validation or copying: the values are kept exactly as given.
        self.size = size
        self.offset = offset
        self.set = set
@classmethod
def _process_args(cls, sets, **kwargs):
    """Normalise the constructor arguments.

    ``sets`` is materialised and turned into a tuple, first trying to
    treat every member as an ``ExtrudedSet`` and, if that raises
    ``TypeError``, as ``Set`` or ``None``. Returns
    ``((first_member, members), kwargs)``.
    """
    members = list(sets)
    try:
        members = utils.as_tuple(members, ExtrudedSet)
    except TypeError:
        members = utils.as_tuple(members, (Set, type(None)))
    first_member = members[0]
    return (first_member, members), kwargs
@utils.cached_property
def total_size(self):
    """Total set size, including halo elements.

    ``None`` members contribute nothing, matching how ``size`` treats
    them.
    """
    return sum(0 if s is None else s.total_size for s in self._sets)
== other._sets diff --git a/pyop2/utils.py b/pyop2/utils.py new file mode 100644 index 0000000000..2739c75f58 --- /dev/null +++ b/pyop2/utils.py @@ -0,0 +1,327 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 
def tuplify(xs):
    """Turn a data structure into a tuple tree.

    Iterables are converted recursively into tuples; anything that
    cannot be iterated is returned unchanged. Strings are treated as
    leaves and returned as they are.
    """
    if isinstance(xs, str):
        # A one-character string iterates to itself, so descending into
        # strings would recurse until RecursionError.
        return xs
    try:
        return tuple(tuplify(x) for x in xs)
    except TypeError:
        return xs
def check_args(self, args, kwargs):
    """Run ``check_arg`` on every configured argument that was passed.

    :arg args: positional arguments of the decorated call.
    :arg kwargs: keyword arguments of the decorated call.

    A check is skipped when the decorated function has no parameter of
    that name, when the argument was not supplied, or when the supplied
    value equals the parameter's default.
    """
    for argname, argcond, exception in self._checks:
        # If the argument argname is not present in the decorated function
        # silently ignore it
        try:
            i = self.varnames.index(argname)
        except ValueError:
            # No formal parameter argname
            continue
        # Try the argument by keyword first, and by position second.
        # Test for presence rather than truthiness so that falsy keyword
        # values (0, "", [] ...) are still validated.
        if argname in kwargs:
            arg = kwargs[argname]
        else:
            try:
                arg = args[i]
            except IndexError:
                # No actual parameter argname
                continue
        # If the argument has a default value, also accept that (since the
        # constructor will be able to deal with that)
        default_index = i - self.nargs + len(self.defaults)
        if default_index >= 0 and arg == self.defaults[default_index]:
            continue
        self.check_arg(arg, argcond, exception)
type to be raised if validation fails.""" + + def check_arg(self, arg, values, exception): + if arg not in values: + raise exception("%s:%d %s must be one of %s" + % (self.file, self.line, arg, values)) + + +class validate_range(validate_base): + + """Decorator to validate argument value is in a given numeric range + + The decorator expects one or more arguments, which are 3-tuples of + (name, range, exception), where name is the argument name in the + function being decorated, range is a 2-tuple defining the valid argument + range and exception is the exception type to be raised if validation + fails.""" + + def check_arg(self, arg, range, exception): + if not range[0] <= arg <= range[1]: + raise exception("%s:%d %s must be within range %s" + % (self.file, self.line, arg, range)) + + +class validate_dtype(validate_base): + + """Decorator to validate argument value is in a valid Numpy dtype + + The decorator expects one or more arguments, which are 3-tuples of + (name, _, exception), where name is the argument name in the + function being decorated, second argument is ignored and exception + is the exception type to be raised if validation fails.""" + + def check_arg(self, arg, ignored, exception): + try: + np.dtype(arg) + except TypeError: + raise exception("%s:%d %s must be a valid dtype" + % (self.file, self.line, arg)) + + +def verify_reshape(data, dtype, shape, allow_none=False): + """Verify data is of type dtype and try to reshaped to shape.""" + + try: + t = np.dtype(dtype) if dtype is not None else None + except TypeError: + raise DataTypeError("Invalid data type: %s" % dtype) + if data is None and allow_none: + return np.asarray([], dtype=t) + elif data is None: + raise DataValueError("Invalid data: None is not allowed!") + else: + try: + a = np.asarray(data, dtype=t) + except ValueError: + raise DataValueError("Invalid data: cannot convert to %s!" 
% dtype) + except TypeError: + raise DataTypeError("Invalid data type: %s" % dtype) + try: + # Destructively modify shape. Fails if data are not + # contiguous, but that's what we want anyway. + a.shape = shape + return a + except ValueError: + raise DataValueError("Invalid data: expected %d values, got %d!" % + (np.prod(shape), np.asarray(data).size)) + + +def align(bytes, alignment=16): + """Align BYTES to a multiple of ALIGNMENT""" + return ((bytes + alignment - 1) // alignment) * alignment + + +def flatten(iterable): + """Flatten a given nested iterable.""" + return (x for e in iterable for x in e) + + +def parser(description=None, group=False): + """Create default argparse.ArgumentParser parser for pyop2 programs.""" + parser = argparse.ArgumentParser(description=description, + add_help=True, + prefix_chars="-", + formatter_class=argparse.RawDescriptionHelpFormatter) + + g = parser.add_argument_group( + 'pyop2', 'backend configuration options') if group else parser + + g.add_argument('-d', '--debug', default=argparse.SUPPRESS, + type=int, choices=list(range(8)), + help='set debug level' if group else 'set pyop2 debug level') + g.add_argument('-l', '--log-level', default='WARN', + choices=['CRITICAL', 'ERROR', 'WARN', 'INFO', 'DEBUG'], + help='set logging level (default=WARN)' if group else + 'set pyop2 logging level (default=WARN)') + + return parser + + +def parse_args(*args, **kwargs): + """Return parsed arguments as variables for later use. + + ARGS and KWARGS are passed into the parser instantiation. 
+ The only recognised options are `group` and `description`.""" + return vars(parser(*args, **kwargs).parse_args()) + + +def trim(docstring): + """Trim a docstring according to `PEP 257 + `_.""" + if not docstring: + return '' + # Convert tabs to spaces (following the normal Python rules) + # and split into a list of lines: + lines = docstring.expandtabs().splitlines() + # Determine minimum indentation (first line doesn't count): + indent = sys.maxsize + for line in lines[1:]: + stripped = line.lstrip() + if stripped: + indent = min(indent, len(line) - len(stripped)) + # Remove indentation (first line is special): + trimmed = [lines[0].strip()] + if indent < sys.maxsize: + for line in lines[1:]: + trimmed.append(line[indent:].rstrip()) + # Strip off trailing and leading blank lines: + while trimmed and not trimmed[-1]: + trimmed.pop() + while trimmed and not trimmed[0]: + trimmed.pop(0) + # Return a single string: + return '\n'.join(trimmed) + + +def strip(code): + return '\n'.join([l for l in code.splitlines() if l.strip() and l.strip() != ';']) + + +def get_petsc_dir(): + """Attempts to find the PETSc directory on the system + """ + petsc_config = petsc4py.get_config() + petsc_dir = petsc_config["PETSC_DIR"] + petsc_arch = petsc_config["PETSC_ARCH"] + pathlist = [petsc_dir] + if petsc_arch: + pathlist.append(os.path.join(petsc_dir, petsc_arch)) + return tuple(pathlist) + + +def get_petsc_variables(): + """Attempts obtain a dictionary of PETSc configuration settings + """ + path = [get_petsc_dir()[-1], "lib/petsc/conf/petscvariables"] + variables_path = os.path.join(*path) + with open(variables_path) as fh: + # Split lines on first '=' (assignment) + splitlines = (line.split("=", maxsplit=1) for line in fh.readlines()) + return {k.strip(): v.strip() for k, v in splitlines} diff --git a/pyop2/version.py b/pyop2/version.py new file mode 100644 index 0000000000..dcb98845b7 --- /dev/null +++ b/pyop2/version.py @@ -0,0 +1,3 @@ + +__version_info__ = (0, 12, 0) 
+__version__ = '.'.join(map(str, __version_info__)) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..69af99a483 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,102 @@ +[project] +name = "firedrake" +version = "0.14_dev" +description = "An automated system for the portable solution of partial differential equations using the finite element method" +readme = "README.rst" +license = {file = "LICENSE"} +maintainers = [ + {name = "Pablo D. Brubeck"}, + {name = "Daiane I. Dolci"}, + {name = "David A. Ham", email = "david.ham@imperial.ac.uk"}, + {name = "Josh Hope-Collins"}, + {name = "Koki Sagiyama"}, + {name = "Connor J. Ward", email = "c.ward20@imperial.ac.uk"}, +] +requires-python = ">=3.10" +dependencies = [ + "cachetools", + "decorator<=4.4.2", + "mpi4py", + "h5py", + "petsc4py", + "numpy", + "packaging", + "pkgconfig", + "progress", + "pycparser", + "pytools", + "requests", + "rtree>=1.2", + "scipy", + "sympy", + "fenics-ufl @ git+https://github.com/firedrakeproject/ufl.git", + "fenics-fiat @ git+https://github.com/firedrakeproject/fiat.git", + "finat @ git+https://github.com/FInAT/FInAT.git", + "tsfc @ git+https://github.com/firedrakeproject/tsfc.git", + "pyadjoint-ad @ git+https://github.com/dolfin-adjoint/pyadjoint.git", + "loopy @ git+https://github.com/firedrakeproject/loopy.git@main", +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)", + "Intended Audience :: Science/Research", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Operating System :: Unix", +] + +[project.urls] +Homepage = "https://firedrakeproject.org" +Repository = "https://github.com/firedrakeproject/firedrake" +Issues = 
"https://github.com/firedrakeproject/firedrake/issues/new/choose" + +[project.scripts] +firedrake-clean = "firedrake.scripts.firedrake_clean:main" +firedrake-preprocess-bibtex = "firedrake.scripts.firedrake_preprocess_bibtex:main" +firedrake-status = "firedrake.scripts.firedrake_status:main" +pyop2-clean = "pyop2.compilation:clear_compiler_disk_cache" +spydump = "pyop2.scripts.spydump:main" + +[project.optional-dependencies] +test = [ + "pylit", + "nbval", + "pytest", + "pytest-xdist", + "pytest-mpi @ git+https://github.com/firedrakeproject/pytest-mpi.git@main", +] +dev = [ + "flake8", + "pylint", +] +docs = [ + "sphinx", + "sphinx-autobuild", + "sphinxcontrib-bibtex", + "sphinxcontrib-svg2pdfconverter", + "sphinxcontrib-jquery", + "bibtexparser", + "sphinxcontrib-youtube", + "numpydoc", +] + +[build-system] +requires = [ + "setuptools>61.2", + "Cython>=3.0", + "pybind11", + "pkgconfig", + "numpy", + "mpi4py", + "petsc4py", + "rtree>=1.2", +] +build-backend = "setuptools.build_meta" + +# TODO: Convert firedrake-zenodo to a proper entrypoint script. 
+[tool.setuptools] +script-files = ["firedrake/scripts/firedrake-zenodo"] diff --git a/requirements-ext.txt b/requirements-ext.txt index 77bbe07bb3..a2d2c4415c 100644 --- a/requirements-ext.txt +++ b/requirements-ext.txt @@ -1,14 +1,20 @@ -cached_property cachetools +Cython>=3.0 +decorator<=4.4.2 +flake8 +mpi4py nbval +numpy packaging pkgconfig progress pybind11 +pycparser pylint pylit pytest pytest-xdist +pytools requests rtree>=1.2 scipy diff --git a/requirements-git.txt b/requirements-git.txt index b6e9e8e1dd..c037220ed1 100644 --- a/requirements-git.txt +++ b/requirements-git.txt @@ -1,7 +1,8 @@ -git+https://github.com/firedrakeproject/ufl.git#egg=ufl -git+https://github.com/firedrakeproject/fiat.git#egg=fiat +git+https://github.com/firedrakeproject/ufl.git#egg=fenics-ufl +git+https://github.com/firedrakeproject/fiat.git#egg=fenics-fiat git+https://github.com/FInAT/FInAT.git#egg=finat git+https://github.com/firedrakeproject/tsfc.git#egg=tsfc -git+https://github.com/OP2/PyOP2.git#egg=pyop2 -git+https://github.com/dolfin-adjoint/pyadjoint.git#egg=pyadjoint +git+https://github.com/dolfin-adjoint/pyadjoint.git#egg=pyadjoint-ad +git+https://github.com/firedrakeproject/loopy.git@main#egg=loopy +git+https://github.com/firedrakeproject/pytest-mpi.git@main#egg=pytest-mpi git+https://github.com/firedrakeproject/petsc.git@firedrake#egg=petsc diff --git a/scripts/firedrake-clean b/scripts/firedrake-clean deleted file mode 100755 index 99f700d10b..0000000000 --- a/scripts/firedrake-clean +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -if __name__ == '__main__': - import os - import shutil - import tempfile - import firedrake_configuration - try: - import platformdirs as appdirs - except ImportError: - import appdirs - - firedrake_configuration.setup_cache_dirs() - tsfc_cache = os.environ.get('FIREDRAKE_TSFC_KERNEL_CACHE_DIR', - os.path.join(tempfile.gettempdir(), - 'firedrake-tsfc-kernel-cache-uid%d' % os.getuid())) - pyop2_cache = os.environ.get('PYOP2_CACHE_DIR', 
- os.path.join(tempfile.gettempdir(), - 'pyop2-cache-uid%d' % os.getuid())) - pytools_cache = appdirs.user_cache_dir("pytools", "pytools") - print('Removing cached TSFC kernels from %s' % tsfc_cache) - print('Removing cached PyOP2 code from %s' % pyop2_cache) - print('Removing cached pytools files from %s' % pytools_cache) - for cache in [tsfc_cache, pyop2_cache, pytools_cache]: - if os.path.exists(cache): - shutil.rmtree(cache, ignore_errors=True) diff --git a/scripts/firedrake-install b/scripts/firedrake-install index b0bc4fb3a8..a83b04a451 100755 --- a/scripts/firedrake-install +++ b/scripts/firedrake-install @@ -27,7 +27,7 @@ arch = platform.uname().machine # Packages which we wish to ensure are always recompiled wheel_blacklist = ["mpi4py", "randomgen", "numpy"] # Packages for which we set CC=mpicc, CXX=mpicxx, F90=mpif90. -parallel_packages = ["h5py", "petsc4py", "slepc", "slepc4py", "PyOP2", "libsupermesh", "firedrake"] +parallel_packages = ["h5py", "petsc4py", "slepc", "slepc4py", "libsupermesh", "firedrake"] # Firedrake application installation shortcuts. firedrake_apps = { @@ -377,19 +377,12 @@ honoured.""", config = FiredrakeConfiguration(args) else: - # This duplicates code from firedrake_configuration in order to avoid the module dependency and allow for installation recovery. try: with open(os.path.join(os.environ["VIRTUAL_ENV"], ".configuration.json"), "r") as f: config = json.load(f) - except FileNotFoundError: - # Fall back to the old location. 
- import firedrake_configuration - - config = firedrake_configuration.get_config() - if config is None: - raise InstallError("Failed to find existing Firedrake configuration") + raise InstallError("Failed to find existing Firedrake configuration") parser = ArgumentParser(description="""Update this firedrake install to the latest versions of all packages.""", formatter_class=RawDescriptionHelpFormatter) @@ -1803,7 +1796,7 @@ if mode == "install": run_pip(["install", "lazy-object-proxy==1.4.*"]) packages = clone_dependencies("firedrake") - packages = clone_dependencies("PyOP2") + ["petsc4py"] + packages + packages = ["petsc4py"] + packages packages += ["firedrake"] for p in options["packages"]: @@ -1812,13 +1805,18 @@ if mode == "install": packages += [name] if args.honour_petsc_dir: - packages.remove("petsc") - - # Need to install petsc first in order to resolve hdf5 dependency. - if not args.honour_petsc_dir: + try: + packages.remove("petsc") + except ValueError: + log.warning("PETSc wasn't in dependencies list") + else: + # Need to install petsc first in order to resolve hdf5 dependency. with environment(**compiler_env): with pipargs("--no-deps"): - packages.remove("petsc") + try: + packages.remove("petsc") + except ValueError: + log.warning("PETSc wasn't in dependencies list") install("petsc/") os.environ["PETSC_DIR"] = petsc_dir os.environ["PETSC_ARCH"] = petsc_arch @@ -1865,7 +1863,6 @@ if mode == "install": with pipargs("--no-deps"): try: - packages.remove("PyOP2") packages.remove("firedrake") except ValueError: pass @@ -1882,14 +1879,8 @@ if mode == "install": build_and_install_libsupermesh(cc, cxx, f90, mpiexec) with pipargs("--no-deps"), environment(**compiler_env, **link_env): - for p in ("PyOP2", "firedrake"): - install(p+"/") + install("firedrake/") - # Work around easy-install.pth bug. 
- try: - packages.remove("petsc") - except ValueError: - pass packages.remove("petsc4py") else: @@ -1909,7 +1900,6 @@ else: run_pip_install(package.split()) deps = OrderedDict() - deps.update(list_cloned_dependencies("PyOP2")) deps.update(list_cloned_dependencies("firedrake")) for p in options["packages"]: name = split_requirements_url(p)[0] @@ -1938,7 +1928,10 @@ else: clean("slepc4py") shutil.move("slepc4py", "slepc4py_old") - packages.remove("petsc") + try: + packages.remove("petsc") + except ValueError: + log.warning("PETSc wasn't in dependencies list") packages.remove("petsc4py") if args.clean: @@ -2039,7 +2032,6 @@ Please consider updating your PETSc manually. with pipargs("--no-deps"): try: - packages.remove("PyOP2") packages.remove("firedrake") except ValueError: pass @@ -2056,8 +2048,7 @@ Please consider updating your PETSc manually. build_and_install_libsupermesh(cc, cxx, f90, mpiexec) with pipargs("--no-deps"), environment(**compiler_env, **link_env): - for p in ("PyOP2", "firedrake"): - install(p+"/") + install("firedrake/") # Ensure pytest is at the latest version run_pip(["install", "-U", "pytest"]) @@ -2092,8 +2083,8 @@ if args.documentation_dependencies: if mode == "update": try: - import firedrake_configuration - firedrake_configuration.setup_cache_dirs() + from firedrake.configuration import setup_cache_dirs + setup_cache_dirs() log.info("Clearing just in time compilation caches.") from firedrake.tsfc_interface import clear_cache, TSFCKernel from pyop2.compilation import clear_cache as pyop2_clear_cache diff --git a/scripts/firedrake-preprocess-bibtex b/scripts/firedrake-preprocess-bibtex deleted file mode 100755 index cad231830a..0000000000 --- a/scripts/firedrake-preprocess-bibtex +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 -import io -from argparse import ArgumentParser, RawDescriptionHelpFormatter -try: - from bibtexparser.bwriter import BibTexWriter - import bibtexparser -except ImportError: - raise ImportError("Failed to import 
bibtexparser. Run:\n firedrake-update --documentation-dependencies") - - -parser = ArgumentParser(description="""Ensure BibTeX entries for inclusion in the Firedrake website have a -URL or DOI, and impose clean formatting.""", - formatter_class=RawDescriptionHelpFormatter) -parser.add_argument("bibtex_file", help="The BibTeX file to process") -parser.add_argument("--validate", action='store_true', - help="Instead of rewriting the bibtex file, raise an exception if anything would have changed.") - -args = parser.parse_args() - -filename = args.bibtex_file - -parser = bibtexparser.bparser.BibTexParser() -parser.common_strings = True -parser.ignore_nonstandard_types = False - -with open(filename) as bibtex_file: - bib_database = parser.parse_file(bibtex_file) - -for entry in bib_database.entries: - if "url" not in entry and \ - "doi" not in entry: - if entry.get("archiveprefix", None) == "arXiv": - entry["url"] = "https://arxiv.org/abs/" + entry["eprint"] - else: - raise ValueError("%s in bibliograpy %s\n has no url and no DOI.\n" % (entry["ID"], filename)) - -writer = BibTexWriter() -writer.indent = ' ' # indent entries with 2 spaces instead of one -writer.align_values = True - -if args.validate: - with io.StringIO() as outbuffer: - outbuffer.write(writer.write(bib_database)) - processed = outbuffer.getvalue() - with open(filename) as bibtex_file: - inbuffer = bibtex_file.read() - if processed != inbuffer: - raise ValueError("%s would be changed by firedrake-preprocess-bibtex. Please preprocess it and commit the result" % filename) - -else: - with open(filename, 'w') as bibfile: - bibfile.write(writer.write(bib_database)) diff --git a/scripts/firedrake-status b/scripts/firedrake-status deleted file mode 100755 index af7a0c58b5..0000000000 --- a/scripts/firedrake-status +++ /dev/null @@ -1,142 +0,0 @@ -#! 
/usr/bin/env python3 -from six import iteritems - -from argparse import ArgumentParser, RawDescriptionHelpFormatter -from pprint import pformat -import logging -import sys -import os -import subprocess -from collections import OrderedDict, defaultdict - -parser = ArgumentParser(description="""Provide information on the currently downloaded version of Firedrake and its configuration. -This is particularly useful information to include when reporting bugs.""", - formatter_class=RawDescriptionHelpFormatter) -parser.add_argument("--log", action='store_true', - help="Log the output of the script to firedrake-status.log as well as to the console.") - - -args = parser.parse_args() - -# Set up logging -if args.log: - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(levelname)-6s %(message)s', - filename='firedrake-status.log', - filemode='w') - console = logging.StreamHandler() - console.setLevel(logging.WARNING) - formatter = logging.Formatter('%(message)s') - console.setFormatter(formatter) - logging.getLogger().addHandler(console) -else: - logging.basicConfig(level=logging.WARNING, - format='%(message)s') -log = logging.getLogger() - - -def check_output(args, env=None): - return subprocess.check_output(args, stderr=subprocess.STDOUT, env=env) - - -def quit(message): - log.error(message) - sys.exit(1) - - -try: - firedrake_env = os.environ["VIRTUAL_ENV"] -except KeyError: - quit("Unable to retrieve virtualenv name from the environment.\n Please ensure the virtualenv is active before running firedrake-update.") - - -try: - import firedrake_configuration -except ImportError: - log.info("Failed to import firedrake_configuration module") - config = defaultdict(dict) -else: - config = firedrake_configuration.get_config() - -try: - config["system"] = check_output(["uname", "-a"]) -except subprocess.CalledProcessError: - log.error("Failed to retrieve system information.") - - -print("Firedrake Configuration:") -if not config: - print("No configuration 
information found.") -else: - for key, val in iteritems(config["options"]): - print(" {}: {}".format(key, val)) - - print("Additions:") - if config["additions"]: - for a in config["additions"]: - print(" " + a) - else: - print(" None") - -for var in ["PYTHONPATH", "PETSC_ARCH", "PETSC_DIR"]: - config["environment"][var] = os.environ.get(var, None) - -print("Environment:") -for key, val in iteritems(config["environment"]): - print(" {}: {}".format(key, val)) - -status = OrderedDict() -for dir in sorted(os.listdir(firedrake_env + "/src")): - try: - os.chdir(firedrake_env + "/src/" + dir) - except OSError as e: - if e.errno == 20: - # Not a directory - continue - else: - raise - try: - revision = check_output(["git", "rev-parse", "--short", "HEAD"]).decode('ascii').strip() - branch = check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"]).decode('ascii').strip() - except subprocess.CalledProcessError: - log.error("Unable to retrieve git information from %s." % dir) - else: - try: - _ = check_output(["git", "diff-index", "--quiet", "HEAD"]) - modified = False - except subprocess.CalledProcessError: - modified = True - - status[dir] = {"branch": branch, - "revision": revision, - "modified": modified} - -status_string = "" -status_string += "Status of components:\n" -componentformat = "|{:20}|{:30}|{:10}|{!s:10}|\n" -header = componentformat.format("Package", "Branch", "Revision", "Modified") -line = "-" * (len(header) - 1) + "\n" -status_string += line + header + line -for dir, d in iteritems(status): - status_string += componentformat.format(dir, d["branch"], d["revision"], d["modified"]) -status_string += line - -print(status_string) -log.info("\n" + status_string) - -log.info("Firedrake configuration: ") -log.info(pformat(config)) -log.debug("\nPip packages installed:") -try: - log.debug(check_output(["pip", "freeze"])) -except subprocess.CalledProcessError: - log.error("""Failed to retrieve list of pip installed packages. Try running: - - pip freeze. 
- -""") -log.debug("\n Full environment:") -try: - log.debug(check_output(["env"])) -except subprocess.CalledProcessError: - log.error("""Shell command env failed.""") diff --git a/setup.cfg b/setup.cfg index d9fa8e08a3..165a3bc775 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,17 +1,3 @@ - -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -VCS = git -style = pep440 -versionfile_source = firedrake/_version.py -versionfile_build = firedrake/_version.py -tag_prefix = v -parentdir_prefix = firedrake- - - [flake8] ignore = E501,F403,F405,E226,E402,E721,E731,E741,W503,F999, diff --git a/setup.py b/setup.py index 6a0b0f3c8c..36a5fe5082 100644 --- a/setup.py +++ b/setup.py @@ -1,49 +1,50 @@ -from setuptools import setup, find_packages +from dataclasses import dataclass, field +from setuptools import setup, find_packages, Extension from glob import glob -from os import environ as env, path from pathlib import Path -from Cython.Distutils import build_ext +from Cython.Build import cythonize import os import sys +import site import numpy as np +import pybind11 import petsc4py import rtree -import versioneer +import pkgconfig -from firedrake_configuration import get_config +# Define the compilers to use if not already set +if "CC" not in os.environ: + os.environ["CC"] = os.environ.get("MPICC", "mpicc") +if "CXX" not in os.environ: + os.environ["CXX"] = os.environ.get("MPICXX", "mpicxx") -try: - from Cython.Distutils.extension import Extension - config = get_config() - complex_mode = config["options"].get("complex", False) -except ImportError: - # No Cython Extension means no complex mode! 
- from setuptools import Extension - complex_mode = False -try: - from pybind11.setup_helpers import Pybind11Extension -except ImportError: - Pybind11Extension = Extension +petsc_config = petsc4py.get_config() def get_petsc_dir(): - try: - petsc_dir = os.environ["PETSC_DIR"] - petsc_arch = os.environ.get("PETSC_ARCH", "") - except KeyError: - try: - petsc_dir = os.path.join(os.environ["VIRTUAL_ENV"], "src", "petsc") - petsc_arch = "default" - except KeyError: - sys.exit("""Error: Firedrake venv not active.""") - - return (petsc_dir, path.join(petsc_dir, petsc_arch)) - - -cmdclass = versioneer.get_cmdclass() -cmdclass["build_ext"] = build_ext - + """Attempts to find the PETSc directory on the system + """ + petsc_dir = petsc_config["PETSC_DIR"] + petsc_arch = petsc_config["PETSC_ARCH"] + pathlist = [petsc_dir] + if petsc_arch: + pathlist.append(os.path.join(petsc_dir, petsc_arch)) + return pathlist + + +def get_petsc_variables(): + """Attempts obtain a dictionary of PETSc configuration settings + """ + path = [get_petsc_dir()[-1], "lib/petsc/conf/petscvariables"] + variables_path = os.path.join(*path) + with open(variables_path) as fh: + # Split lines on first '=' (assignment) + splitlines = (line.split("=", maxsplit=1) for line in fh.readlines()) + return {k.strip(): v.strip() for k, v in splitlines} + + +# TODO: This is deprecated behaviour, what to do?: if "clean" in sys.argv[1:]: # Forcibly remove the results of Cython. 
for dirname, dirs, files in os.walk("firedrake"): @@ -53,70 +54,192 @@ def get_petsc_dir(): or ext == ".so"): os.remove(os.path.join(dirname, f)) -cython_compile_time_env = {"COMPLEX": complex_mode} -cythonfiles = [ - ("dmcommon", ["petsc"]), - ("extrusion_numbering", ["petsc"]), - ("hdf5interface", ["petsc"]), - ("mgimpl", ["petsc"]), - ("patchimpl", ["petsc"]), - ("spatialindex", None), - ("supermeshimpl", ["supermesh", "petsc"]), -] +@dataclass +class ExternalDependency: + ''' This dataclass stores the relevant information for the compiler as fields + that correspond to the keyword arguments of `Extension`. For convenience it + also implements addition and `**` unpacking. + ''' + include_dirs: list[str] = field(default_factory=list, init=True) + extra_compile_args: list[str] = field(default_factory=list, init=True) + libraries: list[str] = field(default_factory=list, init=True) + library_dirs: list[str] = field(default_factory=list, init=True) + extra_link_args: list[str] = field(default_factory=list, init=True) + runtime_library_dirs: list[str] = field(default_factory=list, init=True) + + def __add__(self, other): + combined = {} + for f in self.__dataclass_fields__.keys(): + combined[f] = getattr(self, f) + getattr(other, f) + return self.__class__(**combined) + + def keys(self): + return self.__dataclass_fields__.keys() + + def __getitem__(self, key): + try: + return getattr(self, key) + except AttributeError: + raise KeyError(f"Key {key} not present") + + +# Pybind11 +# example: +# gcc -I/pyind11/include ... +pybind11_ = ExternalDependency(include_dirs=[pybind11.get_include()]) + +# numpy +# example: +# gcc -I/numpy/include ... 
+numpy_ = ExternalDependency(include_dirs=[np.get_include()]) +# PETSc +# example: +# gcc -I$PETSC_DIR/include -I$PETSC_DIR/$PETSC_ARCH/include -I/petsc4py/include +# gcc -L$PETSC_DIR/$PETSC_ARCH/lib -lpetsc -Wl,-rpath,$PETSC_DIR/$PETSC_ARCH/lib petsc_dirs = get_petsc_dir() -if os.environ.get("HDF5_DIR"): - petsc_dirs = petsc_dirs + (os.environ.get("HDF5_DIR"), ) -include_dirs = [np.get_include(), rtree.finder.get_include()] -petsc_include = [petsc4py.get_include()] + [os.path.join(d, "include") for d in petsc_dirs] -include_dirs += petsc_include -petsc_library = [os.path.join(petsc_dirs[1], "lib")] - -dirs = (sys.prefix, *petsc_dirs) -link_args = ["-L%s/lib" % d for d in dirs] + ["-Wl,-rpath,%s/lib" % d for d in dirs] +petsc_ = ExternalDependency( + libraries=["petsc"], + include_dirs=[petsc4py.get_include()] + [os.path.join(d, "include") for d in petsc_dirs], + library_dirs=[os.path.join(petsc_dirs[-1], "lib")], + runtime_library_dirs=[os.path.join(petsc_dirs[-1], "lib")], +) +petsc_variables = get_petsc_variables() +petsc_hdf5_compile_args = petsc_variables.get("HDF5_INCLUDE", "") +petsc_hdf5_link_args = petsc_variables.get("HDF5_LIB", "") + +# HDF5 +# example: +# gcc -I$HDF5_DIR/include +# gcc -L$HDF5_DIR/lib -lhdf5 +if petsc_hdf5_link_args and petsc_hdf5_compile_args: + # We almost always want to be in this first case!!! 
+ # PETSc variables only contains the compile/link args, not the paths + hdf5_ = ExternalDependency( + extra_compile_args = petsc_hdf5_compile_args.split(), + extra_link_args = petsc_hdf5_link_args.split() + ) +elif os.environ.get("HDF5_DIR"): + hdf5_dir = Path(os.environ.get("HDF5_DIR")) + hdf5_ = ExternalDependency( + libraries=["hdf5"], + include_dirs = [str(hdf5_dir.joinpath("include"))], + library_dirs = [str(hdf5_dir.joinpath("lib"))] + ) +elif pkgconfig.exists("hdf5"): + hdf5_ = ExternalDependency(**pkgconfig.parse("hdf5")) +else: + # Set the library name and hope for the best + hdf5_ = ExternalDependency(libraries=["hdf5"]) + +# Note: +# In the next 2 linkages we are using `site.getsitepackages()[0]`, which isn't +# guaranteed to be the correct place we could also use "$ORIGIN/../../lib_dir", +# but that definitely doesn't work with editable installs. + +# libspatialindex +# example: +# gcc -I/rtree/include +# gcc /rtree.libs/libspatialindex.so -Wl,-rpath,$ORIGIN/../../Rtree.libs libspatialindex_so = Path(rtree.core.rt._name).absolute() -link_args += [str(libspatialindex_so)] -link_args += ["-Wl,-rpath,%s" % libspatialindex_so.parent] - -extensions = [ - Extension( - "firedrake.cython.{}".format(ext), - sources=[os.path.join("firedrake", "cython", "{}.pyx".format(ext))], - include_dirs=include_dirs, - libraries=libs, - extra_link_args=link_args, - cython_compile_time_env=cython_compile_time_env - ) for (ext, libs) in cythonfiles -] + [ - Pybind11Extension( +spatialindex_ = ExternalDependency( + include_dirs=[rtree.finder.get_include()], + extra_link_args=[str(libspatialindex_so)], + runtime_library_dirs=[os.path.join(site.getsitepackages()[0], "Rtree.libs")] +) + +# libsupermesh +# example: +# gcc -I/supermesh/include +# gcc /supermesh/supermesh.cpython-311-x86_64-linux-gnu.so \ +# -lsupermesh \ +# -Wl,-rpath,$ORIGIN/../../supermesh +supermesh_ = ExternalDependency( + include_dirs=[f"{sys.prefix}/include"], + library_dirs=[f"{sys.prefix}/lib"], + 
libraries=["supermesh"], + runtime_library_dirs=[f"{sys.prefix}/lib"], +) + +# The following extensions need to be linked accordingly: +def extensions(): + ## CYTHON EXTENSIONS + cython_list = [] + # firedrake/cython/dmcommon.pyx: petsc, numpy + cython_list.append(Extension( + name="firedrake.cython.dmcommon", + language="c", + sources=[os.path.join("firedrake", "cython", "dmcommon.pyx")], + **(petsc_ + numpy_) + )) + # firedrake/cython/extrusion_numbering.pyx: petsc, numpy + cython_list.append(Extension( + name="firedrake.cython.extrusion_numbering", + language="c", + sources=[os.path.join("firedrake", "cython", "extrusion_numbering.pyx")], + **(petsc_ + numpy_) + )) + # firedrake/cython/hdf5interface.pyx: petsc, numpy, hdf5 + cython_list.append(Extension( + name="firedrake.cython.hdf5interface", + language="c", + sources=[os.path.join("firedrake", "cython", "hdf5interface.pyx")], + **(petsc_ + numpy_ + hdf5_) + )) + # firedrake/cython/mgimpl.pyx: petsc, numpy + cython_list.append(Extension( + name="firedrake.cython.mgimpl", + language="c", + sources=[os.path.join("firedrake", "cython", "mgimpl.pyx")], + **(petsc_ + numpy_) + )) + # firedrake/cython/patchimpl.pyx: petsc, numpy + cython_list.append(Extension( + name="firedrake.cython.patchimpl", + language="c", + sources=[os.path.join("firedrake", "cython", "patchimpl.pyx")], + **(petsc_ + numpy_) + )) + # firedrake/cython/spatialindex.pyx: numpy, spatialindex + cython_list.append(Extension( + name="firedrake.cython.spatialindex", + language="c", + sources=[os.path.join("firedrake", "cython", "spatialindex.pyx")], + **(numpy_ + spatialindex_) + )) + # firedrake/cython/supermeshimpl.pyx: petsc, numpy, supermesh + cython_list.append(Extension( + name="firedrake.cython.supermeshimpl", + language="c", + sources=[os.path.join("firedrake", "cython", "supermeshimpl.pyx")], + **(petsc_ + numpy_ + supermesh_) + )) + # pyop2/sparsity.pyx: petsc, numpy, + cython_list.append(Extension( + name="pyop2.sparsity", + language="c", 
+ sources=[os.path.join("pyop2", "sparsity.pyx")], + **(petsc_ + numpy_) + )) + ## PYBIND11 EXTENSIONS + pybind11_list = [] + # tinyasm/tinyasm.cpp: petsc, pybind11 + # tinyasm/tinyasm.cpp: petsc, pybind11 + pybind11_list.append(Extension( name="tinyasm._tinyasm", + language="c++", sources=sorted(glob("tinyasm/*.cpp")), # Sort source files for reproducibility - include_dirs=petsc_include, - library_dirs=petsc_library, - extra_compile_args=["-std=c++11",], - extra_link_args=["-lpetsc",], - runtime_library_dirs=petsc_library, - ) -] - -if "CC" not in env: - env["CC"] = "mpicc" + **(petsc_ + pybind11_) + )) + return cythonize(cython_list) + pybind11_list setup( - name="firedrake", - version=versioneer.get_version(), - cmdclass=cmdclass, - description="An automated finite element system.", - long_description="""Firedrake is an automated system for the portable - solution of partial differential equations using the finite element - method (FEM)""", - author="Imperial College London and others", - author_email="firedrake@imperial.ac.uk", - url="http://firedrakeproject.org", packages=find_packages(), - package_data={"firedrake": ["evaluate.h", "locate.c", "icons/*.png"]}, - scripts=glob("scripts/*"), - ext_modules=extensions + package_data={ + "firedrake": ["evaluate.h", "locate.c", "icons/*.png"], + "pyop2": ["assets/*", "*.h", "*.pxd", "*.pyx", "codegen/c/*.c"] + }, + ext_modules=extensions() ) diff --git a/tests/conftest.py b/tests/firedrake/conftest.py similarity index 100% rename from tests/conftest.py rename to tests/firedrake/conftest.py diff --git a/tests/demos/test_demos_run.py b/tests/firedrake/demos/test_demos_run.py similarity index 99% rename from tests/demos/test_demos_run.py rename to tests/firedrake/demos/test_demos_run.py index bba22b91f4..9d6fdb9a3f 100644 --- a/tests/demos/test_demos_run.py +++ b/tests/firedrake/demos/test_demos_run.py @@ -17,7 +17,7 @@ CWD = abspath(dirname(__file__)) -DEMO_DIR = join(CWD, "..", "..", "demos") +DEMO_DIR = join(CWD, 
"..", "..", "..", "demos") SERIAL_DEMOS = [ Demo(("benney_luke", "benney_luke"), ["vtk"]), diff --git a/tests/demos/test_notebooks_run.py b/tests/firedrake/demos/test_notebooks_run.py similarity index 94% rename from tests/demos/test_notebooks_run.py rename to tests/firedrake/demos/test_notebooks_run.py index 8a415eab76..036521cebd 100644 --- a/tests/demos/test_notebooks_run.py +++ b/tests/firedrake/demos/test_notebooks_run.py @@ -12,7 +12,7 @@ cwd = os.path.abspath(os.path.dirname(__file__)) -nb_dir = os.path.join(cwd, "..", "..", "docs", "notebooks") +nb_dir = os.path.join(cwd, "..", "..", "..", "docs", "notebooks") # Discover the notebook files by globbing the notebook directory diff --git a/tests/ensemble_reduced_functional/test_reduced_functional.py b/tests/firedrake/ensemble_reduced_functional/test_reduced_functional.py similarity index 100% rename from tests/ensemble_reduced_functional/test_reduced_functional.py rename to tests/firedrake/ensemble_reduced_functional/test_reduced_functional.py diff --git a/tests/equation_bcs/test_bcs_reconstruct.py b/tests/firedrake/equation_bcs/test_bcs_reconstruct.py similarity index 100% rename from tests/equation_bcs/test_bcs_reconstruct.py rename to tests/firedrake/equation_bcs/test_bcs_reconstruct.py diff --git a/tests/equation_bcs/test_equation_bcs.py b/tests/firedrake/equation_bcs/test_equation_bcs.py similarity index 100% rename from tests/equation_bcs/test_equation_bcs.py rename to tests/firedrake/equation_bcs/test_equation_bcs.py diff --git a/tests/equation_bcs/test_equation_bcs_assemble.py b/tests/firedrake/equation_bcs/test_equation_bcs_assemble.py similarity index 100% rename from tests/equation_bcs/test_equation_bcs_assemble.py rename to tests/firedrake/equation_bcs/test_equation_bcs_assemble.py diff --git a/tests/external_operators/test_abstract_external_operator.py b/tests/firedrake/external_operators/test_abstract_external_operator.py similarity index 100% rename from 
tests/external_operators/test_abstract_external_operator.py rename to tests/firedrake/external_operators/test_abstract_external_operator.py diff --git a/tests/external_operators/test_external_operators.py b/tests/firedrake/external_operators/test_external_operators.py similarity index 100% rename from tests/external_operators/test_external_operators.py rename to tests/firedrake/external_operators/test_external_operators.py diff --git a/tests/external_operators/test_external_operators_adjoint.py b/tests/firedrake/external_operators/test_external_operators_adjoint.py similarity index 100% rename from tests/external_operators/test_external_operators_adjoint.py rename to tests/firedrake/external_operators/test_external_operators_adjoint.py diff --git a/tests/external_operators/test_jax_operator.py b/tests/firedrake/external_operators/test_jax_operator.py similarity index 100% rename from tests/external_operators/test_jax_operator.py rename to tests/firedrake/external_operators/test_jax_operator.py diff --git a/tests/external_operators/test_pytorch_operator.py b/tests/firedrake/external_operators/test_pytorch_operator.py similarity index 100% rename from tests/external_operators/test_pytorch_operator.py rename to tests/firedrake/external_operators/test_pytorch_operator.py diff --git a/tests/extrusion/conftest.py b/tests/firedrake/extrusion/conftest.py similarity index 100% rename from tests/extrusion/conftest.py rename to tests/firedrake/extrusion/conftest.py diff --git a/tests/extrusion/test_2d_cohomology.py b/tests/firedrake/extrusion/test_2d_cohomology.py similarity index 100% rename from tests/extrusion/test_2d_cohomology.py rename to tests/firedrake/extrusion/test_2d_cohomology.py diff --git a/tests/extrusion/test_annulus.py b/tests/firedrake/extrusion/test_annulus.py similarity index 100% rename from tests/extrusion/test_annulus.py rename to tests/firedrake/extrusion/test_annulus.py diff --git a/tests/extrusion/test_assembly.py 
b/tests/firedrake/extrusion/test_assembly.py similarity index 100% rename from tests/extrusion/test_assembly.py rename to tests/firedrake/extrusion/test_assembly.py diff --git a/tests/extrusion/test_bcs_interior_facet.py b/tests/firedrake/extrusion/test_bcs_interior_facet.py similarity index 100% rename from tests/extrusion/test_bcs_interior_facet.py rename to tests/firedrake/extrusion/test_bcs_interior_facet.py diff --git a/tests/extrusion/test_bdmc_extr.py b/tests/firedrake/extrusion/test_bdmc_extr.py similarity index 100% rename from tests/extrusion/test_bdmc_extr.py rename to tests/firedrake/extrusion/test_bdmc_extr.py diff --git a/tests/extrusion/test_cellvolume_extrusion.py b/tests/firedrake/extrusion/test_cellvolume_extrusion.py similarity index 100% rename from tests/extrusion/test_cellvolume_extrusion.py rename to tests/firedrake/extrusion/test_cellvolume_extrusion.py diff --git a/tests/extrusion/test_cylinder.py b/tests/firedrake/extrusion/test_cylinder.py similarity index 100% rename from tests/extrusion/test_cylinder.py rename to tests/firedrake/extrusion/test_cylinder.py diff --git a/tests/extrusion/test_dg_coords.py b/tests/firedrake/extrusion/test_dg_coords.py similarity index 100% rename from tests/extrusion/test_dg_coords.py rename to tests/firedrake/extrusion/test_dg_coords.py diff --git a/tests/extrusion/test_embedded_sphere_extrusion.py b/tests/firedrake/extrusion/test_embedded_sphere_extrusion.py similarity index 100% rename from tests/extrusion/test_embedded_sphere_extrusion.py rename to tests/firedrake/extrusion/test_embedded_sphere_extrusion.py diff --git a/tests/extrusion/test_enrichment_1_feec.py b/tests/firedrake/extrusion/test_enrichment_1_feec.py similarity index 100% rename from tests/extrusion/test_enrichment_1_feec.py rename to tests/firedrake/extrusion/test_enrichment_1_feec.py diff --git a/tests/extrusion/test_extruded_cell_subdomains.py b/tests/firedrake/extrusion/test_extruded_cell_subdomains.py similarity index 100% rename from 
tests/extrusion/test_extruded_cell_subdomains.py rename to tests/firedrake/extrusion/test_extruded_cell_subdomains.py diff --git a/tests/extrusion/test_extruded_periodic.py b/tests/firedrake/extrusion/test_extruded_periodic.py similarity index 100% rename from tests/extrusion/test_extruded_periodic.py rename to tests/firedrake/extrusion/test_extruded_periodic.py diff --git a/tests/extrusion/test_facet_avg_extruded.py b/tests/firedrake/extrusion/test_facet_avg_extruded.py similarity index 100% rename from tests/extrusion/test_facet_avg_extruded.py rename to tests/firedrake/extrusion/test_facet_avg_extruded.py diff --git a/tests/extrusion/test_facet_integrals_2D.py b/tests/firedrake/extrusion/test_facet_integrals_2D.py similarity index 100% rename from tests/extrusion/test_facet_integrals_2D.py rename to tests/firedrake/extrusion/test_facet_integrals_2D.py diff --git a/tests/extrusion/test_facet_integrals_3D.py b/tests/firedrake/extrusion/test_facet_integrals_3D.py similarity index 100% rename from tests/extrusion/test_facet_integrals_3D.py rename to tests/firedrake/extrusion/test_facet_integrals_3D.py diff --git a/tests/extrusion/test_facet_support_dofs.py b/tests/firedrake/extrusion/test_facet_support_dofs.py similarity index 100% rename from tests/extrusion/test_facet_support_dofs.py rename to tests/firedrake/extrusion/test_facet_support_dofs.py diff --git a/tests/extrusion/test_fs_abbreviations.py b/tests/firedrake/extrusion/test_fs_abbreviations.py similarity index 100% rename from tests/extrusion/test_fs_abbreviations.py rename to tests/firedrake/extrusion/test_fs_abbreviations.py diff --git a/tests/extrusion/test_galerkinproj.py b/tests/firedrake/extrusion/test_galerkinproj.py similarity index 100% rename from tests/extrusion/test_galerkinproj.py rename to tests/firedrake/extrusion/test_galerkinproj.py diff --git a/tests/extrusion/test_helmholtz_scalar.py b/tests/firedrake/extrusion/test_helmholtz_scalar.py similarity index 100% rename from 
tests/extrusion/test_helmholtz_scalar.py rename to tests/firedrake/extrusion/test_helmholtz_scalar.py diff --git a/tests/extrusion/test_helmholtz_serendipity_2d.py b/tests/firedrake/extrusion/test_helmholtz_serendipity_2d.py similarity index 100% rename from tests/extrusion/test_helmholtz_serendipity_2d.py rename to tests/firedrake/extrusion/test_helmholtz_serendipity_2d.py diff --git a/tests/extrusion/test_helmholtz_serendipity_3d.py b/tests/firedrake/extrusion/test_helmholtz_serendipity_3d.py similarity index 100% rename from tests/extrusion/test_helmholtz_serendipity_3d.py rename to tests/firedrake/extrusion/test_helmholtz_serendipity_3d.py diff --git a/tests/extrusion/test_helmholtz_vector.py b/tests/firedrake/extrusion/test_helmholtz_vector.py similarity index 100% rename from tests/extrusion/test_helmholtz_vector.py rename to tests/firedrake/extrusion/test_helmholtz_vector.py diff --git a/tests/extrusion/test_identity_extrusion.py b/tests/firedrake/extrusion/test_identity_extrusion.py similarity index 100% rename from tests/extrusion/test_identity_extrusion.py rename to tests/firedrake/extrusion/test_identity_extrusion.py diff --git a/tests/extrusion/test_interior_facets_extr.py b/tests/firedrake/extrusion/test_interior_facets_extr.py similarity index 100% rename from tests/extrusion/test_interior_facets_extr.py rename to tests/firedrake/extrusion/test_interior_facets_extr.py diff --git a/tests/extrusion/test_interval.py b/tests/firedrake/extrusion/test_interval.py similarity index 100% rename from tests/extrusion/test_interval.py rename to tests/firedrake/extrusion/test_interval.py diff --git a/tests/extrusion/test_kernel_int_cube.py b/tests/firedrake/extrusion/test_kernel_int_cube.py similarity index 100% rename from tests/extrusion/test_kernel_int_cube.py rename to tests/firedrake/extrusion/test_kernel_int_cube.py diff --git a/tests/extrusion/test_kernel_int_p0.py b/tests/firedrake/extrusion/test_kernel_int_p0.py similarity index 100% rename from 
tests/extrusion/test_kernel_int_p0.py rename to tests/firedrake/extrusion/test_kernel_int_p0.py diff --git a/tests/extrusion/test_kernel_intas_p0.py b/tests/firedrake/extrusion/test_kernel_intas_p0.py similarity index 100% rename from tests/extrusion/test_kernel_intas_p0.py rename to tests/firedrake/extrusion/test_kernel_intas_p0.py diff --git a/tests/extrusion/test_kernel_intrhs.py b/tests/firedrake/extrusion/test_kernel_intrhs.py similarity index 100% rename from tests/extrusion/test_kernel_intrhs.py rename to tests/firedrake/extrusion/test_kernel_intrhs.py diff --git a/tests/extrusion/test_kernel_intvar_p0.py b/tests/firedrake/extrusion/test_kernel_intvar_p0.py similarity index 100% rename from tests/extrusion/test_kernel_intvar_p0.py rename to tests/firedrake/extrusion/test_kernel_intvar_p0.py diff --git a/tests/extrusion/test_laplace_neumann.py b/tests/firedrake/extrusion/test_laplace_neumann.py similarity index 100% rename from tests/extrusion/test_laplace_neumann.py rename to tests/firedrake/extrusion/test_laplace_neumann.py diff --git a/tests/extrusion/test_layer_height_mesh_volume.py b/tests/firedrake/extrusion/test_layer_height_mesh_volume.py similarity index 100% rename from tests/extrusion/test_layer_height_mesh_volume.py rename to tests/firedrake/extrusion/test_layer_height_mesh_volume.py diff --git a/tests/extrusion/test_meshes.py b/tests/firedrake/extrusion/test_meshes.py similarity index 100% rename from tests/extrusion/test_meshes.py rename to tests/firedrake/extrusion/test_meshes.py diff --git a/tests/extrusion/test_mixed_bcs.py b/tests/firedrake/extrusion/test_mixed_bcs.py similarity index 100% rename from tests/extrusion/test_mixed_bcs.py rename to tests/firedrake/extrusion/test_mixed_bcs.py diff --git a/tests/extrusion/test_mixed_mats_extrusion.py b/tests/firedrake/extrusion/test_mixed_mats_extrusion.py similarity index 100% rename from tests/extrusion/test_mixed_mats_extrusion.py rename to tests/firedrake/extrusion/test_mixed_mats_extrusion.py 
diff --git a/tests/extrusion/test_mixed_periodic.py b/tests/firedrake/extrusion/test_mixed_periodic.py similarity index 100% rename from tests/extrusion/test_mixed_periodic.py rename to tests/firedrake/extrusion/test_mixed_periodic.py diff --git a/tests/extrusion/test_offset_computation.py b/tests/firedrake/extrusion/test_offset_computation.py similarity index 100% rename from tests/extrusion/test_offset_computation.py rename to tests/firedrake/extrusion/test_offset_computation.py diff --git a/tests/extrusion/test_point_eval_cells_extrusion.py b/tests/firedrake/extrusion/test_point_eval_cells_extrusion.py similarity index 100% rename from tests/extrusion/test_point_eval_cells_extrusion.py rename to tests/firedrake/extrusion/test_point_eval_cells_extrusion.py diff --git a/tests/extrusion/test_point_eval_fs_extrusion.py b/tests/firedrake/extrusion/test_point_eval_fs_extrusion.py similarity index 100% rename from tests/extrusion/test_point_eval_fs_extrusion.py rename to tests/firedrake/extrusion/test_point_eval_fs_extrusion.py diff --git a/tests/extrusion/test_poisson_neumann.py b/tests/firedrake/extrusion/test_poisson_neumann.py similarity index 100% rename from tests/extrusion/test_poisson_neumann.py rename to tests/firedrake/extrusion/test_poisson_neumann.py diff --git a/tests/extrusion/test_poisson_strong_bcs_extrusion.py b/tests/firedrake/extrusion/test_poisson_strong_bcs_extrusion.py similarity index 100% rename from tests/extrusion/test_poisson_strong_bcs_extrusion.py rename to tests/firedrake/extrusion/test_poisson_strong_bcs_extrusion.py diff --git a/tests/extrusion/test_real_tensorproduct.py b/tests/firedrake/extrusion/test_real_tensorproduct.py similarity index 100% rename from tests/extrusion/test_real_tensorproduct.py rename to tests/firedrake/extrusion/test_real_tensorproduct.py diff --git a/tests/extrusion/test_rhs_bcs.py b/tests/firedrake/extrusion/test_rhs_bcs.py similarity index 100% rename from tests/extrusion/test_rhs_bcs.py rename to 
tests/firedrake/extrusion/test_rhs_bcs.py diff --git a/tests/extrusion/test_rhs_side_bcs.py b/tests/firedrake/extrusion/test_rhs_side_bcs.py similarity index 100% rename from tests/extrusion/test_rhs_side_bcs.py rename to tests/firedrake/extrusion/test_rhs_side_bcs.py diff --git a/tests/extrusion/test_serendipity_3d_polys.py b/tests/firedrake/extrusion/test_serendipity_3d_polys.py similarity index 100% rename from tests/extrusion/test_serendipity_3d_polys.py rename to tests/firedrake/extrusion/test_serendipity_3d_polys.py diff --git a/tests/extrusion/test_side_strong_bcs.py b/tests/firedrake/extrusion/test_side_strong_bcs.py similarity index 100% rename from tests/extrusion/test_side_strong_bcs.py rename to tests/firedrake/extrusion/test_side_strong_bcs.py diff --git a/tests/extrusion/test_steady_advection_2D_extr.py b/tests/firedrake/extrusion/test_steady_advection_2D_extr.py similarity index 100% rename from tests/extrusion/test_steady_advection_2D_extr.py rename to tests/firedrake/extrusion/test_steady_advection_2D_extr.py diff --git a/tests/extrusion/test_steady_advection_3D_extr.py b/tests/firedrake/extrusion/test_steady_advection_3D_extr.py similarity index 100% rename from tests/extrusion/test_steady_advection_3D_extr.py rename to tests/firedrake/extrusion/test_steady_advection_3D_extr.py diff --git a/tests/extrusion/test_strong_bcs_caching.py b/tests/firedrake/extrusion/test_strong_bcs_caching.py similarity index 100% rename from tests/extrusion/test_strong_bcs_caching.py rename to tests/firedrake/extrusion/test_strong_bcs_caching.py diff --git a/tests/extrusion/test_subdomain_extruded.py b/tests/firedrake/extrusion/test_subdomain_extruded.py similarity index 100% rename from tests/extrusion/test_subdomain_extruded.py rename to tests/firedrake/extrusion/test_subdomain_extruded.py diff --git a/tests/extrusion/test_trace_extr.py b/tests/firedrake/extrusion/test_trace_extr.py similarity index 100% rename from tests/extrusion/test_trace_extr.py rename to 
tests/firedrake/extrusion/test_trace_extr.py diff --git a/tests/extrusion/test_two_step.py b/tests/firedrake/extrusion/test_two_step.py similarity index 100% rename from tests/extrusion/test_two_step.py rename to tests/firedrake/extrusion/test_two_step.py diff --git a/tests/extrusion/test_unit_square.py b/tests/firedrake/extrusion/test_unit_square.py similarity index 100% rename from tests/extrusion/test_unit_square.py rename to tests/firedrake/extrusion/test_unit_square.py diff --git a/tests/extrusion/test_variable_layers_bcs.py b/tests/firedrake/extrusion/test_variable_layers_bcs.py similarity index 100% rename from tests/extrusion/test_variable_layers_bcs.py rename to tests/firedrake/extrusion/test_variable_layers_bcs.py diff --git a/tests/extrusion/test_variable_layers_mesh_volume.py b/tests/firedrake/extrusion/test_variable_layers_mesh_volume.py similarity index 100% rename from tests/extrusion/test_variable_layers_mesh_volume.py rename to tests/firedrake/extrusion/test_variable_layers_mesh_volume.py diff --git a/tests/extrusion/test_variable_layers_numbering.py b/tests/firedrake/extrusion/test_variable_layers_numbering.py similarity index 100% rename from tests/extrusion/test_variable_layers_numbering.py rename to tests/firedrake/extrusion/test_variable_layers_numbering.py diff --git a/tests/extrusion/test_variable_layers_poisson.py b/tests/firedrake/extrusion/test_variable_layers_poisson.py similarity index 100% rename from tests/extrusion/test_variable_layers_poisson.py rename to tests/firedrake/extrusion/test_variable_layers_poisson.py diff --git a/tests/extrusion/test_variable_layers_steady_advection.py b/tests/firedrake/extrusion/test_variable_layers_steady_advection.py similarity index 100% rename from tests/extrusion/test_variable_layers_steady_advection.py rename to tests/firedrake/extrusion/test_variable_layers_steady_advection.py diff --git a/tests/extrusion/test_wedge_analytic.py b/tests/firedrake/extrusion/test_wedge_analytic.py similarity index 
100% rename from tests/extrusion/test_wedge_analytic.py rename to tests/firedrake/extrusion/test_wedge_analytic.py diff --git a/tests/extrusion/test_zero_forms_extrusion.py b/tests/firedrake/extrusion/test_zero_forms_extrusion.py similarity index 100% rename from tests/extrusion/test_zero_forms_extrusion.py rename to tests/firedrake/extrusion/test_zero_forms_extrusion.py diff --git a/tests/extrusion/test_zero_integrand_extrusion.py b/tests/firedrake/extrusion/test_zero_integrand_extrusion.py similarity index 100% rename from tests/extrusion/test_zero_integrand_extrusion.py rename to tests/firedrake/extrusion/test_zero_integrand_extrusion.py diff --git a/tests/macro/test_macro_interp_project.py b/tests/firedrake/macro/test_macro_interp_project.py similarity index 100% rename from tests/macro/test_macro_interp_project.py rename to tests/firedrake/macro/test_macro_interp_project.py diff --git a/tests/macro/test_macro_low_order_refined.py b/tests/firedrake/macro/test_macro_low_order_refined.py similarity index 100% rename from tests/macro/test_macro_low_order_refined.py rename to tests/firedrake/macro/test_macro_low_order_refined.py diff --git a/tests/macro/test_macro_multigrid.py b/tests/firedrake/macro/test_macro_multigrid.py similarity index 100% rename from tests/macro/test_macro_multigrid.py rename to tests/firedrake/macro/test_macro_multigrid.py diff --git a/tests/macro/test_macro_quadrature.py b/tests/firedrake/macro/test_macro_quadrature.py similarity index 100% rename from tests/macro/test_macro_quadrature.py rename to tests/firedrake/macro/test_macro_quadrature.py diff --git a/tests/macro/test_macro_solve.py b/tests/firedrake/macro/test_macro_solve.py similarity index 100% rename from tests/macro/test_macro_solve.py rename to tests/firedrake/macro/test_macro_solve.py diff --git a/tests/macro/test_stokes_macroelements.py b/tests/firedrake/macro/test_stokes_macroelements.py similarity index 100% rename from tests/macro/test_stokes_macroelements.py rename to 
tests/firedrake/macro/test_stokes_macroelements.py diff --git a/tests/meshes/annulus.geo b/tests/firedrake/meshes/annulus.geo similarity index 100% rename from tests/meshes/annulus.geo rename to tests/firedrake/meshes/annulus.geo diff --git a/tests/meshes/annulus.msh b/tests/firedrake/meshes/annulus.msh similarity index 100% rename from tests/meshes/annulus.msh rename to tests/firedrake/meshes/annulus.msh diff --git a/tests/meshes/brick.e b/tests/firedrake/meshes/brick.e similarity index 100% rename from tests/meshes/brick.e rename to tests/firedrake/meshes/brick.e diff --git a/tests/meshes/broken_rogue_point.geo b/tests/firedrake/meshes/broken_rogue_point.geo similarity index 100% rename from tests/meshes/broken_rogue_point.geo rename to tests/firedrake/meshes/broken_rogue_point.geo diff --git a/tests/meshes/broken_rogue_point.msh b/tests/firedrake/meshes/broken_rogue_point.msh similarity index 100% rename from tests/meshes/broken_rogue_point.msh rename to tests/firedrake/meshes/broken_rogue_point.msh diff --git a/tests/meshes/cell-sets.geo b/tests/firedrake/meshes/cell-sets.geo similarity index 100% rename from tests/meshes/cell-sets.geo rename to tests/firedrake/meshes/cell-sets.geo diff --git a/tests/meshes/cell-sets.msh b/tests/firedrake/meshes/cell-sets.msh similarity index 100% rename from tests/meshes/cell-sets.msh rename to tests/firedrake/meshes/cell-sets.msh diff --git a/tests/meshes/circle_in_square.geo b/tests/firedrake/meshes/circle_in_square.geo similarity index 100% rename from tests/meshes/circle_in_square.geo rename to tests/firedrake/meshes/circle_in_square.geo diff --git a/tests/meshes/circle_in_square.msh b/tests/firedrake/meshes/circle_in_square.msh similarity index 100% rename from tests/meshes/circle_in_square.msh rename to tests/firedrake/meshes/circle_in_square.msh diff --git a/tests/meshes/cube_hex.geo b/tests/firedrake/meshes/cube_hex.geo similarity index 100% rename from tests/meshes/cube_hex.geo rename to 
tests/firedrake/meshes/cube_hex.geo diff --git a/tests/meshes/cube_hex.msh b/tests/firedrake/meshes/cube_hex.msh similarity index 100% rename from tests/meshes/cube_hex.msh rename to tests/firedrake/meshes/cube_hex.msh diff --git a/tests/meshes/cylinder.step b/tests/firedrake/meshes/cylinder.step similarity index 100% rename from tests/meshes/cylinder.step rename to tests/firedrake/meshes/cylinder.step diff --git a/tests/meshes/disk.step b/tests/firedrake/meshes/disk.step similarity index 100% rename from tests/meshes/disk.step rename to tests/firedrake/meshes/disk.step diff --git "a/tests/meshes/m\303\266bius_solid.geo" "b/tests/firedrake/meshes/m\303\266bius_solid.geo" similarity index 100% rename from "tests/meshes/m\303\266bius_solid.geo" rename to "tests/firedrake/meshes/m\303\266bius_solid.geo" diff --git "a/tests/meshes/m\303\266bius_solid.msh" "b/tests/firedrake/meshes/m\303\266bius_solid.msh" similarity index 100% rename from "tests/meshes/m\303\266bius_solid.msh" rename to "tests/firedrake/meshes/m\303\266bius_solid.msh" diff --git a/tests/meshes/sphere.geo b/tests/firedrake/meshes/sphere.geo similarity index 100% rename from tests/meshes/sphere.geo rename to tests/firedrake/meshes/sphere.geo diff --git a/tests/meshes/square.geo b/tests/firedrake/meshes/square.geo similarity index 100% rename from tests/meshes/square.geo rename to tests/firedrake/meshes/square.geo diff --git a/tests/meshes/square.msh b/tests/firedrake/meshes/square.msh similarity index 100% rename from tests/meshes/square.msh rename to tests/firedrake/meshes/square.msh diff --git a/tests/meshes/square_binary.msh b/tests/firedrake/meshes/square_binary.msh similarity index 100% rename from tests/meshes/square_binary.msh rename to tests/firedrake/meshes/square_binary.msh diff --git a/tests/meshes/square_with_embedded_line.geo b/tests/firedrake/meshes/square_with_embedded_line.geo similarity index 100% rename from tests/meshes/square_with_embedded_line.geo rename to 
tests/firedrake/meshes/square_with_embedded_line.geo diff --git a/tests/meshes/square_with_embedded_line.msh b/tests/firedrake/meshes/square_with_embedded_line.msh similarity index 100% rename from tests/meshes/square_with_embedded_line.msh rename to tests/firedrake/meshes/square_with_embedded_line.msh diff --git a/tests/meshes/t11_quad.msh b/tests/firedrake/meshes/t11_quad.msh similarity index 100% rename from tests/meshes/t11_quad.msh rename to tests/firedrake/meshes/t11_quad.msh diff --git a/tests/meshes/t11_tria.msh b/tests/firedrake/meshes/t11_tria.msh similarity index 100% rename from tests/meshes/t11_tria.msh rename to tests/firedrake/meshes/t11_tria.msh diff --git a/tests/meshes/t_twist.step b/tests/firedrake/meshes/t_twist.step similarity index 100% rename from tests/meshes/t_twist.step rename to tests/firedrake/meshes/t_twist.step diff --git a/tests/meshes/test_meshes_volume.py b/tests/firedrake/meshes/test_meshes_volume.py similarity index 100% rename from tests/meshes/test_meshes_volume.py rename to tests/firedrake/meshes/test_meshes_volume.py diff --git a/tests/meshes/unitsquare_unstructured_quadrilaterals.msh b/tests/firedrake/meshes/unitsquare_unstructured_quadrilaterals.msh similarity index 100% rename from tests/meshes/unitsquare_unstructured_quadrilaterals.msh rename to tests/firedrake/meshes/unitsquare_unstructured_quadrilaterals.msh diff --git a/tests/multigrid/test_basics.py b/tests/firedrake/multigrid/test_basics.py similarity index 100% rename from tests/multigrid/test_basics.py rename to tests/firedrake/multigrid/test_basics.py diff --git a/tests/multigrid/test_custom_transfer.py b/tests/firedrake/multigrid/test_custom_transfer.py similarity index 100% rename from tests/multigrid/test_custom_transfer.py rename to tests/firedrake/multigrid/test_custom_transfer.py diff --git a/tests/multigrid/test_embedded_transfer.py b/tests/firedrake/multigrid/test_embedded_transfer.py similarity index 100% rename from 
tests/multigrid/test_embedded_transfer.py rename to tests/firedrake/multigrid/test_embedded_transfer.py diff --git a/tests/multigrid/test_extruded_semicoarsen.py b/tests/firedrake/multigrid/test_extruded_semicoarsen.py similarity index 100% rename from tests/multigrid/test_extruded_semicoarsen.py rename to tests/firedrake/multigrid/test_extruded_semicoarsen.py diff --git a/tests/multigrid/test_grid_transfer.py b/tests/firedrake/multigrid/test_grid_transfer.py similarity index 100% rename from tests/multigrid/test_grid_transfer.py rename to tests/firedrake/multigrid/test_grid_transfer.py diff --git a/tests/multigrid/test_hiptmair.py b/tests/firedrake/multigrid/test_hiptmair.py similarity index 100% rename from tests/multigrid/test_hiptmair.py rename to tests/firedrake/multigrid/test_hiptmair.py diff --git a/tests/multigrid/test_inject_refined_extruded.py b/tests/firedrake/multigrid/test_inject_refined_extruded.py similarity index 100% rename from tests/multigrid/test_inject_refined_extruded.py rename to tests/firedrake/multigrid/test_inject_refined_extruded.py diff --git a/tests/multigrid/test_invalid_transfers.py b/tests/firedrake/multigrid/test_invalid_transfers.py similarity index 100% rename from tests/multigrid/test_invalid_transfers.py rename to tests/firedrake/multigrid/test_invalid_transfers.py diff --git a/tests/multigrid/test_multi_space_transfer.py b/tests/firedrake/multigrid/test_multi_space_transfer.py similarity index 100% rename from tests/multigrid/test_multi_space_transfer.py rename to tests/firedrake/multigrid/test_multi_space_transfer.py diff --git a/tests/multigrid/test_nested_split.py b/tests/firedrake/multigrid/test_nested_split.py similarity index 100% rename from tests/multigrid/test_nested_split.py rename to tests/firedrake/multigrid/test_nested_split.py diff --git a/tests/multigrid/test_netgen_gmg.py b/tests/firedrake/multigrid/test_netgen_gmg.py similarity index 100% rename from tests/multigrid/test_netgen_gmg.py rename to 
tests/firedrake/multigrid/test_netgen_gmg.py diff --git a/tests/multigrid/test_non_nested.py b/tests/firedrake/multigrid/test_non_nested.py similarity index 100% rename from tests/multigrid/test_non_nested.py rename to tests/firedrake/multigrid/test_non_nested.py diff --git a/tests/multigrid/test_opencascade_poisson.py b/tests/firedrake/multigrid/test_opencascade_poisson.py similarity index 100% rename from tests/multigrid/test_opencascade_poisson.py rename to tests/firedrake/multigrid/test_opencascade_poisson.py diff --git a/tests/multigrid/test_opencascade_volume.py b/tests/firedrake/multigrid/test_opencascade_volume.py similarity index 100% rename from tests/multigrid/test_opencascade_volume.py rename to tests/firedrake/multigrid/test_opencascade_volume.py diff --git a/tests/multigrid/test_p_multigrid.py b/tests/firedrake/multigrid/test_p_multigrid.py similarity index 100% rename from tests/multigrid/test_p_multigrid.py rename to tests/firedrake/multigrid/test_p_multigrid.py diff --git a/tests/multigrid/test_poisson_gmg.py b/tests/firedrake/multigrid/test_poisson_gmg.py similarity index 100% rename from tests/multigrid/test_poisson_gmg.py rename to tests/firedrake/multigrid/test_poisson_gmg.py diff --git a/tests/multigrid/test_poisson_gmg_extruded.py b/tests/firedrake/multigrid/test_poisson_gmg_extruded.py similarity index 100% rename from tests/multigrid/test_poisson_gmg_extruded.py rename to tests/firedrake/multigrid/test_poisson_gmg_extruded.py diff --git a/tests/multigrid/test_poisson_gmg_extruded_serendipity.py b/tests/firedrake/multigrid/test_poisson_gmg_extruded_serendipity.py similarity index 100% rename from tests/multigrid/test_poisson_gmg_extruded_serendipity.py rename to tests/firedrake/multigrid/test_poisson_gmg_extruded_serendipity.py diff --git a/tests/multigrid/test_poisson_gtmg.py b/tests/firedrake/multigrid/test_poisson_gtmg.py similarity index 100% rename from tests/multigrid/test_poisson_gtmg.py rename to 
tests/firedrake/multigrid/test_poisson_gtmg.py diff --git a/tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py b/tests/firedrake/multigrid/test_poisson_p1pcmg_extruded_serendipity.py similarity index 100% rename from tests/multigrid/test_poisson_p1pcmg_extruded_serendipity.py rename to tests/firedrake/multigrid/test_poisson_p1pcmg_extruded_serendipity.py diff --git a/tests/multigrid/test_refine_then_solve.py b/tests/firedrake/multigrid/test_refine_then_solve.py similarity index 100% rename from tests/multigrid/test_refine_then_solve.py rename to tests/firedrake/multigrid/test_refine_then_solve.py diff --git a/tests/multigrid/test_transfer_manager.py b/tests/firedrake/multigrid/test_transfer_manager.py similarity index 100% rename from tests/multigrid/test_transfer_manager.py rename to tests/firedrake/multigrid/test_transfer_manager.py diff --git a/tests/multigrid/test_two_poisson_gmg.py b/tests/firedrake/multigrid/test_two_poisson_gmg.py similarity index 100% rename from tests/multigrid/test_two_poisson_gmg.py rename to tests/firedrake/multigrid/test_two_poisson_gmg.py diff --git a/tests/output/conftest.py b/tests/firedrake/output/conftest.py similarity index 100% rename from tests/output/conftest.py rename to tests/firedrake/output/conftest.py diff --git a/tests/output/test_adjoint_disk_checkpointing.py b/tests/firedrake/output/test_adjoint_disk_checkpointing.py similarity index 100% rename from tests/output/test_adjoint_disk_checkpointing.py rename to tests/firedrake/output/test_adjoint_disk_checkpointing.py diff --git a/tests/output/test_dumb_checkpoint.py b/tests/firedrake/output/test_dumb_checkpoint.py similarity index 100% rename from tests/output/test_dumb_checkpoint.py rename to tests/firedrake/output/test_dumb_checkpoint.py diff --git a/tests/output/test_function_plotter.py b/tests/firedrake/output/test_function_plotter.py similarity index 100% rename from tests/output/test_function_plotter.py rename to 
tests/firedrake/output/test_function_plotter.py diff --git a/tests/output/test_hdf5file_checkpoint.py b/tests/firedrake/output/test_hdf5file_checkpoint.py similarity index 100% rename from tests/output/test_hdf5file_checkpoint.py rename to tests/firedrake/output/test_hdf5file_checkpoint.py diff --git a/tests/output/test_io_backward_compat.py b/tests/firedrake/output/test_io_backward_compat.py similarity index 100% rename from tests/output/test_io_backward_compat.py rename to tests/firedrake/output/test_io_backward_compat.py diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_hexahedron.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_hexahedron.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_hexahedron.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_hexahedron.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_interval_extruded_periodic.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_interval_extruded_periodic.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_interval_extruded_periodic.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_interval_extruded_periodic.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_quadrilateral.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_quadrilateral.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_quadrilateral.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_quadrilateral.h5 diff --git 
a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_quadrilateral_immersed.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_quadrilateral_immersed.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_quadrilateral_immersed.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_quadrilateral_immersed.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_tetrahedron.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_tetrahedron.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_tetrahedron.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_tetrahedron.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_tetrahedron_periodic.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_tetrahedron_periodic.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_tetrahedron_periodic.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_tetrahedron_periodic.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_timestepping.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_timestepping.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_timestepping.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_timestepping.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle.h5 
b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_extruded.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_extruded.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_extruded.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_extruded.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_extruded_real.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_extruded_real.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_extruded_real.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_extruded_real.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_immersed.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_immersed.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_immersed.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_immersed.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_mixed.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_mixed.h5 
similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_mixed.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_mixed.h5 diff --git a/tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_periodic.h5 b/tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_periodic.h5 similarity index 100% rename from tests/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_periodic.h5 rename to tests/firedrake/output/test_io_backward_compat_files/test_io_backward_compat_2024_01_27_triangle_periodic.h5 diff --git a/tests/output/test_io_freeze_distribution_permutation.py b/tests/firedrake/output/test_io_freeze_distribution_permutation.py similarity index 100% rename from tests/output/test_io_freeze_distribution_permutation.py rename to tests/firedrake/output/test_io_freeze_distribution_permutation.py diff --git a/tests/output/test_io_function.py b/tests/firedrake/output/test_io_function.py similarity index 100% rename from tests/output/test_io_function.py rename to tests/firedrake/output/test_io_function.py diff --git a/tests/output/test_io_mesh.py b/tests/firedrake/output/test_io_mesh.py similarity index 100% rename from tests/output/test_io_mesh.py rename to tests/firedrake/output/test_io_mesh.py diff --git a/tests/output/test_io_solve.py b/tests/firedrake/output/test_io_solve.py similarity index 100% rename from tests/output/test_io_solve.py rename to tests/firedrake/output/test_io_solve.py diff --git a/tests/output/test_io_timestepping.py b/tests/firedrake/output/test_io_timestepping.py similarity index 100% rename from tests/output/test_io_timestepping.py rename to tests/firedrake/output/test_io_timestepping.py diff --git a/tests/output/test_plotting.py b/tests/firedrake/output/test_plotting.py similarity index 100% rename from 
tests/output/test_plotting.py rename to tests/firedrake/output/test_plotting.py diff --git a/tests/output/test_pvd_output.py b/tests/firedrake/output/test_pvd_output.py similarity index 100% rename from tests/output/test_pvd_output.py rename to tests/firedrake/output/test_pvd_output.py diff --git a/tests/randomfunctiongen/test_randomfunction.py b/tests/firedrake/randomfunctiongen/test_randomfunction.py similarity index 100% rename from tests/randomfunctiongen/test_randomfunction.py rename to tests/firedrake/randomfunctiongen/test_randomfunction.py diff --git a/tests/randomfunctiongen/test_randomgen_compatibility.py b/tests/firedrake/randomfunctiongen/test_randomgen_compatibility.py similarity index 100% rename from tests/randomfunctiongen/test_randomgen_compatibility.py rename to tests/firedrake/randomfunctiongen/test_randomgen_compatibility.py diff --git a/tests/regression/test_2dcohomology.py b/tests/firedrake/regression/test_2dcohomology.py similarity index 100% rename from tests/regression/test_2dcohomology.py rename to tests/firedrake/regression/test_2dcohomology.py diff --git a/tests/regression/test_adjoint_bc.py b/tests/firedrake/regression/test_adjoint_bc.py similarity index 100% rename from tests/regression/test_adjoint_bc.py rename to tests/firedrake/regression/test_adjoint_bc.py diff --git a/tests/regression/test_adjoint_operators.py b/tests/firedrake/regression/test_adjoint_operators.py similarity index 100% rename from tests/regression/test_adjoint_operators.py rename to tests/firedrake/regression/test_adjoint_operators.py diff --git a/tests/regression/test_adv_diff.py b/tests/firedrake/regression/test_adv_diff.py similarity index 100% rename from tests/regression/test_adv_diff.py rename to tests/firedrake/regression/test_adv_diff.py diff --git a/tests/regression/test_adv_diff_nonsplit.py b/tests/firedrake/regression/test_adv_diff_nonsplit.py similarity index 100% rename from tests/regression/test_adv_diff_nonsplit.py rename to 
tests/firedrake/regression/test_adv_diff_nonsplit.py diff --git a/tests/regression/test_appctx_cleanup.py b/tests/firedrake/regression/test_appctx_cleanup.py similarity index 100% rename from tests/regression/test_appctx_cleanup.py rename to tests/firedrake/regression/test_appctx_cleanup.py diff --git a/tests/regression/test_assemble.py b/tests/firedrake/regression/test_assemble.py similarity index 100% rename from tests/regression/test_assemble.py rename to tests/firedrake/regression/test_assemble.py diff --git a/tests/regression/test_assemble_baseform.py b/tests/firedrake/regression/test_assemble_baseform.py similarity index 100% rename from tests/regression/test_assemble_baseform.py rename to tests/firedrake/regression/test_assemble_baseform.py diff --git a/tests/regression/test_auxiliary_dm.py b/tests/firedrake/regression/test_auxiliary_dm.py similarity index 100% rename from tests/regression/test_auxiliary_dm.py rename to tests/firedrake/regression/test_auxiliary_dm.py diff --git a/tests/regression/test_bcs.py b/tests/firedrake/regression/test_bcs.py similarity index 100% rename from tests/regression/test_bcs.py rename to tests/firedrake/regression/test_bcs.py diff --git a/tests/regression/test_bdmc.py b/tests/firedrake/regression/test_bdmc.py similarity index 100% rename from tests/regression/test_bdmc.py rename to tests/firedrake/regression/test_bdmc.py diff --git a/tests/regression/test_bdmc_riesz_map.py b/tests/firedrake/regression/test_bdmc_riesz_map.py similarity index 100% rename from tests/regression/test_bdmc_riesz_map.py rename to tests/firedrake/regression/test_bdmc_riesz_map.py diff --git a/tests/regression/test_bessel_functions.py b/tests/firedrake/regression/test_bessel_functions.py similarity index 100% rename from tests/regression/test_bessel_functions.py rename to tests/firedrake/regression/test_bessel_functions.py diff --git a/tests/regression/test_bubble.py b/tests/firedrake/regression/test_bubble.py similarity index 100% rename from 
tests/regression/test_bubble.py rename to tests/firedrake/regression/test_bubble.py diff --git a/tests/regression/test_cell_subdomains.py b/tests/firedrake/regression/test_cell_subdomains.py similarity index 100% rename from tests/regression/test_cell_subdomains.py rename to tests/firedrake/regression/test_cell_subdomains.py diff --git a/tests/regression/test_cellcoordinate.py b/tests/firedrake/regression/test_cellcoordinate.py similarity index 100% rename from tests/regression/test_cellcoordinate.py rename to tests/firedrake/regression/test_cellcoordinate.py diff --git a/tests/regression/test_cellorigin.py b/tests/firedrake/regression/test_cellorigin.py similarity index 100% rename from tests/regression/test_cellorigin.py rename to tests/firedrake/regression/test_cellorigin.py diff --git a/tests/regression/test_cellvolume.py b/tests/firedrake/regression/test_cellvolume.py similarity index 100% rename from tests/regression/test_cellvolume.py rename to tests/firedrake/regression/test_cellvolume.py diff --git a/tests/regression/test_change_coordinates.py b/tests/firedrake/regression/test_change_coordinates.py similarity index 100% rename from tests/regression/test_change_coordinates.py rename to tests/firedrake/regression/test_change_coordinates.py diff --git a/tests/regression/test_circle_manifold.py b/tests/firedrake/regression/test_circle_manifold.py similarity index 100% rename from tests/regression/test_circle_manifold.py rename to tests/firedrake/regression/test_circle_manifold.py diff --git a/tests/regression/test_coarse_nullspace.py b/tests/firedrake/regression/test_coarse_nullspace.py similarity index 100% rename from tests/regression/test_coarse_nullspace.py rename to tests/firedrake/regression/test_coarse_nullspace.py diff --git a/tests/regression/test_coefficient_derivatives.py b/tests/firedrake/regression/test_coefficient_derivatives.py similarity index 100% rename from tests/regression/test_coefficient_derivatives.py rename to 
tests/firedrake/regression/test_coefficient_derivatives.py diff --git a/tests/regression/test_cofunction.py b/tests/firedrake/regression/test_cofunction.py similarity index 100% rename from tests/regression/test_cofunction.py rename to tests/firedrake/regression/test_cofunction.py diff --git a/tests/regression/test_conditional.py b/tests/firedrake/regression/test_conditional.py similarity index 100% rename from tests/regression/test_conditional.py rename to tests/firedrake/regression/test_conditional.py diff --git a/tests/regression/test_constant.py b/tests/firedrake/regression/test_constant.py similarity index 100% rename from tests/regression/test_constant.py rename to tests/firedrake/regression/test_constant.py diff --git a/tests/regression/test_coordinatederivative.py b/tests/firedrake/regression/test_coordinatederivative.py similarity index 100% rename from tests/regression/test_coordinatederivative.py rename to tests/firedrake/regression/test_coordinatederivative.py diff --git a/tests/regression/test_custom_callbacks.py b/tests/firedrake/regression/test_custom_callbacks.py similarity index 100% rename from tests/regression/test_custom_callbacks.py rename to tests/firedrake/regression/test_custom_callbacks.py diff --git a/tests/regression/test_custom_pc_python_pmat.py b/tests/firedrake/regression/test_custom_pc_python_pmat.py similarity index 100% rename from tests/regression/test_custom_pc_python_pmat.py rename to tests/firedrake/regression/test_custom_pc_python_pmat.py diff --git a/tests/regression/test_dg_advection.py b/tests/firedrake/regression/test_dg_advection.py similarity index 100% rename from tests/regression/test_dg_advection.py rename to tests/firedrake/regression/test_dg_advection.py diff --git a/tests/regression/test_eigensolver.py b/tests/firedrake/regression/test_eigensolver.py similarity index 100% rename from tests/regression/test_eigensolver.py rename to tests/firedrake/regression/test_eigensolver.py diff --git 
a/tests/regression/test_element_mapping.py b/tests/firedrake/regression/test_element_mapping.py similarity index 100% rename from tests/regression/test_element_mapping.py rename to tests/firedrake/regression/test_element_mapping.py diff --git a/tests/regression/test_embedded_sphere.py b/tests/firedrake/regression/test_embedded_sphere.py similarity index 100% rename from tests/regression/test_embedded_sphere.py rename to tests/firedrake/regression/test_embedded_sphere.py diff --git a/tests/regression/test_ensembleparallelism.py b/tests/firedrake/regression/test_ensembleparallelism.py similarity index 98% rename from tests/regression/test_ensembleparallelism.py rename to tests/firedrake/regression/test_ensembleparallelism.py index 49db8e7d3f..23712cee51 100644 --- a/tests/regression/test_ensembleparallelism.py +++ b/tests/firedrake/regression/test_ensembleparallelism.py @@ -205,13 +205,13 @@ def test_ensemble_reduce(ensemble, mesh, W, urank, urank_sum, root, blocking): parallel_assert( lambda: error < 1e-12, subset=root_ranks, - msg=f"{error=:.5f}" + msg=f"{error = :.5f}" # noqa: E203, E251 ) error = errornorm(Function(W).assign(10), u_reduce) parallel_assert( lambda: error < 1e-12, subset={range(COMM_WORLD.size)} - root_ranks, - msg=f"{error=:.5f}" + msg=f"{error = :.5f}" # noqa: E203, E251 ) # check that u_reduce dat vector is still synchronised @@ -347,7 +347,7 @@ def test_send_and_recv(ensemble, mesh, W, blocking): parallel_assert( lambda: error < 1e-12, subset=root_ranks, - msg=f"{error=:.5f}" + msg=f"{error = :.5f}" # noqa: E203, E251 ) diff --git a/tests/regression/test_exodus_mesh.py b/tests/firedrake/regression/test_exodus_mesh.py similarity index 100% rename from tests/regression/test_exodus_mesh.py rename to tests/firedrake/regression/test_exodus_mesh.py diff --git a/tests/regression/test_expressions.py b/tests/firedrake/regression/test_expressions.py similarity index 100% rename from tests/regression/test_expressions.py rename to 
tests/firedrake/regression/test_expressions.py diff --git a/tests/regression/test_facet_avg.py b/tests/firedrake/regression/test_facet_avg.py similarity index 100% rename from tests/regression/test_facet_avg.py rename to tests/firedrake/regression/test_facet_avg.py diff --git a/tests/regression/test_facet_elements.py b/tests/firedrake/regression/test_facet_elements.py similarity index 100% rename from tests/regression/test_facet_elements.py rename to tests/firedrake/regression/test_facet_elements.py diff --git a/tests/regression/test_facet_normal.py b/tests/firedrake/regression/test_facet_normal.py similarity index 100% rename from tests/regression/test_facet_normal.py rename to tests/firedrake/regression/test_facet_normal.py diff --git a/tests/regression/test_facet_orientation.py b/tests/firedrake/regression/test_facet_orientation.py similarity index 100% rename from tests/regression/test_facet_orientation.py rename to tests/firedrake/regression/test_facet_orientation.py diff --git a/tests/regression/test_facet_split.py b/tests/firedrake/regression/test_facet_split.py similarity index 100% rename from tests/regression/test_facet_split.py rename to tests/firedrake/regression/test_facet_split.py diff --git a/tests/regression/test_facets.py b/tests/firedrake/regression/test_facets.py similarity index 100% rename from tests/regression/test_facets.py rename to tests/firedrake/regression/test_facets.py diff --git a/tests/regression/test_fas_snespatch.py b/tests/firedrake/regression/test_fas_snespatch.py similarity index 100% rename from tests/regression/test_fas_snespatch.py rename to tests/firedrake/regression/test_fas_snespatch.py diff --git a/tests/regression/test_fdm.py b/tests/firedrake/regression/test_fdm.py similarity index 100% rename from tests/regression/test_fdm.py rename to tests/firedrake/regression/test_fdm.py diff --git a/tests/regression/test_fieldsplit_breadcrumbs.py b/tests/firedrake/regression/test_fieldsplit_breadcrumbs.py similarity index 100% 
rename from tests/regression/test_fieldsplit_breadcrumbs.py rename to tests/firedrake/regression/test_fieldsplit_breadcrumbs.py diff --git a/tests/regression/test_fieldsplit_fieldsplit_aux_multigrid.py b/tests/firedrake/regression/test_fieldsplit_fieldsplit_aux_multigrid.py similarity index 100% rename from tests/regression/test_fieldsplit_fieldsplit_aux_multigrid.py rename to tests/firedrake/regression/test_fieldsplit_fieldsplit_aux_multigrid.py diff --git a/tests/regression/test_fieldsplit_split_reorder_bcs.py b/tests/firedrake/regression/test_fieldsplit_split_reorder_bcs.py similarity index 100% rename from tests/regression/test_fieldsplit_split_reorder_bcs.py rename to tests/firedrake/regression/test_fieldsplit_split_reorder_bcs.py diff --git a/tests/regression/test_fml.py b/tests/firedrake/regression/test_fml.py similarity index 100% rename from tests/regression/test_fml.py rename to tests/firedrake/regression/test_fml.py diff --git a/tests/regression/test_fs_caching.py b/tests/firedrake/regression/test_fs_caching.py similarity index 100% rename from tests/regression/test_fs_caching.py rename to tests/firedrake/regression/test_fs_caching.py diff --git a/tests/regression/test_function.py b/tests/firedrake/regression/test_function.py similarity index 100% rename from tests/regression/test_function.py rename to tests/firedrake/regression/test_function.py diff --git a/tests/regression/test_function_spaces.py b/tests/firedrake/regression/test_function_spaces.py similarity index 100% rename from tests/regression/test_function_spaces.py rename to tests/firedrake/regression/test_function_spaces.py diff --git a/tests/regression/test_garbage.py b/tests/firedrake/regression/test_garbage.py similarity index 100% rename from tests/regression/test_garbage.py rename to tests/firedrake/regression/test_garbage.py diff --git a/tests/regression/test_helmholtz.py b/tests/firedrake/regression/test_helmholtz.py similarity index 100% rename from tests/regression/test_helmholtz.py 
rename to tests/firedrake/regression/test_helmholtz.py diff --git a/tests/regression/test_helmholtz_bernstein.py b/tests/firedrake/regression/test_helmholtz_bernstein.py similarity index 100% rename from tests/regression/test_helmholtz_bernstein.py rename to tests/firedrake/regression/test_helmholtz_bernstein.py diff --git a/tests/regression/test_helmholtz_complex.py b/tests/firedrake/regression/test_helmholtz_complex.py similarity index 100% rename from tests/regression/test_helmholtz_complex.py rename to tests/firedrake/regression/test_helmholtz_complex.py diff --git a/tests/regression/test_helmholtz_mixed.py b/tests/firedrake/regression/test_helmholtz_mixed.py similarity index 100% rename from tests/regression/test_helmholtz_mixed.py rename to tests/firedrake/regression/test_helmholtz_mixed.py diff --git a/tests/regression/test_helmholtz_nonlinear_diffusion.py b/tests/firedrake/regression/test_helmholtz_nonlinear_diffusion.py similarity index 100% rename from tests/regression/test_helmholtz_nonlinear_diffusion.py rename to tests/firedrake/regression/test_helmholtz_nonlinear_diffusion.py diff --git a/tests/regression/test_helmholtz_serendipity.py b/tests/firedrake/regression/test_helmholtz_serendipity.py similarity index 100% rename from tests/regression/test_helmholtz_serendipity.py rename to tests/firedrake/regression/test_helmholtz_serendipity.py diff --git a/tests/regression/test_helmholtz_sphere.py b/tests/firedrake/regression/test_helmholtz_sphere.py similarity index 100% rename from tests/regression/test_helmholtz_sphere.py rename to tests/firedrake/regression/test_helmholtz_sphere.py diff --git a/tests/regression/test_helmholtz_zany.py b/tests/firedrake/regression/test_helmholtz_zany.py similarity index 100% rename from tests/regression/test_helmholtz_zany.py rename to tests/firedrake/regression/test_helmholtz_zany.py diff --git a/tests/regression/test_hypre_ads.py b/tests/firedrake/regression/test_hypre_ads.py similarity index 100% rename from 
tests/regression/test_hypre_ads.py rename to tests/firedrake/regression/test_hypre_ads.py diff --git a/tests/regression/test_hypre_ams.py b/tests/firedrake/regression/test_hypre_ams.py similarity index 100% rename from tests/regression/test_hypre_ams.py rename to tests/firedrake/regression/test_hypre_ams.py diff --git a/tests/regression/test_identity.py b/tests/firedrake/regression/test_identity.py similarity index 100% rename from tests/regression/test_identity.py rename to tests/firedrake/regression/test_identity.py diff --git a/tests/regression/test_import_applications.py b/tests/firedrake/regression/test_import_applications.py similarity index 100% rename from tests/regression/test_import_applications.py rename to tests/firedrake/regression/test_import_applications.py diff --git a/tests/regression/test_info.py b/tests/firedrake/regression/test_info.py similarity index 100% rename from tests/regression/test_info.py rename to tests/firedrake/regression/test_info.py diff --git a/tests/regression/test_integral_hex.py b/tests/firedrake/regression/test_integral_hex.py similarity index 100% rename from tests/regression/test_integral_hex.py rename to tests/firedrake/regression/test_integral_hex.py diff --git a/tests/regression/test_interior_bcs.py b/tests/firedrake/regression/test_interior_bcs.py similarity index 100% rename from tests/regression/test_interior_bcs.py rename to tests/firedrake/regression/test_interior_bcs.py diff --git a/tests/regression/test_interior_elements.py b/tests/firedrake/regression/test_interior_elements.py similarity index 100% rename from tests/regression/test_interior_elements.py rename to tests/firedrake/regression/test_interior_elements.py diff --git a/tests/regression/test_interior_facets.py b/tests/firedrake/regression/test_interior_facets.py similarity index 100% rename from tests/regression/test_interior_facets.py rename to tests/firedrake/regression/test_interior_facets.py diff --git a/tests/regression/test_interp_dual.py 
b/tests/firedrake/regression/test_interp_dual.py similarity index 100% rename from tests/regression/test_interp_dual.py rename to tests/firedrake/regression/test_interp_dual.py diff --git a/tests/regression/test_interpolate.py b/tests/firedrake/regression/test_interpolate.py similarity index 100% rename from tests/regression/test_interpolate.py rename to tests/firedrake/regression/test_interpolate.py diff --git a/tests/regression/test_interpolate_cross_mesh.py b/tests/firedrake/regression/test_interpolate_cross_mesh.py similarity index 100% rename from tests/regression/test_interpolate_cross_mesh.py rename to tests/firedrake/regression/test_interpolate_cross_mesh.py diff --git a/tests/regression/test_interpolate_p3intmoments.py b/tests/firedrake/regression/test_interpolate_p3intmoments.py similarity index 100% rename from tests/regression/test_interpolate_p3intmoments.py rename to tests/firedrake/regression/test_interpolate_p3intmoments.py diff --git a/tests/regression/test_interpolate_vs_project.py b/tests/firedrake/regression/test_interpolate_vs_project.py similarity index 100% rename from tests/regression/test_interpolate_vs_project.py rename to tests/firedrake/regression/test_interpolate_vs_project.py diff --git a/tests/regression/test_interpolate_zany.py b/tests/firedrake/regression/test_interpolate_zany.py similarity index 100% rename from tests/regression/test_interpolate_zany.py rename to tests/firedrake/regression/test_interpolate_zany.py diff --git a/tests/regression/test_interpolation_manual.py b/tests/firedrake/regression/test_interpolation_manual.py similarity index 100% rename from tests/regression/test_interpolation_manual.py rename to tests/firedrake/regression/test_interpolation_manual.py diff --git a/tests/regression/test_interpolation_nodes.py b/tests/firedrake/regression/test_interpolation_nodes.py similarity index 100% rename from tests/regression/test_interpolation_nodes.py rename to tests/firedrake/regression/test_interpolation_nodes.py diff 
--git a/tests/regression/test_ip_viscosity.py b/tests/firedrake/regression/test_ip_viscosity.py similarity index 100% rename from tests/regression/test_ip_viscosity.py rename to tests/firedrake/regression/test_ip_viscosity.py diff --git a/tests/regression/test_jacobian_invalidation.py b/tests/firedrake/regression/test_jacobian_invalidation.py similarity index 100% rename from tests/regression/test_jacobian_invalidation.py rename to tests/firedrake/regression/test_jacobian_invalidation.py diff --git a/tests/regression/test_jax_coupling.py b/tests/firedrake/regression/test_jax_coupling.py similarity index 100% rename from tests/regression/test_jax_coupling.py rename to tests/firedrake/regression/test_jax_coupling.py diff --git a/tests/regression/test_l2pullbacks.py b/tests/firedrake/regression/test_l2pullbacks.py similarity index 100% rename from tests/regression/test_l2pullbacks.py rename to tests/firedrake/regression/test_l2pullbacks.py diff --git a/tests/regression/test_line_smoother_periodic.py b/tests/firedrake/regression/test_line_smoother_periodic.py similarity index 100% rename from tests/regression/test_line_smoother_periodic.py rename to tests/firedrake/regression/test_line_smoother_periodic.py diff --git a/tests/regression/test_linear_solver_change_bc.py b/tests/firedrake/regression/test_linear_solver_change_bc.py similarity index 100% rename from tests/regression/test_linear_solver_change_bc.py rename to tests/firedrake/regression/test_linear_solver_change_bc.py diff --git a/tests/regression/test_linesmoother.py b/tests/firedrake/regression/test_linesmoother.py similarity index 100% rename from tests/regression/test_linesmoother.py rename to tests/firedrake/regression/test_linesmoother.py diff --git a/tests/regression/test_linesmoother_vfs.py b/tests/firedrake/regression/test_linesmoother_vfs.py similarity index 100% rename from tests/regression/test_linesmoother_vfs.py rename to tests/firedrake/regression/test_linesmoother_vfs.py diff --git 
a/tests/regression/test_load_mesh.py b/tests/firedrake/regression/test_load_mesh.py similarity index 100% rename from tests/regression/test_load_mesh.py rename to tests/firedrake/regression/test_load_mesh.py diff --git a/tests/regression/test_locate_cell.py b/tests/firedrake/regression/test_locate_cell.py similarity index 100% rename from tests/regression/test_locate_cell.py rename to tests/firedrake/regression/test_locate_cell.py diff --git a/tests/regression/test_manifolds.py b/tests/firedrake/regression/test_manifolds.py similarity index 98% rename from tests/regression/test_manifolds.py rename to tests/firedrake/regression/test_manifolds.py index 74fff831ec..312524baad 100644 --- a/tests/regression/test_manifolds.py +++ b/tests/firedrake/regression/test_manifolds.py @@ -1,10 +1,7 @@ -import firedrake_configuration from firedrake import * import pytest import numpy as np -config = firedrake_configuration.get_config() - # This test solves a mixed formulation of the Poisson equation with # inhomogeneous Neumann boundary conditions such that the exact diff --git a/tests/regression/test_manual_quadrature.py b/tests/firedrake/regression/test_manual_quadrature.py similarity index 100% rename from tests/regression/test_manual_quadrature.py rename to tests/firedrake/regression/test_manual_quadrature.py diff --git a/tests/regression/test_mark_entities.py b/tests/firedrake/regression/test_mark_entities.py similarity index 100% rename from tests/regression/test_mark_entities.py rename to tests/firedrake/regression/test_mark_entities.py diff --git a/tests/regression/test_mass_lumping.py b/tests/firedrake/regression/test_mass_lumping.py similarity index 100% rename from tests/regression/test_mass_lumping.py rename to tests/firedrake/regression/test_mass_lumping.py diff --git a/tests/regression/test_mat_type_dense.py b/tests/firedrake/regression/test_mat_type_dense.py similarity index 100% rename from tests/regression/test_mat_type_dense.py rename to 
tests/firedrake/regression/test_mat_type_dense.py diff --git a/tests/regression/test_matrix.py b/tests/firedrake/regression/test_matrix.py similarity index 100% rename from tests/regression/test_matrix.py rename to tests/firedrake/regression/test_matrix.py diff --git a/tests/regression/test_matrix_free.py b/tests/firedrake/regression/test_matrix_free.py similarity index 100% rename from tests/regression/test_matrix_free.py rename to tests/firedrake/regression/test_matrix_free.py diff --git a/tests/regression/test_matrix_prefix.py b/tests/firedrake/regression/test_matrix_prefix.py similarity index 100% rename from tests/regression/test_matrix_prefix.py rename to tests/firedrake/regression/test_matrix_prefix.py diff --git a/tests/regression/test_mesh_from_plex.py b/tests/firedrake/regression/test_mesh_from_plex.py similarity index 100% rename from tests/regression/test_mesh_from_plex.py rename to tests/firedrake/regression/test_mesh_from_plex.py diff --git a/tests/regression/test_mesh_generation.py b/tests/firedrake/regression/test_mesh_generation.py similarity index 100% rename from tests/regression/test_mesh_generation.py rename to tests/firedrake/regression/test_mesh_generation.py diff --git a/tests/regression/test_mesh_overlaps.py b/tests/firedrake/regression/test_mesh_overlaps.py similarity index 100% rename from tests/regression/test_mesh_overlaps.py rename to tests/firedrake/regression/test_mesh_overlaps.py diff --git a/tests/regression/test_mixed_interior_facets.py b/tests/firedrake/regression/test_mixed_interior_facets.py similarity index 100% rename from tests/regression/test_mixed_interior_facets.py rename to tests/firedrake/regression/test_mixed_interior_facets.py diff --git a/tests/regression/test_mixed_mats.py b/tests/firedrake/regression/test_mixed_mats.py similarity index 100% rename from tests/regression/test_mixed_mats.py rename to tests/firedrake/regression/test_mixed_mats.py diff --git a/tests/regression/test_mixed_tensor.py 
b/tests/firedrake/regression/test_mixed_tensor.py similarity index 100% rename from tests/regression/test_mixed_tensor.py rename to tests/firedrake/regression/test_mixed_tensor.py diff --git a/tests/regression/test_moore_spence.py b/tests/firedrake/regression/test_moore_spence.py similarity index 100% rename from tests/regression/test_moore_spence.py rename to tests/firedrake/regression/test_moore_spence.py diff --git a/tests/regression/test_mtw.py b/tests/firedrake/regression/test_mtw.py similarity index 100% rename from tests/regression/test_mtw.py rename to tests/firedrake/regression/test_mtw.py diff --git a/tests/regression/test_multiple_domains.py b/tests/firedrake/regression/test_multiple_domains.py similarity index 100% rename from tests/regression/test_multiple_domains.py rename to tests/firedrake/regression/test_multiple_domains.py diff --git a/tests/regression/test_nested_fieldsplit_solves.py b/tests/firedrake/regression/test_nested_fieldsplit_solves.py similarity index 100% rename from tests/regression/test_nested_fieldsplit_solves.py rename to tests/firedrake/regression/test_nested_fieldsplit_solves.py diff --git a/tests/regression/test_netgen.py b/tests/firedrake/regression/test_netgen.py similarity index 100% rename from tests/regression/test_netgen.py rename to tests/firedrake/regression/test_netgen.py diff --git a/tests/regression/test_nonlinear_helmholtz.py b/tests/firedrake/regression/test_nonlinear_helmholtz.py similarity index 100% rename from tests/regression/test_nonlinear_helmholtz.py rename to tests/firedrake/regression/test_nonlinear_helmholtz.py diff --git a/tests/regression/test_nonlinear_stokes_hdiv.py b/tests/firedrake/regression/test_nonlinear_stokes_hdiv.py similarity index 100% rename from tests/regression/test_nonlinear_stokes_hdiv.py rename to tests/firedrake/regression/test_nonlinear_stokes_hdiv.py diff --git a/tests/regression/test_norm.py b/tests/firedrake/regression/test_norm.py similarity index 100% rename from 
tests/regression/test_norm.py rename to tests/firedrake/regression/test_norm.py diff --git a/tests/regression/test_nullspace.py b/tests/firedrake/regression/test_nullspace.py similarity index 100% rename from tests/regression/test_nullspace.py rename to tests/firedrake/regression/test_nullspace.py diff --git a/tests/regression/test_octahedral_hemisphere.py b/tests/firedrake/regression/test_octahedral_hemisphere.py similarity index 100% rename from tests/regression/test_octahedral_hemisphere.py rename to tests/firedrake/regression/test_octahedral_hemisphere.py diff --git a/tests/regression/test_p1pc.py b/tests/firedrake/regression/test_p1pc.py similarity index 100% rename from tests/regression/test_p1pc.py rename to tests/firedrake/regression/test_p1pc.py diff --git a/tests/regression/test_par_loops.py b/tests/firedrake/regression/test_par_loops.py similarity index 100% rename from tests/regression/test_par_loops.py rename to tests/firedrake/regression/test_par_loops.py diff --git a/tests/regression/test_parallel_cr.py b/tests/firedrake/regression/test_parallel_cr.py similarity index 100% rename from tests/regression/test_parallel_cr.py rename to tests/firedrake/regression/test_parallel_cr.py diff --git a/tests/regression/test_parallel_kernel.py b/tests/firedrake/regression/test_parallel_kernel.py similarity index 100% rename from tests/regression/test_parallel_kernel.py rename to tests/firedrake/regression/test_parallel_kernel.py diff --git a/tests/regression/test_parameters.py b/tests/firedrake/regression/test_parameters.py similarity index 100% rename from tests/regression/test_parameters.py rename to tests/firedrake/regression/test_parameters.py diff --git a/tests/regression/test_partially_mixed_mat.py b/tests/firedrake/regression/test_partially_mixed_mat.py similarity index 100% rename from tests/regression/test_partially_mixed_mat.py rename to tests/firedrake/regression/test_partially_mixed_mat.py diff --git a/tests/regression/test_patch_pc.py 
b/tests/firedrake/regression/test_patch_pc.py similarity index 100% rename from tests/regression/test_patch_pc.py rename to tests/firedrake/regression/test_patch_pc.py diff --git a/tests/regression/test_patch_precompute_element_tensors.py b/tests/firedrake/regression/test_patch_precompute_element_tensors.py similarity index 100% rename from tests/regression/test_patch_precompute_element_tensors.py rename to tests/firedrake/regression/test_patch_precompute_element_tensors.py diff --git a/tests/regression/test_periodic_2d.py b/tests/firedrake/regression/test_periodic_2d.py similarity index 100% rename from tests/regression/test_periodic_2d.py rename to tests/firedrake/regression/test_periodic_2d.py diff --git a/tests/regression/test_periodic_interval_advection.py b/tests/firedrake/regression/test_periodic_interval_advection.py similarity index 100% rename from tests/regression/test_periodic_interval_advection.py rename to tests/firedrake/regression/test_periodic_interval_advection.py diff --git a/tests/regression/test_periodic_rectangle_advection.py b/tests/firedrake/regression/test_periodic_rectangle_advection.py similarity index 100% rename from tests/regression/test_periodic_rectangle_advection.py rename to tests/firedrake/regression/test_periodic_rectangle_advection.py diff --git a/tests/regression/test_piola_mixed_fn.py b/tests/firedrake/regression/test_piola_mixed_fn.py similarity index 100% rename from tests/regression/test_piola_mixed_fn.py rename to tests/firedrake/regression/test_piola_mixed_fn.py diff --git a/tests/regression/test_planesmoother.py b/tests/firedrake/regression/test_planesmoother.py similarity index 100% rename from tests/regression/test_planesmoother.py rename to tests/firedrake/regression/test_planesmoother.py diff --git a/tests/regression/test_point_eval_api.py b/tests/firedrake/regression/test_point_eval_api.py similarity index 100% rename from tests/regression/test_point_eval_api.py rename to 
tests/firedrake/regression/test_point_eval_api.py diff --git a/tests/regression/test_point_eval_cells.py b/tests/firedrake/regression/test_point_eval_cells.py similarity index 100% rename from tests/regression/test_point_eval_cells.py rename to tests/firedrake/regression/test_point_eval_cells.py diff --git a/tests/regression/test_point_eval_fs.py b/tests/firedrake/regression/test_point_eval_fs.py similarity index 100% rename from tests/regression/test_point_eval_fs.py rename to tests/firedrake/regression/test_point_eval_fs.py diff --git a/tests/regression/test_poisson_mixed_no_bcs.py b/tests/firedrake/regression/test_poisson_mixed_no_bcs.py similarity index 100% rename from tests/regression/test_poisson_mixed_no_bcs.py rename to tests/firedrake/regression/test_poisson_mixed_no_bcs.py diff --git a/tests/regression/test_poisson_mixed_strong_bcs.py b/tests/firedrake/regression/test_poisson_mixed_strong_bcs.py similarity index 100% rename from tests/regression/test_poisson_mixed_strong_bcs.py rename to tests/firedrake/regression/test_poisson_mixed_strong_bcs.py diff --git a/tests/regression/test_poisson_sphere.py b/tests/firedrake/regression/test_poisson_sphere.py similarity index 100% rename from tests/regression/test_poisson_sphere.py rename to tests/firedrake/regression/test_poisson_sphere.py diff --git a/tests/regression/test_poisson_strong_bcs.py b/tests/firedrake/regression/test_poisson_strong_bcs.py similarity index 100% rename from tests/regression/test_poisson_strong_bcs.py rename to tests/firedrake/regression/test_poisson_strong_bcs.py diff --git a/tests/regression/test_poisson_strong_bcs_nitsche.py b/tests/firedrake/regression/test_poisson_strong_bcs_nitsche.py similarity index 100% rename from tests/regression/test_poisson_strong_bcs_nitsche.py rename to tests/firedrake/regression/test_poisson_strong_bcs_nitsche.py diff --git a/tests/regression/test_project_interp_KMV.py b/tests/firedrake/regression/test_project_interp_KMV.py similarity index 96% rename 
from tests/regression/test_project_interp_KMV.py rename to tests/firedrake/regression/test_project_interp_KMV.py index 28ed787344..44ca5ec59c 100644 --- a/tests/regression/test_project_interp_KMV.py +++ b/tests/firedrake/regression/test_project_interp_KMV.py @@ -1,12 +1,9 @@ -import firedrake_configuration import pytest import numpy as np from firedrake import * from firedrake.__future__ import * import finat -config = firedrake_configuration.get_config() - @pytest.fixture(params=["square", "cube"]) def mesh_type(request): diff --git a/tests/regression/test_projection.py b/tests/firedrake/regression/test_projection.py similarity index 100% rename from tests/regression/test_projection.py rename to tests/firedrake/regression/test_projection.py diff --git a/tests/regression/test_projection_direct_serendipity.py b/tests/firedrake/regression/test_projection_direct_serendipity.py similarity index 100% rename from tests/regression/test_projection_direct_serendipity.py rename to tests/firedrake/regression/test_projection_direct_serendipity.py diff --git a/tests/regression/test_projection_symmetric_tensor.py b/tests/firedrake/regression/test_projection_symmetric_tensor.py similarity index 100% rename from tests/regression/test_projection_symmetric_tensor.py rename to tests/firedrake/regression/test_projection_symmetric_tensor.py diff --git a/tests/regression/test_projection_zany.py b/tests/firedrake/regression/test_projection_zany.py similarity index 100% rename from tests/regression/test_projection_zany.py rename to tests/firedrake/regression/test_projection_zany.py diff --git a/tests/regression/test_prolong_ncf_cube.py b/tests/firedrake/regression/test_prolong_ncf_cube.py similarity index 100% rename from tests/regression/test_prolong_ncf_cube.py rename to tests/firedrake/regression/test_prolong_ncf_cube.py diff --git a/tests/regression/test_pytorch_coupling.py b/tests/firedrake/regression/test_pytorch_coupling.py similarity index 100% rename from 
tests/regression/test_pytorch_coupling.py rename to tests/firedrake/regression/test_pytorch_coupling.py diff --git a/tests/regression/test_quadrature.py b/tests/firedrake/regression/test_quadrature.py similarity index 100% rename from tests/regression/test_quadrature.py rename to tests/firedrake/regression/test_quadrature.py diff --git a/tests/regression/test_real_space.py b/tests/firedrake/regression/test_real_space.py similarity index 100% rename from tests/regression/test_real_space.py rename to tests/firedrake/regression/test_real_space.py diff --git a/tests/regression/test_restricted_function_space.py b/tests/firedrake/regression/test_restricted_function_space.py similarity index 100% rename from tests/regression/test_restricted_function_space.py rename to tests/firedrake/regression/test_restricted_function_space.py diff --git a/tests/regression/test_scaled_mass.py b/tests/firedrake/regression/test_scaled_mass.py similarity index 100% rename from tests/regression/test_scaled_mass.py rename to tests/firedrake/regression/test_scaled_mass.py diff --git a/tests/regression/test_serendipity_biharmonic.py b/tests/firedrake/regression/test_serendipity_biharmonic.py similarity index 100% rename from tests/regression/test_serendipity_biharmonic.py rename to tests/firedrake/regression/test_serendipity_biharmonic.py diff --git a/tests/regression/test_slepc.py b/tests/firedrake/regression/test_slepc.py similarity index 100% rename from tests/regression/test_slepc.py rename to tests/firedrake/regression/test_slepc.py diff --git a/tests/regression/test_solver_error_checking.py b/tests/firedrake/regression/test_solver_error_checking.py similarity index 100% rename from tests/regression/test_solver_error_checking.py rename to tests/firedrake/regression/test_solver_error_checking.py diff --git a/tests/regression/test_solvers_options_prefix.py b/tests/firedrake/regression/test_solvers_options_prefix.py similarity index 100% rename from 
tests/regression/test_solvers_options_prefix.py rename to tests/firedrake/regression/test_solvers_options_prefix.py diff --git a/tests/regression/test_solving_interface.py b/tests/firedrake/regression/test_solving_interface.py similarity index 100% rename from tests/regression/test_solving_interface.py rename to tests/firedrake/regression/test_solving_interface.py diff --git a/tests/regression/test_split.py b/tests/firedrake/regression/test_split.py similarity index 100% rename from tests/regression/test_split.py rename to tests/firedrake/regression/test_split.py diff --git a/tests/regression/test_split_communicators.py b/tests/firedrake/regression/test_split_communicators.py similarity index 100% rename from tests/regression/test_split_communicators.py rename to tests/firedrake/regression/test_split_communicators.py diff --git a/tests/regression/test_star_pc.py b/tests/firedrake/regression/test_star_pc.py similarity index 100% rename from tests/regression/test_star_pc.py rename to tests/firedrake/regression/test_star_pc.py diff --git a/tests/regression/test_steady_advection_2D.py b/tests/firedrake/regression/test_steady_advection_2D.py similarity index 100% rename from tests/regression/test_steady_advection_2D.py rename to tests/firedrake/regression/test_steady_advection_2D.py diff --git a/tests/regression/test_steady_advection_3D.py b/tests/firedrake/regression/test_steady_advection_3D.py similarity index 100% rename from tests/regression/test_steady_advection_3D.py rename to tests/firedrake/regression/test_steady_advection_3D.py diff --git a/tests/regression/test_stokes_hdiv_parallel.py b/tests/firedrake/regression/test_stokes_hdiv_parallel.py similarity index 100% rename from tests/regression/test_stokes_hdiv_parallel.py rename to tests/firedrake/regression/test_stokes_hdiv_parallel.py diff --git a/tests/regression/test_stokes_mini.py b/tests/firedrake/regression/test_stokes_mini.py similarity index 100% rename from tests/regression/test_stokes_mini.py rename 
to tests/firedrake/regression/test_stokes_mini.py diff --git a/tests/regression/test_stress_elements.py b/tests/firedrake/regression/test_stress_elements.py similarity index 100% rename from tests/regression/test_stress_elements.py rename to tests/firedrake/regression/test_stress_elements.py diff --git a/tests/regression/test_subdomain.py b/tests/firedrake/regression/test_subdomain.py similarity index 100% rename from tests/regression/test_subdomain.py rename to tests/firedrake/regression/test_subdomain.py diff --git a/tests/regression/test_subdomain_integrals.py b/tests/firedrake/regression/test_subdomain_integrals.py similarity index 100% rename from tests/regression/test_subdomain_integrals.py rename to tests/firedrake/regression/test_subdomain_integrals.py diff --git a/tests/regression/test_taylor.py b/tests/firedrake/regression/test_taylor.py similarity index 100% rename from tests/regression/test_taylor.py rename to tests/firedrake/regression/test_taylor.py diff --git a/tests/regression/test_tensor_algebra.py b/tests/firedrake/regression/test_tensor_algebra.py similarity index 100% rename from tests/regression/test_tensor_algebra.py rename to tests/firedrake/regression/test_tensor_algebra.py diff --git a/tests/regression/test_tensor_elements.py b/tests/firedrake/regression/test_tensor_elements.py similarity index 100% rename from tests/regression/test_tensor_elements.py rename to tests/firedrake/regression/test_tensor_elements.py diff --git a/tests/regression/test_trace_galerkin_projection.py b/tests/firedrake/regression/test_trace_galerkin_projection.py similarity index 100% rename from tests/regression/test_trace_galerkin_projection.py rename to tests/firedrake/regression/test_trace_galerkin_projection.py diff --git a/tests/regression/test_ufl.py b/tests/firedrake/regression/test_ufl.py similarity index 100% rename from tests/regression/test_ufl.py rename to tests/firedrake/regression/test_ufl.py diff --git a/tests/regression/test_upwind_flux.py 
b/tests/firedrake/regression/test_upwind_flux.py similarity index 100% rename from tests/regression/test_upwind_flux.py rename to tests/firedrake/regression/test_upwind_flux.py diff --git a/tests/regression/test_variable_layers.py b/tests/firedrake/regression/test_variable_layers.py similarity index 100% rename from tests/regression/test_variable_layers.py rename to tests/firedrake/regression/test_variable_layers.py diff --git a/tests/regression/test_variants.py b/tests/firedrake/regression/test_variants.py similarity index 100% rename from tests/regression/test_variants.py rename to tests/firedrake/regression/test_variants.py diff --git a/tests/regression/test_vector.py b/tests/firedrake/regression/test_vector.py similarity index 100% rename from tests/regression/test_vector.py rename to tests/firedrake/regression/test_vector.py diff --git a/tests/regression/test_vector_laplace_on_quadrilaterals.py b/tests/firedrake/regression/test_vector_laplace_on_quadrilaterals.py similarity index 100% rename from tests/regression/test_vector_laplace_on_quadrilaterals.py rename to tests/firedrake/regression/test_vector_laplace_on_quadrilaterals.py diff --git a/tests/regression/test_vertex_based_limiter.py b/tests/firedrake/regression/test_vertex_based_limiter.py similarity index 100% rename from tests/regression/test_vertex_based_limiter.py rename to tests/firedrake/regression/test_vertex_based_limiter.py diff --git a/tests/regression/test_vfs_component_bcs.py b/tests/firedrake/regression/test_vfs_component_bcs.py similarity index 100% rename from tests/regression/test_vfs_component_bcs.py rename to tests/firedrake/regression/test_vfs_component_bcs.py diff --git a/tests/regression/test_work_functions.py b/tests/firedrake/regression/test_work_functions.py similarity index 100% rename from tests/regression/test_work_functions.py rename to tests/firedrake/regression/test_work_functions.py diff --git a/tests/regression/test_zero_forms.py 
b/tests/firedrake/regression/test_zero_forms.py similarity index 100% rename from tests/regression/test_zero_forms.py rename to tests/firedrake/regression/test_zero_forms.py diff --git a/tests/regression/test_zero_integrand.py b/tests/firedrake/regression/test_zero_integrand.py similarity index 100% rename from tests/regression/test_zero_integrand.py rename to tests/firedrake/regression/test_zero_integrand.py diff --git a/tests/slate/conftest.py b/tests/firedrake/slate/conftest.py similarity index 85% rename from tests/slate/conftest.py rename to tests/firedrake/slate/conftest.py index bfc31bad38..c2e6a7d9c9 100644 --- a/tests/slate/conftest.py +++ b/tests/firedrake/slate/conftest.py @@ -5,7 +5,7 @@ def pytest_collection_modifyitems(session, config, items): from firedrake.utils import complex_mode, SLATE_SUPPORTS_COMPLEX for item in items: test_file, *_ = item.location - if not test_file.startswith("tests/slate/"): + if not test_file.startswith("tests/firedrake/slate/"): continue if not SLATE_SUPPORTS_COMPLEX and complex_mode: item.add_marker(pytest.mark.skip(reason="Slate support for complex mode is missing")) diff --git a/tests/slate/script_logging.py b/tests/firedrake/slate/script_logging.py similarity index 100% rename from tests/slate/script_logging.py rename to tests/firedrake/slate/script_logging.py diff --git a/tests/slate/test_assemble_tensors.py b/tests/firedrake/slate/test_assemble_tensors.py similarity index 100% rename from tests/slate/test_assemble_tensors.py rename to tests/firedrake/slate/test_assemble_tensors.py diff --git a/tests/slate/test_cg_poisson.py b/tests/firedrake/slate/test_cg_poisson.py similarity index 100% rename from tests/slate/test_cg_poisson.py rename to tests/firedrake/slate/test_cg_poisson.py diff --git a/tests/slate/test_darcy_hybridized_mixed.py b/tests/firedrake/slate/test_darcy_hybridized_mixed.py similarity index 100% rename from tests/slate/test_darcy_hybridized_mixed.py rename to 
tests/firedrake/slate/test_darcy_hybridized_mixed.py diff --git a/tests/slate/test_facet_tensors.py b/tests/firedrake/slate/test_facet_tensors.py similarity index 100% rename from tests/slate/test_facet_tensors.py rename to tests/firedrake/slate/test_facet_tensors.py diff --git a/tests/slate/test_facet_tensors_extr.py b/tests/firedrake/slate/test_facet_tensors_extr.py similarity index 100% rename from tests/slate/test_facet_tensors_extr.py rename to tests/firedrake/slate/test_facet_tensors_extr.py diff --git a/tests/slate/test_hdg_poisson.py b/tests/firedrake/slate/test_hdg_poisson.py similarity index 100% rename from tests/slate/test_hdg_poisson.py rename to tests/firedrake/slate/test_hdg_poisson.py diff --git a/tests/slate/test_hybrid_poisson_sphere.py b/tests/firedrake/slate/test_hybrid_poisson_sphere.py similarity index 100% rename from tests/slate/test_hybrid_poisson_sphere.py rename to tests/firedrake/slate/test_hybrid_poisson_sphere.py diff --git a/tests/slate/test_linear_algebra.py b/tests/firedrake/slate/test_linear_algebra.py similarity index 100% rename from tests/slate/test_linear_algebra.py rename to tests/firedrake/slate/test_linear_algebra.py diff --git a/tests/slate/test_local_logging.py b/tests/firedrake/slate/test_local_logging.py similarity index 100% rename from tests/slate/test_local_logging.py rename to tests/firedrake/slate/test_local_logging.py diff --git a/tests/slate/test_matrix_free_hybridization.py b/tests/firedrake/slate/test_matrix_free_hybridization.py similarity index 100% rename from tests/slate/test_matrix_free_hybridization.py rename to tests/firedrake/slate/test_matrix_free_hybridization.py diff --git a/tests/slate/test_mimetic.py b/tests/firedrake/slate/test_mimetic.py similarity index 100% rename from tests/slate/test_mimetic.py rename to tests/firedrake/slate/test_mimetic.py diff --git a/tests/slate/test_optimise.py b/tests/firedrake/slate/test_optimise.py similarity index 100% rename from tests/slate/test_optimise.py rename 
to tests/firedrake/slate/test_optimise.py diff --git a/tests/slate/test_orientations.py b/tests/firedrake/slate/test_orientations.py similarity index 100% rename from tests/slate/test_orientations.py rename to tests/firedrake/slate/test_orientations.py diff --git a/tests/slate/test_scalar_tensors.py b/tests/firedrake/slate/test_scalar_tensors.py similarity index 100% rename from tests/slate/test_scalar_tensors.py rename to tests/firedrake/slate/test_scalar_tensors.py diff --git a/tests/slate/test_scalar_tensors_extr.py b/tests/firedrake/slate/test_scalar_tensors_extr.py similarity index 100% rename from tests/slate/test_scalar_tensors_extr.py rename to tests/firedrake/slate/test_scalar_tensors_extr.py diff --git a/tests/slate/test_slac.py b/tests/firedrake/slate/test_slac.py similarity index 100% rename from tests/slate/test_slac.py rename to tests/firedrake/slate/test_slac.py diff --git a/tests/slate/test_slac_parallel.py b/tests/firedrake/slate/test_slac_parallel.py similarity index 100% rename from tests/slate/test_slac_parallel.py rename to tests/firedrake/slate/test_slac_parallel.py diff --git a/tests/slate/test_slate_hybridization.py b/tests/firedrake/slate/test_slate_hybridization.py similarity index 100% rename from tests/slate/test_slate_hybridization.py rename to tests/firedrake/slate/test_slate_hybridization.py diff --git a/tests/slate/test_slate_hybridization_extr.py b/tests/firedrake/slate/test_slate_hybridization_extr.py similarity index 100% rename from tests/slate/test_slate_hybridization_extr.py rename to tests/firedrake/slate/test_slate_hybridization_extr.py diff --git a/tests/slate/test_slate_hybridized_mixed_bcs.py b/tests/firedrake/slate/test_slate_hybridized_mixed_bcs.py similarity index 100% rename from tests/slate/test_slate_hybridized_mixed_bcs.py rename to tests/firedrake/slate/test_slate_hybridized_mixed_bcs.py diff --git a/tests/slate/test_slate_infrastructure.py b/tests/firedrake/slate/test_slate_infrastructure.py similarity index 100% 
rename from tests/slate/test_slate_infrastructure.py rename to tests/firedrake/slate/test_slate_infrastructure.py diff --git a/tests/slate/test_slate_mixed_direct.py b/tests/firedrake/slate/test_slate_mixed_direct.py similarity index 100% rename from tests/slate/test_slate_mixed_direct.py rename to tests/firedrake/slate/test_slate_mixed_direct.py diff --git a/tests/slate/test_subdomains.py b/tests/firedrake/slate/test_subdomains.py similarity index 100% rename from tests/slate/test_subdomains.py rename to tests/firedrake/slate/test_subdomains.py diff --git a/tests/slate/test_unaryop_precedence.py b/tests/firedrake/slate/test_unaryop_precedence.py similarity index 100% rename from tests/slate/test_unaryop_precedence.py rename to tests/firedrake/slate/test_unaryop_precedence.py diff --git a/tests/slate/test_variational_prb.py b/tests/firedrake/slate/test_variational_prb.py similarity index 100% rename from tests/slate/test_variational_prb.py rename to tests/firedrake/slate/test_variational_prb.py diff --git a/tests/slate/test_zany_element_tensors.py b/tests/firedrake/slate/test_zany_element_tensors.py similarity index 100% rename from tests/slate/test_zany_element_tensors.py rename to tests/firedrake/slate/test_zany_element_tensors.py diff --git a/tests/submesh/test_submesh_interpolate.py b/tests/firedrake/submesh/test_submesh_interpolate.py similarity index 100% rename from tests/submesh/test_submesh_interpolate.py rename to tests/firedrake/submesh/test_submesh_interpolate.py diff --git a/tests/supermesh/test_assemble_mixed_mass_matrix.py b/tests/firedrake/supermesh/test_assemble_mixed_mass_matrix.py similarity index 100% rename from tests/supermesh/test_assemble_mixed_mass_matrix.py rename to tests/firedrake/supermesh/test_assemble_mixed_mass_matrix.py diff --git a/tests/supermesh/test_galerkin_projection.py b/tests/firedrake/supermesh/test_galerkin_projection.py similarity index 100% rename from tests/supermesh/test_galerkin_projection.py rename to 
tests/firedrake/supermesh/test_galerkin_projection.py diff --git a/tests/supermesh/test_intersection_finder_nested.py b/tests/firedrake/supermesh/test_intersection_finder_nested.py similarity index 100% rename from tests/supermesh/test_intersection_finder_nested.py rename to tests/firedrake/supermesh/test_intersection_finder_nested.py diff --git a/tests/supermesh/test_nonnested_project.py b/tests/firedrake/supermesh/test_nonnested_project.py similarity index 100% rename from tests/supermesh/test_nonnested_project.py rename to tests/firedrake/supermesh/test_nonnested_project.py diff --git a/tests/supermesh/test_nonnested_project_no_hierarchy.py b/tests/firedrake/supermesh/test_nonnested_project_no_hierarchy.py similarity index 100% rename from tests/supermesh/test_nonnested_project_no_hierarchy.py rename to tests/firedrake/supermesh/test_nonnested_project_no_hierarchy.py diff --git a/tests/supermesh/test_periodic.py b/tests/firedrake/supermesh/test_periodic.py similarity index 100% rename from tests/supermesh/test_periodic.py rename to tests/firedrake/supermesh/test_periodic.py diff --git a/tests/test_0init.py b/tests/firedrake/test_0init.py similarity index 70% rename from tests/test_0init.py rename to tests/firedrake/test_0init.py index 99f82e49c5..82d1a395ef 100644 --- a/tests/test_0init.py +++ b/tests/firedrake/test_0init.py @@ -1,6 +1,7 @@ import pytest import os from firedrake import * +from pathlib import Path # See https://pytest-xdist.readthedocs.io/en/stable/how-to.html#identifying-the-worker-process-during-a-test @@ -26,19 +27,7 @@ def test_pyop2_custom_init(): op2.configuration.reset() -def test_int_type(): - import firedrake_configuration - from firedrake.utils import IntType - - expected = firedrake_configuration.get_config()["options"]["petsc_int_type"] - actual = {4: "int32", 8: "int64"}[IntType.itemsize] - - assert expected == actual - - def test_pyop2_cache_dir_set_correctly(): - from firedrake_configuration import get_config - - config = 
get_config() - cache_dir = os.path.join(config["options"]["cache_dir"], "pyop2") + root = Path(os.environ.get("VIRTUAL_ENV", "~")).joinpath(".cache") + cache_dir = os.environ.get("PYOP2_CACHE_DIR", str(root.joinpath("pyop2"))) assert op2.configuration["cache_dir"] == cache_dir diff --git a/tests/test_tsfc_interface.py b/tests/firedrake/test_tsfc_interface.py similarity index 100% rename from tests/test_tsfc_interface.py rename to tests/firedrake/test_tsfc_interface.py diff --git a/tests/unit/test_fml/test_label.py b/tests/firedrake/unit/test_fml/test_label.py similarity index 100% rename from tests/unit/test_fml/test_label.py rename to tests/firedrake/unit/test_fml/test_label.py diff --git a/tests/unit/test_fml/test_label_map.py b/tests/firedrake/unit/test_fml/test_label_map.py similarity index 100% rename from tests/unit/test_fml/test_label_map.py rename to tests/firedrake/unit/test_fml/test_label_map.py diff --git a/tests/unit/test_fml/test_labelled_form.py b/tests/firedrake/unit/test_fml/test_labelled_form.py similarity index 100% rename from tests/unit/test_fml/test_labelled_form.py rename to tests/firedrake/unit/test_fml/test_labelled_form.py diff --git a/tests/unit/test_fml/test_replace_perp.py b/tests/firedrake/unit/test_fml/test_replace_perp.py similarity index 100% rename from tests/unit/test_fml/test_replace_perp.py rename to tests/firedrake/unit/test_fml/test_replace_perp.py diff --git a/tests/unit/test_fml/test_replacement.py b/tests/firedrake/unit/test_fml/test_replacement.py similarity index 100% rename from tests/unit/test_fml/test_replacement.py rename to tests/firedrake/unit/test_fml/test_replacement.py diff --git a/tests/unit/test_fml/test_term.py b/tests/firedrake/unit/test_fml/test_term.py similarity index 100% rename from tests/unit/test_fml/test_term.py rename to tests/firedrake/unit/test_fml/test_term.py diff --git a/tests/unit/test_utils/test_uid.py b/tests/firedrake/unit/test_utils/test_uid.py similarity index 100% rename from 
tests/unit/test_utils/test_uid.py rename to tests/firedrake/unit/test_utils/test_uid.py diff --git a/tests/vertexonly/test_different_comms.py b/tests/firedrake/vertexonly/test_different_comms.py similarity index 100% rename from tests/vertexonly/test_different_comms.py rename to tests/firedrake/vertexonly/test_different_comms.py diff --git a/tests/vertexonly/test_interpolation_from_parent.py b/tests/firedrake/vertexonly/test_interpolation_from_parent.py similarity index 100% rename from tests/vertexonly/test_interpolation_from_parent.py rename to tests/firedrake/vertexonly/test_interpolation_from_parent.py diff --git a/tests/vertexonly/test_point_eval_immersed_manifold.py b/tests/firedrake/vertexonly/test_point_eval_immersed_manifold.py similarity index 100% rename from tests/vertexonly/test_point_eval_immersed_manifold.py rename to tests/firedrake/vertexonly/test_point_eval_immersed_manifold.py diff --git a/tests/vertexonly/test_poisson_inverse_conductivity.py b/tests/firedrake/vertexonly/test_poisson_inverse_conductivity.py similarity index 100% rename from tests/vertexonly/test_poisson_inverse_conductivity.py rename to tests/firedrake/vertexonly/test_poisson_inverse_conductivity.py diff --git a/tests/vertexonly/test_swarm.py b/tests/firedrake/vertexonly/test_swarm.py similarity index 100% rename from tests/vertexonly/test_swarm.py rename to tests/firedrake/vertexonly/test_swarm.py diff --git a/tests/vertexonly/test_vertex_only_fs.py b/tests/firedrake/vertexonly/test_vertex_only_fs.py similarity index 100% rename from tests/vertexonly/test_vertex_only_fs.py rename to tests/firedrake/vertexonly/test_vertex_only_fs.py diff --git a/tests/vertexonly/test_vertex_only_manual.py b/tests/firedrake/vertexonly/test_vertex_only_manual.py similarity index 100% rename from tests/vertexonly/test_vertex_only_manual.py rename to tests/firedrake/vertexonly/test_vertex_only_manual.py diff --git a/tests/vertexonly/test_vertex_only_mesh_generation.py 
b/tests/firedrake/vertexonly/test_vertex_only_mesh_generation.py similarity index 100% rename from tests/vertexonly/test_vertex_only_mesh_generation.py rename to tests/firedrake/vertexonly/test_vertex_only_mesh_generation.py diff --git a/tests/output/test_config_exist.py b/tests/output/test_config_exist.py deleted file mode 100644 index bfc0a37145..0000000000 --- a/tests/output/test_config_exist.py +++ /dev/null @@ -1,4 +0,0 @@ -def test_config_exist(): - import firedrake_configuration - config = firedrake_configuration.get_config() - assert config is not None diff --git a/tests/pyop2/test_api.py b/tests/pyop2/test_api.py new file mode 100644 index 0000000000..468d175587 --- /dev/null +++ b/tests/pyop2/test_api.py @@ -0,0 +1,1619 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +User API Unit Tests +""" + +import pytest +import numpy as np +from numpy.testing import assert_equal + +from pyop2 import exceptions, op2 +from pyop2.mpi import COMM_WORLD + + +@pytest.fixture +def set(): + return op2.Set(5, 'foo') + + +@pytest.fixture +def iterset(): + return op2.Set(2, 'iterset') + + +@pytest.fixture +def toset(): + return op2.Set(3, 'toset') + + +@pytest.fixture +def sets(set, iterset, toset): + return set, iterset, toset + + +@pytest.fixture +def mset(sets): + return op2.MixedSet(sets) + + +@pytest.fixture(params=['sets', 'mset', 'gen']) +def msets(sets, mset, request): + return {'sets': sets, 'mset': mset, 'gen': iter(sets)}[request.param] + + +@pytest.fixture(params=[1, 2, (2, 3)]) +def dset(request, set): + return op2.DataSet(set, request.param, 'dfoo') + + +@pytest.fixture +def diterset(iterset): + return op2.DataSet(iterset, 1, 'diterset') + + +@pytest.fixture +def dtoset(toset): + return op2.DataSet(toset, 1, 'dtoset') + + +@pytest.fixture +def dsets(dset, diterset, dtoset): + return dset, diterset, dtoset + + +@pytest.fixture +def mdset(dsets): + return op2.MixedDataSet(dsets) + + +@pytest.fixture +def dat(dtoset): + return op2.Dat(dtoset, np.arange(dtoset.cdim * dtoset.size, dtype=np.int32)) + + +@pytest.fixture +def dats(dtoset, dset): + return op2.Dat(dtoset), op2.Dat(dset) + + +@pytest.fixture +def mdat(dats): + return op2.MixedDat(dats) + + +@pytest.fixture +def m_iterset_toset(iterset, toset): 
+ return op2.Map(iterset, toset, 2, [1] * 2 * iterset.size, 'm_iterset_toset') + + +@pytest.fixture +def m_iterset_set(iterset, set): + return op2.Map(iterset, set, 2, [1] * 2 * iterset.size, 'm_iterset_set') + + +@pytest.fixture +def m_set_toset(set, toset): + return op2.Map(set, toset, 1, [1] * set.size, 'm_set_toset') + + +@pytest.fixture +def m_set_set(set): + return op2.Map(set, set, 1, [1] * set.size, 'm_set_set') + + +@pytest.fixture +def maps(m_iterset_toset, m_iterset_set): + return m_iterset_toset, m_iterset_set + + +@pytest.fixture +def mmap(maps): + return op2.MixedMap(maps) + + +@pytest.fixture +def mds(dtoset, set): + return op2.MixedDataSet((dtoset, set)) + + +# pytest doesn't currently support using fixtures are paramters to tests +# or other fixtures. We have to work around that by requesting fixtures +# by name +@pytest.fixture(params=[('mds', 'mds', 'mmap', 'mmap'), + ('mds', 'dtoset', 'mmap', 'm_iterset_toset'), + ('dtoset', 'mds', 'm_iterset_toset', 'mmap')]) +def ms(request): + rds, cds, rmm, cmm = [request.getfixturevalue(p) for p in request.param] + return op2.Sparsity((rds, cds), {(i, j): [(rm, cm, None)] for i, rm in enumerate(rmm) for j, cm in enumerate(cmm)}) + + +@pytest.fixture +def sparsity(m_iterset_toset, dtoset): + return op2.Sparsity((dtoset, dtoset), [(m_iterset_toset, m_iterset_toset, None)]) + + +@pytest.fixture +def mat(sparsity): + return op2.Mat(sparsity) + + +@pytest.fixture +def diag_mat(toset): + _d = toset ** 1 + _m = op2.Map(toset, toset, 1, np.arange(toset.size)) + return op2.Mat(op2.Sparsity((_d, _d), [(_m, _m, None)])) + + +@pytest.fixture +def mmat(ms): + return op2.Mat(ms) + + +@pytest.fixture +def g(): + return op2.Global(1, 1, comm=COMM_WORLD) + + +class TestClassAPI: + + """Do PyOP2 classes behave like normal classes?""" + + def test_isinstance(self, set, dat): + "isinstance should behave as expected." 
+ assert isinstance(set, op2.Set) + assert isinstance(dat, op2.Dat) + assert not isinstance(set, op2.Dat) + assert not isinstance(dat, op2.Set) + + def test_issubclass(self, set, dat): + "issubclass should behave as expected" + assert issubclass(type(set), op2.Set) + assert issubclass(type(dat), op2.Dat) + assert not issubclass(type(set), op2.Dat) + assert not issubclass(type(dat), op2.Set) + + +class TestSetAPI: + + """ + Set API unit tests + """ + + def test_set_illegal_size(self): + "Set size should be int." + with pytest.raises(exceptions.SizeTypeError): + op2.Set('illegalsize') + + def test_set_illegal_name(self): + "Set name should be string." + with pytest.raises(exceptions.NameTypeError): + op2.Set(1, 2) + + def test_set_iter(self, set): + "Set should be iterable and yield self." + for s in set: + assert s is set + + def test_set_len(self, set): + "Set len should be 1." + assert len(set) == 1 + + def test_set_repr(self, set): + "Set repr should produce a Set object when eval'd." + from pyop2.op2 import Set # noqa: needed by eval + assert isinstance(eval(repr(set)), op2.Set) + + def test_set_str(self, set): + "Set should have the expected string representation." 
+ assert str(set) == "OP2 Set: %s with size %s" % (set.name, set.size) + + def test_set_eq(self, set): + "The equality test for sets is identity, not attribute equality" + assert set == set + assert not set != set + + def test_dset_in_set(self, set, dset): + "The in operator should indicate compatibility of DataSet and Set" + assert dset in set + + def test_dset_not_in_set(self, dset): + "The in operator should indicate incompatibility of DataSet and Set" + assert dset not in op2.Set(5, 'bar') + + def test_set_exponentiation_builds_dset(self, set): + "The exponentiation operator should build a DataSet" + dset = set ** 1 + assert isinstance(dset, op2.DataSet) + assert dset.cdim == 1 + + dset = set ** 3 + assert dset.cdim == 3 + + +class TestExtrudedSetAPI: + """ + ExtrudedSet API tests + """ + def test_illegal_layers_arg(self, set): + """Must pass at least 2 as a layers argument""" + with pytest.raises(exceptions.SizeTypeError): + op2.ExtrudedSet(set, 1) + + def test_illegal_set_arg(self): + """Extuded Set should be build on a Set""" + with pytest.raises(TypeError): + op2.ExtrudedSet(1, 3) + + def test_set_compatiblity(self, set, iterset): + """The set an extruded set was built on should be contained in it""" + e = op2.ExtrudedSet(set, 5) + assert set in e + assert iterset not in e + + def test_iteration_compatibility(self, iterset, m_iterset_toset, m_iterset_set, dats): + """It should be possible to iterate over an extruded set reading dats + defined on the base set (indirectly).""" + e = op2.ExtrudedSet(iterset, 5) + k = op2.Kernel('static void k() { }', 'k') + dat1, dat2 = dats + op2.par_loop(k, e, dat1(op2.READ, m_iterset_toset)) + op2.par_loop(k, e, dat2(op2.READ, m_iterset_set)) + + def test_iteration_incompatibility(self, set, m_iterset_toset, dat): + """It should not be possible to iteratve over an extruded set reading + dats not defined on the base set (indirectly).""" + e = op2.ExtrudedSet(set, 5) + k = op2.Kernel('static void k() { }', 'k') + with 
pytest.raises(exceptions.MapValueError): + op2.ParLoop(k, e, dat(op2.READ, m_iterset_toset)) + + +class TestSubsetAPI: + """ + Subset API unit tests + """ + + def test_illegal_set_arg(self): + "The subset constructor checks arguments." + with pytest.raises(TypeError): + op2.Subset("fail", [0, 1]) + + def test_out_of_bounds_index(self, set): + "The subset constructor checks indices are correct." + with pytest.raises(exceptions.SubsetIndexOutOfBounds): + op2.Subset(set, list(range(set.total_size + 1))) + + def test_invalid_index(self, set): + "The subset constructor checks indices are correct." + with pytest.raises(exceptions.SubsetIndexOutOfBounds): + op2.Subset(set, [-1]) + + def test_empty_subset(self, set): + "Subsets can be empty." + ss = op2.Subset(set, []) + assert len(ss.indices) == 0 + + def test_index_construction(self, set): + "We should be able to construct a Subset by indexing a Set." + ss = set(0, 1) + ss2 = op2.Subset(set, [0, 1]) + assert_equal(ss.indices, ss2.indices) + + ss = set(0) + ss2 = op2.Subset(set, [0]) + assert_equal(ss.indices, ss2.indices) + + ss = set(np.arange(5)) + ss2 = op2.Subset(set, np.arange(5)) + assert_equal(ss.indices, ss2.indices) + + def test_indices_duplicate_removed(self, set): + "The subset constructor voids duplicate indices)" + ss = op2.Subset(set, [0, 0, 1, 1]) + assert np.sum(ss.indices == 0) == 1 + assert np.sum(ss.indices == 1) == 1 + + def test_indices_sorted(self, set): + "The subset constructor sorts indices)" + ss = op2.Subset(set, [0, 4, 1, 2, 3]) + assert_equal(ss.indices, list(range(5))) + + ss2 = op2.Subset(set, list(range(5))) + assert_equal(ss.indices, ss2.indices) + + +class TestMixedSetAPI: + + """ + MixedSet API unit tests + """ + + def test_mixed_set_illegal_set(self): + "MixedSet sets should be of type Set." + with pytest.raises(TypeError): + op2.MixedSet(('foo', 'bar')) + + def test_mixed_set_getitem(self, sets): + "MixedSet should return the corresponding Set when indexed." 
+ mset = op2.MixedSet(sets) + for i, s in enumerate(sets): + assert mset[i] == s + + def test_mixed_set_split(self, sets): + "MixedSet split should return a tuple of the Sets." + assert op2.MixedSet(sets).split == sets + + def test_mixed_set_core_size(self, mset): + "MixedSet core_size should return the sum of the Set core_sizes." + assert mset.core_size == sum(s.core_size for s in mset) + + def test_mixed_set_size(self, mset): + "MixedSet size should return the sum of the Set sizes." + assert mset.size == sum(s.size for s in mset) + + def test_mixed_set_total_size(self, mset): + "MixedSet total_size should return the sum of the Set total_sizes." + assert mset.total_size == sum(s.total_size for s in mset) + + def test_mixed_set_sizes(self, mset): + "MixedSet sizes should return a tuple of the Set sizes." + assert mset.sizes == (mset.core_size, mset.size, mset.total_size) + + def test_mixed_set_name(self, mset): + "MixedSet name should return a tuple of the Set names." + assert mset.name == tuple(s.name for s in mset) + + def test_mixed_set_halo(self, mset): + "MixedSet halo should be None when running sequentially." + assert mset.halo is None + + def test_mixed_set_layers(self, mset): + "MixedSet layers should return the layers of the first Set." + assert mset.layers == mset[0].layers + + def test_mixed_set_layers_must_match(self, sets): + "All components of a MixedSet must have the same number of layers." + sets = [op2.ExtrudedSet(s, layers=i+4) for i, s in enumerate(sets)] + with pytest.raises(AssertionError): + op2.MixedSet(sets) + + def test_mixed_set_iter(self, mset, sets): + "MixedSet should be iterable and yield the Sets." + assert tuple(s for s in mset) == sets + + def test_mixed_set_len(self, sets): + "MixedSet should have length equal to the number of contained Sets." + assert len(op2.MixedSet(sets)) == len(sets) + + def test_mixed_set_pow_int(self, mset): + "MixedSet should implement ** operator returning a MixedDataSet." 
+ assert mset ** 1 == op2.MixedDataSet([s ** 1 for s in mset]) + + def test_mixed_set_pow_seq(self, mset): + "MixedSet should implement ** operator returning a MixedDataSet." + assert mset ** ((1,) * len(mset)) == op2.MixedDataSet([s ** 1 for s in mset]) + + def test_mixed_set_pow_gen(self, mset): + "MixedSet should implement ** operator returning a MixedDataSet." + assert mset ** (1 for _ in mset) == op2.MixedDataSet([s ** 1 for s in mset]) + + def test_mixed_set_eq(self, sets): + "MixedSets created from the same Sets should compare equal." + assert op2.MixedSet(sets) == op2.MixedSet(sets) + assert not op2.MixedSet(sets) != op2.MixedSet(sets) + + def test_mixed_set_ne(self, set, iterset, toset): + "MixedSets created from different Sets should not compare equal." + assert op2.MixedSet((set, iterset, toset)) != op2.MixedSet((set, toset, iterset)) + assert not op2.MixedSet((set, iterset, toset)) == op2.MixedSet((set, toset, iterset)) + + def test_mixed_set_ne_set(self, sets): + "A MixedSet should not compare equal to a Set." + assert op2.MixedSet(sets) != sets[0] + assert not op2.MixedSet(sets) == sets[0] + + def test_mixed_set_repr(self, mset): + "MixedSet repr should produce a MixedSet object when eval'd." + from pyop2.op2 import Set, MixedSet # noqa: needed by eval + assert isinstance(eval(repr(mset)), op2.MixedSet) + + def test_mixed_set_str(self, mset): + "MixedSet should have the expected string representation." + assert str(mset) == "OP2 MixedSet composed of Sets: %s" % (mset._sets,) + + +class TestDataSetAPI: + """ + DataSet API unit tests + """ + + def test_dset_illegal_dim(self, iterset): + "DataSet dim should be int or int tuple." + with pytest.raises(TypeError): + op2.DataSet(iterset, 'illegaldim') + + def test_dset_illegal_dim_tuple(self, iterset): + "DataSet dim should be int or int tuple." + with pytest.raises(TypeError): + op2.DataSet(iterset, (1, 'illegaldim')) + + def test_dset_illegal_name(self, iterset): + "DataSet name should be string." 
+ with pytest.raises(exceptions.NameTypeError): + op2.DataSet(iterset, 1, 2) + + def test_dset_default_dim(self, iterset): + "DataSet constructor should default dim to (1,)." + assert op2.DataSet(iterset).dim == (1,) + + def test_dset_dim(self, iterset): + "DataSet constructor should create a dim tuple." + s = op2.DataSet(iterset, 1) + assert s.dim == (1,) + + def test_dset_dim_list(self, iterset): + "DataSet constructor should create a dim tuple from a list." + s = op2.DataSet(iterset, [2, 3]) + assert s.dim == (2, 3) + + def test_dset_iter(self, dset): + "DataSet should be iterable and yield self." + for s in dset: + assert s is dset + + def test_dset_len(self, dset): + "DataSet len should be 1." + assert len(dset) == 1 + + def test_dset_repr(self, dset): + "DataSet repr should produce a Set object when eval'd." + from pyop2.op2 import Set, DataSet # noqa: needed by eval + assert isinstance(eval(repr(dset)), op2.DataSet) + + def test_dset_str(self, dset): + "DataSet should have the expected string representation." + assert str(dset) == "OP2 DataSet: %s on set %s, with dim %s" \ + % (dset.name, dset.set, dset.dim) + + def test_dset_eq(self, dset): + "The equality test for DataSets is same dim and same set" + dsetcopy = op2.DataSet(dset.set, dset.dim) + assert dsetcopy == dset + assert not dsetcopy != dset + + def test_dset_ne_set(self, dset): + "DataSets with the same dim but different Sets are not equal." + dsetcopy = op2.DataSet(op2.Set(dset.set.size), dset.dim) + assert dsetcopy != dset + assert not dsetcopy == dset + + def test_dset_ne_dim(self, dset): + "DataSets with the same Set but different dims are not equal." 
+ dsetcopy = op2.DataSet(dset.set, tuple(d + 1 for d in dset.dim)) + assert dsetcopy != dset + assert not dsetcopy == dset + + def test_dat_in_dset(self, dset): + "The in operator should indicate compatibility of DataSet and Set" + assert op2.Dat(dset) in dset + + def test_dat_not_in_dset(self, dset): + "The in operator should indicate incompatibility of DataSet and Set" + assert op2.Dat(dset) not in op2.DataSet(op2.Set(5, 'bar')) + + +class TestMixedDataSetAPI: + """ + MixedDataSet API unit tests + """ + + @pytest.mark.parametrize('arg', ['illegalarg', (set, 'illegalarg'), + iter((set, 'illegalarg'))]) + def test_mixed_dset_illegal_arg(self, arg): + """Constructing a MixedDataSet from anything other than a MixedSet or + an iterable of Sets and/or DataSets should fail.""" + with pytest.raises(TypeError): + op2.MixedDataSet(arg) + + @pytest.mark.parametrize('dims', ['illegaldim', (1, 2, 'illegaldim')]) + def test_mixed_dset_dsets_illegal_dims(self, dsets, dims): + """When constructing a MixedDataSet from an iterable of DataSets it is + an error to specify dims.""" + with pytest.raises((TypeError, ValueError)): + op2.MixedDataSet(dsets, dims) + + def test_mixed_dset_dsets_dims(self, dsets): + """When constructing a MixedDataSet from an iterable of DataSets it is + an error to specify dims.""" + with pytest.raises(TypeError): + op2.MixedDataSet(dsets, 1) + + def test_mixed_dset_upcast_sets(self, msets, mset): + """Constructing a MixedDataSet from an iterable/iterator of Sets or + MixedSet should upcast.""" + assert op2.MixedDataSet(msets) == mset ** 1 + + def test_mixed_dset_sets_and_dsets(self, set, dset): + """Constructing a MixedDataSet from an iterable with a mixture of + Sets and DataSets should upcast the Sets.""" + assert op2.MixedDataSet((set, dset)).split == (set ** 1, dset) + + def test_mixed_dset_sets_and_dsets_gen(self, set, dset): + """Constructing a MixedDataSet from an iterable with a mixture of + Sets and DataSets should upcast the Sets.""" + assert 
op2.MixedDataSet(iter((set, dset))).split == (set ** 1, dset) + + def test_mixed_dset_dims_default_to_one(self, msets, mset): + """Constructing a MixedDataSet from an interable/iterator of Sets or + MixedSet without dims should default them to 1.""" + assert op2.MixedDataSet(msets).dim == ((1,),) * len(mset) + + def test_mixed_dset_dims_int(self, msets, mset): + """Construct a MixedDataSet from an iterator/iterable of Sets and a + MixedSet with dims as an int.""" + assert op2.MixedDataSet(msets, 2).dim == ((2,),) * len(mset) + + def test_mixed_dset_dims_gen(self, msets, mset): + """Construct a MixedDataSet from an iterator/iterable of Sets and a + MixedSet with dims as a generator.""" + dims = (2 for _ in mset) + assert op2.MixedDataSet(msets, dims).dim == ((2,),) * len(mset) + + def test_mixed_dset_dims_iterable(self, msets): + """Construct a MixedDataSet from an iterator/iterable of Sets and a + MixedSet with dims as an iterable.""" + dims = ((2,), (2, 2), (1,)) + assert op2.MixedDataSet(msets, dims).dim == dims + + def test_mixed_dset_dims_mismatch(self, msets, sets): + """Constructing a MixedDataSet from an iterable/iterator of Sets and a + MixedSet with mismatching number of dims should raise ValueError.""" + with pytest.raises(ValueError): + op2.MixedDataSet(msets, list(range(1, len(sets)))) + + def test_mixed_dset_getitem(self, mdset): + "MixedDataSet should return the corresponding DataSet when indexed." + for i, ds in enumerate(mdset): + assert mdset[i] == ds + + def test_mixed_dset_split(self, dsets): + "MixedDataSet split should return a tuple of the DataSets." + assert op2.MixedDataSet(dsets).split == dsets + + def test_mixed_dset_dim(self, mdset): + "MixedDataSet dim should return a tuple of the DataSet dims." + assert mdset.dim == tuple(s.dim for s in mdset) + + def test_mixed_dset_cdim(self, mdset): + "MixedDataSet cdim should return the sum of the DataSet cdims." 
+ assert mdset.cdim == sum(s.cdim for s in mdset) + + def test_mixed_dset_name(self, mdset): + "MixedDataSet name should return a tuple of the DataSet names." + assert mdset.name == tuple(s.name for s in mdset) + + def test_mixed_dset_set(self, mset): + "MixedDataSet set should return a MixedSet." + assert op2.MixedDataSet(mset).set == mset + + def test_mixed_dset_iter(self, mdset, dsets): + "MixedDataSet should be iterable and yield the DataSets." + assert tuple(s for s in mdset) == dsets + + def test_mixed_dset_len(self, dsets): + """MixedDataSet should have length equal to the number of contained + DataSets.""" + assert len(op2.MixedDataSet(dsets)) == len(dsets) + + def test_mixed_dset_eq(self, dsets): + "MixedDataSets created from the same DataSets should compare equal." + assert op2.MixedDataSet(dsets) == op2.MixedDataSet(dsets) + assert not op2.MixedDataSet(dsets) != op2.MixedDataSet(dsets) + + def test_mixed_dset_ne(self, dset, diterset, dtoset): + "MixedDataSets created from different DataSets should not compare equal." + mds1 = op2.MixedDataSet((dset, diterset, dtoset)) + mds2 = op2.MixedDataSet((dset, dtoset, diterset)) + assert mds1 != mds2 + assert not mds1 == mds2 + + def test_mixed_dset_ne_dset(self, diterset, dtoset): + "MixedDataSets should not compare equal to a scalar DataSet." + assert op2.MixedDataSet((diterset, dtoset)) != diterset + assert not op2.MixedDataSet((diterset, dtoset)) == diterset + + def test_mixed_dset_repr(self, mdset): + "MixedDataSet repr should produce a MixedDataSet object when eval'd." + from pyop2.op2 import Set, DataSet, MixedDataSet # noqa: needed by eval + assert isinstance(eval(repr(mdset)), op2.MixedDataSet) + + def test_mixed_dset_str(self, mdset): + "MixedDataSet should have the expected string representation." + assert str(mdset) == "OP2 MixedDataSet composed of DataSets: %s" % (mdset._dsets,) + + +class TestDatAPI: + + """ + Dat API unit tests + """ + + def test_dat_illegal_set(self): + "Dat set should be DataSet." 
+ with pytest.raises(exceptions.DataSetTypeError): + op2.Dat('illegalset', 1) + + def test_dat_illegal_name(self, dset): + "Dat name should be string." + with pytest.raises(exceptions.NameTypeError): + op2.Dat(dset, name=2) + + def test_dat_initialise_data(self, dset): + """Dat initilialised without the data should initialise data with the + correct size and type.""" + d = op2.Dat(dset) + assert d.data.size == dset.size * dset.cdim and d.data.dtype == np.float64 + + def test_dat_initialise_data_type(self, dset): + """Dat intiialised without the data but with specified type should + initialise its data with the correct type.""" + d = op2.Dat(dset, dtype=np.int32) + assert d.data.dtype == np.int32 + + def test_dat_subscript(self, dat): + """Extracting component 0 of a Dat should yield self.""" + assert dat[0] is dat + + def test_dat_illegal_subscript(self, dat): + """Extracting component 0 of a Dat should yield self.""" + with pytest.raises(exceptions.IndexValueError): + dat[1] + + def test_dat_arg_default_map(self, dat): + """Dat __call__ should default the Arg map to None if not given.""" + assert dat(op2.READ).map_ is None + + def test_dat_arg_illegal_map(self, dset): + """Dat __call__ should not allow a map with a toset other than this + Dat's set.""" + d = op2.Dat(dset) + set1 = op2.Set(3) + set2 = op2.Set(2) + to_set2 = op2.Map(set1, set2, 1, [0, 0, 0]) + with pytest.raises(exceptions.MapValueError): + d(op2.READ, to_set2) + + def test_dat_on_set_builds_dim_one_dataset(self, set): + """If a Set is passed as the dataset argument, it should be + converted into a Dataset with dim=1""" + d = op2.Dat(set) + assert d.cdim == 1 + assert isinstance(d.dataset, op2.DataSet) + assert d.dataset.cdim == 1 + + def test_dat_dtype_type(self, dset): + "The type of a Dat's dtype property should be a numpy.dtype." 
+ d = op2.Dat(dset) + assert isinstance(d.dtype, np.dtype) + d = op2.Dat(dset, [1.0] * dset.size * dset.cdim) + assert isinstance(d.dtype, np.dtype) + + def test_dat_split(self, dat): + "Splitting a Dat should yield a tuple with self" + for d in dat.split: + d == dat + + def test_dat_dtype(self, dset): + "Default data type should be numpy.float64." + d = op2.Dat(dset) + assert d.dtype == np.double + + def test_dat_float(self, dset): + "Data type for float data should be numpy.float64." + d = op2.Dat(dset, [1.0] * dset.size * dset.cdim) + assert d.dtype == np.double + + def test_dat_int(self, dset): + "Data type for int data should be numpy.int." + d = op2.Dat(dset, [1] * dset.size * dset.cdim) + assert d.dtype == np.asarray(1).dtype + + def test_dat_convert_int_float(self, dset): + "Explicit float type should override NumPy's default choice of int." + d = op2.Dat(dset, [1] * dset.size * dset.cdim, np.double) + assert d.dtype == np.float64 + + def test_dat_convert_float_int(self, dset): + "Explicit int type should override NumPy's default choice of float." + d = op2.Dat(dset, [1.5] * dset.size * dset.cdim, np.int32) + assert d.dtype == np.int32 + + def test_dat_illegal_dtype(self, dset): + "Illegal data type should raise DataTypeError." + with pytest.raises(exceptions.DataTypeError): + op2.Dat(dset, dtype='illegal_type') + + def test_dat_illegal_length(self, dset): + "Mismatching data length should raise DataValueError." + with pytest.raises(exceptions.DataValueError): + op2.Dat(dset, [1] * (dset.size * dset.cdim + 1)) + + def test_dat_reshape(self, dset): + "Data should be reshaped according to the set's dim." + d = op2.Dat(dset, [1.0] * dset.size * dset.cdim) + shape = (dset.size,) + (() if dset.cdim == 1 else dset.dim) + assert d.data.shape == shape + + def test_dat_properties(self, dset): + "Dat constructor should correctly set attributes." 
+ d = op2.Dat(dset, [1] * dset.size * dset.cdim, 'double', 'bar') + assert d.dataset.set == dset.set and d.dtype == np.float64 and \ + d.name == 'bar' and d.data.sum() == dset.size * dset.cdim + + def test_dat_iter(self, dat): + "Dat should be iterable and yield self." + for d in dat: + assert d is dat + + def test_dat_len(self, dat): + "Dat len should be 1." + assert len(dat) == 1 + + def test_dat_repr(self, dat): + "Dat repr should produce a Dat object when eval'd." + from pyop2.op2 import Dat, DataSet, Set # noqa: needed by eval + from numpy import dtype # noqa: needed by eval + assert isinstance(eval(repr(dat)), op2.Dat) + + def test_dat_str(self, dset): + "Dat should have the expected string representation." + d = op2.Dat(dset, dtype='double', name='bar') + s = "OP2 Dat: %s on (%s) with datatype %s" \ + % (d.name, d.dataset, d.data.dtype.name) + assert str(d) == s + + def test_dat_ro_accessor(self, dat): + "Attempting to set values through the RO accessor should raise an error." + x = dat.data_ro + with pytest.raises((RuntimeError, ValueError)): + x[0] = 1 + + def test_dat_ro_write_accessor(self, dat): + "Re-accessing the data in writeable form should be allowed." + x = dat.data_ro + with pytest.raises((RuntimeError, ValueError)): + x[0] = 1 + x = dat.data + x[0] = -100 + assert (dat.data_ro[0] == -100).all() + + def test_dat_lazy_allocation(self, dset): + "Temporary Dats should not allocate storage until accessed." + d = op2.Dat(dset) + assert not d._is_allocated + + def test_dat_zero_cdim(self, set): + "A Dat built on a DataSet with zero dim should be allowed." 
+ dset = set**0 + d = op2.Dat(dset) + assert d.shape == (set.total_size, 0) + assert d._data.size == 0 + assert d._data.shape == (set.total_size, 0) + + +class TestMixedDatAPI: + + """ + MixedDat API unit tests + """ + + def test_mixed_dat_illegal_arg(self): + """Constructing a MixedDat from anything other than a MixedSet, a + MixedDataSet or an iterable of Dats should fail.""" + with pytest.raises(exceptions.DataSetTypeError): + op2.MixedDat('illegalarg') + + def test_mixed_dat_illegal_dtype(self, set): + """Constructing a MixedDat from Dats of different dtype should fail.""" + with pytest.raises(exceptions.DataValueError): + op2.MixedDat((op2.Dat(set, dtype=np.int32), op2.Dat(set))) + + def test_mixed_dat_dats(self, dats): + """Constructing a MixedDat from an iterable of Dats should leave them + unchanged.""" + assert op2.MixedDat(dats).split == dats + + def test_mixed_dat_dsets(self, mdset): + """Constructing a MixedDat from an iterable of DataSets should leave + them unchanged.""" + assert op2.MixedDat(mdset).dataset == mdset + + def test_mixed_dat_upcast_sets(self, mset): + "Constructing a MixedDat from an iterable of Sets should upcast." + assert op2.MixedDat(mset).dataset == op2.MixedDataSet(mset) + + def test_mixed_dat_getitem(self, mdat): + "MixedDat should return the corresponding Dat when indexed." + for i, d in enumerate(mdat): + assert mdat[i] == d + assert mdat[:-1] == tuple(mdat)[:-1] + + def test_mixed_dat_dim(self, mdset): + "MixedDat dim should return a tuple of the DataSet dims." + assert op2.MixedDat(mdset).dim == mdset.dim + + def test_mixed_dat_cdim(self, mdset): + "MixedDat cdim should return a tuple of the DataSet cdims." + assert op2.MixedDat(mdset).cdim == mdset.cdim + + def test_mixed_dat_data(self, mdat): + "MixedDat data should return a tuple of the Dat data arrays." 
+ assert all((d1 == d2.data).all() for d1, d2 in zip(mdat.data, mdat)) + + def test_mixed_dat_data_ro(self, mdat): + "MixedDat data_ro should return a tuple of the Dat data_ro arrays." + assert all((d1 == d2.data_ro).all() for d1, d2 in zip(mdat.data_ro, mdat)) + + def test_mixed_dat_data_with_halos(self, mdat): + """MixedDat data_with_halos should return a tuple of the Dat + data_with_halos arrays.""" + assert all((d1 == d2.data_with_halos).all() for d1, d2 in zip(mdat.data_with_halos, mdat)) + + def test_mixed_dat_data_ro_with_halos(self, mdat): + """MixedDat data_ro_with_halos should return a tuple of the Dat + data_ro_with_halos arrays.""" + assert all((d1 == d2.data_ro_with_halos).all() for d1, d2 in zip(mdat.data_ro_with_halos, mdat)) + + def test_mixed_dat_needs_halo_update(self, mdat): + """MixedDat needs_halo_update should indicate if at least one contained + Dat needs a halo update.""" + assert mdat.halo_valid + mdat[0].halo_valid = False + assert not mdat.halo_valid + + def test_mixed_dat_needs_halo_update_setter(self, mdat): + """Setting MixedDat needs_halo_update should set the property for all + contained Dats.""" + assert mdat.halo_valid + mdat.halo_valid = False + assert not any(d.halo_valid for d in mdat) + + def test_mixed_dat_iter(self, mdat, dats): + "MixedDat should be iterable and yield the Dats." + assert tuple(s for s in mdat) == dats + + def test_mixed_dat_len(self, dats): + """MixedDat should have length equal to the number of contained Dats.""" + assert len(op2.MixedDat(dats)) == len(dats) + + def test_mixed_dat_eq(self, dats): + "MixedDats created from the same Dats should compare equal." + assert op2.MixedDat(dats) == op2.MixedDat(dats) + assert not op2.MixedDat(dats) != op2.MixedDat(dats) + + def test_mixed_dat_ne(self, dats): + "MixedDats created from different Dats should not compare equal." 
+ mdat1 = op2.MixedDat(dats) + mdat2 = op2.MixedDat(reversed(dats)) + assert mdat1 != mdat2 + assert not mdat1 == mdat2 + + def test_mixed_dat_ne_dat(self, dats): + "A MixedDat should not compare equal to a Dat." + assert op2.MixedDat(dats) != dats[0] + assert not op2.MixedDat(dats) == dats[0] + + def test_mixed_dat_repr(self, mdat): + "MixedDat repr should produce a MixedDat object when eval'd." + from pyop2.op2 import Set, DataSet, MixedDataSet, Dat, MixedDat # noqa: needed by eval + from numpy import dtype # noqa: needed by eval + assert isinstance(eval(repr(mdat)), op2.MixedDat) + + def test_mixed_dat_str(self, mdat): + "MixedDat should have the expected string representation." + assert str(mdat) == "OP2 MixedDat composed of Dats: %s" % (mdat.split,) + + +class TestSparsityAPI: + + """ + Sparsity API unit tests + """ + + @pytest.fixture + def mi(cls, toset): + iterset = op2.Set(3, 'iterset2') + return op2.Map(iterset, toset, 1, [1] * iterset.size, 'mi') + + @pytest.fixture + def dataset2(cls): + return op2.Set(1, 'dataset2') + + @pytest.fixture + def md(cls, iterset, dataset2): + return op2.Map(iterset, dataset2, 1, [0] * iterset.size, 'md') + + @pytest.fixture + def di(cls, toset): + return op2.DataSet(toset, 1, 'di') + + @pytest.fixture + def dd(cls, dataset2): + return op2.DataSet(dataset2, 1, 'dd') + + @pytest.fixture + def s(cls, di, mi): + return op2.Sparsity((di, di), [(mi, mi, None)]) + + @pytest.fixture + def mixed_row_sparsity(cls, dtoset, mds, m_iterset_toset, mmap): + return op2.Sparsity((mds, dtoset), {(0, 0): [(mmap[0], m_iterset_toset, None)], + (1, 0): [(mmap[1], m_iterset_toset, None)]}) + + @pytest.fixture + def mixed_col_sparsity(cls, dtoset, mds, m_iterset_toset, mmap): + return op2.Sparsity((dtoset, mds), {(0, 0): [(m_iterset_toset, mmap[0], None)], + (0, 1): [(m_iterset_toset, mmap[1], None)]}) + + def test_sparsity_illegal_rdset(self, di, mi): + "Sparsity rdset should be a DataSet" + with pytest.raises(TypeError): + 
op2.Sparsity(('illegalrmap', di), [(mi, mi, None)]) + + def test_sparsity_illegal_cdset(self, di, mi): + "Sparsity cdset should be a DataSet" + with pytest.raises(TypeError): + op2.Sparsity((di, 'illegalrmap'), [(mi, mi, None)]) + + def test_sparsity_illegal_rmap(self, di, mi): + "Sparsity rmap should be a Map" + with pytest.raises(TypeError): + op2.Sparsity((di, di), [('illegalrmap', mi, None)]) + + def test_sparsity_illegal_cmap(self, di, mi): + "Sparsity cmap should be a Map" + with pytest.raises(TypeError): + op2.Sparsity((di, di), [(mi, 'illegalcmap', None)]) + + def test_sparsity_illegal_name(self, di, mi): + "Sparsity name should be a string." + with pytest.raises(TypeError): + op2.Sparsity((di, di), [(mi, mi, None)], 0) + + def test_sparsity_map_pair_different_dataset(self, mi, md, di, dd, m_iterset_toset): + """Sparsity can be built from different row and column maps as long as + the tosets match the row and column DataSet.""" + s = op2.Sparsity((di, dd), [(m_iterset_toset, md, None)], name="foo") + assert (s.rcmaps[(0, 0)][0] == (m_iterset_toset, md) and s.dims[0][0] == (1, 1) + and s.name == "foo" and s.dsets == (di, dd)) + + def test_sparsity_unique_map_pairs(self, mi, di): + "Sparsity constructor should filter duplicate tuples of pairs of maps." + s = op2.Sparsity((di, di), [(mi, mi, None), (mi, mi, None)], name="foo") + assert s.rcmaps[(0, 0)] == [(mi, mi)] and s.dims[0][0] == (1, 1) + + def test_sparsity_map_pairs_different_itset(self, mi, di, dd, m_iterset_toset): + "Sparsity constructor should accept maps with different iteration sets" + maps = ((m_iterset_toset, m_iterset_toset), (mi, mi)) + s = op2.Sparsity((di, di), [(*maps[0], None), + (*maps[1], None)], name="foo") + assert frozenset(s.rcmaps[(0, 0)]) == frozenset(maps) and s.dims[0][0] == (1, 1) + + def test_sparsity_map_pairs_sorted(self, mi, di, dd, m_iterset_toset): + "Sparsity maps should have a deterministic order." 
+ s1 = op2.Sparsity((di, di), [(m_iterset_toset, m_iterset_toset, None), (mi, mi, None)]) + s2 = op2.Sparsity((di, di), [(mi, mi, None), (m_iterset_toset, m_iterset_toset, None)]) + assert s1.rcmaps[(0, 0)] == s2.rcmaps[(0, 0)] + + def test_sparsity_illegal_itersets(self, mi, md, di, dd): + "Both maps in a (rmap,cmap) tuple must have same iteration set" + with pytest.raises(RuntimeError): + op2.Sparsity((dd, di), [(md, mi, None)]) + + def test_sparsity_illegal_row_datasets(self, mi, md, di): + "All row maps must share the same data set" + with pytest.raises(RuntimeError): + op2.Sparsity((di, di), [(mi, mi, None), (md, mi, None)]) + + def test_sparsity_illegal_col_datasets(self, mi, md, di, dd): + "All column maps must share the same data set" + with pytest.raises(RuntimeError): + op2.Sparsity((di, di), [(mi, mi, None), (mi, md, None)]) + + def test_sparsity_shape(self, s): + "Sparsity shape of a single block should be (1, 1)." + assert s.shape == (1, 1) + + def test_sparsity_iter(self, s): + "Iterating over a Sparsity of a single block should yield self." + for bs in s: + assert bs == s + + def test_sparsity_getitem(self, s): + "Block 0, 0 of a Sparsity of a single block should be self." + assert s[0, 0] == s + + def test_sparsity_mmap_iter(self, ms): + "Iterating a Sparsity should yield the block by row." 
+ cols = ms.shape[1] + for i, block in enumerate(ms): + assert block == ms[i // cols, i % cols] + + def test_sparsity_mmap_getitem(self, ms): + """Sparsity block i, j should be defined on the corresponding row and + column DataSets and Maps.""" + for i, rds in enumerate(ms.dsets[0]): + for j, cds in enumerate(ms.dsets[1]): + block = ms[i, j] + # Indexing with a tuple and double index is equivalent + assert block == ms[i][j] + assert (block.dsets == (rds, cds) + and block.rcmaps[(0, 0)] == ms.rcmaps[(i, j)]) + + def test_sparsity_mmap_getrow(self, ms): + """Indexing a Sparsity with a single index should yield a row of + blocks.""" + for i, rds in enumerate(ms.dsets[0]): + for j, (s, cds) in enumerate(zip(ms[i], ms.dsets[1])): + assert (s.dsets == (rds, cds) + and s.rcmaps[(0, 0)] == ms.rcmaps[(i, j)]) + + def test_sparsity_mmap_shape(self, ms): + "Sparsity shape of should be the sizes of the mixed space." + assert ms.shape == (len(ms.dsets[0]), len(ms.dsets[1])) + + def test_sparsity_mmap_illegal_itersets(self, m_iterset_toset, + m_iterset_set, m_set_toset, + m_set_set, mds): + "Both maps in a (rmap,cmap) tuple must have same iteration set." + rmm = op2.MixedMap((m_iterset_toset, m_iterset_set)) + cmm = op2.MixedMap((m_set_toset, m_set_set)) + with pytest.raises(RuntimeError): + op2.Sparsity((mds, mds), {(i, j): [(rm, cm, None)] for i, rm in enumerate(rmm) for j, cm in enumerate(cmm)}) + + def test_sparsity_mmap_illegal_row_datasets(self, m_iterset_toset, + m_iterset_set, m_set_toset, mds): + "All row maps must share the same data set." + rmm = op2.MixedMap((m_iterset_toset, m_iterset_set)) + cmm = op2.MixedMap((m_set_toset, m_set_toset)) + with pytest.raises(RuntimeError): + op2.Sparsity((mds, mds), {(i, j): [(rm, cm, None)] for i, rm in enumerate(rmm) for j, cm in enumerate(cmm)}) + + def test_sparsity_mmap_illegal_col_datasets(self, m_iterset_toset, + m_iterset_set, m_set_toset, mds): + "All column maps must share the same data set." 
+ rmm = op2.MixedMap((m_set_toset, m_set_toset)) + cmm = op2.MixedMap((m_iterset_toset, m_iterset_set)) + with pytest.raises(RuntimeError): + op2.Sparsity((mds, mds), {(i, j): [(rm, cm, None)] for i, rm in enumerate(rmm) for j, cm in enumerate(cmm)}) + + def test_sparsity_repr(self, sparsity): + "Sparsity should have the expected repr." + + # Note: We can't actually reproduce a Sparsity from its repr because + # the Sparsity constructor checks that the maps are populated + r = "Sparsity(%r, %r, name=%r, nested=%r, block_sparse=%r, diagonal_block=%r)" % (sparsity.dsets, sparsity._maps_and_regions, sparsity.name, sparsity._nested, sparsity._block_sparse, sparsity._diagonal_block) + assert repr(sparsity) == r + + def test_sparsity_str(self, sparsity): + "Sparsity should have the expected string representation." + s = "OP2 Sparsity: dsets %s, maps_and_regions %s, name %s, nested %s, block_sparse %s, diagonal_block %s" % \ + (sparsity.dsets, sparsity._maps_and_regions, sparsity.name, sparsity._nested, sparsity._block_sparse, sparsity._diagonal_block) + assert str(sparsity) == s + + +class TestMatAPI: + + """ + Mat API unit tests + """ + + def test_mat_illegal_sets(self): + "Mat sparsity should be a Sparsity." + with pytest.raises(TypeError): + op2.Mat('illegalsparsity') + + def test_mat_illegal_name(self, sparsity): + "Mat name should be string." + with pytest.raises(exceptions.NameTypeError): + op2.Mat(sparsity, name=2) + + def test_mat_dtype(self, mat): + "Default data type should be numpy.float64." + assert mat.dtype == np.double + + def test_mat_properties(self, sparsity): + "Mat constructor should correctly set attributes." + m = op2.Mat(sparsity, 'double', 'bar') + assert m.sparsity == sparsity and \ + m.dtype == np.float64 and m.name == 'bar' + + def test_mat_mixed(self, mmat): + "Default data type should be numpy.float64." + assert mmat.dtype == np.double + + def test_mat_illegal_maps(self, mat): + "Mat arg constructor should reject invalid maps." 
+ wrongmap = op2.Map(op2.Set(2), op2.Set(3), 2, [0, 0, 0, 0]) + with pytest.raises(exceptions.MapValueError): + mat(op2.INC, (wrongmap, wrongmap)) + + @pytest.mark.parametrize("mode", [op2.READ, op2.RW, op2.MIN, op2.MAX]) + def test_mat_arg_illegal_mode(self, mat, mode, m_iterset_toset): + """Mat arg constructor should reject illegal access modes.""" + with pytest.raises(exceptions.ModeValueError): + mat(mode, (m_iterset_toset, m_iterset_toset)) + + def test_mat_iter(self, mat): + "Mat should be iterable and yield self." + for m in mat: + assert m is mat + + def test_mat_repr(self, mat): + "Mat should have the expected repr." + + # Note: We can't actually reproduce a Sparsity from its repr because + # the Sparsity constructor checks that the maps are populated + r = "Mat(%r, %r, %r)" % (mat.sparsity, mat.dtype, mat.name) + assert repr(mat) == r + + def test_mat_str(self, mat): + "Mat should have the expected string representation." + s = "OP2 Mat: %s, sparsity (%s), datatype %s" \ + % (mat.name, mat.sparsity, mat.dtype.name) + assert str(mat) == s + + +class TestGlobalAPI: + + """ + Global API unit tests + """ + + def test_global_illegal_dim(self): + "Global dim should be int or int tuple." + with pytest.raises(TypeError): + op2.Global('illegaldim', comm=COMM_WORLD) + + def test_global_illegal_dim_tuple(self): + "Global dim should be int or int tuple." + with pytest.raises(TypeError): + op2.Global((1, 'illegaldim'), comm=COMM_WORLD) + + def test_global_illegal_name(self): + "Global name should be string." + with pytest.raises(exceptions.NameTypeError): + op2.Global(1, 1, name=2, comm=COMM_WORLD) + + def test_global_dim(self): + "Global constructor should create a dim tuple." + g = op2.Global(1, 1, comm=COMM_WORLD) + assert g.dim == (1,) + + def test_global_dim_list(self): + "Global constructor should create a dim tuple from a list." 
+ g = op2.Global([2, 3], [1] * 6, comm=COMM_WORLD) + assert g.dim == (2, 3) + + def test_global_float(self): + "Data type for float data should be numpy.float64." + g = op2.Global(1, 1.0, comm=COMM_WORLD) + assert g.dtype == np.asarray(1.0).dtype + + def test_global_int(self): + "Data type for int data should be NumPy's default int type." + g = op2.Global(1, 1, comm=COMM_WORLD) + assert g.dtype == np.asarray(1).dtype + + def test_global_convert_int_float(self): + "Explicit float type should override NumPy's default choice of int." + g = op2.Global(1, 1, dtype=np.float64, comm=COMM_WORLD) + assert g.dtype == np.float64 + + def test_global_convert_float_int(self): + "Explicit int type should override NumPy's default choice of float." + g = op2.Global(1, 1.5, dtype=np.int64, comm=COMM_WORLD) + assert g.dtype == np.int64 + + def test_global_illegal_dtype(self): + "Illegal data type should raise DataValueError." + with pytest.raises(exceptions.DataValueError): + op2.Global(1, 'illegal_type', 'double', comm=COMM_WORLD) + + @pytest.mark.parametrize("dim", [1, (2, 2)]) + def test_global_illegal_length(self, dim): + "Mismatching data length should raise DataValueError." + with pytest.raises(exceptions.DataValueError): + op2.Global(dim, [1] * (np.prod(dim) + 1), comm=COMM_WORLD) + + def test_global_reshape(self): + "Data should be reshaped according to dim." + g = op2.Global((2, 2), [1.0] * 4, comm=COMM_WORLD) + assert g.dim == (2, 2) and g.data.shape == (2, 2) + + def test_global_properties(self): + "Global constructor should correctly set attributes." + g = op2.Global((2, 2), [1] * 4, 'double', 'bar', comm=COMM_WORLD) + assert g.dim == (2, 2) and g.dtype == np.float64 and g.name == 'bar' \ + and g.data.sum() == 4 + + def test_global_setter(self, g): + "Setter attribute on data should correctly set data value." + g.data = 2 + assert g.data.sum() == 2 + + def test_global_setter_malformed_data(self, g): + "Setter attribute should reject malformed data."
+ with pytest.raises(exceptions.DataValueError): + g.data = [1, 2] + + def test_global_iter(self, g): + "Global should be iterable and yield self." + for g_ in g: + assert g_ is g + + def test_global_len(self, g): + "Global len should be 1." + assert len(g) == 1 + + def test_global_str(self): + "Global should have the expected string representation." + g = op2.Global(1, 1, 'double', comm=COMM_WORLD) + s = "OP2 Global Argument: %s with dim %s and value %s" \ + % (g.name, g.dim, g.data) + assert str(g) == s + + @pytest.mark.parametrize("mode", [op2.RW, op2.WRITE]) + def test_global_arg_illegal_mode(self, g, mode): + """Global __call__ should not allow illegal access modes.""" + with pytest.raises(exceptions.ModeValueError): + g(mode) + + +class TestMapAPI: + + """ + Map API unit tests + """ + + def test_map_illegal_iterset(self, set): + "Map iterset should be Set." + with pytest.raises(exceptions.SetTypeError): + op2.Map('illegalset', set, 1, []) + + def test_map_illegal_toset(self, set): + "Map toset should be Set." + with pytest.raises(exceptions.SetTypeError): + op2.Map(set, 'illegalset', 1, []) + + def test_map_illegal_arity(self, set): + "Map arity should be int." + with pytest.raises(exceptions.ArityTypeError): + op2.Map(set, set, 'illegalarity', []) + + def test_map_illegal_arity_tuple(self, set): + "Map arity should not be a tuple." + with pytest.raises(exceptions.ArityTypeError): + op2.Map(set, set, (2, 2), []) + + def test_map_illegal_name(self, set): + "Map name should be string." + with pytest.raises(exceptions.NameTypeError): + op2.Map(set, set, 1, [], name=2) + + def test_map_illegal_dtype(self, set): + "Illegal data type should raise DataValueError." + with pytest.raises(exceptions.DataValueError): + op2.Map(set, set, 1, 'abcdefg') + + def test_map_illegal_length(self, iterset, toset): + "Mismatching data length should raise DataValueError." 
+ with pytest.raises(exceptions.DataValueError): + op2.Map(iterset, toset, 1, [1] * (iterset.size + 1)) + + def test_map_convert_float_int(self, iterset, toset): + "Float data should be implicitly converted to int." + from pyop2.datatypes import IntType + m = op2.Map(iterset, toset, 1, [1.5] * iterset.size) + assert m.values.dtype == IntType and m.values.sum() == iterset.size + + def test_map_reshape(self, iterset, toset): + "Data should be reshaped according to arity." + m = op2.Map(iterset, toset, 2, [1] * 2 * iterset.size) + assert m.arity == 2 and m.values.shape == (iterset.size, 2) + + def test_map_split(self, m_iterset_toset): + "Splitting a Map should yield a tuple with self" + for m in m_iterset_toset.split: + assert m == m_iterset_toset + + def test_map_properties(self, iterset, toset): + "Map constructor should correctly set attributes." + m = op2.Map(iterset, toset, 2, [1] * 2 * iterset.size, 'bar') + assert (m.iterset == iterset and m.toset == toset and m.arity == 2 + and m.arities == (2,) and m.arange == (0, 2) + and m.values.sum() == 2 * iterset.size and m.name == 'bar') + + def test_map_eq(self, m_iterset_toset): + """Map equality is identity.""" + mcopy = op2.Map(m_iterset_toset.iterset, m_iterset_toset.toset, + m_iterset_toset.arity, m_iterset_toset.values) + assert m_iterset_toset != mcopy + assert not m_iterset_toset == mcopy + assert mcopy == mcopy + + def test_map_ne_iterset(self, m_iterset_toset): + """Maps that have copied but not equal iteration sets are not equal.""" + mcopy = op2.Map(op2.Set(m_iterset_toset.iterset.size), + m_iterset_toset.toset, m_iterset_toset.arity, + m_iterset_toset.values) + assert m_iterset_toset != mcopy + assert not m_iterset_toset == mcopy + + def test_map_ne_toset(self, m_iterset_toset): + """Maps that have copied but not equal to sets are not equal.""" + mcopy = op2.Map(m_iterset_toset.iterset, op2.Set(m_iterset_toset.toset.size), + m_iterset_toset.arity, m_iterset_toset.values) + assert m_iterset_toset != mcopy +
assert not m_iterset_toset == mcopy + + def test_map_ne_arity(self, m_iterset_toset): + """Maps that have different arities are not equal.""" + mcopy = op2.Map(m_iterset_toset.iterset, m_iterset_toset.toset, + m_iterset_toset.arity * 2, list(m_iterset_toset.values) * 2) + assert m_iterset_toset != mcopy + assert not m_iterset_toset == mcopy + + def test_map_ne_values(self, m_iterset_toset): + """Maps that have different values are not equal.""" + m2 = op2.Map(m_iterset_toset.iterset, m_iterset_toset.toset, + m_iterset_toset.arity, m_iterset_toset.values.copy()) + m2.values[0] = 2 + assert m_iterset_toset != m2 + assert not m_iterset_toset == m2 + + def test_map_iter(self, m_iterset_toset): + "Map should be iterable and yield self." + for m_ in m_iterset_toset: + assert m_ is m_iterset_toset + + def test_map_len(self, m_iterset_toset): + "Map len should be 1." + assert len(m_iterset_toset) == 1 + + def test_map_repr(self, m_iterset_toset): + "Map should have the expected repr." + r = "Map(%r, %r, %r, None, %r, %r, %r)" % (m_iterset_toset.iterset, m_iterset_toset.toset, + m_iterset_toset.arity, m_iterset_toset.name, m_iterset_toset._offset, m_iterset_toset._offset_quotient) + assert repr(m_iterset_toset) == r + + def test_map_str(self, m_iterset_toset): + "Map should have the expected string representation." + s = "OP2 Map: %s from (%s) to (%s) with arity %s" \ + % (m_iterset_toset.name, m_iterset_toset.iterset, m_iterset_toset.toset, m_iterset_toset.arity) + assert str(m_iterset_toset) == s + + +class TestMixedMapAPI: + + """ + MixedMap API unit tests + """ + + def test_mixed_map_illegal_arg(self): + "Map iterset should be Set." 
+ with pytest.raises(TypeError): + op2.MixedMap('illegalarg') + + def test_mixed_map_split(self, maps): + """Constructing a MixedMap from an iterable of Maps should leave them + unchanged.""" + mmap = op2.MixedMap(maps) + assert mmap.split == maps + for i, m in enumerate(maps): + assert mmap.split[i] == m + assert mmap.split[:-1] == tuple(mmap)[:-1] + + def test_mixed_map_iterset(self, mmap): + "MixedMap iterset should return the common iterset of all Maps." + for m in mmap: + assert mmap.iterset == m.iterset + + def test_mixed_map_toset(self, mmap): + "MixedMap toset should return a MixedSet of the Map tosets." + assert mmap.toset == op2.MixedSet(m.toset for m in mmap) + + def test_mixed_map_arity(self, mmap): + "MixedMap arity should return the sum of the Map arities." + assert mmap.arity == sum(m.arity for m in mmap) + + def test_mixed_map_arities(self, mmap): + "MixedMap arities should return a tuple of the Map arities." + assert mmap.arities == tuple(m.arity for m in mmap) + + def test_mixed_map_arange(self, mmap): + "MixedMap arange should return 0 followed by the cumulative sums of the Map arities." + assert mmap.arange == (0,) + tuple(np.cumsum(mmap.arities)) + + def test_mixed_map_values(self, mmap): + "MixedMap values should return a tuple of the Map values." + assert all((v == m.values).all() for v, m in zip(mmap.values, mmap)) + + def test_mixed_map_values_with_halo(self, mmap): + "MixedMap values_with_halo should return a tuple of the Map values_with_halo." + assert all((v == m.values_with_halo).all() for v, m in zip(mmap.values_with_halo, mmap)) + + def test_mixed_map_name(self, mmap): + "MixedMap name should return a tuple of the Map names." + assert mmap.name == tuple(m.name for m in mmap) + + def test_mixed_map_offset(self, mmap): + "MixedMap offset should return a tuple of the Map offsets." + assert mmap.offset == tuple(m.offset for m in mmap) + + def test_mixed_map_iter(self, maps): + "MixedMap should be iterable and yield the Maps."
+ assert tuple(m for m in op2.MixedMap(maps)) == maps + + def test_mixed_map_len(self, maps): + """MixedMap should have length equal to the number of contained Maps.""" + assert len(op2.MixedMap(maps)) == len(maps) + + def test_mixed_map_eq(self, maps): + "MixedMaps created from the same Maps should compare equal." + assert op2.MixedMap(maps) == op2.MixedMap(maps) + assert not op2.MixedMap(maps) != op2.MixedMap(maps) + + def test_mixed_map_ne(self, maps): + "MixedMaps created from different Maps should not compare equal." + mm1 = op2.MixedMap((maps[0], maps[1])) + mm2 = op2.MixedMap((maps[1], maps[0])) + assert mm1 != mm2 + assert not mm1 == mm2 + + def test_mixed_map_ne_map(self, maps): + "A MixedMap should not compare equal to a Map." + assert op2.MixedMap(maps) != maps[0] + assert not op2.MixedMap(maps) == maps[0] + + def test_mixed_map_repr(self, mmap): + "MixedMap should have the expected repr." + # Note: We can't actually reproduce a MixedMap from its repr because + # the iteration sets will not be identical, which is checked in the + # constructor + assert repr(mmap) == "MixedMap(%r)" % (mmap.split,) + + def test_mixed_map_str(self, mmap): + "MixedMap should have the expected string representation." + assert str(mmap) == "OP2 MixedMap composed of Maps: %s" % (mmap.split,) + + +class TestKernelAPI: + + """ + Kernel API unit tests + """ + + def test_kernel_illegal_name(self): + "Kernel name should be string." + with pytest.raises(exceptions.NameTypeError): + op2.Kernel("", name=2) + + def test_kernel_properties(self): + "Kernel constructor should correctly set attributes." + k = op2.CStringLocalKernel("", "foo", accesses=(), dtypes=()) + assert k.name == "foo" + + def test_kernel_repr(self, set): + "Kernel should have the expected repr." + k = op2.Kernel("static int foo() { return 0; }", 'foo') + assert repr(k) == 'Kernel("""%s""", %r)' % (k.code, k.name) + + def test_kernel_str(self, set): + "Kernel should have the expected string representation." 
+ k = op2.Kernel("static int foo() { return 0; }", 'foo') + assert str(k) == "OP2 Kernel: %s" % k.name + + +class TestParLoopAPI: + + """ + ParLoop API unit tests + """ + + def test_illegal_kernel(self, set, dat, m_iterset_toset): + """The first ParLoop argument has to be of type op2.Kernel.""" + with pytest.raises(exceptions.KernelTypeError): + op2.par_loop('illegal_kernel', set, dat(op2.READ, m_iterset_toset)) + + def test_illegal_iterset(self, dat, m_iterset_toset): + """The second ParLoop argument has to be of type op2.Set.""" + with pytest.raises(exceptions.SetTypeError): + op2.par_loop(op2.Kernel("", "k"), 'illegal_set', + dat(op2.READ, m_iterset_toset)) + + def test_illegal_dat_iterset(self): + """ParLoop should reject a Dat argument using a different iteration + set from the par_loop's.""" + set1 = op2.Set(2) + set2 = op2.Set(3) + dset1 = op2.DataSet(set1, 1) + dat = op2.Dat(dset1) + map = op2.Map(set2, set1, 1, [0, 0, 0]) + kernel = op2.Kernel("void k() { }", "k") + with pytest.raises(exceptions.MapValueError): + op2.ParLoop(kernel, set1, dat(op2.READ, map)) + + def test_illegal_mat_iterset(self, sparsity): + """ParLoop should reject a Mat argument using a different iteration + set from the par_loop's.""" + set1 = op2.Set(2) + m = op2.Mat(sparsity) + rmap, cmap = sparsity.rcmaps[(0, 0)][0] + kernel = op2.Kernel("static void k() { }", "k") + with pytest.raises(exceptions.MapValueError): + op2.par_loop( + kernel, + set1, + m(op2.INC, (rmap, cmap)) + ) + + def test_empty_map_and_iterset(self): + """If the iterset of the ParLoop is zero-sized, it should not matter if + a map defined on it has no values.""" + s1 = op2.Set(0) + s2 = op2.Set(10) + m = op2.Map(s1, s2, 3) + d = op2.Dat(s2 ** 1, [0] * 10, dtype=int) + k = op2.Kernel("static void k(int *x) {}", "k") + op2.par_loop(k, s1, d(op2.READ, m)) + + def test_frozen_dats_cannot_use_different_access_mode(self): + s1 = op2.Set(2) + s2 = op2.Set(3) + m = op2.Map(s1, s2, 3, [0]*6) + d = op2.Dat(s2**1, [0]*3,
dtype=int) + k = op2.Kernel("static void k(int *x) {}", "k") + + with d.frozen_halo(op2.INC): + op2.par_loop(k, s1, d(op2.INC, m)) + + with pytest.raises(RuntimeError): + op2.par_loop(k, s1, d(op2.WRITE, m)) + + +if __name__ == '__main__': + import os + pytest.main([os.path.abspath(__file__)]) diff --git a/tests/pyop2/test_caching.py b/tests/pyop2/test_caching.py new file mode 100644 index 0000000000..1298991b3e --- /dev/null +++ b/tests/pyop2/test_caching.py @@ -0,0 +1,808 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +import ctypes +import os +import pytest +import tempfile +import numpy +from itertools import chain +from textwrap import dedent +from pyop2 import op2 +from pyop2.caching import ( + DEFAULT_CACHE, + disk_only_cache, + memory_cache, + memory_and_disk_cache, + clear_memory_cache +) +from pyop2.compilation import load +from pyop2.mpi import ( + MPI, + COMM_WORLD, + COMM_SELF, + comm_cache_keyval, + internal_comm, + temp_internal_comm +) + + +def _seed(): + return 0.02041724 + + +nelems = 8 +default_cache_name = DEFAULT_CACHE().__class__.__name__ + + +@pytest.fixture +def iterset(): + return op2.Set(nelems, "iterset") + + +@pytest.fixture +def indset(): + return op2.Set(nelems, "indset") + + +@pytest.fixture +def diterset(iterset): + return op2.DataSet(iterset, 1, "diterset") + + +@pytest.fixture +def dindset(indset): + return op2.DataSet(indset, 1, "dindset") + + +@pytest.fixture +def dindset2(indset): + return op2.DataSet(indset, 2, "dindset2") + + +@pytest.fixture +def g(): + return op2.Global(1, 0, numpy.uint32, "g", comm=COMM_WORLD) + + +@pytest.fixture +def x(dindset): + return op2.Dat(dindset, list(range(nelems)), numpy.uint32, "x") + + +@pytest.fixture +def x2(dindset2): + return op2.Dat(dindset2, list(range(nelems)) * 2, numpy.uint32, "x2") + + +@pytest.fixture +def xl(dindset): + return op2.Dat(dindset, list(range(nelems)), numpy.uint64, "xl") + + +@pytest.fixture +def y(dindset): + return op2.Dat(dindset, [0] * nelems, 
numpy.uint32, "y") + + +@pytest.fixture +def iter2ind1(iterset, indset): + u_map = numpy.array(list(range(nelems)), dtype=numpy.uint32)[::-1] + return op2.Map(iterset, indset, 1, u_map, "iter2ind1") + + +@pytest.fixture +def iter2ind2(iterset, indset): + u_map = numpy.array(list(range(nelems)) * 2, dtype=numpy.uint32)[::-1] + return op2.Map(iterset, indset, 2, u_map, "iter2ind2") + + +class TestObjectCaching: + + @pytest.fixture(scope='class') + def base_set(self): + return op2.Set(1) + + @pytest.fixture(scope='class') + def base_set2(self): + return op2.Set(1) + + @pytest.fixture(scope='class') + def base_map(self, base_set): + return op2.Map(base_set, base_set, 1, [0]) + + @pytest.fixture(scope='class') + def base_map2(self, base_set, base_set2): + return op2.Map(base_set, base_set2, 1, [0]) + + @pytest.fixture(scope='class') + def base_map3(self, base_set): + return op2.Map(base_set, base_set, 1, [0]) + + def test_set_identity(self, base_set, base_set2): + assert base_set is base_set + assert base_set is not base_set2 + assert base_set != base_set2 + assert not base_set == base_set2 + + def test_map_identity(self, base_map, base_map2): + assert base_map is base_map + assert base_map is not base_map2 + assert base_map != base_map2 + assert not base_map == base_map2 + + def test_dataset_cache_hit(self, base_set): + d1 = base_set ** 2 + d2 = base_set ** 2 + + assert d1 is d2 + assert d1 == d2 + assert not d1 != d2 + + def test_dataset_cache_miss(self, base_set, base_set2): + d1 = base_set ** 1 + d2 = base_set ** 2 + + assert d1 is not d2 + assert d1 != d2 + assert not d1 == d2 + + d3 = base_set2 ** 1 + assert d1 is not d3 + assert d1 != d3 + assert not d1 == d3 + + def test_mixedset_cache_hit(self, base_set): + ms = op2.MixedSet([base_set, base_set]) + ms2 = op2.MixedSet([base_set, base_set]) + + assert ms is ms2 + assert not ms != ms2 + assert ms == ms2 + + def test_mixedset_cache_miss(self, base_set, base_set2): + ms = op2.MixedSet([base_set, base_set2]) + ms2 = 
op2.MixedSet([base_set2, base_set]) + + assert ms is not ms2 + assert ms != ms2 + assert not ms == ms2 + + ms3 = op2.MixedSet([base_set, base_set2]) + assert ms is ms3 + assert not ms != ms3 + assert ms == ms3 + + def test_mixedmap_cache_hit(self, base_map, base_map2): + mm = op2.MixedMap([base_map, base_map2]) + mm2 = op2.MixedMap([base_map, base_map2]) + + assert mm is mm2 + assert not mm != mm2 + assert mm == mm2 + + def test_mixedmap_cache_miss(self, base_map, base_map2): + ms = op2.MixedMap([base_map, base_map2]) + ms2 = op2.MixedMap([base_map2, base_map]) + + assert ms is not ms2 + assert ms != ms2 + assert not ms == ms2 + + ms3 = op2.MixedMap([base_map, base_map2]) + assert ms is ms3 + assert not ms != ms3 + assert ms == ms3 + + def test_mixeddataset_cache_hit(self, base_set, base_set2): + mds = op2.MixedDataSet([base_set, base_set2]) + mds2 = op2.MixedDataSet([base_set, base_set2]) + + assert mds is mds2 + assert not mds != mds2 + assert mds == mds2 + + def test_mixeddataset_cache_miss(self, base_set, base_set2): + mds = op2.MixedDataSet([base_set, base_set2]) + mds2 = op2.MixedDataSet([base_set2, base_set]) + mds3 = op2.MixedDataSet([base_set, base_set]) + + assert mds is not mds2 + assert mds != mds2 + assert not mds == mds2 + + assert mds is not mds3 + assert mds != mds3 + assert not mds == mds3 + + assert mds2 is not mds3 + assert mds2 != mds3 + assert not mds2 == mds3 + + def test_sparsity_cache_hit(self, base_set, base_map): + dsets = (base_set ** 1, base_set ** 1) + maps = (base_map, base_map) + sp = op2.Sparsity(dsets, [(*maps, None)]) + sp2 = op2.Sparsity(dsets, [(*maps, None)]) + + assert sp is sp2 + assert not sp != sp2 + assert sp == sp2 + + mixed_set = op2.MixedSet([base_set, base_set]) + dsets = (mixed_set ** 1, mixed_set ** 1) + + maps = op2.MixedMap([base_map, base_map]) + sp = op2.Sparsity(dsets, {(i, j): [(rm, cm, None)] for i, rm in enumerate(maps) for j, cm in enumerate(maps)}) + + mixed_set2 = op2.MixedSet([base_set, base_set]) + dsets2 
= (mixed_set2 ** 1, mixed_set2 ** 1) + maps2 = op2.MixedMap([base_map, base_map]) + sp2 = op2.Sparsity(dsets2, {(i, j): [(rm, cm, None)] for i, rm in enumerate(maps2) for j, cm in enumerate(maps2)}) + assert sp is sp2 + assert not sp != sp2 + assert sp == sp2 + + def test_sparsity_cache_miss(self, base_set, base_set2, + base_map, base_map2): + dsets = (base_set ** 1, base_set ** 1) + maps = (base_map, base_map) + sp = op2.Sparsity(dsets, [(*maps, (op2.ALL, ))]) + + mixed_set = op2.MixedSet([base_set, base_set]) + dsets2 = (mixed_set ** 1, mixed_set ** 1) + maps2 = op2.MixedMap([base_map, base_map]) + sp2 = op2.Sparsity(dsets2, {(i, j): [(rm, cm, (op2.ALL, ))] for i, rm in enumerate(maps2) for j, cm in enumerate(maps2)}) + assert sp is not sp2 + assert sp != sp2 + assert not sp == sp2 + + dsets2 = (base_set ** 1, base_set2 ** 1) + maps2 = (base_map, base_map2) + sp2 = op2.Sparsity(dsets2, [(*maps2, (op2.ALL, ))]) + assert sp is not sp2 + assert sp != sp2 + assert not sp == sp2 + + +class TestGeneratedCodeCache: + + """ + Generated Code Cache Tests. 
+ """ + + @property + def cache(self): + int_comm = internal_comm(COMM_WORLD, self) + _cache_collection = int_comm.Get_attr(comm_cache_keyval) + if _cache_collection is None: + _cache_collection = {default_cache_name: DEFAULT_CACHE()} + int_comm.Set_attr(comm_cache_keyval, _cache_collection) + return _cache_collection[default_cache_name] + + @pytest.fixture + def a(cls, diterset): + return op2.Dat(diterset, list(range(nelems)), numpy.uint32, "a") + + @pytest.fixture + def b(cls, diterset): + return op2.Dat(diterset, list(range(nelems)), numpy.uint32, "b") + + def test_same_args(self, iterset, iter2ind1, x, a): + self.cache.clear() + assert len(self.cache) == 0 + + kernel_cpy = "static void cpy(unsigned int* dst, unsigned int* src) { *dst = *src; }" + + op2.par_loop(op2.Kernel(kernel_cpy, "cpy"), + iterset, + a(op2.WRITE), + x(op2.READ, iter2ind1)) + + assert len(self.cache) == 1 + + op2.par_loop(op2.Kernel(kernel_cpy, "cpy"), + iterset, + a(op2.WRITE), + x(op2.READ, iter2ind1)) + + assert len(self.cache) == 1 + + def test_diff_kernel(self, iterset, iter2ind1, x, a): + self.cache.clear() + assert len(self.cache) == 0 + + kernel_cpy = "static void cpy(unsigned int* dst, unsigned int* src) { *dst = *src; }" + + op2.par_loop(op2.Kernel(kernel_cpy, "cpy"), + iterset, + a(op2.WRITE), + x(op2.READ, iter2ind1)) + + assert len(self.cache) == 1 + + kernel_cpy = "static void cpy(unsigned int* DST, unsigned int* SRC) { *DST = *SRC; }" + + op2.par_loop(op2.Kernel(kernel_cpy, "cpy"), + iterset, + a(op2.WRITE), + x(op2.READ, iter2ind1)) + + assert len(self.cache) == 2 + + def test_invert_arg_similar_shape(self, iterset, iter2ind1, x, y): + self.cache.clear() + assert len(self.cache) == 0 + + kernel_swap = """ +static void swap(unsigned int* x, unsigned int* y) +{ + unsigned int t; + t = *x; + *x = *y; + *y = t; +} +""" + op2.par_loop(op2.Kernel(kernel_swap, "swap"), + iterset, + x(op2.RW, iter2ind1), + y(op2.RW, iter2ind1)) + + assert len(self.cache) == 1 + + 
op2.par_loop(op2.Kernel(kernel_swap, "swap"), + iterset, + y(op2.RW, iter2ind1), + x(op2.RW, iter2ind1)) + + assert len(self.cache) == 1 + + def test_dloop_ignore_scalar(self, iterset, a, b): + self.cache.clear() + assert len(self.cache) == 0 + + kernel_swap = """ +static void swap(unsigned int* x, unsigned int* y) +{ + unsigned int t; + t = *x; + *x = *y; + *y = t; +} +""" + op2.par_loop(op2.Kernel(kernel_swap, "swap"), + iterset, + a(op2.RW), + b(op2.RW)) + + assert len(self.cache) == 1 + + op2.par_loop(op2.Kernel(kernel_swap, "swap"), + iterset, + b(op2.RW), + a(op2.RW)) + + assert len(self.cache) == 1 + + def test_vector_map(self, iterset, x2, iter2ind2): + self.cache.clear() + assert len(self.cache) == 0 + + kernel_swap = """ +static void swap(unsigned int* x) +{ + unsigned int t; + t = x[0]; + x[0] = x[1]; + x[1] = t; +} +""" + + op2.par_loop(op2.Kernel(kernel_swap, "swap"), + iterset, + x2(op2.RW, iter2ind2)) + + assert len(self.cache) == 1 + + op2.par_loop(op2.Kernel(kernel_swap, "swap"), + iterset, + x2(op2.RW, iter2ind2)) + + assert len(self.cache) == 1 + + def test_same_iteration_space_works(self, iterset, x2, iter2ind2): + self.cache.clear() + assert len(self.cache) == 0 + k = op2.Kernel("""static void k(void *x) {}""", 'k') + + op2.par_loop(k, iterset, + x2(op2.INC, iter2ind2)) + + assert len(self.cache) == 1 + + op2.par_loop(k, iterset, + x2(op2.INC, iter2ind2)) + + assert len(self.cache) == 1 + + def test_change_dat_dtype_matters(self, iterset, diterset): + d = op2.Dat(diterset, list(range(nelems)), numpy.uint32) + self.cache.clear() + assert len(self.cache) == 0 + + k = op2.Kernel("""static void k(void *x) {}""", 'k') + + op2.par_loop(k, iterset, d(op2.WRITE)) + + assert len(self.cache) == 1 + + d = op2.Dat(diterset, list(range(nelems)), numpy.int32) + op2.par_loop(k, iterset, d(op2.WRITE)) + + assert len(self.cache) == 2 + + def test_change_global_dtype_matters(self, iterset, diterset): + g = op2.Global(1, 0, dtype=numpy.uint32, comm=COMM_WORLD) + 
self.cache.clear() + assert len(self.cache) == 0 + + k = op2.Kernel("""static void k(void *x) {}""", 'k') + + op2.par_loop(k, iterset, g(op2.INC)) + + assert len(self.cache) == 1 + + g = op2.Global(1, 0, dtype=numpy.float64, comm=COMM_WORLD) + op2.par_loop(k, iterset, g(op2.INC)) + + assert len(self.cache) == 2 + + +class TestSparsityCache: + + @pytest.fixture + def s1(cls): + return op2.Set(5) + + @pytest.fixture + def s2(cls): + return op2.Set(5) + + @pytest.fixture + def ds2(cls, s2): + return op2.DataSet(s2, 1) + + @pytest.fixture + def m1(cls, s1, s2): + return op2.Map(s1, s2, 1, [0, 1, 2, 3, 4]) + + @pytest.fixture + def m2(cls, s1, s2): + return op2.Map(s1, s2, 1, [1, 2, 3, 4, 0]) + + def test_sparsities_differing_maps_not_cached(self, m1, m2, ds2): + """Sparsities with different maps should not share a C handle.""" + sp1 = op2.Sparsity((ds2, ds2), [(m1, m1, None)]) + sp2 = op2.Sparsity((ds2, ds2), [(m2, m2, None)]) + assert sp1 is not sp2 + + def test_sparsities_differing_map_pairs_not_cached(self, m1, m2, ds2): + """Sparsities with different maps should not share a C handle.""" + sp1 = op2.Sparsity((ds2, ds2), [(m1, m2, None)]) + sp2 = op2.Sparsity((ds2, ds2), [(m2, m1, None)]) + assert sp1 is not sp2 + + def test_sparsities_differing_map_tuples_not_cached(self, m1, m2, ds2): + """Sparsities with different maps should not share a C handle.""" + sp1 = op2.Sparsity((ds2, ds2), [(m1, m1, None), (m2, m2, None)]) + sp2 = op2.Sparsity((ds2, ds2), [(m2, m2, None), (m2, m2, None)]) + assert sp1 is not sp2 + + def test_sparsities_same_map_pair_cached(self, m1, ds2): + """Sparsities with the same map pair should share a C handle.""" + sp1 = op2.Sparsity((ds2, ds2), [(m1, m1, None)]) + sp2 = op2.Sparsity((ds2, ds2), [(m1, m1, None)]) + assert sp1 is sp2 + + def test_sparsities_same_map_tuple_cached(self, m1, m2, ds2): + "Sparsities with the same tuple of map pairs should share a C handle." 
+ sp1 = op2.Sparsity((ds2, ds2), [(m1, m1, None), (m2, m2, None)]) + sp2 = op2.Sparsity((ds2, ds2), [(m1, m1, None), (m2, m2, None)]) + assert sp1 is sp2 + + def test_sparsities_different_ordered_map_tuple_cached(self, m1, m2, ds2): + "Sparsities with the same tuple of map pairs should share a C handle." + sp1 = op2.Sparsity((ds2, ds2), [(m1, m1, None), (m2, m2, None)]) + sp2 = op2.Sparsity((ds2, ds2), [(m2, m2, None), (m1, m1, None)]) + assert sp1 is sp2 + + +class TestDiskCachedDecorator: + + @staticmethod + def myfunc(arg, comm): + """Example function to cache the outputs of.""" + return {arg} + + @pytest.fixture + def comm(self): + """This fixture provides a temporary comm so that each test gets it's own + communicator and that caches are cleaned on free.""" + temporary_comm = COMM_WORLD.Dup() + temporary_comm.name = "pytest temp COMM_WORLD" + with temp_internal_comm(temporary_comm) as comm: + yield comm + temporary_comm.Free() + + @pytest.fixture + def cachedir(cls): + return tempfile.TemporaryDirectory() + + def test_decorator_in_memory_cache_reuses_results(self, cachedir, comm): + decorated_func = memory_and_disk_cache( + cachedir=cachedir.name + )(self.myfunc) + + obj1 = decorated_func("input1", comm=comm) + mem_cache = comm.Get_attr(comm_cache_keyval)[default_cache_name] + assert len(mem_cache) == 1 + assert len(os.listdir(cachedir.name)) == 1 + + obj2 = decorated_func("input1", comm=comm) + assert obj1 is obj2 + assert len(mem_cache) == 1 + assert len(os.listdir(cachedir.name)) == 1 + + def test_decorator_uses_different_in_memory_caches_on_different_comms(self, cachedir, comm): + comm_world_func = memory_and_disk_cache( + cachedir=cachedir.name + )(self.myfunc) + + temporary_comm = COMM_SELF.Dup() + temporary_comm.name = "pytest temp COMM_SELF" + with temp_internal_comm(temporary_comm) as comm_self: + comm_self_func = memory_and_disk_cache( + cachedir=cachedir.name + )(self.myfunc) + + # obj1 should be cached on the COMM_WORLD cache + obj1 = 
comm_world_func("input1", comm=comm) + comm_world_cache = comm.Get_attr(comm_cache_keyval)[default_cache_name] + assert len(comm_world_cache) == 1 + assert len(os.listdir(cachedir.name)) == 1 + + # obj2 should be cached on the COMM_SELF cache + obj2 = comm_self_func("input1", comm=comm_self) + comm_self_cache = comm_self.Get_attr(comm_cache_keyval)[default_cache_name] + assert obj1 == obj2 and obj1 is not obj2 + assert len(comm_world_cache) == 1 + assert len(comm_self_cache) == 1 + assert len(os.listdir(cachedir.name)) == 1 + + temporary_comm.Free() + + def test_decorator_disk_cache_reuses_results(self, cachedir, comm): + decorated_func = memory_and_disk_cache(cachedir=cachedir.name)(self.myfunc) + + obj1 = decorated_func("input1", comm=comm) + clear_memory_cache(comm) + obj2 = decorated_func("input1", comm=comm) + mem_cache = comm.Get_attr(comm_cache_keyval)[default_cache_name] + assert obj1 == obj2 and obj1 is not obj2 + assert len(mem_cache) == 1 + assert len(os.listdir(cachedir.name)) == 1 + + def test_decorator_cache_misses(self, cachedir, comm): + decorated_func = memory_and_disk_cache(cachedir=cachedir.name)(self.myfunc) + + obj1 = decorated_func("input1", comm=comm) + obj2 = decorated_func("input2", comm=comm) + mem_cache = comm.Get_attr(comm_cache_keyval)[default_cache_name] + assert obj1 != obj2 + assert len(mem_cache) == 2 + assert len(os.listdir(cachedir.name)) == 2 + + +# Test updated caching functionality +class StateIncrement: + """Simple class for keeping track of the number of times executed + """ + def __init__(self): + self._count = 0 + + def __call__(self): + self._count += 1 + return self._count + + @property + def value(self): + return self._count + + +def twople(x): + return (x, )*2 + + +def threeple(x): + return (x, )*3 + + +def n_comms(n): + return [MPI.COMM_WORLD]*n + + +def n_ops(n): + return [MPI.SUM]*n + + +# decorator = parallel_memory_only_cache, parallel_memory_only_cache_no_broadcast, disk_only_cached +def function_factory(state, 
decorator, f, **kwargs): + def custom_function(x, comm=COMM_WORLD): + state() + return f(x) + + return decorator(**kwargs)(custom_function) + + +@pytest.fixture +def state(): + return StateIncrement() + + +@pytest.mark.parametrize("decorator, uncached_function", [ + (memory_cache, twople), + (memory_cache, n_comms), + (memory_and_disk_cache, twople), + (disk_only_cache, twople) +]) +def test_function_args_twice_caches(request, state, decorator, uncached_function, tmpdir): + if request.node.callspec.params["decorator"] in {disk_only_cache, memory_and_disk_cache}: + kwargs = {"cachedir": tmpdir} + else: + kwargs = {} + + cached_function = function_factory(state, decorator, uncached_function, **kwargs) + assert state.value == 0 + first = cached_function(2, comm=COMM_WORLD) + assert first == uncached_function(2) + assert state.value == 1 + second = cached_function(2, comm=COMM_WORLD) + assert second == uncached_function(2) + if request.node.callspec.params["decorator"] is not disk_only_cache: + assert second is first + assert state.value == 1 + + clear_memory_cache(COMM_WORLD) + + +@pytest.mark.parametrize("decorator, uncached_function", [ + (memory_cache, twople), + (memory_cache, n_comms), + (memory_and_disk_cache, twople), + (disk_only_cache, twople) +]) +def test_function_args_different(request, state, decorator, uncached_function, tmpdir): + if request.node.callspec.params["decorator"] in {disk_only_cache, memory_and_disk_cache}: + kwargs = {"cachedir": tmpdir} + else: + kwargs = {} + + cached_function = function_factory(state, decorator, uncached_function, **kwargs) + assert state.value == 0 + first = cached_function(2, comm=COMM_WORLD) + assert first == uncached_function(2) + assert state.value == 1 + second = cached_function(3, comm=COMM_WORLD) + assert second == uncached_function(3) + assert state.value == 2 + + clear_memory_cache(COMM_WORLD) + + +@pytest.mark.parallel(nprocs=3) +@pytest.mark.parametrize("decorator, uncached_function", [ + (memory_cache, 
twople), + (memory_cache, n_comms), + (memory_and_disk_cache, twople), + (disk_only_cache, twople) +]) +def test_function_over_different_comms(request, state, decorator, uncached_function, tmpdir): + if request.node.callspec.params["decorator"] in {disk_only_cache, memory_and_disk_cache}: + # In parallel different ranks can get different tempdirs, we just want one + tmpdir = COMM_WORLD.bcast(tmpdir, root=0) + kwargs = {"cachedir": tmpdir} + else: + kwargs = {} + + cached_function = function_factory(state, decorator, uncached_function, **kwargs) + assert state.value == 0 + + for ii in range(10): + color = 0 if COMM_WORLD.rank < 2 else MPI.UNDEFINED + comm12 = COMM_WORLD.Split(color=color) + if COMM_WORLD.rank < 2: + _ = cached_function(2, comm=comm12) + comm12.Free() + + color = 0 if COMM_WORLD.rank > 0 else MPI.UNDEFINED + comm23 = COMM_WORLD.Split(color=color) + if COMM_WORLD.rank > 0: + _ = cached_function(2, comm=comm23) + comm23.Free() + + clear_memory_cache(COMM_WORLD) + + +# pyop2/compilation.py uses a custom cache which we test here +@pytest.mark.parallel(nprocs=2) +def test_writing_large_so(): + # This test exercises the compilation caching when handling larger files + if COMM_WORLD.rank == 0: + preamble = dedent("""\ + #include <stdio.h>\n + void big(double *result){ + """) + variables = (f"v{next(tempfile._get_candidate_names())}" for _ in range(128*1024)) + lines = (f" double {v} = {hash(v)/1000000000};\n *result += {v};\n" for v in variables) + program = "\n".join(chain.from_iterable(((preamble, ), lines, ("}\n", )))) + with open("big.c", "w") as fh: + fh.write(program) + + COMM_WORLD.Barrier() + with open("big.c", "r") as fh: + program = fh.read() + + if COMM_WORLD.rank == 1: + os.remove("big.c") + + fn = load(program, "c", "big", argtypes=(ctypes.c_voidp,), comm=COMM_WORLD) + assert fn is not None + + +@pytest.mark.parallel(nprocs=2) +def test_two_comms_compile_the_same_code(): + new_comm = COMM_WORLD.Split(color=COMM_WORLD.rank) + new_comm.name = 
"test_two_comms" + code = dedent("""\ + #include <stdio.h>\n + void noop(){ + printf("Do nothing!\\n"); + } + """) + + fn = load(code, "c", "noop", argtypes=(), comm=COMM_WORLD) + assert fn is not None + + +if __name__ == '__main__': + pytest.main(os.path.abspath(__file__)) diff --git a/tests/pyop2/test_callables.py b/tests/pyop2/test_callables.py new file mode 100644 index 0000000000..85b6f09f17 --- /dev/null +++ b/tests/pyop2/test_callables.py @@ -0,0 +1,122 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012-2014, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest +import loopy +from pyop2.codegen.rep2loopy import SolveCallable, INVCallable +import numpy as np +from pyop2 import op2 +from pyop2.configuration import target + + +@pytest.fixture +def s(): + return op2.Set(1) + + +@pytest.fixture +def zero_mat(s): + return op2.Dat(s ** (2, 2), [[0.0, 0.0], [0.0, 0.0]]) + + +@pytest.fixture +def inv_mat(s): + return op2.Dat(s ** (2, 2), [[1.0, 2.0], [3.0, 4.0]]) + + +@pytest.fixture +def zero_vec(s): + return op2.Dat(s ** (2, 1), [0.0, 0.0]) + + +@pytest.fixture +def solve_mat(s): + d = op2.Dat(s ** (2, 2), [[2.0, 1.0], [-3.0, 2.0]]) + return d + + +@pytest.fixture +def solve_vec(s): + return op2.Dat(s ** (2, 1), [1.0, 0.0]) + + +class TestCallables: + + def test_inverse_callable(self, zero_mat, inv_mat): + loopy.set_caching_enabled(False) + + k = loopy.make_kernel( + ["{ : }"], + """ + B[:,:] = inverse(A[:,:]) + """, + [loopy.GlobalArg('B', dtype=np.float64, shape=(2, 2)), + loopy.GlobalArg('A', dtype=np.float64, shape=(2, 2))], + target=target, + name="callable_kernel", + lang_version=(2018, 2)) + + k = loopy.register_callable(k, INVCallable.name, INVCallable()) + code = loopy.generate_code_v2(k).device_code() + code = code.replace('void callable_kernel', 'static void callable_kernel')  # str.replace returns a copy + + loopykernel = op2.Kernel(code, "callable_kernel", ldargs=["-llapack"]) + + op2.par_loop(loopykernel, zero_mat.dataset.set, zero_mat(op2.WRITE), inv_mat(op2.READ)) + expected = 
np.linalg.inv(inv_mat.data) + assert np.allclose(expected, zero_mat.data) + + def test_solve_callable(self, zero_vec, solve_mat, solve_vec): + loopy.set_caching_enabled(False) + + k = loopy.make_kernel( + ["{ : }"], + """ + x[:] = solve(A[:,:], b[:]) + """, + [loopy.GlobalArg('x', dtype=np.float64, shape=(2, )), + loopy.GlobalArg('A', dtype=np.float64, shape=(2, 2)), + loopy.GlobalArg('b', dtype=np.float64, shape=(2, ),)], + target=target, + name="callable_kernel2", + lang_version=(2018, 2)) + + k = loopy.register_callable(k, SolveCallable.name, SolveCallable()) + code = loopy.generate_code_v2(k).device_code() + code = code.replace('void callable_kernel2', 'static void callable_kernel2')  # str.replace returns a copy + loopykernel = op2.Kernel(code, "callable_kernel2", ldargs=["-llapack"]) + args = [zero_vec(op2.WRITE), solve_mat(op2.READ), solve_vec(op2.READ)]  # x is written; A and b are only read + + op2.par_loop(loopykernel, solve_mat.dataset.set, *args) + expected = np.linalg.solve(solve_mat.data, solve_vec.data) + assert np.allclose(expected, zero_vec.data) diff --git a/tests/pyop2/test_configuration.py b/tests/pyop2/test_configuration.py new file mode 100644 index 0000000000..f6c5c849d7 --- /dev/null +++ b/tests/pyop2/test_configuration.py @@ -0,0 +1,58 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Configuration unit tests.""" + + +import pytest +from pyop2.configuration import Configuration +from pyop2.exceptions import ConfigurationError + + +class TestConfigurationAPI: + """Configuration API unit tests.""" + + def test_add_configuration_value(self): + """Defining a non-default argument.""" + c = Configuration() + c.reconfigure(foo='bar') + assert c['foo'] == 'bar' + + @pytest.mark.parametrize(('key', 'val'), [('debug', 'illegal'), + ('log_level', 1.5)]) + def test_configuration_illegal_types(self, key, val): + """Illegal types for configuration values should raise + ConfigurationError.""" + c = Configuration() + with pytest.raises(ConfigurationError): + c[key] = val diff --git a/tests/pyop2/test_dats.py b/tests/pyop2/test_dats.py new file mode 100644 index 0000000000..2b8cf2efbd --- /dev/null +++ b/tests/pyop2/test_dats.py @@ -0,0 +1,323 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. 
Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +import pytest +import numpy as np + +from pyop2 import op2 + +nelems = 5 + + +@pytest.fixture(scope='module') +def s(): + return op2.Set(nelems) + + +@pytest.fixture +def d1(s): + return op2.Dat(s, list(range(nelems)), dtype=np.float64) + + +@pytest.fixture +def mdat(d1): + return op2.MixedDat([d1, d1]) + + +@pytest.fixture(scope='module') +def s2(s): + return op2.DataSet(s, 2) + + +@pytest.fixture +def vdat(s2): + return op2.Dat(s2, np.zeros(2 * nelems), dtype=np.float64) + + +class TestDat: + + """ + Test some properties of Dats + """ + + def test_copy_constructor(self, d1): + """Dat copy constructor should copy values""" + d2 = op2.Dat(d1) + assert d1.dataset.set == d2.dataset.set + assert (d1.data_ro == d2.data_ro).all() + d1.data[:] = -1 + assert (d1.data_ro != d2.data_ro).all() + + def test_copy_constructor_mixed(self, mdat): + """MixedDat copy constructor should copy values""" + mdat2 = op2.MixedDat(mdat) + assert mdat.dataset.set == mdat2.dataset.set + assert all(all(d.data_ro == d_.data_ro) for d, d_ in zip(mdat, mdat2)) + for dat in mdat.data: + dat[:] = -1 + assert all(all(d.data_ro != d_.data_ro) for d, d_ in zip(mdat, mdat2)) + + def test_copy(self, d1, s): + """Copy method on a Dat should copy values into given target""" + d2 = op2.Dat(s) + d1.copy(d2) + assert d1.dataset.set == d2.dataset.set + assert (d1.data_ro == d2.data_ro).all() + d1.data[:] = -1 + assert (d1.data_ro != d2.data_ro).all() + + def test_copy_mixed(self, s, mdat): + """Copy method on a MixedDat should copy values into given target""" + mdat2 = op2.MixedDat([s, s]) + mdat.copy(mdat2) + assert all(all(d.data_ro == d_.data_ro) for d, d_ in zip(mdat, mdat2)) + for dat in mdat.data: + dat[:] = -1 + assert all(all(d.data_ro != d_.data_ro) for d, d_ in zip(mdat, mdat2)) + + def test_copy_subset(self, s, d1): + """Copy method should copy values on a subset""" + d2 = op2.Dat(s) + ss = op2.Subset(s, list(range(1, nelems, 2))) + d1.copy(d2, subset=ss) + assert (d1.data_ro[ss.indices] == 
d2.data_ro[ss.indices]).all() + assert (d2.data_ro[::2] == 0).all() + + def test_copy_mixed_subset_fails(self, s, mdat): + """Copy method on a MixedDat does not support subsets""" + with pytest.raises(NotImplementedError): + mdat.copy(op2.MixedDat([s, s]), subset=op2.Subset(s, [])) + + @pytest.mark.parametrize('dim', [1, 2]) + def test_dat_nbytes(self, dim): + """Nbytes computes the number of bytes occupied by a Dat.""" + s = op2.Set(10) + assert op2.Dat(s**dim).nbytes == 10*8*dim + + def test_dat_save_and_load(self, tmpdir, d1, s, mdat): + """The save method should dump Dat and MixedDat values to + the file 'output', and the load method should read back + those same values from the 'output' file. """ + output = tmpdir.join('output').strpath + d1.save(output) + d2 = op2.Dat(s) + d2.load(output) + assert (d1.data_ro == d2.data_ro).all() + + mdat.save(output) + mdat2 = op2.MixedDat([d1, d1]) + mdat2.load(output) + assert all(all(d.data_ro == d_.data_ro) for d, d_ in zip(mdat, mdat2)) + + def test_dat_version(self, s, d1): + """Check object versioning for Dat""" + d2 = op2.Dat(s) + + assert d1.dat_version == 0 + assert d2.dat_version == 0 + + # Access data property + d1.data + + assert d1.dat_version == 1 + assert d2.dat_version == 0 + + # Access data property + d2.data[:] += 1 + + assert d1.dat_version == 1 + assert d2.dat_version == 1 + + # Access zero property + d1.zero() + + assert d1.dat_version == 2 + assert d2.dat_version == 1 + + # Copy d2 into d1 + d2.copy(d1) + + assert d1.dat_version == 3 + assert d2.dat_version == 1 + + # Context managers (without changing d1 and d2) + with d1.vec_wo as _: + pass + + with d2.vec as _: + pass + + # Dat version shouldn't change as we are just calling the context manager + # and not changing the Dat objects. 
+ assert d1.dat_version == 3 + assert d2.dat_version == 1 + + # Context managers (modify d1 and d2) + with d1.vec_wo as x: + x += 1 + + with d2.vec as x: + x += 1 + + assert d1.dat_version == 4 + assert d2.dat_version == 2 + + # ParLoop + d3 = op2.Dat(s ** 1, data=None, dtype=np.uint32) + assert d3.dat_version == 0 + k = op2.Kernel(""" +static void write(unsigned int* v) { + *v = 1; +} +""", "write") + op2.par_loop(k, s, d3(op2.WRITE)) + assert d3.dat_version == 1 + + def test_mixed_dat_version(self, s, d1, mdat): + """Check object versioning for MixedDat""" + d2 = op2.Dat(s) + mdat2 = op2.MixedDat([d1, d2]) + + assert mdat.dat_version == 0 + assert mdat2.dat_version == 0 + + # Access data property + mdat2.data + + # mdat2.data will call d1.data and d2.data + assert d1.dat_version == 1 + assert d2.dat_version == 1 + assert mdat.dat_version == 2 + assert mdat2.dat_version == 2 + + # Access zero property + mdat.zero() + + # mdat.zero() will call d1.zero() twice + assert d1.dat_version == 3 + assert d2.dat_version == 1 + assert mdat.dat_version == 6 + assert mdat2.dat_version == 4 + + # Access zero property + d1.zero() + + assert d1.dat_version == 4 + assert mdat.dat_version == 8 + assert mdat2.dat_version == 5 + + # ParLoop + d3 = op2.Dat(s ** 1, data=None, dtype=np.uint32) + d4 = op2.Dat(s ** 1, data=None, dtype=np.uint32) + d3d4 = op2.MixedDat([d3, d4]) + assert d3.dat_version == 0 + assert d4.dat_version == 0 + assert d3d4.dat_version == 0 + k = op2.Kernel(""" +static void write(unsigned int* v) { + v[0] = 1; + v[1] = 2; +} +""", "write") + m = op2.Map(s, op2.Set(nelems), 1, values=[0, 1, 2, 3, 4]) + op2.par_loop(k, s, d3d4(op2.WRITE, op2.MixedMap([m, m]))) + assert d3.dat_version == 1 + assert d4.dat_version == 1 + assert d3d4.dat_version == 2 + + def test_accessing_data_with_halos_increments_dat_version(self, d1): + assert d1.dat_version == 0 + d1.data_ro_with_halos + assert d1.dat_version == 0 + d1.data_with_halos + assert d1.dat_version == 1 + + +class 
TestDatView(): + + def test_dat_view_assign(self, vdat): + vdat.data[:, 0] = 3 + vdat.data[:, 1] = 4 + comp = op2.DatView(vdat, 1) + comp.data[:] = 7 + assert not vdat.halo_valid + assert not comp.halo_valid + + expected = np.zeros_like(vdat.data) + expected[:, 0] = 3 + expected[:, 1] = 7 + assert all(comp.data == expected[:, 1]) + assert all(vdat.data[:, 0] == expected[:, 0]) + assert all(vdat.data[:, 1] == expected[:, 1]) + + def test_dat_view_zero(self, vdat): + vdat.data[:, 0] = 3 + vdat.data[:, 1] = 4 + comp = op2.DatView(vdat, 1) + comp.zero() + assert vdat.halo_valid + assert comp.halo_valid + + expected = np.zeros_like(vdat.data) + expected[:, 0] = 3 + expected[:, 1] = 0 + assert all(comp.data == expected[:, 1]) + assert all(vdat.data[:, 0] == expected[:, 0]) + assert all(vdat.data[:, 1] == expected[:, 1]) + + def test_dat_view_halo_valid(self, vdat): + """Check halo validity for DatView""" + comp = op2.DatView(vdat, 1) + assert vdat.halo_valid + assert comp.halo_valid + assert vdat.dat_version == 0 + assert comp.dat_version == 0 + + comp.data_ro_with_halos + assert vdat.halo_valid + assert comp.halo_valid + assert vdat.dat_version == 0 + assert comp.dat_version == 0 + + # accessing comp.data_with_halos should mark the parent halo as dirty + comp.data_with_halos + assert not vdat.halo_valid + assert not comp.halo_valid + assert vdat.dat_version == 1 + assert comp.dat_version == 1 + + +if __name__ == '__main__': + import os + pytest.main(os.path.abspath(__file__)) diff --git a/tests/pyop2/test_direct_loop.py b/tests/pyop2/test_direct_loop.py new file mode 100644 index 0000000000..2524a78f3d --- /dev/null +++ b/tests/pyop2/test_direct_loop.py @@ -0,0 +1,291 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +import pytest +import numpy as np +from petsc4py import PETSc + +from pyop2 import op2 +from pyop2.exceptions import MapValueError +from pyop2.mpi import COMM_WORLD + +nelems = 4096 + + +@pytest.fixture(params=[(nelems, nelems, nelems), + (0, nelems, nelems), + (nelems // 2, nelems, nelems), + (0, nelems//2, nelems)]) +def elems(request): + return op2.Set(request.param, "elems") + + +@pytest.fixture +def delems(elems): + return op2.DataSet(elems, 1, "delems") + + +@pytest.fixture +def delems2(elems): + return op2.DataSet(elems, 2, "delems2") + + +def xarray(): + return np.array(range(nelems), dtype=np.uint32) + + +class TestDirectLoop: + + """ + Direct Loop Tests + """ + + @pytest.fixture + def x(cls, delems): + return op2.Dat(delems, xarray(), np.uint32, "x") + + @pytest.fixture + def y(cls, delems2): + return op2.Dat(delems2, [xarray(), xarray()], np.uint32, "x") + + @pytest.fixture + def g(cls): + return op2.Global(1, 0, np.uint32, "g", comm=COMM_WORLD) + + @pytest.fixture + def h(cls): + return op2.Global(1, 1, np.uint32, "h", comm=COMM_WORLD) + + def test_wo(self, elems, x): + """Set a Dat to a scalar value with op2.WRITE.""" + kernel_wo = """static void wo(unsigned int* x) { *x = 42; }""" + op2.par_loop(op2.Kernel(kernel_wo, "wo"), + elems, x(op2.WRITE)) + assert all(map(lambda x: x == 42, x.data)) + + def test_mismatch_set_raises_error(self, elems, x): + """The iterset of the parloop should match the dataset of the direct dat.""" + kernel_wo = """static void wo(unsigned int* x) { *x = 42; }""" + with pytest.raises(MapValueError): + op2.par_loop( + op2.Kernel(kernel_wo, "wo"), + op2.Set(elems.size), + x(op2.WRITE) + ) + + def test_rw(self, elems, x): + """Increment each value of a Dat by one with op2.RW.""" + kernel_rw = """static void wo(unsigned int* x) { (*x) = (*x) + 1; }""" + op2.par_loop(op2.Kernel(kernel_rw, "wo"), + elems, x(op2.RW)) + _nelems = elems.size + assert sum(x.data_ro) == _nelems * (_nelems + 1) // 2 + if _nelems == nelems: + assert 
sum(x.data_ro_with_halos) == nelems * (nelems + 1) // 2 + + def test_global_inc(self, elems, x, g): + """Increment each value of a Dat by one and a Global at the same time.""" + kernel_global_inc = """static void global_inc(unsigned int* x, unsigned int* inc) { + (*x) = (*x) + 1; (*inc) += (*x); + }""" + op2.par_loop(op2.Kernel(kernel_global_inc, "global_inc"), + elems, x(op2.RW), g(op2.INC)) + _nelems = elems.size + assert g.data[0] == _nelems * (_nelems + 1) // 2 + + def test_global_inc_init_not_zero(self, elems, g): + """Increment a global initialized with a non-zero value.""" + k = """static void k(unsigned int* inc) { (*inc) += 1; }""" + g.data[0] = 10 + op2.par_loop(op2.Kernel(k, 'k'), elems, g(op2.INC)) + assert g.data[0] == elems.size + 10 + + def test_global_max_dat_is_max(self, elems, x, g): + """Verify that op2.MAX reduces to the maximum value.""" + k_code = """static void k(unsigned int *g, unsigned int *x) { + if ( *g < *x ) { *g = *x; } + }""" + k = op2.Kernel(k_code, 'k') + + op2.par_loop(k, elems, g(op2.MAX), x(op2.READ)) + assert g.data[0] == x.data.max() + + def test_global_max_g_is_max(self, elems, x, g): + """Verify that op2.MAX does not reduce a maximum value smaller than the + Global's initial value.""" + k_code = """static void k(unsigned int *x, unsigned int *g) { + if ( *g < *x ) { *g = *x; } + }""" + + k = op2.Kernel(k_code, 'k') + + g.data[0] = nelems * 2 + + op2.par_loop(k, elems, x(op2.READ), g(op2.MAX)) + + assert g.data[0] == nelems * 2 + + def test_global_min_dat_is_min(self, elems, x, g): + """Verify that op2.MIN reduces to the minimum value.""" + k_code = """static void k(unsigned int *g, unsigned int *x) { + if ( *g > *x ) { *g = *x; } + }""" + k = op2.Kernel(k_code, 'k') + g.data[0] = 1000 + op2.par_loop(k, elems, g(op2.MIN), x(op2.READ)) + + assert g.data[0] == x.data.min() + + def test_global_min_g_is_min(self, elems, x, g): + """Verify that op2.MIN does not reduce a minimum value larger than the + Global's initial value.""" + 
k_code = """static void k(unsigned int *x, unsigned int *g) { + if ( *g > *x ) { *g = *x; } + }""" + + k = op2.Kernel(k_code, 'k') + g.data[0] = 10 + x.data[:] = 11 + op2.par_loop(k, elems, x(op2.READ), g(op2.MIN)) + + assert g.data[0] == 10 + + def test_global_read(self, elems, x, h): + """Increment each value of a Dat by the value of a Global.""" + kernel_global_read = """ + static void global_read(unsigned int* x, unsigned int* h) { + (*x) += (*h); + }""" + op2.par_loop(op2.Kernel(kernel_global_read, "global_read"), + elems, x(op2.RW), h(op2.READ)) + _nelems = elems.size + assert sum(x.data_ro) == _nelems * (_nelems + 1) // 2 + + def test_2d_dat(self, elems, y): + """Set both components of a vector-valued Dat to a scalar value.""" + kernel_2d_wo = """static void k2d_wo(unsigned int* x) { + x[0] = 42; x[1] = 43; + }""" + op2.par_loop(op2.Kernel(kernel_2d_wo, "k2d_wo"), + elems, y(op2.WRITE)) + assert all(map(lambda x: all(x == [42, 43]), y.data)) + + def test_host_write(self, elems, x, g): + """Increment a global by the values of a Dat.""" + kernel = """static void k(unsigned int *g, unsigned int *x) { *g += *x; }""" + x.data[:] = 1 + g.data[:] = 0 + op2.par_loop(op2.Kernel(kernel, 'k'), elems, + g(op2.INC), x(op2.READ)) + _nelems = elems.size + assert g.data[0] == _nelems + + x.data[:] = 2 + g.data[:] = 0 + kernel = """static void k(unsigned int *x, unsigned int *g) { *g += *x; }""" + op2.par_loop(op2.Kernel(kernel, 'k'), elems, + x(op2.READ), g(op2.INC)) + assert g.data[0] == 2 * _nelems + + def test_zero_1d_dat(self, x): + """Zero a Dat.""" + x.data[:] = 10 + assert (x.data == 10).all() + x.zero() + assert (x.data == 0).all() + + def test_zero_2d_dat(self, y): + """Zero a vector-valued Dat.""" + y.data[:] = 10 + assert (y.data == 10).all() + y.zero() + assert (y.data == 0).all() + + def test_kernel_cplusplus(self, delems): + """Test that passing cpp=True to a Kernel works.""" + + y = op2.Dat(delems, dtype=np.float64) + y.data[:] = -10.5 + + k = op2.Kernel(""" 
+ #include + + static void k(double *y) + { + *y = std::abs(*y); + } + """, "k", cpp=True) + op2.par_loop(k, y.dataset.set, y(op2.RW)) + + assert (y.data == 10.5).all() + + def test_passthrough_mat(self): + niters = 10 + iterset = op2.Set(niters) + + c_kernel = """ +static void mat_inc(Mat mat) { + PetscScalar values[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + PetscInt idxs[] = {0, 2, 4}; + MatSetValues(mat, 3, idxs, 3, idxs, values, ADD_VALUES); +} + """ + kernel = op2.Kernel(c_kernel, "mat_inc") + + # create a tiny 5x5 sparse matrix + petsc_mat = PETSc.Mat().create() + petsc_mat.setSizes(5) + petsc_mat.setUp() + petsc_mat.setValues([0, 2, 4], [0, 2, 4], np.zeros((3, 3), dtype=PETSc.ScalarType)) + petsc_mat.assemble() + + arg = op2.PassthroughArg(op2.OpaqueType("Mat"), petsc_mat.handle) + op2.par_loop(kernel, iterset, arg) + petsc_mat.assemble() + + assert np.allclose( + petsc_mat.getValues(range(5), range(5)), + [ + [10, 0, 20, 0, 30], + [0]*5, + [40, 0, 50, 0, 60], + [0]*5, + [70, 0, 80, 0, 90], + ] + ) + + +if __name__ == '__main__': + import os + pytest.main(os.path.abspath(__file__)) diff --git a/tests/pyop2/test_extrusion.py b/tests/pyop2/test_extrusion.py new file mode 100644 index 0000000000..7a24d581b1 --- /dev/null +++ b/tests/pyop2/test_extrusion.py @@ -0,0 +1,449 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +import pytest +import numpy +import random + +from pyop2 import op2 +from pyop2.mpi import COMM_WORLD + + +def compute_ind_extr(nums, + map_dofs, + lins, + layers, + mesh2d, + dofs, + A, + wedges, + map, + lsize): + count = 0 + ind = numpy.zeros(lsize, dtype=numpy.int32) + len1 = len(mesh2d) + for mm in range(lins): + offset = 0 + for d in range(2): + c = 0 + for i in range(len1): + a4 = dofs[i, d] + if a4 != 0: + len2 = len(A[d]) + for j in range(0, mesh2d[i]): + m = map[mm][c] + for k in range(0, len2): + ind[count] = m*(layers - d) + A[d][k] + offset + count += 1 + c += 1 + elif dofs[i, 1-d] != 0: + c += mesh2d[i] + offset += a4*nums[i]*(layers - d) + return ind + + +# Data type +valuetype = numpy.float64 + +# Constants +NUM_ELE = 2 +NUM_NODES = 4 +NUM_DIMS = 2 + + +def _seed(): + return 0.02041724 + + +nelems = 32 +nnodes = nelems + 2 +nedges = 2 * nelems + 1 + +nums = numpy.array([nnodes, nedges, nelems]) + +layers = 11 +wedges = layers - 1 +partition_size = 300 + +mesh2d = numpy.array([3, 3, 1]) +mesh1d = numpy.array([2, 1]) +A = [[0, 1], [0]] + +dofs = numpy.array([[2, 0], [0, 0], [0, 1]]) +dofs_coords = numpy.array([[2, 0], [0, 0], [0, 0]]) +dofs_field = numpy.array([[0, 0], [0, 0], [0, 1]]) + +off1 = numpy.array([1, 1, 1, 1, 1, 1], dtype=numpy.int32) +off2 = numpy.array([1], dtype=numpy.int32) + +noDofs = numpy.dot(mesh2d, dofs) +noDofs = len(A[0]) * noDofs[0] + noDofs[1] + +map_dofs_coords = 6 +map_dofs_field = 1 + +# CRATE THE MAPS +# elems to nodes +elems2nodes = numpy.zeros(mesh2d[0] * nelems, dtype=numpy.int32) +for i in range(nelems): + elems2nodes[mesh2d[0] * i:mesh2d[0] * (i + 1)] = [i, i + 1, i + 2] +elems2nodes = elems2nodes.reshape(nelems, 3) + +# elems to edges +elems2edges = numpy.zeros(mesh2d[1] * nelems, numpy.int32) +c = 0 +for i in range(nelems): + elems2edges[mesh2d[1] * i:mesh2d[1] * (i + 1)] = [ + i + c, i + 1 + c, i + 2 + c] + c = 1 +elems2edges = elems2edges.reshape(nelems, 3) + +# elems to elems +elems2elems = 
numpy.zeros(mesh2d[2] * nelems, numpy.int32) +elems2elems[:] = range(nelems) +elems2elems = elems2elems.reshape(nelems, 1) + +xtr_elem_node_map = numpy.asarray( + [0, 1, 11, 12, 33, 34, 22, 23, 33, 34, 11, 12], dtype=numpy.uint32) + + +@pytest.fixture +def iterset(): + return op2.Set(nelems, "iterset") + + +@pytest.fixture +def indset(): + return op2.Set(nelems, "indset") + + +@pytest.fixture +def diterset(iterset): + return op2.DataSet(iterset, 1, "diterset") + + +@pytest.fixture +def dindset(indset): + return op2.DataSet(indset, 1, "dindset") + + +@pytest.fixture +def x(dindset): + return op2.Dat(dindset, range(nelems), numpy.uint32, "x") + + +@pytest.fixture +def iterset2indset(iterset, indset): + u_map = numpy.array(range(nelems), dtype=numpy.uint32) + random.shuffle(u_map, _seed) + return op2.Map(iterset, indset, 1, u_map, "iterset2indset") + + +@pytest.fixture +def elements(): + s = op2.Set(nelems) + return op2.ExtrudedSet(s, layers=layers) + + +@pytest.fixture +def node_set1(): + return op2.Set(nnodes * layers, "nodes1") + + +@pytest.fixture +def edge_set1(): + return op2.Set(nedges * layers, "edges1") + + +@pytest.fixture +def elem_set1(): + return op2.Set(nelems * wedges, "elems1") + + +@pytest.fixture +def dnode_set1(node_set1): + return op2.DataSet(node_set1, 1, "dnodes1") + + +@pytest.fixture +def dnode_set2(node_set1): + return op2.DataSet(node_set1, 2, "dnodes2") + + +@pytest.fixture +def dedge_set1(edge_set1): + return op2.DataSet(edge_set1, 1, "dedges1") + + +@pytest.fixture +def delem_set1(elem_set1): + return op2.DataSet(elem_set1, 1, "delems1") + + +@pytest.fixture +def delems_set2(elem_set1): + return op2.DataSet(elem_set1, 2, "delems2") + + +@pytest.fixture +def dat_coords(dnode_set2): + coords_size = nums[0] * layers * 2 + coords_dat = numpy.zeros(coords_size) + count = 0 + for k in range(0, nums[0]): + coords_dat[count:count + layers * dofs[0][0]] = numpy.tile( + [(k // 2), k % 2], layers) + count += layers * dofs[0][0] + return 
op2.Dat(dnode_set2, coords_dat, numpy.float64, "coords") + + +@pytest.fixture +def dat_field(delem_set1): + field_size = nums[2] * wedges * 1 + field_dat = numpy.zeros(field_size) + field_dat[:] = 1.0 + return op2.Dat(delem_set1, field_dat, numpy.float64, "field") + + +@pytest.fixture +def dat_c(dnode_set2): + coords_size = nums[0] * layers * 2 + coords_dat = numpy.zeros(coords_size) + count = 0 + for k in range(0, nums[0]): + coords_dat[count:count + layers * dofs[0][0]] = numpy.tile([0, 0], layers) + count += layers * dofs[0][0] + return op2.Dat(dnode_set2, coords_dat, numpy.float64, "c") + + +@pytest.fixture +def dat_f(delem_set1): + field_size = nums[2] * wedges * 1 + field_dat = numpy.zeros(field_size) + field_dat[:] = -1.0 + return op2.Dat(delem_set1, field_dat, numpy.float64, "f") + + +@pytest.fixture +def coords_map(elements, node_set1): + lsize = nums[2] * map_dofs_coords + ind_coords = compute_ind_extr( + nums, map_dofs_coords, nelems, layers, mesh2d, dofs_coords, A, wedges, elems2nodes, lsize) + return op2.Map(elements, node_set1, map_dofs_coords, ind_coords, "elem_dofs", off1) + + +@pytest.fixture +def field_map(elements, elem_set1): + lsize = nums[2] * map_dofs_field + ind_field = compute_ind_extr( + nums, map_dofs_field, nelems, layers, mesh2d, dofs_field, A, wedges, elems2elems, lsize) + return op2.Map(elements, elem_set1, map_dofs_field, ind_field, "elem_elem", off2) + + +@pytest.fixture +def xtr_elements(): + eset = op2.Set(NUM_ELE) + return op2.ExtrudedSet(eset, layers=layers) + + +@pytest.fixture +def xtr_nodes(): + return op2.Set(NUM_NODES * layers) + + +@pytest.fixture +def xtr_dnodes(xtr_nodes): + return op2.DataSet(xtr_nodes, 1, "xtr_dnodes") + + +@pytest.fixture +def xtr_elem_node(xtr_elements, xtr_nodes): + return op2.Map(xtr_elements, xtr_nodes, 6, xtr_elem_node_map, "xtr_elem_node", + numpy.array([1, 1, 1, 1, 1, 1], dtype=numpy.int32)) + + +@pytest.fixture +def xtr_mat(xtr_elem_node, xtr_dnodes): + sparsity = op2.Sparsity((xtr_dnodes, 
xtr_dnodes), {(0, 0): [(xtr_elem_node, xtr_elem_node, None, None)]}, "xtr_sparsity") + return op2.Mat(sparsity, valuetype, "xtr_mat") + + +@pytest.fixture +def xtr_dvnodes(xtr_nodes): + return op2.DataSet(xtr_nodes, 3, "xtr_dvnodes") + + +@pytest.fixture +def xtr_b(xtr_dnodes): + b_vals = numpy.zeros(NUM_NODES * layers, dtype=valuetype) + return op2.Dat(xtr_dnodes, b_vals, valuetype, "xtr_b") + + +@pytest.fixture +def xtr_coords(xtr_dvnodes): + coord_vals = numpy.asarray([(0.0, 0.0, 0.0), (1.0, 0.0, 0.0), + (0.0, 1.0, 0.0), (1.0, 1.0, 0.0)], + dtype=valuetype) + return coord_vals + + +@pytest.fixture +def extrusion_kernel(): + kernel_code = """ +static void extrusion(double *xtr, double *x, int* j) +{ + //Only the Z-coord is increased, the others stay the same + xtr[0] = x[0]; + xtr[1] = x[1]; + xtr[2] = 0.1*j[0]; +}""" + return op2.Kernel(kernel_code, "extrusion") + + +class TestExtrusion: + + """ + Extruded Mesh Tests + """ + + def test_extrusion(self, elements, dat_coords, dat_field, coords_map, field_map): + g = op2.Global(1, data=0.0, name='g', comm=COMM_WORLD) + mass = op2.Kernel(""" +static void comp_vol(double A[1], double x[6][2], double y[1]) +{ + double abs = x[0][0]*(x[2][1]-x[4][1])+x[2][0]*(x[4][1]-x[0][1])+x[4][0]*(x[0][1]-x[2][1]); + if (abs < 0) + abs = abs * (-1.0); + A[0]+=0.5*abs*0.1 * y[0]; +}""", "comp_vol") + + op2.par_loop(mass, elements, + g(op2.INC), + dat_coords(op2.READ, coords_map), + dat_field(op2.READ, field_map)) + + assert int(g.data[0]) == int((layers - 1) * 0.1 * (nelems // 2)) + + def test_extruded_nbytes(self, dat_field): + """Nbytes computes the number of bytes occupied by an extruded Dat.""" + assert dat_field.nbytes == nums[2] * wedges * 8 + + def test_direct_loop_inc(self, iterset, diterset): + dat = op2.Dat(diterset) + xtr_iterset = op2.ExtrudedSet(iterset, layers=10) + k = 'static void k(double *x) { *x += 1.0; }' + dat.data[:] = 0 + op2.par_loop(op2.Kernel(k, 'k'), + xtr_iterset, dat(op2.INC)) + assert 
numpy.allclose(dat.data[:], 9.0) + + def test_extruded_layer_arg(self, elements, field_map, dat_f): + """Tests that the layer argument is being passed when prompted + to in the parloop.""" + + kernel_blah = """ + static void blah(double* x, int layer_arg){ + x[0] = layer_arg; + }""" + + op2.par_loop(op2.Kernel(kernel_blah, "blah"), + elements, dat_f(op2.WRITE, field_map), + pass_layer_arg=True) + end = layers - 1 + start = 0 + ref = numpy.arange(start, end) + assert [dat_f.data[end*n:end*(n+1)] == ref + for n in range(int(len(dat_f.data)/end) - 1)] + + def test_write_data_field(self, elements, dat_coords, dat_field, coords_map, field_map, dat_f): + kernel_wo = "static void wo(double* x) { x[0] = 42.0; }\n" + + op2.par_loop(op2.Kernel(kernel_wo, "wo"), + elements, dat_f(op2.WRITE, field_map)) + + assert all(map(lambda x: x == 42, dat_f.data)) + + def test_write_data_coords(self, elements, dat_coords, dat_field, coords_map, field_map, dat_c): + kernel_wo_c = """ + static void wo_c(double x[6][2]) { + x[0][0] = 42.0; x[0][1] = 42.0; + x[1][0] = 42.0; x[1][1] = 42.0; + x[2][0] = 42.0; x[2][1] = 42.0; + x[3][0] = 42.0; x[3][1] = 42.0; + x[4][0] = 42.0; x[4][1] = 42.0; + x[5][0] = 42.0; x[5][1] = 42.0; + }""" + op2.par_loop(op2.Kernel(kernel_wo_c, "wo_c"), + elements, dat_c(op2.WRITE, coords_map)) + + assert all(map(lambda x: x[0] == 42 and x[1] == 42, dat_c.data)) + + def test_read_coord_neighbours_write_to_field( + self, elements, dat_coords, dat_field, + coords_map, field_map, dat_c, dat_f): + kernel_wtf = """ + static void wtf(double* y, double x[6][2]) { + double sum = 0.0; + for (int i=0; i<6; i++){ + sum += x[i][0] + x[i][1]; + } + y[0] = sum; + }""" + op2.par_loop(op2.Kernel(kernel_wtf, "wtf"), elements, + dat_f(op2.WRITE, field_map), + dat_coords(op2.READ, coords_map),) + assert all(dat_f.data >= 0) + + def test_indirect_coords_inc(self, elements, dat_coords, + dat_field, coords_map, field_map, dat_c, + dat_f): + kernel_inc = """ + static void inc(double 
y[6][2], double x[6][2]) { + for (int i=0; i<6; i++){ + if (y[i][0] == 0){ + y[i][0] += 1; + y[i][1] += 1; + } + } + }""" + op2.par_loop(op2.Kernel(kernel_inc, "inc"), elements, + dat_c(op2.RW, coords_map), + dat_coords(op2.READ, coords_map)) + + assert sum(sum(dat_c.data)) == nums[0] * layers * 2 + + +if __name__ == '__main__': + import os + pytest.main(os.path.abspath(__file__)) diff --git a/tests/pyop2/test_global_reduction.py b/tests/pyop2/test_global_reduction.py new file mode 100644 index 0000000000..aae5322181 --- /dev/null +++ b/tests/pyop2/test_global_reduction.py @@ -0,0 +1,462 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + + +import pytest +import numpy +from numpy.testing import assert_allclose + +from pyop2 import op2 +from pyop2.mpi import COMM_WORLD + +nelems = 4096 + + +class TestGlobalReductions: + + """ + Global reduction argument tests + """ + + @pytest.fixture(scope='module', params=[(nelems, nelems, nelems), + (0, nelems, nelems), + (nelems // 2, nelems, nelems)]) + def set(cls, request): + return op2.Set(request.param, 'set') + + @pytest.fixture(scope='module') + def dset(cls, set): + return op2.DataSet(set, 1, 'set') + + @pytest.fixture(scope='module') + def dset2(cls, set): + return op2.DataSet(set, 2, 'set2') + + @pytest.fixture + def d1(cls, dset): + return op2.Dat(dset, numpy.arange(nelems) + 1, dtype=numpy.uint32) + + @pytest.fixture + def d2(cls, dset2): + return op2.Dat(dset2, numpy.arange(2 * nelems) + 1, dtype=numpy.uint32) + + @pytest.fixture(scope='module') + def k1_write_to_dat(cls): + k = """ + static void k(unsigned int *x, unsigned int *g) { *x = *g; } + """ + return op2.Kernel(k, "k") + + @pytest.fixture(scope='module') + def k1_inc_to_global(cls): + k = """ + static void k(unsigned int *g, unsigned int *x) { *g += *x; } + """ + return op2.Kernel(k, "k") + + @pytest.fixture(scope='module') + def k1_min_to_global(cls): + k = """ + static void k(unsigned int *g, unsigned int *x) { if (*x < *g) *g = *x; } + """ + return op2.Kernel(k, "k") + + @pytest.fixture(scope='module') + def k2_min_to_global(cls): + k = """ + static void 
k(unsigned int *g, unsigned int *x) { + if (x[0] < g[0]) g[0] = x[0]; + if (x[1] < g[1]) g[1] = x[1]; + } + """ + return op2.Kernel(k, "k") + + @pytest.fixture(scope='module') + def k1_max_to_global(cls): + k = """ + static void k(unsigned int *g, unsigned int *x) { + if (*x > *g) *g = *x; + } + """ + return op2.Kernel(k, "k") + + @pytest.fixture(scope='module') + def k2_max_to_global(cls): + k = """ + static void k(unsigned int *g, unsigned int *x) { + if (x[0] > g[0]) g[0] = x[0]; + if (x[1] > g[1]) g[1] = x[1]; + } + """ + return op2.Kernel(k, "k") + + @pytest.fixture(scope='module') + def k2_write_to_dat(cls, request): + k = """ + static void k(unsigned int *x, unsigned int *g) { *x = g[0] + g[1]; } + """ + return op2.Kernel(k, "k") + + @pytest.fixture(scope='module') + def k2_inc_to_global(cls): + k = """ + static void k(unsigned int *g, unsigned int *x) { g[0] += x[0]; g[1] += x[1]; } + """ + return op2.Kernel(k, "k") + + @pytest.fixture + def duint32(cls, dset): + return op2.Dat(dset, [12] * nelems, numpy.uint32, "duint32") + + @pytest.fixture + def dint32(cls, dset): + return op2.Dat(dset, [-12] * nelems, numpy.int32, "dint32") + + @pytest.fixture + def dfloat32(cls, dset): + return op2.Dat(dset, [-12.0] * nelems, numpy.float32, "dfloat32") + + @pytest.fixture + def dfloat64(cls, dset): + return op2.Dat(dset, [-12.0] * nelems, numpy.float64, "dfloat64") + + def test_direct_min_uint32(self, set, duint32): + kernel_min = """ +static void k(unsigned int* g, unsigned int* x) +{ + if ( *x < *g ) *g = *x; +} +""" + g = op2.Global(1, 8, numpy.uint32, "g", comm=COMM_WORLD) + + op2.par_loop(op2.Kernel(kernel_min, "k"), set, + g(op2.MIN), + duint32(op2.READ)) + assert g.data[0] == 8 + + def test_direct_min_int32(self, set, dint32): + kernel_min = """ +static void k(int* g, int* x) +{ + if ( *x < *g ) *g = *x; +} +""" + g = op2.Global(1, 8, numpy.int32, "g", comm=COMM_WORLD) + + op2.par_loop(op2.Kernel(kernel_min, "k"), set, + g(op2.MIN), + dint32(op2.READ)) + assert 
g.data[0] == -12 + + def test_direct_max_int32(self, set, dint32): + kernel_max = """ +static void k(int* g, int* x) +{ + if ( *x > *g ) *g = *x; +} +""" + g = op2.Global(1, -42, numpy.int32, "g", comm=COMM_WORLD) + + op2.par_loop(op2.Kernel(kernel_max, "k"), set, + g(op2.MAX), + dint32(op2.READ)) + assert g.data[0] == -12 + + def test_direct_min_float(self, set, dfloat32): + kernel_min = """ +static void k(float* g, float* x) +{ + if ( *x < *g ) *g = *x; +} +""" + g = op2.Global(1, -.8, numpy.float32, "g", comm=COMM_WORLD) + + op2.par_loop(op2.Kernel(kernel_min, "k"), set, + g(op2.MIN), + dfloat32(op2.READ)) + + assert_allclose(g.data[0], -12.0) + + def test_direct_max_float(self, set, dfloat32): + kernel_max = """ +static void k(float* g, float* x) +{ + if ( *x > *g ) *g = *x; +} +""" + g = op2.Global(1, -42.8, numpy.float32, "g", comm=COMM_WORLD) + + op2.par_loop(op2.Kernel(kernel_max, "k"), set, + g(op2.MAX), + dfloat32(op2.READ)) + assert_allclose(g.data[0], -12.0) + + def test_direct_min_double(self, set, dfloat64): + kernel_min = """ +static void k(double* g, double* x) +{ + if ( *x < *g ) *g = *x; +} +""" + g = op2.Global(1, -.8, numpy.float64, "g", comm=COMM_WORLD) + + op2.par_loop(op2.Kernel(kernel_min, "k"), set, + g(op2.MIN), + dfloat64(op2.READ)) + assert_allclose(g.data[0], -12.0) + + def test_direct_max_double(self, set, dfloat64): + kernel_max = """ +static void k(double* g, double* x) +{ + if ( *x > *g ) *g = *x; +} +""" + g = op2.Global(1, -42.8, numpy.float64, "g", comm=COMM_WORLD) + + op2.par_loop(op2.Kernel(kernel_max, "k"), set, + g(op2.MAX), + dfloat64(op2.READ)) + assert_allclose(g.data[0], -12.0) + + def test_1d_read(self, k1_write_to_dat, set, d1): + g = op2.Global(1, 1, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_write_to_dat, set, + d1(op2.WRITE), + g(op2.READ)) + + assert all(d1.data == g.data) + + def test_1d_read_no_init(self, k1_write_to_dat, set, d1): + g = op2.Global(1, dtype=numpy.uint32, comm=COMM_WORLD) + d1.data[:] = 
100 + op2.par_loop(k1_write_to_dat, set, + d1(op2.WRITE), + g(op2.READ)) + + assert all(g.data == 0) + assert all(d1.data == 0) + + def test_2d_read(self, k2_write_to_dat, set, d1): + g = op2.Global(2, (1, 2), dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k2_write_to_dat, set, + d1(op2.WRITE), + g(op2.READ)) + + assert all(d1.data == g.data.sum()) + + def test_1d_inc(self, k1_inc_to_global, set, d1): + g = op2.Global(1, 0, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_inc_to_global, set, + g(op2.INC), + d1(op2.READ)) + + assert g.data == d1.data.sum() + + def test_1d_inc_no_data(self, k1_inc_to_global, set, d1): + g = op2.Global(1, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_inc_to_global, set, + g(op2.INC), + d1(op2.READ)) + + assert g.data == d1.data.sum() + + def test_1d_min_dat_is_min(self, k1_min_to_global, set, d1): + val = d1.data.min() + 1 + g = op2.Global(1, val, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_min_to_global, set, + g(op2.MIN), + d1(op2.READ)) + + assert g.data == d1.data.min() + + def test_1d_min_global_is_min(self, k1_min_to_global, set, d1): + d1.data[:] += 10 + val = d1.data.min() - 1 + g = op2.Global(1, val, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_min_to_global, set, + g(op2.MIN), + d1(op2.READ)) + assert g.data == val + + def test_1d_max_dat_is_max(self, k1_max_to_global, set, d1): + val = d1.data.max() - 1 + g = op2.Global(1, val, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_max_to_global, set, + g(op2.MAX), + d1(op2.READ)) + + assert g.data == d1.data.max() + + def test_1d_max_global_is_max(self, k1_max_to_global, set, d1): + val = d1.data.max() + 1 + g = op2.Global(1, val, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_max_to_global, set, + g(op2.MAX), + d1(op2.READ)) + + assert g.data == val + + def test_2d_inc(self, k2_inc_to_global, set, d2): + g = op2.Global(2, (0, 0), dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k2_inc_to_global, set, + g(op2.INC), + 
d2(op2.READ)) + + assert g.data[0] == d2.data[:, 0].sum() + assert g.data[1] == d2.data[:, 1].sum() + + def test_2d_min_dat_is_min(self, k2_min_to_global, set, d2): + val_0 = d2.data[:, 0].min() + 1 + val_1 = d2.data[:, 1].min() + 1 + g = op2.Global(2, (val_0, val_1), dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k2_min_to_global, set, + g(op2.MIN), + d2(op2.READ)) + + assert g.data[0] == d2.data[:, 0].min() + assert g.data[1] == d2.data[:, 1].min() + + def test_2d_min_global_is_min(self, k2_min_to_global, set, d2): + d2.data[:, 0] += 10 + d2.data[:, 1] += 10 + val_0 = d2.data[:, 0].min() - 1 + val_1 = d2.data[:, 1].min() - 1 + g = op2.Global(2, (val_0, val_1), dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k2_min_to_global, set, + g(op2.MIN), + d2(op2.READ)) + assert g.data[0] == val_0 + assert g.data[1] == val_1 + + def test_2d_max_dat_is_max(self, k2_max_to_global, set, d2): + val_0 = d2.data[:, 0].max() - 1 + val_1 = d2.data[:, 1].max() - 1 + g = op2.Global(2, (val_0, val_1), dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k2_max_to_global, set, + g(op2.MAX), + d2(op2.READ)) + + assert g.data[0] == d2.data[:, 0].max() + assert g.data[1] == d2.data[:, 1].max() + + def test_2d_max_global_is_max(self, k2_max_to_global, set, d2): + max_val_0 = d2.data[:, 0].max() + 1 + max_val_1 = d2.data[:, 1].max() + 1 + g = op2.Global(2, (max_val_0, max_val_1), dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k2_max_to_global, set, + g(op2.MAX), + d2(op2.READ)) + + assert g.data[0] == max_val_0 + assert g.data[1] == max_val_1 + + def test_1d_multi_inc_same_global(self, k1_inc_to_global, set, d1): + g = op2.Global(1, 0, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_inc_to_global, set, + g(op2.INC), + d1(op2.READ)) + assert g.data == d1.data.sum() + + op2.par_loop(k1_inc_to_global, set, + g(op2.INC), + d1(op2.READ)) + + assert g.data == d1.data.sum() * 2 + + def test_1d_multi_inc_same_global_reset(self, k1_inc_to_global, set, d1): + g = op2.Global(1, 0, 
dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_inc_to_global, set, + g(op2.INC), + d1(op2.READ)) + assert g.data == d1.data.sum() + + g.data = 10 + op2.par_loop(k1_inc_to_global, set, + g(op2.INC), + d1(op2.READ)) + + assert g.data == d1.data.sum() + 10 + + def test_1d_multi_inc_diff_global(self, k1_inc_to_global, set, d1): + g = op2.Global(1, 0, dtype=numpy.uint32, comm=COMM_WORLD) + g2 = op2.Global(1, 10, dtype=numpy.uint32, comm=COMM_WORLD) + op2.par_loop(k1_inc_to_global, set, + g(op2.INC), + d1(op2.READ)) + assert g.data == d1.data.sum() + + op2.par_loop(k1_inc_to_global, set, + g2(op2.INC), + d1(op2.READ)) + assert g2.data == d1.data.sum() + 10 + + def test_globals_with_different_types(self, set): + g_uint32 = op2.Global(1, [0], numpy.uint32, "g_uint32", comm=COMM_WORLD) + g_double = op2.Global(1, [0.0], numpy.float64, "g_double", comm=COMM_WORLD) + k = """static void k(unsigned int* i, double* d) { *i += 1; *d += 1.0f; }""" + op2.par_loop(op2.Kernel(k, "k"), + set, + g_uint32(op2.INC), + g_double(op2.INC)) + assert_allclose(g_uint32.data[0], g_double.data[0]) + assert g_uint32.data[0] == set.size + + def test_inc_repeated_loop(self, set): + g = op2.Global(1, 0, dtype=numpy.uint32, comm=COMM_WORLD) + k = """static void k(unsigned int* g) { *g += 1; }""" + op2.par_loop(op2.Kernel(k, "k"), + set, + g(op2.INC)) + assert_allclose(g.data, set.size) + op2.par_loop(op2.Kernel(k, "k"), + set, + g(op2.INC)) + assert_allclose(g.data, 2*set.size) + g.zero() + op2.par_loop(op2.Kernel(k, "k"), + set, + g(op2.INC)) + assert_allclose(g.data, set.size) + + def test_inc_reused_loop(self, set): + g = op2.Global(1, 0, dtype=numpy.uint32, comm=COMM_WORLD) + k = """void k(unsigned int* g) { *g += 1; }""" + loop = op2.ParLoop(op2.Kernel(k, "k"), set, g(op2.INC)) + loop.compute() + assert_allclose(g.data, set.size) + loop.compute() + assert_allclose(g.data, 2*set.size) + g.zero() + loop.compute() + assert_allclose(g.data, set.size) diff --git a/tests/pyop2/test_globals.py 
b/tests/pyop2/test_globals.py new file mode 100644 index 0000000000..1649a0451a --- /dev/null +++ b/tests/pyop2/test_globals.py @@ -0,0 +1,79 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from pyop2 import op2 +from pyop2.mpi import COMM_WORLD + + +def test_global_operations(): + g1 = op2.Global(1, data=2., comm=COMM_WORLD) + g2 = op2.Global(1, data=5., comm=COMM_WORLD) + + assert (g1 + g2).data == 7. + assert (g2 - g1).data == 3. + assert (-g2).data == -5. + assert (g1 * g2).data == 10. + g1 *= g2 + assert g1.data == 10. + + +def test_global_dat_version(): + g1 = op2.Global(1, data=1., comm=COMM_WORLD) + g2 = op2.Global(1, data=2., comm=COMM_WORLD) + + assert g1.dat_version == 0 + assert g2.dat_version == 0 + + # Access data property + d1 = g1.data + + assert g1.dat_version == 1 + assert g2.dat_version == 0 + + # Access data property + g2.data[:] += 1 + + assert g1.dat_version == 1 + assert g2.dat_version == 1 + + # Access zero property + g1.zero() + + assert g1.dat_version == 2 + assert g2.dat_version == 1 + + # Access data setter + g2.data = d1 + + assert g1.dat_version == 2 + assert g2.dat_version == 2 diff --git a/tests/pyop2/test_indirect_loop.py b/tests/pyop2/test_indirect_loop.py new file mode 100644 index 0000000000..ca8341b1b2 --- /dev/null +++ b/tests/pyop2/test_indirect_loop.py @@ -0,0 +1,473 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + + +import pytest +import numpy as np + +from pyop2 import op2 +from pyop2.exceptions import MapValueError +from pyop2.mpi import COMM_WORLD + + +nelems = 4096 + + +@pytest.fixture(params=[(nelems, nelems, nelems), + (0, nelems, nelems), + (nelems // 2, nelems, nelems)]) +def iterset(request): + return op2.Set(request.param, "iterset") + + +@pytest.fixture +def indset(): + return op2.Set(nelems, "indset") + + +@pytest.fixture +def unitset(): + return op2.Set(1, "unitset") + + +@pytest.fixture +def diterset(iterset): + return op2.DataSet(iterset, 1, "diterset") + + +@pytest.fixture +def x(indset): + return op2.Dat(indset, list(range(nelems)), np.uint32, "x") + + +@pytest.fixture +def x2(indset): + return op2.Dat(indset ** 2, np.array([list(range(nelems)), list(range(nelems))], + dtype=np.uint32), np.uint32, "x2") + + +@pytest.fixture +def mapd(): + mapd = list(range(nelems)) + return mapd[::-1] + + +@pytest.fixture +def iterset2indset(iterset, indset, mapd): + u_map = np.array(mapd, 
dtype=np.uint32) + return op2.Map(iterset, indset, 1, u_map, "iterset2indset") + + +@pytest.fixture +def iterset2indset2(iterset, indset, mapd): + u_map = np.array([mapd, mapd], dtype=np.uint32) + return op2.Map(iterset, indset, 2, u_map, "iterset2indset2") + + +@pytest.fixture +def iterset2unitset(iterset, unitset): + u_map = np.zeros(nelems, dtype=np.uint32) + return op2.Map(iterset, unitset, 1, u_map, "iterset2unitset") + + +class TestIndirectLoop: + + """ + Indirect Loop Tests + """ + + def test_mismatching_iterset(self, iterset, indset, x): + """Accessing a par_loop argument via a Map with iterset not matching + the par_loop's should raise an exception.""" + with pytest.raises(MapValueError): + op2.par_loop(op2.Kernel("", "dummy"), iterset, + x(op2.WRITE, op2.Map(op2.Set(nelems), indset, 1))) + + def test_mismatching_indset(self, iterset, x): + """Accessing a par_loop argument via a Map with toset not matching + the Dat's should raise an exception.""" + with pytest.raises(MapValueError): + op2.par_loop(op2.Kernel("", "dummy"), iterset, + x(op2.WRITE, op2.Map(iterset, op2.Set(nelems), 1))) + + def test_uninitialized_map(self, iterset, indset, x): + """Accessing a par_loop argument via an uninitialized Map should raise + an exception.""" + kernel_wo = "static void wo(unsigned int* x) { *x = 42; }\n" + with pytest.raises(MapValueError): + op2.par_loop(op2.Kernel(kernel_wo, "wo"), iterset, + x(op2.WRITE, op2.Map(iterset, indset, 1))) + + def test_onecolor_wo(self, iterset, x, iterset2indset): + """Set a Dat to a scalar value with op2.WRITE.""" + kernel_wo = "static void kernel_wo(unsigned int* x) { *x = 42; }\n" + + op2.par_loop(op2.Kernel(kernel_wo, "kernel_wo"), + iterset, x(op2.WRITE, iterset2indset)) + assert all(map(lambda x: x == 42, x.data)) + + def test_onecolor_rw(self, iterset, x, iterset2indset): + """Increment each value of a Dat by one with op2.RW.""" + kernel_rw = "static void rw(unsigned int* x) { (*x) = (*x) + 1; }\n" + + 
op2.par_loop(op2.Kernel(kernel_rw, "rw"), + iterset, x(op2.RW, iterset2indset)) + assert sum(x.data) == nelems * (nelems + 1) // 2 + + def test_indirect_inc(self, iterset, unitset, iterset2unitset): + """Sum into a scalar Dat with op2.INC.""" + u = op2.Dat(unitset, np.array([0], dtype=np.uint32), np.uint32, "u") + kernel_inc = "static void inc(unsigned int* x) { (*x) = (*x) + 1; }\n" + op2.par_loop(op2.Kernel(kernel_inc, "inc"), + iterset, u(op2.INC, iterset2unitset)) + assert u.data[0] == nelems + + def test_indirect_max(self, iterset, indset, iterset2indset): + a = op2.Dat(indset, dtype=np.int32) + b = op2.Dat(indset, dtype=np.int32) + a.data[:] = -10 + b.data[:] = -5 + kernel = "static void maxify(int *a, int *b) {*a = *a < *b ? *b : *a;}\n" + op2.par_loop(op2.Kernel(kernel, "maxify"), + iterset, a(op2.MAX, iterset2indset), b(op2.READ, iterset2indset)) + assert np.allclose(a.data_ro, -5) + + def test_indirect_min(self, iterset, indset, iterset2indset): + a = op2.Dat(indset, dtype=np.int32) + b = op2.Dat(indset, dtype=np.int32) + a.data[:] = 10 + b.data[:] = 5 + kernel = "static void minify(int *a, int *b) {*a = *a > *b ? 
*b : *a;}\n" + op2.par_loop(op2.Kernel(kernel, "minify"), + iterset, a(op2.MIN, iterset2indset), b(op2.READ, iterset2indset)) + assert np.allclose(a.data_ro, 5) + + def test_global_read(self, iterset, x, iterset2indset): + """Divide a Dat by a Global.""" + g = op2.Global(1, 2, np.uint32, "g", comm=COMM_WORLD) + + kernel_global_read = "static void global_read(unsigned int* x, unsigned int* g) { (*x) /= (*g); }\n" + + op2.par_loop(op2.Kernel(kernel_global_read, "global_read"), + iterset, + x(op2.RW, iterset2indset), + g(op2.READ)) + assert sum(x.data) == sum(map(lambda v: v // 2, range(nelems))) + + def test_global_inc(self, iterset, x, iterset2indset): + """Increment each value of a Dat by one and a Global at the same time.""" + g = op2.Global(1, 0, np.uint32, "g", comm=COMM_WORLD) + + kernel_global_inc = """ + static void global_inc(unsigned int *x, unsigned int *inc) { + (*x) = (*x) + 1; (*inc) += (*x); + }""" + + op2.par_loop( + op2.Kernel(kernel_global_inc, "global_inc"), iterset, + x(op2.RW, iterset2indset), + g(op2.INC)) + assert sum(x.data) == nelems * (nelems + 1) // 2 + assert g.data[0] == nelems * (nelems + 1) // 2 + + def test_2d_dat(self, iterset, iterset2indset, x2): + """Set both components of a vector-valued Dat to a scalar value.""" + kernel_wo = "static void wo(unsigned int* x) { x[0] = 42; x[1] = 43; }\n" + op2.par_loop(op2.Kernel(kernel_wo, "wo"), iterset, + x2(op2.WRITE, iterset2indset)) + assert all(all(v == [42, 43]) for v in x2.data) + + def test_2d_map(self): + """Sum nodal values incident to a common edge.""" + nedges = nelems - 1 + nodes = op2.Set(nelems, "nodes") + edges = op2.Set(nedges, "edges") + node_vals = op2.Dat(nodes, np.arange(nelems, dtype=np.uint32), + np.uint32, "node_vals") + edge_vals = op2.Dat(edges, np.zeros(nedges, dtype=np.uint32), + np.uint32, "edge_vals") + + e_map = np.array([(i, i + 1) for i in range(nedges)], dtype=np.uint32) + edge2node = op2.Map(edges, nodes, 2, e_map, "edge2node") + + kernel_sum = """ + static 
void sum(unsigned int *edge, unsigned int *nodes) { + *edge = nodes[0] + nodes[1]; + }""" + op2.par_loop(op2.Kernel(kernel_sum, "sum"), edges, + edge_vals(op2.WRITE), + node_vals(op2.READ, edge2node)) + + expected = np.arange(1, nedges * 2 + 1, 2) + assert all(expected == edge_vals.data) + + +@pytest.fixture +def mset(indset, unitset): + return op2.MixedSet((indset, unitset)) + + +@pytest.fixture +def mdat(mset): + return op2.MixedDat(mset) + + +@pytest.fixture +def mmap(iterset2indset, iterset2unitset): + return op2.MixedMap((iterset2indset, iterset2unitset)) + + +class TestMixedIndirectLoop: + """Mixed indirect loop tests.""" + + def test_mixed_non_mixed_dat(self, mdat, mmap, iterset): + """Increment into a MixedDat from a non-mixed Dat.""" + d = op2.Dat(iterset, np.ones(iterset.size)) + kernel_inc = """static void inc(double *d, double *x) { + d[0] += x[0]; d[1] += x[0]; + }""" + op2.par_loop(op2.Kernel(kernel_inc, "inc"), iterset, + mdat(op2.INC, mmap), + d(op2.READ)) + assert all(mdat[0].data == 1.0) and mdat[1].data == 4096.0 + + def test_mixed_non_mixed_dat_itspace(self, mdat, mmap, iterset): + """Increment into a MixedDat from a Dat using iteration spaces.""" + d = op2.Dat(iterset, np.ones(iterset.size)) + kernel_inc = """static void inc(double *d, double *x) { + for (int i=0; i<2; ++i) + d[i] += x[0]; + }""" + op2.par_loop(op2.Kernel(kernel_inc, "inc"), iterset, + mdat(op2.INC, mmap), + d(op2.READ)) + assert all(mdat[0].data == 1.0) and mdat[1].data == 4096.0 + + +def test_permuted_map(): + fromset = op2.Set(1) + toset = op2.Set(4) + d1 = op2.Dat(op2.DataSet(toset, 1), dtype=np.int32) + d2 = op2.Dat(op2.DataSet(toset, 1), dtype=np.int32) + d1.data[:] = np.arange(4, dtype=np.int32) + k = op2.Kernel(""" + void copy(int *to, const int * restrict from) { + for (int i = 0; i < 4; i++) { to[i] = from[i]; } + }""", "copy") + m1 = op2.Map(fromset, toset, 4, values=[1, 2, 3, 0]) + m2 = op2.PermutedMap(m1, [3, 2, 0, 1]) + op2.par_loop(k, fromset, d2(op2.WRITE, m2), 
d1(op2.READ, m1)) + expect = np.empty_like(d1.data) + expect[m1.values[..., m2.permutation]] = d1.data[m1.values] + assert (d1.data == np.arange(4, dtype=np.int32)).all() + assert (d2.data == expect).all() + + +def test_permuted_map_both(): + fromset = op2.Set(1) + toset = op2.Set(4) + d1 = op2.Dat(op2.DataSet(toset, 1), dtype=np.int32) + d2 = op2.Dat(op2.DataSet(toset, 1), dtype=np.int32) + d1.data[:] = np.arange(4, dtype=np.int32) + k = op2.Kernel(""" + void copy(int *to, const int * restrict from) { + for (int i = 0; i < 4; i++) { to[i] = from[i]; } + }""", "copy") + m1 = op2.Map(fromset, toset, 4, values=[0, 2, 1, 3]) + m2 = op2.PermutedMap(m1, [3, 2, 1, 0]) + m3 = op2.PermutedMap(m1, [0, 2, 3, 1]) + op2.par_loop(k, fromset, d2(op2.WRITE, m2), d1(op2.READ, m3)) + expect = np.empty_like(d1.data) + expect[m1.values[..., m2.permutation]] = d1.data[m1.values[..., m3.permutation]] + assert (d1.data == np.arange(4, dtype=np.int32)).all() + assert (d2.data == expect).all() + + +@pytest.mark.parametrize("permuted", ["none", "pre"]) +def test_composed_map_two_maps(permuted): + arity = 2 + setB = op2.Set(3) + nodesetB = op2.Set(6) + datB = op2.Dat(op2.DataSet(nodesetB, 1), dtype=np.float64) + mapB = op2.Map(setB, nodesetB, arity, values=[[0, 1], [2, 3], [4, 5]]) + setA = op2.Set(5) + nodesetA = op2.Set(8) + datA = op2.Dat(op2.DataSet(nodesetA, 1), dtype=np.float64) + datA.data[:] = np.array([.0, .1, .2, .3, .4, .5, .6, .7], dtype=np.float64) + mapA0 = op2.Map(setA, nodesetA, arity, values=[[0, 1], [2, 3], [4, 5], [6, 7], [0, 1]]) + if permuted == "pre": + mapA0 = op2.PermutedMap(mapA0, [1, 0]) + mapA1 = op2.Map(setB, setA, 1, values=[3, 1, 2]) + mapA = op2.ComposedMap(mapA0, mapA1) + # "post" permutation is currently not supported + k = op2.Kernel(""" + void copy(double *to, const double * restrict from) { + for (int i = 0; i < 2; ++i) { to[i] = from[i]; } + }""", "copy") + op2.par_loop(k, setB, datB(op2.WRITE, mapB), datA(op2.READ, mapA)) + if permuted == "none": + 
assert (datB.data == np.array([.6, .7, .2, .3, .4, .5], dtype=np.float64)).all() + else: + assert (datB.data == np.array([.7, .6, .3, .2, .5, .4], dtype=np.float64)).all() + + +@pytest.mark.parametrize("nested", ["none", "first", "last"]) +@pytest.mark.parametrize("subset", [False, True]) +def test_composed_map_three_maps(nested, subset): + arity = 2 + setC = op2.Set(2) + nodesetC = op2.Set(4) + datC = op2.Dat(op2.DataSet(nodesetC, 1), dtype=np.float64) + mapC = op2.Map(setC, nodesetC, arity, values=[[0, 1], [2, 3]]) + setB = op2.Set(3) + setA = op2.Set(5) + nodesetA = op2.Set(8) + datA = op2.Dat(op2.DataSet(nodesetA, 1), dtype=np.float64) + datA.data[:] = np.array([.0, .1, .2, .3, .4, .5, .6, .7], dtype=np.float64) + mapA0 = op2.Map(setA, nodesetA, arity, values=[[0, 1], [2, 3], [4, 5], [6, 7], [0, 1]]) + mapA1 = op2.Map(setB, setA, 1, values=[3, 1, 2]) + mapA2 = op2.Map(setC, setB, 1, values=[2, 0]) + if nested == "none": + mapA = op2.ComposedMap(mapA0, mapA1, mapA2) + elif nested == "first": + mapA = op2.ComposedMap(op2.ComposedMap(mapA0, mapA1), mapA2) + elif nested == "last": + mapA = op2.ComposedMap(mapA0, op2.ComposedMap(mapA1, mapA2)) + else: + raise ValueError(f"Unknown nested param: {nested}") + k = op2.Kernel(""" + void copy(double *to, const double * restrict from) { + for (int i = 0; i < 2; ++i) { to[i] = from[i]; } + }""", "copy") + if subset: + indices = np.array([1], dtype=np.int32) + setC = op2.Subset(setC, indices) + op2.par_loop(k, setC, datC(op2.WRITE, mapC), datA(op2.READ, mapA)) + if subset: + assert (datC.data == np.array([.0, .0, .6, .7], dtype=np.float64)).all() + else: + assert (datC.data == np.array([.4, .5, .6, .7], dtype=np.float64)).all() + + +@pytest.mark.parametrize("variable", [False, True]) +@pytest.mark.parametrize("subset", [False, True]) +def test_composed_map_extrusion(variable, subset): + # variable: False + # + # +14-+-9-+-4-+ + # |13 | 8 | 3 | + # +12-+-7-+-2-+ + # |11 | 6 | 1 | + # +10-+-5-+-0-+ + # + # 0 1 2 <- setA + # 0 
1 <- setC + # + # variable: True + # + # +12-+-7-+-4-+ + # |11 | 6 | 3 | + # +10-+-5-+-2-+ + # | 9 | | 1 | + # +-8-+ +-0-+ + # + # 0 1 2 <- setA + # 0 1 <- setC + # + arity = 3 + if variable: + # A layer is a copy of base layer, so cell_layer_index + 1 + layersC = [[1, 2 + 1], [0, 2 + 1]] + setC = op2.ExtrudedSet(op2.Set(2), layersC) + nodesetC = op2.Set(8) + datC = op2.Dat(op2.DataSet(nodesetC, 1), dtype=np.float64) + mapC = op2.Map(setC, nodesetC, arity, + values=[[5, 6, 7], + [0, 1, 2]], + offset=[2, 2, 2]) + layersA = [[0, 2 + 1], [1, 2 + 1], [0, 2 + 1]] + setA = op2.ExtrudedSet(op2.Set(3), layersA) + nodesetA = op2.Set(13) + datA = op2.Dat(op2.DataSet(nodesetA, 1), dtype=np.float64) + datA.data[:] = np.arange(0, 13, dtype=np.float64) + mapA0 = op2.Map(setA, nodesetA, arity, + values=[[8, 9, 10], + [5, 6, 7], + [0, 1, 2]], + offset=[2, 2, 2]) + mapA1 = op2.Map(setC, setA, 1, values=[1, 2]) + mapA = op2.ComposedMap(mapA0, mapA1) + if subset: + expected = np.array([0., 1., 2., 3., 4., 0., 0., 0.], dtype=np.float64) + else: + expected = np.array([0., 1., 2., 3., 4., 5., 6., 7.], dtype=np.float64) + else: + # A layer is a copy of base layer, so cell_layer_index + 1 + layersC = 2 + 1 + setC = op2.ExtrudedSet(op2.Set(2), layersC) + nodesetC = op2.Set(10) + datC = op2.Dat(op2.DataSet(nodesetC, 1), dtype=np.float64) + mapC = op2.Map(setC, nodesetC, arity, + values=[[5, 6, 7], + [0, 1, 2]], + offset=[2, 2, 2]) + layersA = 2 + 1 + setA = op2.ExtrudedSet(op2.Set(3), layersA) + nodesetA = op2.Set(15) + datA = op2.Dat(op2.DataSet(nodesetA, 1), dtype=np.float64) + datA.data[:] = np.arange(0, 15, dtype=np.float64) + mapA0 = op2.Map(setA, nodesetA, arity, + values=[[10, 11, 12], + [5, 6, 7], + [0, 1, 2]], + offset=[2, 2, 2]) + mapA1 = op2.Map(setC, setA, 1, values=[1, 2]) + mapA = op2.ComposedMap(mapA0, mapA1) + if subset: + expected = np.array([0., 1., 2., 3., 4., 0., 0., 0., 0., 0.], dtype=np.float64) + else: + expected = np.array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], 
dtype=np.float64) + k = op2.Kernel(""" + void copy(double *to, const double * restrict from) { + for (int i = 0; i < 3; ++i) { to[i] = from[i]; } + }""", "copy") + if subset: + indices = np.array([1], dtype=np.int32) + setC = op2.Subset(setC, indices) + op2.par_loop(k, setC, datC(op2.WRITE, mapC), datA(op2.READ, mapA)) + assert (datC.data == expected).all() + + +if __name__ == '__main__': + import os + pytest.main(os.path.abspath(__file__)) diff --git a/tests/pyop2/test_iteration_space_dats.py b/tests/pyop2/test_iteration_space_dats.py new file mode 100644 index 0000000000..96f530279a --- /dev/null +++ b/tests/pyop2/test_iteration_space_dats.py @@ -0,0 +1,231 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + + +import pytest +import numpy + +from pyop2 import op2 + + +def _seed(): + return 0.02041724 + + +nnodes = 4096 +nele = nnodes // 2 + + +@pytest.fixture(scope='module') +def node(): + return op2.Set(nnodes, 'node') + + +@pytest.fixture(scope='module') +def ele(): + return op2.Set(nele, 'ele') + + +@pytest.fixture +def d1(node): + return op2.Dat(node, numpy.zeros(nnodes), dtype=numpy.int32) + + +@pytest.fixture +def d2(node): + return op2.Dat(node ** 2, numpy.zeros(2 * nnodes), dtype=numpy.int32) + + +@pytest.fixture +def vd1(ele): + return op2.Dat(ele, numpy.zeros(nele), dtype=numpy.int32) + + +@pytest.fixture +def vd2(ele): + return op2.Dat(ele ** 2, numpy.zeros(2 * nele), dtype=numpy.int32) + + +@pytest.fixture(scope='module') +def node2ele(node, ele): + vals = numpy.arange(nnodes) / 2 + return op2.Map(node, ele, 1, vals, 'node2ele') + + +class TestIterationSpaceDats: + + """ + Test IterationSpace access to Dat objects + """ + + def test_sum_nodes_to_edges(self): + """Creates a 1D grid with edge values numbered consecutively. 
+ Iterates over edges, summing the node values.""" + + nedges = nnodes - 1 + nodes = op2.Set(nnodes, "nodes") + edges = op2.Set(nedges, "edges") + + node_vals = op2.Dat(nodes, numpy.arange( + nnodes, dtype=numpy.uint32), numpy.uint32, "node_vals") + edge_vals = op2.Dat( + edges, numpy.zeros(nedges, dtype=numpy.uint32), numpy.uint32, "edge_vals") + + e_map = numpy.array([(i, i + 1) + for i in range(nedges)], dtype=numpy.uint32) + edge2node = op2.Map(edges, nodes, 2, e_map, "edge2node") + kernel_sum = """ +static void sum(unsigned int *edge, unsigned int *nodes) { + for (int i=0; i<2; ++i) + edge[0] += nodes[i]; +} + """ + + op2.par_loop(op2.Kernel(kernel_sum, "sum"), edges, + edge_vals(op2.INC), + node_vals(op2.READ, edge2node)) + + expected = numpy.arange(1, nedges * 2 + 1, 2) + assert all(expected == edge_vals.data) + + def test_read_1d_itspace_map(self, node, d1, vd1, node2ele): + vd1.data[:] = numpy.arange(nele) + k = """ +static void k(int *d, int *vd) { + for (int i=0; i<1; ++i) + d[0] = vd[i]; +} + """ + op2.par_loop(op2.Kernel(k, 'k'), node, + d1(op2.WRITE), + vd1(op2.READ, node2ele)) + assert all(d1.data[::2] == vd1.data) + assert all(d1.data[1::2] == vd1.data) + + def test_write_1d_itspace_map(self, node, vd1, node2ele): + k = """ +static void k(int *vd) { + for (int i=0; i<1; ++i) + vd[i] = 2; +} + """ + + op2.par_loop(op2.Kernel(k, 'k'), node, + vd1(op2.WRITE, node2ele)) + assert all(vd1.data == 2) + + def test_inc_1d_itspace_map(self, node, d1, vd1, node2ele): + vd1.data[:] = 3 + d1.data[:] = numpy.arange(nnodes).reshape(d1.data.shape) + + k = """ +static void k(int *vd, int *d) { + for (int i=0; i<1; ++i) + vd[i] += d[0]; +} + """ + op2.par_loop(op2.Kernel(k, 'k'), node, + vd1(op2.INC, node2ele), + d1(op2.READ)) + expected = numpy.zeros_like(vd1.data) + expected[:] = 3 + expected += numpy.arange( + start=0, stop=nnodes, step=2).reshape(expected.shape) + expected += numpy.arange( + start=1, stop=nnodes, step=2).reshape(expected.shape) + assert 
all(vd1.data == expected) + + def test_read_2d_itspace_map(self, d2, vd2, node2ele, node): + vd2.data[:] = numpy.arange(nele * 2).reshape(nele, 2) + k = """ +static void k(int *d, int *vd) { + for (int i=0; i<1; ++i) { + d[0] = vd[i]; + d[1] = vd[i+1]; + } +} + """ + op2.par_loop(op2.Kernel(k, 'k'), node, + d2(op2.WRITE), + vd2(op2.READ, node2ele)) + assert all(d2.data[::2, 0] == vd2.data[:, 0]) + assert all(d2.data[::2, 1] == vd2.data[:, 1]) + assert all(d2.data[1::2, 0] == vd2.data[:, 0]) + assert all(d2.data[1::2, 1] == vd2.data[:, 1]) + + def test_write_2d_itspace_map(self, vd2, node2ele, node): + k = """ +static void k(int *vd) { + for (int i=0; i<1; ++i) { + vd[i] = 2; + vd[i+1] = 3; + } +} + """ + op2.par_loop(op2.Kernel(k, 'k'), node, + vd2(op2.WRITE, node2ele)) + assert all(vd2.data[:, 0] == 2) + assert all(vd2.data[:, 1] == 3) + + def test_inc_2d_itspace_map(self, d2, vd2, node2ele, node): + vd2.data[:, 0] = 3 + vd2.data[:, 1] = 4 + d2.data[:] = numpy.arange(2 * nnodes).reshape(d2.data.shape) + + k = """ +static void k(int *vd, int *d) { + for (int i=0; i<1; ++i) { + vd[i] += d[0]; + vd[i+1] += d[1]; + } +} + """ + + op2.par_loop(op2.Kernel(k, 'k'), node, + vd2(op2.INC, node2ele), + d2(op2.READ)) + + expected = numpy.zeros_like(vd2.data) + expected[:, 0] = 3 + expected[:, 1] = 4 + expected[:, 0] += numpy.arange(start=0, stop=2 * nnodes, step=4) + expected[:, 0] += numpy.arange(start=2, stop=2 * nnodes, step=4) + expected[:, 1] += numpy.arange(start=1, stop=2 * nnodes, step=4) + expected[:, 1] += numpy.arange(start=3, stop=2 * nnodes, step=4) + assert all(vd2.data[:, 0] == expected[:, 0]) + assert all(vd2.data[:, 1] == expected[:, 1]) + + +if __name__ == '__main__': + import os + pytest.main(os.path.abspath(__file__)) diff --git a/tests/pyop2/test_linalg.py b/tests/pyop2/test_linalg.py new file mode 100644 index 0000000000..5fce55d0ee --- /dev/null +++ b/tests/pyop2/test_linalg.py @@ -0,0 +1,345 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 
2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +import pytest +import numpy as np + +from pyop2 import op2 + +nelems = 8 + + +@pytest.fixture +def set(): + return op2.Set(nelems) + + +@pytest.fixture +def dset(set): + return op2.DataSet(set, 1) + + +@pytest.fixture +def x(dset): + return op2.Dat(dset, None, np.float64, "x") + + +@pytest.fixture +def y(dset): + return op2.Dat(dset, np.arange(1, nelems + 1), np.float64, "y") + + +@pytest.fixture +def yi(dset): + return op2.Dat(dset, np.arange(1, nelems + 1), np.int64, "y") + + +@pytest.fixture +def x2(): + s = op2.Set(nelems, "s1") + return op2.Dat(s ** (1, 2), np.zeros(2 * nelems), np.float64, "x") + + +@pytest.fixture +def y2(): + s = op2.Set(nelems, "s2") + return op2.Dat(s ** (2, 1), np.zeros(2 * nelems), np.float64, "y") + + +class TestLinAlgOp: + + """ + Tests of linear algebra operators returning a new Dat. + """ + + def test_add(self, x, y): + x._data = 2 * y.data + assert all((x + y).data == 3 * y.data) + + def test_sub(self, x, y): + x._data = 2 * y.data + assert all((x - y).data == y.data) + + def test_mul(self, x, y): + x._data = 2 * y.data + assert all((x * y).data == 2 * y.data * y.data) + + def test_div(self, x, y): + x._data = 2 * y.data + assert all((x / y).data == 2.0) + + def test_add_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 + y2 + + def test_sub_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 - y2 + + def test_mul_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 * y2 + + def test_div_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 / y2 + + def test_add_scalar(self, x, y): + x._data = y.data + 1.0 + assert all(x.data == (y + 1.0).data) + + def test_radd_scalar(self, x, y): + x._data = y.data + 1.0 + assert all(x.data == (1.0 + y).data) + + def test_pos_copies(self, y): + z = +y + assert all(z.data == y.data) + assert z is not y + + def test_neg_copies(self, y): + z = -y + assert all(z.data == -y.data) + assert z is not y + + def test_sub_scalar(self, 
x, y): + x._data = y.data - 1.0 + assert all(x.data == (y - 1.0).data) + + def test_rsub_scalar(self, x, y): + x._data = 1.0 - y.data + assert all(x.data == (1.0 - y).data) + + def test_mul_scalar(self, x, y): + x._data = 2 * y.data + assert all(x.data == (y * 2.0).data) + + def test_rmul_scalar(self, x, y): + x._data = 2 * y.data + assert all(x.data == (2.0 * y).data) + + def test_div_scalar(self, x, y): + x._data = 2 * y.data + assert all((x / 2.0).data == y.data) + + def test_add_ftype(self, y, yi): + x = y + yi + assert x.data.dtype == np.float64 + + def test_sub_ftype(self, y, yi): + x = y - yi + assert x.data.dtype == np.float64 + + def test_mul_ftype(self, y, yi): + x = y * yi + assert x.data.dtype == np.float64 + + def test_div_ftype(self, y, yi): + x = y / yi + assert x.data.dtype == np.float64 + + def test_add_itype(self, y, yi): + xi = yi + y + assert xi.data.dtype == np.int64 + + def test_sub_itype(self, y, yi): + xi = yi - y + assert xi.data.dtype == np.int64 + + def test_mul_itype(self, y, yi): + xi = yi * y + assert xi.data.dtype == np.int64 + + def test_div_itype(self, y, yi): + xi = yi / y + assert xi.data.dtype == np.int64 + + def test_linalg_and_parloop(self, x, y): + """Linear algebra operators should force computation""" + x._data = np.zeros(x.dataset.total_size, dtype=np.float64) + k = op2.Kernel('static void k(double *x) { *x = 1.0; }', 'k') + op2.par_loop(k, x.dataset.set, x(op2.WRITE)) + z = x + y + assert all(z.data == y.data + 1) + + +class TestLinAlgIop: + + """ + Tests of linear algebra operators modifying a Dat in place. 
+ """ + + def test_iadd(self, x, y): + x._data = 2 * y.data + x += y + assert all(x.data == 3 * y.data) + + def test_isub(self, x, y): + x._data = 2 * y.data + x -= y + assert all(x.data == y.data) + + def test_imul(self, x, y): + x._data = 2 * y.data + x *= y + assert all(x.data == 2 * y.data * y.data) + + def test_idiv(self, x, y): + x._data = 2 * y.data + x /= y + assert all(x.data == 2.0) + + def test_iadd_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 += y2 + + def test_isub_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 -= y2 + + def test_imul_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 *= y2 + + def test_idiv_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 /= y2 + + def test_iadd_scalar(self, x, y): + x._data = y.data + 1.0 + y += 1.0 + assert all(x.data == y.data) + + def test_isub_scalar(self, x, y): + x._data = y.data - 1.0 + y -= 1.0 + assert all(x.data == y.data) + + def test_imul_scalar(self, x, y): + x._data = 2 * y.data + y *= 2.0 + assert all(x.data == y.data) + + def test_idiv_scalar(self, x, y): + x._data = 2 * y.data + x /= 2.0 + assert all(x.data == y.data) + + def test_iadd_ftype(self, y, yi): + y += yi + assert y.data.dtype == np.float64 + + def test_isub_ftype(self, y, yi): + y -= yi + assert y.data.dtype == np.float64 + + def test_imul_ftype(self, y, yi): + y *= yi + assert y.data.dtype == np.float64 + + def test_idiv_ftype(self, y, yi): + y /= yi + assert y.data.dtype == np.float64 + + def test_iadd_itype(self, y, yi): + yi += y + assert yi.data.dtype == np.int64 + + def test_isub_itype(self, y, yi): + yi -= y + assert yi.data.dtype == np.int64 + + def test_imul_itype(self, y, yi): + yi *= y + assert yi.data.dtype == np.int64 + + def test_idiv_itype(self, y, yi): + yi /= y + assert yi.data.dtype == np.int64 + + +class TestLinAlgScalar: + + """ + Tests of linear algebra operators return a scalar. 
+ """ + + def test_norm(self): + s = op2.Set(2) + n = op2.Dat(s, [3, 4], np.float64, "n") + assert abs(n.norm - 5) < 1e-12 + + def test_inner(self): + s = op2.Set(2) + n = op2.Dat(s, [3, 4], np.float64) + o = op2.Dat(s, [4, 5], np.float64) + + ret = n.inner(o) + + assert abs(ret - 32) < 1e-12 + + ret = o.inner(n) + + assert abs(ret - 32) < 1e-12 + + def test_norm_mixed(self): + s = op2.Set(1) + + n = op2.Dat(s, [3], np.float64) + o = op2.Dat(s, [4], np.float64) + + md = op2.MixedDat([n, o]) + + assert abs(md.norm - 5) < 1e-12 + + def test_inner_mixed(self): + s = op2.Set(1) + + n = op2.Dat(s, [3], np.float64) + o = op2.Dat(s, [4], np.float64) + + md = op2.MixedDat([n, o]) + + n1 = op2.Dat(s, [4], np.float64) + o1 = op2.Dat(s, [5], np.float64) + + md1 = op2.MixedDat([n1, o1]) + + ret = md.inner(md1) + + assert abs(ret - 32) < 1e-12 + + ret = md1.inner(md) + + assert abs(ret - 32) < 1e-12 diff --git a/tests/pyop2/test_linalg_complex.py b/tests/pyop2/test_linalg_complex.py new file mode 100644 index 0000000000..f7ee2f4cdd --- /dev/null +++ b/tests/pyop2/test_linalg_complex.py @@ -0,0 +1,449 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + + +import pytest +import numpy as np + +from pyop2 import op2 + +nelems = 8 + + +@pytest.fixture +def set(): + return op2.Set(nelems) + + +@pytest.fixture +def dset(set): + return op2.DataSet(set, 1) + + +@pytest.fixture +def x(dset): + return op2.Dat(dset, None, np.complex128, "x") + + +@pytest.fixture +def y(dset): + return op2.Dat(dset, np.arange(1, nelems + 1) + np.arange(1, nelems + 1)*1.j, np.complex128, "y") + + +@pytest.fixture +def yf(dset): + return op2.Dat(dset, np.arange(1, nelems + 1), np.float64, "y") + + +@pytest.fixture +def yc(dset): + return op2.Dat(dset, np.arange(1, nelems + 1), np.complex128, "y") + + +@pytest.fixture +def yi(dset): + return op2.Dat(dset, np.arange(1, nelems + 1), np.int64, "y") + + +@pytest.fixture +def x2(): + s = op2.Set(nelems, "s1") + return op2.Dat(s ** (1, 2), np.zeros(2 * nelems), np.complex128, "x") + + +@pytest.fixture +def y2(): + s = op2.Set(nelems, "s2") + return op2.Dat(s ** (2, 1), np.zeros(2 * nelems), np.complex128, "y") + + +class 
TestLinAlgOp: + + """ + Tests of linear algebra operators returning a new Dat. + """ + + def test_add(self, x, y): + x._data = 2 * y.data + assert all((x + y).data == 3 * y.data) + + def test_sub(self, x, y): + x._data = 2 * y.data + assert all((x - y).data == y.data) + + def test_mul_complex(self, x, y): + x._data = (2+2j) * y.data + assert all((x * y).data == (2+2j) * y.data * y.data) + + def test_div_complex(self, x, y): + x._data = (2+2j) * y.data + # Note complex division does not have the same stability as + # floating point when vectorised + assert all(x.data / y.data == 2.0+2.j) + assert np.allclose((x / y).data, 2.0+2.j) + + def test_mul(self, x, y): + x._data = 2 * y.data + assert all((x * y).data == 2 * y.data * y.data) + + def test_div(self, x, y): + x._data = 2 * y.data + x.data / y.data + # Note complex division does not have the same stability as + # floating point when vectorised + assert all(x.data/y.data == 2.0+0.j) + assert np.allclose((x / y).data, 2.0+0.j) + + def test_add_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 + y2 + + def test_sub_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 - y2 + + def test_mul_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 * y2 + + def test_div_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 / y2 + + def test_add_scalar(self, x, y): + x._data = y.data + 1.0 + assert all(x.data == (y + 1.0).data) + + def test_radd_scalar(self, x, y): + x._data = y.data + 1.0 + assert all(x.data == (1.0 + y).data) + + def test_add_complex_scalar(self, x, y): + x._data = y.data + (1.0+1.j) + assert all(x.data == (y + (1.0+1.j)).data) + + def test_radd_complex_scalar(self, x, y): + x._data = y.data + (1.0+1.j) + assert all(x.data == ((1.0+1.j) + y).data) + + def test_pos_copies(self, y): + z = +y + assert all(z.data == y.data) + assert z is not y + + def test_neg_copies(self, y): + z = -y + assert all(z.data == -y.data) + assert z is not y + + def 
test_sub_scalar(self, x, y): + x._data = y.data - 1.0 + assert all(x.data == (y - 1.0).data) + + def test_rsub_scalar(self, x, y): + x._data = 1.0 - y.data + assert all(x.data == (1.0 - y).data) + + def test_mul_scalar(self, x, y): + x._data = 2 * y.data + assert all(x.data == (y * 2.0).data) + + def test_rmul_scalar(self, x, y): + x._data = 2 * y.data + assert all(x.data == (2.0 * y).data) + + def test_sub_complex_scalar(self, x, y): + x._data = y.data - (1.0+1.j) + assert all(x.data == (y - (1.0+1.j)).data) + + def test_rsub_complex_scalar(self, x, y): + x._data = (1.0+1.j) - y.data + assert all(x.data == ((1.0+1.j) - y).data) + + def test_mul_complex_scalar(self, x, y): + x._data = (2+2j) * y.data + assert all(x.data == (y * (2.0+2.j)).data) + + def test_rmul_complex_scalar(self, x, y): + x._data = (2+2j) * y.data + assert all(x.data == ((2.0+2.j) * y).data) + + def test_div_scalar(self, x, y): + x._data = 2 * y.data + assert all((x / 2.0).data == y.data) + + def test_add_ftype(self, y, yf): + x = y + yf + assert x.data.dtype == np.complex128 + + def test_sub_ftype(self, y, yf): + x = y - yf + assert x.data.dtype == np.complex128 + + def test_mul_ftype(self, y, yf): + x = y * yf + assert x.data.dtype == np.complex128 + + def test_div_ftype(self, y, yf): + x = y / yf + assert x.data.dtype == np.complex128 + + def test_add_ctype(self, y, yc): + x = y + yc + assert x.data.dtype == np.complex128 + + def test_sub_ctype(self, y, yc): + x = y - yc + assert x.data.dtype == np.complex128 + + def test_mul_ctype(self, y, yc): + x = y * yc + assert x.data.dtype == np.complex128 + + def test_div_ctype(self, y, yc): + x = y / yc + assert x.data.dtype == np.complex128 + + def test_add_itype(self, y, yi): + xi = yi + y + assert xi.data.dtype == np.int64 + + def test_sub_itype(self, y, yi): + xi = yi - y + assert xi.data.dtype == np.int64 + + def test_mul_itype(self, y, yi): + xi = yi * y + assert xi.data.dtype == np.int64 + + def test_div_itype(self, y, yi): + xi = yi / y + 
assert xi.data.dtype == np.int64 + + def test_linalg_and_parloop(self, x, y): + """Linear algebra operators should force computation""" + x._data = np.zeros(x.dataset.total_size, dtype=np.complex128) + k = op2.Kernel('static void k(complex double *x) { *x = 1.0+1.0*I; }', 'k') + op2.par_loop(k, x.dataset.set, x(op2.WRITE)) + z = x + y + assert all(z.data == y.data + (1.+1.j)) + + +class TestLinAlgIop: + + """ + Tests of linear algebra operators modifying a Dat in place. + """ + + def test_iadd(self, x, y): + x._data = 2 * y.data + x += y + assert all(x.data == 3 * y.data) + + def test_isub(self, x, y): + x._data = 2 * y.data + x -= y + assert all(x.data == y.data) + + def test_imul(self, x, y): + x._data = 2 * y.data + x *= y + assert all(x.data == 2 * y.data * y.data) + + def test_idiv(self, x, y): + x._data = 2 * y.data + x /= y + # Note complex division does not have the same stability as + # floating point when vectorised + assert np.allclose(x.data, 2.0 + 0.j) + + def test_iadd_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 += y2 + + def test_isub_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 -= y2 + + def test_imul_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 *= y2 + + def test_idiv_shape_mismatch(self, x2, y2): + with pytest.raises(ValueError): + x2 /= y2 + + def test_iadd_scalar(self, x, y): + x._data = y.data + 1.0 + y += 1.0 + assert all(x.data == y.data) + + def test_isub_scalar(self, x, y): + x._data = y.data - 1.0 + y -= 1.0 + assert all(x.data == y.data) + + def test_imul_scalar(self, x, y): + x._data = 2 * y.data + y *= 2.0 + assert all(x.data == y.data) + + def test_idiv_scalar(self, x, y): + x._data = 2 * y.data + x /= 2.0 + assert all(x.data == y.data) + + def test_iadd_complex_scalar(self, x, y): + x._data = y.data + (1.0+1.j) + y += (1.0+1.j) + assert all(x.data == y.data) + + def test_isub_complex_scalar(self, x, y): + x._data = y.data - (1.0+1.j) + y -= (1.0+1.j) + assert 
all(x.data == y.data) + + def test_imul_complex_scalar(self, x, y): + x._data = (2+2j) * y.data + y *= (2.0+2.j) + assert all(x.data == y.data) + + def test_idiv_complex_scalar(self, x, y): + x._data = (2+2j) * y.data + x /= (2.0+2j) + assert all(x.data == y.data) + + def test_iadd_ftype(self, y, yi): + y += yi + assert y.data.dtype == np.complex128 + + def test_isub_ftype(self, y, yi): + y -= yi + assert y.data.dtype == np.complex128 + + def test_imul_ftype(self, y, yi): + y *= yi + assert y.data.dtype == np.complex128 + + def test_idiv_ftype(self, y, yi): + y /= yi + assert y.data.dtype == np.complex128 + + def test_iadd_ctype(self, y, yc): + y += yc + assert y.data.dtype == np.complex128 + + def test_isub_ctype(self, y, yc): + y -= yc + assert y.data.dtype == np.complex128 + + def test_imul_ctype(self, y, yc): + y *= yc + assert y.data.dtype == np.complex128 + + def test_idiv_ctype(self, y, yc): + y /= yc + assert y.data.dtype == np.complex128 + + def test_iadd_itype(self, y, yi): + yi += y + assert yi.data.dtype == np.int64 + + def test_isub_itype(self, y, yi): + yi -= y + assert yi.data.dtype == np.int64 + + def test_imul_itype(self, y, yi): + yi *= y + assert yi.data.dtype == np.int64 + + def test_idiv_itype(self, y, yi): + yi /= y + assert yi.data.dtype == np.int64 + + +class TestLinAlgScalar: + + """ + Tests of linear algebra operators return a scalar. 
+ """ + + def test_norm(self): + s = op2.Set(2) + n = op2.Dat(s, [3, 4j], np.complex128, "n") + assert type(n.norm) is float + assert abs(n.norm - 5) < 1e-12 + + def test_inner(self): + s = op2.Set(2) + n = op2.Dat(s, [3, 4j], np.complex128) + o = op2.Dat(s, [4, 5j], np.complex128) + + ret = n.inner(o) + + assert abs(ret - 32) < 1e-12 + + ret = o.inner(n) + + assert abs(ret - 32) < 1e-12 + + def test_norm_mixed(self): + s = op2.Set(1) + + n = op2.Dat(s, [3], np.complex128) + o = op2.Dat(s, [4j], np.complex128) + + md = op2.MixedDat([n, o]) + assert type(md.norm) is float + assert abs(md.norm - 5) < 1e-12 + + def test_inner_mixed(self): + s = op2.Set(1) + + n = op2.Dat(s, [3], np.complex128) + o = op2.Dat(s, [4j], np.complex128) + + md = op2.MixedDat([n, o]) + + n1 = op2.Dat(s, [4], np.complex128) + o1 = op2.Dat(s, [5j], np.complex128) + + md1 = op2.MixedDat([n1, o1]) + + ret = md.inner(md1) + + assert abs(ret - 32) < 1e-12 + + ret = md1.inner(md) + + assert abs(ret - 32) < 1e-12 diff --git a/tests/pyop2/test_matrices.py b/tests/pyop2/test_matrices.py new file mode 100644 index 0000000000..4f8ab1d1e3 --- /dev/null +++ b/tests/pyop2/test_matrices.py @@ -0,0 +1,986 @@ +# This file is part of PyOP2 +# +# PyOP2 is Copyright (c) 2012, Imperial College London and +# others. Please see the AUTHORS file in the main source directory for +# a full list of copyright holders. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + + +import pytest +import numpy as np +from numpy.testing import assert_allclose + +from pyop2 import op2 +from pyop2.exceptions import MapValueError, ModeValueError +from pyop2.mpi import COMM_WORLD +from pyop2.datatypes import IntType + +from petsc4py.PETSc import ScalarType + + +# Data type +valuetype = ScalarType + +# Constants +NUM_ELE = 2 +NUM_NODES = 4 +NUM_DIMS = 2 +layers = 11 + +elem_node_map = np.asarray([0, 1, 3, 2, 3, 1], dtype=np.uint32) + +xtr_elem_node_map = np.asarray([0, 1, 11, 12, 33, 34, 22, 23, 33, 34, 11, 12], dtype=np.uint32) + + +@pytest.fixture(scope='module') +def nodes(): + return op2.Set(NUM_NODES, "nodes") + + +@pytest.fixture(scope='module') +def elements(): + return op2.Set(NUM_ELE, "elements") + + +@pytest.fixture(scope='module') +def dnodes(nodes): + return op2.DataSet(nodes, 1, "dnodes") + + +@pytest.fixture(scope='module') +def dvnodes(nodes): + return op2.DataSet(nodes, 2, "dvnodes") + + +@pytest.fixture(scope='module') +def delements(elements): + 
return op2.DataSet(elements, 1, "delements") + + +@pytest.fixture(scope='module') +def elem_node(elements, nodes): + return op2.Map(elements, nodes, 3, elem_node_map, "elem_node") + + +@pytest.fixture +def mat(elem_node, dnodes): + sparsity = op2.Sparsity((dnodes, dnodes), [(elem_node, elem_node, None)], name="sparsity") + return op2.Mat(sparsity, valuetype, "mat") + + +@pytest.fixture +def mass_mat(mass, elements, mat, coords, elem_node): + mat.zero() + op2.par_loop(mass, elements, + mat(op2.INC, (elem_node, elem_node)), + coords(op2.READ, elem_node)) + mat.assemble() + return mat + + +@pytest.fixture +def coords(dvnodes): + coord_vals = np.asarray([(0.0, 0.0), (2.0, 0.0), + (1.0, 1.0), (0.0, 1.5)], + dtype=valuetype) + return op2.Dat(dvnodes, coord_vals, valuetype, "coords") + + +@pytest.fixture +def g(request): + return op2.Global(1, 1.0, np.float64, "g", comm=COMM_WORLD) + + +@pytest.fixture +def f(dnodes): + f_vals = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=valuetype) + return op2.Dat(dnodes, f_vals, valuetype, "f") + + +@pytest.fixture +def f_vec(dvnodes): + f_vals = np.asarray([(1.0, 2.0)] * 4, dtype=valuetype) + return op2.Dat(dvnodes, f_vals, valuetype, "f") + + +@pytest.fixture +def b(dnodes): + b_vals = np.zeros(NUM_NODES, dtype=valuetype) + return op2.Dat(dnodes, b_vals, valuetype, "b") + + +@pytest.fixture +def b_vec(dvnodes): + b_vals = np.zeros(NUM_NODES * 2, dtype=valuetype) + return op2.Dat(dvnodes, b_vals, valuetype, "b") + + +@pytest.fixture +def b_rhs(b, rhs, elements, coords, f, elem_node): + b.zero() + op2.par_loop(rhs, elements, + b(op2.INC, elem_node), + coords(op2.READ, elem_node), + f(op2.READ, elem_node)) + return b + + +@pytest.fixture +def x(dnodes): + x_vals = np.zeros(NUM_NODES, dtype=valuetype) + return op2.Dat(dnodes, x_vals, valuetype, "x") + + +@pytest.fixture +def x_vec(dvnodes): + x_vals = np.zeros(NUM_NODES * 2, dtype=valuetype) + return op2.Dat(dvnodes, x_vals, valuetype, "x") + + +@pytest.fixture +def mass(): + kernel_code = 
""" +static void mass(double localTensor[3][3], double c0[3][2]) { + double CG1[3][6] = { { 0.09157621, 0.09157621, 0.81684757, + 0.44594849, 0.44594849, 0.10810302 }, + { 0.09157621, 0.81684757, 0.09157621, + 0.44594849, 0.10810302, 0.44594849 }, + { 0.81684757, 0.09157621, 0.09157621, + 0.10810302, 0.44594849, 0.44594849 } }; + double d_CG1[3][6][2] = { { { 1., 0. }, + { 1., 0. }, + { 1., 0. }, + { 1., 0. }, + { 1., 0. }, + { 1., 0. } }, + { { 0., 1. }, + { 0., 1. }, + { 0., 1. }, + { 0., 1. }, + { 0., 1. }, + { 0., 1. } }, + { { -1.,-1. }, + { -1.,-1. }, + { -1.,-1. }, + { -1.,-1. }, + { -1.,-1. }, + { -1.,-1. } } }; + double w[6] = { 0.05497587, 0.05497587, 0.05497587, 0.11169079, + 0.11169079, 0.11169079 }; + double c_q0[6][2][2]; + for(int i_g = 0; i_g < 6; i_g++) + { + for(int i_d_0 = 0; i_d_0 < 2; i_d_0++) + { + for(int i_d_1 = 0; i_d_1 < 2; i_d_1++) + { + c_q0[i_g][i_d_0][i_d_1] = 0.0; + for(int q_r_0 = 0; q_r_0 < 3; q_r_0++) + { + c_q0[i_g][i_d_0][i_d_1] += c0[q_r_0][i_d_0] * d_CG1[q_r_0][i_g][i_d_1]; + }; + }; + }; + }; + for(int i_g = 0; i_g < 6; i_g++) { + for (int i_r_0=0; i_r_0<3; ++i_r_0) { + for (int i_r_1=0; i_r_1<3; ++i_r_1) { + double ST0 = 0.0; + ST0 += CG1[i_r_0][i_g] * CG1[i_r_1][i_g] * (c_q0[i_g][0][0] * c_q0[i_g][1][1] + -1 * c_q0[i_g][0][1] * c_q0[i_g][1][0]); + localTensor[i_r_0][i_r_1] += ST0 * w[i_g]; + } + } + } +} + """ + return op2.Kernel(kernel_code, "mass") + + +@pytest.fixture +def rhs(): + kernel_code = """ +static void rhs(double* localTensor, double c0[3][2], double* c1) +{ + double CG1[3][6] = { { 0.09157621, 0.09157621, 0.81684757, + 0.44594849, 0.44594849, 0.10810302 }, + { 0.09157621, 0.81684757, 0.09157621, + 0.44594849, 0.10810302, 0.44594849 }, + { 0.81684757, 0.09157621, 0.09157621, + 0.10810302, 0.44594849, 0.44594849 } }; + double d_CG1[3][6][2] = { { { 1., 0. }, + { 1., 0. }, + { 1., 0. }, + { 1., 0. }, + { 1., 0. }, + { 1., 0. } }, + { { 0., 1. }, + { 0., 1. }, + { 0., 1. }, + { 0., 1. }, + { 0., 1. }, + { 0., 1. 
} }, + { { -1.,-1. }, + { -1.,-1. }, + { -1.,-1. }, + { -1.,-1. }, + { -1.,-1. }, + { -1.,-1. } } }; + double w[6] = { 0.05497587, 0.05497587, 0.05497587, 0.11169079, + 0.11169079, 0.11169079 }; + double c_q1[6]; + double c_q0[6][2][2]; + for(int i_g = 0; i_g < 6; i_g++) + { + c_q1[i_g] = 0.0; + for(int q_r_0 = 0; q_r_0 < 3; q_r_0++) + { + c_q1[i_g] += c1[q_r_0] * CG1[q_r_0][i_g]; + }; + for(int i_d_0 = 0; i_d_0 < 2; i_d_0++) + { + for(int i_d_1 = 0; i_d_1 < 2; i_d_1++) + { + c_q0[i_g][i_d_0][i_d_1] = 0.0; + for(int q_r_0 = 0; q_r_0 < 3; q_r_0++) + { + c_q0[i_g][i_d_0][i_d_1] += c0[q_r_0][i_d_0] * d_CG1[q_r_0][i_g][i_d_1]; + }; + }; + }; + }; + for(int i_r_0 = 0; i_r_0 < 3; i_r_0++) + { + for(int i_g = 0; i_g < 6; i_g++) + { + double ST1 = 0.0; + ST1 += CG1[i_r_0][i_g] * c_q1[i_g] * (c_q0[i_g][0][0] * c_q0[i_g][1][1] + -1 * c_q0[i_g][0][1] * c_q0[i_g][1][0]); + localTensor[i_r_0] += ST1 * w[i_g]; + }; + }; +}""" + return op2.Kernel(kernel_code, "rhs") + + +@pytest.fixture +def mass_ffc(): + kernel_code = """ +static void mass_ffc(double A[3][3], double x[3][2]) { + double J_00 = x[1][0] - x[0][0]; + double J_01 = x[2][0] - x[0][0]; + double J_10 = x[1][1] - x[0][1]; + double J_11 = x[2][1] - x[0][1]; + + double detJ = J_00*J_11 - J_01*J_10; + double det = fabs(detJ); + + double W3[3] = {0.166666666666667, 0.166666666666667, 0.166666666666667}; + double FE0[3][3] = \ + {{0.666666666666667, 0.166666666666667, 0.166666666666667}, + {0.166666666666667, 0.166666666666667, 0.666666666666667}, + {0.166666666666667, 0.666666666666667, 0.166666666666667}}; + + for (unsigned int ip = 0; ip < 3; ip++) + for (int j=0; j<3; ++j) + for (int k=0; k<3; ++k) + A[j][k] += FE0[ip][j]*FE0[ip][k]*W3[ip]*det; +} + """ + return op2.Kernel(kernel_code, "mass_ffc") + + +@pytest.fixture +def rhs_ffc(): + kernel_code = """ +static void rhs_ffc(double *A, double x[3][2], double *w0) +{ + double J_00 = x[1][0] - x[0][0]; + double J_01 = x[2][0] - x[0][0]; + double J_10 = x[1][1] - x[0][1]; + 
double J_11 = x[2][1] - x[0][1]; + + double detJ = J_00*J_11 - J_01*J_10; + + double det = fabs(detJ); + + double W3[3] = {0.166666666666667, 0.166666666666667, 0.166666666666667}; + double FE0[3][3] = \ + {{0.666666666666667, 0.166666666666667, 0.166666666666667}, + {0.166666666666667, 0.166666666666667, 0.666666666666667}, + {0.166666666666667, 0.666666666666667, 0.166666666666667}}; + + for (unsigned int ip = 0; ip < 3; ip++) + { + double F0 = 0.0; + + for (unsigned int r = 0; r < 3; r++) + { + F0 += FE0[ip][r]*w0[r]; + } + + for (unsigned int j = 0; j < 3; j++) + { + A[j] += FE0[ip][j]*F0*W3[ip]*det; + } + } +} +""" + return op2.Kernel(kernel_code, "rhs_ffc") + + +@pytest.fixture +def rhs_ffc_itspace(): + kernel_code = """ +static void rhs_ffc_itspace(double A[3], double x[3][2], double *w0) { + double J_00 = x[1][0] - x[0][0]; + double J_01 = x[2][0] - x[0][0]; + double J_10 = x[1][1] - x[0][1]; + double J_11 = x[2][1] - x[0][1]; + + double detJ = J_00*J_11 - J_01*J_10; + double det = fabs(detJ); + + double W3[3] = {0.166666666666667, 0.166666666666667, 0.166666666666667}; + double FE0[3][3] = \ + {{0.666666666666667, 0.166666666666667, 0.166666666666667}, + {0.166666666666667, 0.166666666666667, 0.666666666666667}, + {0.166666666666667, 0.666666666666667, 0.166666666666667}}; + + for (unsigned int ip = 0; ip < 3; ip++) { + double F0 = 0.0; + + for (unsigned int r = 0; r < 3; r++) + F0 += FE0[ip][r]*w0[r]; + for (unsigned int j=0; j<3; ++j) + A[j] += FE0[ip][j]*F0*W3[ip]*det; + } +} + """ + return op2.Kernel(kernel_code, "rhs_ffc_itspace") + + +@pytest.fixture +def zero_dat(): + kernel_code = """ +static void zero_dat(double *dat) +{ + *dat = 0.0; +} +""" + return op2.Kernel(kernel_code, "zero_dat") + + +@pytest.fixture +def zero_vec_dat(): + kernel_code = """ +static void zero_vec_dat(double *dat) +{ + dat[0] = 0.0; dat[1] = 0.0; +} +""" + return op2.Kernel(kernel_code, "zero_vec_dat") + + +@pytest.fixture +def kernel_inc(): + kernel_code = """ +static void 
inc(double entry[3][3], double *g) { + for (int i=0; i<3; ++i) + for (int j=0; j<3; ++j) + entry[i][j] += g[0]; +} + """ + return op2.Kernel(kernel_code, "inc") + + +@pytest.fixture +def kernel_set(): + kernel_code = """ +static void set(double entry[3][3], double *g) { + for (int i=0; i<3; ++i) + for (int j=0; j<3; ++j) + entry[i][j] = g[0]; +} + """ + return op2.Kernel(kernel_code, "set") + + +@pytest.fixture +def kernel_inc_vec(): + kernel_code = """ +static void inc_vec(double entry[2][2], double* g, int i, int j) +{ + entry[0][0] += *g; + entry[0][1] += *g; + entry[1][0] += *g; + entry[1][1] += *g; +} +""" + return op2.Kernel(kernel_code, "inc_vec") + + +@pytest.fixture +def kernel_set_vec(): + kernel_code = """ +static void set_vec(double entry[2][2], double* g, int i, int j) +{ + entry[0][0] = *g; + entry[0][1] = *g; + entry[1][0] = *g; + entry[1][1] = *g; +} +""" + return op2.Kernel(kernel_code, "set_vec") + + +@pytest.fixture +def expected_matrix(): + expected_vals = [(0.25, 0.125, 0.0, 0.125), + (0.125, 0.291667, 0.0208333, 0.145833), + (0.0, 0.0208333, 0.0416667, 0.0208333), + (0.125, 0.145833, 0.0208333, 0.291667)] + return np.asarray(expected_vals, dtype=valuetype) + + +@pytest.fixture +def expected_vector_matrix(): + expected_vals = [(0.25, 0., 0.125, 0., 0., 0., 0.125, 0.), + (0., 0.25, 0., 0.125, 0., 0., 0., 0.125), + (0.125, 0., 0.29166667, 0., + 0.02083333, 0., 0.14583333, 0.), + (0., 0.125, 0., 0.29166667, 0., + 0.02083333, 0., 0.14583333), + (0., 0., 0.02083333, 0., + 0.04166667, 0., 0.02083333, 0.), + (0., 0., 0., 0.02083333, 0., + 0.04166667, 0., 0.02083333), + (0.125, 0., 0.14583333, 0., + 0.02083333, 0., 0.29166667, 0.), + (0., 0.125, 0., 0.14583333, 0., 0.02083333, 0., 0.29166667)] + return np.asarray(expected_vals, dtype=valuetype) + + +@pytest.fixture +def expected_rhs(): + return np.asarray([0.9999999523522115, 1.3541666031724144, + 0.2499999883507239, 1.6458332580869566], + dtype=valuetype) + + +@pytest.fixture +def expected_vec_rhs(): 
+ return np.asarray([[0.5, 1.0], [0.58333333, 1.16666667], + [0.08333333, 0.16666667], [0.58333333, 1.16666667]], + dtype=valuetype) + + +@pytest.fixture +def mset(): + return op2.MixedSet((op2.Set(3), op2.Set(4))) + + +rdata = lambda s: np.arange(1, s + 1, dtype=np.float64) + + +@pytest.fixture +def mdat(mset): + return op2.MixedDat(op2.Dat(s, rdata(s.size)) for s in mset) + + +@pytest.fixture +def mvdat(mset): + return op2.MixedDat(op2.Dat(s ** 2, list(zip(rdata(s.size), rdata(s.size)))) for s in mset) + + +@pytest.fixture +def mmap(mset): + elem, node = mset + return op2.MixedMap((op2.Map(elem, elem, 1, [0, 1, 2]), + op2.Map(elem, node, 2, [0, 1, 1, 2, 2, 3]))) + + +@pytest.fixture +def msparsity(mset, mmap): + return op2.Sparsity((mset ** 1, mset ** 1), {(i, j): [(rm, cm, None)] for i, rm in enumerate(mmap) for j, cm in enumerate(mmap)}) + + +@pytest.fixture +def non_nest_mixed_sparsity(mset, mmap): + return op2.Sparsity((mset ** 1, mset ** 1), {(i, j): [(rm, cm, None)] for i, rm in enumerate(mmap) for j, cm in enumerate(mmap)}, nest=False) + + +@pytest.fixture +def mvsparsity(mset, mmap): + return op2.Sparsity((mset ** 2, mset ** 2), {(i, j): [(rm, cm, None)] for i, rm in enumerate(mmap) for j, cm in enumerate(mmap)}) + + +class TestSparsity: + + """ + Sparsity tests + """ + + def test_sparsity_null_maps(self): + """Building sparsity from a pair of non-initialized maps should fail.""" + s = op2.Set(5) + with pytest.raises(MapValueError): + m = op2.Map(s, s, 1) + op2.Sparsity((s ** 1, s ** 1), [(m, m, None)]) + + def test_sparsity_has_diagonal_space(self): + # A sparsity should have space for diagonal entries if rmap==cmap + s = op2.Set(1) + d = op2.Set(4) + m = op2.Map(s, d, 2, [1, 3]) + d2 = op2.Set(4) + m2 = op2.Map(s, d2, 3, [1, 2, 3]) + sparsity = op2.Sparsity((d ** 1, d ** 1), [(m, m, None)]) + sparsity2 = op2.Sparsity((d ** 1, d2 ** 1), [(m, m2, None)]) + + assert all(sparsity.nnz == [1, 2, 1, 2]) + assert all(sparsity2.nnz == [0, 3, 0, 3]) + + +class 
TestMatrices: + + """ + Matrix tests + """ + + @pytest.mark.parametrize("mode", [op2.READ, op2.RW, op2.MAX, op2.MIN]) + def test_invalid_mode(self, elements, elem_node, mat, mode): + """Mat args can only have modes WRITE and INC.""" + with pytest.raises(ModeValueError): + op2.par_loop(op2.Kernel("", "dummy"), elements, + mat(mode, (elem_node, elem_node))) + + @pytest.mark.parametrize('n', [1, 2]) + def test_mat_set_diagonal(self, nodes, elem_node, n): + "Set the diagonal of the entire matrix to 1.0" + mat = op2.Mat(op2.Sparsity((nodes ** n, nodes ** n), [(elem_node, elem_node, None)]), valuetype) + nrows = mat.nblock_rows + mat.set_local_diagonal_entries(list(range(nrows))) + mat.assemble() + assert (mat.values == np.identity(nrows * n)).all() + + @pytest.mark.parametrize('n', [1, 2]) + def test_mat_repeated_set_diagonal(self, nodes, elem_node, n): + "Set the diagonal of the entire matrix to 1.0" + mat = op2.Mat(op2.Sparsity((nodes ** n, nodes ** n), [(elem_node, elem_node, None)]), valuetype) + nrows = mat.nblock_rows + mat.set_local_diagonal_entries(list(range(nrows))) + mat.assemble() + assert (mat.values == np.identity(nrows * n)).all() + mat.set_local_diagonal_entries(list(range(nrows))) + mat.assemble() + assert (mat.values == np.identity(nrows * n)).all() + + def test_mat_always_has_diagonal_space(self): + # A sparsity should always have space for diagonal entries + s = op2.Set(1) + d = op2.Set(4) + m = op2.Map(s, d, 1, [2]) + d2 = op2.Set(3) + m2 = op2.Map(s, d2, 1, [1]) + sparsity = op2.Sparsity((d ** 1, d2 ** 1), [(m, m2, None)]) + + from petsc4py import PETSc + # petsc4py default error handler swallows SETERRQ, so just + # install the abort handler to notice an error. 
+ PETSc.Sys.pushErrorHandler("abort") + mat = op2.Mat(sparsity) + PETSc.Sys.popErrorHandler() + + assert np.allclose(mat.handle.getDiagonal().array, 0.0) + + def test_minimal_zero_mat(self): + """Assemble a matrix that is all zeros.""" + zero_mat_code = """ +void zero_mat(double local_mat[1][1]) { + local_mat[0][0] = 0.0; +} + """ + + nelems = 128 + set = op2.Set(nelems) + map = op2.Map(set, set, 1, np.array(list(range(nelems)), np.uint32)) + sparsity = op2.Sparsity((set ** 1, set ** 1), [(map, map, None)]) + mat = op2.Mat(sparsity, np.float64) + kernel = op2.Kernel(zero_mat_code, "zero_mat") + op2.par_loop(kernel, set, + mat(op2.WRITE, (map, map))) + + mat.assemble() + expected_matrix = np.zeros((nelems, nelems), dtype=np.float64) + eps = 1.e-12 + assert_allclose(mat.values, expected_matrix, eps) + + def test_assemble_mat(self, mass, mat, coords, elements, + elem_node, expected_matrix): + """Assemble a simple finite-element matrix and check the result.""" + mat.zero() + op2.par_loop(mass, elements, + mat(op2.INC, (elem_node, elem_node)), + coords(op2.READ, elem_node)) + mat.assemble() + eps = 1.e-5 + assert_allclose(mat.values, expected_matrix, eps) + + def test_assemble_rhs(self, rhs, elements, b, coords, f, + elem_node, expected_rhs): + """Assemble a simple finite-element right-hand side and check result.""" + b.zero() + op2.par_loop(rhs, elements, + b(op2.INC, elem_node), + coords(op2.READ, elem_node), + f(op2.READ, elem_node)) + + eps = 1.e-12 + assert_allclose(b.data, expected_rhs, eps) + + def test_solve(self, mass_mat, b_rhs, x, f): + """Solve a linear system where the solution is equal to the right-hand + side and check the result.""" + x = np.linalg.solve(mass_mat.values, b_rhs.data) + eps = 1.e-8 + assert_allclose(x, f.data, eps) + + def test_zero_matrix(self, mat): + """Test that the matrix is zeroed correctly.""" + mat.zero() + expected_matrix = np.zeros((4, 4), dtype=valuetype) + eps = 1.e-14 + assert_allclose(mat.values, expected_matrix, eps) + + def 
test_set_matrix(self, mat, elements, elem_node, + kernel_inc, kernel_set, g): + """Test accessing a scalar matrix with the WRITE access by adding some + non-zero values into the matrix, then setting them back to zero with a + kernel using op2.WRITE""" + mat.zero() + op2.par_loop(kernel_inc, elements, + mat(op2.INC, (elem_node, elem_node)), + g(op2.READ)) + mat.assemble() + # Check we have ones in the matrix + assert mat.values.sum() == 3 * 3 * elements.size + op2.par_loop(kernel_set, elements, + mat(op2.WRITE, (elem_node, elem_node)), + g(op2.READ)) + mat.assemble() + assert mat.values.sum() == (3 * 3 - 2) * elements.size + + def test_zero_rhs(self, b, zero_dat, nodes): + """Test that the RHS is zeroed correctly.""" + op2.par_loop(zero_dat, nodes, + b(op2.WRITE)) + assert all(b.data == np.zeros_like(b.data)) + + def test_assemble_ffc(self, mass_ffc, mat, coords, elements, + elem_node, expected_matrix): + """Test that the FFC mass assembly assembles the correct values.""" + op2.par_loop(mass_ffc, elements, + mat(op2.INC, (elem_node, elem_node)), + coords(op2.READ, elem_node)) + mat.assemble() + eps = 1.e-5 + assert_allclose(mat.values, expected_matrix, eps) + + def test_rhs_ffc(self, rhs_ffc, elements, b, coords, f, + elem_node, expected_rhs): + """Test that the FFC rhs assembly assembles the correct values.""" + op2.par_loop(rhs_ffc, elements, + b(op2.INC, elem_node), + coords(op2.READ, elem_node), + f(op2.READ, elem_node)) + + eps = 1.e-6 + assert_allclose(b.data, expected_rhs, eps) + + def test_rhs_ffc_itspace(self, rhs_ffc_itspace, elements, b, + coords, f, elem_node, expected_rhs, + zero_dat, nodes): + """Test that the FFC right-hand side assembly using iteration spaces + assembles the correct values.""" + # Zero the RHS first + op2.par_loop(zero_dat, nodes, + b(op2.WRITE)) + op2.par_loop(rhs_ffc_itspace, elements, + b(op2.INC, elem_node), + coords(op2.READ, elem_node), + f(op2.READ, elem_node)) + eps = 1.e-6 + assert_allclose(b.data, expected_rhs, eps) + + def 
test_zero_rows(self, mass_mat, expected_matrix): + """Zeroing a row in the matrix should set the diagonal to the given + value and all other values to 0.""" + expected_matrix[0] = [12.0, 0.0, 0.0, 0.0] + mass_mat.zero_rows([0], 12.0) + eps = 1.e-5 + assert_allclose(mass_mat.values, expected_matrix, eps) + + def test_zero_rows_subset(self, nodes, mass_mat, expected_matrix): + """Zeroing rows in the matrix given by a :class:`op2.Subset` should + set the diagonal to the given value and all other values to 0.""" + expected_matrix[0] = [12.0, 0.0, 0.0, 0.0] + ss = op2.Subset(nodes, [0]) + mass_mat.zero_rows(ss, 12.0) + assert_allclose(mass_mat.values, expected_matrix, 1e-5) + + def test_zero_last_row(self, nodes, mass_mat, expected_matrix): + """Zeroing a row in the matrix should set the diagonal to the given + value and all other values to 0.""" + expected_matrix[0] = [12.0, 0.0, 0.0, 0.0] + ss = op2.Subset(nodes, [0]) + mass_mat.zero_rows(ss, 12.0) + + which = NUM_NODES - 1 + expected_matrix[0] = [12.0, 0.0, 0.0, 0.0] + expected_matrix[which] = [0.0, 0.0, 0.0, 4.0] + mass_mat.zero_rows([which], 4.0) + eps = 1.e-5 + assert_allclose(mass_mat.values, expected_matrix, eps) + + def test_mat_nbytes(self, mat): + """Check that the matrix uses the amount of memory we expect.""" + assert mat.nbytes == 14 * 8 + + +class TestMatrixStateChanges: + + """ + Test that matrix state changes are correctly tracked. 
+ """ + + @pytest.fixture(params=[False, True], + ids=["Non-nested", "Nested"]) + def mat(self, request, msparsity, non_nest_mixed_sparsity): + if request.param: + mat = op2.Mat(msparsity) + else: + mat = op2.Mat(non_nest_mixed_sparsity) + + opt = mat.handle.Option.NEW_NONZERO_ALLOCATION_ERR + opt2 = mat.handle.Option.UNUSED_NONZERO_LOCATION_ERR + mat.handle.setOption(opt, False) + mat.handle.setOption(opt2, False) + for m in mat: + m.handle.setOption(opt, False) + m.handle.setOption(opt2, False) + return mat + + def test_mat_starts_assembled(self, mat): + assert mat.assembly_state is op2.Mat.ASSEMBLED + for m in mat: + assert m.assembly_state is op2.Mat.ASSEMBLED + + def test_after_set_local_state_is_insert(self, mat): + mat[0, 0].set_local_diagonal_entries([0]) + assert mat[0, 0].assembly_state is op2.Mat.INSERT_VALUES + if not mat.sparsity.nested: + assert mat.assembly_state is op2.Mat.INSERT_VALUES + if mat.sparsity.nested: + assert mat[1, 1].assembly_state is op2.Mat.ASSEMBLED + + def test_after_addto_state_is_add(self, mat): + mat[0, 0].addto_values(0, 0, [1]) + assert mat[0, 0].assembly_state is op2.Mat.ADD_VALUES + if not mat.sparsity.nested: + assert mat.assembly_state is op2.Mat.ADD_VALUES + if mat.sparsity.nested: + assert mat[1, 1].assembly_state is op2.Mat.ASSEMBLED + + def test_matblock_assemble_runtimeerror(self, mat): + if mat.sparsity.nested: + return + with pytest.raises(RuntimeError): + mat[0, 0].assemble() + + def test_mixing_insert_and_add_works(self, mat): + mat[0, 0].addto_values(0, 0, [1]) + mat[1, 1].addto_values(1, 1, [3]) + mat[1, 1].set_values(0, 0, [2]) + mat[0, 0].set_values(1, 1, [4]) + mat[1, 1].addto_values(0, 0, [3]) + mat.assemble() + + assert np.allclose(mat[0, 0].values, np.diag([1, 4, 0])) + assert np.allclose(mat[1, 1].values, np.diag([5, 3, 0, 0])) + + assert np.allclose(mat[0, 1].values, 0) + assert np.allclose(mat[1, 0].values, 0) + + def test_assembly_flushed_between_insert_and_add(self, mat): + import types + 
class TestMixedMatrices:
    """
    Assembly tests for matrices and right-hand sides on mixed spaces
    """

    # Expected off-diagonal block (0, 1); its transpose is expected in (1, 0).
    od = np.array([[1.0, 2.0, 0.0, 0.0],
                   [0.0, 4.0, 6.0, 0.0],
                   [0.0, 0.0, 9.0, 12.0]])
    # Expected (1, 1) block, i.e. the lower *right* one (tridiagonal).
    ll = (np.diag([1.0, 8.0, 18.0, 16.0])
          + np.diag([2.0, 6.0, 12.0], -1)
          + np.diag([2.0, 6.0, 12.0], 1))

    @pytest.fixture
    def mat(self, msparsity, mmap, mdat):
        """Assembled mixed matrix: each element adds the outer product d*d^T."""
        source = """static void addone_mat(double v[9], double d[3]) {
            for (int i = 0; i < 3; i++)
              for (int j = 0; j < 3; j++)
                v[i*3 + j] += d[i]*d[j];
        }"""
        matrix = op2.Mat(msparsity)
        kernel = op2.Kernel(source, "addone_mat")
        op2.par_loop(kernel, mmap.iterset,
                     matrix(op2.INC, (mmap, mmap)),
                     mdat(op2.READ, mmap))
        matrix.assemble()
        return matrix

    @pytest.fixture
    def dat(self, mset, mmap, mdat):
        """Mixed Dat into which ``mdat`` has been accumulated element by element."""
        kernel_code = """
static void addone_rhs(double v[3], double d[3]) {
    for (int i=0; i<3; ++i)
        v[i] += d[i];
}
        """
        rhs = op2.MixedDat(mset)
        kernel = op2.Kernel(kernel_code, "addone_rhs")
        op2.par_loop(kernel, mmap.iterset,
                     rhs(op2.INC, mmap),
                     mdat(op2.READ, mmap))
        return rhs

    def test_assemble_mixed_mat(self, mat):
        """Every block of the assembled mixed matrix has the expected values."""
        tol = 1.e-12
        expected_blocks = (
            ((0, 0), np.diag([1.0, 4.0, 9.0])),
            ((0, 1), self.od),
            ((1, 0), self.od.T),
            ((1, 1), self.ll),
        )
        for (row, col), expected in expected_blocks:
            assert_allclose(mat[row, col].values, expected, tol)

    def test_assemble_mixed_rhs(self, dat):
        """Both components of the assembled scalar mixed right-hand side match."""
        tol = 1.e-12
        assert_allclose(dat[0].data_ro, rdata(3), tol)
        assert_allclose(dat[1].data_ro, [1.0, 4.0, 6.0, 4.0], tol)

    def test_assemble_mixed_rhs_vector(self, mset, mmap, mvdat):
        """Assemble a two-component (vector-valued) right-hand side over a
        mixed space and check both components of every entry."""
        kernel_code = """
static void addone_rhs_vec(double v[6], double d[3][2]) {
    for (int i=0; i<3; ++i) {
        v[i*2+0] += d[i][0];
        v[i*2+1] += d[i][1];
    }
}
        """
        rhs = op2.MixedDat(mset ** 2)
        kernel = op2.Kernel(kernel_code, "addone_rhs_vec")
        op2.par_loop(kernel, mmap.iterset,
                     rhs(op2.INC, mmap),
                     mvdat(op2.READ, mmap))

        tol = 1.e-12
        # Each scalar reference value is duplicated across the two components.
        pair = np.ones(2)
        expected_second = np.kron([(value,) for value in [1.0, 4.0, 6.0, 4.0]], pair)
        expected_first = np.kron([(value,) for value in rdata(3)], pair)
        assert_allclose(rhs[0].data_ro, expected_first, tol)
        assert_allclose(rhs[1].data_ro, expected_second, tol)
if __name__ == '__main__':
    import os
    # pytest.main() expects a *list* of command-line arguments; current
    # pytest rejects a bare string with TypeError, so wrap the path.
    pytest.main([os.path.abspath(__file__)])
class TestPETSc:
    """
    Tests of the PETSc Vec views exposed by Dat and MixedDat
    """

    def test_vec_norm_changes(self):
        """The read-only Vec of a Dat reflects values written through ``data``."""
        s = op2.Set(1)
        d = op2.Dat(s)

        d.data[:] = 1

        with d.vec_ro as v:
            assert np.allclose(v.norm(), 1.0)

        d.data[:] = 2

        with d.vec_ro as v:
            assert np.allclose(v.norm(), 2.0)

    def test_mixed_vec_access(self):
        """Check read-only and write-only Vec access on a MixedDat.

        The read-only Vec must show the values of both components.  After
        both components are zeroed, the write-only Vec is expected to still
        hold the previously read values, and whatever is written through it
        must end up in both components of the MixedDat.
        """
        s = op2.Set(1)
        ms = op2.MixedSet([s, s])
        d = op2.MixedDat(ms)

        d.data[0][:] = 1.0
        d.data[1][:] = 2.0

        with d.vec_ro as v:
            assert np.allclose(v.array_r, [1.0, 2.0])

        # Zero *both* components (the second assignment previously repeated
        # index 0, leaving component 1 untouched) so the stale-value check
        # below covers the whole Vec.
        d.data[0][:] = 0.0
        d.data[1][:] = 0.0

        with d.vec_wo as v:
            # NOTE(review): presumably write-only access skips copying the
            # Dat values into the Vec, hence the old contents -- confirm
            # against MixedDat.vec_context.
            assert np.allclose(v.array_r, [1.0, 2.0])
            v.array[:] = 1

        assert d.data[0][0] == 1
        assert d.data[1][0] == 1
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * The name of Imperial College London or that of other +# contributors may not be used to endorse or promote products +# derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS +# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. 
def test_direct_loop_empty(self, iterset):
    """Test a direct loop with an empty subset: no entry may be touched."""
    ss = op2.Subset(iterset, [])
    d = op2.Dat(iterset ** 1, data=None, dtype=np.uint32)
    k = op2.Kernel("static void inc(unsigned int* v) { *v += 1; }", "inc")
    op2.par_loop(k, ss, d(op2.RW))
    inds, = np.where(d.data)
    # Check emptiness via the size: ``(inds == []).all()`` broadcasts a
    # one-element ``inds`` against the empty list to an empty array, whose
    # ``.all()`` is True, so a single wrongly-incremented entry went unnoticed.
    assert inds.size == 0
def test_indirect_loop(self, iterset):
    """Test an indirect ParLoop over the even-numbered subset.

    Every iteration-set element maps to target 0 (even index) or target 1
    (odd index); looping over the even subset must therefore increment
    target 0 exactly ``nelems // 2`` times.
    """
    even = np.arange(0, nelems, 2, dtype=np.int32)
    subset = op2.Subset(iterset, even)

    indset = op2.Set(2, "indset")
    parity = [i % 2 for i in range(nelems)]
    parity_map = op2.Map(iterset, indset, 1, parity)
    counter = op2.Dat(indset ** 1, data=None, dtype=np.uint32)

    kernel = op2.Kernel("static void inc(unsigned int* v) { *v += 1;}", "inc")
    op2.par_loop(kernel, subset, counter(op2.INC, parity_map))

    assert counter.data[0] == nelems // 2
def test_indirect_loop_with_direct_dat(self, iterset):
    """Test an indirect ParLoop on a subset that also reads a direct Dat.

    The source Dat holds ``i // 2`` at even positions and a large filler
    value at odd positions; only the even positions are in the subset, so
    the sum accumulated into target 0 must equal the sum of the even
    entries.
    """
    even = np.arange(0, nelems, 2, dtype=np.int32)
    subset = op2.Subset(iterset, even)

    indset = op2.Set(2, "indset")
    parity_map = op2.Map(iterset, indset, 1, [i % 2 for i in range(nelems)])

    filler = 2976579765  # stored at the odd positions, which are not in the subset
    values = [filler if i % 2 else i // 2 for i in range(nelems)]
    source = op2.Dat(iterset ** 1, data=values, dtype=np.uint32)
    target = op2.Dat(indset ** 1, data=None, dtype=np.uint32)

    kernel = op2.Kernel("static void inc(unsigned* d, unsigned int* s) { *d += *s;}", "inc")
    op2.par_loop(kernel, subset, target(op2.INC, parity_map), source(op2.READ))

    assert target.data[0] == sum(values[::2])
class TestSetOperations:

    """
    Set operation tests

    Index comparisons use ``np.array_equal`` rather than
    ``(a == expected).all()``: the latter broadcasts, so an empty or
    one-element result can compare "equal" to the wrong expected list.
    """

    def test_set_set_operations(self):
        """Test standard set operations between a set and itself"""
        a = op2.Set(10)
        u = a.union(a)
        i = a.intersection(a)
        d = a.difference(a)
        s = a.symmetric_difference(a)
        assert u is a
        assert i is a
        assert d._indices.size == 0
        assert s._indices.size == 0

    def test_set_subset_operations(self):
        """Test standard set operations between a set and a subset"""
        a = op2.Set(10)
        b = op2.Subset(a, np.array([2, 3, 5, 7], dtype=np.int32))
        u = a.union(b)
        i = a.intersection(b)
        d = a.difference(b)
        s = a.symmetric_difference(b)
        assert u is a
        assert i is b
        assert np.array_equal(d._indices, [0, 1, 4, 6, 8, 9])
        assert np.array_equal(s._indices, d._indices)

    def test_subset_set_operations(self):
        """Test standard set operations between a subset and a set"""
        a = op2.Set(10)
        b = op2.Subset(a, np.array([2, 3, 5, 7], dtype=np.int32))
        u = b.union(a)
        i = b.intersection(a)
        d = b.difference(a)
        s = b.symmetric_difference(a)
        assert u is a
        assert i is b
        assert d._indices.size == 0
        assert np.array_equal(s._indices, [0, 1, 4, 6, 8, 9])

    def test_subset_subset_operations(self):
        """Test standard set operations between two subsets"""
        a = op2.Set(10)
        b = op2.Subset(a, np.array([2, 3, 5, 7], dtype=np.int32))
        c = op2.Subset(a, np.array([2, 4, 6, 8], dtype=np.int32))
        u = b.union(c)
        i = b.intersection(c)
        d = b.difference(c)
        s = b.symmetric_difference(c)
        assert np.array_equal(u._indices, [2, 3, 4, 5, 6, 7, 8])
        # An empty intersection used to pass here: an empty array broadcast
        # against ``[2, ]`` yields an empty comparison whose ``.all()`` is True.
        assert np.array_equal(i._indices, [2, ])
        assert np.array_equal(d._indices, [3, 5, 7])
        assert np.array_equal(s._indices, [3, 4, 5, 6, 7, 8])
@pytest.fixture(scope='module')
def node2ele(node, ele):
    """Arity-1 map sending nodes ``2k`` and ``2k + 1`` to element ``k``."""
    # Floor division keeps the indices integral.  True division (``/``)
    # produces floats such as 0.5 on Python 3, which is not what an index
    # map should be built from.
    vals = numpy.arange(nnodes) // 2
    return op2.Map(node, ele, 1, vals, 'node2ele')
if __name__ == '__main__':
    import os
    # pytest.main() expects a *list* of command-line arguments; current
    # pytest rejects a bare string with TypeError, so wrap the path.
    pytest.main([os.path.abspath(__file__)])