diff --git a/.github/workflows/build-mac.yml b/.github/workflows/build-mac.yml
new file mode 100644
index 0000000000..51aa9f5104
--- /dev/null
+++ b/.github/workflows/build-mac.yml
@@ -0,0 +1,58 @@
+name: Install and test Firedrake (macOS)
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    # By default this workflow is run on the "opened", "synchronize" and
+    # "reopened" events. We add "labelled" so it will run if the PR is given a label.
+    types: [opened, synchronize, reopened, labeled]
+
+concurrency:
+  # Cancels jobs running if new commits are pushed
+  group: >
+    ${{ github.workflow }}-
+    ${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Build Firedrake (macOS)
+    runs-on: [self-hosted, macOS]
+    # Only run this action if we are pushing to master or the PR is labelled "macOS"
+    if: ${{ (github.ref == 'refs/heads/master') || contains(github.event.pull_request.labels.*.name, 'macOS') }}
+    env:
+      FIREDRAKE_CI_TESTS: 1  # needed to symlink the checked out branch into the venv
+      OMP_NUM_THREADS: 1
+      OPENBLAS_NUM_THREADS: 1
+    steps:
+      - name: Add homebrew to PATH
+        # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#adding-a-system-path
+        run: echo "/opt/homebrew/bin" >> "$GITHUB_PATH"
+      - uses: actions/checkout@v4
+      - name: Pre-run cleanup
+        if: ${{ always() }}
+        run: |
+          cd ..
+          rm -rf firedrake_venv
+      - name: Install Python
+        run: brew install python python-setuptools
+      - name: Build Firedrake
+        run: |
+          cd ..
+          "$(brew --prefix)/bin/python3" \
+            firedrake/scripts/firedrake-install \
+            --venv-name firedrake_venv \
+            --disable-ssh \
+            || (cat firedrake-install.log && /bin/false)
+      - name: Run smoke tests
+        run: |
+          . ../firedrake_venv/bin/activate
+          python -m pytest -v tests/firedrake/regression/ -k "poisson_strong or stokes_mini or dg_advection"
+        timeout-minutes: 30
+      - name: Post-run cleanup
+        if: ${{ always() }}
+        run: |
+          cd ..
+          rm -rf firedrake_venv
diff --git a/.github/workflows/pip-mac.yml b/.github/workflows/pip-mac.yml
new file mode 100644
index 0000000000..c8d1fbfe2e
--- /dev/null
+++ b/.github/workflows/pip-mac.yml
@@ -0,0 +1,127 @@
+name: Pip install Firedrake (macOS)
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    # By default this workflow is run on the "opened", "synchronize" and
+    # "reopened" events. We add "labelled" so it will run if the PR is given a label.
+    types: [opened, synchronize, reopened, labeled]
+
+concurrency:
+  # Cancels jobs running if new commits are pushed
+  group: >
+    ${{ github.workflow }}-
+    ${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: "Build Firedrake using pip (macOS)"
+    runs-on: [self-hosted, macOS]
+    # Only run this action if we are pushing to master or the PR is labelled "macOS"
+    if: ${{ (github.ref == 'refs/heads/master') || contains(github.event.pull_request.labels.*.name, 'macOS') }}
+    env:
+      OMP_NUM_THREADS: 1
+      OPENBLAS_NUM_THREADS: 1
+    steps:
+      - name: Add homebrew to PATH
+        # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#adding-a-system-path
+        run: echo "/opt/homebrew/bin" >> "$GITHUB_PATH"
+
+      - name: Install homebrew packages
+        run: brew install gcc autoconf pkg-config make automake cmake ninja libtool boost openblas python python-setuptools mpich
+
+      - name: Cleanup (pre)
+        if: ${{ always() }}
+        run: |
+          rm -rf pip_venv
+          "$(brew --prefix)/bin/python3" -m pip cache purge
+
+      - name: Create a virtual environment
+        run: |
+          "$(brew --prefix)/bin/python3" -m venv pip_venv
+          mkdir pip_venv/src
+
+      - name: Install PETSc
+        run: |
+          cd pip_venv/src
+          git clone https://github.com/firedrakeproject/petsc.git
+          cd petsc
+          ./configure PETSC_DIR="$PWD" PETSC_ARCH=default \
+            --with-shared-libraries=1 \
+            --with-mpi-dir=/opt/homebrew \
+            --with-zlib \
+            --download-bison \
+            --download-hdf5 \
+            --download-hwloc \
+            --download-hypre \
+            --download-metis \
+            --download-mumps \
+            --download-netcdf \
+            --download-pastix \
+            --download-pnetcdf \
+            --download-ptscotch \
+            --download-scalapack \
+            --download-suitesparse \
+            --download-superlu_dist
+          make
+
+      - name: Install libsupermesh
+        run: |
+          source pip_venv/bin/activate
+          python -m pip install 'rtree>=1.2'
+          cd pip_venv/src
+          git clone https://github.com/firedrakeproject/libsupermesh.git
+          mkdir -p libsupermesh/build
+          cd libsupermesh/build
+          cmake .. \
+            -DBUILD_SHARED_LIBS=ON \
+            -DCMAKE_INSTALL_PREFIX="$VIRTUAL_ENV" \
+            -DMPI_C_COMPILER=/opt/homebrew/bin/mpicc \
+            -DMPI_CXX_COMPILER=/opt/homebrew/bin/mpicxx \
+            -DMPI_Fortran_COMPILER=/opt/homebrew/bin/mpif90 \
+            -DCMAKE_Fortran_COMPILER=/opt/homebrew/bin/mpif90 \
+            -DMPIEXEC_EXECUTABLE=/opt/homebrew/bin/mpiexec
+          make
+          make install
+
+      - uses: actions/checkout@v4
+        with:
+          path: pip_venv/src/firedrake
+
+      - name: Pip install
+        run: |
+          export PETSC_DIR="$PWD/pip_venv/src/petsc"
+          export PETSC_ARCH=default
+          export HDF5_DIR="$PETSC_DIR/$PETSC_ARCH"
+          export HDF5_MPI=ON
+          export CC=/opt/homebrew/bin/mpicc
+          export CXX=/opt/homebrew/bin/mpicxx
+          export MPICC="$CC"
+          source pip_venv/bin/activate
+          cd pip_venv/src
+          python -m pip install \
+            --log=firedrake-install.log \
+            --no-binary h5py \
+            -v -e './firedrake[test]'
+
+      - name: Install CI-specific test dependencies
+        run: |
+          source pip_venv/bin/activate
+          python -m pip install -U pytest-timeout
+
+      - name: Run Firedrake smoke tests
+        run: |
+          source pip_venv/bin/activate
+          cd pip_venv/src/firedrake
+          python -m pytest --timeout=1800 -v tests/firedrake/regression \
+            -k "poisson_strong or stokes_mini or dg_advection"
+        timeout-minutes: 30
+
+      - name: Cleanup (post)
+        if: ${{ always() }}
+        run: |
+          rm -rf pip_venv
+          "$(brew --prefix)/bin/python3" -m pip cache purge
diff --git a/.github/workflows/pyop2.yml b/.github/workflows/pyop2.yml
index 36065accf1..aed26fbbe4 100644
--- a/.github/workflows/pyop2.yml
+++ b/.github/workflows/pyop2.yml
@@ -1,14 +1,10 @@
-name: PyOP2
+name: Test PyOP2 and TSFC
 
-# Trigger the workflow on push or pull request,
-# but only for the master branch
 on:
   push:
     branches:
       - master
   pull_request:
-    branches:
-      - master
 
 jobs:
   test:
@@ -88,14 +84,14 @@ jobs:
           make
           make install
 
-      - name: Checkout PyOP2
+      - name: Checkout Firedrake
         uses: actions/checkout@v4
         with:
-          path: PyOP2
+          path: firedrake
 
       - name: Install PyOP2 dependencies
         shell: bash
-        working-directory: PyOP2
+        working-directory: firedrake
         run: |
           source ../venv/bin/activate
           python -m pip install -U pip
@@ -103,14 +99,25 @@ jobs:
 
       - name: Install PyOP2
         shell: bash
-        working-directory: PyOP2
+        working-directory: firedrake
         run: |
           source ../venv/bin/activate
-          python -m pip install -v ".[test]"
+          export CC=mpicc
+          export HDF5_DIR="$PETSC_DIR/$PETSC_ARCH"
+          export HDF5_MPI=ON
+          python -m pip install --no-binary h5py -v ".[test]"
+
+      - name: Run TSFC tests
+        shell: bash
+        working-directory: firedrake
+        run: |
+          source ../venv/bin/activate
+          pytest --tb=native --timeout=480 --timeout-method=thread -o faulthandler_timeout=540 -v tests/tsfc
+        timeout-minutes: 10
 
-      - name: Run tests
+      - name: Run PyOP2 tests
         shell: bash
-        working-directory: PyOP2
+        working-directory: firedrake
         run: |
           source ../venv/bin/activate
           # Running parallel test cases separately works around a bug in pytest-mpi
diff --git a/AUTHORS.rst b/AUTHORS.rst
index a4a5f4ddc5..80c7f1bde3 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -90,6 +90,8 @@ Christopher Hawkes
 
 Miklós Homolya
 
+Joshua Hope-Collins...........<https://www.imperial.ac.uk/people/joshua.hope-collins13>
+
 Christian T. Jacobs
 
 Darko Janeković
diff --git a/CITATION.rst b/CITATION.rst
index 2965de8d02..48513922ed 100644
--- a/CITATION.rst
+++ b/CITATION.rst
@@ -12,7 +12,7 @@ If you publish results using Firedrake, we would be grateful if you would cite t
 
   @manual{FiredrakeUserManual,
     title        = {Firedrake User Manual},
-    author       = {David A. Ham and Paul H. J. Kelly and Lawrence Mitchell and Colin J. Cotter and Robert C. Kirby and Koki Sagiyama and Nacime Bouziani and Thomas J. Gregory and Jack Betteridge and Daniel R. Shapero and Reuben W. Nixon-Hill and Connor J. Ward and Patrick E. Farrell and Pablo D. Brubeck and India Marsden and Daiane I. Dolci and Sophia Vorderwuelbecke and Thomas H. Gibson and Miklós Homolya and Tianjiao Sun and Andrew T. T. McRae and Fabio Luporini and Alastair Gregory and Michael Lange and Simon W. Funke and Florian Rathgeber and Gheorghe-Teodor Bercea and Graham R. Markall},
+    author       = {David A. Ham and Paul H. J. Kelly and Lawrence Mitchell and Colin J. Cotter and Robert C. Kirby and Koki Sagiyama and Nacime Bouziani and Thomas J. Gregory and Jack Betteridge and Daniel R. Shapero and Reuben W. Nixon-Hill and Connor J. Ward and Patrick E. Farrell and Pablo D. Brubeck and India Marsden and Daiane I. Dolci and Joshua Hope-Collins and Sophia Vorderwuelbecke and Thomas H. Gibson and Miklós Homolya and Tianjiao Sun and Andrew T. T. McRae and Fabio Luporini and Alastair Gregory and Michael Lange and Simon W. Funke and Florian Rathgeber and Gheorghe-Teodor Bercea and Graham R. Markall},
     organization = {Imperial College London and University of Oxford and Baylor University and University of Washington},
     edition      = {First edition},
     year         = {2023},
diff --git a/Makefile b/Makefile
index 033504d3ef..69c1a6a0a1 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,8 @@ lint:
 	@python -m flake8 $(FLAKE8_FORMAT) pyop2
 	@echo "    Linting PyOP2 scripts"
 	@python -m flake8 $(FLAKE8_FORMAT) pyop2/scripts --filename=*
+	@echo "    Linting TSFC"
+	@python -m flake8 $(FLAKE8_FORMAT) tsfc
 
 actionlint:
 	@echo "    Pull latest actionlint image"
diff --git a/docker/Dockerfile.env b/docker/Dockerfile.env
index a0ad86b989..ac15aa7e2f 100644
--- a/docker/Dockerfile.env
+++ b/docker/Dockerfile.env
@@ -13,7 +13,7 @@ RUN apt-get update \
                  cmake gfortran git libopenblas-serial-dev \
                  libtool python3-dev python3-pip python3-tk python3-venv \
                  python3-requests zlib1g-dev libboost-dev sudo gmsh \
-                 bison flex ninja-build \
+                 bison flex ninja-build pkg-config \
                  libocct-ocaf-dev libocct-data-exchange-dev \
                  swig graphviz \
                  libcurl4-openssl-dev libxml2-dev \
diff --git a/docs/source/download.rst b/docs/source/download.rst
index 30c37e78b0..74645fc682 100644
--- a/docs/source/download.rst
+++ b/docs/source/download.rst
@@ -135,6 +135,7 @@ they have the system dependencies:
 * zlib
 * flex, bison
 * Ninja
+* pkg-config
 
 Firedrake has been successfully installed on Windows 10 using the
 Windows Subsystem for Linux. There are more detailed instructions for
@@ -229,10 +230,10 @@ type::
 
 You should now be able to run ``firedrake-update``.
 
-Installing Firedrake with pip (experimental, Linux only)
+Installing Firedrake with pip (experimental)
 --------------------------------------------------------
 
-Firedrake has experimental support for installing using ``pip``, avoiding the need for the ``firedrake-install`` script. At present only Linux is tested using this install method.
+Firedrake has experimental support for installing using ``pip``, avoiding the need for the ``firedrake-install`` script.
 
 Requirements
 ~~~~~~~~~~~~
diff --git a/firedrake/assemble.py b/firedrake/assemble.py
index d3f4cfb8ba..f451b3f596 100644
--- a/firedrake/assemble.py
+++ b/firedrake/assemble.py
@@ -469,8 +469,9 @@ def base_form_assembly_visitor(self, expr, tensor, *args):
                 return sum(weight * arg for weight, arg in zip(expr.weights(), args))
             elif all(isinstance(op, firedrake.Cofunction) for op in args):
                 V, = set(a.function_space() for a in args)
-                res = sum([w*op.dat for (op, w) in zip(args, expr.weights())])
-                return firedrake.Cofunction(V, res)
+                result = firedrake.Cofunction(V)
+                result.dat.maxpy(expr.weights(), [a.dat for a in args])
+                return result
             elif all(isinstance(op, ufl.Matrix) for op in args):
                 res = tensor.petscmat if tensor else PETSc.Mat()
                 is_set = False
diff --git a/firedrake/bcs.py b/firedrake/bcs.py
index 272b74ddbf..f1568824bd 100644
--- a/firedrake/bcs.py
+++ b/firedrake/bcs.py
@@ -289,8 +289,10 @@ def __init__(self, V, g, sub_domain, method=None):
                 warnings.simplefilter('always', DeprecationWarning)
                 warnings.warn("Selecting a bcs method is deprecated. Only topological association is supported",
                               DeprecationWarning)
-        if len(V.boundary_set) and sub_domain not in V.boundary_set:
-            raise ValueError(f"Sub-domain {sub_domain} not in the boundary set of the restricted space.")
+        if len(V.boundary_set):
+            subs = [sub_domain] if type(sub_domain) in {int, str} else sub_domain
+            if any(sub not in V.boundary_set for sub in subs):
+                raise ValueError(f"Sub-domain {sub_domain} not in the boundary set of the restricted space.")
         super().__init__(V, sub_domain)
         if len(V) > 1:
             raise ValueError("Cannot apply boundary conditions on mixed spaces directly.\n"
@@ -311,10 +313,12 @@ def function_arg(self):
         return self._function_arg
 
     @PETSc.Log.EventDecorator()
-    def reconstruct(self, field=None, V=None, g=None, sub_domain=None, use_split=False):
+    def reconstruct(self, field=None, V=None, g=None, sub_domain=None, use_split=False, indices=()):
         fs = self.function_space()
         if V is None:
             V = fs
+        for index in indices:
+            V = V.sub(index)
         if g is None:
             g = self._original_arg
         if sub_domain is None:
@@ -686,3 +690,62 @@ def homogenize(bc):
         return DirichletBC(bc.function_space(), 0, bc.sub_domain)
     else:
         raise TypeError("homogenize only takes a DirichletBC or a list/tuple of DirichletBCs")
+
+
+def extract_subdomain_ids(bcs):
+    """Return a tuple of subdomain ids for each component of a MixedFunctionSpace.
+
+    Parameters
+    ----------
+    bcs :
+        A list of boundary conditions.
+
+    Returns
+    -------
+    A tuple of subdomain ids for each component of a MixedFunctionSpace.
+
+    """
+    if isinstance(bcs, DirichletBC):
+        bcs = (bcs,)
+    if len(bcs) == 0:
+        return None
+
+    V = bcs[0].function_space()
+    while V.parent:
+        V = V.parent
+
+    _chain = itertools.chain.from_iterable
+    _to_tuple = lambda s: (s,) if isinstance(s, (int, str)) else s
+    subdomain_ids = tuple(tuple(_chain(_to_tuple(bc.sub_domain)
+                                for bc in bcs if bc.function_space() == Vsub))
+                          for Vsub in V)
+    return subdomain_ids
+
+
+def restricted_function_space(V, ids):
+    """Create a :class:`.RestrictedFunctionSpace` from a tuple of subdomain ids.
+
+    Parameters
+    ----------
+    V :
+        FunctionSpace object to restrict
+    ids :
+        A tuple of subdomain ids.
+
+    Returns
+    -------
+    The RestrictedFunctionSpace.
+
+    """
+    if not ids:
+        return V
+
+    assert len(ids) == len(V)
+    spaces = [Vsub if len(boundary_set) == 0 else
+              firedrake.RestrictedFunctionSpace(Vsub, boundary_set=boundary_set)
+              for Vsub, boundary_set in zip(V, ids)]
+
+    if len(spaces) == 1:
+        return spaces[0]
+    else:
+        return firedrake.MixedFunctionSpace(spaces)
diff --git a/firedrake/eigensolver.py b/firedrake/eigensolver.py
index ae8a9f020b..ad05815527 100644
--- a/firedrake/eigensolver.py
+++ b/firedrake/eigensolver.py
@@ -1,8 +1,7 @@
 """Specify and solve finite element eigenproblems."""
 from firedrake.assemble import assemble
-from firedrake.bcs import DirichletBC
+from firedrake.bcs import extract_subdomain_ids, restricted_function_space
 from firedrake.function import Function
-from firedrake.functionspace import RestrictedFunctionSpace
 from firedrake.ufl_expr import TrialFunction, TestFunction
 from firedrake import utils
 from firedrake.petsc import OptionsManager, flatten_parameters
@@ -71,12 +70,12 @@ def __init__(self, A, M=None, bcs=None, bc_shift=0.0, restrict=True):
             M = inner(u, v) * dx
 
         if restrict and bcs:  # assumed u and v are in the same space here
-            V_res = RestrictedFunctionSpace(self.output_space, boundary_set=set([bc.sub_domain for bc in bcs]))
+            V_res = restricted_function_space(self.output_space, extract_subdomain_ids(bcs))
             u_res = TrialFunction(V_res)
             v_res = TestFunction(V_res)
             self.M = replace(M, {u: u_res, v: v_res})
             self.A = replace(A, {u: u_res, v: v_res})
-            self.bcs = [DirichletBC(V_res, bc.function_arg, bc.sub_domain) for bc in bcs]
+            self.bcs = [bc.reconstruct(V=V_res, indices=bc._indices) for bc in bcs]
             self.restricted_space = V_res
         else:
             self.A = A  # LHS
diff --git a/firedrake/function.py b/firedrake/function.py
index 100c02be87..da4d264971 100644
--- a/firedrake/function.py
+++ b/firedrake/function.py
@@ -124,12 +124,16 @@ def split(self):
 
     @utils.cached_property
     def _components(self):
-        if self.dof_dset.cdim == 1:
+        if self.function_space().rank == 0:
             return (self, )
         else:
-            return tuple(CoordinatelessFunction(self.function_space().sub(i), val=op2.DatView(self.dat, j),
-                                                name="view[%d](%s)" % (i, self.name()))
-                         for i, j in enumerate(np.ndindex(self.dof_dset.dim)))
+            if self.dof_dset.cdim == 1:
+                return (CoordinatelessFunction(self.function_space().sub(0), val=self.dat,
+                                               name=f"view[0]({self.name()})"),)
+            else:
+                return tuple(CoordinatelessFunction(self.function_space().sub(i), val=op2.DatView(self.dat, j),
+                                                    name=f"view[{i}]({self.name()})")
+                             for i, j in enumerate(np.ndindex(self.dof_dset.dim)))
 
     @PETSc.Log.EventDecorator()
     def sub(self, i):
@@ -143,9 +147,12 @@ def sub(self, i):
         rank-n :class:`~.FunctionSpace`, this returns a proxy object
         indexing the ith component of the space, suitable for use in
         boundary condition application."""
-        if len(self.function_space()) == 1:
-            return self._components[i]
-        return self.subfunctions[i]
+        mixed = len(self.function_space()) != 1
+        data = self.subfunctions if mixed else self._components
+        bound = len(data)
+        if i < 0 or i >= bound:
+            raise IndexError(f"Invalid component {i}, not in [0, {bound})")
+        return data[i]
 
     @property
     def cell_set(self):
@@ -327,7 +334,7 @@ def split(self):
 
     @utils.cached_property
     def _components(self):
-        if self.function_space().block_size == 1:
+        if self.function_space().rank == 0:
             return (self, )
         else:
             return tuple(type(self)(self.function_space().sub(i), self.topological.sub(i))
@@ -345,9 +352,12 @@ def sub(self, i):
         :func:`~.VectorFunctionSpace` or :func:`~.TensorFunctionSpace` this returns a proxy object
         indexing the ith component of the space, suitable for use in
         boundary condition application."""
-        if len(self.function_space()) == 1:
-            return self._components[i]
-        return self.subfunctions[i]
+        mixed = len(self.function_space()) != 1
+        data = self.subfunctions if mixed else self._components
+        bound = len(data)
+        if i < 0 or i >= bound:
+            raise IndexError(f"Invalid component {i}, not in [0, {bound})")
+        return data[i]
 
     @PETSc.Log.EventDecorator()
     @FunctionMixin._ad_annotate_project
diff --git a/firedrake/functionspace.py b/firedrake/functionspace.py
index 74dab04703..509f7a2863 100644
--- a/firedrake/functionspace.py
+++ b/firedrake/functionspace.py
@@ -308,20 +308,21 @@ def rec(eles):
 
 
 @PETSc.Log.EventDecorator("CreateFunctionSpace")
-def RestrictedFunctionSpace(function_space, name=None, boundary_set=[]):
+def RestrictedFunctionSpace(function_space, boundary_set=[], name=None):
     """Create a :class:`.RestrictedFunctionSpace`.
 
     Parameters
     ----------
     function_space :
         FunctionSpace object to restrict
-    name :
-        An optional name for the function space.
     boundary_set :
         A set of subdomains of the mesh in which Dirichlet boundary conditions
         will be applied.
+    name :
+        An optional name for the function space.
 
     """
-    return impl.WithGeometry.create(impl.RestrictedFunctionSpace(function_space, name=name,
-                                                                 boundary_set=boundary_set),
+    return impl.WithGeometry.create(impl.RestrictedFunctionSpace(function_space,
+                                                                 boundary_set=boundary_set,
+                                                                 name=name),
                                     function_space.mesh())
diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py
index 53f69e92ce..65592f6659 100644
--- a/firedrake/functionspaceimpl.py
+++ b/firedrake/functionspaceimpl.py
@@ -182,10 +182,12 @@ def _components(self):
 
     @PETSc.Log.EventDecorator()
     def sub(self, i):
-        bound = len(self._components)
+        mixed = len(self) != 1
+        data = self.subfunctions if mixed else self._components
+        bound = len(data)
         if i < 0 or i >= bound:
-            raise IndexError("Invalid component %d, not in [0, %d)" % (i, bound))
-        return self._components[i]
+            raise IndexError(f"Invalid component {i}, not in [0, {bound})")
+        return data[i]
 
     @utils.cached_property
     def dm(self):
@@ -654,13 +656,16 @@ def __getitem__(self, i):
 
     @utils.cached_property
     def _components(self):
-        return tuple(ComponentFunctionSpace(self, i) for i in range(self.block_size))
+        if self.rank == 0:
+            return self.subfunctions
+        else:
+            return tuple(ComponentFunctionSpace(self, i) for i in range(self.block_size))
 
     def sub(self, i):
         r"""Return a view into the ith component."""
-        if self.rank == 0:
-            assert i == 0
-            return self
+        bound = len(self._components)
+        if i < 0 or i >= bound:
+            raise IndexError(f"Invalid component {i}, not in [0, {bound})")
         return self._components[i]
 
     def __mul__(self, other):
@@ -860,17 +865,16 @@ class RestrictedFunctionSpace(FunctionSpace):
     output of the solver.
 
     :arg function_space: The :class:`FunctionSpace` to restrict.
+    :kwarg boundary_set: A set of subdomains on which a DirichletBC will be applied.
     :kwarg name: An optional name for this :class:`RestrictedFunctionSpace`,
         useful for later identification.
-    :kwarg boundary_set: A set of subdomains on which a DirichletBC will be
-        applied.
 
     Notes
     -----
     If using this class to solve or similar, a list of DirichletBCs will still
     need to be specified on this space and passed into the function.
     """
-    def __init__(self, function_space, name=None, boundary_set=frozenset()):
+    def __init__(self, function_space, boundary_set=frozenset(), name=None):
         label = ""
         for boundary_domain in boundary_set:
             label += str(boundary_domain)
@@ -884,8 +888,7 @@ def __init__(self, function_space, name=None, boundary_set=frozenset()):
                                                      label=self._label)
         self.function_space = function_space
         self.name = name or (function_space.name or "Restricted" + "_"
-                             + "_".join(sorted(
-                                        [str(i) for i in self.boundary_set])))
+                             + "_".join(sorted(map(str, self.boundary_set))))
 
     def set_shared_data(self):
         sdata = get_shared_data(self._mesh, self.ufl_element(), self.boundary_set)
diff --git a/firedrake/interpolation.py b/firedrake/interpolation.py
index 1a26285904..e8555ba7dd 100644
--- a/firedrake/interpolation.py
+++ b/firedrake/interpolation.py
@@ -905,6 +905,8 @@ def make_interpolator(expr, V, subset, access, bcs=None):
     elif len(arguments) == 1:
         if isinstance(V, firedrake.Function):
             raise ValueError("Cannot interpolate an expression with an argument into a Function")
+        if len(V) > 1:
+            raise NotImplementedError("Interpolation of mixed expressions with arguments is not supported")
         argfs = arguments[0].function_space()
         source_mesh = argfs.mesh()
         argfs_map = argfs.cell_node_map()
@@ -992,10 +994,29 @@ def callable():
         if numpy.prod(expr.ufl_shape, dtype=int) != V.value_size:
             raise RuntimeError('Expression of length %d required, got length %d'
                                % (V.value_size, numpy.prod(expr.ufl_shape, dtype=int)))
-        if len(V) > 1:
-            raise NotImplementedError(
-                "UFL expressions for mixed functions are not yet supported.")
-        loops.extend(_interpolator(V, tensor, expr, subset, arguments, access, bcs=bcs))
+
+        if len(V) == 1:
+            loops.extend(_interpolator(V, tensor, expr, subset, arguments, access, bcs=bcs))
+        else:
+            if (hasattr(expr, "subfunctions") and len(expr.subfunctions) == len(V)
+                    and all(sub_expr.ufl_shape == Vsub.value_shape for Vsub, sub_expr in zip(V, expr.subfunctions))):
+                # Use subfunctions if they match the target shapes
+                expressions = expr.subfunctions
+            else:
+                # Unflatten the expression into the shapes of the mixed components
+                offset = 0
+                expressions = []
+                for Vsub in V:
+                    if len(Vsub.value_shape) == 0:
+                        expressions.append(expr[offset])
+                    else:
+                        components = [expr[offset + j] for j in range(Vsub.value_size)]
+                        expressions.append(ufl.as_tensor(numpy.reshape(components, Vsub.value_shape)))
+                    offset += Vsub.value_size
+            # Interpolate each sub expression into each function space
+            for Vsub, sub_tensor, sub_expr in zip(V, tensor, expressions):
+                loops.extend(_interpolator(Vsub, sub_tensor, sub_expr, subset, arguments, access, bcs=bcs))
+
         if bcs and len(arguments) == 0:
             loops.extend(partial(bc.apply, f) for bc in bcs)
 
diff --git a/firedrake/mesh.py b/firedrake/mesh.py
index 06492165ac..7d92c7ecfe 100644
--- a/firedrake/mesh.py
+++ b/firedrake/mesh.py
@@ -1966,7 +1966,7 @@ def _renumber_entities(self, reorder):
         if reorder:
             swarm = self.topology_dm
             parent = self._parent_mesh.topology_dm
-            swarm_parent_cell_nums = swarm.getField("DMSwarm_cellid")
+            swarm_parent_cell_nums = swarm.getField("DMSwarm_cellid").ravel()
             parent_renum = self._parent_mesh._dm_renumbering.getIndices()
             pStart, _ = parent.getChart()
             parent_renum_inv = np.empty_like(parent_renum)
@@ -2063,7 +2063,7 @@ def cell_parent_cell_list(self):
         """Return a list of parent mesh cells numbers in vertex only
         mesh cell order.
         """
-        cell_parent_cell_list = np.copy(self.topology_dm.getField("parentcellnum"))
+        cell_parent_cell_list = np.copy(self.topology_dm.getField("parentcellnum").ravel())
         self.topology_dm.restoreField("parentcellnum")
         return cell_parent_cell_list[self.cell_closure[:, -1]]
 
@@ -2082,7 +2082,7 @@ def cell_parent_base_cell_list(self):
         """
         if not isinstance(self._parent_mesh, ExtrudedMeshTopology):
             raise AttributeError("Parent mesh is not extruded")
-        cell_parent_base_cell_list = np.copy(self.topology_dm.getField("parentcellbasenum"))
+        cell_parent_base_cell_list = np.copy(self.topology_dm.getField("parentcellbasenum").ravel())
         self.topology_dm.restoreField("parentcellbasenum")
         return cell_parent_base_cell_list[self.cell_closure[:, -1]]
 
@@ -2103,7 +2103,7 @@ def cell_parent_extrusion_height_list(self):
         """
         if not isinstance(self._parent_mesh, ExtrudedMeshTopology):
             raise AttributeError("Parent mesh is not extruded.")
-        cell_parent_extrusion_height_list = np.copy(self.topology_dm.getField("parentcellextrusionheight"))
+        cell_parent_extrusion_height_list = np.copy(self.topology_dm.getField("parentcellextrusionheight").ravel())
         self.topology_dm.restoreField("parentcellextrusionheight")
         return cell_parent_extrusion_height_list[self.cell_closure[:, -1]]
 
@@ -2123,7 +2123,7 @@ def mark_entities(self, tf, label_value, label_name=None):
     @utils.cached_property  # TODO: Recalculate if mesh moves
     def cell_global_index(self):
         """Return a list of unique cell IDs in vertex only mesh cell order."""
-        cell_global_index = np.copy(self.topology_dm.getField("globalindex"))
+        cell_global_index = np.copy(self.topology_dm.getField("globalindex").ravel())
         self.topology_dm.restoreField("globalindex")
         return cell_global_index
 
@@ -2158,8 +2158,8 @@ def _make_input_ordering_sf(swarm, nroots, ilocal):
         # ilocal = None -> leaves are swarm points [0, 1, 2, ...).
         # ilocal can also be Firedrake cell numbers.
         sf = PETSc.SF().create(comm=swarm.comm)
-        input_ranks = swarm.getField("inputrank")
-        input_indices = swarm.getField("inputindex")
+        input_ranks = swarm.getField("inputrank").ravel()
+        input_indices = swarm.getField("inputindex").ravel()
         nleaves = len(input_ranks)
         if ilocal is not None and nleaves != len(ilocal):
             swarm.restoreField("inputrank")
@@ -3652,7 +3652,7 @@ def _pic_swarm_in_mesh(
     # local_points[halo_indices] (it also updates local_points[~halo_indices]`, not changing any values there).
     # If some index of local_points_reduced corresponds to a missing point, local_points_reduced[index] is not updated
     # when we reduce and it does not update any leaf data, i.e., local_points, when we bcast.
-    owners = swarm.getField("DMSwarm_rank")
+    owners = swarm.getField("DMSwarm_rank").ravel()
     halo_indices, = np.where(owners != parent_mesh.comm.rank)
     halo_indices = halo_indices.astype(IntType)
     n = coords.shape[0]
@@ -3868,13 +3868,13 @@ def _dmswarm_create(
 
     # NOTE ensure that swarm.restoreField is called for each field too!
     swarm_coords = swarm.getField("DMSwarmPIC_coor").reshape((num_vertices, gdim))
-    swarm_parent_cell_nums = swarm.getField("DMSwarm_cellid")
-    field_parent_cell_nums = swarm.getField("parentcellnum")
+    swarm_parent_cell_nums = swarm.getField("DMSwarm_cellid").ravel()
+    field_parent_cell_nums = swarm.getField("parentcellnum").ravel()
     field_reference_coords = swarm.getField("refcoord").reshape((num_vertices, tdim))
-    field_global_index = swarm.getField("globalindex")
-    field_rank = swarm.getField("DMSwarm_rank")
-    field_input_rank = swarm.getField("inputrank")
-    field_input_index = swarm.getField("inputindex")
+    field_global_index = swarm.getField("globalindex").ravel()
+    field_rank = swarm.getField("DMSwarm_rank").ravel()
+    field_input_rank = swarm.getField("inputrank").ravel()
+    field_input_index = swarm.getField("inputindex").ravel()
     swarm_coords[...] = coords
     swarm_parent_cell_nums[...] = plex_parent_cell_nums
     field_parent_cell_nums[...] = parent_cell_nums
@@ -3895,8 +3895,8 @@ def _dmswarm_create(
     swarm.restoreField("DMSwarm_cellid")
 
     if extruded:
-        field_base_parent_cell_nums = swarm.getField("parentcellbasenum")
-        field_extrusion_heights = swarm.getField("parentcellextrusionheight")
+        field_base_parent_cell_nums = swarm.getField("parentcellbasenum").ravel()
+        field_extrusion_heights = swarm.getField("parentcellextrusionheight").ravel()
         field_base_parent_cell_nums[...] = base_parent_cell_nums
         field_extrusion_heights[...] = extrusion_heights
         swarm.restoreField("parentcellbasenum")
diff --git a/firedrake/preconditioners/__init__.py b/firedrake/preconditioners/__init__.py
index cd75ae7380..491a73657b 100644
--- a/firedrake/preconditioners/__init__.py
+++ b/firedrake/preconditioners/__init__.py
@@ -12,3 +12,4 @@
 from firedrake.preconditioners.fdm import *          # noqa: F401
 from firedrake.preconditioners.hiptmair import *     # noqa: F401
 from firedrake.preconditioners.facet_split import *  # noqa: F401
+from firedrake.preconditioners.bddc import *         # noqa: F401
diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py
new file mode 100644
index 0000000000..5fb1fca51a
--- /dev/null
+++ b/firedrake/preconditioners/bddc.py
@@ -0,0 +1,142 @@
+from firedrake.preconditioners.base import PCBase
+from firedrake.preconditioners.patch import bcdofs
+from firedrake.preconditioners.facet_split import restrict, get_restriction_indices
+from firedrake.petsc import PETSc
+from firedrake.dmhooks import get_function_space, get_appctx
+from firedrake.ufl_expr import TestFunction, TrialFunction
+from firedrake.functionspace import FunctionSpace, VectorFunctionSpace, TensorFunctionSpace
+from ufl import curl, div, HCurl, HDiv, inner, dx
+from pyop2.utils import as_tuple
+import numpy
+
+__all__ = ("BDDCPC",)
+
+
+class BDDCPC(PCBase):
+    """PC for PETSc PCBDDC (Balancing Domain Decomposition by Constraints).
+    This is a domain decomposition method using subdomains defined by the
+    blocks in a Mat of type IS.
+
+    Internally, this PC creates a PETSc PCBDDC object that can be controlled by
+    the options:
+    - ``'bddc_pc_bddc_neumann'`` to set sub-KSPs on subdomains excluding corners,
+    - ``'bddc_pc_bddc_dirichlet'`` to set sub-KSPs on subdomain interiors,
+    - ``'bddc_pc_bddc_coarse'`` to set the coarse solver KSP.
+
+    This PC also inspects optional arguments supplied in the application context:
+    - ``'discrete_gradient'`` for problems in H(curl), this sets the arguments
+    (a Mat tabulating the gradient of the auxiliary H1 space) and
+    keyword arguments supplied to ``PETSc.PC.setBDDCDiscreteGradient``.
+    - ``'divergence_mat'`` for 3D problems in H(div), this sets the Mat with the
+    assembled bilinear form testing the divergence against an L2 space.
+
+    Notes
+    -----
+    Currently the Mat type IS is only supported by FDMPC.
+
+    """
+
+    _prefix = "bddc_"
+
+    def initialize(self, pc):
+        # Get context from pc
+        _, P = pc.getOperators()
+        dm = pc.getDM()
+        self.prefix = pc.getOptionsPrefix() + self._prefix
+
+        V = get_function_space(dm)
+
+        # Create new PC object as BDDC type
+        bddcpc = PETSc.PC().create(comm=pc.comm)
+        bddcpc.incrementTabLevel(1, parent=pc)
+        bddcpc.setOptionsPrefix(self.prefix)
+        bddcpc.setOperators(*pc.getOperators())
+        bddcpc.setType(PETSc.PC.Type.BDDC)
+
+        opts = PETSc.Options(bddcpc.getOptionsPrefix())
+        if V.ufl_element().variant() == "fdm" and "pc_bddc_use_local_mat_graph" not in opts:
+            # Disable computation of disconected components of subdomain interfaces
+            opts["pc_bddc_use_local_mat_graph"] = False
+
+        ctx = get_appctx(dm)
+        bcs = tuple(ctx._problem.bcs)
+        if V.extruded:
+            boundary_nodes = numpy.unique(numpy.concatenate(list(map(V.boundary_nodes, ("on_boundary", "top", "bottom")))))
+        else:
+            boundary_nodes = V.boundary_nodes("on_boundary")
+        if len(bcs) == 0:
+            dir_nodes = numpy.empty(0, dtype=boundary_nodes.dtype)
+        else:
+            dir_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs]))
+        neu_nodes = numpy.setdiff1d(boundary_nodes, dir_nodes)
+
+        V.dof_dset.lgmap.apply(dir_nodes, result=dir_nodes)
+        dir_bndr = PETSc.IS().createGeneral(dir_nodes, comm=pc.comm)
+        bddcpc.setBDDCDirichletBoundaries(dir_bndr)
+
+        V.dof_dset.lgmap.apply(neu_nodes, result=neu_nodes)
+        neu_bndr = PETSc.IS().createGeneral(neu_nodes, comm=pc.comm)
+        bddcpc.setBDDCNeumannBoundaries(neu_bndr)
+
+        appctx = self.get_appctx(pc)
+        sobolev_space = V.ufl_element().sobolev_space
+
+        tdim = V.mesh().topological_dimension()
+        degree = max(as_tuple(V.ufl_element().degree()))
+        if tdim >= 2 and V.finat_element.formdegree == tdim-1:
+            B = appctx.get("divergence_mat", None)
+            if B is None:
+                from firedrake.assemble import assemble
+                d = {HCurl: curl, HDiv: div}[sobolev_space]
+                if V.shape == ():
+                    make_function_space = FunctionSpace
+                elif len(V.shape) == 1:
+                    make_function_space = VectorFunctionSpace
+                else:
+                    make_function_space = TensorFunctionSpace
+                Q = make_function_space(V.mesh(), "DG", degree-1)
+                b = inner(d(TrialFunction(V)), TestFunction(Q)) * dx(degree=2*(degree-1))
+                B = assemble(b, mat_type="matfree")
+            bddcpc.setBDDCDivergenceMat(B.petscmat)
+        elif sobolev_space == HCurl:
+            gradient = appctx.get("discrete_gradient", None)
+            if gradient is None:
+                from firedrake.preconditioners.fdm import tabulate_exterior_derivative
+                from firedrake.preconditioners.hiptmair import curl_to_grad
+                Q = FunctionSpace(V.mesh(), curl_to_grad(V.ufl_element()))
+                gradient = tabulate_exterior_derivative(Q, V)
+                corners = get_vertex_dofs(Q)
+                gradient.compose('_elements_corners', corners)
+                grad_args = (gradient,)
+                grad_kwargs = {'order': degree}
+            else:
+                try:
+                    grad_args, grad_kwargs = gradient
+                except ValueError:
+                    grad_args = (gradient,)
+                    grad_kwargs = dict()
+
+            bddcpc.setBDDCDiscreteGradient(*grad_args, **grad_kwargs)
+
+        bddcpc.setFromOptions()
+        self.pc = bddcpc
+
+    def view(self, pc, viewer=None):
+        self.pc.view(viewer=viewer)
+
+    def update(self, pc):
+        pass
+
+    def apply(self, pc, x, y):
+        self.pc.apply(x, y)
+
+    def applyTranspose(self, pc, x, y):
+        self.pc.applyTranspose(x, y)
+
+
+def get_vertex_dofs(V):
+    W = FunctionSpace(V.mesh(), restrict(V.ufl_element(), "vertex"))
+    indices = get_restriction_indices(V, W)
+    V.dof_dset.lgmap.apply(indices, result=indices)
+    vertex_dofs = PETSc.IS().createGeneral(indices, comm=V.comm)
+    return vertex_dofs
diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py
index 82e56057e7..1696801cb4 100644
--- a/firedrake/preconditioners/fdm.py
+++ b/firedrake/preconditioners/fdm.py
@@ -13,6 +13,7 @@
 from firedrake.functionspace import FunctionSpace, MixedFunctionSpace
 from firedrake.function import Function
 from firedrake.cofunction import Cofunction
+from firedrake.parloops import par_loop
 from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions
 from firedrake.utils import cached_property
 from firedrake_citations import Citations
@@ -63,6 +64,33 @@
 __all__ = ("FDMPC", "PoissonFDMPC")
 
 
+def broken_function(V, val):
+    W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element()))
+    w = Function(W, dtype=val.dtype)
+    v = Function(V, val=val)
+    domain = "{[i]: 0 <= i < v.dofs}"
+    instructions = """
+    for i
+        w[i] = v[i]
+    end
+    """
+    par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)})
+    return w
+
+
+def mask_local_indices(V, lgmap, repeated=False):
+    mask = lgmap.indices
+    if repeated:
+        w = broken_function(V, mask)
+        V = w.function_space()
+        mask = w.dat.data_ro_with_halos
+    indices = numpy.arange(len(mask), dtype=PETSc.IntType)
+    indices[mask == -1] = -1
+    indices_dat = V.make_dat(val=indices)
+    indices_acc = indices_dat(op2.READ, V.cell_node_map())
+    return indices_acc
+
+
 class FDMPC(PCBase):
     """
     A preconditioner for tensor-product elements that changes the shape
@@ -100,6 +128,12 @@ def initialize(self, pc):
         use_amat = options.getBool("pc_use_amat", True)
         use_static_condensation = options.getBool("static_condensation", False)
         pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ)
+        self.mat_type = pmat_type
+
+        allow_repeated = False
+        if pmat_type == "is":
+            allow_repeated = options.getBool("mat_is_allow_repeated", True)
+        self.allow_repeated = allow_repeated
 
         appctx = self.get_appctx(pc)
         fcp = appctx.get("form_compiler_parameters") or {}
@@ -124,14 +158,11 @@ def initialize(self, pc):
 
         # Transform the problem into the space with FDM shape functions
         V = J.arguments()[-1].function_space()
-        element = V.ufl_element()
-        e_fdm = element.reconstruct(variant=self._variant)
-
-        if element == e_fdm:
-            V_fdm, J_fdm, bcs_fdm = (V, J, bcs)
+        V_fdm = V.reconstruct(variant=self._variant)
+        if V == V_fdm:
+            J_fdm, bcs_fdm = (J, bcs)
         else:
             # Reconstruct Jacobian and bcs with variant element
-            V_fdm = FunctionSpace(V.mesh(), e_fdm)
             J_fdm = J(*(t.reconstruct(function_space=V_fdm) for t in J.arguments()))
             bcs_fdm = []
             for bc in bcs:
@@ -174,11 +205,10 @@ def initialize(self, pc):
         self.pc = fdmpc
 
         # Assemble the FDM preconditioner with sparse local matrices
+        self.V = V_fdm
         Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp,
                                                                    pmat_type, use_static_condensation, use_amat)
-        Pmat.setNullSpace(Amat.getNullSpace())
-        Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace())
-        Pmat.setNearNullSpace(Amat.getNearNullSpace())
+        self.assembly_callables.append(partial(Pmat.viewFromOptions, "-pmat_view", fdmpc))
         self._assemble_P()
 
         fdmpc.setOperators(A=Amat, P=Pmat)
@@ -232,10 +262,9 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati
 
         # Create data structures needed for assembly
         self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V}
-        self.indices = {Vsub: op2.Dat(Vsub.dof_dset, self.lgmaps[Vsub].indices) for Vsub in V}
+        self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], self.allow_repeated) for Vsub in V}
         self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp)
         self.assemblers = {}
-        self.kernels = []
         Pmats = {}
 
         # Dictionary with kernel to compute the Schur complement
@@ -260,7 +289,6 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati
                 Amat, Pmats = self.condense(Amat, J, bcs, fcp, pc_type=interior_pc_type)
 
         diagonal_terms = []
-        addv = PETSc.InsertMode.ADD_VALUES
         # Loop over all pairs of subspaces
         for Vrow, Vcol in product(V, V):
             if (Vrow, Vcol) in Pmats:
@@ -272,43 +300,33 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati
 
             # Preallocate and assemble the FDM auxiliary sparse operator
             on_diag = Vrow == Vcol
-            sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol))
-            ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ
-
-            preallocator = PETSc.Mat().create(comm=self.comm)
-            preallocator.setType(PETSc.Mat.Type.PREALLOCATOR)
-            preallocator.setSizes(sizes)
-            preallocator.setUp()
-            preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False)
-            self.set_values(preallocator, Vrow, Vcol, addv, mat_type=ptype)
-            preallocator.assemble()
-            dnz, onz = get_preallocation(preallocator, sizes[0][0])
-            if on_diag:
-                numpy.maximum(dnz, 1, out=dnz)
-            preallocator.destroy()
-
-            P = PETSc.Mat().create(comm=self.comm)
-            P.setType(ptype)
-            P.setSizes(sizes)
-            P.setPreallocationNNZ((dnz, onz))
-            P.setOption(PETSc.Mat.Option.IGNORE_OFF_PROC_ENTRIES, False)
-            P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True)
-            P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)
-            P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag)
-            if ptype.endswith("sbaij"):
-                P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True)
-            P.setUp()
+            P = self.setup_block(Vrow, Vcol)
+            addv = self.insert_mode[Vrow, Vcol]
+
+            assemble_sparsity = P.getType() == "is"
+            if assemble_sparsity:
+                self.set_values(P, Vrow, Vcol, mat_type="preallocator")
+                if on_diag:
+                    # populate diagonal entries
+                    i = numpy.arange(P.getLGMap()[0].getSize(), dtype=PETSc.IntType)[:, None]
+                    v = numpy.ones(i.shape, dtype=PETSc.ScalarType)
+                    P.setValuesLocalRCV(i, i, v, addv=addv)
+                P.assemble()
 
             # append callables to zero entries, insert element matrices, and apply BCs
             assembly_callables.append(P.zeroEntries)
-            assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv, mat_type=ptype))
+            assembly_callables.append(partial(self.set_values, P, Vrow, Vcol))
             if on_diag:
                 own = Vrow.dof_dset.layout_vec.getLocalSize()
                 bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None]
-                Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs)
-                if len(bdofs) > 0:
-                    vals = numpy.ones(bdofs.shape, dtype=PETSc.RealType)
-                    assembly_callables.append(partial(P.setValuesRCV, bdofs, bdofs, vals, addv))
+                if assemble_sparsity:
+                    Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs)
+                    assembly_callables.append(P.assemble)
+                    assembly_callables.append(partial(P.zeroRows, bdofs, 1.0))
+                else:
+                    v = numpy.ones(bdofs.shape, PETSc.ScalarType)
+                    assembly_callables.append(partial(P.setValuesLocalRCV, bdofs, bdofs, v, addv=addv))
+                    assembly_callables.append(P.assemble)
 
                 gamma = self.coefficients.get("facet")
                 if gamma is not None and gamma.function_space() == Vrow.dual():
@@ -316,11 +334,29 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati
                         diagonal_terms.append(partial(P.setDiagonal, diag, addv=addv))
             Pmats[Vrow, Vcol] = P
 
+        def sub_nullspace(nsp, iset):
+            if not nsp.handle or iset is None:
+                return nsp
+            vectors = [vec.getSubVector(iset).copy() for vec in nsp.getVecs()]
+            for v in vectors:
+                v.normalize()
+            return PETSc.NullSpace().create(constant=nsp.hasConstant(),
+                                            vectors=vectors,
+                                            comm=nsp.getComm())
+
+        def set_nullspaces(P, A, iset=None):
+            P.setNullSpace(sub_nullspace(A.getNullSpace(), iset))
+            P.setTransposeNullSpace(sub_nullspace(A.getTransposeNullSpace(), iset))
+            P.setNearNullSpace(sub_nullspace(A.getNearNullSpace(), iset))
+
         if len(V) == 1:
             Pmat = Pmats[V, V]
+            set_nullspaces(Pmat, Amat)
         else:
             Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm)
-        assembly_callables.append(Pmat.assemble)
+            for Vrow, iset in zip(V, Pmat.getNestISs()[0]):
+                set_nullspaces(Pmats[Vrow, Vrow], Amat, iset=iset)
+            assembly_callables.append(Pmat.assemble)
         assembly_callables.extend(diagonal_terms)
         return Amat, Pmat, assembly_callables
 
@@ -418,10 +454,10 @@ def condense(self, A, J, bcs, fcp, pc_type="icc"):
         Pmats = dict(Smats)
         C0 = self.assemble_reference_tensor(V0)
         R0 = self.assemble_reference_tensor(V0, transpose=True)
+
         A0 = TripleProductKernel(R0, self._element_mass_matrix, C0)
         K0 = InteriorSolveKernel(A0, J00, fcp=fcp, pc_type=pc_type)
         K1 = ImplicitSchurComplementKernel(K0)
-        self.kernels.extend((A0, K0, K1))
         kernels = {V0: K0, V1: K1}
         comm = self.comm
         args = [self.coefficients["cell"], V0.mesh().coordinates, *J00.coefficients(), *extract_firedrake_constants(J00)]
@@ -495,9 +531,7 @@ def assemble_coefficients(self, J, fcp, block_diagonal=False):
         V = args_J[0].function_space()
         fe = V.finat_element
         formdegree = fe.formdegree
-        degree = fe.degree
-        if type(degree) != int:
-            degree, = set(degree)
+        degree, = set(as_tuple(fe.degree))
         qdeg = degree
         if formdegree == tdim:
             qfam = "DG" if tdim == 1 else "DQ"
@@ -545,6 +579,7 @@ def assemble_coefficients(self, J, fcp, block_diagonal=False):
         facet_integrals = [i for i in J.integrals() if "facet" in i.integral_type()]
         J_facet = expand_indices(expand_derivatives(ufl.Form(facet_integrals)))
         if len(J_facet.integrals()) > 0:
+            from firedrake.assemble import get_assembler
             gamma = coefficients.setdefault("facet", Function(V.dual()))
             assembly_callables.append(partial(get_assembler(J_facet, form_compiler_parameters=fcp, tensor=gamma, diagonal=True).assemble, tensor=gamma))
         return coefficients, assembly_callables
@@ -564,9 +599,7 @@ def assemble_reference_tensor(self, V, transpose=False, sort_interior=False):
         fe = V.finat_element
         tdim = fe.cell.get_spatial_dimension()
         formdegree = fe.formdegree
-        degree = fe.degree
-        if type(degree) != int:
-            degree, = set(degree)
+        degree, = set(as_tuple(fe.degree))
         if formdegree == tdim:
             degree = degree + 1
         is_interior, is_facet = is_restricted(fe)
@@ -663,8 +696,109 @@ def _element_mass_matrix(self):
             data = numpy.tile(numpy.eye(shape[2], dtype=data.dtype), shape[:1] + (1,)*(len(shape)-1))
         return PETSc.Mat().createAIJ((nrows, nrows), csr=(ai, aj, data), comm=COMM_SELF)
 
+    @cached_property
+    def _element_kernels(self):
+        M = self._element_mass_matrix
+        element_kernels = {}
+        for Vrow, Vcol in product(self.V, self.V):
+            # Interpolation of basis and exterior derivative onto broken spaces
+            C1 = self.assemble_reference_tensor(Vcol)
+            R1 = self.assemble_reference_tensor(Vrow, transpose=True)
+            # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b
+            element_kernel = TripleProductKernel(R1, M, C1)
+            schur_kernel = self.schur_kernel.get(Vrow) if Vrow == Vcol else None
+            if schur_kernel is not None:
+                V0 = FunctionSpace(Vrow.mesh(), restrict_element(self.embedding_element, "interior"))
+                C0 = self.assemble_reference_tensor(V0, sort_interior=True)
+                R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True)
+                element_kernel = schur_kernel(element_kernel,
+                                              TripleProductKernel(R1, M, C0),
+                                              TripleProductKernel(R0, M, C1),
+                                              TripleProductKernel(R0, M, C0))
+            element_kernels[Vrow, Vcol] = element_kernel
+        return element_kernels
+
+    @cached_property
+    def insert_mode(self):
+        is_dg = {}
+        for Vsub in self.V:
+            element = Vsub.finat_element
+            is_dg[Vsub] = element.entity_dofs() == element.entity_closure_dofs()
+
+        insert_mode = {}
+        for Vrow, Vcol in product(self.V, self.V):
+            addv = PETSc.InsertMode.ADD_VALUES
+            if is_dg[Vrow] or is_dg[Vcol]:
+                addv = PETSc.InsertMode.INSERT
+            insert_mode[Vrow, Vcol] = addv
+        return insert_mode
+
+    @cached_property
+    def assembly_lgmaps(self):
+        if self.mat_type != "is":
+            return {Vsub: Vsub.dof_dset.lgmap for Vsub in self.V}
+        lgmaps = {}
+        for Vsub in self.V:
+            lgmap = Vsub.dof_dset.lgmap
+            if self.allow_repeated:
+                indices = broken_function(Vsub, lgmap.indices).dat.data_ro
+            else:
+                indices = lgmap.indices.copy()
+                local_indices = numpy.arange(len(indices), dtype=PETSc.IntType)
+                cell_node_map = broken_function(Vsub, local_indices).dat.data_ro
+                ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map), assume_unique=True)
+                indices[ghost] = -1
+            lgmaps[Vsub] = PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm())
+        return lgmaps
+
+    def setup_block(self, Vrow, Vcol):
+        # Preallocate the auxiliary sparse operator
+        sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol))
+        rmap = self.assembly_lgmaps[Vrow]
+        cmap = self.assembly_lgmaps[Vcol]
+        on_diag = Vrow == Vcol
+        ptype = self.mat_type if on_diag else PETSc.Mat.Type.AIJ
+
+        preallocator = PETSc.Mat().create(comm=self.comm)
+        preallocator.setType(PETSc.Mat.Type.PREALLOCATOR)
+        preallocator.setSizes(sizes)
+        preallocator.setISAllowRepeated(self.allow_repeated)
+        preallocator.setLGMap(rmap, cmap)
+        preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False)
+        if ptype.endswith("sbaij"):
+            preallocator.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True)
+        preallocator.setUp()
+        self.set_values(preallocator, Vrow, Vcol)
+        preallocator.assemble()
+        dnz, onz = get_preallocation(preallocator, sizes[0][0])
+        if on_diag:
+            numpy.maximum(dnz, 1, out=dnz)
+        preallocator.destroy()
+        P = PETSc.Mat().create(comm=self.comm)
+        P.setType(ptype)
+        P.setSizes(sizes)
+        P.setISAllowRepeated(self.allow_repeated)
+        P.setLGMap(rmap, cmap)
+        if on_diag and ptype == "is" and self.allow_repeated:
+            bsize = Vrow.finat_element.space_dimension() * Vrow.value_size
+            local_mat = P.getISLocalMat()
+            nblocks = local_mat.getSize()[0] // bsize
+            local_mat.setVariableBlockSizes([bsize] * nblocks)
+        P.setPreallocationNNZ((dnz, onz))
+
+        if not (ptype.endswith("sbaij") or ptype == "is"):
+            P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)
+        P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True)
+        P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag)
+        P.setOption(PETSc.Mat.Option.FORCE_DIAGONAL_ENTRIES, True)
+        P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True)
+        if ptype.endswith("sbaij"):
+            P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True)
+        P.setUp()
+        return P
+
     @PETSc.Log.EventDecorator("FDMSetValues")
-    def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"):
+    def set_values(self, A, Vrow, Vcol, mat_type=None):
         """Assemble the auxiliary operator in the FDM basis using sparse
         reference tensors and diagonal mass matrices.
 
@@ -676,17 +810,17 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"):
             The test space.
         Vcol : FunctionSpace
             The trial space.
-        addv : PETSc.Mat.InsertMode
-            Flag indicating if we want to insert or add matrix values.
-        mat_type : PETSc.Mat.Type
-            The matrix type of auxiliary operator. This only used when ``A`` is a preallocator
-            to determine the nonzeros on the upper triangual part of an ``'sbaij'`` matrix.
         """
         key = (Vrow.ufl_element(), Vcol.ufl_element())
         on_diag = Vrow == Vcol
+        if mat_type is None:
+            mat_type = A.getType()
         try:
             assembler = self.assemblers[key]
         except KeyError:
+            addv = self.insert_mode[Vrow, Vcol]
+            spaces = (Vrow,) if on_diag else (Vrow, Vcol)
+            indices_acc = tuple(self.indices_acc[V] for V in spaces)
             M = self._element_mass_matrix
             # Interpolation of basis and exterior derivative onto broken spaces
             C1 = self.assemble_reference_tensor(Vcol)
@@ -702,24 +836,29 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"):
                                               TripleProductKernel(R1, M, C0),
                                               TripleProductKernel(R0, M, C1),
                                               TripleProductKernel(R0, M, C0))
-            self.kernels.append(element_kernel)
-            spaces = (Vrow, Vcol)[on_diag:]
-            indices_acc = tuple(self.indices[V](op2.READ, V.cell_node_map()) for V in spaces)
             coefficients = self.coefficients["cell"]
             coefficients_acc = coefficients.dat(op2.READ, coefficients.cell_node_map())
+
+            element_kernel = self._element_kernels[Vrow, Vcol]
             kernel = element_kernel.kernel(on_diag=on_diag, addv=addv)
             assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set,
                                     *element_kernel.make_args(A),
                                     coefficients_acc,
                                     *indices_acc)
             self.assemblers.setdefault(key, assembler)
-        if A.getType() == "preallocator":
-            # Determine the global sparsity pattern by inserting a constant sparse element matrix
-            args = assembler.arguments[:2]
-            kernel = ElementKernel(PETSc.Mat(), name="preallocate").kernel(mat_type=mat_type, on_diag=on_diag)
-            assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set,
-                                    *(op2.PassthroughArg(op2.OpaqueType("Mat"), arg.data) for arg in args),
-                                    *indices_acc)
+        if mat_type == "preallocator":
+            key = key + ("preallocator",)
+            try:
+                assembler = self.assemblers[key]
+            except KeyError:
+                # Determine the global sparsity pattern by inserting a constant sparse element matrix
+                args = assembler.arguments[:2]
+                kernel = ElementKernel(PETSc.Mat(), name="preallocate").kernel(mat_type=mat_type, on_diag=on_diag, addv=addv)
+                assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set,
+                                        *(op2.PassthroughArg(op2.OpaqueType("Mat"), arg.data) for arg in args),
+                                        *indices_acc)
+                self.assemblers.setdefault(key, assembler)
+
         assembler.arguments[0].data = A.handle
         assembler()
 
@@ -729,9 +868,8 @@ class ElementKernel:
     By default, it inserts the same matrix on each cell."""
     code = dedent("""
         PetscErrorCode %(name)s(const Mat A, const Mat B, %(indices)s) {
-            PetscFunctionBeginUser;
-            PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d));
-            PetscFunctionReturn(PETSC_SUCCESS);
+            PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d));
+            return PETSC_SUCCESS;
         }""")
 
     def __init__(self, A, name=None):
@@ -755,28 +893,23 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None):
                     PetscInt m;
                     const PetscInt *ai;
                     PetscScalar *vals;
-                    PetscFunctionBeginUser;
                     PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done));
                     PetscCall(MatSeqAIJGetArrayWrite(A, &vals));
                     PetscCall(PetscMemcpy(vals, values, ai[m] * sizeof(*vals)));
                     PetscCall(MatSeqAIJRestoreArrayWrite(A, &vals));
                     PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done));
-                    PetscFunctionReturn(PETSC_SUCCESS);
+                    return PETSC_SUCCESS;
                 }""")
         if mat_type != "matfree":
-            select_cols = """
-        for (PetscInt j = ai[i]; j < ai[i + 1]; j++)
-            indices[j] -= (indices[j] < rindices[i]) * (indices[j] + 1);"""
             code += dedent("""
-                static inline PetscErrorCode MatSetValuesSparse(const Mat A, const Mat B,
-                                                                const PetscInt *restrict rindices,
-                                                                const PetscInt *restrict cindices,
-                                                                InsertMode addv) {
+                static inline PetscErrorCode MatSetValuesLocalSparse(const Mat A, const Mat B,
+                                                                     const PetscInt *restrict rindices,
+                                                                     const PetscInt *restrict cindices,
+                                                                     InsertMode addv) {
                     PetscBool done;
                     PetscInt m, ncols, istart, *indices;
                     const PetscInt *ai, *aj;
                     const PetscScalar *vals;
-                    PetscFunctionBeginUser;
                     PetscCall(MatGetRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done));
                     PetscCall(PetscMalloc1(ai[m], &indices));
                     for (PetscInt j = 0; j < ai[m]; j++) indices[j] = cindices[aj[j]];
@@ -784,14 +917,13 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None):
                     for (PetscInt i = 0; i < m; i++) {
                         istart = ai[i];
                         ncols = ai[i + 1] - istart;
-                        %(select_cols)s
-                        PetscCall(MatSetValues(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv));
+                        PetscCall(MatSetValuesLocal(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv));
                     }
                     PetscCall(MatSeqAIJRestoreArrayRead(B, &vals));
                     PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done));
                     PetscCall(PetscFree(indices));
-                    PetscFunctionReturn(PETSC_SUCCESS);
-                }""" % {"select_cols": select_cols if mat_type.endswith("sbaij") else ""})
+                    return PETSC_SUCCESS;
+                }""")
         code += self.code % dict(self.rules, name=self.name,
                                  indices=", ".join("const PetscInt *restrict %s" % s for s in indices),
                                  rows=indices[0], cols=indices[-1], addv=addv)
@@ -806,12 +938,11 @@ class TripleProductKernel(ElementKernel):
                                 const PetscScalar *restrict coefficients,
                                 %(indices)s) {
             Mat C;
-            PetscFunctionBeginUser;
             PetscCall(MatProductGetMats(B, NULL, &C, NULL));
             PetscCall(MatSetValuesArray(C, coefficients));
             PetscCall(MatProductNumeric(B));
-            PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d));
-            PetscFunctionReturn(PETSC_SUCCESS);
+            PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d));
+            return PETSC_SUCCESS;
         }""")
 
     def __init__(self, L, C, R, name=None):
@@ -828,12 +959,12 @@ class SchurComplementKernel(ElementKernel):
                                 const Mat A11, const Mat A10, const Mat A01, const Mat A00,
                                 const PetscScalar *restrict coefficients, %(indices)s) {
             Mat C;
-            PetscFunctionBeginUser;
             PetscCall(MatProductGetMats(A11, NULL, &C, NULL));
             PetscCall(MatSetValuesArray(C, coefficients));
             %(condense)s
-            PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d));
-            PetscFunctionReturn(PETSC_SUCCESS);
+            PetscCall(MatSetValuesLocalSparse(A, A11, %(rows)s, %(cols)s, %(addv)d));
+            PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d));
+            return PETSC_SUCCESS;
         }""")
 
     def __init__(self, *kernels, name=None):
@@ -852,7 +983,9 @@ def __init__(self, *kernels, name=None):
             istart += k * kdofs
         self.blocks = sorted(degree for degree in self.slices if degree > 1)
 
-        super().__init__(self.condense(), name=name)
+        result = self.condense()
+        result.axpy(1.0, self.submats[0])
+        super().__init__(result, name=name)
         self.mats.extend(self.submats)
         self.rules["condense"] = self.condense_code
 
@@ -864,16 +997,15 @@ class SchurComplementPattern(SchurComplementKernel):
     """Kernel builder to pad with zeros the Schur complement sparsity pattern."""
     condense_code = dedent("""
         PetscCall(MatProductNumeric(A11));
-        PetscCall(MatAYPX(B, 0.0, A11, SUBSET_NONZERO_PATTERN));
+        PetscCall(MatZeroEntries(B));
         """)
 
     def condense(self, result=None):
         """Pad with zeros the statically condensed pattern"""
-        structure = PETSc.Mat.Structure.SUBSET if result else None
         if result is None:
             _, A10, A01, A00 = self.submats
             result = A10.matMatMult(A00, A01, result=result)
-        result.aypx(0.0, self.submats[0], structure=structure)
+        result.zeroEntries()
         return result
 
 
@@ -898,18 +1030,15 @@ class SchurComplementDiagonal(SchurComplementKernel):
         PetscCall(MatSeqAIJRestoreArray(A00, &vals));
 
         PetscCall(MatProductNumeric(B));
-        PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN));
         """)
 
     def condense(self, result=None):
-        structure = PETSc.Mat.Structure.SUBSET if result else None
         A11, A10, A01, A00 = self.submats
         self.work[0] = A00.getDiagonal(result=self.work[0])
         self.work[0].reciprocal()
         self.work[0].scale(-1)
         A01.diagonalScale(L=self.work[0])
         result = A10.matMult(A01, result=result)
-        result.axpy(1.0, A11, structure=structure)
         return result
 
 
@@ -923,7 +1052,6 @@ class SchurComplementBlockCholesky(SchurComplementKernel):
         const PetscInt *ai;
         PetscScalar *vals, *U;
         Mat X;
-        PetscFunctionBeginUser;
         PetscCall(MatProductNumeric(A11));
         PetscCall(MatProductNumeric(A01));
         PetscCall(MatProductNumeric(A00));
@@ -951,11 +1079,10 @@ class SchurComplementBlockCholesky(SchurComplementKernel):
         PetscCall(MatProductGetMats(B, &X, NULL, NULL));
         PetscCall(MatProductNumeric(X));
         PetscCall(MatProductNumeric(B));
-        PetscCall(MatAYPX(B, -1.0, A11, SUBSET_NONZERO_PATTERN));
+        PetscCall(MatScale(B, -1.0));
         """)
 
     def condense(self, result=None):
-        structure = PETSc.Mat.Structure.SUBSET if result else None
         # asssume that A10 = A01^T
         A11, _, A01, A00 = self.submats
         indptr, indices, R = A00.getValuesCSR()
@@ -976,7 +1103,7 @@ def condense(self, result=None):
         A00.assemble()
         self.work[0] = A00.matMult(A01, result=self.work[0])
         result = self.work[0].transposeMatMult(self.work[0], result=result)
-        result.aypx(-1.0, A11, structure=structure)
+        result.scale(-1.0)
         return result
 
 
@@ -990,7 +1117,6 @@ class SchurComplementBlockLU(SchurComplementKernel):
         const PetscInt *ai;
         PetscScalar *vals, *work, *L, *U;
         Mat X;
-        PetscFunctionBeginUser;
         PetscCall(MatProductNumeric(A11));
         PetscCall(MatProductNumeric(A10));
         PetscCall(MatProductNumeric(A01));
@@ -1049,13 +1175,11 @@ class SchurComplementBlockLU(SchurComplementKernel):
         PetscCall(MatSeqAIJRestoreArray(A00, &vals));
         PetscCall(PetscFree3(ipiv, perm, work));
 
-        // B = A11 - A10 * inv(L^T) * X
+        // B = - A10 * inv(L^T) * X
         PetscCall(MatProductNumeric(B));
-        PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN));
         """)
 
     def condense(self, result=None):
-        structure = PETSc.Mat.Structure.SUBSET if result else None
         A11, A10, A01, A00 = self.submats
         indptr, indices, R = A00.getValuesCSR()
         Q = numpy.ones(R.shape, dtype=R.dtype)
@@ -1080,7 +1204,6 @@ def condense(self, result=None):
         A00.assemble()
         A00.scale(-1.0)
         result = A10.matMatMult(A00, self.work[0], result=result)
-        result.axpy(1.0, A11, structure=structure)
         return result
 
 
@@ -1093,7 +1216,6 @@ class SchurComplementBlockInverse(SchurComplementKernel):
         PetscInt m, irow, bsize, *ipiv;
         const PetscInt *ai;
         PetscScalar *vals, *work, *ainv, swork;
-        PetscFunctionBeginUser;
         PetscCall(MatProductNumeric(A11));
         PetscCall(MatProductNumeric(A10));
         PetscCall(MatProductNumeric(A01));
@@ -1129,11 +1251,9 @@ class SchurComplementBlockInverse(SchurComplementKernel):
 
         PetscCall(MatScale(A00, -1.0));
         PetscCall(MatProductNumeric(B));
-        PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN));
         """)
 
     def condense(self, result=None):
-        structure = PETSc.Mat.Structure.SUBSET if result else None
         A11, A10, A01, A00 = self.submats
         indptr, indices, R = A00.getValuesCSR()
 
@@ -1152,7 +1272,6 @@ def condense(self, result=None):
         A00.assemble()
         A00.scale(-1.0)
         result = A10.matMatMult(A00, A01, result=result)
-        result.axpy(1.0, A11, structure=structure)
         return result
 
 
@@ -1216,7 +1335,6 @@ def matmult_kernel_code(a, prefix="form", fcp=None, matshell=False):
             static PetscErrorCode %(prefix)s(Mat A, Vec X, Vec Y) {
                 PetscScalar **appctx, *y;
                 const PetscScalar *x;
-                PetscFunctionBeginUser;
                 PetscCall(MatShellGetContext(A, &appctx));
                 PetscCall(VecZeroEntries(Y));
                 PetscCall(VecGetArray(Y, &y));
@@ -1224,7 +1342,7 @@ def matmult_kernel_code(a, prefix="form", fcp=None, matshell=False):
                 %(matmult_call)s
                 PetscCall(VecRestoreArrayRead(X, &x));
                 PetscCall(VecRestoreArray(Y, &y));
-                PetscFunctionReturn(PETSC_SUCCESS);
+                return PETSC_SUCCESS;
             }""" % {"prefix": prefix, "matmult_call": matmult_call("x", "y")})
     return matmult_struct, matmult_call, ctx_struct, ctx_pack
 
@@ -1243,7 +1361,6 @@ class InteriorSolveKernel(ElementKernel):
             PetscInt m;
             Mat A, B, C;
             Vec X, Y;
-            PetscFunctionBeginUser;
             PetscCall(KSPGetOperators(ksp, &A, &B));
             PetscCall(MatShellSetContext(A, &appctx));
             PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior));
@@ -1256,7 +1373,7 @@ class InteriorSolveKernel(ElementKernel):
             PetscCall(KSPSolve(ksp, Y, X));
             PetscCall(VecDestroy(&X));
             PetscCall(VecDestroy(&Y));
-            PetscFunctionReturn(PETSC_SUCCESS);
+            return PETSC_SUCCESS;
         }""")
 
     def __init__(self, kernel, form, name=None, prefix="interior_", fcp=None, pc_type="icc"):
@@ -1310,7 +1427,6 @@ class ImplicitSchurComplementKernel(ElementKernel):
             PetscInt i;
             Mat A, B, C;
             Vec X, Y;
-            PetscFunctionBeginUser;
             PetscCall(KSPGetOperators(ksp, &A, &B));
             PetscCall(MatShellSetContext(A, &appctx));
             PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior));
@@ -1337,7 +1453,7 @@ class ImplicitSchurComplementKernel(ElementKernel):
             for (i = 0; i < %(size)d; i++) y[i] = 0.0;
             %(A_call)s
             for (i = 0; i < %(fsize)d; i++) yf[i] += y[fdofs[i]];
-            PetscFunctionReturn(PETSC_SUCCESS);
+            return PETSC_SUCCESS;
         }""")
 
     def __init__(self, kernel, name=None):
@@ -1747,10 +1863,8 @@ def load_setSubMatCSR(comm, triu=False):
                                   InsertMode addv)
             {{
                 PetscInt m, ncols, irow, icol;
-                PetscInt *indices;
-                const PetscInt *cols;
-                const PetscScalar *vals;
-                PetscFunctionBeginUser;
+                PetscInt *indices, *cols;
+                PetscScalar *vals;
                 PetscCall(MatGetSize(B, &m, NULL));
                 PetscCall(MatSeqAIJGetMaxRowNonzeros(B, &ncols));
                 PetscCall(PetscMalloc1(ncols, &indices));
@@ -1766,7 +1880,7 @@ def load_setSubMatCSR(comm, triu=False):
                     PetscCall(MatRestoreRow(B, i, &ncols, &cols, &vals));
                 }}
                 PetscCall(PetscFree(indices));
-                PetscFunctionReturn(PETSC_SUCCESS);
+                return PETSC_SUCCESS;
             }}
             """)
         argtypes = [ctypes.c_voidp, ctypes.c_voidp,
@@ -1836,7 +1950,7 @@ def assemble_reference_tensor(self, V):
         return Afdm, Dfdm, bdof, axes_shifts
 
     @PETSc.Log.EventDecorator("FDMSetValues")
-    def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"):
+    def set_values(self, A, Vrow, Vcol, mat_type=None):
         """Assemble the stiffness matrix in the FDM basis using Kronecker
         products of interval matrices.
 
@@ -1848,16 +1962,17 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"):
             The test space.
         Vcol : FunctionSpace
             The trial space.
-        addv : PETSc.Mat.InsertMode
-            Flag indicating if we want to insert or add matrix values.
         mat_type : PETSc.Mat.Type
             The matrix type of auxiliary operator. This only used when ``A`` is a preallocator
             to determine the nonzeros on the upper triangual part of an ``'sbaij'`` matrix.
         """
+        if mat_type is None:
+            mat_type = A.getType()
         triu = A.getType() == "preallocator" and mat_type.endswith("sbaij")
         set_submat = SparseAssembler.setSubMatCSR(COMM_SELF, triu=triu)
         update_A = lambda A, Ae, rindices: set_submat(A, Ae, rindices, rindices, addv)
         condense_element_mat = lambda x: x
+        addv = PETSc.InsertMode.ADD_VALUES
 
         def cell_to_global(lgmap, cell_to_local, cell_index, result=None):
             # Be careful not to create new arrays
@@ -2083,7 +2198,7 @@ def assemble_coefficients(self, J, fcp):
         tdim = mesh.topological_dimension()
         Finv = ufl.JacobianInverse(mesh)
 
-        degree = max(as_tuple(V.ufl_element().degree()))
+        degree, = set(as_tuple(V.ufl_element().degree()))
         quad_deg = fcp.get("degree", 2*degree+1)
         dx = ufl.dx(degree=quad_deg, domain=mesh)
         family = "Discontinuous Lagrange" if tdim == 1 else "DQ"
diff --git a/firedrake/scripts/firedrake-zenodo b/firedrake/scripts/firedrake-zenodo
index 6194d9df1d..d609a81326 100755
--- a/firedrake/scripts/firedrake-zenodo
+++ b/firedrake/scripts/firedrake-zenodo
@@ -16,35 +16,35 @@ import datetime
 
 # Change this to https://sandbox.zenodo.org/api for testing
 ZENODO_URL = "https://zenodo.org/api"
-# TODO: Remove "petsc4py" once all users have switched to "petsc/src/binding/petsc4py".
-# And the same for slepc4py.
 descriptions = OrderedDict([
     ("firedrake", "an automated finite element system"),
-    ("tsfc", "The Two Stage Form Compiler"),
     ("ufl", "The Unified Form Language"),
-    ("FInAT", "a smarter library of finite elements"),
     ("fiat", "The Finite Element Automated Tabulator"),
     ("petsc", "Portable, Extensible Toolkit for Scientific Computation"),
-    ("petsc4py", "The Python interface to PETSc"),
     ("loopy", "Transformation-Based Generation of High-Performance CPU/GPU Code"),
     ("slepc", "Scalable Library for Eigenvalue Problem Computations"),
-    ("slepc4py", "The Python interface to SLEPc")])
-
-projects = dict(
-    [("firedrake", "firedrakeproject"),
-     ("tsfc", "firedrakeproject"),
-     ("ufl", "firedrakeproject"),
-     ("FInAT", "FInAT"),
-     ("fiat", "firedrakeproject"),
-     ("petsc", "firedrakeproject"),
-     ("petsc4py", "firedrakeproject"),
-     ("loopy", "firedrakeproject"),
-     ("slepc", "firedrakeproject"),
-     ("slepc4py", "firedrakeproject")])
+    # removed components, left so old Firedrake versions are archivable
+    ("PyOP2", "Framework for performance-portable parallel computations on unstructured meshes"),
+    ("tsfc", "The Two Stage Form Compiler"),
+    ("FInAT", "a smarter library of finite elements"),
+])
+
+projects = dict([
+    ("firedrake", "firedrakeproject"),
+    ("ufl", "firedrakeproject"),
+    ("fiat", "firedrakeproject"),
+    ("petsc", "firedrakeproject"),
+    ("loopy", "firedrakeproject"),
+    ("slepc", "firedrakeproject"),
+    # removed components, left so old Firedrake versions are archivable
+    ("PyOP2", "OP2"),
+    ("tsfc", "firedrakeproject"),
+    ("FInAT", "FInAT"),
+])
 
 components = list(descriptions.keys())
 
-optional_components = ("slepc", "slepc4py", "petsc4py")
+optional_components = ("slepc", "PyOP2", "tsfc", "FInAT")
 
 parser = ArgumentParser(description="""Create Zenodo DOIs for specific versions of Firedrake components.
 
diff --git a/firedrake/variational_solver.py b/firedrake/variational_solver.py
index 609d599800..4a1ac396c5 100644
--- a/firedrake/variational_solver.py
+++ b/firedrake/variational_solver.py
@@ -10,9 +10,8 @@
     DEFAULT_SNES_PARAMETERS
 )
 from firedrake.function import Function
-from firedrake.functionspace import RestrictedFunctionSpace
 from firedrake.ufl_expr import TrialFunction, TestFunction
-from firedrake.bcs import DirichletBC, EquationBC
+from firedrake.bcs import DirichletBC, EquationBC, extract_subdomain_ids, restricted_function_space
 from firedrake.adjoint_utils import NonlinearVariationalProblemMixin, NonlinearVariationalSolverMixin
 from ufl import replace
 
@@ -88,19 +87,17 @@ def __init__(self, F, u, bcs=None, J=None,
         self.restrict = restrict
 
         if restrict and bcs:
-            V_res = RestrictedFunctionSpace(V, boundary_set=set([bc.sub_domain for bc in bcs]))
-            bcs = [DirichletBC(V_res, bc.function_arg, bc.sub_domain) for bc in bcs]
+            V_res = restricted_function_space(V, extract_subdomain_ids(bcs))
+            bcs = [bc.reconstruct(V=V_res, indices=bc._indices) for bc in bcs]
             self.u_restrict = Function(V_res).interpolate(u)
             v_res, u_res = TestFunction(V_res), TrialFunction(V_res)
             F_arg, = F.arguments()
-            replace_dict = {F_arg: v_res}
-            replace_dict[self.u] = self.u_restrict
-            self.F = replace(F, replace_dict)
+            self.F = replace(F, {F_arg: v_res, self.u: self.u_restrict})
             v_arg, u_arg = self.J.arguments()
-            self.J = replace(self.J, {v_arg: v_res, u_arg: u_res})
+            self.J = replace(self.J, {v_arg: v_res, u_arg: u_res, self.u: self.u_restrict})
             if self.Jp:
                 v_arg, u_arg = self.Jp.arguments()
-                self.Jp = replace(self.Jp, {v_arg: v_res, u_arg: u_res})
+                self.Jp = replace(self.Jp, {v_arg: v_res, u_arg: u_res, self.u: self.u_restrict})
             self.restricted_space = V_res
         else:
             self.u_restrict = u
diff --git a/pyop2/codegen/loopycompat.py b/pyop2/codegen/loopycompat.py
index ae3d5feffa..ed46039762 100644
--- a/pyop2/codegen/loopycompat.py
+++ b/pyop2/codegen/loopycompat.py
@@ -52,7 +52,7 @@ def map_subscript(self, expr):
             flattened_index -= (int(stride) * ind)
             new_indices.append(simplify_via_aff(ind))
 
-        return expr.aggregate.index(tuple(new_indices))
+        return expr.aggregate[tuple(new_indices)]
 
 
 def _match_caller_callee_argument_dimension_for_single_kernel(
diff --git a/pyop2/codegen/rep2loopy.py b/pyop2/codegen/rep2loopy.py
index f850411073..c12caa30fa 100644
--- a/pyop2/codegen/rep2loopy.py
+++ b/pyop2/codegen/rep2loopy.py
@@ -1,5 +1,6 @@
 import ctypes
 import numpy
+from dataclasses import dataclass
 
 import loopy
 from loopy.symbolic import SubArrayRef
@@ -113,10 +114,12 @@ def __init__(self, name=None, arg_id_to_dtype=None,
                  arg_id_to_descr=None, name_in_target=None):
         if name is not None:
             assert name == self.name
+
+        name_in_target = name_in_target if name_in_target else self.name
         super(LACallable, self).__init__(self.name,
                                          arg_id_to_dtype=arg_id_to_dtype,
-                                         arg_id_to_descr=arg_id_to_descr)
-        self.name_in_target = name_in_target if name_in_target else self.name
+                                         arg_id_to_descr=arg_id_to_descr,
+                                         name_in_target=name_in_target)
 
     @abc.abstractproperty
     def name(self):
@@ -205,16 +208,18 @@ def __call__(self, preamble_info):
         yield ("0", self.preamble)
 
 
+@dataclass(frozen=True, init=False)
 class PyOP2KernelCallable(loopy.ScalarCallable):
     """Handles PyOP2 Kernel passed in as a string
     """
 
-    fields = set(["name", "parameters", "arg_id_to_dtype", "arg_id_to_descr", "name_in_target"])
     init_arg_names = ("name", "parameters", "arg_id_to_dtype", "arg_id_to_descr", "name_in_target")
 
+    parameters: tuple
+
     def __init__(self, name, parameters, arg_id_to_dtype=None, arg_id_to_descr=None, name_in_target=None):
-        super(PyOP2KernelCallable, self).__init__(name, arg_id_to_dtype, arg_id_to_descr, name_in_target)
-        self.parameters = parameters
+        super().__init__(name, arg_id_to_dtype, arg_id_to_descr, name_in_target)
+        object.__setattr__(self, "parameters", tuple(parameters))
 
     def with_types(self, arg_id_to_dtype, callables_table):
         new_arg_id_to_dtype = arg_id_to_dtype.copy()
@@ -288,8 +293,8 @@ def runtime_indices(expressions):
     for node in traversal(expressions):
         if isinstance(node, RuntimeIndex):
             indices.append(node.name)
-
-    return frozenset(indices)
+    # use a dict as an ordered set
+    return {i: None for i in indices}
 
 
 def imperatives(exprs):
@@ -325,7 +330,7 @@ def loop_nesting(instructions, deps, outer_inames, kernel_name):
 
     # boost inames, if one instruction is inside inner inames (free indices),
     # it should be inside the outer inames as dictated by other instructions.
-    index_nesting = defaultdict(frozenset)  # free index -> {runtime indices}
+    index_nesting = defaultdict(dict)  # free index -> {runtime indices}
     for insn in instructions:
         if isinstance(insn, When):
             key = insn.children[1]
@@ -338,7 +343,7 @@ def loop_nesting(instructions, deps, outer_inames, kernel_name):
     for insn in imperatives(instructions):
         outer = reduce(operator.or_,
                        iter(index_nesting[fi] for fi in traversal([insn]) if isinstance(fi, Index)),
-                       frozenset())
+                       {})
         nesting[insn] = nesting[insn] | outer
 
     return nesting
@@ -407,11 +412,10 @@ def generate(builder, wrapper_name=None):
     Materialise._count = itertools.count()
     RuntimeIndex._count = itertools.count()
 
+    # use a dict as an ordered set
+    outer_inames = {builder._loop_index.name: None}
     if builder.layer_index is not None:
-        outer_inames = frozenset([builder._loop_index.name,
-                                  builder.layer_index.name])
-    else:
-        outer_inames = frozenset([builder._loop_index.name])
+        outer_inames.update({builder.layer_index.name: None})
 
     instructions = list(builder.emit_instructions())
 
@@ -476,10 +480,17 @@ def renamer(expr):
     deps = instruction_dependencies(instructions, mapper.initialisers)
     within_inames = loop_nesting(instructions, deps, outer_inames, parameters.kernel_name)
 
+    # used to avoid disadvantageous loop interchanges
+    loop_priorities = set()
+    for iname_nest in within_inames.values():
+        if len(iname_nest) > 1:
+            loop_priorities.add(tuple(iname_nest.keys()))
+    loop_priorities = frozenset(loop_priorities)
+
     # generate loopy
     context = Bag()
     context.parameters = parameters
-    context.within_inames = within_inames
+    context.within_inames = {k: frozenset(v.keys()) for k, v in within_inames.items()}
     context.conditions = []
     context.index_ordering = []
     context.instruction_dependencies = deps
@@ -544,11 +555,8 @@ def renamer(expr):
                                 options=options,
                                 assumptions=assumptions,
                                 lang_version=(2018, 2),
-                                name=wrapper_name)
-
-    # prioritize loops
-    for indices in context.index_ordering:
-        wrapper = loopy.prioritize_loops(wrapper, indices)
+                                name=wrapper_name,
+                                loop_priority=loop_priorities)
 
     # register kernel
     kernel = builder.kernel
diff --git a/pyop2/compilation.py b/pyop2/compilation.py
index 76ccbb38a7..d28c945188 100644
--- a/pyop2/compilation.py
+++ b/pyop2/compilation.py
@@ -547,7 +547,7 @@ def check_source_hashes(compiler, jitmodule, extension, comm):
             output = Path(configuration["cache_dir"]).joinpath("mismatching-kernels")
             srcfile = output.joinpath(f"src-rank{icomm.rank}.{extension}")
             if icomm.rank == 0:
-                output.mkdir(exist_ok=True)
+                output.mkdir(parents=True, exist_ok=True)
             icomm.barrier()
             with open(srcfile, "w") as fh:
                 fh.write(jitmodule.code_to_compile)
diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py
index da82f6ecad..21e06ab043 100644
--- a/pyop2/local_kernel.py
+++ b/pyop2/local_kernel.py
@@ -120,13 +120,11 @@ def cache_key(self):
     def _immutable_cache_key(self):
         # We need this function because self.accesses is mutable due to legacy support
         if isinstance(self.code, lp.TranslationUnit):
-            key_hash = hashlib.sha256()
-            self.code.update_persistent_hash(key_hash, LoopyKeyBuilder())
-            code = key_hash.hexdigest()
+            code_key = LoopyKeyBuilder()(self.code)
         else:
-            code = self.code
+            code_key = self.code
 
-        key = (code, self.name, self.cpp, self.flop_count,
+        key = (code_key, self.name, self.cpp, self.flop_count,
                self.headers, self.include_dirs, self.ldargs, sorted(self.opts.items()),
                self.requires_zeroed_output_arguments, self.user_code, version.__version__)
         return hashlib.md5(str(key).encode()).hexdigest()
diff --git a/pyop2/types/dat.py b/pyop2/types/dat.py
index fb877c1a88..f41ee3d5b9 100644
--- a/pyop2/types/dat.py
+++ b/pyop2/types/dat.py
@@ -3,6 +3,7 @@
 import ctypes
 import itertools
 import operator
+from collections.abc import Sequence
 
 import loopy as lp
 import numpy as np
@@ -358,14 +359,14 @@ def _op_kernel(self, op, globalp, dtype):
         _self = p.Variable("self")
         _ret = p.Variable("ret")
         i = p.Variable("i")
-        lhs = _ret.index(i)
+        lhs = _ret[i]
         if globalp:
-            rhs = _other.index(0)
+            rhs = _other[0]
             rshape = (1, )
         else:
-            rhs = _other.index(i)
+            rhs = _other[i]
             rshape = (self.cdim, )
-        insn = lp.Assignment(lhs, op(_self.index(i), rhs), within_inames=frozenset(["i"]))
+        insn = lp.Assignment(lhs, op(_self[i], rhs), within_inames=frozenset(["i"]))
         data = [lp.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,)),
                 lp.GlobalArg("other", dtype=dtype, shape=rshape),
                 lp.GlobalArg("ret", dtype=self.dtype, shape=(self.cdim,))]
@@ -405,15 +406,15 @@ def _iop_kernel(self, op, globalp, other_is_self, dtype):
         _other = p.Variable("other")
         _self = p.Variable("self")
         i = p.Variable("i")
-        lhs = _self.index(i)
+        lhs = _self[i]
         rshape = (self.cdim, )
         if globalp:
-            rhs = _other.index(0)
+            rhs = _other[0]
             rshape = (1, )
         elif other_is_self:
-            rhs = _self.index(i)
+            rhs = _self[i]
         else:
-            rhs = _other.index(i)
+            rhs = _other[i]
         insn = lp.Assignment(lhs, op(lhs, rhs), within_inames=frozenset(["i"]))
         data = [lp.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,))]
         if not other_is_self:
@@ -492,6 +493,41 @@ def norm(self):
         from math import sqrt
         return sqrt(self.inner(self).real)
 
+    def maxpy(self, scalar: Sequence, x: Sequence) -> None:
+        """Compute a sequence of axpy operations.
+
+        This is equivalent to calling :meth:`axpy` for each pair of
+        scalars and :class:`Dat` in the input sequences.
+
+        Parameters
+        ----------
+        scalar :
+            A sequence of scalars.
+        x :
+            A sequence of :class:`Dat`.
+
+        """
+        if len(scalar) != len(x):
+            raise ValueError("scalar and x must have the same length")
+        for alpha_i, x_i in zip(scalar, x):
+            self.axpy(alpha_i, x_i)
+
+    def axpy(self, alpha: float, other: 'Dat') -> None:
+        """Compute the operation :math:`y = \\alpha x + y`.
+
+        In this case, ``self`` is ``y`` and ``other`` is ``x``.
+
+        """
+        self._check_shape(other)
+        if isinstance(other._data, np.ndarray):
+            if not np.isscalar(alpha):
+                raise TypeError("alpha must be a scalar")
+            np.add(
+                alpha * other.data_ro, self.data_ro,
+                out=self.data_wo)
+        else:
+            raise NotImplementedError("Not implemented for GPU")
+
     def __pos__(self):
         pos = Dat(self)
         return pos
@@ -518,7 +554,7 @@ def _neg_kernel(self):
         lvalue = p.Variable("other")
         rvalue = p.Variable("self")
         i = p.Variable("i")
-        insn = lp.Assignment(lvalue.index(i), -rvalue.index(i), within_inames=frozenset(["i"]))
+        insn = lp.Assignment(lvalue[i], -rvalue[i], within_inames=frozenset(["i"]))
         data = [lp.GlobalArg("other", dtype=self.dtype, shape=(self.cdim,)),
                 lp.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,))]
         knl = lp.make_function([domain], [insn], data, name=name, target=conf.target, lang_version=(2018, 2))
@@ -1022,6 +1058,23 @@ def inner(self, other):
             ret += s.inner(o)
         return ret
 
+    def axpy(self, alpha: float, other: 'MixedDat') -> None:
+        """Compute the operation :math:`y = \\alpha x + y`.
+
+        In this case, ``self`` is ``y`` and ``other`` is ``x``.
+
+        """
+        self._check_shape(other)
+        for dat_result, dat_other in zip(self, other):
+            if isinstance(dat_result._data, np.ndarray):
+                if not np.isscalar(alpha):
+                    raise TypeError("alpha must be a scalar")
+                np.add(
+                    alpha * dat_other.data_ro, dat_result.data_ro,
+                    out=dat_result.data_wo)
+            else:
+                raise NotImplementedError("Not implemented for GPU")
+
     def _op(self, other, op):
         ret = []
         if np.isscalar(other):
diff --git a/pyop2/types/glob.py b/pyop2/types/glob.py
index d8ed991346..f895c91a5b 100644
--- a/pyop2/types/glob.py
+++ b/pyop2/types/glob.py
@@ -2,6 +2,7 @@
 import ctypes
 import operator
 import warnings
+from collections.abc import Sequence
 
 import numpy as np
 from petsc4py import PETSc
@@ -203,6 +204,38 @@ def inner(self, other):
         assert issubclass(type(other), type(self))
         return np.dot(self.data_ro, np.conj(other.data_ro))
 
+    def maxpy(self, scalar: Sequence, x: Sequence) -> None:
+        """Compute a sequence of axpy operations.
+
+        This is equivalent to calling :meth:`axpy` for each pair of
+        scalars and :class:`Dat` in the input sequences.
+
+        Parameters
+        ----------
+        scalar :
+            A sequence of scalars.
+        x :
+            A sequence of `Global`.
+
+        """
+        if len(scalar) != len(x):
+            raise ValueError("scalar and x must have the same length")
+        for alpha_i, x_i in zip(scalar, x):
+            self.axpy(alpha_i, x_i)
+
+    def axpy(self, alpha: float, other: 'Global') -> None:
+        """Compute the operation :math:`y = \\alpha x + y`.
+
+        In this case, ``self`` is ``y`` and ``other`` is ``x``.
+
+        """
+        if isinstance(self._data, np.ndarray):
+            if not np.isscalar(alpha):
+                raise ValueError("alpha must be a scalar")
+            np.add(alpha * other.data_ro, self.data_ro, out=self.data_wo)
+        else:
+            raise NotImplementedError("Not implemented for GPU")
+
 
 # must have comm, can be modified in parloop (implies a reduction)
 class Global(SetFreeDataCarrier, VecAccessMixin):
diff --git a/pyproject.toml b/pyproject.toml
index 69af99a483..fe15d8e7c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,23 +16,26 @@ requires-python = ">=3.10"
 dependencies = [
   "cachetools",
   "decorator<=4.4.2",
-  "mpi4py",
-  "h5py",
+  "mpi4py>3; python_version >= '3.13'",
+  "mpi4py; python_version < '3.13'",
+  # TODO: We are only using our fork here because the most recent PyPI release
+  # does not yet work with build isolation for Python 3.13. Once a version
+  # newer than 3.12.1 is released we can revert to simply using "h5py".
+  "h5py @ git+https://github.com/firedrakeproject/h5py.git ; python_version >= '3.13'",
+  "h5py; python_version < '3.13'",
   "petsc4py",
   "numpy",
   "packaging",
   "pkgconfig",
   "progress",
   "pycparser",
-  "pytools",
+  "pytools[siphash]",
   "requests",
   "rtree>=1.2",
   "scipy",
   "sympy",
   "fenics-ufl @ git+https://github.com/firedrakeproject/ufl.git",
   "fenics-fiat @ git+https://github.com/firedrakeproject/fiat.git",
-  "finat @ git+https://github.com/FInAT/FInAT.git",
-  "tsfc @ git+https://github.com/firedrakeproject/tsfc.git",
   "pyadjoint-ad @ git+https://github.com/dolfin-adjoint/pyadjoint.git",
   "loopy @ git+https://github.com/firedrakeproject/loopy.git@main",
 ]
@@ -91,7 +94,8 @@ requires = [
   "pybind11",
   "pkgconfig",
   "numpy",
-  "mpi4py",
+  "mpi4py>3; python_version >= '3.13'",
+  "mpi4py; python_version < '3.13'",
   "petsc4py",
   "rtree>=1.2",
 ]
diff --git a/requirements-git.txt b/requirements-git.txt
index a6369a694d..a762c70f36 100644
--- a/requirements-git.txt
+++ b/requirements-git.txt
@@ -1,7 +1,5 @@
 git+https://github.com/firedrakeproject/ufl.git#egg=fenics-ufl
 git+https://github.com/firedrakeproject/fiat.git#egg=fenics-fiat
-git+https://github.com/FInAT/FInAT.git#egg=finat
-git+https://github.com/firedrakeproject/tsfc.git#egg=tsfc
 git+https://github.com/dolfin-adjoint/pyadjoint.git#egg=pyadjoint-ad
 git+https://github.com/firedrakeproject/loopy.git@main#egg=loopy
 git+https://github.com/firedrakeproject/pytest-mpi.git@main#egg=pytest-mpi
diff --git a/scripts/firedrake-install b/scripts/firedrake-install
index a83b04a451..f4406a6ffb 100755
--- a/scripts/firedrake-install
+++ b/scripts/firedrake-install
@@ -742,7 +742,7 @@ def get_petsc_options(minimal=False):
         petsc_options.add("--download-suitesparse")
         petsc_options.add("--download-pastix")
         # Required by pastix
-        petsc_options.add("--download-hwloc")
+        petsc_options.update({"--download-hwloc", "--download-netlib-lapack", "--with-netlib-lapack-c-bindings"})
         if osname == "Darwin":
             petsc_options.add("--download-hwloc-configure-arguments=--disable-opencl")
         # Serial mesh partitioner
@@ -1517,6 +1517,7 @@ if mode == "install" or not args.update_script:
     if osname == "Darwin":
         package_manager = "brew"
         required_packages = ["gcc",
+                             "pkg-config",
                              "autoconf",
                              "make",
                              "automake",
@@ -1535,6 +1536,7 @@ if mode == "install" or not args.update_script:
                              "flex",   # for ptscotch
                              "cmake",
                              "gfortran",
+                             "pkg-config",
                              "git",
                              "libcurl4-openssl-dev",
                              "pkgconf",  # for p4est
@@ -1941,6 +1943,19 @@ else:
         if args.rebuild:
             pip_uninstall("petsc4py")
 
+    # Handle archived repositories
+    archived_repos = [("PyOP2", "firedrake", "PyOP2"),
+                      ("tsfc", "firedrake+fiat", "tsfc"),
+                      ("FInAT", "fiat", "FInAT")]
+    for src_repo, dest_repo, pip_pkg_name in archived_repos:
+        archived_path = os.path.join(firedrake_env, "src", src_repo)
+        if os.path.exists(archived_path):
+            log.warning("%s has been moved into %s, renaming to %s_old and pip uninstalling.\n"
+                        % (src_repo, dest_repo, src_repo))
+            pip_uninstall(pip_pkg_name)
+            new_path = os.path.join(firedrake_env, "src", "%s_old" % src_repo)
+            os.rename(archived_path, new_path)
+
     # If there is a petsc package to remove, then we're an old installation which will need to rebuild PETSc.
     update_petsc = pip_uninstall("petsc") or update_petsc
 
diff --git a/setup.py b/setup.py
index 36a5fe5082..cf970c1712 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,5 @@
-from dataclasses import dataclass, field
-from setuptools import setup, find_packages, Extension
-from glob import glob
-from pathlib import Path
-from Cython.Build import cythonize
 import os
+import platform
 import sys
 import site
 import numpy as np
@@ -11,6 +7,11 @@
 import petsc4py
 import rtree
 import pkgconfig
+from dataclasses import dataclass, field
+from setuptools import setup, find_packages, Extension
+from glob import glob
+from pathlib import Path
+from Cython.Build import cythonize
 
 # Define the compilers to use if not already set
 if "CC" not in os.environ:
@@ -87,7 +88,14 @@ def __getitem__(self, key):
 # Pybind11
 # example:
 # gcc -I/pyind11/include ...
-pybind11_ = ExternalDependency(include_dirs=[pybind11.get_include()])
+pybind11_extra_compile_args = []
+if platform.uname().system == "Darwin":
+    # Clang needs to specify at least C++11
+    pybind11_extra_compile_args.append("-std=c++11")
+pybind11_ = ExternalDependency(
+    include_dirs=[pybind11.get_include()],
+    extra_compile_args=pybind11_extra_compile_args,
+)
 
 # numpy
 # example:
@@ -225,7 +233,6 @@ def extensions():
     ## PYBIND11 EXTENSIONS
     pybind11_list = []
     # tinyasm/tinyasm.cpp: petsc, pybind11
-    # tinyasm/tinyasm.cpp: petsc, pybind11
     pybind11_list.append(Extension(
         name="tinyasm._tinyasm",
         language="c++",
diff --git a/tests/firedrake/regression/test_bddc.py b/tests/firedrake/regression/test_bddc.py
new file mode 100644
index 0000000000..f1f1630e2d
--- /dev/null
+++ b/tests/firedrake/regression/test_bddc.py
@@ -0,0 +1,130 @@
+import pytest
+from firedrake import *
+from firedrake.petsc import DEFAULT_DIRECT_SOLVER
+
+
+def bddc_params(static_condensation):
+    chol = {
+        "pc_type": "cholesky",
+        "pc_factor_mat_solver_type": "petsc",
+        "pc_factor_mat_ordering_type": "natural",
+    }
+    sp = {
+        "pc_type": "python",
+        "pc_python_type": "firedrake.BDDCPC",
+        "bddc_pc_bddc_neumann": chol,
+        "bddc_pc_bddc_dirichlet": chol,
+        "bddc_pc_bddc_coarse": DEFAULT_DIRECT_SOLVER,
+    }
+    return sp
+
+
+def solver_parameters(static_condensation=True):
+    rtol = 1E-8
+    atol = 1E-12
+    sp_bddc = bddc_params(static_condensation)
+    repeated = True
+    if static_condensation:
+        sp = {
+            "pc_type": "python",
+            "pc_python_type": "firedrake.FacetSplitPC",
+            "facet_pc_type": "python",
+            "facet_pc_python_type": "firedrake.FDMPC",
+            "facet_fdm_static_condensation": True,
+            "facet_fdm_pc_use_amat": False,
+            "facet_fdm_mat_type": "is",
+            "facet_fdm_mat_is_allow_repeated": repeated,
+            "facet_fdm_pc_type": "fieldsplit",
+            "facet_fdm_pc_fieldsplit_type": "symmetric_multiplicative",
+            "facet_fdm_pc_fieldsplit_diag_use_amat": False,
+            "facet_fdm_pc_fieldsplit_off_diag_use_amat": False,
+            "facet_fdm_fieldsplit_ksp_type": "preonly",
+            "facet_fdm_fieldsplit_0_pc_type": "jacobi",
+            "facet_fdm_fieldsplit_1": sp_bddc,
+        }
+    else:
+        sp = {
+            "pc_type": "python",
+            "pc_python_type": "firedrake.FDMPC",
+            "fdm_pc_use_amat": False,
+            "fdm_mat_type": "is",
+            "fdm_mat_is_allow_repeated": repeated,
+            "fdm": sp_bddc,
+        }
+    sp.update({
+        "mat_type": "matfree",
+        "ksp_type": "cg",
+        "ksp_norm_type": "natural",
+        "ksp_monitor": None,
+        "ksp_rtol": rtol,
+        "ksp_atol": atol,
+    })
+    return sp
+
+
+def solve_riesz_map(mesh, family, degree, bcs, condense):
+    dirichlet_ids = []
+    if bcs:
+        dirichlet_ids = ["on_boundary"]
+        if hasattr(mesh, "extruded") and mesh.extruded:
+            dirichlet_ids.extend(["bottom", "top"])
+
+    tdim = mesh.topological_dimension()
+    if family.endswith("E"):
+        family = "RTCE" if tdim == 2 else "NCE"
+    if family.endswith("F"):
+        family = "RTCF" if tdim == 2 else "NCF"
+    V = FunctionSpace(mesh, family, degree, variant="fdm")
+    v = TestFunction(V)
+    u = TrialFunction(V)
+    d = {
+        H1: grad,
+        HCurl: curl,
+        HDiv: div,
+    }[V.ufl_element().sobolev_space]
+
+    formdegree = V.finat_element.formdegree
+    if formdegree == 0:
+        a = inner(d(u), d(v)) * dx(degree=2*degree)
+    else:
+        a = (inner(u, v) + inner(d(u), d(v))) * dx(degree=2*degree)
+
+    rg = RandomGenerator(PCG64(seed=123456789))
+    u_exact = rg.uniform(V, -1, 1)
+    L = ufl.replace(a, {u: u_exact})
+    bcs = [DirichletBC(V, u_exact, sub) for sub in dirichlet_ids]
+    nsp = None
+    if formdegree == 0:
+        nsp = VectorSpaceBasis([Function(V).interpolate(Constant(1))])
+        nsp.orthonormalize()
+
+    uh = Function(V, name="solution")
+    problem = LinearVariationalProblem(a, L, uh, bcs=bcs)
+
+    sp = solver_parameters(condense)
+    solver = LinearVariationalSolver(problem, near_nullspace=nsp,
+                                     solver_parameters=sp,
+                                     options_prefix="")
+    solver.solve()
+    return solver.snes.getLinearSolveIterations()
+
+
+@pytest.fixture(params=(2, 3), ids=("square", "cube"))
+def mesh(request):
+    nx = 4
+    dim = request.param
+    msh = UnitSquareMesh(nx, nx, quadrilateral=True)
+    if dim == 3:
+        msh = ExtrudedMesh(msh, nx)
+    return msh
+
+
+@pytest.mark.parallel
+@pytest.mark.parametrize("family", "Q")
+@pytest.mark.parametrize("degree", (4,))
+@pytest.mark.parametrize("condense", (False, True))
+def test_bddc_fdm(mesh, family, degree, condense):
+    bcs = True
+    tdim = mesh.topological_dimension()
+    expected = 6 if tdim == 2 else 11
+    assert solve_riesz_map(mesh, family, degree, bcs, condense) <= expected
diff --git a/tests/firedrake/regression/test_dg_advection.py b/tests/firedrake/regression/test_dg_advection.py
index a655176c24..0559f2f159 100644
--- a/tests/firedrake/regression/test_dg_advection.py
+++ b/tests/firedrake/regression/test_dg_advection.py
@@ -38,8 +38,16 @@ def run_test(mesh):
     t = 0.0
     T = 10*dt
 
-    problem = LinearVariationalProblem(a_mass, action(arhs, D1), dD1)
-    solver = LinearVariationalSolver(problem, solver_parameters={'ksp_type': 'cg'})
+    problem = LinearVariationalProblem(a_mass, action(arhs, D1), dD1,
+                                       constant_jacobian=True)
+    solver = LinearVariationalSolver(
+        problem,
+        solver_parameters={
+            'ksp_type': 'preonly',
+            'pc_type': 'bjacobi',
+            'sub_pc_type': 'ilu'
+        }
+    )
 
     L2_0 = norm(D)
     Dbar_0 = assemble(D*dx)
diff --git a/tests/firedrake/regression/test_interpolate.py b/tests/firedrake/regression/test_interpolate.py
index 4a963aa36e..e8860a32e7 100644
--- a/tests/firedrake/regression/test_interpolate.py
+++ b/tests/firedrake/regression/test_interpolate.py
@@ -28,6 +28,47 @@ def test_function():
     assert np.allclose(g.dat.data, h.dat.data)
 
 
+def test_mixed_expression():
+    m = UnitTriangleMesh()
+    x = SpatialCoordinate(m)
+    V1 = FunctionSpace(m, 'P', 1)
+    V2 = FunctionSpace(m, 'P', 2)
+
+    V = V1 * V2
+    expressions = [x[0], x[0]*x[1]]
+    expr = as_vector(expressions)
+    fg = assemble(interpolate(expr, V))
+    f, g = fg.subfunctions
+
+    f1 = Function(V1).interpolate(expressions[0])
+    g1 = Function(V2).interpolate(expressions[1])
+    assert np.allclose(f.dat.data, f1.dat.data)
+    assert np.allclose(g.dat.data, g1.dat.data)
+
+
+def test_mixed_function():
+    m = UnitTriangleMesh()
+    x = SpatialCoordinate(m)
+    V1 = FunctionSpace(m, 'RT', 1)
+    V2 = FunctionSpace(m, 'DG', 0)
+    V = V1 * V2
+
+    expressions = [x[0], x[1], Constant(0.444)]
+    expr = as_vector(expressions)
+    v = assemble(interpolate(expr, V))
+
+    W1 = FunctionSpace(m, 'RT', 2)
+    W2 = FunctionSpace(m, 'DG', 1)
+    W = W1 * W2
+    w = assemble(interpolate(v, W))
+
+    f, g = w.subfunctions
+    f1 = Function(W1).interpolate(x)
+    g1 = Function(W2).interpolate(expressions[-1])
+    assert np.allclose(f.dat.data, f1.dat.data)
+    assert np.allclose(g.dat.data, g1.dat.data)
+
+
 def test_inner():
     m = UnitTriangleMesh()
     V1 = FunctionSpace(m, 'P', 1)
diff --git a/tests/firedrake/regression/test_poisson_strong_bcs.py b/tests/firedrake/regression/test_poisson_strong_bcs.py
index 2655f98b91..208260554d 100644
--- a/tests/firedrake/regression/test_poisson_strong_bcs.py
+++ b/tests/firedrake/regression/test_poisson_strong_bcs.py
@@ -18,7 +18,7 @@
 from firedrake import *
 
 
-def run_test(r, degree, parameters={}, quadrilateral=False):
+def run_test(r, degree, parameters, quadrilateral=False):
     # Create mesh and define function space
     mesh = UnitSquareMesh(2 ** r, 2 ** r, quadrilateral=quadrilateral)
     x = SpatialCoordinate(mesh)
@@ -41,7 +41,7 @@ def run_test(r, degree, parameters={}, quadrilateral=False):
     return sqrt(assemble(inner(u - f, u - f) * dx))
 
 
-def run_test_linear(r, degree, parameters={}, quadrilateral=False):
+def run_test_linear(r, degree, parameters, quadrilateral=False):
     # Create mesh and define function space
     mesh = UnitSquareMesh(2 ** r, 2 ** r, quadrilateral=quadrilateral)
     x = SpatialCoordinate(mesh)
@@ -86,7 +86,7 @@ def test_poisson_analytic_linear(params, degree, quadrilateral):
 
 @pytest.mark.parallel(nprocs=2)
 def test_poisson_analytic_linear_parallel():
-    from mpi4py import MPI
-    error = run_test_linear(1, 1)
-    print('[%d]' % MPI.COMM_WORLD.rank, 'error:', error)
+    # specify superlu_dist as MUMPS fails in parallel on MacOS
+    solver_parameters = {'pc_factor_mat_solver_type': 'superlu_dist'}
+    error = run_test_linear(1, 1, solver_parameters)
     assert error < 5e-6
diff --git a/tests/firedrake/regression/test_restricted_function_space.py b/tests/firedrake/regression/test_restricted_function_space.py
index 4a26e7834c..51e2139ae6 100644
--- a/tests/firedrake/regression/test_restricted_function_space.py
+++ b/tests/firedrake/regression/test_restricted_function_space.py
@@ -168,17 +168,40 @@ def test_restricted_function_space_coord_change(j):
     new_mesh = Mesh(Function(V).interpolate(as_vector([x, y])))
     new_V = FunctionSpace(new_mesh, "CG", j)
     bc = DirichletBC(new_V, 0, 1)
-    new_V_restricted = RestrictedFunctionSpace(new_V, name="Restricted", boundary_set=[1])
+    new_V_restricted = RestrictedFunctionSpace(new_V, boundary_set=[1], name="Restricted")
 
     compare_function_space_assembly(new_V, new_V_restricted, [bc])
 
 
+def test_poisson_restricted_mixed_space():
+    mesh = UnitSquareMesh(1, 1)
+    V = FunctionSpace(mesh, "RT", 1)
+    Q = FunctionSpace(mesh, "DG", 0)
+    Z = V * Q
+
+    u, p = TrialFunctions(Z)
+    v, q = TestFunctions(Z)
+    a = inner(u, v)*dx + inner(p, div(v))*dx + inner(div(u), q)*dx
+    L = inner(1, q)*dx
+
+    bcs = [DirichletBC(Z.sub(0), 0, [1])]
+
+    w = Function(Z)
+    solve(a == L, w, bcs=bcs, restrict=False)
+
+    w2 = Function(Z)
+    solve(a == L, w2, bcs=bcs, restrict=True)
+
+    assert errornorm(w.subfunctions[0], w2.subfunctions[0]) < 1.e-12
+    assert errornorm(w.subfunctions[1], w2.subfunctions[1]) < 1.e-12
+
+
 @pytest.mark.parametrize(["i", "j"], [(1, 0), (2, 0), (2, 1)])
-def test_restricted_mixed_spaces(i, j):
+def test_poisson_mixed_restricted_spaces(i, j):
     mesh = UnitSquareMesh(1, 1)
     DG = FunctionSpace(mesh, "DG", j)
     CG = VectorFunctionSpace(mesh, "CG", i)
-    CG_res = RestrictedFunctionSpace(CG, "Restricted", boundary_set=[4])
+    CG_res = RestrictedFunctionSpace(CG, boundary_set=[4], name="Restricted")
     W = CG * DG
     W_res = CG_res * DG
     bc = DirichletBC(W.sub(0), 0, 4)
diff --git a/tests/firedrake/vertexonly/test_interpolation_from_parent.py b/tests/firedrake/vertexonly/test_interpolation_from_parent.py
index 1ac9a0a393..c722d7b9b3 100644
--- a/tests/firedrake/vertexonly/test_interpolation_from_parent.py
+++ b/tests/firedrake/vertexonly/test_interpolation_from_parent.py
@@ -292,13 +292,19 @@ def test_tensor_function_interpolation(parentmesh, vertexcoords, tfs):
     assert np.allclose(w_v.dat.data_ro.reshape(result.shape), 2*result)
 
 
-@pytest.mark.xfail(raises=NotImplementedError, reason="Interpolation of UFL expressions into mixed functions not supported")
 def test_mixed_function_interpolation(parentmesh, vertexcoords, tfs):
     if parentmesh.name == "immersedsphere":
         vertexcoords = immersed_sphere_vertexcoords(parentmesh, vertexcoords)
     tfs_fam, tfs_deg, tfs_typ = tfs
+
     vm = VertexOnlyMesh(parentmesh, vertexcoords, missing_points_behaviour=None)
     vertexcoords = vm.coordinates.dat.data_ro.reshape(-1, parentmesh.geometric_dimension())
+    if (
+        parentmesh.coordinates.function_space().ufl_element().family()
+        == "Discontinuous Lagrange"
+    ):
+        pytest.skip(f"Interpolating into f{tfs_fam} on a periodic mesh is not well-defined.")
+
     V1 = tfs_typ(parentmesh, tfs_fam, tfs_deg)
     V2 = FunctionSpace(parentmesh, "CG", 1)
     V = V1 * V2
@@ -311,30 +317,23 @@ def test_mixed_function_interpolation(parentmesh, vertexcoords, tfs):
     # Get Function in V1
     # use outer product to check Regge works
     expr1 = outer(x, x)
-    assert W1.shape == expr1.ufl_shape
-    assemble(interpolate(expr1, v1))
+    assert W1.value_shape == expr1.ufl_shape
+    v1.interpolate(expr1)
     result1 = np.asarray([np.outer(vertexcoords[i], vertexcoords[i]) for i in range(len(vertexcoords))])
     if len(result1) == 0:
         result1 = result1.reshape(vertexcoords.shape + (parentmesh.geometric_dimension(),))
     # Get Function in V2
     expr2 = reduce(add, SpatialCoordinate(parentmesh))
-    assemble(interpolate(expr2, v2))
+    v2.interpolate(expr2)
     result2 = np.sum(vertexcoords, axis=1)
+
     # Interpolate Function in V into W
     w_v = assemble(interpolate(v, W))
+
     # Split result and check
     w_v1, w_v2 = w_v.subfunctions
-    assert np.allclose(w_v1.dat.data_ro, result1)
-    assert np.allclose(w_v2.dat.data_ro, result2)
-    # try and make reusable Interpolator from V to W
-    A_w = Interpolator(TestFunction(V), W)
-    w_v = Function(W)
-    assemble(A_w.interpolate(v), tensor=w_v)
-    # Split result and check
-    w_v1, w_v2 = w_v.subfunctions
-    assert np.allclose(w_v1.dat.data_ro, result1)
-    assert np.allclose(w_v2.dat.data_ro, result2)
-    # Enough tests - don't both using it again for a different Function in V
+    assert np.allclose(w_v1.dat.data_ro.reshape(result1.shape), result1)
+    assert np.allclose(w_v2.dat.data_ro.reshape(result2.shape), result2)
 
 
 def test_scalar_real_interpolation(parentmesh, vertexcoords):
@@ -431,7 +430,6 @@ def test_tensor_function_interpolation_parallel(parentmesh, vertexcoords, tfs):
     test_tensor_function_interpolation(parentmesh, vertexcoords, tfs)
 
 
-@pytest.mark.xfail(reason="Interpolation of UFL expressions into mixed functions not supported")
 @pytest.mark.parallel
 def test_mixed_function_interpolation_parallel(parentmesh, vertexcoords, tfs):
     test_mixed_function_interpolation(parentmesh, vertexcoords, tfs)
diff --git a/tests/firedrake/vertexonly/test_swarm.py b/tests/firedrake/vertexonly/test_swarm.py
index 21c0b12eca..d3f5680041 100644
--- a/tests/firedrake/vertexonly/test_swarm.py
+++ b/tests/firedrake/vertexonly/test_swarm.py
@@ -208,7 +208,7 @@ def test_pic_swarm_in_mesh(parentmesh, redundant, exclude_halos):
         exclude_halos = True
 
     # Get point coords on current MPI rank
-    localpointcoords = np.copy(swarm.getField("DMSwarmPIC_coor"))
+    localpointcoords = np.copy(swarm.getField("DMSwarmPIC_coor").ravel())
     swarm.restoreField("DMSwarmPIC_coor")
     if len(inputpointcoords.shape) > 1:
         localpointcoords = np.reshape(localpointcoords, (-1, inputpointcoords.shape[1]))
@@ -218,11 +218,11 @@ def test_pic_swarm_in_mesh(parentmesh, redundant, exclude_halos):
     nptslocal = len(localpointcoords)
     nptsglobal = MPI.COMM_WORLD.allreduce(nptslocal, op=MPI.SUM)
     # Get parent PETSc cell indices on current MPI rank
-    localparentcellindices = np.copy(swarm.getField("DMSwarm_cellid"))
+    localparentcellindices = np.copy(swarm.getField("DMSwarm_cellid").ravel())
     swarm.restoreField("DMSwarm_cellid")
 
     # also get the global coordinate numbering
-    globalindices = np.copy(swarm.getField("globalindex"))
+    globalindices = np.copy(swarm.getField("globalindex").ravel())
     swarm.restoreField("globalindex")
 
     # Tests
@@ -233,7 +233,7 @@ def test_pic_swarm_in_mesh(parentmesh, redundant, exclude_halos):
 
     # get custom fields on swarm - will fail if didn't get created
     for name, size, dtype in other_fields:
-        f = swarm.getField(name)
+        f = swarm.getField(name).ravel()
         assert len(f) == size*nptslocal
         assert f.dtype == dtype
         swarm.restoreField(name)
@@ -330,7 +330,7 @@ def test_pic_swarm_in_mesh(parentmesh, redundant, exclude_halos):
     # Check that the rank numbering is correct. Since we know all points are at
     # the midpoints of cells, there should be no disagreement about cell
     # ownership and the voting algorithm should have no effect.
-    owned_ranks = np.copy(swarm.getField("DMSwarm_rank"))
+    owned_ranks = np.copy(swarm.getField("DMSwarm_rank").ravel())
     swarm.restoreField("DMSwarm_rank")
     if exclude_halos:
         assert np.array_equal(owned_ranks, inputlocalpointcoordranks)
@@ -339,7 +339,7 @@ def test_pic_swarm_in_mesh(parentmesh, redundant, exclude_halos):
         assert np.all(np.isin(inputlocalpointcoordranks, owned_ranks))
 
     # check that the input rank is correct
-    input_ranks = np.copy(swarm.getField("inputrank"))
+    input_ranks = np.copy(swarm.getField("inputrank").ravel())
     swarm.restoreField("inputrank")
     if exclude_halos:
         assert np.all(input_ranks == input_rank)
@@ -351,7 +351,7 @@ def test_pic_swarm_in_mesh(parentmesh, redundant, exclude_halos):
         assert np.all(input_ranks < parentmesh.comm.size)
 
     # check that the input index is correct
-    input_indices = np.copy(swarm.getField("inputindex"))
+    input_indices = np.copy(swarm.getField("inputindex").ravel())
     swarm.restoreField("inputindex")
     if exclude_halos:
         assert np.array_equal(input_indices, input_local_coord_indices)
@@ -365,7 +365,7 @@ def test_pic_swarm_in_mesh(parentmesh, redundant, exclude_halos):
 
     # check we have unique parent cell numbers, which we should since we have
     # points at cell midpoints
-    parentcellnums = np.copy(swarm.getField("parentcellnum"))
+    parentcellnums = np.copy(swarm.getField("parentcellnum").ravel())
     swarm.restoreField("parentcellnum")
     assert len(np.unique(parentcellnums)) == len(parentcellnums)
 
@@ -378,7 +378,7 @@ def test_pic_swarm_in_mesh(parentmesh, redundant, exclude_halos):
     ):
         swarm.setPointCoordinates(localpointcoords, redundant=False,
                                   mode=PETSc.InsertMode.INSERT_VALUES)
-        petsclocalparentcellindices = np.copy(swarm.getField("DMSwarm_cellid"))
+        petsclocalparentcellindices = np.copy(swarm.getField("DMSwarm_cellid").ravel())
         swarm.restoreField("DMSwarm_cellid")
         if exclude_halos:
             assert np.all(petsclocalparentcellindices == localparentcellindices)
diff --git a/tests/firedrake/vertexonly/test_vertex_only_fs.py b/tests/firedrake/vertexonly/test_vertex_only_fs.py
index a44791d0ea..a900aff568 100644
--- a/tests/firedrake/vertexonly/test_vertex_only_fs.py
+++ b/tests/firedrake/vertexonly/test_vertex_only_fs.py
@@ -116,7 +116,7 @@ def functionspace_tests(vm):
     h.dat.data_wo_with_halos[:] = -1
     h.interpolate(g)
     # Exclude points which we know are missing - these should all be equal to -1
-    input_ordering_parent_cell_nums = vm.input_ordering.topology_dm.getField("parentcellnum")
+    input_ordering_parent_cell_nums = vm.input_ordering.topology_dm.getField("parentcellnum").ravel()
     vm.input_ordering.topology_dm.restoreField("parentcellnum")
     idxs_to_include = input_ordering_parent_cell_nums != -1
     assert np.allclose(h.dat.data_ro_with_halos[idxs_to_include], np.prod(vm.input_ordering.coordinates.dat.data_ro_with_halos[idxs_to_include].reshape(-1, vm.input_ordering.geometric_dimension()), axis=1))
@@ -221,7 +221,7 @@ def vectorfunctionspace_tests(vm):
     h.dat.data_wo_with_halos[:] = -1
     h.interpolate(g)
     # Exclude points which we know are missing - these should all be equal to -1
-    input_ordering_parent_cell_nums = vm.input_ordering.topology_dm.getField("parentcellnum")
+    input_ordering_parent_cell_nums = vm.input_ordering.topology_dm.getField("parentcellnum").ravel()
     vm.input_ordering.topology_dm.restoreField("parentcellnum")
     idxs_to_include = input_ordering_parent_cell_nums != -1
     assert np.allclose(h.dat.data_ro[idxs_to_include], 2*vm.input_ordering.coordinates.dat.data_ro_with_halos[idxs_to_include])
diff --git a/tests/firedrake/vertexonly/test_vertex_only_mesh_generation.py b/tests/firedrake/vertexonly/test_vertex_only_mesh_generation.py
index ac33b461de..8031173b53 100644
--- a/tests/firedrake/vertexonly/test_vertex_only_mesh_generation.py
+++ b/tests/firedrake/vertexonly/test_vertex_only_mesh_generation.py
@@ -199,7 +199,7 @@ def verify_vertexonly_mesh(m, vm, inputvertexcoords, name):
     # Correct parent cell numbers
     stored_vertex_coords = np.copy(vm.topology_dm.getField("DMSwarmPIC_coor")).reshape((vm.num_cells(), gdim))
     vm.topology_dm.restoreField("DMSwarmPIC_coor")
-    stored_parent_cell_nums = np.copy(vm.topology_dm.getField("parentcellnum"))
+    stored_parent_cell_nums = np.copy(vm.topology_dm.getField("parentcellnum").ravel())
     vm.topology_dm.restoreField("parentcellnum")
     assert len(stored_vertex_coords) == len(stored_parent_cell_nums)
     if MPI.COMM_WORLD.size == 1:
diff --git a/tests/pyop2/test_dats.py b/tests/pyop2/test_dats.py
index 2b8cf2efbd..d937f8038a 100644
--- a/tests/pyop2/test_dats.py
+++ b/tests/pyop2/test_dats.py
@@ -263,6 +263,22 @@ def test_accessing_data_with_halos_increments_dat_version(self, d1):
         d1.data_with_halos
         assert d1.dat_version == 1
 
+    def test_axpy(self, d1):
+        d2 = op2.Dat(d1.dataset)
+        d1.data[:] = 0
+        d2.data[:] = 2
+        d1.axpy(3, d2)
+        assert (d1.data_ro == 3 * 2).all()
+
+    def test_maxpy(self, d1):
+        d2 = op2.Dat(d1.dataset)
+        d3 = op2.Dat(d1.dataset)
+        d1.data[:] = 0
+        d2.data[:] = 2
+        d3.data[:] = 3
+        d1.maxpy((2, 3), (d2, d3))
+        assert (d1.data_ro == 2 * 2 + 3 * 3).all()
+
 
 class TestDatView():
 
diff --git a/tests/tsfc/test_codegen.py b/tests/tsfc/test_codegen.py
new file mode 100644
index 0000000000..8d0bc79655
--- /dev/null
+++ b/tests/tsfc/test_codegen.py
@@ -0,0 +1,30 @@
+import pytest
+
+from gem import impero_utils
+from gem.gem import Index, Indexed, IndexSum, Product, Variable
+
+
+def test_loop_fusion():
+    i = Index()
+    j = Index()
+    Ri = Indexed(Variable('R', (6,)), (i,))
+
+    def make_expression(i, j):
+        A = Variable('A', (6,))
+        s = IndexSum(Indexed(A, (j,)), (j,))
+        return Product(Indexed(A, (i,)), s)
+
+    e1 = make_expression(i, j)
+    e2 = make_expression(i, i)
+
+    def gencode(expr):
+        impero_c = impero_utils.compile_gem([(Ri, expr)], (i, j))
+        return impero_c.tree
+
+    assert len(gencode(e1).children) == len(gencode(e2).children)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_coffee_optimise.py b/tests/tsfc/test_coffee_optimise.py
new file mode 100644
index 0000000000..065bb22ee4
--- /dev/null
+++ b/tests/tsfc/test_coffee_optimise.py
@@ -0,0 +1,78 @@
+import pytest
+
+from gem.gem import Index, Indexed, Product, Variable, Division, Literal, Sum
+from gem.optimise import replace_division
+from tsfc.coffee_mode import optimise_expressions
+
+
+def test_replace_div():
+    i = Index()
+    A = Variable('A', ())
+    B = Variable('B', (6,))
+    Bi = Indexed(B, (i,))
+    d = Division(Bi, A)
+    result, = replace_division([d])
+    expected = Product(Bi, Division(Literal(1.0), A))
+
+    assert result == expected
+
+
+def test_loop_optimise():
+    I = 20
+    J = K = 10
+    i = Index('i', I)
+    j = Index('j', J)
+    k = Index('k', K)
+
+    A1 = Variable('a1', (I,))
+    A2 = Variable('a2', (I,))
+    A3 = Variable('a3', (I,))
+    A1i = Indexed(A1, (i,))
+    A2i = Indexed(A2, (i,))
+    A3i = Indexed(A3, (i,))
+
+    B = Variable('b', (J,))
+    C = Variable('c', (J,))
+    Bj = Indexed(B, (j,))
+    Cj = Indexed(C, (j,))
+
+    E = Variable('e', (K,))
+    F = Variable('f', (K,))
+    G = Variable('g', (K,))
+    Ek = Indexed(E, (k,))
+    Fk = Indexed(F, (k,))
+    Gk = Indexed(G, (k,))
+
+    Z = Variable('z', ())
+
+    # Bj*Ek + Bj*Fk => (Ek + Fk)*Bj
+    expr = Sum(Product(Bj, Ek), Product(Bj, Fk))
+    result, = optimise_expressions([expr], (j, k))
+    expected = Product(Sum(Ek, Fk), Bj)
+    assert result == expected
+
+    # Bj*Ek + Bj*Fk + Bj*Gk + Cj*Ek + Cj*Fk =>
+    # (Ek + Fk + Gk)*Bj + (Ek+Fk)*Cj
+    expr = Sum(Sum(Sum(Sum(Product(Bj, Ek), Product(Bj, Fk)), Product(Bj, Gk)),
+                   Product(Cj, Ek)), Product(Cj, Fk))
+    result, = optimise_expressions([expr], (j, k))
+    expected = Sum(Product(Sum(Sum(Ek, Fk), Gk), Bj), Product(Sum(Ek, Fk), Cj))
+    assert result == expected
+
+    # Z*A1i*Bj*Ek + Z*A2i*Bj*Ek + A3i*Bj*Ek + Z*A1i*Bj*Fk =>
+    # Bj*(Ek*(Z*A1i + Z*A2i) + A3i) + Z*A1i*Fk)
+
+    expr = Sum(Sum(Sum(Product(Z, Product(A1i, Product(Bj, Ek))),
+                       Product(Z, Product(A2i, Product(Bj, Ek)))),
+                   Product(A3i, Product(Bj, Ek))),
+               Product(Z, Product(A1i, Product(Bj, Fk))))
+    result, = optimise_expressions([expr], (j, k))
+    expected = Product(Sum(Product(Ek, Sum(Sum(Product(Z, A1i), Product(Z, A2i)), A3i)),
+                           Product(Fk, Product(Z, A1i))), Bj)
+    assert result == expected
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_create_fiat_element.py b/tests/tsfc/test_create_fiat_element.py
new file mode 100644
index 0000000000..f8a7d6efc4
--- /dev/null
+++ b/tests/tsfc/test_create_fiat_element.py
@@ -0,0 +1,150 @@
+import pytest
+
+import FIAT
+from FIAT.discontinuous_lagrange import DiscontinuousLagrange as FIAT_DiscontinuousLagrange
+
+import ufl
+import finat.ufl
+from tsfc.finatinterface import create_element as _create_element
+
+
+supported_elements = {
+    # These all map directly to FIAT elements
+    "Brezzi-Douglas-Marini": FIAT.BrezziDouglasMarini,
+    "Brezzi-Douglas-Fortin-Marini": FIAT.BrezziDouglasFortinMarini,
+    "Lagrange": FIAT.Lagrange,
+    "Nedelec 1st kind H(curl)": FIAT.Nedelec,
+    "Nedelec 2nd kind H(curl)": FIAT.NedelecSecondKind,
+    "Raviart-Thomas": FIAT.RaviartThomas,
+    "Regge": FIAT.Regge,
+}
+"""A :class:`.dict` mapping UFL element family names to their
+FIAT-equivalent constructors."""
+
+
+def create_element(ufl_element):
+    """Create a FIAT element given a UFL element."""
+    finat_element = _create_element(ufl_element)
+    return finat_element.fiat_equivalent
+
+
+@pytest.fixture(params=["BDM",
+                        "BDFM",
+                        "Lagrange",
+                        "N1curl",
+                        "N2curl",
+                        "RT",
+                        "Regge"])
+def triangle_names(request):
+    return request.param
+
+
+@pytest.fixture
+def ufl_element(triangle_names):
+    return finat.ufl.FiniteElement(triangle_names, ufl.triangle, 2)
+
+
+def test_triangle_basic(ufl_element):
+    element = create_element(ufl_element)
+    assert isinstance(element, supported_elements[ufl_element.family()])
+
+
+@pytest.fixture(params=["CG", "DG", "DG L2"], scope="module")
+def tensor_name(request):
+    return request.param
+
+
+@pytest.fixture(params=[ufl.interval, ufl.triangle,
+                        ufl.quadrilateral],
+                ids=lambda x: x.cellname(),
+                scope="module")
+def ufl_A(request, tensor_name):
+    return finat.ufl.FiniteElement(tensor_name, request.param, 1)
+
+
+@pytest.fixture
+def ufl_B(tensor_name):
+    return finat.ufl.FiniteElement(tensor_name, ufl.interval, 1)
+
+
+def test_tensor_prod_simple(ufl_A, ufl_B):
+    tensor_ufl = finat.ufl.TensorProductElement(ufl_A, ufl_B)
+
+    tensor = create_element(tensor_ufl)
+    A = create_element(ufl_A)
+    B = create_element(ufl_B)
+
+    assert isinstance(tensor, FIAT.TensorProductElement)
+
+    assert tensor.A is A
+    assert tensor.B is B
+
+
+@pytest.mark.parametrize(('family', 'expected_cls'),
+                         [('P', FIAT.GaussLobattoLegendre),
+                          ('DP', FIAT.GaussLegendre),
+                          ('DP L2', FIAT.GaussLegendre)])
+def test_interval_variant_default(family, expected_cls):
+    ufl_element = finat.ufl.FiniteElement(family, ufl.interval, 3)
+    assert isinstance(create_element(ufl_element), expected_cls)
+
+
+@pytest.mark.parametrize(('family', 'variant', 'expected_cls'),
+                         [('P', 'equispaced', FIAT.Lagrange),
+                          ('P', 'spectral', FIAT.GaussLobattoLegendre),
+                          ('DP', 'equispaced', FIAT_DiscontinuousLagrange),
+                          ('DP', 'spectral', FIAT.GaussLegendre),
+                          ('DP L2', 'equispaced', FIAT_DiscontinuousLagrange),
+                          ('DP L2', 'spectral', FIAT.GaussLegendre)])
+def test_interval_variant(family, variant, expected_cls):
+    ufl_element = finat.ufl.FiniteElement(family, ufl.interval, 3, variant=variant)
+    assert isinstance(create_element(ufl_element), expected_cls)
+
+
+def test_triangle_variant_spectral():
+    ufl_element = finat.ufl.FiniteElement('DP', ufl.triangle, 2, variant='spectral')
+    create_element(ufl_element)
+
+
+def test_triangle_variant_spectral_l2():
+    ufl_element = finat.ufl.FiniteElement('DP L2', ufl.triangle, 2, variant='spectral')
+    create_element(ufl_element)
+
+
+def test_quadrilateral_variant_spectral_q():
+    element = create_element(finat.ufl.FiniteElement('Q', ufl.quadrilateral, 3, variant='spectral'))
+    assert isinstance(element.element.A, FIAT.GaussLobattoLegendre)
+    assert isinstance(element.element.B, FIAT.GaussLobattoLegendre)
+
+
+def test_quadrilateral_variant_spectral_dq():
+    element = create_element(finat.ufl.FiniteElement('DQ', ufl.quadrilateral, 1, variant='spectral'))
+    assert isinstance(element.element.A, FIAT.GaussLegendre)
+    assert isinstance(element.element.B, FIAT.GaussLegendre)
+
+
+def test_quadrilateral_variant_spectral_dq_l2():
+    element = create_element(finat.ufl.FiniteElement('DQ L2', ufl.quadrilateral, 1, variant='spectral'))
+    assert isinstance(element.element.A, FIAT.GaussLegendre)
+    assert isinstance(element.element.B, FIAT.GaussLegendre)
+
+
+def test_quadrilateral_variant_spectral_rtcf():
+    element = create_element(finat.ufl.FiniteElement('RTCF', ufl.quadrilateral, 2, variant='spectral'))
+    assert isinstance(element.element._elements[0].A, FIAT.GaussLobattoLegendre)
+    assert isinstance(element.element._elements[0].B, FIAT.GaussLegendre)
+    assert isinstance(element.element._elements[1].A, FIAT.GaussLegendre)
+    assert isinstance(element.element._elements[1].B, FIAT.GaussLobattoLegendre)
+
+
+def test_cache_hit(ufl_element):
+    A = create_element(ufl_element)
+    B = create_element(ufl_element)
+
+    assert A is B
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_create_finat_element.py b/tests/tsfc/test_create_finat_element.py
new file mode 100644
index 0000000000..c0f34c292c
--- /dev/null
+++ b/tests/tsfc/test_create_finat_element.py
@@ -0,0 +1,138 @@
+import pytest
+
+import ufl
+import finat.ufl
+import finat
+from tsfc.finatinterface import create_element, supported_elements
+
+
+@pytest.fixture(params=["BDM",
+                        "BDFM",
+                        "Lagrange",
+                        "N1curl",
+                        "N2curl",
+                        "RT",
+                        "Regge"])
+def triangle_names(request):
+    return request.param
+
+
+@pytest.fixture
+def ufl_element(triangle_names):
+    return finat.ufl.FiniteElement(triangle_names, ufl.triangle, 2)
+
+
+def test_triangle_basic(ufl_element):
+    element = create_element(ufl_element)
+    assert isinstance(element, supported_elements[ufl_element.family()])
+
+
+@pytest.fixture
+def ufl_vector_element(triangle_names):
+    return finat.ufl.VectorElement(triangle_names, ufl.triangle, 2)
+
+
+def test_triangle_vector(ufl_element, ufl_vector_element):
+    scalar = create_element(ufl_element)
+    vector = create_element(ufl_vector_element)
+
+    assert isinstance(vector, finat.TensorFiniteElement)
+    assert scalar == vector.base_element
+
+
+@pytest.fixture(params=["CG", "DG", "DG L2"])
+def tensor_name(request):
+    return request.param
+
+
+@pytest.fixture(params=[ufl.interval, ufl.triangle,
+                        ufl.quadrilateral],
+                ids=lambda x: x.cellname())
+def ufl_A(request, tensor_name):
+    return finat.ufl.FiniteElement(tensor_name, request.param, 1)
+
+
+@pytest.fixture
+def ufl_B(tensor_name):
+    return finat.ufl.FiniteElement(tensor_name, ufl.interval, 1)
+
+
+def test_tensor_prod_simple(ufl_A, ufl_B):
+    tensor_ufl = finat.ufl.TensorProductElement(ufl_A, ufl_B)
+
+    tensor = create_element(tensor_ufl)
+    A = create_element(ufl_A)
+    B = create_element(ufl_B)
+
+    assert isinstance(tensor, finat.TensorProductElement)
+
+    assert tensor.factors == (A, B)
+
+
+@pytest.mark.parametrize(('family', 'expected_cls'),
+                         [('P', finat.GaussLobattoLegendre),
+                          ('DP', finat.GaussLegendre),
+                          ('DP L2', finat.GaussLegendre)])
+def test_interval_variant_default(family, expected_cls):
+    ufl_element = finat.ufl.FiniteElement(family, ufl.interval, 3)
+    assert isinstance(create_element(ufl_element), expected_cls)
+
+
+@pytest.mark.parametrize(('family', 'variant', 'expected_cls'),
+                         [('P', 'equispaced', finat.Lagrange),
+                          ('P', 'spectral', finat.GaussLobattoLegendre),
+                          ('DP', 'equispaced', finat.DiscontinuousLagrange),
+                          ('DP', 'spectral', finat.GaussLegendre),
+                          ('DP L2', 'equispaced', finat.DiscontinuousLagrange),
+                          ('DP L2', 'spectral', finat.GaussLegendre)])
+def test_interval_variant(family, variant, expected_cls):
+    ufl_element = finat.ufl.FiniteElement(family, ufl.interval, 3, variant=variant)
+    assert isinstance(create_element(ufl_element), expected_cls)
+
+
+def test_triangle_variant_spectral():
+    ufl_element = finat.ufl.FiniteElement('DP', ufl.triangle, 2, variant='spectral')
+    create_element(ufl_element)
+
+
+def test_triangle_variant_spectral_l2():
+    ufl_element = finat.ufl.FiniteElement('DP L2', ufl.triangle, 2, variant='spectral')
+    create_element(ufl_element)
+
+
+def test_quadrilateral_variant_spectral_q():
+    element = create_element(finat.ufl.FiniteElement('Q', ufl.quadrilateral, 3, variant='spectral'))
+    assert isinstance(element.product.factors[0], finat.GaussLobattoLegendre)
+    assert isinstance(element.product.factors[1], finat.GaussLobattoLegendre)
+
+
+def test_quadrilateral_variant_spectral_dq():
+    element = create_element(finat.ufl.FiniteElement('DQ', ufl.quadrilateral, 1, variant='spectral'))
+    assert isinstance(element.product.factors[0], finat.GaussLegendre)
+    assert isinstance(element.product.factors[1], finat.GaussLegendre)
+
+
+def test_quadrilateral_variant_spectral_dq_l2():
+    element = create_element(finat.ufl.FiniteElement('DQ L2', ufl.quadrilateral, 1, variant='spectral'))
+    assert isinstance(element.product.factors[0], finat.GaussLegendre)
+    assert isinstance(element.product.factors[1], finat.GaussLegendre)
+
+
+def test_cache_hit(ufl_element):
+    A = create_element(ufl_element)
+    B = create_element(ufl_element)
+
+    assert A is B
+
+
+def test_cache_hit_vector(ufl_vector_element):
+    A = create_element(ufl_vector_element)
+    B = create_element(ufl_vector_element)
+
+    assert A is B
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_delta_elimination.py b/tests/tsfc/test_delta_elimination.py
new file mode 100644
index 0000000000..3ac3f8dc28
--- /dev/null
+++ b/tests/tsfc/test_delta_elimination.py
@@ -0,0 +1,26 @@
+import pytest
+
+from gem.gem import Delta, Identity, Index, Indexed, one
+from gem.optimise import delta_elimination, remove_componenttensors
+
+
+def test_delta_elimination():
+    i = Index()
+    j = Index()
+    k = Index()
+    I = Identity(3)
+
+    sum_indices = (i, j)
+    factors = [Delta(i, j), Delta(i, k), Indexed(I, (j, k))]
+
+    sum_indices, factors = delta_elimination(sum_indices, factors)
+    factors = remove_componenttensors(factors)
+
+    assert sum_indices == []
+    assert factors == [one, one, Indexed(I, (k, k))]
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_dual_evaluation.py b/tests/tsfc/test_dual_evaluation.py
new file mode 100644
index 0000000000..b4f6e9770a
--- /dev/null
+++ b/tests/tsfc/test_dual_evaluation.py
@@ -0,0 +1,62 @@
+import pytest
+import ufl
+import finat.ufl
+from tsfc.finatinterface import create_element
+from tsfc import compile_expression_dual_evaluation
+
+
+def test_ufl_only_simple():
+    mesh = ufl.Mesh(finat.ufl.VectorElement("P", ufl.triangle, 1))
+    V = ufl.FunctionSpace(mesh, finat.ufl.FiniteElement("P", ufl.triangle, 2))
+    v = ufl.Coefficient(V)
+    expr = ufl.inner(v, v)
+    W = V
+    to_element = create_element(W.ufl_element())
+    kernel = compile_expression_dual_evaluation(expr, to_element, W.ufl_element())
+    assert kernel.needs_external_coords is False
+
+
+def test_ufl_only_spatialcoordinate():
+    mesh = ufl.Mesh(finat.ufl.VectorElement("P", ufl.triangle, 1))
+    V = ufl.FunctionSpace(mesh, finat.ufl.FiniteElement("P", ufl.triangle, 2))
+    x, y = ufl.SpatialCoordinate(mesh)
+    expr = x*y - y**2 + x
+    W = V
+    to_element = create_element(W.ufl_element())
+    kernel = compile_expression_dual_evaluation(expr, to_element, W.ufl_element())
+    assert kernel.needs_external_coords is True
+
+
+def test_ufl_only_from_contravariant_piola():
+    mesh = ufl.Mesh(finat.ufl.VectorElement("P", ufl.triangle, 1))
+    V = ufl.FunctionSpace(mesh, finat.ufl.FiniteElement("RT", ufl.triangle, 1))
+    v = ufl.Coefficient(V)
+    expr = ufl.inner(v, v)
+    W = ufl.FunctionSpace(mesh, finat.ufl.FiniteElement("P", ufl.triangle, 2))
+    to_element = create_element(W.ufl_element())
+    kernel = compile_expression_dual_evaluation(expr, to_element, W.ufl_element())
+    assert kernel.needs_external_coords is True
+
+
+def test_ufl_only_to_contravariant_piola():
+    mesh = ufl.Mesh(finat.ufl.VectorElement("P", ufl.triangle, 1))
+    V = ufl.FunctionSpace(mesh, finat.ufl.FiniteElement("P", ufl.triangle, 2))
+    v = ufl.Coefficient(V)
+    expr = ufl.as_vector([v, v])
+    W = ufl.FunctionSpace(mesh, finat.ufl.FiniteElement("RT", ufl.triangle, 1))
+    to_element = create_element(W.ufl_element())
+    kernel = compile_expression_dual_evaluation(expr, to_element, W.ufl_element())
+    assert kernel.needs_external_coords is True
+
+
+def test_ufl_only_shape_mismatch():
+    mesh = ufl.Mesh(finat.ufl.VectorElement("P", ufl.triangle, 1))
+    V = ufl.FunctionSpace(mesh, finat.ufl.FiniteElement("RT", ufl.triangle, 1))
+    v = ufl.Coefficient(V)
+    expr = ufl.inner(v, v)
+    assert expr.ufl_shape == ()
+    W = V
+    to_element = create_element(W.ufl_element())
+    assert to_element.value_shape == (2,)
+    with pytest.raises(ValueError):
+        compile_expression_dual_evaluation(expr, to_element, W.ufl_element())
diff --git a/tests/tsfc/test_estimated_degree.py b/tests/tsfc/test_estimated_degree.py
new file mode 100644
index 0000000000..e4842f2ee6
--- /dev/null
+++ b/tests/tsfc/test_estimated_degree.py
@@ -0,0 +1,35 @@
+import logging
+
+import pytest
+
+import ufl
+import finat.ufl
+from tsfc import compile_form
+from tsfc.logging import logger
+
+
+class MockHandler(logging.Handler):
+    def emit(self, record):
+        raise RuntimeError()
+
+
+def test_estimated_degree():
+    cell = ufl.tetrahedron
+    mesh = ufl.Mesh(finat.ufl.VectorElement('P', cell, 1))
+    V = ufl.FunctionSpace(mesh, finat.ufl.FiniteElement('P', cell, 1))
+    f = ufl.Coefficient(V)
+    u = ufl.TrialFunction(V)
+    v = ufl.TestFunction(V)
+    a = u * v * ufl.tanh(ufl.sqrt(ufl.sinh(f) / ufl.sin(f**f))) * ufl.dx
+
+    handler = MockHandler()
+    logger.addHandler(handler)
+    with pytest.raises(RuntimeError):
+        compile_form(a)
+    logger.removeHandler(handler)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_firedrake_972.py b/tests/tsfc/test_firedrake_972.py
new file mode 100644
index 0000000000..e308c7986d
--- /dev/null
+++ b/tests/tsfc/test_firedrake_972.py
@@ -0,0 +1,44 @@
+import numpy
+import pytest
+
+from ufl import (Mesh, FunctionSpace,
+                 Coefficient, TestFunction, interval, indices, dx)
+from finat.ufl import VectorElement, TensorElement
+from ufl.classes import IndexSum, Product, MultiIndex
+
+from tsfc import compile_form
+
+
+def count_flops(n):
+    mesh = Mesh(VectorElement('CG', interval, 1))
+    tfs = FunctionSpace(mesh, TensorElement('DG', interval, 1, shape=(n, n)))
+    vfs = FunctionSpace(mesh, VectorElement('DG', interval, 1, dim=n))
+
+    ensemble_f = Coefficient(vfs)
+    ensemble2_f = Coefficient(vfs)
+    phi = TestFunction(tfs)
+
+    i, j = indices(2)
+    nc = 42  # magic number
+    L = ((IndexSum(IndexSum(Product(nc * phi[i, j], Product(ensemble_f[i], ensemble_f[i])),
+                            MultiIndex((i,))), MultiIndex((j,))) * dx)
+         + (IndexSum(IndexSum(Product(nc * phi[i, j], Product(ensemble2_f[j], ensemble2_f[j])),
+                              MultiIndex((i,))), MultiIndex((j,))) * dx)
+         - (IndexSum(IndexSum(2 * nc * Product(phi[i, j], Product(ensemble_f[i], ensemble2_f[j])),
+                              MultiIndex((i,))), MultiIndex((j,))) * dx))
+
+    kernel, = compile_form(L, parameters=dict(mode='spectral'))
+    return kernel.flop_count
+
+
+def test_convergence():
+    ns = [10, 20, 40, 80, 100]
+    flops = [count_flops(n) for n in ns]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(ns))
+    assert (rates < 2).all()  # only quadratic operation count, not more
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_flexibly_indexed.py b/tests/tsfc/test_flexibly_indexed.py
new file mode 100644
index 0000000000..551e4b042b
--- /dev/null
+++ b/tests/tsfc/test_flexibly_indexed.py
@@ -0,0 +1,103 @@
+from itertools import product
+
+import numpy
+import pytest
+
+import gem
+import tsfc
+import tsfc.loopy
+
+
+parameters = tsfc.loopy.LoopyContext()
+parameters.names = {}
+
+
+def convert(expression, multiindex):
+    assert not expression.free_indices
+    element = gem.Indexed(expression, multiindex)
+    element, = gem.optimise.remove_componenttensors((element,))
+    subscript = tsfc.loopy.expression(element, parameters)
+    # Convert a pymbolic subscript expression to a rank tuple. For example
+    # the subscript:
+    #
+    #     Subscript(Variable('A'), (Sum((3,)), Sum((5,))))
+    #
+    # will yield a rank of (3, 5).
+    return sum((idx.children for idx in subscript.index), start=())
+
+
+@pytest.fixture(scope='module')
+def vector():
+    return gem.Variable('u', (12,))
+
+
+@pytest.fixture(scope='module')
+def matrix():
+    return gem.Variable('A', (10, 12))
+
+
+def test_reshape(vector):
+    expression = gem.reshape(vector, (3, 4))
+    assert expression.shape == (3, 4)
+
+    actual = [convert(expression, multiindex)
+              for multiindex in numpy.ndindex(expression.shape)]
+
+    assert [(i,) for i in range(12)] == actual
+
+
+def test_view(matrix):
+    expression = gem.view(matrix, slice(3, 8), slice(5, 12))
+    assert expression.shape == (5, 7)
+
+    actual = [convert(expression, multiindex)
+              for multiindex in numpy.ndindex(expression.shape)]
+
+    assert list(product(range(3, 8), range(5, 12))) == actual
+
+
+def test_view_view(matrix):
+    expression = gem.view(gem.view(matrix, slice(3, 8), slice(5, 12)),
+                          slice(4), slice(3, 6))
+    assert expression.shape == (4, 3)
+
+    actual = [convert(expression, multiindex)
+              for multiindex in numpy.ndindex(expression.shape)]
+
+    assert list(product(range(3, 7), range(8, 11))) == actual
+
+
+def test_view_reshape(vector):
+    expression = gem.view(gem.reshape(vector, (3, 4)), slice(2), slice(1, 3))
+    assert expression.shape == (2, 2)
+
+    actual = [convert(expression, multiindex)
+              for multiindex in numpy.ndindex(expression.shape)]
+
+    assert [(1,), (2,), (5,), (6,)] == actual
+
+
+def test_reshape_shape(vector):
+    expression = gem.reshape(gem.view(vector, slice(5, 11)), (3, 2))
+    assert expression.shape == (3, 2)
+
+    actual = [convert(expression, multiindex)
+              for multiindex in numpy.ndindex(expression.shape)]
+
+    assert [(i,) for i in range(5, 11)] == actual
+
+
+def test_reshape_reshape(vector):
+    expression = gem.reshape(gem.reshape(vector, (4, 3)), (2, 2), (3,))
+    assert expression.shape == (2, 2, 3)
+
+    actual = [convert(expression, multiindex)
+              for multiindex in numpy.ndindex(expression.shape)]
+
+    assert [(i,) for i in range(12)] == actual
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_flop_count.py b/tests/tsfc/test_flop_count.py
new file mode 100644
index 0000000000..1925fbaf28
--- /dev/null
+++ b/tests/tsfc/test_flop_count.py
@@ -0,0 +1,64 @@
+import pytest
+import gem.gem as gem
+from gem.flop_count import count_flops
+from gem.impero_utils import preprocess_gem
+from gem.impero_utils import compile_gem
+
+
+def test_count_flops(expression):
+    expr, expected = expression
+    flops = count_flops(expr)
+    assert flops == expected
+
+
+@pytest.fixture(params=("expr1", "expr2", "expr3", "expr4"))
+def expression(request):
+    if request.param == "expr1":
+        expr = gem.Sum(gem.Product(gem.Variable("a", ()), gem.Literal(2)),
+                       gem.Division(gem.Literal(3), gem.Variable("b", ())))
+        C = gem.Variable("C", (1,))
+        i, = gem.indices(1)
+        Ci = C[i]
+        expr, = preprocess_gem([expr])
+        assignments = [(Ci, expr)]
+        expr = compile_gem(assignments, (i,))
+        # C += a*2 + 3/b
+        expected = 1 + 3
+    elif request.param == "expr2":
+        expr = gem.Comparison(">=", gem.MaxValue(gem.Literal(1), gem.Literal(2)),
+                              gem.MinValue(gem.Literal(3), gem.Literal(1)))
+        C = gem.Variable("C", (1,))
+        i, = gem.indices(1)
+        Ci = C[i]
+        expr, = preprocess_gem([expr])
+        assignments = [(Ci, expr)]
+        expr = compile_gem(assignments, (i,))
+        # C += max(1, 2) >= min(3, 1)
+        expected = 1 + 3
+    elif request.param == "expr3":
+        expr = gem.Solve(gem.Identity(3), gem.Inverse(gem.Identity(3)))
+        C = gem.Variable("C", (3, 3))
+        i, j = gem.indices(2)
+        Cij = C[i, j]
+        expr, = preprocess_gem([expr[i, j]])
+        assignments = [(Cij, expr)]
+        expr = compile_gem(assignments, (i, j))
+        # C += solve(Id(3x3), Id(3x3)^{-1})
+        expected = 9 + 18 + 54 + 54
+    elif request.param == "expr4":
+        A = gem.Variable("A", (10, 15))
+        B = gem.Variable("B", (8, 10))
+        i, j, k = gem.indices(3)
+        Aij = A[i, j]
+        Bki = B[k, i]
+        Cjk = gem.IndexSum(Aij * Bki, (i,))
+        expr = Cjk
+        expr, = preprocess_gem([expr])
+        assignments = [(gem.Variable("C", (15, 8))[j, k], expr)]
+        expr = compile_gem(assignments, (j, k))
+        # Cjk += \sum_i Aij * Bki
+        expected = 2 * 10 * 8 * 15
+
+    else:
+        raise ValueError("Unexpected expression")
+    return expr, expected
diff --git a/tests/tsfc/test_gem_failure.py b/tests/tsfc/test_gem_failure.py
new file mode 100644
index 0000000000..0a79a39ab8
--- /dev/null
+++ b/tests/tsfc/test_gem_failure.py
@@ -0,0 +1,45 @@
+from ufl import (triangle, tetrahedron, FunctionSpace, Mesh,
+                 TrialFunction, TestFunction, inner, grad, dx, dS)
+from finat.ufl import FiniteElement, VectorElement
+from tsfc import compile_form
+from FIAT.hdiv_trace import TraceError
+import pytest
+
+
+@pytest.mark.parametrize('cell', [triangle, tetrahedron])
+@pytest.mark.parametrize('degree', range(3))
+def test_cell_error(cell, degree):
+    """Test that tabulating the trace element deliberatly on the
+    cell triggers `gem.Failure` to raise the TraceError exception.
+    """
+    trace_element = FiniteElement("HDiv Trace", cell, degree)
+    domain = Mesh(VectorElement("Lagrange", cell, 1))
+    space = FunctionSpace(domain, trace_element)
+    lambdar = TrialFunction(space)
+    gammar = TestFunction(space)
+
+    with pytest.raises(TraceError):
+        compile_form(lambdar * gammar * dx)
+
+
+@pytest.mark.parametrize('cell', [triangle, tetrahedron])
+@pytest.mark.parametrize('degree', range(3))
+def test_gradient_error(cell, degree):
+    """Test that tabulating gradient evaluations of the trace
+    element triggers `gem.Failure` to raise the TraceError
+    exception.
+    """
+    trace_element = FiniteElement("HDiv Trace", cell, degree)
+    domain = Mesh(VectorElement("Lagrange", cell, 1))
+    space = FunctionSpace(domain, trace_element)
+    lambdar = TrialFunction(space)
+    gammar = TestFunction(space)
+
+    with pytest.raises(TraceError):
+        compile_form(inner(grad(lambdar('+')), grad(gammar('+'))) * dS)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_geometry.py b/tests/tsfc/test_geometry.py
new file mode 100644
index 0000000000..458f1b5657
--- /dev/null
+++ b/tests/tsfc/test_geometry.py
@@ -0,0 +1,76 @@
+import pytest
+import numpy as np
+
+from FIAT.reference_element import UFCInterval, UFCTriangle, UFCTetrahedron
+from FIAT.reference_element import UFCQuadrilateral, TensorProductCell
+
+from tsfc.fem import make_cell_facet_jacobian
+
+interval = UFCInterval()
+triangle = UFCTriangle()
+quadrilateral = UFCQuadrilateral()
+tetrahedron = UFCTetrahedron()
+interval_x_interval = TensorProductCell(interval, interval)
+triangle_x_interval = TensorProductCell(triangle, interval)
+quadrilateral_x_interval = TensorProductCell(quadrilateral, interval)
+
+
+@pytest.mark.parametrize(('cell', 'cell_facet_jacobian'),
+                         [(interval, [[],
+                                      []]),
+                          (triangle, [[-1, 1],
+                                      [0, 1],
+                                      [1, 0]]),
+                          (quadrilateral, [[0, 1],
+                                           [0, 1],
+                                           [1, 0],
+                                           [1, 0]]),
+                          (tetrahedron, [[-1, -1, 1, 0, 0, 1],
+                                         [0, 0, 1, 0, 0, 1],
+                                         [1, 0, 0, 0, 0, 1],
+                                         [1, 0, 0, 1, 0, 0]])])
+def test_cell_facet_jacobian(cell, cell_facet_jacobian):
+    facet_dim = cell.get_spatial_dimension() - 1
+    for facet_number in range(len(cell.get_topology()[facet_dim])):
+        actual = make_cell_facet_jacobian(cell, facet_dim, facet_number)
+        expected = np.reshape(cell_facet_jacobian[facet_number], actual.shape)
+        assert np.allclose(expected, actual)
+
+
+@pytest.mark.parametrize(('cell', 'cell_facet_jacobian'),
+                         [(interval_x_interval, [1, 0]),
+                          (triangle_x_interval, [1, 0, 0, 1, 0, 0]),
+                          (quadrilateral_x_interval, [[1, 0, 0, 1, 0, 0]])])
+def test_cell_facet_jacobian_horiz(cell, cell_facet_jacobian):
+    dim = cell.get_spatial_dimension()
+
+    actual = make_cell_facet_jacobian(cell, (dim - 1, 0), 0)  # bottom facet
+    assert np.allclose(np.reshape(cell_facet_jacobian, actual.shape), actual)
+
+    actual = make_cell_facet_jacobian(cell, (dim - 1, 0), 1)  # top facet
+    assert np.allclose(np.reshape(cell_facet_jacobian, actual.shape), actual)
+
+
+@pytest.mark.parametrize(('cell', 'cell_facet_jacobian'),
+                         [(interval_x_interval, [[0, 1],
+                                                 [0, 1]]),
+                          (triangle_x_interval, [[-1, 0, 1, 0, 0, 1],
+                                                 [0, 0, 1, 0, 0, 1],
+                                                 [1, 0, 0, 0, 0, 1]]),
+                          (quadrilateral_x_interval, [[0, 0, 1, 0, 0, 1],
+                                                      [0, 0, 1, 0, 0, 1],
+                                                      [1, 0, 0, 0, 0, 1],
+                                                      [1, 0, 0, 0, 0, 1]])])
+def test_cell_facet_jacobian_vert(cell, cell_facet_jacobian):
+    dim = cell.get_spatial_dimension()
+    vert_dim = (dim - 2, 1)
+    for facet_number in range(len(cell.get_topology()[vert_dim])):
+        actual = make_cell_facet_jacobian(cell, vert_dim, facet_number)
+        expected = np.reshape(cell_facet_jacobian[facet_number], actual.shape)
+        assert np.allclose(expected, actual)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_idempotency.py b/tests/tsfc/test_idempotency.py
new file mode 100644
index 0000000000..cf554f4641
--- /dev/null
+++ b/tests/tsfc/test_idempotency.py
@@ -0,0 +1,72 @@
+import ufl
+import finat.ufl
+from tsfc import compile_form
+import loopy
+import pytest
+
+
+@pytest.fixture(params=[ufl.interval,
+                        ufl.triangle,
+                        ufl.quadrilateral,
+                        ufl.tetrahedron],
+                ids=lambda x: x.cellname())
+def cell(request):
+    return request.param
+
+
+@pytest.fixture(params=[1, 2],
+                ids=lambda x: "P%d-coords" % x)
+def coord_degree(request):
+    return request.param
+
+
+@pytest.fixture
+def mesh(cell, coord_degree):
+    c = finat.ufl.VectorElement("CG", cell, coord_degree)
+    return ufl.Mesh(c)
+
+
+@pytest.fixture(params=[finat.ufl.FiniteElement,
+                        finat.ufl.VectorElement,
+                        finat.ufl.TensorElement],
+                ids=["FE", "VE", "TE"])
+def V(request, mesh):
+    return ufl.FunctionSpace(mesh, request.param("CG", mesh.ufl_cell(), 2))
+
+
+@pytest.fixture(params=["cell", "ext_facet", "int_facet"])
+def itype(request):
+    return request.param
+
+
+@pytest.fixture(params=["functional", "1-form", "2-form"])
+def form(V, itype, request):
+    if request.param == "functional":
+        u = ufl.Coefficient(V)
+        v = ufl.Coefficient(V)
+    elif request.param == "1-form":
+        u = ufl.Coefficient(V)
+        v = ufl.TestFunction(V)
+    elif request.param == "2-form":
+        u = ufl.TrialFunction(V)
+        v = ufl.TestFunction(V)
+
+    if itype == "cell":
+        return ufl.inner(u, v)*ufl.dx
+    elif itype == "ext_facet":
+        return ufl.inner(u, v)*ufl.ds
+    elif itype == "int_facet":
+        return ufl.inner(u('+'), v('-'))*ufl.dS
+
+
+def test_idempotency(form):
+    k1 = compile_form(form)[0]
+    k2 = compile_form(form)[0]
+
+    assert loopy.generate_code_v2(k1.ast).device_code() == loopy.generate_code_v2(k2.ast).device_code()
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_impero_loopy_flop_counts.py b/tests/tsfc/test_impero_loopy_flop_counts.py
new file mode 100644
index 0000000000..76bdad96e3
--- /dev/null
+++ b/tests/tsfc/test_impero_loopy_flop_counts.py
@@ -0,0 +1,66 @@
+"""
+Tests impero flop counts against loopy.
+"""
+import pytest
+import numpy
+import loopy
+from tsfc import compile_form
+from ufl import (FunctionSpace, Mesh, TestFunction,
+                 TrialFunction, dx, grad, inner,
+                 interval, triangle, quadrilateral,
+                 TensorProductCell)
+from finat.ufl import FiniteElement, VectorElement
+from tsfc.parameters import target
+
+
+def count_loopy_flops(kernel):
+    name = kernel.name
+    program = kernel.ast
+    program = program.with_kernel(
+        program[name].copy(
+            target=target,
+            silenced_warnings=["insn_count_subgroups_upper_bound",
+                               "get_x_map_guessing_subgroup_size"])
+    )
+    op_map = loopy.get_op_map(program
+                              .with_entrypoints(kernel.name),
+                              subgroup_size=1)
+    return op_map.filter_by(name=['add', 'sub', 'mul', 'div',
+                                  'func:abs'],
+                            dtype=[float]).eval_and_sum({})
+
+
+@pytest.fixture(params=[interval, triangle, quadrilateral,
+                        TensorProductCell(triangle, interval)],
+                ids=lambda cell: cell.cellname())
+def cell(request):
+    return request.param
+
+
+@pytest.fixture(params=[{"mode": "vanilla"},
+                        {"mode": "spectral"}],
+                ids=["vanilla", "spectral"])
+def parameters(request):
+    return request.param
+
+
+def test_flop_count(cell, parameters):
+    mesh = Mesh(VectorElement("P", cell, 1))
+    loopy_flops = []
+    new_flops = []
+    for k in range(1, 5):
+        V = FunctionSpace(mesh, FiniteElement("P", cell, k))
+        u = TrialFunction(V)
+        v = TestFunction(V)
+        a = inner(u, v)*dx + inner(grad(u), grad(v))*dx
+        kernel, = compile_form(a, prefix="form",
+                               parameters=parameters)
+        # Record new flops here, and compare asymptotics and
+        # approximate order of magnitude.
+        new_flops.append(kernel.flop_count)
+        loopy_flops.append(count_loopy_flops(kernel))
+
+    new_flops = numpy.asarray(new_flops)
+    loopy_flops = numpy.asarray(loopy_flops)
+
+    assert all(new_flops == loopy_flops)
diff --git a/tests/tsfc/test_interpolation_factorisation.py b/tests/tsfc/test_interpolation_factorisation.py
new file mode 100644
index 0000000000..b3d4e3288b
--- /dev/null
+++ b/tests/tsfc/test_interpolation_factorisation.py
@@ -0,0 +1,64 @@
+from functools import partial
+import numpy
+import pytest
+
+from ufl import (Mesh, FunctionSpace, Coefficient,
+                 interval, quadrilateral, hexahedron)
+from finat.ufl import FiniteElement, VectorElement, TensorElement
+
+from tsfc import compile_expression_dual_evaluation
+from tsfc.finatinterface import create_element
+
+
+@pytest.fixture(params=[interval, quadrilateral, hexahedron],
+                ids=lambda x: x.cellname())
+def mesh(request):
+    return Mesh(VectorElement("P", request.param, 1))
+
+
+@pytest.fixture(params=[FiniteElement, VectorElement, TensorElement],
+                ids=lambda x: x.__name__)
+def element(request, mesh):
+    if mesh.ufl_cell() == interval:
+        family = "DP"
+    else:
+        family = "DQ"
+    return partial(request.param, family, mesh.ufl_cell())
+
+
+def flop_count(mesh, source, target):
+    Vtarget = FunctionSpace(mesh, target)
+    Vsource = FunctionSpace(mesh, source)
+    to_element = create_element(Vtarget.ufl_element())
+    expr = Coefficient(Vsource)
+    kernel = compile_expression_dual_evaluation(expr, to_element, Vtarget.ufl_element())
+    return kernel.flop_count
+
+
+def test_sum_factorisation(mesh, element):
+    # Interpolation between sum factorisable elements should cost
+    # O(p^{d+1})
+    degrees = numpy.asarray([2**n - 1 for n in range(2, 9)])
+    flops = []
+    for lo, hi in zip(degrees - 1, degrees):
+        flops.append(flop_count(mesh, element(int(lo)), element(int(hi))))
+    flops = numpy.asarray(flops)
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees))
+    assert (rates < (mesh.topological_dimension()+1)).all()
+
+
+def test_sum_factorisation_scalar_tensor(mesh, element):
+    # Interpolation into tensor elements should cost value_shape
+    # more than the equivalent scalar element.
+    degree = 2**7 - 1
+    source = element(degree - 1)
+    target = element(degree)
+    tensor_flops = flop_count(mesh, source, target)
+    expect = FunctionSpace(mesh, target).value_size
+    if isinstance(target, FiniteElement):
+        scalar_flops = tensor_flops
+    else:
+        target = target.sub_elements[0]
+        source = source.sub_elements[0]
+        scalar_flops = flop_count(mesh, source, target)
+    assert numpy.allclose(tensor_flops / scalar_flops, expect, rtol=1e-2)
diff --git a/tests/tsfc/test_pickle_gem.py b/tests/tsfc/test_pickle_gem.py
new file mode 100644
index 0000000000..beb101f912
--- /dev/null
+++ b/tests/tsfc/test_pickle_gem.py
@@ -0,0 +1,31 @@
+import pickle
+import gem
+import numpy
+import pytest
+
+
+@pytest.mark.parametrize('protocol', range(3))
+def test_pickle_gem(protocol):
+    f = gem.VariableIndex(gem.Indexed(gem.Variable('facet', (2,), dtype=gem.uint_type), (1,)))
+    q = gem.Index()
+    r = gem.Index()
+    _1 = gem.Indexed(gem.Literal(numpy.random.rand(3, 6, 8)), (f, q, r))
+    _2 = gem.Indexed(gem.view(gem.Variable('w', (None, None)), slice(8), slice(1)), (r, 0))
+    expr = gem.ComponentTensor(gem.IndexSum(gem.Product(_1, _2), (r,)), (q,))
+
+    unpickled = pickle.loads(pickle.dumps(expr, protocol))
+    assert repr(expr) == repr(unpickled)
+
+
+@pytest.mark.parametrize('protocol', range(3))
+def test_listtensor(protocol):
+    expr = gem.ListTensor([gem.Variable('x', ()), gem.Zero()])
+
+    unpickled = pickle.loads(pickle.dumps(expr, protocol))
+    assert expr == unpickled
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_refactorise.py b/tests/tsfc/test_refactorise.py
new file mode 100644
index 0000000000..8c8251dfa9
--- /dev/null
+++ b/tests/tsfc/test_refactorise.py
@@ -0,0 +1,66 @@
+from functools import partial
+
+import pytest
+
+import gem
+from gem.node import traversal
+from gem.refactorise import ATOMIC, COMPOUND, OTHER, Monomial, collect_monomials
+
+
+def test_refactorise():
+    f = gem.Variable('f', (3,))
+    u = gem.Variable('u', (3,))
+    v = gem.Variable('v', ())
+
+    i = gem.Index()
+    f_i = gem.Indexed(f, (i,))
+    u_i = gem.Indexed(u, (i,))
+
+    def classify(atomics_set, expression):
+        if expression in atomics_set:
+            return ATOMIC
+
+        for node in traversal([expression]):
+            if node in atomics_set:
+                return COMPOUND
+
+        return OTHER
+    classifier = partial(classify, {u_i, v})
+
+    # \sum_i 5*(2*u_i + -1*v)*(u_i + v*f)
+    expr = gem.IndexSum(
+        gem.Product(
+            gem.Literal(5),
+            gem.Product(
+                gem.Sum(gem.Product(gem.Literal(2), u_i),
+                        gem.Product(gem.Literal(-1), v)),
+                gem.Sum(u_i, gem.Product(v, f_i))
+            )
+        ),
+        (i,)
+    )
+
+    expected = [
+        Monomial((i,),
+                 (u_i, u_i),
+                 gem.Literal(10)),
+        Monomial((i,),
+                 (u_i, v),
+                 gem.Product(gem.Literal(5),
+                             gem.Sum(gem.Product(f_i, gem.Literal(2)),
+                                     gem.Literal(-1)))),
+        Monomial((),
+                 (v, v),
+                 gem.Product(gem.Literal(5),
+                             gem.IndexSum(gem.Product(f_i, gem.Literal(-1)),
+                                          (i,)))),
+    ]
+
+    actual, = collect_monomials([expr], classifier)
+    assert expected == list(actual)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_simplification.py b/tests/tsfc/test_simplification.py
new file mode 100644
index 0000000000..e5fe3d66e5
--- /dev/null
+++ b/tests/tsfc/test_simplification.py
@@ -0,0 +1,31 @@
+import pytest
+
+from gem.gem import Variable, Zero, Conditional, \
+    LogicalAnd, Index, Indexed, Product
+
+
+def test_conditional_simplification():
+    a = Variable("A", ())
+    b = Variable("B", ())
+
+    expr = Conditional(LogicalAnd(b, a), a, a)
+
+    assert expr == a
+
+
+def test_conditional_zero_folding():
+    b = Variable("B", ())
+    a = Variable("A", (3, ))
+    i = Index()
+    expr = Conditional(LogicalAnd(b, b),
+                       Product(Indexed(a, (i, )),
+                               Zero()),
+                       Zero())
+
+    assert expr == Zero()
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_sum_factorisation.py b/tests/tsfc/test_sum_factorisation.py
new file mode 100644
index 0000000000..3e785c5b26
--- /dev/null
+++ b/tests/tsfc/test_sum_factorisation.py
@@ -0,0 +1,174 @@
+import numpy
+import pytest
+
+from ufl import (Mesh, FunctionSpace, TestFunction, TrialFunction,
+                 TensorProductCell, dx, action, interval, triangle,
+                 quadrilateral, curl, dot, div, grad)
+from finat.ufl import (FiniteElement, VectorElement, EnrichedElement,
+                       TensorProductElement, HCurlElement, HDivElement)
+
+from tsfc import compile_form
+
+
+def helmholtz(cell, degree):
+    m = Mesh(VectorElement('CG', cell, 1))
+    V = FunctionSpace(m, FiniteElement('CG', cell, degree))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    return (u*v + dot(grad(u), grad(v)))*dx
+
+
+def split_mixed_poisson(cell, degree):
+    m = Mesh(VectorElement('CG', cell, 1))
+    if cell.cellname() in ['interval * interval', 'quadrilateral']:
+        hdiv_element = FiniteElement('RTCF', cell, degree)
+    elif cell.cellname() == 'triangle * interval':
+        U0 = FiniteElement('RT', triangle, degree)
+        U1 = FiniteElement('DG', triangle, degree - 1)
+        V0 = FiniteElement('CG', interval, degree)
+        V1 = FiniteElement('DG', interval, degree - 1)
+        Wa = HDivElement(TensorProductElement(U0, V1))
+        Wb = HDivElement(TensorProductElement(U1, V0))
+        hdiv_element = EnrichedElement(Wa, Wb)
+    elif cell.cellname() == 'quadrilateral * interval':
+        hdiv_element = FiniteElement('NCF', cell, degree)
+    RT = FunctionSpace(m, hdiv_element)
+    DG = FunctionSpace(m, FiniteElement('DQ', cell, degree - 1))
+    sigma = TrialFunction(RT)
+    u = TrialFunction(DG)
+    tau = TestFunction(RT)
+    v = TestFunction(DG)
+    return [dot(sigma, tau) * dx, div(tau) * u * dx, div(sigma) * v * dx]
+
+
+def split_vector_laplace(cell, degree):
+    m = Mesh(VectorElement('CG', cell, 1))
+    if cell.cellname() in ['interval * interval', 'quadrilateral']:
+        hcurl_element = FiniteElement('RTCE', cell, degree)
+    elif cell.cellname() == 'triangle * interval':
+        U0 = FiniteElement('RT', triangle, degree)
+        U1 = FiniteElement('CG', triangle, degree)
+        V0 = FiniteElement('CG', interval, degree)
+        V1 = FiniteElement('DG', interval, degree - 1)
+        Wa = HCurlElement(TensorProductElement(U0, V0))
+        Wb = HCurlElement(TensorProductElement(U1, V1))
+        hcurl_element = EnrichedElement(Wa, Wb)
+    elif cell.cellname() == 'quadrilateral * interval':
+        hcurl_element = FiniteElement('NCE', cell, degree)
+    RT = FunctionSpace(m, hcurl_element)
+    CG = FunctionSpace(m, FiniteElement('Q', cell, degree))
+    sigma = TrialFunction(CG)
+    u = TrialFunction(RT)
+    tau = TestFunction(CG)
+    v = TestFunction(RT)
+    return [dot(u, grad(tau))*dx, dot(grad(sigma), v)*dx, dot(curl(u), curl(v))*dx]
+
+
+def count_flops(form):
+    kernel, = compile_form(form, parameters=dict(mode='spectral'))
+    flops = kernel.flop_count
+    return flops
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 5),
+                          (TensorProductCell(interval, interval), 5),
+                          (TensorProductCell(triangle, interval), 7),
+                          (TensorProductCell(quadrilateral, interval), 7)])
+def test_lhs(cell, order):
+    degrees = list(range(3, 8))
+    if cell == TensorProductCell(triangle, interval):
+        degrees = list(range(3, 6))
+    flops = [count_flops(helmholtz(cell, degree))
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 3),
+                          (TensorProductCell(interval, interval), 3),
+                          (TensorProductCell(triangle, interval), 5),
+                          (TensorProductCell(quadrilateral, interval), 4)])
+def test_rhs(cell, order):
+    degrees = list(range(3, 8))
+    if cell == TensorProductCell(triangle, interval):
+        degrees = list(range(3, 6))
+    flops = [count_flops(action(helmholtz(cell, degree)))
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 5),
+                          (TensorProductCell(interval, interval), 5),
+                          (TensorProductCell(triangle, interval), 7),
+                          (TensorProductCell(quadrilateral, interval), 7)
+                          ])
+def test_mixed_poisson(cell, order):
+    degrees = numpy.arange(3, 8)
+    if cell == TensorProductCell(triangle, interval):
+        degrees = numpy.arange(3, 6)
+    flops = [[count_flops(form)
+              for form in split_mixed_poisson(cell, int(degree))]
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops).T) / numpy.diff(numpy.log(degrees))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 3),
+                          (TensorProductCell(interval, interval), 3),
+                          (TensorProductCell(triangle, interval), 5),
+                          (TensorProductCell(quadrilateral, interval), 4)
+                          ])
+def test_mixed_poisson_action(cell, order):
+    degrees = numpy.arange(3, 8)
+    if cell == TensorProductCell(triangle, interval):
+        degrees = numpy.arange(3, 6)
+    flops = [[count_flops(action(form))
+              for form in split_mixed_poisson(cell, int(degree))]
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops).T) / numpy.diff(numpy.log(degrees))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 5),
+                          (TensorProductCell(interval, interval), 5),
+                          (TensorProductCell(triangle, interval), 7),
+                          (TensorProductCell(quadrilateral, interval), 7)
+                          ])
+def test_vector_laplace(cell, order):
+    degrees = numpy.arange(3, 8)
+    if cell == TensorProductCell(triangle, interval):
+        degrees = numpy.arange(3, 6)
+    flops = [[count_flops(form)
+              for form in split_vector_laplace(cell, int(degree))]
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops).T) / numpy.diff(numpy.log(degrees))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 3),
+                          (TensorProductCell(interval, interval), 3),
+                          (TensorProductCell(triangle, interval), 5),
+                          (TensorProductCell(quadrilateral, interval), 4)
+                          ])
+def test_vector_laplace_action(cell, order):
+    degrees = numpy.arange(3, 8)
+    if cell == TensorProductCell(triangle, interval):
+        degrees = numpy.arange(3, 6)
+    flops = [[count_flops(action(form))
+              for form in split_vector_laplace(cell, int(degree))]
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops).T) / numpy.diff(numpy.log(degrees))
+    assert (rates < order).all()
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_syntax_sugar.py b/tests/tsfc/test_syntax_sugar.py
new file mode 100644
index 0000000000..56bbc4f4ac
--- /dev/null
+++ b/tests/tsfc/test_syntax_sugar.py
@@ -0,0 +1,41 @@
+import pytest
+import gem
+
+
+def test_expressions():
+    x = gem.Variable("x", (3, 4))
+    y = gem.Variable("y", (4, ))
+    i, j = gem.indices(2)
+
+    xij = x[i, j]
+    yj = y[j]
+
+    assert xij == gem.Indexed(x, (i, j))
+    assert yj == gem.Indexed(y, (j, ))
+
+    assert xij + yj == gem.Sum(xij, yj)
+    assert xij * yj == gem.Product(xij, yj)
+    assert xij - yj == gem.Sum(xij, gem.Product(gem.Literal(-1), yj))
+    assert xij / yj == gem.Division(xij, yj)
+
+    assert xij + 1 == gem.Sum(xij, gem.Literal(1))
+    assert 1 + xij == gem.Sum(gem.Literal(1), xij)
+
+    assert (xij + y).shape == (4, )
+
+    assert (x @ y).shape == (3, )
+
+    assert x.T.shape == (4, 3)
+
+    with pytest.raises(ValueError):
+        xij.T @ y
+
+    with pytest.raises(ValueError):
+        xij + "foo"
+
+
+def test_as_gem():
+    with pytest.raises(ValueError):
+        gem.as_gem([1, 2])
+
+    assert gem.as_gem(1) == gem.Literal(1)
diff --git a/tests/tsfc/test_tensor.py b/tests/tsfc/test_tensor.py
new file mode 100644
index 0000000000..9d09a467fb
--- /dev/null
+++ b/tests/tsfc/test_tensor.py
@@ -0,0 +1,116 @@
+import numpy
+import pytest
+
+from ufl import (Mesh, FunctionSpace,
+                 Coefficient, TestFunction, TrialFunction, dx, div,
+                 inner, interval, triangle, tetrahedron, dot, grad)
+from finat.ufl import FiniteElement, VectorElement
+
+from tsfc import compile_form
+
+
+def mass(cell, degree):
+    m = Mesh(VectorElement('CG', cell, 1))
+    V = FunctionSpace(m, FiniteElement('CG', cell, degree))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    return u*v*dx
+
+
+def poisson(cell, degree):
+    m = Mesh(VectorElement('CG', cell, 1))
+    V = FunctionSpace(m, FiniteElement('CG', cell, degree))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    return dot(grad(u), grad(v))*dx
+
+
+def helmholtz(cell, degree):
+    m = Mesh(VectorElement('CG', cell, 1))
+    V = FunctionSpace(m, FiniteElement('CG', cell, degree))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    return (u*v + dot(grad(u), grad(v)))*dx
+
+
+def elasticity(cell, degree):
+    m = Mesh(VectorElement('CG', cell, 1))
+    V = FunctionSpace(m, VectorElement('CG', cell, degree))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+
+    def eps(u):
+        return 0.5*(grad(u) + grad(u).T)
+    return inner(eps(u), eps(v))*dx
+
+
+def count_flops(form):
+    kernel, = compile_form(form, parameters=dict(mode='tensor'))
+    return kernel.flop_count
+
+
+@pytest.mark.parametrize('form', [mass, poisson, helmholtz, elasticity])
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(interval, 2),
+                          (triangle, 4),
+                          (tetrahedron, 6)])
+def test_bilinear(form, cell, order):
+    degrees = numpy.arange(1, 9 - 2 * cell.topological_dimension())
+    flops = [count_flops(form(cell, int(degree)))
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees + 1))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(interval, 1),
+                          (triangle, 2),
+                          (tetrahedron, 3)])
+def test_linear(cell, order):
+    def form(cell, degree):
+        m = Mesh(VectorElement('CG', cell, 1))
+        V = FunctionSpace(m, FiniteElement('CG', cell, degree))
+        v = TestFunction(V)
+        return v*dx
+
+    degrees = numpy.arange(2, 9 - 1.5 * cell.topological_dimension())
+    flops = [count_flops(form(cell, int(degree)))
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees + 1))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(interval, 1),
+                          (triangle, 2),
+                          (tetrahedron, 3)])
+def test_functional(cell, order):
+    def form(cell, degree):
+        m = Mesh(VectorElement('CG', cell, 1))
+        V = FunctionSpace(m, VectorElement('CG', cell, degree))
+        f = Coefficient(V)
+        return div(f)*dx
+
+    dim = cell.topological_dimension()
+    degrees = numpy.arange(2, 8 - dim) + (3 - dim)
+    flops = [count_flops(form(cell, int(degree)))
+             for degree in degrees]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees + 1))
+    assert (rates < order).all()
+
+
+def test_mini():
+    m = Mesh(VectorElement('CG', triangle, 1))
+    P1 = FiniteElement('Lagrange', triangle, 1)
+    B = FiniteElement("Bubble", triangle, 3)
+    V = FunctionSpace(m, VectorElement(P1 + B))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    a = inner(grad(u), grad(v))*dx
+    count_flops(a)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_tsfc_182.py b/tests/tsfc/test_tsfc_182.py
new file mode 100644
index 0000000000..556a6bafb0
--- /dev/null
+++ b/tests/tsfc/test_tsfc_182.py
@@ -0,0 +1,35 @@
+import pytest
+
+from ufl import Coefficient, TestFunction, dx, inner, tetrahedron, Mesh, FunctionSpace
+from finat.ufl import FiniteElement, MixedElement, VectorElement
+
+from tsfc import compile_form
+
+
+@pytest.mark.parametrize('mode', ['vanilla', 'coffee', 'spectral'])
+def test_delta_elimination(mode):
+    # Code sample courtesy of Marco Morandini:
+    # https://github.com/firedrakeproject/tsfc/issues/182
+    scheme = "default"
+    degree = 3
+
+    element_lambda = FiniteElement("Quadrature", tetrahedron, degree,
+                                   quad_scheme=scheme)
+    element_eps_p = VectorElement("Quadrature", tetrahedron, degree,
+                                  dim=6, quad_scheme=scheme)
+
+    element_chi_lambda = MixedElement(element_eps_p, element_lambda)
+    domain = Mesh(VectorElement("Lagrange", tetrahedron, 1))
+    space = FunctionSpace(domain, element_chi_lambda)
+
+    chi_lambda = Coefficient(space)
+    delta_chi_lambda = TestFunction(space)
+
+    L = inner(delta_chi_lambda, chi_lambda) * dx(degree=degree, scheme=scheme)
+    kernel, = compile_form(L, parameters={'mode': mode})
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tests/tsfc/test_tsfc_204.py b/tests/tsfc/test_tsfc_204.py
new file mode 100644
index 0000000000..89f1481590
--- /dev/null
+++ b/tests/tsfc/test_tsfc_204.py
@@ -0,0 +1,34 @@
+from tsfc import compile_form
+from ufl import (Coefficient, FacetNormal,
+                 FunctionSpace, Mesh, as_matrix,
+                 dot, dS, ds, dx, facet, grad, inner, outer, split, triangle)
+from finat.ufl import BrokenElement, FiniteElement, MixedElement, VectorElement
+
+
+def test_physically_mapped_facet():
+    mesh = Mesh(VectorElement("P", triangle, 1))
+
+    # set up variational problem
+    U = FiniteElement("Morley", mesh.ufl_cell(), 2)
+    V = FiniteElement("P", mesh.ufl_cell(), 1)
+    R = FiniteElement("P", mesh.ufl_cell(), 1)
+    Vv = VectorElement(BrokenElement(V))
+    Qhat = VectorElement(BrokenElement(V[facet]), dim=2)
+    Vhat = VectorElement(V[facet], dim=2)
+    Z = FunctionSpace(mesh, MixedElement(U, Vv, Qhat, Vhat, R))
+
+    z = Coefficient(Z)
+    u, d, qhat, dhat, lam = split(z)
+
+    s = FacetNormal(mesh)
+    trans = as_matrix([[1, 0], [0, 1]])
+    mat = trans*grad(grad(u))*trans + outer(d, d) * u
+    J = (u**2*dx
+         + u**3*dx
+         + u**4*dx
+         + inner(mat, mat)*dx
+         + inner(grad(d), grad(d))*dx
+         + dot(s, d)**2*ds)
+    L_match = inner(qhat, dhat - d)
+    L = J + inner(lam, inner(d, d)-1)*dx + (L_match('+') + L_match('-'))*dS + L_match*ds
+    compile_form(L)
diff --git a/tests/tsfc/test_tsfc_274.py b/tests/tsfc/test_tsfc_274.py
new file mode 100644
index 0000000000..453d8746e8
--- /dev/null
+++ b/tests/tsfc/test_tsfc_274.py
@@ -0,0 +1,41 @@
+import gem
+import numpy
+from finat.point_set import PointSet
+from gem.interpreter import evaluate
+from tsfc.finatinterface import create_element
+from ufl import quadrilateral
+from finat.ufl import FiniteElement, RestrictedElement
+
+
+def test_issue_274():
+    # See https://github.com/firedrakeproject/tsfc/issues/274
+    ufl_element = RestrictedElement(
+        FiniteElement("Q", quadrilateral, 2), restriction_domain="facet"
+    )
+    ps = PointSet([[0.5]])
+    finat_element = create_element(ufl_element)
+    evaluations = []
+    for eid in range(4):
+        (val,) = finat_element.basis_evaluation(0, ps, (1, eid)).values()
+        evaluations.append(val)
+
+    i = gem.Index()
+    j = gem.Index()
+    (expr,) = evaluate(
+        [
+            gem.ComponentTensor(
+                gem.Indexed(gem.select_expression(evaluations, i), (j,)),
+                (*ps.indices, i, j),
+            )
+        ]
+    )
+
+    (expect,) = evaluate(
+        [
+            gem.ComponentTensor(
+                gem.Indexed(gem.ListTensor(evaluations), (i, j)), (*ps.indices, i, j)
+            )
+        ]
+    )
+
+    assert numpy.allclose(expr.arr, expect.arr)
diff --git a/tests/tsfc/test_underintegration.py b/tests/tsfc/test_underintegration.py
new file mode 100644
index 0000000000..24221e05a7
--- /dev/null
+++ b/tests/tsfc/test_underintegration.py
@@ -0,0 +1,106 @@
+from functools import reduce
+
+import numpy
+import pytest
+
+from ufl import (Mesh, FunctionSpace, TestFunction, TrialFunction, TensorProductCell, dx,
+                 action, interval, quadrilateral, dot, grad)
+from finat.ufl import FiniteElement, VectorElement
+
+from FIAT import ufc_cell
+from FIAT.quadrature import GaussLobattoLegendreQuadratureLineRule, GaussLegendreQuadratureLineRule
+
+from finat.point_set import GaussLobattoLegendrePointSet, GaussLegendrePointSet
+from finat.quadrature import QuadratureRule, TensorProductQuadratureRule
+
+from tsfc import compile_form
+
+
+def gll_quadrature_rule(cell, elem_deg):
+    fiat_cell = ufc_cell("interval")
+    fiat_rule = GaussLobattoLegendreQuadratureLineRule(fiat_cell, elem_deg + 1)
+    line_rules = [QuadratureRule(GaussLobattoLegendrePointSet(fiat_rule.get_points()),
+                                 fiat_rule.get_weights())
+                  for _ in range(cell.topological_dimension())]
+    finat_rule = reduce(lambda a, b: TensorProductQuadratureRule([a, b]), line_rules)
+    return finat_rule
+
+
+def gl_quadrature_rule(cell, elem_deg):
+    fiat_cell = ufc_cell("interval")
+    fiat_rule = GaussLegendreQuadratureLineRule(fiat_cell, elem_deg + 1)
+    line_rules = [QuadratureRule(GaussLegendrePointSet(fiat_rule.get_points()),
+                                 fiat_rule.get_weights())
+                  for _ in range(cell.topological_dimension())]
+    finat_rule = reduce(lambda a, b: TensorProductQuadratureRule([a, b]), line_rules)
+    return finat_rule
+
+
+def mass_cg(cell, degree):
+    m = Mesh(VectorElement('Q', cell, 1))
+    V = FunctionSpace(m, FiniteElement('Q', cell, degree, variant='spectral'))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    return u*v*dx(scheme=gll_quadrature_rule(cell, degree))
+
+
+def mass_dg(cell, degree):
+    m = Mesh(VectorElement('Q', cell, 1))
+    V = FunctionSpace(m, FiniteElement('DQ', cell, degree, variant='spectral'))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    return u*v*dx(scheme=gl_quadrature_rule(cell, degree))
+
+
+def laplace(cell, degree):
+    m = Mesh(VectorElement('Q', cell, 1))
+    V = FunctionSpace(m, FiniteElement('Q', cell, degree, variant='spectral'))
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    return dot(grad(u), grad(v))*dx(scheme=gll_quadrature_rule(cell, degree))
+
+
+def count_flops(form):
+    kernel, = compile_form(form, parameters=dict(mode='spectral'))
+    return kernel.flop_count
+
+
+@pytest.mark.parametrize('form', [mass_cg, mass_dg])
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 2),
+                          (TensorProductCell(interval, interval), 2),
+                          (TensorProductCell(quadrilateral, interval), 3)])
+def test_mass(form, cell, order):
+    degrees = numpy.arange(4, 10)
+    flops = [count_flops(form(cell, int(degree))) for degree in degrees]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees + 1))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize('form', [mass_cg, mass_dg])
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 2),
+                          (TensorProductCell(interval, interval), 2),
+                          (TensorProductCell(quadrilateral, interval), 3)])
+def test_mass_action(form, cell, order):
+    degrees = numpy.arange(4, 10)
+    flops = [count_flops(action(form(cell, int(degree)))) for degree in degrees]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees + 1))
+    assert (rates < order).all()
+
+
+@pytest.mark.parametrize(('cell', 'order'),
+                         [(quadrilateral, 4),
+                          (TensorProductCell(interval, interval), 4),
+                          (TensorProductCell(quadrilateral, interval), 5)])
+def test_laplace(cell, order):
+    degrees = numpy.arange(4, 10)
+    flops = [count_flops(laplace(cell, int(degree))) for degree in degrees]
+    rates = numpy.diff(numpy.log(flops)) / numpy.diff(numpy.log(degrees + 1))
+    assert (rates < order).all()
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    pytest.main(args=[os.path.abspath(__file__)] + sys.argv[1:])
diff --git a/tsfc/__init__.py b/tsfc/__init__.py
new file mode 100644
index 0000000000..f9075c71a6
--- /dev/null
+++ b/tsfc/__init__.py
@@ -0,0 +1,22 @@
+from tsfc.driver import compile_form, compile_expression_dual_evaluation  # noqa: F401
+from tsfc.parameters import default_parameters  # noqa: F401
+
+try:
+    from firedrake_citations import Citations
+    Citations().add("Kirby2006", """
+@Article{Kirby2006,
+  author =       {Kirby, Robert C. and Logg, Anders},
+  title =        {A Compiler for Variational Forms},
+  journal =      {ACM Trans. Math. Softw.},
+  year =         2006,
+  volume =       32,
+  number =       3,
+  pages =        {417--444},
+  month =        sep,
+  numpages =     28,
+  doi =          {10.1145/1163641.1163644},
+  acmid =        1163644,
+}""")
+    del Citations
+except ImportError:
+    pass
diff --git a/tsfc/coffee_mode.py b/tsfc/coffee_mode.py
new file mode 100644
index 0000000000..632b915b41
--- /dev/null
+++ b/tsfc/coffee_mode.py
@@ -0,0 +1,81 @@
+from functools import partial, reduce
+
+from gem.node import traversal, Memoizer
+from gem.gem import Failure, Sum, index_sum
+from gem.optimise import replace_division, unroll_indexsum
+from gem.refactorise import collect_monomials
+from gem.unconcatenate import unconcatenate
+from gem.coffee import optimise_monomial_sum
+from gem.utils import groupby
+
+import tsfc.spectral as spectral
+
+
+def Integrals(expressions, quadrature_multiindex, argument_multiindices, parameters):
+    """Constructs an integral representation for each GEM integrand
+    expression.
+
+    :arg expressions: integrand multiplied with quadrature weight;
+                      multi-root GEM expression DAG
+    :arg quadrature_multiindex: quadrature multiindex (tuple)
+    :arg argument_multiindices: tuple of argument multiindices,
+                                one multiindex for each argument
+    :arg parameters: parameters dictionary
+
+    :returns: list of integral representations
+    """
+    # Unroll
+    max_extent = parameters["unroll_indexsum"]
+    if max_extent:
+        def predicate(index):
+            return index.extent <= max_extent
+        expressions = unroll_indexsum(expressions, predicate=predicate)
+    # Integral representation: just a GEM expression
+    return replace_division([index_sum(e, quadrature_multiindex) for e in expressions])
+
+
+def flatten(var_reps, index_cache):
+    """Flatten mode-specific intermediate representation to a series of
+    assignments.
+
+    :arg var_reps: series of (return variable, [integral representation]) pairs
+    :arg index_cache: cache :py:class:`dict` for :py:func:`unconcatenate`
+
+    :returns: series of (return variable, GEM expression root) pairs
+    """
+    assignments = unconcatenate([(variable, reduce(Sum, reps))
+                                 for variable, reps in var_reps],
+                                cache=index_cache)
+
+    def group_key(assignment):
+        variable, expression = assignment
+        return variable.free_indices
+
+    for argument_indices, assignment_group in groupby(assignments, group_key):
+        variables, expressions = zip(*assignment_group)
+        expressions = optimise_expressions(expressions, argument_indices)
+        for var, expr in zip(variables, expressions):
+            yield (var, expr)
+
+
+finalise_options = dict(remove_componenttensors=False)
+
+
+def optimise_expressions(expressions, argument_indices):
+    """Perform loop optimisations on GEM DAGs
+
+    :arg expressions: list of GEM DAGs
+    :arg argument_indices: tuple of argument indices
+
+    :returns: list of optimised GEM DAGs
+    """
+    # Skip optimisation for if Failure node is present
+    for n in traversal(expressions):
+        if isinstance(n, Failure):
+            return expressions
+
+    # Apply argument factorisation unconditionally
+    classifier = partial(spectral.classify, set(argument_indices),
+                         delta_inside=Memoizer(spectral._delta_inside))
+    monomial_sums = collect_monomials(expressions, classifier)
+    return [optimise_monomial_sum(ms, argument_indices) for ms in monomial_sums]
diff --git a/tsfc/driver.py b/tsfc/driver.py
new file mode 100644
index 0000000000..6e3c3baaf3
--- /dev/null
+++ b/tsfc/driver.py
@@ -0,0 +1,331 @@
+import collections
+import time
+import sys
+from itertools import chain
+from finat.physically_mapped import DirectlyDefinedElement, PhysicallyMappedElement
+
+import ufl
+from ufl.algorithms import extract_arguments, extract_coefficients
+from ufl.algorithms.analysis import has_type
+from ufl.classes import Form, GeometricQuantity
+from ufl.domain import extract_unique_domain
+
+import gem
+import gem.impero_utils as impero_utils
+
+import finat
+
+from tsfc import fem, ufl_utils
+from tsfc.logging import logger
+from tsfc.parameters import default_parameters, is_complex
+from tsfc.ufl_utils import apply_mapping, extract_firedrake_constants
+import tsfc.kernel_interface.firedrake_loopy as firedrake_interface_loopy
+
+# To handle big forms. The various transformations might need a deeper stack
+sys.setrecursionlimit(3000)
+
+
+TSFCIntegralDataInfo = collections.namedtuple("TSFCIntegralDataInfo",
+                                              ["domain", "integral_type", "subdomain_id", "domain_number",
+                                               "arguments",
+                                               "coefficients", "coefficient_numbers"])
+TSFCIntegralDataInfo.__doc__ = """
+    Minimal set of objects for kernel builders.
+
+    domain - The mesh.
+    integral_type - The type of integral.
+    subdomain_id - What is the subdomain id for this kernel.
+    domain_number - Which domain number in the original form
+        does this kernel correspond to (can be used to index into
+        original_form.ufl_domains() to get the correct domain).
+    coefficients - A list of coefficients.
+    coefficient_numbers - A list of which coefficients from the
+        form the kernel needs.
+
+    This is a minimal set of objects that kernel builders need to
+    construct a kernel from :attr:`integrals` of :class:`~ufl.IntegralData`.
+    """
+
+
+def compile_form(form, prefix="form", parameters=None, interface=None, diagonal=False, log=False):
+    """Compiles a UFL form into a set of assembly kernels.
+
+    :arg form: UFL form
+    :arg prefix: kernel name will start with this string
+    :arg parameters: parameters object
+    :arg diagonal: Are we building a kernel for the diagonal of a rank-2 element tensor?
+    :arg log: bool if the Kernel should be profiled with Log events
+    :returns: list of kernels
+    """
+    cpu_time = time.time()
+
+    assert isinstance(form, Form)
+
+    GREEN = "\033[1;37;32m%s\033[0m"
+
+    # Determine whether in complex mode:
+    complex_mode = parameters and is_complex(parameters.get("scalar_type"))
+    fd = ufl_utils.compute_form_data(form, complex_mode=complex_mode)
+    logger.info(GREEN % "compute_form_data finished in %g seconds.", time.time() - cpu_time)
+
+    kernels = []
+    for integral_data in fd.integral_data:
+        start = time.time()
+        kernel = compile_integral(integral_data, fd, prefix, parameters, interface=interface, diagonal=diagonal, log=log)
+        if kernel is not None:
+            kernels.append(kernel)
+        logger.info(GREEN % "compile_integral finished in %g seconds.", time.time() - start)
+
+    logger.info(GREEN % "TSFC finished in %g seconds.", time.time() - cpu_time)
+    return kernels
+
+
+def compile_integral(integral_data, form_data, prefix, parameters, interface, *, diagonal=False, log=False):
+    """Compiles a UFL integral into an assembly kernel.
+
+    :arg integral_data: UFL integral data
+    :arg form_data: UFL form data
+    :arg prefix: kernel name will start with this string
+    :arg parameters: parameters object
+    :arg interface: backend module for the kernel interface
+    :arg diagonal: Are we building a kernel for the diagonal of a rank-2 element tensor?
+    :arg log: bool if the Kernel should be profiled with Log events
+    :returns: a kernel constructed by the kernel interface
+    """
+    parameters = preprocess_parameters(parameters)
+    if interface is None:
+        interface = firedrake_interface_loopy.KernelBuilder
+    scalar_type = parameters["scalar_type"]
+    integral_type = integral_data.integral_type
+    if integral_type.startswith("interior_facet") and diagonal:
+        raise NotImplementedError("Sorry, we can't assemble the diagonal of a form for interior facet integrals")
+    mesh = integral_data.domain
+    arguments = form_data.preprocessed_form.arguments()
+    kernel_name = f"{prefix}_{integral_type}_integral"
+    # Dict mapping domains to index in original_form.ufl_domains()
+    domain_numbering = form_data.original_form.domain_numbering()
+    domain_number = domain_numbering[integral_data.domain]
+    coefficients = [form_data.function_replace_map[c] for c in integral_data.integral_coefficients]
+    # This is which coefficient in the original form the
+    # current coefficient is.
+    # Consider f*v*dx + g*v*ds, the full form contains two
+    # coefficients, but each integral only requires one.
+    coefficient_numbers = tuple(form_data.original_coefficient_positions[i]
+                                for i, (_, enabled) in enumerate(zip(form_data.reduced_coefficients, integral_data.enabled_coefficients))
+                                if enabled)
+    integral_data_info = TSFCIntegralDataInfo(domain=integral_data.domain,
+                                              integral_type=integral_data.integral_type,
+                                              subdomain_id=integral_data.subdomain_id,
+                                              domain_number=domain_number,
+                                              arguments=arguments,
+                                              coefficients=coefficients,
+                                              coefficient_numbers=coefficient_numbers)
+    builder = interface(integral_data_info,
+                        scalar_type,
+                        diagonal=diagonal)
+    builder.set_coordinates(mesh)
+    builder.set_cell_sizes(mesh)
+    builder.set_coefficients(integral_data, form_data)
+    # TODO: We do not want pass constants to kernels that do not need them
+    # so we should attach the constants to integral data instead
+    builder.set_constants(form_data.constants)
+    ctx = builder.create_context()
+    for integral in integral_data.integrals:
+        params = parameters.copy()
+        params.update(integral.metadata())  # integral metadata overrides
+        integrand = ufl.replace(integral.integrand(), form_data.function_replace_map)
+        integrand_exprs = builder.compile_integrand(integrand, params, ctx)
+        integral_exprs = builder.construct_integrals(integrand_exprs, params)
+        builder.stash_integrals(integral_exprs, params, ctx)
+    return builder.construct_kernel(kernel_name, ctx, log)
+
+
+def preprocess_parameters(parameters):
+    if parameters is None:
+        parameters = default_parameters()
+    else:
+        _ = default_parameters()
+        _.update(parameters)
+        parameters = _
+    # Remove these here, they're handled later on.
+    if parameters.get("quadrature_degree") in ["auto", "default", None, -1, "-1"]:
+        del parameters["quadrature_degree"]
+    if parameters.get("quadrature_rule") in ["auto", "default", None]:
+        del parameters["quadrature_rule"]
+    return parameters
+
+
+def compile_expression_dual_evaluation(expression, to_element, ufl_element, *,
+                                       domain=None, interface=None,
+                                       parameters=None, log=False):
+    """Compile a UFL expression to be evaluated against a compile-time known reference element's dual basis.
+
+    Useful for interpolating UFL expressions into e.g. N1curl spaces.
+
+    :arg expression: UFL expression
+    :arg to_element: A FInAT element for the target space
+    :arg ufl_element: The UFL element of the target space.
+    :arg domain: optional UFL domain the expression is defined on (required when expression contains no domain).
+    :arg interface: backend module for the kernel interface
+    :arg parameters: parameters object
+    :arg log: bool if the Kernel should be profiled with Log events
+    :returns: Loopy-based ExpressionKernel object.
+    """
+    if parameters is None:
+        parameters = default_parameters()
+    else:
+        _ = default_parameters()
+        _.update(parameters)
+        parameters = _
+
+    # Determine whether in complex mode
+    complex_mode = is_complex(parameters["scalar_type"])
+
+    if isinstance(to_element, (PhysicallyMappedElement, DirectlyDefinedElement)):
+        raise NotImplementedError("Don't know how to interpolate onto zany spaces, sorry")
+
+    orig_expression = expression
+
+    # Map into reference space
+    expression = apply_mapping(expression, ufl_element, domain)
+
+    # Apply UFL preprocessing
+    expression = ufl_utils.preprocess_expression(expression,
+                                                 complex_mode=complex_mode)
+
+    # Initialise kernel builder
+    if interface is None:
+        # Delayed import, loopy is a runtime dependency
+        from tsfc.kernel_interface.firedrake_loopy import ExpressionKernelBuilder as interface
+
+    builder = interface(parameters["scalar_type"])
+    arguments = extract_arguments(expression)
+    argument_multiindices = tuple(builder.create_element(arg.ufl_element()).get_indices()
+                                  for arg in arguments)
+
+    # Replace coordinates (if any) unless otherwise specified by kwarg
+    if domain is None:
+        domain = extract_unique_domain(expression)
+    assert domain is not None
+
+    # Collect required coefficients and determine numbering
+    coefficients = extract_coefficients(expression)
+    orig_coefficients = extract_coefficients(orig_expression)
+    coefficient_numbers = tuple(orig_coefficients.index(c) for c in coefficients)
+    builder.set_coefficient_numbers(coefficient_numbers)
+
+    needs_external_coords = False
+    if has_type(expression, GeometricQuantity) or any(fem.needs_coordinate_mapping(c.ufl_element()) for c in coefficients):
+        # Create a fake coordinate coefficient for a domain.
+        coords_coefficient = ufl.Coefficient(ufl.FunctionSpace(domain, domain.ufl_coordinate_element()))
+        builder.domain_coordinate[domain] = coords_coefficient
+        builder.set_cell_sizes(domain)
+        coefficients = [coords_coefficient] + coefficients
+        needs_external_coords = True
+    builder.set_coefficients(coefficients)
+
+    constants = extract_firedrake_constants(expression)
+    builder.set_constants(constants)
+
+    # Split mixed coefficients
+    expression = ufl_utils.split_coefficients(expression, builder.coefficient_split)
+
+    # Set up kernel config for translation of UFL expression to gem
+    kernel_cfg = dict(interface=builder,
+                      ufl_cell=domain.ufl_cell(),
+                      # FIXME: change if we ever implement
+                      # interpolation on facets.
+                      integral_type="cell",
+                      argument_multiindices=argument_multiindices,
+                      index_cache={},
+                      scalar_type=parameters["scalar_type"])
+
+    # Allow interpolation onto QuadratureElements to refer to the quadrature
+    # rule they represent
+    if isinstance(to_element, finat.QuadratureElement):
+        kernel_cfg["quadrature_rule"] = to_element._rule
+
+    # Create callable for translation of UFL expression to gem
+    fn = DualEvaluationCallable(expression, kernel_cfg)
+
+    # Get the gem expression for dual evaluation and corresponding basis
+    # indices needed for compilation of the expression
+    evaluation, basis_indices = to_element.dual_evaluation(fn)
+
+    # Build kernel body
+    return_indices = basis_indices + tuple(chain(*argument_multiindices))
+    return_shape = tuple(i.extent for i in return_indices)
+    return_var = gem.Variable('A', return_shape)
+    return_expr = gem.Indexed(return_var, return_indices)
+
+    # TODO: one should apply some GEM optimisations as in assembly,
+    # but we don't for now.
+    evaluation, = impero_utils.preprocess_gem([evaluation])
+    impero_c = impero_utils.compile_gem([(return_expr, evaluation)], return_indices)
+    index_names = dict((idx, "p%d" % i) for (i, idx) in enumerate(basis_indices))
+    # Handle kernel interface requirements
+    builder.register_requirements([evaluation])
+    builder.set_output(return_var)
+    # Build kernel tuple
+    return builder.construct_kernel(impero_c, index_names, needs_external_coords, log=log)
+
+
+class DualEvaluationCallable(object):
+    """
+    Callable representing a function to dual evaluate.
+
+    When called, this takes in a
+    :class:`finat.point_set.AbstractPointSet` and returns a GEM
+    expression for evaluation of the function at those points.
+
+    :param expression: UFL expression for the function to dual evaluate.
+    :param kernel_cfg: A kernel configuration for creation of a
+        :class:`GemPointContext` or a :class:`PointSetContext`
+
+    Not intended for use outside of
+    :func:`compile_expression_dual_evaluation`.
+    """
+    def __init__(self, expression, kernel_cfg):
+        self.expression = expression
+        self.kernel_cfg = kernel_cfg
+
+    def __call__(self, ps):
+        """The function to dual evaluate.
+
+        :param ps: The :class:`finat.point_set.AbstractPointSet` for
+            evaluating at
+        :returns: a gem expression representing the evaluation of the
+            input UFL expression at the given point set ``ps``.
+            For point set points with some shape ``(*value_shape)``
+            (i.e. ``()`` for scalar points ``(x)`` for vector points
+            ``(x, y)`` for tensor points etc) then the gem expression
+            has shape ``(*value_shape)`` and free indices corresponding
+            to the input :class:`finat.point_set.AbstractPointSet`'s
+            free indices alongside any input UFL expression free
+            indices.
+        """
+
+        if not isinstance(ps, finat.point_set.AbstractPointSet):
+            raise ValueError("Callable argument not a point set!")
+
+        # Avoid modifying saved kernel config
+        kernel_cfg = self.kernel_cfg.copy()
+
+        if isinstance(ps, finat.point_set.UnknownPointSet):
+            # Run time known points
+            kernel_cfg.update(point_indices=ps.indices, point_expr=ps.expression)
+            # GemPointContext's aren't allowed to have quadrature rules
+            kernel_cfg.pop("quadrature_rule", None)
+            translation_context = fem.GemPointContext(**kernel_cfg)
+        else:
+            # Compile time known points
+            kernel_cfg.update(point_set=ps)
+            translation_context = fem.PointSetContext(**kernel_cfg)
+
+        gem_expr, = fem.compile_ufl(self.expression, translation_context, point_sum=False)
+        # In some cases ps.indices may be dropped from expr, but nothing
+        # new should now appear
+        argument_multiindices = kernel_cfg["argument_multiindices"]
+        assert set(gem_expr.free_indices) <= set(chain(ps.indices, *argument_multiindices))
+
+        return gem_expr
diff --git a/tsfc/fem.py b/tsfc/fem.py
new file mode 100644
index 0000000000..abc8bc7cb8
--- /dev/null
+++ b/tsfc/fem.py
@@ -0,0 +1,777 @@
+"""Functions to translate UFL finite element objects and reference
+geometric quantities into GEM expressions."""
+
+import collections
+import itertools
+from functools import singledispatch
+
+import gem
+import numpy
+import ufl
+from FIAT.orientation_utils import Orientation as FIATOrientation
+from FIAT.reference_element import UFCHexahedron, UFCSimplex, make_affine_mapping
+from FIAT.reference_element import TensorProductCell
+from finat.physically_mapped import (NeedsCoordinateMappingElement,
+                                     PhysicalGeometry)
+from finat.point_set import PointSet, PointSingleton
+from finat.quadrature import make_quadrature
+from gem.node import traversal
+from gem.optimise import constant_fold_zero, ffc_rounding
+from gem.unconcatenate import unconcatenate
+from gem.utils import cached_property
+from ufl.classes import (Argument, CellCoordinate, CellEdgeVectors,
+                         CellFacetJacobian, CellOrientation, CellOrigin,
+                         CellVertices, CellVolume, Coefficient, FacetArea,
+                         FacetCoordinate, GeometricQuantity, Jacobian,
+                         JacobianDeterminant, NegativeRestricted,
+                         PositiveRestricted, QuadratureWeight,
+                         ReferenceCellEdgeVectors, ReferenceCellVolume,
+                         ReferenceFacetVolume, ReferenceNormal,
+                         SpatialCoordinate)
+from ufl.corealg.map_dag import map_expr_dag, map_expr_dags
+from ufl.corealg.multifunction import MultiFunction
+from ufl.domain import extract_unique_domain
+
+from tsfc import ufl2gem
+from tsfc.finatinterface import as_fiat_cell, create_element
+from tsfc.kernel_interface import ProxyKernelInterface
+from tsfc.modified_terminals import (analyse_modified_terminal,
+                                     construct_modified_terminal)
+from tsfc.parameters import is_complex
+from tsfc.ufl_utils import (ModifiedTerminalMixin, PickRestriction,
+                            TSFCConstantMixin, entity_avg, one_times,
+                            preprocess_expression, simplify_abs)
+
+
+class ContextBase(ProxyKernelInterface):
+    """Common UFL -> GEM translation context."""
+
+    keywords = ('ufl_cell',
+                'fiat_cell',
+                'integral_type',
+                'integration_dim',
+                'entity_ids',
+                'argument_multiindices',
+                'facetarea',
+                'index_cache',
+                'scalar_type')
+
+    def __init__(self, interface, **kwargs):
+        ProxyKernelInterface.__init__(self, interface)
+
+        invalid_keywords = set(kwargs.keys()) - set(self.keywords)
+        if invalid_keywords:
+            raise ValueError("unexpected keyword argument '{0}'".format(invalid_keywords.pop()))
+        self.__dict__.update(kwargs)
+
+    @cached_property
+    def fiat_cell(self):
+        return as_fiat_cell(self.ufl_cell)
+
+    @cached_property
+    def integration_dim(self):
+        return self.fiat_cell.get_dimension()
+
+    entity_ids = [0]
+
+    @cached_property
+    def epsilon(self):
+        return numpy.finfo(self.scalar_type).resolution
+
+    @cached_property
+    def complex_mode(self):
+        return is_complex(self.scalar_type)
+
+    def entity_selector(self, callback, restriction):
+        """Selects code for the correct entity at run-time.  Callback
+        generates code for a specified entity.
+
+        This function passes ``callback`` the entity number.
+
+        :arg callback: A function to be called with an entity number
+                       that generates code for that entity.
+        :arg restriction: Restriction of the modified terminal, used
+                          for entity selection.
+        """
+        if len(self.entity_ids) == 1:
+            return callback(self.entity_ids[0])
+        else:
+            f = self.entity_number(restriction)
+            return gem.select_expression(list(map(callback, self.entity_ids)), f)
+
+    argument_multiindices = ()
+
+    @cached_property
+    def index_cache(self):
+        return {}
+
+    @cached_property
+    def translator(self):
+        # NOTE: reference cycle!
+        return Translator(self)
+
+    @cached_property
+    def use_canonical_quadrature_point_ordering(self):
+        return isinstance(self.fiat_cell, UFCHexahedron) and self.integral_type in ['exterior_facet', 'interior_facet']
+
+
+class CoordinateMapping(PhysicalGeometry):
+    """Callback class that provides physical geometry to FInAT elements.
+
+    Required for elements whose basis transformation requires physical
+    geometry such as Argyris and Hermite.
+
+    :arg mt: The modified terminal whose element will be tabulated.
+    :arg interface: The interface carrying information (generally a
+        :class:`PointSetContext`).
+    """
+    def __init__(self, mt, interface):
+        super().__init__()
+        self.mt = mt
+        self.interface = interface
+
+    def preprocess(self, expr, context):
+        """Preprocess a UFL expression for translation.
+
+        :arg expr: A UFL expression
+        :arg context: The translation context.
+        :returns: A new UFL expression
+        """
+        ifacet = self.interface.integral_type.startswith("interior_facet")
+        return preprocess_expression(expr, complex_mode=context.complex_mode,
+                                     do_apply_restrictions=ifacet)
+
+    @property
+    def config(self):
+        config = {name: getattr(self.interface, name)
+                  for name in ["ufl_cell", "index_cache", "scalar_type"]}
+        config["interface"] = self.interface
+        return config
+
+    def cell_size(self):
+        return self.interface.cell_size(self.mt.restriction)
+
+    def jacobian_at(self, point):
+        ps = PointSingleton(point)
+        expr = Jacobian(extract_unique_domain(self.mt.terminal))
+        assert ps.expression.shape == (extract_unique_domain(expr).topological_dimension(), )
+        if self.mt.restriction == '+':
+            expr = PositiveRestricted(expr)
+        elif self.mt.restriction == '-':
+            expr = NegativeRestricted(expr)
+        config = {"point_set": PointSingleton(point)}
+        config.update(self.config)
+        context = PointSetContext(**config)
+        expr = self.preprocess(expr, context)
+        return map_expr_dag(context.translator, expr)
+
+    def detJ_at(self, point):
+        expr = JacobianDeterminant(extract_unique_domain(self.mt.terminal))
+        if self.mt.restriction == '+':
+            expr = PositiveRestricted(expr)
+        elif self.mt.restriction == '-':
+            expr = NegativeRestricted(expr)
+        config = {"point_set": PointSingleton(point)}
+        config.update(self.config)
+        context = PointSetContext(**config)
+        expr = self.preprocess(expr, context)
+        return map_expr_dag(context.translator, expr)
+
+    def reference_normals(self):
+        cell = self.interface.fiat_cell
+        sd = cell.get_spatial_dimension()
+        num_faces = len(cell.get_topology()[sd-1])
+
+        return gem.Literal(numpy.asarray([cell.compute_normal(i) for i in range(num_faces)]))
+
+    def reference_edge_tangents(self):
+        cell = self.interface.fiat_cell
+        num_edges = len(cell.get_topology()[1])
+        return gem.Literal(numpy.asarray([cell.compute_edge_tangent(i) for i in range(num_edges)]))
+
+    def physical_tangents(self):
+        cell = self.interface.fiat_cell
+        sd = cell.get_spatial_dimension()
+        num_edges = len(cell.get_topology()[1])
+        els = self.physical_edge_lengths()
+        rts = gem.ListTensor([cell.compute_tangents(1, i)[0] / els[i] for i in range(num_edges)])
+        jac = self.jacobian_at(cell.make_points(sd, 0, sd+1)[0])
+
+        return rts @ jac.T
+
+    def physical_normals(self):
+        cell = self.interface.fiat_cell
+        if not (isinstance(cell, UFCSimplex) and cell.get_dimension() == 2):
+            raise NotImplementedError("Can't do physical normals on that cell yet")
+
+        num_edges = len(cell.get_topology()[1])
+        pts = self.physical_tangents()
+        return gem.ListTensor([[pts[i, 1], -1*pts[i, 0]] for i in range(num_edges)])
+
+    def physical_edge_lengths(self):
+        expr = ufl.classes.CellEdgeVectors(extract_unique_domain(self.mt.terminal))
+        if self.mt.restriction == '+':
+            expr = PositiveRestricted(expr)
+        elif self.mt.restriction == '-':
+            expr = NegativeRestricted(expr)
+
+        cell = self.interface.fiat_cell
+        sd = cell.get_spatial_dimension()
+        num_edges = len(cell.get_topology()[1])
+        expr = ufl.as_vector([ufl.sqrt(ufl.dot(expr[i, :], expr[i, :])) for i in range(num_edges)])
+        config = {"point_set": PointSingleton(cell.make_points(sd, 0, sd+1)[0])}
+        config.update(self.config)
+        context = PointSetContext(**config)
+        expr = self.preprocess(expr, context)
+        return map_expr_dag(context.translator, expr)
+
+    def physical_points(self, point_set, entity=None):
+        """Converts point_set from reference to physical space"""
+        expr = SpatialCoordinate(extract_unique_domain(self.mt.terminal))
+        point_shape, = point_set.expression.shape
+        if entity is not None:
+            e, _ = entity
+            assert point_shape == e
+        else:
+            assert point_shape == extract_unique_domain(expr).topological_dimension()
+        if self.mt.restriction == '+':
+            expr = PositiveRestricted(expr)
+        elif self.mt.restriction == '-':
+            expr = NegativeRestricted(expr)
+        config = {"point_set": point_set}
+        config.update(self.config)
+        if entity is not None:
+            config.update({name: getattr(self.interface, name)
+                           for name in ["integration_dim", "entity_ids"]})
+        context = PointSetContext(**config)
+        expr = self.preprocess(expr, context)
+        mapped = map_expr_dag(context.translator, expr)
+        indices = tuple(gem.Index() for _ in mapped.shape)
+        return gem.ComponentTensor(gem.Indexed(mapped, indices), point_set.indices + indices)
+
+    def physical_vertices(self):
+        vs = PointSet(self.interface.fiat_cell.vertices)
+        return self.physical_points(vs)
+
+
+def needs_coordinate_mapping(element):
+    """Does this UFL element require a CoordinateMapping for translation?"""
+    if element.family() == 'Real':
+        return False
+    else:
+        return isinstance(create_element(element), NeedsCoordinateMappingElement)
+
+
+class PointSetContext(ContextBase):
+    """Context for compile-time known evaluation points."""
+
+    keywords = ContextBase.keywords + (
+        'quadrature_degree',
+        'quadrature_rule',
+        'point_set',
+        'weight_expr',
+    )
+
+    @cached_property
+    def integration_cell(self):
+        return self.fiat_cell.construct_subelement(self.integration_dim)
+
+    @cached_property
+    def quadrature_rule(self):
+        return make_quadrature(self.integration_cell, self.quadrature_degree)
+
+    @cached_property
+    def point_set(self):
+        return self.quadrature_rule.point_set
+
+    @cached_property
+    def point_indices(self):
+        return self.point_set.indices
+
+    @cached_property
+    def point_expr(self):
+        return self.point_set.expression
+
+    @cached_property
+    def weight_expr(self):
+        return self.quadrature_rule.weight_expression
+
+    def basis_evaluation(self, finat_element, mt, entity_id):
+        return finat_element.basis_evaluation(mt.local_derivatives,
+                                              self.point_set,
+                                              (self.integration_dim, entity_id),
+                                              coordinate_mapping=CoordinateMapping(mt, self))
+
+
+class GemPointContext(ContextBase):
+    """Context for evaluation at arbitrary reference points."""
+
+    keywords = ContextBase.keywords + (
+        'point_indices',
+        'point_expr',
+        'weight_expr',
+    )
+
+    def basis_evaluation(self, finat_element, mt, entity_id):
+        return finat_element.point_evaluation(mt.local_derivatives,
+                                              self.point_expr,
+                                              (self.integration_dim, entity_id))
+
+
+class Translator(MultiFunction, ModifiedTerminalMixin, ufl2gem.Mixin):
+    """Multifunction for translating UFL -> GEM.  Incorporates ufl2gem.Mixin, and
+    dispatches on terminal type when reaching modified terminals."""
+
+    def __init__(self, context):
+        # MultiFunction.__init__ does not call further __init__
+        # methods, but ufl2gem.Mixin must be initialised.
+        # (ModifiedTerminalMixin requires no initialisation.)
+        MultiFunction.__init__(self)
+        ufl2gem.Mixin.__init__(self)
+
+        # Need context during translation!
+        self.context = context
+
+    # We just use the provided quadrature rule to
+    # perform the integration.
+    # Can't put these in the ufl2gem mixin, since they (unlike
+    # everything else) want access to the translation context.
+    def cell_avg(self, o):
+        if self.context.integral_type != "cell":
+            # Need to create a cell-based quadrature rule and
+            # translate the expression using that (c.f. CellVolume
+            # below).
+            raise NotImplementedError("CellAvg on non-cell integrals not yet implemented")
+        integrand, = o.ufl_operands
+        domain = extract_unique_domain(o)
+        measure = ufl.Measure(self.context.integral_type, domain=domain)
+        integrand, degree, argument_multiindices = entity_avg(integrand / CellVolume(domain), measure, self.context.argument_multiindices)
+
+        config = {name: getattr(self.context, name)
+                  for name in ["ufl_cell", "index_cache", "scalar_type"]}
+        config.update(quadrature_degree=degree, interface=self.context,
+                      argument_multiindices=argument_multiindices)
+        expr, = compile_ufl(integrand, PointSetContext(**config), point_sum=True)
+        return expr
+
+    def facet_avg(self, o):
+        if self.context.integral_type == "cell":
+            raise ValueError("Can't take FacetAvg in cell integral")
+        integrand, = o.ufl_operands
+        domain = extract_unique_domain(o)
+        measure = ufl.Measure(self.context.integral_type, domain=domain)
+        integrand, degree, argument_multiindices = entity_avg(integrand / FacetArea(domain), measure, self.context.argument_multiindices)
+
+        config = {name: getattr(self.context, name)
+                  for name in ["ufl_cell", "index_cache", "scalar_type",
+                               "integration_dim", "entity_ids",
+                               "integral_type"]}
+        config.update(quadrature_degree=degree, interface=self.context,
+                      argument_multiindices=argument_multiindices)
+        expr, = compile_ufl(integrand, PointSetContext(**config), point_sum=True)
+        return expr
+
+    def modified_terminal(self, o):
+        """Overrides the modified terminal handler from
+        :class:`ModifiedTerminalMixin`."""
+        mt = analyse_modified_terminal(o)
+        return translate(mt.terminal, mt, self.context)
+
+
+@singledispatch
+def translate(terminal, mt, ctx):
+    """Translates modified terminals into GEM.
+
+    :arg terminal: terminal, for dispatching
+    :arg mt: analysed modified terminal
+    :arg ctx: translator context
+    :returns: GEM translation of the modified terminal
+    """
+    raise AssertionError("Cannot handle terminal type: %s" % type(terminal))
+
+
+@translate.register(QuadratureWeight)
+def translate_quadratureweight(terminal, mt, ctx):
+    return ctx.weight_expr
+
+
+@translate.register(GeometricQuantity)
+def translate_geometricquantity(terminal, mt, ctx):
+    raise NotImplementedError("Cannot handle geometric quantity type: %s" % type(terminal))
+
+
+@translate.register(CellOrientation)
+def translate_cell_orientation(terminal, mt, ctx):
+    return ctx.cell_orientation(mt.restriction)
+
+
+@translate.register(ReferenceCellVolume)
+def translate_reference_cell_volume(terminal, mt, ctx):
+    return gem.Literal(ctx.fiat_cell.volume())
+
+
+@translate.register(ReferenceFacetVolume)
+def translate_reference_facet_volume(terminal, mt, ctx):
+    assert ctx.integral_type != "cell"
+    # Sum of quadrature weights is entity volume
+    return gem.optimise.aggressive_unroll(gem.index_sum(ctx.weight_expr,
+                                                        ctx.point_indices))
+
+
+@translate.register(CellFacetJacobian)
+def translate_cell_facet_jacobian(terminal, mt, ctx):
+    cell = ctx.fiat_cell
+    facet_dim = ctx.integration_dim
+    assert facet_dim != cell.get_dimension()
+
+    def callback(entity_id):
+        return gem.Literal(make_cell_facet_jacobian(cell, facet_dim, entity_id))
+    return ctx.entity_selector(callback, mt.restriction)
+
+
+def make_cell_facet_jacobian(cell, facet_dim, facet_i):
+    facet_cell = cell.construct_subelement(facet_dim)
+    xs = facet_cell.get_vertices()
+    ys = cell.get_vertices_of_subcomplex(cell.get_topology()[facet_dim][facet_i])
+
+    # Use first 'dim' points to make an affine mapping
+    dim = cell.get_spatial_dimension()
+    A, b = make_affine_mapping(xs[:dim], ys[:dim])
+
+    for x, y in zip(xs[dim:], ys[dim:]):
+        # The rest of the points are checked to make sure the
+        # mapping really *is* affine.
+        assert numpy.allclose(y, A.dot(x) + b)
+
+    return A
+
+
+@translate.register(ReferenceNormal)
+def translate_reference_normal(terminal, mt, ctx):
+    def callback(facet_i):
+        n = ctx.fiat_cell.compute_reference_normal(ctx.integration_dim, facet_i)
+        return gem.Literal(n)
+    return ctx.entity_selector(callback, mt.restriction)
+
+
+@translate.register(ReferenceCellEdgeVectors)
+def translate_reference_cell_edge_vectors(terminal, mt, ctx):
+    from FIAT.reference_element import \
+        TensorProductCell as fiat_TensorProductCell
+    fiat_cell = ctx.fiat_cell
+    if isinstance(fiat_cell, fiat_TensorProductCell):
+        raise NotImplementedError("ReferenceCellEdgeVectors not implemented on TensorProductElements yet")
+
+    nedges = len(fiat_cell.get_topology()[1])
+    vecs = numpy.vstack(map(fiat_cell.compute_edge_tangent, range(nedges)))
+    assert vecs.shape == terminal.ufl_shape
+    return gem.Literal(vecs)
+
+
+@translate.register(CellCoordinate)
+def translate_cell_coordinate(terminal, mt, ctx):
+    if ctx.integration_dim == ctx.fiat_cell.get_dimension():
+        return ctx.point_expr
+
+    # This destroys the structure of the quadrature points, but since
+    # this code path is only used to implement CellCoordinate in facet
+    # integrals, hopefully it does not matter much.
+    ps = ctx.point_set
+    point_shape = tuple(index.extent for index in ps.indices)
+
+    def callback(entity_id):
+        t = ctx.fiat_cell.get_entity_transform(ctx.integration_dim, entity_id)
+        data = numpy.asarray(list(map(t, ps.points)))
+        return gem.Literal(data.reshape(point_shape + data.shape[1:]))
+
+    return gem.partial_indexed(ctx.entity_selector(callback, mt.restriction),
+                               ps.indices)
+
+
+@translate.register(FacetCoordinate)
+def translate_facet_coordinate(terminal, mt, ctx):
+    assert ctx.integration_dim != ctx.fiat_cell.get_dimension()
+    return ctx.point_expr
+
+
+@translate.register(SpatialCoordinate)
+def translate_spatialcoordinate(terminal, mt, ctx):
+    # Replace terminal with a Coefficient
+    terminal = ctx.coordinate(extract_unique_domain(terminal))
+    # Get back to reference space
+    terminal = preprocess_expression(terminal, complex_mode=ctx.complex_mode)
+    # Rebuild modified terminal
+    expr = construct_modified_terminal(mt, terminal)
+    # Translate replaced UFL snippet
+    return ctx.translator(expr)
+
+
+class CellVolumeKernelInterface(ProxyKernelInterface):
+    # Since CellVolume is evaluated as a cell integral, we must ensure
+    # that the right restriction is applied when it is used in an
+    # interior facet integral.  This proxy diverts coefficient
+    # translation to use a specified restriction.
+
+    def __init__(self, wrapee, restriction):
+        ProxyKernelInterface.__init__(self, wrapee)
+        self.restriction = restriction
+
+    def coefficient(self, ufl_coefficient, r):
+        assert r is None
+        return self._wrapee.coefficient(ufl_coefficient, self.restriction)
+
+
+@translate.register(CellVolume)
+def translate_cellvolume(terminal, mt, ctx):
+    integrand, degree = one_times(ufl.dx(domain=extract_unique_domain(terminal)))
+    interface = CellVolumeKernelInterface(ctx, mt.restriction)
+
+    config = {name: getattr(ctx, name)
+              for name in ["ufl_cell", "index_cache", "scalar_type"]}
+    config.update(interface=interface, quadrature_degree=degree)
+    expr, = compile_ufl(integrand, PointSetContext(**config), point_sum=True)
+    return expr
+
+
+@translate.register(FacetArea)
+def translate_facetarea(terminal, mt, ctx):
+    assert ctx.integral_type != 'cell'
+    domain = extract_unique_domain(terminal)
+    integrand, degree = one_times(ufl.Measure(ctx.integral_type, domain=domain))
+
+    config = {name: getattr(ctx, name)
+              for name in ["ufl_cell", "integration_dim", "scalar_type",
+                           "entity_ids", "index_cache"]}
+    config.update(interface=ctx, quadrature_degree=degree)
+    expr, = compile_ufl(integrand, PointSetContext(**config), point_sum=True)
+    return expr
+
+
+@translate.register(CellOrigin)
+def translate_cellorigin(terminal, mt, ctx):
+    domain = extract_unique_domain(terminal)
+    coords = SpatialCoordinate(domain)
+    expression = construct_modified_terminal(mt, coords)
+    point_set = PointSingleton((0.0,) * domain.topological_dimension())
+
+    config = {name: getattr(ctx, name)
+              for name in ["ufl_cell", "index_cache", "scalar_type"]}
+    config.update(interface=ctx, point_set=point_set)
+    context = PointSetContext(**config)
+    return context.translator(expression)
+
+
+@translate.register(CellVertices)
+def translate_cell_vertices(terminal, mt, ctx):
+    coords = SpatialCoordinate(extract_unique_domain(terminal))
+    ufl_expr = construct_modified_terminal(mt, coords)
+    ps = PointSet(numpy.array(ctx.fiat_cell.get_vertices()))
+
+    config = {name: getattr(ctx, name)
+              for name in ["ufl_cell", "index_cache", "scalar_type"]}
+    config.update(interface=ctx, point_set=ps)
+    context = PointSetContext(**config)
+    expr = context.translator(ufl_expr)
+
+    # Wrap up point (vertex) index
+    c = gem.Index()
+    return gem.ComponentTensor(gem.Indexed(expr, (c,)), ps.indices + (c,))
+
+
+@translate.register(CellEdgeVectors)
+def translate_cell_edge_vectors(terminal, mt, ctx):
+    # WARNING: Assumes straight edges!
+    coords = CellVertices(extract_unique_domain(terminal))
+    ufl_expr = construct_modified_terminal(mt, coords)
+    cell_vertices = ctx.translator(ufl_expr)
+
+    e = gem.Index()
+    c = gem.Index()
+    expr = gem.ListTensor([
+        gem.Sum(gem.Indexed(cell_vertices, (u, c)),
+                gem.Product(gem.Literal(-1),
+                            gem.Indexed(cell_vertices, (v, c))))
+        for _, (u, v) in sorted(ctx.fiat_cell.get_topology()[1].items())
+    ])
+    return gem.ComponentTensor(gem.Indexed(expr, (e,)), (e, c))
+
+
+def fiat_to_ufl(fiat_dict, order):
+    # All derivative multiindices must be of the same dimension.
+    dimension, = set(len(alpha) for alpha in fiat_dict.keys())
+
+    # All derivative tables must have the same shape.
+    shape, = set(table.shape for table in fiat_dict.values())
+    sigma = tuple(gem.Index(extent=extent) for extent in shape)
+
+    # Convert from FIAT to UFL format
+    eye = numpy.eye(dimension, dtype=int)
+    tensor = numpy.empty((dimension,) * order, dtype=object)
+    for multiindex in numpy.ndindex(tensor.shape):
+        alpha = tuple(eye[multiindex, :].sum(axis=0))
+        tensor[multiindex] = gem.Indexed(fiat_dict[alpha], sigma)
+    delta = tuple(gem.Index(extent=dimension) for _ in range(order))
+    if order > 0:
+        tensor = gem.Indexed(gem.ListTensor(tensor), delta)
+    else:
+        tensor = tensor[()]
+    return gem.ComponentTensor(tensor, sigma + delta)
+
+
+@translate.register(Argument)
+def translate_argument(terminal, mt, ctx):
+    argument_multiindex = ctx.argument_multiindices[terminal.number()]
+    sigma = tuple(gem.Index(extent=d) for d in mt.expr.ufl_shape)
+    element = ctx.create_element(terminal.ufl_element(), restriction=mt.restriction)
+
+    def callback(entity_id):
+        finat_dict = ctx.basis_evaluation(element, mt, entity_id)
+        # Filter out irrelevant derivatives
+        filtered_dict = {alpha: table
+                         for alpha, table in finat_dict.items()
+                         if sum(alpha) == mt.local_derivatives}
+
+        # Change from FIAT to UFL arrangement
+        square = fiat_to_ufl(filtered_dict, mt.local_derivatives)
+
+        # A numerical hack that FFC used to apply on FIAT tables still
+        # lives on after ditching FFC and switching to FInAT.
+        return ffc_rounding(square, ctx.epsilon)
+    table = ctx.entity_selector(callback, mt.restriction)
+    if ctx.use_canonical_quadrature_point_ordering:
+        quad_multiindex = ctx.quadrature_rule.point_set.indices
+        quad_multiindex_permuted = _make_quad_multiindex_permuted(mt, ctx)
+        mapper = gem.node.MemoizerArg(gem.optimise.filtered_replace_indices)
+        table = mapper(table, tuple(zip(quad_multiindex, quad_multiindex_permuted)))
+    return gem.ComponentTensor(gem.Indexed(table, argument_multiindex + sigma), sigma)
+
+
+@translate.register(TSFCConstantMixin)
+def translate_constant_value(terminal, mt, ctx):
+    return ctx.constant(terminal)
+
+
+@translate.register(Coefficient)
+def translate_coefficient(terminal, mt, ctx):
+    vec = ctx.coefficient(terminal, mt.restriction)
+
+    if terminal.ufl_element().family() == 'Real':
+        assert mt.local_derivatives == 0
+        return vec
+
+    element = ctx.create_element(terminal.ufl_element(), restriction=mt.restriction)
+
+    # Collect FInAT tabulation for all entities
+    per_derivative = collections.defaultdict(list)
+    for entity_id in ctx.entity_ids:
+        finat_dict = ctx.basis_evaluation(element, mt, entity_id)
+        for alpha, table in finat_dict.items():
+            # Filter out irrelevant derivatives
+            if sum(alpha) == mt.local_derivatives:
+                # A numerical hack that FFC used to apply on FIAT
+                # tables still lives on after ditching FFC and
+                # switching to FInAT.
+                table = ffc_rounding(table, ctx.epsilon)
+                per_derivative[alpha].append(table)
+
+    # Merge entity tabulations for each derivative
+    if len(ctx.entity_ids) == 1:
+        def take_singleton(xs):
+            x, = xs  # asserts singleton
+            return x
+        per_derivative = {alpha: take_singleton(tables)
+                          for alpha, tables in per_derivative.items()}
+    else:
+        f = ctx.entity_number(mt.restriction)
+        per_derivative = {alpha: gem.select_expression(tables, f)
+                          for alpha, tables in per_derivative.items()}
+
+    # Coefficient evaluation
+    ctx.index_cache.setdefault(terminal.ufl_element(), element.get_indices())
+    beta = ctx.index_cache[terminal.ufl_element()]
+    zeta = element.get_value_indices()
+    vec_beta, = gem.optimise.remove_componenttensors([gem.Indexed(vec, beta)])
+    value_dict = {}
+    for alpha, table in per_derivative.items():
+        table_qi = gem.Indexed(table, beta + zeta)
+        summands = []
+        for var, expr in unconcatenate([(vec_beta, table_qi)], ctx.index_cache):
+            indices = tuple(i for i in var.index_ordering() if i not in ctx.unsummed_coefficient_indices)
+            value = gem.IndexSum(gem.Product(expr, var), indices)
+            summands.append(gem.optimise.contraction(value))
+        optimised_value = gem.optimise.make_sum(summands)
+        value_dict[alpha] = gem.ComponentTensor(optimised_value, zeta)
+
+    # Change from FIAT to UFL arrangement
+    result = fiat_to_ufl(value_dict, mt.local_derivatives)
+    assert result.shape == mt.expr.ufl_shape
+    assert set(result.free_indices) - ctx.unsummed_coefficient_indices <= set(ctx.point_indices)
+
+    # Detect Jacobian of affine cells
+    if not result.free_indices and all(numpy.count_nonzero(node.array) <= 2
+                                       for node in traversal((result,))
+                                       if isinstance(node, gem.Literal)):
+        result = gem.optimise.aggressive_unroll(result)
+
+    if ctx.use_canonical_quadrature_point_ordering:
+        quad_multiindex = ctx.quadrature_rule.point_set.indices
+        quad_multiindex_permuted = _make_quad_multiindex_permuted(mt, ctx)
+        mapper = gem.node.MemoizerArg(gem.optimise.filtered_replace_indices)
+        result = mapper(result, tuple(zip(quad_multiindex, quad_multiindex_permuted)))
+    return result
+
+
+def _make_quad_multiindex_permuted(mt, ctx):
+    quad_rule = ctx.quadrature_rule
+    # Note that each quad index here represents quad points on a physical
+    # cell axis, but the table is indexed by indices representing the points
+    # on each reference cell axis, so we need to apply permutation based on the orientation.
+    cell = quad_rule.ref_el
+    quad_multiindex = quad_rule.point_set.indices
+    if isinstance(cell, TensorProductCell):
+        for comp in set(cell.cells):
+            extents = set(q.extent for c, q in zip(cell.cells, quad_multiindex) if c == comp)
+            if len(extents) != 1:
+                raise ValueError("Must have the same number of quadrature points in each symmetric axis")
+    quad_multiindex_permuted = []
+    o = ctx.entity_orientation(mt.restriction)
+    if not isinstance(o, FIATOrientation):
+        raise ValueError(f"Expecting an instance of FIATOrientation : got {o}")
+    eo = cell.extract_extrinsic_orientation(o)
+    eo_perm_map = gem.Literal(quad_rule.extrinsic_orientation_permutation_map, dtype=gem.uint_type)
+    for ref_axis in range(len(quad_multiindex)):
+        io = cell.extract_intrinsic_orientation(o, ref_axis)
+        io_perm_map = gem.Literal(quad_rule.intrinsic_orientation_permutation_map_tuple[ref_axis], dtype=gem.uint_type)
+        # Effectively swap axes if needed.
+        ref_index = tuple((phys_index, gem.Indexed(eo_perm_map, (eo, ref_axis, phys_axis))) for phys_axis, phys_index in enumerate(quad_multiindex))
+        quad_index_permuted = gem.VariableIndex(gem.FlexiblyIndexed(io_perm_map, ((0, ((io, 1), )), (0, ref_index))))
+        quad_multiindex_permuted.append(quad_index_permuted)
+    return tuple(quad_multiindex_permuted)
+
+
+def compile_ufl(expression, context, interior_facet=False, point_sum=False):
+    """Translate a UFL expression to GEM.
+
+    :arg expression: The UFL expression to compile.
+    :arg context: translation context - either a :class:`GemPointContext`
+        or :class:`PointSetContext`
+    :arg interior_facet: If ``true``, treat expression as an interior
+        facet integral (default ``False``)
+    :arg point_sum: If ``true``, return a `gem.IndexSum` of the final
+        gem expression along the ``context.point_indices`` (if present).
+   """
+
+    # Abs-simplification
+    expression = simplify_abs(expression, context.complex_mode)
+    if interior_facet:
+        expressions = []
+        for rs in itertools.product(("+", "-"), repeat=len(context.argument_multiindices)):
+            expressions.append(map_expr_dag(PickRestriction(*rs), expression))
+    else:
+        expressions = [expression]
+
+    # Translate UFL to GEM, lowering finite element specific nodes
+    result = map_expr_dags(context.translator, expressions)
+    if point_sum:
+        result = [gem.index_sum(expr, context.point_indices) for expr in result]
+    return constant_fold_zero(result)
diff --git a/tsfc/finatinterface.py b/tsfc/finatinterface.py
new file mode 100644
index 0000000000..b7e3d0ad72
--- /dev/null
+++ b/tsfc/finatinterface.py
@@ -0,0 +1,365 @@
+# This file was modified from FFC
+# (http://bitbucket.org/fenics-project/ffc), copyright notice
+# reproduced below.
+#
+# Copyright (C) 2009-2013 Kristian B. Oelgaard and Anders Logg
+#
+# This file is part of FFC.
+#
+# FFC is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# FFC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with FFC. If not, see <http://www.gnu.org/licenses/>.
+
+import weakref
+from functools import singledispatch
+
+import FIAT
+import finat
+import finat.ufl
+import ufl
+
+__all__ = ("as_fiat_cell", "create_base_element",
+           "create_element", "supported_elements")
+
+
+supported_elements = {
+    # These all map directly to FInAT elements
+    "Bernstein": finat.Bernstein,
+    "Bernardi-Raugel": finat.BernardiRaugel,
+    "Bernardi-Raugel Bubble": finat.BernardiRaugelBubble,
+    "Brezzi-Douglas-Marini": finat.BrezziDouglasMarini,
+    "Brezzi-Douglas-Fortin-Marini": finat.BrezziDouglasFortinMarini,
+    "Bubble": finat.Bubble,
+    "FacetBubble": finat.FacetBubble,
+    "Crouzeix-Raviart": finat.CrouzeixRaviart,
+    "Discontinuous Lagrange": finat.DiscontinuousLagrange,
+    "Discontinuous Raviart-Thomas": lambda c, d: finat.DiscontinuousElement(finat.RaviartThomas(c, d)),
+    "Discontinuous Taylor": finat.DiscontinuousTaylor,
+    "Gauss-Legendre": finat.GaussLegendre,
+    "Gauss-Lobatto-Legendre": finat.GaussLobattoLegendre,
+    "HDiv Trace": finat.HDivTrace,
+    "Hellan-Herrmann-Johnson": finat.HellanHerrmannJohnson,
+    "Johnson-Mercier": finat.JohnsonMercier,
+    "Nonconforming Arnold-Winther": finat.ArnoldWintherNC,
+    "Conforming Arnold-Winther": finat.ArnoldWinther,
+    "Hu-Zhang": finat.HuZhang,
+    "Hermite": finat.Hermite,
+    "Kong-Mulder-Veldhuizen": finat.KongMulderVeldhuizen,
+    "Argyris": finat.Argyris,
+    "Hsieh-Clough-Tocher": finat.HsiehCloughTocher,
+    "QuadraticPowellSabin6": finat.QuadraticPowellSabin6,
+    "QuadraticPowellSabin12": finat.QuadraticPowellSabin12,
+    "Reduced-Hsieh-Clough-Tocher": finat.ReducedHsiehCloughTocher,
+    "Mardal-Tai-Winther": finat.MardalTaiWinther,
+    "Alfeld-Sorokina": finat.AlfeldSorokina,
+    "Arnold-Qin": finat.ArnoldQin,
+    "Reduced-Arnold-Qin": finat.ReducedArnoldQin,
+    "Christiansen-Hu": finat.ChristiansenHu,
+    "Guzman-Neilan 1st kind H1": finat.GuzmanNeilanFirstKindH1,
+    "Guzman-Neilan 2nd kind H1": finat.GuzmanNeilanSecondKindH1,
+    "Guzman-Neilan Bubble": finat.GuzmanNeilanBubble,
+    "Guzman-Neilan H1(div)": finat.GuzmanNeilanH1div,
+    "Morley": finat.Morley,
+    "Bell": finat.Bell,
+    "Lagrange": finat.Lagrange,
+    "Nedelec 1st kind H(curl)": finat.Nedelec,
+    "Nedelec 2nd kind H(curl)": finat.NedelecSecondKind,
+    "Raviart-Thomas": finat.RaviartThomas,
+    "Regge": finat.Regge,
+    "Gopalakrishnan-Lederer-Schoberl 1st kind": finat.GopalakrishnanLedererSchoberlFirstKind,
+    "Gopalakrishnan-Lederer-Schoberl 2nd kind": finat.GopalakrishnanLedererSchoberlSecondKind,
+    "BDMCE": finat.BrezziDouglasMariniCubeEdge,
+    "BDMCF": finat.BrezziDouglasMariniCubeFace,
+    # These require special treatment below
+    "DQ": None,
+    "Q": None,
+    "RTCE": None,
+    "RTCF": None,
+    "NCE": None,
+    "NCF": None,
+    "Real": finat.Real,
+    "DPC": finat.DPC,
+    "S": finat.Serendipity,
+    "SminusF": finat.TrimmedSerendipityFace,
+    "SminusDiv": finat.TrimmedSerendipityDiv,
+    "SminusE": finat.TrimmedSerendipityEdge,
+    "SminusCurl": finat.TrimmedSerendipityCurl,
+    "DPC L2": finat.DPC,
+    "Discontinuous Lagrange L2": finat.DiscontinuousLagrange,
+    "Gauss-Legendre L2": finat.GaussLegendre,
+    "DQ L2": None,
+    "Direct Serendipity": finat.DirectSerendipity,
+}
+"""A :class:`.dict` mapping UFL element family names to their
+FInAT-equivalent constructors.  If the value is ``None``, the UFL
+element is supported, but must be handled specially because it doesn't
+have a direct FInAT equivalent."""
+
+
+def as_fiat_cell(cell):
+    """Convert a ufl cell to a FIAT cell.
+
+    :arg cell: the :class:`ufl.Cell` to convert."""
+    if not isinstance(cell, ufl.AbstractCell):
+        raise ValueError("Expecting a UFL Cell")
+    return FIAT.ufc_cell(cell)
+
+
+@singledispatch
+def convert(element, **kwargs):
+    """Handler for converting UFL elements to FInAT elements.
+
+    :arg element: The UFL element to convert.
+
+    Do not use this function directly, instead call
+    :func:`create_element`."""
+    if element.family() in supported_elements:
+        raise ValueError("Element %s supported, but no handler provided" % element)
+    raise ValueError("Unsupported element type %s" % type(element))
+
+
+cg_interval_variants = {
+    "fdm": finat.FDMLagrange,
+    "fdm_ipdg": finat.FDMLagrange,
+    "fdm_quadrature": finat.FDMQuadrature,
+    "fdm_broken": finat.FDMBrokenH1,
+    "fdm_hermite": finat.FDMHermite,
+}
+
+
+dg_interval_variants = {
+    "fdm": finat.FDMDiscontinuousLagrange,
+    "fdm_quadrature": finat.FDMDiscontinuousLagrange,
+    "fdm_ipdg": lambda *args: finat.DiscontinuousElement(finat.FDMLagrange(*args)),
+    "fdm_broken": finat.FDMBrokenL2,
+}
+
+
+# Base finite elements first
+@convert.register(finat.ufl.FiniteElement)
+def convert_finiteelement(element, **kwargs):
+    cell = as_fiat_cell(element.cell)
+    if element.family() == "Quadrature":
+        degree = element.degree()
+        scheme = element.quadrature_scheme()
+        if degree is None or scheme is None:
+            raise ValueError("Quadrature scheme and degree must be specified!")
+
+        return finat.make_quadrature_element(cell, degree, scheme), set()
+    lmbda = supported_elements[element.family()]
+    if element.family() == "Real" and element.cell.cellname() in {"quadrilateral", "hexahedron"}:
+        lmbda = None
+        element = finat.ufl.FiniteElement("DQ", element.cell, 0)
+    if lmbda is None:
+        if element.cell.cellname() == "quadrilateral":
+            # Handle quadrilateral short names like RTCF and RTCE.
+            element = element.reconstruct(cell=quadrilateral_tpc)
+        elif element.cell.cellname() == "hexahedron":
+            # Handle hexahedron short names like NCF and NCE.
+            element = element.reconstruct(cell=hexahedron_tpc)
+        else:
+            raise ValueError("%s is supported, but handled incorrectly" %
+                             element.family())
+        finat_elem, deps = _create_element(element, **kwargs)
+        return finat.FlattenedDimensions(finat_elem), deps
+
+    finat_kwargs = {}
+    kind = element.variant()
+    if kind is None:
+        kind = 'spectral'  # default variant
+
+    if element.family() == "Lagrange":
+        if kind == 'spectral':
+            lmbda = finat.GaussLobattoLegendre
+        elif element.cell.cellname() == "interval" and kind in cg_interval_variants:
+            lmbda = cg_interval_variants[kind]
+        elif any(map(kind.startswith, ['integral', 'demkowicz', 'fdm'])):
+            lmbda = finat.IntegratedLegendre
+            finat_kwargs["variant"] = kind
+        elif kind in ['mgd', 'feec', 'qb', 'mse']:
+            degree = element.degree()
+            shift_axes = kwargs["shift_axes"]
+            restriction = kwargs["restriction"]
+            deps = {"shift_axes", "restriction"}
+            return finat.RuntimeTabulated(cell, degree, variant=kind, shift_axes=shift_axes, restriction=restriction), deps
+        else:
+            # Let FIAT handle the general case
+            lmbda = finat.Lagrange
+            finat_kwargs["variant"] = kind
+
+    elif element.family() in ["Discontinuous Lagrange", "Discontinuous Lagrange L2"]:
+        if kind == 'spectral':
+            lmbda = finat.GaussLegendre
+        elif element.cell.cellname() == "interval" and kind in dg_interval_variants:
+            lmbda = dg_interval_variants[kind]
+        elif any(map(kind.startswith, ['integral', 'demkowicz', 'fdm'])):
+            lmbda = finat.Legendre
+            finat_kwargs["variant"] = kind
+        elif kind in ['mgd', 'feec', 'qb', 'mse']:
+            degree = element.degree()
+            shift_axes = kwargs["shift_axes"]
+            restriction = kwargs["restriction"]
+            deps = {"shift_axes", "restriction"}
+            return finat.RuntimeTabulated(cell, degree, variant=kind, shift_axes=shift_axes, restriction=restriction, continuous=False), deps
+        else:
+            # Let FIAT handle the general case
+            lmbda = finat.DiscontinuousLagrange
+            finat_kwargs["variant"] = kind
+
+    elif element.variant() is not None:
+        finat_kwargs["variant"] = element.variant()
+
+    return lmbda(cell, element.degree(), **finat_kwargs), set()
+
+
+# Element modifiers and compound element types
+@convert.register(finat.ufl.BrokenElement)
+def convert_brokenelement(element, **kwargs):
+    finat_elem, deps = _create_element(element._element, **kwargs)
+    return finat.DiscontinuousElement(finat_elem), deps
+
+
+@convert.register(finat.ufl.EnrichedElement)
+def convert_enrichedelement(element, **kwargs):
+    elements, deps = zip(*[_create_element(elem, **kwargs)
+                           for elem in element._elements])
+    return finat.EnrichedElement(elements), set.union(*deps)
+
+
+@convert.register(finat.ufl.NodalEnrichedElement)
+def convert_nodalenrichedelement(element, **kwargs):
+    elements, deps = zip(*[_create_element(elem, **kwargs)
+                           for elem in element._elements])
+    return finat.NodalEnrichedElement(elements), set.union(*deps)
+
+
+@convert.register(finat.ufl.MixedElement)
+def convert_mixedelement(element, **kwargs):
+    elements, deps = zip(*[_create_element(elem, **kwargs)
+                           for elem in element.sub_elements])
+    return finat.MixedElement(elements), set.union(*deps)
+
+
+@convert.register(finat.ufl.VectorElement)
+@convert.register(finat.ufl.TensorElement)
+def convert_tensorelement(element, **kwargs):
+    inner_elem, deps = _create_element(element.sub_elements[0], **kwargs)
+    shape = element.reference_value_shape
+    shape = shape[:len(shape) - len(inner_elem.value_shape)]
+    shape_innermost = kwargs["shape_innermost"]
+    return (finat.TensorFiniteElement(inner_elem, shape, not shape_innermost),
+            deps | {"shape_innermost"})
+
+
+@convert.register(finat.ufl.TensorProductElement)
+def convert_tensorproductelement(element, **kwargs):
+    cell = element.cell
+    if type(cell) is not ufl.TensorProductCell:
+        raise ValueError("TensorProductElement not on TensorProductCell?")
+    shift_axes = kwargs["shift_axes"]
+    dim_offset = 0
+    elements = []
+    deps = set()
+    for elem in element.sub_elements:
+        kwargs["shift_axes"] = shift_axes + dim_offset
+        dim_offset += elem.cell.topological_dimension()
+        finat_elem, ds = _create_element(elem, **kwargs)
+        elements.append(finat_elem)
+        deps.update(ds)
+    return finat.TensorProductElement(elements), deps
+
+
+@convert.register(finat.ufl.HDivElement)
+def convert_hdivelement(element, **kwargs):
+    finat_elem, deps = _create_element(element._element, **kwargs)
+    return finat.HDivElement(finat_elem), deps
+
+
+@convert.register(finat.ufl.HCurlElement)
+def convert_hcurlelement(element, **kwargs):
+    finat_elem, deps = _create_element(element._element, **kwargs)
+    return finat.HCurlElement(finat_elem), deps
+
+
+@convert.register(finat.ufl.WithMapping)
+def convert_withmapping(element, **kwargs):
+    return _create_element(element.wrapee, **kwargs)
+
+
+@convert.register(finat.ufl.RestrictedElement)
+def convert_restrictedelement(element, **kwargs):
+    finat_elem, deps = _create_element(element._element, **kwargs)
+    return finat.RestrictedElement(finat_elem, element.restriction_domain()), deps
+
+
+hexahedron_tpc = ufl.TensorProductCell(ufl.interval, ufl.interval, ufl.interval)
+quadrilateral_tpc = ufl.TensorProductCell(ufl.interval, ufl.interval)
+_cache = weakref.WeakKeyDictionary()
+
+
+def create_element(ufl_element, shape_innermost=True, shift_axes=0, restriction=None):
+    """Create a FInAT element (suitable for tabulating with) given a UFL element.
+
+    :arg ufl_element: The UFL element to create a FInAT element from.
+    :arg shape_innermost: Vector/tensor indices come after basis function indices
+    :arg restriction: cell restriction in interior facet integrals
+                      (only for runtime tabulated elements)
+    """
+    finat_element, deps = _create_element(ufl_element,
+                                          shape_innermost=shape_innermost,
+                                          shift_axes=shift_axes,
+                                          restriction=restriction)
+    return finat_element
+
+
+def _create_element(ufl_element, **kwargs):
+    """A caching wrapper around :py:func:`convert`.
+
+    Takes a UFL element and an unspecified set of parameter options,
+    and returns the converted element with the set of keyword names
+    that were relevant for conversion.
+    """
+    # Look up conversion in cache
+    try:
+        cache = _cache[ufl_element]
+    except KeyError:
+        _cache[ufl_element] = {}
+        cache = _cache[ufl_element]
+
+    for key, finat_element in cache.items():
+        # Cache hit if all relevant parameter values match.
+        if all(kwargs[param] == value for param, value in key):
+            return finat_element, set(param for param, value in key)
+
+    # Convert if cache miss
+    if ufl_element.cell is None:
+        raise ValueError("Don't know how to build element when cell is not given")
+
+    finat_element, deps = convert(ufl_element, **kwargs)
+
+    # Store conversion in cache
+    key = frozenset((param, kwargs[param]) for param in deps)
+    cache[key] = finat_element
+
+    # Forward result
+    return finat_element, deps
+
+
+def create_base_element(ufl_element, **kwargs):
+    """Create a "scalar" base FInAT element given a UFL element.
+    Takes a UFL element and an unspecified set of parameter options,
+    and returns the converted element.
+    """
+    finat_element = create_element(ufl_element, **kwargs)
+    if isinstance(finat_element, finat.TensorFiniteElement):
+        finat_element = finat_element.base_element
+    return finat_element
diff --git a/tsfc/kernel_args.py b/tsfc/kernel_args.py
new file mode 100644
index 0000000000..a397f0f937
--- /dev/null
+++ b/tsfc/kernel_args.py
@@ -0,0 +1,62 @@
+import abc
+
+
+class KernelArg(abc.ABC):
+    """Abstract base class wrapping a loopy argument.
+
+    Defining this type system allows Firedrake (or other libraries) to
+    prepare arguments for the kernel without needing to worry about their
+    ordering. Instead this can be offloaded to tools such as
+    :func:`functools.singledispatch`.
+    """
+
+    def __init__(self, arg):
+        self.loopy_arg = arg
+
+    @property
+    def dtype(self):
+        return self.loopy_arg.dtype
+
+
+class OutputKernelArg(KernelArg):
+    ...
+
+
+class CoordinatesKernelArg(KernelArg):
+    ...
+
+
+class CoefficientKernelArg(KernelArg):
+    ...
+
+
+class ConstantKernelArg(KernelArg):
+    ...
+
+
+class CellOrientationsKernelArg(KernelArg):
+    ...
+
+
+class CellSizesKernelArg(KernelArg):
+    ...
+
+
+class TabulationKernelArg(KernelArg):
+    ...
+
+
+class ExteriorFacetKernelArg(KernelArg):
+    ...
+
+
+class InteriorFacetKernelArg(KernelArg):
+    ...
+
+
+class ExteriorFacetOrientationKernelArg(KernelArg):
+    ...
+
+
+class InteriorFacetOrientationKernelArg(KernelArg):
+    ...
diff --git a/tsfc/kernel_interface/__init__.py b/tsfc/kernel_interface/__init__.py
new file mode 100644
index 0000000000..5114263848
--- /dev/null
+++ b/tsfc/kernel_interface/__init__.py
@@ -0,0 +1,51 @@
+from abc import ABCMeta, abstractmethod, abstractproperty
+
+from gem.utils import make_proxy_class
+
+
+class KernelInterface(metaclass=ABCMeta):
+    """Abstract interface for accessing the GEM expressions corresponding
+    to kernel arguments."""
+
+    @abstractmethod
+    def coordinate(self, ufl_domain):
+        """A function that maps :class:`ufl.Domain`s to coordinate
+        :class:`ufl.Coefficient`s."""
+
+    @abstractmethod
+    def coefficient(self, ufl_coefficient, restriction):
+        """A function that maps :class:`ufl.Coefficient`s to GEM
+        expressions."""
+
+    @abstractmethod
+    def constant(self, const):
+        """Return the GEM expression corresponding to the constant."""
+
+    @abstractmethod
+    def cell_orientation(self, restriction):
+        """Cell orientation as a GEM expression."""
+
+    @abstractmethod
+    def cell_size(self, restriction):
+        """Mesh cell size as a GEM expression.  Shape (nvertex, ) in FIAT vertex ordering."""
+
+    @abstractmethod
+    def entity_number(self, restriction):
+        """Facet or vertex number as a GEM index."""
+
+    @abstractmethod
+    def entity_orientation(self, restriction):
+        """Entity orientation as a GEM index."""
+
+    @abstractmethod
+    def create_element(self, element, **kwargs):
+        """Create a FInAT element (suitable for tabulating with) given
+        a UFL element."""
+
+    @abstractproperty
+    def unsummed_coefficient_indices(self):
+        """A set of indices that coefficient evaluation should not sum over.
+        Used for macro-cell integration."""
+
+
+ProxyKernelInterface = make_proxy_class('ProxyKernelInterface', KernelInterface)
diff --git a/tsfc/kernel_interface/common.py b/tsfc/kernel_interface/common.py
new file mode 100644
index 0000000000..df7e879f09
--- /dev/null
+++ b/tsfc/kernel_interface/common.py
@@ -0,0 +1,545 @@
+import collections
+import operator
+import string
+from functools import reduce
+from itertools import chain, product
+
+import gem
+import gem.impero_utils as impero_utils
+import numpy
+from FIAT.reference_element import TensorProductCell
+from finat.cell_tools import max_complex
+from finat.quadrature import AbstractQuadratureRule, make_quadrature
+from gem.node import traversal
+from gem.optimise import constant_fold_zero
+from gem.optimise import remove_componenttensors as prune
+from gem.utils import cached_property
+from numpy import asarray
+from tsfc import fem, ufl_utils
+from tsfc.finatinterface import as_fiat_cell, create_element
+from tsfc.kernel_interface import KernelInterface
+from tsfc.logging import logger
+from ufl.utils.sequences import max_degree
+
+
+class KernelBuilderBase(KernelInterface):
+    """Helper class for building local assembly kernels."""
+
+    def __init__(self, scalar_type, interior_facet=False):
+        """Initialise a kernel builder.
+
+        :arg interior_facet: kernel accesses two cells
+        """
+        assert isinstance(interior_facet, bool)
+        self.scalar_type = scalar_type
+        self.interior_facet = interior_facet
+
+        self.prepare = []
+        self.finalise = []
+
+        # Coordinates
+        self.domain_coordinate = {}
+
+        # Coefficients
+        self.coefficient_map = collections.OrderedDict()
+
+        # Constants
+        self.constant_map = collections.OrderedDict()
+
+    @cached_property
+    def unsummed_coefficient_indices(self):
+        return frozenset()
+
+    def coordinate(self, domain):
+        return self.domain_coordinate[domain]
+
+    def coefficient(self, ufl_coefficient, restriction):
+        """A function that maps :class:`ufl.Coefficient`s to GEM
+        expressions."""
+        kernel_arg = self.coefficient_map[ufl_coefficient]
+        if ufl_coefficient.ufl_element().family() == 'Real':
+            return kernel_arg
+        elif not self.interior_facet:
+            return kernel_arg
+        else:
+            return kernel_arg[{'+': 0, '-': 1}[restriction]]
+
+    def constant(self, const):
+        return self.constant_map[const]
+
+    def cell_orientation(self, restriction):
+        """Cell orientation as a GEM expression."""
+        f = {None: 0, '+': 0, '-': 1}[restriction]
+        # Assume self._cell_orientations tuple is set up at this point.
+        co_int = self._cell_orientations[f]
+        return gem.Conditional(gem.Comparison("==", co_int, gem.Literal(1)),
+                               gem.Literal(-1),
+                               gem.Conditional(gem.Comparison("==", co_int, gem.Zero()),
+                                               gem.Literal(1),
+                                               gem.Literal(numpy.nan)))
+
+    def cell_size(self, restriction):
+        if not hasattr(self, "_cell_sizes"):
+            raise RuntimeError("Haven't called set_cell_sizes")
+        if self.interior_facet:
+            return self._cell_sizes[{'+': 0, '-': 1}[restriction]]
+        else:
+            return self._cell_sizes
+
+    def entity_number(self, restriction):
+        """Facet or vertex number as a GEM index."""
+        # Assume self._entity_number dict is set up at this point.
+        return self._entity_number[restriction]
+
+    def entity_orientation(self, restriction):
+        """Facet orientation as a GEM index."""
+        # Assume self._entity_orientation dict is set up at this point.
+        return self._entity_orientation[restriction]
+
+    def apply_glue(self, prepare=None, finalise=None):
+        """Append glue code for operations that are not handled in the
+        GEM abstraction.
+
+        Current uses: mixed interior facet mess
+
+        :arg prepare: code snippets to be prepended to the kernel
+        :arg finalise: code snippets to be appended to the kernel
+        """
+        if prepare is not None:
+            self.prepare.extend(prepare)
+        if finalise is not None:
+            self.finalise.extend(finalise)
+
+    def register_requirements(self, ir):
+        """Inspect what is referenced by the IR that needs to be
+        provided by the kernel interface.
+
+        :arg ir: multi-root GEM expression DAG
+        """
+        # Nothing is required by default
+        pass
+
+
+class KernelBuilderMixin(object):
+    """Mixin for KernelBuilder classes."""
+
+    def compile_integrand(self, integrand, params, ctx):
+        """Compile UFL integrand.
+
+        :arg integrand: UFL integrand.
+        :arg params: a dict containing "quadrature_rule".
+        :arg ctx: context created with :meth:`create_context` method.
+
+        See :meth:`create_context` for typical calling sequence.
+        """
+        # Split Coefficients
+        if self.coefficient_split:
+            integrand = ufl_utils.split_coefficients(integrand, self.coefficient_split)
+        # Compile: ufl -> gem
+        info = self.integral_data_info
+        functions = list(info.arguments) + [self.coordinate(info.domain)] + list(info.coefficients)
+        set_quad_rule(params, info.domain.ufl_cell(), info.integral_type, functions)
+        quad_rule = params["quadrature_rule"]
+        config = self.fem_config()
+        config['argument_multiindices'] = self.argument_multiindices
+        config['quadrature_rule'] = quad_rule
+        config['index_cache'] = ctx['index_cache']
+        expressions = fem.compile_ufl(integrand,
+                                      fem.PointSetContext(**config),
+                                      interior_facet=self.interior_facet)
+        ctx['quadrature_indices'].extend(quad_rule.point_set.indices)
+        return expressions
+
+    def construct_integrals(self, integrand_expressions, params):
+        """Construct integrals from integrand expressions.
+
+        :arg integrand_expressions: gem expressions for integrands.
+        :arg params: a dict containing "mode" and "quadrature_rule".
+
+        integrand_expressions must be indexed with :attr:`argument_multiindices`;
+        these gem expressions are obtained by calling :meth:`compile_integrand`
+        method or by modifying the gem expressions returned by
+        :meth:`compile_integrand`.
+
+        See :meth:`create_context` for typical calling sequence.
+        """
+        mode = pick_mode(params["mode"])
+        return mode.Integrals(integrand_expressions,
+                              params["quadrature_rule"].point_set.indices,
+                              self.argument_multiindices,
+                              params)
+
+    def stash_integrals(self, reps, params, ctx):
+        """Stash integral representations in ctx.
+
+        :arg reps: integral representations.
+        :arg params: a dict containing "mode".
+        :arg ctx: context in which reps are stored.
+
+        See :meth:`create_context` for typical calling sequence.
+        """
+        mode = pick_mode(params["mode"])
+        mode_irs = ctx['mode_irs']
+        mode_irs.setdefault(mode, collections.OrderedDict())
+        for var, rep in zip(self.return_variables, reps):
+            mode_irs[mode].setdefault(var, []).append(rep)
+
+    def compile_gem(self, ctx):
+        """Compile gem representation of integrals to impero_c.
+
+        :arg ctx: the context containing the gem representation of integrals.
+        :returns: a tuple of impero_c, oriented, needs_cell_sizes, tabulations
+            required to finally construct a kernel in :meth:`construct_kernel`.
+
+        See :meth:`create_context` for typical calling sequence.
+        """
+        # Finalise mode representations into a set of assignments
+        mode_irs = ctx['mode_irs']
+
+        assignments = []
+        for mode, var_reps in mode_irs.items():
+            assignments.extend(mode.flatten(var_reps.items(), ctx['index_cache']))
+
+        if assignments:
+            return_variables, expressions = zip(*assignments)
+        else:
+            return_variables = []
+            expressions = []
+        expressions = constant_fold_zero(expressions)
+
+        # Need optimised roots
+        options = dict(reduce(operator.and_,
+                              [mode.finalise_options.items()
+                               for mode in mode_irs.keys()]))
+        expressions = impero_utils.preprocess_gem(expressions, **options)
+
+        # Let the kernel interface inspect the optimised IR to register
+        # what kind of external data is required (e.g., cell orientations,
+        # cell sizes, etc.).
+        oriented, needs_cell_sizes, tabulations = self.register_requirements(expressions)
+
+        # Extract Variables that are actually used
+        active_variables = gem.extract_type(expressions, gem.Variable)
+        # Construct ImperoC
+        assignments = list(zip(return_variables, expressions))
+        index_ordering = get_index_ordering(ctx['quadrature_indices'], return_variables)
+        try:
+            impero_c = impero_utils.compile_gem(assignments, index_ordering, remove_zeros=True)
+        except impero_utils.NoopError:
+            impero_c = None
+        return impero_c, oriented, needs_cell_sizes, tabulations, active_variables
+
+    def fem_config(self):
+        """Return a dictionary used with fem.compile_ufl.
+
+        One needs to update this dictionary with "argument_multiindices",
+        "quadrature_rule", and "index_cache" before using this with
+        fem.compile_ufl.
+        """
+        info = self.integral_data_info
+        integral_type = info.integral_type
+        cell = info.domain.ufl_cell()
+        fiat_cell = as_fiat_cell(cell)
+        integration_dim, entity_ids = lower_integral_type(fiat_cell, integral_type)
+        return dict(interface=self,
+                    ufl_cell=cell,
+                    integral_type=integral_type,
+                    integration_dim=integration_dim,
+                    entity_ids=entity_ids,
+                    scalar_type=self.fem_scalar_type)
+
+    def create_context(self):
+        """Create builder context.
+
+        *index_cache*
+
+        Map from UFL FiniteElement objects to multiindices.
+        This is so we reuse Index instances when evaluating the same
+        coefficient multiple times with the same table.
+
+        We also use the same dict for the unconcatenate index cache,
+        which maps index objects to tuples of multiindices. These two
+        caches shall never conflict as their keys have different types
+        (UFL finite elements vs. GEM index objects).
+
+        *quadrature_indices*
+
+        List of quadrature indices used.
+
+        *mode_irs*
+
+        Dict for mode representations.
+
+        For each set of integrals to make a kernel for (i,e.,
+        `integral_data.integrals`), one must first create a ctx object by
+        calling :meth:`create_context` method.
+        This ctx object collects objects associated with the integrals that
+        are eventually used to construct the kernel.
+        The following is a typical calling sequence:
+
+        .. code-block:: python3
+
+            builder = KernelBuilder(...)
+            params = {"mode": "spectral"}
+            ctx = builder.create_context()
+            for integral in integral_data.integrals:
+                integrand = integral.integrand()
+                integrand_exprs = builder.compile_integrand(integrand, params, ctx)
+                integral_exprs = builder.construct_integrals(integrand_exprs, params)
+                builder.stash_integrals(integral_exprs, params, ctx)
+            kernel = builder.construct_kernel(kernel_name, ctx)
+
+        """
+        return {'index_cache': {},
+                'quadrature_indices': [],
+                'mode_irs': collections.OrderedDict()}
+
+
+def set_quad_rule(params, cell, integral_type, functions):
+    # Check if the integral has a quad degree attached, otherwise use
+    # the estimated polynomial degree attached by compute_form_data
+    try:
+        quadrature_degree = params["quadrature_degree"]
+    except KeyError:
+        quadrature_degree = params["estimated_polynomial_degree"]
+        function_degrees = [f.ufl_function_space().ufl_element().degree()
+                            for f in functions]
+        if all((asarray(quadrature_degree) > 10 * asarray(degree)).all()
+               for degree in function_degrees):
+            logger.warning("Estimated quadrature degree %s more "
+                           "than tenfold greater than any "
+                           "argument/coefficient degree (max %s)",
+                           quadrature_degree, max_degree(function_degrees))
+    quad_rule = params.get("quadrature_rule", "default")
+    if isinstance(quad_rule, str):
+        scheme = quad_rule
+        fiat_cell = as_fiat_cell(cell)
+        finat_elements = set(create_element(f.ufl_element()) for f in functions
+                             if f.ufl_element().family() != "Real")
+        fiat_cells = [fiat_cell] + [finat_el.complex for finat_el in finat_elements]
+        fiat_cell = max_complex(fiat_cells)
+
+        integration_dim, _ = lower_integral_type(fiat_cell, integral_type)
+        integration_cell = fiat_cell.construct_subcomplex(integration_dim)
+        quad_rule = make_quadrature(integration_cell, quadrature_degree, scheme=scheme)
+        params["quadrature_rule"] = quad_rule
+
+    if not isinstance(quad_rule, AbstractQuadratureRule):
+        raise ValueError("Expected to find a QuadratureRule object, not a %s" %
+                         type(quad_rule))
+
+
+def get_index_ordering(quadrature_indices, return_variables):
+    split_argument_indices = tuple(chain(*[var.index_ordering()
+                                           for var in return_variables]))
+    return tuple(quadrature_indices) + split_argument_indices
+
+
+def get_index_names(quadrature_indices, argument_multiindices, index_cache):
+    index_names = []
+
+    def name_index(index, name):
+        index_names.append((index, name))
+        if index in index_cache:
+            for multiindex, suffix in zip(index_cache[index],
+                                          string.ascii_lowercase):
+                name_multiindex(multiindex, name + suffix)
+
+    def name_multiindex(multiindex, name):
+        if len(multiindex) == 1:
+            name_index(multiindex[0], name)
+        else:
+            for i, index in enumerate(multiindex):
+                name_index(index, name + str(i))
+
+    name_multiindex(quadrature_indices, 'ip')
+    for multiindex, name in zip(argument_multiindices, ['j', 'k']):
+        name_multiindex(multiindex, name)
+    return index_names
+
+
+def lower_integral_type(fiat_cell, integral_type):
+    """Lower integral type into the dimension of the integration
+    subentity and a list of entity numbers for that dimension.
+
+    :arg fiat_cell: FIAT reference cell
+    :arg integral_type: integral type (string)
+    """
+    vert_facet_types = ['exterior_facet_vert', 'interior_facet_vert']
+    horiz_facet_types = ['exterior_facet_bottom', 'exterior_facet_top', 'interior_facet_horiz']
+
+    dim = fiat_cell.get_dimension()
+    if integral_type == 'cell':
+        integration_dim = dim
+    elif integral_type in ['exterior_facet', 'interior_facet']:
+        if isinstance(fiat_cell, TensorProductCell):
+            raise ValueError("{} integral cannot be used with a TensorProductCell; need to distinguish between vertical and horizontal contributions.".format(integral_type))
+        integration_dim = dim - 1
+    elif integral_type == 'vertex':
+        integration_dim = 0
+    elif integral_type in vert_facet_types + horiz_facet_types:
+        # Extrusion case
+        if not isinstance(fiat_cell, TensorProductCell):
+            raise ValueError("{} integral requires a TensorProductCell.".format(integral_type))
+        basedim, extrdim = dim
+        assert extrdim == 1
+
+        if integral_type in vert_facet_types:
+            integration_dim = (basedim - 1, 1)
+        elif integral_type in horiz_facet_types:
+            integration_dim = (basedim, 0)
+    else:
+        raise NotImplementedError("integral type %s not supported" % integral_type)
+
+    if integral_type == 'exterior_facet_bottom':
+        entity_ids = [0]
+    elif integral_type == 'exterior_facet_top':
+        entity_ids = [1]
+    else:
+        entity_ids = list(range(len(fiat_cell.get_topology()[integration_dim])))
+
+    return integration_dim, entity_ids
+
+
+def pick_mode(mode):
+    "Return one of the specialized optimisation modules from a mode string."
+    try:
+        from firedrake_citations import Citations
+        cites = {"vanilla": ("Homolya2017", ),
+                 "coffee": ("Luporini2016", "Homolya2017", ),
+                 "spectral": ("Luporini2016", "Homolya2017", "Homolya2017a"),
+                 "tensor": ("Kirby2006", "Homolya2017", )}
+        for c in cites[mode]:
+            Citations().register(c)
+    except ImportError:
+        pass
+    if mode == "vanilla":
+        import tsfc.vanilla as m
+    elif mode == "coffee":
+        import tsfc.coffee_mode as m
+    elif mode == "spectral":
+        import tsfc.spectral as m
+    elif mode == "tensor":
+        import tsfc.tensor as m
+    else:
+        raise ValueError("Unknown mode: {}".format(mode))
+    return m
+
+
+def check_requirements(ir):
+    """Look for cell orientations, cell sizes, and collect tabulations
+    in one pass."""
+    cell_orientations = False
+    cell_sizes = False
+    rt_tabs = {}
+    for node in traversal(ir):
+        if isinstance(node, gem.Variable):
+            if node.name == "cell_orientations":
+                cell_orientations = True
+            elif node.name == "cell_sizes":
+                cell_sizes = True
+            elif node.name.startswith("rt_"):
+                rt_tabs[node.name] = node.shape
+    return cell_orientations, cell_sizes, tuple(sorted(rt_tabs.items()))
+
+
+def prepare_constant(constant, number):
+    """Bridges the kernel interface and the GEM abstraction for
+    Constants.
+
+    :arg constant: Firedrake Constant
+    :arg number: Value to uniquely identify the constant
+    :returns: (funarg, expression)
+         expression - GEM expression referring to the Constant value(s)
+    """
+    value_size = numpy.prod(constant.ufl_shape, dtype=int)
+    return gem.reshape(gem.Variable(f"c_{number}", (value_size,)),
+                       constant.ufl_shape)
+
+
+def prepare_coefficient(coefficient, name, interior_facet=False):
+    """Bridges the kernel interface and the GEM abstraction for
+    Coefficients.
+
+    :arg coefficient: UFL Coefficient
+    :arg name: unique name to refer to the Coefficient in the kernel
+    :arg interior_facet: interior facet integral?
+    :returns: (funarg, expression)
+         expression - GEM expression referring to the Coefficient
+                      values
+    """
+    assert isinstance(interior_facet, bool)
+
+    if coefficient.ufl_element().family() == 'Real':
+        # Constant
+        value_size = coefficient.ufl_function_space().value_size
+        expression = gem.reshape(gem.Variable(name, (value_size,)),
+                                 coefficient.ufl_shape)
+        return expression
+
+    finat_element = create_element(coefficient.ufl_element())
+    shape = finat_element.index_shape
+    size = numpy.prod(shape, dtype=int)
+
+    if not interior_facet:
+        expression = gem.reshape(gem.Variable(name, (size,)), shape)
+    else:
+        varexp = gem.Variable(name, (2 * size,))
+        plus = gem.view(varexp, slice(size))
+        minus = gem.view(varexp, slice(size, 2 * size))
+        expression = (gem.reshape(plus, shape), gem.reshape(minus, shape))
+    return expression
+
+
+def prepare_arguments(arguments, multiindices, interior_facet=False, diagonal=False):
+    """Bridges the kernel interface and the GEM abstraction for
+    Arguments.  Vector Arguments are rearranged here for interior
+    facet integrals.
+
+    :arg arguments: UFL Arguments
+    :arg multiindices: Argument multiindices
+    :arg interior_facet: interior facet integral?
+    :arg diagonal: Are we assembling the diagonal of a rank-2 element tensor?
+    :returns: (funarg, expression)
+         expressions - GEM expressions referring to the argument
+                       tensor
+    """
+    assert isinstance(interior_facet, bool)
+
+    if len(arguments) == 0:
+        # No arguments
+        expression = gem.Indexed(gem.Variable("A", (1,)), (0,))
+        return (expression, )
+
+    elements = tuple(create_element(arg.ufl_element()) for arg in arguments)
+    shapes = tuple(element.index_shape for element in elements)
+
+    if diagonal:
+        if len(arguments) != 2:
+            raise ValueError("Diagonal only for 2-forms")
+        try:
+            element, = set(elements)
+        except ValueError:
+            raise ValueError("Diagonal only for diagonal blocks (test and trial spaces the same)")
+
+        elements = (element, )
+        shapes = tuple(element.index_shape for element in elements)
+        multiindices = multiindices[:1]
+
+    def expression(restricted):
+        return gem.Indexed(gem.reshape(restricted, *shapes),
+                           tuple(chain(*multiindices)))
+
+    u_shape = numpy.array([numpy.prod(shape, dtype=int) for shape in shapes])
+    if interior_facet:
+        c_shape = tuple(2 * u_shape)
+        slicez = [[slice(r * s, (r + 1) * s)
+                   for r, s in zip(restrictions, u_shape)]
+                  for restrictions in product((0, 1), repeat=len(arguments))]
+    else:
+        c_shape = tuple(u_shape)
+        slicez = [[slice(s) for s in u_shape]]
+
+    varexp = gem.Variable("A", c_shape)
+    expressions = [expression(gem.view(varexp, *slices)) for slices in slicez]
+    return tuple(prune(expressions))
diff --git a/tsfc/kernel_interface/firedrake_loopy.py b/tsfc/kernel_interface/firedrake_loopy.py
new file mode 100644
index 0000000000..0969854cd3
--- /dev/null
+++ b/tsfc/kernel_interface/firedrake_loopy.py
@@ -0,0 +1,455 @@
+import numpy
+from collections import namedtuple, OrderedDict
+from functools import partial
+
+from ufl import Coefficient, FunctionSpace
+from ufl.domain import extract_unique_domain
+from finat.ufl import MixedElement as ufl_MixedElement, FiniteElement
+
+import gem
+from gem.flop_count import count_flops
+
+import loopy as lp
+
+from tsfc import kernel_args, fem
+from tsfc.finatinterface import create_element
+from tsfc.kernel_interface.common import KernelBuilderBase as _KernelBuilderBase, KernelBuilderMixin, get_index_names, check_requirements, prepare_coefficient, prepare_arguments, prepare_constant
+from tsfc.loopy import generate as generate_loopy
+
+
+# Expression kernel description type
+ExpressionKernel = namedtuple('ExpressionKernel', ['ast', 'oriented', 'needs_cell_sizes',
+                                                   'coefficient_numbers',
+                                                   'needs_external_coords',
+                                                   'tabulations', 'name', 'arguments',
+                                                   'flop_count', 'event'])
+
+
+def make_builder(*args, **kwargs):
+    return partial(KernelBuilder, *args, **kwargs)
+
+
+class Kernel:
+    __slots__ = ("ast", "arguments", "integral_type", "oriented", "subdomain_id",
+                 "domain_number", "needs_cell_sizes", "tabulations",
+                 "coefficient_numbers", "name", "flop_count", "event",
+                 "__weakref__")
+    """A compiled Kernel object.
+
+    :kwarg ast: The loopy kernel object.
+    :kwarg integral_type: The type of integral.
+    :kwarg oriented: Does the kernel require cell_orientations.
+    :kwarg subdomain_id: What is the subdomain id for this kernel.
+    :kwarg domain_number: Which domain number in the original form
+        does this kernel correspond to (can be used to index into
+        original_form.ufl_domains() to get the correct domain).
+    :kwarg coefficient_numbers: A list of which coefficients from the
+        form the kernel needs.
+    :kwarg tabulations: The runtime tabulations this kernel requires
+    :kwarg needs_cell_sizes: Does the kernel require cell sizes.
+    :kwarg name: The name of this kernel.
+    :kwarg flop_count: Estimated total flops for this kernel.
+    :kwarg event: name for logging event
+    """
+    def __init__(self, ast=None, arguments=None, integral_type=None, oriented=False,
+                 subdomain_id=None, domain_number=None,
+                 coefficient_numbers=(),
+                 needs_cell_sizes=False,
+                 tabulations=None,
+                 flop_count=0,
+                 name=None,
+                 event=None):
+        # Defaults
+        self.ast = ast
+        self.arguments = arguments
+        self.integral_type = integral_type
+        self.oriented = oriented
+        self.domain_number = domain_number
+        self.subdomain_id = subdomain_id
+        self.coefficient_numbers = coefficient_numbers
+        self.needs_cell_sizes = needs_cell_sizes
+        self.tabulations = tabulations
+        self.flop_count = flop_count
+        self.name = name
+        self.event = event
+
+
+class KernelBuilderBase(_KernelBuilderBase):
+
+    def __init__(self, scalar_type, interior_facet=False):
+        """Initialise a kernel builder.
+
+        :arg interior_facet: kernel accesses two cells
+        """
+        super().__init__(scalar_type=scalar_type, interior_facet=interior_facet)
+
+        # Cell orientation
+        if self.interior_facet:
+            cell_orientations = gem.Variable("cell_orientations", (2,), dtype=gem.uint_type)
+            self._cell_orientations = (gem.Indexed(cell_orientations, (0,)),
+                                       gem.Indexed(cell_orientations, (1,)))
+        else:
+            cell_orientations = gem.Variable("cell_orientations", (1,), dtype=gem.uint_type)
+            self._cell_orientations = (gem.Indexed(cell_orientations, (0,)),)
+
+    def _coefficient(self, coefficient, name):
+        """Prepare a coefficient. Adds glue code for the coefficient
+        and adds the coefficient to the coefficient map.
+
+        :arg coefficient: :class:`ufl.Coefficient`
+        :arg name: coefficient name
+        :returns: GEM expression representing the coefficient
+        """
+        expr = prepare_coefficient(coefficient, name, interior_facet=self.interior_facet)
+        self.coefficient_map[coefficient] = expr
+        return expr
+
+    def set_cell_sizes(self, domain):
+        """Setup a fake coefficient for "cell sizes".
+
+        :arg domain: The domain of the integral.
+
+        This is required for scaling of derivative basis functions on
+        physically mapped elements (Argyris, Bell, etc...).  We need a
+        measure of the mesh size around each vertex (hence this lives
+        in P1).
+
+        Should the domain have topological dimension 0 this does
+        nothing.
+        """
+        if domain.ufl_cell().topological_dimension() > 0:
+            # Can't create P1 since only P0 is a valid finite element if
+            # topological_dimension is 0 and the concept of "cell size"
+            # is not useful for a vertex.
+            f = Coefficient(FunctionSpace(domain, FiniteElement("P", domain.ufl_cell(), 1)))
+            expr = prepare_coefficient(f, "cell_sizes", interior_facet=self.interior_facet)
+            self._cell_sizes = expr
+
+    def create_element(self, element, **kwargs):
+        """Create a FInAT element (suitable for tabulating with) given
+        a UFL element."""
+        return create_element(element, **kwargs)
+
+    def generate_arg_from_variable(self, var, dtype=None):
+        """Generate kernel arg from a :class:`gem.Variable`.
+
+        :arg var: a :class:`gem.Variable`
+        :arg dtype: dtype of the kernel arg
+        :returns: kernel arg
+        """
+        return lp.GlobalArg(var.name, dtype=dtype or self.scalar_type, shape=var.shape)
+
+    def generate_arg_from_expression(self, expr, dtype=None):
+        """Generate kernel arg from gem expression(s).
+
+        :arg expr: gem expression(s) representing a coefficient or the output tensor
+        :arg dtype: dtype of the kernel arg
+        :returns: kernel arg
+        """
+        var, = gem.extract_type(expr if isinstance(expr, tuple) else (expr, ), gem.Variable)
+        return self.generate_arg_from_variable(var, dtype=dtype or self.scalar_type)
+
+
+class ExpressionKernelBuilder(KernelBuilderBase):
+    """Builds expression kernels for UFL interpolation in Firedrake."""
+
+    def __init__(self, scalar_type):
+        super(ExpressionKernelBuilder, self).__init__(scalar_type=scalar_type)
+        self.oriented = False
+        self.cell_sizes = False
+
+    def set_coefficients(self, coefficients):
+        """Prepare the coefficients of the expression.
+
+        :arg coefficients: UFL coefficients from Firedrake
+        """
+        self.coefficient_split = {}
+
+        for i, coefficient in enumerate(coefficients):
+            if type(coefficient.ufl_element()) == ufl_MixedElement:
+                subcoeffs = coefficient.subfunctions  # Firedrake-specific
+                self.coefficient_split[coefficient] = subcoeffs
+                for j, subcoeff in enumerate(subcoeffs):
+                    self._coefficient(subcoeff, f"w_{i}_{j}")
+            else:
+                self._coefficient(coefficient, f"w_{i}")
+
+    def set_constants(self, constants):
+        for i, const in enumerate(constants):
+            gemexpr = prepare_constant(const, i)
+            self.constant_map[const] = gemexpr
+
+    def set_coefficient_numbers(self, coefficient_numbers):
+        """Store the coefficient indices of the original form.
+
+        :arg coefficient_numbers: Iterable of indices describing which coefficients
+            from the input expression need to be passed in to the kernel.
+        """
+        self.coefficient_numbers = coefficient_numbers
+
+    def register_requirements(self, ir):
+        """Inspect what is referenced by the IR that needs to be
+        provided by the kernel interface."""
+        self.oriented, self.cell_sizes, self.tabulations = check_requirements(ir)
+
+    def set_output(self, o):
+        """Produce the kernel return argument"""
+        loopy_arg = lp.GlobalArg(o.name, dtype=self.scalar_type, shape=o.shape)
+        self.output_arg = kernel_args.OutputKernelArg(loopy_arg)
+
+    def construct_kernel(self, impero_c, index_names, needs_external_coords, log=False):
+        """Constructs an :class:`ExpressionKernel`.
+
+        :arg impero_c: gem.ImperoC object that represents the kernel
+        :arg index_names: pre-assigned index names
+        :arg needs_external_coords: If ``True``, the first argument to
+            the kernel is an externally provided coordinate field.
+        :arg log: bool if the Kernel should be profiled with Log events
+
+        :returns: :class:`ExpressionKernel` object
+        """
+        args = [self.output_arg]
+        if self.oriented:
+            funarg = self.generate_arg_from_expression(self._cell_orientations, dtype=numpy.int32)
+            args.append(kernel_args.CellOrientationsKernelArg(funarg))
+        if self.cell_sizes:
+            funarg = self.generate_arg_from_expression(self._cell_sizes)
+            args.append(kernel_args.CellSizesKernelArg(funarg))
+        for _, expr in self.coefficient_map.items():
+            # coefficient_map is OrderedDict.
+            funarg = self.generate_arg_from_expression(expr)
+            args.append(kernel_args.CoefficientKernelArg(funarg))
+
+        # now constants
+        for gemexpr in self.constant_map.values():
+            funarg = self.generate_arg_from_expression(gemexpr)
+            args.append(kernel_args.ConstantKernelArg(funarg))
+
+        for name_, shape in self.tabulations:
+            tab_loopy_arg = lp.GlobalArg(name_, dtype=self.scalar_type, shape=shape)
+            args.append(kernel_args.TabulationKernelArg(tab_loopy_arg))
+
+        loopy_args = [arg.loopy_arg for arg in args]
+
+        name = "expression_kernel"
+        loopy_kernel, event = generate_loopy(impero_c, loopy_args, self.scalar_type,
+                                             name, index_names, log=log)
+        return ExpressionKernel(loopy_kernel, self.oriented, self.cell_sizes,
+                                self.coefficient_numbers, needs_external_coords,
+                                self.tabulations, name, args, count_flops(impero_c), event)
+
+
+class KernelBuilder(KernelBuilderBase, KernelBuilderMixin):
+    """Helper class for building a :class:`Kernel` object."""
+
+    def __init__(self, integral_data_info, scalar_type,
+                 dont_split=(), diagonal=False):
+        """Initialise a kernel builder."""
+        integral_type = integral_data_info.integral_type
+        super(KernelBuilder, self).__init__(scalar_type, integral_type.startswith("interior_facet"))
+        self.fem_scalar_type = scalar_type
+
+        self.diagonal = diagonal
+        self.local_tensor = None
+        self.coefficient_split = {}
+        self.coefficient_number_index_map = OrderedDict()
+        self.dont_split = frozenset(dont_split)
+
+        # Facet number
+        if integral_type in ['exterior_facet', 'exterior_facet_vert']:
+            facet = gem.Variable('facet', (1,), dtype=gem.uint_type)
+            self._entity_number = {None: gem.VariableIndex(gem.Indexed(facet, (0,)))}
+            facet_orientation = gem.Variable('facet_orientation', (1,), dtype=gem.uint_type)
+            self._entity_orientation = {None: gem.OrientationVariableIndex(gem.Indexed(facet_orientation, (0,)))}
+        elif integral_type in ['interior_facet', 'interior_facet_vert']:
+            facet = gem.Variable('facet', (2,), dtype=gem.uint_type)
+            self._entity_number = {
+                '+': gem.VariableIndex(gem.Indexed(facet, (0,))),
+                '-': gem.VariableIndex(gem.Indexed(facet, (1,)))
+            }
+            facet_orientation = gem.Variable('facet_orientation', (2,), dtype=gem.uint_type)
+            self._entity_orientation = {
+                '+': gem.OrientationVariableIndex(gem.Indexed(facet_orientation, (0,))),
+                '-': gem.OrientationVariableIndex(gem.Indexed(facet_orientation, (1,)))
+            }
+        elif integral_type == 'interior_facet_horiz':
+            self._entity_number = {'+': 1, '-': 0}
+            facet_orientation = gem.Variable('facet_orientation', (1,), dtype=gem.uint_type)  # base mesh entity orientation
+            self._entity_orientation = {
+                '+': gem.OrientationVariableIndex(gem.Indexed(facet_orientation, (0,))),
+                '-': gem.OrientationVariableIndex(gem.Indexed(facet_orientation, (0,)))
+            }
+
+        self.set_arguments(integral_data_info.arguments)
+        self.integral_data_info = integral_data_info
+
+    def set_arguments(self, arguments):
+        """Process arguments.
+
+        :arg arguments: :class:`ufl.Argument`s
+        :returns: GEM expression representing the return variable
+        """
+        argument_multiindices = tuple(create_element(arg.ufl_element()).get_indices()
+                                      for arg in arguments)
+        if self.diagonal:
+            # Error checking occurs in the builder constructor.
+            # Diagonal assembly is obtained by using the test indices for
+            # the trial space as well.
+            a, _ = argument_multiindices
+            argument_multiindices = (a, a)
+        return_variables = prepare_arguments(arguments,
+                                             argument_multiindices,
+                                             interior_facet=self.interior_facet,
+                                             diagonal=self.diagonal)
+        self.return_variables = return_variables
+        self.argument_multiindices = argument_multiindices
+
+    def set_coordinates(self, domain):
+        """Prepare the coordinate field.
+
+        :arg domain: :class:`ufl.Domain`
+        """
+        # Create a fake coordinate coefficient for a domain.
+        f = Coefficient(FunctionSpace(domain, domain.ufl_coordinate_element()))
+        self.domain_coordinate[domain] = f
+        self._coefficient(f, "coords")
+
+    def set_coefficients(self, integral_data, form_data):
+        """Prepare the coefficients of the form.
+
+        :arg integral_data: UFL integral data
+        :arg form_data: UFL form data
+        """
+        # enabled_coefficients is a boolean array that indicates which
+        # of reduced_coefficients the integral requires.
+        n, k = 0, 0
+        for i in range(len(integral_data.enabled_coefficients)):
+            if integral_data.enabled_coefficients[i]:
+                original = form_data.reduced_coefficients[i]
+                coefficient = form_data.function_replace_map[original]
+                if type(coefficient.ufl_element()) == ufl_MixedElement:
+                    if original in self.dont_split:
+                        self.coefficient_split[coefficient] = [coefficient]
+                        self._coefficient(coefficient, f"w_{k}")
+                        self.coefficient_number_index_map[coefficient] = (n, 0)
+                        k += 1
+                    else:
+                        self.coefficient_split[coefficient] = []
+                        for j, element in enumerate(coefficient.ufl_element().sub_elements):
+                            c = Coefficient(FunctionSpace(extract_unique_domain(coefficient), element))
+                            self.coefficient_split[coefficient].append(c)
+                            self._coefficient(c, f"w_{k}")
+                            self.coefficient_number_index_map[c] = (n, j)
+                            k += 1
+                else:
+                    self._coefficient(coefficient, f"w_{k}")
+                    self.coefficient_number_index_map[coefficient] = (n, 0)
+                    k += 1
+                n += 1
+
+    def set_constants(self, constants):
+        for i, const in enumerate(constants):
+            gemexpr = prepare_constant(const, i)
+            self.constant_map[const] = gemexpr
+
+    def register_requirements(self, ir):
+        """Inspect what is referenced by the IR that needs to be
+        provided by the kernel interface."""
+        return check_requirements(ir)
+
+    def construct_kernel(self, name, ctx, log=False):
+        """Construct a fully built :class:`Kernel`.
+
+        This function contains the logic for building the argument
+        list for assembly kernels.
+
+        :arg name: kernel name
+        :arg ctx: kernel builder context to get impero_c from
+        :arg log: bool if the Kernel should be profiled with Log events
+        :returns: :class:`Kernel` object
+        """
+        impero_c, oriented, needs_cell_sizes, tabulations, active_variables = self.compile_gem(ctx)
+        if impero_c is None:
+            return self.construct_empty_kernel(name)
+        info = self.integral_data_info
+        # In the following funargs are only generated
+        # for gem expressions that are actually used;
+        # see `generate_arg_from_expression()` method.
+        # Specifically, funargs are not generated for
+        # unused components of mixed coefficients.
+        # Problem solving environment, such as Firedrake,
+        # will know which components have been included
+        # in the list of kernel arguments by investigating
+        # `Kernel.coefficient_numbers`.
+        # Add return arg
+        funarg = self.generate_arg_from_expression(self.return_variables)
+        args = [kernel_args.OutputKernelArg(funarg)]
+        # Add coordinates arg
+        coord = self.domain_coordinate[info.domain]
+        expr = self.coefficient_map[coord]
+        funarg = self.generate_arg_from_expression(expr)
+        args.append(kernel_args.CoordinatesKernelArg(funarg))
+        if oriented:
+            funarg = self.generate_arg_from_expression(self._cell_orientations, dtype=numpy.int32)
+            args.append(kernel_args.CellOrientationsKernelArg(funarg))
+        if needs_cell_sizes:
+            funarg = self.generate_arg_from_expression(self._cell_sizes)
+            args.append(kernel_args.CellSizesKernelArg(funarg))
+        coefficient_indices = OrderedDict()
+        for coeff, (number, index) in self.coefficient_number_index_map.items():
+            a = coefficient_indices.setdefault(number, [])
+            expr = self.coefficient_map[coeff]
+            var, = gem.extract_type(expr if isinstance(expr, tuple) else (expr, ), gem.Variable)
+            if var in active_variables:
+                funarg = self.generate_arg_from_expression(expr)
+                args.append(kernel_args.CoefficientKernelArg(funarg))
+                a.append(index)
+
+        # now constants
+        for gemexpr in self.constant_map.values():
+            funarg = self.generate_arg_from_expression(gemexpr)
+            args.append(kernel_args.ConstantKernelArg(funarg))
+
+        coefficient_indices = tuple(tuple(v) for v in coefficient_indices.values())
+        assert len(coefficient_indices) == len(info.coefficient_numbers)
+        if info.integral_type in ["exterior_facet", "exterior_facet_vert"]:
+            ext_loopy_arg = lp.GlobalArg("facet", numpy.uint32, shape=(1,))
+            args.append(kernel_args.ExteriorFacetKernelArg(ext_loopy_arg))
+        elif info.integral_type in ["interior_facet", "interior_facet_vert"]:
+            int_loopy_arg = lp.GlobalArg("facet", numpy.uint32, shape=(2,))
+            args.append(kernel_args.InteriorFacetKernelArg(int_loopy_arg))
+        # Will generalise this in the submesh PR.
+        if fem.PointSetContext(**self.fem_config()).use_canonical_quadrature_point_ordering:
+            if info.integral_type == "exterior_facet":
+                ext_ornt_loopy_arg = lp.GlobalArg("facet_orientation", gem.uint_type, shape=(1,))
+                args.append(kernel_args.ExteriorFacetOrientationKernelArg(ext_ornt_loopy_arg))
+            elif info.integral_type == "interior_facet":
+                int_ornt_loopy_arg = lp.GlobalArg("facet_orientation", gem.uint_type, shape=(2,))
+                args.append(kernel_args.InteriorFacetOrientationKernelArg(int_ornt_loopy_arg))
+        for name_, shape in tabulations:
+            tab_loopy_arg = lp.GlobalArg(name_, dtype=self.scalar_type, shape=shape)
+            args.append(kernel_args.TabulationKernelArg(tab_loopy_arg))
+        index_names = get_index_names(ctx['quadrature_indices'], self.argument_multiindices, ctx['index_cache'])
+        ast, event_name = generate_loopy(impero_c, [arg.loopy_arg for arg in args],
+                                         self.scalar_type, name, index_names, log=log)
+        flop_count = count_flops(impero_c)  # Estimated total flops for this kernel.
+        return Kernel(ast=ast,
+                      arguments=tuple(args),
+                      integral_type=info.integral_type,
+                      subdomain_id=info.subdomain_id,
+                      domain_number=info.domain_number,
+                      coefficient_numbers=tuple(zip(info.coefficient_numbers, coefficient_indices)),
+                      oriented=oriented,
+                      needs_cell_sizes=needs_cell_sizes,
+                      tabulations=tabulations,
+                      flop_count=flop_count,
+                      name=name,
+                      event=event_name)
+
+    def construct_empty_kernel(self, name):
+        """Return None, since Firedrake needs no empty kernels.
+
+        :arg name: function name
+        :returns: None
+        """
+        return None
diff --git a/tsfc/logging.py b/tsfc/logging.py
new file mode 100644
index 0000000000..6bced525f4
--- /dev/null
+++ b/tsfc/logging.py
@@ -0,0 +1,6 @@
+"""Logging for TSFC."""
+
+import logging
+
+logger = logging.getLogger('tsfc')
+logger.addHandler(logging.StreamHandler())
diff --git a/tsfc/loopy.py b/tsfc/loopy.py
new file mode 100644
index 0000000000..6826f0b672
--- /dev/null
+++ b/tsfc/loopy.py
@@ -0,0 +1,578 @@
+"""Generate loopy kernel from ImperoC tuple data.
+
+This is the final stage of code generation in TSFC."""
+
+from numbers import Integral
+import numpy
+from functools import singledispatch
+from collections import defaultdict, OrderedDict
+
+from gem import gem, impero as imp
+from gem.node import Memoizer
+
+import islpy as isl
+import loopy as lp
+
+import pymbolic.primitives as p
+from loopy.symbolic import SubArrayRef
+
+from pytools import UniqueNameGenerator
+
+from tsfc.parameters import is_complex
+
+from contextlib import contextmanager
+from tsfc.parameters import target
+
+
+def profile_insns(kernel_name, instructions, log=False):
+    if log:
+        event_name = "Log_Event_" + kernel_name
+        event_id_var_name = "ID_" + event_name
+        # Logging registration
+        # The events are registered in PyOP2 and the event id is passed onto the dll
+        preamble = "PetscLogEvent "+event_id_var_name+" = -1;"
+        # Profiling
+        prepend = [lp.CInstruction("", "PetscLogEventBegin("+event_id_var_name+",0,0,0,0);")]
+        append = [lp.CInstruction("", "PetscLogEventEnd("+event_id_var_name+",0,0,0,0);")]
+        instructions = prepend + instructions + append
+        return instructions, event_name, [(str(2**31-1)+"_"+kernel_name, preamble)]
+    else:
+        return instructions, None, None
+
+
+@singledispatch
+def _assign_dtype(expression, self):
+    return set.union(*map(self, expression.children))
+
+
+@_assign_dtype.register(gem.Terminal)
+def _assign_dtype_terminal(expression, self):
+    return {expression.dtype or self.scalar_type}
+
+
+@_assign_dtype.register(gem.Variable)
+def _assign_dtype_variable(expression, self):
+    return {expression.dtype or self.scalar_type}
+
+
+@_assign_dtype.register(gem.Zero)
+@_assign_dtype.register(gem.Identity)
+@_assign_dtype.register(gem.Delta)
+def _assign_dtype_real(expression, self):
+    return {expression.dtype or self.real_type}
+
+
+@_assign_dtype.register(gem.Literal)
+def _assign_dtype_identity(expression, self):
+    return {expression.array.dtype}
+
+
+@_assign_dtype.register(gem.Power)
+def _assign_dtype_power(expression, self):
+    # Conservative
+    return {expression.dtype or self.scalar_type}
+
+
+@_assign_dtype.register(gem.MathFunction)
+def _assign_dtype_mathfunction(expression, self):
+    if expression.name in {"abs", "real", "imag"}:
+        return {expression.dtype or self.real_type}
+    elif expression.name == "sqrt":
+        return {expression.dtype or self.scalar_type}
+    else:
+        return set.union(*map(self, expression.children))
+
+
+@_assign_dtype.register(gem.MinValue)
+@_assign_dtype.register(gem.MaxValue)
+def _assign_dtype_minmax(expression, self):
+    # UFL did correctness checking
+    return {expression.dtype or self.real_type}
+
+
+@_assign_dtype.register(gem.Conditional)
+def _assign_dtype_conditional(expression, self):
+    return set.union(*map(self, expression.children[1:]))
+
+
+@_assign_dtype.register(gem.Comparison)
+@_assign_dtype.register(gem.LogicalNot)
+@_assign_dtype.register(gem.LogicalAnd)
+@_assign_dtype.register(gem.LogicalOr)
+def _assign_dtype_logical(expression, self):
+    return {expression.dtype or numpy.int8}
+
+
+def assign_dtypes(expressions, scalar_type):
+    """Assign numpy data types to expressions.
+
+    Used for declaring temporaries when converting from Impero to lower level code.
+
+    :arg expressions: List of GEM expressions.
+    :arg scalar_type: Default scalar type.
+
+    :returns: list of tuples (expression, dtype)."""
+    mapper = Memoizer(_assign_dtype)
+    mapper.scalar_type = scalar_type
+    mapper.real_type = numpy.finfo(scalar_type).dtype
+    return [(e, numpy.result_type(*mapper(e))) for e in expressions]
+
+
+class LoopyContext(object):
+    def __init__(self, target=None):
+        self.indices = {}  # indices for declarations and referencing values, from ImperoC
+        self.active_indices = {}  # gem index -> pymbolic variable
+        self.index_extent = OrderedDict()  # pymbolic variable for indices -> extent
+        self.gem_to_pymbolic = {}  # gem node -> pymbolic variable
+        self.name_gen = UniqueNameGenerator()
+        self.target = target
+        self.loop_priorities = set()  # used to avoid disadvantageous loop interchanges
+
+    def fetch_multiindex(self, multiindex):
+        indices = []
+        for index in multiindex:
+            if isinstance(index, gem.Index):
+                indices.append(self.active_indices[index])
+            elif isinstance(index, gem.VariableIndex):
+                indices.append(expression(index.expression, self))
+            else:
+                assert isinstance(index, int)
+                indices.append(index)
+        return tuple(indices)
+
+    # Generate index from gem multiindex
+    def gem_to_pym_multiindex(self, multiindex):
+        indices = []
+        for index in multiindex:
+            assert index.extent
+            if not index.name:
+                name = self.name_gen(self.index_names[index])
+            else:
+                name = index.name
+            self.index_extent[name] = index.extent
+            indices.append(p.Variable(name))
+        return tuple(indices)
+
+    # Generate index from shape
+    def pymbolic_multiindex(self, shape):
+        indices = []
+        for extent in shape:
+            name = self.name_gen(self.index_names[extent])
+            self.index_extent[name] = extent
+            indices.append(p.Variable(name))
+        return tuple(indices)
+
+    # Generate pym variable from gem
+    def pymbolic_variable_and_destruct(self, node):
+        pym = self.pymbolic_variable(node)
+        if isinstance(pym, p.Subscript):
+            return pym.aggregate, pym.index_tuple
+        else:
+            return pym, ()
+
+    # Generate pym variable or subscript
+    def pymbolic_variable(self, node):
+        pym = self._gem_to_pym_var(node)
+        if node in self.indices:
+            indices = self.fetch_multiindex(self.indices[node])
+            if indices:
+                return p.Subscript(pym, indices)
+        return pym
+
+    def _gem_to_pym_var(self, node):
+        try:
+            pym = self.gem_to_pymbolic[node]
+        except KeyError:
+            name = self.name_gen(node.name)
+            pym = p.Variable(name)
+            self.gem_to_pymbolic[node] = pym
+        return pym
+
+    def active_inames(self):
+        # Return all active indices
+        return frozenset([i.name for i in self.active_indices.values()])
+
+    def save_loop_ordering(self):
+        """Save the active loops to prevent loop reordering."""
+        priority = tuple(map(str, self.active_indices.values()))
+        if len(priority) > 1:
+            self.loop_priorities.add(priority)
+
+
+@contextmanager
+def active_indices(mapping, ctx):
+    """Push active indices onto context.
+   :arg mapping: dict mapping gem indices to pymbolic index expressions
+   :arg ctx: code generation context.
+   :returns: new code generation context."""
+    ctx.active_indices.update(mapping)
+    ctx.save_loop_ordering()
+    yield ctx
+    for key in mapping:
+        ctx.active_indices.pop(key)
+
+
+def generate(impero_c, args, scalar_type, kernel_name="loopy_kernel", index_names=[],
+             return_increments=True, log=False):
+    """Generates loopy code.
+
+    :arg impero_c: ImperoC tuple with Impero AST and other data
+    :arg args: list of loopy.GlobalArgs
+    :arg scalar_type: type of scalars as C typename string
+    :arg kernel_name: function name of the kernel
+    :arg index_names: pre-assigned index names
+    :arg return_increments: Does codegen for Return nodes increment the lvalue, or assign?
+    :arg log: bool if the Kernel should be profiled with Log events
+    :returns: loopy kernel
+    """
+    ctx = LoopyContext(target=target)
+    ctx.indices = impero_c.indices
+    ctx.index_names = defaultdict(lambda: "i", index_names)
+    ctx.epsilon = numpy.finfo(scalar_type).resolution
+    ctx.scalar_type = scalar_type
+    ctx.return_increments = return_increments
+
+    # Create arguments
+    data = list(args)
+    for i, (temp, dtype) in enumerate(assign_dtypes(impero_c.temporaries, scalar_type)):
+        name = "t%d" % i
+        if isinstance(temp, gem.Constant):
+            data.append(lp.TemporaryVariable(name, shape=temp.shape, dtype=dtype, initializer=temp.array, address_space=lp.AddressSpace.LOCAL, read_only=True))
+        else:
+            shape = tuple([i.extent for i in ctx.indices[temp]]) + temp.shape
+            data.append(lp.TemporaryVariable(name, shape=shape, dtype=dtype, initializer=None, address_space=lp.AddressSpace.LOCAL, read_only=False))
+        ctx.gem_to_pymbolic[temp] = p.Variable(name)
+
+    # Create instructions
+    instructions = statement(impero_c.tree, ctx)
+
+    # add a no-op touching all kernel arguments to make sure they
+    # are not silently dropped
+    noop = lp.CInstruction(
+        (), "", read_variables=frozenset({a.name for a in args}),
+        within_inames=frozenset(), within_inames_is_final=True)
+    instructions.append(noop)
+
+    # Profile the instructions
+    instructions, event_name, preamble = profile_insns(kernel_name, instructions, log)
+
+    # Create domains
+    domains = create_domains(ctx.index_extent.items())
+
+    # Create loopy kernel
+    knl = lp.make_kernel(
+        domains,
+        instructions,
+        data,
+        name=kernel_name,
+        target=target,
+        seq_dependencies=True,
+        silenced_warnings=["summing_if_branches_ops"],
+        lang_version=(2018, 2),
+        preambles=preamble,
+        loop_priority=frozenset(ctx.loop_priorities),
+    )
+
+    return knl, event_name
+
+
+def create_domains(indices):
+    """ Create ISL domains from indices
+
+    :arg indices: iterable of (index_name, extent) pairs
+    :returns: A list of ISL sets representing the iteration domain of the indices."""
+
+    domains = []
+    for idx, extent in indices:
+        inames = isl.make_zero_and_vars([idx])
+        domains.append(((inames[0].le_set(inames[idx])) & (inames[idx].lt_set(inames[0] + extent))))
+
+    if not domains:
+        domains = [isl.BasicSet("[] -> {[]}")]
+    return domains
+
+
+@singledispatch
+def statement(tree, ctx):
+    """Translates an Impero (sub)tree into a loopy instructions corresponding
+    to a C statement.
+
+    :arg tree: Impero (sub)tree
+    :arg ctx: miscellaneous code generation data
+    :returns: list of loopy instructions
+    """
+    raise AssertionError("cannot generate loopy from %s" % type(tree))
+
+
+@statement.register(imp.Block)
+def statement_block(tree, ctx):
+    from itertools import chain
+    return list(chain(*(statement(child, ctx) for child in tree.children)))
+
+
+@statement.register(imp.For)
+def statement_for(tree, ctx):
+    extent = tree.index.extent
+    assert extent
+    idx = ctx.name_gen(ctx.index_names[tree.index])
+    ctx.index_extent[idx] = extent
+    with active_indices({tree.index: p.Variable(idx)}, ctx) as ctx_active:
+        return statement(tree.children[0], ctx_active)
+
+
+@statement.register(imp.Initialise)
+def statement_initialise(leaf, ctx):
+    return [lp.Assignment(expression(leaf.indexsum, ctx), 0.0, within_inames=ctx.active_inames())]
+
+
+@statement.register(imp.Accumulate)
+def statement_accumulate(leaf, ctx):
+    lhs = expression(leaf.indexsum, ctx)
+    rhs = lhs + expression(leaf.indexsum.children[0], ctx)
+    return [lp.Assignment(lhs, rhs, within_inames=ctx.active_inames())]
+
+
+@statement.register(imp.Return)
+def statement_return(leaf, ctx):
+    lhs = expression(leaf.variable, ctx)
+    rhs = expression(leaf.expression, ctx)
+    if ctx.return_increments:
+        rhs = lhs + rhs
+    return [lp.Assignment(lhs, rhs, within_inames=ctx.active_inames())]
+
+
+@statement.register(imp.ReturnAccumulate)
+def statement_returnaccumulate(leaf, ctx):
+    lhs = expression(leaf.variable, ctx)
+    rhs = lhs + expression(leaf.indexsum.children[0], ctx)
+    return [lp.Assignment(lhs, rhs, within_inames=ctx.active_inames())]
+
+
+@statement.register(imp.Evaluate)
+def statement_evaluate(leaf, ctx):
+    expr = leaf.expression
+    if isinstance(expr, gem.ListTensor):
+        ops = []
+        var, index = ctx.pymbolic_variable_and_destruct(expr)
+        for multiindex, value in numpy.ndenumerate(expr.array):
+            ops.append(lp.Assignment(p.Subscript(var, index + multiindex), expression(value, ctx), within_inames=ctx.active_inames()))
+        return ops
+    elif isinstance(expr, gem.Constant):
+        return []
+    elif isinstance(expr, gem.ComponentTensor):
+        idx = ctx.gem_to_pym_multiindex(expr.multiindex)
+        var, sub_idx = ctx.pymbolic_variable_and_destruct(expr)
+        lhs = p.Subscript(var, idx + sub_idx)
+        with active_indices(dict(zip(expr.multiindex, idx)), ctx) as ctx_active:
+            return [lp.Assignment(lhs, expression(expr.children[0], ctx_active), within_inames=ctx_active.active_inames())]
+    elif isinstance(expr, gem.Inverse):
+        idx = ctx.pymbolic_multiindex(expr.shape)
+        var = ctx.pymbolic_variable(expr)
+        lhs = (SubArrayRef(idx, p.Subscript(var, idx)),)
+
+        idx_reads = ctx.pymbolic_multiindex(expr.children[0].shape)
+        var_reads = ctx.pymbolic_variable(expr.children[0])
+        reads = (SubArrayRef(idx_reads, p.Subscript(var_reads, idx_reads)),)
+        rhs = p.Call(p.Variable("inverse"), reads)
+
+        return [lp.CallInstruction(lhs, rhs, within_inames=ctx.active_inames())]
+    elif isinstance(expr, gem.Solve):
+        idx = ctx.pymbolic_multiindex(expr.shape)
+        var = ctx.pymbolic_variable(expr)
+        lhs = (SubArrayRef(idx, p.Subscript(var, idx)),)
+
+        reads = []
+        for child in expr.children:
+            idx_reads = ctx.pymbolic_multiindex(child.shape)
+            var_reads = ctx.pymbolic_variable(child)
+            reads.append(SubArrayRef(idx_reads, p.Subscript(var_reads, idx_reads)))
+        rhs = p.Call(p.Variable("solve"), tuple(reads))
+
+        return [lp.CallInstruction(lhs, rhs, within_inames=ctx.active_inames())]
+    else:
+        return [lp.Assignment(ctx.pymbolic_variable(expr), expression(expr, ctx, top=True), within_inames=ctx.active_inames())]
+
+
+def expression(expr, ctx, top=False):
+    """Translates GEM expression into a pymbolic expression
+
+    :arg expr: GEM expression
+    :arg ctx: miscellaneous code generation data
+    :arg top: do not generate temporary reference for the root node
+    :returns: pymbolic expression
+    """
+    if not top and expr in ctx.gem_to_pymbolic:
+        return ctx.pymbolic_variable(expr)
+    else:
+        return _expression(expr, ctx)
+
+
+@singledispatch
+def _expression(expr, ctx):
+    raise AssertionError("cannot generate expression from %s" % type(expr))
+
+
+@_expression.register(gem.Failure)
+def _expression_failure(expr, ctx):
+    raise expr.exception
+
+
+@_expression.register(gem.Product)
+def _expression_product(expr, ctx):
+    return p.Product(tuple(expression(c, ctx) for c in expr.children))
+
+
+@_expression.register(gem.Sum)
+def _expression_sum(expr, ctx):
+    return p.Sum(tuple(expression(c, ctx) for c in expr.children))
+
+
+@_expression.register(gem.Division)
+def _expression_division(expr, ctx):
+    return p.Quotient(*(expression(c, ctx) for c in expr.children))
+
+
+@_expression.register(gem.FloorDiv)
+def _expression_floordiv(expr, ctx):
+    return p.FloorDiv(*(expression(c, ctx) for c in expr.children))
+
+
+@_expression.register(gem.Remainder)
+def _expression_remainder(expr, ctx):
+    return p.Remainder(*(expression(c, ctx) for c in expr.children))
+
+
+@_expression.register(gem.Power)
+def _expression_power(expr, ctx):
+    return p.Variable("pow")(*(expression(c, ctx) for c in expr.children))
+
+
+@_expression.register(gem.MathFunction)
+def _expression_mathfunction(expr, ctx):
+    if expr.name.startswith('cyl_bessel_'):
+        # Bessel functions
+        if is_complex(ctx.scalar_type):
+            raise NotImplementedError("Bessel functions for complex numbers: "
+                                      "missing implementation")
+        nu, arg = expr.children
+        nu_ = expression(nu, ctx)
+        arg_ = expression(arg, ctx)
+        if isinstance(ctx.target, lp.target.c.CWithGNULibcTarget):
+            # Generate right functions calls to gnulibc bessel functions
+            # cyl_bessel_{jy} -> bessel_{jy}
+            name = expr.name[4:]
+            return p.Variable(f"{name}n")(int(nu_), arg_)
+        else:
+            # Modified Bessel functions (C++ only)
+            # These mappings work for FEniCS only, and fail with Firedrake
+            # since no Boost available.
+            # Is this actually still supported/has ever been used by anyone?
+            if expr.name in {'cyl_bessel_i', 'cyl_bessel_k'}:
+                name = 'boost::math::' + expr.name
+                return p.Variable(name)(nu_, arg_)
+            else:
+                # cyl_bessel_{jy} -> {jy}
+                name = expr.name[-1:]
+                if nu == gem.Zero():
+                    return p.Variable(f"{name}0")(arg_)
+                elif nu == gem.one:
+                    return p.Variable(f"{name}1")(arg_)
+                else:
+                    return p.Variable(f"{name}n")(nu_, arg_)
+    else:
+        if expr.name == "ln":
+            name = "log"
+        else:
+            name = expr.name
+        # Not all mathfunctions apply to complex numbers, but this
+        # will be picked up in loopy. This way we allow erf(real(...))
+        # in complex mode (say).
+        return p.Variable(name)(*(expression(c, ctx) for c in expr.children))
+
+
+@_expression.register(gem.MinValue)
+def _expression_minvalue(expr, ctx):
+    return p.Variable("min")(*[expression(c, ctx) for c in expr.children])
+
+
+@_expression.register(gem.MaxValue)
+def _expression_maxvalue(expr, ctx):
+    return p.Variable("max")(*[expression(c, ctx) for c in expr.children])
+
+
+@_expression.register(gem.Comparison)
+def _expression_comparison(expr, ctx):
+    left, right = [expression(c, ctx) for c in expr.children]
+    return p.Comparison(left, expr.operator, right)
+
+
+@_expression.register(gem.LogicalNot)
+def _expression_logicalnot(expr, ctx):
+    child, = expr.children
+    return p.LogicalNot(expression(child, ctx))
+
+
+@_expression.register(gem.LogicalAnd)
+def _expression_logicaland(expr, ctx):
+    return p.LogicalAnd(tuple([expression(c, ctx) for c in expr.children]))
+
+
+@_expression.register(gem.LogicalOr)
+def _expression_logicalor(expr, ctx):
+    return p.LogicalOr(tuple([expression(c, ctx) for c in expr.children]))
+
+
+@_expression.register(gem.Conditional)
+def _expression_conditional(expr, ctx):
+    return p.If(*[expression(c, ctx) for c in expr.children])
+
+
+@_expression.register(gem.Constant)
+def _expression_scalar(expr, ctx):
+    assert not expr.shape
+    v = expr.value
+    if numpy.isnan(v):
+        return p.Variable("NAN")
+    r = numpy.round(v, 1)
+    if r and numpy.abs(v - r) < ctx.epsilon:
+        return r
+    return v
+
+
+@_expression.register(gem.Variable)
+def _expression_variable(expr, ctx):
+    return ctx.pymbolic_variable(expr)
+
+
+@_expression.register(gem.Indexed)
+def _expression_indexed(expr, ctx):
+    rank = ctx.fetch_multiindex(expr.multiindex)
+    var = expression(expr.children[0], ctx)
+    if isinstance(var, p.Subscript):
+        rank = var.index + rank
+        var = var.aggregate
+    return p.Subscript(var, rank)
+
+
+@_expression.register(gem.FlexiblyIndexed)
+def _expression_flexiblyindexed(expr, ctx):
+    var = expression(expr.children[0], ctx)
+
+    rank = []
+    for off, idxs in expr.dim2idxs:
+        rank_ = [expression(off, ctx)]
+        for index, stride in idxs:
+            if isinstance(index, gem.Index):
+                rank_.append(p.Product((ctx.active_indices[index], expression(stride, ctx))))
+            elif isinstance(index, gem.VariableIndex):
+                rank_.append(p.Product((expression(index.expression, ctx), expression(stride, ctx))))
+            else:
+                raise ValueError(f"Expecting Index or VariableIndex, not {type(index)}")
+        rank.append(p.Sum(tuple(rank_)))
+
+    return p.Subscript(var, tuple(rank))
+
+
+@_expression.register(Integral)
+def _expression_numbers_integral(expr, ctx):
+    return expr
diff --git a/tsfc/modified_terminals.py b/tsfc/modified_terminals.py
new file mode 100644
index 0000000000..8c5162bf97
--- /dev/null
+++ b/tsfc/modified_terminals.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2011-2015 Martin Sandve Alnæs
+#
+# This file is part of UFLACS.
+#
+# UFLACS is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# UFLACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with UFLACS. If not, see <http://www.gnu.org/licenses/>.
+#
+# Modified by Miklós Homolya, 2016.
+
+"""Definitions of 'modified terminals', a core concept in uflacs."""
+
+from ufl.classes import (ReferenceValue, ReferenceGrad,
+                         NegativeRestricted, PositiveRestricted,
+                         Restricted, ConstantValue,
+                         Jacobian, SpatialCoordinate, Zero)
+from ufl.checks import is_cellwise_constant
+from ufl.domain import extract_unique_domain
+
+
+class ModifiedTerminal(object):
+
+    """A modified terminal expression is an object of a Terminal subtype, wrapped in terminal modifier types.
+
+    The variables of this class are:
+
+        expr - The original UFL expression
+
+        terminal           - the underlying Terminal object
+        local_derivatives  - tuple of ints, each meaning derivative in that local direction
+        reference_value    - bool, whether this is represented in reference frame
+        restriction        - None, '+' or '-'
+    """
+
+    def __init__(self, expr, terminal, local_derivatives, restriction, reference_value):
+        # The original expression
+        self.expr = expr
+
+        # The underlying terminal expression
+        self.terminal = terminal
+
+        # Components
+        self.reference_value = reference_value
+        self.restriction = restriction
+
+        # Derivatives
+        self.local_derivatives = local_derivatives
+
+    def as_tuple(self):
+        t = self.terminal
+        rv = self.reference_value
+        ld = self.local_derivatives
+        r = self.restriction
+        return (t, rv, ld, r)
+
+    def __hash__(self):
+        return hash(self.as_tuple())
+
+    def __eq__(self, other):
+        return isinstance(other, ModifiedTerminal) and self.as_tuple() == other.as_tuple()
+
+    def __lt__(self, other):
+        return self.as_tuple() < other.as_tuple()
+
+    def __str__(self):
+        s = []
+        s += ["terminal:           {0}".format(self.terminal)]
+        s += ["local_derivatives:  {0}".format(self.local_derivatives)]
+        s += ["restriction:        {0}".format(self.restriction)]
+        return '\n'.join(s)
+
+
+def is_modified_terminal(v):
+    "Check if v is a terminal or a terminal wrapped in terminal modifier types."
+    while not v._ufl_is_terminal_:
+        if v._ufl_is_terminal_modifier_:
+            v = v.ufl_operands[0]
+        else:
+            return False
+    return True
+
+
+def strip_modified_terminal(v):
+    "Extract core Terminal from a modified terminal or return None."
+    while not v._ufl_is_terminal_:
+        if v._ufl_is_terminal_modifier_:
+            v = v.ufl_operands[0]
+        else:
+            return None
+    return v
+
+
+def analyse_modified_terminal(expr):
+    """Analyse a so-called 'modified terminal' expression and return its properties in more compact form.
+
+    A modified terminal expression is an object of a Terminal subtype, wrapped in terminal modifier types.
+
+    The wrapper types can include 0-* Grad or ReferenceGrad objects,
+    and 0-1 ReferenceValue, 0-1 Restricted, and 0-1 Indexed objects.
+    """
+    # Data to determine
+    local_derivatives = 0
+    reference_value = None
+    restriction = None
+
+    # Start with expr and strip away layers of modifiers
+    t = expr
+    while not t._ufl_is_terminal_:
+        if isinstance(t, ReferenceValue):
+            assert reference_value is None, "Got twice pulled back terminal!"
+            reference_value = True
+            t, = t.ufl_operands
+
+        elif isinstance(t, ReferenceGrad):
+            local_derivatives += 1
+            t, = t.ufl_operands
+
+        elif isinstance(t, Restricted):
+            assert restriction is None, "Got twice restricted terminal!"
+            restriction = t._side
+            t, = t.ufl_operands
+
+        elif t._ufl_terminal_modifiers_:
+            raise ValueError("Missing handler for terminal modifier type %s, object is %s." % (type(t), repr(t)))
+
+        else:
+            raise ValueError("Unexpected type %s object %s." % (type(t), repr(t)))
+
+    # Make reference_value true or false
+    if reference_value is None:
+        reference_value = False
+
+    # Consistency check
+    if isinstance(t, (SpatialCoordinate, Jacobian)):
+        pass
+    else:
+        if local_derivatives and not reference_value:
+            raise ValueError("Local derivatives of non-local value?")
+
+    return ModifiedTerminal(expr, t, local_derivatives, restriction, reference_value)
+
+
+def construct_modified_terminal(mt, terminal):
+    """Construct a modified terminal given terminal modifiers from an
+    analysed modified terminal and a terminal."""
+    expr = terminal
+
+    if mt.reference_value:
+        expr = ReferenceValue(expr)
+
+    dim = extract_unique_domain(expr).topological_dimension()
+    for n in range(mt.local_derivatives):
+        # Return zero if expression is trivially constant. This has to
+        # happen here because ReferenceGrad has no access to the
+        # topological dimension of a literal zero.
+        if is_cellwise_constant(expr):
+            expr = Zero(expr.ufl_shape + (dim,), expr.ufl_free_indices,
+                        expr.ufl_index_dimensions)
+        else:
+            expr = ReferenceGrad(expr)
+
+    # No need to apply restrictions to ConstantValue terminals
+    if not isinstance(expr, ConstantValue):
+        if mt.restriction == '+':
+            expr = PositiveRestricted(expr)
+        elif mt.restriction == '-':
+            expr = NegativeRestricted(expr)
+
+    return expr
diff --git a/tsfc/parameters.py b/tsfc/parameters.py
new file mode 100644
index 0000000000..1277713ad5
--- /dev/null
+++ b/tsfc/parameters.py
@@ -0,0 +1,36 @@
+import numpy
+from loopy.target.c import CWithGNULibcTarget
+
+
+PARAMETERS = {
+    "quadrature_rule": "auto",
+    "quadrature_degree": "auto",
+
+    # Default mode
+    "mode": "spectral",
+
+    # Maximum extent to unroll index sums. Default is 3, so that loops
+    # over geometric dimensions are unrolled; this improves assembly
+    # performance.  Can be disabled by setting it to None, False or 0;
+    # that makes compilation time much shorter.
+    "unroll_indexsum": 3,
+
+    # Scalar type numpy dtype
+    "scalar_type": numpy.dtype(numpy.float64),
+
+    # So that tests pass (needs to match scalar_type)
+    "scalar_type_c": "double",
+}
+
+
+target = CWithGNULibcTarget()
+
+
+def default_parameters():
+    return PARAMETERS.copy()
+
+
+def is_complex(scalar_type):
+    """Decides complex mode based on scalar type."""
+    return scalar_type and (isinstance(scalar_type, numpy.dtype) and scalar_type.kind == 'c') \
+        or (isinstance(scalar_type, str) and "complex" in scalar_type)
diff --git a/tsfc/spectral.py b/tsfc/spectral.py
new file mode 100644
index 0000000000..8ee838481d
--- /dev/null
+++ b/tsfc/spectral.py
@@ -0,0 +1,193 @@
+from collections import OrderedDict, defaultdict, namedtuple
+from functools import partial, reduce
+from itertools import chain, zip_longest
+
+from gem.gem import Delta, Indexed, Sum, index_sum, one
+from gem.node import Memoizer
+from gem.optimise import delta_elimination as _delta_elimination
+from gem.optimise import remove_componenttensors, replace_division, unroll_indexsum
+from gem.refactorise import ATOMIC, COMPOUND, OTHER, MonomialSum, collect_monomials
+from gem.unconcatenate import unconcatenate
+from gem.coffee import optimise_monomial_sum
+from gem.utils import groupby
+
+
+Integral = namedtuple('Integral', ['expression',
+                                   'quadrature_multiindex',
+                                   'argument_indices'])
+
+
+def Integrals(expressions, quadrature_multiindex, argument_multiindices, parameters):
+    """Constructs an integral representation for each GEM integrand
+    expression.
+
+    :arg expressions: integrand multiplied with quadrature weight;
+                      multi-root GEM expression DAG
+    :arg quadrature_multiindex: quadrature multiindex (tuple)
+    :arg argument_multiindices: tuple of argument multiindices,
+                                one multiindex for each argument
+    :arg parameters: parameters dictionary
+
+    :returns: list of integral representations
+    """
+    # Rewrite: a / b => a * (1 / b)
+    expressions = replace_division(expressions)
+
+    # Unroll
+    max_extent = parameters["unroll_indexsum"]
+    if max_extent:
+        def predicate(index):
+            return index.extent <= max_extent
+        expressions = unroll_indexsum(expressions, predicate=predicate)
+
+    expressions = [index_sum(e, quadrature_multiindex) for e in expressions]
+    argument_indices = tuple(chain(*argument_multiindices))
+    return [Integral(e, quadrature_multiindex, argument_indices) for e in expressions]
+
+
+def _delta_inside(node, self):
+    """Does node contain a Delta?"""
+    return any(isinstance(child, Delta) or self(child)
+               for child in node.children)
+
+
+def flatten(var_reps, index_cache):
+    quadrature_indices = OrderedDict()
+
+    pairs = []  # assignment pairs
+    for variable, reps in var_reps:
+        # Extract argument indices
+        argument_indices, = set(r.argument_indices for r in reps)
+        assert set(variable.free_indices) == set(argument_indices)
+
+        # Extract and verify expressions
+        expressions = [r.expression for r in reps]
+        assert all(set(e.free_indices) <= set(argument_indices)
+                   for e in expressions)
+
+        # Save assignment pair
+        pairs.append((variable, reduce(Sum, expressions)))
+
+        # Collect quadrature_indices
+        for r in reps:
+            quadrature_indices.update(zip_longest(r.quadrature_multiindex, ()))
+
+    # Split Concatenate nodes
+    pairs = unconcatenate(pairs, cache=index_cache)
+
+    def group_key(pair):
+        variable, expression = pair
+        return frozenset(variable.free_indices)
+
+    delta_inside = Memoizer(_delta_inside)
+    # Variable ordering after delta cancellation
+    narrow_variables = OrderedDict()
+    # Assignments are variable -> MonomialSum map
+    delta_simplified = defaultdict(MonomialSum)
+    # Group assignment pairs by argument indices
+    for free_indices, pair_group in groupby(pairs, group_key):
+        variables, expressions = zip(*pair_group)
+        # Argument factorise expressions
+        classifier = partial(classify, set(free_indices), delta_inside=delta_inside)
+        monomial_sums = collect_monomials(expressions, classifier)
+        # For each monomial, apply delta cancellation and insert
+        # result into delta_simplified.
+        for variable, monomial_sum in zip(variables, monomial_sums):
+            for monomial in monomial_sum:
+                var, s, a, r = delta_elimination(variable, *monomial)
+                narrow_variables.setdefault(var)
+                delta_simplified[var].add(s, a, r)
+
+    # Final factorisation
+    for variable in narrow_variables:
+        monomial_sum = delta_simplified[variable]
+        # Collect sum indices applicable to the current MonomialSum
+        sum_indices = set().union(*[m.sum_indices for m in monomial_sum])
+        # Put them in a deterministic order
+        sum_indices = [i for i in quadrature_indices if i in sum_indices]
+        # Sort for increasing index extent, this obtains the good
+        # factorisation for triangle x interval cells.  Python sort is
+        # stable, so in the common case when index extents are equal,
+        # the previous deterministic ordering applies which is good
+        # for getting smaller temporaries.
+        sum_indices = sorted(sum_indices, key=lambda index: index.extent)
+        # Apply sum factorisation combined with COFFEE technology
+        expression = sum_factorise(variable, sum_indices, monomial_sum)
+        yield (variable, expression)
+
+
+finalise_options = dict(replace_delta=False)
+
+
+def classify(argument_indices, expression, delta_inside):
+    """Classifier for argument factorisation"""
+    n = len(argument_indices.intersection(expression.free_indices))
+    if n == 0:
+        return OTHER
+    elif n == 1:
+        if isinstance(expression, (Delta, Indexed)) and not delta_inside(expression):
+            return ATOMIC
+        else:
+            return COMPOUND
+    else:
+        return COMPOUND
+
+
+def delta_elimination(variable, sum_indices, args, rest):
+    """IndexSum-Delta cancellation for monomials."""
+    factors = list(args) + [variable, rest]  # construct factors
+
+    def prune(factors):
+        # Skip last factor (``rest``, see above) which can be
+        # arbitrarily complicated, so its pruning may be expensive,
+        # and its early pruning brings no advantages.
+        result = remove_componenttensors(factors[:-1])
+        result.append(factors[-1])
+        return result
+
+    # Cancel sum indices
+    sum_indices, factors = _delta_elimination(sum_indices, factors)
+    factors = prune(factors)
+
+    # Cancel variable indices
+    var_indices, factors = _delta_elimination(variable.free_indices, factors)
+    factors = prune(factors)
+
+    # Destructure factors after cancellation
+    rest = factors.pop()
+    variable = factors.pop()
+    args = [f for f in factors if f != one]
+
+    assert set(var_indices) == set(variable.free_indices)
+    return variable, sum_indices, args, rest
+
+
+def sum_factorise(variable, tail_ordering, monomial_sum):
+    if tail_ordering:
+        key_ordering = OrderedDict()
+        sub_monosums = defaultdict(MonomialSum)
+        for sum_indices, atomics, rest in monomial_sum:
+            # Pull out those sum indices that are not contained in the
+            # tail ordering, together with those atomics which do not
+            # share free indices with the tail ordering.
+            #
+            # Based on this, split the monomial sum, then recursively
+            # optimise each sub monomial sum with the first tail index
+            # removed.
+            tail_indices = tuple(i for i in sum_indices if i in tail_ordering)
+            tail_atomics = tuple(a for a in atomics
+                                 if set(tail_indices) & set(a.free_indices))
+            head_indices = tuple(i for i in sum_indices if i not in tail_ordering)
+            head_atomics = tuple(a for a in atomics if a not in tail_atomics)
+            key = (head_indices, head_atomics)
+            key_ordering.setdefault(key)
+            sub_monosums[key].add(tail_indices, tail_atomics, rest)
+        sub_monosums = [(k, sub_monosums[k]) for k in key_ordering]
+
+        monomial_sum = MonomialSum()
+        for (sum_indices, atomics), monosum in sub_monosums:
+            new_rest = sum_factorise(variable, tail_ordering[1:], monosum)
+            monomial_sum.add(sum_indices, atomics, new_rest)
+
+    # Use COFFEE algorithm to optimise the monomial sum
+    return optimise_monomial_sum(monomial_sum, variable.index_ordering())
diff --git a/tsfc/tensor.py b/tsfc/tensor.py
new file mode 100644
index 0000000000..8902b470a2
--- /dev/null
+++ b/tsfc/tensor.py
@@ -0,0 +1,93 @@
+from collections import defaultdict
+from functools import partial, reduce
+from itertools import count
+
+import numpy
+
+import gem
+from gem.optimise import remove_componenttensors, unroll_indexsum
+from gem.refactorise import ATOMIC, COMPOUND, OTHER, collect_monomials
+from gem.unconcatenate import flatten as concatenate
+
+
+def einsum(factors, sum_indices):
+    """Evaluates a tensor product at compile time.
+
+    :arg factors: iterable of indexed GEM literals
+    :arg sum_indices: indices to sum over
+    :returns: a single indexed GEM literal
+    """
+    # Maps the problem onto numpy.einsum
+    index2letter = defaultdict(partial(lambda c: chr(ord('i') + next(c)), count()))
+    operands = []
+    subscript_parts = []
+    for factor in factors:
+        literal, = factor.children
+        selectors = []
+        letters = []
+        for index in factor.multiindex:
+            if isinstance(index, int):
+                selectors.append(index)
+            else:
+                selectors.append(slice(None))
+                letters.append(index2letter[index])
+        operands.append(literal.array.__getitem__(tuple(selectors)))
+        subscript_parts.append(''.join(letters))
+
+    result_pairs = sorted((letter, index)
+                          for index, letter in index2letter.items()
+                          if index not in sum_indices)
+
+    subscripts = ','.join(subscript_parts) + '->' + ''.join(l for l, i in result_pairs)
+    tensor = numpy.einsum(subscripts, *operands)
+    return gem.Indexed(gem.Literal(tensor), tuple(i for l, i in result_pairs))
+
+
+def Integrals(expressions, quadrature_multiindex, argument_multiindices, parameters):
+    # Concatenate
+    expressions = concatenate(expressions)
+
+    # Unroll
+    max_extent = parameters["unroll_indexsum"]
+    if max_extent:
+        def predicate(index):
+            return index.extent <= max_extent
+        expressions = unroll_indexsum(expressions, predicate=predicate)
+
+    # Refactorise
+    def classify(quadrature_indices, expression):
+        if not quadrature_indices.intersection(expression.free_indices):
+            return OTHER
+        elif isinstance(expression, gem.Indexed) and isinstance(expression.children[0], gem.Literal):
+            return ATOMIC
+        else:
+            return COMPOUND
+    classifier = partial(classify, set(quadrature_multiindex))
+
+    result = []
+    for expr, monomial_sum in zip(expressions, collect_monomials(expressions, classifier)):
+        # Select quadrature indices that are present
+        quadrature_indices = set(index for index in quadrature_multiindex
+                                 if index in expr.free_indices)
+
+        products = []
+        for sum_indices, factors, rest in monomial_sum:
+            # Collapse quadrature literals for each monomial
+            if factors or quadrature_indices:
+                replacement = einsum(remove_componenttensors(factors), quadrature_indices)
+            else:
+                replacement = gem.Literal(1)
+            # Rebuild expression
+            products.append(gem.IndexSum(gem.Product(replacement, rest), sum_indices))
+        result.append(reduce(gem.Sum, products, gem.Zero()))
+    return result
+
+
+def flatten(var_reps, index_cache):
+    for variable, reps in var_reps:
+        expressions = reps
+        for expression in expressions:
+            yield (variable, expression)
+
+
+finalise_options = {}
diff --git a/tsfc/ufl2gem.py b/tsfc/ufl2gem.py
new file mode 100644
index 0000000000..e283fb1414
--- /dev/null
+++ b/tsfc/ufl2gem.py
@@ -0,0 +1,160 @@
+"""Translation of UFL tensor-algebra into GEM tensor-algebra."""
+
+import collections
+import ufl
+
+from gem import (Literal, Zero, Identity, Sum, Product, Division,
+                 Power, MathFunction, MinValue, MaxValue, Comparison,
+                 LogicalNot, LogicalAnd, LogicalOr, Conditional,
+                 Index, Indexed, ComponentTensor, IndexSum,
+                 ListTensor)
+
+
+class Mixin(object):
+    """A mixin to be used with a UFL MultiFunction to translate UFL
+    algebra into GEM tensor-algebra.  This node types translate pretty
+    straightforwardly to GEM.  Other node types are not handled in
+    this mixin."""
+
+    def __init__(self):
+        self.index_map = collections.defaultdict(Index)
+        """A map for translating UFL free indices into GEM free
+        indices."""
+
+    def scalar_value(self, o):
+        return Literal(o.value())
+
+    def identity(self, o):
+        return Identity(o._dim)
+
+    def zero(self, o):
+        return Zero(o.ufl_shape)
+
+    def sum(self, o, *ops):
+        if o.ufl_shape:
+            indices = tuple(Index() for i in range(len(o.ufl_shape)))
+            return ComponentTensor(Sum(*[Indexed(op, indices) for op in ops]), indices)
+        else:
+            return Sum(*ops)
+
+    def product(self, o, *ops):
+        assert o.ufl_shape == ()
+        return Product(*ops)
+
+    def division(self, o, numerator, denominator):
+        return Division(numerator, denominator)
+
+    def real(self, o, expr):
+        if o.ufl_shape:
+            indices = tuple(Index() for i in range(len(o.ufl_shape)))
+            return ComponentTensor(MathFunction('real', Indexed(expr, indices)), indices)
+        else:
+            return MathFunction('real', expr)
+
+    def imag(self, o, expr):
+        if o.ufl_shape:
+            indices = tuple(Index() for i in range(len(o.ufl_shape)))
+            return ComponentTensor(MathFunction('imag', Indexed(expr, indices)), indices)
+        else:
+            return MathFunction('imag', expr)
+
+    def conj(self, o, expr):
+        if o.ufl_shape:
+            indices = tuple(Index() for i in range(len(o.ufl_shape)))
+            return ComponentTensor(MathFunction('conj', Indexed(expr, indices)), indices)
+        else:
+            return MathFunction('conj', expr)
+
+    def abs(self, o, expr):
+        if o.ufl_shape:
+            indices = tuple(Index() for i in range(len(o.ufl_shape)))
+            return ComponentTensor(MathFunction('abs', Indexed(expr, indices)), indices)
+        else:
+            return MathFunction('abs', expr)
+
+    def power(self, o, base, exponent):
+        return Power(base, exponent)
+
+    def math_function(self, o, expr):
+        return MathFunction(o._name, expr)
+
+    def atan2(self, o, y, x):
+        return MathFunction("atan2", y, x)
+
+    def bessel_i(self, o, nu, arg):
+        return MathFunction(o._name, nu, arg)
+
+    def bessel_j(self, o, nu, arg):
+        return MathFunction(o._name, nu, arg)
+
+    def bessel_k(self, o, nu, arg):
+        return MathFunction(o._name, nu, arg)
+
+    def bessel_y(self, o, nu, arg):
+        return MathFunction(o._name, nu, arg)
+
+    def min_value(self, o, *ops):
+        return MinValue(*ops)
+
+    def max_value(self, o, *ops):
+        return MaxValue(*ops)
+
+    def binary_condition(self, o, left, right):
+        return Comparison(o._name, left, right)
+
+    def not_condition(self, o, expr):
+        return LogicalNot(expr)
+
+    def and_condition(self, o, *ops):
+        return LogicalAnd(*ops)
+
+    def or_condition(self, o, *ops):
+        return LogicalOr(*ops)
+
+    def conditional(self, o, condition, then, else_):
+        assert condition.shape == ()
+        if o.ufl_shape:
+            indices = tuple(Index() for i in range(len(o.ufl_shape)))
+            return ComponentTensor(Conditional(condition, Indexed(then, indices),
+                                               Indexed(else_, indices)),
+                                   indices)
+        else:
+            return Conditional(condition, then, else_)
+
+    def multi_index(self, o):
+        indices = []
+        for i in o:
+            if isinstance(i, ufl.classes.FixedIndex):
+                indices.append(int(i))
+            elif isinstance(i, ufl.classes.Index):
+                indices.append(self.index_map[i.count()])
+        return tuple(indices)
+
+    def indexed(self, o, aggregate, index):
+        return Indexed(aggregate, index)
+
+    def list_tensor(self, o, *ops):
+        return ListTensor(ops)
+
+    def component_tensor(self, o, expression, index):
+        return ComponentTensor(expression, index)
+
+    def index_sum(self, o, summand, indices):
+        # ufl.IndexSum technically has a MultiIndex, but it must have
+        # exactly one index in it.
+        index, = indices
+
+        if o.ufl_shape:
+            indices = tuple(Index() for i in range(len(o.ufl_shape)))
+            return ComponentTensor(IndexSum(Indexed(summand, indices), (index,)), indices)
+        else:
+            return IndexSum(summand, (index,))
+
+    def variable(self, o, expression, label):
+        # Only used by UFL AD, at this point, the bare expression is
+        # what we want.
+        return expression
+
+    def label(self, o):
+        # Only used by UFL AD, don't need it at this point.
+        pass
diff --git a/tsfc/ufl_utils.py b/tsfc/ufl_utils.py
new file mode 100644
index 0000000000..c789459cdf
--- /dev/null
+++ b/tsfc/ufl_utils.py
@@ -0,0 +1,497 @@
+"""Utilities for preprocessing UFL objects."""
+
+from functools import singledispatch
+
+import numpy
+
+import ufl
+from ufl import as_tensor, indices, replace
+from ufl.algorithms import compute_form_data as ufl_compute_form_data
+from ufl.algorithms import estimate_total_polynomial_degree
+from ufl.algorithms.analysis import extract_arguments, extract_type
+from ufl.algorithms.apply_function_pullbacks import apply_function_pullbacks
+from ufl.algorithms.apply_algebra_lowering import apply_algebra_lowering
+from ufl.algorithms.apply_derivatives import apply_derivatives
+from ufl.algorithms.apply_geometry_lowering import apply_geometry_lowering
+from ufl.algorithms.apply_restrictions import apply_restrictions
+from ufl.algorithms.comparison_checker import do_comparison_check
+from ufl.algorithms.remove_complex_nodes import remove_complex_nodes
+from ufl.corealg.map_dag import map_expr_dag
+from ufl.corealg.multifunction import MultiFunction
+from ufl.geometry import QuadratureWeight
+from ufl.geometry import Jacobian, JacobianDeterminant, JacobianInverse
+from ufl.classes import (Abs, Argument, CellOrientation, Coefficient,
+                         ComponentTensor, Expr, FloatValue, Division,
+                         MultiIndex, Product,
+                         ScalarValue, Sqrt, Zero, CellVolume, FacetArea)
+from ufl.domain import extract_unique_domain
+from finat.ufl import MixedElement
+from ufl.utils.sorting import sorted_by_count
+
+from gem.node import MemoizerArg
+
+from tsfc.modified_terminals import (is_modified_terminal,
+                                     analyse_modified_terminal,
+                                     construct_modified_terminal)
+
+
+preserve_geometry_types = (CellVolume, FacetArea)
+
+
+def compute_form_data(form,
+                      do_apply_function_pullbacks=True,
+                      do_apply_integral_scaling=True,
+                      do_apply_geometry_lowering=True,
+                      preserve_geometry_types=preserve_geometry_types,
+                      do_apply_restrictions=True,
+                      do_estimate_degrees=True,
+                      complex_mode=False):
+    """Preprocess UFL form in a format suitable for TSFC. Return
+    form data.
+
+    This is merely a wrapper to UFL compute_form_data with default
+    kwargs overriden in the way TSFC needs it and is provided for
+    other form compilers based on TSFC.
+    """
+    fd = ufl_compute_form_data(
+        form,
+        do_apply_function_pullbacks=do_apply_function_pullbacks,
+        do_apply_integral_scaling=do_apply_integral_scaling,
+        do_apply_geometry_lowering=do_apply_geometry_lowering,
+        preserve_geometry_types=preserve_geometry_types,
+        do_apply_restrictions=do_apply_restrictions,
+        do_estimate_degrees=do_estimate_degrees,
+        complex_mode=complex_mode
+    )
+    constants = extract_firedrake_constants(form)
+    fd.constants = constants
+    return fd
+
+
+def extract_firedrake_constants(a):
+    """Build a sorted list of all constants in a"""
+    return sorted_by_count(extract_type(a, TSFCConstantMixin))
+
+
+def one_times(measure):
+    # Workaround for UFL issue #80:
+    # https://bitbucket.org/fenics-project/ufl/issues/80
+    form = 1 * measure
+    fd = compute_form_data(form, do_estimate_degrees=False)
+    itg_data, = fd.integral_data
+    integral, = itg_data.integrals
+    integrand = integral.integrand()
+
+    # UFL considers QuadratureWeight a geometric quantity, and the
+    # general handler for geometric quantities estimates the degree of
+    # the coordinate element.  This would unnecessarily increase the
+    # estimated degree, so we drop QuadratureWeight instead.
+    expression = replace(integrand, {QuadratureWeight(itg_data.domain): 1})
+
+    # Now estimate degree for the preprocessed form
+    degree = estimate_total_polynomial_degree(expression)
+
+    return integrand, degree
+
+
+def entity_avg(integrand, measure, argument_multiindices):
+    arguments = extract_arguments(integrand)
+    if len(arguments) == 1:
+        a, = arguments
+        integrand = ufl.replace(integrand, {a: ufl.Argument(a.function_space(),
+                                                            number=0,
+                                                            part=a.part())})
+        argument_multiindices = (argument_multiindices[a.number()], )
+
+    degree = estimate_total_polynomial_degree(integrand)
+    form = integrand * measure
+    fd = compute_form_data(form, do_estimate_degrees=False,
+                           do_apply_function_pullbacks=False)
+    itg_data, = fd.integral_data
+    integral, = itg_data.integrals
+    integrand = integral.integrand()
+    return integrand, degree, argument_multiindices
+
+
+def preprocess_expression(expression, complex_mode=False,
+                          do_apply_restrictions=False):
+    """Imitates the compute_form_data processing pipeline.
+
+    :arg complex_mode: Are we in complex UFL mode?
+    :arg do_apply_restrictions: Propogate restrictions to terminals?
+
+    Useful, for example, to preprocess non-scalar expressions, which
+    are not and cannot be forms.
+    """
+    if complex_mode:
+        expression = do_comparison_check(expression)
+    else:
+        expression = remove_complex_nodes(expression)
+    expression = apply_algebra_lowering(expression)
+    expression = apply_derivatives(expression)
+    expression = apply_function_pullbacks(expression)
+    expression = apply_geometry_lowering(expression, preserve_geometry_types)
+    expression = apply_derivatives(expression)
+    expression = apply_geometry_lowering(expression, preserve_geometry_types)
+    expression = apply_derivatives(expression)
+    if not complex_mode:
+        expression = remove_complex_nodes(expression)
+    if do_apply_restrictions:
+        expression = apply_restrictions(expression)
+    return expression
+
+
+class ModifiedTerminalMixin(object):
+    """Mixin to use with MultiFunctions that operate on modified
+    terminals."""
+
+    def unexpected(self, o):
+        assert False, "Not expected %r at this stage." % o
+
+    # global derivates should have been pulled back
+    grad = unexpected
+    div = unexpected
+    curl = unexpected
+
+    # div and curl should have been algebraically lowered
+    reference_div = unexpected
+    reference_curl = unexpected
+
+    def _modified_terminal(self, o):
+        assert is_modified_terminal(o)
+        return self.modified_terminal(o)
+
+    # Unlike UFL, we do not regard Indexed as a terminal modifier.
+    # indexed = _modified_terminal
+
+    positive_restricted = _modified_terminal
+    negative_restricted = _modified_terminal
+
+    reference_grad = _modified_terminal
+    reference_value = _modified_terminal
+
+    terminal = _modified_terminal
+
+
+class CoefficientSplitter(MultiFunction, ModifiedTerminalMixin):
+    def __init__(self, split):
+        MultiFunction.__init__(self)
+        self._split = split
+
+    expr = MultiFunction.reuse_if_untouched
+
+    def modified_terminal(self, o):
+        mt = analyse_modified_terminal(o)
+        terminal = mt.terminal
+
+        if not isinstance(terminal, Coefficient):
+            # Only split coefficients
+            return o
+
+        if type(terminal.ufl_element()) != MixedElement:
+            # Only split mixed coefficients
+            return o
+
+        # Reference value expected
+        assert mt.reference_value
+
+        # Derivative indices
+        beta = indices(mt.local_derivatives)
+
+        components = []
+        for subcoeff in self._split[terminal]:
+            # Apply terminal modifiers onto the subcoefficient
+            component = construct_modified_terminal(mt, subcoeff)
+            # Collect components of the subcoefficient
+            for alpha in numpy.ndindex(subcoeff.ufl_element().reference_value_shape):
+                # New modified terminal: component[alpha + beta]
+                components.append(component[alpha + beta])
+        # Repack derivative indices to shape
+        c, = indices(1)
+        return ComponentTensor(as_tensor(components)[c], MultiIndex((c,) + beta))
+
+
+def split_coefficients(expression, split):
+    """Split mixed coefficients, so mixed elements need not be
+    implemented.
+
+    :arg split: A :py:class:`dict` mapping each mixed coefficient to a
+                sequence of subcoefficients.  If None, calling this
+                function is a no-op.
+    """
+    if split is None:
+        return expression
+
+    splitter = CoefficientSplitter(split)
+    return map_expr_dag(splitter, expression)
+
+
+class PickRestriction(MultiFunction, ModifiedTerminalMixin):
+    """Pick out parts of an expression with specified restrictions on
+    the arguments.
+
+    :arg test: The restriction on the test function.
+    :arg trial:  The restriction on the trial function.
+
+    Returns those parts of the expression that have the requested
+    restrictions, or else :class:`ufl.classes.Zero` if no such part
+    exists.
+    """
+    def __init__(self, test=None, trial=None):
+        self.restrictions = {0: test, 1: trial}
+        MultiFunction.__init__(self)
+
+    expr = MultiFunction.reuse_if_untouched
+
+    def multi_index(self, o):
+        return o
+
+    def modified_terminal(self, o):
+        mt = analyse_modified_terminal(o)
+        t = mt.terminal
+        r = mt.restriction
+        if isinstance(t, Argument) and r != self.restrictions[t.number()]:
+            return Zero(o.ufl_shape, o.ufl_free_indices, o.ufl_index_dimensions)
+        else:
+            return o
+
+
+def ufl_reuse_if_untouched(o, *ops):
+    """Reuse object if operands are the same objects."""
+    if all(a is b for a, b in zip(o.ufl_operands, ops)):
+        return o
+    else:
+        return o._ufl_expr_reconstruct_(*ops)
+
+
+@singledispatch
+def _simplify_abs(o, self, in_abs):
+    """Single-dispatch function to simplify absolute values.
+
+    :arg o: UFL node
+    :arg self: Callback handler for recursion
+    :arg in_abs: Is ``o`` inside an absolute value?
+
+    When ``in_abs`` we must return a non-negative value, potentially
+    by wrapping the returned node with ``Abs``.
+    """
+    raise AssertionError("UFL node expected, not %s" % type(o))
+
+
+@_simplify_abs.register(Expr)
+def _simplify_abs_expr(o, self, in_abs):
+    # General case, only wrap the outer expression (if necessary)
+    operands = [self(op, False) for op in o.ufl_operands]
+    result = ufl_reuse_if_untouched(o, *operands)
+    if in_abs:
+        result = Abs(result)
+    return result
+
+
+@_simplify_abs.register(Sqrt)
+def _simplify_abs_sqrt(o, self, in_abs):
+    result = ufl_reuse_if_untouched(o, self(o.ufl_operands[0], False))
+    if self.complex_mode and in_abs:
+        return Abs(result)
+    else:
+        return result
+
+
+@_simplify_abs.register(ScalarValue)
+def _simplify_abs_(o, self, in_abs):
+    if not in_abs:
+        return o
+    # Inline abs(constant)
+    return ufl.as_ufl(abs(o._value))
+
+
+@_simplify_abs.register(CellOrientation)
+def _simplify_abs_cellorientation(o, self, in_abs):
+    if not in_abs:
+        return o
+    # Cell orientation is +-1
+    return FloatValue(1)
+
+
+@_simplify_abs.register(Division)
+@_simplify_abs.register(Product)
+def _simplify_abs_product(o, self, in_abs):
+    if not in_abs:
+        # Just reconstruct
+        ops = [self(op, False) for op in o.ufl_operands]
+        return ufl_reuse_if_untouched(o, *ops)
+
+    # Visit children, distributing Abs
+    ops = [self(op, True) for op in o.ufl_operands]
+
+    # Strip Abs off again (we will put it outside now)
+    stripped = False
+    strip_ops = []
+    for op in ops:
+        if isinstance(op, Abs):
+            stripped = True
+            strip_ops.append(op.ufl_operands[0])
+        else:
+            strip_ops.append(op)
+
+    # Rebuild, and wrap with Abs if necessary
+    result = ufl_reuse_if_untouched(o, *strip_ops)
+    if stripped:
+        result = Abs(result)
+    return result
+
+
+@_simplify_abs.register(Abs)
+def _simplify_abs_abs(o, self, in_abs):
+    return self(o.ufl_operands[0], True)
+
+
+def simplify_abs(expression, complex_mode):
+    """Simplify absolute values in a UFL expression.  Its primary
+    purpose is to "neutralise" CellOrientation nodes that are
+    surrounded by absolute values and thus not at all necessary."""
+    mapper = MemoizerArg(_simplify_abs)
+    mapper.complex_mode = complex_mode
+    return mapper(expression, False)
+
+
+def apply_mapping(expression, element, domain):
+    """Apply the inverse of the pullback for element to an expression.
+
+    :arg expression: An expression in physical space
+    :arg element: The element we're going to interpolate into, whose
+         value_shape must match the shape of the expression, and will
+         advertise the pullback to apply.
+    :arg domain: Optional domain to provide in case expression does
+         not contain a domain (used for constructing geometric quantities).
+    :returns: A new UFL expression with shape element.reference_value_shape
+    :raises NotImplementedError: If we don't know how to apply the
+        inverse of the pullback.
+    :raises ValueError: If we get shape mismatches.
+
+    The following is borrowed from the UFC documentation:
+
+    Let g be a field defined on a physical domain T with physical
+    coordinates x. Let T_0 be a reference domain with coordinates
+    X. Assume that F: T_0 -> T such that
+
+      x = F(X)
+
+    Let J be the Jacobian of F, i.e J = dx/dX and let K denote the
+    inverse of the Jacobian K = J^{-1}. Then we (currently) have the
+    following four types of mappings:
+
+    'identity' mapping for g:
+
+      G(X) = g(x)
+
+    For vector fields g:
+
+    'contravariant piola' mapping for g:
+
+      G(X) = det(J) K g(x)   i.e  G_i(X) = det(J) K_ij g_j(x)
+
+    'covariant piola' mapping for g:
+
+      G(X) = J^T g(x)          i.e  G_i(X) = J^T_ij g(x) = J_ji g_j(x)
+
+    'double covariant piola' mapping for g:
+
+      G(X) = J^T g(x) J     i.e. G_il(X) = J_ji g_jk(x) J_kl
+
+    'double contravariant piola' mapping for g:
+
+      G(X) = det(J)^2 K g(x) K^T  i.e. G_il(X)=(detJ)^2 K_ij g_jk K_lk
+
+    'covariant contravariant piola' mapping for g:
+
+      G(X) = det(J) J^T g(x) K^T     i.e. G_il(X) = det(J) J_ji g_jk(x) K_lk
+
+    If 'contravariant piola' or 'covariant piola' (or their double
+    variants) are applied to a matrix-valued function, the appropriate
+    mappings are applied row-by-row.
+    """
+    mesh = extract_unique_domain(expression)
+    if mesh is None:
+        mesh = domain
+    if domain is not None and mesh != domain:
+        raise NotImplementedError("Multiple domains not supported")
+    pvs = element.pullback.physical_value_shape(element, mesh)
+    if expression.ufl_shape != pvs:
+        raise ValueError(f"Mismatching shapes, got {expression.ufl_shape}, expected {pvs}")
+    mapping = element.mapping().lower()
+    if mapping == "identity":
+        rexpression = expression
+    elif mapping == "covariant piola":
+        J = Jacobian(mesh)
+        *k, i, j = indices(len(expression.ufl_shape) + 1)
+        kj = (*k, j)
+        rexpression = as_tensor(J[j, i] * expression[kj], (*k, i))
+    elif mapping == "l2 piola":
+        detJ = JacobianDeterminant(mesh)
+        rexpression = expression * detJ
+    elif mapping == "contravariant piola":
+        K = JacobianInverse(mesh)
+        detJ = JacobianDeterminant(mesh)
+        *k, i, j = indices(len(expression.ufl_shape) + 1)
+        kj = (*k, j)
+        rexpression = as_tensor(detJ * K[i, j] * expression[kj], (*k, i))
+    elif mapping == "double covariant piola":
+        J = Jacobian(mesh)
+        *k, i, j, m, n = indices(len(expression.ufl_shape) + 2)
+        kmn = (*k, m, n)
+        rexpression = as_tensor(J[m, i] * expression[kmn] * J[n, j], (*k, i, j))
+    elif mapping == "double contravariant piola":
+        K = JacobianInverse(mesh)
+        detJ = JacobianDeterminant(mesh)
+        *k, i, j, m, n = indices(len(expression.ufl_shape) + 2)
+        kmn = (*k, m, n)
+        rexpression = as_tensor(detJ**2 * K[i, m] * expression[kmn] * K[j, n], (*k, i, j))
+    elif mapping == "covariant contravariant piola":
+        J = Jacobian(mesh)
+        K = JacobianInverse(mesh)
+        detJ = JacobianDeterminant(mesh)
+        *k, i, j, m, n = indices(len(expression.ufl_shape) + 2)
+        kmn = (*k, m, n)
+        rexpression = as_tensor(detJ * J[m, i] * expression[kmn] * K[j, n], (*k, i, j))
+    elif mapping == "symmetries":
+        # This tells us how to get from the pieces of the reference
+        # space expression to the physical space one.
+        # We're going to apply the inverse of the physical to
+        # reference space mapping.
+        fcm = element.flattened_sub_element_mapping()
+        sub_elem = element.sub_elements[0]
+        shape = expression.ufl_shape
+        flat = ufl.as_vector([expression[i] for i in numpy.ndindex(shape)])
+        vs = sub_elem.pullback.physical_value_shape(sub_elem, mesh)
+        rvs = sub_elem.reference_value_shape
+        seen = set()
+        rpieces = []
+        gm = int(numpy.prod(vs, dtype=int))
+        for gi, ri in enumerate(fcm):
+            # For each unique piece in reference space
+            if ri in seen:
+                continue
+            seen.add(ri)
+            # Get the physical space piece
+            piece = [flat[gm*gi + j] for j in range(gm)]
+            piece = as_tensor(numpy.asarray(piece).reshape(vs))
+            # get into reference space
+            piece = apply_mapping(piece, sub_elem, mesh)
+            assert piece.ufl_shape == rvs
+            # Concatenate with the other pieces
+            rpieces.extend([piece[idx] for idx in numpy.ndindex(rvs)])
+        # And reshape
+        rexpression = as_tensor(numpy.asarray(rpieces).reshape(element.reference_value_shape))
+    else:
+        raise NotImplementedError(f"Don't know how to handle mapping type {mapping} for expression of rank {ufl.FunctionSpace(mesh, element).value_shape}")
+    if rexpression.ufl_shape != element.reference_value_shape:
+        raise ValueError(f"Mismatching reference shapes, got {rexpression.ufl_shape} expected {element.reference_value_shape}")
+    return rexpression
+
+
+class TSFCConstantMixin:
+    """ Mixin class to identify Constants """
+
+    def __init__(self):
+        pass
diff --git a/tsfc/vanilla.py b/tsfc/vanilla.py
new file mode 100644
index 0000000000..ecf24cd5e4
--- /dev/null
+++ b/tsfc/vanilla.py
@@ -0,0 +1,47 @@
+from functools import reduce
+
+from gem import index_sum, Sum
+from gem.optimise import unroll_indexsum
+from gem.unconcatenate import unconcatenate
+
+
+def Integrals(expressions, quadrature_multiindex, argument_multiindices, parameters):
+    """Constructs an integral representation for each GEM integrand
+    expression.
+
+    :arg expressions: integrand multiplied with quadrature weight;
+                      multi-root GEM expression DAG
+    :arg quadrature_multiindex: quadrature multiindex (tuple)
+    :arg argument_multiindices: tuple of argument multiindices,
+                                one multiindex for each argument
+    :arg parameters: parameters dictionary
+
+    :returns: list of integral representations
+    """
+    # Unroll
+    max_extent = parameters["unroll_indexsum"]
+    if max_extent:
+        def predicate(index):
+            return index.extent <= max_extent
+        expressions = unroll_indexsum(expressions, predicate=predicate)
+    # Integral representation: just a GEM expression
+    return [index_sum(e, quadrature_multiindex) for e in expressions]
+
+
+def flatten(var_reps, index_cache):
+    """Flatten mode-specific intermediate representation to a series of
+    assignments.
+
+    :arg var_reps: series of (return variable, [integral representation]) pairs
+    :arg index_cache: cache :py:class:`dict` for :py:func:`unconcatenate`
+
+    :returns: series of (return variable, GEM expression root) pairs
+    """
+    return unconcatenate([(variable, reduce(Sum, reps))
+                          for variable, reps in var_reps],
+                         cache=index_cache)
+
+
+finalise_options = dict(remove_componenttensors=False)
+"""To avoid duplicate work, these options that are safe to pass to
+:py:func:`gem.impero_utils.preprocess_gem`."""