diff --git a/docs/make.jl b/docs/make.jl
index 34bdcfaa..348a4fb5 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -66,5 +66,5 @@ end
 deploydocs(
     repo = "github.com/jipolanco/BSplineKit.jl",
     forcepush = true,
-    push_preview = true,  # PRs deploy at https://jipolanco.github.io/BSplineKit.jl/previews/PR##
+    push_preview = true,  # PRs deploy at https://jipolanco.github.io/BSplineKit.jl/previews/PR??
 )
diff --git a/docs/src/bsplines.md b/docs/src/bsplines.md
index b1b8f025..82f423f3 100644
--- a/docs/src/bsplines.md
+++ b/docs/src/bsplines.md
@@ -26,6 +26,8 @@ length(::BSplineBasis)
 BasisFunction
 support
 common_support
+find_knot_interval
+evaluate_all
 evaluate
 evaluate!
 nonzero_in_segment
diff --git a/src/BSplines/BSplines.jl b/src/BSplines/BSplines.jl
index bac50c7a..db8ba12d 100644
--- a/src/BSplines/BSplines.jl
+++ b/src/BSplines/BSplines.jl
@@ -16,6 +16,7 @@ export
     basis,
     evaluate,
     evaluate!,
+    evaluate_all,
     nonzero_in_segment,
     support
 
@@ -28,9 +29,53 @@ Abstract type defining a B-spline basis, or more generally, a functional basis
 defined from B-splines.
 
 The basis is represented by a B-spline order `k` and a knot element type `T`.
+
+---
+
+    (B::AbstractBSplineBasis)(
+        x::Real, [op = Derivative(0)], [T = float(typeof(x))];
+        [ileft = nothing],
+    ) -> (i, bs)
+
+Evaluates all basis functions which are non-zero at `x`.
+
+This is a convenience alias for `evaluate_all`.
+See [`evaluate_all`](@ref) for details on optional arguments and on the returned values.
+
+# Examples
+
+```jldoctest
+julia> B = BSplineBasis(BSplineOrder(4), -1:0.1:1)
+23-element BSplineBasis of order 4, domain [-1.0, 1.0]
+ knots: [-1.0, -1.0, -1.0, -1.0, -0.9, -0.8, -0.7, -0.6, -0.5, -0.4  …  0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0]
+
+julia> i, bs = B(0.42)
+(18, (0.0013333333333333268, 0.28266666666666657, 0.6306666666666667, 0.08533333333333339))
+
+julia> sum(bs)
+1.0
+
+julia> bs[1] - B[i](0.42)
+0.0
+
+julia> bs[2] - B[i - 1](0.42)
+-5.551115123125783e-17
+
+julia> B(0.44; ileft = i)
+(18, (0.01066666666666666, 0.4146666666666667, 0.5386666666666665, 0.03599999999999999))
+
+julia> B(0.42, Float32)
+(18, (0.0013333336f0, 0.28266668f0, 0.6306667f0, 0.085333325f0))
+
+julia> B(0.42, Derivative(1))
+(18, (0.19999999999999937, 6.4, -3.3999999999999977, -3.200000000000001))
+```
 """
 abstract type AbstractBSplineBasis{k,T} end
 
+@inline (B::AbstractBSplineBasis)(args...; kws...) =
+    evaluate_all(B, args...; kws...)
+
 """
     BSplineOrder(k::Integer)
 
@@ -43,5 +88,6 @@ struct BSplineOrder{k} end
 include("basis.jl")
 include("basis_function.jl")
 include("knots.jl")
+include("evaluate_all.jl")
 
 end
diff --git a/src/BSplines/evaluate_all.jl b/src/BSplines/evaluate_all.jl
new file mode 100644
index 00000000..0a4ed754
--- /dev/null
+++ b/src/BSplines/evaluate_all.jl
@@ -0,0 +1,305 @@
+# Functions for efficiently evaluating all non-zero B-splines (and/or their
+# derivatives) at a given point.
+
+using Base.Cartesian: @ntuple
+using Base: @propagate_inbounds
+using StaticArrays: MVector
+
+"""
+    evaluate_all(
+        B::BSplineBasis, x::Real,
+        [op = Derivative(0)], [T = float(typeof(x))];
+        [ileft = nothing],
+    ) -> i, bs
+
+Evaluate all B-splines which are non-zero at coordinate `x`.
+
+Returns a tuple `(i, bs)`, where `i` is an index identifying the basis functions
+that were computed, and `bs` is a tuple with the actual values.
+
+More precisely:
+
+- `i` is the index of the first B-spline knot ``t_{i}`` when going from ``x``
+  towards the left.
+  In other words, it is such that ``t_{i} ≤ x < t_{i + 1}``.
+
+  It is effectively computed as `i = searchsortedlast(knots(B), x)`.
+  If the correct value of `i` is already known, one can avoid this computation by
+  manually passing this index via the optional `ileft` keyword argument.
+
+- `bs` is a tuple of B-splines evaluated at ``x``:
+
+  ```math
+  (b_i(x), b_{i - 1}(x), …, b_{i - k + 1}(x)).
+  ```
+
+  It contains ``k`` values, where ``k`` is the order of the B-spline basis.
+  Note that values are returned in backwards order starting from the ``i``-th
+  B-spline.
+
+## Computing derivatives
+
+One can pass the optional `op` argument to compute B-spline derivatives instead
+of the actual B-spline values.
+
+## Examples
+
+See [`AbstractBSplineBasis`](@ref) for some examples using the alternative
+evaluation syntax `B(x, [op], [T]; [ileft])`, which calls this function.
+"""
+@propagate_inbounds function evaluate_all(
+        B::BSplineBasis, x::Real, op::Derivative, ::Type{T}; kws...,
+    ) where {T <: Number}
+    _evaluate_all(knots(B), x, BSplineOrder(order(B)), op, T; kws...)
+end
+
+@propagate_inbounds evaluate_all(
+    B, x, op::AbstractDifferentialOp = Derivative(0); kws...,
+) = evaluate_all(B, x, op, float(typeof(x)); kws...)
+
+@propagate_inbounds evaluate_all(B, x, ::Type{T}; kws...) where {T <: Number} =
+    evaluate_all(B, x, Derivative(0), T; kws...)
+
+@propagate_inbounds function _knotdiff(x, ts, i, n)
+    @boundscheck checkbounds(ts, i:(i + n))
+    @inbounds ti = ts[i]
+    @inbounds tj = ts[i + n]
+    # @assert ti ≠ tj
+    (x - ti) / (tj - ti)
+end
+
+# TODO
+# - this is redundant with Splines.knot_interval...
+"""
+    find_knot_interval(ts::AbstractVector, x::Real) -> (i, zone)
+
+Finds the index ``i`` corresponding to the knot interval ``[t_i, t_{i + 1}]``
+that should be used to evaluate B-splines at location ``x``.
+
+The knot vector is assumed to be sorted in non-decreasing order.
+
+It also returns a `zone` integer, which is:
+
+- `0`  if `x` is within the knot domain (`ts[begin] ≤ x ≤ ts[end]`),
+- `-1` if `x < ts[begin]`,
+- `1`  if `x > ts[end]`.
+
+This function is functionally equivalent to de Boor's `INTERV` routine (de Boor
+2001, p. 74).
+"""
+function find_knot_interval(ts::AbstractVector, x::Real)
+    if x < first(ts)
+        return firstindex(ts), -1
+    end
+    i = searchsortedlast(ts, x)
+    Nt = lastindex(ts)
+    if i == Nt
+        tlast = ts[Nt]
+        while true
+            i -= 1
+            ts[i] ≠ tlast && break
+        end
+        zone = (x > tlast) ? 1 : 0
+        return i, zone
+    else
+        return i, 0  # usual case
+    end
+end
+
+function _evaluate_all(
+        ts::AbstractVector, x::Real, ::BSplineOrder{k},
+        op::Derivative{0}, ::Type{T};
+        ileft = nothing,
+    ) where {k, T}
+    if @generated
+        @assert k ≥ 1
+        ex = quote
+            i, zone = _find_knot_interval(ts, x, ileft)
+            if zone ≠ 0
+                return (i, @ntuple($k, j -> zero($T)))
+            end
+            # We assume that the value of `i` corresponds to the good interval,
+            # even if `ileft` was passed by the user.
+            # This is the same as in de Boor's BSPLVB routine.
+            # In particular, this allows plotting the extension of a polynomial
+            # piece outside of its knot interval (as in de Boor's Fig. 6, p. 114).
+            bs_1 = (one(T),)
+        end
+        for q ∈ 2:k
+            bp = Symbol(:bs_, q - 1)
+            bq = Symbol(:bs_, q)
+            ex = quote
+                $ex
+                Δs = @ntuple(
+                    $(q - 1),
+                    j -> @inbounds($T(_knotdiff(x, ts, i - j + 1, $q - 1))),
+                )
+                $bq = _evaluate_step(Δs, $bp, BSplineOrder($q))
+            end
+        end
+        bk = Symbol(:bs_, k)
+        quote
+            $ex
+            return i, $bk
+        end
+    else
+        _evaluate_all_alt(ts, x, BSplineOrder(k), op, T; ileft = ileft)
+    end
+end
+
+# Derivatives
+function _evaluate_all(
+        ts::AbstractVector, x::Real, ::BSplineOrder{k},
+        op::Derivative{n}, ::Type{T};
+        ileft = nothing,
+    ) where {k, n, T}
+    if @generated
+        n::Int
+        @assert n ≥ 1
+        # We first need to evaluate the B-splines of order p.
+        p = k - n
+        if p < 1
+            # Derivatives are zero. The returned index is arbitrary...
+            return :( firstindex(ts), ntuple(_ -> zero($T), Val($k)) )
+        end
+        bp = Symbol(:bs_, p)
+        ex = quote
+            i, $bp = _evaluate_all(ts, x, BSplineOrder($p), Derivative(0), $T; ileft)
+        end
+        for q ∈ (p + 1):k
+            bp = Symbol(:bs_, q - 1)
+            bq = Symbol(:bs_, q)
+            ex = quote
+                $ex
+                $bq = _evaluate_step_deriv(ts, i, $bp, BSplineOrder($q), $T)
+            end
+        end
+        bk = Symbol(:bs_, k)
+        quote
+            $ex
+            return i, $bk
+        end
+    else
+        _evaluate_all_alt(ts, x, BSplineOrder(k), op, T; ileft = ileft)
+    end
+end
+
+# Non-@generated version
+@inline function _evaluate_all_alt(
+        ts::AbstractVector, x::Real, ::BSplineOrder{k},
+        ::Derivative{0}, ::Type{T};
+        ileft = nothing,
+    ) where {k, T}
+    @assert k ≥ 1
+    i, zone = _find_knot_interval(ts, x, ileft)
+    if zone ≠ 0
+        return (i, ntuple(j -> zero(T), Val(k)))
+    end
+    bq = MVector{k, T}(undef)
+    Δs = MVector{k - 1, T}(undef)
+    bq[1] = one(T)
+    @inbounds for q ∈ 2:k
+        for j ∈ 1:(q - 1)
+            Δs[j] = _knotdiff(x, ts, i - j + 1, q - 1)
+        end
+        bp = bq[1]
+        Δp = Δs[1]
+        bq[1] = Δp * bp
+        for j = 2:(q - 1)
+            bpp, bp = bp, bq[j]
+            Δpp, Δp = Δp, Δs[j]
+            bq[j] = Δp * bp + (1 - Δpp) * bpp
+        end
+        bq[q] = (1 - Δp) * bp
+    end
+    i, Tuple(bq)
+end
+
+# Derivatives / non-@generated variant
+function _evaluate_all_alt(
+        ts::AbstractVector, x::Real, ::BSplineOrder{k},
+        ::Derivative{n}, ::Type{T};
+        ileft = nothing,
+    ) where {k, n, T}
+    n::Int
+    @assert n ≥ 1
+    # We first need to evaluate the B-splines of order m.
+    m = k - n
+    if m < 1
+        # Derivatives are zero. The returned index is arbitrary...
+        return firstindex(ts), ntuple(_ -> zero(T), Val(k))
+    end
+    i, bp = _evaluate_all(ts, x, BSplineOrder(m), Derivative(0), T; ileft)
+    bq = MVector{k, T}(undef)
+    us = MVector{k - 1, T}(undef)
+    bq[1:m] .= bp
+    for q = (m + 1):k
+        p = q - 1
+        for δj = 1:p
+            @inbounds us[δj] = bq[δj] / (ts[i + q - δj] - ts[i + 1 - δj])
+        end
+        @inbounds bq[1] = p * us[1]
+        for j = 2:(q - 1)
+            # Note: adding @inbounds here can slow down stuff (from 25ns to
+            # 80ns), which is very strange!! (Julia 1.8-beta2)
+            bq[j] = p * (us[j] - us[j - 1])
+        end
+        @inbounds bq[q] = -p * us[p]
+    end
+    i, Tuple(bq)
+end
+
+function _find_knot_interval(ts, x, ileft)
+    if isnothing(ileft)
+        i, zone = find_knot_interval(ts, x)
+    else
+        i = ileft
+        zone = (x < first(ts)) ? -1 : (x > last(ts)) ? 1 : 0
+    end
+    i, zone
+end
+
+@inline @generated function _evaluate_step(Δs, bp, ::BSplineOrder{k}) where {k}
+    ex = quote
+        @inbounds b_1 = Δs[1] * bp[1]
+    end
+    for j = 2:(k - 1)
+        bj = Symbol(:b_, j)
+        ex = quote
+            $ex
+            @inbounds $bj = (1 - Δs[$j - 1]) * bp[$j - 1] + Δs[$j] * bp[$j]
+        end
+    end
+    b_last = Symbol(:b_, k)
+    quote
+        $ex
+        @inbounds $b_last = (1 - Δs[$k - 1]) * bp[$k - 1]
+        @ntuple $k b
+    end
+end
+
+@inline @generated function _evaluate_step_deriv(
+        ts, i, bp, ::BSplineOrder{k}, ::Type{T},
+    ) where {k, T}
+    p = k - 1
+    ex = quote
+        us = @ntuple(
+            $p,
+            δj -> @inbounds($T(bp[δj] / (ts[i + $k - δj] - ts[i + 1 - δj]))),
+        )
+        @inbounds b_1 = $p * us[1]
+    end
+    for j = 2:(k - 1)
+        bj = Symbol(:b_, j)
+        ex = quote
+            $ex
+            @inbounds $bj = $p * (-us[$j - 1] + us[$j])
+        end
+    end
+    b_last = Symbol(:b_, k)
+    quote
+        $ex
+        @inbounds $b_last = -$p * us[$p]
+        @ntuple $k b
+    end
+end
diff --git a/src/BandedTensors/BandedTensors.jl b/src/BandedTensors/BandedTensors.jl
index 1267ebca..0abbd217 100644
--- a/src/BandedTensors/BandedTensors.jl
+++ b/src/BandedTensors/BandedTensors.jl
@@ -1,5 +1,7 @@
 module BandedTensors
 
+using Base: @propagate_inbounds
+
 using StaticArrays
 
 using BandedMatrices
@@ -216,7 +218,7 @@ struct SubMatrix{T, N, M <: SMatrix{N,N,T},
     inds :: Indices
 end
 
-@inline function SubMatrix(A::BandedTensor3D, k)
+@propagate_inbounds function SubMatrix(A::BandedTensor3D, k)
     @boundscheck checkbounds(A.data, k)
     dims = size(A, 1), size(A, 2)
     inds = band_indices(A, k)
@@ -229,7 +231,9 @@ indices(s::SubMatrix) = (s.inds, s.inds)
 Base.parent(Asub::SubMatrix) = Asub.data
 Base.size(S::SubMatrix) = S.dims
 
-@inline function Base.getindex(S::SubMatrix{T}, i::Integer, j::Integer) where {T}
+@propagate_inbounds function Base.getindex(
+        S::SubMatrix{T}, i::Integer, j::Integer,
+    ) where {T}
     ri, rj = indices(S)
     (i ∈ ri) && (j ∈ rj) || return zero(T)
     ii = i - first(ri) + 1
@@ -301,25 +305,31 @@ Set submatrix `A[:, :, k]` to the matrix `Ak`.
 The `Ak` matrix must have dimensions `(r, r)`, where `r = 2b + 1` is the total
 number of bands of `A`.
 """
-function Base.setindex!(A::BandedTensor3D, Ak::AbstractMatrix,
-                        ::Colon, ::Colon, k)
+@propagate_inbounds function Base.setindex!(
+        A::BandedTensor3D, Ak::AbstractMatrix, ::Colon, ::Colon, k,
+    )
     @boundscheck checkbounds(A, :, :, k)
     @boundscheck size(Ak) === (size(A, 1), size(A, 2))
     @inbounds A.data[k] = Ak
 end
 
-function Base.setindex!(A::BandedTensor3D, Asub::SubMatrix, ::Colon, ::Colon, k)
+@propagate_inbounds function Base.setindex!(
+        A::BandedTensor3D, Asub::SubMatrix, ::Colon, ::Colon, k,
+    )
     @boundscheck checkbounds(A, :, :, k)
     @boundscheck k == Asub.k
     @inbounds A.data[k] = parent(Asub)
 end
 
 # Get submatrix A[:, :, k].
-Base.getindex(A::BandedTensor3D, ::Colon, ::Colon, k::Integer) = SubMatrix(A, k)
-Base.view(A::BandedTensor3D, ::Colon, ::Colon, k::Integer) = A[:, :, k]
-
-function Base.getindex(A::BandedTensor3D{T,b}, ::Colon, ::Colon,
-                       ks::AbstractUnitRange) where {T,b}
+@propagate_inbounds Base.getindex(A::BandedTensor3D, ::Colon, ::Colon, k::Integer) =
+    SubMatrix(A, k)
+@propagate_inbounds Base.view(A::BandedTensor3D, ::Colon, ::Colon, k::Integer) =
+    A[:, :, k]
+
+@propagate_inbounds function Base.getindex(
+        A::BandedTensor3D{T,b}, ::Colon, ::Colon, ks::AbstractUnitRange,
+    ) where {T,b}
     shift = bandshift(A) .+ (0, 0, first(ks) - 1)
     Nk = length(ks)
     dims = (size(A, 1), size(A, 2), Nk)
@@ -331,20 +341,21 @@ function Base.getindex(A::BandedTensor3D{T,b}, ::Colon, ::Colon,
     B
 end
 
-@inline function Base.getindex(A::BandedTensor3D,
-                               i::Integer, j::Integer, k::Integer)
+@propagate_inbounds function Base.getindex(
+        A::BandedTensor3D, i::Integer, j::Integer, k::Integer,
+    )
     @boundscheck checkbounds(A, i, j, k)
     @inbounds Asub = SubMatrix(A, k)
-    Asub[i, j]
+    @inbounds Asub[i, j]
 end
 
 function Base.fill!(A::BandedTensor3D, x)
     M = submatrix_type(A)
     T = eltype(M)
     L = length(M)
-    Ak = M(ntuple(_ -> T(x), L))
+    Ak = M(ntuple(_ -> T(x), Val(L)))
     for k in axes(A, 3)
-        A[:, :, k] = Ak
+        @inbounds A[:, :, k] = Ak
     end
     A
 end
@@ -365,7 +376,7 @@ As an (allocating) alternative, one can use `Y = A * b`, which returns `Y` as a
 function muladd!(
         Y::AbstractMatrix, A::BandedTensor3D, b::AbstractVector,
     )
-    for k in axes(A, 3)
+    @inbounds for k in axes(A, 3)
         sub = SubMatrix(A, k)
         is, js = indices(sub)
         C = parent(sub) * b[k]
diff --git a/src/Collocation/Collocation.jl b/src/Collocation/Collocation.jl
index 769d512f..918d505b 100644
--- a/src/Collocation/Collocation.jl
+++ b/src/Collocation/Collocation.jl
@@ -154,31 +154,33 @@ function collocation_matrix!(
         C::AbstractMatrix{T}, B::AbstractBSplineBasis, x::AbstractVector,
         deriv::Derivative = Derivative(0);
         clip_threshold = eps(T)) where {T}
-    Nx, Nb = size(C)
-
-    if Nx != length(x) || Nb != length(B)
+    if axes(C, 1) != axes(x, 1) || size(C, 2) != length(B)
         throw(ArgumentError("wrong dimensions of collocation matrix"))
     end
 
     fill!(C, 0)
     b_lo, b_hi = max_bandwidths(C)
 
-    @inbounds for j = 1:Nb, i = 1:Nx
-        b = evaluate(B, j, x[i], deriv, T)
+    @inbounds for i ∈ axes(C, 1)
+        jlast, bs = evaluate_all(B, x[i], deriv, T)  # = (b_{jlast + 1 - k}(x), …, b_{jlast}(x))
 
-        # Skip very small values (and zeros).
-        # This is important for SparseMatrixCSC, which also stores explicit
-        # zeros.
-        abs(b) <= clip_threshold && continue
+        for (δj, bj) ∈ pairs(bs)
+            j = jlast + 1 - δj
 
-        if (i > j && i - j > b_lo) || (i < j && j - i > b_hi)
-            # This will cause problems if C is a BandedMatrix, and (i, j) is
-            # outside the allowed bands. This may be the case if the collocation
-            # points are not properly distributed.
-            @warn "Non-zero value outside of matrix bands: b[$j](x[$i]) = $b"
-        end
+            # Skip very small values (and zeros).
+            # This is important for SparseMatrixCSC, which also stores explicit
+            # zeros.
+            abs(bj) <= clip_threshold && continue
 
-        C[i, j] = b
+            if (i > j && i - j > b_lo) || (i < j && j - i > b_hi)
+                # This will cause problems if C is a BandedMatrix, and (i, j) is
+                # outside the allowed bands. This may be the case if the collocation
+                # points are not properly distributed.
+                @warn "Non-zero value outside of matrix bands: b[$j](x[$i]) = $bj"
+            end
+
+            C[i, j] = bj
+        end
     end
 
     C
diff --git a/src/Galerkin/linear.jl b/src/Galerkin/linear.jl
index d4d54ce7..7dc9c29c 100644
--- a/src/Galerkin/linear.jl
+++ b/src/Galerkin/linear.jl
@@ -172,6 +172,7 @@ function galerkin_matrix!(
     _check_bases(Bs)
     B1, B2 = Bs
     same_ij = B1 == B2 && deriv[1] == deriv[2]
+    T = eltype(S)
 
     if size(S) != length.(Bs)
         throw(DimensionMismatch("wrong dimensions of Galerkin matrix"))
@@ -202,6 +203,8 @@ function galerkin_matrix!(
 
     fill!(S, 0)
     nlast = last(eachindex(ts))
+    ioff = first(num_constraints(B1))
+    joff = first(num_constraints(B2))
 
     # We loop over all knot segments Ω[n] = (ts[n], ts[n + 1]).
     # We integrate all supported B-spline products B1[i] * B2[j] over this
@@ -217,24 +220,29 @@ function galerkin_matrix!(
         xs = metric .* quadx
         # @assert all(x -> tn ≤ x ≤ tn1, xs)
 
-        is = nonzero_in_segment(B1, n)
-        js = nonzero_in_segment(B2, n)
-
-        # This is a property of B-spline bases, which should be preserved by
-        # derived (recombined) bases.
-        @assert 0 < length(is) ≤ k && 0 < length(js) ≤ k
-
-        # Evaluate all required basis functions on quadrature nodes.
-        bis = eval_basis_functions(B1, is, xs, deriv[1])
-        bjs = same_ij ? bis : eval_basis_functions(B2, js, xs, deriv[2])
-
-        for (nj, j) in enumerate(js), (ni, i) in enumerate(is)
-            if !fill_upper && i < j
-                continue
-            elseif !fill_lower && i > j
-                continue
+        for (x, w) ∈ zip(xs, quadw)
+            ilast = n - ioff
+            jlast = n - joff
+            _, bis = evaluate_all(B1, x, deriv[1], T; ileft = ilast)
+            _, bjs = same_ij ?
+                (ilast, bis) : evaluate_all(B2, x, deriv[2], T; ileft = jlast)
+            y = metric.α * w
+            for (δj, bj) ∈ pairs(bjs), (δi, bi) ∈ pairs(bis)
+                i = ilast + 1 - δi
+                j = jlast + 1 - δj
+                if !fill_upper && i < j
+                    continue
+                elseif !fill_lower && i > j
+                    continue
+                elseif i ∉ axes(A, 1)  # this can be true for recombined bases
+                    @assert iszero(bi)
+                    continue
+                elseif j ∉ axes(A, 2)
+                    @assert iszero(bj)
+                    continue
+                end
+                @inbounds A[i, j] += y * bi * bj
             end
-            A[i, j] += metric.α * ((bis[ni] .* bjs[nj]) ⋅ quadw)
         end
     end
 
diff --git a/src/Galerkin/projection.jl b/src/Galerkin/projection.jl
index 3e664909..b7d170a5 100644
--- a/src/Galerkin/projection.jl
+++ b/src/Galerkin/projection.jl
@@ -62,9 +62,10 @@ function galerkin_projection!(
     quadx, quadw = _quadrature_prod(Val(2k - 2))
     @assert length(quadx) == k  # we need k quadrature points per knot segment
 
+    T = eltype(φ)
     fill!(φ, 0)
-
     nlast = last(eachindex(ts))
+    ioff = first(num_constraints(B))
 
     # We loop over all knot segments Ω[n] = (ts[n], ts[n + 1]).
     # For all B-splines with support in this segment, we integrate the product
@@ -80,25 +81,17 @@ function galerkin_projection!(
         xs = metric .* quadx
         # @assert all(x -> tn ≤ x ≤ tn1, xs)
 
-        is = nonzero_in_segment(B, n)
-
-        # This is a property of B-spline bases, which should be preserved by
-        # derived (recombined) bases.
-        @assert 0 < length(is) ≤ k
-
-        # Evaluate all required basis functions on quadrature nodes.
-        bis = eval_basis_functions(B, is, xs, deriv)
-
-        fs = ntuple(i -> f(xs[i]), Val(length(xs)))  # this works fine!
-
-        # For some reason, the alternatives below allocate and give bad
-        # performance... (on Julia 1.7-beta2)
-        # fs = map(f, xs)
-        # fs = f.(xs)
-
-        for (ni, i) in enumerate(is)
-            bs = bis[ni]
-            φ[i] += metric.α * ((bs .* fs) ⋅ quadw)
+        for (x, w) ∈ zip(xs, quadw)
+            ilast = n - ioff
+            _, bis = evaluate_all(B, x, deriv, T; ileft = ilast)
+            y = metric.α * w * f(x)
+            for (δi, bi) ∈ pairs(bis)
+                i = ilast + 1 - δi
+                if i ∉ axes(φ, 1)  # this can be true for recombined bases
+                    continue
+                end
+                @inbounds φ[i] += y * bi
+            end
         end
     end
 
diff --git a/src/Galerkin/quadratic.jl b/src/Galerkin/quadratic.jl
index 476c372b..f723c05b 100644
--- a/src/Galerkin/quadratic.jl
+++ b/src/Galerkin/quadratic.jl
@@ -27,7 +27,7 @@ function galerkin_tensor(
     if length(Bs[1]) != length(Bs[2])
         throw(DimensionMismatch("the first two bases must have the same lengths"))
     end
-    δl, δr = num_constraints(Bs[3]) .- num_constraints(Bs[1])
+    δl = first(num_constraints(Bs[3])) - first(num_constraints(Bs[1]))
     A = BandedTensor3D{T}(undef, dims, Val(b), bandshift=(0, 0, δl))
     galerkin_tensor!(A, Bs, deriv)
 end
@@ -63,6 +63,10 @@ function galerkin_tensor!(
     Ni, Nj, Nl = Ns
     @assert Ni == Nj  # verified at construction of BandedTensor3D
 
+    ioff = first(num_constraints(Bi))
+    joff = first(num_constraints(Bj))
+    loff = first(num_constraints(Bl))
+
     # Orders and knots are assumed to be the same (see _check_bases).
     k = order(Bi)
     ts = knots(Bi)
@@ -86,7 +90,7 @@ function galerkin_tensor!(
 
     fill!(A, 0)
     T = eltype(A)
-    Al = @MMatrix zeros(T, 2k - 1, 2k - 1)
+    Al = zero(MMatrix{2k - 1, 2k - 1, T})
     nlast = last(eachindex(ts))
 
     @inbounds for n in eachindex(ts)
@@ -97,30 +101,46 @@ function galerkin_tensor!(
         metric = QuadratureMetric(tn, tn1)
         xs = metric .* quadx
 
-        is = nonzero_in_segment(Bi, n)
-        js = nonzero_in_segment(Bj, n)
-        ls = nonzero_in_segment(Bl, n)
-
-        bis = eval_basis_functions(Bi, is, xs, deriv[1])
-        bjs = same_12 ?
-            bis : eval_basis_functions(Bj, js, xs, deriv[2])
-        bls = same_13 ?
-            bis : same_23 ?
-            bjs : eval_basis_functions(Bl, ls, xs, deriv[3])
-
-        # We compute the submatrix A[:, :, l] for each `l`.
-        for (nl, l) in enumerate(ls)
-            fill!(Al, 0)
-            inds = BandedTensors.band_indices(A, l)
-            # @assert is ⊆ inds && js ⊆ inds
-            # @assert size(Al, 1) == size(Al, 2) == length(inds)
-            δi = searchsortedlast(inds, is[1]) - 1
-            δj = searchsortedlast(inds, js[1]) - 1
-            for nj in eachindex(js), ni in eachindex(is)
-                Al[ni + δi, nj + δj] =
-                    metric.α * ((bis[ni] .* bjs[nj] .* bls[nl]) ⋅ quadw)
+        ilast = n - ioff
+        jlast = n - joff
+        llast = n - loff
+        l₀ = llast - order(Bs[3])
+
+        for (x, w) ∈ zip(xs, quadw)
+            _, bis = evaluate_all(Bi, x, deriv[1], T; ileft = ilast)
+            _, bjs = same_12 ?
+                (ilast, bis) : evaluate_all(Bj, x, deriv[2], T; ileft = jlast)
+            _, bls = same_13 ?
+                (ilast, bis) : same_23 ?
+                (jlast, bjs) : evaluate_all(Bl, x, deriv[3], T; ileft = llast)
+
+            # Iterate in increasing order (not sure if it really helps performance)
+            bls = reverse(bls)
+
+            # We compute the submatrix A[:, :, l] for each `l`.
+            for (δl, bl) in pairs(bls)
+                iszero(bl) && continue  # can prevent problems at the borders
+                l = l₀ + δl
+                y₀ = metric.α * w * bl
+                @inbounds fill!(Al, 0)
+                inds = BandedTensors.band_indices(A, l)
+                # @assert inds == (l - k + 1:l + k - 1) .+ bandshift(A)[3]
+                # @assert (ilast - k + 1:ilast) ⊆ inds
+                # @assert (jlast - k + 1:jlast) ⊆ inds
+                Δi = first(inds) - 1
+                Δj = Δi
+                for (δj, bj) ∈ pairs(bjs)
+                    y₁ = y₀ * bj
+                    j = jlast + 1 - δj
+                    jj = j - Δj
+                    for (δi, bi) ∈ pairs(bis)
+                        i = ilast + 1 - δi  # actual index of basis function
+                        ii = i - Δi         # index in submatrix Al
+                        @inbounds Al[ii, jj] = y₁ * bi
+                    end
+                end
+                @inbounds A[:, :, l] += Al
             end
-            A[:, :, l] += Al
         end
     end
 
diff --git a/src/Galerkin/quadratures.jl b/src/Galerkin/quadratures.jl
index 71a14db1..eb75d387 100644
--- a/src/Galerkin/quadratures.jl
+++ b/src/Galerkin/quadratures.jl
@@ -57,24 +57,3 @@ end
 # Apply metric to normalised coordinate (x ∈ [-1, 1]).
 Base.:*(M::QuadratureMetric, x::Real) = M.α * x + M.β
 Broadcast.broadcastable(M::QuadratureMetric) = Ref(M)
-
-# Evaluate elements of basis `B` (given by indices `is`) at points `xs`.
-# The length of `xs` is assumed static.
-# The length of `is` is generally equal to the B-spline order, but may me
-# smaller near the boundaries (this is not significant if knots are "augmented",
-# as is the default).
-# TODO implement evaluation of all B-splines at once (should be much faster...)
-function eval_basis_functions(B, is, xs, args...)
-    k = order(B)
-    @assert length(is) ≤ k
-    ntuple(Val(k)) do n
-        # In general, `is` will have length equal `k`.
-        # If its length is less than `k` (may happen near boundaries, if knots
-        # are not "augmented"), we repeat values for the first B-spline, just
-        # for type stability concerns. These values are never used.
-        i = n > length(is) ? is[1] : is[n]
-        map(xs) do x
-            B[i](x, args...)
-        end
-    end
-end
diff --git a/src/Recombinations/bases.jl b/src/Recombinations/bases.jl
index 30f2b193..809daeb3 100644
--- a/src/Recombinations/bases.jl
+++ b/src/Recombinations/bases.jl
@@ -369,3 +369,39 @@ end
 
     ϕ::T
 end
+
+@propagate_inbounds function BSplines.evaluate_all(
+        R::RecombinedBSplineBasis, x::Real, op::AbstractDifferentialOp, ::Type{T};
+        ileft::Union{Int, Nothing} = nothing,
+    ) where {T}
+    B = parent(R)
+    k = order(B)
+    off = num_constraints(R)[1]
+    if !isnothing(ileft)
+        ileft += off
+    end
+    ilast, bs = evaluate_all(B, x, op, T; ileft)
+    jlast = ilast - off
+    A = recombination_matrix(R)
+    N = length(R)
+
+    ϕs = zero(MVector{k, eltype(bs)})
+    for δj = 1:k
+        j = jlast + 1 - δj  # recombined index ϕ_j
+        1 ≤ j ≤ N || continue
+        block = which_recombine_block(A, j)
+        if block == 2
+            @inbounds ϕs[δj] = bs[δj]  # no recombination needed (most common case)
+        else
+            # TODO can this be optimised? (and is it worth it?)
+            for δi = 1:k
+                i = ilast + 1 - δi
+                i ≤ 0 && continue  # fixes rare corner case in tests...
+                @inbounds ϕs[δj] += getindex(A, i, j; block) * bs[δi]
+            end
+        end
+    end
+
+    # We return the index of the last recombined basis function ϕ_j.
+    jlast, Tuple(ϕs)
+end
diff --git a/src/Recombinations/matrices.jl b/src/Recombinations/matrices.jl
index 90442608..9b4faf5c 100644
--- a/src/Recombinations/matrices.jl
+++ b/src/Recombinations/matrices.jl
@@ -263,19 +263,40 @@ end
 end
 @inline _natural_ops(::Val{1}) = ()
 
+# Case h = 1: return empty (0 × 2) matrix.
+_natural_eval_derivatives(B, x, js, ::Val{1}, ::Type{T}) where {T} =
+    zero(SMatrix{0, 2, T})
+
 # Evaluate derivatives 2:h of B-splines 1:(h + 1) at the boundaries.
-function _natural_eval_derivatives(B, x, js, ::Val{h}, ::Type{T}) where {h, T}
-    @assert length(js) == h + 1
-    M = MMatrix{h - 1, h + 1, T}(undef)
-    if h == 1
-        return SMatrix(M)
+@generated function _natural_eval_derivatives(
+        B, x, js, ::Val{h}, ::Type{T},
+    ) where {h, T}
+    @assert h ≥ 2
+    ex = quote
+        @assert length(js) == $h + 1
+        M = zero(MMatrix{$h - 1, $h + 1, $T})
     end
-    for k ∈ axes(M, 2)
-        j = js[k]
-        bs = B[j, T](x, Derivative(2:h))  # returns tuple (bⱼ⁽²⁾, bⱼ⁽³⁾, …, bⱼ⁽ʰ⁾)
-        M[:, k] = SVector(bs)
+    for i ∈ 1:(h - 1)
+        jlast = Symbol(:jlast_, i)
+        bs = Symbol(:bs_, i)
+        ileft = i == 1 ? :(nothing) : Symbol(:jlast_, i - 1)
+        ex = quote
+            $ex
+            $jlast, $bs = evaluate_all(B, x, Derivative($i + 1), $T; ileft = $ileft)
+            @assert length($bs) == order(B)
+            for n ∈ axes(M, 2)
+                j = js[n]
+                δj = $jlast + 1 - j
+                if δj ∈ eachindex($bs)
+                    @inbounds M[$i, n] = $bs[δj]
+                end
+            end
+        end
+    end
+    quote
+        $ex
+        SMatrix(M)
     end
-    SMatrix(M)
 end
 
 # On the left boundary, `i` is the index of the resulting recombined basis
@@ -451,21 +472,23 @@ end
 
 Returns the range of row indices `i` such that `A[i, col]` is non-zero.
 """
-@propagate_inbounds function nzrows(A::RecombineMatrix,
-                                    j::Integer) :: UnitRange{Int}
-    block = which_recombine_block(A, j)
+@propagate_inbounds function nzrows(
+        A::RecombineMatrix, j::Integer;
+        block = nothing,  # to avoid recomputing it if already known
+    )
+    blk = something(block, which_recombine_block(A, j))
     j += num_constraints(A)[1]
-    if block == 1
+    if blk == 1
         # We take advantage of the locality condition imposed when constructing
         # the recombination matrix.
         δ = A.allowed_nonzeros_per_column[1]
         (j - δ + 1):j
-    elseif block == 2
+    elseif blk == 2
         j:j  # shifted diagonal of ones
     else
         δ = A.allowed_nonzeros_per_column[2]
         j:(j + δ - 1)
-    end
+    end :: UnitRange{Int}
 end
 
 # Pretty-printing, adapted from BandedMatrices.jl code.
@@ -474,18 +497,21 @@ function Base.replace_in_print_matrix(
     iszero(A[i, j]) ? Base.replace_with_centered_mark(s) : s
 end
 
-@inline function Base.getindex(A::RecombineMatrix, i::Integer, j::Integer)
+@inline function Base.getindex(
+        A::RecombineMatrix, i::Integer, j::Integer;
+        block = nothing,
+    )
     @boundscheck checkbounds(A, i, j)
     T = eltype(A)
-    @inbounds block = which_recombine_block(A, j)
+    @inbounds blk = something(block, which_recombine_block(A, j))
 
     cl, cr = num_constraints(A)  # left/right constraints
 
-    if block == 2
+    if blk == 2
         return T(i == j + cl)  # δ_{i, j+c}
     end
 
-    if block == 1
+    if blk == 1
         C = A.ul
         if i <= size(C, 1)
             return @inbounds C[i, j] :: T
diff --git a/src/SplineApproximations/SplineApproximations.jl b/src/SplineApproximations/SplineApproximations.jl
index ed6f5209..97c06b88 100644
--- a/src/SplineApproximations/SplineApproximations.jl
+++ b/src/SplineApproximations/SplineApproximations.jl
@@ -83,7 +83,7 @@ SplineApproximation containing the 7-element Spline{Float64}:
  basis: 7-element BSplineBasis of order 3, domain [-1.0, 1.0]
  order: 3
  knots: [-1.0, -1.0, -1.0, -0.6, -0.2, 0.2, 0.6, 1.0, 1.0, 1.0]
- coefficients: [-0.841471, -0.731727, -0.39727, 0.0, 0.39727, 0.731727, 0.841471]
+ coefficients: [-0.841471, -0.731727, -0.39727, 2.85767e-17, 0.39727, 0.731727, 0.841471]
  approximation method: interpolation at [-1.0, -0.8, -0.4, 0.0, 0.4, 0.8, 1.0]
 
 julia> sin(0.3), S_interp(0.3)
@@ -98,7 +98,7 @@ SplineApproximation containing the 7-element Spline{Float64}:
  approximation method: interpolation at [-1.0, -0.8, -0.4, 0.0, 0.4, 0.8, 1.0]
 
 julia> exp(0.3), S_interp(0.3)
-(1.3498588075760032, 1.3491015490105398)
+(1.3498588075760032, 1.3491015490105396)
 
 julia> S_fast = approximate(exp, B, VariationDiminishing())
 SplineApproximation containing the 7-element Spline{Float64}:
@@ -117,7 +117,7 @@ SplineApproximation containing the 7-element Spline{Float64}:
  approximation method: MinimiseL2Error()
 
 julia> x = 0.34; exp(x), S_opt(x), S_interp(x), S_fast(x)
-(1.4049475905635938, 1.4044530324752085, 1.4044149581073815, 1.4328668494041878)
+(1.4049475905635938, 1.4044530324752076, 1.4044149581073813, 1.4328668494041878)
 ```
 """
 approximate(f, B::AbstractBSplineBasis) =
diff --git a/src/SplineInterpolations/SplineInterpolations.jl b/src/SplineInterpolations/SplineInterpolations.jl
index bdc8034d..5da4fa81 100644
--- a/src/SplineInterpolations/SplineInterpolations.jl
+++ b/src/SplineInterpolations/SplineInterpolations.jl
@@ -158,7 +158,7 @@ julia> (Derivative(1) * itp)(-1)
 -0.01663433622896893
 
 julia> (Derivative(2) * itp)(-1)
-10.527273287554962
+10.52727328755495
 
 julia> Snat = interpolate(xs, ys, BSplineOrder(4), Natural())
 SplineInterpolation containing the 21-element Spline{Float64}:
diff --git a/src/Splines/spline.jl b/src/Splines/spline.jl
index 80c83cb8..63b8a1ed 100644
--- a/src/Splines/spline.jl
+++ b/src/Splines/spline.jl
@@ -204,6 +204,9 @@ julia> S = Spline(B, rand(length(B)))
  knots: [-1.0, -1.0, -1.0, -1.0, -0.8, -0.6, -0.4, -0.2, 0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.0, 1.0, 1.0]
  coefficients: [0.461501, 0.619799, 0.654451, 0.667213, 0.334672, 0.618022, 0.967496, 0.900014, 0.611195, 0.469467, 0.221618, 0.80084, 0.269533]
 
+julia> Derivative(0) * S === S
+true
+
 julia> Derivative(1) * S
 12-element Spline{Float64}:
  basis: 12-element BSplineBasis of order 3, domain [-1.0, 1.0]
@@ -232,6 +235,8 @@ Base.diff(S::Spline, op = Derivative(1)) = op * S
 
 _diff(::AbstractBSplineBasis, S, etc...) = diff(parent_spline(S), etc...)
 
+_diff(::BSplineBasis, S::Spline, ::Derivative{0}) = S
+
 function _diff(
         ::BSplineBasis, S::Spline, ::Derivative{Ndiff} = Derivative(1),
     ) where {Ndiff}
diff --git a/test/airy.jl b/test/airy.jl
index b2cdd9e3..62961c10 100644
--- a/test/airy.jl
+++ b/test/airy.jl
@@ -37,7 +37,7 @@ function test_airy_equation()
         Tc = @inferred galerkin_tensor((R, R, B), Derivative.((0, 0, 0)))
         Mc = @inferred Tc * cs
         @test Mc isa BandedMatrix
-        @test issymmetric(Mc)
+        @test Mc ≈ Mc'  # the matrix is practically symmetric
         Hermitian(Mc)
     end
 
diff --git a/test/bsplines.jl b/test/bsplines.jl
new file mode 100644
index 00000000..100f5db5
--- /dev/null
+++ b/test/bsplines.jl
@@ -0,0 +1,147 @@
+using Test
+using BSplineKit
+using Random
+using LinearAlgebra: dot
+
+using BSplineKit.BSplines:
+    find_knot_interval
+
+function test_bsplines(B::AbstractBSplineBasis)
+    N = length(B)
+    ts = knots(B)
+    a = first(ts)
+    b = last(ts)
+    @test (a, b) == boundaries(B)
+    k = order(B)
+
+    if B isa BSplineBasis
+        @testset "Find interval" begin
+            @inferred find_knot_interval(ts, 0.3)
+            @test find_knot_interval(ts, a - 1) == (1, -1)
+            @test find_knot_interval(ts, a) == (k, 0)
+            @test find_knot_interval(ts, b) == (N, 0)
+            @test find_knot_interval(ts, b + 1) == (N, 1)
+            let x = (a + b) / 3  # test on a "normal" location
+                @test find_knot_interval(ts, x) == (searchsortedlast(ts, x), 0)
+            end
+            let i = length(ts) ÷ 2  # test on a knot location
+                @test find_knot_interval(ts, ts[i]) == (i, 0)
+            end
+        end
+    end
+
+    xs_eval = [
+        ts[begin],                                          # at left boundary
+        ts[begin + k + 2],                                  # on a knot
+        0.2 * ts[begin + k + 2] + 0.8 * ts[begin + k + 3],  # between two knots
+        ts[end],                                            # at right boundary
+    ]
+
+    derivs = Derivative.((0, 1, 2))
+    _order(::Derivative{n}) where {n} = n
+
+    S = Spline(B, randn(rng, length(B)))
+
+    let x = 0.3
+        @test evaluate_all(B, x) == @inferred B(x)  # alias for `evaluate_all`
+    end
+
+    @testset "Evaluate $op | x = $x" for x ∈ xs_eval, op ∈ derivs
+        i, bs = @inferred evaluate_all(B, x, op)
+        @test length(bs) == k
+        @test eltype(bs) === Float64
+
+        if B isa BSplineBasis
+            i′, bs′ = @inferred evaluate_all(B, x, op, Float32)
+            @test eltype(bs′) === Float32
+            @test i === i′
+            @test all(bs .≈ bs′)
+        end
+
+        # Test reusing knot interval (ileft)
+        @test (i, bs) == @inferred evaluate_all(B, x, op; ileft = i)
+
+        if B isa BSplineBasis
+            if op === Derivative(0)
+                @test sum(bs) ≈ 1  # partition of unity property
+            else
+                @test abs(sum(bs)) < 10eps(maximum(bs))  # ≈ 0
+            end
+        end
+
+        nderiv = _order(op)
+
+        # 1. Compare with evaluation of single B-spline
+        if nderiv < k
+            for δj ∈ eachindex(bs)
+                j = i - δj + 1
+                # For recombined bases, the index can fall outside of the domain.
+                if B isa RecombinedBSplineBasis && j ∉ 1:N
+                    continue
+                end
+                bj = bs[δj]
+                if x == first(ts) && op ∈ first(constraints(B))
+                    # In this case all results are expected to be zero.
+                    @test abs(bj) < 1e-10
+                elseif x == last(ts) && op ∈ last(constraints(B))
+                    @test abs(bj) < 1e-10
+                else
+                    # Results are non-zero.
+                    @test bj ≈ B[j](x, op)
+                end
+            end
+        end
+
+        # 2. Compare with full evaluation of a spline
+        if nderiv < k
+            S′ = op * S  # this is exactly `S` if op == Derivative(0)
+            cs = let us = coefficients(S)
+                if B isa BSplineBasis
+                    ntuple(δ -> us[i + 1 - δ], Val(k))
+                else
+                    ntuple(Val(k)) do δ
+                        j = i + 1 - δ
+                        checkbounds(Bool, us, j) ? us[j] : zero(eltype(us))
+                    end
+                end
+            end
+            ϵ = 10eps(maximum(abs, coefficients(S′)))
+            u, v = S′(x), dot(cs, bs)
+            if abs(u) < ϵ
+                @test abs(v) < ϵ
+            else
+                @test u ≈ v
+            end
+        end
+
+        # 3. Compare to non-generated version (assuming the @generated version
+        #    is called)
+        if B isa BSplineBasis
+            @test evaluate_all(B, x, op) == @inferred BSplines._evaluate_all_alt(
+                knots(B), x, BSplineOrder(k), op, eltype(bs),
+            )
+            @test evaluate_all(B, x, op, Float32) == @inferred BSplines._evaluate_all_alt(
+                knots(B), x, BSplineOrder(k), op, Float32,
+            )
+        end
+    end
+
+    nothing
+end
+
+rng = MersenneTwister(42)
+ξs = sort!(rand(rng, 20))
+ξs[begin] = 0; ξs[end] = 1;
+
+@testset "B-splines (k = $k)" for k ∈ (2, 4, 5, 6, 8)
+    B = BSplineBasis(BSplineOrder(k), copy(ξs))
+    @testset "B-spline basis" begin
+        test_bsplines(B)
+    end
+    ops = (Derivative(0), Derivative(1), Natural())
+    @testset "Recombined (op = $op)" for op ∈ ops
+        op == Natural() && isodd(k) && continue
+        R = RecombinedBSplineBasis(op, B)
+        test_bsplines(R)
+    end
+end
diff --git a/test/galerkin.jl b/test/galerkin.jl
index c286a842..4b73f458 100644
--- a/test/galerkin.jl
+++ b/test/galerkin.jl
@@ -171,7 +171,7 @@ function test_galerkin_recombined()
                     T″ = galerkin_tensor(R, Derivative.((0, 0, 2)))
                     T1 = galerkin_tensor(R, Derivative.((1, 0, 1)))
                     T2 = galerkin_tensor(R, Derivative.((0, 1, 1)))
-                    @test T2 == permutedims(T1, (2, 1, 3))
+                    @test T2 ≈ permutedims(T1, (2, 1, 3))
                     @test norm(T″ + T1 + T2, Inf) < norm(T1, Inf) * ε
                 end
             end
@@ -184,7 +184,7 @@ function test_galerkin_recombined()
             @testset "3D tensor" begin
                 test_galerkin_tensor(R)
             end
-            @test M == galerkin_matrix((B, R))'
+            @test M ≈ galerkin_matrix((B, R))'
             let (δl, δr) = num_constraints(R)
                 n, m = size(M)
                 @test n + δl + δr == m
@@ -196,7 +196,7 @@ function test_galerkin_recombined()
                 # basis.
                 M_base = galerkin_matrix(B)
                 I = (δl + r + 1):(m - (δr + r))
-                @test view(M, (r + 1):(n - r), I) == view(M_base, I, I)
+                @test view(M, (r + 1):(n - r), I) ≈ view(M_base, I, I)
             end
         end
     end
diff --git a/test/runtests.jl b/test/runtests.jl
index bc45e350..f27408fc 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -22,6 +22,7 @@ gauss_lobatto_points(N) = [-cos(π * n / N) for n = 0:N]
 
 include("diffops.jl")
 include("knots.jl")
+include("bsplines.jl")
 include("splines.jl")
 include("natural.jl")
 include("interpolation.jl")