From e67909fbe909383873ccdc897d70a7bcecb6e91a Mon Sep 17 00:00:00 2001
From: Andrew Ning <aning@byu.edu>
Date: Wed, 5 Jul 2023 11:09:50 +0200
Subject: [PATCH] examples

---
 examples/README.md    |   4 +
 examples/explicit.jl  | 234 ++++++++++++++++++++++++++++++++++++++++++
 examples/implicit.jl  | 211 +++++++++++++++++++++++++++++++++++++
 examples/nonlinear.jl |  89 ++++++++++++++++
 4 files changed, 538 insertions(+)
 create mode 100644 examples/README.md
 create mode 100644 examples/explicit.jl
 create mode 100644 examples/implicit.jl
 create mode 100644 examples/nonlinear.jl

diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..1423737
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,4 @@
+# Examples
+
+This folder contains the examples from the paper: <https://doi.org/10.48550/arXiv.2306.15243>.  There are three scripts: nonlinear.jl, explicit.jl, and implicit.jl.  All are written with for loops, solving problems of increasing size, but in practice I often ran them one size at a time as I found that produced more consistent timings.  The last case (implicit.jl) requires a patch to NLsolve described here: <https://github.com/JuliaNLSolvers/NLsolve.jl/issues/281>.  The new methodology, with ImplicitAD, will work fine without it since it doesn't propagate through the solver.  But if you want to compare using revese mode directly through NLsolve the patch is required.
+
diff --git a/examples/explicit.jl b/examples/explicit.jl
new file mode 100644
index 0000000..d7a84c6
--- /dev/null
+++ b/examples/explicit.jl
@@ -0,0 +1,234 @@
+using ImplicitAD
+using ForwardDiff
+using ReverseDiff
+using BenchmarkTools
+
+struct Tsit5{TV, TF}
+    k1::TV
+    k2::TV
+    k3::TV
+    k4::TV
+    k5::TV
+    k6::TV
+    ytemp2::TV
+    ytemp3::TV
+    ytemp4::TV
+    ytemp5::TV
+    ytemp6::TV
+    c1::TF
+    c2::TF
+    c3::TF
+    c4::TF
+    a21::TF
+    a31::TF
+    a32::TF
+    a41::TF
+    a42::TF
+    a43::TF
+    a51::TF
+    a52::TF
+    a53::TF
+    a54::TF
+    a61::TF
+    a62::TF
+    a63::TF
+    a64::TF
+    a65::TF
+    a71::TF
+    a72::TF
+    a73::TF
+    a74::TF
+    a75::TF
+    a76::TF
+end
+
+function Tsit5(ny, T)
+
+    k1 = Vector{T}(undef, ny)
+    k2 = Vector{T}(undef, ny)
+    k3 = Vector{T}(undef, ny)
+    k4 = Vector{T}(undef, ny)
+    k5 = Vector{T}(undef, ny)
+    k6 = Vector{T}(undef, ny)
+    ytemp2 = Vector{T}(undef, ny)
+    ytemp3 = Vector{T}(undef, ny)
+    ytemp4 = Vector{T}(undef, ny)
+    ytemp5 = Vector{T}(undef, ny)
+    ytemp6 = Vector{T}(undef, ny)
+
+    # constants
+    c1 = 0.161; c2 = 0.327; c3 = 0.9; c4 = 0.9800255409045097;
+    a21 = 0.161;
+    a31 = -0.008480655492356989; a32 = 0.335480655492357;
+    a41 = 2.8971530571054935; a42 = -6.359448489975075; a43 = 4.3622954328695815;
+    a51 = 5.325864828439257; a52 = -11.748883564062828; a53 = 7.4955393428898365; a54 = -0.09249506636175525;
+    a61 = 5.86145544294642; a62 = -12.92096931784711; a63 = 8.159367898576159; a64 = -0.071584973281401; a65 = -0.028269050394068383;
+    a71 = 0.09646076681806523; a72 = 0.01; a73 = 0.4798896504144996; a74 = 1.379008574103742; a75 = -3.290069515436081; a76 = 2.324710524099774
+
+    return Tsit5(k1, k2, k3, k4, k5, k6, ytemp2, ytemp3, ytemp4, ytemp5, ytemp6, c1, c2, c3, c4, a21, a31, a32, a41, a42, a43, a51, a52, a53, a54, a61, a62, a63, a64, a65, a71, a72, a73, a74, a75, a76)
+end
+
+function odestep!(tsit::Tsit5, odefun, y, yprev, t, tprev, xd, xci, p)
+
+    dt = t - tprev
+
+    odefun(tsit.k1, yprev, t, xd, xci, p)
+    @. tsit.ytemp2 = yprev + dt*tsit.a21*tsit.k1
+    odefun(tsit.k2, tsit.ytemp2, t+tsit.c1*dt, xd, xci, p)
+    @. tsit.ytemp3 = yprev + dt*(tsit.a31*tsit.k1+tsit.a32*tsit.k2)
+    odefun(tsit.k3, tsit.ytemp3, t+tsit.c2*dt, xd, xci, p)
+    @. tsit.ytemp4 = yprev + dt*(tsit.a41*tsit.k1+tsit.a42*tsit.k2+tsit.a43*tsit.k3)
+    odefun(tsit.k4, tsit.ytemp4, t+tsit.c3*dt, xd, xci, p)
+    @. tsit.ytemp5 = yprev + dt*(tsit.a51*tsit.k1+tsit.a52*tsit.k2+tsit.a53*tsit.k3+tsit.a54*tsit.k4)
+    odefun(tsit.k5, tsit.ytemp5, t+tsit.c4*dt, xd, xci, p)
+    @. tsit.ytemp6 = yprev + dt*(tsit.a61*tsit.k1+tsit.a62*tsit.k2+tsit.a63*tsit.k3+tsit.a64*tsit.k4+tsit.a65*tsit.k5)
+    odefun(tsit.k6, tsit.ytemp6, t+dt, xd, xci, p)
+
+    @. y = yprev + dt*(tsit.a71*tsit.k1+tsit.a72*tsit.k2+tsit.a73*tsit.k3+tsit.a74*tsit.k4+tsit.a75*tsit.k5+tsit.a76*tsit.k6)
+
+    return y
+end
+
+
+function Q(T, p)
+    (; hc, Ta, ϵ, σ) = p
+    Qc = hc*(T - Ta)
+    Qr = ϵ*σ*(T^4 - Ta^4)
+    return Qc + Qr
+end
+
+function plate(dy, y, t, xd, xci, p)
+
+    (; k, rho, Cp, δ, tz, n, T, dT) = p
+    alpha = k/(rho*Cp*δ^2)
+    beta = 2/(rho*Cp*tz)
+
+    # set temperature grid
+    T[2:n-1, 2:n-1] .= reshape(y, n-2, n-2)
+
+    # Direchlet b.c. on bottom
+    T[end, :] .= xci
+
+    # Neuman b.c. on sides and top
+    @views for i = 2:n-1
+        T[i, 1] = T[i, 2]  # left
+        T[i, end] = T[i, end-1]  # right
+    end
+    @views for j = 1:n
+        T[1, j] = T[2, j] # top
+    end
+
+    # update interior points
+    @views for i = 2:n-1
+        for j = 2:n-1
+            Tij = T[i, j]
+            dT[i, j] = alpha*(T[i+1, j] + T[i-1, j] + T[i, j+1] + T[i, j-1] - 4*Tij) - beta*(Q(Tij, p))
+        end
+    end
+
+    dy .= dT[2:n-1, 2:n-1][:]
+end
+
+
+function initialize(t0, xd, xc0, p)
+    (; n) = p
+    return 300*ones((n-2)*(n-2))
+end
+
+function runit(n, nt)
+
+    tsit = Tsit5((n-2)*(n-2), Float64)
+    onestep!(y, yprev, t, tprev, xd, xci, p) = odestep!(tsit, plate, y, yprev, t, tprev, xd, xci, p)
+
+    # problem constants
+    p = (k=400.0, rho=8960.0, Cp=386.0, tz=.01, σ=5.670373e-8, hc=1.0, Ta=300.0, ϵ=0.5, T=zeros(n, n), dT=zeros(n, n), n=n, δ=1/(n-1))
+
+    t = range(0.0, 5000, nt)
+    nt = length(t)
+
+    xc = reverse(range(600, 1000, n))*ones(nt)'
+    xd = Float64[]
+
+    function program(xc)
+        xcm = reshape(xc, n, nt)
+
+        TF = eltype(xc)
+        if eltype(tsit.k1) != TF
+            tsit = Tsit5((n-2)*(n-2), TF)
+            pf = (k=400.0, rho=8960.0, Cp=386.0, tz=.01, σ=5.670373e-8, hc=1.0, Ta=300.0, ϵ=0.5, T=zeros(TF, n, n), dT=zeros(TF, n, n), n=n, δ=1/(n-1))
+            onestep!(y, yprev, t, tprev, xd, xci, p) = odestep!(tsit, plate, y, yprev, t, tprev, xd, xci, pf)
+        end
+
+        y = ImplicitAD.odesolve(initialize, onestep!, t, xd, xcm, p)
+
+        return y[1, end]  # top left corner temperature at last time
+    end
+
+    # ------ implicitAD ----------
+
+    TR = eltype(ReverseDiff.track([1.0]))
+    tsitf = Tsit5((n-2)*(n-2), Float64)
+    tsitr = Tsit5((n-2)*(n-2), TR)
+    pr = (k=400.0, rho=8960.0, Cp=386.0, tz=.01, σ=5.670373e-8, hc=1.0, Ta=300.0, ϵ=0.5, T=zeros(TR, n, n), dT=zeros(TR, n, n), n=n, δ=1/(n-1))
+    onestepf!(y, yprev, t, tprev, xd, xci, p) = odestep!(tsitf, plate, y, yprev, t, tprev, xd, xci, p)
+    onestepr!(y, yprev, t, tprev, xd, xci, p) = odestep!(tsitr, plate, y, yprev, t, tprev, xd, xci, pr)
+    cache = ImplicitAD.explicit_unsteady_cache(initialize, onestepr!, (n-2)*(n-2), 0, n, p; compile=true)
+
+    function modprogram(xc)
+        xcm = reshape(xc, n, nt)
+
+        y = explicit_unsteady(initialize, onestepf!, t, xd, xcm, p; cache)
+
+        return y[1, end]
+    end
+
+    xcv = xc[:]
+
+    fwd_cache = ForwardDiff.GradientConfig(program, xcv)
+    f_tape1 = ReverseDiff.GradientTape(modprogram, xcv)
+    rev_cache1 = ReverseDiff.compile(f_tape1)
+    f_tape2 = ReverseDiff.GradientTape(program, xcv)
+    rev_cache2 = ReverseDiff.compile(f_tape2)
+
+    g1 = zeros(length(xcv))
+    g2 = zeros(length(xcv))
+    g3 = zeros(length(xcv))
+
+
+    # approximate cost of central diff
+    t1 = @benchmark $program($xcv)
+    time1 = median(t1).time * 1e-9 * (2*length(xcv) + 1)
+
+    # forward
+    t2 = @benchmark ForwardDiff.gradient!($g1, $program, $xcv, $fwd_cache)
+    time2 = median(t2).time * 1e-9
+
+    #reverse diff
+    t3 = @benchmark ReverseDiff.gradient!($g2, $rev_cache2, $xcv)
+    time3 = median(t3).time * 1e-9
+
+    # reverse w/ implicitad
+    t4 = @benchmark ReverseDiff.gradient!($g3, $rev_cache1, $xcv)
+    time4 = median(t4).time * 1e-9  # reverse implicit diff
+
+    println(time1, " ", time2, " ", time3, " ", time4)
+
+    return time1, time2, time3, time4
+
+end
+
+nt = 1000
+nvec = [3, 5, 7, 9, 11, 13, 15, 17, 19]
+# nvec = [19]
+nn = length(nvec)
+t1 = zeros(nn)
+t2 = zeros(nn)
+t3 = zeros(nn)
+t4 = zeros(nn)
+states = zeros(nn)
+
+for i = 1:nn
+    n = nvec[i]
+    t1[i], t2[i], t3[i], t4[i] = runit(n, nt)
+    states[i] = (n-2)^2
+end
diff --git a/examples/implicit.jl b/examples/implicit.jl
new file mode 100644
index 0000000..615fee8
--- /dev/null
+++ b/examples/implicit.jl
@@ -0,0 +1,211 @@
+using ImplicitAD
+using ForwardDiff
+using ReverseDiff
+using BenchmarkTools
+using NLsolve
+
+
+function residual!(r, y, yprev, t, tprev, xd, xci, p)
+    plate(r, y, t, xd, xci, p)  # using r as a placeholder for dy
+    r .-= (y .- yprev)/(t - tprev)
+end
+
+function onestep!(y, yprev, t, tprev, xd, xci, p)
+    f!(r, yt) = residual!(r, yt, yprev, t, tprev, xd, xci, p)
+    j!(J, yt) = dTdy(J, yt, t, tprev, p)
+    T = eltype(xci)
+    if T <: ReverseDiff.TrackedReal
+        T = eltype(ReverseDiff.track([1.0]))
+    end
+    sol = nlsolve(f!, j!, T.(yprev), ftol=1e-12)
+    y .= sol.zero
+    return nothing
+end
+
+
+function Q(T, p)
+    (; hc, Ta, ϵ, σ) = p
+    Qc = hc*(T - Ta)
+    Qr = ϵ*σ*(T^4 - Ta^4)
+    return Qc + Qr
+end
+
+function plate(dy, y, t, xd, xci, p)
+
+    (; k, rho, Cp, δ, tz, n, T, dT) = p
+    alpha = k/(rho*Cp*δ^2)
+    beta = 2/(rho*Cp*tz)
+
+    # set temperature grid
+    T[2:n-1, 2:n-1] .= reshape(y, n-2, n-2)
+
+    # Direchlet b.c. on bottom
+    T[end, :] .= xci
+
+    # Neuman b.c. on sides and top
+    @views for i = 2:n-1
+        T[i, 1] = T[i, 2]  # left
+        T[i, end] = T[i, end-1]  # right
+    end
+    @views for j = 1:n
+        T[1, j] = T[2, j] # top
+    end
+
+    # update interior points
+    @views for i = 2:n-1
+        for j = 2:n-1
+            Tij = T[i, j]
+            dT[i, j] = alpha*(T[i+1, j] + T[i-1, j] + T[i, j+1] + T[i, j-1] - 4*Tij) - beta*(Q(Tij, p))
+        end
+    end
+
+    dy .= dT[2:n-1, 2:n-1][:]
+end
+
+function dTdy(J, y, t, tprev, p)
+
+    (; k, rho, Cp, δ, tz, n, hc, ϵ, σ) = p
+    alpha = k/(rho*Cp*δ^2)
+    beta = 2/(rho*Cp*tz)
+
+    J .= 0.0
+
+    indices = LinearIndices((2:n-1, 2:n-1))
+    ni, nj = size(indices)
+    for i = 1:ni
+        for j = 1:nj
+            ij = indices[i, j]
+            J[ij, ij] = -alpha*4 - beta*(hc + ϵ*σ*4*y[ij]^3)
+            if i < ni
+                J[ij, indices[i+1, j]] = alpha
+            end
+            if i > 1
+                J[ij, indices[i-1, j]] = alpha
+            end
+            if i == 1
+                J[ij, ij] += alpha
+            end
+            if j < nj
+                J[ij, indices[i, j+1]] = alpha
+            end
+            if j == nj
+                J[ij, ij] += alpha
+            end
+            if j > 1
+                J[ij, indices[i, j-1]] = alpha
+            end
+            if j == 1
+                J[ij, ij] += alpha
+            end
+            J[ij, ij] -= 1.0 / (t - tprev)
+        end
+    end
+
+end
+
+
+
+
+function initialize(t0, xd, xc0, p)
+    (; n) = p
+    return 300*ones((n-2)*(n-2))
+end
+
+function runit(n, nt)
+
+    # problem constants
+    p = (k=400.0, rho=8960.0, Cp=386.0, tz=.01, σ=5.670373e-8, hc=1.0, Ta=300.0, ϵ=0.5, T=zeros(n, n), dT=zeros(n, n), n=n, δ=1/(n-1))
+
+    t = range(0.0, 5000, nt)
+    nt = length(t)
+
+    xc = reverse(range(600, 1000, n))*ones(nt)' # 1000*ones(n, nt)
+    xd = Float64[]
+
+
+    pf = (k=400.0, rho=8960.0, Cp=386.0, tz=.01, σ=5.670373e-8, hc=1.0, Ta=300.0, ϵ=0.5, T=zeros(n, n), dT=zeros(n, n), n=n, δ=1/(n-1))
+    function program(xc)
+        xcm = reshape(xc, n, nt)
+
+        TF = eltype(xc)
+        if eltype(pf.T) != TF
+            pf = (k=400.0, rho=8960.0, Cp=386.0, tz=.01, σ=5.670373e-8, hc=1.0, Ta=300.0, ϵ=0.5, T=zeros(TF, n, n), dT=zeros(TF, n, n), n=n, δ=1/(n-1))
+        end
+
+        y = ImplicitAD.odesolve(initialize, onestep!, t, xd, xcm, pf)
+
+        return y[1, end]  # top left corner temperature at last time
+    end
+
+    # ------ implicitAD ----------
+
+    TR = eltype(ReverseDiff.track([1.0]))
+    pr = (k=400.0, rho=8960.0, Cp=386.0, tz=.01, σ=5.670373e-8, hc=1.0, Ta=300.0, ϵ=0.5, T=zeros(TR, n, n), dT=zeros(TR, n, n), n=n, δ=1/(n-1))
+    cache = ImplicitAD.implicit_unsteady_cache(initialize, residual!, (n-2)*(n-2), 0, n, pr; compile=true)
+
+    Jdrdy = zeros((n-2)*(n-2), (n-2)*(n-2))
+    function drdy(residual, r, y, yprev, t, tprev, xd, xci, p)
+        dTdy(Jdrdy, y, t, tprev, p)
+        return Jdrdy
+    end
+
+    function modprogram(xc)
+        xcm = reshape(xc, n, nt)
+
+        y = implicit_unsteady(initialize, onestep!, residual!, t, xd, xcm, p; cache, drdy)
+
+        return y[1, end]
+    end
+
+    xcv = xc[:]
+
+    fwd_cache = ForwardDiff.GradientConfig(program, xcv)
+    f_tape1 = ReverseDiff.GradientTape(modprogram, xcv)
+    rev_cache1 = ReverseDiff.compile(f_tape1)
+    f_tape2 = ReverseDiff.GradientTape(program, xcv)
+    rev_cache2 = ReverseDiff.compile(f_tape2)
+
+    g1 = zeros(length(xcv))
+    g2 = zeros(length(xcv))
+    g3 = zeros(length(xcv))
+
+
+    # approximate cost of finite diff
+    program(xcv)
+    t1 = @benchmark $program($xcv)
+    time1 = median(t1).time * 1e-9 * (2*length(xcv) + 1)
+
+    # forward
+    t2 = @benchmark ForwardDiff.gradient!($g1, $program, $xcv, $fwd_cache)
+    time2 = median(t2).time * 1e-9
+
+    # reverse diff
+    t3 = @benchmark ReverseDiff.gradient!($g2, $rev_cache2, $xcv)
+    time3 = median(t3).time * 1e-9
+
+    # reverse w/ implicitad
+    t4 = @benchmark ReverseDiff.gradient!($g3, $rev_cache1, $xcv)
+    time4 = median(t4).time * 1e-9  # reverse implicit diff
+
+    println(time1, " ", time2, " ", time3, " ", time4)
+
+    return nothing
+    # return time1, time2, time3, time4
+
+end
+
+nt = 100
+nvec = [3, 5, 7, 9, 11, 13, 15, 17, 19]
+# nvec = [9]
+nn = length(nvec)
+t1 = zeros(nn)
+t2 = zeros(nn)
+t3 = zeros(nn)
+t4 = zeros(nn)
+states = zeros(nn)
+
+for i = 1:nn
+    n = nvec[i]
+    runit(n, nt)
+    states[i] = (n-2)^2
+end
diff --git a/examples/nonlinear.jl b/examples/nonlinear.jl
new file mode 100644
index 0000000..67a5bab
--- /dev/null
+++ b/examples/nonlinear.jl
@@ -0,0 +1,89 @@
+using ImplicitAD
+using ForwardDiff
+using ReverseDiff
+using BenchmarkTools
+using NLsolve
+using FiniteDiff
+using DiffResults
+
+function residual!(r, y, x, p=())
+    offset = 1
+
+    r[1] = -4*x[1]*y[1]*(y[2] - y[1]^2) - 2*(offset - y[1])
+    n = length(y)
+    for i = 2:n-1
+        r[i] = -4*x[i]*y[i]*(y[i+1] - y[i]^2) - 2*(offset - y[i]) + 2*x[i-1]*(y[i] - y[i-1]^2)
+    end
+    r[n] = 2*x[n]*(y[n] - y[n-1]^2)
+    return nothing
+end
+
+function solve(x, p=())
+    rwrap(r, y) = residual!(r, y, x, p)
+    res = nlsolve(rwrap, 2*ones(eltype(x), length(x)), autodiff=:forward)
+    return res.zero
+end
+
+function program!(y, x)
+    y .= solve(x)
+end
+
+function modprogram!(y, x)
+    y .= implicit(solve, residual!, x)
+end
+
+function runit()
+
+    nvec = [2, 4, 8, 16, 32, 64, 128]
+    nn = length(nvec)
+
+    time1 = zeros(nn)
+    time2 = zeros(nn)
+    time3 = zeros(nn)
+    time4 = zeros(nn)
+    time5 = zeros(nn)
+
+    for i = 1:length(nvec)
+        n = nvec[i]
+        x = 100*ones(n)
+
+        f = zeros(n)
+        J = DiffResults.JacobianResult(f, x)
+        Jdiff = zeros(length(f), length(x))
+
+        fwd_cache1 = ForwardDiff.JacobianConfig(program!, f, x)
+        fwd_cache2 = ForwardDiff.JacobianConfig(modprogram!, f, x)
+
+        f_tape1 = ReverseDiff.JacobianTape(program!, f, x)
+        rev_cache1 = ReverseDiff.compile(f_tape1)
+        f_tape2 = ReverseDiff.JacobianTape(modprogram!, f, x)
+        rev_cache2 = ReverseDiff.compile(f_tape2)
+
+        finite_cache = FiniteDiff.JacobianCache(x, Val{:central})
+
+        t1 = @benchmark ForwardDiff.jacobian!($J, $program!, $f, $x, $fwd_cache1)
+        t2 = @benchmark ForwardDiff.jacobian!($J, $modprogram!, $f, $x, $fwd_cache2)
+        t3 = @benchmark ReverseDiff.jacobian!($J, $rev_cache1, $x)
+        t4 = @benchmark ReverseDiff.jacobian!($J, $rev_cache2, $x)
+        t5 = @benchmark FiniteDiff.finite_difference_jacobian!($Jdiff, $program!, $x, $finite_cache)
+
+
+        time1[i] = median(t1).time * 1e-9
+        time2[i] = median(t2).time * 1e-9
+        time3[i] = median(t3).time * 1e-9
+        time4[i] = median(t4).time * 1e-9
+        time5[i] = median(t5).time * 1e-9
+
+        println(time1[i])
+        println(time2[i])
+        println(time3[i])
+        println(time4[i])
+        println(time5[i])
+        println()
+    end
+    return time1, time2, time3, time4, time5
+end
+
+
+time1, time2, time3, time4, time5 = runit()
+