Benchmarks

Here are some simple benchmarks. Take them with a grain of salt, since they are run on virtual machines in the cloud whenever the documentation is generated automatically.

First-derivative operators

Periodic domains:

using BenchmarkTools
using LinearAlgebra, SparseArrays
using SummationByPartsOperators, DiffEqOperators

# Keep BLAS serial so that the comparison is fair
BLAS.set_num_threads(1)

# Floating point type and boundaries of the domain
T = Float64
xmin = zero(T)
xmax = one(T)

# Periodic first-derivative operator (accuracy order 2, N=101) and its grid
D_SBP = periodic_derivative_operator(; derivative_order=1, accuracy_order=2,
                                     xmin=xmin, xmax=xmax, N=101)
x = grid(D_SBP)

# Centered difference operator from DiffEqOperators with the same derivative
# order, accuracy order, grid spacing, and size, combined with a periodic BC
D_DEO = CenteredDifference(derivative_order(D_SBP), accuracy_order(D_SBP),
                           step(x), length(x)) * PeriodicBC(eltype(D_SBP))

# Sparse matrix representation of the same operator
D_sparse = sparse(D_SBP)

# Random input vector and an output buffer of the same type and size
u = randn(eltype(D_SBP), length(x))
du = similar(u)

# Check that all three operators give approximately the same result
@show D_SBP * u ≈ D_DEO * u ≈ D_sparse * u

# Print the label `text`, benchmark the in-place product `mul!(du, D, u)`,
# and display the resulting trial on `stdout` in its `text/plain` form.
function doit(D, text, du, u)
  println(text)
  # NOTE(review): short pause before the measurement; its purpose is not
  # evident from this file
  sleep(0.1)
  # `$` interpolation passes the values themselves into the benchmark
  trial = @benchmark mul!($du, $D, $u)
  show(stdout, MIME("text/plain"), trial)
  println()
  return nothing
end

# Benchmark each representation of the operator with the same vectors
for (D, label) in ((D_SBP, "D_SBP:"), (D_DEO, "D_DEO:"), (D_sparse, "D_sparse:"))
  doit(D, label, du, u)
end
D_SBP * u ≈ D_DEO * u ≈ D_sparse * u = true
D_SBP:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     34.139 ns (0.00% GC)
  median time:      46.325 ns (0.00% GC)
  mean time:        44.165 ns (0.00% GC)
  maximum time:     98.188 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     993
D_DEO:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     269.167 ns (0.00% GC)
  median time:      360.004 ns (0.00% GC)
  mean time:        365.347 ns (0.00% GC)
  maximum time:     619.175 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     240
D_sparse:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     298.837 ns (0.00% GC)
  median time:      398.841 ns (0.00% GC)
  mean time:        386.219 ns (0.00% GC)
  maximum time:     593.031 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     258

General domains:

using BenchmarkTools
using LinearAlgebra, SparseArrays
using SummationByPartsOperators, BandedMatrices

# Keep BLAS serial so that the comparison is fair
BLAS.set_num_threads(1)

# Floating point type and boundaries of the domain
T = Float64
xmin = zero(T)
xmax = one(T)

# First-derivative operator (accuracy order 6, N=10^3) using the
# coefficients selected by `MattssonNordström2004()`
D_SBP = derivative_operator(MattssonNordström2004(); derivative_order=1,
                            accuracy_order=6, xmin=xmin, xmax=xmax, N=10^3)

# Sparse and banded matrix representations of the same operator
D_sparse = sparse(D_SBP)
D_banded = BandedMatrix(D_SBP)

# Random input vector and an output buffer of the same type and size
u = randn(eltype(D_SBP), size(D_SBP, 1))
du = similar(u)

# Check that all three operators give approximately the same result
@show D_SBP * u ≈ D_sparse * u ≈ D_banded * u

# Print the label `text`, benchmark the in-place product `mul!(du, D, u)`,
# and display the resulting trial on `stdout` in its `text/plain` form.
function doit(D, text, du, u)
  println(text)
  # NOTE(review): short pause before the measurement; its purpose is not
  # evident from this file
  sleep(0.1)
  # `$` interpolation passes the values themselves into the benchmark
  trial = @benchmark mul!($du, $D, $u)
  show(stdout, MIME("text/plain"), trial)
  println()
  return nothing
end

# Benchmark each representation of the operator with the same vectors
for (D, label) in ((D_SBP, "D_SBP:"), (D_sparse, "D_sparse:"), (D_banded, "D_banded:"))
  doit(D, label, du, u)
end
D_SBP * u ≈ D_sparse * u ≈ D_banded * u = true
D_SBP:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     663.930 ns (0.00% GC)
  median time:      775.956 ns (0.00% GC)
  mean time:        817.637 ns (0.00% GC)
  maximum time:     1.235 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     158
D_sparse:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     5.300 μs (0.00% GC)
  median time:      7.100 μs (0.00% GC)
  mean time:        6.845 μs (0.00% GC)
  maximum time:     13.300 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     6
D_banded:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     11.000 μs (0.00% GC)
  median time:      14.900 μs (0.00% GC)
  mean time:        14.448 μs (0.00% GC)
  maximum time:     61.501 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1

Dissipation operators

using BenchmarkTools
using LinearAlgebra, SparseArrays
using SummationByPartsOperators, BandedMatrices

# Keep BLAS serial so that the comparison is fair
BLAS.set_num_threads(1)

# Floating point type and boundaries of the domain
T = Float64
xmin = zero(T)
xmax = one(T)

# First-derivative operator (accuracy order 6, N=10^3) that the dissipation
# operator below is built from
D_SBP = derivative_operator(MattssonNordström2004(); derivative_order=1,
                            accuracy_order=6, xmin=xmin, xmax=xmax, N=10^3)

# Dissipation operator and its sparse, banded, and dense matrix representations
Di_SBP = dissipation_operator(MattssonSvärdNordström2004(), D_SBP)
Di_sparse = sparse(Di_SBP)
Di_banded = BandedMatrix(Di_SBP)
Di_full = Matrix(Di_SBP)

# Random input vector and an output buffer of the same type and size
u = randn(eltype(D_SBP), size(D_SBP, 1))
du = similar(u)

# Check that all four operators give approximately the same result
@show Di_SBP * u ≈ Di_sparse * u ≈ Di_banded * u ≈ Di_full * u

# Print the label `text`, benchmark the in-place product `mul!(du, D, u)`,
# and display the resulting trial on `stdout` in its `text/plain` form.
function doit(D, text, du, u)
  println(text)
  # NOTE(review): short pause before the measurement; its purpose is not
  # evident from this file
  sleep(0.1)
  # `$` interpolation passes the values themselves into the benchmark
  trial = @benchmark mul!($du, $D, $u)
  show(stdout, MIME("text/plain"), trial)
  println()
  return nothing
end

# Benchmark the derivative operator for reference, followed by each
# representation of the dissipation operator, using the same vectors
for (D, label) in ((D_SBP, "D_SBP:"), (Di_SBP, "Di_SBP:"), (Di_sparse, "Di_sparse:"),
                   (Di_banded, "Di_banded:"), (Di_full, "Di_full:"))
  doit(D, label, du, u)
end
Di_SBP * u ≈ Di_sparse * u ≈ Di_banded * u ≈ Di_full * u = true
D_SBP:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     753.790 ns (0.00% GC)
  median time:      1.007 μs (0.00% GC)
  mean time:        965.844 ns (0.00% GC)
  maximum time:     1.520 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     119
Di_SBP:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     1.300 μs (0.00% GC)
  median time:      1.750 μs (0.00% GC)
  mean time:        1.711 μs (0.00% GC)
  maximum time:     5.070 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     10
Di_sparse:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     5.783 μs (0.00% GC)
  median time:      6.767 μs (0.00% GC)
  mean time:        7.152 μs (0.00% GC)
  maximum time:     14.133 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     6
Di_banded:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     8.700 μs (0.00% GC)
  median time:      11.834 μs (0.00% GC)
  mean time:        11.697 μs (0.00% GC)
  maximum time:     27.167 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     3
Di_full:
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     321.406 μs (0.00% GC)
  median time:      342.907 μs (0.00% GC)
  mean time:        346.281 μs (0.00% GC)
  maximum time:     671.013 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1