Benchmark and Comparison with Matlab, Astra, torch-radon

Tested on a AMD Ryzen 9 5900X 12-Core Processor with 24 Threads and a NVIDIA GeForce RTX 3060 with Julia 1.10.0 on Ubuntu 22.04.

Results

	RadonKA.jl CPU	RadonKA.jl CUDA	Matlab CPU	Astra CPU	Astra CUDA	torch-radon CUDA
2D sample - Radon	1.1s	0.07s	0.39s	7.0s	0.025s	0.011s
2D sample - Backprojection	1.4s	0.50s	0.37s	6.4s	N/A	0.008s
3D sample - Radon	7.4s	0.28s	9.01s	N/A	1.12s	0.062s
3D sample - Backprojection	7.9s	0.53s	3.24s	N/A	N/A	0.063s

RadonKA.jl

 using IndexFunArrays, ImageShow, Plots, ImageIO, PlutoUI, PlutoTest, TestImages
 using RadonKA
 using CUDA, CUDA.CUDAKernels
 using BenchmarkTools
 
 sample_2D = Float32.(testimage("resolution_test_1920"));
 sample_2D_c = CuArray(sample_2D);
 simshow(sample_2D)
 
 angles = range(0, 2π, 500);
 angles_c = CuArray(angles);
 
 # run those cells multiple times
 sinogram = radon(sample_2D, angles);
 @btime sinogram = radon($sample_2D, $angles);
 simshow(sinogram)
 @btime sample_backproject = backproject($sinogram, $angles);
 
 @btime CUDA.@sync sinogram_c = radon($sample_2D_c, $angles_c);
 sinogram_c = radon(sample_2D_c, angles_c);
 @btime CUDA.@sync sample_backproject_c = backproject($sinogram_c, $angles_c);
 
 
 sample_3D = randn(Float32, (512, 512, 100));
 sample_3D_c = CuArray(sample_3D)
 
 sinogram_3D = radon(sample_3D, angles);
 @btime radon($sample_3D, $angles)
 @btime backproject($sinogram_3D, $angles)
 
 sinogram_3D_c = radon(sample_3D_c, angles_c)
 @btime CUDA.@sync radon($sample_3D_c, $angles_c)
 @btime CUDA.@sync backproject($sinogram_3D_c, $angles_c)

torch-radon

import torch
import torch_radon


volume = torch_radon.volumes.Volume2D()
angles = torch.tensor(np.linspace(0, 2*np.pi, 500), dtype=torch.float32, device="cuda")
sample = torch.rand(1,1920, 1920, device="cuda")
radon = torch_radon.ParallelBeam(volume=volume, angles=angles, det_spacing=1.0, det_count=1920)


%%timeit
radon.forward(sample)
torch.cuda.synchronize()

sinogram = radon.forward(sample)

%%timeit
radon.backward(sinogram)
torch.cuda.synchronize()



angles = torch.tensor(np.linspace(0, 2*np.pi, 500), dtype=torch.float32, device="cuda")
sample = torch.rand(100, 512, 512, device="cuda")
radon = torch_radon.ParallelBeam(volume=volume, angles=angles, det_spacing=1.0, det_count=512)


%%timeit
radon.forward(sample)
torch.cuda.synchronize()

sinogram = radon.forward(sample)

%%timeit
radon.backward(sinogram)
torch.cuda.synchronize()

Matlab (R2023a)

arr = single(imread("/tmp/sample.jpg")); 
%arr = rand(1920, 1920, "single");
theta = linspace(0, 360, 500);

tic;
R = radon(arr, theta);
toc;
R = R / max(R(:));
imwrite(R, "/tmp/matlab_sinogram.png");

tic; 
iR = backproject(R, theta, "linear", "none");
toc;
iR = iR / max(iR(:));
imwrite(iR, "/tmp/matlab_backproject.png");



x = (rand(100, 512, 512, 'single'));

theta = linspace(0, 360, 500);
y = (zeros(size(x, 1), size(radon(squeeze(x(1, :, :)), theta), 1), size(radon(squeeze(x(1, :, :)), theta), 2), 'single'));
ix = zeros(100, 514, 514, "single");

tic;
for i = 1:size(x, 1)
    y(i, :, :) = radon(squeeze(x(i, :, :)), theta);
end
toc;


tic;
for i = 1:size(y, 1)
    ix(i, :, :) = backproject(squeeze(y(i, :, :)), theta);
end
toc;

Astra

Some of the benchmarks did not run properly or were providing non-sense. Astra's docs are unfortunately slightly confusing...

import numpy as np
import matplotlib.pyplot as plt
import imageio.v3 as iio
import astra

im = np.array(iio.imread('/tmp/sample.png'), dtype=np.float32)

plt.imshow(im)


vol_geom = astra.create_vol_geom(1920, 1920)
angles =  np.linspace(0,2 * np.pi,500)

proj_geom = astra.create_proj_geom('parallel', 1.0, 1920, angles)
proj_id = astra.create_projector('line', proj_geom,vol_geom)


%%time
sinogram_id, sinogram = astra.create_sino(im, proj_id)
np.copy(sinogram);

plt.imshow(sinogram)

proj_geom = astra.create_proj_geom('parallel', 1.0, 1920, angles)
proj_id = astra.create_projector('cuda', proj_geom,vol_geom)

%%time
sinogram_id, sinogram = astra.create_sino(im, proj_id)
np.copy(sinogram);

rec_id = astra.data2d.create("-vol", vol_geom)
cfg = astra.astra_dict('BP')
cfg["ReconstructionDataId"] = rec_id
cfg["ProjectionDataId"] = sinogram_id
cfg["ProjectorId"] = proj_id
# Create the algorithm object from the configuration structure
alg_id = astra.algorithm.create(cfg)
# Run back-projection and get the reconstruction

%%time
astra.algorithm.run(alg_id)
f_rec = astra.data2d.get(rec_id)
#np.copy(f_rec)


im_3D = np.random.rand(100, 512, 512)

vol_geom = astra.create_vol_geom(512, 512, 100)

proj_geom = astra.create_proj_geom('parallel3d', 1.0, 1.0, 512, 512, angles)
#proj_id = astra.create_projector('line', proj_geom,vol_geom)

%%time
proj_id, proj_data = astra.create_sino3d_gpu(im_3D, proj_geom, vol_geom)
np.copy(proj_data)


rec_id = astra.data3d.create('-vol', vol_geom)

# Set up the parameters for a reconstruction algorithm using the CUDA
cfg = astra.astra_dict('BP3D_CUDA')
cfg['ReconstructionDataId'] = rec_id
cfg['ProjectionDataId'] = proj_id


# Create the algorithm object from the configuration structure
alg_id = astra.algorithm.create(cfg)

%time
astra.algorithm.run(alg_id, 1)
rec = astra.data3d.get(rec_id)
np.copy(rec)