using Lux, Reactant, Random
# Seed the default RNG so parameter initialisation and the random inputs
# below are reproducible from run to run.
rng = Random.default_rng()
Random.seed!(rng, 0)

# Callable used below to move arrays and parameter/state trees to the
# Reactant device.
dev = reactant_device()

# Three Dense layers (2 -> 32 -> 32 -> 2); the first two apply `gelu`.
model = Chain(
    Dense(2 => 32, gelu),
    Dense(32 => 32, gelu),
    Dense(32 => 2),
)
ps, st = Lux.setup(rng, model)

# For second-dimension sizes 4, 8, ..., 256: run the same forward pass through
# the plain Lux call and through the Reactant-compiled call, then print the
# sum of the element-wise differences between the two predictions.
for i in 2 .^ (2:8)
    # Trigger a garbage collection before this iteration's arrays are created.
    GC.gc()

    # Fresh 2 x i Float32 input plus device copies of input, parameters, state.
    input = randn(rng, Float32, 2, i)
    input_dev = dev(input)
    params_dev = dev(ps)
    state_dev = dev(st)

    # Reference prediction from the uncompiled model (returned state dropped).
    reference = first(model(input, ps, Lux.testmode(st)))

    # Compile for this input shape, then evaluate the compiled callable.
    compiled_fn = @compile model(input_dev, params_dev, Lux.testmode(state_dev))
    compiled_out = first(compiled_fn(input_dev, params_dev, Lux.testmode(state_dev)))

    # Signed sum of differences; `Array` brings the device result back to host.
    delta = sum(reference .- Array(compiled_out))
    println("second dim's number: $i, result difference between CPU and GPU: ", delta)
end
Julia Version 1.12.6
Commit 15346901f00 (2026-04-09 19:20 UTC)
Build Info:
Official https://julialang.org release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 6 × AMD Ryzen 5 3500X 6-Core Processor
WORD_SIZE: 64
LLVM: libLLVM-18.1.7 (ORCJIT, znver2)
GC: Built with stock GC
Threads: 4 default, 1 interactive, 4 GC (on 6 virtual cores)
Environment:
JULIA_NUM_THREADS = 4
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1776236078.926649 151598 service.cc:154] XLA service 0x3793d50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1776236078.926701 151598 service.cc:170] StreamExecutor [0]: NVIDIA GeForce GTX 1650, Compute Capability 7.5 (Driver: 12.2.0[535.129.3]; Runtime: 12.9.0; Toolkit: 12.9.0; DNN: 9.14.0)
I0000 00:00:1776236078.927121 151598 se_gpu_pjrt_client.cc:1540] Using BFC allocator.
I0000 00:00:1776236078.927173 151598 gpu_helpers.cc:141] XLA backend allocating 3067871232 bytes on device 0 for BFCAllocator.
I0000 00:00:1776236078.927226 151598 gpu_helpers.cc:183] XLA backend will use up to 1022623744 bytes on device 0 for CollectiveBFCAllocator.
I0000 00:00:1776236078.930612 151598 cuda_dnn.cc:461] Loaded cuDNN version 91400
second dim's number: 4, result difference between CPU and GPU: -5.662441e-7
second dim's number: 8, result difference between CPU and GPU: -7.450581e-9
second dim's number: 16, result difference between CPU and GPU: -8.940697e-8
second dim's number: 32, result difference between CPU and GPU: 3.2893305 # Inaccurate result: only this size deviates; every other size differs by ~1e-6 or less
second dim's number: 64, result difference between CPU and GPU: 7.1525574e-7
second dim's number: 128, result difference between CPU and GPU: -6.891787e-7
second dim's number: 256, result difference between CPU and GPU: -1.4603138e-6
Program code:
Result: