In [1]:
using MetagraphOptimization
using BenchmarkTools

# Display the number of threads available to this Julia session; the benchmark
# cells further down report their per-thread rates relative to this value.
Threads.nthreads()

12

In [6]:
# Set-up shared by the later cells: machine info, the QED model, and the parsed process.
machine = get_machine_info()
model = QEDModel()
process = parse_process("ke->kkkkke", model)

# Generate 1000 process inputs. An integer range is used for the count; the former
# `1:1e3` built a Float64 range only to drive the iteration.
inputs = [gen_process_input(process) for _ in 1:1_000];

# Kept as the last expression so that the graph summary is displayed by the notebook.
graph = gen_graph(process)

Graph:
  Nodes: Total: 15866, DataTask: 7937, ComputeTaskQED_S2: 720, 
         ComputeTaskQED_Sum: 1, ComputeTaskQED_V: 4320, ComputeTaskQED_S1: 2880, 
         ComputeTaskQED_U: 8
  Edges: 21617
  Total Compute Effort: 66249.0
  Total Data Transfer: 1.314048e6
  Total Compute Intensity: 0.050415966540035065


In [7]:
optimizer = ReductionOptimizer()

# The compute function is requested BEFORE the optimization call below, so
# `compute_compton` corresponds to the graph as it was generated (unreduced).
# NOTE(review): this presumes get_compute_function does not track later in-place
# changes to `graph` — confirm against the MetagraphOptimization documentation.
compute_compton = get_compute_function(graph, process, machine)

# Modifies `graph` in place; the summary displayed afterwards shows fewer nodes and
# edges than the one printed when the graph was generated.
optimize_to_fixpoint!(optimizer, graph)

# Last expression of the cell: display the summary of the graph after optimization.
graph

Graph:
  Nodes: Total: 2234, DataTask: 1121, ComputeTaskQED_S2: 720, 
         ComputeTaskQED_Sum: 1, ComputeTaskQED_V: 312, ComputeTaskQED_S1: 72, 
         ComputeTaskQED_U: 8
  Edges: 3977
  Total Compute Effort: 11313.0
  Total Data Transfer: 659712.0
  Total Compute Intensity: 0.017148392025611175


In [8]:
# Compute function for `graph` in its current state, i.e. after the
# `optimize_to_fixpoint!` call in the previous cell.
compute_compton_reduced = get_compute_function(graph, process, machine)

# One result slot per input. The buffer used to be allocated with 1e6 entries (via a
# Float64 range) although only `length(inputs)` of them are ever written.
outputs = zeros(ComplexF64, length(inputs))

# Evaluate every input once per benchmark sample, spread over the available threads
# with the `:static` schedule. Each iteration writes only to its own `outputs[i]`.
bench_result = @benchmark begin
    Threads.@threads :static for i in eachindex(inputs)
        outputs[i] = compute_compton_reduced(inputs[i])
    end
end

# BenchmarkTools records sample times in nanoseconds; dividing by 1.0e9 gives seconds,
# so `rate` is the number of evaluated inputs per second.
rate = length(inputs) / (mean(bench_result.times) / 1.0e9)
rate_per_thread = rate / Threads.nthreads()
println("Calculated $(round(rate)) results/s, $(round(rate_per_thread)) results/s per thread for $(process) ($(Threads.nthreads()) threads)")

Calculated 133942.0 results/s, 11162.0 results/s per thread for QED Process: 'ke->kkkkke' (12 threads)


In [9]:
# Baseline measurement: same inputs, output buffer and thread schedule as the previous
# benchmark cell, but calling `compute_compton`, which was obtained before the graph
# was optimized.
bench_result = @benchmark begin
    Threads.@threads :static for idx in eachindex(inputs)
        outputs[idx] = compute_compton(inputs[idx])
    end
end

# Mean sample time converted from nanoseconds (BenchmarkTools' unit) to seconds.
mean_time_s = mean(bench_result.times) / 1.0e9

# Throughput in evaluated inputs per second, overall and per thread.
rate = length(inputs) / mean_time_s
rate_per_thread = rate / Threads.nthreads()

println("Calculated $(round(rate)) results/s, $(round(rate_per_thread)) results/s per thread for $(process) ($(Threads.nthreads()) threads)")

Calculated 17124.0 results/s, 1427.0 results/s per thread for QED Process: 'ke->kkkkke' (12 threads)
