2023-12-18 14:31:52 +01:00
|
|
|
using MetagraphOptimization
|
|
|
|
using LIKWID
|
|
|
|
using CUDA
|
|
|
|
using UUIDs
|
2024-02-02 06:56:16 +01:00
|
|
|
using DataFrames
|
|
|
|
using CSV
|
|
|
|
using Random
|
2024-02-13 15:54:09 +01:00
|
|
|
using BenchmarkTools
|
|
|
|
using Dates
|
2024-02-02 06:56:16 +01:00
|
|
|
|
2024-02-06 09:35:04 +01:00
|
|
|
# Global switch: while this is `true`, `bench_process` overrides its `use_gpu`
# keyword argument with `false`, so no GPU benchmark is executed at all.
DISABLE_GPU = true
|
2024-02-02 06:56:16 +01:00
|
|
|
|
2024-02-13 15:54:09 +01:00
|
|
|
"""
    log(x...)

Print one log line to `stdout`: the current timestamp (`now()`), a space, and all
arguments joined by single spaces. `stdout` is flushed afterwards so the line is
visible immediately, even while a long-running benchmark is in progress.

Returns whatever `flush(stdout)` returns.

Note that the name coincides with `Base.log`; inside this script `log` refers to
this logging helper.
"""
function log(x...)
    # `join` already yields the complete message; print it as a single string
    # instead of splatting it into one `println` argument per character.
    println(now(), " ", join(x, " "))
    return flush(stdout)
end
|
|
|
|
|
|
|
|
# Results are collected in one CSV file per thread count.
results_filename = "bench_results_$(Threads.nthreads()).csv"

# Schema of the results table; `bench_process` pushes one row per benchmarked process.
df = DataFrame(
    process_name = String[],
    graph_gen_time = Float64[],
    optimization_time = Float64[],
    function_generation_time = Float64[],
    graph_nodes = Int[],
    graph_edges = Int[],
    graph_mem = Float64[],
    cpu_threads = Int[],
    n_inputs = Int[],
    nflops_likwid = Int[],
    cpu_time = Float64[],
    cpu_std = Float64[],
    cpu_rate = Float64[],
    cpu_gflops = Float64[],
    gpu_name = String[],
    gpu_time = Float64[],
    gpu_std = Float64[],
    gpu_rate = Float64[],
    gpu_gflops = Float64[],
)

# if they exist, read existing results and append new ones
if isfile(results_filename)
    # `stringtype = String` keeps the text columns as plain `String`. By default CSV.jl
    # infers fixed-width inline string types from the values currently in the file, and
    # pushing a longer `process_name` or `gpu_name` into such a column later would throw.
    df = CSV.read(results_filename, DataFrame; stringtype = String)
end

# Number of inputs generated and evaluated per benchmark in `bench_process`.
nInputs = 1_000
|
2023-12-18 14:31:52 +01:00
|
|
|
|
|
|
|
"""
    cpu_bench(compute_function, inputs)

Benchmark `compute_function` on the CPU by applying it to every element of `inputs`
inside a `Threads.@threads` loop. The loop as a whole is what gets sampled by
`@benchmark` (with `gcsample = true` and a budget of `seconds = 300`).

Returns the tuple `(time, rate, std)`:
- `time`: mean of the sample times divided by `1e9` (BenchmarkTools records
  nanoseconds, so this is seconds per pass over `inputs`),
- `rate`: `length(inputs) / time`, i.e. evaluated inputs per second,
- `std`: standard deviation of the sample times, scaled the same way as `time`.

NOTE(review): `mean` and `std` are not imported explicitly in the visible part of this
file; presumably one of the loaded packages provides them — confirm.
"""
function cpu_bench(compute_function, inputs)
    # `@invokelatest` is kept from the original; presumably it is required because the
    # compute function is generated at runtime (world-age issue) — confirm.
    trial = @benchmark begin
        @inbounds Threads.@threads for i in eachindex($inputs)
            @invokelatest $compute_function($inputs[i])
        end
    end gcsample = true seconds = 300

    mean_seconds = mean(trial.times) / 1e9
    std_seconds = std(trial.times) / 1e9
    inputs_per_second = length(inputs) / mean_seconds

    return (mean_seconds, inputs_per_second, std_seconds)
end
|
|
|
|
|
|
|
|
"""
    gpu_bench(compute_function, inputs)

Benchmark `compute_function` by broadcasting it over `inputs` and waiting for
completion with `CUDA.@sync`. The broadcast as a whole is what gets sampled by
`@benchmark` (with `gcsample = true` and a budget of `seconds = 300`).

Returns the tuple `(time, rate, std)`:
- `time`: mean of the sample times divided by `1e9` (BenchmarkTools records
  nanoseconds, so this is seconds per broadcast over `inputs`),
- `rate`: `length(inputs) / time`, i.e. evaluated inputs per second,
- `std`: standard deviation of the sample times, scaled the same way as `time`.

NOTE(review): `mean` and `std` are not imported explicitly in the visible part of this
file; presumably one of the loaded packages provides them — confirm.
"""
function gpu_bench(compute_function, inputs)
    trial = @benchmark begin
        CUDA.@sync $compute_function.($inputs)
    end gcsample = true seconds = 300

    mean_seconds = mean(trial.times) / 1e9
    std_seconds = std(trial.times) / 1e9
    inputs_per_second = length(inputs) / mean_seconds

    return (mean_seconds, inputs_per_second, std_seconds)
end
|
|
|
|
|
|
|
|
"""
    bench_process(process, process_name, graph, func, gen_time, opt_time, func_time;
                  use_likwid = true, use_gpu = true)

Benchmark the compute function `func` belonging to `graph`/`process`, log a summary and
append one row with the results to the global DataFrame `df`.

# Arguments
- `process`: process description; inputs are generated with `gen_process_input`.
- `process_name`: label used in the log and in the `process_name` result column. The
  literal name `"warmup"` suppresses recording a result row.
- `graph`: the DAG whose `GraphProperties` (node/edge count, compute effort) and memory
  footprint (`MetagraphOptimization.mem`) are recorded.
- `func`: compute function, called with a single generated input.
- `gen_time`, `opt_time`, `func_time`: previously measured timings, stored unchanged in
  the `graph_gen_time`, `optimization_time` and `function_generation_time` columns.

# Keywords
- `use_likwid`: if `true`, the FLOP count of one `func` call is measured with LIKWID
  (`"FLOPS_DP"` group) and used for the GFLOPS figures; otherwise the graph's
  `computeEffort` is used.
- `use_gpu`: if `true`, additionally benchmark on the first CUDA device. Ignored (treated
  as `false`) while the global `DISABLE_GPU` is `true`.

Reads the globals `nInputs`, `DISABLE_GPU` and `df`. Returns `nothing`.
"""
function bench_process(
    process::MetagraphOptimization.AbstractProcessDescription,
    process_name::String,
    graph::DAG,
    func,
    gen_time::Float64,
    opt_time::Float64,
    func_time::Float64;
    use_likwid = true,
    use_gpu = true,
)
    log("\n--- Benchmarking $(process_name) ---")
    if DISABLE_GPU
        use_gpu = false
    end

    graph_props = GraphProperties(graph)
    NFLOPs = graph_props.computeEffort
    nflops_likwid = 0
    if use_likwid
        input = gen_process_input(process)
        func(input) # compile first

        # get rid of annoying output to console
        oldstd = stdout
        redirect_stdout(devnull)
        _, events = try
            @perfmon "FLOPS_DP" func(input)
        finally
            # recover original stdout, also when the measurement throws; otherwise all
            # subsequent log output would silently go to devnull
            redirect_stdout(oldstd)
        end

        NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
        nflops_likwid = NFLOPs
    end

    log("Generating $nInputs inputs with $(Threads.nthreads()) threads...")

    inputs = Vector{typeof(gen_process_input(process))}()
    resize!(inputs, nInputs)

    # Split the index range into at most `Threads.nthreads()` contiguous chunks and fill
    # each chunk in its own task. Every task works on a private copy of the process
    # description, so no copy is shared between concurrently running tasks. This does
    # not rely on `Threads.threadid()`, which is not a safe index into per-thread storage.
    chunk_length = max(1, cld(length(inputs), Threads.nthreads()))
    @sync for chunk in Iterators.partition(eachindex(inputs), chunk_length)
        local_process = copy(process)
        Threads.@spawn for i in chunk
            inputs[i] = gen_process_input(local_process)
        end
    end

    log("Benchmarking CPU with $(Threads.nthreads()) threads...")
    (time_cpu, rate_cpu, std_cpu) = cpu_bench(func, inputs)
    flops_cpu = (rate_cpu * NFLOPs) / 10^9

    # Defaults recorded when the GPU benchmark is skipped.
    time_gpu = 0.0
    std_gpu = 0.0
    rate_gpu = 0.0
    flops_gpu = 0.0
    gpu_name = "none"
    if use_gpu
        log("Benchmarking GPU...")
        gpu_name = "$(name(first(CUDA.devices())))"
        cuInputs = CuArray(inputs)
        (time_gpu, rate_gpu, std_gpu) = gpu_bench(func, cuInputs)
        flops_gpu = (rate_gpu * NFLOPs) / 10^9
    else
        log("Skipping GPU...")
    end

    log("\nBenchmark Summary for $(process):")

    if use_likwid
        log("Measured FLOPS by LIKWID: $NFLOPs")
    else
        log("Total graph compute effort: $NFLOPs")
    end
    log("Total input size: $(bytes_to_human_readable(Base.summarysize(inputs)))")
    log("CPU, $(Threads.nthreads()) threads")
    log(" Time: $time_cpu")
    log(" Rate: $rate_cpu")
    log(" GFLOPS: $flops_cpu")
    if use_gpu
        log("GPU, $gpu_name")
        log(" Time: $time_gpu")
        log(" Rate: $rate_gpu")
        log(" GFLOPS: $flops_gpu")
    end

    # Warmup runs only exist to trigger compilation; they are not recorded.
    if (process_name != "warmup")
        push!(
            df,
            Dict(
                :process_name => process_name,
                :graph_gen_time => gen_time,
                :optimization_time => opt_time,
                :function_generation_time => func_time,
                :graph_nodes => graph_props.noNodes,
                :graph_edges => graph_props.noEdges,
                :graph_mem => MetagraphOptimization.mem(graph),
                :cpu_threads => Threads.nthreads(),
                :n_inputs => nInputs,
                :nflops_likwid => nflops_likwid,
                :cpu_time => time_cpu,
                :cpu_std => std_cpu,
                :cpu_rate => rate_cpu,
                :cpu_gflops => flops_cpu,
                :gpu_name => gpu_name,
                :gpu_time => time_gpu,
                :gpu_std => std_gpu,
                :gpu_rate => rate_gpu,
                :gpu_gflops => flops_gpu,
            ),
        )
    end

    return nothing
end
|
|
|
|
|
|
|
|
# "Mock" machine description that only contains the CPU: a single NUMA node plus a
# 1x1 matrix holding -1.0 as the second constructor argument.
machine = let
    cpu_node = MetagraphOptimization.NumaNode(
        0,
        1,
        MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
        -1.0,
        UUIDs.uuid1(),
    )
    Machine([cpu_node], fill(-1.0, 1, 1))
end
|
|
|
|
|
|
|
|
# The benchmark sections cannot be wrapped in functions: the world age has to advance
# after a compute function has been generated, and that only happens in global scope.

## -- WARMUP: run both model types once so that everything is compiled beforehand

optimizer = RandomWalkOptimizer(MersenneTwister(0))

# QED warmup: 2-photon compton, optimized with 200 random-walk steps
process = parse_process("ke->kke", QEDModel())
gen_time = @elapsed begin
    graph = gen_graph(process)
end
opt_time = @elapsed begin
    optimize!(optimizer, graph, 200)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, "warmup", graph, compute_func,
    gen_time, opt_time, func_gen_time,
)

optimizer = ReductionOptimizer()

# ABC warmup: AB->AB^3, reduced to its fixpoint
process = parse_process("AB->ABBB", ABCModel())
gen_time = @elapsed begin
    graph = parse_dag("input/AB->ABBB.txt", ABCModel())
end
opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, "warmup", graph, compute_func,
    gen_time, opt_time, func_gen_time,
)

## -- WARMUP END
|
|
|
|
|
|
|
|
# Optimizer used by the benchmark sections that follow.
optimizer = ReductionOptimizer()

# compton (ke->ke): benchmark the freshly generated graph, then the reduced graph
process = parse_process("ke->ke", QEDModel())
gen_time = @elapsed begin
    graph = gen_graph(process)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " not optimized"), graph, compute_func,
    gen_time, 0.0, func_gen_time,
)

opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time,
)

# Persist everything measured so far.
CSV.write(results_filename, df)
|
|
|
|
|
2023-12-18 14:31:52 +01:00
|
|
|
# 2-photon compton (ke->kke): benchmark the freshly generated graph, then the reduced graph
process = parse_process("ke->kke", QEDModel())
gen_time = @elapsed begin
    graph = gen_graph(process)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " not optimized"), graph, compute_func,
    gen_time, 0.0, func_gen_time,
)

opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time,
)

# Persist everything measured so far.
CSV.write(results_filename, df)
|
|
|
|
|
2023-12-18 14:31:52 +01:00
|
|
|
# 3-photon compton (ke->kkke): benchmark the freshly generated graph, then the reduced graph
process = parse_process("ke->kkke", QEDModel())
gen_time = @elapsed begin
    graph = gen_graph(process)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " not optimized"), graph, compute_func,
    gen_time, 0.0, func_gen_time,
)

opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time,
)

# Persist everything measured so far.
CSV.write(results_filename, df)
|
|
|
|
|
2024-02-02 06:56:16 +01:00
|
|
|
# 4-photon compton (ke->kkkke): the unoptimized graph is benchmarked without GPU,
# the reduced graph with the default settings
process = parse_process("ke->kkkke", QEDModel())
gen_time = @elapsed begin
    graph = gen_graph(process)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " not optimized"), graph, compute_func,
    gen_time, 0.0, func_gen_time;
    use_gpu = false,
)

opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time,
)

# Persist everything measured so far.
CSV.write(results_filename, df)
|
|
|
|
|
2024-02-02 06:56:16 +01:00
|
|
|
# 5-photon compton (ke->kkkkke): both the unoptimized and the reduced graph are
# benchmarked without GPU
process = parse_process("ke->kkkkke", QEDModel())
gen_time = @elapsed begin
    graph = gen_graph(process)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " not optimized"), graph, compute_func,
    gen_time, 0.0, func_gen_time;
    use_gpu = false,
)

opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time;
    use_gpu = false,
)

# Persist everything measured so far.
CSV.write(results_filename, df)
|
|
|
|
|
2024-02-02 06:56:16 +01:00
|
|
|
# 6-photon compton (ke->kkkkkke): only the reduced graph is benchmarked, without GPU
process = parse_process("ke->kkkkkke", QEDModel())
gen_time = @elapsed begin
    graph = gen_graph(process)
end
opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time;
    use_gpu = false,
)

# Persist everything measured so far.
CSV.write(results_filename, df)
|
|
|
|
|
2023-12-18 14:31:52 +01:00
|
|
|
# AB->AB: the graph is parsed from file; benchmark it as parsed, then reduced
process = parse_process("AB->AB", ABCModel())
gen_time = @elapsed begin
    graph = parse_dag("input/AB->AB.txt", ABCModel())
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " not optimized"), graph, compute_func,
    gen_time, 0.0, func_gen_time,
)

opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time,
)

# Persist everything measured so far.
CSV.write(results_filename, df)
|
|
|
|
|
2023-12-18 14:31:52 +01:00
|
|
|
# AB->AB^3: the graph is parsed from file; benchmark it as parsed, then reduced
process = parse_process("AB->ABBB", ABCModel())
gen_time = @elapsed begin
    graph = parse_dag("input/AB->ABBB.txt", ABCModel())
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " not optimized"), graph, compute_func,
    gen_time, 0.0, func_gen_time,
)

opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time,
)

# Persist everything measured so far.
CSV.write(results_filename, df)
|
|
|
|
|
2023-12-18 14:31:52 +01:00
|
|
|
# AB->AB^5: the graph is parsed from file; both the parsed and the reduced graph are
# benchmarked without GPU
process = parse_process("AB->ABBBBB", ABCModel())
gen_time = @elapsed begin
    graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " not optimized"), graph, compute_func,
    gen_time, 0.0, func_gen_time;
    use_gpu = false,
)

opt_time = @elapsed begin
    optimize_to_fixpoint!(optimizer, graph)
end
func_gen_time = @elapsed begin
    compute_func = get_compute_function(graph, process, machine)
end
bench_process(
    process, string(process, " reduced"), graph, compute_func,
    gen_time, opt_time, func_gen_time;
    use_gpu = false,
)

# Persist the final results.
CSV.write(results_filename, df)
|