Add reduction benchmarks

This commit is contained in:
2024-02-20 21:18:19 +01:00
parent fce9110e2a
commit 17c2df800c
338 changed files with 15341 additions and 385 deletions

View File

@ -5,5 +5,6 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"

View File

@ -0,0 +1,115 @@
using MetagraphOptimization
using CUDA
using UUIDs
using BenchmarkTools
using DataFrames
using CSV
# CSV file that accumulates the benchmark results of this script across runs.
results_filename = "bench_results_reduction_steps.csv"

# Empty result table: one row is pushed per measurement in the benchmark loop below.
df = DataFrame(
    threads = Int[],
    process = String[],
    operations = Int[],
    cumulative_optimization_time = Float64[],
    graph_nodes = Int[],
    graph_edges = Int[],
    graph_ce = Float64[],
    graph_dt = Float64[],
    graph_ci = Float64[],
    gen_func_t = Float64[],
    cpu_compile_t = Float64[],
    cpu_st_t = Float64[],
    cpu_mt_t = Float64[],
    gpu_compile_t = Float64[],
    gpu_t = Float64[],
)

# if they exist, read existing results and append new ones
if isfile(results_filename)
    # `stringtype = String` keeps `process` a plain `Vector{String}`. By default CSV.jl
    # returns short strings as fixed-width inline strings (e.g. `String7`), and pushing a
    # longer process name such as "ke->kkke" into such a column throws.
    df = CSV.read(results_filename, DataFrame; stringtype = String)
end
"""
    bench(func, inputs)

Measure `func` on the CPU for the given collection of `inputs`.

Three measurements are taken: one plain call on `inputs[1]`, a `@benchmark` of `func`
broadcast over all inputs, and a `@benchmark` of a `Threads.@threads` loop that calls
`func` on every input.

Returns a named tuple with the fields `cpu_compile_time`, `gpu_compile_time`,
`cpu_single_thread_time`, `cpu_multi_thread_time` and `gpu_time`. The GPU fields are
always `0.0` here. All values are in seconds.
"""
function bench(func, inputs)
    # Single un-benchmarked call; reported as the compile time.
    # NOTE(review): presumably this is the first call of a freshly generated `func`, so it
    # includes JIT compilation - confirm against the caller.
    first_call_seconds = @elapsed func(inputs[1])

    serial_trial = @benchmark $func.($inputs)

    threaded_trial = @benchmark Threads.@threads for idx in eachindex($inputs)
        $func($inputs[idx])
    end

    # BenchmarkTools records sample times in nanoseconds; dividing by 1e9 yields seconds.
    serial_seconds = mean(serial_trial.times) / 1e9
    threaded_seconds = mean(threaded_trial.times) / 1e9

    return (
        cpu_compile_time = first_call_seconds,
        gpu_compile_time = 0.0,
        cpu_single_thread_time = serial_seconds,
        cpu_multi_thread_time = threaded_seconds,
        gpu_time = 0.0,
    )
end
# bench and produce data
n_inputs = 10_000
optimizer = ReductionOptimizer()

# Each entry pairs a process string with the step size passed to `optimize!` between two
# consecutive measurements of that process.
processes = [
    ("ke->kke", 50),
    ("ke->ke", 1),
    ("ke->kke", 1),
    ("ke->kkke", 5),
    ("ke->kkkke", 5),
    ("ke->kkkkke", 10),
    ("ke->kkkkkke", 20),
]

for (process_str, STEPSIZE) in processes
    applied_steps = 0
    optimization_seconds = 0

    # preparation of graph
    numa_node = MetagraphOptimization.NumaNode(
        0,
        1,
        MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
        -1.0,
        UUIDs.uuid1(),
    )
    machine = Machine([numa_node], [-1.0;;])

    process = parse_process(process_str, QEDModel())
    graph = gen_graph(process)
    inputs = [gen_process_input(process) for _ in 1:n_inputs]

    # Untimed call ahead of the measurement loop.
    # NOTE(review): presumably a warm-up so the first `func_gen_time` is not dominated by
    # compilation of the generator itself - confirm.
    get_compute_function(graph, process, machine)

    while true
        func_gen_time = @elapsed func = get_compute_function(graph, process, machine)
        timings = bench(func, inputs)
        props = get_properties(graph)

        # Values are positional and must follow the column order of `df`.
        row = (
            Threads.nthreads(),
            process_str,
            applied_steps,
            optimization_seconds,
            props.noNodes,
            props.noEdges,
            props.computeEffort,
            props.data,
            props.computeIntensity,
            func_gen_time,
            timings.cpu_compile_time,
            timings.cpu_single_thread_time,
            timings.cpu_multi_thread_time,
            timings.gpu_compile_time,
            timings.gpu_time,
        )
        push!(df, row)

        # Written after every measurement so partial results are on disk if the run stops.
        CSV.write(results_filename, df)

        # The fully reduced graph has just been measured; nothing left to optimize.
        fixpoint_reached(optimizer, graph) && break

        optimization_seconds += @elapsed optimize!(optimizer, graph, STEPSIZE)
        applied_steps += STEPSIZE
    end
end

CSV.write(results_filename, df)

View File

@ -0,0 +1,118 @@
using MetagraphOptimization
using CUDA
using UUIDs
using Dates
using BenchmarkTools
using DataFrames
using CSV
# CSV file that accumulates the GPU benchmark results of this script across runs.
results_filename = "bench_results_reduction_steps_gpu.csv"

# Empty result table: one row is pushed per measurement in the benchmark loop below.
df = DataFrame(
    threads = Int[],
    process = String[],
    operations = Int[],
    cumulative_optimization_time = Float64[],
    graph_nodes = Int[],
    graph_edges = Int[],
    graph_ce = Float64[],
    graph_dt = Float64[],
    graph_ci = Float64[],
    gen_func_t = Float64[],
    cpu_compile_t = Float64[],
    cpu_st_t = Float64[],
    cpu_mt_t = Float64[],
    gpu_compile_t = Float64[],
    gpu_t = Float64[],
)

# if they exist, read existing results and append new ones
if isfile(results_filename)
    # `stringtype = String` keeps `process` a plain `Vector{String}`. By default CSV.jl
    # returns short strings as fixed-width inline strings (e.g. `String7`), and pushing a
    # longer process name such as "ke->kkke" into such a column throws.
    df = CSV.read(results_filename, DataFrame; stringtype = String)
end
"""
    log(x...)

Print the current timestamp followed by the arguments `x`, separated by single spaces, as
one line on `stdout`, then flush `stdout`.

Note that this definition is a script-local `log`; it is unrelated to `Base.log`.
"""
function log(x...)
    # `Dates.now()` requires `using Dates`; a bare `now()` is not available from Base.
    # The joined string is printed as a whole instead of being splatted into characters.
    println(Dates.now(), " ", join(x, " "))
    return flush(stdout)
end
"""
    bench(func, inputs)

Measure `func` broadcast over `inputs` on the GPU.

Two measurements are taken: one plain broadcast over the first two inputs, and a
`@benchmark` of the broadcast over all inputs. Both are wrapped in `CUDA.@sync` so the
timings cover the execution of the GPU work and not just its asynchronous launch.

Returns a named tuple with the fields `cpu_compile_time`, `gpu_compile_time`,
`cpu_single_thread_time`, `cpu_multi_thread_time` and `gpu_time`. The CPU fields are
always `0.0` here. All values are in seconds.
"""
function bench(func, inputs)
    # Un-benchmarked broadcast over two elements; reported as the GPU compile time.
    # NOTE(review): presumably the first launch of a freshly generated `func`, so it is
    # dominated by kernel compilation - confirm against the caller.
    gpu_compile_time = @elapsed CUDA.@sync func.(inputs[1:2])

    # GPU operations are asynchronous; without synchronization the benchmark could return
    # before the kernel has finished.
    trial = @benchmark CUDA.@sync $func.($inputs)

    return (
        cpu_compile_time = 0.0,
        gpu_compile_time = gpu_compile_time,
        cpu_single_thread_time = 0.0,
        cpu_multi_thread_time = 0.0,
        # BenchmarkTools records sample times in nanoseconds; convert to seconds.
        gpu_time = mean(trial.times) / 1e9,
    )
end
log("CUDA devices: $(CUDA.devices())")

# bench and produce data
n_inputs = 10_000
optimizer = ReductionOptimizer()

# Each entry pairs a process string with the step size passed to `optimize!` between two
# consecutive measurements of that process.
processes = [
    ("ke->kke", 50),
    ("ke->ke", 1),
    ("ke->kke", 1),
    ("ke->kkke", 1),
    ("ke->kkkke", 5),
]

for (process_str, STEPSIZE) in processes
    applied_steps = 0
    optimization_seconds = 0

    # preparation of graph
    numa_node = MetagraphOptimization.NumaNode(
        0,
        1,
        MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
        -1.0,
        UUIDs.uuid1(),
    )
    machine = Machine([numa_node], [-1.0;;])

    process = parse_process(process_str, QEDModel())
    graph = gen_graph(process)

    # Inputs are generated on the host and wrapped in a `CuVector` for the GPU benchmark.
    inputs = CuVector([gen_process_input(process) for _ in 1:n_inputs])

    # Untimed call ahead of the measurement loop.
    # NOTE(review): presumably a warm-up so the first `func_gen_time` is not dominated by
    # compilation of the generator itself - confirm.
    get_compute_function(graph, process, machine)

    while true
        func_gen_time = @elapsed func = get_compute_function(graph, process, machine)
        timings = bench(func, inputs)
        props = get_properties(graph)

        # Values are positional and must follow the column order of `df`.
        row = (
            Threads.nthreads(),
            process_str,
            applied_steps,
            optimization_seconds,
            props.noNodes,
            props.noEdges,
            props.computeEffort,
            props.data,
            props.computeIntensity,
            func_gen_time,
            timings.cpu_compile_time,
            timings.cpu_single_thread_time,
            timings.cpu_multi_thread_time,
            timings.gpu_compile_time,
            timings.gpu_time,
        )
        push!(df, row)

        # Written after every measurement so partial results are on disk if the run stops.
        CSV.write(results_filename, df)

        # The fully reduced graph has just been measured; nothing left to optimize.
        fixpoint_reached(optimizer, graph) && break

        optimization_seconds += @elapsed optimize!(optimizer, graph, STEPSIZE)
        applied_steps += STEPSIZE
    end
end

CSV.write(results_filename, df)

File diff suppressed because one or more lines are too long