Add reduction benchmarks
@@ -5,5 +5,6 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
 QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"
 StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
examples/qed_bench_reduction_steps.jl (new file, 115 lines)
@@ -0,0 +1,115 @@
using MetagraphOptimization
using CUDA
using UUIDs
using BenchmarkTools, Statistics # Statistics provides mean() for the benchmark trial times
using DataFrames
using CSV

results_filename = "bench_results_reduction_steps.csv"

df = DataFrame(
    threads = Int[],
    process = String[],
    operations = Int[],
    cumulative_optimization_time = Float64[],
    graph_nodes = Int[],
    graph_edges = Int[],
    graph_ce = Float64[],
    graph_dt = Float64[],
    graph_ci = Float64[],
    gen_func_t = Float64[],
    cpu_compile_t = Float64[],
    cpu_st_t = Float64[],
    cpu_mt_t = Float64[],
    gpu_compile_t = Float64[],
    gpu_t = Float64[],
)

# if a previous results file exists, read it so the new results are appended to it
if isfile(results_filename)
    df = CSV.read(results_filename, DataFrame)
end

function bench(func, inputs) # CPU-only benchmark; the GPU columns are filled with zeros
    compile_time = @elapsed func(inputs[1]) # time of the first call, dominated by compilation

    single_thread = @benchmark $func.($inputs)
    multi_threaded = @benchmark Threads.@threads for i in eachindex($inputs)
        $func($inputs[i])
    end

    return (
        cpu_compile_time = compile_time,
        gpu_compile_time = 0.0,
        cpu_single_thread_time = mean(single_thread.times) / 1e9, # ns -> s
        cpu_multi_thread_time = mean(multi_threaded.times) / 1e9, # ns -> s
        gpu_time = 0.0,
    )
end


# bench and produce data
n_inputs = 10_000
optimizer = ReductionOptimizer()
processes = [("ke->kke", 50), ("ke->ke", 1), ("ke->kke", 1), ("ke->kkke", 5), ("ke->kkkke", 5), ("ke->kkkkke", 10), ("ke->kkkkkke", 20)] # (process string, operations applied per optimization step)

for (process_str, STEPSIZE) in processes
    n = 0
    opt_time_cum = 0.0

    # preparation of graph
    machine = Machine(
        [
            MetagraphOptimization.NumaNode(
                0,
                1,
                MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
                -1.0,
                UUIDs.uuid1(),
            ),
        ],
        [-1.0;;],
    )
    process = parse_process(process_str, QEDModel())
    graph = gen_graph(process)
    inputs = [gen_process_input(process) for _ in 1:n_inputs]

    get_compute_function(graph, process, machine) # warm up code generation so the timed call below is not skewed by first-call compilation

    while true
        func_gen_time = @elapsed func = get_compute_function(graph, process, machine)
        res = bench(func, inputs)

        graph_properties = get_properties(graph)
        push!(
            df,
            (
                Threads.nthreads(),
                process_str,
                n,
                opt_time_cum,
                graph_properties.noNodes,
                graph_properties.noEdges,
                graph_properties.computeEffort,
                graph_properties.data,
                graph_properties.computeIntensity,
                func_gen_time,
                res.cpu_compile_time,
                res.cpu_single_thread_time,
                res.cpu_multi_thread_time,
                res.gpu_compile_time,
                res.gpu_time,
            ),
        )
        CSV.write(results_filename, df)

        if fixpoint_reached(optimizer, graph) # stop once no further reductions can be applied
            break
        end

        opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
        n += STEPSIZE
    end
end

CSV.write(results_filename, df)
examples/qed_bench_reduction_steps_gpu.jl (new file, 118 lines)
@@ -0,0 +1,118 @@
using MetagraphOptimization
using CUDA
using UUIDs, Dates # Dates provides now() for the log() timestamps
using BenchmarkTools, Statistics # Statistics provides mean() for the benchmark trial times
using DataFrames
using CSV

results_filename = "bench_results_reduction_steps_gpu.csv"

df = DataFrame(
    threads = Int[],
    process = String[],
    operations = Int[],
    cumulative_optimization_time = Float64[],
    graph_nodes = Int[],
    graph_edges = Int[],
    graph_ce = Float64[],
    graph_dt = Float64[],
    graph_ci = Float64[],
    gen_func_t = Float64[],
    cpu_compile_t = Float64[],
    cpu_st_t = Float64[],
    cpu_mt_t = Float64[],
    gpu_compile_t = Float64[],
    gpu_t = Float64[],
)

# if a previous results file exists, read it so the new results are appended to it
if isfile(results_filename)
    df = CSV.read(results_filename, DataFrame)
end

function log(x...) # simple timestamped logger (shadows Base.log in this script)
    println(now(), " ", join(x, " "))
    return flush(stdout)
end

function bench(func, inputs) # GPU-only benchmark; the CPU columns are filled with zeros
    gpu_compile_time = @elapsed func.(inputs[1:2]) # first broadcast triggers compilation

    gpu_time = @benchmark $func.($inputs)

    return (
        cpu_compile_time = 0.0,
        gpu_compile_time = gpu_compile_time,
        cpu_single_thread_time = 0.0,
        cpu_multi_thread_time = 0.0,
        gpu_time = mean(gpu_time.times) / 1e9, # ns -> s
    )
end

log("CUDA devices: $(CUDA.devices())")

# bench and produce data
n_inputs = 10_000
optimizer = ReductionOptimizer()
processes = [("ke->kke", 50), ("ke->ke", 1), ("ke->kke", 1), ("ke->kkke", 1), ("ke->kkkke", 5)] # (process string, operations applied per optimization step)

for (process_str, STEPSIZE) in processes
    n = 0
    opt_time_cum = 0.0

    # preparation of graph
    machine = Machine(
        [
            MetagraphOptimization.NumaNode(
                0,
                1,
                MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
                -1.0,
                UUIDs.uuid1(),
            ),
        ],
        [-1.0;;],
    )
    process = parse_process(process_str, QEDModel())
    graph = gen_graph(process)
    inputs = CuVector([gen_process_input(process) for _ in 1:n_inputs]) # move the generated inputs to the GPU

    get_compute_function(graph, process, machine) # warm up code generation so the timed call below is not skewed by first-call compilation

    while true
        func_gen_time = @elapsed func = get_compute_function(graph, process, machine)
        res = bench(func, inputs)

        graph_properties = get_properties(graph)
        push!(
            df,
            (
                Threads.nthreads(),
                process_str,
                n,
                opt_time_cum,
                graph_properties.noNodes,
                graph_properties.noEdges,
                graph_properties.computeEffort,
                graph_properties.data,
                graph_properties.computeIntensity,
                func_gen_time,
                res.cpu_compile_time,
                res.cpu_single_thread_time,
                res.cpu_multi_thread_time,
                res.gpu_compile_time,
                res.gpu_time,
            ),
        )
        CSV.write(results_filename, df)

        if fixpoint_reached(optimizer, graph) # stop once no further reductions can be applied
            break
        end

        opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
        n += STEPSIZE
    end
end

CSV.write(results_filename, df)
File diff suppressed because one or more lines are too long
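Not part of this commit: a minimal post-processing sketch for the CSV files these scripts write. It assumes the column names defined in the scripts above and uses the Plots dependency already listed in the project file; the output filename and styling are illustrative only.

# Illustrative only: read the CPU benchmark results and plot single-threaded
# execution time against the number of applied reduction operations,
# one curve per process string.
using CSV
using DataFrames
using Plots

results = CSV.read("bench_results_reduction_steps.csv", DataFrame)

p = plot(; xlabel = "applied operations", ylabel = "CPU single-thread time [s]", legend = :topright)
for sub in groupby(results, :process)
    plot!(p, sub.operations, sub.cpu_st_t; label = first(sub.process))
end
savefig(p, "reduction_bench_cpu.pdf")

The GPU results can be overlaid the same way by reading bench_results_reduction_steps_gpu.csv and plotting the gpu_t column instead.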