Add qed bench run script and singularity container
This commit is contained in:
@ -34,9 +34,10 @@ function import_bench()
|
||||
bench_txt("AB->ABBB.txt")
|
||||
bench_txt("AB->ABBBBB.txt")
|
||||
bench_txt("AB->ABBBBBBB.txt")
|
||||
#bench_txt("AB->ABBBBBBBBB.txt")
|
||||
bench_txt("AB->ABBBBBBBBB.txt")
|
||||
bench_txt("ABAB->ABAB.txt")
|
||||
return bench_txt("ABAB->ABC.txt")
|
||||
bench_txt("ABAB->ABC.txt")
|
||||
return nothing
|
||||
end
|
||||
|
||||
import_bench()
|
||||
|
@ -2,6 +2,40 @@ using MetagraphOptimization
|
||||
using LIKWID
|
||||
using CUDA
|
||||
using UUIDs
|
||||
using DataFrames
|
||||
using CSV
|
||||
using Random
|
||||
|
||||
DISABLE_GPU = false
|
||||
|
||||
results_filename = "results.csv"
|
||||
|
||||
df = DataFrame(
|
||||
process_name = String[],
|
||||
graph_gen_time = Float64[],
|
||||
optimization_time = Float64[],
|
||||
function_generation_time = Float64[],
|
||||
graph_nodes = Int[],
|
||||
graph_edges = Int[],
|
||||
graph_mem = Float64[],
|
||||
cpu_threads = Int[],
|
||||
n_inputs = Int[],
|
||||
nflops_likwid = Int[],
|
||||
cpu_time = Float64[],
|
||||
cpu_rate = Float64[],
|
||||
cpu_gflops = Float64[],
|
||||
gpu_name = String[],
|
||||
gpu_time = Float64[],
|
||||
gpu_rate = Float64[],
|
||||
gpu_gflops = Float64[],
|
||||
)
|
||||
|
||||
# If a results file already exists, load it so that new results are appended to it
|
||||
if isfile(results_filename)
|
||||
df = CSV.read(results_filename, DataFrame)
|
||||
end
|
||||
|
||||
nInputs = 100_000
|
||||
|
||||
function cpu_bench(compute_function, inputs)
|
||||
compute_function.(inputs[begin:10]) # make sure it's compiled
|
||||
@ -24,21 +58,38 @@ end
|
||||
|
||||
function bench_process(
|
||||
process::MetagraphOptimization.AbstractProcessDescription,
|
||||
process_name::String,
|
||||
graph::DAG,
|
||||
func,
|
||||
gen_time::Float64,
|
||||
opt_time::Float64,
|
||||
func_time::Float64,
|
||||
io::IO = stdout;
|
||||
use_likwid = true,
|
||||
use_gpu = true,
|
||||
)
|
||||
println(io, "\n--- Benchmarking $(process) ---")
|
||||
println(io, "\n--- Benchmarking $(process_name) ---")
|
||||
if DISABLE_GPU
|
||||
use_gpu = false
|
||||
end
|
||||
|
||||
NFLOPs = GraphProperties(graph).computeEffort
|
||||
graph_props = GraphProperties(graph)
|
||||
NFLOPs = graph_props.computeEffort
|
||||
nflops_likwid = 0
|
||||
if use_likwid
|
||||
input = gen_process_input(process)
|
||||
func(input) # compile first
|
||||
|
||||
# Suppress console output while the @perfmon measurement runs
|
||||
oldstd = stdout
|
||||
redirect_stdout(devnull)
|
||||
_, events = @perfmon "FLOPS_DP" func(input)
|
||||
redirect_stdout(oldstd) # recover original stdout
|
||||
|
||||
NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
|
||||
nflops_likwid = NFLOPs
|
||||
end
|
||||
|
||||
nInputs = 10000000 # ten million
|
||||
println(io, "Generating $nInputs inputs with $(Threads.nthreads()) threads...")
|
||||
|
||||
inputs = Vector{typeof(gen_process_input(process))}()
|
||||
@ -54,12 +105,21 @@ function bench_process(
|
||||
|
||||
println(io, "Benchmarking CPU with $(Threads.nthreads()) threads...")
|
||||
(time_cpu, rate_cpu) = cpu_bench(func, inputs)
|
||||
flops_cpu = (rate_cpu * NFLOPs) / 1024^3
|
||||
flops_cpu = (rate_cpu * NFLOPs) / 10^9
|
||||
|
||||
println(io, "Benchmarking GPU...")
|
||||
cuInputs = CuArray(inputs)
|
||||
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
|
||||
flops_gpu = (rate_gpu * NFLOPs) / 1024^3
|
||||
time_gpu = 0.0
|
||||
rate_gpu = 0.0
|
||||
flops_gpu = 0.0
|
||||
gpu_name = "none"
|
||||
if use_gpu
|
||||
println(io, "Benchmarking GPU...")
|
||||
gpu_name = "$(name(first(CUDA.devices())))"
|
||||
cuInputs = CuArray(inputs)
|
||||
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
|
||||
flops_gpu = (rate_gpu * NFLOPs) / 10^9
|
||||
else
|
||||
println(io, "Skipping GPU...")
|
||||
end
|
||||
|
||||
println(io, "\nBenchmark Summary for $(process):")
|
||||
|
||||
@ -73,10 +133,39 @@ function bench_process(
|
||||
println(io, " Time: $time_cpu")
|
||||
println(io, " Rate: $rate_cpu")
|
||||
println(io, " GFLOPS: $flops_cpu")
|
||||
println(io, "GPU, $(name(first(CUDA.devices())))")
|
||||
println(io, " Time: $time_gpu")
|
||||
println(io, " Rate: $rate_gpu")
|
||||
return println(io, " GFLOPS: $flops_gpu")
|
||||
if use_gpu
|
||||
println(io, "GPU, $gpu_name")
|
||||
println(io, " Time: $time_gpu")
|
||||
println(io, " Rate: $rate_gpu")
|
||||
println(io, " GFLOPS: $flops_gpu")
|
||||
end
|
||||
|
||||
if (process_name != "warmup")
|
||||
push!(
|
||||
df,
|
||||
Dict(
|
||||
:process_name => process_name,
|
||||
:graph_gen_time => gen_time,
|
||||
:optimization_time => opt_time,
|
||||
:function_generation_time => func_time,
|
||||
:graph_nodes => graph_props.noNodes,
|
||||
:graph_edges => graph_props.noEdges,
|
||||
:graph_mem => MetagraphOptimization.mem(graph),
|
||||
:cpu_threads => Threads.nthreads(),
|
||||
:n_inputs => nInputs,
|
||||
:nflops_likwid => nflops_likwid,
|
||||
:cpu_time => time_cpu,
|
||||
:cpu_rate => rate_cpu,
|
||||
:cpu_gflops => flops_cpu,
|
||||
:gpu_name => gpu_name,
|
||||
:gpu_time => time_gpu,
|
||||
:gpu_rate => rate_gpu,
|
||||
:gpu_gflops => flops_gpu,
|
||||
),
|
||||
)
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
# use "mock" machine that only uses cpu
|
||||
@ -92,57 +181,151 @@ machine = Machine(
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# Sadly, these cannot be put into functions: the world age must increase after the function is created, which happens only in the global scope.
|
||||
|
||||
# compton
|
||||
process = parse_process("ke->ke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
## -- WARMUP TO COMPILE FUNCTIONS first
|
||||
|
||||
optimizer = RandomWalkOptimizer(MersenneTwister(0))
|
||||
|
||||
# 2-photon compton
|
||||
process = parse_process("ke->kke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
opt_time = @elapsed optimize!(optimizer, graph, 200)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "warmup", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 3-photon compton
|
||||
process = parse_process("ke->kkke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
# AB->AB
|
||||
process = parse_process("AB->AB", ABCModel())
|
||||
graph = parse_dag("input/AB->AB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# AB->AB^3
|
||||
process = parse_process("AB->ABBB", ABCModel())
|
||||
graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "warmup", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
exit(0)
|
||||
## -- WARMUP END
|
||||
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# compton
|
||||
process = parse_process("ke->ke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 2-photon compton
|
||||
process = parse_process("ke->kke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 3-photon compton
|
||||
process = parse_process("ke->kkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 4-photon compton
|
||||
process = parse_process("ke->kkkke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 5-photon compton
|
||||
process = parse_process("ke->kkkkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# 6-photon compton
|
||||
process = parse_process("ke->kkkkkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# 7-photon compton
|
||||
process = parse_process("ke->kkkkkkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# 8-photon compton
|
||||
process = parse_process("ke->kkkkkkkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# AB->AB
|
||||
process = parse_process("AB->AB", ABCModel())
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->AB.txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# AB->AB^3
|
||||
process = parse_process("AB->ABBB", ABCModel())
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# AB->AB^5
|
||||
process = parse_process("AB->ABBBBB", ABCModel())
|
||||
graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# AB->AB^7
|
||||
process = parse_process("AB->ABBBBBBB", ABCModel())
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBBBBBB.txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
CSV.write(results_filename, df)
|
||||
|
Reference in New Issue
Block a user