Add qed bench run script and singularity container

2024-02-02 06:56:16 +01:00
parent 82ed774b7e
commit 7098d1801a
6 changed files with 415 additions and 51 deletions
--- a/examples/import_bench.jl
+++ b/examples/import_bench.jl
@@ -34,9 +34,10 @@ function import_bench()
    bench_txt("AB->ABBB.txt")
    bench_txt("AB->ABBBBB.txt")
    bench_txt("AB->ABBBBBBB.txt")
-    #bench_txt("AB->ABBBBBBBBB.txt")
+    bench_txt("AB->ABBBBBBBBB.txt")
    bench_txt("ABAB->ABAB.txt")
-    return bench_txt("ABAB->ABC.txt")
+    bench_txt("ABAB->ABC.txt")
+    return nothing
 end

 import_bench()
--- a/examples/qed_bench.jl
+++ b/examples/qed_bench.jl
@@ -2,6 +2,40 @@ using MetagraphOptimization
 using LIKWID
 using CUDA
 using UUIDs
+using DataFrames
+using CSV
+using Random
+
+DISABLE_GPU = false
+
+results_filename = "results.csv"
+
+df = DataFrame(
+    process_name = String[],
+    graph_gen_time = Float64[],
+    optimization_time = Float64[],
+    function_generation_time = Float64[],
+    graph_nodes = Int[],
+    graph_edges = Int[],
+    graph_mem = Float64[],
+    cpu_threads = Int[],
+    n_inputs = Int[],
+    nflops_likwid = Int[],
+    cpu_time = Float64[],
+    cpu_rate = Float64[],
+    cpu_gflops = Float64[],
+    gpu_name = String[],
+    gpu_time = Float64[],
+    gpu_rate = Float64[],
+    gpu_gflops = Float64[],
+)
+
+# if a results file exists, load it (as plain String columns) so new rows can be appended
+if isfile(results_filename)
+    df = CSV.read(results_filename, DataFrame; stringtype = String)
+end
+
+nInputs = 100_000

 function cpu_bench(compute_function, inputs)
    compute_function.(inputs[begin:10]) # make sure it's compiled
@@ -24,21 +58,38 @@ end

 function bench_process(
    process::MetagraphOptimization.AbstractProcessDescription,
+    process_name::String,
+    graph::DAG,
    func,
+    gen_time::Float64,
+    opt_time::Float64,
+    func_time::Float64,
    io::IO = stdout;
    use_likwid = true,
+    use_gpu = true,
 )
-    println(io, "\n--- Benchmarking $(process) ---")
+    println(io, "\n--- Benchmarking $(process_name) ---")
+    if DISABLE_GPU
+        use_gpu = false
+    end

-    NFLOPs = GraphProperties(graph).computeEffort
+    graph_props = GraphProperties(graph)
+    NFLOPs = graph_props.computeEffort
+    nflops_likwid = 0
    if use_likwid
        input = gen_process_input(process)
        func(input) # compile first
+
+        # get rid of annoying output to console
+        oldstd = stdout
+        redirect_stdout(devnull)
        _, events = @perfmon "FLOPS_DP" func(input)
+        redirect_stdout(oldstd) # recover original stdout
+
        NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
+        nflops_likwid = NFLOPs
    end

-    nInputs = 10000000  # ten million
    println(io, "Generating $nInputs inputs with $(Threads.nthreads()) threads...")

    inputs = Vector{typeof(gen_process_input(process))}()
@@ -54,12 +105,21 @@ function bench_process(

    println(io, "Benchmarking CPU with $(Threads.nthreads()) threads...")
    (time_cpu, rate_cpu) = cpu_bench(func, inputs)
-    flops_cpu = (rate_cpu * NFLOPs) / 1024^3
+    flops_cpu = (rate_cpu * NFLOPs) / 10^9

-    println(io, "Benchmarking GPU...")
-    cuInputs = CuArray(inputs)
-    (time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
-    flops_gpu = (rate_gpu * NFLOPs) / 1024^3
+    time_gpu = 0.0
+    rate_gpu = 0.0
+    flops_gpu = 0.0
+    gpu_name = "none"
+    if use_gpu
+        println(io, "Benchmarking GPU...")
+        gpu_name = "$(name(first(CUDA.devices())))"
+        cuInputs = CuArray(inputs)
+        (time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
+        flops_gpu = (rate_gpu * NFLOPs) / 10^9
+    else
+        println(io, "Skipping GPU...")
+    end

    println(io, "\nBenchmark Summary for $(process):")

@@ -73,10 +133,39 @@ function bench_process(
    println(io, "  Time:  $time_cpu")
    println(io, "  Rate:  $rate_cpu")
    println(io, "  GFLOPS: $flops_cpu")
-    println(io, "GPU, $(name(first(CUDA.devices())))")
-    println(io, "  Time:  $time_gpu")
-    println(io, "  Rate:  $rate_gpu")
-    return println(io, "  GFLOPS: $flops_gpu")
+    if use_gpu
+        println(io, "GPU, $gpu_name")
+        println(io, "  Time:  $time_gpu")
+        println(io, "  Rate:  $rate_gpu")
+        println(io, "  GFLOPS: $flops_gpu")
+    end
+
+    if (process_name != "warmup")
+        push!(
+            df,
+            Dict(
+                :process_name => process_name,
+                :graph_gen_time => gen_time,
+                :optimization_time => opt_time,
+                :function_generation_time => func_time,
+                :graph_nodes => graph_props.noNodes,
+                :graph_edges => graph_props.noEdges,
+                :graph_mem => MetagraphOptimization.mem(graph),
+                :cpu_threads => Threads.nthreads(),
+                :n_inputs => nInputs,
+                :nflops_likwid => nflops_likwid,
+                :cpu_time => time_cpu,
+                :cpu_rate => rate_cpu,
+                :cpu_gflops => flops_cpu,
+                :gpu_name => gpu_name,
+                :gpu_time => time_gpu,
+                :gpu_rate => rate_gpu,
+                :gpu_gflops => flops_gpu,
+            ),
+        )
+    end
+
+    return nothing
 end

 # use "mock" machine that only uses cpu
@@ -92,57 +181,151 @@ machine = Machine(
    ],
    [-1.0;;],
 )
-optimizer = ReductionOptimizer()

 # sadly cannot put these in functions because the world age must increase after the function is created which happens only in the global scope

-# compton
-process = parse_process("ke->ke", QEDModel())
-graph = gen_graph(process)
-optimize_to_fixpoint!(optimizer, graph)
-compute_func = get_compute_function(graph, process, machine)
-bench_process(process, compute_func)
+## -- WARMUP: run once first so the functions are compiled
+
+optimizer = RandomWalkOptimizer(MersenneTwister(0))

 # 2-photon compton
 process = parse_process("ke->kke", QEDModel())
-graph = gen_graph(process)
-optimize_to_fixpoint!(optimizer, graph)
-compute_func = get_compute_function(graph, process, machine)
-bench_process(process, compute_func)
+gen_time = @elapsed graph = gen_graph(process)
+opt_time = @elapsed optimize!(optimizer, graph, 200)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "warmup", graph, compute_func, gen_time, opt_time, func_gen_time)

-# 3-photon compton
-process = parse_process("ke->kkke", QEDModel())
-graph = gen_graph(process)
-optimize_to_fixpoint!(optimizer, graph)
-compute_func = get_compute_function(graph, process, machine)
-bench_process(process, compute_func)
-
-# AB->AB
-process = parse_process("AB->AB", ABCModel())
-graph = parse_dag("input/AB->AB.txt", ABCModel())
-optimize_to_fixpoint!(optimizer, graph)
-compute_func = get_compute_function(graph, process, machine)
-bench_process(process, compute_func)
+optimizer = ReductionOptimizer()

 # AB->AB^3
 process = parse_process("AB->ABBB", ABCModel())
-graph = parse_dag("input/AB->ABBB.txt", ABCModel())
-optimize_to_fixpoint!(optimizer, graph)
-compute_func = get_compute_function(graph, process, machine)
-bench_process(process, compute_func)
+gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "warmup", graph, compute_func, gen_time, opt_time, func_gen_time)

-exit(0)
+## -- WARMUP END
+
+optimizer = ReductionOptimizer()
+
+# compton
+process = parse_process("ke->ke", QEDModel())
+gen_time = @elapsed graph = gen_graph(process)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
+
+# 2-photon compton
+process = parse_process("ke->kke", QEDModel())
+gen_time = @elapsed graph = gen_graph(process)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
+
+# 3-photon compton
+process = parse_process("ke->kkke", QEDModel())
+gen_time = @elapsed graph = gen_graph(process)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)

 # 4-photon compton
 process = parse_process("ke->kkkke", QEDModel())
-graph = gen_graph(process)
-optimize_to_fixpoint!(optimizer, graph)
-compute_func = get_compute_function(graph, process, machine)
-bench_process(process, compute_func)
+gen_time = @elapsed graph = gen_graph(process)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
+
+# 5-photon compton
+process = parse_process("ke->kkkkke", QEDModel())
+gen_time = @elapsed graph = gen_graph(process)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
+
+# 6-photon compton
+process = parse_process("ke->kkkkkke", QEDModel())
+gen_time = @elapsed graph = gen_graph(process)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
+
+# 7-photon compton
+process = parse_process("ke->kkkkkkke", QEDModel())
+gen_time = @elapsed graph = gen_graph(process)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
+
+# 8-photon compton
+process = parse_process("ke->kkkkkkkke", QEDModel())
+gen_time = @elapsed graph = gen_graph(process)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
+
+# AB->AB
+process = parse_process("AB->AB", ABCModel())
+gen_time = @elapsed graph = parse_dag("input/AB->AB.txt", ABCModel())
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
+
+# AB->AB^3
+process = parse_process("AB->ABBB", ABCModel())
+gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)

 # AB->AB^5
 process = parse_process("AB->ABBBBB", ABCModel())
-graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
-optimize_to_fixpoint!(optimizer, graph)
-compute_func = get_compute_function(graph, process, machine)
-bench_process(process, compute_func)
+gen_time = @elapsed graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
+
+# AB->AB^7
+process = parse_process("AB->ABBBBBBB", ABCModel())
+gen_time = @elapsed graph = parse_dag("input/AB->ABBBBBBB.txt", ABCModel())
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+
+opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
+func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
+
+CSV.write(results_filename, df)