From 0d9b066915cd56c6957877ac793fe27b3145c110 Mon Sep 17 00:00:00 2001
From: Anton Reinhard <anton.reinhard@proton.me>
Date: Tue, 5 Mar 2024 23:32:02 +0100
Subject: [PATCH] A100 bench

---
 examples/qed_bench.jl                    | 37 +++++++++---------------
 experiments/diagram_bench_hemera_a100.sh | 24 +++++++++++++++
 experiments/run_qed_exec.sh              |  1 +
 3 files changed, 39 insertions(+), 23 deletions(-)
 create mode 100755 experiments/diagram_bench_hemera_a100.sh

diff --git a/examples/qed_bench.jl b/examples/qed_bench.jl
index 91a99ad..07fc7dd 100644
--- a/examples/qed_bench.jl
+++ b/examples/qed_bench.jl
@@ -8,7 +8,7 @@ using Random
 using BenchmarkTools
 using Dates
 
-DISABLE_GPU = true
+DISABLE_GPU = false
 
 function log(x...)
     println(now(), " ", join(x, " ")...)
@@ -44,7 +44,7 @@ if isfile(results_filename)
     df = CSV.read(results_filename, DataFrame)
 end
 
-nInputs = 1_000_000
+nInputs = 10_000_000
 
 function cpu_bench(compute_function, inputs)
     bench = @benchmark begin
@@ -53,7 +53,7 @@ function cpu_bench(compute_function, inputs)
         end
     end gcsample = true seconds = 300
 
-    time = mean(bench.times) / 1e9
+    time = median(bench.times) / 1e9
     s = std(bench.times) / 1e9
     rate = length(inputs) / time
 
@@ -65,7 +65,7 @@ function gpu_bench(compute_function, inputs)
         CUDA.@sync $compute_function.($inputs)
     end gcsample = true seconds = 300
 
-    time = mean(bench.times) / 1e9
+    time = median(bench.times) / 1e9
     s = std(bench.times) / 1e9
     rate = length(inputs) / time
 
@@ -230,7 +230,7 @@ optimizer = ReductionOptimizer()
 process = parse_process("ke->ke", QEDModel())
 gen_time = @elapsed graph = gen_graph(process)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
+#bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
 
 opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
@@ -242,7 +242,7 @@ CSV.write(results_filename, df)
 process = parse_process("ke->kke", QEDModel())
 gen_time = @elapsed graph = gen_graph(process)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
+#bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
 
 opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
@@ -254,7 +254,7 @@ CSV.write(results_filename, df)
 process = parse_process("ke->kkke", QEDModel())
 gen_time = @elapsed graph = gen_graph(process)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
+#bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
 
 opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
@@ -266,7 +266,7 @@ CSV.write(results_filename, df)
 process = parse_process("ke->kkkke", QEDModel())
 gen_time = @elapsed graph = gen_graph(process)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+#bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
 
 opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
@@ -278,20 +278,11 @@ CSV.write(results_filename, df)
 process = parse_process("ke->kkkkke", QEDModel())
 gen_time = @elapsed graph = gen_graph(process)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+#bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
 
 opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
-
-CSV.write(results_filename, df)
-
-# 6-photon compton
-process = parse_process("ke->kkkkkke", QEDModel())
-gen_time = @elapsed graph = gen_graph(process)
-opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
-func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
 
 CSV.write(results_filename, df)
 
@@ -299,7 +290,7 @@ CSV.write(results_filename, df)
 process = parse_process("AB->AB", ABCModel())
 gen_time = @elapsed graph = parse_dag("input/AB->AB.txt", ABCModel())
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
+#bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
 
 opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
@@ -311,7 +302,7 @@ CSV.write(results_filename, df)
 process = parse_process("AB->ABBB", ABCModel())
 gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
+#bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
 
 opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
@@ -323,10 +314,10 @@ CSV.write(results_filename, df)
 process = parse_process("AB->ABBBBB", ABCModel())
 gen_time = @elapsed graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
+#bench_process(process, "$process not optimized", graph, compute_func, gen_time, 0.0, func_gen_time)
 
 opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
 func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
-bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
+bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
 
 CSV.write(results_filename, df)
diff --git a/experiments/diagram_bench_hemera_a100.sh b/experiments/diagram_bench_hemera_a100.sh
new file mode 100755
index 0000000..a5d6962
--- /dev/null
+++ b/experiments/diagram_bench_hemera_a100.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Slurm batch job: QED benchmark on one GPU of the casus_a100 partition.
+#SBATCH --job-name=qed_bench
+#SBATCH --partition=casus_a100
+#SBATCH --account=casus
+#SBATCH --time=8:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=32
+#SBATCH --gres=gpu:1
+#SBATCH --mem=256GB
+#SBATCH --output=simulation-%A-%a.out
+#SBATCH --error=simulation-%A-%a.err
+
+# Stop here if the checkout is missing; everything below uses relative paths.
+cd "$HOME/repos/metagraph_optimization" || exit 1
+module load singularity git cuda/12.1
+mkdir -p results
+# Record the commit and working-tree state the results were produced from.
+printf "Current git commit hash: " > results/git.txt
+git rev-parse HEAD >> results/git.txt
+git status >> results/git.txt
+# Pass the allocated CPU count (32 if unset) on to the benchmark runner.
+singularity exec --nv experiments/CUDA_container.sif ./experiments/run_qed_exec.sh "${SLURM_CPUS_PER_TASK:-32}"
diff --git a/experiments/run_qed_exec.sh b/experiments/run_qed_exec.sh
index 3b9ddf2..d735dc3 100755
--- a/experiments/run_qed_exec.sh
+++ b/experiments/run_qed_exec.sh
@@ -21,6 +21,7 @@ lspci > results/pci_$i.txt
 #echo "Initiating julia..."
 #julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1    # need current dev version of QEDprocesses
 #julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1        # add requirements for the bench script
+julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
 
 echo "Benchmarking $i Threads"
 julia --project --threads=$i examples/qed_bench.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"