evaluation
This commit is contained in:
parent
ae99be7207
commit
ddfc9191d5
17
data/bench_results_a100_32threads.csv
Normal file
17
data/bench_results_a100_32threads.csv
Normal file
@ -0,0 +1,17 @@
|
||||
process_name,graph_gen_time,optimization_time,function_generation_time,graph_nodes,graph_edges,graph_mem,cpu_threads,n_inputs,nflops_likwid,cpu_time,cpu_std,cpu_rate,cpu_gflops,gpu_name,gpu_time,gpu_std,gpu_rate,gpu_gflops
|
||||
QED Process: 'ke->ke' not optimized,0.451921113,0.0,2.14306036,26,29,6532.0,32,1048576,0,0.0295500115,8.885463496445566e-5,3.548479160490344e7,193.67599257956294,NVIDIA A100-SXM4-80GB,0.0005345045,0.00015057262207453253,1.9617720711425252e9,10707.351964295902
|
||||
QED Process: 'ke->ke' reduced,0.451921113,2.351144988,0.007459328,26,29,6948.0,32,1048576,0,0.032075236,0.0012127475826961272,3.2691139045711152e7,178.42823691149147,NVIDIA A100-SXM4-80GB,0.0004144495,2.314476769164037e-5,2.530045276927587e9,13808.98712147077
|
||||
QED Process: 'ke->kke' not optimized,0.000756524,0.0,0.227593109,77,101,19175.0,32,1048576,0,0.118123535,0.005439195292530201,8.876943955326091e6,207.0258676901263,NVIDIA A100-SXM4-80GB,0.000764644,3.164787574545881e-5,1.3713257411292052e9,31981.716103180042
|
||||
QED Process: 'ke->kke' reduced,0.000756524,2.213239148,0.012998109,59,77,16383.0,32,1048576,0,0.090563566,0.003566800695425846,1.1578342663759507e7,190.03244355461888,NVIDIA A100-SXM4-80GB,0.000896431,1.2561601767556025e-5,1.16972304616864e9,19198.371926004344
|
||||
QED Process: 'ke->kkke' not optimized,0.001148917,0.0,0.197975335,356,493,85898.0,32,1048576,0,0.845482461,0.07123353122024402,1.2402102330541424e6,153.60189767910512,NVIDIA A100-SXM4-80GB,0.0051661485,4.558145844273378e-5,2.0297054953027385e8,25138.20701514871
|
||||
QED Process: 'ke->kkke' reduced,0.001148917,0.025159492,0.03528292,188,273,54426.0,32,1048576,0,0.331087292,0.05694664098686965,3.1670680975577887e6,193.5822868610735,NVIDIA A100-SXM4-80GB,0.0052421655,1.2131630189989583e-5,2.0002725972692013e8,12226.366209918402
|
||||
QED Process: 'ke->kkkke' not optimized,0.003466628,0.0,0.835965949,2183,3015,504653.0,32,1048576,0,4.038223548,NaN,259662.69265091213,202.89659801270614,NVIDIA A100-SXM4-80GB,0.032951423,0.00027221495848960984,3.18218730644804e7,24865.142239957284
|
||||
QED Process: 'ke->kkkke' reduced,0.003466628,0.036556559,0.159378554,853,1295,243781.0,32,1048576,0,1.434366504,0.0016660912785982046,731037.7069429948,215.14457991275012,NVIDIA A100-SXM4-80GB,0.02999597,1.213502455704149e-5,3.495722925446318e7,10287.921308895828
|
||||
QED Process: 'ke->kkkkke' not optimized,0.020601425,0.0,11.212760712,15866,21617,3.982352e6,32,1048576,0,28.474378922,NaN,36825.24570149078,209.22863596806917,NVIDIA A100-SXM4-80GB,0.2143423735,0.0005946906538251631,4.89206115840646e6,27795.042877455122
|
||||
QED Process: 'ke->kkkkke' reduced,0.020601425,0.70128262,1.171371284,4982,7655,1.807728e6,32,1048576,0,9.117928529,NaN,115001.5594731802,200.7318870152113,NVIDIA A100-SXM4-80GB,0.218809487,0.0012623373353923995,4.792187095617111e6,8364.623601973895
|
||||
ABC Process: 'AB->AB' not optimized,0.971094109,0.0,2.060371784,34,37,8624.0,32,1048576,0,0.021601539,0.002037950755102709,4.854172658716585e7,2.5727115091197903,NVIDIA A100-SXM4-80GB,0.0001573225,0.011099784405901598,6.665136900316229e9,353.25225571676015
|
||||
ABC Process: 'AB->AB' reduced,0.971094109,2.513935435,0.00827303,34,37,9296.0,32,1048576,0,0.019712653,0.0006279822486413927,5.3193043067313164e7,2.819231282567598,NVIDIA A100-SXM4-80GB,0.00015603,5.183805230394624e-6,6.720348650900468e9,356.17847849772477
|
||||
ABC Process: 'AB->ABBB' not optimized,0.028151784,0.0,0.282960646,280,385,69428.0,32,1048576,0,0.026436742,0.00026485415525001675,3.966358638292117e7,42.63835536164025,NVIDIA A100-SXM4-80GB,0.0003672985,0.00012555745652834268,2.85483333038387e9,3068.94583016266
|
||||
ABC Process: 'AB->ABBB' reduced,0.028151784,2.479253592,0.036600485,200,285,57156.0,32,1048576,0,0.02722382,0.0013358585629396548,3.851685766361958e7,28.30989038276039,NVIDIA A100-SXM4-80GB,0.000442651,3.118776404724562e-5,2.3688549218232875e9,1741.1083675401162
|
||||
ABC Process: 'AB->ABBBBB' not optimized,0.020961406,0.0,6.527425109,7854,11241,1.982968e6,32,1048576,0,0.153860476,0.005632691248110826,6.815109554191162e6,231.13444053039325,NVIDIA A100-SXM4-80GB,0.032277095,2.7966680501983068e-5,3.2486690639290806e7,1101.7861130315475
|
||||
ABC Process: 'AB->ABBBBB' reduced,0.020961406,0.240323814,1.157408425,4998,7671,1.507432e6,32,1048576,0,0.205006419,0.00615246960279379,5.114844721032857e6,111.38597348993252,NVIDIA A100-SXM4-80GB,0.065801576,0.00019093082620729427,1.5935423795928536e7,347.02572400393575
|
|
@ -67,7 +67,7 @@ n_inputs = df[:, "n_inputs"][1]
|
||||
|
||||
|
||||
|
||||
title_string = "QED N-Photon Compton Scattering\nCalculate 10,000,000 Matrix Elements"
|
||||
title_string = "QED N-Photon Compton Scattering\nCalculate 1,048,576 (\$2^{20}\$) Matrix Elements"
|
||||
|
||||
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
|
||||
|
||||
@ -106,7 +106,7 @@ savefig("cpu_vs_gpu_qed.pdf")
|
||||
|
||||
|
||||
|
||||
title_string = "\$AB\\rightarrow AB^n\$ ABC Processes\nCalculate 10,000,000 Matrix Elements"
|
||||
title_string = "\$AB\\rightarrow AB^n\$ ABC Processes\nCalculate 1,048,576 (\$2^{20}\$) Matrix Elements"
|
||||
|
||||
df_filt = filter(:process_name => x -> abc_proc_to_n(x) >= 1, df)
|
||||
|
||||
|
@ -12,11 +12,11 @@ processes = [
|
||||
"QED Process: 'ke->ke'",
|
||||
"QED Process: 'ke->kke'",
|
||||
"QED Process: 'ke->kkke'",
|
||||
#"QED Process: 'ke->kkkke'",
|
||||
#"QED Process: 'ke->kkkkke'",
|
||||
"QED Process: 'ke->kkkke'",
|
||||
"QED Process: 'ke->kkkkke'",
|
||||
"ABC Process: 'AB->AB'",
|
||||
"ABC Process: 'AB->ABBB'",
|
||||
#"ABC Process: 'AB->ABBBBB'",
|
||||
"ABC Process: 'AB->ABBBBB'",
|
||||
]
|
||||
|
||||
function proc_to_n(str::AbstractString)
|
||||
@ -65,23 +65,23 @@ end
|
||||
title_string = "GPU $gpu_name, $n_inputs samples"
|
||||
|
||||
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
|
||||
df_filt.gpu_rate = df_filt.gpu_rate .* 1e9
|
||||
df_filt.gpu_time = df_filt.gpu_time ./ 1e9
|
||||
df_filt.gpu_gflops = df_filt.gpu_gflops .* 1e9
|
||||
df_filt.gpu_rate = df_filt.gpu_rate
|
||||
df_filt.gpu_time = df_filt.gpu_time
|
||||
df_filt.gpu_gflops = df_filt.gpu_gflops
|
||||
|
||||
df_filt.process_size = @. proc_to_n(df_filt.process_name)
|
||||
|
||||
df_no_opt = filter(:process_name => x -> match(r" no optimization$", x) !== nothing, df_filt)
|
||||
df_no_opt = filter(:process_name => x -> match(r" not optimized$", x) !== nothing, df_filt)
|
||||
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
|
||||
|
||||
@df df_no_opt scatter(:process_size, :gpu_rate, label = "unoptimized function execution rate", markersize = 7)
|
||||
@df df_red scatter!(:process_size, :gpu_rate, label = "reduced function execution rate", markersize = 7)
|
||||
|
||||
plot!(
|
||||
title = title_string * ", sample rate",
|
||||
#title = title_string * ", sample rate",
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
xticks = [1, 2, 3],
|
||||
xticks = [1, 2, 3, 4, 5],
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
@ -97,10 +97,10 @@ savefig("gpu_rate_$(gpu_name).pdf")
|
||||
@df df_red scatter!(:process_size, :gpu_time, label = "reduced function execution time", markersize = 7)
|
||||
|
||||
plot!(
|
||||
title = title_string * ", execution time",
|
||||
#title = title_string * ", execution time",
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
xticks = [1, 2, 3],
|
||||
xticks = [1, 2, 3, 4, 5],
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
@ -116,10 +116,10 @@ savefig("gpu_times_$(gpu_name).pdf")
|
||||
@df df_red scatter!(:process_size, :gpu_gflops, label = "reduced function", markersize = 7)
|
||||
|
||||
plot!(
|
||||
title = title_string * ", GFLOPS",
|
||||
#title = title_string * ", GFLOPS",
|
||||
yscale = :linear,
|
||||
legend = :outerbottom,
|
||||
xticks = [1, 2, 3],
|
||||
xticks = [1, 2, 3, 4, 5],
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
|
@ -22,4 +22,4 @@ lspci > results/pci_bench_reduce_gpu.txt
|
||||
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
|
||||
|
||||
echo "Benchmarking Reduction 32 Threads, *GPU*"
|
||||
julia --project --threads=32 examples/qed_bench_reduction_steps_gpu.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
||||
julia --project -O3 --threads=32 examples/qed_bench_reduction_steps_gpu.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
||||
|
Binary file not shown.
BIN
images/gpu_rate_NVIDIA A100-SXM4-80GB.pdf
Normal file
BIN
images/gpu_rate_NVIDIA A100-SXM4-80GB.pdf
Normal file
Binary file not shown.
Binary file not shown.
BIN
images/gpu_times_NVIDIA A100-SXM4-80GB.pdf
Normal file
BIN
images/gpu_times_NVIDIA A100-SXM4-80GB.pdf
Normal file
Binary file not shown.
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_fuse.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_fuse.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_greedy.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_greedy.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_random.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_random.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_reduce.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_reduce.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_split.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_split.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_ce_dt_fuse.pdf
Normal file
BIN
images/optim_plots/qed_k3_ce_dt_fuse.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_ce_dt_greedy.pdf
Normal file
BIN
images/optim_plots/qed_k3_ce_dt_greedy.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_ce_dt_random.pdf
Normal file
BIN
images/optim_plots/qed_k3_ce_dt_random.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_ce_dt_reduce.pdf
Normal file
BIN
images/optim_plots/qed_k3_ce_dt_reduce.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_ce_dt_split.pdf
Normal file
BIN
images/optim_plots/qed_k3_ce_dt_split.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_execution_fuse.pdf
Normal file
BIN
images/optim_plots/qed_k3_execution_fuse.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_execution_greedy.pdf
Normal file
BIN
images/optim_plots/qed_k3_execution_greedy.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_execution_random.pdf
Normal file
BIN
images/optim_plots/qed_k3_execution_random.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_execution_reduce.pdf
Normal file
BIN
images/optim_plots/qed_k3_execution_reduce.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_execution_split.pdf
Normal file
BIN
images/optim_plots/qed_k3_execution_split.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_nodes_edges_fuse.pdf
Normal file
BIN
images/optim_plots/qed_k3_nodes_edges_fuse.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_nodes_edges_greedy.pdf
Normal file
BIN
images/optim_plots/qed_k3_nodes_edges_greedy.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_nodes_edges_random.pdf
Normal file
BIN
images/optim_plots/qed_k3_nodes_edges_random.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_nodes_edges_reduce.pdf
Normal file
BIN
images/optim_plots/qed_k3_nodes_edges_reduce.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_nodes_edges_split.pdf
Normal file
BIN
images/optim_plots/qed_k3_nodes_edges_split.pdf
Normal file
Binary file not shown.
@ -197,8 +197,7 @@ function generate_operations(graph::DAG)
|
||||
|
||||
# launch thread for node reduction insertion
|
||||
# remove duplicates
|
||||
nr_task = @task nr_insertion!(graph.possibleOperations, generatedReductions)
|
||||
schedule(nr_task)
|
||||
nr_task = @spawn nr_insertion!(graph.possibleOperations, generatedReductions)
|
||||
|
||||
# --- find possible node fusions ---
|
||||
@threads for node in nodeArray
|
||||
@ -223,8 +222,7 @@ function generate_operations(graph::DAG)
|
||||
end
|
||||
|
||||
# launch thread for node fusion insertion
|
||||
nf_task = @task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
|
||||
schedule(nf_task)
|
||||
nf_task = @spawn nf_insertion!(graph, graph.possibleOperations, generatedFusions)
|
||||
|
||||
# find possible node splits
|
||||
@threads for node in nodeArray
|
||||
@ -234,8 +232,7 @@ function generate_operations(graph::DAG)
|
||||
end
|
||||
|
||||
# launch thread for node split insertion
|
||||
ns_task = @task ns_insertion!(graph.possibleOperations, generatedSplits)
|
||||
schedule(ns_task)
|
||||
ns_task = @spawn ns_insertion!(graph.possibleOperations, generatedSplits)
|
||||
|
||||
empty!(graph.dirtyNodes)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user