Use CUDA kernels in bench scripts

This commit is contained in:
2024-03-07 00:08:04 +01:00
parent 2e16b0dca7
commit d036f21862
11 changed files with 236 additions and 124 deletions

View File

@ -0,0 +1,9 @@
process_name,graph_gen_time,optimization_time,function_generation_time,graph_nodes,graph_edges,graph_mem,cpu_threads,n_inputs,nflops_likwid,cpu_time,cpu_std,cpu_rate,cpu_gflops,gpu_name,gpu_time,gpu_std,gpu_rate,gpu_gflops
QED Process: 'ke->ke' reduced,0.004851193,0.001290078,0.006093401,26,29,6948.0,32,10000000,0,0.2810178885,0.00909457898005121,3.5584923270818755e7,1.886000933353394,NVIDIA A100-SXM4-80GB,0.4060797745,0.0013320688448668838,2.462570319418851e7,1.305162269291991
QED Process: 'ke->kke' reduced,0.001065397,0.010432606,0.014287271,59,77,16383.0,32,10000000,0,0.823029796,0.01692859562197734,1.2150228398292398e7,2.223491796887509,NVIDIA A100-SXM4-80GB,2.3333098275,0.0014037935241043983,4.285757460128814e6,0.784293615203573
QED Process: 'ke->kkke' reduced,0.001348518,0.005210738,0.034243651,188,273,54426.0,32,10000000,0,2.9432864705,0.031053960614444084,3.397562588700793e6,2.497208502695083,NVIDIA A100-SXM4-80GB,10.340032588,0.0028660606476431714,967114.9403924877,0.7108294811884784
QED Process: 'ke->kkkke' reduced,0.004413783,0.039469525,0.15704043,853,1295,243781.0,32,10000000,0,14.980394603,0.5162977440607073,667539.1580137269,2.4318451526440072,NVIDIA A100-SXM4-80GB,54.2063089555,0.006347197107681703,184480.371246258,0.672061992450118
QED Process: 'ke->kkkkke' reduced,0.021871728,0.716956567,1.121625045,4982,7655,1.800816e6,32,10000000,0,82.035650126,0.3421310894344223,121898.22332901397,2.6545776094359375,NVIDIA A100-SXM4-80GB,321.789538108,NaN,31076.212293277757,0.6767466751107096
ABC Process: 'AB->AB' reduced,0.000867035,0.002263493,0.007340721,34,37,9296.0,32,10000000,0,0.1877912925,0.0029540808349122686,5.325060532292784e7,2.8222820821151755,NVIDIA A100-SXM4-80GB,0.0016617045,1.5729813606955104e-5,6.01791714471496e9,318.9496086698929
ABC Process: 'AB->ABBB' reduced,0.000547175,0.004720326,0.035918118,200,285,57156.0,32,10000000,0,0.257040364,0.007250633041861087,3.8904395575785905e7,28.59473074820264,NVIDIA A100-SXM4-80GB,0.003641165,3.2217340292524716e-5,2.74637375675093e9,2018.5847112119334
ABC Process: 'AB->ABBBBB' reduced,0.019826198,0.258674017,1.136386232,4998,7671,1.507432e6,32,10000000,0,1.818710381,0.03353568966350073,5.498401562156146e6,119.7386908190744,NVIDIA A100-SXM4-80GB,0.492263776,0.0031065569742746986,2.031431213821429e7,442.38477543389257
1 process_name graph_gen_time optimization_time function_generation_time graph_nodes graph_edges graph_mem cpu_threads n_inputs nflops_likwid cpu_time cpu_std cpu_rate cpu_gflops gpu_name gpu_time gpu_std gpu_rate gpu_gflops
2 QED Process: 'ke->ke' reduced 0.004851193 0.001290078 0.006093401 26 29 6948.0 32 10000000 0 0.2810178885 0.00909457898005121 3.5584923270818755e7 1.886000933353394 NVIDIA A100-SXM4-80GB 0.4060797745 0.0013320688448668838 2.462570319418851e7 1.305162269291991
3 QED Process: 'ke->kke' reduced 0.001065397 0.010432606 0.014287271 59 77 16383.0 32 10000000 0 0.823029796 0.01692859562197734 1.2150228398292398e7 2.223491796887509 NVIDIA A100-SXM4-80GB 2.3333098275 0.0014037935241043983 4.285757460128814e6 0.784293615203573
4 QED Process: 'ke->kkke' reduced 0.001348518 0.005210738 0.034243651 188 273 54426.0 32 10000000 0 2.9432864705 0.031053960614444084 3.397562588700793e6 2.497208502695083 NVIDIA A100-SXM4-80GB 10.340032588 0.0028660606476431714 967114.9403924877 0.7108294811884784
5 QED Process: 'ke->kkkke' reduced 0.004413783 0.039469525 0.15704043 853 1295 243781.0 32 10000000 0 14.980394603 0.5162977440607073 667539.1580137269 2.4318451526440072 NVIDIA A100-SXM4-80GB 54.2063089555 0.006347197107681703 184480.371246258 0.672061992450118
6 QED Process: 'ke->kkkkke' reduced 0.021871728 0.716956567 1.121625045 4982 7655 1.800816e6 32 10000000 0 82.035650126 0.3421310894344223 121898.22332901397 2.6545776094359375 NVIDIA A100-SXM4-80GB 321.789538108 NaN 31076.212293277757 0.6767466751107096
7 ABC Process: 'AB->AB' reduced 0.000867035 0.002263493 0.007340721 34 37 9296.0 32 10000000 0 0.1877912925 0.0029540808349122686 5.325060532292784e7 2.8222820821151755 NVIDIA A100-SXM4-80GB 0.0016617045 1.5729813606955104e-5 6.01791714471496e9 318.9496086698929
8 ABC Process: 'AB->ABBB' reduced 0.000547175 0.004720326 0.035918118 200 285 57156.0 32 10000000 0 0.257040364 0.007250633041861087 3.8904395575785905e7 28.59473074820264 NVIDIA A100-SXM4-80GB 0.003641165 3.2217340292524716e-5 2.74637375675093e9 2018.5847112119334
9 ABC Process: 'AB->ABBBBB' reduced 0.019826198 0.258674017 1.136386232 4998 7671 1.507432e6 32 10000000 0 1.818710381 0.03353568966350073 5.498401562156146e6 119.7386908190744 NVIDIA A100-SXM4-80GB 0.492263776 0.0031065569742746986 2.031431213821429e7 442.38477543389257

View File

@ -0,0 +1,143 @@
using CSV
using DataFrames
using Plots
using StatsPlots
using LaTeXStrings
# A CSV input file is required as the first command line argument. Abort with a
# non-zero exit code when it is missing; otherwise the script would continue and
# fail further down on `ARGS[1]` with an unhelpful `BoundsError`.
if isempty(ARGS)
    println(stderr, "Please use with \"input_file.csv\"")
    exit(1)
end
# Process names handled by this script: the QED processes 'ke->k^n e' for
# n = 1..5 and the ABC processes 'AB->A B^n' for n = 1, 3, 5.
# Widen the QED range (e.g. to 1:7) to also list the larger
# 'ke->kkkkkke' and 'ke->kkkkkkke' processes.
processes = vcat(
    [string("QED Process: 'ke->", "k"^n, "e'") for n in 1:5],
    [string("ABC Process: 'AB->A", "B"^n, "'") for n in (1, 3, 5)],
)
"""
    proc_to_n(str::AbstractString)

Return the number of `'k'` characters in the final state of a process name.

The name must contain the reaction between single quotes, written as
`initial->final`; only the part after `->` is counted. For example
`"QED Process: 'ke->kkke' reduced"` yields `3`.
"""
function proc_to_n(str::AbstractString)
    reaction = split(str, "'")[2]
    final_state = split(reaction, "->")[2]
    return count(==('k'), final_state)
end
"""
    abc_proc_to_n(str::AbstractString)

Return the number of `'B'` characters in the final state of a process name.

The name must contain the reaction between single quotes, written as
`initial->final`; only the part after `->` is counted. For example
`"ABC Process: 'AB->ABBB' reduced"` yields `3`.
"""
function abc_proc_to_n(str::AbstractString)
    quoted = split(str, "'")
    sides = split(quoted[2], "->")
    return count(isequal('B'), sides[2])
end
"""
    beautify_title(str::AbstractString)

Build a plot title from a process name of the form `prefix'initial->final'suffix`.

The quoted reaction is rendered as a LaTeX string with an arrow between the
initial and the final state. A run of several `k` (or `B`) characters in the
final state is collapsed into a power with the number of occurrences as the
exponent. The text before and after the quoted reaction is kept unchanged.

A final state that contains neither `k` nor `B` is passed through unchanged
(previously this case left the suffix unassigned and raised an `UndefVarError`).
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")

    preprefix = parts[1]
    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")

    prefix = parts[1]
    suffix = parts[2]

    k_count = count(c -> c == 'k', suffix)
    B_count = count(c -> c == 'B', suffix)

    new_suffix = if k_count == 1 || B_count == 1
        # A single particle needs no exponent.
        suffix
    elseif k_count > 1
        replace(suffix, r"k+" => "k^$k_count")
    elseif B_count > 1
        replace(suffix, r"B+" => "B^$B_count")
    else
        # Neither 'k' nor 'B' present: nothing to collapse.
        suffix
    end

    return preprefix * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
# Load the benchmark results from the CSV file given as first argument.
input_file = ARGS[1]
df = CSV.read(input_file, DataFrame)

# Number of inputs per benchmark, read from the first row
# (assumes every row uses the same value -- TODO confirm).
n_inputs = df[1, "n_inputs"]

# Insert thousands separators (10000000 -> "10,000,000") so the title reflects
# the data in the file instead of a hard-coded count.
n_inputs_str = replace(string(n_inputs), r"(?<=\d)(?=(\d{3})+$)" => ",")
title_string = "QED N-Photon Compton Scattering\nCalculate $n_inputs_str Matrix Elements"

# Keep the rows with at least one 'k' in the final state and use that count
# as the process size on the x axis.
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
df_filt.process_size = proc_to_n.(df_filt.process_name)

# Only plot the rows whose process name ends in " reduced".
df_red = filter(:process_name => x -> occursin(r" reduced$", x), df_filt)

# CPU timings start a new plot ...
@df df_red scatter(
    :process_size,
    :cpu_time,
    yerror = :cpu_std,
    label = "CPU execution time, 32 threads (s)",
    markersize = 6,
)

# ... and the GPU timings are added to the same plot.
@df df_red scatter!(
    :process_size,
    :gpu_time,
    yerror = :gpu_std,
    label = "GPU execution time, A100 80GB (s)",
    markersize = 6,
)

# Shared styling: log-scaled time axis, legend below the plot.
plot!(
    title = title_string,
    yscale = :log10,
    legend = :outerbottom,
    legendcolumns = 2,
    legend_font_pointsize = 10,
    size = (800, 600),
    ylabel = "time (s)",
    xlabel = "process size (#)",
)

savefig("cpu_vs_gpu_qed.pdf")
# Take the number of inputs from the first row of the data
# (assumes every row uses the same value -- TODO confirm) and insert thousands
# separators (10000000 -> "10,000,000") instead of hard-coding the count.
n_inputs_str = replace(string(df[1, "n_inputs"]), r"(?<=\d)(?=(\d{3})+$)" => ",")
title_string = "\$AB\\rightarrow AB^n\$ ABC Processes\nCalculate $n_inputs_str Matrix Elements"

# Keep the rows with at least one 'B' in the final state and use that count
# as the process size on the x axis.
df_filt = filter(:process_name => x -> abc_proc_to_n(x) >= 1, df)
df_filt.process_size = abc_proc_to_n.(df_filt.process_name)

# Only plot the rows whose process name ends in " reduced".
df_red = filter(:process_name => x -> occursin(r" reduced$", x), df_filt)

# CPU timings start a new plot ...
@df df_red scatter(
    :process_size,
    :cpu_time,
    yerror = :cpu_std,
    label = "CPU execution time, 32 threads (s)",
    markersize = 6,
)

# ... and the GPU timings are added to the same plot.
@df df_red scatter!(
    :process_size,
    :gpu_time,
    yerror = :gpu_std,
    label = "GPU execution time, A100 80GB (s)",
    markersize = 6,
)

# Shared styling: log-scaled time axis, legend below the plot.
plot!(
    title = title_string,
    yscale = :log10,
    legend = :outerbottom,
    legendcolumns = 2,
    legend_font_pointsize = 10,
    size = (800, 600),
    ylabel = "time (s)",
    xlabel = "process size (#)",
)

savefig("cpu_vs_gpu_abc.pdf")