experiments #1

Merged
rubydragon merged 39 commits from experiments into main 2024-05-08 12:03:28 +02:00
30 changed files with 36 additions and 22 deletions
Showing only changes of commit ddfc9191d5 - Show all commits

View File

@ -0,0 +1,17 @@
process_name,graph_gen_time,optimization_time,function_generation_time,graph_nodes,graph_edges,graph_mem,cpu_threads,n_inputs,nflops_likwid,cpu_time,cpu_std,cpu_rate,cpu_gflops,gpu_name,gpu_time,gpu_std,gpu_rate,gpu_gflops
QED Process: 'ke->ke' not optimized,0.451921113,0.0,2.14306036,26,29,6532.0,32,1048576,0,0.0295500115,8.885463496445566e-5,3.548479160490344e7,193.67599257956294,NVIDIA A100-SXM4-80GB,0.0005345045,0.00015057262207453253,1.9617720711425252e9,10707.351964295902
QED Process: 'ke->ke' reduced,0.451921113,2.351144988,0.007459328,26,29,6948.0,32,1048576,0,0.032075236,0.0012127475826961272,3.2691139045711152e7,178.42823691149147,NVIDIA A100-SXM4-80GB,0.0004144495,2.314476769164037e-5,2.530045276927587e9,13808.98712147077
QED Process: 'ke->kke' not optimized,0.000756524,0.0,0.227593109,77,101,19175.0,32,1048576,0,0.118123535,0.005439195292530201,8.876943955326091e6,207.0258676901263,NVIDIA A100-SXM4-80GB,0.000764644,3.164787574545881e-5,1.3713257411292052e9,31981.716103180042
QED Process: 'ke->kke' reduced,0.000756524,2.213239148,0.012998109,59,77,16383.0,32,1048576,0,0.090563566,0.003566800695425846,1.1578342663759507e7,190.03244355461888,NVIDIA A100-SXM4-80GB,0.000896431,1.2561601767556025e-5,1.16972304616864e9,19198.371926004344
QED Process: 'ke->kkke' not optimized,0.001148917,0.0,0.197975335,356,493,85898.0,32,1048576,0,0.845482461,0.07123353122024402,1.2402102330541424e6,153.60189767910512,NVIDIA A100-SXM4-80GB,0.0051661485,4.558145844273378e-5,2.0297054953027385e8,25138.20701514871
QED Process: 'ke->kkke' reduced,0.001148917,0.025159492,0.03528292,188,273,54426.0,32,1048576,0,0.331087292,0.05694664098686965,3.1670680975577887e6,193.5822868610735,NVIDIA A100-SXM4-80GB,0.0052421655,1.2131630189989583e-5,2.0002725972692013e8,12226.366209918402
QED Process: 'ke->kkkke' not optimized,0.003466628,0.0,0.835965949,2183,3015,504653.0,32,1048576,0,4.038223548,NaN,259662.69265091213,202.89659801270614,NVIDIA A100-SXM4-80GB,0.032951423,0.00027221495848960984,3.18218730644804e7,24865.142239957284
QED Process: 'ke->kkkke' reduced,0.003466628,0.036556559,0.159378554,853,1295,243781.0,32,1048576,0,1.434366504,0.0016660912785982046,731037.7069429948,215.14457991275012,NVIDIA A100-SXM4-80GB,0.02999597,1.213502455704149e-5,3.495722925446318e7,10287.921308895828
QED Process: 'ke->kkkkke' not optimized,0.020601425,0.0,11.212760712,15866,21617,3.982352e6,32,1048576,0,28.474378922,NaN,36825.24570149078,209.22863596806917,NVIDIA A100-SXM4-80GB,0.2143423735,0.0005946906538251631,4.89206115840646e6,27795.042877455122
QED Process: 'ke->kkkkke' reduced,0.020601425,0.70128262,1.171371284,4982,7655,1.807728e6,32,1048576,0,9.117928529,NaN,115001.5594731802,200.7318870152113,NVIDIA A100-SXM4-80GB,0.218809487,0.0012623373353923995,4.792187095617111e6,8364.623601973895
ABC Process: 'AB->AB' not optimized,0.971094109,0.0,2.060371784,34,37,8624.0,32,1048576,0,0.021601539,0.002037950755102709,4.854172658716585e7,2.5727115091197903,NVIDIA A100-SXM4-80GB,0.0001573225,0.011099784405901598,6.665136900316229e9,353.25225571676015
ABC Process: 'AB->AB' reduced,0.971094109,2.513935435,0.00827303,34,37,9296.0,32,1048576,0,0.019712653,0.0006279822486413927,5.3193043067313164e7,2.819231282567598,NVIDIA A100-SXM4-80GB,0.00015603,5.183805230394624e-6,6.720348650900468e9,356.17847849772477
ABC Process: 'AB->ABBB' not optimized,0.028151784,0.0,0.282960646,280,385,69428.0,32,1048576,0,0.026436742,0.00026485415525001675,3.966358638292117e7,42.63835536164025,NVIDIA A100-SXM4-80GB,0.0003672985,0.00012555745652834268,2.85483333038387e9,3068.94583016266
ABC Process: 'AB->ABBB' reduced,0.028151784,2.479253592,0.036600485,200,285,57156.0,32,1048576,0,0.02722382,0.0013358585629396548,3.851685766361958e7,28.30989038276039,NVIDIA A100-SXM4-80GB,0.000442651,3.118776404724562e-5,2.3688549218232875e9,1741.1083675401162
ABC Process: 'AB->ABBBBB' not optimized,0.020961406,0.0,6.527425109,7854,11241,1.982968e6,32,1048576,0,0.153860476,0.005632691248110826,6.815109554191162e6,231.13444053039325,NVIDIA A100-SXM4-80GB,0.032277095,2.7966680501983068e-5,3.2486690639290806e7,1101.7861130315475
ABC Process: 'AB->ABBBBB' reduced,0.020961406,0.240323814,1.157408425,4998,7671,1.507432e6,32,1048576,0,0.205006419,0.00615246960279379,5.114844721032857e6,111.38597348993252,NVIDIA A100-SXM4-80GB,0.065801576,0.00019093082620729427,1.5935423795928536e7,347.02572400393575
1 process_name graph_gen_time optimization_time function_generation_time graph_nodes graph_edges graph_mem cpu_threads n_inputs nflops_likwid cpu_time cpu_std cpu_rate cpu_gflops gpu_name gpu_time gpu_std gpu_rate gpu_gflops
2 QED Process: 'ke->ke' not optimized 0.451921113 0.0 2.14306036 26 29 6532.0 32 1048576 0 0.0295500115 8.885463496445566e-5 3.548479160490344e7 193.67599257956294 NVIDIA A100-SXM4-80GB 0.0005345045 0.00015057262207453253 1.9617720711425252e9 10707.351964295902
3 QED Process: 'ke->ke' reduced 0.451921113 2.351144988 0.007459328 26 29 6948.0 32 1048576 0 0.032075236 0.0012127475826961272 3.2691139045711152e7 178.42823691149147 NVIDIA A100-SXM4-80GB 0.0004144495 2.314476769164037e-5 2.530045276927587e9 13808.98712147077
4 QED Process: 'ke->kke' not optimized 0.000756524 0.0 0.227593109 77 101 19175.0 32 1048576 0 0.118123535 0.005439195292530201 8.876943955326091e6 207.0258676901263 NVIDIA A100-SXM4-80GB 0.000764644 3.164787574545881e-5 1.3713257411292052e9 31981.716103180042
5 QED Process: 'ke->kke' reduced 0.000756524 2.213239148 0.012998109 59 77 16383.0 32 1048576 0 0.090563566 0.003566800695425846 1.1578342663759507e7 190.03244355461888 NVIDIA A100-SXM4-80GB 0.000896431 1.2561601767556025e-5 1.16972304616864e9 19198.371926004344
6 QED Process: 'ke->kkke' not optimized 0.001148917 0.0 0.197975335 356 493 85898.0 32 1048576 0 0.845482461 0.07123353122024402 1.2402102330541424e6 153.60189767910512 NVIDIA A100-SXM4-80GB 0.0051661485 4.558145844273378e-5 2.0297054953027385e8 25138.20701514871
7 QED Process: 'ke->kkke' reduced 0.001148917 0.025159492 0.03528292 188 273 54426.0 32 1048576 0 0.331087292 0.05694664098686965 3.1670680975577887e6 193.5822868610735 NVIDIA A100-SXM4-80GB 0.0052421655 1.2131630189989583e-5 2.0002725972692013e8 12226.366209918402
8 QED Process: 'ke->kkkke' not optimized 0.003466628 0.0 0.835965949 2183 3015 504653.0 32 1048576 0 4.038223548 NaN 259662.69265091213 202.89659801270614 NVIDIA A100-SXM4-80GB 0.032951423 0.00027221495848960984 3.18218730644804e7 24865.142239957284
9 QED Process: 'ke->kkkke' reduced 0.003466628 0.036556559 0.159378554 853 1295 243781.0 32 1048576 0 1.434366504 0.0016660912785982046 731037.7069429948 215.14457991275012 NVIDIA A100-SXM4-80GB 0.02999597 1.213502455704149e-5 3.495722925446318e7 10287.921308895828
10 QED Process: 'ke->kkkkke' not optimized 0.020601425 0.0 11.212760712 15866 21617 3.982352e6 32 1048576 0 28.474378922 NaN 36825.24570149078 209.22863596806917 NVIDIA A100-SXM4-80GB 0.2143423735 0.0005946906538251631 4.89206115840646e6 27795.042877455122
11 QED Process: 'ke->kkkkke' reduced 0.020601425 0.70128262 1.171371284 4982 7655 1.807728e6 32 1048576 0 9.117928529 NaN 115001.5594731802 200.7318870152113 NVIDIA A100-SXM4-80GB 0.218809487 0.0012623373353923995 4.792187095617111e6 8364.623601973895
12 ABC Process: 'AB->AB' not optimized 0.971094109 0.0 2.060371784 34 37 8624.0 32 1048576 0 0.021601539 0.002037950755102709 4.854172658716585e7 2.5727115091197903 NVIDIA A100-SXM4-80GB 0.0001573225 0.011099784405901598 6.665136900316229e9 353.25225571676015
13 ABC Process: 'AB->AB' reduced 0.971094109 2.513935435 0.00827303 34 37 9296.0 32 1048576 0 0.019712653 0.0006279822486413927 5.3193043067313164e7 2.819231282567598 NVIDIA A100-SXM4-80GB 0.00015603 5.183805230394624e-6 6.720348650900468e9 356.17847849772477
14 ABC Process: 'AB->ABBB' not optimized 0.028151784 0.0 0.282960646 280 385 69428.0 32 1048576 0 0.026436742 0.00026485415525001675 3.966358638292117e7 42.63835536164025 NVIDIA A100-SXM4-80GB 0.0003672985 0.00012555745652834268 2.85483333038387e9 3068.94583016266
15 ABC Process: 'AB->ABBB' reduced 0.028151784 2.479253592 0.036600485 200 285 57156.0 32 1048576 0 0.02722382 0.0013358585629396548 3.851685766361958e7 28.30989038276039 NVIDIA A100-SXM4-80GB 0.000442651 3.118776404724562e-5 2.3688549218232875e9 1741.1083675401162
16 ABC Process: 'AB->ABBBBB' not optimized 0.020961406 0.0 6.527425109 7854 11241 1.982968e6 32 1048576 0 0.153860476 0.005632691248110826 6.815109554191162e6 231.13444053039325 NVIDIA A100-SXM4-80GB 0.032277095 2.7966680501983068e-5 3.2486690639290806e7 1101.7861130315475
17 ABC Process: 'AB->ABBBBB' reduced 0.020961406 0.240323814 1.157408425 4998 7671 1.507432e6 32 1048576 0 0.205006419 0.00615246960279379 5.114844721032857e6 111.38597348993252 NVIDIA A100-SXM4-80GB 0.065801576 0.00019093082620729427 1.5935423795928536e7 347.02572400393575

View File

@ -67,7 +67,7 @@ n_inputs = df[:, "n_inputs"][1]
title_string = "QED N-Photon Compton Scattering\nCalculate 10,000,000 Matrix Elements"
title_string = "QED N-Photon Compton Scattering\nCalculate 1,048,576 (\$2^{20}\$) Matrix Elements"
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
@ -106,7 +106,7 @@ savefig("cpu_vs_gpu_qed.pdf")
title_string = "\$AB\\rightarrow AB^n\$ ABC Processes\nCalculate 10,000,000 Matrix Elements"
title_string = "\$AB\\rightarrow AB^n\$ ABC Processes\nCalculate 1,048,576 (\$2^{20}\$) Matrix Elements"
df_filt = filter(:process_name => x -> abc_proc_to_n(x) >= 1, df)

View File

@ -12,11 +12,11 @@ processes = [
"QED Process: 'ke->ke'",
"QED Process: 'ke->kke'",
"QED Process: 'ke->kkke'",
#"QED Process: 'ke->kkkke'",
#"QED Process: 'ke->kkkkke'",
"QED Process: 'ke->kkkke'",
"QED Process: 'ke->kkkkke'",
"ABC Process: 'AB->AB'",
"ABC Process: 'AB->ABBB'",
#"ABC Process: 'AB->ABBBBB'",
"ABC Process: 'AB->ABBBBB'",
]
function proc_to_n(str::AbstractString)
@ -65,23 +65,23 @@ end
title_string = "GPU $gpu_name, $n_inputs samples"
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
df_filt.gpu_rate = df_filt.gpu_rate .* 1e9
df_filt.gpu_time = df_filt.gpu_time ./ 1e9
df_filt.gpu_gflops = df_filt.gpu_gflops .* 1e9
df_filt.gpu_rate = df_filt.gpu_rate
df_filt.gpu_time = df_filt.gpu_time
df_filt.gpu_gflops = df_filt.gpu_gflops
df_filt.process_size = @. proc_to_n(df_filt.process_name)
df_no_opt = filter(:process_name => x -> match(r" no optimization$", x) !== nothing, df_filt)
df_no_opt = filter(:process_name => x -> match(r" not optimized$", x) !== nothing, df_filt)
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
@df df_no_opt scatter(:process_size, :gpu_rate, label = "unoptimized function execution rate", markersize = 7)
@df df_red scatter!(:process_size, :gpu_rate, label = "reduced function execution rate", markersize = 7)
plot!(
title = title_string * ", sample rate",
#title = title_string * ", sample rate",
yscale = :log10,
legend = :outerbottom,
xticks = [1, 2, 3],
xticks = [1, 2, 3, 4, 5],
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
@ -97,10 +97,10 @@ savefig("gpu_rate_$(gpu_name).pdf")
@df df_red scatter!(:process_size, :gpu_time, label = "reduced function execution time", markersize = 7)
plot!(
title = title_string * ", execution time",
#title = title_string * ", execution time",
yscale = :log10,
legend = :outerbottom,
xticks = [1, 2, 3],
xticks = [1, 2, 3, 4, 5],
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
@ -116,10 +116,10 @@ savefig("gpu_times_$(gpu_name).pdf")
@df df_red scatter!(:process_size, :gpu_gflops, label = "reduced function", markersize = 7)
plot!(
title = title_string * ", GFLOPS",
#title = title_string * ", GFLOPS",
yscale = :linear,
legend = :outerbottom,
xticks = [1, 2, 3],
xticks = [1, 2, 3, 4, 5],
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),

View File

@ -22,4 +22,4 @@ lspci > results/pci_bench_reduce_gpu.txt
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
echo "Benchmarking Reduction 32 Threads, *GPU*"
julia --project --threads=32 examples/qed_bench_reduction_steps_gpu.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
julia --project -O3 --threads=32 examples/qed_bench_reduction_steps_gpu.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -197,8 +197,7 @@ function generate_operations(graph::DAG)
# launch thread for node reduction insertion
# remove duplicates
nr_task = @task nr_insertion!(graph.possibleOperations, generatedReductions)
schedule(nr_task)
nr_task = @spawn nr_insertion!(graph.possibleOperations, generatedReductions)
# --- find possible node fusions ---
@threads for node in nodeArray
@ -223,8 +222,7 @@ function generate_operations(graph::DAG)
end
# launch thread for node fusion insertion
nf_task = @task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
schedule(nf_task)
nf_task = @spawn nf_insertion!(graph, graph.possibleOperations, generatedFusions)
# find possible node splits
@threads for node in nodeArray
@ -234,8 +232,7 @@ function generate_operations(graph::DAG)
end
# launch thread for node split insertion
ns_task = @task ns_insertion!(graph.possibleOperations, generatedSplits)
schedule(ns_task)
ns_task = @spawn ns_insertion!(graph.possibleOperations, generatedSplits)
empty!(graph.dirtyNodes)