Compare commits
4 Commits
heterogene
...
main
Author | SHA1 | Date | |
---|---|---|---|
d888713e97 | |||
38e7ff3b90 | |||
7d7782f97f | |||
87dbaf2c32 |
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -1,2 +1,3 @@
|
||||
input/AB->ABBBBBBBBB.txt filter=lfs diff=lfs merge=lfs -text
|
||||
input/AB->ABBBBBBB.txt filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
|
@ -17,9 +17,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Julia environment
|
||||
uses: https://github.com/julia-actions/setup-julia@v1.9.2
|
||||
uses: https://github.com/julia-actions/setup-julia@v2
|
||||
with:
|
||||
version: '1.9.2'
|
||||
version: '1.10'
|
||||
|
||||
- name: Instantiate
|
||||
run: |
|
||||
@ -46,6 +46,7 @@ jobs:
|
||||
run: |
|
||||
julia --project=examples/ -e 'using Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); Pkg.precompile()'
|
||||
julia --project=examples/ -t 4 -e 'include("examples/import_bench.jl")' -O3
|
||||
julia --project=examples/ -t 4 -e 'include("examples/ab5.jl")' -O3
|
||||
|
||||
docs:
|
||||
runs-on: ubuntu-22.04
|
||||
@ -57,9 +58,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Julia environment
|
||||
uses: https://github.com/julia-actions/setup-julia@v1.9.2
|
||||
uses: https://github.com/julia-actions/setup-julia@v2
|
||||
with:
|
||||
version: '1.9.2'
|
||||
version: '1.10'
|
||||
|
||||
- name: Build docs
|
||||
run: |
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -30,3 +30,7 @@ Manifest.toml
|
||||
.julia
|
||||
**/.ipynb_checkpoints/
|
||||
*.bkp
|
||||
|
||||
*.sif
|
||||
|
||||
data/hemera_temp
|
||||
|
@ -4,8 +4,10 @@ authors = ["Anton Reinhard <anton.reinhard@proton.me>"]
|
||||
version = "0.1.0"
|
||||
|
||||
[deps]
|
||||
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
|
||||
AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753"
|
||||
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
|
||||
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
|
||||
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
|
||||
JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
|
||||
@ -17,8 +19,10 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
|
||||
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
|
||||
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
|
||||
|
||||
[extras]
|
||||
CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
||||
[targets]
|
||||
|
170
data/evaluate.jl
Normal file
170
data/evaluate.jl
Normal file
@ -0,0 +1,170 @@
|
||||
using CSV
|
||||
using DataFrames
|
||||
using Plots
|
||||
using StatsPlots
|
||||
using LaTeXStrings
|
||||
|
||||
# Abort with a usage hint when no input file was given. Without the explicit
# exit the script would continue and fail with a BoundsError on `ARGS[1]`.
if length(ARGS) < 1
    println("Please use with \"input_file.csv\"")
    exit(1)
end
|
||||
|
||||
processes = [
|
||||
"QED Process: 'ke->ke'",
|
||||
"QED Process: 'ke->kke'",
|
||||
"QED Process: 'ke->kkke'",
|
||||
#"QED Process: 'ke->kkkke'",
|
||||
#"QED Process: 'ke->kkkkke'",
|
||||
#"QED Process: 'ke->kkkkkke'",
|
||||
#"QED Process: 'ke->kkkkkkke'",
|
||||
"ABC Process: 'AB->AB'",
|
||||
"ABC Process: 'AB->ABBB'",
|
||||
#"ABC Process: 'AB->ABBBBB'",
|
||||
]
|
||||
|
||||
"""
    proc_to_n(str::AbstractString)

Return the number of `'k'` characters on the right-hand side of the `->` in
the single-quoted process definition contained in `str`
(e.g. `"QED Process: 'ke->kke' reduced"` gives `2`).

Throws a `BoundsError` if `str` has no quoted section or the quoted section
has no `->`.
"""
function proc_to_n(str::AbstractString)
    process = split(str, "'")[2]
    outgoing = split(process, "->")[2]
    return count(==('k'), outgoing)
end
|
||||
|
||||
"""
    beautify_title(str::AbstractString)

Turn a process name of the form `"<text> 'in->out'<text>"` into a plot title.

The quoted `in->out` part is rendered through a `LaTeXString` with a
`\\rightarrow`; a repeated run of `k` or `B` in the final state is collapsed
into power notation (`kkk` becomes `k^3`). The text before and after the
quotes is kept unchanged.

Throws a `BoundsError` if `str` does not contain two single quotes or the
quoted part has no `->`.
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")

    preprefix = parts[1]
    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")

    prefix = parts[1]
    suffix = parts[2]

    k_count = count(==('k'), suffix)
    B_count = count(==('B'), suffix)

    new_suffix = if k_count == 1 || B_count == 1
        # a single particle needs no exponent
        suffix
    elseif k_count >= 1
        replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        replace(suffix, r"B+" => "B^$B_count")
    else
        # neither 'k' nor 'B' present: keep the final state as written
        # (previously `new_suffix` stayed undefined and raised UndefVarError)
        suffix
    end

    return preprefix * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
|
||||
|
||||
input_file = ARGS[1]
|
||||
df = CSV.read(input_file, DataFrame)
|
||||
n_inputs = df[:, "n_inputs"][1]
|
||||
|
||||
# plotting with threads as x axis
|
||||
for process_name in processes
|
||||
title_string = "$(beautify_title(process_name)), $n_inputs samples"
|
||||
println("$title_string")
|
||||
|
||||
process_no_opt = process_name * " not optimized"
|
||||
process_red = process_name * " reduced"
|
||||
process_tape_no_opt = process_name * " not optimized tape"
|
||||
process_tape_red = process_name * " reduced tape"
|
||||
|
||||
df_no_opt = filter(:process_name => x -> x == process_no_opt, df)
|
||||
df_red = filter(:process_name => x -> x == process_red, df)
|
||||
df_tape_no_opt = filter(:process_name => x -> x == process_tape_no_opt, df)
|
||||
df_tape_red = filter(:process_name => x -> x == process_tape_red, df)
|
||||
|
||||
@df df_no_opt scatter(:cpu_threads, :cpu_time, label = "unoptimized function", markershape = :circle)
|
||||
@df df_red scatter!(:cpu_threads, :cpu_time, label = "reduced function", markershape = :rect)
|
||||
@df df_tape_no_opt scatter!(:cpu_threads, :cpu_time, label = "unoptimized tape", markershape = :utriangle)
|
||||
@df df_tape_red scatter!(:cpu_threads, :cpu_time, label = "reduced tape", markershape = :star)
|
||||
|
||||
plot!(
|
||||
title = title_string,
|
||||
yscale = :linear,
|
||||
legend = :outerright,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "execution time (s)",
|
||||
xlabel = "threads (#)",
|
||||
)
|
||||
|
||||
savefig("$(process_name)_time.pdf")
|
||||
|
||||
|
||||
@df df_no_opt scatter(:cpu_threads, :cpu_rate, label = "unoptimized function", markershape = :circle)
|
||||
@df df_red scatter!(:cpu_threads, :cpu_rate, label = "reduced function", markershape = :rect)
|
||||
@df df_tape_no_opt scatter!(:cpu_threads, :cpu_rate, label = "unoptimized tape", markershape = :utriangle)
|
||||
@df df_tape_red scatter!(:cpu_threads, :cpu_rate, label = "reduced tape", markershape = :star)
|
||||
|
||||
plot!(
|
||||
title = "Sample rate, " * title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerright,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "rate (" * L"s^{-1}" * ")",
|
||||
xlabel = "threads (#)",
|
||||
)
|
||||
|
||||
savefig("$(process_name)_rate.pdf")
|
||||
|
||||
|
||||
@df df_no_opt scatter(:cpu_threads, :cpu_gflops, label = "unoptimized function", markershape = :circle)
|
||||
@df df_red scatter!(:cpu_threads, :cpu_gflops, label = "reduced function", markershape = :rect)
|
||||
@df df_tape_no_opt scatter!(:cpu_threads, :cpu_gflops, label = "unoptimized tape", markershape = :utriangle)
|
||||
@df df_tape_red scatter!(:cpu_threads, :cpu_gflops, label = "reduced tape", markershape = :star)
|
||||
|
||||
plot!(
|
||||
title = "CPU performance, " * title_string,
|
||||
yscale = :linear,
|
||||
legend = :outerright,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "CPU performance (GFLOPS)",
|
||||
xlabel = "threads (#)",
|
||||
)
|
||||
|
||||
savefig("$(process_name)_performance.pdf")
|
||||
|
||||
end
|
||||
|
||||
# plotting with process size as x axis
|
||||
THREADS = [1, 2, 4, 8, 16]
|
||||
|
||||
for threads in THREADS
|
||||
title_string = "$threads threads, $n_inputs samples"
|
||||
|
||||
df_filt = filter(:cpu_threads => x -> x == threads, df)
|
||||
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df_filt)
|
||||
|
||||
df_filt.process_size = @. proc_to_n(df_filt.process_name)
|
||||
|
||||
df_no_opt = filter(:process_name => x -> match(r" not optimized$", x) !== nothing, df_filt)
|
||||
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
|
||||
df_tape_no_opt = filter(:process_name => x -> match(r" not optimized tape$", x) !== nothing, df_filt)
|
||||
df_tape_red = filter(:process_name => x -> match(r" reduced tape$", x) !== nothing, df_filt)
|
||||
|
||||
|
||||
@df df_no_opt scatter(:process_size, :graph_gen_time, label = "graph generation time")
|
||||
@df df_red scatter!(:process_size, :optimization_time, label = "optimization time")
|
||||
@df df_no_opt scatter!(:process_size, :function_generation_time, label = "unoptimized function generation time")
|
||||
@df df_tape_no_opt scatter!(:process_size, :function_generation_time, label = "unoptimized tape generation time")
|
||||
@df df_red scatter!(:process_size, :function_generation_time, label = "reduced function generation time")
|
||||
@df df_tape_red scatter!(:process_size, :function_generation_time, label = "reduced tape generation time")
|
||||
|
||||
plot!(
|
||||
title = "function generation times, " * title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "time (s)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("gen_times_$(threads)_threads.pdf")
|
||||
end
|
143
data/evaluate_cpu_gpu_exec.jl
Normal file
143
data/evaluate_cpu_gpu_exec.jl
Normal file
@ -0,0 +1,143 @@
|
||||
using CSV
|
||||
using DataFrames
|
||||
using Plots
|
||||
using StatsPlots
|
||||
using LaTeXStrings
|
||||
|
||||
# Abort with a usage hint when no input file was given. Without the explicit
# exit the script would continue and fail with a BoundsError on `ARGS[1]`.
if length(ARGS) < 1
    println("Please use with \"input_file.csv\"")
    exit(1)
end
|
||||
|
||||
processes = [
|
||||
"QED Process: 'ke->ke'",
|
||||
"QED Process: 'ke->kke'",
|
||||
"QED Process: 'ke->kkke'",
|
||||
"QED Process: 'ke->kkkke'",
|
||||
"QED Process: 'ke->kkkkke'",
|
||||
#"QED Process: 'ke->kkkkkke'",
|
||||
#"QED Process: 'ke->kkkkkkke'",
|
||||
"ABC Process: 'AB->AB'",
|
||||
"ABC Process: 'AB->ABBB'",
|
||||
"ABC Process: 'AB->ABBBBB'",
|
||||
]
|
||||
|
||||
"""
    proc_to_n(str::AbstractString)

Return the number of `'k'` characters on the right-hand side of the `->` in
the single-quoted process definition contained in `str`
(e.g. `"QED Process: 'ke->kke' reduced"` gives `2`).

Throws a `BoundsError` if `str` has no quoted section or the quoted section
has no `->`.
"""
function proc_to_n(str::AbstractString)
    process = split(str, "'")[2]
    outgoing = split(process, "->")[2]
    return count(==('k'), outgoing)
end
|
||||
|
||||
"""
    abc_proc_to_n(str::AbstractString)

Return the number of `'B'` characters on the right-hand side of the `->` in
the single-quoted process definition contained in `str`
(e.g. `"ABC Process: 'AB->ABBB' reduced"` gives `3`).

Throws a `BoundsError` if `str` has no quoted section or the quoted section
has no `->`.
"""
function abc_proc_to_n(str::AbstractString)
    process = split(str, "'")[2]
    outgoing = split(process, "->")[2]
    return count(==('B'), outgoing)
end
|
||||
|
||||
"""
    beautify_title(str::AbstractString)

Turn a process name of the form `"<text> 'in->out'<text>"` into a plot title.

The quoted `in->out` part is rendered through a `LaTeXString` with a
`\\rightarrow`; a repeated run of `k` or `B` in the final state is collapsed
into power notation (`kkk` becomes `k^3`). The text before and after the
quotes is kept unchanged.

Throws a `BoundsError` if `str` does not contain two single quotes or the
quoted part has no `->`.
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")

    preprefix = parts[1]
    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")

    prefix = parts[1]
    suffix = parts[2]

    k_count = count(==('k'), suffix)
    B_count = count(==('B'), suffix)

    new_suffix = if k_count == 1 || B_count == 1
        # a single particle needs no exponent
        suffix
    elseif k_count >= 1
        replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        replace(suffix, r"B+" => "B^$B_count")
    else
        # neither 'k' nor 'B' present: keep the final state as written
        # (previously `new_suffix` stayed undefined and raised UndefVarError)
        suffix
    end

    return preprefix * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
|
||||
|
||||
input_file = ARGS[1]
|
||||
df = CSV.read(input_file, DataFrame)
|
||||
n_inputs = df[:, "n_inputs"][1]
|
||||
|
||||
|
||||
|
||||
title_string = "QED N-Photon Compton Scattering\nCalculate 1,048,576 (\$2^{20}\$) Matrix Elements"
|
||||
|
||||
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
|
||||
|
||||
df_filt.process_size = @. proc_to_n(df_filt.process_name)
|
||||
|
||||
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
|
||||
|
||||
@df df_red scatter(
|
||||
:process_size,
|
||||
:cpu_time,
|
||||
yerror = :cpu_std,
|
||||
label = "CPU execution time, 32 threads (s)",
|
||||
markersize = 6,
|
||||
)
|
||||
@df df_red scatter!(
|
||||
:process_size,
|
||||
:gpu_time,
|
||||
yerror = :gpu_std,
|
||||
label = "GPU execution time, A100 80GB (s)",
|
||||
markersize = 6,
|
||||
)
|
||||
|
||||
plot!(
|
||||
#title = title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "time (s)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("cpu_vs_gpu_qed.pdf")
|
||||
|
||||
|
||||
|
||||
|
||||
title_string = "\$AB\\rightarrow AB^n\$ ABC Processes\nCalculate 1,048,576 (\$2^{20}\$) Matrix Elements"
|
||||
|
||||
df_filt = filter(:process_name => x -> abc_proc_to_n(x) >= 1, df)
|
||||
|
||||
df_filt.process_size = @. abc_proc_to_n(df_filt.process_name)
|
||||
|
||||
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
|
||||
|
||||
@df df_red scatter(
|
||||
:process_size,
|
||||
:cpu_time,
|
||||
yerror = :cpu_std,
|
||||
label = "CPU execution time, 32 threads (s)",
|
||||
markersize = 6,
|
||||
)
|
||||
@df df_red scatter!(
|
||||
:process_size,
|
||||
:gpu_time,
|
||||
yerror = :gpu_std,
|
||||
label = "GPU execution time, A100 80GB (s)",
|
||||
markersize = 6,
|
||||
)
|
||||
|
||||
plot!(
|
||||
#title = title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "time (s)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("cpu_vs_gpu_abc.pdf")
|
212
data/evaluate_full_node_bench.jl
Normal file
212
data/evaluate_full_node_bench.jl
Normal file
@ -0,0 +1,212 @@
|
||||
using CSV
|
||||
using DataFrames
|
||||
using Plots
|
||||
using StatsPlots
|
||||
using LaTeXStrings
|
||||
|
||||
# Abort with a usage hint when no input file was given. Without the explicit
# exit the script would continue and fail with a BoundsError on `ARGS[1]`.
if length(ARGS) < 1
    println("Please use with \"input_file.csv\"")
    exit(1)
end
|
||||
|
||||
processes = [
|
||||
"QED Process: 'ke->ke'",
|
||||
"QED Process: 'ke->kke'",
|
||||
"QED Process: 'ke->kkke'",
|
||||
"QED Process: 'ke->kkkke'",
|
||||
"QED Process: 'ke->kkkkke'",
|
||||
]
|
||||
|
||||
"""
    proc_to_n(str::AbstractString)

Return the number of `'k'` characters on the right-hand side of the `->` in
the single-quoted process definition contained in `str`
(e.g. `"QED Process: 'ke->kke'"` gives `2`).

Throws a `BoundsError` if `str` has no quoted section or the quoted section
has no `->`.
"""
function proc_to_n(str::AbstractString)
    process = split(str, "'")[2]
    outgoing = split(process, "->")[2]
    return count(==('k'), outgoing)
end
|
||||
|
||||
"""
    beautify_title(str::AbstractString)

Build a plot title for a process name of the form `"<text> 'in->out'<text>"`.

The result is the fixed heading `"QED Compton Scattering Process "` followed
by the quoted `in->out` part rendered through a `LaTeXString` (with
`\\rightarrow`, and runs of `k` or `B` collapsed into power notation such as
`k^3`) and the text that follows the closing quote. The text before the
opening quote is not used.

Throws a `BoundsError` if `str` does not contain two single quotes or the
quoted part has no `->`.
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")

    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")

    prefix = parts[1]
    suffix = parts[2]

    k_count = count(==('k'), suffix)
    B_count = count(==('B'), suffix)

    new_suffix = if k_count == 1 || B_count == 1
        # a single particle needs no exponent
        suffix
    elseif k_count >= 1
        replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        replace(suffix, r"B+" => "B^$B_count")
    else
        # neither 'k' nor 'B' present: keep the final state as written
        # (previously `new_suffix` stayed undefined and raised UndefVarError)
        suffix
    end

    return "QED Compton Scattering Process " * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
|
||||
|
||||
input_file = ARGS[1]
|
||||
df = CSV.read(input_file, DataFrame)
|
||||
n_inputs = df[:, "n_inputs"][1]
|
||||
gpus = df.gpu_devices[1]
|
||||
cpus = df.cpu_threads[1]
|
||||
|
||||
power = Int(round(log2(n_inputs)))
|
||||
|
||||
chunk_sizes = [
|
||||
"\$2^{10}\$",
|
||||
"\$2^{11}\$",
|
||||
"\$2^{12}\$",
|
||||
"\$2^{13}\$",
|
||||
"\$2^{14}\$",
|
||||
"\$2^{15}\$",
|
||||
"\$2^{16}\$",
|
||||
"\$2^{17}\$",
|
||||
"\$2^{18}\$",
|
||||
"\$2^{19}\$",
|
||||
"\$2^{20}\$",
|
||||
]
|
||||
|
||||
best_times = Vector{Float64}()
|
||||
best_times_std = Vector{Float64}()
|
||||
|
||||
# plotting with threads as x axis
|
||||
for process_name in processes
|
||||
df_filt = filter(:process_name => x -> x == process_name, df)
|
||||
|
||||
df_filt.cpu_ratio = df_filt.cpu_chunks ./ (df_filt.cpu_chunks .+ df_filt.gpu_chunks) .* 100.0
|
||||
df_filt.gpu_ratio = df_filt.gpu_chunks ./ (df_filt.cpu_chunks .+ df_filt.gpu_chunks) .* 100.0
|
||||
|
||||
push!(best_times, minimum(df_filt.time))
|
||||
|
||||
bar(chunk_sizes, df_filt.cpu_ratio, label = "workload completed by \$$(cpus)\$ CPU threads (%)")
|
||||
bar!(
|
||||
chunk_sizes,
|
||||
[100 for _ in chunk_sizes],
|
||||
label = "workload completed by $(gpus) GPUs (%)",
|
||||
fillto = df_filt.cpu_ratio,
|
||||
)
|
||||
|
||||
plot!(
|
||||
#title = "$(beautify_title(process_name))\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
|
||||
yscale = :linear,
|
||||
#xticks = [1024 4096 16384 65536 262144 1048576],
|
||||
ylim = (0, 105),
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
#size = (800, 600),
|
||||
ylabel = "contribution (%)",
|
||||
xlabel = "chunk size (#)",
|
||||
)
|
||||
|
||||
savefig("full_node_chunk_size_$(proc_to_n(process_name))k_ratio.pdf")
|
||||
|
||||
|
||||
scatter(
|
||||
chunk_sizes,
|
||||
df_filt.rate,
|
||||
label = "total execution rate (\$s^{-1}\$)",
|
||||
#title = "$(beautify_title(process_name))\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
|
||||
ylim = (0, :auto),
|
||||
#yscale = :log10,
|
||||
#xticks = [1024 4096 16384 65536 262144 1048576],
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
#size = (800, 600),
|
||||
ylabel = "rate (\$s^{-1}\$)",
|
||||
xlabel = "chunk size (#)",
|
||||
markersize = 7,
|
||||
)
|
||||
|
||||
savefig("full_node_chunk_size_$(proc_to_n(process_name))k_rate.pdf")
|
||||
|
||||
|
||||
scatter(
|
||||
chunk_sizes,
|
||||
df_filt.time,
|
||||
yerror = df_filt.std,
|
||||
label = "total execution time (s)",
|
||||
#title = "$(beautify_title(process_name))\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
|
||||
#xticks = [1024 4096 16384 65536 262144 1048576],
|
||||
ylim = (0, maximum(df_filt.time) * 1.05),
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
#size = (800, 600),
|
||||
ylabel = "time (s)",
|
||||
xlabel = "chunk size (#)",
|
||||
markersize = 7,
|
||||
)
|
||||
|
||||
savefig("full_node_chunk_size_$(proc_to_n(process_name))k_time.pdf")
|
||||
|
||||
end
|
||||
|
||||
|
||||
# plotting with process size as x axis
|
||||
A100_rates = [2.530045276927587e9, 1.16972304616864e9, 2.0002725972692013e8, 3.495722925446318e7, 4.792187095617111e6]
|
||||
CPU_32threads_rates =
|
||||
[3.2691139045711152e7, 1.1578342663759507e7, 3.1670680975577887e6, 731037.7069429948, 115001.5594731802]
|
||||
|
||||
theory_rates = (A100_rates .+ CPU_32threads_rates) .* 4
|
||||
|
||||
scatter(
|
||||
proc_to_n.(processes),
|
||||
best_times,
|
||||
label = "full node best achieved time (s)",
|
||||
#title = "QED N-Photon Compton Scattering\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
|
||||
ylim = (0, maximum(best_times) * 1.05),
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
#size = (800, 600),
|
||||
ylabel = "time (s)",
|
||||
xlabel = "process size (#)",
|
||||
markersize = 7,
|
||||
)
|
||||
|
||||
savefig("full_node_process_best_time.pdf")
|
||||
|
||||
|
||||
scatter(
|
||||
proc_to_n.(processes),
|
||||
(n_inputs ./ best_times),
|
||||
label = "full node best achieved rate (\$s^{-1}\$)",
|
||||
#title = "QED N-Photon Compton Scattering\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
|
||||
ylim = (0, maximum(n_inputs ./ best_times) * 1.05),
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
#size = (800, 600),
|
||||
ylabel = "rate (\$s^{-1}\$)",
|
||||
xlabel = "process size (#)",
|
||||
markersize = 7,
|
||||
)
|
||||
|
||||
savefig("full_node_process_best_rate.pdf")
|
||||
|
||||
|
||||
|
||||
scatter(
|
||||
proc_to_n.(processes),
|
||||
[(n_inputs ./ best_times) theory_rates],
|
||||
label = ["full node best achieved rate (\$s^{-1}\$)" "theoretical rate from previous benchmarks (\$s^{-1}\$)"],
|
||||
#title = "QED N-Photon Compton Scattering\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
|
||||
#ylim = (0, max(maximum(n_inputs ./ best_times), maximum(theory_rates)) * 1.05),
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 10,
|
||||
#size = (800, 600),
|
||||
ylabel = "rate (\$s^{-1}\$)",
|
||||
xlabel = "process size (#)",
|
||||
markersize = 7,
|
||||
)
|
||||
|
||||
savefig("full_node_process_best_rate_plus_theory.pdf")
|
232
data/evaluate_gen.jl
Normal file
232
data/evaluate_gen.jl
Normal file
@ -0,0 +1,232 @@
|
||||
using CSV
|
||||
using DataFrames
|
||||
using Plots
|
||||
using StatsPlots
|
||||
using LaTeXStrings
|
||||
|
||||
# Abort with a usage hint when no input file was given. Without the explicit
# exit the script would continue and fail with a BoundsError on `ARGS[1]`.
if length(ARGS) < 1
    println("Please use with \"input_file.csv\"")
    exit(1)
end
|
||||
|
||||
"""
    proc_to_n(str::AbstractString)

Return the number of `'k'` characters that follow the first `->` in `str`
(e.g. `"ke->kkke"` gives `3`).

Throws a `BoundsError` if `str` contains no `->`.
"""
function proc_to_n(str::AbstractString)
    outgoing = split(str, "->")[2]
    return count(==('k'), outgoing)
end
|
||||
|
||||
input_file = ARGS[1]
|
||||
df = CSV.read(input_file, DataFrame)
|
||||
|
||||
# plotting with process size as x axis
|
||||
THREADS = [1]
|
||||
|
||||
for threads in THREADS
|
||||
title_string = "n-photon Compton diagram generation"
|
||||
|
||||
df_filt = filter(:cpu_threads => x -> x == threads, df)
|
||||
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df_filt)
|
||||
|
||||
# ns -> s
|
||||
df_filt.graph_gen_mean = @. df_filt.graph_gen_mean / 1e9
|
||||
df_filt.graph_gen_std = @. df_filt.graph_gen_std / 1e9
|
||||
|
||||
# B -> MB (not MiB since the log scale is base 10)
|
||||
df_filt.graph_mem = @. df_filt.graph_mem / 1e6
|
||||
df_filt.graph_mem_reduced = @. df_filt.graph_mem_reduced / 1e6
|
||||
|
||||
df_filt.process_size = @. proc_to_n(df_filt.process_name)
|
||||
l = length(df_filt.process_size)
|
||||
|
||||
println(df_filt[!, :process_size])
|
||||
println(df_filt[!, :graph_mem])
|
||||
println(df_filt[!, :graph_mem_reduced])
|
||||
|
||||
|
||||
@df df_filt scatter(:process_size, :graph_mem, label = "unreduced graph", markersize = 7)
|
||||
scatter!(
|
||||
df_filt[!, :process_size],
|
||||
df_filt[!, :graph_mem_reduced],
|
||||
label = "reduced graph",
|
||||
markershape = :square,
|
||||
markersize = 7,
|
||||
)
|
||||
|
||||
plot!(
|
||||
title = "n-photon Compton diagram memory footprint",
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
minorgrid = true,
|
||||
xticks = :process_size,
|
||||
#yticks = [1e-3, 1e-1, 1e1, 1e3],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 12,
|
||||
fontsize = 12,
|
||||
size = (800, 600),
|
||||
ylabel = "memory footprint (MB)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("gen_memory_$(threads).pdf")
|
||||
|
||||
@df df_filt scatter(
|
||||
:process_size,
|
||||
:graph_gen_mean,
|
||||
yerror = :graph_gen_std,
|
||||
label = "graph generation time",
|
||||
markersize = 7,
|
||||
)
|
||||
scatter!(
|
||||
df_filt[!, :process_size],
|
||||
df_filt[!, :graph_elapsed_reduce],
|
||||
label = "graph reduction time",
|
||||
markershape = :square,
|
||||
markersize = 7,
|
||||
)
|
||||
|
||||
plot!(
|
||||
title = title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
minorgrid = true,
|
||||
xticks = :process_size,
|
||||
yticks = [1e-3, 1e-1, 1e1, 1e3],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 12,
|
||||
fontsize = 12,
|
||||
size = (800, 600),
|
||||
ylabel = "time (s)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("gen_times_$(threads)_threads.pdf")
|
||||
exit(0)
|
||||
|
||||
# graph size
|
||||
title_string = "n-photon Compton unreduced graph size"
|
||||
|
||||
@df df_filt scatter(:process_size, :graph_nodes, label = "nodes", markershape = :circle)
|
||||
@df df_filt scatter!(:process_size, :graph_edges, label = "edges", markershape = :square)
|
||||
@df df_filt scatter!(:process_size, :graph_u_nodes, label = "U-nodes", markershape = :star)
|
||||
@df df_filt scatter!(:process_size, :graph_v_nodes, label = "V-nodes", markershape = :utriangle)
|
||||
@df df_filt scatter!(:process_size[2:end], :graph_s1_nodes[2:end], label = "S1-nodes", markershape = :x)
|
||||
@df df_filt scatter!(:process_size, :graph_s2_nodes, label = "S2-nodes", markershape = :diamond)
|
||||
|
||||
plot!(
|
||||
title = title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
yminorgrid = true,
|
||||
xticks = :process_size,
|
||||
yticks = [1e1, 1e3, 1e5, 1e7],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "(#)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("compton_graph_size_unreduced.pdf")
|
||||
|
||||
|
||||
# graph size
|
||||
title_string = "n-photon Compton reduced graph size"
|
||||
|
||||
@df df_filt scatter(:process_size, :graph_nodes_reduced, label = "nodes", markershape = :circle)
|
||||
@df df_filt scatter!(:process_size, :graph_edges_reduced, label = "edges", markershape = :square)
|
||||
@df df_filt scatter!(:process_size, :graph_u_nodes_reduced, label = "U-nodes", markershape = :star)
|
||||
@df df_filt scatter!(:process_size, :graph_v_nodes_reduced, label = "V-nodes", markershape = :utriangle)
|
||||
@df df_filt scatter!(:process_size[2:end], :graph_s1_nodes_reduced[2:end], label = "S1-nodes", markershape = :x)
|
||||
@df df_filt scatter!(:process_size, :graph_s2_nodes_reduced, label = "S2-nodes", markershape = :diamond)
|
||||
|
||||
plot!(
|
||||
title = title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
yminorgrid = true,
|
||||
xticks = :process_size,
|
||||
yticks = [1e1, 1e2, 1e3, 1e4, 1e5, 1e6],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "(#)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("compton_graph_size_reduced.pdf")
|
||||
|
||||
|
||||
# graph size versus
|
||||
title_string = "n-photon Compton graph sizes"
|
||||
|
||||
@df df_filt scatter(:process_size, :graph_nodes, label = "nodes", markershape = :circle)
|
||||
@df df_filt scatter!(:process_size, :graph_edges, label = "edges", markershape = :square)
|
||||
@df df_filt scatter!(:process_size, :graph_nodes_reduced, label = "nodes (after reduction)", markershape = :star)
|
||||
@df df_filt scatter!(
|
||||
:process_size,
|
||||
:graph_edges_reduced,
|
||||
label = "edges (after reduction)",
|
||||
markershape = :utriangle,
|
||||
)
|
||||
|
||||
plot!(
|
||||
title = title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
yminorgrid = true,
|
||||
xticks = :process_size,
|
||||
yticks = [1e1, 1e2, 1e3, 1e4, 1e5, 1e6],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "(#)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("compton_graph_size_versus.pdf")
|
||||
|
||||
end
|
||||
|
||||
# for a specific process, plot times with threads as x
|
||||
# Plot the graph generation time of one fixed process against the number of
# CPU threads and save it as "gen_times_<process>.pdf".
process = "ke->kkkkkkkke"
title_string = "n-photon Compton diagram generation times, $process"

df_filt = filter(:process_name => x -> x == process, df)

# ns -> s
df_filt.graph_gen_mean = @. df_filt.graph_gen_mean / 1e9
df_filt.graph_gen_std = @. df_filt.graph_gen_std / 1e9

@df df_filt scatter(
    :cpu_threads,
    :graph_gen_mean,
    yerror = :graph_gen_std,
    label = "graph generation time",
    markersize = 7,
)

plot!(
    title = title_string,
    yscale = :linear,
    legend = :outerbottom,
    minorgrid = true,
    # Outside of `@df` a bare `:cpu_threads` is just a Symbol, not the column,
    # so the tick positions are passed explicitly.
    xticks = sort(unique(df_filt.cpu_threads)),
    #yticks = [1e-3, 1e-2, 1e-1, 1e-0, 1e1],
    ylim = (0, maximum(df_filt.graph_gen_mean) * 1.1),
    xgrid = false,
    xminorticks = false,
    legendcolumns = 1,
    legend_font_pointsize = 10,
    size = (800, 600),
    ylabel = "time (s)",
    # the x axis shows :cpu_threads, not the process size
    xlabel = "threads (#)",
)

savefig("gen_times_$(process).pdf")
|
52
data/evaluate_gen_one_sided_comparison.jl
Normal file
52
data/evaluate_gen_one_sided_comparison.jl
Normal file
@ -0,0 +1,52 @@
|
||||
using CSV
|
||||
using DataFrames
|
||||
using Plots
|
||||
using StatsPlots
|
||||
using LaTeXStrings
|
||||
|
||||
# Abort with a usage hint unless both input files were given. Without the
# explicit exit the script would continue and fail with a BoundsError on
# `ARGS[1]` / `ARGS[2]`.
if length(ARGS) < 2
    println("Please use with \"input_file.csv\" \"input_file_onesided.csv\"")
    exit(1)
end
|
||||
|
||||
"""
    proc_to_n(str::AbstractString)

Return the number of `'k'` characters that follow the first `->` in `str`
(e.g. `"ke->kkke"` gives `3`).

Throws a `BoundsError` if `str` contains no `->`.
"""
function proc_to_n(str::AbstractString)
    outgoing = split(str, "->")[2]
    return count(==('k'), outgoing)
end
|
||||
|
||||
input_file = ARGS[1]
|
||||
input_file_onesided = ARGS[2]
|
||||
df = CSV.read(input_file, DataFrame)
|
||||
df2 = CSV.read(input_file_onesided, DataFrame)
|
||||
|
||||
|
||||
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1 && proc_to_n(x) <= 7, df)
|
||||
df_filt.process_size = @. proc_to_n(df_filt.process_name)
|
||||
|
||||
df_filt2 = filter(:process_name => x -> proc_to_n(x) >= 1 && proc_to_n(x) <= 7, df2)
|
||||
df_filt2.process_size = @. proc_to_n(df_filt2.process_name)
|
||||
|
||||
|
||||
# graph size
|
||||
title_string = "n-photon Compton reduced graph size"
|
||||
|
||||
@df df_filt scatter(:process_size, :graph_nodes_reduced, label = "nodes, two-sided generation", markershape = :circle)
|
||||
@df df_filt2 scatter!(:process_size, :graph_nodes_reduced, label = "nodes, one-sided generation", markershape = :square)
|
||||
|
||||
plot!(
|
||||
title = title_string,
|
||||
yscale = :log10,
|
||||
legend = :outerbottom,
|
||||
yminorgrid = true,
|
||||
xticks = :process_size,
|
||||
yticks = [1e1, 1e2, 1e3, 1e4, 1e5, 1e6],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 2,
|
||||
legend_font_pointsize = 10,
|
||||
size = (800, 600),
|
||||
ylabel = "(#)",
|
||||
xlabel = "process size (#)",
|
||||
)
|
||||
|
||||
savefig("compton_diagram_gen_comparison.pdf")
|
130
data/evaluate_gpu.jl
Normal file
130
data/evaluate_gpu.jl
Normal file
@ -0,0 +1,130 @@
|
||||
using CSV
|
||||
using DataFrames
|
||||
using Plots
|
||||
using StatsPlots
|
||||
using LaTeXStrings
|
||||
|
||||
# Abort with a usage hint when no input file was given. Without the explicit
# exit the script would continue and fail with a BoundsError on `ARGS[1]`.
if length(ARGS) < 1
    println("Please use with \"input_file.csv\"")
    exit(1)
end
|
||||
|
||||
processes = [
|
||||
"QED Process: 'ke->ke'",
|
||||
"QED Process: 'ke->kke'",
|
||||
"QED Process: 'ke->kkke'",
|
||||
"QED Process: 'ke->kkkke'",
|
||||
"QED Process: 'ke->kkkkke'",
|
||||
"ABC Process: 'AB->AB'",
|
||||
"ABC Process: 'AB->ABBB'",
|
||||
"ABC Process: 'AB->ABBBBB'",
|
||||
]
|
||||
|
||||
"""
    proc_to_n(str::AbstractString)

Return the number of `'k'` characters on the right-hand side of the `->` in
the single-quoted process definition contained in `str`
(e.g. `"QED Process: 'ke->kke' reduced"` gives `2`).

Throws a `BoundsError` if `str` has no quoted section or the quoted section
has no `->`.
"""
function proc_to_n(str::AbstractString)
    process = split(str, "'")[2]
    outgoing = split(process, "->")[2]
    return count(==('k'), outgoing)
end
|
||||
|
||||
"""
    beautify_title(str::AbstractString)

Turn a process name of the form `"<text> 'in->out'<text>"` into a plot title.

The quoted `in->out` part is rendered through a `LaTeXString` with a
`\\rightarrow`; a repeated run of `k` or `B` in the final state is collapsed
into power notation (`kkk` becomes `k^3`). The text before and after the
quotes is kept unchanged.

Throws a `BoundsError` if `str` does not contain two single quotes or the
quoted part has no `->`.
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")

    preprefix = parts[1]
    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")

    prefix = parts[1]
    suffix = parts[2]

    k_count = count(==('k'), suffix)
    B_count = count(==('B'), suffix)

    new_suffix = if k_count == 1 || B_count == 1
        # a single particle needs no exponent
        suffix
    elseif k_count >= 1
        replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        replace(suffix, r"B+" => "B^$B_count")
    else
        # neither 'k' nor 'B' present: keep the final state as written
        # (previously `new_suffix` stayed undefined and raised UndefVarError)
        suffix
    end

    return preprefix * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
|
||||
|
||||
# Load the benchmark results and read the values shared by all rows from the
# first row.
input_file = ARGS[1]
df = CSV.read(input_file, DataFrame)
n_inputs = df[1, "n_inputs"]
gpu_name = df[1, "gpu_name"]

# CSV.read parses empty fields as `missing` by default, and `missing == ""`
# is `missing`, which cannot be used as an `if` condition. Check for
# `missing` first so that an absent GPU name reaches the message below
# instead of raising a TypeError.
if ismissing(gpu_name) || gpu_name == ""
    println("Results file did not execute everything on GPU! (or didn't write gpu name)")
    exit(0)
end
|
||||
|
||||
# plotting with process size as x axis
|
||||
title_string = "GPU $gpu_name, $n_inputs samples"
|
||||
|
||||
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
|
||||
df_filt.gpu_rate = df_filt.gpu_rate
|
||||
df_filt.gpu_time = df_filt.gpu_time
|
||||
df_filt.gpu_gflops = df_filt.gpu_gflops
|
||||
|
||||
df_filt.process_size = @. proc_to_n(df_filt.process_name)
|
||||
|
||||
df_no_opt = filter(:process_name => x -> match(r" not optimized$", x) !== nothing, df_filt)
|
||||
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
|
||||
|
||||
"""
    save_gpu_scatter(df_unopt, df_reduced, column, filename; labels, yscale, ylabel)

Scatter `column` against `process_size` for the unoptimized and the reduced results in
one figure and save it to `filename`.

# Arguments
- `df_unopt`, `df_reduced`: data frames providing `process_size` and `column`.
- `column::Symbol`: name of the column plotted on the y axis.
- `filename`: path handed to `savefig`.

# Keywords
- `labels`: legend entries for `df_unopt` and `df_reduced`, in that order.
- `yscale`: y axis scale, e.g. `:log10` or `:linear`.
- `ylabel`: y axis label.
"""
function save_gpu_scatter(
    df_unopt,
    df_reduced,
    column::Symbol,
    filename::AbstractString;
    labels,
    yscale,
    ylabel,
)
    # `scatter` starts a new figure, `scatter!` adds the second series to it.
    scatter(df_unopt.process_size, df_unopt[!, column]; label = labels[1], markersize = 7)
    scatter!(
        df_reduced.process_size,
        df_reduced[!, column];
        label = labels[2],
        markersize = 7,
    )

    plot!(;
        yscale = yscale,
        legend = :outerbottom,
        xticks = [1, 2, 3, 4, 5],
        legendcolumns = 2,
        legend_font_pointsize = 10,
        size = (800, 600),
        ylabel = ylabel,
        xlabel = "process size (#)",
    )

    return savefig(filename)
end

# Sample rate (title disabled: title_string * ", sample rate")
save_gpu_scatter(
    df_no_opt,
    df_red,
    :gpu_rate,
    "gpu_rate_$(gpu_name).pdf";
    labels = ("unoptimized function execution rate", "reduced function execution rate"),
    yscale = :log10,
    ylabel = "rate (" * L"s^{-1}" * ")",
)

# Execution time (title disabled: title_string * ", execution time")
save_gpu_scatter(
    df_no_opt,
    df_red,
    :gpu_time,
    "gpu_times_$(gpu_name).pdf";
    labels = ("unoptimized function execution time", "reduced function execution time"),
    yscale = :log10,
    ylabel = "time (s)",
)

# Floating point performance (title disabled: title_string * ", GFLOPS")
save_gpu_scatter(
    df_no_opt,
    df_red,
    :gpu_gflops,
    "gpu_perf_$(gpu_name).pdf";
    labels = ("unoptimized function", "reduced function"),
    yscale = :linear,
    ylabel = "performance (GFLOPS)",
)
|
279
data/evaluate_reduce_bench.jl
Normal file
279
data/evaluate_reduce_bench.jl
Normal file
@ -0,0 +1,279 @@
|
||||
using Plots
|
||||
using StatsPlots
|
||||
using CSV
|
||||
using DataFrames
|
||||
using LaTeXStrings
|
||||
|
||||
|
||||
# Both result files are required. Stop with a non-zero status after printing the usage;
# otherwise the script carries on and fails with a BoundsError when it reads `ARGS[2]`.
if length(ARGS) < 2
    println("Please use with \"input_file.csv\" \"input_file_gpu.csv\"")
    exit(1)
end
|
||||
|
||||
|
||||
"""
    proc_to_n(str::AbstractString)

Return the number of `'k'` characters in the final state of the process named in `str`.

Both the bare process strings used in this script (`"ke->kkke"`) and labels that carry
the process between single quotes (`"QED Process: 'ke->kkke'"`) are accepted. The final
state is the text after the first `"->"`.
"""
function proc_to_n(str::AbstractString)
    pieces = split(str, "'")
    # The process names in this script are not quoted, so indexing `pieces[2]`
    # unconditionally raised a BoundsError for them; use the whole string instead.
    process = length(pieces) >= 2 ? pieces[2] : str
    final_state = split(process, "->")[2]
    return count(==('k'), final_state)
end
|
||||
|
||||
"""
    beautify_title(str::AbstractString)

Typeset a process string such as `"ke->kkke"` as a LaTeX string of the form
`initial \\rightarrow final`.

A final state containing several `k` (or, failing that, several `B`) has each run of that
letter replaced by a power with the total count, e.g. `kkke` becomes `k^3e` and `ABBB`
becomes `AB^3`. Any other final state is used unchanged.
"""
function beautify_title(str::AbstractString)
    prefix, suffix = split(str, "->")

    k_count = count(==('k'), suffix)
    B_count = count(==('B'), suffix)

    new_suffix = if k_count == 1 || B_count == 1
        suffix
    elseif k_count >= 1
        replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        replace(suffix, r"B+" => "B^$B_count")
    else
        # Neither `k` nor `B` in the final state: nothing to collapse. Without this
        # branch `new_suffix` stayed unassigned and the return line raised an
        # UndefVarError.
        suffix
    end

    return L"%$prefix \rightarrow %$new_suffix"
end
|
||||
|
||||
# Process strings as they appear in the `process` column of the result files.
processes = [
    "ke->ke",
    "ke->kke",
    "ke->kkke",
    "ke->kkkke",
    "ke->kkkkke",
    "AB->AB",
    "AB->ABBB",
    "AB->ABBBBB",
]

# First argument: CPU results, second argument: GPU results.
input_file, input_file_gpu = ARGS[1], ARGS[2]

df = CSV.read(input_file, DataFrame)
df_gpu = CSV.read(input_file_gpu, DataFrame)

# Only referenced by the title of the disabled per-process plots.
n_inputs = 2^20
|
||||
|
||||
#=
|
||||
for process in processes
|
||||
df_filt = filter(:process => x -> x == process, df)
|
||||
df_filt_gpu = filter(:process => x -> x == process, df_gpu)
|
||||
|
||||
# add dummy factors to get the numbers in similar orders of magnitude
|
||||
df_filt.cumulative_optimization_time = df_filt.cumulative_optimization_time .* 1e4
|
||||
df_filt_gpu.cumulative_optimization_time = df_filt_gpu.cumulative_optimization_time .* 1e4
|
||||
df_filt_gpu.gpu_t = df_filt_gpu.gpu_t .* 1e3
|
||||
|
||||
cpu = !isempty(df_filt)
|
||||
gpu = !isempty(df_filt_gpu)
|
||||
|
||||
ymax = 0.0
|
||||
|
||||
if cpu
|
||||
@df df_filt scatter(
|
||||
:operations,
|
||||
:cumulative_optimization_time,
|
||||
label = "Cumulative Optimization Time (x10000) (s)",
|
||||
markersize = 4,
|
||||
)
|
||||
ymax = max(df_filt[!, :cpu_st_t]..., df_filt[!, :cumulative_optimization_time]...) * 1.1 * 1e4
|
||||
@df df_filt scatter!(
|
||||
:operations,
|
||||
:cpu_st_t,
|
||||
label = "Single-Threaded Execution (s)",
|
||||
markersize = 4,
|
||||
markershape = :square,
|
||||
)
|
||||
end
|
||||
|
||||
if gpu
|
||||
if !cpu
|
||||
@df df_filt_gpu scatter(
|
||||
:operations,
|
||||
:cumulative_optimization_time,
|
||||
label = "Cumulative Optimization Time (x10000) (s)",
|
||||
markersize = 4,
|
||||
)
|
||||
ymax = max(df_filt_gpu[!, :gpu_t]..., df_filt_gpu[!, :cumulative_optimization_time]...) * 1.1 * 1e4
|
||||
end
|
||||
|
||||
@df df_filt_gpu scatter!(
|
||||
:operations,
|
||||
:gpu_t,
|
||||
label = "GPU Execution (x1000) (s)",
|
||||
markersize = 4,
|
||||
markershape = :diamond,
|
||||
)
|
||||
end
|
||||
|
||||
if cpu || gpu
|
||||
plot!(
|
||||
title = ("$(beautify_title(process)) Reduction Progression ($(n_inputs) Inputs)"),
|
||||
xscale = :linear,
|
||||
yscale = :linear,
|
||||
#ylim = (0, ymax),
|
||||
legend = :outerbottom,
|
||||
minorgrid = true,
|
||||
xticks = :process_size,
|
||||
#yticks = [1e-3, 1e-1, 1e1, 1e3],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 12,
|
||||
fontsize = 12,
|
||||
size = (800, 600),
|
||||
xlabel = "optimizer steps (#)",
|
||||
ylabel = "time (s)",
|
||||
)
|
||||
|
||||
savefig("$(String(process))_reduction_bench.pdf")
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# ABC vs QED
|
||||
AB_process = "AB->ABBB"
|
||||
abc_label = "\$A + B \\rightarrow A + 3B\$"
|
||||
|
||||
QED_process = "ke->kkkke"
|
||||
qed_label = "\$e^- + \\gamma \\rightarrow e^- + 4\\gamma\$"
|
||||
|
||||
df_filt_AB = filter(:process => x -> x == AB_process, df)
|
||||
df_filt_QED = filter(:process => x -> x == QED_process, df)
|
||||
|
||||
max_AB = max(df_filt_AB[!, :operations]...)
|
||||
max_QED = max(df_filt_QED[!, :operations]...)
|
||||
|
||||
df_filt_AB.reduction_progress = df_filt_AB.operations ./ max_AB .* 100.0
|
||||
df_filt_QED.reduction_progress = df_filt_QED.operations ./ max_QED .* 100.0
|
||||
|
||||
df_filt_AB.relative_performance = df_filt_AB.cpu_st_t ./ df_filt_AB[!, :cpu_st_t][1] .* 100.0
|
||||
df_filt_QED.relative_performance = df_filt_QED.cpu_st_t ./ df_filt_QED[!, :cpu_st_t][1] .* 100.0
|
||||
|
||||
@df df_filt_AB scatter(:reduction_progress, :relative_performance, label = abc_label, markersize = 4)
|
||||
@df df_filt_QED scatter!(:reduction_progress, :relative_performance, label = qed_label, markersize = 4)
|
||||
|
||||
plot!(
|
||||
#title = ("Relative Performance of $(beautify_title(QED_process)) versus $(beautify_title(AB_process)) on CPU"),
|
||||
xscale = :linear,
|
||||
yscale = :linear,
|
||||
#ylim = (0, ymax),
|
||||
legend = :outerbottom,
|
||||
minorgrid = true,
|
||||
xticks = :process_size,
|
||||
#yticks = [1e-3, 1e-1, 1e1, 1e3],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 12,
|
||||
fontsize = 12,
|
||||
size = (800, 600),
|
||||
xlabel = "reduction progress (%)",
|
||||
ylabel = "relative time taken (%)",
|
||||
)
|
||||
|
||||
savefig("reduction_bench_relative.pdf")
|
||||
|
||||
|
||||
# ABC vs QED on GPU
|
||||
AB_process = "AB->ABBB"
|
||||
abc_label = "\$A + B \\rightarrow A + 3B\$"
|
||||
|
||||
QED_process = "ke->kkkke"
|
||||
qed_label = "\$e^- + \\gamma \\rightarrow e^- + 4\\gamma\$"
|
||||
|
||||
df_filt_AB = filter(:process => x -> x == AB_process, df_gpu)
|
||||
df_filt_QED = filter(:process => x -> x == QED_process, df_gpu)
|
||||
|
||||
max_AB = max(df_filt_AB[!, :operations]...)
|
||||
max_QED = max(df_filt_QED[!, :operations]...)
|
||||
|
||||
df_filt_AB.reduction_progress = df_filt_AB.operations ./ max_AB .* 100.0
|
||||
df_filt_QED.reduction_progress = df_filt_QED.operations ./ max_QED .* 100.0
|
||||
|
||||
df_filt_AB.relative_performance = df_filt_AB.gpu_t ./ df_filt_AB[!, :gpu_t][1] .* 100.0
|
||||
df_filt_QED.relative_performance = df_filt_QED.gpu_t ./ df_filt_QED[!, :gpu_t][1] .* 100.0
|
||||
|
||||
df_filt_AB.relative_std = df_filt_AB.gpu_s ./ df_filt_AB[!, :gpu_t][1] .* 100.0
|
||||
df_filt_QED.relative_std = df_filt_QED.gpu_s ./ df_filt_QED[!, :gpu_t][1] .* 100.0
|
||||
|
||||
@df df_filt_AB scatter(
|
||||
:reduction_progress,
|
||||
:relative_performance,
|
||||
yerror = :relative_std,
|
||||
label = abc_label,
|
||||
markersize = 4,
|
||||
)
|
||||
@df df_filt_QED scatter!(
|
||||
:reduction_progress,
|
||||
:relative_performance,
|
||||
yerror = :relative_std,
|
||||
label = qed_label,
|
||||
markersize = 4,
|
||||
)
|
||||
|
||||
plot!(
|
||||
#title = "Relative Performance of $(beautify_title(QED_process)) versus $(beautify_title(AB_process)) on GPU (A100)",
|
||||
xscale = :linear,
|
||||
yscale = :linear,
|
||||
#ylim = (0, ymax),
|
||||
legend = :outerbottom,
|
||||
minorgrid = true,
|
||||
xticks = :process_size,
|
||||
#yticks = [1e-3, 1e-1, 1e1, 1e3],
|
||||
xgrid = false,
|
||||
xminorticks = false,
|
||||
legendcolumns = 1,
|
||||
legend_font_pointsize = 12,
|
||||
fontsize = 12,
|
||||
size = (800, 600),
|
||||
xlabel = "reduction progress (%)",
|
||||
ylabel = "relative time taken (%)",
|
||||
)
|
||||
|
||||
savefig("reduction_bench_relative_gpu.pdf")
|
||||
=#
|
||||
|
||||
# Relative execution time of one QED process on CPU and GPU while the graph is reduced.
QED_process = "ke->kkke"
# Not used by the plot below. `\e` is not a LaTeX macro, so the electron is written
# as plain `e^-`.
QED_label = "\$e^- + \\gamma \\rightarrow e^- + 3\\gamma\$"

df_filt_QED_GPU = filter(:process => ==(QED_process), df_gpu)
df_filt_QED_CPU = filter(:process => ==(QED_process), df)

# `maximum` reduces the column directly instead of splatting every row into `max`.
# The CPU maximum is the 100 % mark for both data sets.
max_QED = maximum(df_filt_QED_CPU.operations)

df_filt_QED_GPU.reduction_progress = df_filt_QED_GPU.operations ./ max_QED .* 100.0
df_filt_QED_CPU.reduction_progress = df_filt_QED_CPU.operations ./ max_QED .* 100.0

# Times and their spread are expressed in percent of the first row of each data set.
# NOTE(review): presumably the first row is the unreduced graph; confirm row order.
gpu_baseline = first(df_filt_QED_GPU.gpu_t)
cpu_baseline = first(df_filt_QED_CPU.cpu_st_t)

df_filt_QED_GPU.relative_performance = df_filt_QED_GPU.gpu_t ./ gpu_baseline .* 100.0
df_filt_QED_CPU.relative_performance = df_filt_QED_CPU.cpu_st_t ./ cpu_baseline .* 100.0

df_filt_QED_GPU.relative_std = df_filt_QED_GPU.gpu_s ./ gpu_baseline .* 100.0
df_filt_QED_CPU.relative_std = df_filt_QED_CPU.cpu_st_s ./ cpu_baseline .* 100.0

@df df_filt_QED_CPU scatter(
    :reduction_progress,
    :relative_performance,
    yerror = :relative_std,
    label = "CPU relative time taken (%)",
    markersize = 4,
)
@df df_filt_QED_GPU scatter!(
    :reduction_progress,
    :relative_performance,
    yerror = :relative_std,
    label = "GPU relative time taken (%)",
    markersize = 4,
)

plot!(
    #title = "Relative Performance of $(beautify_title(QED_process)) on CPU versus GPU",
    xscale = :linear,
    yscale = :linear,
    ylim = (0, :auto),
    legend = :outerbottom,
    minorgrid = true,
    # NOTE(review): `:process_size` is not a column of these data frames; kept as in
    # the original, confirm the intended tick setting.
    xticks = :process_size,
    #yticks = [1e-3, 1e-1, 1e1, 1e3],
    xgrid = false,
    xminorticks = false,
    legendcolumns = 1,
    legend_font_pointsize = 12,
    fontsize = 12,
    size = (800, 600),
    xlabel = "reduction progress (%)",
    ylabel = "relative time taken (%)",
)

savefig("reduction_bench_relative_cpu_vs_gpu.pdf")
|
@ -1,16 +0,0 @@
|
||||
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
|
||||
0,77,101,252.0,6240.0,0.04038461538461539,0.02087051,8.691e-6,3.405098066,0.244763721,1.565749515,0.936213163
|
||||
1,76,99,246.0,6240.0,0.03942307692307692,0.020658734,9.36e-6,3.244313848,0.230460257,1.548012602,0.887605389
|
||||
2,75,97,240.0,6240.0,0.038461538461538464,0.045333482,8.74e-6,3.163679857,0.217614064,1.52780456,0.816496837
|
||||
3,74,95,234.0,6240.0,0.0375,0.020314034,9.081e-6,2.956421016,0.183415997,1.524262179,0.793770075
|
||||
4,73,93,228.0,6240.0,0.03653846153846154,0.033579409,8.52e-6,2.845414866,0.19168374,1.50907807,0.742734411
|
||||
5,72,92,228.0,6144.0,0.037109375,0.019736718,8.87e-6,2.827109937,0.207452606,1.497203204,0.719774022
|
||||
6,71,90,222.0,6144.0,0.0361328125,0.043612693,1.01e-5,2.62776692,0.166492497,1.602060948,0.668929854
|
||||
7,70,89,222.0,6048.0,0.03670634920634921,0.042731148,1.053e-5,2.631288029,0.185812224,1.514154792,0.694503947
|
||||
8,69,87,216.0,6048.0,0.03571428571428571,0.042148711,8.19e-6,2.493343257,0.183595081,1.506478504,0.652420896
|
||||
9,68,86,216.0,5952.0,0.036290322580645164,0.041568955,8.571e-6,2.487317627,0.147773078,1.472141844,0.653143947
|
||||
10,67,85,216.0,5856.0,0.036885245901639344,0.041307868,9.13e-6,2.491634709,0.175728138,1.482162906,0.63058774
|
||||
11,66,84,216.0,5760.0,0.0375,0.041265756,8.43e-6,2.516916643,0.180420842,1.463053866,0.650627815
|
||||
12,65,83,205.0,5760.0,0.035590277777777776,0.039711293,9.22e-6,2.479664249,0.178013433,1.459566956,0.652477867
|
||||
13,64,82,205.0,5664.0,0.03619350282485876,0.030866093,8.87e-6,2.485424881,0.179983608,1.564961227,0.647932468
|
||||
14,63,81,205.0,5568.0,0.03681752873563218,0.029946916,8.93e-6,2.469922022,0.179443854,1.485935831,0.651804318
|
|
@ -1,176 +0,0 @@
|
||||
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
|
||||
0,356,493,1399.0,30528.0,0.0458267819706499,0.077070556,2.6761e-5,17.804336617,0.960385595,10.618577031,4.95440474
|
||||
1,354,491,1399.0,30432.0,0.04597134595162986,1.030851104,2.37e-5,17.726472964,0.933074463,2.174912444,4.959474851
|
||||
2,352,489,1399.0,30336.0,0.04611682489451477,0.376282553,2.3861e-5,17.935912907,0.968087391,2.238665483,4.912705328
|
||||
3,350,487,1399.0,30240.0,0.04626322751322751,0.076651194,4.2451e-5,17.976779783,0.977130996,2.246167674,4.954520005
|
||||
4,348,485,1399.0,30144.0,0.04641056263269639,0.223709216,2.8031e-5,17.67129111,0.97799748,2.175788856,4.923999491
|
||||
5,346,483,1399.0,30048.0,0.04655883919062833,0.076034997,4.3191e-5,17.766336956,0.967055891,2.187609178,4.922574669
|
||||
6,344,481,1399.0,29952.0,0.04670806623931624,0.398917781,4.3422e-5,17.709032771,0.971142926,2.170963978,4.917191185
|
||||
7,342,479,1399.0,29856.0,0.04685825294748124,0.352569343,4.3801e-5,17.690255833,0.952966242,2.159295978,4.945842152
|
||||
8,340,477,1399.0,29760.0,0.04700940860215054,0.117620751,4.2992e-5,17.905787431,0.749896479,2.19940915,4.922882222
|
||||
9,338,475,1399.0,29664.0,0.04716154261057174,0.318053898,2.3481e-5,17.522775542,0.745113955,2.202366151,4.928734427
|
||||
10,336,473,1399.0,29568.0,0.047314664502164504,0.184069985,2.3381e-5,17.529935879,0.74637911,2.238397648,4.919919125
|
||||
11,334,471,1399.0,29472.0,0.047468783930510315,0.086029218,2.365e-5,17.560859257,0.75559668,2.249242933,4.956561058
|
||||
12,332,469,1399.0,29376.0,0.04762391067538126,0.077326472,2.4361e-5,17.559317648,0.746726769,2.1818156,4.938490196
|
||||
13,330,467,1399.0,29280.0,0.047780054644808743,0.169738661,2.342e-5,17.517109121,0.751453942,2.187781478,4.923659727
|
||||
14,328,465,1399.0,29184.0,0.047937225877192985,0.077817676,2.315e-5,17.533304215,0.745481303,2.209343496,4.960503415
|
||||
15,326,463,1399.0,29088.0,0.04809543454345434,0.171584444,2.352e-5,17.579912576,0.754778436,2.210370024,4.934281254
|
||||
16,324,461,1399.0,28992.0,0.04825469094922737,0.084223667,2.305e-5,17.570464754,0.751290178,2.22797709,4.939806799
|
||||
17,322,459,1399.0,28896.0,0.04841500553709856,0.123005102,2.3661e-5,17.605650973,0.756929676,2.269940175,4.937928844
|
||||
18,320,457,1399.0,28800.0,0.04857638888888889,0.086677986,2.37e-5,17.5539199,0.746367967,2.264938904,4.959258096
|
||||
19,318,455,1399.0,28704.0,0.04873885172798216,0.12293158,2.3711e-5,17.609395222,0.755783994,2.264754078,4.92827168
|
||||
20,316,453,1399.0,28608.0,0.04890240492170023,0.124475123,2.4281e-5,17.597716228,0.75106304,2.20218749,4.933120236
|
||||
21,314,451,1399.0,28512.0,0.04906705948372615,0.112172177,2.6391e-5,17.623178954,0.755694751,2.186417905,4.921509117
|
||||
22,312,449,1399.0,28416.0,0.04923282657657658,0.219362642,2.321e-5,17.593459902,0.747914841,2.168628993,4.952994795
|
||||
23,310,447,1399.0,28320.0,0.049399717514124294,0.080729209,2.358e-5,17.571675834,0.755489634,2.209531477,4.951190234
|
||||
24,308,445,1399.0,28224.0,0.049567743764172334,0.080235835,2.3271e-5,17.615791747,0.750314688,2.21464245,4.949496195
|
||||
25,306,443,1399.0,28128.0,0.049736916951080776,0.124106403,2.374e-5,17.60716179,0.753826187,2.186184237,4.920128786
|
||||
26,304,441,1399.0,28032.0,0.04990724885844749,0.080715608,2.3781e-5,17.581988477,0.750266997,2.209826064,4.937813884
|
||||
27,302,439,1399.0,27936.0,0.05007875143184422,0.080606465,2.4071e-5,17.633096607,0.749125265,2.198599437,4.935320693
|
||||
28,300,437,1399.0,27840.0,0.0502514367816092,0.081056137,2.3781e-5,17.564695624,0.746230293,2.225110355,4.939656214
|
||||
29,298,435,1399.0,27744.0,0.05042531718569781,0.096545225,2.379e-5,17.58144781,0.747458632,2.263551336,4.924245431
|
||||
30,296,433,1399.0,27648.0,0.050600405092592594,0.120638697,2.383e-5,17.574370836,0.748933285,2.234417803,4.915183371
|
||||
31,294,431,1399.0,27552.0,0.0507767131242741,0.125073582,2.393e-5,17.627352699,0.754384428,2.214199106,4.938130459
|
||||
32,292,429,1399.0,27456.0,0.05095425407925408,0.12314953,2.468e-5,17.697160429,0.796488763,2.261473826,4.956976138
|
||||
33,290,427,1399.0,27360.0,0.051133040935672516,0.125481487,2.354e-5,17.636971006,0.748416796,2.222200724,4.948970096
|
||||
34,288,425,1399.0,27264.0,0.051313086854460094,0.094052012,2.4301e-5,17.62971842,0.805139938,2.205015347,4.959455536
|
||||
35,286,423,1399.0,27168.0,0.051494405182567725,0.08136377,2.4041e-5,17.621304482,0.747718686,2.244362062,4.941432169
|
||||
36,284,421,1399.0,27072.0,0.05167700945626478,0.080217839,2.3921e-5,17.61427713,0.747754586,2.212103901,4.933185029
|
||||
37,282,417,1399.0,26976.0,0.051860913404507714,0.126372199,2.376e-5,17.601417663,0.750036789,2.163344775,4.926698186
|
||||
38,280,414,1399.0,26880.0,0.052046130952380955,0.125444544,2.476e-5,17.612452443,0.748155225,2.195259021,4.91594575
|
||||
39,278,412,1399.0,26784.0,0.05223267622461171,0.083158944,2.4551e-5,17.599589645,0.741671021,2.208064301,4.9351555
|
||||
40,276,410,1399.0,26688.0,0.05242056354916067,0.083321959,2.4101e-5,17.567124159,0.748238012,2.197233222,4.954754226
|
||||
41,274,408,1399.0,26592.0,0.052609807460890494,0.084803792,2.3901e-5,17.549365204,0.754817994,2.229499405,4.94957165
|
||||
42,272,405,1399.0,26496.0,0.05280042270531401,0.127648261,2.3851e-5,17.582852416,0.750759497,2.230398721,4.937220319
|
||||
43,270,401,1399.0,26400.0,0.052992424242424244,0.128445184,2.428e-5,17.596647819,0.75777713,2.160922996,4.937371146
|
||||
44,268,399,1399.0,26304.0,0.053185827250608275,0.129526096,2.5081e-5,17.594476326,0.746906342,2.219401891,4.93357998
|
||||
45,266,397,1399.0,26208.0,0.05338064713064713,0.129819495,2.4731e-5,17.568331366,0.750368555,2.18948505,4.922275732
|
||||
46,264,394,1399.0,26112.0,0.05357689950980392,0.087649075,2.462e-5,17.585414218,0.751605626,2.198684054,4.941424565
|
||||
47,262,391,1399.0,26016.0,0.05377460024600246,0.089110637,2.4551e-5,17.614139291,0.750622403,2.168793662,4.953321773
|
||||
48,260,389,1399.0,25920.0,0.053973765432098766,0.090307061,2.45e-5,17.633806293,0.749096576,2.224521298,4.930813246
|
||||
49,258,387,1399.0,25824.0,0.054174411400247834,0.133480181,2.461e-5,17.634768586,0.756613261,2.201452177,4.972809945
|
||||
50,256,385,1399.0,25728.0,0.05437655472636816,0.134254424,2.425e-5,17.606323938,0.748779206,2.216818872,4.939295094
|
||||
51,254,382,1399.0,25632.0,0.05458021223470662,0.134016868,2.4531e-5,17.5926305,0.75625873,2.227679889,4.968213894
|
||||
52,252,379,1399.0,25536.0,0.054785401002506263,0.135650945,2.4601e-5,17.642803637,0.751975585,2.226011125,4.9285844
|
||||
53,250,375,1399.0,25440.0,0.054992138364779876,0.136647933,2.4161e-5,17.799738254,0.76667472,2.165144989,4.930427128
|
||||
54,248,373,1399.0,25344.0,0.05520044191919192,0.123103164,2.4461e-5,17.745879754,0.760526742,2.161495227,4.940492285
|
||||
55,246,370,1399.0,25248.0,0.05541032953105197,0.09476826,2.3511e-5,17.596131758,0.756924114,2.180021837,4.954121771
|
||||
56,244,365,1399.0,25152.0,0.05562181933842239,0.095345787,2.4171e-5,17.612023424,0.747989147,2.215139082,4.945396527
|
||||
57,242,362,1399.0,25056.0,0.05583492975734355,0.139570128,2.3801e-5,17.630922372,0.750668446,2.186529739,4.961981394
|
||||
58,240,359,1399.0,24960.0,0.05604967948717949,0.097466916,2.4451e-5,17.61078772,0.7485922,2.217673752,4.95291513
|
||||
59,238,357,1399.0,24864.0,0.05626608751608752,0.138599302,2.3601e-5,17.586404505,0.756929027,2.233374301,4.935342135
|
||||
60,236,352,1399.0,24768.0,0.05648417312661499,0.147210964,2.4911e-5,17.650436019,0.74908103,2.157077946,4.937714591
|
||||
61,234,350,1399.0,24672.0,0.05670395590142672,0.099491094,2.3601e-5,17.608002511,0.756924473,2.165309665,4.932434479
|
||||
62,232,348,1399.0,24576.0,0.056925455729166664,0.141929827,2.454e-5,17.605756917,0.749178717,2.234082435,4.957629943
|
||||
63,230,344,1399.0,24480.0,0.057148692810457515,0.142483983,2.4211e-5,17.623883273,0.758216784,2.210078838,4.930940098
|
||||
64,228,341,1399.0,24384.0,0.057373687664041995,0.101524943,2.4371e-5,17.662312587,0.751128917,2.22449657,4.96708528
|
||||
65,226,339,1399.0,24288.0,0.05760046113306983,0.102619253,2.3831e-5,17.610112922,0.758167777,2.187456785,4.957519684
|
||||
66,224,337,1399.0,24192.0,0.05782903439153439,0.10351088,2.3401e-5,17.611932402,0.749178457,2.236980212,4.933450322
|
||||
67,222,335,1399.0,24096.0,0.05805942895086321,0.148780402,2.3711e-5,17.636035095,0.75707833,2.252138664,4.951632995
|
||||
68,220,333,1399.0,24000.0,0.058291666666666665,0.148311059,2.4851e-5,17.617252052,0.750104986,2.22330739,4.9243139
|
||||
69,218,329,1399.0,23904.0,0.05852576974564926,0.151678794,2.4181e-5,17.627742278,0.755299894,2.248062201,4.951401482
|
||||
70,216,326,1399.0,23808.0,0.05876176075268817,0.15082361,2.3851e-5,17.647410652,0.752445605,2.240948426,4.949599133
|
||||
71,214,323,1399.0,23712.0,0.05899966261808367,0.153382492,2.4011e-5,17.654743596,0.752802907,2.253819342,4.966250371
|
||||
72,212,320,1399.0,23616.0,0.05923949864498645,0.151516131,2.3931e-5,17.672908543,0.750257716,2.220003155,4.944782327
|
||||
73,210,317,1399.0,23520.0,0.059481292517006804,0.154244628,2.386e-5,17.60330678,0.750422813,2.211295295,4.943727837
|
||||
74,208,313,1399.0,23424.0,0.05972506830601093,0.153767234,2.4291e-5,17.640950842,0.74988433,2.24794966,4.952712228
|
||||
75,206,311,1399.0,23328.0,0.05997085048010974,0.155927375,2.406e-5,17.589128666,0.749120129,2.253801308,4.953014816
|
||||
76,204,306,1399.0,23232.0,0.06021866391184573,0.15464184,2.4521e-5,17.662616581,0.750484429,2.227511412,4.924026259
|
||||
77,202,304,1399.0,23136.0,0.06046853388658368,0.157807248,2.4041e-5,17.611953814,0.755679546,2.178734374,4.943974526
|
||||
78,200,301,1399.0,23040.0,0.06072048611111111,0.155978707,2.4051e-5,17.624250437,0.794935481,2.247188963,4.940403894
|
||||
79,198,298,1399.0,22944.0,0.06097454672245467,0.158377905,2.5091e-5,17.634938402,0.754743461,2.245248812,4.919902064
|
||||
80,196,296,1399.0,22848.0,0.061230742296918765,0.158750786,2.4511e-5,17.6360904,0.750867213,2.200032233,4.942215648
|
||||
81,194,293,1399.0,22752.0,0.061489099859353025,0.161152794,2.4831e-5,17.780761042,0.765338482,2.204873372,4.939655562
|
||||
82,192,290,1399.0,22656.0,0.061749646892655365,0.160175486,2.318e-5,17.798147683,0.76168194,2.230891056,4.955801153
|
||||
83,190,287,1399.0,22560.0,0.06201241134751773,0.159868767,2.4791e-5,17.764165058,0.796377137,2.239618185,4.928054627
|
||||
84,188,283,1399.0,22464.0,0.06227742165242165,0.160933577,2.4221e-5,17.798426962,0.848255338,2.218112612,4.932433146
|
||||
85,186,280,1399.0,22368.0,0.06254470672389127,0.163393917,2.4371e-5,17.808464853,0.765692696,2.213490844,4.943298137
|
||||
86,184,277,1399.0,22272.0,0.06281429597701149,0.163792118,2.4261e-5,17.805783627,0.761027705,2.232891092,4.919454211
|
||||
87,182,275,1399.0,22176.0,0.06308621933621934,0.162177953,2.43e-5,17.797665375,0.761040026,2.236586089,4.951072155
|
||||
88,180,271,1399.0,22080.0,0.06336050724637682,0.165377424,2.557e-5,17.805099359,0.763146286,2.212611436,4.921150887
|
||||
89,178,268,1399.0,21984.0,0.06363719068413391,0.166754373,2.5141e-5,17.770997205,0.764361801,2.199943181,4.934748884
|
||||
90,176,266,1399.0,21888.0,0.06391630116959064,0.167241957,2.4571e-5,17.770223198,0.759580227,2.247867501,4.935730147
|
||||
91,174,264,1399.0,21792.0,0.06419787077826726,0.169623073,2.5e-5,17.771153368,0.750276145,2.243455929,4.939933808
|
||||
92,172,261,1399.0,21696.0,0.06448193215339233,0.168358288,2.5181e-5,17.799224982,0.760906435,2.210000929,4.943923374
|
||||
93,170,259,1399.0,21600.0,0.06476851851851852,0.170287483,2.529e-5,17.79271252,0.763151029,2.205444892,4.924953813
|
||||
94,168,254,1399.0,21504.0,0.06505766369047619,0.168986856,2.5021e-5,17.775583682,0.760237647,2.222811993,4.951301097
|
||||
95,166,250,1399.0,21408.0,0.06534940209267563,0.171662521,2.4401e-5,17.636022254,0.749599438,2.234944605,4.958431762
|
||||
96,164,246,1399.0,21312.0,0.06564376876876876,0.170911431,2.4481e-5,17.633556045,0.788097892,2.198060879,4.922871993
|
||||
97,162,244,1399.0,21216.0,0.06594079939668175,0.172387252,2.4781e-5,17.620254381,0.799269067,2.202436673,4.936411908
|
||||
98,160,241,1399.0,21120.0,0.0662405303030303,0.171830017,2.581e-5,17.656653806,0.750275098,2.200933622,4.94776375
|
||||
99,158,238,1399.0,21024.0,0.06654299847792998,0.174560093,2.447e-5,17.625724723,0.756745741,2.249721096,4.958786002
|
||||
100,156,235,1399.0,20928.0,0.06684824159021406,0.178996759,2.453e-5,17.669194606,0.749422535,2.218089817,4.960858653
|
||||
101,154,231,1399.0,20832.0,0.0671562980030722,0.175032127,2.3871e-5,17.642586975,0.754643863,2.194675279,4.944134534
|
||||
102,152,229,1399.0,20736.0,0.06746720679012345,0.176393906,2.4731e-5,17.592973556,0.749943551,2.229565622,4.927935661
|
||||
103,150,225,1399.0,20640.0,0.06778100775193799,0.178017631,2.412e-5,17.630568322,0.755272802,2.221125776,4.952348991
|
||||
104,148,223,1399.0,20544.0,0.0680977414330218,0.175897841,2.36e-5,17.661766307,0.749293633,2.2201698,4.963634779
|
||||
105,146,221,1399.0,20448.0,0.06841744913928012,0.178367362,2.5001e-5,17.654508999,0.755361234,2.185187066,4.938710949
|
||||
106,144,218,1399.0,20352.0,0.06874017295597484,0.178791594,2.502e-5,17.649520916,0.749748217,2.238645461,4.955141284
|
||||
107,142,216,1399.0,20256.0,0.06906595576619273,0.175900502,2.3291e-5,17.648252045,0.755157659,2.250102545,4.948078116
|
||||
108,140,212,1399.0,20160.0,0.06939484126984127,0.180050739,2.3901e-5,17.642556024,0.751139061,2.195233955,4.92102672
|
||||
109,138,210,1399.0,20064.0,0.06972687400318979,0.182587052,2.492e-5,17.631301401,0.754040144,2.177296385,4.948297571
|
||||
110,136,207,1399.0,19968.0,0.07006209935897435,0.181449712,2.4401e-5,17.618787463,0.748940439,2.251932822,4.950366155
|
||||
111,134,203,1399.0,19872.0,0.07040056360708534,0.183466877,2.407e-5,17.658532693,0.756589176,2.240568188,4.97337861
|
||||
112,132,201,1399.0,19776.0,0.0707423139158576,0.181545084,2.485e-5,17.63441504,0.751343023,2.183033772,4.975534251
|
||||
113,130,199,1399.0,19680.0,0.07108739837398374,0.177809314,2.417e-5,17.627163359,0.754577307,2.211080446,4.977438563
|
||||
114,128,195,1399.0,19584.0,0.07143586601307189,0.183038393,2.5541e-5,17.63366534,0.751510139,2.237832092,4.969644912
|
||||
115,126,191,1399.0,19488.0,0.07178776683087028,0.186344151,2.4971e-5,17.711808739,0.759177,2.236586017,4.951292022
|
||||
116,124,187,1399.0,19392.0,0.07214315181518152,0.184833587,2.475e-5,17.648467279,0.749564641,2.179772409,4.97017709
|
||||
117,122,183,1399.0,19296.0,0.07250207296849089,0.193249355,2.3811e-5,17.639230223,0.755564354,2.195109482,4.982434629
|
||||
118,120,180,1399.0,19200.0,0.07286458333333333,0.186818046,2.372e-5,17.635977046,0.750626058,2.243877912,4.972608068
|
||||
119,118,177,1399.0,19104.0,0.07323073701842546,0.189204719,2.4961e-5,17.791522288,0.766082656,2.242948358,4.980365418
|
||||
120,116,173,1399.0,19008.0,0.07360058922558922,0.186391669,2.4181e-5,17.645956891,0.750893368,2.197914806,4.98745469
|
||||
121,114,171,1399.0,18912.0,0.07397419627749577,0.19060573,2.4701e-5,17.771140583,0.765197694,2.20643796,4.959618561
|
||||
122,112,169,1399.0,18816.0,0.0743516156462585,0.188466188,2.381e-5,17.795228145,0.759434429,2.26208531,4.965068853
|
||||
123,110,165,1399.0,18720.0,0.07473290598290598,0.191524927,2.3841e-5,17.779734215,0.767242896,2.242967333,4.950554681
|
||||
124,108,161,1399.0,18624.0,0.07511812714776632,0.189450326,2.3601e-5,17.807849571,0.762371273,2.196711688,4.966122065
|
||||
125,106,157,1399.0,18528.0,0.0755073402417962,0.191473057,2.357e-5,17.632877767,0.755845465,2.188474891,4.977562868
|
||||
126,104,153,1399.0,18432.0,0.0759006076388889,0.191382079,2.3851e-5,17.775729988,0.758861116,2.278116886,4.979965119
|
||||
127,102,151,1399.0,18336.0,0.07629799301919721,0.192296369,2.394e-5,17.777918793,0.764981303,2.224818047,4.949944943
|
||||
128,100,149,1399.0,18240.0,0.07669956140350877,0.191424719,2.4331e-5,17.856475915,0.76057459,2.201588049,4.941974925
|
||||
129,98,146,1399.0,18144.0,0.07710537918871252,0.194280932,2.3951e-5,17.779963845,0.766401736,2.223182601,4.961465017
|
||||
130,96,142,1399.0,18048.0,0.07751551418439716,0.192850597,2.3861e-5,17.765033828,0.760509569,2.250897799,4.967399083
|
||||
131,94,138,1399.0,17952.0,0.07793003565062388,0.194741823,2.38e-5,17.778261696,0.764271609,2.248898068,4.975998565
|
||||
132,92,136,1399.0,17856.0,0.07834901433691756,0.193567295,2.5281e-5,17.791322862,0.759809249,2.216694812,4.962092553
|
||||
133,90,132,1399.0,17760.0,0.07877252252252252,0.196949912,2.4641e-5,17.775924767,0.766636532,2.192664527,4.943809886
|
||||
134,88,129,1399.0,17664.0,0.07920063405797101,0.19423328,2.4491e-5,17.775940481,0.759698903,2.241454301,4.965419114
|
||||
135,86,125,1399.0,17568.0,0.07963342440801457,0.196021362,2.4541e-5,17.749824568,0.77002309,2.244133161,4.973507276
|
||||
136,84,123,1399.0,17472.0,0.08007097069597069,0.195945063,2.4791e-5,17.793381264,0.758984676,2.223761942,4.967845004
|
||||
137,82,120,1399.0,17376.0,0.0805133517495396,0.196404909,2.5491e-5,17.781126567,0.76777764,2.208548873,4.942758101
|
||||
138,80,116,1399.0,17280.0,0.08096064814814814,0.197313346,2.469e-5,17.785944557,0.814271788,2.200296465,4.939179018
|
||||
139,78,114,1399.0,17184.0,0.08141294227188083,0.155633427,2.5181e-5,17.79491891,0.767423131,2.233213884,4.963944358
|
||||
140,76,111,1399.0,17088.0,0.08187031835205992,0.194686919,2.4311e-5,17.835512877,0.761171578,2.216772786,4.968370761
|
||||
141,74,108,1399.0,16992.0,0.0823328625235405,0.19895497,2.4301e-5,17.80769545,0.768202031,2.212642548,4.971369432
|
||||
142,72,106,1399.0,16896.0,0.08280066287878787,0.197589165,2.4241e-5,17.817799582,0.760097766,2.219367009,4.967751237
|
||||
143,70,102,1399.0,16800.0,0.08327380952380953,0.200103786,2.425e-5,17.804210307,0.767108387,2.264925155,4.965506236
|
||||
144,68,99,1399.0,16704.0,0.08375239463601533,0.196633322,2.5371e-5,17.822197608,0.762852947,2.20877412,4.971541033
|
||||
145,66,97,1399.0,16608.0,0.08423651252408478,0.200144552,2.4801e-5,17.823667792,0.766965999,2.209992675,4.969252216
|
||||
146,64,93,1399.0,16512.0,0.08472625968992248,0.199816644,2.4901e-5,17.838429006,0.764432365,2.241092809,4.961995819
|
||||
147,62,89,1399.0,16416.0,0.08522173489278752,0.187325579,2.5321e-5,17.811923957,0.767393244,2.227406228,4.960056608
|
||||
148,60,85,1399.0,16320.0,0.08572303921568628,0.198893612,2.4451e-5,17.82940565,0.760747136,2.209815727,4.971563658
|
||||
149,58,83,1399.0,16224.0,0.08623027613412229,0.201039293,2.4651e-5,17.817639935,0.767607352,2.210546374,4.97066195
|
||||
150,56,81,1399.0,16128.0,0.08674355158730158,0.199841932,2.414e-5,17.82203287,0.760048809,2.243550629,4.954439346
|
||||
151,54,79,1399.0,16032.0,0.0872629740518962,0.2011596,2.4741e-5,17.804574042,0.767800679,2.250206119,4.955980994
|
||||
152,52,75,1399.0,15936.0,0.08778865461847389,0.19971389,2.4331e-5,17.829821975,0.762018993,2.205143141,4.970086548
|
||||
153,50,73,1399.0,15840.0,0.08832070707070708,0.201368798,2.4881e-5,17.836101646,0.767371477,2.218711432,4.96364023
|
||||
154,48,71,1399.0,15744.0,0.08885924796747967,0.200798594,2.4491e-5,17.830384655,0.765407907,2.286796949,4.939295093
|
||||
155,46,67,1399.0,15648.0,0.08940439672801637,0.202551163,2.5121e-5,17.827221721,0.768466657,2.262575248,4.943430916
|
||||
156,44,65,1399.0,15552.0,0.08995627572016461,0.198816901,2.578e-5,17.840506569,0.760760306,2.220630133,4.952844324
|
||||
157,42,63,1399.0,15456.0,0.09051501035196688,0.201424744,2.5021e-5,17.814439397,0.767553139,2.196934945,4.958506547
|
||||
158,40,59,1399.0,15360.0,0.09108072916666667,0.202145126,2.565e-5,17.808712307,0.76137146,2.235801178,4.949559042
|
||||
159,38,55,1399.0,15264.0,0.0916535639412998,0.201663393,2.4591e-5,17.784477195,0.766209648,2.249329555,4.964028527
|
||||
160,36,53,1399.0,15168.0,0.09223364978902954,0.199579456,2.5461e-5,17.900752023,0.761934363,2.209582978,4.950507063
|
||||
161,34,48,1399.0,15072.0,0.09282112526539278,0.159541692,2.5211e-5,17.769415534,0.935609132,2.216664395,4.962977201
|
||||
162,32,44,1399.0,14976.0,0.09341613247863248,0.201979445,2.5581e-5,17.802148727,0.758630938,2.257162782,4.954367291
|
||||
163,30,40,1399.0,14880.0,0.09401881720430108,0.203381244,2.5411e-5,17.808584074,0.768160516,2.239967841,4.949515694
|
||||
164,28,35,1399.0,14784.0,0.09462932900432901,0.200707381,2.5071e-5,17.811958674,0.765546396,2.222827481,4.962523474
|
||||
165,26,31,1399.0,14688.0,0.09524782135076253,0.203476579,2.4431e-5,17.791537057,0.759747517,2.210172596,4.96717851
|
||||
166,24,29,1399.0,14592.0,0.09587445175438597,0.38619058,2.5161e-5,17.784565893,0.765981903,2.205094732,4.970469758
|
||||
167,22,25,1399.0,14496.0,0.09650938189845475,0.209174268,2.6071e-5,17.886396985,0.762283972,2.251379768,4.9348063
|
||||
168,20,21,1399.0,14400.0,0.09715277777777778,0.184182012,2.5331e-5,17.791795342,0.760972528,2.229551257,4.941190792
|
||||
169,18,17,1399.0,14304.0,0.09780480984340045,0.203935864,2.572e-5,17.823665061,0.762353868,2.199132836,4.965200905
|
||||
170,16,15,1399.0,14208.0,0.09846565315315316,0.200164969,2.4631e-5,17.792385586,0.76804392,2.174965407,4.972074439
|
||||
171,14,13,1399.0,14112.0,0.09913548752834467,0.204567903,2.5071e-5,17.806154396,0.759505453,2.2340466,4.972671228
|
||||
172,12,11,1399.0,14016.0,0.09981449771689498,0.201861418,2.5971e-5,18.529840195,0.789347616,2.23167521,4.947890089
|
||||
173,10,9,1399.0,13920.0,0.1005028735632184,0.202902727,2.4951e-5,17.865867105,0.761004999,2.194876208,4.93177029
|
||||
174,8,7,1399.0,13824.0,0.10120081018518519,0.198079003,2.4651e-5,17.791197743,0.767399089,2.226370372,4.951979965
|
|
@ -1,82 +0,0 @@
|
||||
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
|
||||
0,356,493,1399.0,30528.0,0.0458267819706499,0.084389903,2.4971e-5,17.802549835,0.960409581,2.406448706,4.927079076
|
||||
1,351,483,1369.0,30528.0,0.044844077568134175,0.126855933,2.9211e-5,16.868735557,0.927387188,2.257632484,4.697683068
|
||||
2,346,478,1369.0,30048.0,0.04556043663471779,0.08319682,3.5431e-5,16.871399152,0.834869326,2.264361993,4.701280771
|
||||
3,341,473,1314.0,30048.0,0.04373003194888179,0.124422234,2.392e-5,16.454231193,0.856669072,2.271991539,4.68580348
|
||||
4,336,463,1284.0,30048.0,0.042731629392971246,0.121696991,2.2921e-5,15.881542683,0.816430136,2.213686135,4.449106524
|
||||
5,331,458,1284.0,29568.0,0.04342532467532467,0.124024888,2.314e-5,15.879200155,0.799333453,2.194093083,4.435654931
|
||||
6,326,448,1254.0,29568.0,0.04241071428571429,0.121610951,2.2e-5,15.325702423,0.833341953,2.203843882,4.199677306
|
||||
7,321,438,1224.0,29568.0,0.041396103896103896,0.118972208,2.1631e-5,14.367273685,0.711553932,2.16189756,3.948872646
|
||||
8,316,433,1224.0,29088.0,0.04207920792079208,0.074826839,2.2031e-5,14.367107152,0.792981221,2.169096496,3.961630969
|
||||
9,311,428,1169.0,29088.0,0.04018839383938394,0.116237162,2.15e-5,14.416973472,0.788583102,2.092186151,3.946339564
|
||||
10,306,418,1139.0,29088.0,0.03915704070407041,0.114647398,2.031e-5,13.671420757,0.745657392,2.037551329,3.657411205
|
||||
11,301,408,1109.0,29088.0,0.03812568756875687,0.11434652,1.951e-5,13.093103664,0.686554396,2.065489584,3.441139671
|
||||
12,296,403,1109.0,28608.0,0.03876538031319911,0.112282663,1.8991e-5,13.11525848,0.705183633,2.0639299,3.422598036
|
||||
13,291,398,1109.0,28128.0,0.039426905574516495,0.111549203,1.9661e-5,13.08100601,0.700772882,2.065935946,3.41679234
|
||||
14,286,388,1079.0,28128.0,0.0383603526734926,0.109881396,1.907e-5,11.871746271,0.665244638,2.063828106,3.187580585
|
||||
15,281,378,1049.0,28128.0,0.037293799772468716,0.108444747,1.7961e-5,10.963517612,0.62180291,2.037926216,2.935137574
|
||||
16,276,373,1049.0,27648.0,0.03794126157407408,0.107959773,1.874e-5,11.021594456,0.541779823,2.003876106,2.931304737
|
||||
17,271,368,1049.0,27168.0,0.03861160188457008,0.105629068,1.8241e-5,11.017450178,0.581974375,2.017201027,2.952118903
|
||||
18,266,363,1049.0,26688.0,0.0393060551558753,0.107303406,1.8301e-5,11.028597789,0.556078309,2.037535226,2.911405619
|
||||
19,261,358,994.0,26688.0,0.03724520383693045,0.106584986,1.7111e-5,10.789192026,0.525275525,2.011931363,2.931360979
|
||||
20,256,353,939.0,26688.0,0.035184352517985615,0.105743463,1.7521e-5,10.50283261,0.535253087,1.962456949,2.941274646
|
||||
21,255,351,933.0,26688.0,0.03495953237410072,0.105189187,1.7471e-5,10.739591259,0.555102576,2.013201521,2.896175037
|
||||
22,254,350,933.0,26592.0,0.035085740072202165,0.105895137,1.6631e-5,10.68514711,0.571809578,1.974934611,2.890503396
|
||||
23,253,348,927.0,26592.0,0.0348601083032491,0.104181459,1.817e-5,10.344271645,0.572483889,2.002875753,2.842241926
|
||||
24,252,347,927.0,26496.0,0.034986413043478264,0.103568232,1.7471e-5,10.363216025,0.602207417,1.943794016,2.811132729
|
||||
25,247,342,927.0,26016.0,0.035631918819188195,0.102006829,1.669e-5,10.360319761,0.588967585,1.942523675,2.838431844
|
||||
26,246,340,921.0,26016.0,0.03540129151291513,0.103244544,1.672e-5,10.140255758,0.565172778,1.980058606,2.776594151
|
||||
27,245,339,921.0,25920.0,0.03553240740740741,0.102991317,1.723e-5,10.166352736,0.588556746,2.025713505,2.754827976
|
||||
28,244,337,915.0,25920.0,0.03530092592592592,0.102527335,1.6261e-5,9.965044496,0.527648944,1.966870364,2.708992883
|
||||
29,243,335,909.0,25920.0,0.035069444444444445,0.101020632,1.6541e-5,9.899918186,0.530837495,1.99964346,2.686936268
|
||||
30,242,334,909.0,25824.0,0.03519981412639405,0.099846559,1.614e-5,9.924451078,0.532149983,1.992832633,2.667590089
|
||||
31,241,333,909.0,25728.0,0.035331156716417914,0.103293156,1.634e-5,9.893503718,0.500188044,1.971455575,2.661440862
|
||||
32,236,328,909.0,25248.0,0.036002851711026615,0.110948742,1.5851e-5,9.916889596,0.515528547,2.014256204,2.691654688
|
||||
33,235,326,903.0,25248.0,0.03576520912547528,0.099799239,1.658e-5,9.667648582,0.561210643,1.981308261,2.647665444
|
||||
34,234,324,897.0,25248.0,0.035527566539923956,0.099455409,1.6561e-5,9.588166052,0.544847505,1.932560182,2.56349283
|
||||
35,233,323,897.0,25152.0,0.035663167938931296,0.103335368,1.6271e-5,9.590387462,0.542413718,1.965145602,2.559435691
|
||||
36,232,321,891.0,25152.0,0.03542461832061069,0.097770562,1.6571e-5,9.362808632,0.543288523,2.017894491,2.498672404
|
||||
37,231,320,891.0,25056.0,0.03556034482758621,0.100428616,1.5941e-5,9.340302395,0.548822639,1.994799194,2.525394
|
||||
38,230,319,891.0,24960.0,0.03569711538461538,0.056667955,1.5341e-5,9.356871677,0.537041949,1.921246656,2.507595034
|
||||
39,225,314,891.0,24480.0,0.036397058823529414,0.099323026,1.636e-5,9.383625024,0.506403697,1.972101141,2.529248938
|
||||
40,220,309,836.0,24480.0,0.03415032679738562,0.096789665,1.645e-5,9.524601658,0.473707387,1.980933173,2.524768525
|
||||
41,215,304,836.0,24000.0,0.034833333333333334,0.053463925,1.671e-5,9.520567128,0.487585179,1.942542795,2.535491481
|
||||
42,214,302,830.0,24000.0,0.034583333333333334,0.096303802,1.6011e-5,9.137262758,0.4297148,1.950560163,2.478408276
|
||||
43,213,301,830.0,23904.0,0.034722222222222224,0.070596338,1.6901e-5,9.143790565,0.492842898,1.949332161,2.476752284
|
||||
44,212,299,824.0,23904.0,0.034471218206157964,0.09696925,1.612e-5,9.089211511,0.456930617,2.022026121,2.419473874
|
||||
45,211,297,818.0,23904.0,0.03422021419009371,0.052526649,1.536e-5,8.807671694,0.471203239,1.970488502,2.372441242
|
||||
46,210,296,818.0,23808.0,0.03435819892473118,0.096716114,1.5701e-5,8.806210783,0.451452844,1.960073481,2.387451098
|
||||
47,209,295,818.0,23712.0,0.034497300944669365,0.05145174,1.6061e-5,8.867215342,0.450895098,1.968012818,2.394204111
|
||||
48,204,290,818.0,23232.0,0.03521005509641873,0.093248236,1.9521e-5,8.844517253,0.476030278,1.963827031,2.389413849
|
||||
49,203,288,812.0,23232.0,0.034951790633608815,0.093881584,1.527e-5,8.849095772,0.446415074,1.974782212,2.332439097
|
||||
50,202,287,812.0,23136.0,0.03509681881051176,0.050473481,1.5851e-5,8.784636116,0.469233287,1.953068913,2.321316886
|
||||
51,201,285,806.0,23136.0,0.034837482710926695,0.092750242,1.5541e-5,8.632088328,0.491467054,1.945455141,2.29300329
|
||||
52,200,284,806.0,23040.0,0.03498263888888889,0.092540087,1.7161e-5,8.637677414,0.471865872,1.975464118,2.259260411
|
||||
53,199,282,800.0,23040.0,0.034722222222222224,0.092944049,1.5261e-5,8.624992966,0.478249573,1.931707577,2.232058939
|
||||
54,198,281,800.0,22944.0,0.03486750348675035,0.091660013,1.575e-5,8.680034605,0.429976994,2.022314921,2.224544849
|
||||
55,197,279,794.0,22944.0,0.03460599721059972,0.092591389,1.582e-5,8.266084761,0.442472956,1.949268775,2.165130527
|
||||
56,196,278,794.0,22848.0,0.03475140056022409,0.090376966,1.529e-5,8.26930839,0.438461132,1.960119483,2.169387658
|
||||
57,191,273,739.0,22848.0,0.03234418767507003,0.090398736,1.589e-5,8.061516101,0.468233752,1.825342557,2.144808638
|
||||
58,186,268,739.0,22368.0,0.03303826895565093,0.090566151,1.5781e-5,8.051685873,0.472555774,1.827021946,2.175475243
|
||||
59,185,266,733.0,22368.0,0.03277002861230329,0.046301524,1.4931e-5,7.809555195,0.466519375,1.819191936,2.095906173
|
||||
60,184,264,727.0,22368.0,0.03250178826895565,0.087977349,1.4771e-5,7.825535183,0.452072238,1.820734702,2.06485156
|
||||
61,183,263,727.0,22272.0,0.032641882183908046,0.08908488,1.4591e-5,7.77560322,0.445728609,1.804235078,2.06763398
|
||||
62,182,262,727.0,22176.0,0.03278318903318903,0.076517376,1.461e-5,7.754359737,0.421063625,1.812681957,2.076417548
|
||||
63,181,260,721.0,22176.0,0.032512626262626264,0.088983767,1.4091e-5,7.616158878,0.422402602,1.868182992,2.016601005
|
||||
64,180,259,721.0,22080.0,0.03265398550724638,0.089172453,1.467e-5,7.63910266,0.402654247,1.844390793,2.031385412
|
||||
65,175,254,666.0,22080.0,0.03016304347826087,0.091971222,1.3851e-5,7.35822511,0.443635961,1.719023302,2.007792679
|
||||
66,170,249,666.0,21600.0,0.030833333333333334,0.073480651,1.3871e-5,7.291999508,0.434965958,1.750073777,1.999358953
|
||||
67,169,247,660.0,21600.0,0.030555555555555555,0.085309774,1.7211e-5,7.245192983,0.412650069,1.744681817,1.962798523
|
||||
68,168,245,654.0,21600.0,0.03027777777777778,0.089043539,1.367e-5,7.024436477,0.421292773,1.722710908,1.890918459
|
||||
69,167,243,648.0,21600.0,0.03,0.084353527,1.428e-5,6.8832018,0.415786727,1.715216258,1.830282141
|
||||
70,166,242,648.0,21504.0,0.030133928571428572,0.084367977,1.3441e-5,6.899982477,0.419080281,1.707637056,1.843529005
|
||||
71,165,241,648.0,21408.0,0.030269058295964126,0.085701815,1.4031e-5,6.936174291,0.377346024,1.704252961,1.85218872
|
||||
72,164,240,648.0,21312.0,0.030405405405405407,0.083910355,1.3601e-5,6.9051589,0.389477478,1.75740328,1.867258596
|
||||
73,159,235,593.0,21312.0,0.0278246996996997,0.082135195,1.3351e-5,7.031037571,0.356084586,1.631072,1.797434919
|
||||
74,154,230,593.0,20832.0,0.028465821812596007,0.080356395,1.358e-5,7.040766129,0.405151789,1.620631997,1.781269114
|
||||
75,153,228,587.0,20832.0,0.02817780337941628,0.066967517,1.3391e-5,6.644186555,0.395240289,1.641155866,1.743666486
|
||||
76,152,226,581.0,20832.0,0.02788978494623656,0.080763676,1.298e-5,6.633937959,0.388869331,1.630064054,1.701302723
|
||||
77,151,225,581.0,20736.0,0.028018904320987654,0.080671833,1.2781e-5,6.622133299,0.392564435,1.625932508,1.711411428
|
||||
78,150,224,581.0,20640.0,0.02814922480620155,0.080368195,1.358e-5,6.599986437,0.397419271,1.657700695,1.694756709
|
||||
79,149,222,575.0,20640.0,0.027858527131782947,0.080015475,1.298e-5,6.281191715,0.37819019,1.622522233,1.656839741
|
||||
80,148,221,575.0,20544.0,0.027988707165109036,0.065331671,1.334e-5,6.313635402,0.380955078,1.627111603,1.638795233
|
|
@ -1,79 +0,0 @@
|
||||
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
|
||||
0,15866,21617,66249.0,1.314048e6,0.050415966540035065,6.468999136,0.001398329,8.478099553,0.43958521,0.0,0.0
|
||||
10,14676,19713,60656.0,1.279776e6,0.0473957942639962,5.993535435,0.000745961,7.192805963,0.417393835,0.0,0.0
|
||||
20,13774,18527,56334.0,1.243296e6,0.04531020770596865,5.489738392,0.000682889,6.652182167,0.336339503,0.0,0.0
|
||||
30,13352,17940,53276.0,1.236672e6,0.04308013765978368,5.169906767,0.000675318,6.370526843,0.313517861,0.0,0.0
|
||||
40,12714,17168,51163.0,1.199712e6,0.042646068389746876,4.845906388,0.000634457,6.124306725,0.311820244,0.0,0.0
|
||||
50,12004,16270,48473.0,1.163232e6,0.04167096503534978,4.433653313,0.000596017,5.760561483,0.320897852,0.0,0.0
|
||||
60,11750,15983,48022.0,1.144224e6,0.04196905501020779,4.316924709,0.000596237,5.738809149,0.283214404,0.0,0.0
|
||||
70,11538,15697,47325.0,1.133184e6,0.04176285581158929,4.201152631,0.000554855,5.438337093,0.313985744,0.0,0.0
|
||||
80,11434,15550,46814.0,1.129536e6,0.04144533684628024,4.216359254,0.000553545,5.429706297,0.268223845,0.0,0.0
|
||||
90,11066,15085,46232.0,1.10352e6,0.041895026823256486,3.924567625,0.000560535,5.412444055,0.274917428,0.0,0.0
|
||||
100,10848,14847,44297.0,1.100352e6,0.04025711772232885,3.848048388,0.000527955,5.127227854,0.294706757,0.0,0.0
|
||||
110,10462,14382,42261.0,1.084512e6,0.038967756926617685,3.674674179,0.000509054,4.922064369,0.276530272,0.0,0.0
|
||||
120,10304,14191,41810.0,1.07472e6,0.038903156170909635,3.58233155,0.000516074,5.02371138,0.266906519,0.0,0.0
|
||||
130,10200,14067,41437.0,1.068864e6,0.03876732680677804,3.529160319,0.000501634,4.863804478,0.24639169,0.0,0.0
|
||||
140,10042,13871,40956.0,1.059552e6,0.03865407266467337,3.346890818,0.000488403,4.753116119,0.254509861,0.0,0.0
|
||||
150,9956,13765,40583.0,1.055424e6,0.038451844945727974,3.41847396,0.000500654,4.756966153,0.255966291,0.0,0.0
|
||||
160,9906,13690,40433.0,1.053024e6,0.03839703558513386,3.405093274,0.000496774,4.812050085,0.24421971,0.0,0.0
|
||||
170,9838,13597,40283.0,1.048896e6,0.038405142168527674,3.348340057,0.000481363,4.669473296,0.234701411,0.0,0.0
|
||||
180,9242,12790,37708.0,1.02336e6,0.03684724828017511,3.063089187,0.000449352,4.335668832,0.228471471,0.0,0.0
|
||||
190,9120,12648,37082.0,1.017984e6,0.03642689865459575,2.994073054,0.000429002,4.181894908,0.224361729,0.0,0.0
|
||||
200,9052,12555,36932.0,1.013856e6,0.03642726383233911,3.046147594,0.000427282,4.151250123,0.212513705,0.0,0.0
|
||||
210,8912,12405,36366.0,1.005792e6,0.03615658108237091,2.937579863,0.000433982,4.261727394,0.214012817,0.0,0.0
|
||||
220,8808,12281,35993.0,999936.0,0.035995303699436765,2.892146284,0.000432382,4.198423468,0.219749812,0.0,0.0
|
||||
230,8626,12061,35765.0,986112.0,0.03626869970145379,2.752333211,0.000414672,4.035044142,0.241721263,0.0,0.0
|
||||
240,8426,11841,34336.0,980256.0,0.03502758463095355,2.714773746,0.000414522,4.036870861,0.235365769,0.0,0.0
|
||||
250,8118,11464,33416.0,961728.0,0.03474579090969588,2.579966689,0.000402461,3.870568035,0.20937257,0.0,0.0
|
||||
260,7942,11242,32634.0,953664.0,0.034219599355747934,2.520293442,0.000391581,3.72881432,0.191238985,0.0,0.0
|
||||
270,7838,11100,32153.0,949536.0,0.0338618019748593,2.456319106,0.000383211,3.635092003,0.187908484,0.0,0.0
|
||||
280,7716,10940,31672.0,943680.0,0.033562224482875554,2.402192681,0.00037687,3.594882506,0.194062713,0.0,0.0
|
||||
290,7576,10772,30745.0,939552.0,0.032723042471305475,2.338714319,0.00037334,3.556085038,0.194369971,0.0,0.0
|
||||
300,7376,10529,30487.0,924480.0,0.0329774575977847,2.279512925,0.00036552,3.504723807,0.191079171,0.0,0.0
|
||||
310,7218,10310,29868.0,917376.0,0.03255807869401423,2.207692656,0.000355539,3.30937664,0.181261073,0.0,0.0
|
||||
320,7078,10137,29417.0,909312.0,0.03235083227759009,2.147511905,0.000352659,3.30461376,0.18005858,0.0,0.0
|
||||
330,6860,9848,28991.0,895200.0,0.032384941912421805,2.078259266,0.00033941,3.211808988,0.172834084,0.0,0.0
|
||||
340,6702,9611,28264.0,889824.0,0.03176358470888625,2.069880378,0.000318959,3.033092324,0.154811992,0.0,0.0
|
||||
350,6616,9505,27891.0,885696.0,0.03149048883589855,2.005510172,0.000326369,3.008426711,0.173417779,0.0,0.0
|
||||
360,6512,9391,27325.0,881088.0,0.03101279327377061,1.968347618,0.000315789,2.921325386,0.168873786,0.0,0.0
|
||||
370,6426,9280,27175.0,875232.0,0.03104891046031224,1.92734893,0.000315548,2.990437001,0.181187901,0.0,0.0
|
||||
380,6358,9187,27025.0,871104.0,0.031023850194695467,1.889258172,0.000308689,2.846738111,0.181651873,0.0,0.0
|
||||
390,6272,9081,26652.0,866976.0,0.030741335400287898,1.840892272,0.000329279,2.825270586,0.177422669,0.0,0.0
|
||||
400,6204,8993,26532.0,862368.0,0.03076644773460982,1.820608708,0.000296329,2.759355249,0.175583708,0.0,0.0
|
||||
410,6118,8864,26274.0,858240.0,0.030613814317673377,1.783961229,0.000290708,2.707626007,0.172954176,0.0,0.0
|
||||
420,6014,8740,25901.0,852384.0,0.030386539400082593,1.774576254,0.000288998,2.694176581,0.173939173,0.0,0.0
|
||||
430,5928,8629,25498.0,848736.0,0.030042321758473777,1.7065974,0.000284277,2.675798329,0.170062674,0.0,0.0
|
||||
440,5842,8523,25125.0,844608.0,0.029747527847238008,1.685087395,0.000287118,2.688215586,0.166480549,0.0,0.0
|
||||
450,5738,8399,24752.0,838752.0,0.02951051085422151,1.673553823,0.000274969,2.523253333,0.167824913,0.0,0.0
|
||||
460,5670,8316,24662.0,833664.0,0.02958266159987717,1.625105871,0.000272178,2.52817126,0.164730041,0.0,0.0
|
||||
470,5548,8161,24211.0,827328.0,0.029264088729016785,1.583826656,0.000262318,2.419247276,0.160768733,0.0,0.0
|
||||
480,5426,8006,23760.0,820992.0,0.028940598690364826,1.58433006,0.000264708,2.454129792,0.155746163,0.0,0.0
|
||||
490,5358,7918,23640.0,816384.0,0.028956961429915332,1.520887155,0.000253268,2.329551174,0.153813499,0.0,0.0
|
||||
500,5272,7807,23237.0,812736.0,0.02859108000629971,1.488167166,0.000248837,2.282665244,0.154234105,0.0,0.0
|
||||
510,5150,7647,22756.0,806880.0,0.028202458853856832,1.448681065,0.000247727,2.275316917,0.149501885,0.0,0.0
|
||||
520,5028,7487,22022.0,803232.0,0.02741673638500458,1.43939862,0.000236057,2.14942739,0.146771977,0.0,0.0
|
||||
530,4906,7350,21679.0,795168.0,0.02726342106322186,1.367826149,0.000242258,2.188588822,0.148076932,0.0,0.0
|
||||
540,4838,7257,21529.0,791040.0,0.027216069983818772,1.341798982,0.000230357,2.096237881,0.141709174,0.0,0.0
|
||||
550,4752,7151,21156.0,786912.0,0.02688483591557887,1.339939443,0.000227267,2.062687036,0.13782156,0.0,0.0
|
||||
560,4684,7068,21066.0,781824.0,0.026944683202357565,1.327848904,0.000222317,2.00294804,0.139508498,0.0,0.0
|
||||
570,4634,6993,20916.0,779424.0,0.02683520137948023,1.276183945,0.000224717,2.021180753,0.13573571,0.0,0.0
|
||||
580,4548,6882,20766.0,773568.0,0.026844440307768676,1.235522514,0.000212457,1.917354147,0.128401984,0.0,0.0
|
||||
590,4498,6807,20616.0,771168.0,0.026733474418025645,1.267249751,0.000212506,1.899792552,0.133449083,0.0,0.0
|
||||
600,4376,6657,20195.0,764352.0,0.0264210730134807,1.209891149,0.000205326,1.850663451,0.129490109,0.0,0.0
|
||||
610,4326,6582,20045.0,761952.0,0.026307431439250767,1.18887911,0.000203196,1.819359467,0.129183977,0.0,0.0
|
||||
620,4204,6422,19564.0,756096.0,0.02587502116133401,1.172245936,0.000212366,1.757557943,0.125887084,0.0,0.0
|
||||
630,3836,5980,17558.0,741504.0,0.02367890126014155,1.043747354,0.000175996,1.554965777,0.115650062,0.0,0.0
|
||||
640,3732,5856,17438.0,733440.0,0.023775632635253053,1.010298683,0.000174715,1.562411059,0.113877446,0.0,0.0
|
||||
650,3628,5714,16957.0,729312.0,0.023250680093019175,0.985957627,0.000170445,1.474744854,0.110990727,0.0,0.0
|
||||
660,3506,5549,16446.0,723936.0,0.022717477788091765,0.948042334,0.000161975,1.420057878,0.106426767,0.0,0.0
|
||||
670,3420,5448,16103.0,719328.0,0.0223861715378798,0.921840457,0.000156765,1.356400004,0.10491163,0.0,0.0
|
||||
680,3316,5319,15700.0,713952.0,0.021990273855945496,0.892707383,0.000162605,1.335548894,0.100909488,0.0,0.0
|
||||
690,3212,5200,15357.0,707616.0,0.02170244878578212,0.89578919,0.000149085,1.299462304,0.099173414,0.0,0.0
|
||||
700,2916,4871,13850.0,693792.0,0.019962755407960886,0.781393124,0.000134984,1.179737113,0.096642976,0.0,0.0
|
||||
710,2722,4598,13123.0,684960.0,0.019158782994627425,0.725161332,0.000122213,1.056813282,0.08619269,0.0,0.0
|
||||
720,2636,4492,12750.0,680832.0,0.018727086858432038,0.701632434,0.000128984,1.019551067,0.085388434,0.0,0.0
|
||||
730,2532,4373,12407.0,674496.0,0.018394475282284845,0.675037355,0.000119134,0.993660466,0.082709493,0.0,0.0
|
||||
740,2428,4231,11926.0,670368.0,0.017790228650532244,0.6435086,0.000109403,0.927737064,0.078423743,0.0,0.0
|
||||
750,2342,4125,11553.0,666240.0,0.017340597982708934,0.619218823,0.000106693,0.883708241,0.075467284,0.0,0.0
|
||||
760,2274,4032,11403.0,662112.0,0.017222161809482384,0.635081649,0.000103493,0.919860114,0.074058132,0.0,0.0
|
||||
770,2234,3977,11313.0,659712.0,0.017148392025611175,0.593953439,0.000110543,0.84404911,0.077019298,0.0,0.0
|
|
BIN
data/results.zip
(Stored with Git LFS)
Normal file
BIN
data/results.zip
(Stored with Git LFS)
Normal file
Binary file not shown.
@ -2,3 +2,4 @@
|
||||
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
|
||||
DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
|
||||
MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
|
||||
QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"
|
||||
|
@ -5,5 +5,6 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||
MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
|
||||
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
|
||||
QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
|
||||
QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"
|
||||
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
|
||||
|
249
examples/full_node_bench.jl
Normal file
249
examples/full_node_bench.jl
Normal file
@ -0,0 +1,249 @@
|
||||
using MetagraphOptimization
|
||||
using CUDA
|
||||
using UUIDs
|
||||
using DataFrames
|
||||
using CSV
|
||||
using Random
|
||||
using BenchmarkTools
|
||||
using Dates
|
||||
|
||||
using Base.Threads
|
||||
|
||||
|
||||
"""
    log(x...)

Print the current timestamp followed by all arguments, joined with single spaces,
as one line on `stdout`, then flush `stdout` so the message shows up immediately
(e.g. in batch-job logs). Always returns `nothing`.
"""
function log(x...)
    # `join` already produces a single string; print it directly instead of
    # splatting it into one `println` argument per character.
    println(now(), " ", join(x, " "))
    flush(stdout)
    return nothing
end
|
||||
|
||||
# File the benchmark results are written to.
results_filename = "full_node_bench.csv"

# Continue from the results of an earlier run if the file exists, otherwise start
# with an empty table that has one column per recorded quantity.
df = if isfile(results_filename)
    CSV.read(results_filename, DataFrame)
else
    DataFrame(
        process_name = String[],
        cpu_threads = Int[],
        gpu_devices = Int[],
        n_inputs = Int[],
        chunk_size = Int[],
        time = Float64[],
        std = Float64[],
        rate = Float64[],
        cpu_chunks = Float64[],
        gpu_chunks = Float64[],
        memory_est = Float64[],
    )
end

# Total number of inputs evaluated per benchmark run.
nInputs = 2^26

# Chunk sizes to benchmark: every power of four from 2^10 to 2^20.
chunkSizes = [4^k for k in 5:10]

# Shared state of the workers. `lck` guards `progress` (index of the next input
# that has not been handed out yet) and the per-backend chunk counters.
lck = ReentrantLock()
progress = 1
cpu_chunks = 0
gpu_chunks = 0
|
||||
|
||||
"""
    cpu_worker(compute_func, inputs, chunk_size)

Repeatedly claim the next chunk of up to `chunk_size` input indices from the shared
global `progress` counter (guarded by the global lock `lck`) and call `compute_func`
on every input of that chunk. Each claimed chunk increments the global `cpu_chunks`.
The worker stops once all `nInputs` inputs have been handed out.

Returns `nothing`; the results of `compute_func` are discarded.
"""
function cpu_worker(compute_func, inputs, chunk_size)
    global progress
    global cpu_chunks
    global lck

    while true
        # Claim a chunk under the lock. The range (or `nothing` when no work is
        # left) is returned from the lock block instead of being written to
        # captured locals.
        chunk = lock(lck) do
            # `progress` is the next unprocessed index, so work remains as long
            # as it does not exceed `nInputs` (index `nInputs` itself is valid).
            if progress > nInputs
                return nothing
            end
            first_idx = progress
            progress = progress + chunk_size
            cpu_chunks = cpu_chunks + 1
            #log("CPU Worker $(Threads.threadid()) computing $(cpu_chunks)th cpu chunk")
            return first_idx:min(progress - 1, nInputs)
        end

        if isnothing(chunk)
            break
        end

        for i in chunk
            compute_func(inputs[i])
        end
    end

    #log("CPU Worker on $(Threads.threadid()) finished!")

    return nothing
end
|
||||
|
||||
# called with a specific device selected
"""
    gpu_worker(kernel!, inputs, chunk_size)

Repeatedly claim the next chunk of up to `chunk_size` input indices from the shared
global `progress` counter (guarded by the global lock `lck`), copy that slice of
`inputs` to the GPU and launch `kernel!` on it with 32 threads per block. Each
claimed chunk increments the global `gpu_chunks`. The worker stops once all
`nInputs` inputs have been handed out.

Returns `nothing`; the kernel outputs stay in a device buffer that is reused for
every chunk and never copied back.
"""
function gpu_worker(kernel!, inputs, chunk_size)
    global progress
    global gpu_chunks
    global lck

    # One output buffer, sized for a full chunk and reused for every launch.
    cuOutputs = CuVector{ComplexF64}(undef, chunk_size)
    ts = 32

    while true
        # Claim a chunk under the lock; `nothing` signals that no work is left.
        chunk = lock(lck) do
            # `progress` is the next unprocessed index, so work remains as long
            # as it does not exceed `nInputs` (index `nInputs` itself is valid).
            if progress > nInputs
                return nothing
            end
            first_idx = progress
            progress = progress + chunk_size
            gpu_chunks = gpu_chunks + 1
            #log("GPU Worker $(CUDA.device()) computing $(gpu_chunks)th gpu chunk")
            return first_idx:min(progress - 1, nInputs)
        end

        if isnothing(chunk)
            break
        end

        cuInputs = CuVector(inputs[chunk])
        # The last chunk may be shorter than `chunk_size`: launch for the actual
        # number of elements and round the block count up.
        # NOTE(review): assumes `kernel!` treats its third argument as the element
        # count and skips thread ids beyond it - confirm against get_cuda_kernel.
        n = length(cuInputs)
        bs = cld(n, ts)
        @cuda threads = ts blocks = bs always_inline = true kernel!(cuInputs, cuOutputs, n)
        CUDA.device_synchronize()
    end

    #log("GPU Worker on Device $(CUDA.device()) finished!")

    return nothing
end
|
||||
|
||||
# One `(cpu_chunks, gpu_chunks)` entry per `full_compute` call.
cpu_gpu_ratio = Vector{Tuple{Int, Int}}()

"""
    full_compute(compute_func, kernel!, inputs, chunk_size)

Process all inputs once using every CUDA device and the remaining CPU threads.

Resets the shared globals `progress`, `cpu_chunks` and `gpu_chunks`, spawns one
`gpu_worker` task per CUDA device (with that device selected) and one `cpu_worker`
task for each of the `Threads.nthreads() - length(CUDA.devices())` remaining
threads, and waits for all of them. Afterwards the number of chunks processed on
CPU and GPU is appended to the global `cpu_gpu_ratio`.

Returns `nothing`.
"""
function full_compute(compute_func, kernel!, inputs, chunk_size)
    global progress
    progress = 1
    global cpu_chunks
    cpu_chunks = 0
    global gpu_chunks
    gpu_chunks = 0

    devices = CUDA.devices()
    tasks = Task[]

    for dev in devices
        t = Threads.@spawn device!(dev) do
            gpu_worker(kernel!, inputs, chunk_size)
            return nothing
        end
        push!(tasks, t)
    end

    # One thread per device is left to the GPU workers; the range is empty when
    # there are at least as many devices as threads.
    for _ in 1:(Threads.nthreads() - length(devices))
        t = Threads.@spawn cpu_worker(compute_func, inputs, chunk_size)
        push!(tasks, t)
    end

    foreach(wait, tasks)

    push!(cpu_gpu_ratio, (cpu_chunks, gpu_chunks))
    return nothing
end
|
||||
|
||||
"""
    bench(compute_function, kernel!, inputs, chunk_size)

Benchmark `full_compute` on `inputs` with the given `chunk_size` for up to 60 seconds.

Returns the tuple `(time, rate, std, cpu_chunks, gpu_chunks, memory)`: median run
time and standard deviation of the run times (both in seconds), inputs processed
per second at the median time, the median number of chunks handled by CPU and GPU
workers over all runs, and the memory figure reported by the benchmark.
"""
function bench(compute_function, kernel!, inputs, chunk_size)
    global cpu_gpu_ratio
    # Only the chunk counts recorded during this benchmark should be evaluated.
    empty!(cpu_gpu_ratio)

    trial = @benchmark begin
        full_compute($compute_function, $kernel!, $inputs, $chunk_size)
    end gcsample = true seconds = 60

    # Benchmark times are in nanoseconds; convert to seconds.
    median_time = median(trial.times) / 1e9
    time_std = std(trial.times) / 1e9
    throughput = length(inputs) / median_time

    cpu_counts = getindex.(cpu_gpu_ratio, 1)
    gpu_counts = getindex.(cpu_gpu_ratio, 2)

    log("CPU/GPU ratios: $(cpu_gpu_ratio)")

    return (median_time, throughput, time_std, median(cpu_counts), median(gpu_counts), trial.memory)
end
|
||||
|
||||
"""
    full_node_bench(process, func, kernel!, chunk_size, inputs)

Benchmark the combined CPU + GPU evaluation of `inputs` for `process` with the given
`chunk_size`, log the outcome and append one result row to the global `df`.

`func` is the compute function used by the CPU workers, `kernel!` the kernel used
by the GPU workers. Returns `nothing`.
"""
function full_node_bench(process::MetagraphOptimization.AbstractProcessDescription, func, kernel!, chunk_size, inputs)
    process_name = string(process)
    log("\n--- Benchmarking $(process_name) on $(nInputs) with chunk size $(chunk_size) ---")

    log("Available Cuda Devices:")
    foreach(display, CUDA.devices())

    log("Benchmarking full node...")
    result = bench(func, kernel!, inputs, chunk_size)
    (time, rate, s, med_cpu_chunks, med_gpu_chunks, mem_estimate) = result
    log(
        "Benchmarking complete with median time $(time), $(med_cpu_chunks) cpu chunks, and $(med_gpu_chunks) gpu chunks.",
    )

    # One thread per CUDA device drives the GPU; the rest count as CPU threads.
    n_gpus = length(CUDA.devices())
    row = Dict(
        :process_name => process_name,
        :cpu_threads => Threads.nthreads() - n_gpus,
        :gpu_devices => n_gpus,
        :n_inputs => nInputs,
        :chunk_size => chunk_size,
        :time => time,
        :std => s,
        :rate => rate,
        :cpu_chunks => med_cpu_chunks,
        :gpu_chunks => med_gpu_chunks,
        :memory_est => mem_estimate,
    )
    push!(df, row)

    return nothing
end
|
||||
|
||||
# use "mock" machine that only uses cpu for compilation
machine = Machine(
    [
        MetagraphOptimization.NumaNode(
            0,
            1,
            MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
            -1.0,
            UUIDs.uuid1(),
        ),
    ],
    [-1.0;;],
)

optimizer = ReductionOptimizer()
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke"]

# For every process: build and optimize its graph, generate the compute function
# and the CUDA kernel, create the inputs, then benchmark every chunk size. The
# results file is rewritten after each measurement so partial runs are kept.
for proc in processes
    process = parse_process(proc, QEDModel())
    graph = gen_graph(process)
    optimize_to_fixpoint!(optimizer, graph)
    compute_func = get_compute_function(graph, process, machine)
    kernel! = get_cuda_kernel(graph, process, machine)

    log("Generating $nInputs inputs with $(Threads.nthreads()) threads...")
    inputs = Vector{typeof(gen_process_input(process))}()
    resize!(inputs, nInputs)

    # Generate the inputs in parallel, one task per contiguous index range, each
    # with its own copy of the process description. Indexing a shared vector of
    # copies with `Threads.nthreads()` would hand the same copy to every thread.
    # NOTE(review): presumably the copies exist because `gen_process_input` is not
    # safe to call concurrently on one process object - confirm.
    per_task = max(1, cld(length(inputs), Threads.nthreads()))
    @sync for idxs in Iterators.partition(eachindex(inputs), per_task)
        Threads.@spawn begin
            local_process = copy(process)
            for i in idxs
                inputs[i] = gen_process_input(local_process)
            end
        end
    end

    for chunk_size in chunkSizes
        full_node_bench(process, compute_func, kernel!, chunk_size, inputs)
        CSV.write(results_filename, df)
    end
end;
|
@ -34,9 +34,10 @@ function import_bench()
|
||||
bench_txt("AB->ABBB.txt")
|
||||
bench_txt("AB->ABBBBB.txt")
|
||||
bench_txt("AB->ABBBBBBB.txt")
|
||||
#bench_txt("AB->ABBBBBBBBB.txt")
|
||||
bench_txt("AB->ABBBBBBBBB.txt")
|
||||
bench_txt("ABAB->ABAB.txt")
|
||||
return bench_txt("ABAB->ABC.txt")
|
||||
bench_txt("ABAB->ABC.txt")
|
||||
return nothing
|
||||
end
|
||||
|
||||
import_bench()
|
||||
|
@ -2,44 +2,117 @@ using MetagraphOptimization
|
||||
using LIKWID
|
||||
using CUDA
|
||||
using UUIDs
|
||||
using DataFrames
|
||||
using CSV
|
||||
using Random
|
||||
using BenchmarkTools
|
||||
using Dates
|
||||
|
||||
function cpu_bench(compute_function, inputs)
|
||||
compute_function.(inputs[begin:10]) # make sure it's compiled
|
||||
DISABLE_GPU = false
|
||||
|
||||
time = @elapsed Threads.@threads for i in eachindex(inputs)
|
||||
@invokelatest compute_function(inputs[i])
|
||||
end
|
||||
rate = length(inputs) / time
|
||||
return (time, rate)
|
||||
function log(x...)
|
||||
println(now(), " ", join(x, " ")...)
|
||||
return flush(stdout)
|
||||
end
|
||||
|
||||
function gpu_bench(compute_function, inputs)
|
||||
CUDA.@sync compute_function.(inputs[begin:10]) # make sure it's compiled
|
||||
results_filename = "bench_results_$(Threads.nthreads()).csv"
|
||||
|
||||
time = @elapsed CUDA.@sync compute_function.(inputs)
|
||||
df = DataFrame(
|
||||
process_name = String[],
|
||||
graph_gen_time = Float64[],
|
||||
optimization_time = Float64[],
|
||||
function_generation_time = Float64[],
|
||||
graph_nodes = Int[],
|
||||
graph_edges = Int[],
|
||||
graph_mem = Float64[],
|
||||
cpu_threads = Int[],
|
||||
n_inputs = Int[],
|
||||
nflops_likwid = Int[],
|
||||
cpu_time = Float64[],
|
||||
cpu_std = Float64[],
|
||||
cpu_rate = Float64[],
|
||||
cpu_gflops = Float64[],
|
||||
gpu_name = String[],
|
||||
gpu_time = Float64[],
|
||||
gpu_std = Float64[],
|
||||
gpu_rate = Float64[],
|
||||
gpu_gflops = Float64[],
|
||||
)
|
||||
|
||||
# if they exist, read existing results and append new ones
|
||||
if isfile(results_filename)
|
||||
df = CSV.read(results_filename, DataFrame)
|
||||
end
|
||||
|
||||
nInputs = 2^20
|
||||
|
||||
function cpu_bench(compute_function, inputs)
|
||||
bench = @benchmark begin
|
||||
@inbounds Threads.@threads for i in eachindex($inputs)
|
||||
@invokelatest $compute_function($inputs[i])
|
||||
end
|
||||
end gcsample = true samples = 20 evals = 1
|
||||
|
||||
time = median(bench.times) / 1e9
|
||||
s = std(bench.times) / 1e9
|
||||
rate = length(inputs) / time
|
||||
|
||||
return (time, rate)
|
||||
return (time, rate, s)
|
||||
end
|
||||
|
||||
function gpu_bench(kernel!, inputs)
|
||||
n = length(inputs)
|
||||
outputs = CuVector{ComplexF64}()
|
||||
resize!(outputs, n)
|
||||
ts = 32
|
||||
bs = Int(n / ts)
|
||||
bench = @benchmark begin
|
||||
@cuda threads = $ts blocks = $bs always_inline = true $kernel!($inputs, $outputs, $n)
|
||||
CUDA.device_synchronize()
|
||||
end gcsample = true samples = 20 evals = 1
|
||||
|
||||
time = median(bench.times) / 1e9
|
||||
s = std(bench.times) / 1e9
|
||||
rate = length(inputs) / time
|
||||
|
||||
return (time, rate, s)
|
||||
end
|
||||
|
||||
function bench_process(
|
||||
process::MetagraphOptimization.AbstractProcessDescription,
|
||||
process_name::String,
|
||||
graph::DAG,
|
||||
func,
|
||||
io::IO = stdout;
|
||||
use_likwid = true,
|
||||
kernel!,
|
||||
gen_time::Float64,
|
||||
opt_time::Float64,
|
||||
func_time::Float64;
|
||||
use_likwid = false,
|
||||
use_gpu = true,
|
||||
)
|
||||
println(io, "\n--- Benchmarking $(process) ---")
|
||||
log("\n--- Benchmarking $(process_name) ---")
|
||||
if DISABLE_GPU
|
||||
use_gpu = false
|
||||
end
|
||||
|
||||
NFLOPs = GraphProperties(graph).computeEffort
|
||||
graph_props = GraphProperties(graph)
|
||||
NFLOPs = graph_props.computeEffort
|
||||
nflops_likwid = 0
|
||||
if use_likwid
|
||||
input = gen_process_input(process)
|
||||
func(input) # compile first
|
||||
|
||||
# get rid of annoying output to console
|
||||
oldstd = stdout
|
||||
redirect_stdout(devnull)
|
||||
_, events = @perfmon "FLOPS_DP" func(input)
|
||||
redirect_stdout(oldstd) # recover original stdout
|
||||
|
||||
NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
|
||||
nflops_likwid = NFLOPs
|
||||
end
|
||||
|
||||
nInputs = 10000000 # ten million
|
||||
println(io, "Generating $nInputs inputs with $(Threads.nthreads()) threads...")
|
||||
log("Generating $nInputs inputs with $(Threads.nthreads()) threads...")
|
||||
|
||||
inputs = Vector{typeof(gen_process_input(process))}()
|
||||
resize!(inputs, nInputs)
|
||||
@ -48,35 +121,76 @@ function bench_process(
|
||||
push!(processes, copy(process))
|
||||
end
|
||||
|
||||
Threads.@threads for i in eachindex(inputs)
|
||||
@inbounds Threads.@threads for i in eachindex(inputs)
|
||||
inputs[i] = gen_process_input(processes[Threads.nthreads()])
|
||||
end
|
||||
|
||||
println(io, "Benchmarking CPU with $(Threads.nthreads()) threads...")
|
||||
(time_cpu, rate_cpu) = cpu_bench(func, inputs)
|
||||
flops_cpu = (rate_cpu * NFLOPs) / 1024^3
|
||||
log("Benchmarking CPU with $(Threads.nthreads()) threads...")
|
||||
(time_cpu, rate_cpu, std_cpu) = cpu_bench(func, inputs)
|
||||
flops_cpu = (rate_cpu * NFLOPs) / 10^9
|
||||
|
||||
println(io, "Benchmarking GPU...")
|
||||
cuInputs = CuArray(inputs)
|
||||
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
|
||||
flops_gpu = (rate_gpu * NFLOPs) / 1024^3
|
||||
time_gpu = 0.0
|
||||
std_gpu = 0.0
|
||||
rate_gpu = 0.0
|
||||
flops_gpu = 0.0
|
||||
gpu_name = "none"
|
||||
if use_gpu
|
||||
log("Benchmarking GPU...")
|
||||
gpu_name = "$(name(first(CUDA.devices())))"
|
||||
cuInputs = CuArray(inputs)
|
||||
(time_gpu, rate_gpu, std_gpu) = gpu_bench(kernel!, cuInputs)
|
||||
flops_gpu = (rate_gpu * NFLOPs) / 10^9
|
||||
else
|
||||
log("Skipping GPU...")
|
||||
end
|
||||
|
||||
println(io, "\nBenchmark Summary for $(process):")
|
||||
log("\nBenchmark Summary for $(process):")
|
||||
|
||||
if use_likwid
|
||||
println(io, "Measured FLOPS by LIKWID: $NFLOPs")
|
||||
log("Measured FLOPS by LIKWID: $NFLOPs")
|
||||
else
|
||||
println(io, "Total graph compute effort: $NFLOPs")
|
||||
log("Total graph compute effort: $NFLOPs")
|
||||
end
|
||||
println(io, "Total input size: $(bytes_to_human_readable(Base.summarysize(inputs)))")
|
||||
println(io, "CPU, $(Threads.nthreads()) threads")
|
||||
println(io, " Time: $time_cpu")
|
||||
println(io, " Rate: $rate_cpu")
|
||||
println(io, " GFLOPS: $flops_cpu")
|
||||
println(io, "GPU, $(name(first(CUDA.devices())))")
|
||||
println(io, " Time: $time_gpu")
|
||||
println(io, " Rate: $rate_gpu")
|
||||
return println(io, " GFLOPS: $flops_gpu")
|
||||
log("Total input size: $(bytes_to_human_readable(Base.summarysize(inputs)))")
|
||||
log("CPU, $(Threads.nthreads()) threads")
|
||||
log(" Time: $time_cpu")
|
||||
log(" Rate: $rate_cpu")
|
||||
log(" GFLOPS: $flops_cpu")
|
||||
if use_gpu
|
||||
log("GPU, $gpu_name")
|
||||
log(" Time: $time_gpu")
|
||||
log(" Rate: $rate_gpu")
|
||||
log(" GFLOPS: $flops_gpu")
|
||||
end
|
||||
|
||||
if (process_name != "warmup")
|
||||
push!(
|
||||
df,
|
||||
Dict(
|
||||
:process_name => process_name,
|
||||
:graph_gen_time => gen_time,
|
||||
:optimization_time => opt_time,
|
||||
:function_generation_time => func_time,
|
||||
:graph_nodes => graph_props.noNodes,
|
||||
:graph_edges => graph_props.noEdges,
|
||||
:graph_mem => MetagraphOptimization.mem(graph),
|
||||
:cpu_threads => Threads.nthreads(),
|
||||
:n_inputs => nInputs,
|
||||
:nflops_likwid => nflops_likwid,
|
||||
:cpu_time => time_cpu,
|
||||
:cpu_std => std_cpu,
|
||||
:cpu_rate => rate_cpu,
|
||||
:cpu_gflops => flops_cpu,
|
||||
:gpu_name => gpu_name,
|
||||
:gpu_time => time_gpu,
|
||||
:gpu_std => std_gpu,
|
||||
:gpu_rate => rate_gpu,
|
||||
:gpu_gflops => flops_gpu,
|
||||
),
|
||||
)
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
# use "mock" machine that only uses cpu
|
||||
@ -92,57 +206,67 @@ machine = Machine(
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# sadly cannot put these in functions because the world age must increase after the function is created which happens only in the global scope
|
||||
|
||||
# compton
|
||||
process = parse_process("ke->ke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
## -- WARMUP TO COMPILE FUNCTIONS first
|
||||
#=
|
||||
optimizer = RandomWalkOptimizer(MersenneTwister(0))
|
||||
|
||||
# 2-photon compton
|
||||
process = parse_process("ke->kke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
opt_time = @elapsed optimize!(optimizer, graph, 200)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
kernel! = get_cuda_kernel(graph, process, machine)
|
||||
bench_process(process, "warmup", graph, compute_func, kernel!, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 3-photon compton
|
||||
process = parse_process("ke->kkke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
# AB->AB
|
||||
process = parse_process("AB->AB", ABCModel())
|
||||
graph = parse_dag("input/AB->AB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# AB->AB^3
|
||||
process = parse_process("AB->ABBB", ABCModel())
|
||||
graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
kernel! = get_cuda_kernel(graph, process, machine)
|
||||
bench_process(process, "warmup", graph, compute_func, kernel!, gen_time, opt_time, func_gen_time)
|
||||
=#
|
||||
## -- WARMUP END
|
||||
|
||||
exit(0)
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# 4-photon compton
|
||||
process = parse_process("ke->kkkke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke"]
|
||||
|
||||
# AB->AB^5
|
||||
process = parse_process("AB->ABBBBB", ABCModel())
|
||||
graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
for process_str in processes
|
||||
# compton
|
||||
process = parse_process(process_str, QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
kernel! = get_cuda_kernel(graph, process, machine)
|
||||
bench_process(process, "$process not optimized", graph, compute_func, kernel!, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
kernel! = get_cuda_kernel(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, kernel!, gen_time, opt_time, func_gen_time)
|
||||
|
||||
CSV.write(results_filename, df)
|
||||
end
|
||||
|
||||
processes = ["AB->AB", "AB->ABBB", "AB->ABBBBB", "AB->ABBBBBBB"]
|
||||
|
||||
for process_str in processes
|
||||
# AB->AB
|
||||
process = parse_process(process_str, ABCModel())
|
||||
gen_time = @elapsed graph = parse_dag("input/$(process_str).txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
kernel! = get_cuda_kernel(graph, process, machine)
|
||||
bench_process(process, "$process not optimized", graph, compute_func, kernel!, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
kernel! = get_cuda_kernel(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, kernel!, gen_time, opt_time, func_gen_time)
|
||||
|
||||
CSV.write(results_filename, df)
|
||||
end
|
||||
|
163
examples/qed_bench_reduction_steps.jl
Normal file
163
examples/qed_bench_reduction_steps.jl
Normal file
@ -0,0 +1,163 @@
|
||||
using MetagraphOptimization
|
||||
using CUDA
|
||||
using UUIDs
|
||||
using BenchmarkTools
|
||||
using DataFrames
|
||||
using CSV
|
||||
|
||||
results_filename = "bench_results_reduction_steps.csv"
|
||||
|
||||
df = DataFrame(
|
||||
threads = Int[],
|
||||
process = String[],
|
||||
operations = Int[],
|
||||
cumulative_optimization_time = Float64[],
|
||||
graph_nodes = Int[],
|
||||
graph_edges = Int[],
|
||||
graph_ce = Float64[],
|
||||
graph_dt = Float64[],
|
||||
graph_ci = Float64[],
|
||||
gen_func_t = Float64[],
|
||||
cpu_compile_t = Float64[],
|
||||
cpu_st_t = Float64[],
|
||||
cpu_mt_t = Float64[],
|
||||
gpu_compile_t = Float64[],
|
||||
gpu_t = Float64[],
|
||||
)
|
||||
|
||||
# if they exist, read existing results and append new ones
|
||||
if isfile(results_filename)
|
||||
df = CSV.read(results_filename, DataFrame)
|
||||
end
|
||||
|
||||
"""
    bench(func, inputs)

Time `func` on the CPU and return a named tuple of timings in seconds.

- `cpu_compile_time`: wall time of a single call `func(inputs[1])`, executed before
  any benchmarking (it only contains compilation if this is the first call of `func`).
- `cpu_single_thread_time`: mean time of broadcasting `func` over all `inputs`.
- `cpu_multi_thread_time`: mean time of a `Threads.@threads` loop over all `inputs`.
- `gpu_compile_time`, `gpu_time`: always `0.0`; this variant does not run on a GPU.
"""
function bench(func, inputs)
    first_call_seconds = @elapsed func(inputs[1])

    serial_trial = @benchmark $func.($inputs)
    threaded_trial = @benchmark Threads.@threads for idx in eachindex($inputs)
        $func($inputs[idx])
    end

    # BenchmarkTools stores sample times in nanoseconds
    mean_seconds(trial) = mean(trial.times) / 1e9

    return (
        cpu_compile_time = first_call_seconds,
        gpu_compile_time = 0.0,
        cpu_single_thread_time = mean_seconds(serial_trial),
        cpu_multi_thread_time = mean_seconds(threaded_trial),
        gpu_time = 0.0,
    )
end
|
||||
|
||||
# preparation of machine
|
||||
machine = Machine(
|
||||
[
|
||||
MetagraphOptimization.NumaNode(
|
||||
0,
|
||||
1,
|
||||
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
|
||||
-1.0,
|
||||
UUIDs.uuid1(),
|
||||
),
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
|
||||
# bench and produce data
|
||||
n_inputs = 50_000
|
||||
optimizer = ReductionOptimizer()
|
||||
processes = [("ke->kke", 5), ("ke->ke", 1), ("ke->kke", 1), ("ke->kkke", 1), ("ke->kkkke", 1), ("ke->kkkkke", 1)]
|
||||
|
||||
for (process_str, STEPSIZE) in processes
|
||||
n = 0
|
||||
opt_time_cum = 0
|
||||
|
||||
process = parse_process(process_str, QEDModel())
|
||||
graph = gen_graph(process)
|
||||
inputs = [gen_process_input(process) for _ in 1:n_inputs]
|
||||
|
||||
get_compute_function(graph, process, machine)
|
||||
|
||||
while true
|
||||
func_gen_time = @elapsed func = get_compute_function(graph, process, machine)
|
||||
res = bench(func, inputs)
|
||||
|
||||
graph_properties = get_properties(graph)
|
||||
push!(
|
||||
df,
|
||||
(
|
||||
Threads.nthreads(),
|
||||
process_str,
|
||||
n,
|
||||
opt_time_cum,
|
||||
graph_properties.noNodes,
|
||||
graph_properties.noEdges,
|
||||
graph_properties.computeEffort,
|
||||
graph_properties.data,
|
||||
graph_properties.computeIntensity,
|
||||
func_gen_time,
|
||||
res.cpu_compile_time,
|
||||
res.cpu_single_thread_time,
|
||||
res.cpu_multi_thread_time,
|
||||
res.gpu_compile_time,
|
||||
res.gpu_time,
|
||||
),
|
||||
)
|
||||
CSV.write(results_filename, df)
|
||||
|
||||
if fixpoint_reached(optimizer, graph)
|
||||
break
|
||||
end
|
||||
|
||||
opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
|
||||
n += STEPSIZE
|
||||
end
|
||||
end
|
||||
|
||||
CSV.write(results_filename, df)
|
||||
|
||||
for (process_str, STEPSIZE) in [("AB->AB", 1), ("AB->ABBB", 1), ("AB->ABBBBB", 1)]
|
||||
n = 0
|
||||
opt_time_cum = 0
|
||||
|
||||
process = parse_process(process_str, ABCModel())
|
||||
graph = parse_dag("input/$process_str.txt", ABCModel())
|
||||
inputs = [gen_process_input(process) for _ in 1:n_inputs]
|
||||
|
||||
get_compute_function(graph, process, machine)
|
||||
|
||||
while true
|
||||
func_gen_time = @elapsed func = get_compute_function(graph, process, machine)
|
||||
res = bench(func, inputs)
|
||||
|
||||
graph_properties = get_properties(graph)
|
||||
push!(
|
||||
df,
|
||||
(
|
||||
Threads.nthreads(),
|
||||
process_str,
|
||||
n,
|
||||
opt_time_cum,
|
||||
graph_properties.noNodes,
|
||||
graph_properties.noEdges,
|
||||
graph_properties.computeEffort,
|
||||
graph_properties.data,
|
||||
graph_properties.computeIntensity,
|
||||
func_gen_time,
|
||||
res.cpu_compile_time,
|
||||
res.cpu_single_thread_time,
|
||||
res.cpu_multi_thread_time,
|
||||
res.gpu_compile_time,
|
||||
res.gpu_time,
|
||||
),
|
||||
)
|
||||
CSV.write(results_filename, df)
|
||||
|
||||
if fixpoint_reached(optimizer, graph)
|
||||
break
|
||||
end
|
||||
|
||||
opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
|
||||
n += STEPSIZE
|
||||
end
|
||||
end
|
||||
|
||||
CSV.write(results_filename, df)
|
208
examples/qed_bench_reduction_steps_gpu.jl
Normal file
208
examples/qed_bench_reduction_steps_gpu.jl
Normal file
@ -0,0 +1,208 @@
|
||||
using MetagraphOptimization
|
||||
using CUDA
|
||||
using UUIDs
|
||||
using BenchmarkTools
|
||||
using DataFrames
|
||||
using CSV
|
||||
using Dates
|
||||
|
||||
results_filename = "bench_results_reduction_steps_gpu.csv"
|
||||
|
||||
df = DataFrame(
|
||||
threads = Int[],
|
||||
process = String[],
|
||||
operations = Int[],
|
||||
cumulative_optimization_time = Float64[],
|
||||
graph_nodes = Int[],
|
||||
graph_edges = Int[],
|
||||
graph_ce = Float64[],
|
||||
graph_dt = Float64[],
|
||||
graph_ci = Float64[],
|
||||
cpu_st_t = Float64[],
|
||||
cpu_st_s = Float64[],
|
||||
cpu_mt_t = Float64[],
|
||||
cpu_mt_s = Float64[],
|
||||
cpu_mem = Float64[],
|
||||
gpu_t = Float64[],
|
||||
gpu_s = Float64[],
|
||||
gpu_mem = Float64[],
|
||||
)
|
||||
|
||||
# if they exist, read existing results and append new ones
|
||||
if isfile(results_filename)
|
||||
df = CSV.read(results_filename, DataFrame)
|
||||
end
|
||||
|
||||
"""
    log(x...)

Print the current timestamp followed by all arguments, separated by single spaces,
as one line on `stdout`, then flush `stdout`. Returns `nothing`.

Note: this script-level function is named like `Base.log`; inside this script the
name `log` refers to this logger, not to the logarithm.
"""
function log(x...)
    # join already yields one String; pass it as a single argument instead of
    # splatting it into one println argument per character
    println(now(), " ", join(x, " "))
    flush(stdout)
    return nothing
end
|
||||
|
||||
"""
    bench(func, kernel!, inputs)

Benchmark one compute function on the GPU and on the CPU.

The GPU part uploads `inputs` to a `CuVector`, launches `kernel!` with 32 threads per
block and synchronizes the device after each launch (20 samples, 1 evaluation each).
The CPU part benchmarks `func` broadcast over `inputs` (single-threaded) and inside a
`Threads.@threads` loop (multi-threaded).

`length(inputs)` must be a multiple of 32, otherwise the block-count conversion
throws an `InexactError`.

Returns a named tuple with median times and standard deviations in seconds
(`*_time`, `*_std`) and the memory estimates reported by BenchmarkTools
(`cpu_mem` for the single-threaded CPU run, `gpu_mem` for the GPU launch loop).
"""
function bench(func, kernel!, inputs)
    # gpu part
    n = length(inputs)
    cu_inputs = CuVector(inputs)
    cu_outputs = CuVector{ComplexF64}(undef, n)
    ts = 32
    bs = Int(n / ts) # exact conversion: errors if n is not divisible by ts

    # named gpu_trial so the local does not shadow this function's own name
    gpu_trial = @benchmark begin
        @cuda threads = $ts blocks = $bs always_inline = true $kernel!($cu_inputs, $cu_outputs, $n)
        CUDA.device_synchronize()
    end gcsample = true samples = 20 evals = 1

    # BenchmarkTools stores sample times in nanoseconds
    gpu_time = median(gpu_trial.times) / 1e9
    gpu_std = std(gpu_trial.times) / 1e9
    gpu_mem = gpu_trial.memory

    # cpu part
    single_thread = @benchmark $func.($inputs)
    multi_threaded = @benchmark Threads.@threads for i in eachindex($inputs)
        $func($inputs[i])
    end

    cpu_st_time = median(single_thread.times) / 1e9
    cpu_st_std = std(single_thread.times) / 1e9
    cpu_mt_time = median(multi_threaded.times) / 1e9
    cpu_mt_std = std(multi_threaded.times) / 1e9
    # memory estimate of the trial, mirroring gpu_mem (previously this stored the
    # standard deviation of the sample times by mistake)
    cpu_mem = single_thread.memory

    return (
        cpu_single_thread_time = cpu_st_time,
        cpu_single_thread_std = cpu_st_std,
        cpu_multi_thread_time = cpu_mt_time,
        cpu_multi_thread_std = cpu_mt_std,
        cpu_mem = cpu_mem,
        gpu_time = gpu_time,
        gpu_std = gpu_std,
        gpu_mem = gpu_mem,
    )
end
|
||||
|
||||
log("Available CUDA devices:")
|
||||
for dev in CUDA.devices()
|
||||
display(dev)
|
||||
end
|
||||
|
||||
# preparation of machine
|
||||
machine = Machine(
|
||||
[
|
||||
MetagraphOptimization.NumaNode(
|
||||
0,
|
||||
1,
|
||||
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
|
||||
-1.0,
|
||||
UUIDs.uuid1(),
|
||||
),
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
|
||||
|
||||
# bench and produce data
|
||||
n_inputs = 2^16
|
||||
optimizer = ReductionOptimizer()
|
||||
processes = [("ke->ke", 1), ("ke->kke", 1), ("ke->kkke", 1), ("ke->kkkke", 5)]
|
||||
|
||||
for (process_str, STEPSIZE) in processes
|
||||
n = 0
|
||||
opt_time_cum = 0
|
||||
|
||||
process = parse_process(process_str, QEDModel())
|
||||
graph = gen_graph(process)
|
||||
inputs = Vector([gen_process_input(process) for _ in 1:n_inputs])
|
||||
|
||||
get_compute_function(graph, process, machine)
|
||||
|
||||
while true
|
||||
func = get_compute_function(graph, process, machine)
|
||||
kernel! = get_cuda_kernel(graph, process, machine)
|
||||
res = bench(func, kernel!, inputs)
|
||||
|
||||
graph_properties = get_properties(graph)
|
||||
push!(
|
||||
df,
|
||||
(
|
||||
Threads.nthreads(),
|
||||
process_str,
|
||||
n,
|
||||
opt_time_cum,
|
||||
graph_properties.noNodes,
|
||||
graph_properties.noEdges,
|
||||
graph_properties.computeEffort,
|
||||
graph_properties.data,
|
||||
graph_properties.computeIntensity,
|
||||
res.cpu_single_thread_time,
|
||||
res.cpu_single_thread_std,
|
||||
res.cpu_multi_thread_time,
|
||||
res.cpu_multi_thread_std,
|
||||
res.cpu_mem,
|
||||
res.gpu_time,
|
||||
res.gpu_std,
|
||||
res.gpu_mem,
|
||||
),
|
||||
)
|
||||
CSV.write(results_filename, df)
|
||||
|
||||
if fixpoint_reached(optimizer, graph)
|
||||
break
|
||||
end
|
||||
|
||||
opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
|
||||
n += STEPSIZE
|
||||
end
|
||||
end
|
||||
|
||||
CSV.write(results_filename, df)
|
||||
|
||||
for (process_str, STEPSIZE) in [("AB->AB", 1), ("AB->ABBB", 1), ("AB->ABBBBB", 1)]
|
||||
n = 0
|
||||
opt_time_cum = 0
|
||||
|
||||
process = parse_process(process_str, ABCModel())
|
||||
graph = parse_dag("input/$process_str.txt", ABCModel())
|
||||
inputs = Vector([gen_process_input(process) for _ in 1:n_inputs])
|
||||
|
||||
get_compute_function(graph, process, machine)
|
||||
|
||||
while true
|
||||
func = get_compute_function(graph, process, machine)
|
||||
kernel! = get_cuda_kernel(graph, process, machine)
|
||||
res = bench(func, kernel!, inputs)
|
||||
|
||||
graph_properties = get_properties(graph)
|
||||
push!(
|
||||
df,
|
||||
(
|
||||
Threads.nthreads(),
|
||||
process_str,
|
||||
n,
|
||||
opt_time_cum,
|
||||
graph_properties.noNodes,
|
||||
graph_properties.noEdges,
|
||||
graph_properties.computeEffort,
|
||||
graph_properties.data,
|
||||
graph_properties.computeIntensity,
|
||||
res.cpu_single_thread_time,
|
||||
res.cpu_single_thread_std,
|
||||
res.cpu_multi_thread_time,
|
||||
res.cpu_multi_thread_std,
|
||||
res.cpu_mem,
|
||||
res.gpu_time,
|
||||
res.gpu_std,
|
||||
res.gpu_mem,
|
||||
),
|
||||
)
|
||||
CSV.write(results_filename, df)
|
||||
|
||||
if fixpoint_reached(optimizer, graph)
|
||||
break
|
||||
end
|
||||
|
||||
opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
|
||||
n += STEPSIZE
|
||||
end
|
||||
end
|
||||
|
||||
CSV.write(results_filename, df)
|
232
examples/qed_bench_tape.jl
Normal file
232
examples/qed_bench_tape.jl
Normal file
@ -0,0 +1,232 @@
|
||||
using MetagraphOptimization
|
||||
using LIKWID
|
||||
using UUIDs
|
||||
using DataFrames
|
||||
using CSV
|
||||
using Random
|
||||
using BenchmarkTools
|
||||
using Dates
|
||||
|
||||
"""
    log(x...)

Print the current timestamp followed by all arguments, separated by single spaces,
as one line on `stdout`, then flush `stdout`. Returns `nothing`.

Note: this script-level function is named like `Base.log`; inside this script the
name `log` refers to this logger, not to the logarithm.
"""
function log(x...)
    # join already yields one String; pass it as a single argument instead of
    # splatting it into one println argument per character
    println(now(), " ", join(x, " "))
    flush(stdout)
    return nothing
end
|
||||
|
||||
results_filename = "bench_results_tape_$(Threads.nthreads()).csv"
|
||||
|
||||
df = DataFrame(
|
||||
process_name = String[],
|
||||
graph_gen_time = Float64[],
|
||||
optimization_time = Float64[],
|
||||
function_generation_time = Float64[],
|
||||
graph_nodes = Int[],
|
||||
graph_edges = Int[],
|
||||
graph_mem = Float64[],
|
||||
cpu_threads = Int[],
|
||||
n_inputs = Int[],
|
||||
nflops_likwid = Int[],
|
||||
cpu_time = Float64[],
|
||||
cpu_rate = Float64[],
|
||||
cpu_gflops = Float64[],
|
||||
cpu_std = Float64[],
|
||||
gpu_name = String[],
|
||||
gpu_time = Float64[],
|
||||
gpu_std = Float64[],
|
||||
gpu_rate = Float64[],
|
||||
gpu_gflops = Float64[],
|
||||
)
|
||||
|
||||
# if they exist, read existing results and append new ones
|
||||
if isfile(results_filename)
|
||||
df = CSV.read(results_filename, DataFrame)
|
||||
end
|
||||
|
||||
nInputs = 1_000_000
|
||||
|
||||
# use "mock" machine that only uses cpu
|
||||
machine = Machine(
|
||||
[
|
||||
MetagraphOptimization.NumaNode(
|
||||
0,
|
||||
1,
|
||||
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
|
||||
-1.0,
|
||||
UUIDs.uuid1(),
|
||||
),
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
|
||||
|
||||
"""
    cpu_bench(tape, inputs)

Benchmark `execute_tape(tape, input)` over all `inputs` in a `Threads.@threads` loop,
with a BenchmarkTools time budget of 300 seconds.

Returns the tuple `(time, rate, std)`: mean time of one pass over `inputs` in seconds,
inputs processed per second, and the standard deviation of the pass time in seconds.
"""
function cpu_bench(tape, inputs)
    trial = @benchmark begin
        @inbounds Threads.@threads for idx in eachindex($inputs)
            execute_tape($tape, $inputs[idx])
        end
    end gcsample = true seconds = 300

    # BenchmarkTools stores sample times in nanoseconds
    pass_seconds = mean(trial.times) / 1e9
    pass_std = std(trial.times) / 1e9
    inputs_per_second = length(inputs) / pass_seconds

    return (pass_seconds, inputs_per_second, pass_std)
end
|
||||
|
||||
"""
    bench_process(process, process_name, graph, gen_time, opt_time, io = stdout; use_likwid = false)

Generate a tape for `graph`, benchmark its execution on the CPU and append one result
row to the global DataFrame `df` (unless `process_name == "warmup"`).

# Arguments
- `process`: process description used to generate inputs and the tape.
- `process_name`: label stored in the result row and printed in the log.
- `graph`: the DAG to benchmark.
- `gen_time`, `opt_time`: previously measured graph generation / optimization times,
  stored unchanged in the result row.
- `io`: currently unused; kept so existing call sites stay valid.

# Keywords
- `use_likwid`: when `true`, measure the FLOP count of one tape execution with LIKWID's
  `FLOPS_DP` group and use it instead of the graph's `computeEffort`.

Reads the globals `machine`, `nInputs` and `df`. Returns `nothing`.
"""
function bench_process(
    process::MetagraphOptimization.AbstractProcessDescription,
    process_name::String,
    graph::DAG,
    gen_time::Float64,
    opt_time::Float64,
    io::IO = stdout;
    use_likwid = false,
)
    log("\n--- Benchmarking $(process_name) ---")

    func_time = @elapsed tape = gen_tape(graph, process, machine)

    graph_props = GraphProperties(graph)
    NFLOPs = graph_props.computeEffort
    nflops_likwid = 0
    if use_likwid
        input = gen_process_input(process)

        # get rid of annoying output to console; the do-block form restores the
        # original stdout even if the measurement throws
        _, events = redirect_stdout(devnull) do
            @perfmon "FLOPS_DP" execute_tape(tape, input)
        end

        NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
        nflops_likwid = NFLOPs
    end

    log("Generating $nInputs inputs with $(Threads.nthreads()) threads...")

    inputs = Vector{typeof(gen_process_input(process))}(undef, nInputs)

    # Split the index range into one contiguous chunk per thread and give every chunk
    # its own copy of the process. Previously all iterations indexed
    # `processes[Threads.nthreads()]`, i.e. every thread shared the last copy and the
    # other copies were never used.
    chunk_len = max(1, cld(nInputs, Threads.nthreads()))
    chunks = collect(Iterators.partition(eachindex(inputs), chunk_len))
    processes = [copy(process) for _ in chunks]
    @sync for (chunk_process, chunk) in zip(processes, chunks)
        Threads.@spawn for i in chunk
            inputs[i] = gen_process_input(chunk_process)
        end
    end

    log("Benchmarking CPU with $(Threads.nthreads()) threads...")
    (time_cpu, rate_cpu, std_cpu) = cpu_bench(tape, inputs)
    flops_cpu = (rate_cpu * NFLOPs) / 10^9

    log("\nBenchmark Summary for $(process):")

    if use_likwid
        log("Measured FLOPS by LIKWID: $NFLOPs")
    else
        log("Total graph compute effort: $NFLOPs")
    end
    log("Total input size: $(bytes_to_human_readable(Base.summarysize(inputs)))")
    log("CPU, $(Threads.nthreads()) threads")
    log("  Time: $time_cpu")
    log("  Rate: $rate_cpu")
    log("  GFLOPS: $flops_cpu")

    if (process_name != "warmup")
        # the GPU columns are filled with placeholders: this benchmark is CPU-only
        push!(
            df,
            Dict(
                :process_name => process_name,
                :graph_gen_time => gen_time,
                :optimization_time => opt_time,
                :function_generation_time => func_time,
                :graph_nodes => graph_props.noNodes,
                :graph_edges => graph_props.noEdges,
                :graph_mem => MetagraphOptimization.mem(graph),
                :cpu_threads => Threads.nthreads(),
                :n_inputs => nInputs,
                :nflops_likwid => nflops_likwid,
                :cpu_time => time_cpu,
                :cpu_std => std_cpu,
                :cpu_rate => rate_cpu,
                :cpu_gflops => flops_cpu,
                :gpu_name => "none",
                :gpu_time => 0.0,
                :gpu_std => 0.0,
                :gpu_rate => 0.0,
                :gpu_gflops => 0.0,
            ),
        )
    end

    return nothing
end
|
||||
|
||||
"""
    bench_qed(process_string, skip_unoptimized = false)

Benchmark the QED process described by `process_string`.

The graph is generated once and benchmarked as-is (unless `skip_unoptimized` is
`true`), then reduced to a fixpoint with a `ReductionOptimizer` and benchmarked
again. Results are recorded by `bench_process`. Returns `nothing`.
"""
function bench_qed(process_string::String, skip_unoptimized = false)
    reducer = ReductionOptimizer()

    process = parse_process(process_string, QEDModel())
    generation_seconds = @elapsed dag = gen_graph(process)

    # baseline run on the unreduced graph; its optimization time is reported as 0.0
    skip_unoptimized ||
        bench_process(process, "$process not optimized tape", dag, generation_seconds, 0.0)

    reduction_seconds = @elapsed optimize_to_fixpoint!(reducer, dag)
    bench_process(process, "$process reduced tape", dag, generation_seconds, reduction_seconds)

    return nothing
end
|
||||
|
||||
"""
    bench_abc(process_string)

Benchmark the ABC-model process described by `process_string`.

The graph is parsed from `input/<process_string>.txt`, benchmarked as-is, then reduced
to a fixpoint with a `ReductionOptimizer` and benchmarked again. Results are recorded
by `bench_process`. Returns `nothing`.
"""
function bench_abc(process_string::String)
    reducer = ReductionOptimizer()

    process = parse_process(process_string, ABCModel())
    parse_seconds = @elapsed dag = parse_dag("input/$process_string.txt", ABCModel())

    # baseline run on the unreduced graph; its optimization time is reported as 0.0
    bench_process(process, "$process not optimized tape", dag, parse_seconds, 0.0)

    reduction_seconds = @elapsed optimize_to_fixpoint!(reducer, dag)
    bench_process(process, "$process reduced tape", dag, parse_seconds, reduction_seconds)

    return nothing
end
|
||||
|
||||
# NOTE(review): the inherited remark that these benchmarks cannot live in functions
# because of world age does not appear to apply to the tape-based benchmarks, since
# bench_qed/bench_abc already run everything inside functions — confirm.

## -- WARMUP TO COMPILE FUNCTIONS first
# rows labelled "warmup" are not recorded by bench_process
optimizer = ReductionOptimizer()

process = parse_process("ke->kke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
bench_process(process, "warmup", graph, gen_time, opt_time)

# AB->AB^3
process = parse_process("AB->ABBB", ABCModel())
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
bench_process(process, "warmup", graph, gen_time, opt_time)

## -- WARMUP END

# compton, with 1 to 7 outgoing photons; results are written after every process so
# that finished measurements survive an aborted run
for process_string in
    ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke", "ke->kkkkkke", "ke->kkkkkkke"]
    bench_qed(process_string)
    CSV.write(results_filename, df)
end

for process_string in ["AB->AB", "AB->ABBB", "AB->ABBBBB"]
    bench_abc(process_string)
    CSV.write(results_filename, df)
end
|
144
examples/qed_gen_bench.jl
Normal file
144
examples/qed_gen_bench.jl
Normal file
@ -0,0 +1,144 @@
|
||||
using MetagraphOptimization
|
||||
using DataFrames
|
||||
using CSV
|
||||
using BenchmarkTools
|
||||
using StatsBase
|
||||
|
||||
results_filename = "qed_gen_results_$(Threads.nthreads()).csv"
|
||||
|
||||
df = DataFrame(
|
||||
process_name = String[],
|
||||
cpu_threads = Int[],
|
||||
graph_gen_samples = Int[],
|
||||
graph_gen_mean = Float64[],
|
||||
graph_gen_std = Float64[],
|
||||
graph_gen_median = Float64[],
|
||||
graph_nodes = Int[],
|
||||
graph_data_nodes = Int[],
|
||||
graph_u_nodes = Int[],
|
||||
graph_v_nodes = Int[],
|
||||
graph_s1_nodes = Int[],
|
||||
graph_s2_nodes = Int[],
|
||||
graph_edges = Int[],
|
||||
graph_nodes_reduced = Int[],
|
||||
graph_data_nodes_reduced = Int[],
|
||||
graph_u_nodes_reduced = Int[],
|
||||
graph_v_nodes_reduced = Int[],
|
||||
graph_s1_nodes_reduced = Int[],
|
||||
graph_s2_nodes_reduced = Int[],
|
||||
graph_edges_reduced = Int[],
|
||||
graph_mem = Float64[],
|
||||
graph_mem_reduced = Float64[],
|
||||
graph_elapsed_reduce = Float64[],
|
||||
)
|
||||
|
||||
"""
    bench_process(process::AbstractString; warmup = false, optimize = true)

Benchmark graph generation for the QED process string `process` and append one row of
graph statistics to the global DataFrame `df`.

Graph generation is benchmarked with a 5 second budget; node and edge counts and the
`Base.summarysize` of the graph are recorded for the freshly generated graph and, when
`optimize` is `true`, again after reducing it to a fixpoint. Without optimization the
reduced time and size are stored as `-1.0` and the reduced node counts as `0`.

With `warmup = true` all work is performed but no row is recorded.
Returns `nothing`.
"""
function bench_process(process::AbstractString; warmup = false, optimize = true)
    println("Benchmarking $process...")
    proc = parse_process(process, QEDModel())

    gen_trial = @benchmark gen_graph($proc) gcsample = true seconds = 5

    graph = gen_graph(proc)

    # statistics of the unreduced graph; must be taken before the graph is optimized
    raw_props = GraphProperties(graph)
    raw_counts = countmap(typeof.(graph.nodes))
    raw_bytes = Base.summarysize(graph)

    # placeholders used when no optimization is requested
    reduce_seconds = -1.0
    reduced_counts = Dict()
    reduced_bytes = -1.0
    reduced_props = GraphProperties()
    if optimize
        reduce_seconds = @elapsed optimize_to_fixpoint!(ReductionOptimizer(), graph)

        reduced_props = GraphProperties(graph)
        reduced_counts = countmap(typeof.(graph.nodes))
        reduced_bytes = Base.summarysize(graph)
    end

    warmup && return nothing

    # number of nodes of the given type, 0 if the type does not occur
    n_nodes(counts, node_type) = get(counts, node_type, 0)

    row = Dict(
        :process_name => process,
        :cpu_threads => Threads.nthreads(),
        :graph_gen_samples => length(gen_trial.times),
        :graph_gen_mean => mean(gen_trial.times),
        :graph_gen_std => std(gen_trial.times),
        :graph_gen_median => median(gen_trial.times),
        :graph_nodes => raw_props.noNodes,
        :graph_data_nodes => n_nodes(raw_counts, DataTaskNode{DataTask}),
        :graph_u_nodes => n_nodes(raw_counts, ComputeTaskNode{ComputeTaskQED_U}),
        :graph_v_nodes => n_nodes(raw_counts, ComputeTaskNode{ComputeTaskQED_V}),
        :graph_s1_nodes => n_nodes(raw_counts, ComputeTaskNode{ComputeTaskQED_S1}),
        :graph_s2_nodes => n_nodes(raw_counts, ComputeTaskNode{ComputeTaskQED_S2}),
        :graph_edges => raw_props.noEdges,
        :graph_nodes_reduced => reduced_props.noNodes,
        :graph_data_nodes_reduced => n_nodes(reduced_counts, DataTaskNode{DataTask}),
        :graph_u_nodes_reduced => n_nodes(reduced_counts, ComputeTaskNode{ComputeTaskQED_U}),
        :graph_v_nodes_reduced => n_nodes(reduced_counts, ComputeTaskNode{ComputeTaskQED_V}),
        :graph_s1_nodes_reduced => n_nodes(reduced_counts, ComputeTaskNode{ComputeTaskQED_S1}),
        :graph_s2_nodes_reduced => n_nodes(reduced_counts, ComputeTaskNode{ComputeTaskQED_S2}),
        :graph_edges_reduced => reduced_props.noEdges,
        :graph_mem => raw_bytes,
        :graph_mem_reduced => reduced_bytes,
        :graph_elapsed_reduce => reduce_seconds,
    )
    push!(df, row)
    return nothing
end
|
||||
|
||||
# processes to benchmark; the flag selects whether the graph is also reduced
processes = [
    ("ke->ke", true),
    ("ke->kke", true),
    ("ke->kkke", true),
    ("ke->kkkke", true),
    ("ke->kkkkke", true),
    ("ke->kkkkkke", true),
    ("ke->kkkkkkke", true),
    #("ke->kkkkkkkke", false),
    #("ke->kkkkkkkkke", false),
]

# The results table `df` is already created near the top of this script; the identical
# second definition that used to follow here was redundant and has been removed.

# if they exist, read existing results and append new ones
if isfile(results_filename)
    df = CSV.read(results_filename, DataFrame)
end

# warmup run, not recorded
bench_process("ke->kke", warmup = true)

# write after every process so finished measurements survive an aborted run
for (process, opt) in processes
    bench_process(process, optimize = opt)
    CSV.write(results_filename, df)
end
|
File diff suppressed because one or more lines are too long
63
experiments/CUDA_container.def
Normal file
63
experiments/CUDA_container.def
Normal file
@ -0,0 +1,63 @@
|
||||
Bootstrap: docker
|
||||
From: nvidia/cuda:12.3.1-devel-ubuntu20.04
|
||||
|
||||
%labels
|
||||
Requires CUDA driver 470.57+.
|
||||
|
||||
%environment
|
||||
export LANG=C
|
||||
|
||||
%runscript
|
||||
nvidia-smi
|
||||
./run.sh
|
||||
|
||||
%post
|
||||
. /.singularity.d/env/10-docker*.sh
|
||||
|
||||
apt-get update
|
||||
apt-get install -y pciutils
|
||||
DEBIAN_FRONTEND='noninteractive' apt-get -y -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' install build-essential cuda-compat-12-3 libibverbs-dev ibverbs-utils gcc wget git libcap2-bin
|
||||
apt-get -y autoremove; apt-get -y clean
|
||||
|
||||
cd /tmp
|
||||
|
||||
# install slurm
|
||||
: ${SLURM_VERSION:=17-02-11-1}
|
||||
wget https://github.com/SchedMD/slurm/archive/slurm-${SLURM_VERSION}.tar.gz
|
||||
tar -xf slurm-${SLURM_VERSION}.tar.gz
|
||||
cd slurm-slurm-${SLURM_VERSION}
|
||||
./configure --prefix=/usr/ --sysconfdir=/etc/slurm --localstatedir=/var --disable-debug
|
||||
make -C contribs/pmi2 -j$(nproc) install
|
||||
cd ..
|
||||
rm -rf slurm-*
|
||||
|
||||
# install julia
|
||||
cd ~
|
||||
wget https://julialang-s3.julialang.org/bin/linux/x64/1.9/julia-1.9.4-linux-x86_64.tar.gz
|
||||
tar zxvf julia-1.9.4-linux-x86_64.tar.gz
|
||||
mv julia-1.9.4/ /opt/julia-1.9.4
|
||||
#mkdir /usr/local/bin
|
||||
ln -s /opt/julia-1.9.4/bin/julia /usr/local/bin/julia
|
||||
|
||||
#Add nvidia driver paths to the environment variables
|
||||
echo "\n #Nvidia driver paths \n" >> /environment
|
||||
echo 'export PATH="/nvbin:$PATH"' >> /environment
|
||||
echo 'export LD_LIBRARY_PATH="/nvlib:$LD_LIBRARY_PATH"' >> /environment
|
||||
|
||||
#Add CUDA paths
|
||||
echo "\n #Cuda paths \n" >> /environment
|
||||
echo 'export CPATH="/usr/local/cuda/include:$CPATH"' >> /environment
|
||||
echo 'export PATH="/usr/local/cuda/bin:$PATH"' >> /environment
|
||||
echo 'export LD_LIBRARY_PATH="/usr/local/cuda/lib64:$LD_LIBRARY_PATH"' >> /environment
|
||||
echo 'export CUDA_HOME="/usr/local/cuda"' >> /environment
|
||||
|
||||
# install likwid
|
||||
VERSION=5.3.0
|
||||
# fetch over HTTPS: the tarball is compiled and installed as root inside the image
wget https://ftp.fau.de/pub/likwid/likwid-$VERSION.tar.gz
|
||||
tar -xaf likwid-$VERSION.tar.gz
|
||||
cd likwid-$VERSION
|
||||
# accessdaemon doesn't work because of permissions
|
||||
sed -i 's/ACCESSMODE = accessdaemon/ACCESSMODE = perf_event/g' config.mk
|
||||
make -j4
|
||||
make -j4 install
|
||||
echo 'export LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH"' >> /environment
|
22
experiments/cluster/diagram_bench_hemera.sh
Executable file
22
experiments/cluster/diagram_bench_hemera.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --array=1-32
|
||||
#SBATCH --job-name=qed_bench
|
||||
#SBATCH --partition=intel
|
||||
#SBATCH --time=16:00:00
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=32
|
||||
#SBATCH --mem=16GB
|
||||
#SBATCH --output=simulation-%A-%a.out
|
||||
#SBATCH --error=simulation-%A-%a.err
|
||||
|
||||
# abort if the repository is missing; everything below uses paths relative to it
cd "$HOME/repos/metagraph_optimization" || exit 1
|
||||
|
||||
module load singularity
|
||||
module load git
|
||||
|
||||
printf "Current git commit hash: " > results/git.txt
|
||||
git rev-parse HEAD >> results/git.txt
|
||||
git status >> results/git.txt
|
||||
|
||||
singularity exec experiments/CUDA_container.sif ./experiments/run_qed_exec.sh $SLURM_ARRAY_TASK_ID
|
24
experiments/cluster/diagram_bench_hemera_a100.sh
Executable file
24
experiments/cluster/diagram_bench_hemera_a100.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --job-name=qed_bench
|
||||
#SBATCH --partition=casus_a100
|
||||
#SBATCH --account=casus
|
||||
#SBATCH --time=8:00:00
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=32
|
||||
#SBATCH --gres=gpu:1
|
||||
#SBATCH --mem=256GB
|
||||
#SBATCH --output=simulation-%j.out
|
||||
#SBATCH --error=simulation-%j.err
|
||||
|
||||
# abort if the repository is missing; everything below uses paths relative to it
cd "$HOME/repos/metagraph_optimization" || exit 1
|
||||
|
||||
module load singularity
|
||||
module load git
|
||||
module load cuda/12.1
|
||||
|
||||
printf "Current git commit hash: " > results/git.txt
|
||||
git rev-parse HEAD >> results/git.txt
|
||||
git status >> results/git.txt
|
||||
|
||||
singularity exec --nv experiments/CUDA_container.sif ./experiments/run_qed_exec.sh 32
|
24
experiments/cluster/full_node_hemera.sh
Executable file
24
experiments/cluster/full_node_hemera.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --job-name=qed_bench
|
||||
#SBATCH --partition=casus_a100
|
||||
#SBATCH --account=casus
|
||||
#SBATCH --time=8:00:00
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=128
|
||||
#SBATCH --gres=gpu:4
|
||||
#SBATCH --mem=2048GB
|
||||
#SBATCH --output=simulation-%j.out
|
||||
#SBATCH --error=simulation-%j.err
|
||||
|
||||
# abort if the repository is missing; everything below uses paths relative to it
cd "$HOME/repos/metagraph_optimization" || exit 1
|
||||
|
||||
module load singularity
|
||||
module load git
|
||||
module load cuda/12.1
|
||||
|
||||
printf "Current git commit hash: " > results/git_reduce_bench_gpu.txt
|
||||
git rev-parse HEAD >> results/git_reduce_bench_gpu.txt
|
||||
git status >> results/git_reduce_bench_gpu.txt
|
||||
|
||||
singularity exec --nv experiments/CUDA_container.sif ./experiments/full_node.sh
|
22
experiments/cluster/gen_diagram_hemera.sh
Executable file
22
experiments/cluster/gen_diagram_hemera.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --array=1-8
|
||||
#SBATCH --job-name=qed_diag_gen
|
||||
#SBATCH --partition=intel
|
||||
#SBATCH --time=4:00:00
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=8
|
||||
#SBATCH --mem=64GB
|
||||
#SBATCH --output=simulation-%A-%a.out
|
||||
#SBATCH --error=simulation-%A-%a.err
|
||||
|
||||
# abort if the repository is missing; everything below uses paths relative to it
cd "$HOME/repos/metagraph_optimization" || exit 1
|
||||
|
||||
module load singularity
|
||||
module load git
|
||||
|
||||
printf "Current git commit hash: " > results/git.txt
|
||||
git rev-parse HEAD >> results/git.txt
|
||||
git status >> results/git.txt
|
||||
|
||||
singularity exec experiments/CUDA_container.sif ./experiments/run_gen_diagram.sh $SLURM_ARRAY_TASK_ID
|
21
experiments/cluster/reduce_bench_hemera.sh
Executable file
21
experiments/cluster/reduce_bench_hemera.sh
Executable file
@ -0,0 +1,21 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --job-name=qed_bench
|
||||
#SBATCH --partition=intel
|
||||
#SBATCH --time=48:00:00
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=32
|
||||
#SBATCH --mem=24GB
|
||||
#SBATCH --output=simulation-%j.out
|
||||
#SBATCH --error=simulation-%j.err
|
||||
|
||||
# abort if the repository is missing; everything below uses paths relative to it
cd "$HOME/repos/metagraph_optimization" || exit 1
|
||||
|
||||
module load singularity
|
||||
module load git
|
||||
|
||||
printf "Current git commit hash: " > results/git_reduce_bench.txt
|
||||
git rev-parse HEAD >> results/git_reduce_bench.txt
|
||||
git status >> results/git_reduce_bench.txt
|
||||
|
||||
singularity exec experiments/CUDA_container.sif ./experiments/run_reduce_bench.sh
|
24
experiments/cluster/reduce_bench_hemera_gpu.sh
Executable file
24
experiments/cluster/reduce_bench_hemera_gpu.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --job-name=qed_bench
|
||||
#SBATCH --partition=casus_a100
|
||||
#SBATCH --account=casus
|
||||
#SBATCH --time=16:00:00
|
||||
#SBATCH --nodes=1
|
||||
#SBATCH --ntasks=1
|
||||
#SBATCH --cpus-per-task=32
|
||||
#SBATCH --gres=gpu:1
|
||||
#SBATCH --mem=256GB
|
||||
#SBATCH --output=simulation-%j.out
|
||||
#SBATCH --error=simulation-%j.err
|
||||
|
||||
# abort if the repository is missing; everything below uses paths relative to it
cd "$HOME/repos/metagraph_optimization" || exit 1
|
||||
|
||||
module load singularity
|
||||
module load git
|
||||
module load cuda/12.1
|
||||
|
||||
printf "Current git commit hash: " > results/git_reduce_bench_gpu.txt
|
||||
git rev-parse HEAD >> results/git_reduce_bench_gpu.txt
|
||||
git status >> results/git_reduce_bench_gpu.txt
|
||||
|
||||
singularity exec --nv experiments/CUDA_container.sif ./experiments/run_reduce_bench_gpu.sh
|
25
experiments/full_node.sh
Executable file
25
experiments/full_node.sh
Executable file
@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
LOG_FILE="$SCRIPT_DIR/../julia_full_node.log"
|
||||
|
||||
# abort if the repo root cannot be entered; the results/ and examples/ paths below are relative to it
cd "$SCRIPT_DIR/.." || exit 1
|
||||
|
||||
echo "Writing system info..."
|
||||
|
||||
# collect some information of the used node and system
|
||||
uname -a > results/system_full_node.txt
|
||||
julia --version > results/julia_full_node.txt
|
||||
lscpu > results/cpu_full_node.txt
|
||||
nvidia-smi > results/cuda_gpu_full_node.txt
|
||||
lsblk > results/storage_full_node.txt
|
||||
lspci > results/pci_full_node.txt
|
||||
|
||||
#echo "Initiating julia..."
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
|
||||
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
|
||||
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
|
||||
|
||||
echo "Benchmarking Full Node 128 Threads + *GPUs*"
|
||||
julia --project -O3 --threads=128 examples/full_node_bench.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
27
experiments/run_gen_diagram.sh
Executable file
27
experiments/run_gen_diagram.sh
Executable file
@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
|
||||
# first arg = number of threads
|
||||
# fail fast with a usage message when the thread count is missing
i=${1:?"usage: $0 NUMBER_OF_THREADS"}
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
LOG_FILE="$SCRIPT_DIR/../julia.log"
|
||||
|
||||
# abort if the repo root cannot be entered; the results/ and examples/ paths below are relative to it
cd "$SCRIPT_DIR/.." || exit 1
|
||||
|
||||
echo "Writing system info..."
|
||||
|
||||
# collect some information of the used node and system
|
||||
uname -a > results/system.txt
|
||||
julia --version > results/julia.txt
|
||||
lscpu > results/cpu.txt
|
||||
lsblk > results/storage.txt
|
||||
lspci > results/pci.txt
|
||||
|
||||
echo "Initiating julia..."
|
||||
julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
|
||||
julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
|
||||
julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("BenchmarkTools"); Pkg.add("StatsBase")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
|
||||
|
||||
echo "Benchmarking with $i threads..."
|
||||
|
||||
julia --project -O3 --threads=$i examples/qed_gen_bench.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
31
experiments/run_qed_exec.sh
Executable file
31
experiments/run_qed_exec.sh
Executable file
@ -0,0 +1,31 @@
|
||||
#!/bin/bash
|
||||
|
||||
# first arg = number of threads
|
||||
# fail fast with a usage message when the thread count is missing
i=${1:?"usage: $0 NUMBER_OF_THREADS"}
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
LOG_FILE="$SCRIPT_DIR/../julia_$i.log"
|
||||
|
||||
# abort if the repo root cannot be entered; the results/ and examples/ paths below are relative to it
cd "$SCRIPT_DIR/.." || exit 1
|
||||
|
||||
echo "Writing system info..."
|
||||
|
||||
# collect some information of the used node and system
|
||||
uname -a > results/system_$i.txt
|
||||
julia --version > results/julia_$i.txt
|
||||
lscpu > results/cpu_$i.txt
|
||||
nvidia-smi > results/cuda_gpu_$i.txt
|
||||
lsblk > results/storage_$i.txt
|
||||
lspci > results/pci_$i.txt
|
||||
|
||||
echo "Initiating julia..."
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
|
||||
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
|
||||
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
|
||||
|
||||
echo "Benchmarking $i Threads"
|
||||
julia --project -O3 --threads=$i examples/qed_bench.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
||||
|
||||
echo "Benchmarking Tape variant $i Threads"
|
||||
julia --project -O3 --threads=$i examples/qed_bench_tape.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
24
experiments/run_reduce_bench.sh
Executable file
24
experiments/run_reduce_bench.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
LOG_FILE="$SCRIPT_DIR/../julia_bench_reduce.log"
|
||||
|
||||
# abort if the repo root cannot be entered; the results/ and examples/ paths below are relative to it
cd "$SCRIPT_DIR/.." || exit 1
|
||||
|
||||
echo "Writing system info..."
|
||||
|
||||
# collect some information of the used node and system
|
||||
uname -a > results/system_bench_reduce.txt
|
||||
julia --version > results/julia_bench_reduce.txt
|
||||
lscpu > results/cpu_bench_reduce.txt
|
||||
nvidia-smi > results/cuda_gpu_bench_reduce.txt
|
||||
lsblk > results/storage_bench_reduce.txt
|
||||
lspci > results/pci_bench_reduce.txt
|
||||
|
||||
#echo "Initiating julia..."
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
|
||||
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
|
||||
|
||||
echo "Benchmarking Reduction 32 Threads"
|
||||
julia --project -O3 --threads=32 examples/qed_bench_reduction_steps.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
25
experiments/run_reduce_bench_gpu.sh
Executable file
25
experiments/run_reduce_bench_gpu.sh
Executable file
@ -0,0 +1,25 @@
|
||||
#!/bin/bash
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
LOG_FILE="$SCRIPT_DIR/../julia_bench_reduce_gpu.log"
|
||||
|
||||
# abort if the repo root cannot be entered; the results/ and examples/ paths below are relative to it
cd "$SCRIPT_DIR/.." || exit 1
|
||||
|
||||
echo "Writing system info..."
|
||||
|
||||
# collect some information of the used node and system
|
||||
uname -a > results/system_bench_reduce_gpu.txt
|
||||
julia --version > results/julia_bench_reduce_gpu.txt
|
||||
lscpu > results/cpu_bench_reduce_gpu.txt
|
||||
nvidia-smi > results/cuda_gpu_bench_reduce_gpu.txt
|
||||
lsblk > results/storage_bench_reduce_gpu.txt
|
||||
lspci > results/pci_bench_reduce_gpu.txt
|
||||
|
||||
#echo "Initiating julia..."
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
|
||||
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
|
||||
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
|
||||
|
||||
echo "Benchmarking Reduction 32 Threads, *GPU*"
|
||||
julia --project -O3 --threads=32 examples/qed_bench_reduction_steps_gpu.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
BIN
images/AB->ABBBBB_reduction_bench.pdf
Normal file
BIN
images/AB->ABBBBB_reduction_bench.pdf
Normal file
Binary file not shown.
BIN
images/AB->ABBB_reduction_bench.pdf
Normal file
BIN
images/AB->ABBB_reduction_bench.pdf
Normal file
Binary file not shown.
3
images/README.md
Normal file
3
images/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Images
|
||||
|
||||
This folder collects benchmark results in PDF form that may be useful for future reference.
|
BIN
images/compton_diagram_gen_comparison.pdf
Normal file
BIN
images/compton_diagram_gen_comparison.pdf
Normal file
Binary file not shown.
BIN
images/compton_graph_size_reduced.pdf
Normal file
BIN
images/compton_graph_size_reduced.pdf
Normal file
Binary file not shown.
BIN
images/compton_graph_size_unreduced.pdf
Normal file
BIN
images/compton_graph_size_unreduced.pdf
Normal file
Binary file not shown.
BIN
images/compton_graph_size_versus.pdf
Normal file
BIN
images/compton_graph_size_versus.pdf
Normal file
Binary file not shown.
BIN
images/cpu_vs_gpu_abc.pdf
Normal file
BIN
images/cpu_vs_gpu_abc.pdf
Normal file
Binary file not shown.
BIN
images/cpu_vs_gpu_qed.pdf
Normal file
BIN
images/cpu_vs_gpu_qed.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_1k_rate.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_1k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_1k_ratio.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_1k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_1k_time.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_1k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_2k_rate.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_2k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_2k_ratio.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_2k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_2k_time.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_2k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_3k_rate.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_3k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_3k_ratio.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_3k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_3k_time.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_3k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_4k_rate.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_4k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_4k_ratio.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_4k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_4k_time.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_4k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_5k_rate.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_5k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_5k_ratio.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_5k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_chunk_size_5k_time.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_chunk_size_5k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_process_best_rate.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_process_best_rate.pdf
Normal file
Binary file not shown.
Binary file not shown.
BIN
images/full_node/1bil_size/full_node_process_best_time.pdf
Normal file
BIN
images/full_node/1bil_size/full_node_process_best_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_1k_rate.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_1k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_1k_ratio.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_1k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_1k_time.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_1k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_2k_rate.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_2k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_2k_ratio.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_2k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_2k_time.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_2k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_3k_rate.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_3k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_3k_ratio.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_3k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_3k_time.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_3k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_4k_rate.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_4k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_4k_ratio.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_4k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_4k_time.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_4k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_5k_rate.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_5k_rate.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_5k_ratio.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_5k_ratio.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_chunk_size_5k_time.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_chunk_size_5k_time.pdf
Normal file
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_process_best_rate.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_process_best_rate.pdf
Normal file
Binary file not shown.
Binary file not shown.
BIN
images/full_node/64mil_size/full_node_process_best_time.pdf
Normal file
BIN
images/full_node/64mil_size/full_node_process_best_time.pdf
Normal file
Binary file not shown.
3
images/full_node/README.md
Normal file
3
images/full_node/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Full Node Benchmarks
|
||||
|
||||
These benchmarks were run with `experiments/full_node.sh` on hemera, using 128 threads and 4 A100 GPUs.
|
BIN
images/fuse_reduce_split_cdplot_greedy.pdf
Normal file
BIN
images/fuse_reduce_split_cdplot_greedy.pdf
Normal file
Binary file not shown.
BIN
images/gen_memory.pdf
Normal file
BIN
images/gen_memory.pdf
Normal file
Binary file not shown.
BIN
images/gen_times.pdf
Normal file
BIN
images/gen_times.pdf
Normal file
Binary file not shown.
BIN
images/gpu_rate_NVIDIA A100-SXM4-80GB.pdf
Normal file
BIN
images/gpu_rate_NVIDIA A100-SXM4-80GB.pdf
Normal file
Binary file not shown.
BIN
images/gpu_times_NVIDIA A100-SXM4-80GB.pdf
Normal file
BIN
images/gpu_times_NVIDIA A100-SXM4-80GB.pdf
Normal file
Binary file not shown.
BIN
images/ke->kke_reduction_bench.pdf
Normal file
BIN
images/ke->kke_reduction_bench.pdf
Normal file
Binary file not shown.
BIN
images/ke->kkke_reduction_bench_lin.pdf
Normal file
BIN
images/ke->kkke_reduction_bench_lin.pdf
Normal file
Binary file not shown.
BIN
images/ke->kkkke_reduction_bench.pdf
Normal file
BIN
images/ke->kkkke_reduction_bench.pdf
Normal file
Binary file not shown.
BIN
images/ke->kkkkke_reduction_bench.pdf
Normal file
BIN
images/ke->kkkkke_reduction_bench.pdf
Normal file
Binary file not shown.
5
images/optim_plots/README.md
Normal file
5
images/optim_plots/README.md
Normal file
@ -0,0 +1,5 @@
|
||||
# Optimizer Plots
|
||||
|
||||
Plots of FusionOptimizer, ReductionOptimizer, SplitOptimizer, RandomWalkOptimizer, and GreedyOptimizer, executed on a system with 32 threads and an A30 GPU.
|
||||
|
||||
Benchmarked using `notebooks/optimizers.ipynb`.
|
BIN
images/optim_plots/qed_k3_cdplot_fuse.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_fuse.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_greedy.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_greedy.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_random.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_random.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_reduce.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_reduce.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_cdplot_split.pdf
Normal file
BIN
images/optim_plots/qed_k3_cdplot_split.pdf
Normal file
Binary file not shown.
BIN
images/optim_plots/qed_k3_ce_dt_fuse.pdf
Normal file
BIN
images/optim_plots/qed_k3_ce_dt_fuse.pdf
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user