Compare commits

..

4 Commits

Author SHA1 Message Date
d888713e97 Enable oneAPI and ROCm (#9)
All checks were successful
MetagraphOptimization_CI / docs (push) Successful in 8m30s
MetagraphOptimization_CI / test (push) Successful in 22m2s
Co-authored-by: Anton Reinhard <anton.reinhard@proton.me>
Reviewed-on: #9
2024-05-08 19:26:18 +02:00
38e7ff3b90 Seed Randomness, Fix tests (#8)
All checks were successful
MetagraphOptimization_CI / docs (push) Successful in 7m34s
MetagraphOptimization_CI / test (push) Successful in 20m49s
Seeded randomness in all places, however, multithreaded randomness still exists.

Disabled some tests that are failing, will add issues and fix later. These are related to (likely) precision problems in the ABC model, which is not priority, and the Node Fusion, which will be fundamentally reworked anyways.

Co-authored-by: Anton Reinhard <anton.reinhard@proton.me>
Reviewed-on: #8
2024-05-08 18:04:48 +02:00
7d7782f97f Add Workaround for Trie implementation for Julia version 1.10+ (#7)
Some checks failed
MetagraphOptimization_CI / docs (push) Successful in 8m32s
MetagraphOptimization_CI / test (push) Failing after 12m1s
Co-authored-by: Anton Reinhard <anton.reinhard@proton.me>
Reviewed-on: #7
2024-05-08 14:00:25 +02:00
87dbaf2c32 experiments (#1)
All checks were successful
MetagraphOptimization_CI / docs (push) Successful in 10m41s
MetagraphOptimization_CI / test (push) Successful in 30m40s
Co-authored-by: Anton Reinhard <anton.reinhard@proton.me>
Reviewed-on: #1
2024-05-08 12:03:27 +02:00
179 changed files with 5554 additions and 1537 deletions

1
.gitattributes vendored
View File

@ -1,2 +1,3 @@
input/AB->ABBBBBBBBB.txt filter=lfs diff=lfs merge=lfs -text
input/AB->ABBBBBBB.txt filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text

View File

@ -17,9 +17,9 @@ jobs:
fetch-depth: 0
- name: Setup Julia environment
uses: https://github.com/julia-actions/setup-julia@v1.9.2
uses: https://github.com/julia-actions/setup-julia@v2
with:
version: '1.9.2'
version: '1.10'
- name: Instantiate
run: |
@ -46,6 +46,7 @@ jobs:
run: |
julia --project=examples/ -e 'using Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); Pkg.precompile()'
julia --project=examples/ -t 4 -e 'include("examples/import_bench.jl")' -O3
julia --project=examples/ -t 4 -e 'include("examples/ab5.jl")' -O3
docs:
runs-on: ubuntu-22.04
@ -57,9 +58,9 @@ jobs:
fetch-depth: 0
- name: Setup Julia environment
uses: https://github.com/julia-actions/setup-julia@v1.9.2
uses: https://github.com/julia-actions/setup-julia@v2
with:
version: '1.9.2'
version: '1.10'
- name: Build docs
run: |

4
.gitignore vendored
View File

@ -30,3 +30,7 @@ Manifest.toml
.julia
**/.ipynb_checkpoints/
*.bkp
*.sif
data/hemera_temp

View File

@ -4,8 +4,10 @@ authors = ["Anton Reinhard <anton.reinhard@proton.me>"]
version = "0.1.0"
[deps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
@ -17,8 +19,10 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
[extras]
CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]

170
data/evaluate.jl Normal file
View File

@ -0,0 +1,170 @@
using CSV
using DataFrames
using Plots
using StatsPlots
using LaTeXStrings
# Require the input CSV as the first command-line argument.
# Bug fix: exit after printing usage; previously the script fell through
# and crashed with a BoundsError at `ARGS[1]`.
if (length(ARGS) < 1)
    println("Please use with \"input_file.csv\"")
    exit(1)
end
# Process names to evaluate; entries commented out are skipped in this run
# (larger QED/ABC processes). Names must match the `process_name` CSV column.
processes = [
"QED Process: 'ke->ke'",
"QED Process: 'ke->kke'",
"QED Process: 'ke->kkke'",
#"QED Process: 'ke->kkkke'",
#"QED Process: 'ke->kkkkke'",
#"QED Process: 'ke->kkkkkke'",
#"QED Process: 'ke->kkkkkkke'",
"ABC Process: 'AB->AB'",
"ABC Process: 'AB->ABBB'",
#"ABC Process: 'AB->ABBBBB'",
]
"""
    proc_to_n(str::AbstractString)

Return the photon multiplicity of a process string such as
"QED Process: 'ke->kkke'": take the single-quoted part, keep the final
state after "->", and count its 'k' characters.
"""
function proc_to_n(str::AbstractString)
    quoted = split(str, "'")[2]
    final_state = split(quoted, "->")[2]
    return count(==('k'), final_state)
end
"""
    beautify_title(str::AbstractString)

Turn a raw process string like "QED Process: 'ke->kkke'" into a plot
title: the repeated particle in the final state is collapsed into a
power (e.g. "kkke" -> "k^3e") and the reaction is rendered as a
LaTeXStrings `\\rightarrow` expression between the original prefix and
suffix text.
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")
    preprefix = parts[1]
    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")
    prefix = parts[1]
    suffix = parts[2]

    k_count = count(c -> c == 'k', suffix)
    B_count = count(c -> c == 'B', suffix)

    if k_count == 1 || B_count == 1
        new_suffix = suffix
    elseif k_count >= 1
        new_suffix = replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        new_suffix = replace(suffix, r"B+" => "B^$B_count")
    else
        # Bug fix: previously `new_suffix` was left undefined when the
        # final state contains neither 'k' nor 'B', raising UndefVarError.
        new_suffix = suffix
    end

    return preprefix * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
# Read the benchmark results CSV; `n_inputs` (samples per measurement) is
# taken from the first row and assumed constant for the whole file.
input_file = ARGS[1]
df = CSV.read(input_file, DataFrame)
n_inputs = df[:, "n_inputs"][1]
# plotting with threads as x axis
# For each process: plot time, rate and GFLOPS over thread count for the
# four measured variants (function/tape x unoptimized/reduced).
for process_name in processes
title_string = "$(beautify_title(process_name)), $n_inputs samples"
println("$title_string")
process_no_opt = process_name * " not optimized"
process_red = process_name * " reduced"
process_tape_no_opt = process_name * " not optimized tape"
process_tape_red = process_name * " reduced tape"
df_no_opt = filter(:process_name => x -> x == process_no_opt, df)
df_red = filter(:process_name => x -> x == process_red, df)
df_tape_no_opt = filter(:process_name => x -> x == process_tape_no_opt, df)
df_tape_red = filter(:process_name => x -> x == process_tape_red, df)
@df df_no_opt scatter(:cpu_threads, :cpu_time, label = "unoptimized function", markershape = :circle)
@df df_red scatter!(:cpu_threads, :cpu_time, label = "reduced function", markershape = :rect)
@df df_tape_no_opt scatter!(:cpu_threads, :cpu_time, label = "unoptimized tape", markershape = :utriangle)
@df df_tape_red scatter!(:cpu_threads, :cpu_time, label = "reduced tape", markershape = :star)
plot!(
title = title_string,
yscale = :linear,
legend = :outerright,
legendcolumns = 1,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "execution time (s)",
xlabel = "threads (#)",
)
# NOTE(review): `process_name` contains spaces, ':' and quotes, so the
# saved file names are awkward (e.g. "QED Process: 'ke->ke'_time.pdf")
# — confirm this is intended.
savefig("$(process_name)_time.pdf")
@df df_no_opt scatter(:cpu_threads, :cpu_rate, label = "unoptimized function", markershape = :circle)
@df df_red scatter!(:cpu_threads, :cpu_rate, label = "reduced function", markershape = :rect)
@df df_tape_no_opt scatter!(:cpu_threads, :cpu_rate, label = "unoptimized tape", markershape = :utriangle)
@df df_tape_red scatter!(:cpu_threads, :cpu_rate, label = "reduced tape", markershape = :star)
plot!(
title = "Sample rate, " * title_string,
yscale = :log10,
legend = :outerright,
legendcolumns = 1,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "rate (" * L"s^{-1}" * ")",
xlabel = "threads (#)",
)
savefig("$(process_name)_rate.pdf")
@df df_no_opt scatter(:cpu_threads, :cpu_gflops, label = "unoptimized function", markershape = :circle)
@df df_red scatter!(:cpu_threads, :cpu_gflops, label = "reduced function", markershape = :rect)
@df df_tape_no_opt scatter!(:cpu_threads, :cpu_gflops, label = "unoptimized tape", markershape = :utriangle)
@df df_tape_red scatter!(:cpu_threads, :cpu_gflops, label = "reduced tape", markershape = :star)
plot!(
title = "CPU performance, " * title_string,
yscale = :linear,
legend = :outerright,
legendcolumns = 1,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "CPU performance (GFLOPS)",
xlabel = "threads (#)",
)
savefig("$(process_name)_performance.pdf")
end
# plotting with process size as x axis
# Per thread count: compare graph/function/tape generation times across
# process sizes (QED only, since `proc_to_n` counts photons).
THREADS = [1, 2, 4, 8, 16]
for threads in THREADS
title_string = "$threads threads, $n_inputs samples"
df_filt = filter(:cpu_threads => x -> x == threads, df)
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df_filt)
df_filt.process_size = @. proc_to_n(df_filt.process_name)
# The "$"-anchored regexes keep the four variants disjoint
# (" not optimized$" does not match "... not optimized tape").
df_no_opt = filter(:process_name => x -> match(r" not optimized$", x) !== nothing, df_filt)
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
df_tape_no_opt = filter(:process_name => x -> match(r" not optimized tape$", x) !== nothing, df_filt)
df_tape_red = filter(:process_name => x -> match(r" reduced tape$", x) !== nothing, df_filt)
@df df_no_opt scatter(:process_size, :graph_gen_time, label = "graph generation time")
@df df_red scatter!(:process_size, :optimization_time, label = "optimization time")
@df df_no_opt scatter!(:process_size, :function_generation_time, label = "unoptimized function generation time")
@df df_tape_no_opt scatter!(:process_size, :function_generation_time, label = "unoptimized tape generation time")
@df df_red scatter!(:process_size, :function_generation_time, label = "reduced function generation time")
@df df_tape_red scatter!(:process_size, :function_generation_time, label = "reduced tape generation time")
plot!(
title = "function generation times, " * title_string,
yscale = :log10,
legend = :outerbottom,
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "time (s)",
xlabel = "process size (#)",
)
savefig("gen_times_$(threads)_threads.pdf")
end

View File

@ -0,0 +1,143 @@
using CSV
using DataFrames
using Plots
using StatsPlots
using LaTeXStrings
# Require the input CSV as the first command-line argument.
# Bug fix: exit after printing usage; previously the script fell through
# and crashed with a BoundsError at `ARGS[1]`.
if (length(ARGS) < 1)
    println("Please use with \"input_file.csv\"")
    exit(1)
end
# Process names included in the CPU-vs-GPU comparison; the two largest QED
# processes are commented out. Names must match the `process_name` column.
processes = [
"QED Process: 'ke->ke'",
"QED Process: 'ke->kke'",
"QED Process: 'ke->kkke'",
"QED Process: 'ke->kkkke'",
"QED Process: 'ke->kkkkke'",
#"QED Process: 'ke->kkkkkke'",
#"QED Process: 'ke->kkkkkkke'",
"ABC Process: 'AB->AB'",
"ABC Process: 'AB->ABBB'",
"ABC Process: 'AB->ABBBBB'",
]
"""
    proc_to_n(str::AbstractString)

Photon multiplicity of a quoted process string such as
"QED Process: 'ke->kkke'": number of 'k' characters in the final state.
"""
function proc_to_n(str::AbstractString)
    quoted = split(str, "'")[2]
    final_state = split(quoted, "->")[2]
    return count(==('k'), final_state)
end
"""
    abc_proc_to_n(str::AbstractString)

'B' multiplicity of a quoted ABC process string such as
"ABC Process: 'AB->ABBB'": number of 'B' characters in the final state.
"""
function abc_proc_to_n(str::AbstractString)
    quoted = split(str, "'")[2]
    final_state = split(quoted, "->")[2]
    return count(==('B'), final_state)
end
"""
    beautify_title(str::AbstractString)

Format a process string like "QED Process: 'ke->kkke'" as a plot title,
collapsing the repeated final-state particle into a power (e.g.
"kkke" -> "k^3e") and rendering the reaction via LaTeXStrings.
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")
    preprefix = parts[1]
    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")
    prefix = parts[1]
    suffix = parts[2]

    k_count = count(c -> c == 'k', suffix)
    B_count = count(c -> c == 'B', suffix)

    if k_count == 1 || B_count == 1
        new_suffix = suffix
    elseif k_count >= 1
        new_suffix = replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        new_suffix = replace(suffix, r"B+" => "B^$B_count")
    else
        # Bug fix: previously `new_suffix` was undefined for final states
        # without 'k' or 'B', raising UndefVarError.
        new_suffix = suffix
    end

    return preprefix * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
# Read benchmark results; `n_inputs` is taken from the first row.
input_file = ARGS[1]
df = CSV.read(input_file, DataFrame)
n_inputs = df[:, "n_inputs"][1]
# QED comparison: CPU vs GPU execution time of the reduced function,
# over photon count. (`title_string` is only used by the commented-out
# `title =` line below.)
title_string = "QED N-Photon Compton Scattering\nCalculate 1,048,576 (\$2^{20}\$) Matrix Elements"
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
df_filt.process_size = @. proc_to_n(df_filt.process_name)
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
@df df_red scatter(
:process_size,
:cpu_time,
yerror = :cpu_std,
label = "CPU execution time, 32 threads (s)",
markersize = 6,
)
@df df_red scatter!(
:process_size,
:gpu_time,
yerror = :gpu_std,
label = "GPU execution time, A100 80GB (s)",
markersize = 6,
)
plot!(
#title = title_string,
yscale = :log10,
legend = :outerbottom,
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "time (s)",
xlabel = "process size (#)",
)
savefig("cpu_vs_gpu_qed.pdf")
# Same comparison for the ABC processes, sized by 'B' multiplicity.
title_string = "\$AB\\rightarrow AB^n\$ ABC Processes\nCalculate 1,048,576 (\$2^{20}\$) Matrix Elements"
df_filt = filter(:process_name => x -> abc_proc_to_n(x) >= 1, df)
df_filt.process_size = @. abc_proc_to_n(df_filt.process_name)
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
@df df_red scatter(
:process_size,
:cpu_time,
yerror = :cpu_std,
label = "CPU execution time, 32 threads (s)",
markersize = 6,
)
@df df_red scatter!(
:process_size,
:gpu_time,
yerror = :gpu_std,
label = "GPU execution time, A100 80GB (s)",
markersize = 6,
)
plot!(
#title = title_string,
yscale = :log10,
legend = :outerbottom,
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "time (s)",
xlabel = "process size (#)",
)
savefig("cpu_vs_gpu_abc.pdf")

View File

@ -0,0 +1,212 @@
using CSV
using DataFrames
using Plots
using StatsPlots
using LaTeXStrings
# Require the input CSV as the first command-line argument.
# Bug fix: exit after printing usage; previously the script fell through
# and crashed with a BoundsError at `ARGS[1]`.
if (length(ARGS) < 1)
    println("Please use with \"input_file.csv\"")
    exit(1)
end
# QED Compton processes (1 to 5 photons) evaluated in the full-node run.
processes = [
"QED Process: 'ke->ke'",
"QED Process: 'ke->kke'",
"QED Process: 'ke->kkke'",
"QED Process: 'ke->kkkke'",
"QED Process: 'ke->kkkkke'",
]
"""
    proc_to_n(str::AbstractString)

Photon multiplicity of a quoted process string such as
"QED Process: 'ke->kkke'": number of 'k' characters in the final state.
"""
function proc_to_n(str::AbstractString)
    quoted = split(str, "'")[2]
    final_state = split(quoted, "->")[2]
    return count(==('k'), final_state)
end
"""
    beautify_title(str::AbstractString)

Format a quoted process string as a "QED Compton Scattering Process ..."
plot title, collapsing the repeated final-state particle into a power
and rendering the reaction via LaTeXStrings.
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")
    preprefix = parts[1]
    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")
    prefix = parts[1]
    suffix = parts[2]

    k_count = count(c -> c == 'k', suffix)
    B_count = count(c -> c == 'B', suffix)

    if k_count == 1 || B_count == 1
        new_suffix = suffix
    elseif k_count >= 1
        new_suffix = replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        new_suffix = replace(suffix, r"B+" => "B^$B_count")
    else
        # Bug fix: previously `new_suffix` was undefined for final states
        # without 'k' or 'B', raising UndefVarError.
        new_suffix = suffix
    end

    return "QED Compton Scattering Process " * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
# Read the full-node benchmark CSV; device counts and sample size are
# taken from the first row.
input_file = ARGS[1]
df = CSV.read(input_file, DataFrame)
n_inputs = df[:, "n_inputs"][1]
gpus = df.gpu_devices[1]
cpus = df.cpu_threads[1]
power = Int(round(log2(n_inputs)))
# x-axis tick labels: chunk sizes 2^10 .. 2^20 as LaTeX strings.
chunk_sizes = [
"\$2^{10}\$",
"\$2^{11}\$",
"\$2^{12}\$",
"\$2^{13}\$",
"\$2^{14}\$",
"\$2^{15}\$",
"\$2^{16}\$",
"\$2^{17}\$",
"\$2^{18}\$",
"\$2^{19}\$",
"\$2^{20}\$",
]
# NOTE(review): `best_times_std` is declared but never pushed to or read
# below — confirm whether error bars were intended.
best_times = Vector{Float64}()
best_times_std = Vector{Float64}()
# plotting with threads as x axis
# Per process: CPU/GPU workload split, rate and time over chunk size;
# the best (minimum) total time per process is collected for the
# cross-process plots below.
for process_name in processes
df_filt = filter(:process_name => x -> x == process_name, df)
df_filt.cpu_ratio = df_filt.cpu_chunks ./ (df_filt.cpu_chunks .+ df_filt.gpu_chunks) .* 100.0
df_filt.gpu_ratio = df_filt.gpu_chunks ./ (df_filt.cpu_chunks .+ df_filt.gpu_chunks) .* 100.0
push!(best_times, minimum(df_filt.time))
# Stacked-bar effect: the GPU bar spans the full 100% and is clipped at
# the CPU ratio via `fillto`.
bar(chunk_sizes, df_filt.cpu_ratio, label = "workload completed by \$$(cpus)\$ CPU threads (%)")
bar!(
chunk_sizes,
[100 for _ in chunk_sizes],
label = "workload completed by $(gpus) GPUs (%)",
fillto = df_filt.cpu_ratio,
)
plot!(
#title = "$(beautify_title(process_name))\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
yscale = :linear,
#xticks = [1024 4096 16384 65536 262144 1048576],
ylim = (0, 105),
legend = :outerbottom,
legendcolumns = 1,
legend_font_pointsize = 10,
#size = (800, 600),
ylabel = "contribution (%)",
xlabel = "chunk size (#)",
)
savefig("full_node_chunk_size_$(proc_to_n(process_name))k_ratio.pdf")
scatter(
chunk_sizes,
df_filt.rate,
label = "total execution rate (\$s^{-1}\$)",
#title = "$(beautify_title(process_name))\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
ylim = (0, :auto),
#yscale = :log10,
#xticks = [1024 4096 16384 65536 262144 1048576],
legend = :outerbottom,
legendcolumns = 1,
legend_font_pointsize = 10,
#size = (800, 600),
ylabel = "rate (\$s^{-1}\$)",
xlabel = "chunk size (#)",
markersize = 7,
)
savefig("full_node_chunk_size_$(proc_to_n(process_name))k_rate.pdf")
scatter(
chunk_sizes,
df_filt.time,
yerror = df_filt.std,
label = "total execution time (s)",
#title = "$(beautify_title(process_name))\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
#xticks = [1024 4096 16384 65536 262144 1048576],
ylim = (0, maximum(df_filt.time) * 1.05),
legend = :outerbottom,
legendcolumns = 1,
legend_font_pointsize = 10,
#size = (800, 600),
ylabel = "time (s)",
xlabel = "chunk size (#)",
markersize = 7,
)
savefig("full_node_chunk_size_$(proc_to_n(process_name))k_time.pdf")
end
# plotting with process size as x axis
# Hard-coded single-device rates from earlier benchmarks; the theoretical
# full-node rate assumes 4x (A100 + 32-thread CPU) per node.
A100_rates = [2.530045276927587e9, 1.16972304616864e9, 2.0002725972692013e8, 3.495722925446318e7, 4.792187095617111e6]
CPU_32threads_rates =
[3.2691139045711152e7, 1.1578342663759507e7, 3.1670680975577887e6, 731037.7069429948, 115001.5594731802]
theory_rates = (A100_rates .+ CPU_32threads_rates) .* 4
scatter(
proc_to_n.(processes),
best_times,
label = "full node best achieved time (s)",
#title = "QED N-Photon Compton Scattering\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
ylim = (0, maximum(best_times) * 1.05),
legend = :outerbottom,
legendcolumns = 1,
legend_font_pointsize = 10,
#size = (800, 600),
ylabel = "time (s)",
xlabel = "process size (#)",
markersize = 7,
)
savefig("full_node_process_best_time.pdf")
scatter(
proc_to_n.(processes),
(n_inputs ./ best_times),
label = "full node best achieved rate (\$s^{-1}\$)",
#title = "QED N-Photon Compton Scattering\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
ylim = (0, maximum(n_inputs ./ best_times) * 1.05),
legend = :outerbottom,
legendcolumns = 1,
legend_font_pointsize = 10,
#size = (800, 600),
ylabel = "rate (\$s^{-1}\$)",
xlabel = "process size (#)",
markersize = 7,
)
savefig("full_node_process_best_rate.pdf")
# Two series at once: the hcat `[a b]` matrix plots achieved vs
# theoretical rate side by side.
scatter(
proc_to_n.(processes),
[(n_inputs ./ best_times) theory_rates],
label = ["full node best achieved rate (\$s^{-1}\$)" "theoretical rate from previous benchmarks (\$s^{-1}\$)"],
#title = "QED N-Photon Compton Scattering\nComputing $(n_inputs) (\$2^{$(power)}\$) Matrix Elements",
#ylim = (0, max(maximum(n_inputs ./ best_times), maximum(theory_rates)) * 1.05),
yscale = :log10,
legend = :outerbottom,
legendcolumns = 1,
legend_font_pointsize = 10,
#size = (800, 600),
ylabel = "rate (\$s^{-1}\$)",
xlabel = "process size (#)",
markersize = 7,
)
savefig("full_node_process_best_rate_plus_theory.pdf")

232
data/evaluate_gen.jl Normal file
View File

@ -0,0 +1,232 @@
using CSV
using DataFrames
using Plots
using StatsPlots
using LaTeXStrings
# Require the input CSV as the first command-line argument.
# Bug fix: exit after printing usage; previously the script fell through
# and crashed with a BoundsError at `ARGS[1]`.
if (length(ARGS) < 1)
    println("Please use with \"input_file.csv\"")
    exit(1)
end
"""
    proc_to_n(str::AbstractString)

Photon multiplicity of an unquoted process string such as "ke->kkke":
number of 'k' characters in the final state after "->".
"""
function proc_to_n(str::AbstractString)
    final_state = split(str, "->")[2]
    return count(==('k'), final_state)
end
input_file = ARGS[1]
df = CSV.read(input_file, DataFrame)
# plotting with process size as x axis
# Only single-threaded generation results are plotted here.
THREADS = [1]
for threads in THREADS
title_string = "n-photon Compton diagram generation"
df_filt = filter(:cpu_threads => x -> x == threads, df)
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df_filt)
# ns -> s
df_filt.graph_gen_mean = @. df_filt.graph_gen_mean / 1e9
df_filt.graph_gen_std = @. df_filt.graph_gen_std / 1e9
# B -> MB (not MiB since the log scale is base 10)
df_filt.graph_mem = @. df_filt.graph_mem / 1e6
df_filt.graph_mem_reduced = @. df_filt.graph_mem_reduced / 1e6
df_filt.process_size = @. proc_to_n(df_filt.process_name)
# NOTE(review): `l` is never used below — presumably leftover debugging,
# like the println calls that follow.
l = length(df_filt.process_size)
println(df_filt[!, :process_size])
println(df_filt[!, :graph_mem])
println(df_filt[!, :graph_mem_reduced])
@df df_filt scatter(:process_size, :graph_mem, label = "unreduced graph", markersize = 7)
scatter!(
df_filt[!, :process_size],
df_filt[!, :graph_mem_reduced],
label = "reduced graph",
markershape = :square,
markersize = 7,
)
plot!(
title = "n-photon Compton diagram memory footprint",
yscale = :log10,
legend = :outerbottom,
minorgrid = true,
xticks = :process_size,
#yticks = [1e-3, 1e-1, 1e1, 1e3],
xgrid = false,
xminorticks = false,
legendcolumns = 1,
legend_font_pointsize = 12,
fontsize = 12,
size = (800, 600),
ylabel = "memory footprint (MB)",
xlabel = "process size (#)",
)
savefig("gen_memory_$(threads).pdf")
@df df_filt scatter(
:process_size,
:graph_gen_mean,
yerror = :graph_gen_std,
label = "graph generation time",
markersize = 7,
)
scatter!(
df_filt[!, :process_size],
df_filt[!, :graph_elapsed_reduce],
label = "graph reduction time",
markershape = :square,
markersize = 7,
)
plot!(
title = title_string,
yscale = :log10,
legend = :outerbottom,
minorgrid = true,
xticks = :process_size,
yticks = [1e-3, 1e-1, 1e1, 1e3],
xgrid = false,
xminorticks = false,
legendcolumns = 1,
legend_font_pointsize = 12,
fontsize = 12,
size = (800, 600),
ylabel = "time (s)",
xlabel = "process size (#)",
)
savefig("gen_times_$(threads)_threads.pdf")
# NOTE(review): this `exit(0)` terminates the script here, so the three
# graph-size plots below and everything after the loop is dead code —
# confirm whether this early exit is intentional.
exit(0)
# graph size
title_string = "n-photon Compton unreduced graph size"
@df df_filt scatter(:process_size, :graph_nodes, label = "nodes", markershape = :circle)
@df df_filt scatter!(:process_size, :graph_edges, label = "edges", markershape = :square)
@df df_filt scatter!(:process_size, :graph_u_nodes, label = "U-nodes", markershape = :star)
@df df_filt scatter!(:process_size, :graph_v_nodes, label = "V-nodes", markershape = :utriangle)
@df df_filt scatter!(:process_size[2:end], :graph_s1_nodes[2:end], label = "S1-nodes", markershape = :x)
@df df_filt scatter!(:process_size, :graph_s2_nodes, label = "S2-nodes", markershape = :diamond)
plot!(
title = title_string,
yscale = :log10,
legend = :outerbottom,
yminorgrid = true,
xticks = :process_size,
yticks = [1e1, 1e3, 1e5, 1e7],
xgrid = false,
xminorticks = false,
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "(#)",
xlabel = "process size (#)",
)
savefig("compton_graph_size_unreduced.pdf")
# graph size
title_string = "n-photon Compton reduced graph size"
@df df_filt scatter(:process_size, :graph_nodes_reduced, label = "nodes", markershape = :circle)
@df df_filt scatter!(:process_size, :graph_edges_reduced, label = "edges", markershape = :square)
@df df_filt scatter!(:process_size, :graph_u_nodes_reduced, label = "U-nodes", markershape = :star)
@df df_filt scatter!(:process_size, :graph_v_nodes_reduced, label = "V-nodes", markershape = :utriangle)
@df df_filt scatter!(:process_size[2:end], :graph_s1_nodes_reduced[2:end], label = "S1-nodes", markershape = :x)
@df df_filt scatter!(:process_size, :graph_s2_nodes_reduced, label = "S2-nodes", markershape = :diamond)
plot!(
title = title_string,
yscale = :log10,
legend = :outerbottom,
yminorgrid = true,
xticks = :process_size,
yticks = [1e1, 1e2, 1e3, 1e4, 1e5, 1e6],
xgrid = false,
xminorticks = false,
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "(#)",
xlabel = "process size (#)",
)
savefig("compton_graph_size_reduced.pdf")
# graph size versus
title_string = "n-photon Compton graph sizes"
@df df_filt scatter(:process_size, :graph_nodes, label = "nodes", markershape = :circle)
@df df_filt scatter!(:process_size, :graph_edges, label = "edges", markershape = :square)
@df df_filt scatter!(:process_size, :graph_nodes_reduced, label = "nodes (after reduction)", markershape = :star)
@df df_filt scatter!(
:process_size,
:graph_edges_reduced,
label = "edges (after reduction)",
markershape = :utriangle,
)
plot!(
title = title_string,
yscale = :log10,
legend = :outerbottom,
yminorgrid = true,
xticks = :process_size,
yticks = [1e1, 1e2, 1e3, 1e4, 1e5, 1e6],
xgrid = false,
xminorticks = false,
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "(#)",
xlabel = "process size (#)",
)
savefig("compton_graph_size_versus.pdf")
end
# for a specific process, plot times with threads as x
# NOTE(review): unreachable while the `exit(0)` above remains.
process = "ke->kkkkkkkke"
title_string = "n-photon Compton diagram generation times, $process"
df_filt = filter(:process_name => x -> x == process, df)
df_filt.graph_gen_mean = @. df_filt.graph_gen_mean / 1e9
df_filt.graph_gen_std = @. df_filt.graph_gen_std / 1e9
@df df_filt scatter(
:cpu_threads,
:graph_gen_mean,
yerror = :graph_gen_std,
label = "graph generation time",
markersize = 7,
)
plot!(
title = title_string,
yscale = :linear,
legend = :outerbottom,
minorgrid = true,
xticks = :cpu_threads,
#yticks = [1e-3, 1e-2, 1e-1, 1e-0, 1e1],
ylim = (0, max(df_filt[!, :graph_gen_mean]...) * 1.1),
xgrid = false,
xminorticks = false,
legendcolumns = 1,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "time (s)",
xlabel = "process size (#)",
)
savefig("gen_times_$(process).pdf")

View File

@ -0,0 +1,52 @@
using CSV
using DataFrames
using Plots
using StatsPlots
using LaTeXStrings
# Require both input CSVs as command-line arguments.
# Bug fix: exit after printing usage; previously the script fell through
# and crashed with a BoundsError at `ARGS[1]`/`ARGS[2]`.
if (length(ARGS) < 2)
    println("Please use with \"input_file.csv\" \"input_file_onesided.csv\"")
    exit(1)
end
"""
    proc_to_n(str::AbstractString)

Photon multiplicity of an unquoted process string such as "ke->kkke":
number of 'k' characters in the final state after "->".
"""
function proc_to_n(str::AbstractString)
    final_state = split(str, "->")[2]
    return count(==('k'), final_state)
end
# Compare reduced graph node counts between two-sided (first CSV) and
# one-sided (second CSV) diagram generation, for 1 to 7 photons.
input_file = ARGS[1]
input_file_onesided = ARGS[2]
df = CSV.read(input_file, DataFrame)
df2 = CSV.read(input_file_onesided, DataFrame)
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1 && proc_to_n(x) <= 7, df)
df_filt.process_size = @. proc_to_n(df_filt.process_name)
df_filt2 = filter(:process_name => x -> proc_to_n(x) >= 1 && proc_to_n(x) <= 7, df2)
df_filt2.process_size = @. proc_to_n(df_filt2.process_name)
# graph size
title_string = "n-photon Compton reduced graph size"
@df df_filt scatter(:process_size, :graph_nodes_reduced, label = "nodes, two-sided generation", markershape = :circle)
@df df_filt2 scatter!(:process_size, :graph_nodes_reduced, label = "nodes, one-sided generation", markershape = :square)
plot!(
title = title_string,
yscale = :log10,
legend = :outerbottom,
yminorgrid = true,
xticks = :process_size,
yticks = [1e1, 1e2, 1e3, 1e4, 1e5, 1e6],
xgrid = false,
xminorticks = false,
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "(#)",
xlabel = "process size (#)",
)
savefig("compton_diagram_gen_comparison.pdf")

130
data/evaluate_gpu.jl Normal file
View File

@ -0,0 +1,130 @@
using CSV
using DataFrames
using Plots
using StatsPlots
using LaTeXStrings
# Require the input CSV as the first command-line argument.
# Bug fix: exit after printing usage; previously the script fell through
# and crashed with a BoundsError at `ARGS[1]`.
if (length(ARGS) < 1)
    println("Please use with \"input_file.csv\"")
    exit(1)
end
# Processes included in the GPU evaluation; must match the CSV's
# `process_name` column values.
processes = [
"QED Process: 'ke->ke'",
"QED Process: 'ke->kke'",
"QED Process: 'ke->kkke'",
"QED Process: 'ke->kkkke'",
"QED Process: 'ke->kkkkke'",
"ABC Process: 'AB->AB'",
"ABC Process: 'AB->ABBB'",
"ABC Process: 'AB->ABBBBB'",
]
"""
    proc_to_n(str::AbstractString)

Photon multiplicity of a quoted process string such as
"QED Process: 'ke->kkke'": number of 'k' characters in the final state.
"""
function proc_to_n(str::AbstractString)
    quoted = split(str, "'")[2]
    final_state = split(quoted, "->")[2]
    return count(==('k'), final_state)
end
"""
    beautify_title(str::AbstractString)

Format a process string like "QED Process: 'ke->kkke'" as a plot title,
collapsing the repeated final-state particle into a power (e.g.
"kkke" -> "k^3e") and rendering the reaction via LaTeXStrings.
"""
function beautify_title(str::AbstractString)
    parts = split(str, "'")
    preprefix = parts[1]
    infix = parts[2]
    sufsuffix = parts[3]

    parts = split(infix, "->")
    prefix = parts[1]
    suffix = parts[2]

    k_count = count(c -> c == 'k', suffix)
    B_count = count(c -> c == 'B', suffix)

    if k_count == 1 || B_count == 1
        new_suffix = suffix
    elseif k_count >= 1
        new_suffix = replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        new_suffix = replace(suffix, r"B+" => "B^$B_count")
    else
        # Bug fix: previously `new_suffix` was undefined for final states
        # without 'k' or 'B', raising UndefVarError.
        new_suffix = suffix
    end

    return preprefix * L"%$prefix \rightarrow %$new_suffix" * sufsuffix
end
# Read GPU benchmark results; abort when the run did not record a GPU.
input_file = ARGS[1]
df = CSV.read(input_file, DataFrame)
n_inputs = df[:, "n_inputs"][1]
gpu_name = df[:, "gpu_name"][1]
if (gpu_name == "")
println("Results file did not execute everything on GPU! (or didn't write gpu name)")
exit(0)
end
# plotting with process size as x axis
title_string = "GPU $gpu_name, $n_inputs samples"
df_filt = filter(:process_name => x -> proc_to_n(x) >= 1, df)
# NOTE(review): the next three self-assignments are no-ops — presumably
# leftover placeholders for unit conversions; confirm and remove.
df_filt.gpu_rate = df_filt.gpu_rate
df_filt.gpu_time = df_filt.gpu_time
df_filt.gpu_gflops = df_filt.gpu_gflops
df_filt.process_size = @. proc_to_n(df_filt.process_name)
df_no_opt = filter(:process_name => x -> match(r" not optimized$", x) !== nothing, df_filt)
df_red = filter(:process_name => x -> match(r" reduced$", x) !== nothing, df_filt)
@df df_no_opt scatter(:process_size, :gpu_rate, label = "unoptimized function execution rate", markersize = 7)
@df df_red scatter!(:process_size, :gpu_rate, label = "reduced function execution rate", markersize = 7)
plot!(
#title = title_string * ", sample rate",
yscale = :log10,
legend = :outerbottom,
xticks = [1, 2, 3, 4, 5],
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "rate (" * L"s^{-1}" * ")",
xlabel = "process size (#)",
)
savefig("gpu_rate_$(gpu_name).pdf")
@df df_no_opt scatter(:process_size, :gpu_time, label = "unoptimized function execution time", markersize = 7)
@df df_red scatter!(:process_size, :gpu_time, label = "reduced function execution time", markersize = 7)
plot!(
#title = title_string * ", execution time",
yscale = :log10,
legend = :outerbottom,
xticks = [1, 2, 3, 4, 5],
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "time (s)",
xlabel = "process size (#)",
)
savefig("gpu_times_$(gpu_name).pdf")
@df df_no_opt scatter(:process_size, :gpu_gflops, label = "unoptimized function", markersize = 7)
@df df_red scatter!(:process_size, :gpu_gflops, label = "reduced function", markersize = 7)
plot!(
#title = title_string * ", GFLOPS",
yscale = :linear,
legend = :outerbottom,
xticks = [1, 2, 3, 4, 5],
legendcolumns = 2,
legend_font_pointsize = 10,
size = (800, 600),
ylabel = "performance (GFLOPS)",
xlabel = "process size (#)",
)
savefig("gpu_perf_$(gpu_name).pdf")

View File

@ -0,0 +1,279 @@
using Plots
using StatsPlots
using CSV
using DataFrames
using LaTeXStrings
# Require both input CSVs as command-line arguments.
# Bug fix: exit after printing usage; previously the script fell through
# and crashed with a BoundsError at `ARGS[1]`/`ARGS[2]`.
if (length(ARGS) < 2)
    println("Please use with \"input_file.csv\" \"input_file_gpu.csv\"")
    exit(1)
end
"""
    proc_to_n(str::AbstractString)

Photon multiplicity of a quoted process string such as
"QED Process: 'ke->kkke'": number of 'k' characters in the final state.
"""
function proc_to_n(str::AbstractString)
    quoted = split(str, "'")[2]
    final_state = split(quoted, "->")[2]
    return count(==('k'), final_state)
end
"""
    beautify_title(str::AbstractString)

Format an unquoted process string like "ke->kkke" as a LaTeXStrings
reaction, collapsing the repeated final-state particle into a power
(e.g. "kkke" -> "k^3e").
"""
function beautify_title(str::AbstractString)
    parts = split(str, "->")
    prefix = parts[1]
    suffix = parts[2]

    k_count = count(c -> c == 'k', suffix)
    B_count = count(c -> c == 'B', suffix)

    if k_count == 1 || B_count == 1
        new_suffix = suffix
    elseif k_count >= 1
        new_suffix = replace(suffix, r"k+" => "k^$k_count")
    elseif B_count >= 1
        new_suffix = replace(suffix, r"B+" => "B^$B_count")
    else
        # Bug fix: previously `new_suffix` was undefined for final states
        # without 'k' or 'B', raising UndefVarError.
        new_suffix = suffix
    end

    return L"%$prefix \rightarrow %$new_suffix"
end
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke", "AB->AB", "AB->ABBB", "AB->ABBBBB"]
input_file = ARGS[1]
input_file_gpu = ARGS[2]
df = CSV.read(input_file, DataFrame)
df_gpu = CSV.read(input_file_gpu, DataFrame)
n_inputs = 2^20
# NOTE(review): the `#=` below opens a block comment but no matching
# `=#` is visible before the "# ABC vs QED" section — confirm the
# terminator wasn't lost; as shown, the per-process loop is commented out.
#=
for process in processes
df_filt = filter(:process => x -> x == process, df)
df_filt_gpu = filter(:process => x -> x == process, df_gpu)
# add dummy factors to get the numbers in similar orders of magnitude
df_filt.cumulative_optimization_time = df_filt.cumulative_optimization_time .* 1e4
df_filt_gpu.cumulative_optimization_time = df_filt_gpu.cumulative_optimization_time .* 1e4
df_filt_gpu.gpu_t = df_filt_gpu.gpu_t .* 1e3
cpu = !isempty(df_filt)
gpu = !isempty(df_filt_gpu)
ymax = 0.0
if cpu
@df df_filt scatter(
:operations,
:cumulative_optimization_time,
label = "Cumulative Optimization Time (x10000) (s)",
markersize = 4,
)
ymax = max(df_filt[!, :cpu_st_t]..., df_filt[!, :cumulative_optimization_time]...) * 1.1 * 1e4
@df df_filt scatter!(
:operations,
:cpu_st_t,
label = "Single-Threaded Execution (s)",
markersize = 4,
markershape = :square,
)
end
if gpu
if !cpu
@df df_filt_gpu scatter(
:operations,
:cumulative_optimization_time,
label = "Cumulative Optimization Time (x10000) (s)",
markersize = 4,
)
ymax = max(df_filt_gpu[!, :gpu_t]..., df_filt_gpu[!, :cumulative_optimization_time]...) * 1.1 * 1e4
end
@df df_filt_gpu scatter!(
:operations,
:gpu_t,
label = "GPU Execution (x1000) (s)",
markersize = 4,
markershape = :diamond,
)
end
if cpu || gpu
plot!(
title = ("$(beautify_title(process)) Reduction Progression ($(n_inputs) Inputs)"),
xscale = :linear,
yscale = :linear,
#ylim = (0, ymax),
legend = :outerbottom,
minorgrid = true,
xticks = :process_size,
#yticks = [1e-3, 1e-1, 1e1, 1e3],
xgrid = false,
xminorticks = false,
legendcolumns = 1,
legend_font_pointsize = 12,
fontsize = 12,
size = (800, 600),
xlabel = "optimizer steps (#)",
ylabel = "time (s)",
)
savefig("$(String(process))_reduction_bench.pdf")
end
end
# ABC vs QED
# Relative CPU performance over reduction progress, ABC vs QED, each
# normalized to its own step count and unreduced execution time.
AB_process = "AB->ABBB"
abc_label = "\$A + B \\rightarrow A + 3B\$"
QED_process = "ke->kkkke"
qed_label = "\$e^- + \\gamma \\rightarrow e^- + 4\\gamma\$"
df_filt_AB = filter(:process => x -> x == AB_process, df)
df_filt_QED = filter(:process => x -> x == QED_process, df)
max_AB = max(df_filt_AB[!, :operations]...)
max_QED = max(df_filt_QED[!, :operations]...)
df_filt_AB.reduction_progress = df_filt_AB.operations ./ max_AB .* 100.0
df_filt_QED.reduction_progress = df_filt_QED.operations ./ max_QED .* 100.0
df_filt_AB.relative_performance = df_filt_AB.cpu_st_t ./ df_filt_AB[!, :cpu_st_t][1] .* 100.0
df_filt_QED.relative_performance = df_filt_QED.cpu_st_t ./ df_filt_QED[!, :cpu_st_t][1] .* 100.0
@df df_filt_AB scatter(:reduction_progress, :relative_performance, label = abc_label, markersize = 4)
@df df_filt_QED scatter!(:reduction_progress, :relative_performance, label = qed_label, markersize = 4)
plot!(
#title = ("Relative Performance of $(beautify_title(QED_process)) versus $(beautify_title(AB_process)) on CPU"),
xscale = :linear,
yscale = :linear,
#ylim = (0, ymax),
legend = :outerbottom,
minorgrid = true,
xticks = :process_size,
#yticks = [1e-3, 1e-1, 1e1, 1e3],
xgrid = false,
xminorticks = false,
legendcolumns = 1,
legend_font_pointsize = 12,
fontsize = 12,
size = (800, 600),
xlabel = "reduction progress (%)",
ylabel = "relative time taken (%)",
)
savefig("reduction_bench_relative.pdf")
# ABC vs QED on GPU
# Same comparison on GPU data, with error bars from the gpu_s column
# normalized the same way.
AB_process = "AB->ABBB"
abc_label = "\$A + B \\rightarrow A + 3B\$"
QED_process = "ke->kkkke"
qed_label = "\$e^- + \\gamma \\rightarrow e^- + 4\\gamma\$"
df_filt_AB = filter(:process => x -> x == AB_process, df_gpu)
df_filt_QED = filter(:process => x -> x == QED_process, df_gpu)
max_AB = max(df_filt_AB[!, :operations]...)
max_QED = max(df_filt_QED[!, :operations]...)
df_filt_AB.reduction_progress = df_filt_AB.operations ./ max_AB .* 100.0
df_filt_QED.reduction_progress = df_filt_QED.operations ./ max_QED .* 100.0
df_filt_AB.relative_performance = df_filt_AB.gpu_t ./ df_filt_AB[!, :gpu_t][1] .* 100.0
df_filt_QED.relative_performance = df_filt_QED.gpu_t ./ df_filt_QED[!, :gpu_t][1] .* 100.0
df_filt_AB.relative_std = df_filt_AB.gpu_s ./ df_filt_AB[!, :gpu_t][1] .* 100.0
df_filt_QED.relative_std = df_filt_QED.gpu_s ./ df_filt_QED[!, :gpu_t][1] .* 100.0
@df df_filt_AB scatter(
:reduction_progress,
:relative_performance,
yerror = :relative_std,
label = abc_label,
markersize = 4,
)
@df df_filt_QED scatter!(
:reduction_progress,
:relative_performance,
yerror = :relative_std,
label = qed_label,
markersize = 4,
)
plot!(
#title = "Relative Performance of $(beautify_title(QED_process)) versus $(beautify_title(AB_process)) on GPU (A100)",
xscale = :linear,
yscale = :linear,
#ylim = (0, ymax),
legend = :outerbottom,
minorgrid = true,
xticks = :process_size,
#yticks = [1e-3, 1e-1, 1e1, 1e3],
xgrid = false,
xminorticks = false,
legendcolumns = 1,
legend_font_pointsize = 12,
fontsize = 12,
size = (800, 600),
xlabel = "reduction progress (%)",
ylabel = "relative time taken (%)",
)
savefig("reduction_bench_relative_gpu.pdf")
=#
# --- CPU vs. GPU relative performance for the ke->kkke QED process ---
QED_process = "ke->kkke"
# LaTeX legend label. Fixed: was "\\e^-", which renders as the undefined LaTeX
# macro `\e`; the sibling labels above use plain "e^-".
# NOTE(review): QED_label appears unused in the visible code below — confirm a later section uses it.
QED_label = "\$e^- + \\gamma \\rightarrow e^- + 3\\gamma\$"
# Select only the rows for this process from the GPU and CPU benchmark tables.
df_filt_QED_GPU = filter(:process => x -> x == QED_process, df_gpu)
df_filt_QED_CPU = filter(:process => x -> x == QED_process, df)
# Normalize operation counts against the CPU run's maximum so both curves
# share the same 0–100% x-axis. `maximum(col)` replaces `max(col...)`, which
# splatted the whole column into varargs.
max_QED = maximum(df_filt_QED_CPU[!, :operations])
df_filt_QED_GPU.reduction_progress = df_filt_QED_GPU.operations ./ max_QED .* 100.0
df_filt_QED_CPU.reduction_progress = df_filt_QED_CPU.operations ./ max_QED .* 100.0
# Timings expressed in percent relative to each run's first (unreduced) row.
df_filt_QED_GPU.relative_performance = df_filt_QED_GPU.gpu_t ./ df_filt_QED_GPU[!, :gpu_t][1] .* 100.0
df_filt_QED_CPU.relative_performance = df_filt_QED_CPU.cpu_st_t ./ df_filt_QED_CPU[!, :cpu_st_t][1] .* 100.0
# Standard deviations scaled by the same baseline so error bars match the percent axis.
df_filt_QED_GPU.relative_std = df_filt_QED_GPU.gpu_s ./ df_filt_QED_GPU[!, :gpu_t][1] .* 100.0
df_filt_QED_CPU.relative_std = df_filt_QED_CPU.cpu_st_s ./ df_filt_QED_CPU[!, :cpu_st_t][1] .* 100.0
# Draw the CPU series first (`scatter` creates a new figure), then overlay the
# GPU series (`scatter!` mutates the current figure). The `@df` macro
# (StatsPlots) resolves the `:column` symbols against the given DataFrame.
@df df_filt_QED_CPU scatter(
    :reduction_progress,
    :relative_performance,
    yerror = :relative_std,  # vertical error bars from the relative-std column
    label = "CPU relative time taken (%)",
    markersize = 4,
)
@df df_filt_QED_GPU scatter!(
    :reduction_progress,
    :relative_performance,
    yerror = :relative_std,
    label = "GPU relative time taken (%)",
    markersize = 4,
)
# Apply shared axis/legend styling to the current figure, then write it to disk.
plot!(
    #title = "Relative Performance of $(beautify_title(QED_process)) on CPU versus GPU",
    xscale = :linear,
    yscale = :linear,
    ylim = (0, :auto),  # pin the lower bound at 0%, let Plots pick the upper bound
    legend = :outerbottom,
    minorgrid = true,
    xticks = :process_size,
    #yticks = [1e-3, 1e-1, 1e1, 1e3],
    xgrid = false,
    xminorticks = false,
    legendcolumns = 1,
    legend_font_pointsize = 12,
    fontsize = 12,
    size = (800, 600),  # output size in pixels (width, height)
    xlabel = "reduction progress (%)",
    ylabel = "relative time taken (%)",
)
# File name extension selects the backend's PDF output.
savefig("reduction_bench_relative_cpu_vs_gpu.pdf")

View File

@ -1,16 +0,0 @@
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
0,77,101,252.0,6240.0,0.04038461538461539,0.02087051,8.691e-6,3.405098066,0.244763721,1.565749515,0.936213163
1,76,99,246.0,6240.0,0.03942307692307692,0.020658734,9.36e-6,3.244313848,0.230460257,1.548012602,0.887605389
2,75,97,240.0,6240.0,0.038461538461538464,0.045333482,8.74e-6,3.163679857,0.217614064,1.52780456,0.816496837
3,74,95,234.0,6240.0,0.0375,0.020314034,9.081e-6,2.956421016,0.183415997,1.524262179,0.793770075
4,73,93,228.0,6240.0,0.03653846153846154,0.033579409,8.52e-6,2.845414866,0.19168374,1.50907807,0.742734411
5,72,92,228.0,6144.0,0.037109375,0.019736718,8.87e-6,2.827109937,0.207452606,1.497203204,0.719774022
6,71,90,222.0,6144.0,0.0361328125,0.043612693,1.01e-5,2.62776692,0.166492497,1.602060948,0.668929854
7,70,89,222.0,6048.0,0.03670634920634921,0.042731148,1.053e-5,2.631288029,0.185812224,1.514154792,0.694503947
8,69,87,216.0,6048.0,0.03571428571428571,0.042148711,8.19e-6,2.493343257,0.183595081,1.506478504,0.652420896
9,68,86,216.0,5952.0,0.036290322580645164,0.041568955,8.571e-6,2.487317627,0.147773078,1.472141844,0.653143947
10,67,85,216.0,5856.0,0.036885245901639344,0.041307868,9.13e-6,2.491634709,0.175728138,1.482162906,0.63058774
11,66,84,216.0,5760.0,0.0375,0.041265756,8.43e-6,2.516916643,0.180420842,1.463053866,0.650627815
12,65,83,205.0,5760.0,0.035590277777777776,0.039711293,9.22e-6,2.479664249,0.178013433,1.459566956,0.652477867
13,64,82,205.0,5664.0,0.03619350282485876,0.030866093,8.87e-6,2.485424881,0.179983608,1.564961227,0.647932468
14,63,81,205.0,5568.0,0.03681752873563218,0.029946916,8.93e-6,2.469922022,0.179443854,1.485935831,0.651804318
1 operations graph_nodes graph_edges graph_ce graph_dt graph_ci gen_func_t cpu_compile_t cpu_st_t cpu_mt_t gpu_compile_t gpu_t
2 0 77 101 252.0 6240.0 0.04038461538461539 0.02087051 8.691e-6 3.405098066 0.244763721 1.565749515 0.936213163
3 1 76 99 246.0 6240.0 0.03942307692307692 0.020658734 9.36e-6 3.244313848 0.230460257 1.548012602 0.887605389
4 2 75 97 240.0 6240.0 0.038461538461538464 0.045333482 8.74e-6 3.163679857 0.217614064 1.52780456 0.816496837
5 3 74 95 234.0 6240.0 0.0375 0.020314034 9.081e-6 2.956421016 0.183415997 1.524262179 0.793770075
6 4 73 93 228.0 6240.0 0.03653846153846154 0.033579409 8.52e-6 2.845414866 0.19168374 1.50907807 0.742734411
7 5 72 92 228.0 6144.0 0.037109375 0.019736718 8.87e-6 2.827109937 0.207452606 1.497203204 0.719774022
8 6 71 90 222.0 6144.0 0.0361328125 0.043612693 1.01e-5 2.62776692 0.166492497 1.602060948 0.668929854
9 7 70 89 222.0 6048.0 0.03670634920634921 0.042731148 1.053e-5 2.631288029 0.185812224 1.514154792 0.694503947
10 8 69 87 216.0 6048.0 0.03571428571428571 0.042148711 8.19e-6 2.493343257 0.183595081 1.506478504 0.652420896
11 9 68 86 216.0 5952.0 0.036290322580645164 0.041568955 8.571e-6 2.487317627 0.147773078 1.472141844 0.653143947
12 10 67 85 216.0 5856.0 0.036885245901639344 0.041307868 9.13e-6 2.491634709 0.175728138 1.482162906 0.63058774
13 11 66 84 216.0 5760.0 0.0375 0.041265756 8.43e-6 2.516916643 0.180420842 1.463053866 0.650627815
14 12 65 83 205.0 5760.0 0.035590277777777776 0.039711293 9.22e-6 2.479664249 0.178013433 1.459566956 0.652477867
15 13 64 82 205.0 5664.0 0.03619350282485876 0.030866093 8.87e-6 2.485424881 0.179983608 1.564961227 0.647932468
16 14 63 81 205.0 5568.0 0.03681752873563218 0.029946916 8.93e-6 2.469922022 0.179443854 1.485935831 0.651804318

View File

@ -1,176 +0,0 @@
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
0,356,493,1399.0,30528.0,0.0458267819706499,0.077070556,2.6761e-5,17.804336617,0.960385595,10.618577031,4.95440474
1,354,491,1399.0,30432.0,0.04597134595162986,1.030851104,2.37e-5,17.726472964,0.933074463,2.174912444,4.959474851
2,352,489,1399.0,30336.0,0.04611682489451477,0.376282553,2.3861e-5,17.935912907,0.968087391,2.238665483,4.912705328
3,350,487,1399.0,30240.0,0.04626322751322751,0.076651194,4.2451e-5,17.976779783,0.977130996,2.246167674,4.954520005
4,348,485,1399.0,30144.0,0.04641056263269639,0.223709216,2.8031e-5,17.67129111,0.97799748,2.175788856,4.923999491
5,346,483,1399.0,30048.0,0.04655883919062833,0.076034997,4.3191e-5,17.766336956,0.967055891,2.187609178,4.922574669
6,344,481,1399.0,29952.0,0.04670806623931624,0.398917781,4.3422e-5,17.709032771,0.971142926,2.170963978,4.917191185
7,342,479,1399.0,29856.0,0.04685825294748124,0.352569343,4.3801e-5,17.690255833,0.952966242,2.159295978,4.945842152
8,340,477,1399.0,29760.0,0.04700940860215054,0.117620751,4.2992e-5,17.905787431,0.749896479,2.19940915,4.922882222
9,338,475,1399.0,29664.0,0.04716154261057174,0.318053898,2.3481e-5,17.522775542,0.745113955,2.202366151,4.928734427
10,336,473,1399.0,29568.0,0.047314664502164504,0.184069985,2.3381e-5,17.529935879,0.74637911,2.238397648,4.919919125
11,334,471,1399.0,29472.0,0.047468783930510315,0.086029218,2.365e-5,17.560859257,0.75559668,2.249242933,4.956561058
12,332,469,1399.0,29376.0,0.04762391067538126,0.077326472,2.4361e-5,17.559317648,0.746726769,2.1818156,4.938490196
13,330,467,1399.0,29280.0,0.047780054644808743,0.169738661,2.342e-5,17.517109121,0.751453942,2.187781478,4.923659727
14,328,465,1399.0,29184.0,0.047937225877192985,0.077817676,2.315e-5,17.533304215,0.745481303,2.209343496,4.960503415
15,326,463,1399.0,29088.0,0.04809543454345434,0.171584444,2.352e-5,17.579912576,0.754778436,2.210370024,4.934281254
16,324,461,1399.0,28992.0,0.04825469094922737,0.084223667,2.305e-5,17.570464754,0.751290178,2.22797709,4.939806799
17,322,459,1399.0,28896.0,0.04841500553709856,0.123005102,2.3661e-5,17.605650973,0.756929676,2.269940175,4.937928844
18,320,457,1399.0,28800.0,0.04857638888888889,0.086677986,2.37e-5,17.5539199,0.746367967,2.264938904,4.959258096
19,318,455,1399.0,28704.0,0.04873885172798216,0.12293158,2.3711e-5,17.609395222,0.755783994,2.264754078,4.92827168
20,316,453,1399.0,28608.0,0.04890240492170023,0.124475123,2.4281e-5,17.597716228,0.75106304,2.20218749,4.933120236
21,314,451,1399.0,28512.0,0.04906705948372615,0.112172177,2.6391e-5,17.623178954,0.755694751,2.186417905,4.921509117
22,312,449,1399.0,28416.0,0.04923282657657658,0.219362642,2.321e-5,17.593459902,0.747914841,2.168628993,4.952994795
23,310,447,1399.0,28320.0,0.049399717514124294,0.080729209,2.358e-5,17.571675834,0.755489634,2.209531477,4.951190234
24,308,445,1399.0,28224.0,0.049567743764172334,0.080235835,2.3271e-5,17.615791747,0.750314688,2.21464245,4.949496195
25,306,443,1399.0,28128.0,0.049736916951080776,0.124106403,2.374e-5,17.60716179,0.753826187,2.186184237,4.920128786
26,304,441,1399.0,28032.0,0.04990724885844749,0.080715608,2.3781e-5,17.581988477,0.750266997,2.209826064,4.937813884
27,302,439,1399.0,27936.0,0.05007875143184422,0.080606465,2.4071e-5,17.633096607,0.749125265,2.198599437,4.935320693
28,300,437,1399.0,27840.0,0.0502514367816092,0.081056137,2.3781e-5,17.564695624,0.746230293,2.225110355,4.939656214
29,298,435,1399.0,27744.0,0.05042531718569781,0.096545225,2.379e-5,17.58144781,0.747458632,2.263551336,4.924245431
30,296,433,1399.0,27648.0,0.050600405092592594,0.120638697,2.383e-5,17.574370836,0.748933285,2.234417803,4.915183371
31,294,431,1399.0,27552.0,0.0507767131242741,0.125073582,2.393e-5,17.627352699,0.754384428,2.214199106,4.938130459
32,292,429,1399.0,27456.0,0.05095425407925408,0.12314953,2.468e-5,17.697160429,0.796488763,2.261473826,4.956976138
33,290,427,1399.0,27360.0,0.051133040935672516,0.125481487,2.354e-5,17.636971006,0.748416796,2.222200724,4.948970096
34,288,425,1399.0,27264.0,0.051313086854460094,0.094052012,2.4301e-5,17.62971842,0.805139938,2.205015347,4.959455536
35,286,423,1399.0,27168.0,0.051494405182567725,0.08136377,2.4041e-5,17.621304482,0.747718686,2.244362062,4.941432169
36,284,421,1399.0,27072.0,0.05167700945626478,0.080217839,2.3921e-5,17.61427713,0.747754586,2.212103901,4.933185029
37,282,417,1399.0,26976.0,0.051860913404507714,0.126372199,2.376e-5,17.601417663,0.750036789,2.163344775,4.926698186
38,280,414,1399.0,26880.0,0.052046130952380955,0.125444544,2.476e-5,17.612452443,0.748155225,2.195259021,4.91594575
39,278,412,1399.0,26784.0,0.05223267622461171,0.083158944,2.4551e-5,17.599589645,0.741671021,2.208064301,4.9351555
40,276,410,1399.0,26688.0,0.05242056354916067,0.083321959,2.4101e-5,17.567124159,0.748238012,2.197233222,4.954754226
41,274,408,1399.0,26592.0,0.052609807460890494,0.084803792,2.3901e-5,17.549365204,0.754817994,2.229499405,4.94957165
42,272,405,1399.0,26496.0,0.05280042270531401,0.127648261,2.3851e-5,17.582852416,0.750759497,2.230398721,4.937220319
43,270,401,1399.0,26400.0,0.052992424242424244,0.128445184,2.428e-5,17.596647819,0.75777713,2.160922996,4.937371146
44,268,399,1399.0,26304.0,0.053185827250608275,0.129526096,2.5081e-5,17.594476326,0.746906342,2.219401891,4.93357998
45,266,397,1399.0,26208.0,0.05338064713064713,0.129819495,2.4731e-5,17.568331366,0.750368555,2.18948505,4.922275732
46,264,394,1399.0,26112.0,0.05357689950980392,0.087649075,2.462e-5,17.585414218,0.751605626,2.198684054,4.941424565
47,262,391,1399.0,26016.0,0.05377460024600246,0.089110637,2.4551e-5,17.614139291,0.750622403,2.168793662,4.953321773
48,260,389,1399.0,25920.0,0.053973765432098766,0.090307061,2.45e-5,17.633806293,0.749096576,2.224521298,4.930813246
49,258,387,1399.0,25824.0,0.054174411400247834,0.133480181,2.461e-5,17.634768586,0.756613261,2.201452177,4.972809945
50,256,385,1399.0,25728.0,0.05437655472636816,0.134254424,2.425e-5,17.606323938,0.748779206,2.216818872,4.939295094
51,254,382,1399.0,25632.0,0.05458021223470662,0.134016868,2.4531e-5,17.5926305,0.75625873,2.227679889,4.968213894
52,252,379,1399.0,25536.0,0.054785401002506263,0.135650945,2.4601e-5,17.642803637,0.751975585,2.226011125,4.9285844
53,250,375,1399.0,25440.0,0.054992138364779876,0.136647933,2.4161e-5,17.799738254,0.76667472,2.165144989,4.930427128
54,248,373,1399.0,25344.0,0.05520044191919192,0.123103164,2.4461e-5,17.745879754,0.760526742,2.161495227,4.940492285
55,246,370,1399.0,25248.0,0.05541032953105197,0.09476826,2.3511e-5,17.596131758,0.756924114,2.180021837,4.954121771
56,244,365,1399.0,25152.0,0.05562181933842239,0.095345787,2.4171e-5,17.612023424,0.747989147,2.215139082,4.945396527
57,242,362,1399.0,25056.0,0.05583492975734355,0.139570128,2.3801e-5,17.630922372,0.750668446,2.186529739,4.961981394
58,240,359,1399.0,24960.0,0.05604967948717949,0.097466916,2.4451e-5,17.61078772,0.7485922,2.217673752,4.95291513
59,238,357,1399.0,24864.0,0.05626608751608752,0.138599302,2.3601e-5,17.586404505,0.756929027,2.233374301,4.935342135
60,236,352,1399.0,24768.0,0.05648417312661499,0.147210964,2.4911e-5,17.650436019,0.74908103,2.157077946,4.937714591
61,234,350,1399.0,24672.0,0.05670395590142672,0.099491094,2.3601e-5,17.608002511,0.756924473,2.165309665,4.932434479
62,232,348,1399.0,24576.0,0.056925455729166664,0.141929827,2.454e-5,17.605756917,0.749178717,2.234082435,4.957629943
63,230,344,1399.0,24480.0,0.057148692810457515,0.142483983,2.4211e-5,17.623883273,0.758216784,2.210078838,4.930940098
64,228,341,1399.0,24384.0,0.057373687664041995,0.101524943,2.4371e-5,17.662312587,0.751128917,2.22449657,4.96708528
65,226,339,1399.0,24288.0,0.05760046113306983,0.102619253,2.3831e-5,17.610112922,0.758167777,2.187456785,4.957519684
66,224,337,1399.0,24192.0,0.05782903439153439,0.10351088,2.3401e-5,17.611932402,0.749178457,2.236980212,4.933450322
67,222,335,1399.0,24096.0,0.05805942895086321,0.148780402,2.3711e-5,17.636035095,0.75707833,2.252138664,4.951632995
68,220,333,1399.0,24000.0,0.058291666666666665,0.148311059,2.4851e-5,17.617252052,0.750104986,2.22330739,4.9243139
69,218,329,1399.0,23904.0,0.05852576974564926,0.151678794,2.4181e-5,17.627742278,0.755299894,2.248062201,4.951401482
70,216,326,1399.0,23808.0,0.05876176075268817,0.15082361,2.3851e-5,17.647410652,0.752445605,2.240948426,4.949599133
71,214,323,1399.0,23712.0,0.05899966261808367,0.153382492,2.4011e-5,17.654743596,0.752802907,2.253819342,4.966250371
72,212,320,1399.0,23616.0,0.05923949864498645,0.151516131,2.3931e-5,17.672908543,0.750257716,2.220003155,4.944782327
73,210,317,1399.0,23520.0,0.059481292517006804,0.154244628,2.386e-5,17.60330678,0.750422813,2.211295295,4.943727837
74,208,313,1399.0,23424.0,0.05972506830601093,0.153767234,2.4291e-5,17.640950842,0.74988433,2.24794966,4.952712228
75,206,311,1399.0,23328.0,0.05997085048010974,0.155927375,2.406e-5,17.589128666,0.749120129,2.253801308,4.953014816
76,204,306,1399.0,23232.0,0.06021866391184573,0.15464184,2.4521e-5,17.662616581,0.750484429,2.227511412,4.924026259
77,202,304,1399.0,23136.0,0.06046853388658368,0.157807248,2.4041e-5,17.611953814,0.755679546,2.178734374,4.943974526
78,200,301,1399.0,23040.0,0.06072048611111111,0.155978707,2.4051e-5,17.624250437,0.794935481,2.247188963,4.940403894
79,198,298,1399.0,22944.0,0.06097454672245467,0.158377905,2.5091e-5,17.634938402,0.754743461,2.245248812,4.919902064
80,196,296,1399.0,22848.0,0.061230742296918765,0.158750786,2.4511e-5,17.6360904,0.750867213,2.200032233,4.942215648
81,194,293,1399.0,22752.0,0.061489099859353025,0.161152794,2.4831e-5,17.780761042,0.765338482,2.204873372,4.939655562
82,192,290,1399.0,22656.0,0.061749646892655365,0.160175486,2.318e-5,17.798147683,0.76168194,2.230891056,4.955801153
83,190,287,1399.0,22560.0,0.06201241134751773,0.159868767,2.4791e-5,17.764165058,0.796377137,2.239618185,4.928054627
84,188,283,1399.0,22464.0,0.06227742165242165,0.160933577,2.4221e-5,17.798426962,0.848255338,2.218112612,4.932433146
85,186,280,1399.0,22368.0,0.06254470672389127,0.163393917,2.4371e-5,17.808464853,0.765692696,2.213490844,4.943298137
86,184,277,1399.0,22272.0,0.06281429597701149,0.163792118,2.4261e-5,17.805783627,0.761027705,2.232891092,4.919454211
87,182,275,1399.0,22176.0,0.06308621933621934,0.162177953,2.43e-5,17.797665375,0.761040026,2.236586089,4.951072155
88,180,271,1399.0,22080.0,0.06336050724637682,0.165377424,2.557e-5,17.805099359,0.763146286,2.212611436,4.921150887
89,178,268,1399.0,21984.0,0.06363719068413391,0.166754373,2.5141e-5,17.770997205,0.764361801,2.199943181,4.934748884
90,176,266,1399.0,21888.0,0.06391630116959064,0.167241957,2.4571e-5,17.770223198,0.759580227,2.247867501,4.935730147
91,174,264,1399.0,21792.0,0.06419787077826726,0.169623073,2.5e-5,17.771153368,0.750276145,2.243455929,4.939933808
92,172,261,1399.0,21696.0,0.06448193215339233,0.168358288,2.5181e-5,17.799224982,0.760906435,2.210000929,4.943923374
93,170,259,1399.0,21600.0,0.06476851851851852,0.170287483,2.529e-5,17.79271252,0.763151029,2.205444892,4.924953813
94,168,254,1399.0,21504.0,0.06505766369047619,0.168986856,2.5021e-5,17.775583682,0.760237647,2.222811993,4.951301097
95,166,250,1399.0,21408.0,0.06534940209267563,0.171662521,2.4401e-5,17.636022254,0.749599438,2.234944605,4.958431762
96,164,246,1399.0,21312.0,0.06564376876876876,0.170911431,2.4481e-5,17.633556045,0.788097892,2.198060879,4.922871993
97,162,244,1399.0,21216.0,0.06594079939668175,0.172387252,2.4781e-5,17.620254381,0.799269067,2.202436673,4.936411908
98,160,241,1399.0,21120.0,0.0662405303030303,0.171830017,2.581e-5,17.656653806,0.750275098,2.200933622,4.94776375
99,158,238,1399.0,21024.0,0.06654299847792998,0.174560093,2.447e-5,17.625724723,0.756745741,2.249721096,4.958786002
100,156,235,1399.0,20928.0,0.06684824159021406,0.178996759,2.453e-5,17.669194606,0.749422535,2.218089817,4.960858653
101,154,231,1399.0,20832.0,0.0671562980030722,0.175032127,2.3871e-5,17.642586975,0.754643863,2.194675279,4.944134534
102,152,229,1399.0,20736.0,0.06746720679012345,0.176393906,2.4731e-5,17.592973556,0.749943551,2.229565622,4.927935661
103,150,225,1399.0,20640.0,0.06778100775193799,0.178017631,2.412e-5,17.630568322,0.755272802,2.221125776,4.952348991
104,148,223,1399.0,20544.0,0.0680977414330218,0.175897841,2.36e-5,17.661766307,0.749293633,2.2201698,4.963634779
105,146,221,1399.0,20448.0,0.06841744913928012,0.178367362,2.5001e-5,17.654508999,0.755361234,2.185187066,4.938710949
106,144,218,1399.0,20352.0,0.06874017295597484,0.178791594,2.502e-5,17.649520916,0.749748217,2.238645461,4.955141284
107,142,216,1399.0,20256.0,0.06906595576619273,0.175900502,2.3291e-5,17.648252045,0.755157659,2.250102545,4.948078116
108,140,212,1399.0,20160.0,0.06939484126984127,0.180050739,2.3901e-5,17.642556024,0.751139061,2.195233955,4.92102672
109,138,210,1399.0,20064.0,0.06972687400318979,0.182587052,2.492e-5,17.631301401,0.754040144,2.177296385,4.948297571
110,136,207,1399.0,19968.0,0.07006209935897435,0.181449712,2.4401e-5,17.618787463,0.748940439,2.251932822,4.950366155
111,134,203,1399.0,19872.0,0.07040056360708534,0.183466877,2.407e-5,17.658532693,0.756589176,2.240568188,4.97337861
112,132,201,1399.0,19776.0,0.0707423139158576,0.181545084,2.485e-5,17.63441504,0.751343023,2.183033772,4.975534251
113,130,199,1399.0,19680.0,0.07108739837398374,0.177809314,2.417e-5,17.627163359,0.754577307,2.211080446,4.977438563
114,128,195,1399.0,19584.0,0.07143586601307189,0.183038393,2.5541e-5,17.63366534,0.751510139,2.237832092,4.969644912
115,126,191,1399.0,19488.0,0.07178776683087028,0.186344151,2.4971e-5,17.711808739,0.759177,2.236586017,4.951292022
116,124,187,1399.0,19392.0,0.07214315181518152,0.184833587,2.475e-5,17.648467279,0.749564641,2.179772409,4.97017709
117,122,183,1399.0,19296.0,0.07250207296849089,0.193249355,2.3811e-5,17.639230223,0.755564354,2.195109482,4.982434629
118,120,180,1399.0,19200.0,0.07286458333333333,0.186818046,2.372e-5,17.635977046,0.750626058,2.243877912,4.972608068
119,118,177,1399.0,19104.0,0.07323073701842546,0.189204719,2.4961e-5,17.791522288,0.766082656,2.242948358,4.980365418
120,116,173,1399.0,19008.0,0.07360058922558922,0.186391669,2.4181e-5,17.645956891,0.750893368,2.197914806,4.98745469
121,114,171,1399.0,18912.0,0.07397419627749577,0.19060573,2.4701e-5,17.771140583,0.765197694,2.20643796,4.959618561
122,112,169,1399.0,18816.0,0.0743516156462585,0.188466188,2.381e-5,17.795228145,0.759434429,2.26208531,4.965068853
123,110,165,1399.0,18720.0,0.07473290598290598,0.191524927,2.3841e-5,17.779734215,0.767242896,2.242967333,4.950554681
124,108,161,1399.0,18624.0,0.07511812714776632,0.189450326,2.3601e-5,17.807849571,0.762371273,2.196711688,4.966122065
125,106,157,1399.0,18528.0,0.0755073402417962,0.191473057,2.357e-5,17.632877767,0.755845465,2.188474891,4.977562868
126,104,153,1399.0,18432.0,0.0759006076388889,0.191382079,2.3851e-5,17.775729988,0.758861116,2.278116886,4.979965119
127,102,151,1399.0,18336.0,0.07629799301919721,0.192296369,2.394e-5,17.777918793,0.764981303,2.224818047,4.949944943
128,100,149,1399.0,18240.0,0.07669956140350877,0.191424719,2.4331e-5,17.856475915,0.76057459,2.201588049,4.941974925
129,98,146,1399.0,18144.0,0.07710537918871252,0.194280932,2.3951e-5,17.779963845,0.766401736,2.223182601,4.961465017
130,96,142,1399.0,18048.0,0.07751551418439716,0.192850597,2.3861e-5,17.765033828,0.760509569,2.250897799,4.967399083
131,94,138,1399.0,17952.0,0.07793003565062388,0.194741823,2.38e-5,17.778261696,0.764271609,2.248898068,4.975998565
132,92,136,1399.0,17856.0,0.07834901433691756,0.193567295,2.5281e-5,17.791322862,0.759809249,2.216694812,4.962092553
133,90,132,1399.0,17760.0,0.07877252252252252,0.196949912,2.4641e-5,17.775924767,0.766636532,2.192664527,4.943809886
134,88,129,1399.0,17664.0,0.07920063405797101,0.19423328,2.4491e-5,17.775940481,0.759698903,2.241454301,4.965419114
135,86,125,1399.0,17568.0,0.07963342440801457,0.196021362,2.4541e-5,17.749824568,0.77002309,2.244133161,4.973507276
136,84,123,1399.0,17472.0,0.08007097069597069,0.195945063,2.4791e-5,17.793381264,0.758984676,2.223761942,4.967845004
137,82,120,1399.0,17376.0,0.0805133517495396,0.196404909,2.5491e-5,17.781126567,0.76777764,2.208548873,4.942758101
138,80,116,1399.0,17280.0,0.08096064814814814,0.197313346,2.469e-5,17.785944557,0.814271788,2.200296465,4.939179018
139,78,114,1399.0,17184.0,0.08141294227188083,0.155633427,2.5181e-5,17.79491891,0.767423131,2.233213884,4.963944358
140,76,111,1399.0,17088.0,0.08187031835205992,0.194686919,2.4311e-5,17.835512877,0.761171578,2.216772786,4.968370761
141,74,108,1399.0,16992.0,0.0823328625235405,0.19895497,2.4301e-5,17.80769545,0.768202031,2.212642548,4.971369432
142,72,106,1399.0,16896.0,0.08280066287878787,0.197589165,2.4241e-5,17.817799582,0.760097766,2.219367009,4.967751237
143,70,102,1399.0,16800.0,0.08327380952380953,0.200103786,2.425e-5,17.804210307,0.767108387,2.264925155,4.965506236
144,68,99,1399.0,16704.0,0.08375239463601533,0.196633322,2.5371e-5,17.822197608,0.762852947,2.20877412,4.971541033
145,66,97,1399.0,16608.0,0.08423651252408478,0.200144552,2.4801e-5,17.823667792,0.766965999,2.209992675,4.969252216
146,64,93,1399.0,16512.0,0.08472625968992248,0.199816644,2.4901e-5,17.838429006,0.764432365,2.241092809,4.961995819
147,62,89,1399.0,16416.0,0.08522173489278752,0.187325579,2.5321e-5,17.811923957,0.767393244,2.227406228,4.960056608
148,60,85,1399.0,16320.0,0.08572303921568628,0.198893612,2.4451e-5,17.82940565,0.760747136,2.209815727,4.971563658
149,58,83,1399.0,16224.0,0.08623027613412229,0.201039293,2.4651e-5,17.817639935,0.767607352,2.210546374,4.97066195
150,56,81,1399.0,16128.0,0.08674355158730158,0.199841932,2.414e-5,17.82203287,0.760048809,2.243550629,4.954439346
151,54,79,1399.0,16032.0,0.0872629740518962,0.2011596,2.4741e-5,17.804574042,0.767800679,2.250206119,4.955980994
152,52,75,1399.0,15936.0,0.08778865461847389,0.19971389,2.4331e-5,17.829821975,0.762018993,2.205143141,4.970086548
153,50,73,1399.0,15840.0,0.08832070707070708,0.201368798,2.4881e-5,17.836101646,0.767371477,2.218711432,4.96364023
154,48,71,1399.0,15744.0,0.08885924796747967,0.200798594,2.4491e-5,17.830384655,0.765407907,2.286796949,4.939295093
155,46,67,1399.0,15648.0,0.08940439672801637,0.202551163,2.5121e-5,17.827221721,0.768466657,2.262575248,4.943430916
156,44,65,1399.0,15552.0,0.08995627572016461,0.198816901,2.578e-5,17.840506569,0.760760306,2.220630133,4.952844324
157,42,63,1399.0,15456.0,0.09051501035196688,0.201424744,2.5021e-5,17.814439397,0.767553139,2.196934945,4.958506547
158,40,59,1399.0,15360.0,0.09108072916666667,0.202145126,2.565e-5,17.808712307,0.76137146,2.235801178,4.949559042
159,38,55,1399.0,15264.0,0.0916535639412998,0.201663393,2.4591e-5,17.784477195,0.766209648,2.249329555,4.964028527
160,36,53,1399.0,15168.0,0.09223364978902954,0.199579456,2.5461e-5,17.900752023,0.761934363,2.209582978,4.950507063
161,34,48,1399.0,15072.0,0.09282112526539278,0.159541692,2.5211e-5,17.769415534,0.935609132,2.216664395,4.962977201
162,32,44,1399.0,14976.0,0.09341613247863248,0.201979445,2.5581e-5,17.802148727,0.758630938,2.257162782,4.954367291
163,30,40,1399.0,14880.0,0.09401881720430108,0.203381244,2.5411e-5,17.808584074,0.768160516,2.239967841,4.949515694
164,28,35,1399.0,14784.0,0.09462932900432901,0.200707381,2.5071e-5,17.811958674,0.765546396,2.222827481,4.962523474
165,26,31,1399.0,14688.0,0.09524782135076253,0.203476579,2.4431e-5,17.791537057,0.759747517,2.210172596,4.96717851
166,24,29,1399.0,14592.0,0.09587445175438597,0.38619058,2.5161e-5,17.784565893,0.765981903,2.205094732,4.970469758
167,22,25,1399.0,14496.0,0.09650938189845475,0.209174268,2.6071e-5,17.886396985,0.762283972,2.251379768,4.9348063
168,20,21,1399.0,14400.0,0.09715277777777778,0.184182012,2.5331e-5,17.791795342,0.760972528,2.229551257,4.941190792
169,18,17,1399.0,14304.0,0.09780480984340045,0.203935864,2.572e-5,17.823665061,0.762353868,2.199132836,4.965200905
170,16,15,1399.0,14208.0,0.09846565315315316,0.200164969,2.4631e-5,17.792385586,0.76804392,2.174965407,4.972074439
171,14,13,1399.0,14112.0,0.09913548752834467,0.204567903,2.5071e-5,17.806154396,0.759505453,2.2340466,4.972671228
172,12,11,1399.0,14016.0,0.09981449771689498,0.201861418,2.5971e-5,18.529840195,0.789347616,2.23167521,4.947890089
173,10,9,1399.0,13920.0,0.1005028735632184,0.202902727,2.4951e-5,17.865867105,0.761004999,2.194876208,4.93177029
174,8,7,1399.0,13824.0,0.10120081018518519,0.198079003,2.4651e-5,17.791197743,0.767399089,2.226370372,4.951979965
1 operations graph_nodes graph_edges graph_ce graph_dt graph_ci gen_func_t cpu_compile_t cpu_st_t cpu_mt_t gpu_compile_t gpu_t
2 0 356 493 1399.0 30528.0 0.0458267819706499 0.077070556 2.6761e-5 17.804336617 0.960385595 10.618577031 4.95440474
3 1 354 491 1399.0 30432.0 0.04597134595162986 1.030851104 2.37e-5 17.726472964 0.933074463 2.174912444 4.959474851
4 2 352 489 1399.0 30336.0 0.04611682489451477 0.376282553 2.3861e-5 17.935912907 0.968087391 2.238665483 4.912705328
5 3 350 487 1399.0 30240.0 0.04626322751322751 0.076651194 4.2451e-5 17.976779783 0.977130996 2.246167674 4.954520005
6 4 348 485 1399.0 30144.0 0.04641056263269639 0.223709216 2.8031e-5 17.67129111 0.97799748 2.175788856 4.923999491
7 5 346 483 1399.0 30048.0 0.04655883919062833 0.076034997 4.3191e-5 17.766336956 0.967055891 2.187609178 4.922574669
8 6 344 481 1399.0 29952.0 0.04670806623931624 0.398917781 4.3422e-5 17.709032771 0.971142926 2.170963978 4.917191185
9 7 342 479 1399.0 29856.0 0.04685825294748124 0.352569343 4.3801e-5 17.690255833 0.952966242 2.159295978 4.945842152
10 8 340 477 1399.0 29760.0 0.04700940860215054 0.117620751 4.2992e-5 17.905787431 0.749896479 2.19940915 4.922882222
11 9 338 475 1399.0 29664.0 0.04716154261057174 0.318053898 2.3481e-5 17.522775542 0.745113955 2.202366151 4.928734427
12 10 336 473 1399.0 29568.0 0.047314664502164504 0.184069985 2.3381e-5 17.529935879 0.74637911 2.238397648 4.919919125
13 11 334 471 1399.0 29472.0 0.047468783930510315 0.086029218 2.365e-5 17.560859257 0.75559668 2.249242933 4.956561058
14 12 332 469 1399.0 29376.0 0.04762391067538126 0.077326472 2.4361e-5 17.559317648 0.746726769 2.1818156 4.938490196
15 13 330 467 1399.0 29280.0 0.047780054644808743 0.169738661 2.342e-5 17.517109121 0.751453942 2.187781478 4.923659727
16 14 328 465 1399.0 29184.0 0.047937225877192985 0.077817676 2.315e-5 17.533304215 0.745481303 2.209343496 4.960503415
17 15 326 463 1399.0 29088.0 0.04809543454345434 0.171584444 2.352e-5 17.579912576 0.754778436 2.210370024 4.934281254
18 16 324 461 1399.0 28992.0 0.04825469094922737 0.084223667 2.305e-5 17.570464754 0.751290178 2.22797709 4.939806799
19 17 322 459 1399.0 28896.0 0.04841500553709856 0.123005102 2.3661e-5 17.605650973 0.756929676 2.269940175 4.937928844
20 18 320 457 1399.0 28800.0 0.04857638888888889 0.086677986 2.37e-5 17.5539199 0.746367967 2.264938904 4.959258096
21 19 318 455 1399.0 28704.0 0.04873885172798216 0.12293158 2.3711e-5 17.609395222 0.755783994 2.264754078 4.92827168
22 20 316 453 1399.0 28608.0 0.04890240492170023 0.124475123 2.4281e-5 17.597716228 0.75106304 2.20218749 4.933120236
23 21 314 451 1399.0 28512.0 0.04906705948372615 0.112172177 2.6391e-5 17.623178954 0.755694751 2.186417905 4.921509117
24 22 312 449 1399.0 28416.0 0.04923282657657658 0.219362642 2.321e-5 17.593459902 0.747914841 2.168628993 4.952994795
25 23 310 447 1399.0 28320.0 0.049399717514124294 0.080729209 2.358e-5 17.571675834 0.755489634 2.209531477 4.951190234
26 24 308 445 1399.0 28224.0 0.049567743764172334 0.080235835 2.3271e-5 17.615791747 0.750314688 2.21464245 4.949496195
27 25 306 443 1399.0 28128.0 0.049736916951080776 0.124106403 2.374e-5 17.60716179 0.753826187 2.186184237 4.920128786
28 26 304 441 1399.0 28032.0 0.04990724885844749 0.080715608 2.3781e-5 17.581988477 0.750266997 2.209826064 4.937813884
29 27 302 439 1399.0 27936.0 0.05007875143184422 0.080606465 2.4071e-5 17.633096607 0.749125265 2.198599437 4.935320693
30 28 300 437 1399.0 27840.0 0.0502514367816092 0.081056137 2.3781e-5 17.564695624 0.746230293 2.225110355 4.939656214
31 29 298 435 1399.0 27744.0 0.05042531718569781 0.096545225 2.379e-5 17.58144781 0.747458632 2.263551336 4.924245431
32 30 296 433 1399.0 27648.0 0.050600405092592594 0.120638697 2.383e-5 17.574370836 0.748933285 2.234417803 4.915183371
33 31 294 431 1399.0 27552.0 0.0507767131242741 0.125073582 2.393e-5 17.627352699 0.754384428 2.214199106 4.938130459
34 32 292 429 1399.0 27456.0 0.05095425407925408 0.12314953 2.468e-5 17.697160429 0.796488763 2.261473826 4.956976138
35 33 290 427 1399.0 27360.0 0.051133040935672516 0.125481487 2.354e-5 17.636971006 0.748416796 2.222200724 4.948970096
36 34 288 425 1399.0 27264.0 0.051313086854460094 0.094052012 2.4301e-5 17.62971842 0.805139938 2.205015347 4.959455536
37 35 286 423 1399.0 27168.0 0.051494405182567725 0.08136377 2.4041e-5 17.621304482 0.747718686 2.244362062 4.941432169
38 36 284 421 1399.0 27072.0 0.05167700945626478 0.080217839 2.3921e-5 17.61427713 0.747754586 2.212103901 4.933185029
39 37 282 417 1399.0 26976.0 0.051860913404507714 0.126372199 2.376e-5 17.601417663 0.750036789 2.163344775 4.926698186
40 38 280 414 1399.0 26880.0 0.052046130952380955 0.125444544 2.476e-5 17.612452443 0.748155225 2.195259021 4.91594575
41 39 278 412 1399.0 26784.0 0.05223267622461171 0.083158944 2.4551e-5 17.599589645 0.741671021 2.208064301 4.9351555
42 40 276 410 1399.0 26688.0 0.05242056354916067 0.083321959 2.4101e-5 17.567124159 0.748238012 2.197233222 4.954754226
43 41 274 408 1399.0 26592.0 0.052609807460890494 0.084803792 2.3901e-5 17.549365204 0.754817994 2.229499405 4.94957165
44 42 272 405 1399.0 26496.0 0.05280042270531401 0.127648261 2.3851e-5 17.582852416 0.750759497 2.230398721 4.937220319
45 43 270 401 1399.0 26400.0 0.052992424242424244 0.128445184 2.428e-5 17.596647819 0.75777713 2.160922996 4.937371146
46 44 268 399 1399.0 26304.0 0.053185827250608275 0.129526096 2.5081e-5 17.594476326 0.746906342 2.219401891 4.93357998
47 45 266 397 1399.0 26208.0 0.05338064713064713 0.129819495 2.4731e-5 17.568331366 0.750368555 2.18948505 4.922275732
48 46 264 394 1399.0 26112.0 0.05357689950980392 0.087649075 2.462e-5 17.585414218 0.751605626 2.198684054 4.941424565
49 47 262 391 1399.0 26016.0 0.05377460024600246 0.089110637 2.4551e-5 17.614139291 0.750622403 2.168793662 4.953321773
50 48 260 389 1399.0 25920.0 0.053973765432098766 0.090307061 2.45e-5 17.633806293 0.749096576 2.224521298 4.930813246
51 49 258 387 1399.0 25824.0 0.054174411400247834 0.133480181 2.461e-5 17.634768586 0.756613261 2.201452177 4.972809945
52 50 256 385 1399.0 25728.0 0.05437655472636816 0.134254424 2.425e-5 17.606323938 0.748779206 2.216818872 4.939295094
53 51 254 382 1399.0 25632.0 0.05458021223470662 0.134016868 2.4531e-5 17.5926305 0.75625873 2.227679889 4.968213894
54 52 252 379 1399.0 25536.0 0.054785401002506263 0.135650945 2.4601e-5 17.642803637 0.751975585 2.226011125 4.9285844
55 53 250 375 1399.0 25440.0 0.054992138364779876 0.136647933 2.4161e-5 17.799738254 0.76667472 2.165144989 4.930427128
56 54 248 373 1399.0 25344.0 0.05520044191919192 0.123103164 2.4461e-5 17.745879754 0.760526742 2.161495227 4.940492285
57 55 246 370 1399.0 25248.0 0.05541032953105197 0.09476826 2.3511e-5 17.596131758 0.756924114 2.180021837 4.954121771
58 56 244 365 1399.0 25152.0 0.05562181933842239 0.095345787 2.4171e-5 17.612023424 0.747989147 2.215139082 4.945396527
59 57 242 362 1399.0 25056.0 0.05583492975734355 0.139570128 2.3801e-5 17.630922372 0.750668446 2.186529739 4.961981394
60 58 240 359 1399.0 24960.0 0.05604967948717949 0.097466916 2.4451e-5 17.61078772 0.7485922 2.217673752 4.95291513
61 59 238 357 1399.0 24864.0 0.05626608751608752 0.138599302 2.3601e-5 17.586404505 0.756929027 2.233374301 4.935342135
62 60 236 352 1399.0 24768.0 0.05648417312661499 0.147210964 2.4911e-5 17.650436019 0.74908103 2.157077946 4.937714591
63 61 234 350 1399.0 24672.0 0.05670395590142672 0.099491094 2.3601e-5 17.608002511 0.756924473 2.165309665 4.932434479
64 62 232 348 1399.0 24576.0 0.056925455729166664 0.141929827 2.454e-5 17.605756917 0.749178717 2.234082435 4.957629943
65 63 230 344 1399.0 24480.0 0.057148692810457515 0.142483983 2.4211e-5 17.623883273 0.758216784 2.210078838 4.930940098
66 64 228 341 1399.0 24384.0 0.057373687664041995 0.101524943 2.4371e-5 17.662312587 0.751128917 2.22449657 4.96708528
67 65 226 339 1399.0 24288.0 0.05760046113306983 0.102619253 2.3831e-5 17.610112922 0.758167777 2.187456785 4.957519684
68 66 224 337 1399.0 24192.0 0.05782903439153439 0.10351088 2.3401e-5 17.611932402 0.749178457 2.236980212 4.933450322
69 67 222 335 1399.0 24096.0 0.05805942895086321 0.148780402 2.3711e-5 17.636035095 0.75707833 2.252138664 4.951632995
70 68 220 333 1399.0 24000.0 0.058291666666666665 0.148311059 2.4851e-5 17.617252052 0.750104986 2.22330739 4.9243139
71 69 218 329 1399.0 23904.0 0.05852576974564926 0.151678794 2.4181e-5 17.627742278 0.755299894 2.248062201 4.951401482
72 70 216 326 1399.0 23808.0 0.05876176075268817 0.15082361 2.3851e-5 17.647410652 0.752445605 2.240948426 4.949599133
73 71 214 323 1399.0 23712.0 0.05899966261808367 0.153382492 2.4011e-5 17.654743596 0.752802907 2.253819342 4.966250371
74 72 212 320 1399.0 23616.0 0.05923949864498645 0.151516131 2.3931e-5 17.672908543 0.750257716 2.220003155 4.944782327
75 73 210 317 1399.0 23520.0 0.059481292517006804 0.154244628 2.386e-5 17.60330678 0.750422813 2.211295295 4.943727837
76 74 208 313 1399.0 23424.0 0.05972506830601093 0.153767234 2.4291e-5 17.640950842 0.74988433 2.24794966 4.952712228
77 75 206 311 1399.0 23328.0 0.05997085048010974 0.155927375 2.406e-5 17.589128666 0.749120129 2.253801308 4.953014816
78 76 204 306 1399.0 23232.0 0.06021866391184573 0.15464184 2.4521e-5 17.662616581 0.750484429 2.227511412 4.924026259
79 77 202 304 1399.0 23136.0 0.06046853388658368 0.157807248 2.4041e-5 17.611953814 0.755679546 2.178734374 4.943974526
80 78 200 301 1399.0 23040.0 0.06072048611111111 0.155978707 2.4051e-5 17.624250437 0.794935481 2.247188963 4.940403894
81 79 198 298 1399.0 22944.0 0.06097454672245467 0.158377905 2.5091e-5 17.634938402 0.754743461 2.245248812 4.919902064
82 80 196 296 1399.0 22848.0 0.061230742296918765 0.158750786 2.4511e-5 17.6360904 0.750867213 2.200032233 4.942215648
83 81 194 293 1399.0 22752.0 0.061489099859353025 0.161152794 2.4831e-5 17.780761042 0.765338482 2.204873372 4.939655562
84 82 192 290 1399.0 22656.0 0.061749646892655365 0.160175486 2.318e-5 17.798147683 0.76168194 2.230891056 4.955801153
85 83 190 287 1399.0 22560.0 0.06201241134751773 0.159868767 2.4791e-5 17.764165058 0.796377137 2.239618185 4.928054627
86 84 188 283 1399.0 22464.0 0.06227742165242165 0.160933577 2.4221e-5 17.798426962 0.848255338 2.218112612 4.932433146
87 85 186 280 1399.0 22368.0 0.06254470672389127 0.163393917 2.4371e-5 17.808464853 0.765692696 2.213490844 4.943298137
88 86 184 277 1399.0 22272.0 0.06281429597701149 0.163792118 2.4261e-5 17.805783627 0.761027705 2.232891092 4.919454211
89 87 182 275 1399.0 22176.0 0.06308621933621934 0.162177953 2.43e-5 17.797665375 0.761040026 2.236586089 4.951072155
90 88 180 271 1399.0 22080.0 0.06336050724637682 0.165377424 2.557e-5 17.805099359 0.763146286 2.212611436 4.921150887
91 89 178 268 1399.0 21984.0 0.06363719068413391 0.166754373 2.5141e-5 17.770997205 0.764361801 2.199943181 4.934748884
92 90 176 266 1399.0 21888.0 0.06391630116959064 0.167241957 2.4571e-5 17.770223198 0.759580227 2.247867501 4.935730147
93 91 174 264 1399.0 21792.0 0.06419787077826726 0.169623073 2.5e-5 17.771153368 0.750276145 2.243455929 4.939933808
94 92 172 261 1399.0 21696.0 0.06448193215339233 0.168358288 2.5181e-5 17.799224982 0.760906435 2.210000929 4.943923374
95 93 170 259 1399.0 21600.0 0.06476851851851852 0.170287483 2.529e-5 17.79271252 0.763151029 2.205444892 4.924953813
96 94 168 254 1399.0 21504.0 0.06505766369047619 0.168986856 2.5021e-5 17.775583682 0.760237647 2.222811993 4.951301097
97 95 166 250 1399.0 21408.0 0.06534940209267563 0.171662521 2.4401e-5 17.636022254 0.749599438 2.234944605 4.958431762
98 96 164 246 1399.0 21312.0 0.06564376876876876 0.170911431 2.4481e-5 17.633556045 0.788097892 2.198060879 4.922871993
99 97 162 244 1399.0 21216.0 0.06594079939668175 0.172387252 2.4781e-5 17.620254381 0.799269067 2.202436673 4.936411908
100 98 160 241 1399.0 21120.0 0.0662405303030303 0.171830017 2.581e-5 17.656653806 0.750275098 2.200933622 4.94776375
101 99 158 238 1399.0 21024.0 0.06654299847792998 0.174560093 2.447e-5 17.625724723 0.756745741 2.249721096 4.958786002
102 100 156 235 1399.0 20928.0 0.06684824159021406 0.178996759 2.453e-5 17.669194606 0.749422535 2.218089817 4.960858653
103 101 154 231 1399.0 20832.0 0.0671562980030722 0.175032127 2.3871e-5 17.642586975 0.754643863 2.194675279 4.944134534
104 102 152 229 1399.0 20736.0 0.06746720679012345 0.176393906 2.4731e-5 17.592973556 0.749943551 2.229565622 4.927935661
105 103 150 225 1399.0 20640.0 0.06778100775193799 0.178017631 2.412e-5 17.630568322 0.755272802 2.221125776 4.952348991
106 104 148 223 1399.0 20544.0 0.0680977414330218 0.175897841 2.36e-5 17.661766307 0.749293633 2.2201698 4.963634779
107 105 146 221 1399.0 20448.0 0.06841744913928012 0.178367362 2.5001e-5 17.654508999 0.755361234 2.185187066 4.938710949
108 106 144 218 1399.0 20352.0 0.06874017295597484 0.178791594 2.502e-5 17.649520916 0.749748217 2.238645461 4.955141284
109 107 142 216 1399.0 20256.0 0.06906595576619273 0.175900502 2.3291e-5 17.648252045 0.755157659 2.250102545 4.948078116
110 108 140 212 1399.0 20160.0 0.06939484126984127 0.180050739 2.3901e-5 17.642556024 0.751139061 2.195233955 4.92102672
111 109 138 210 1399.0 20064.0 0.06972687400318979 0.182587052 2.492e-5 17.631301401 0.754040144 2.177296385 4.948297571
112 110 136 207 1399.0 19968.0 0.07006209935897435 0.181449712 2.4401e-5 17.618787463 0.748940439 2.251932822 4.950366155
113 111 134 203 1399.0 19872.0 0.07040056360708534 0.183466877 2.407e-5 17.658532693 0.756589176 2.240568188 4.97337861
114 112 132 201 1399.0 19776.0 0.0707423139158576 0.181545084 2.485e-5 17.63441504 0.751343023 2.183033772 4.975534251
115 113 130 199 1399.0 19680.0 0.07108739837398374 0.177809314 2.417e-5 17.627163359 0.754577307 2.211080446 4.977438563
116 114 128 195 1399.0 19584.0 0.07143586601307189 0.183038393 2.5541e-5 17.63366534 0.751510139 2.237832092 4.969644912
117 115 126 191 1399.0 19488.0 0.07178776683087028 0.186344151 2.4971e-5 17.711808739 0.759177 2.236586017 4.951292022
118 116 124 187 1399.0 19392.0 0.07214315181518152 0.184833587 2.475e-5 17.648467279 0.749564641 2.179772409 4.97017709
119 117 122 183 1399.0 19296.0 0.07250207296849089 0.193249355 2.3811e-5 17.639230223 0.755564354 2.195109482 4.982434629
120 118 120 180 1399.0 19200.0 0.07286458333333333 0.186818046 2.372e-5 17.635977046 0.750626058 2.243877912 4.972608068
121 119 118 177 1399.0 19104.0 0.07323073701842546 0.189204719 2.4961e-5 17.791522288 0.766082656 2.242948358 4.980365418
122 120 116 173 1399.0 19008.0 0.07360058922558922 0.186391669 2.4181e-5 17.645956891 0.750893368 2.197914806 4.98745469
123 121 114 171 1399.0 18912.0 0.07397419627749577 0.19060573 2.4701e-5 17.771140583 0.765197694 2.20643796 4.959618561
124 122 112 169 1399.0 18816.0 0.0743516156462585 0.188466188 2.381e-5 17.795228145 0.759434429 2.26208531 4.965068853
125 123 110 165 1399.0 18720.0 0.07473290598290598 0.191524927 2.3841e-5 17.779734215 0.767242896 2.242967333 4.950554681
126 124 108 161 1399.0 18624.0 0.07511812714776632 0.189450326 2.3601e-5 17.807849571 0.762371273 2.196711688 4.966122065
127 125 106 157 1399.0 18528.0 0.0755073402417962 0.191473057 2.357e-5 17.632877767 0.755845465 2.188474891 4.977562868
128 126 104 153 1399.0 18432.0 0.0759006076388889 0.191382079 2.3851e-5 17.775729988 0.758861116 2.278116886 4.979965119
129 127 102 151 1399.0 18336.0 0.07629799301919721 0.192296369 2.394e-5 17.777918793 0.764981303 2.224818047 4.949944943
130 128 100 149 1399.0 18240.0 0.07669956140350877 0.191424719 2.4331e-5 17.856475915 0.76057459 2.201588049 4.941974925
131 129 98 146 1399.0 18144.0 0.07710537918871252 0.194280932 2.3951e-5 17.779963845 0.766401736 2.223182601 4.961465017
132 130 96 142 1399.0 18048.0 0.07751551418439716 0.192850597 2.3861e-5 17.765033828 0.760509569 2.250897799 4.967399083
133 131 94 138 1399.0 17952.0 0.07793003565062388 0.194741823 2.38e-5 17.778261696 0.764271609 2.248898068 4.975998565
134 132 92 136 1399.0 17856.0 0.07834901433691756 0.193567295 2.5281e-5 17.791322862 0.759809249 2.216694812 4.962092553
135 133 90 132 1399.0 17760.0 0.07877252252252252 0.196949912 2.4641e-5 17.775924767 0.766636532 2.192664527 4.943809886
136 134 88 129 1399.0 17664.0 0.07920063405797101 0.19423328 2.4491e-5 17.775940481 0.759698903 2.241454301 4.965419114
137 135 86 125 1399.0 17568.0 0.07963342440801457 0.196021362 2.4541e-5 17.749824568 0.77002309 2.244133161 4.973507276
138 136 84 123 1399.0 17472.0 0.08007097069597069 0.195945063 2.4791e-5 17.793381264 0.758984676 2.223761942 4.967845004
139 137 82 120 1399.0 17376.0 0.0805133517495396 0.196404909 2.5491e-5 17.781126567 0.76777764 2.208548873 4.942758101
140 138 80 116 1399.0 17280.0 0.08096064814814814 0.197313346 2.469e-5 17.785944557 0.814271788 2.200296465 4.939179018
141 139 78 114 1399.0 17184.0 0.08141294227188083 0.155633427 2.5181e-5 17.79491891 0.767423131 2.233213884 4.963944358
142 140 76 111 1399.0 17088.0 0.08187031835205992 0.194686919 2.4311e-5 17.835512877 0.761171578 2.216772786 4.968370761
143 141 74 108 1399.0 16992.0 0.0823328625235405 0.19895497 2.4301e-5 17.80769545 0.768202031 2.212642548 4.971369432
144 142 72 106 1399.0 16896.0 0.08280066287878787 0.197589165 2.4241e-5 17.817799582 0.760097766 2.219367009 4.967751237
145 143 70 102 1399.0 16800.0 0.08327380952380953 0.200103786 2.425e-5 17.804210307 0.767108387 2.264925155 4.965506236
146 144 68 99 1399.0 16704.0 0.08375239463601533 0.196633322 2.5371e-5 17.822197608 0.762852947 2.20877412 4.971541033
147 145 66 97 1399.0 16608.0 0.08423651252408478 0.200144552 2.4801e-5 17.823667792 0.766965999 2.209992675 4.969252216
148 146 64 93 1399.0 16512.0 0.08472625968992248 0.199816644 2.4901e-5 17.838429006 0.764432365 2.241092809 4.961995819
149 147 62 89 1399.0 16416.0 0.08522173489278752 0.187325579 2.5321e-5 17.811923957 0.767393244 2.227406228 4.960056608
150 148 60 85 1399.0 16320.0 0.08572303921568628 0.198893612 2.4451e-5 17.82940565 0.760747136 2.209815727 4.971563658
151 149 58 83 1399.0 16224.0 0.08623027613412229 0.201039293 2.4651e-5 17.817639935 0.767607352 2.210546374 4.97066195
152 150 56 81 1399.0 16128.0 0.08674355158730158 0.199841932 2.414e-5 17.82203287 0.760048809 2.243550629 4.954439346
153 151 54 79 1399.0 16032.0 0.0872629740518962 0.2011596 2.4741e-5 17.804574042 0.767800679 2.250206119 4.955980994
154 152 52 75 1399.0 15936.0 0.08778865461847389 0.19971389 2.4331e-5 17.829821975 0.762018993 2.205143141 4.970086548
155 153 50 73 1399.0 15840.0 0.08832070707070708 0.201368798 2.4881e-5 17.836101646 0.767371477 2.218711432 4.96364023
156 154 48 71 1399.0 15744.0 0.08885924796747967 0.200798594 2.4491e-5 17.830384655 0.765407907 2.286796949 4.939295093
157 155 46 67 1399.0 15648.0 0.08940439672801637 0.202551163 2.5121e-5 17.827221721 0.768466657 2.262575248 4.943430916
158 156 44 65 1399.0 15552.0 0.08995627572016461 0.198816901 2.578e-5 17.840506569 0.760760306 2.220630133 4.952844324
159 157 42 63 1399.0 15456.0 0.09051501035196688 0.201424744 2.5021e-5 17.814439397 0.767553139 2.196934945 4.958506547
160 158 40 59 1399.0 15360.0 0.09108072916666667 0.202145126 2.565e-5 17.808712307 0.76137146 2.235801178 4.949559042
161 159 38 55 1399.0 15264.0 0.0916535639412998 0.201663393 2.4591e-5 17.784477195 0.766209648 2.249329555 4.964028527
162 160 36 53 1399.0 15168.0 0.09223364978902954 0.199579456 2.5461e-5 17.900752023 0.761934363 2.209582978 4.950507063
163 161 34 48 1399.0 15072.0 0.09282112526539278 0.159541692 2.5211e-5 17.769415534 0.935609132 2.216664395 4.962977201
164 162 32 44 1399.0 14976.0 0.09341613247863248 0.201979445 2.5581e-5 17.802148727 0.758630938 2.257162782 4.954367291
165 163 30 40 1399.0 14880.0 0.09401881720430108 0.203381244 2.5411e-5 17.808584074 0.768160516 2.239967841 4.949515694
166 164 28 35 1399.0 14784.0 0.09462932900432901 0.200707381 2.5071e-5 17.811958674 0.765546396 2.222827481 4.962523474
167 165 26 31 1399.0 14688.0 0.09524782135076253 0.203476579 2.4431e-5 17.791537057 0.759747517 2.210172596 4.96717851
168 166 24 29 1399.0 14592.0 0.09587445175438597 0.38619058 2.5161e-5 17.784565893 0.765981903 2.205094732 4.970469758
169 167 22 25 1399.0 14496.0 0.09650938189845475 0.209174268 2.6071e-5 17.886396985 0.762283972 2.251379768 4.9348063
170 168 20 21 1399.0 14400.0 0.09715277777777778 0.184182012 2.5331e-5 17.791795342 0.760972528 2.229551257 4.941190792
171 169 18 17 1399.0 14304.0 0.09780480984340045 0.203935864 2.572e-5 17.823665061 0.762353868 2.199132836 4.965200905
172 170 16 15 1399.0 14208.0 0.09846565315315316 0.200164969 2.4631e-5 17.792385586 0.76804392 2.174965407 4.972074439
173 171 14 13 1399.0 14112.0 0.09913548752834467 0.204567903 2.5071e-5 17.806154396 0.759505453 2.2340466 4.972671228
174 172 12 11 1399.0 14016.0 0.09981449771689498 0.201861418 2.5971e-5 18.529840195 0.789347616 2.23167521 4.947890089
175 173 10 9 1399.0 13920.0 0.1005028735632184 0.202902727 2.4951e-5 17.865867105 0.761004999 2.194876208 4.93177029
176 174 8 7 1399.0 13824.0 0.10120081018518519 0.198079003 2.4651e-5 17.791197743 0.767399089 2.226370372 4.951979965

View File

@ -1,82 +0,0 @@
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
0,356,493,1399.0,30528.0,0.0458267819706499,0.084389903,2.4971e-5,17.802549835,0.960409581,2.406448706,4.927079076
1,351,483,1369.0,30528.0,0.044844077568134175,0.126855933,2.9211e-5,16.868735557,0.927387188,2.257632484,4.697683068
2,346,478,1369.0,30048.0,0.04556043663471779,0.08319682,3.5431e-5,16.871399152,0.834869326,2.264361993,4.701280771
3,341,473,1314.0,30048.0,0.04373003194888179,0.124422234,2.392e-5,16.454231193,0.856669072,2.271991539,4.68580348
4,336,463,1284.0,30048.0,0.042731629392971246,0.121696991,2.2921e-5,15.881542683,0.816430136,2.213686135,4.449106524
5,331,458,1284.0,29568.0,0.04342532467532467,0.124024888,2.314e-5,15.879200155,0.799333453,2.194093083,4.435654931
6,326,448,1254.0,29568.0,0.04241071428571429,0.121610951,2.2e-5,15.325702423,0.833341953,2.203843882,4.199677306
7,321,438,1224.0,29568.0,0.041396103896103896,0.118972208,2.1631e-5,14.367273685,0.711553932,2.16189756,3.948872646
8,316,433,1224.0,29088.0,0.04207920792079208,0.074826839,2.2031e-5,14.367107152,0.792981221,2.169096496,3.961630969
9,311,428,1169.0,29088.0,0.04018839383938394,0.116237162,2.15e-5,14.416973472,0.788583102,2.092186151,3.946339564
10,306,418,1139.0,29088.0,0.03915704070407041,0.114647398,2.031e-5,13.671420757,0.745657392,2.037551329,3.657411205
11,301,408,1109.0,29088.0,0.03812568756875687,0.11434652,1.951e-5,13.093103664,0.686554396,2.065489584,3.441139671
12,296,403,1109.0,28608.0,0.03876538031319911,0.112282663,1.8991e-5,13.11525848,0.705183633,2.0639299,3.422598036
13,291,398,1109.0,28128.0,0.039426905574516495,0.111549203,1.9661e-5,13.08100601,0.700772882,2.065935946,3.41679234
14,286,388,1079.0,28128.0,0.0383603526734926,0.109881396,1.907e-5,11.871746271,0.665244638,2.063828106,3.187580585
15,281,378,1049.0,28128.0,0.037293799772468716,0.108444747,1.7961e-5,10.963517612,0.62180291,2.037926216,2.935137574
16,276,373,1049.0,27648.0,0.03794126157407408,0.107959773,1.874e-5,11.021594456,0.541779823,2.003876106,2.931304737
17,271,368,1049.0,27168.0,0.03861160188457008,0.105629068,1.8241e-5,11.017450178,0.581974375,2.017201027,2.952118903
18,266,363,1049.0,26688.0,0.0393060551558753,0.107303406,1.8301e-5,11.028597789,0.556078309,2.037535226,2.911405619
19,261,358,994.0,26688.0,0.03724520383693045,0.106584986,1.7111e-5,10.789192026,0.525275525,2.011931363,2.931360979
20,256,353,939.0,26688.0,0.035184352517985615,0.105743463,1.7521e-5,10.50283261,0.535253087,1.962456949,2.941274646
21,255,351,933.0,26688.0,0.03495953237410072,0.105189187,1.7471e-5,10.739591259,0.555102576,2.013201521,2.896175037
22,254,350,933.0,26592.0,0.035085740072202165,0.105895137,1.6631e-5,10.68514711,0.571809578,1.974934611,2.890503396
23,253,348,927.0,26592.0,0.0348601083032491,0.104181459,1.817e-5,10.344271645,0.572483889,2.002875753,2.842241926
24,252,347,927.0,26496.0,0.034986413043478264,0.103568232,1.7471e-5,10.363216025,0.602207417,1.943794016,2.811132729
25,247,342,927.0,26016.0,0.035631918819188195,0.102006829,1.669e-5,10.360319761,0.588967585,1.942523675,2.838431844
26,246,340,921.0,26016.0,0.03540129151291513,0.103244544,1.672e-5,10.140255758,0.565172778,1.980058606,2.776594151
27,245,339,921.0,25920.0,0.03553240740740741,0.102991317,1.723e-5,10.166352736,0.588556746,2.025713505,2.754827976
28,244,337,915.0,25920.0,0.03530092592592592,0.102527335,1.6261e-5,9.965044496,0.527648944,1.966870364,2.708992883
29,243,335,909.0,25920.0,0.035069444444444445,0.101020632,1.6541e-5,9.899918186,0.530837495,1.99964346,2.686936268
30,242,334,909.0,25824.0,0.03519981412639405,0.099846559,1.614e-5,9.924451078,0.532149983,1.992832633,2.667590089
31,241,333,909.0,25728.0,0.035331156716417914,0.103293156,1.634e-5,9.893503718,0.500188044,1.971455575,2.661440862
32,236,328,909.0,25248.0,0.036002851711026615,0.110948742,1.5851e-5,9.916889596,0.515528547,2.014256204,2.691654688
33,235,326,903.0,25248.0,0.03576520912547528,0.099799239,1.658e-5,9.667648582,0.561210643,1.981308261,2.647665444
34,234,324,897.0,25248.0,0.035527566539923956,0.099455409,1.6561e-5,9.588166052,0.544847505,1.932560182,2.56349283
35,233,323,897.0,25152.0,0.035663167938931296,0.103335368,1.6271e-5,9.590387462,0.542413718,1.965145602,2.559435691
36,232,321,891.0,25152.0,0.03542461832061069,0.097770562,1.6571e-5,9.362808632,0.543288523,2.017894491,2.498672404
37,231,320,891.0,25056.0,0.03556034482758621,0.100428616,1.5941e-5,9.340302395,0.548822639,1.994799194,2.525394
38,230,319,891.0,24960.0,0.03569711538461538,0.056667955,1.5341e-5,9.356871677,0.537041949,1.921246656,2.507595034
39,225,314,891.0,24480.0,0.036397058823529414,0.099323026,1.636e-5,9.383625024,0.506403697,1.972101141,2.529248938
40,220,309,836.0,24480.0,0.03415032679738562,0.096789665,1.645e-5,9.524601658,0.473707387,1.980933173,2.524768525
41,215,304,836.0,24000.0,0.034833333333333334,0.053463925,1.671e-5,9.520567128,0.487585179,1.942542795,2.535491481
42,214,302,830.0,24000.0,0.034583333333333334,0.096303802,1.6011e-5,9.137262758,0.4297148,1.950560163,2.478408276
43,213,301,830.0,23904.0,0.034722222222222224,0.070596338,1.6901e-5,9.143790565,0.492842898,1.949332161,2.476752284
44,212,299,824.0,23904.0,0.034471218206157964,0.09696925,1.612e-5,9.089211511,0.456930617,2.022026121,2.419473874
45,211,297,818.0,23904.0,0.03422021419009371,0.052526649,1.536e-5,8.807671694,0.471203239,1.970488502,2.372441242
46,210,296,818.0,23808.0,0.03435819892473118,0.096716114,1.5701e-5,8.806210783,0.451452844,1.960073481,2.387451098
47,209,295,818.0,23712.0,0.034497300944669365,0.05145174,1.6061e-5,8.867215342,0.450895098,1.968012818,2.394204111
48,204,290,818.0,23232.0,0.03521005509641873,0.093248236,1.9521e-5,8.844517253,0.476030278,1.963827031,2.389413849
49,203,288,812.0,23232.0,0.034951790633608815,0.093881584,1.527e-5,8.849095772,0.446415074,1.974782212,2.332439097
50,202,287,812.0,23136.0,0.03509681881051176,0.050473481,1.5851e-5,8.784636116,0.469233287,1.953068913,2.321316886
51,201,285,806.0,23136.0,0.034837482710926695,0.092750242,1.5541e-5,8.632088328,0.491467054,1.945455141,2.29300329
52,200,284,806.0,23040.0,0.03498263888888889,0.092540087,1.7161e-5,8.637677414,0.471865872,1.975464118,2.259260411
53,199,282,800.0,23040.0,0.034722222222222224,0.092944049,1.5261e-5,8.624992966,0.478249573,1.931707577,2.232058939
54,198,281,800.0,22944.0,0.03486750348675035,0.091660013,1.575e-5,8.680034605,0.429976994,2.022314921,2.224544849
55,197,279,794.0,22944.0,0.03460599721059972,0.092591389,1.582e-5,8.266084761,0.442472956,1.949268775,2.165130527
56,196,278,794.0,22848.0,0.03475140056022409,0.090376966,1.529e-5,8.26930839,0.438461132,1.960119483,2.169387658
57,191,273,739.0,22848.0,0.03234418767507003,0.090398736,1.589e-5,8.061516101,0.468233752,1.825342557,2.144808638
58,186,268,739.0,22368.0,0.03303826895565093,0.090566151,1.5781e-5,8.051685873,0.472555774,1.827021946,2.175475243
59,185,266,733.0,22368.0,0.03277002861230329,0.046301524,1.4931e-5,7.809555195,0.466519375,1.819191936,2.095906173
60,184,264,727.0,22368.0,0.03250178826895565,0.087977349,1.4771e-5,7.825535183,0.452072238,1.820734702,2.06485156
61,183,263,727.0,22272.0,0.032641882183908046,0.08908488,1.4591e-5,7.77560322,0.445728609,1.804235078,2.06763398
62,182,262,727.0,22176.0,0.03278318903318903,0.076517376,1.461e-5,7.754359737,0.421063625,1.812681957,2.076417548
63,181,260,721.0,22176.0,0.032512626262626264,0.088983767,1.4091e-5,7.616158878,0.422402602,1.868182992,2.016601005
64,180,259,721.0,22080.0,0.03265398550724638,0.089172453,1.467e-5,7.63910266,0.402654247,1.844390793,2.031385412
65,175,254,666.0,22080.0,0.03016304347826087,0.091971222,1.3851e-5,7.35822511,0.443635961,1.719023302,2.007792679
66,170,249,666.0,21600.0,0.030833333333333334,0.073480651,1.3871e-5,7.291999508,0.434965958,1.750073777,1.999358953
67,169,247,660.0,21600.0,0.030555555555555555,0.085309774,1.7211e-5,7.245192983,0.412650069,1.744681817,1.962798523
68,168,245,654.0,21600.0,0.03027777777777778,0.089043539,1.367e-5,7.024436477,0.421292773,1.722710908,1.890918459
69,167,243,648.0,21600.0,0.03,0.084353527,1.428e-5,6.8832018,0.415786727,1.715216258,1.830282141
70,166,242,648.0,21504.0,0.030133928571428572,0.084367977,1.3441e-5,6.899982477,0.419080281,1.707637056,1.843529005
71,165,241,648.0,21408.0,0.030269058295964126,0.085701815,1.4031e-5,6.936174291,0.377346024,1.704252961,1.85218872
72,164,240,648.0,21312.0,0.030405405405405407,0.083910355,1.3601e-5,6.9051589,0.389477478,1.75740328,1.867258596
73,159,235,593.0,21312.0,0.0278246996996997,0.082135195,1.3351e-5,7.031037571,0.356084586,1.631072,1.797434919
74,154,230,593.0,20832.0,0.028465821812596007,0.080356395,1.358e-5,7.040766129,0.405151789,1.620631997,1.781269114
75,153,228,587.0,20832.0,0.02817780337941628,0.066967517,1.3391e-5,6.644186555,0.395240289,1.641155866,1.743666486
76,152,226,581.0,20832.0,0.02788978494623656,0.080763676,1.298e-5,6.633937959,0.388869331,1.630064054,1.701302723
77,151,225,581.0,20736.0,0.028018904320987654,0.080671833,1.2781e-5,6.622133299,0.392564435,1.625932508,1.711411428
78,150,224,581.0,20640.0,0.02814922480620155,0.080368195,1.358e-5,6.599986437,0.397419271,1.657700695,1.694756709
79,149,222,575.0,20640.0,0.027858527131782947,0.080015475,1.298e-5,6.281191715,0.37819019,1.622522233,1.656839741
80,148,221,575.0,20544.0,0.027988707165109036,0.065331671,1.334e-5,6.313635402,0.380955078,1.627111603,1.638795233
1 operations graph_nodes graph_edges graph_ce graph_dt graph_ci gen_func_t cpu_compile_t cpu_st_t cpu_mt_t gpu_compile_t gpu_t
2 0 356 493 1399.0 30528.0 0.0458267819706499 0.084389903 2.4971e-5 17.802549835 0.960409581 2.406448706 4.927079076
3 1 351 483 1369.0 30528.0 0.044844077568134175 0.126855933 2.9211e-5 16.868735557 0.927387188 2.257632484 4.697683068
4 2 346 478 1369.0 30048.0 0.04556043663471779 0.08319682 3.5431e-5 16.871399152 0.834869326 2.264361993 4.701280771
5 3 341 473 1314.0 30048.0 0.04373003194888179 0.124422234 2.392e-5 16.454231193 0.856669072 2.271991539 4.68580348
6 4 336 463 1284.0 30048.0 0.042731629392971246 0.121696991 2.2921e-5 15.881542683 0.816430136 2.213686135 4.449106524
7 5 331 458 1284.0 29568.0 0.04342532467532467 0.124024888 2.314e-5 15.879200155 0.799333453 2.194093083 4.435654931
8 6 326 448 1254.0 29568.0 0.04241071428571429 0.121610951 2.2e-5 15.325702423 0.833341953 2.203843882 4.199677306
9 7 321 438 1224.0 29568.0 0.041396103896103896 0.118972208 2.1631e-5 14.367273685 0.711553932 2.16189756 3.948872646
10 8 316 433 1224.0 29088.0 0.04207920792079208 0.074826839 2.2031e-5 14.367107152 0.792981221 2.169096496 3.961630969
11 9 311 428 1169.0 29088.0 0.04018839383938394 0.116237162 2.15e-5 14.416973472 0.788583102 2.092186151 3.946339564
12 10 306 418 1139.0 29088.0 0.03915704070407041 0.114647398 2.031e-5 13.671420757 0.745657392 2.037551329 3.657411205
13 11 301 408 1109.0 29088.0 0.03812568756875687 0.11434652 1.951e-5 13.093103664 0.686554396 2.065489584 3.441139671
14 12 296 403 1109.0 28608.0 0.03876538031319911 0.112282663 1.8991e-5 13.11525848 0.705183633 2.0639299 3.422598036
15 13 291 398 1109.0 28128.0 0.039426905574516495 0.111549203 1.9661e-5 13.08100601 0.700772882 2.065935946 3.41679234
16 14 286 388 1079.0 28128.0 0.0383603526734926 0.109881396 1.907e-5 11.871746271 0.665244638 2.063828106 3.187580585
17 15 281 378 1049.0 28128.0 0.037293799772468716 0.108444747 1.7961e-5 10.963517612 0.62180291 2.037926216 2.935137574
18 16 276 373 1049.0 27648.0 0.03794126157407408 0.107959773 1.874e-5 11.021594456 0.541779823 2.003876106 2.931304737
19 17 271 368 1049.0 27168.0 0.03861160188457008 0.105629068 1.8241e-5 11.017450178 0.581974375 2.017201027 2.952118903
20 18 266 363 1049.0 26688.0 0.0393060551558753 0.107303406 1.8301e-5 11.028597789 0.556078309 2.037535226 2.911405619
21 19 261 358 994.0 26688.0 0.03724520383693045 0.106584986 1.7111e-5 10.789192026 0.525275525 2.011931363 2.931360979
22 20 256 353 939.0 26688.0 0.035184352517985615 0.105743463 1.7521e-5 10.50283261 0.535253087 1.962456949 2.941274646
23 21 255 351 933.0 26688.0 0.03495953237410072 0.105189187 1.7471e-5 10.739591259 0.555102576 2.013201521 2.896175037
24 22 254 350 933.0 26592.0 0.035085740072202165 0.105895137 1.6631e-5 10.68514711 0.571809578 1.974934611 2.890503396
25 23 253 348 927.0 26592.0 0.0348601083032491 0.104181459 1.817e-5 10.344271645 0.572483889 2.002875753 2.842241926
26 24 252 347 927.0 26496.0 0.034986413043478264 0.103568232 1.7471e-5 10.363216025 0.602207417 1.943794016 2.811132729
27 25 247 342 927.0 26016.0 0.035631918819188195 0.102006829 1.669e-5 10.360319761 0.588967585 1.942523675 2.838431844
28 26 246 340 921.0 26016.0 0.03540129151291513 0.103244544 1.672e-5 10.140255758 0.565172778 1.980058606 2.776594151
29 27 245 339 921.0 25920.0 0.03553240740740741 0.102991317 1.723e-5 10.166352736 0.588556746 2.025713505 2.754827976
30 28 244 337 915.0 25920.0 0.03530092592592592 0.102527335 1.6261e-5 9.965044496 0.527648944 1.966870364 2.708992883
31 29 243 335 909.0 25920.0 0.035069444444444445 0.101020632 1.6541e-5 9.899918186 0.530837495 1.99964346 2.686936268
32 30 242 334 909.0 25824.0 0.03519981412639405 0.099846559 1.614e-5 9.924451078 0.532149983 1.992832633 2.667590089
33 31 241 333 909.0 25728.0 0.035331156716417914 0.103293156 1.634e-5 9.893503718 0.500188044 1.971455575 2.661440862
34 32 236 328 909.0 25248.0 0.036002851711026615 0.110948742 1.5851e-5 9.916889596 0.515528547 2.014256204 2.691654688
35 33 235 326 903.0 25248.0 0.03576520912547528 0.099799239 1.658e-5 9.667648582 0.561210643 1.981308261 2.647665444
36 34 234 324 897.0 25248.0 0.035527566539923956 0.099455409 1.6561e-5 9.588166052 0.544847505 1.932560182 2.56349283
37 35 233 323 897.0 25152.0 0.035663167938931296 0.103335368 1.6271e-5 9.590387462 0.542413718 1.965145602 2.559435691
38 36 232 321 891.0 25152.0 0.03542461832061069 0.097770562 1.6571e-5 9.362808632 0.543288523 2.017894491 2.498672404
39 37 231 320 891.0 25056.0 0.03556034482758621 0.100428616 1.5941e-5 9.340302395 0.548822639 1.994799194 2.525394
40 38 230 319 891.0 24960.0 0.03569711538461538 0.056667955 1.5341e-5 9.356871677 0.537041949 1.921246656 2.507595034
41 39 225 314 891.0 24480.0 0.036397058823529414 0.099323026 1.636e-5 9.383625024 0.506403697 1.972101141 2.529248938
42 40 220 309 836.0 24480.0 0.03415032679738562 0.096789665 1.645e-5 9.524601658 0.473707387 1.980933173 2.524768525
43 41 215 304 836.0 24000.0 0.034833333333333334 0.053463925 1.671e-5 9.520567128 0.487585179 1.942542795 2.535491481
44 42 214 302 830.0 24000.0 0.034583333333333334 0.096303802 1.6011e-5 9.137262758 0.4297148 1.950560163 2.478408276
45 43 213 301 830.0 23904.0 0.034722222222222224 0.070596338 1.6901e-5 9.143790565 0.492842898 1.949332161 2.476752284
46 44 212 299 824.0 23904.0 0.034471218206157964 0.09696925 1.612e-5 9.089211511 0.456930617 2.022026121 2.419473874
47 45 211 297 818.0 23904.0 0.03422021419009371 0.052526649 1.536e-5 8.807671694 0.471203239 1.970488502 2.372441242
48 46 210 296 818.0 23808.0 0.03435819892473118 0.096716114 1.5701e-5 8.806210783 0.451452844 1.960073481 2.387451098
49 47 209 295 818.0 23712.0 0.034497300944669365 0.05145174 1.6061e-5 8.867215342 0.450895098 1.968012818 2.394204111
50 48 204 290 818.0 23232.0 0.03521005509641873 0.093248236 1.9521e-5 8.844517253 0.476030278 1.963827031 2.389413849
51 49 203 288 812.0 23232.0 0.034951790633608815 0.093881584 1.527e-5 8.849095772 0.446415074 1.974782212 2.332439097
52 50 202 287 812.0 23136.0 0.03509681881051176 0.050473481 1.5851e-5 8.784636116 0.469233287 1.953068913 2.321316886
53 51 201 285 806.0 23136.0 0.034837482710926695 0.092750242 1.5541e-5 8.632088328 0.491467054 1.945455141 2.29300329
54 52 200 284 806.0 23040.0 0.03498263888888889 0.092540087 1.7161e-5 8.637677414 0.471865872 1.975464118 2.259260411
55 53 199 282 800.0 23040.0 0.034722222222222224 0.092944049 1.5261e-5 8.624992966 0.478249573 1.931707577 2.232058939
56 54 198 281 800.0 22944.0 0.03486750348675035 0.091660013 1.575e-5 8.680034605 0.429976994 2.022314921 2.224544849
57 55 197 279 794.0 22944.0 0.03460599721059972 0.092591389 1.582e-5 8.266084761 0.442472956 1.949268775 2.165130527
58 56 196 278 794.0 22848.0 0.03475140056022409 0.090376966 1.529e-5 8.26930839 0.438461132 1.960119483 2.169387658
59 57 191 273 739.0 22848.0 0.03234418767507003 0.090398736 1.589e-5 8.061516101 0.468233752 1.825342557 2.144808638
60 58 186 268 739.0 22368.0 0.03303826895565093 0.090566151 1.5781e-5 8.051685873 0.472555774 1.827021946 2.175475243
61 59 185 266 733.0 22368.0 0.03277002861230329 0.046301524 1.4931e-5 7.809555195 0.466519375 1.819191936 2.095906173
62 60 184 264 727.0 22368.0 0.03250178826895565 0.087977349 1.4771e-5 7.825535183 0.452072238 1.820734702 2.06485156
63 61 183 263 727.0 22272.0 0.032641882183908046 0.08908488 1.4591e-5 7.77560322 0.445728609 1.804235078 2.06763398
64 62 182 262 727.0 22176.0 0.03278318903318903 0.076517376 1.461e-5 7.754359737 0.421063625 1.812681957 2.076417548
65 63 181 260 721.0 22176.0 0.032512626262626264 0.088983767 1.4091e-5 7.616158878 0.422402602 1.868182992 2.016601005
66 64 180 259 721.0 22080.0 0.03265398550724638 0.089172453 1.467e-5 7.63910266 0.402654247 1.844390793 2.031385412
67 65 175 254 666.0 22080.0 0.03016304347826087 0.091971222 1.3851e-5 7.35822511 0.443635961 1.719023302 2.007792679
68 66 170 249 666.0 21600.0 0.030833333333333334 0.073480651 1.3871e-5 7.291999508 0.434965958 1.750073777 1.999358953
69 67 169 247 660.0 21600.0 0.030555555555555555 0.085309774 1.7211e-5 7.245192983 0.412650069 1.744681817 1.962798523
70 68 168 245 654.0 21600.0 0.03027777777777778 0.089043539 1.367e-5 7.024436477 0.421292773 1.722710908 1.890918459
71 69 167 243 648.0 21600.0 0.03 0.084353527 1.428e-5 6.8832018 0.415786727 1.715216258 1.830282141
72 70 166 242 648.0 21504.0 0.030133928571428572 0.084367977 1.3441e-5 6.899982477 0.419080281 1.707637056 1.843529005
73 71 165 241 648.0 21408.0 0.030269058295964126 0.085701815 1.4031e-5 6.936174291 0.377346024 1.704252961 1.85218872
74 72 164 240 648.0 21312.0 0.030405405405405407 0.083910355 1.3601e-5 6.9051589 0.389477478 1.75740328 1.867258596
75 73 159 235 593.0 21312.0 0.0278246996996997 0.082135195 1.3351e-5 7.031037571 0.356084586 1.631072 1.797434919
76 74 154 230 593.0 20832.0 0.028465821812596007 0.080356395 1.358e-5 7.040766129 0.405151789 1.620631997 1.781269114
77 75 153 228 587.0 20832.0 0.02817780337941628 0.066967517 1.3391e-5 6.644186555 0.395240289 1.641155866 1.743666486
78 76 152 226 581.0 20832.0 0.02788978494623656 0.080763676 1.298e-5 6.633937959 0.388869331 1.630064054 1.701302723
79 77 151 225 581.0 20736.0 0.028018904320987654 0.080671833 1.2781e-5 6.622133299 0.392564435 1.625932508 1.711411428
80 78 150 224 581.0 20640.0 0.02814922480620155 0.080368195 1.358e-5 6.599986437 0.397419271 1.657700695 1.694756709
81 79 149 222 575.0 20640.0 0.027858527131782947 0.080015475 1.298e-5 6.281191715 0.37819019 1.622522233 1.656839741
82 80 148 221 575.0 20544.0 0.027988707165109036 0.065331671 1.334e-5 6.313635402 0.380955078 1.627111603 1.638795233

View File

@ -1,79 +0,0 @@
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
0,15866,21617,66249.0,1.314048e6,0.050415966540035065,6.468999136,0.001398329,8.478099553,0.43958521,0.0,0.0
10,14676,19713,60656.0,1.279776e6,0.0473957942639962,5.993535435,0.000745961,7.192805963,0.417393835,0.0,0.0
20,13774,18527,56334.0,1.243296e6,0.04531020770596865,5.489738392,0.000682889,6.652182167,0.336339503,0.0,0.0
30,13352,17940,53276.0,1.236672e6,0.04308013765978368,5.169906767,0.000675318,6.370526843,0.313517861,0.0,0.0
40,12714,17168,51163.0,1.199712e6,0.042646068389746876,4.845906388,0.000634457,6.124306725,0.311820244,0.0,0.0
50,12004,16270,48473.0,1.163232e6,0.04167096503534978,4.433653313,0.000596017,5.760561483,0.320897852,0.0,0.0
60,11750,15983,48022.0,1.144224e6,0.04196905501020779,4.316924709,0.000596237,5.738809149,0.283214404,0.0,0.0
70,11538,15697,47325.0,1.133184e6,0.04176285581158929,4.201152631,0.000554855,5.438337093,0.313985744,0.0,0.0
80,11434,15550,46814.0,1.129536e6,0.04144533684628024,4.216359254,0.000553545,5.429706297,0.268223845,0.0,0.0
90,11066,15085,46232.0,1.10352e6,0.041895026823256486,3.924567625,0.000560535,5.412444055,0.274917428,0.0,0.0
100,10848,14847,44297.0,1.100352e6,0.04025711772232885,3.848048388,0.000527955,5.127227854,0.294706757,0.0,0.0
110,10462,14382,42261.0,1.084512e6,0.038967756926617685,3.674674179,0.000509054,4.922064369,0.276530272,0.0,0.0
120,10304,14191,41810.0,1.07472e6,0.038903156170909635,3.58233155,0.000516074,5.02371138,0.266906519,0.0,0.0
130,10200,14067,41437.0,1.068864e6,0.03876732680677804,3.529160319,0.000501634,4.863804478,0.24639169,0.0,0.0
140,10042,13871,40956.0,1.059552e6,0.03865407266467337,3.346890818,0.000488403,4.753116119,0.254509861,0.0,0.0
150,9956,13765,40583.0,1.055424e6,0.038451844945727974,3.41847396,0.000500654,4.756966153,0.255966291,0.0,0.0
160,9906,13690,40433.0,1.053024e6,0.03839703558513386,3.405093274,0.000496774,4.812050085,0.24421971,0.0,0.0
170,9838,13597,40283.0,1.048896e6,0.038405142168527674,3.348340057,0.000481363,4.669473296,0.234701411,0.0,0.0
180,9242,12790,37708.0,1.02336e6,0.03684724828017511,3.063089187,0.000449352,4.335668832,0.228471471,0.0,0.0
190,9120,12648,37082.0,1.017984e6,0.03642689865459575,2.994073054,0.000429002,4.181894908,0.224361729,0.0,0.0
200,9052,12555,36932.0,1.013856e6,0.03642726383233911,3.046147594,0.000427282,4.151250123,0.212513705,0.0,0.0
210,8912,12405,36366.0,1.005792e6,0.03615658108237091,2.937579863,0.000433982,4.261727394,0.214012817,0.0,0.0
220,8808,12281,35993.0,999936.0,0.035995303699436765,2.892146284,0.000432382,4.198423468,0.219749812,0.0,0.0
230,8626,12061,35765.0,986112.0,0.03626869970145379,2.752333211,0.000414672,4.035044142,0.241721263,0.0,0.0
240,8426,11841,34336.0,980256.0,0.03502758463095355,2.714773746,0.000414522,4.036870861,0.235365769,0.0,0.0
250,8118,11464,33416.0,961728.0,0.03474579090969588,2.579966689,0.000402461,3.870568035,0.20937257,0.0,0.0
260,7942,11242,32634.0,953664.0,0.034219599355747934,2.520293442,0.000391581,3.72881432,0.191238985,0.0,0.0
270,7838,11100,32153.0,949536.0,0.0338618019748593,2.456319106,0.000383211,3.635092003,0.187908484,0.0,0.0
280,7716,10940,31672.0,943680.0,0.033562224482875554,2.402192681,0.00037687,3.594882506,0.194062713,0.0,0.0
290,7576,10772,30745.0,939552.0,0.032723042471305475,2.338714319,0.00037334,3.556085038,0.194369971,0.0,0.0
300,7376,10529,30487.0,924480.0,0.0329774575977847,2.279512925,0.00036552,3.504723807,0.191079171,0.0,0.0
310,7218,10310,29868.0,917376.0,0.03255807869401423,2.207692656,0.000355539,3.30937664,0.181261073,0.0,0.0
320,7078,10137,29417.0,909312.0,0.03235083227759009,2.147511905,0.000352659,3.30461376,0.18005858,0.0,0.0
330,6860,9848,28991.0,895200.0,0.032384941912421805,2.078259266,0.00033941,3.211808988,0.172834084,0.0,0.0
340,6702,9611,28264.0,889824.0,0.03176358470888625,2.069880378,0.000318959,3.033092324,0.154811992,0.0,0.0
350,6616,9505,27891.0,885696.0,0.03149048883589855,2.005510172,0.000326369,3.008426711,0.173417779,0.0,0.0
360,6512,9391,27325.0,881088.0,0.03101279327377061,1.968347618,0.000315789,2.921325386,0.168873786,0.0,0.0
370,6426,9280,27175.0,875232.0,0.03104891046031224,1.92734893,0.000315548,2.990437001,0.181187901,0.0,0.0
380,6358,9187,27025.0,871104.0,0.031023850194695467,1.889258172,0.000308689,2.846738111,0.181651873,0.0,0.0
390,6272,9081,26652.0,866976.0,0.030741335400287898,1.840892272,0.000329279,2.825270586,0.177422669,0.0,0.0
400,6204,8993,26532.0,862368.0,0.03076644773460982,1.820608708,0.000296329,2.759355249,0.175583708,0.0,0.0
410,6118,8864,26274.0,858240.0,0.030613814317673377,1.783961229,0.000290708,2.707626007,0.172954176,0.0,0.0
420,6014,8740,25901.0,852384.0,0.030386539400082593,1.774576254,0.000288998,2.694176581,0.173939173,0.0,0.0
430,5928,8629,25498.0,848736.0,0.030042321758473777,1.7065974,0.000284277,2.675798329,0.170062674,0.0,0.0
440,5842,8523,25125.0,844608.0,0.029747527847238008,1.685087395,0.000287118,2.688215586,0.166480549,0.0,0.0
450,5738,8399,24752.0,838752.0,0.02951051085422151,1.673553823,0.000274969,2.523253333,0.167824913,0.0,0.0
460,5670,8316,24662.0,833664.0,0.02958266159987717,1.625105871,0.000272178,2.52817126,0.164730041,0.0,0.0
470,5548,8161,24211.0,827328.0,0.029264088729016785,1.583826656,0.000262318,2.419247276,0.160768733,0.0,0.0
480,5426,8006,23760.0,820992.0,0.028940598690364826,1.58433006,0.000264708,2.454129792,0.155746163,0.0,0.0
490,5358,7918,23640.0,816384.0,0.028956961429915332,1.520887155,0.000253268,2.329551174,0.153813499,0.0,0.0
500,5272,7807,23237.0,812736.0,0.02859108000629971,1.488167166,0.000248837,2.282665244,0.154234105,0.0,0.0
510,5150,7647,22756.0,806880.0,0.028202458853856832,1.448681065,0.000247727,2.275316917,0.149501885,0.0,0.0
520,5028,7487,22022.0,803232.0,0.02741673638500458,1.43939862,0.000236057,2.14942739,0.146771977,0.0,0.0
530,4906,7350,21679.0,795168.0,0.02726342106322186,1.367826149,0.000242258,2.188588822,0.148076932,0.0,0.0
540,4838,7257,21529.0,791040.0,0.027216069983818772,1.341798982,0.000230357,2.096237881,0.141709174,0.0,0.0
550,4752,7151,21156.0,786912.0,0.02688483591557887,1.339939443,0.000227267,2.062687036,0.13782156,0.0,0.0
560,4684,7068,21066.0,781824.0,0.026944683202357565,1.327848904,0.000222317,2.00294804,0.139508498,0.0,0.0
570,4634,6993,20916.0,779424.0,0.02683520137948023,1.276183945,0.000224717,2.021180753,0.13573571,0.0,0.0
580,4548,6882,20766.0,773568.0,0.026844440307768676,1.235522514,0.000212457,1.917354147,0.128401984,0.0,0.0
590,4498,6807,20616.0,771168.0,0.026733474418025645,1.267249751,0.000212506,1.899792552,0.133449083,0.0,0.0
600,4376,6657,20195.0,764352.0,0.0264210730134807,1.209891149,0.000205326,1.850663451,0.129490109,0.0,0.0
610,4326,6582,20045.0,761952.0,0.026307431439250767,1.18887911,0.000203196,1.819359467,0.129183977,0.0,0.0
620,4204,6422,19564.0,756096.0,0.02587502116133401,1.172245936,0.000212366,1.757557943,0.125887084,0.0,0.0
630,3836,5980,17558.0,741504.0,0.02367890126014155,1.043747354,0.000175996,1.554965777,0.115650062,0.0,0.0
640,3732,5856,17438.0,733440.0,0.023775632635253053,1.010298683,0.000174715,1.562411059,0.113877446,0.0,0.0
650,3628,5714,16957.0,729312.0,0.023250680093019175,0.985957627,0.000170445,1.474744854,0.110990727,0.0,0.0
660,3506,5549,16446.0,723936.0,0.022717477788091765,0.948042334,0.000161975,1.420057878,0.106426767,0.0,0.0
670,3420,5448,16103.0,719328.0,0.0223861715378798,0.921840457,0.000156765,1.356400004,0.10491163,0.0,0.0
680,3316,5319,15700.0,713952.0,0.021990273855945496,0.892707383,0.000162605,1.335548894,0.100909488,0.0,0.0
690,3212,5200,15357.0,707616.0,0.02170244878578212,0.89578919,0.000149085,1.299462304,0.099173414,0.0,0.0
700,2916,4871,13850.0,693792.0,0.019962755407960886,0.781393124,0.000134984,1.179737113,0.096642976,0.0,0.0
710,2722,4598,13123.0,684960.0,0.019158782994627425,0.725161332,0.000122213,1.056813282,0.08619269,0.0,0.0
720,2636,4492,12750.0,680832.0,0.018727086858432038,0.701632434,0.000128984,1.019551067,0.085388434,0.0,0.0
730,2532,4373,12407.0,674496.0,0.018394475282284845,0.675037355,0.000119134,0.993660466,0.082709493,0.0,0.0
740,2428,4231,11926.0,670368.0,0.017790228650532244,0.6435086,0.000109403,0.927737064,0.078423743,0.0,0.0
750,2342,4125,11553.0,666240.0,0.017340597982708934,0.619218823,0.000106693,0.883708241,0.075467284,0.0,0.0
760,2274,4032,11403.0,662112.0,0.017222161809482384,0.635081649,0.000103493,0.919860114,0.074058132,0.0,0.0
770,2234,3977,11313.0,659712.0,0.017148392025611175,0.593953439,0.000110543,0.84404911,0.077019298,0.0,0.0
1 operations graph_nodes graph_edges graph_ce graph_dt graph_ci gen_func_t cpu_compile_t cpu_st_t cpu_mt_t gpu_compile_t gpu_t
2 0 15866 21617 66249.0 1.314048e6 0.050415966540035065 6.468999136 0.001398329 8.478099553 0.43958521 0.0 0.0
3 10 14676 19713 60656.0 1.279776e6 0.0473957942639962 5.993535435 0.000745961 7.192805963 0.417393835 0.0 0.0
4 20 13774 18527 56334.0 1.243296e6 0.04531020770596865 5.489738392 0.000682889 6.652182167 0.336339503 0.0 0.0
5 30 13352 17940 53276.0 1.236672e6 0.04308013765978368 5.169906767 0.000675318 6.370526843 0.313517861 0.0 0.0
6 40 12714 17168 51163.0 1.199712e6 0.042646068389746876 4.845906388 0.000634457 6.124306725 0.311820244 0.0 0.0
7 50 12004 16270 48473.0 1.163232e6 0.04167096503534978 4.433653313 0.000596017 5.760561483 0.320897852 0.0 0.0
8 60 11750 15983 48022.0 1.144224e6 0.04196905501020779 4.316924709 0.000596237 5.738809149 0.283214404 0.0 0.0
9 70 11538 15697 47325.0 1.133184e6 0.04176285581158929 4.201152631 0.000554855 5.438337093 0.313985744 0.0 0.0
10 80 11434 15550 46814.0 1.129536e6 0.04144533684628024 4.216359254 0.000553545 5.429706297 0.268223845 0.0 0.0
11 90 11066 15085 46232.0 1.10352e6 0.041895026823256486 3.924567625 0.000560535 5.412444055 0.274917428 0.0 0.0
12 100 10848 14847 44297.0 1.100352e6 0.04025711772232885 3.848048388 0.000527955 5.127227854 0.294706757 0.0 0.0
13 110 10462 14382 42261.0 1.084512e6 0.038967756926617685 3.674674179 0.000509054 4.922064369 0.276530272 0.0 0.0
14 120 10304 14191 41810.0 1.07472e6 0.038903156170909635 3.58233155 0.000516074 5.02371138 0.266906519 0.0 0.0
15 130 10200 14067 41437.0 1.068864e6 0.03876732680677804 3.529160319 0.000501634 4.863804478 0.24639169 0.0 0.0
16 140 10042 13871 40956.0 1.059552e6 0.03865407266467337 3.346890818 0.000488403 4.753116119 0.254509861 0.0 0.0
17 150 9956 13765 40583.0 1.055424e6 0.038451844945727974 3.41847396 0.000500654 4.756966153 0.255966291 0.0 0.0
18 160 9906 13690 40433.0 1.053024e6 0.03839703558513386 3.405093274 0.000496774 4.812050085 0.24421971 0.0 0.0
19 170 9838 13597 40283.0 1.048896e6 0.038405142168527674 3.348340057 0.000481363 4.669473296 0.234701411 0.0 0.0
20 180 9242 12790 37708.0 1.02336e6 0.03684724828017511 3.063089187 0.000449352 4.335668832 0.228471471 0.0 0.0
21 190 9120 12648 37082.0 1.017984e6 0.03642689865459575 2.994073054 0.000429002 4.181894908 0.224361729 0.0 0.0
22 200 9052 12555 36932.0 1.013856e6 0.03642726383233911 3.046147594 0.000427282 4.151250123 0.212513705 0.0 0.0
23 210 8912 12405 36366.0 1.005792e6 0.03615658108237091 2.937579863 0.000433982 4.261727394 0.214012817 0.0 0.0
24 220 8808 12281 35993.0 999936.0 0.035995303699436765 2.892146284 0.000432382 4.198423468 0.219749812 0.0 0.0
25 230 8626 12061 35765.0 986112.0 0.03626869970145379 2.752333211 0.000414672 4.035044142 0.241721263 0.0 0.0
26 240 8426 11841 34336.0 980256.0 0.03502758463095355 2.714773746 0.000414522 4.036870861 0.235365769 0.0 0.0
27 250 8118 11464 33416.0 961728.0 0.03474579090969588 2.579966689 0.000402461 3.870568035 0.20937257 0.0 0.0
28 260 7942 11242 32634.0 953664.0 0.034219599355747934 2.520293442 0.000391581 3.72881432 0.191238985 0.0 0.0
29 270 7838 11100 32153.0 949536.0 0.0338618019748593 2.456319106 0.000383211 3.635092003 0.187908484 0.0 0.0
30 280 7716 10940 31672.0 943680.0 0.033562224482875554 2.402192681 0.00037687 3.594882506 0.194062713 0.0 0.0
31 290 7576 10772 30745.0 939552.0 0.032723042471305475 2.338714319 0.00037334 3.556085038 0.194369971 0.0 0.0
32 300 7376 10529 30487.0 924480.0 0.0329774575977847 2.279512925 0.00036552 3.504723807 0.191079171 0.0 0.0
33 310 7218 10310 29868.0 917376.0 0.03255807869401423 2.207692656 0.000355539 3.30937664 0.181261073 0.0 0.0
34 320 7078 10137 29417.0 909312.0 0.03235083227759009 2.147511905 0.000352659 3.30461376 0.18005858 0.0 0.0
35 330 6860 9848 28991.0 895200.0 0.032384941912421805 2.078259266 0.00033941 3.211808988 0.172834084 0.0 0.0
36 340 6702 9611 28264.0 889824.0 0.03176358470888625 2.069880378 0.000318959 3.033092324 0.154811992 0.0 0.0
37 350 6616 9505 27891.0 885696.0 0.03149048883589855 2.005510172 0.000326369 3.008426711 0.173417779 0.0 0.0
38 360 6512 9391 27325.0 881088.0 0.03101279327377061 1.968347618 0.000315789 2.921325386 0.168873786 0.0 0.0
39 370 6426 9280 27175.0 875232.0 0.03104891046031224 1.92734893 0.000315548 2.990437001 0.181187901 0.0 0.0
40 380 6358 9187 27025.0 871104.0 0.031023850194695467 1.889258172 0.000308689 2.846738111 0.181651873 0.0 0.0
41 390 6272 9081 26652.0 866976.0 0.030741335400287898 1.840892272 0.000329279 2.825270586 0.177422669 0.0 0.0
42 400 6204 8993 26532.0 862368.0 0.03076644773460982 1.820608708 0.000296329 2.759355249 0.175583708 0.0 0.0
43 410 6118 8864 26274.0 858240.0 0.030613814317673377 1.783961229 0.000290708 2.707626007 0.172954176 0.0 0.0
44 420 6014 8740 25901.0 852384.0 0.030386539400082593 1.774576254 0.000288998 2.694176581 0.173939173 0.0 0.0
45 430 5928 8629 25498.0 848736.0 0.030042321758473777 1.7065974 0.000284277 2.675798329 0.170062674 0.0 0.0
46 440 5842 8523 25125.0 844608.0 0.029747527847238008 1.685087395 0.000287118 2.688215586 0.166480549 0.0 0.0
47 450 5738 8399 24752.0 838752.0 0.02951051085422151 1.673553823 0.000274969 2.523253333 0.167824913 0.0 0.0
48 460 5670 8316 24662.0 833664.0 0.02958266159987717 1.625105871 0.000272178 2.52817126 0.164730041 0.0 0.0
49 470 5548 8161 24211.0 827328.0 0.029264088729016785 1.583826656 0.000262318 2.419247276 0.160768733 0.0 0.0
50 480 5426 8006 23760.0 820992.0 0.028940598690364826 1.58433006 0.000264708 2.454129792 0.155746163 0.0 0.0
51 490 5358 7918 23640.0 816384.0 0.028956961429915332 1.520887155 0.000253268 2.329551174 0.153813499 0.0 0.0
52 500 5272 7807 23237.0 812736.0 0.02859108000629971 1.488167166 0.000248837 2.282665244 0.154234105 0.0 0.0
53 510 5150 7647 22756.0 806880.0 0.028202458853856832 1.448681065 0.000247727 2.275316917 0.149501885 0.0 0.0
54 520 5028 7487 22022.0 803232.0 0.02741673638500458 1.43939862 0.000236057 2.14942739 0.146771977 0.0 0.0
55 530 4906 7350 21679.0 795168.0 0.02726342106322186 1.367826149 0.000242258 2.188588822 0.148076932 0.0 0.0
56 540 4838 7257 21529.0 791040.0 0.027216069983818772 1.341798982 0.000230357 2.096237881 0.141709174 0.0 0.0
57 550 4752 7151 21156.0 786912.0 0.02688483591557887 1.339939443 0.000227267 2.062687036 0.13782156 0.0 0.0
58 560 4684 7068 21066.0 781824.0 0.026944683202357565 1.327848904 0.000222317 2.00294804 0.139508498 0.0 0.0
59 570 4634 6993 20916.0 779424.0 0.02683520137948023 1.276183945 0.000224717 2.021180753 0.13573571 0.0 0.0
60 580 4548 6882 20766.0 773568.0 0.026844440307768676 1.235522514 0.000212457 1.917354147 0.128401984 0.0 0.0
61 590 4498 6807 20616.0 771168.0 0.026733474418025645 1.267249751 0.000212506 1.899792552 0.133449083 0.0 0.0
62 600 4376 6657 20195.0 764352.0 0.0264210730134807 1.209891149 0.000205326 1.850663451 0.129490109 0.0 0.0
63 610 4326 6582 20045.0 761952.0 0.026307431439250767 1.18887911 0.000203196 1.819359467 0.129183977 0.0 0.0
64 620 4204 6422 19564.0 756096.0 0.02587502116133401 1.172245936 0.000212366 1.757557943 0.125887084 0.0 0.0
65 630 3836 5980 17558.0 741504.0 0.02367890126014155 1.043747354 0.000175996 1.554965777 0.115650062 0.0 0.0
66 640 3732 5856 17438.0 733440.0 0.023775632635253053 1.010298683 0.000174715 1.562411059 0.113877446 0.0 0.0
67 650 3628 5714 16957.0 729312.0 0.023250680093019175 0.985957627 0.000170445 1.474744854 0.110990727 0.0 0.0
68 660 3506 5549 16446.0 723936.0 0.022717477788091765 0.948042334 0.000161975 1.420057878 0.106426767 0.0 0.0
69 670 3420 5448 16103.0 719328.0 0.0223861715378798 0.921840457 0.000156765 1.356400004 0.10491163 0.0 0.0
70 680 3316 5319 15700.0 713952.0 0.021990273855945496 0.892707383 0.000162605 1.335548894 0.100909488 0.0 0.0
71 690 3212 5200 15357.0 707616.0 0.02170244878578212 0.89578919 0.000149085 1.299462304 0.099173414 0.0 0.0
72 700 2916 4871 13850.0 693792.0 0.019962755407960886 0.781393124 0.000134984 1.179737113 0.096642976 0.0 0.0
73 710 2722 4598 13123.0 684960.0 0.019158782994627425 0.725161332 0.000122213 1.056813282 0.08619269 0.0 0.0
74 720 2636 4492 12750.0 680832.0 0.018727086858432038 0.701632434 0.000128984 1.019551067 0.085388434 0.0 0.0
75 730 2532 4373 12407.0 674496.0 0.018394475282284845 0.675037355 0.000119134 0.993660466 0.082709493 0.0 0.0
76 740 2428 4231 11926.0 670368.0 0.017790228650532244 0.6435086 0.000109403 0.927737064 0.078423743 0.0 0.0
77 750 2342 4125 11553.0 666240.0 0.017340597982708934 0.619218823 0.000106693 0.883708241 0.075467284 0.0 0.0
78 760 2274 4032 11403.0 662112.0 0.017222161809482384 0.635081649 0.000103493 0.919860114 0.074058132 0.0 0.0
79 770 2234 3977 11313.0 659712.0 0.017148392025611175 0.593953439 0.000110543 0.84404911 0.077019298 0.0 0.0

BIN
data/results.zip (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -2,3 +2,4 @@
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"

View File

@ -5,5 +5,6 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"

249
examples/full_node_bench.jl Normal file
View File

@ -0,0 +1,249 @@
using MetagraphOptimization
using CUDA
using UUIDs
using DataFrames
using CSV
using Random
using BenchmarkTools
using Dates
using Base.Threads
function log(x...)
println(now(), " ", join(x, " ")...)
flush(stdout)
return nothing
end
results_filename = "full_node_bench.csv"
df = DataFrame(
process_name = String[],
cpu_threads = Int[],
gpu_devices = Int[],
n_inputs = Int[],
chunk_size = Int[],
time = Float64[],
std = Float64[],
rate = Float64[],
cpu_chunks = Float64[],
gpu_chunks = Float64[],
memory_est = Float64[],
)
# if they exist, read existing results and append new ones
if isfile(results_filename)
df = CSV.read(results_filename, DataFrame)
end
nInputs = 2^26
lck = ReentrantLock()
progress = 1
cpu_chunks = 0
gpu_chunks = 0
chunkSizes = [1024, 4096, 16384, 65536, 262144, 1048576] # 2^10 to 2^20
function cpu_worker(compute_func, inputs, chunk_size)
global progress
global cpu_chunks
global lck
quit = false
work_start = 0
work_end = 0
while true
lock(lck) do
if progress >= nInputs
quit = true
else
work_start = progress
progress = progress + chunk_size
work_end = min(progress - 1, nInputs)
cpu_chunks = cpu_chunks + 1
#log("CPU Worker $(Threads.threadid()) computing $(cpu_chunks)th cpu chunk ($work_start, $work_end)")
end
end
if quit
break
end
for i in work_start:work_end
compute_func(inputs[i])
end
end
#log("CPU Worker on $(Threads.threadid()) finished!")
return nothing
end
# called with a specific device selected
function gpu_worker(kernel!, inputs, chunk_size)
global progress
global gpu_chunks
global lck
cuOutputs = CuVector{ComplexF64}()
resize!(cuOutputs, chunk_size)
quit = false
work_start = 0
work_end = 0
while true
lock(lck) do
if progress >= nInputs
quit = true
else
work_start = progress
progress = progress + chunk_size
work_end = min(progress - 1, nInputs)
gpu_chunks = gpu_chunks + 1
#log("GPU Worker $(CUDA.device()) computing $(gpu_chunks)th gpu chunk ($work_start, $work_end)")
end
end
if quit
break
end
cuInputs = CuVector(inputs[work_start:work_end])
ts = 32
bs = Int(chunk_size / 32)
@cuda threads = ts blocks = bs always_inline = true kernel!(cuInputs, cuOutputs, chunk_size)
CUDA.device_synchronize()
end
#log("GPU Worker on Device $(CUDA.device()) finished!")
return nothing
end
cpu_gpu_ratio = Vector{Tuple{Int, Int}}()
function full_compute(compute_func, kernel!, inputs, chunk_size)
global progress
progress = 1
global cpu_chunks
cpu_chunks = 0
global gpu_chunks
gpu_chunks = 0
tasks = Vector()
for dev in CUDA.devices()
t = Threads.@spawn device!(dev) do
gpu_worker(kernel!, inputs, chunk_size)
return nothing
end
push!(tasks, t)
end
for i in 1:(Threads.nthreads() - length(CUDA.devices()))
t = Threads.@spawn cpu_worker(compute_func, inputs, chunk_size)
push!(tasks, t)
end
for t in tasks
wait(t)
end
push!(cpu_gpu_ratio, (cpu_chunks, gpu_chunks))
return nothing
end
function bench(compute_function, kernel!, inputs, chunk_size)
global cpu_gpu_ratio
empty!(cpu_gpu_ratio)
bench = @benchmark begin
full_compute($compute_function, $kernel!, $inputs, $chunk_size)
end gcsample = true seconds = 60
time = median(bench.times) / 1e9
s = std(bench.times) / 1e9
rate = length(inputs) / time
med_cpu_chunks = median(getindex.(cpu_gpu_ratio, 1))
med_gpu_chunks = median(getindex.(cpu_gpu_ratio, 2))
mem_estimate = bench.memory
log("CPU/GPU ratios: $(cpu_gpu_ratio)")
return (time, rate, s, med_cpu_chunks, med_gpu_chunks, mem_estimate)
end
function full_node_bench(process::MetagraphOptimization.AbstractProcessDescription, func, kernel!, chunk_size, inputs)
process_name = string(process)
log("\n--- Benchmarking $(process_name) on $(nInputs) with chunk size $(chunk_size) ---")
log("Available Cuda Devices:")
display.(CUDA.devices())
log("Benchmarking full node...")
(time, rate, s, med_cpu_chunks, med_gpu_chunks, mem_estimate) = bench(func, kernel!, inputs, chunk_size)
log(
"Benchmarking complete with median time $(time), $(med_cpu_chunks) cpu chunks, and $(med_gpu_chunks) gpu chunks.",
)
push!(
df,
Dict(
:process_name => process_name,
:cpu_threads => Threads.nthreads() - length(CUDA.devices()),
:gpu_devices => length(CUDA.devices()),
:n_inputs => nInputs,
:chunk_size => chunk_size,
:time => time,
:std => s,
:rate => rate,
:cpu_chunks => med_cpu_chunks,
:gpu_chunks => med_gpu_chunks,
:memory_est => mem_estimate,
),
)
return nothing
end
# use "mock" machine that only uses cpu for compilation
machine = Machine(
[
MetagraphOptimization.NumaNode(
0,
1,
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
-1.0,
UUIDs.uuid1(),
),
],
[-1.0;;],
)
optimizer = ReductionOptimizer()
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke"]
for proc in processes
process = parse_process(proc, QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
log("Generating $nInputs inputs with $(Threads.nthreads()) threads...")
inputs = Vector{typeof(gen_process_input(process))}()
resize!(inputs, nInputs)
procs = Vector{typeof(process)}()
for i in 1:Threads.nthreads()
push!(procs, copy(process))
end
@inbounds Threads.@threads for i in eachindex(inputs)
inputs[i] = gen_process_input(procs[Threads.nthreads()])
end
for chunk_size in chunkSizes
full_node_bench(process, compute_func, kernel!, chunk_size, inputs)
CSV.write(results_filename, df)
end
end;

View File

@ -34,9 +34,10 @@ function import_bench()
bench_txt("AB->ABBB.txt")
bench_txt("AB->ABBBBB.txt")
bench_txt("AB->ABBBBBBB.txt")
#bench_txt("AB->ABBBBBBBBB.txt")
bench_txt("AB->ABBBBBBBBB.txt")
bench_txt("ABAB->ABAB.txt")
return bench_txt("ABAB->ABC.txt")
bench_txt("ABAB->ABC.txt")
return nothing
end
import_bench()

View File

@ -2,44 +2,117 @@ using MetagraphOptimization
using LIKWID
using CUDA
using UUIDs
using DataFrames
using CSV
using Random
using BenchmarkTools
using Dates
function cpu_bench(compute_function, inputs)
compute_function.(inputs[begin:10]) # make sure it's compiled
DISABLE_GPU = false
time = @elapsed Threads.@threads for i in eachindex(inputs)
@invokelatest compute_function(inputs[i])
end
rate = length(inputs) / time
return (time, rate)
function log(x...)
println(now(), " ", join(x, " ")...)
return flush(stdout)
end
function gpu_bench(compute_function, inputs)
CUDA.@sync compute_function.(inputs[begin:10]) # make sure it's compiled
results_filename = "bench_results_$(Threads.nthreads()).csv"
time = @elapsed CUDA.@sync compute_function.(inputs)
df = DataFrame(
process_name = String[],
graph_gen_time = Float64[],
optimization_time = Float64[],
function_generation_time = Float64[],
graph_nodes = Int[],
graph_edges = Int[],
graph_mem = Float64[],
cpu_threads = Int[],
n_inputs = Int[],
nflops_likwid = Int[],
cpu_time = Float64[],
cpu_std = Float64[],
cpu_rate = Float64[],
cpu_gflops = Float64[],
gpu_name = String[],
gpu_time = Float64[],
gpu_std = Float64[],
gpu_rate = Float64[],
gpu_gflops = Float64[],
)
# if they exist, read existing results and append new ones
if isfile(results_filename)
df = CSV.read(results_filename, DataFrame)
end
nInputs = 2^20
function cpu_bench(compute_function, inputs)
bench = @benchmark begin
@inbounds Threads.@threads for i in eachindex($inputs)
@invokelatest $compute_function($inputs[i])
end
end gcsample = true samples = 20 evals = 1
time = median(bench.times) / 1e9
s = std(bench.times) / 1e9
rate = length(inputs) / time
return (time, rate)
return (time, rate, s)
end
function gpu_bench(kernel!, inputs)
n = length(inputs)
outputs = CuVector{ComplexF64}()
resize!(outputs, n)
ts = 32
bs = Int(n / ts)
bench = @benchmark begin
@cuda threads = $ts blocks = $bs always_inline = true $kernel!($inputs, $outputs, $n)
CUDA.device_synchronize()
end gcsample = true samples = 20 evals = 1
time = median(bench.times) / 1e9
s = std(bench.times) / 1e9
rate = length(inputs) / time
return (time, rate, s)
end
function bench_process(
process::MetagraphOptimization.AbstractProcessDescription,
process_name::String,
graph::DAG,
func,
io::IO = stdout;
use_likwid = true,
kernel!,
gen_time::Float64,
opt_time::Float64,
func_time::Float64;
use_likwid = false,
use_gpu = true,
)
println(io, "\n--- Benchmarking $(process) ---")
log("\n--- Benchmarking $(process_name) ---")
if DISABLE_GPU
use_gpu = false
end
NFLOPs = GraphProperties(graph).computeEffort
graph_props = GraphProperties(graph)
NFLOPs = graph_props.computeEffort
nflops_likwid = 0
if use_likwid
input = gen_process_input(process)
func(input) # compile first
# get rid of annoying output to console
oldstd = stdout
redirect_stdout(devnull)
_, events = @perfmon "FLOPS_DP" func(input)
redirect_stdout(oldstd) # recover original stdout
NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
nflops_likwid = NFLOPs
end
nInputs = 10000000 # ten million
println(io, "Generating $nInputs inputs with $(Threads.nthreads()) threads...")
log("Generating $nInputs inputs with $(Threads.nthreads()) threads...")
inputs = Vector{typeof(gen_process_input(process))}()
resize!(inputs, nInputs)
@ -48,35 +121,76 @@ function bench_process(
push!(processes, copy(process))
end
Threads.@threads for i in eachindex(inputs)
@inbounds Threads.@threads for i in eachindex(inputs)
inputs[i] = gen_process_input(processes[Threads.nthreads()])
end
println(io, "Benchmarking CPU with $(Threads.nthreads()) threads...")
(time_cpu, rate_cpu) = cpu_bench(func, inputs)
flops_cpu = (rate_cpu * NFLOPs) / 1024^3
log("Benchmarking CPU with $(Threads.nthreads()) threads...")
(time_cpu, rate_cpu, std_cpu) = cpu_bench(func, inputs)
flops_cpu = (rate_cpu * NFLOPs) / 10^9
println(io, "Benchmarking GPU...")
cuInputs = CuArray(inputs)
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
flops_gpu = (rate_gpu * NFLOPs) / 1024^3
time_gpu = 0.0
std_gpu = 0.0
rate_gpu = 0.0
flops_gpu = 0.0
gpu_name = "none"
if use_gpu
log("Benchmarking GPU...")
gpu_name = "$(name(first(CUDA.devices())))"
cuInputs = CuArray(inputs)
(time_gpu, rate_gpu, std_gpu) = gpu_bench(kernel!, cuInputs)
flops_gpu = (rate_gpu * NFLOPs) / 10^9
else
log("Skipping GPU...")
end
println(io, "\nBenchmark Summary for $(process):")
log("\nBenchmark Summary for $(process):")
if use_likwid
println(io, "Measured FLOPS by LIKWID: $NFLOPs")
log("Measured FLOPS by LIKWID: $NFLOPs")
else
println(io, "Total graph compute effort: $NFLOPs")
log("Total graph compute effort: $NFLOPs")
end
println(io, "Total input size: $(bytes_to_human_readable(Base.summarysize(inputs)))")
println(io, "CPU, $(Threads.nthreads()) threads")
println(io, " Time: $time_cpu")
println(io, " Rate: $rate_cpu")
println(io, " GFLOPS: $flops_cpu")
println(io, "GPU, $(name(first(CUDA.devices())))")
println(io, " Time: $time_gpu")
println(io, " Rate: $rate_gpu")
return println(io, " GFLOPS: $flops_gpu")
log("Total input size: $(bytes_to_human_readable(Base.summarysize(inputs)))")
log("CPU, $(Threads.nthreads()) threads")
log(" Time: $time_cpu")
log(" Rate: $rate_cpu")
log(" GFLOPS: $flops_cpu")
if use_gpu
log("GPU, $gpu_name")
log(" Time: $time_gpu")
log(" Rate: $rate_gpu")
log(" GFLOPS: $flops_gpu")
end
if (process_name != "warmup")
push!(
df,
Dict(
:process_name => process_name,
:graph_gen_time => gen_time,
:optimization_time => opt_time,
:function_generation_time => func_time,
:graph_nodes => graph_props.noNodes,
:graph_edges => graph_props.noEdges,
:graph_mem => MetagraphOptimization.mem(graph),
:cpu_threads => Threads.nthreads(),
:n_inputs => nInputs,
:nflops_likwid => nflops_likwid,
:cpu_time => time_cpu,
:cpu_std => std_cpu,
:cpu_rate => rate_cpu,
:cpu_gflops => flops_cpu,
:gpu_name => gpu_name,
:gpu_time => time_gpu,
:gpu_std => std_gpu,
:gpu_rate => rate_gpu,
:gpu_gflops => flops_gpu,
),
)
end
return nothing
end
# use "mock" machine that only uses cpu
@ -92,57 +206,67 @@ machine = Machine(
],
[-1.0;;],
)
optimizer = ReductionOptimizer()
# sadly cannot put these in functions because the world age must increase after the function is created which happens only in the global scope
# compton
process = parse_process("ke->ke", QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
## -- WARMUP TO COMPILE FUNCTIONS first
#=
optimizer = RandomWalkOptimizer(MersenneTwister(0))
# 2-photon compton
process = parse_process("ke->kke", QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
gen_time = @elapsed graph = gen_graph(process)
opt_time = @elapsed optimize!(optimizer, graph, 200)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
bench_process(process, "warmup", graph, compute_func, kernel!, gen_time, opt_time, func_gen_time)
# 3-photon compton
process = parse_process("ke->kkke", QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
# AB->AB
process = parse_process("AB->AB", ABCModel())
graph = parse_dag("input/AB->AB.txt", ABCModel())
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
optimizer = ReductionOptimizer()
# AB->AB^3
process = parse_process("AB->ABBB", ABCModel())
graph = parse_dag("input/AB->ABBB.txt", ABCModel())
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
bench_process(process, "warmup", graph, compute_func, kernel!, gen_time, opt_time, func_gen_time)
=#
## -- WARMUP END
exit(0)
optimizer = ReductionOptimizer()
# 4-photon compton
process = parse_process("ke->kkkke", QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke"]
# AB->AB^5
process = parse_process("AB->ABBBBB", ABCModel())
graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
for process_str in processes
# compton
process = parse_process(process_str, QEDModel())
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
bench_process(process, "$process not optimized", graph, compute_func, kernel!, gen_time, 0.0, func_gen_time)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, kernel!, gen_time, opt_time, func_gen_time)
CSV.write(results_filename, df)
end
processes = ["AB->AB", "AB->ABBB", "AB->ABBBBB", "AB->ABBBBBBB"]
for process_str in processes
# AB->AB
process = parse_process(process_str, ABCModel())
gen_time = @elapsed graph = parse_dag("input/$(process_str).txt", ABCModel())
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
bench_process(process, "$process not optimized", graph, compute_func, kernel!, gen_time, 0.0, func_gen_time)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, kernel!, gen_time, opt_time, func_gen_time)
CSV.write(results_filename, df)
end

View File

@ -0,0 +1,163 @@
using MetagraphOptimization
using CUDA
using UUIDs
using BenchmarkTools
using DataFrames
using CSV
# Output CSV for the stepwise-reduction benchmark.
results_filename = "bench_results_reduction_steps.csv"
# One row per (process, optimization step): graph properties plus CPU timings.
# Column order must match the positional tuples pushed in the loops below.
df = DataFrame(
    threads = Int[],
    process = String[],
    operations = Int[],                       # optimizer operations applied so far
    cumulative_optimization_time = Float64[],
    graph_nodes = Int[],
    graph_edges = Int[],
    graph_ce = Float64[],                     # graph compute effort
    graph_dt = Float64[],                     # graph data (transfer) size
    graph_ci = Float64[],                     # compute intensity
    gen_func_t = Float64[],                   # compute-function generation time
    cpu_compile_t = Float64[],
    cpu_st_t = Float64[],                     # single-threaded CPU time
    cpu_mt_t = Float64[],                     # multi-threaded CPU time
    gpu_compile_t = Float64[],                # always 0.0 in this CPU-only script
    gpu_t = Float64[],                        # always 0.0 in this CPU-only script
)
# if they exist, read existing results and append new ones
# NOTE(review): this replaces df wholesale; if the on-disk file was written with
# an older column layout, later push! calls will fail — TODO confirm schema stability.
if isfile(results_filename)
    df = CSV.read(results_filename, DataFrame)
end
"""
    bench(func, inputs)

Benchmark `func` over `inputs` and return a named tuple with the compile time
and the mean single-/multi-threaded CPU runtimes in seconds. The GPU fields
are placeholders (always `0.0`) in this CPU-only variant.
"""
function bench(func, inputs)
    # The first call triggers compilation; time it separately.
    t_compile = @elapsed func(inputs[1])
    st_run = @benchmark $func.($inputs)
    mt_run = @benchmark Threads.@threads for idx in eachindex($inputs)
        $func($inputs[idx])
    end
    # BenchmarkTools reports times in nanoseconds; convert to seconds.
    return (
        cpu_compile_time = t_compile,
        gpu_compile_time = 0.0,
        cpu_single_thread_time = mean(st_run.times) / 1e9,
        cpu_multi_thread_time = mean(mt_run.times) / 1e9,
        gpu_time = 0.0,
    )
end
# preparation of machine
# "Mock" machine consisting of a single NUMA node; only the CPU is exercised here.
machine = Machine(
    [
        MetagraphOptimization.NumaNode(
            0,
            1,
            MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
            -1.0,
            UUIDs.uuid1(),
        ),
    ],
    [-1.0;;],
)
# bench and produce data
n_inputs = 50_000
optimizer = ReductionOptimizer()
# (process, STEPSIZE) pairs; STEPSIZE = optimizer operations applied per measurement.
# NOTE(review): "ke->kke" appears twice (step 5 and step 1) — presumably intentional
# to collect both coarse- and fine-grained data; confirm.
processes = [("ke->kke", 5), ("ke->ke", 1), ("ke->kke", 1), ("ke->kkke", 1), ("ke->kkkke", 1), ("ke->kkkkke", 1)]
for (process_str, STEPSIZE) in processes
    n = 0              # optimizer operations applied so far
    opt_time_cum = 0   # cumulative optimization time; Int initially, Float64 after first +=
    process = parse_process(process_str, QEDModel())
    graph = gen_graph(process)
    inputs = [gen_process_input(process) for _ in 1:n_inputs]
    # Result discarded — presumably warms up function generation before timing; confirm.
    get_compute_function(graph, process, machine)
    # Measure, then apply STEPSIZE optimizer operations; repeat until fixpoint.
    while true
        func_gen_time = @elapsed func = get_compute_function(graph, process, machine)
        res = bench(func, inputs)
        graph_properties = get_properties(graph)
        # Positional tuple — order must match the DataFrame column order.
        push!(
            df,
            (
                Threads.nthreads(),
                process_str,
                n,
                opt_time_cum,
                graph_properties.noNodes,
                graph_properties.noEdges,
                graph_properties.computeEffort,
                graph_properties.data,
                graph_properties.computeIntensity,
                func_gen_time,
                res.cpu_compile_time,
                res.cpu_single_thread_time,
                res.cpu_multi_thread_time,
                res.gpu_compile_time,
                res.gpu_time,
            ),
        )
        # Persist after every step so partial results survive a crash.
        CSV.write(results_filename, df)
        if fixpoint_reached(optimizer, graph)
            break
        end
        opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
        n += STEPSIZE
    end
end
CSV.write(results_filename, df)
for (process_str, STEPSIZE) in [("AB->AB", 1), ("AB->ABBB", 1), ("AB->ABBBBB", 1)]
n = 0
opt_time_cum = 0
process = parse_process(process_str, ABCModel())
graph = parse_dag("input/$process_str.txt", ABCModel())
inputs = [gen_process_input(process) for _ in 1:n_inputs]
get_compute_function(graph, process, machine)
while true
func_gen_time = @elapsed func = get_compute_function(graph, process, machine)
res = bench(func, inputs)
graph_properties = get_properties(graph)
push!(
df,
(
Threads.nthreads(),
process_str,
n,
opt_time_cum,
graph_properties.noNodes,
graph_properties.noEdges,
graph_properties.computeEffort,
graph_properties.data,
graph_properties.computeIntensity,
func_gen_time,
res.cpu_compile_time,
res.cpu_single_thread_time,
res.cpu_multi_thread_time,
res.gpu_compile_time,
res.gpu_time,
),
)
CSV.write(results_filename, df)
if fixpoint_reached(optimizer, graph)
break
end
opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
n += STEPSIZE
end
end
CSV.write(results_filename, df)

View File

@ -0,0 +1,208 @@
using MetagraphOptimization
using CUDA
using UUIDs
using BenchmarkTools
using DataFrames
using CSV
using Dates
results_filename = "bench_results_reduction_steps_gpu.csv"
df = DataFrame(
threads = Int[],
process = String[],
operations = Int[],
cumulative_optimization_time = Float64[],
graph_nodes = Int[],
graph_edges = Int[],
graph_ce = Float64[],
graph_dt = Float64[],
graph_ci = Float64[],
cpu_st_t = Float64[],
cpu_st_s = Float64[],
cpu_mt_t = Float64[],
cpu_mt_s = Float64[],
cpu_mem = Float64[],
gpu_t = Float64[],
gpu_s = Float64[],
gpu_mem = Float64[],
)
# if they exist, read existing results and append new ones
if isfile(results_filename)
df = CSV.read(results_filename, DataFrame)
end
# Timestamped logger that flushes stdout so progress appears promptly in batch logs.
# (Shadows Base.log within this script.)
function log(x...)
    message = join(x, " ")
    println(now(), " ", message)
    flush(stdout)
    return nothing
end
"""
    bench(func, kernel!, inputs)

Benchmark `inputs` on the GPU (launching `kernel!` with `@cuda`) and on the
CPU (broadcasting `func` single-threaded and via a `Threads.@threads` loop).

Returns a named tuple with median times and standard deviations in seconds,
plus the allocated bytes reported by BenchmarkTools for each variant.
"""
function bench(func, kernel!, inputs)
    # -- GPU part --
    n = length(inputs)
    cu_inputs = CuVector(inputs)
    cu_outputs = CuVector{ComplexF64}()
    resize!(cu_outputs, n)
    ts = 32
    # Int(n / ts) throws InexactError unless n is a multiple of 32
    # (n_inputs = 2^16 in this script, so the division is exact).
    bs = Int(n / ts)
    # Renamed from `bench` to avoid shadowing this function's own name.
    gpu_run = @benchmark begin
        @cuda threads = $ts blocks = $bs always_inline = true $kernel!($cu_inputs, $cu_outputs, $n)
        CUDA.device_synchronize()
    end gcsample = true samples = 20 evals = 1
    # BenchmarkTools times are in nanoseconds; convert to seconds.
    gpu_time = median(gpu_run.times) / 1e9
    gpu_std = std(gpu_run.times) / 1e9
    gpu_mem = gpu_run.memory
    # -- CPU part --
    single_thread = @benchmark $func.($inputs)
    multi_threaded = @benchmark Threads.@threads for i in eachindex($inputs)
        $func($inputs[i])
    end
    cpu_st_time = median(single_thread.times) / 1e9
    cpu_st_std = std(single_thread.times) / 1e9
    cpu_mt_time = median(multi_threaded.times) / 1e9
    cpu_mt_std = std(multi_threaded.times) / 1e9
    # BUGFIX: was `std(single_thread.times)` (a time std, not memory); report
    # allocated bytes like the GPU branch does.
    cpu_mem = single_thread.memory
    return (
        cpu_single_thread_time = cpu_st_time,
        cpu_single_thread_std = cpu_st_std,
        cpu_multi_thread_time = cpu_mt_time,
        cpu_multi_thread_std = cpu_mt_std,
        cpu_mem = cpu_mem,
        gpu_time = gpu_time,
        gpu_std = gpu_std,
        gpu_mem = gpu_mem,
    )
end
log("Available CUDA devices:")
for dev in CUDA.devices()
display(dev)
end
# preparation of machine
machine = Machine(
[
MetagraphOptimization.NumaNode(
0,
1,
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
-1.0,
UUIDs.uuid1(),
),
],
[-1.0;;],
)
# bench and produce data
n_inputs = 2^16
optimizer = ReductionOptimizer()
processes = [("ke->ke", 1), ("ke->kke", 1), ("ke->kkke", 1), ("ke->kkkke", 5)]
for (process_str, STEPSIZE) in processes
n = 0
opt_time_cum = 0
process = parse_process(process_str, QEDModel())
graph = gen_graph(process)
inputs = Vector([gen_process_input(process) for _ in 1:n_inputs])
get_compute_function(graph, process, machine)
while true
func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
res = bench(func, kernel!, inputs)
graph_properties = get_properties(graph)
push!(
df,
(
Threads.nthreads(),
process_str,
n,
opt_time_cum,
graph_properties.noNodes,
graph_properties.noEdges,
graph_properties.computeEffort,
graph_properties.data,
graph_properties.computeIntensity,
res.cpu_single_thread_time,
res.cpu_single_thread_std,
res.cpu_multi_thread_time,
res.cpu_multi_thread_std,
res.cpu_mem,
res.gpu_time,
res.gpu_std,
res.gpu_mem,
),
)
CSV.write(results_filename, df)
if fixpoint_reached(optimizer, graph)
break
end
opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
n += STEPSIZE
end
end
CSV.write(results_filename, df)
for (process_str, STEPSIZE) in [("AB->AB", 1), ("AB->ABBB", 1), ("AB->ABBBBB", 1)]
n = 0
opt_time_cum = 0
process = parse_process(process_str, ABCModel())
graph = parse_dag("input/$process_str.txt", ABCModel())
inputs = Vector([gen_process_input(process) for _ in 1:n_inputs])
get_compute_function(graph, process, machine)
while true
func = get_compute_function(graph, process, machine)
kernel! = get_cuda_kernel(graph, process, machine)
res = bench(func, kernel!, inputs)
graph_properties = get_properties(graph)
push!(
df,
(
Threads.nthreads(),
process_str,
n,
opt_time_cum,
graph_properties.noNodes,
graph_properties.noEdges,
graph_properties.computeEffort,
graph_properties.data,
graph_properties.computeIntensity,
res.cpu_single_thread_time,
res.cpu_single_thread_std,
res.cpu_multi_thread_time,
res.cpu_multi_thread_std,
res.cpu_mem,
res.gpu_time,
res.gpu_std,
res.gpu_mem,
),
)
CSV.write(results_filename, df)
if fixpoint_reached(optimizer, graph)
break
end
opt_time_cum += @elapsed optimize!(optimizer, graph, STEPSIZE)
n += STEPSIZE
end
end
CSV.write(results_filename, df)

232
examples/qed_bench_tape.jl Normal file
View File

@ -0,0 +1,232 @@
using MetagraphOptimization
using LIKWID
using UUIDs
using DataFrames
using CSV
using Random
using BenchmarkTools
using Dates
# Timestamped logger that flushes stdout so progress appears promptly in batch logs.
# (Shadows Base.log within this script.)
function log(x...)
    message = join(x, " ")
    println(now(), " ", message)
    flush(stdout)
    return nothing
end
results_filename = "bench_results_tape_$(Threads.nthreads()).csv"
df = DataFrame(
process_name = String[],
graph_gen_time = Float64[],
optimization_time = Float64[],
function_generation_time = Float64[],
graph_nodes = Int[],
graph_edges = Int[],
graph_mem = Float64[],
cpu_threads = Int[],
n_inputs = Int[],
nflops_likwid = Int[],
cpu_time = Float64[],
cpu_rate = Float64[],
cpu_gflops = Float64[],
cpu_std = Float64[],
gpu_name = String[],
gpu_time = Float64[],
gpu_std = Float64[],
gpu_rate = Float64[],
gpu_gflops = Float64[],
)
# if they exist, read existing results and append new ones
if isfile(results_filename)
df = CSV.read(results_filename, DataFrame)
end
nInputs = 1_000_000
# use "mock" machine that only uses cpu
machine = Machine(
[
MetagraphOptimization.NumaNode(
0,
1,
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
-1.0,
UUIDs.uuid1(),
),
],
[-1.0;;],
)
"""
    cpu_bench(tape, inputs)

Benchmark multi-threaded tape execution over all `inputs`.
Returns `(mean_time_s, inputs_per_second, std_s)`.
"""
function cpu_bench(tape, inputs)
    run = @benchmark begin
        @inbounds Threads.@threads for k in eachindex($inputs)
            execute_tape($tape, $inputs[k])
        end
    end gcsample = true seconds = 300
    # BenchmarkTools times are in nanoseconds; convert to seconds.
    t_mean = mean(run.times) / 1e9
    t_std = std(run.times) / 1e9
    throughput = length(inputs) / t_mean
    return (t_mean, throughput, t_std)
end
"""
    bench_process(process, process_name, graph, gen_time, opt_time, io = stdout; use_likwid = false)

Generate the execution tape for `graph`, benchmark multi-threaded CPU tape
execution over `nInputs` freshly generated inputs, and append one result row
to the global `df` (skipped when `process_name == "warmup"`).

When `use_likwid` is set, the FLOP count is measured with LIKWID's FLOPS_DP
group instead of taken from the graph's compute-effort estimate.
"""
function bench_process(
    process::MetagraphOptimization.AbstractProcessDescription,
    process_name::String,
    graph::DAG,
    gen_time::Float64,
    opt_time::Float64,
    io::IO = stdout;   # NOTE(review): currently unused — kept for interface compatibility
    use_likwid = false,
)
    log("\n--- Benchmarking $(process_name) ---")

    func_time = @elapsed tape = gen_tape(graph, process, machine)
    graph_props = GraphProperties(graph)
    NFLOPs = graph_props.computeEffort
    nflops_likwid = 0
    if use_likwid
        input = gen_process_input(process)
        # get rid of annoying output to console
        oldstd = stdout
        redirect_stdout(devnull)
        _, events = @perfmon "FLOPS_DP" execute_tape(tape, input)
        redirect_stdout(oldstd) # recover original stdout
        NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
        nflops_likwid = NFLOPs
    end

    log("Generating $nInputs inputs with $(Threads.nthreads()) threads...")
    inputs = Vector{typeof(gen_process_input(process))}()
    resize!(inputs, nInputs)
    # One process copy per thread so threads don't share mutable state.
    processes = Vector{typeof(process)}()
    for i in 1:Threads.nthreads()
        push!(processes, copy(process))
    end
    # BUGFIX: was `processes[Threads.nthreads()]`, which made every thread use
    # the same (last) copy, defeating the per-thread copies. `@threads` uses
    # static scheduling, so threadid() is stable within each iteration here.
    @inbounds Threads.@threads for i in eachindex(inputs)
        inputs[i] = gen_process_input(processes[Threads.threadid()])
    end

    log("Benchmarking CPU with $(Threads.nthreads()) threads...")
    (time_cpu, rate_cpu, std_cpu) = cpu_bench(tape, inputs)
    flops_cpu = (rate_cpu * NFLOPs) / 10^9

    log("\nBenchmark Summary for $(process):")
    if use_likwid
        log("Measured FLOPS by LIKWID: $NFLOPs")
    else
        log("Total graph compute effort: $NFLOPs")
    end
    log("Total input size: $(bytes_to_human_readable(Base.summarysize(inputs)))")
    log("CPU, $(Threads.nthreads()) threads")
    log(" Time: $time_cpu")
    log(" Rate: $rate_cpu")
    log(" GFLOPS: $flops_cpu")

    # Warmup runs exist only to compile; don't record them.
    if (process_name != "warmup")
        push!(
            df,
            Dict(
                :process_name => process_name,
                :graph_gen_time => gen_time,
                :optimization_time => opt_time,
                :function_generation_time => func_time,
                :graph_nodes => graph_props.noNodes,
                :graph_edges => graph_props.noEdges,
                :graph_mem => MetagraphOptimization.mem(graph),
                :cpu_threads => Threads.nthreads(),
                :n_inputs => nInputs,
                :nflops_likwid => nflops_likwid,
                :cpu_time => time_cpu,
                :cpu_std => std_cpu,
                :cpu_rate => rate_cpu,
                :cpu_gflops => flops_cpu,
                :gpu_name => "none",
                :gpu_time => 0.0,
                :gpu_std => 0.0,
                :gpu_rate => 0.0,
                :gpu_gflops => 0.0,
            ),
        )
    end
    return nothing
end
"""
    bench_qed(process_string::String, skip_unoptimized = false)

Parse a QED process, benchmark the tape of its freshly generated DAG (unless
`skip_unoptimized` is set), then reduce the DAG to a fixpoint and benchmark
the reduced tape.
"""
function bench_qed(process_string::String, skip_unoptimized = false)
    proc = parse_process(process_string, QEDModel())
    t_gen = @elapsed dag = gen_graph(proc)
    skip_unoptimized || bench_process(proc, "$proc not optimized tape", dag, t_gen, 0.0)
    t_opt = @elapsed optimize_to_fixpoint!(ReductionOptimizer(), dag)
    bench_process(proc, "$proc reduced tape", dag, t_gen, t_opt)
    return nothing
end
"""
    bench_abc(process_string::String)

Parse an ABC process, read its DAG from the `input/` directory, benchmark the
unoptimized tape, then reduce the DAG to a fixpoint and benchmark again.
"""
function bench_abc(process_string::String)
    proc = parse_process(process_string, ABCModel())
    t_gen = @elapsed dag = parse_dag("input/$process_string.txt", ABCModel())
    bench_process(proc, "$proc not optimized tape", dag, t_gen, 0.0)
    t_opt = @elapsed optimize_to_fixpoint!(ReductionOptimizer(), dag)
    bench_process(proc, "$proc reduced tape", dag, t_gen, t_opt)
    return nothing
end
# sadly cannot put these in functions because the world age must increase after the function is created which happens only in the global scope
## -- WARMUP TO COMPILE FUNCTIONS first
# Warmup rows are tagged "warmup" and therefore not written to df (see bench_process).
optimizer = ReductionOptimizer()
process = parse_process("ke->kke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
bench_process(process, "warmup", graph, gen_time, opt_time)
# AB->AB^3
process = parse_process("AB->ABBB", ABCModel())
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
bench_process(process, "warmup", graph, gen_time, opt_time)
## -- WARMUP END
# Actual runs; results are persisted to CSV after every process so partial
# data survives an interruption.
# compton
bench_qed("ke->ke")
CSV.write(results_filename, df)
bench_qed("ke->kke")
CSV.write(results_filename, df)
bench_qed("ke->kkke")
CSV.write(results_filename, df)
bench_qed("ke->kkkke")
CSV.write(results_filename, df)
bench_qed("ke->kkkkke")
CSV.write(results_filename, df)
bench_qed("ke->kkkkkke")
CSV.write(results_filename, df)
bench_qed("ke->kkkkkkke")
CSV.write(results_filename, df)
bench_abc("AB->AB")
CSV.write(results_filename, df)
bench_abc("AB->ABBB")
CSV.write(results_filename, df)
bench_abc("AB->ABBBBB")
CSV.write(results_filename, df)

144
examples/qed_gen_bench.jl Normal file
View File

@ -0,0 +1,144 @@
using MetagraphOptimization
using DataFrames
using CSV
using BenchmarkTools
using StatsBase
results_filename = "qed_gen_results_$(Threads.nthreads()).csv"
df = DataFrame(
process_name = String[],
cpu_threads = Int[],
graph_gen_samples = Int[],
graph_gen_mean = Float64[],
graph_gen_std = Float64[],
graph_gen_median = Float64[],
graph_nodes = Int[],
graph_data_nodes = Int[],
graph_u_nodes = Int[],
graph_v_nodes = Int[],
graph_s1_nodes = Int[],
graph_s2_nodes = Int[],
graph_edges = Int[],
graph_nodes_reduced = Int[],
graph_data_nodes_reduced = Int[],
graph_u_nodes_reduced = Int[],
graph_v_nodes_reduced = Int[],
graph_s1_nodes_reduced = Int[],
graph_s2_nodes_reduced = Int[],
graph_edges_reduced = Int[],
graph_mem = Float64[],
graph_mem_reduced = Float64[],
graph_elapsed_reduce = Float64[],
)
"""
    bench_process(process::AbstractString; warmup = false, optimize = true)

Benchmark DAG generation for `process`, optionally reduce the generated graph
to a fixpoint, and append one result row to the global `df`. When `warmup` is
set, everything runs for compilation but no row is recorded.
"""
function bench_process(process::AbstractString; warmup = false, optimize = true)
    println("Benchmarking $process...")
    proc = parse_process(process, QEDModel())
    # Time graph generation, then build one concrete graph to inspect.
    gen_trial = @benchmark gen_graph($proc) gcsample = true seconds = 5
    dag = gen_graph(proc)
    props_before = GraphProperties(dag)
    nodes_before = countmap(typeof.(dag.nodes))
    mem_before = Base.summarysize(dag)
    # Sentinel values reported when `optimize` is false.
    t_reduce = -1.0
    nodes_after = Dict()
    mem_after = -1.0
    props_after = GraphProperties()
    if optimize
        t_reduce = @elapsed optimize_to_fixpoint!(ReductionOptimizer(), dag)
        props_after = GraphProperties(dag)
        nodes_after = countmap(typeof.(dag.nodes))
        mem_after = Base.summarysize(dag)
    end
    warmup && return nothing
    push!(
        df,
        Dict(
            :process_name => process,
            :cpu_threads => Threads.nthreads(),
            :graph_gen_samples => length(gen_trial.times),
            :graph_gen_mean => mean(gen_trial.times),
            :graph_gen_std => std(gen_trial.times),
            :graph_gen_median => median(gen_trial.times),
            :graph_nodes => props_before.noNodes,
            :graph_data_nodes => get(nodes_before, DataTaskNode{DataTask}, 0),
            :graph_u_nodes => get(nodes_before, ComputeTaskNode{ComputeTaskQED_U}, 0),
            :graph_v_nodes => get(nodes_before, ComputeTaskNode{ComputeTaskQED_V}, 0),
            :graph_s1_nodes => get(nodes_before, ComputeTaskNode{ComputeTaskQED_S1}, 0),
            :graph_s2_nodes => get(nodes_before, ComputeTaskNode{ComputeTaskQED_S2}, 0),
            :graph_edges => props_before.noEdges,
            :graph_nodes_reduced => props_after.noNodes,
            :graph_data_nodes_reduced => get(nodes_after, DataTaskNode{DataTask}, 0),
            :graph_u_nodes_reduced => get(nodes_after, ComputeTaskNode{ComputeTaskQED_U}, 0),
            :graph_v_nodes_reduced => get(nodes_after, ComputeTaskNode{ComputeTaskQED_V}, 0),
            :graph_s1_nodes_reduced => get(nodes_after, ComputeTaskNode{ComputeTaskQED_S1}, 0),
            :graph_s2_nodes_reduced => get(nodes_after, ComputeTaskNode{ComputeTaskQED_S2}, 0),
            :graph_edges_reduced => props_after.noEdges,
            :graph_mem => mem_before,
            :graph_mem_reduced => mem_after,
            :graph_elapsed_reduce => t_reduce,
        ),
    )
    return nothing
end
processes = [
("ke->ke", true),
("ke->kke", true),
("ke->kkke", true),
("ke->kkkke", true),
("ke->kkkkke", true),
("ke->kkkkkke", true),
("ke->kkkkkkke", true),
#("ke->kkkkkkkke", false),
#("ke->kkkkkkkkke", false),
]
df = DataFrame(
process_name = String[],
cpu_threads = Int[],
graph_gen_samples = Int[],
graph_gen_mean = Float64[],
graph_gen_std = Float64[],
graph_gen_median = Float64[],
graph_nodes = Int[],
graph_data_nodes = Int[],
graph_u_nodes = Int[],
graph_v_nodes = Int[],
graph_s1_nodes = Int[],
graph_s2_nodes = Int[],
graph_edges = Int[],
graph_nodes_reduced = Int[],
graph_data_nodes_reduced = Int[],
graph_u_nodes_reduced = Int[],
graph_v_nodes_reduced = Int[],
graph_s1_nodes_reduced = Int[],
graph_s2_nodes_reduced = Int[],
graph_edges_reduced = Int[],
graph_mem = Float64[],
graph_mem_reduced = Float64[],
graph_elapsed_reduce = Float64[],
)
# if they exist, read existing results and append new ones
# NOTE(review): this replaces the freshly constructed df (including the identical
# definition just above — that second definition is redundant); if the on-disk
# schema differs from the current columns, later push! calls will fail.
if isfile(results_filename)
    df = CSV.read(results_filename, DataFrame)
end
# Warmup run compiles everything before the recorded runs; its result is discarded.
bench_process("ke->kke", warmup = true)
for (process, opt) in processes
    bench_process(process, optimize = opt)
    # Persist after every process so partial results survive interruption.
    CSV.write(results_filename, df)
end

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,63 @@
Bootstrap: docker
From: nvidia/cuda:12.3.1-devel-ubuntu20.04
%labels
Requires CUDA driver 470.57+.
%environment
export LANG=C
%runscript
nvidia-smi
./run.sh
%post
. /.singularity.d/env/10-docker*.sh
apt-get update
apt-get install -y pciutils
DEBIAN_FRONTEND='noninteractive' apt-get -y -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' install build-essential cuda-compat-12-3 libibverbs-dev ibverbs-utils gcc wget git libcap2-bin
apt-get -y autoremove; apt-get -y clean
cd /tmp
# install slurm
: ${SLURM_VERSION:=17-02-11-1}
wget https://github.com/SchedMD/slurm/archive/slurm-${SLURM_VERSION}.tar.gz
tar -xf slurm-${SLURM_VERSION}.tar.gz
cd slurm-slurm-${SLURM_VERSION}
./configure --prefix=/usr/ --sysconfdir=/etc/slurm --localstatedir=/var --disable-debug
make -C contribs/pmi2 -j$(nproc) install
cd ..
rm -rf slurm-*
# install julia
cd ~
wget https://julialang-s3.julialang.org/bin/linux/x64/1.9/julia-1.9.4-linux-x86_64.tar.gz
tar zxvf julia-1.9.4-linux-x86_64.tar.gz
mv julia-1.9.4/ /opt/julia-1.9.4
#mkdir /usr/local/bin
ln -s /opt/julia-1.9.4/bin/julia /usr/local/bin/julia
#Add nvidia driver paths to the environment variables
echo "\n #Nvidia driver paths \n" >> /environment
echo 'export PATH="/nvbin:$PATH"' >> /environment
echo 'export LD_LIBRARY_PATH="/nvlib:$LD_LIBRARY_PATH"' >> /environment
#Add CUDA paths
echo "\n #Cuda paths \n" >> /environment
echo 'export CPATH="/usr/local/cuda/include:$CPATH"' >> /environment
echo 'export PATH="/usr/local/cuda/bin:$PATH"' >> /environment
echo 'export LD_LIBRARY_PATH="/usr/local/cuda/lib64:$LD_LIBRARY_PATH"' >> /environment
echo 'export CUDA_HOME="/usr/local/cuda"' >> /environment
# install likwid
VERSION=5.3.0
wget http://ftp.fau.de/pub/likwid/likwid-$VERSION.tar.gz
tar -xaf likwid-$VERSION.tar.gz
cd likwid-$VERSION
# accessdaemon doesn't work because of permissions
sed -i 's/ACCESSMODE = accessdaemon/ACCESSMODE = perf_event/g' config.mk
make -j4
make -j4 install
echo 'export LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH"' >> /environment

View File

@ -0,0 +1,22 @@
#!/bin/bash
#SBATCH --array=1-32
#SBATCH --job-name=qed_bench
#SBATCH --partition=intel
#SBATCH --time=16:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=32
#SBATCH --mem=16GB
#SBATCH --output=simulation-%A-%a.out
#SBATCH --error=simulation-%A-%a.err
cd $HOME/repos/metagraph_optimization
module load singularity
module load git
printf "Current git commit hash: " > results/git.txt
git rev-parse HEAD >> results/git.txt
git status >> results/git.txt
singularity exec experiments/CUDA_container.sif ./experiments/run_qed_exec.sh $SLURM_ARRAY_TASK_ID

View File

@ -0,0 +1,24 @@
#!/bin/bash
#SBATCH --job-name=qed_bench
#SBATCH --partition=casus_a100
#SBATCH --account=casus
#SBATCH --time=8:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=32
#SBATCH --gres=gpu:1
#SBATCH --mem=256GB
#SBATCH --output=simulation-%A-%a.out
#SBATCH --error=simulation-%A-%a.err
cd $HOME/repos/metagraph_optimization
module load singularity
module load git
module load cuda/12.1
printf "Current git commit hash: " > results/git.txt
git rev-parse HEAD >> results/git.txt
git status >> results/git.txt
singularity exec --nv experiments/CUDA_container.sif ./experiments/run_qed_exec.sh 32

View File

@ -0,0 +1,24 @@
#!/bin/bash
#SBATCH --job-name=qed_bench
#SBATCH --partition=casus_a100
#SBATCH --account=casus
#SBATCH --time=8:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=128
#SBATCH --gres=gpu:4
#SBATCH --mem=2048GB
#SBATCH --output=simulation-%A-%a.out
#SBATCH --error=simulation-%A-%a.err
cd $HOME/repos/metagraph_optimization
module load singularity
module load git
module load cuda/12.1
printf "Current git commit hash: " > results/git_reduce_bench_gpu.txt
git rev-parse HEAD >> results/git_reduce_bench_gpu.txt
git status >> results/git_reduce_bench_gpu.txt
singularity exec --nv experiments/CUDA_container.sif ./experiments/full_node.sh

View File

@ -0,0 +1,22 @@
#!/bin/bash
#SBATCH --array=1-8
#SBATCH --job-name=qed_diag_gen
#SBATCH --partition=intel
#SBATCH --time=4:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=64GB
#SBATCH --output=simulation-%A-%a.out
#SBATCH --error=simulation-%A-%a.err
cd $HOME/repos/metagraph_optimization
module load singularity
module load git
printf "Current git commit hash: " > results/git.txt
git rev-parse HEAD >> results/git.txt
git status >> results/git.txt
singularity exec experiments/CUDA_container.sif ./experiments/run_gen_diagram.sh $SLURM_ARRAY_TASK_ID

View File

@ -0,0 +1,21 @@
#!/bin/bash
#SBATCH --job-name=qed_bench
#SBATCH --partition=intel
#SBATCH --time=48:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=32
#SBATCH --mem=24GB
#SBATCH --output=simulation-%A-%a.out
#SBATCH --error=simulation-%A-%a.err
cd $HOME/repos/metagraph_optimization
module load singularity
module load git
printf "Current git commit hash: " > results/git_reduce_bench.txt
git rev-parse HEAD >> results/git_reduce_bench.txt
git status >> results/git_reduce_bench.txt
singularity exec experiments/CUDA_container.sif ./experiments/run_reduce_bench.sh

View File

@ -0,0 +1,24 @@
#!/bin/bash
#SBATCH --job-name=qed_bench
#SBATCH --partition=casus_a100
#SBATCH --account=casus
#SBATCH --time=16:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=32
#SBATCH --gres=gpu:1
#SBATCH --mem=256GB
#SBATCH --output=simulation-%A-%a.out
#SBATCH --error=simulation-%A-%a.err
cd $HOME/repos/metagraph_optimization
module load singularity
module load git
module load cuda/12.1
printf "Current git commit hash: " > results/git_reduce_bench_gpu.txt
git rev-parse HEAD >> results/git_reduce_bench_gpu.txt
git status >> results/git_reduce_bench_gpu.txt
singularity exec --nv experiments/CUDA_container.sif ./experiments/run_reduce_bench_gpu.sh

25
experiments/full_node.sh Executable file
View File

@ -0,0 +1,25 @@
#!/bin/bash
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
LOG_FILE="$SCRIPT_DIR/../julia_full_node.log"
cd $SCRIPT_DIR/..
echo "Writing system info..."
# collect some information of the used node and system
uname -a > results/system_full_node.txt
julia --version > results/julia_full_node.txt
lscpu > results/cpu_full_node.txt
nvidia-smi > results/cuda_gpu_full_node.txt
lsblk > results/storage_full_node.txt
lspci > results/pci_full_node.txt
#echo "Initiating julia..."
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
echo "Benchmarking Full Node 128 Threads + *GPUs*"
julia --project -O3 --threads=128 examples/full_node_bench.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"

27
experiments/run_gen_diagram.sh Executable file
View File

@ -0,0 +1,27 @@
#!/bin/bash
# first arg = number of threads
i=$1
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
LOG_FILE="$SCRIPT_DIR/../julia.log"
cd $SCRIPT_DIR/..
echo "Writing system info..."
# collect some information of the used node and system
uname -a > results/system.txt
julia --version > results/julia.txt
lscpu > results/cpu.txt
lsblk > results/storage.txt
lspci > results/pci.txt
echo "Initiating julia..."
julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("BenchmarkTools"); Pkg.add("StatsBase")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
echo "Benchmarking with $i threads..."
julia --project -O3 --threads=$i examples/qed_gen_bench.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"

31
experiments/run_qed_exec.sh Executable file
View File

@ -0,0 +1,31 @@
#!/bin/bash
# Run the QED benchmark suite (normal and tape variant) with a given number of
# Julia threads, collecting basic system info alongside the results.

# first arg = number of threads
i=$1

# Fail early if the thread count is missing instead of running julia --threads= .
[ -n "$i" ] || { echo "usage: $0 <num_threads>" >&2; exit 1; }

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

LOG_FILE="$SCRIPT_DIR/../julia_$i.log"

# Quote the path (robust against spaces) and abort if the repo root is unreachable.
cd "$SCRIPT_DIR/.." || exit 1

echo "Writing system info..."

# collect some information of the used node and system
uname -a > "results/system_$i.txt"
julia --version > "results/julia_$i.txt"
lscpu > "results/cpu_$i.txt"
nvidia-smi > "results/cuda_gpu_$i.txt"
lsblk > "results/storage_$i.txt"
lspci > "results/pci_$i.txt"

echo "Initiating julia..."
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"

echo "Benchmarking $i Threads"
julia --project -O3 --threads=$i examples/qed_bench.jl >> "$LOG_FILE" 2>&1 || echo "-- Something went wrong, check logs --"

echo "Benchmarking Tape variant $i Threads"
julia --project -O3 --threads=$i examples/qed_bench_tape.jl >> "$LOG_FILE" 2>&1 || echo "-- Something went wrong, check logs --"

24
experiments/run_reduce_bench.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/bash
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
LOG_FILE="$SCRIPT_DIR/../julia_bench_reduce.log"
cd $SCRIPT_DIR/..
echo "Writing system info..."
# collect some information of the used node and system
uname -a > results/system_bench_reduce.txt
julia --version > results/julia_bench_reduce.txt
lscpu > results/cpu_bench_reduce.txt
nvidia-smi > results/cuda_gpu_bench_reduce.txt
lsblk > results/storage_bench_reduce.txt
lspci > results/pci_bench_reduce.txt
#echo "Initiating julia..."
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
echo "Benchmarking Reduction 32 Threads"
julia --project -O3 --threads=32 examples/qed_bench_reduction_steps.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"

View File

@ -0,0 +1,25 @@
#!/bin/bash
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
LOG_FILE="$SCRIPT_DIR/../julia_bench_reduce_gpu.log"
cd $SCRIPT_DIR/..
echo "Writing system info..."
# collect some information of the used node and system
uname -a > results/system_bench_reduce_gpu.txt
julia --version > results/julia_bench_reduce_gpu.txt
lscpu > results/cpu_bench_reduce_gpu.txt
nvidia-smi > results/cuda_gpu_bench_reduce_gpu.txt
lsblk > results/storage_bench_reduce_gpu.txt
lspci > results/pci_bench_reduce_gpu.txt
#echo "Initiating julia..."
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
echo "Benchmarking Reduction 32 Threads, *GPU*"
julia --project -O3 --threads=32 examples/qed_bench_reduction_steps_gpu.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"

Binary file not shown.

Binary file not shown.

3
images/README.md Normal file
View File

@ -0,0 +1,3 @@
# Images
This folder collects benchmark result plots in PDF form that may be useful for future reference.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
images/cpu_vs_gpu_abc.pdf Normal file

Binary file not shown.

BIN
images/cpu_vs_gpu_qed.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,3 @@
# Full Node Benchmarks
Done using `experiments/full_node.sh` on hemera, using 128 threads and 4 A100 GPUs.

Binary file not shown.

BIN
images/gen_memory.pdf Normal file

Binary file not shown.

BIN
images/gen_times.pdf Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,5 @@
# Optimizer Plots
Plots of FusionOptimizer, ReductionOptimizer, SplitOptimizer, RandomWalkOptimizer, and GreedyOptimizer, executed on a system with 32 threads and an A30 GPU.
Benchmarked using `notebooks/optimizers.ipynb`.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More