From 45e35dd52656a5878e0e5973b6925ef817e47d7d Mon Sep 17 00:00:00 2001 From: Anton Reinhard Date: Tue, 22 Aug 2023 10:29:59 +0200 Subject: [PATCH] Add bench script --- Project.toml | 1 - results/FWKHIP8999 | 164 ++++++++++++++++++++++++++ scripts/bench_threads.fish | 25 ++++ {examples => scripts}/gen_graphs.fish | 0 src/graph.jl | 2 - src/operations/find.jl | 48 +++++--- 6 files changed, 220 insertions(+), 20 deletions(-) create mode 100644 results/FWKHIP8999 create mode 100755 scripts/bench_threads.fish rename {examples => scripts}/gen_graphs.fish (100%) diff --git a/Project.toml b/Project.toml index cb516a3..e7ec467 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ authors = ["Anton Reinhard "] version = "0.1.0" [deps] -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/results/FWKHIP8999 b/results/FWKHIP8999 new file mode 100644 index 0000000..6b671c8 --- /dev/null +++ b/results/FWKHIP8999 @@ -0,0 +1,164 @@ +Commit Hash: a7fb15c95b63eee40eb7b9324d83b748053c5e13 + +Run with 32 Threads + +AB->AB: +Graph: + Nodes: Total: 34, ComputeTaskS2: 2, ComputeTaskU: 4, + ComputeTaskSum: 1, ComputeTaskV: 4, ComputeTaskP: 4, + DataTask: 19 + Edges: 37 + Total Compute Effort: 185 + Total Data Transfer: 104 + Total Compute Intensity: 1.7788461538461537 + 28.171 μs (515 allocations: 52.06 KiB) + Get Operations: +Sorting... + 0.218136 seconds (155.59 k allocations: 10.433 MiB, 3.34% gc time, 3175.93% compilation time) +Node Reductions... + 0.299127 seconds (257.04 k allocations: 16.853 MiB, 2827.94% compilation time) +Node Fusions... + 0.046983 seconds (16.70 k allocations: 1.120 MiB, 3048.15% compilation time) +Node Splits... + 0.033681 seconds (14.09 k allocations: 958.144 KiB, 3166.45% compilation time) +Waiting... + 0.000001 seconds + 1.096006 seconds (581.46 k allocations: 38.180 MiB, 0.66% gc time, 1677.26% compilation time) + rvim umount +AB->ABBB: +Graph: + Nodes: Total: 280, ComputeTaskS2: 24, ComputeTaskU: 6, + ComputeTaskV: 64, ComputeTaskSum: 1, ComputeTaskP: 6, + ComputeTaskS1: 36, DataTask: 143 + Edges: 385 + Total Compute Effort: 2007 + Total Data Transfer: 1176 + Total Compute Intensity: 1.7066326530612246 + 207.236 μs (4324 allocations: 296.87 KiB) + Get Operations: +Sorting... + 0.000120 seconds (167 allocations: 16.750 KiB) +Node Reductions... + 0.000550 seconds (1.98 k allocations: 351.234 KiB) +Node Fusions... + 0.000168 seconds (417 allocations: 83.797 KiB) +Node Splits... + 0.000150 seconds (478 allocations: 36.406 KiB) +Waiting... + 0.000000 seconds + 0.039897 seconds (16.19 k allocations: 1.440 MiB, 95.31% compilation time) + +AB->ABBBBB: +Graph: + Nodes: Total: 7854, ComputeTaskS2: 720, ComputeTaskU: 8, + ComputeTaskV: 1956, ComputeTaskSum: 1, ComputeTaskP: 8, + ComputeTaskS1: 1230, DataTask: 3931 + Edges: 11241 + Total Compute Effort: 58789 + Total Data Transfer: 34826 + Total Compute Intensity: 1.6880778728536152 + 5.787 ms (121839 allocations: 7.72 MiB) + Get Operations: +Sorting... + 0.000499 seconds (175 allocations: 17.000 KiB) +Node Reductions... + 0.002126 seconds (45.76 k allocations: 4.477 MiB) +Node Fusions... + 0.000949 seconds (7.09 k allocations: 1.730 MiB) +Node Splits... + 0.000423 seconds (8.06 k allocations: 544.031 KiB) +Waiting... + 0.000000 seconds + 0.015005 seconds (100.12 k allocations: 13.161 MiB) + +AB->ABBBBBBB: +Graph: + Nodes: Total: 438436, ComputeTaskS2: 40320, ComputeTaskU: 10, + ComputeTaskV: 109600, ComputeTaskSum: 1, ComputeTaskP: 10, + ComputeTaskS1: 69272, DataTask: 219223 + Edges: 628665 + Total Compute Effort: 3288131 + Total Data Transfer: 1949004 + Total Compute Intensity: 1.687082735592128 + 1.309 s (6826397 allocations: 430.63 MiB) + Get Operations: +Sorting... + 0.011898 seconds (197 allocations: 17.688 KiB) +Node Reductions... + 0.110569 seconds (2.78 M allocations: 225.675 MiB) +Node Fusions... + 0.022475 seconds (380.91 k allocations: 108.982 MiB) +Node Splits... + 0.011369 seconds (438.80 k allocations: 28.743 MiB) +Waiting... + 0.000001 seconds + 2.503065 seconds (5.77 M allocations: 683.968 MiB, 48.27% gc time) + +AB->ABBBBBBBBB: +Graph: + Nodes: Total: 39456442, ComputeTaskS2: 3628800, ComputeTaskU: 12, + ComputeTaskV: 9864100, ComputeTaskSum: 1, ComputeTaskP: 12, + ComputeTaskS1: 6235290, DataTask: 19728227 + Edges: 56578129 + Total Compute Effort: 295923153 + Total Data Transfer: 175407750 + Total Compute Intensity: 1.6870585991782006 + 389.495 s (626095682 allocations: 37.80 GiB) + Get Operations: +Sorting... + 1.181713 seconds (197 allocations: 17.688 KiB) +Node Reductions... + 10.057358 seconds (251.09 M allocations: 19.927 GiB) +Node Fusions... + 1.288635 seconds (34.24 M allocations: 6.095 GiB) +Node Splits... + 0.719345 seconds (39.46 M allocations: 2.522 GiB) +Waiting... + 0.000001 seconds +904.138951 seconds (519.47 M allocations: 54.494 GiB, 25.03% gc time) + +ABAB->ABAB: +Graph: + Nodes: Total: 3218, ComputeTaskS2: 288, ComputeTaskU: 8, + ComputeTaskV: 796, ComputeTaskSum: 1, ComputeTaskP: 8, + ComputeTaskS1: 504, DataTask: 1613 + Edges: 4581 + Total Compute Effort: 24009 + Total Data Transfer: 14144 + Total Compute Intensity: 1.697468891402715 + 2.691 ms (49557 allocations: 3.17 MiB) + Get Operations: +Sorting... + 0.000246 seconds (171 allocations: 16.875 KiB) +Node Reductions... + 0.001037 seconds (19.42 k allocations: 1.751 MiB) +Node Fusions... + 0.001512 seconds (3.04 k allocations: 1.027 MiB) +Node Splits... + 0.000197 seconds (3.41 k allocations: 231.078 KiB) +Waiting... + 0.000000 seconds + 0.007492 seconds (42.20 k allocations: 5.399 MiB) + +ABAB->ABC: +Graph: + Nodes: Total: 817, ComputeTaskS2: 72, ComputeTaskU: 7, + ComputeTaskV: 198, ComputeTaskSum: 1, ComputeTaskP: 7, + ComputeTaskS1: 120, DataTask: 412 + Edges: 1151 + Total Compute Effort: 6028 + Total Data Transfer: 3538 + Total Compute Intensity: 1.7037874505370265 + 602.767 μs (12544 allocations: 843.16 KiB) + Get Operations: +Sorting... + 0.000127 seconds (171 allocations: 16.875 KiB) +Node Reductions... + 0.000440 seconds (5.33 k allocations: 494.047 KiB) +Node Fusions... + 0.001761 seconds (939 allocations: 280.797 KiB) +Node Splits... + 0.000123 seconds (1.00 k allocations: 72.109 KiB) +Waiting... + 0.000000 seconds + 0.003831 seconds (11.74 k allocations: 1.451 MiB) diff --git a/scripts/bench_threads.fish b/scripts/bench_threads.fish new file mode 100755 index 0000000..738249e --- /dev/null +++ b/scripts/bench_threads.fish @@ -0,0 +1,25 @@ +#!/bin/fish +set minthreads 1 +set maxthreads 6 + +julia --project=./examples -t 4 -e 'import Pkg; Pkg.instantiate()' + +for i in $(seq $minthreads $maxthreads) + printf "(AB->AB, $i) " + julia --project=./examples -t $i -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("examples/AB->AB.txt"))' +end + +for i in $(seq $minthreads $maxthreads) + printf "(AB->ABBB, $i) " + julia --project=./examples -t $i -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("examples/AB->ABBB.txt"))' +end + +for i in $(seq $minthreads $maxthreads) + printf "(AB->ABBBBB, $i) " + julia --project=./examples -t $i -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("examples/AB->ABBBBB.txt"))' +end + +for i in $(seq $minthreads $maxthreads) + printf "(AB->ABBBBBBB, $i) " + julia --project=./examples -t $i -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("examples/AB->ABBBBBBB.txt"))' +end diff --git a/examples/gen_graphs.fish b/scripts/gen_graphs.fish similarity index 100% rename from examples/gen_graphs.fish rename to scripts/gen_graphs.fish diff --git a/src/graph.jl b/src/graph.jl index e28b2a7..40dcf39 100644 --- a/src/graph.jl +++ b/src/graph.jl @@ -53,7 +53,6 @@ struct AppliedNodeSplit <: AppliedOperation diff::Diff end - mutable struct PossibleOperations nodeFusions::Set{NodeFusion} nodeReductions::Set{NodeReduction} @@ -68,7 +67,6 @@ function PossibleOperations() ) end - # The actual state of the DAG is the initial state given by the set of nodes # but with all the operations in appliedChain applied in order mutable struct DAG diff --git a/src/operations/find.jl b/src/operations/find.jl index 1f255be..006aa99 100644 --- a/src/operations/find.jl +++ b/src/operations/find.jl @@ -3,7 +3,6 @@ using Base.Threads function insert_operation!(operations::PossibleOperations, nf::NodeFusion, locks::Dict{Node, SpinLock}) - push!(operations.nodeFusions, nf) n1 = nf.input[1]; n2 = nf.input[2]; n3 = nf.input[3] lock(locks[n1]) do; push!(nf.input[1].operations, nf); end @@ -16,7 +15,7 @@ function insert_operation!(operations::PossibleOperations, nr::NodeReduction, lo first = true for n in nr.input skip_duplicate = false - # careful here, this is a manual lock + # careful here, this is a manual lock (because of the break) lock(locks[n]) if first first = false @@ -40,35 +39,57 @@ function insert_operation!(operations::PossibleOperations, nr::NodeReduction, lo end function insert_operation!(operations::PossibleOperations, ns::NodeSplit, locks::Dict{Node, SpinLock}) - push!(operations.nodeSplits, ns) lock(locks[ns.input]) do; push!(ns.input.operations, ns); end return nothing end function nr_insertion!(operations::PossibleOperations, nodeReductions::Vector{Vector{NodeReduction}}, locks::Dict{Node, SpinLock}) + total_len = 0 for vec in nodeReductions + total_len += length(vec) + end + sizehint!(operations.nodeReductions, total_len) + + @time for vec in nodeReductions for op in vec insert_operation!(operations, op, locks) end end + println(" Time for NR insertion") return nothing end function nf_insertion!(operations::PossibleOperations, nodeFusions::Vector{Vector{NodeFusion}}, locks::Dict{Node, SpinLock}) + total_len = 0 for vec in nodeFusions + total_len += length(vec) + end + sizehint!(operations.nodeFusions, total_len) + + @time for vec in nodeFusions + union!(operations.nodeFusions, Set(vec)) for op in vec insert_operation!(operations, op, locks) end end + println(" Time for NF insertion") return nothing end function ns_insertion!(operations::PossibleOperations, nodeSplits::Vector{Vector{NodeSplit}}, locks::Dict{Node, SpinLock}) + total_len = 0 for vec in nodeSplits + total_len += length(vec) + end + sizehint!(operations.nodeSplits, total_len) + + @time for vec in nodeSplits + union!(operations.nodeSplits, Set(vec)) for op in vec insert_operation!(operations, op, locks) end end + println(" Time for NS insertion") return nothing end @@ -89,14 +110,12 @@ function generate_options(graph::DAG) nodeArray = collect(graph.nodes) # sort all nodes - println("Sorting...") - @time @threads for node in nodeArray + @threads for node in nodeArray sort_node!(node) end # --- find possible node reductions --- - println("Node Reductions...") - @time @threads for node in nodeArray + @threads for node in nodeArray # we're looking for nodes with multiple parents, those parents can then potentially reduce with one another if (length(node.parents) <= 1) continue @@ -129,8 +148,7 @@ function generate_options(graph::DAG) schedule(nr_task) # --- find possible node fusions --- - println("Node Fusions...") - @time @threads for node in nodeArray + @threads for node in nodeArray if (typeof(node) <: DataTaskNode) if length(node.parents) != 1 # data node can only have a single parent @@ -156,8 +174,7 @@ function generate_options(graph::DAG) schedule(nf_task) # find possible node splits - println("Node Splits...") - @time @threads for node in nodeArray + @threads for node in nodeArray if (can_split(node)) push!(generatedSplits[threadid()], NodeSplit(node)) end @@ -169,12 +186,9 @@ function generate_options(graph::DAG) empty!(graph.dirtyNodes) - println("Waiting...") - @time begin - wait(nr_task) - wait(nf_task) - wait(ns_task) - end + wait(nr_task) + wait(nf_task) + wait(ns_task) return nothing end