Improve parsing performance and get_operations

This commit is contained in:
Anton Reinhard 2023-08-18 17:18:01 +02:00
parent 1d0511ecb7
commit 9cac6e76be
6 changed files with 139 additions and 61 deletions

View File

@ -3,3 +3,4 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"

View File

@ -16,12 +16,15 @@ function bench_txt(filepath::String, bench::Bool = true)
println(name, ":")
g = parse_abc(filepath)
print(g)
println(" Graph size in memory: ", bytes_to_human_readable(Base.summarysize(g)))
#println(" Graph size in memory: ", bytes_to_human_readable(Base.summarysize(g)))
if (bench)
@btime parse_abc($filepath)
println()
end
println(" Get Operations: ")
@time get_operations(g)
println()
end
function import_bench()

View File

@ -40,9 +40,9 @@ function parse_abc(filename::String, verbose::Bool = false)
if (verbose) println("Estimating ", estimate_no_nodes, " Nodes") end
sizehint!(graph.nodes, estimate_no_nodes)
sum_node = insert_node!(graph, make_node(ComputeTaskSum()), false)
global_data_out = insert_node!(graph, make_node(DataTask(10)), false)
insert_edge!(graph, make_edge(sum_node, global_data_out), false)
sum_node = insert_node!(graph, make_node(ComputeTaskSum()), false, false)
global_data_out = insert_node!(graph, make_node(DataTask(10)), false, false)
insert_edge!(graph, make_edge(sum_node, global_data_out), false, false)
# remember the data out nodes for connection
dataOutNodes = Dict()
@ -58,16 +58,16 @@ function parse_abc(filename::String, verbose::Bool = false)
end
if occursin(regex_a, node)
# add nodes and edges for the state reading to u(P(Particle))
data_in = insert_node!(graph, make_node(DataTask(4)), false) # read particle data node
compute_P = insert_node!(graph, make_node(ComputeTaskP()), false) # compute P node
data_Pu = insert_node!(graph, make_node(DataTask(6)), false) # transfer data from P to u
compute_u = insert_node!(graph, make_node(ComputeTaskU()), false) # compute U node
data_out = insert_node!(graph, make_node(DataTask(3)), false) # transfer data out from u
data_in = insert_node!(graph, make_node(DataTask(4)), false, false) # read particle data node
compute_P = insert_node!(graph, make_node(ComputeTaskP()), false, false) # compute P node
data_Pu = insert_node!(graph, make_node(DataTask(6)), false, false) # transfer data from P to u
compute_u = insert_node!(graph, make_node(ComputeTaskU()), false, false) # compute U node
data_out = insert_node!(graph, make_node(DataTask(3)), false, false) # transfer data out from u
insert_edge!(graph, make_edge(data_in, compute_P), false)
insert_edge!(graph, make_edge(compute_P, data_Pu), false)
insert_edge!(graph, make_edge(data_Pu, compute_u), false)
insert_edge!(graph, make_edge(compute_u, data_out), false)
insert_edge!(graph, make_edge(data_in, compute_P), false, false)
insert_edge!(graph, make_edge(compute_P, data_Pu), false, false)
insert_edge!(graph, make_edge(data_Pu, compute_u), false, false)
insert_edge!(graph, make_edge(compute_u, data_out), false, false)
# remember the data_out node for future edges
dataOutNodes[node] = data_out
@ -77,37 +77,37 @@ function parse_abc(filename::String, verbose::Bool = false)
in1 = capt.captures[1]
in2 = capt.captures[2]
compute_v = insert_node!(graph, make_node(ComputeTaskV()), false)
data_out = insert_node!(graph, make_node(DataTask(5)), false)
compute_v = insert_node!(graph, make_node(ComputeTaskV()), false, false)
data_out = insert_node!(graph, make_node(DataTask(5)), false, false)
if (occursin(regex_c, capt.captures[1]))
# put an S node after this input
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), false)
data_S_v = insert_node!(graph, make_node(DataTask(5)), false)
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), false, false)
data_S_v = insert_node!(graph, make_node(DataTask(5)), false, false)
insert_edge!(graph, make_edge(dataOutNodes[capt.captures[1]], compute_S), false)
insert_edge!(graph, make_edge(compute_S, data_S_v), false)
insert_edge!(graph, make_edge(dataOutNodes[capt.captures[1]], compute_S), false, false)
insert_edge!(graph, make_edge(compute_S, data_S_v), false, false)
insert_edge!(graph, make_edge(data_S_v, compute_v), false)
insert_edge!(graph, make_edge(data_S_v, compute_v), false, false)
else
insert_edge!(graph, make_edge(dataOutNodes[capt.captures[1]], compute_v), false)
insert_edge!(graph, make_edge(dataOutNodes[capt.captures[1]], compute_v), false, false)
end
if (occursin(regex_c, capt.captures[2]))
# i think the current generator only puts the combined particles in the first space, so this case might never be entered
# put an S node after this input
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), false)
data_S_v = insert_node!(graph, make_node(DataTask(5)), false)
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), false, false)
data_S_v = insert_node!(graph, make_node(DataTask(5)), false, false)
insert_edge!(graph, make_edge(dataOutNodes[capt.captures[2]], compute_S), false)
insert_edge!(graph, make_edge(compute_S, data_S_v), false)
insert_edge!(graph, make_edge(dataOutNodes[capt.captures[2]], compute_S), false, false)
insert_edge!(graph, make_edge(compute_S, data_S_v), false, false)
insert_edge!(graph, make_edge(data_S_v, compute_v), false)
insert_edge!(graph, make_edge(data_S_v, compute_v), false, false)
else
insert_edge!(graph, make_edge(dataOutNodes[capt.captures[2]], compute_v), false)
insert_edge!(graph, make_edge(dataOutNodes[capt.captures[2]], compute_v), false, false)
end
insert_edge!(graph, make_edge(compute_v, data_out), false)
insert_edge!(graph, make_edge(compute_v, data_out), false, false)
dataOutNodes[node] = data_out
elseif occursin(regex_m, node)
@ -118,22 +118,22 @@ function parse_abc(filename::String, verbose::Bool = false)
in3 = capt.captures[3]
# in2 + in3 with a v
compute_v = insert_node!(graph, make_node(ComputeTaskV()), false)
data_v = insert_node!(graph, make_node(DataTask(5)), false)
compute_v = insert_node!(graph, make_node(ComputeTaskV()), false, false)
data_v = insert_node!(graph, make_node(DataTask(5)), false, false)
insert_edge!(graph, make_edge(dataOutNodes[in2], compute_v), false)
insert_edge!(graph, make_edge(dataOutNodes[in3], compute_v), false)
insert_edge!(graph, make_edge(compute_v, data_v), false)
insert_edge!(graph, make_edge(dataOutNodes[in2], compute_v), false, false)
insert_edge!(graph, make_edge(dataOutNodes[in3], compute_v), false, false)
insert_edge!(graph, make_edge(compute_v, data_v), false, false)
# combine with the v of the combined other input
compute_S2 = insert_node!(graph, make_node(ComputeTaskS2()), false)
data_out = insert_node!(graph, make_node(DataTask(10)), false)
compute_S2 = insert_node!(graph, make_node(ComputeTaskS2()), false, false)
data_out = insert_node!(graph, make_node(DataTask(10)), false, false)
insert_edge!(graph, make_edge(data_v, compute_S2), false)
insert_edge!(graph, make_edge(dataOutNodes[in1], compute_S2), false)
insert_edge!(graph, make_edge(compute_S2, data_out), false)
insert_edge!(graph, make_edge(data_v, compute_S2), false, false)
insert_edge!(graph, make_edge(dataOutNodes[in1], compute_S2), false, false)
insert_edge!(graph, make_edge(compute_S2, data_out), false, false)
insert_edge!(graph, make_edge(data_out, sum_node), false)
insert_edge!(graph, make_edge(data_out, sum_node), false, false)
elseif occursin(regex_plus, node)
if (verbose)
println("\rReading Nodes Complete ")
@ -144,6 +144,9 @@ function parse_abc(filename::String, verbose::Bool = false)
end
end
#put all nodes into dirty nodes set
graph.dirtyNodes = copy(graph.nodes)
# don't actually need to read the edges
return graph
end

View File

@ -33,6 +33,10 @@ end
# Operation record holding the nodes a reduction refers to together with an identity token.
struct NodeReduction <: Operation
# the nodes this reduction refers to
input::Vector{Node}
# these inputs can (and do) get very large in large graphs, so we need a better way to compare equality between them
# only node reductions with the same id will be considered equal (the id can be copied)
id::UUID
end
struct AppliedNodeReduction <: AppliedOperation

View File

@ -61,26 +61,20 @@ end
# siblings = all children of any parents, no duplicates, does not include the node itself
#
# Returns a `Set{Node}`. A node without parents yields an empty set.
function siblings(node::Node)
    result = Set{Node}()
    for parent in node.parents
        # bulk-insert every child of this parent; `node` itself is part of
        # `parent.children` and is dropped once after the loop
        union!(result, parent.children)
    end
    delete!(result, node)
    return result
end
# partners = all parents of any children, no duplicates, does not include the node itself
#
# Returns a `Set{Node}`. A node without children yields an empty set.
function partners(node::Node)
    result = Set{Node}()
    for child in node.children
        # bulk-insert every parent of this child; `node` itself is part of
        # `child.parents` and is dropped once after the loop
        union!(result, child.parents)
    end
    delete!(result, node)
    return result
end
@ -117,7 +111,7 @@ end
# 2: keep track of what was changed for the diff (if track == true)
# 3: invalidate operation caches
function insert_node!(graph::DAG, node::Node, track=true)
function insert_node!(graph::DAG, node::Node, track=true, invalidate_cache=true)
# 1: mute
push!(graph.nodes, node)
@ -125,12 +119,13 @@ function insert_node!(graph::DAG, node::Node, track=true)
if (track) push!(graph.diff.addedNodes, node) end
# 3: invalidate caches
if (!invalidate_cache) return node end
push!(graph.dirtyNodes, node)
return node
end
function insert_edge!(graph::DAG, edge::Edge, track=true)
function insert_edge!(graph::DAG, edge::Edge, track=true, invalidate_cache=true)
node1 = edge.edge[1]
node2 = edge.edge[2]
@ -150,6 +145,8 @@ function insert_edge!(graph::DAG, edge::Edge, track=true)
if (track) push!(graph.diff.addedEdges, edge) end
# 3: invalidate caches
if (!invalidate_cache) return edge end
while !isempty(node1.operations)
invalidate_caches!(graph, first(node1.operations))
end
@ -162,7 +159,7 @@ function insert_edge!(graph::DAG, edge::Edge, track=true)
return edge
end
function remove_node!(graph::DAG, node::Node, track=true)
function remove_node!(graph::DAG, node::Node, track=true, invalidate_cache=true)
# 1: mute
#=if !(node in graph.nodes)
error("Trying to remove a node that's not in the graph")
@ -173,6 +170,8 @@ function remove_node!(graph::DAG, node::Node, track=true)
if (track) push!(graph.diff.removedNodes, node) end
# 3: invalidate caches
if (!invalidate_cache) return node end
while !isempty(node.operations)
invalidate_caches!(graph, first(node.operations))
end
@ -181,7 +180,7 @@ function remove_node!(graph::DAG, node::Node, track=true)
return nothing
end
function remove_edge!(graph::DAG, edge::Edge, track=true)
function remove_edge!(graph::DAG, edge::Edge, track=true, invalidate_cache=true)
node1 = edge.edge[1]
node2 = edge.edge[2]
@ -205,6 +204,8 @@ function remove_edge!(graph::DAG, edge::Edge, track=true)
if (track) push!(graph.diff.removedEdges, edge) end
# 3: invalidate caches
if (!invalidate_cache) return nothing end
while !isempty(node1.operations)
invalidate_caches!(graph, first(node1.operations))
end
@ -275,6 +276,41 @@ function can_reduce(n1::Node, n2::Node)
if (n1.task != n2.task)
return false
end
n1_length = length(n1.children)
n2_length = length(n2.children)
if (n1_length != n2_length)
return false
end
# this seems to be the most common case so do this first
# doing it manually is a lot faster than using the sets for a general solution
if (n1_length == 2)
if (n1.children[1] != n2.children[1])
if (n1.children[1] != n2.children[2])
return false
end
# 1_1 == 2_2
if (n1.children[2] != n2.children[1])
return false
end
return true
end
# 1_1 == 2_1
if (n1.children[2] != n2.children[2])
return false
end
return true
end
# this is simple
if (n1_length == 1)
return n1.children[1] == n2.children[1]
end
# this takes a long time
return Set(n1.children) == Set(n2.children)
end
@ -372,3 +408,25 @@ function length(diff::Diff)
removedEdges = length(diff.removedEdges)
)
end
# Fallback comparison for two operations: when no more specific method
# applies to the argument pair, the operations are treated as not equal.
==(op1::Operation, op2::Operation) = false
# Two node fusions are equal when their second input is equal.
function ==(op1::NodeFusion, op2::NodeFusion)
    # there can only be one node fusion on a given data task, so if the data task is the same, the fusion is the same
    return op1.input[2] == op2.input[2]
end

# Keep hashing consistent with `==` above: only the second input takes part,
# so equal fusions always land in the same bucket of a Set/Dict.
Base.hash(op::NodeFusion, h::UInt) = hash(op.input[2], h)
# Two node reductions are equal exactly when they carry the same id.
function ==(op1::NodeReduction, op2::NodeReduction)
    # only test the ids against each other
    return op1.id == op2.id
end

# Keep hashing consistent with `==` above: hash the id only. Without this,
# two reductions sharing an id but holding different `input` vectors would
# compare equal yet hash differently, which breaks Set/Dict lookups.
Base.hash(op::NodeReduction, h::UInt) = hash(op.id, h)
# Two node splits are equal when their inputs are equal.
function ==(op1::NodeSplit, op2::NodeSplit)
    return op1.input == op2.input
end

# Keep hashing consistent with `==` above: hash exactly what is compared.
Base.hash(op::NodeSplit, h::UInt) = hash(op.input, h)
# Convenience constructor: build a NodeReduction for `input` with a freshly
# generated id. The id is produced by `UUIDs.uuid1` using the `rng` binding
# that is defined outside this block.
function NodeReduction(input::Vector{Node})
    fresh_id = UUIDs.uuid1(rng)
    return NodeReduction(input, fresh_id)
end
# Return a new UUID built from the same underlying `value` as `id`.
# NOTE(review): if this `copy` extends `Base.copy`, it adds a method on a
# type this file does not own — confirm that is intended.
function copy(id::UUID)
    return UUID(id.value)
end

View File

@ -386,8 +386,6 @@ function clean_node!(graph::DAG, node::Node)
find_fusions!(graph, node)
find_reductions!(graph, node)
find_splits!(graph, node)
delete!(graph.dirtyNodes, node)
end
# function to generate all possible optimizations on the graph
@ -434,8 +432,19 @@ function generate_options(graph::DAG)
push!(visitedNodes, node)
reductionVector = nothing
partners_ = partners(node)
t = typeof(node)
# possible reductions are with nodes that are partners, i.e. parents of children
for partner in partners(node)
for partner in partners_
# see proof Node Reduction 1
if (t != typeof(partner))
continue
end
push!(visitedNodes, partner)
if can_reduce(node, partner)
if reductionVector === nothing
# only when there's at least one reduction partner, insert the vector
@ -444,7 +453,6 @@ function generate_options(graph::DAG)
end
push!(reductionVector, partner)
push!(visitedNodes, partner)
end
end
@ -477,9 +485,10 @@ function get_operations(graph::DAG)
generate_options(graph)
end
while !isempty(graph.dirtyNodes)
clean_node!(graph, first(graph.dirtyNodes))
for node in graph.dirtyNodes
clean_node!(graph, node)
end
empty!(graph.dirtyNodes)
return graph.possibleOperations
end