diff --git a/Project.toml b/Project.toml index 7ec3999..53cf237 100644 --- a/Project.toml +++ b/Project.toml @@ -5,5 +5,6 @@ version = "0.1.0" [deps] DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" diff --git a/src/graph.jl b/src/graph.jl index 6c514e0..49195fa 100644 --- a/src/graph.jl +++ b/src/graph.jl @@ -1,8 +1,7 @@ mutable struct DAG - nodes::Vector{Node} - edges::Vector{Edge} + nodes::Set{Node} end function DAG() - return DAG(Vector{Node}(), Vector{Edge}()) + return DAG(Set{Node}()) end diff --git a/src/graph_functions.jl b/src/graph_functions.jl index 501a827..9672dea 100644 --- a/src/graph_functions.jl +++ b/src/graph_functions.jl @@ -3,6 +3,14 @@ using DataStructures in(node::Node, graph::DAG) = node in graph.nodes in(edge::Edge, graph::DAG) = edge in graph.edges +function is_parent(potential_parent, node) + return potential_parent in node.parents +end + +function is_child(potential_child, node) + return potential_child in node.children +end + function ==(n1::Node, n2::Node, g::DAG) if typeof(n1) != typeof(n2) return false @@ -11,43 +19,49 @@ function ==(n1::Node, n2::Node, g::DAG) return false end - return n1.task == n2.task && children(n1, g) == children(n2, g) + return n1.task == n2.task && children(n1) == children(n2) end # children = prerequisite nodes, nodes that need to execute before the task, edges point into this task -function children(graph::DAG, node::Node) - if !(node in graph) - error("Cannot get children of a node that's not in the given graph") - end - - result = Vector{Node}() - for edge in graph.edges - if (edge.edge[2][] == node) - push!(result, edge.edge[1][]) - end - end - - return result +function children(node::Node) + return copy(node.children) end # parents = subsequent nodes, nodes that need this node to execute, edges point from this task -function parents(graph::DAG, 
node::Node) - if !(node in graph) - error("Cannot get parents of a node that's not in the given graph") - end +function parents(node::Node) + return copy(node.parents) +end - result = Vector{Node}() - for edge in graph.edges - if (edge.edge[1][] == node) - push!(result, edge.edge[2][]) +# siblings = all children of any parents, no duplicates, does not include the node itself +function siblings(node::Node) + result = Set{Node}() + for parent in parents(node) + for sibling in children(parent) + if (sibling != node) + push!(result, sibling) + end end end return result end -is_entry_node(graph::DAG, node::Node) = length(children(graph, node)) == 0 -is_exit_node(graph::DAG, node::Node) = length(parents(graph, node)) == 0 +# partners = all parents of any children, no duplicates, does not include the node itself +function partners(node::Node) + result = Set{Node}() + for child in children(node) + for partner in parents(child) + if (partner != node) + push!(result, partner) + end + end + end + + return result +end + +is_entry_node(node::Node) = length(children(node)) == 0 +is_exit_node(node::Node) = length(parents(node)) == 0 function insert_node(graph::DAG, node::Node) push!(graph.nodes, node) @@ -55,59 +69,59 @@ function insert_node(graph::DAG, node::Node) end function insert_edge(graph::DAG, edge::Edge) - push!(graph.edges, edge) + # edge points from child to parent + push!(edge.edge[1][].parents, edge.edge[2][]) + push!(edge.edge[2][].children, edge.edge[1][]) + return edge end function remove_node(graph::DAG, node::Node) - deleteat!(graph.nodes, findall(x->x==node, graph.nodes)) + delete!(graph.nodes, node) return nothing end function remove_edge(graph::DAG, edge::Edge) - deleteat!(graph.edges, findall(x->x==edge, graph.edges)) + delete!(edge.edge[1][].parents, edge.edge[2][]) + delete!(edge.edge[2][].children, edge.edge[1][]) return nothing end -function data(graph::DAG) - data_sum::Int64 = 0 +function graph_properties(graph::DAG) + d = 0 + ce = 0 for node in 
graph.nodes - data_sum = data_sum + data(node.task) + d += data(node.task) + ce += compute_effort(node.task) end - return data_sum -end -function compute_effort(graph::DAG) - compute_effort_sum::Int64 = 0 - for node in graph.nodes - compute_effort_sum = compute_effort_sum + compute_effort(node.task) - end - return compute_effort_sum -end + ci = ce / d -function compute_intensity(graph::DAG) - data_sum = data(graph) - if data_sum == 0 - return typemax(UInt64) - end - return compute_effort(graph) / data_sum + result = (data = d, + compute_effort = ce, + compute_intensity = ci) + return result end function get_exit_node(graph::DAG) for node in graph.nodes - if (is_exit_node(graph, node)) + if (is_exit_node(node)) return node end end error("The given graph has no exit node! It is either empty or not acyclic!") end -function can_reduce(graph::DAG, n1::Node, n2::Node) +function can_reduce(n1::Node, n2::Node) if (n1.task != n2.task) return false end - return Set(parents(graph, n1)) == Set(parents(graph, n2)) + return parents(n1) == parents(n2) +end + +function can_split(n::Node) + return length(parents(n)) > 1 end # check whether the given graph is connected @@ -120,7 +134,7 @@ function is_valid(graph::DAG) current = pop!(nodeQueue) push!(seenNodes, current) - childrenNodes = children(graph, current) + childrenNodes = children(current) for child in childrenNodes push!(nodeQueue, child) end @@ -143,34 +157,24 @@ function show_nodes(io, graph::DAG) print(io, "]") end -function show_edges(io, graph::DAG) - print(io, "[") - first = true - for e in graph.edges - if first - first = false - else - print(io, ", ") - end - print(io, e) - end - print(io, "]") -end - function show(io::IO, graph::DAG) println(io, "Graph:") print(io, " Nodes: ") + + nodeDict = Dict{Type, Int64}() + noEdges = 0 + for node in graph.nodes + if haskey(nodeDict, typeof(node.task)) + nodeDict[typeof(node.task)] = nodeDict[typeof(node.task)] + 1 + else + nodeDict[typeof(node.task)] = 1 + end + noEdges += 
length(parents(node)) + end + if length(graph.nodes) <= 20 show_nodes(io, graph) else - nodeDict = Dict{Type, Int64}() - for node in graph.nodes - if haskey(nodeDict, typeof(node.task)) - nodeDict[typeof(node.task)] = nodeDict[typeof(node.task)] + 1 - else - nodeDict[typeof(node.task)] = 1 - end - end first = true for (type, number) in zip(keys(nodeDict), values(nodeDict)) if first @@ -182,14 +186,9 @@ function show(io::IO, graph::DAG) end end println(io) - print(io, " Edges: ") - if length(graph.edges) <= 40 - show_edges(io, graph) - else - print(length(graph.edges)) - end - println(io) - println(io, " Total Compute Effort: ", compute_effort(graph)) - println(io, " Total Data Transfer: ", data(graph)) - println(io, " Total Compute Intensity: ", compute_intensity(graph)) + println(io, " Edges: ", noEdges) + properties = graph_properties(graph) + println(io, " Total Compute Effort: ", properties.compute_effort) + println(io, " Total Data Transfer: ", properties.data) + println(io, " Total Compute Intensity: ", properties.compute_intensity) end diff --git a/src/graph_optimizations.jl b/src/graph_optimizations.jl index 3c8b3d1..6aecebc 100644 --- a/src/graph_optimizations.jl +++ b/src/graph_optimizations.jl @@ -4,22 +4,23 @@ function node_fusion(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::Comp error("[Node Fusion] The given nodes are not part of the given graph") end - required_edge1 = make_edge(n1, n2) - required_edge2 = make_edge(n2, n3) - - if !(required_edge1 in graph) || !(required_edge2 in graph) + if !is_child(n1, n2) || !is_child(n2, n3) || !is_parent(n3, n2) || !is_parent(n2, n1) + # the checks are redundant but maybe a good sanity check error("[Node Fusion] The given nodes are not connected by edges which is required for node fusion") end # save children and parents - n1_children = children(graph, n1) - n2_parents = parents(graph, n2) - n3_parents = parents(graph, n3) + n1_children = children(n1) + n2_parents = parents(n2) + n3_parents = 
parents(n3) if length(n2_parents) > 1 error("[Node Fusion] The given data node has more than one parent") end + required_edge1 = make_edge(n1, n2) + required_edge2 = make_edge(n2, n3) + # remove the edges and nodes that will be replaced by the fused node remove_edge(graph, required_edge1) remove_edge(graph, required_edge2) @@ -27,7 +28,7 @@ function node_fusion(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::Comp remove_node(graph, n2) # get n3's children now so it automatically excludes n2 - n3_children = children(graph, n3) + n3_children = children(n3) remove_node(graph, n3) # create new node with the fused compute task @@ -65,10 +66,10 @@ function node_reduction(graph::DAG, n1::Node, n2::Node) end # save n2 parents and children - n2_children = children(graph, n2) - n2_parents = parents(graph, n2) + n2_children = children(n2) + n2_parents = parents(n2) - if n2_children != children(graph, n1) + if n2_children != children(n1) error("[Node Reduction] The given nodes do not have equal prerequisite nodes which is required for node reduction") end @@ -92,8 +93,8 @@ function node_split(graph::DAG, n1::Node) error("[Node Split] The given node is not part of the given graph") end - n1_parents = parents(graph, n1) - n1_children = children(graph, n1) + n1_parents = parents(n1) + n1_children = children(n1) if length(n1_parents) <= 1 error("[Node Split] The given node does not have multiple parents which is required for node split") @@ -117,32 +118,62 @@ end function generate_options(graph::DAG) options = (fusions = Vector{Tuple{ComputeTaskNode, DataTaskNode, ComputeTaskNode}}(), reductions = Vector{Vector{Node}}(), - splits = Vector{Tuple{Node}}()) + splits = Vector{Node}()) # find possible node fusions for node in graph.nodes if (typeof(node) <: DataTaskNode) - node_parents = parents(graph, node) + node_parents = parents(node) if length(node_parents) != 1 # data node can only have a single parent continue end - parent_node = node_parents[1] + parent_node = 
pop!(node_parents) - node_children = children(graph, node) + node_children = children(node) if length(node_children) != 1 # this node is an entry node or has multiple children which should not be possible continue end - child_node = node_children[1] + child_node = pop!(node_children) push!(options.fusions, (child_node, node, parent_node)) end end # find possible node reductions + visitedNodes = Set{Node}() + + for node in graph.nodes + if (node in visitedNodes) + continue + end + + push!(visitedNodes, node) + + reductionVector = missing + # possible reductions are with nodes that are partners, i.e. parents of children + for partner in partners(node) + if can_reduce(node, partner) + if ismissing(reductionVector) + # only when there's at least one reduction partner, insert the vector + reductionVector = Vector{Node}() + push!(reductionVector, node) + push!(options.reductions, reductionVector) + end + + push!(reductionVector, partner) + push!(visitedNodes, partner) + end + end + end # find possible node splits + for node in graph.nodes + if (can_split(node)) + push!(options.splits, node) + end + end return options end diff --git a/src/import.jl b/src/import.jl index 7bd3066..f927f24 100644 --- a/src/import.jl +++ b/src/import.jl @@ -1,5 +1,6 @@ -# functions for importing DAGs from a file +using Printf +# functions for importing DAGs from a file regex_a = r"^[A-C]\d+$" # Regex for the initial particles regex_c = r"^[A-C]\(([^']*),([^']*)\)$" # Regex for the combinations of 2 particles regex_m = r"^M\(([^']*),([^']*),([^']*)\)$" # Regex for the combinations of 3 particles @@ -22,10 +23,12 @@ end function importTxt(filename::String) file = open(filename, "r") + println("Opened file") nodes_string = readline(file) nodes = parse_nodes(nodes_string) close(file) + println("Read file") graph = DAG() @@ -36,7 +39,15 @@ function importTxt(filename::String) # remember the data out nodes for connection dataOutNodes = Dict() - for node in nodes + println("Building graph") + noNodes 
= 0 + nodesToRead = length(nodes) + while !isempty(nodes) + node = popfirst!(nodes) + noNodes += 1 + if (noNodes % 100 == 0) + @printf "\rReading Nodes... %.2f%%" (100. * noNodes / nodesToRead) + end if occursin(regex_a, node) # add nodes and edges for the state reading to u(P(Particle)) data_in = insert_node(graph, make_node(DataTask(4))) # read particle data node @@ -116,7 +127,7 @@ function importTxt(filename::String) insert_edge(graph, make_edge(data_out, sum_node)) elseif occursin(regex_plus, node) - println("Found sum node, end") + println("\rReading Nodes Complete ") else error("Unknown node '", node, "' while reading from file ", filename) end diff --git a/src/metagraph_optimization.jl b/src/metagraph_optimization.jl index 23ced8e..6713395 100644 --- a/src/metagraph_optimization.jl +++ b/src/metagraph_optimization.jl @@ -14,7 +14,7 @@ include("import.jl") export Node, Edge, ComputeTaskNode, DataTaskNode, DAG export AbstractTask, AbstractComputeTask, AbstractDataTask, DataTask, ComputeTaskP, ComputeTaskS1, ComputeTaskS2, ComputeTaskV, ComputeTaskU, ComputeTaskSum, FusedComputeTask -export make_node, make_edge, insert_node, insert_edge, is_entry_node, is_exit_node, parents, children, compute, data, compute_effort, compute_intensity, get_exit_node, is_valid +export make_node, make_edge, insert_node, insert_edge, is_entry_node, is_exit_node, parents, children, compute, graph_properties, get_exit_node, is_valid export node_fusion, node_reduction, node_split, generate_options export importTxt diff --git a/src/nodes.jl b/src/nodes.jl index e352fc0..1cb389f 100644 --- a/src/nodes.jl +++ b/src/nodes.jl @@ -7,22 +7,27 @@ abstract type Node end struct DataTaskNode <: Node task::AbstractDataTask + parents::Set{Node} + children::Set{Node} id::Base.UUID # need a unique identifier unique to every *constructed* node end struct ComputeTaskNode <: Node task::AbstractComputeTask + parents::Set{Node} + children::Set{Node} id::Base.UUID # need a unique identifier unique to 
every *constructed* node end function DataTaskNode(t::AbstractDataTask) - return DataTaskNode(t, UUIDs.uuid1(rng)) + return DataTaskNode(t, Set{Node}(), Set{Node}(), UUIDs.uuid1(rng)) end function ComputeTaskNode(t::AbstractComputeTask) - return ComputeTaskNode(t, UUIDs.uuid1(rng)) + return ComputeTaskNode(t, Set{Node}(), Set{Node}(), UUIDs.uuid1(rng)) end struct Edge + # edge points from child to parent edge::Union{Tuple{Ref{DataTaskNode}, Ref{ComputeTaskNode}}, Tuple{Ref{ComputeTaskNode}, Ref{DataTaskNode}}} end