Add utility and trie documentation

This commit is contained in:
Anton Reinhard 2023-08-28 18:14:21 +02:00
parent 42076c4576
commit 8a6e3864eb
2 changed files with 69 additions and 8 deletions

View File

@ -1,26 +1,49 @@
"""
NodeIdTrie
# helper struct for NodeTrie
Helper struct for [`NodeTrie`](@ref). After the Trie's first level, every Trie level contains the vector of nodes that had children up to that level, and the TrieNode's children by UUID of the node's children.
"""
mutable struct NodeIdTrie
value::Vector{Node}
children::Dict{UUID, NodeIdTrie}
end
# Trie data structure for node reduction, inserts nodes by children
# Assumes that given nodes have ordered vectors of children (see sort_node)
# First level is the task type and thus does not have a value
# Should be constructed with all Types that will be used
"""
NodeTrie
Trie data structure for node reduction, inserts nodes by children.
Assumes that given nodes have ordered vectors of children (see [`sort_node`](@ref)).
First insertion level is the node's own task type and thus does not have a value (every node has a task type).
See also: [`insert!`](@ref) and [`collect`](@ref)
"""
mutable struct NodeTrie
children::Dict{DataType, NodeIdTrie}
end
"""
NodeTrie()
Constructor for an empty [`NodeTrie`](@ref).
"""
function NodeTrie()
return NodeTrie(Dict{DataType, NodeIdTrie}())
end
"""
NodeIdTrie()
Constructor for an empty [`NodeIdTrie`](@ref).
"""
function NodeIdTrie()
return NodeIdTrie(Vector{Node}(), Dict{UUID, NodeIdTrie}())
end
"""
insert_helper!(trie::NodeIdTrie, node::Node, depth::Int)
Insert the given node into the trie. The depth is used to iterate through the trie layers, while the function calls itself recursively until it has run through all children of the node.
"""
function insert_helper!(trie::NodeIdTrie, node::Node, depth::Int)
if (length(node.children) == depth)
push!(trie.value, node)
@ -36,6 +59,11 @@ function insert_helper!(trie::NodeIdTrie, node::Node, depth::Int)
return insert_helper!(trie.children[id], node, depth)
end
"""
insert!(trie::NodeTrie, node::Node)
Insert the given node into the trie. It's sorted by its type in the first layer, then by its children in the following layers.
"""
function insert!(trie::NodeTrie, node::Node)
t = typeof(node.task)
if (!haskey(trie.children, t))
@ -44,6 +72,11 @@ function insert!(trie::NodeTrie, node::Node)
return insert_helper!(trie.children[typeof(node.task)], node, 0)
end
"""
collect_helper(trie::NodeIdTrie, acc::Set{Vector{Node}})
Collects the Vectors of this [`NodeIdTrie`](@ref) node and all its children and puts them in the `acc` argument.
"""
function collect_helper(trie::NodeIdTrie, acc::Set{Vector{Node}})
if (length(trie.value) >= 2)
push!(acc, trie.value)
@ -55,7 +88,11 @@ function collect_helper(trie::NodeIdTrie, acc::Set{Vector{Node}})
return nothing
end
# returns all sets of multiple nodes that have accumulated in leaves
"""
collect(trie::NodeTrie)
Return all sets of at least 2 [`Node`](@ref)s that have accumulated in leaves of the trie.
"""
function collect(trie::NodeTrie)
acc = Set{Vector{Node}}()
for (t, child) in trie.children

View File

@ -20,15 +20,31 @@ function bytes_to_human_readable(bytes)
return string(round(bytes, sigdigits = 4), " ", units[unit_index])
end
"""
lt_nodes(n1::Node, n2::Node)
Less-Than comparison between nodes. Uses the nodes' ids to sort.
"""
function lt_nodes(n1::Node, n2::Node)
return n1.id < n2.id
end
"""
sort_node!(node::Node)
Sort the nodes' parents and children vectors. The vectors are mostly very short so sorting does not take a lot of time.
Sorted nodes are required to make the finding of [`NodeReduction`](@ref)s a lot faster using the [`Trie`](@ref) data structure.
"""
function sort_node!(node::Node)
sort!(node.children, lt = lt_nodes)
return sort!(node.parents, lt = lt_nodes)
end
"""
mem(graph::DAG)
Return the memory footprint of the graph in bytes. This should give the same result as `Base.summarysize(graph)`, but a lot faster.
"""
function mem(graph::DAG)
size = 0
size += Base.summarysize(graph.nodes, exclude = Union{Node})
@ -54,12 +70,20 @@ function mem(graph::DAG)
return size += sizeof(diff)
end
"""
    mem(op::Operation)

Return the memory footprint of the operation in bytes. Used in [`mem(graph::DAG)`](@ref).

Unlike a plain `Base.summarysize(op)`, the traversal excludes `Node` objects; following all references would yield (almost) the size of the entire graph.
"""
mem(op::Operation) = Base.summarysize(op; exclude = Union{Node})
"""
    mem(node::Node)

Return the memory footprint of the node in bytes. Used in [`mem(graph::DAG)`](@ref).

Unlike a plain `Base.summarysize(node)`, the traversal excludes `Node` and `Operation` objects; following all references would yield (almost) the size of the entire graph.
"""
mem(node::Node) = Base.summarysize(node; exclude = Union{Node, Operation})