Various optimizations, Readme update, Bench script for imports

Anton Reinhard 2023-06-22 14:36:44 +02:00
parent f71748838f
commit 1ac619f3a0
9 changed files with 155 additions and 26 deletions

@@ -1,9 +1,10 @@
name = "metagraph_optimization"
uuid = "3e869610-d48d-4942-ba70-c1b702a33ca4"
authors = ["Anton Reinhard <anton.reinhard@wandelbots.com>"]
authors = ["Anton Reinhard <anton.reinhard@proton.me>"]
version = "0.1.0"
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

@@ -19,3 +19,75 @@ Problems:
- Reduce the memory footprint of the graph; are the UUIDs too large?
- Memory layout of Nodes: they should lie linearly in memory, but right now they are probably heap-allocated.
- Add scaling functions
## Benchmarks of graphs
For graphs AB->AB^n:
- Number of ComputeTaskSum should always be 1
- Number of ComputeTaskS2 should always be (n+1)!
- Number of ComputeTaskU should always be (n+3), see the quick check below
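These formulas agree with the measured node counts in the benchmark output further down. A quick standalone check (not part of the repository), reading n as the number of outgoing B particles, e.g. n = 5 for AB->ABBBBB:
```julia
# (n, measured ComputeTaskS2, measured ComputeTaskU) taken from the output below
for (n, s2, u) in [(1, 2, 4), (3, 24, 6), (5, 720, 8), (7, 40320, 10)]
    @assert factorial(n + 1) == s2   # e.g. n = 5: 6! == 720
    @assert n + 3 == u               # e.g. n = 5: 8
end
```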
Times are from my home machine: AMD Ryzen 9 7900X3D, 64 GB DDR5 RAM @ 6000 MHz.
```
$ julia --project examples/import_bench.jl
AB->AB:
Graph:
Nodes: Total: 34, DataTask: 19, ComputeTaskP: 4, ComputeTaskS2: 2, ComputeTaskV: 4, ComputeTaskU: 4, ComputeTaskSum: 1
Edges: 37
Total Compute Effort: 185
Total Data Transfer: 102
Total Compute Intensity: 1.8137254901960784
Graph size in memory: 8.3594 KiB
11.362 μs (522 allocations: 29.70 KiB)
AB->ABBB:
Graph:
Nodes: Total: 280, DataTask: 143, ComputeTaskP: 6, ComputeTaskS2: 24, ComputeTaskV: 64, ComputeTaskU: 6, ComputeTaskSum: 1, ComputeTaskS1: 36
Edges: 385
Total Compute Effort: 2007
Total Data Transfer: 828
Total Compute Intensity: 2.4239130434782608
Graph size in memory: 88.2188 KiB
95.234 μs (4781 allocations: 270.82 KiB)
AB->ABBBBB:
Graph:
Nodes: Total: 7854, DataTask: 3931, ComputeTaskP: 8, ComputeTaskS2: 720, ComputeTaskV: 1956, ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 1230
Edges: 11241
Total Compute Effort: 58789
Total Data Transfer: 23244
Total Compute Intensity: 2.5292118396145242
Graph size in memory: 2.0988 MiB
2.810 ms (136432 allocations: 7.57 MiB)
AB->ABBBBBBB:
Graph:
Nodes: Total: 438436, DataTask: 219223, ComputeTaskP: 10, ComputeTaskS2: 40320, ComputeTaskV: 109600, ComputeTaskU: 10, ComputeTaskSum: 1, ComputeTaskS1: 69272
Edges: 628665
Total Compute Effort: 3288131
Total Data Transfer: 1297700
Total Compute Intensity: 2.53381444093396
Graph size in memory: 118.4037 MiB
463.082 ms (7645256 allocations: 422.57 MiB)
ABAB->ABAB:
Graph:
Nodes: Total: 3218, DataTask: 1613, ComputeTaskP: 8, ComputeTaskS2: 288, ComputeTaskV: 796, ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 504
Edges: 4581
Total Compute Effort: 24009
Total Data Transfer: 9494
Total Compute Intensity: 2.528860332841795
Graph size in memory: 891.375 KiB
1.155 ms (55467 allocations: 3.09 MiB)
ABAB->ABC:
Graph:
Nodes: Total: 817, DataTask: 412, ComputeTaskP: 7, ComputeTaskS2: 72, ComputeTaskV: 198, ComputeTaskU: 7, ComputeTaskSum: 1, ComputeTaskS1: 120
Edges: 1151
Total Compute Effort: 6028
Total Data Transfer: 2411
Total Compute Intensity: 2.5002073828287017
Graph size in memory: 225.0625 KiB
286.583 μs (13996 allocations: 804.48 KiB)
```

28 examples/import_bench.jl Normal file

@@ -0,0 +1,28 @@
using metagraph_optimization
using BenchmarkTools
using Base
using Base.Filesystem

function bench_txt(filepath::String)
    name = basename(filepath)
    name, _ = splitext(name)
    println(name, ":")
    g = import_txt(filepath)
    print(g)
    println(" Graph size in memory: ", bytes_to_human_readable(Base.summarysize(g)))
    @btime import_txt($filepath)
    println()
end

function main()
    bench_txt("examples/AB->AB.txt")
    bench_txt("examples/AB->ABBB.txt")
    bench_txt("examples/AB->ABBBBB.txt")
    bench_txt("examples/AB->ABBBBBBB.txt")
    #bench_txt("examples/AB->ABBBBBBBBB.txt")
    bench_txt("examples/ABAB->ABAB.txt")
    bench_txt("examples/ABAB->ABC.txt")
end

main()
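One remark on the script above: the `$filepath` interpolation in `@btime` is the usual BenchmarkTools idiom to keep non-constant global lookup out of the measurement. A minimal standalone sketch of the difference (not part of this commit):
```julia
using BenchmarkTools

x = rand(1000)
@btime sum($x)   # interpolated: times sum() on the concrete vector only
@btime sum(x)    # not interpolated: also pays for accessing the non-constant global x
```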

@@ -70,8 +70,8 @@ end
function insert_edge(graph::DAG, edge::Edge)
# edge points from child to parent
push!(edge.edge[1][].parents, edge.edge[2][])
push!(edge.edge[2][].children, edge.edge[1][])
push!(edge.edge[1].parents, edge.edge[2])
push!(edge.edge[2].children, edge.edge[1])
return edge
end
@@ -82,8 +82,8 @@ function remove_node(graph::DAG, node::Node)
end
function remove_edge(graph::DAG, edge::Edge)
delete!(edge.edge[1][].parents, edge.edge[2][])
delete!(edge.edge[2][].children, edge.edge[1][])
filter!(x -> x != edge.edge[2], edge.edge[1].parents)
filter!(x -> x != edge.edge[1], edge.edge[2].children)
return nothing
end
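A side note on the new Vector-based removal: unlike `delete!` on a `Set`, `filter!` walks the whole vector and removes every matching element in place, which should be cheap for the typically short parent/child lists. A standalone sketch:
```julia
v = [1, 2, 3, 2]
filter!(x -> x != 2, v)   # mutates v, dropping every element equal to 2
@assert v == [1, 3]
```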
@@ -175,6 +175,7 @@ function show(io::IO, graph::DAG)
if length(graph.nodes) <= 20
show_nodes(io, graph)
else
print("Total: ", length(graph.nodes), ", ")
first = true
for (type, number) in zip(keys(nodeDict), values(nodeDict))
if first

@@ -20,17 +20,24 @@ function parse_edges(input::AbstractString)
return output
end
function importTxt(filename::String)
function import_txt(filename::String, verbose::Bool = isinteractive())
file = open(filename, "r")
println("Opened file")
if (verbose) println("Opened file") end
nodes_string = readline(file)
nodes = parse_nodes(nodes_string)
close(file)
println("Read file")
if (verbose) println("Read file") end
graph = DAG()
# estimate total number of nodes
# try to slightly overestimate so no resizing is necessary
# data nodes are not included in length(nodes) and there are a few more than compute nodes
estimate_no_nodes = round(Int, length(nodes) * 4)
if (verbose) println("Estimating ", estimate_no_nodes, " Nodes") end
sizehint!(graph.nodes, estimate_no_nodes)
sum_node = insert_node(graph, make_node(ComputeTaskSum()))
global_data_out = insert_node(graph, make_node(DataTask(10)))
@@ -39,14 +46,14 @@ function importTxt(filename::String)
# remember the data out nodes for connection
dataOutNodes = Dict()
println("Building graph")
if (verbose) println("Building graph") end
noNodes = 0
nodesToRead = length(nodes)
while !isempty(nodes)
node = popfirst!(nodes)
noNodes += 1
if (noNodes % 100 == 0)
@printf "\rReading Nodes... %.2f%%" (100. * noNodes / nodesToRead)
if (verbose) @printf "\rReading Nodes... %.2f%%" (100. * noNodes / nodesToRead) end
end
if occursin(regex_a, node)
# add nodes and edges for the state reading to u(P(Particle))
@@ -127,7 +134,10 @@ function importTxt(filename::String)
insert_edge(graph, make_edge(data_out, sum_node))
elseif occursin(regex_plus, node)
println("\rReading Nodes Complete ")
if (verbose)
println("\rReading Nodes Complete ")
println("Added ", length(graph.nodes), " nodes")
end
else
error("Unknown node '", node, "' while reading from file ", filename)
end
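The `sizehint!` call above reserves capacity for the estimated node count up front, so the many inserts while reading the file do not repeatedly grow the container. A minimal sketch of the pattern (container type and sizes are illustrative only, not taken from the package):
```julia
nodes = Set{Int}()
sizehint!(nodes, 4 * 1_000)   # slight overestimate, mirroring estimate_no_nodes above
for i in 1:1_000
    push!(nodes, i)           # no intermediate rehashing/reallocation needed
end
```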

@@ -11,13 +11,16 @@ include("node_functions.jl")
include("graph_functions.jl")
include("graph_operations.jl")
include("import.jl")
include("utility.jl")
export Node, Edge, ComputeTaskNode, DataTaskNode, DAG
export AbstractTask, AbstractComputeTask, AbstractDataTask, DataTask, ComputeTaskP, ComputeTaskS1, ComputeTaskS2, ComputeTaskV, ComputeTaskU, ComputeTaskSum, FusedComputeTask
export make_node, make_edge, insert_node, insert_edge, is_entry_node, is_exit_node, parents, children, compute, graph_properties, get_exit_node, is_valid
export node_fusion, node_reduction, node_split, generate_options
export importTxt
export import_txt
export ==, in, show
export bytes_to_human_readable
end # module metagraph_optimization

@@ -15,11 +15,11 @@ function make_edge(n1::Node, n2::Node)
end
function make_edge(n1::ComputeTaskNode, n2::DataTaskNode)
return Edge((Ref(n1), Ref(n2)))
return Edge((n1, n2))
end
function make_edge(n1::DataTaskNode, n2::ComputeTaskNode)
return Edge((Ref(n1), Ref(n2)))
return Edge((n1, n2))
end
function show(io::IO, n::Node)
@@ -27,9 +27,9 @@ function show(io::IO, n::Node)
end
function show(io::IO, e::Edge)
print(io, "Edge(", e.edge[1][], ", ", e.edge[2][], ")")
print(io, "Edge(", e.edge[1], ", ", e.edge[2], ")")
end
function ==(e1::Edge, e2::Edge)
return e1.edge[1][] == e2.edge[1][] && e1.edge[2][] == e2.edge[2][]
return e1.edge[1] == e2.edge[1] && e1.edge[2] == e2.edge[2]
end

@@ -1,33 +1,38 @@
using Random
using UUIDs
rng = Random.MersenneTwister(0);
rng = Random.MersenneTwister(0)
abstract type Node end
struct DataTaskNode <: Node
task::AbstractDataTask
parents::Set{Node}
children::Set{Node}
id::Base.UUID # need a unique identifier unique to every *constructed* node
# use vectors as sets have way too much memory overhead
parents::Vector{Node}
children::Vector{Node}
# need a unique identifier unique to every *constructed* node
id::Base.UUID
end
# same as DataTaskNode
struct ComputeTaskNode <: Node
task::AbstractComputeTask
parents::Set{Node}
children::Set{Node}
id::Base.UUID # need a unique identifier unique to every *constructed* node
parents::Vector{Node}
children::Vector{Node}
id::Base.UUID
end
function DataTaskNode(t::AbstractDataTask)
return DataTaskNode(t, Set{Node}(), Set{Node}(), UUIDs.uuid1(rng))
return DataTaskNode(t, Vector{Node}(), Vector{Node}(), UUIDs.uuid1(rng))
end
function ComputeTaskNode(t::AbstractComputeTask)
return ComputeTaskNode(t, Set{Node}(), Set{Node}(), UUIDs.uuid1(rng))
return ComputeTaskNode(t, Vector{Node}(), Vector{Node}(), UUIDs.uuid1(rng))
end
struct Edge
# edge points from child to parent
edge::Union{Tuple{Ref{DataTaskNode}, Ref{ComputeTaskNode}}, Tuple{Ref{ComputeTaskNode}, Ref{DataTaskNode}}}
edge::Union{Tuple{DataTaskNode, ComputeTaskNode}, Tuple{ComputeTaskNode, DataTaskNode}}
end
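To put the "sets have way too much memory overhead" comment into perspective, comparing `Base.summarysize` of a small `Set` and a `Vector` holding the same few elements illustrates the gap (a standalone sketch; exact byte counts depend on the Julia version):
```julia
small_vec = collect(1:4)   # parent/child lists are typically this small
small_set = Set(1:4)
println("Vector: ", Base.summarysize(small_vec), " bytes")  # tens of bytes
println("Set:    ", Base.summarysize(small_set), " bytes")  # several hundred bytes (hash-table slots)
```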

9 src/utility.jl Normal file

@@ -0,0 +1,9 @@
function bytes_to_human_readable(bytes)
    units = ["B", "KiB", "MiB", "GiB", "TiB"]
    unit_index = 1
    while bytes >= 1024 && unit_index < length(units)
        bytes /= 1024
        unit_index += 1
    end
    return string(round(bytes, digits=4), " ", units[unit_index])
end
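For reference, two example calls and the strings they produce; the second happens to match the AB->AB graph size shown in the README output above:
```julia
bytes_to_human_readable(500)    # "500 B"
bytes_to_human_readable(8560)   # "8.3594 KiB"  (8560 / 1024 ≈ 8.3594)
```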