Add more estimator tests, add specialized implementations of operation_effect estimator

2023-11-16 04:28:58 +01:00
parent a97a878881
commit 241fd227c1
7 changed files with 139 additions and 16 deletions
--- a/src/MetagraphOptimization.jl
+++ b/src/MetagraphOptimization.jl
@ -29,6 +29,8 @@ export is_exit_node
 export parents
 export children
 export compute
+export data
+export compute_effort
 export get_properties
 export get_exit_node
 export is_valid, is_scheduled
@ -63,7 +65,7 @@ export gen_process_input
 export get_compute_function

 # estimator
-export cost_type, graph_cost
+export cost_type, graph_cost, operation_effect
 export GlobalMetricEstimator, CDCost

 # machine info
--- a/src/estimator/global_metric.jl
+++ b/src/estimator/global_metric.jl
@ -8,9 +8,27 @@ Representation of a [`DAG`](@ref)'s cost as estimated by the [`GlobalMetricEstim
 `.data`: The total data transfer.\\
 `.computeEffort`: The total compute effort.\\
 `.computeIntensity`: The compute intensity, will always equal `.computeEffort / .data`.
+
+
+!!! note
+    The `computeIntensity` is not necessarily meaningful for a cost that only describes the effect of an operation.
+    For example, for node fusions it is always 0, since their `computeEffort` is zero.
+    Adding/subtracting such a cost to/from a `graph_cost` estimate still works as intended.
 """
 const CDCost = NamedTuple{(:data, :computeEffort, :computeIntensity), Tuple{Float64, Float64, Float64}}

+"""
+    +(cost1::CDCost, cost2::CDCost)::CDCost
+
+Return the sum of two [`CDCost`](@ref)s.
+
+`data` and `computeEffort` are added component-wise. `computeIntensity` is *not* added;
+it is recomputed as the summed `computeEffort` divided by the summed `data`.
+Since all fields are `Float64`, a zero `data` sum yields `Inf` or `NaN` instead of an error.
+"""
+function +(cost1::CDCost, cost2::CDCost)::CDCost
+    d = cost1.data + cost2.data
+    ce = cost1.computeEffort + cost2.computeEffort
+    return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
+end
+
+"""
+    -(cost1::CDCost, cost2::CDCost)::CDCost
+
+Return the difference `cost1 - cost2` of two [`CDCost`](@ref)s.
+
+`data` and `computeEffort` are subtracted component-wise. `computeIntensity` is *not* subtracted;
+it is recomputed as the resulting `computeEffort` divided by the resulting `data`.
+Since all fields are `Float64`, a zero `data` difference yields `Inf` or `NaN` instead of an error.
+"""
+function -(cost1::CDCost, cost2::CDCost)::CDCost
+    d = cost1.data - cost2.data
+    ce = cost1.computeEffort - cost2.computeEffort
+    return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
+end
+
 struct GlobalMetricEstimator <: AbstractEstimator end

 function cost_type(estimator::GlobalMetricEstimator)
@ -25,3 +43,23 @@ function graph_cost(estimator::GlobalMetricEstimator, graph::DAG)
        computeIntensity = properties.computeIntensity,
    )::CDCost
 end
+
+"""
+    operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeFusion)
+
+Return the estimated change in [`CDCost`](@ref) caused by the given node fusion.
+
+The data of the second input node's task is reported as removed (negative `data`);
+`computeEffort` and `computeIntensity` are both reported as `0.0`.
+`estimator` and `graph` are only used for dispatch and are not inspected.
+"""
+function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeFusion)
+    removed_data = data(operation.input[2].task)
+    return (data = -removed_data, computeEffort = 0.0, computeIntensity = 0.0)::CDCost
+end
+
+"""
+    operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeReduction)
+
+Return the estimated change in [`CDCost`](@ref) caused by the given node reduction.
+
+The data and compute effort of the first input node's task are each subtracted
+`length(operation.input) - 1` times. The reported `computeIntensity` is `0.0` when the
+reduced nodes are `DataTaskNode`s and `Inf` otherwise.
+`estimator` and `graph` are only used for dispatch and are not inspected.
+"""
+function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeReduction)
+    removed = length(operation.input) - 1
+    first_node = operation.input[1]
+    # NOTE(review): only the first input is inspected; presumably all inputs of a
+    # reduction carry equivalent tasks -- confirm against the NodeReduction definition.
+    reduced_task = first_node.task
+    # `operation.input` is a collection of nodes, so the node kind must be checked on an
+    # element. Testing the collection's own type against `DataTaskNode` never matches.
+    intensity = first_node isa DataTaskNode ? 0.0 : Inf
+    return (
+        data = removed * -data(reduced_task),
+        computeEffort = removed * -compute_effort(reduced_task),
+        computeIntensity = intensity,
+    )::CDCost
+end
+
+"""
+    operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeSplit)
+
+Return the estimated change in [`CDCost`](@ref) caused by the given node split.
+
+The data and compute effort of the input node's task are each added
+`length(operation.input.parents) - 1` times, and `computeIntensity` is the ratio of the
+added compute effort to the added data.
+`estimator` and `graph` are only used for dispatch and are not inspected.
+"""
+function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeSplit)
+    node = operation.input
+    extra_copies = length(node.parents) - 1
+    added_data = extra_copies * data(node.task)
+    added_effort = extra_copies * compute_effort(node.task)
+    return (
+        data = added_data,
+        computeEffort = added_effort,
+        computeIntensity = added_effort / added_data,
+    )::CDCost
+end
--- a/src/estimator/interface.jl
+++ b/src/estimator/interface.jl
@ -35,6 +35,7 @@ Get the estimated effect on the cost of the graph, such that `graph_cost(estimat
    It can be much faster to overload this function for a specific estimator and directly compute the effects from the operation if possible.
 """
 function operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation)
+    # NOTE: This implementation does not currently work reliably; see issue #16.
    cost = graph_cost(estimator, graph)
    push_operation!(graph, operation)
    cost_after = graph_cost(estimator, graph)
--- a/src/models/abc/properties.jl
+++ b/src/models/abc/properties.jl
@ -3,35 +3,35 @@

 Return the compute effort of an S1 task.
 """
-compute_effort(t::ComputeTaskS1) = 11
+compute_effort(t::ComputeTaskS1) = 11.0

 """
    compute_effort(t::ComputeTaskS2)

 Return the compute effort of an S2 task.
 """
-compute_effort(t::ComputeTaskS2) = 12
+compute_effort(t::ComputeTaskS2) = 12.0

 """
    compute_effort(t::ComputeTaskU)

 Return the compute effort of a U task.
 """
-compute_effort(t::ComputeTaskU) = 1
+compute_effort(t::ComputeTaskU) = 1.0

 """
    compute_effort(t::ComputeTaskV)

 Return the compute effort of a V task.
 """
-compute_effort(t::ComputeTaskV) = 6
+compute_effort(t::ComputeTaskV) = 6.0

 """
    compute_effort(t::ComputeTaskP)

 Return the compute effort of a P task.
 """
-compute_effort(t::ComputeTaskP) = 0
+compute_effort(t::ComputeTaskP) = 0.0

 """
    compute_effort(t::ComputeTaskSum)
@ -41,7 +41,7 @@ Return the compute effort of a Sum task.
 Note: This is a constant compute effort, even though sum scales with the number of its inputs. Since there is only ever a single sum node in a graph generated from the ABC-Model,
 this doesn't matter.
 """
-compute_effort(t::ComputeTaskSum) = 1
+compute_effort(t::ComputeTaskSum) = 1.0

 """
    show(io::IO, t::DataTask)
--- a/src/models/abc/types.jl
+++ b/src/models/abc/types.jl
@ -4,7 +4,7 @@
 Task representing a specific data transfer in the ABC Model.
 """
 struct DataTask <: AbstractDataTask
-    data::UInt64
+    data::Float64
 end

 """
--- a/src/task/properties.jl
+++ b/src/task/properties.jl
@ -49,7 +49,7 @@ end

 Return the compute effort of a data task, always zero, regardless of the specific task.
 """
-compute_effort(t::AbstractDataTask) = 0
+compute_effort(t::AbstractDataTask) = 0.0

 """
    data(t::AbstractDataTask)
@ -63,7 +63,7 @@ data(t::AbstractDataTask) = getfield(t, :data)

 Return the data of a compute task, always zero, regardless of the specific task.
 """
-data(t::AbstractComputeTask) = 0
+data(t::AbstractComputeTask) = 0.0

 """
    compute_effort(t::FusedComputeTask)
--- a/test/unit_tests_estimator.jl
+++ b/test/unit_tests_estimator.jl
@ -1,17 +1,99 @@
+"""
+    test_op_specific(estimator, graph, nf::NodeFusion)
+
+Check the estimator's `operation_effect` for a node fusion: the data change must equal the
+negated data of the second input's task, and compute effort and intensity must be zero.
+"""
+function test_op_specific(estimator, graph, nf::NodeFusion)
+    effect = operation_effect(estimator, graph, nf)
+    removed_data = data(nf.input[2].task)
+
+    @test isapprox(effect.data, -removed_data)
+    # Comparisons against zero need an absolute tolerance.
+    @test isapprox(effect.computeEffort, 0; atol = eps(Float64))
+    @test isapprox(effect.computeIntensity, 0; atol = eps(Float64))
+
+    return nothing
+end
+
+"""
+    test_op_specific(estimator, graph, nr::NodeReduction)
+
+Check the estimator's `operation_effect` for a node reduction against values computed from
+the first input's task, scaled by `length(nr.input) - 1`.
+"""
+function test_op_specific(estimator, graph, nr::NodeReduction)
+    effect = operation_effect(estimator, graph, nr)
+
+    removed_nodes = length(nr.input) - 1
+    first_task = nr.input[1].task
+    expected_data = data(first_task) * removed_nodes
+    expected_effort = compute_effort(first_task) * removed_nodes
+
+    # The data change may be zero, so an absolute tolerance is supplied.
+    @test isapprox(effect.data, -expected_data; atol = eps(Float64))
+    @test isapprox(effect.computeEffort, -expected_effort)
+    @test isapprox(effect.computeIntensity, expected_effort / expected_data)
+
+    return nothing
+end
+
+"""
+    test_op_specific(estimator, graph, ns::NodeSplit)
+
+Check the estimator's `operation_effect` for a node split against values computed from the
+input node's task, scaled by `length(ns.input.parents) - 1`.
+"""
+function test_op_specific(estimator, graph, ns::NodeSplit)
+    effect = operation_effect(estimator, graph, ns)
+
+    split_node = ns.input
+    extra_copies = length(split_node.parents) - 1
+    expected_data = data(split_node.task) * extra_copies
+    expected_effort = compute_effort(split_node.task) * extra_copies
+
+    # The data change may be zero, so an absolute tolerance is supplied.
+    @test isapprox(effect.data, expected_data; atol = eps(Float64))
+    @test isapprox(effect.computeEffort, expected_effort)
+    @test isapprox(effect.computeIntensity, expected_effort / expected_data)
+
+    return nothing
+end
+
+"""
+    test_op(estimator, graph, op)
+
+Run the estimator checks for a single operation `op` by delegating to the
+`test_op_specific` method matching the operation's type. Returns `nothing`.
+"""
+function test_op(estimator, graph, op)
+    # The generic check below (estimate added to the cost before the operation vs. the
+    # cost after applying it) is disabled and kept only as a block comment; see issue #16.
+    #=
+    See issue #16
+
+    estimate_before = graph_cost(estimator, graph)
+
+    estimate = operation_effect(estimator, graph, op)
+
+    push_operation!(graph, op)
+    estimate_after_apply = graph_cost(estimator, graph)
+    reset_graph!(graph)
+
+    @test isapprox((estimate_before + estimate).data, estimate_after_apply.data)
+    @test isapprox((estimate_before + estimate).computeEffort, estimate_after_apply.computeEffort)
+    @test isapprox((estimate_before + estimate).computeIntensity, estimate_after_apply.computeIntensity)
+    =#
+
+    # Only the operation-type-specific expectations are currently checked.
+    test_op_specific(estimator, graph, op)
+    return nothing
+end

@testset "Unit Tests Estimator" begin
-    @testset "Global Metric Estimator" begin
+    @testset "Global Metric Estimator" for (graph_string, exp_data, exp_computeEffort) in
+                                           zip(["AB->AB", "AB->ABBB"], [976, 10944], [53, 1075])
        estimator = GlobalMetricEstimator()

        @test cost_type(estimator) == CDCost

-        graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
+        graph = parse_dag(joinpath(@__DIR__, "..", "input", "$(graph_string).txt"), ABCModel())

-        estimate = graph_cost(estimator, graph)
+        @testset "Graph Cost" begin
+            estimate = graph_cost(estimator, graph)

-        @test estimate.data == 976
-        @test estimate.computeEffort == 53
-        @test isapprox(estimate.computeIntensity, 0.05430327868852459)
+            @test estimate.data == exp_data
+            @test estimate.computeEffort == exp_computeEffort
+            @test isapprox(estimate.computeIntensity, exp_computeEffort / exp_data)
+        end
+
+        @testset "Operation Cost" begin
+            ops = get_operations(graph)
+            # Iterate over copies of the operation collections rather than the originals
+            # (presumably so changes to `ops` during testing cannot disturb iteration --
+            # TODO confirm).
+            fusions = copy(ops.nodeFusions)
+            reductions = copy(ops.nodeReductions)
+            splits = copy(ops.nodeSplits)
+
+            println(
+                "Testing $(length(ops.nodeFusions))xNF, $(length(ops.nodeReductions))xNR, $(length(ops.nodeSplits))xNS",
+            )
+
+            # Same order as before: fusions, then reductions, then splits.
+            check = op -> test_op(estimator, graph, op)
+            foreach(check, fusions)
+            foreach(check, reductions)
+            foreach(check, splits)
+        end
    end
 end
 println("Estimator Unit Tests Complete!")