Add scheduling, machine info, caching strategies and devices (#9)
Some checks failed
MetagraphOptimization_CI / prepare (push) Has been cancelled
MetagraphOptimization_CI / test (push) Has been cancelled
MetagraphOptimization_CI / docs (push) Has been cancelled

Reviewed-on: Rubydragon/MetagraphOptimization.jl#9
Co-authored-by: Anton Reinhard <anton.reinhard@proton.me>
Co-committed-by: Anton Reinhard <anton.reinhard@proton.me>
2023-10-12 17:51:03 +02:00
committed by Anton Reinhard
parent bd6c54c1ae
commit 5a30f57e1f
72 changed files with 3397 additions and 987 deletions


@ -29,7 +29,7 @@ export children
export compute
export get_properties
export get_exit_node
export is_valid
export is_valid, is_scheduled
export Operation
export AppliedOperation
@ -42,7 +42,6 @@ export can_pop
export reset_graph!
export get_operations
export parse_abc
export ComputeTaskP
export ComputeTaskS1
export ComputeTaskS2
@ -51,9 +50,15 @@ export ComputeTaskU
export ComputeTaskSum
export execute
export gen_particles
export parse_dag, parse_process
export gen_process_input
export get_compute_function
export ParticleValue
export Particle
export ParticleA, ParticleB, ParticleC
export ABCProcessDescription, ABCProcessInput, ABCModel
export Machine
export get_machine_info
export ==, in, show, isempty, delete!, length
@ -72,6 +77,7 @@ import Base.insert!
import Base.collect
include("devices/interface.jl")
include("task/type.jl")
include("node/type.jl")
include("diff/type.jl")
@ -111,15 +117,34 @@ include("properties/utility.jl")
include("task/create.jl")
include("task/compare.jl")
include("task/compute.jl")
include("task/print.jl")
include("task/properties.jl")
include("models/interface.jl")
include("models/print.jl")
include("models/abc/types.jl")
include("models/abc/particle.jl")
include("models/abc/compute.jl")
include("models/abc/create.jl")
include("models/abc/properties.jl")
include("models/abc/parse.jl")
include("models/abc/print.jl")
include("devices/measure.jl")
include("devices/detect.jl")
include("devices/impl.jl")
include("devices/numa/impl.jl")
include("devices/cuda/impl.jl")
# cannot currently use AMDGPU because of an incompatibility with the newest ROCm drivers
# include("devices/rocm/impl.jl")
# oneAPI also seems to be broken for now
# include("devices/oneapi/impl.jl")
include("scheduler/interface.jl")
include("scheduler/greedy.jl")
include("code_gen/main.jl")


@ -1,126 +1,157 @@
using DataStructures
"""
gen_code(graph::DAG)
Generate the code for a given graph. The return value is a tuple of:
Generate the code for a given graph. The return value is a named tuple of:
- `code::Expr`: The julia expression containing the code for the whole graph.
- `inputSymbols::Dict{String, Symbol}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
- `inputSymbols::Dict{String, Vector{Symbol}}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
- `outputSymbol::Symbol`: The symbol of the final calculated value
See also: [`execute`](@ref)
"""
function gen_code(graph::DAG)
code = Vector{Expr}()
sizehint!(code, length(graph.nodes))
function gen_code(graph::DAG, machine::Machine)
sched = schedule_dag(GreedyScheduler(), graph, machine)
nodeQueue = PriorityQueue{Node, Int}()
inputSyms = Dict{String, Symbol}()
codeAcc = Vector{Expr}()
sizehint!(codeAcc, length(graph.nodes))
# use a priority equal to the number of unseen children -> 0 are nodes that can be added
for node in sched
# TODO: this is kind of ugly, should init nodes be scheduled differently from the rest?
if (node isa DataTaskNode && length(node.children) == 0)
push!(codeAcc, get_init_expression(node, entry_device(machine)))
continue
end
push!(codeAcc, get_expression(node))
end
# get inSymbols
inputSyms = Dict{String, Vector{Symbol}}()
for node in get_entry_nodes(graph)
enqueue!(nodeQueue, node => 0)
push!(inputSyms, node.name => Symbol("data_$(to_var_name(node.id))_in"))
if !haskey(inputSyms, node.name)
inputSyms[node.name] = Vector{Symbol}()
end
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
end
node = nothing
while !isempty(nodeQueue)
@assert peek(nodeQueue)[2] == 0
node = dequeue!(nodeQueue)
# get outSymbol
outSym = Symbol(to_var_name(get_exit_node(graph).id))
push!(code, get_expression(node))
for parent in node.parents
# reduce the priority of all parents by one
if (!haskey(nodeQueue, parent))
enqueue!(nodeQueue, parent => length(parent.children) - 1)
else
nodeQueue[parent] = nodeQueue[parent] - 1
end
return (code = Expr(:block, codeAcc...), inputSymbols = inputSyms, outputSymbol = outSym)
end
function gen_cache_init_code(machine::Machine)
initializeCaches = Vector{Expr}()
for device in machine.devices
push!(initializeCaches, gen_cache_init_code(device))
end
return Expr(:block, initializeCaches...)
end
function gen_input_assignment_code(
inputSymbols::Dict{String, Vector{Symbol}},
processDescription::AbstractProcessDescription,
machine::Machine,
processInputSymbol::Symbol = :input,
)
@assert length(inputSymbols) >=
sum(values(in_particles(processDescription))) + sum(values(out_particles(processDescription))) "Number of input Symbols is smaller than the number of particles in the process description"
assignInputs = Vector{Expr}()
for (name, symbols) in inputSymbols
type = type_from_name(name)
index = parse(Int, name[2:end])
p = nothing
if (index > in_particles(processDescription)[type])
index -= in_particles(processDescription)[type]
@assert index <= out_particles(processDescription)[type] "Too few particles of type $type in input particles for this process"
p = "filter(x -> typeof(x) <: $type, out_particles($(processInputSymbol)))[$(index)]"
else
p = "filter(x -> typeof(x) <: $type, in_particles($(processInputSymbol)))[$(index)]"
end
for symbol in symbols
# TODO: how to get the "default" cpu device?
device = entry_device(machine)
evalExpr = eval(gen_access_expr(device, symbol))
push!(assignInputs, Meta.parse("$(evalExpr) = ParticleValue($p, 1.0)"))
end
end
# node is now the last node we looked at -> the output node
outSym = Symbol("data_$(to_var_name(node.id))")
return Expr(:block, assignInputs...)
end
return (
code = Expr(:block, code...),
inputSymbols = inputSyms,
outputSymbol = outSym,
"""
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
"""
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
(code, inputSymbols, outputSymbol) = gen_code(graph, machine)
initCaches = gen_cache_init_code(machine)
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
functionId = to_var_name(UUIDs.uuid1(rng[1]))
resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
expr = Meta.parse(
"function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
)
func = eval(expr)
return func
end
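A minimal end-to-end usage sketch of the new entry points; the file path and process string are hypothetical example values:
```julia
using MetagraphOptimization

# parse a DAG file and the matching process string (example values)
graph = parse_dag("input/AB->ABBB.txt", ABCModel())
process = parse_process("AB->ABBB", ABCModel())

# detect the devices of the current machine
machine = get_machine_info()

# build the compute function once, then reuse it for many generated inputs
compute_ab_abbb = get_compute_function(graph, process, machine)

input = gen_process_input(process)
result = compute_ab_abbb(input)
```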
"""
execute(generated_code, input::Dict{ParticleType, Vector{Particle}})
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
Execute the given `generated_code` (as returned by [`gen_code`](@ref)) on the given input particles.
Execute the code of the given `graph` on the given input particles.
This is essentially shorthand for
```julia
compute_graph = get_compute_function(graph, process, machine)
result = compute_graph(input)
```
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
"""
function execute(generated_code, input::Dict{ParticleType, Vector{Particle}})
(code, inputSymbols, outputSymbol) = generated_code
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
(code, inputSymbols, outputSymbol) = gen_code(graph, machine)
assignInputs = Vector{Expr}()
for (name, symbol) in inputSymbols
type = nothing
if startswith(name, "A")
type = A
elseif startswith(name, "B")
type = B
else
type = C
initCaches = gen_cache_init_code(machine)
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
functionId = to_var_name(UUIDs.uuid1(rng[1]))
resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
expr = Meta.parse(
"function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
)
func = eval(expr)
result = 0
try
result = @eval $func($input)
catch e
println("Error while evaluating: $e")
# if we find a uuid in the exception we can color it in so it's easier to spot
uuidRegex = r"[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}"
m = match(uuidRegex, string(e))
functionStr = string(expr)
if (isa(m, RegexMatch))
functionStr = replace(functionStr, m.match => "\033[31m$(m.match)\033[0m")
end
index = parse(Int, name[2:end])
push!(
assignInputs,
Meta.parse(
"$(symbol) = ParticleValue(Particle($(input[type][index]).P0, $(input[type][index]).P1, $(input[type][index]).P2, $(input[type][index]).P3, $(type)), 1.0)",
),
)
println("Function:\n$functionStr")
@assert false
end
assignInputs = Expr(:block, assignInputs...)
eval(assignInputs)
eval(code)
eval(Meta.parse("result = $outputSymbol"))
return result
end
"""
execute(graph::DAG, input::Dict{ParticleType, Vector{Particle}})
Execute the given `generated_code` (as returned by [`gen_code`](@ref)) on the given input particles.
The input particles should be sorted into the dictionary according to their [`ParticleType`](@ref)s.
See also: [`gen_particles`](@ref)
"""
function execute(graph::DAG, input::Dict{ParticleType, Vector{Particle}})
(code, inputSymbols, outputSymbol) = gen_code(graph)
assignInputs = Vector{Expr}()
for (name, symbol) in inputSymbols
type = nothing
if startswith(name, "A")
type = A
elseif startswith(name, "B")
type = B
else
type = C
end
index = parse(Int, name[2:end])
push!(
assignInputs,
Meta.parse(
"$(symbol) = ParticleValue(Particle($(input[type][index]).P0, $(input[type][index]).P1, $(input[type][index]).P2, $(input[type][index]).P3, $(type)), 1.0)",
),
)
end
assignInputs = Expr(:block, assignInputs...)
eval(assignInputs)
eval(code)
eval(Meta.parse("result = $outputSymbol"))
return result
end

src/devices/cuda/impl.jl (new file, 53 lines)

@ -0,0 +1,53 @@
using CUDA
"""
CUDAGPU <: AbstractGPU
Representation of a specific CUDA GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct CUDAGPU <: AbstractGPU
device::Any # TODO: what's the cuda device type?
cacheStrategy::CacheStrategy
FLOPS::Float64
end
push!(DEVICE_TYPES, CUDAGPU)
CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
default_strategy(::Type{T}) where {T <: CUDAGPU} = LocalVariables()
function measure_device!(device::CUDAGPU; verbose::Bool)
if verbose
println("Measuring CUDA GPU $(device.device)")
end
# TODO implement
return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: CUDAGPU}
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
devices = Vector{AbstractDevice}()
if !CUDA.functional()
if verbose
println("CUDA is non-functional")
end
return devices
end
CUDADevices = CUDA.devices()
if verbose
println("Found $(length(CUDADevices)) CUDA devices")
end
for device in CUDADevices
push!(devices, CUDAGPU(device, default_strategy(CUDAGPU), -1))
end
return devices
end
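For illustration, a device query might look like this (a sketch; `get_devices` may need qualification with the module name if it is not exported, and the result is empty on machines without a functional CUDA setup):
```julia
# query all CUDA GPUs; empty when CUDA is not functional on this machine
gpus = get_devices(CUDAGPU; verbose = true)
```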

src/devices/detect.jl (new file, 23 lines)

@ -0,0 +1,23 @@
"""
get_machine_info(verbose::Bool)
Return the [`Machine`](@ref) that the program is currently running on. The parameter `verbose` defaults to true when running in an interactive session.
"""
function get_machine_info(; verbose::Bool = Base.isinteractive())
devices = Vector{AbstractDevice}()
for device in device_types()
devs = get_devices(device, verbose = verbose)
for dev in devs
push!(devices, dev)
end
end
noDevices = length(devices)
@assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"
transferRates = Matrix{Float64}(undef, noDevices, noDevices)
fill!(transferRates, -1)
return Machine(devices, transferRates)
end
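A short sketch of inspecting the detected machine (assuming at least one NUMA node is found, as asserted above):
```julia
machine = get_machine_info(verbose = true)

length(machine.devices)      # number of detected devices, at least 1
size(machine.transferRates)  # square matrix, currently only -1 placeholders
```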

52
src/devices/impl.jl Normal file
View File

@ -0,0 +1,52 @@
"""
device_types()
Return a vector of available and implemented device types.
See also: [`DEVICE_TYPES`](@ref)
"""
function device_types()
return DEVICE_TYPES
end
"""
entry_device(machine::Machine)
Return the "entry" device, i.e., the device that starts CPU threads and GPU kernels, and takes input values and returns the output value.
"""
function entry_device(machine::Machine)
return machine.devices[1]
end
"""
strategies(t::Type{T}) where {T <: AbstractDevice}
Return a vector of available [`CacheStrategy`](@ref)s for the given [`AbstractDevice`](@ref).
The caching strategies are used in code generation.
"""
function strategies(t::Type{T}) where {T <: AbstractDevice}
if !haskey(CACHE_STRATEGIES, t)
error("Trying to get strategies for $T, but it has no strategies defined!")
end
return CACHE_STRATEGIES[t]
end
"""
cache_strategy(device::AbstractDevice)
Returns the cache strategy set for this device.
"""
function cache_strategy(device::AbstractDevice)
return device.cacheStrategy
end
"""
set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy)
Sets the device's cache strategy. After this call, [`cache_strategy`](@ref) should return `cacheStrategy` on the given device.
"""
function set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy)
device.cacheStrategy = cacheStrategy
return nothing
end
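Putting the strategy accessors together, a hedged usage sketch (these helpers may need to be qualified with the module name if they are not exported):
```julia
machine = get_machine_info()
dev = entry_device(machine)        # the device that takes inputs and returns the output

strategies(typeof(dev))            # e.g. [LocalVariables()] for a NumaNode
cache_strategy(dev)                # the strategy currently set on the device
set_cache_strategy(dev, LocalVariables())
```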

src/devices/interface.jl (new file, 108 lines)

@ -0,0 +1,108 @@
"""
AbstractDevice
Abstract base type for every device, like GPUs, CPUs or any other compute devices.
Every implementation needs to implement various functions and needs a member `cacheStrategy`.
"""
abstract type AbstractDevice end
abstract type AbstractCPU <: AbstractDevice end
abstract type AbstractGPU <: AbstractDevice end
"""
Machine
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
See also: [`Scheduler`](@ref)
"""
struct Machine
devices::Vector{AbstractDevice}
transferRates::Matrix{Float64}
end
"""
CacheStrategy
Abstract base type for caching strategies.
See also: [`strategies`](@ref)
"""
abstract type CacheStrategy end
"""
LocalVariables <: CacheStrategy
A caching strategy relying solely on local variables for every input and output.
Implements the [`CacheStrategy`](@ref) interface.
"""
struct LocalVariables <: CacheStrategy end
"""
Dictionary <: CacheStrategy
A caching strategy relying on a dictionary of Symbols to store every input and output.
Implements the [`CacheStrategy`](@ref) interface.
"""
struct Dictionary <: CacheStrategy end
"""
DEVICE_TYPES::Vector{Type}
Global vector of available and implemented device types. Each implementation of an [`AbstractDevice`](@ref) should add its concrete type to this vector.
See also: [`device_types`](@ref), [`get_devices`](@ref)
"""
DEVICE_TYPES = Vector{Type}()
"""
CACHE_STRATEGIES::Dict{Type, Vector{CacheStrategy}}
Global dictionary of available caching strategies per device. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary.
See also: [`strategies`](@ref)
"""
CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}()
"""
default_strategy(deviceType::Type{T}) where {T <: AbstractDevice}
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device type.
See also: [`cache_strategy`](@ref), [`set_cache_strategy`](@ref)
"""
function default_strategy end
"""
get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector` of the devices of the given [`AbstractDevice`](@ref) type that are available on the current machine.
"""
function get_devices end
"""
measure_device!(device::AbstractDevice; verbose::Bool)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes the result into it.
"""
function measure_device! end
"""
gen_cache_init_code(device::AbstractDevice)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache, according to the device's currently set [`CacheStrategy`](@ref).
"""
function gen_cache_init_code end
"""
gen_access_expr(device::AbstractDevice, symbol::Symbol)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
Return an `Expr` or `QuoteNode` accessing the variable identified by `symbol`.
"""
function gen_access_expr end
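To support a new device type, an implementation roughly follows the pattern below. This is a sketch only; `FictionalAccelerator` and its measured values are hypothetical and not part of the package:
```julia
# hypothetical device type illustrating the interface
mutable struct FictionalAccelerator <: AbstractGPU
    device::Any
    cacheStrategy::CacheStrategy
    FLOPS::Float64
end

# register the type and its supported caching strategies
push!(DEVICE_TYPES, FictionalAccelerator)
CACHE_STRATEGIES[FictionalAccelerator] = [LocalVariables()]
default_strategy(::Type{T}) where {T <: FictionalAccelerator} = LocalVariables()

function measure_device!(device::FictionalAccelerator; verbose::Bool)
    device.FLOPS = 1.0e9    # placeholder value; a real implementation would benchmark here
    return nothing
end

function get_devices(::Type{T}; verbose::Bool = false) where {T <: FictionalAccelerator}
    # detect and return all devices of this type present on the machine
    return AbstractDevice[FictionalAccelerator(nothing, default_strategy(FictionalAccelerator), -1)]
end
```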

src/devices/measure.jl (new file, 22 lines)

@ -0,0 +1,22 @@
"""
measure_devices!(machine::Machine; verbose::Bool)
Measure FLOPS, RAM, cache sizes, and any other properties that can be extracted for the devices in the given machine.
"""
function measure_devices!(machine::Machine; verbose::Bool = Base.isinteractive())
for device in machine.devices
measure_device!(device; verbose = verbose)
end
return nothing
end
"""
measure_transfer_rates!(machine::Machine; verbose::Bool)
Measure the transfer rates between devices in the machine.
"""
function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.isinteractive())
# TODO implement
return nothing
end

src/devices/numa/impl.jl (new file, 96 lines)

@ -0,0 +1,96 @@
using NumaAllocators
"""
NumaNode <: AbstractCPU
Representation of a specific CPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct NumaNode <: AbstractCPU
numaId::UInt16
threads::UInt16
cacheStrategy::CacheStrategy
FLOPS::Float64
id::UUID
end
push!(DEVICE_TYPES, NumaNode)
CACHE_STRATEGIES[NumaNode] = [LocalVariables()]
default_strategy(::Type{T}) where {T <: NumaNode} = LocalVariables()
function measure_device!(device::NumaNode; verbose::Bool)
if verbose
println("Measuring Numa Node $(device.numaId)")
end
# TODO implement
return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: NumaNode}
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
devices = Vector{AbstractDevice}()
noNumaNodes = highest_numa_node()
if (verbose)
println("Found $(noNumaNodes + 1) NUMA nodes")
end
for i in 0:noNumaNodes
push!(devices, NumaNode(i, 1, default_strategy(NumaNode), -1, UUIDs.uuid1(rng[1])))
end
return devices
end
"""
gen_cache_init_code(device::NumaNode)
Generate code for initializing the set [`CacheStrategy`](@ref) on a [`NumaNode`](@ref).
"""
function gen_cache_init_code(device::NumaNode)
if typeof(device.cacheStrategy) <: LocalVariables
# don't need to initialize anything
return Expr(:block)
elseif typeof(device.cacheStrategy) <: Dictionary
return Meta.parse("cache_$(to_var_name(device.id)) = Dict{Symbol, Any}()")
# TODO: sizehint?
end
return error("Unimplemented cache strategy \"$(device.cacheStrategy)\" for device \"$(device)\"")
end
"""
gen_access_expr(device::NumaNode, symbol::Symbol)
Generate code to access the variable designated by `symbol` on a [`NumaNode`](@ref), using the [`CacheStrategy`](@ref) set in the device.
"""
function gen_access_expr(device::NumaNode, symbol::Symbol)
return _gen_access_expr(device, device.cacheStrategy, symbol)
end
"""
_gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol)
Internal function for dispatch, used in [`gen_access_expr`](@ref).
"""
function _gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol)
s = Symbol("data_$symbol")
quoteNode = Meta.parse(":($s)")
return quoteNode
end
"""
_gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol)
Internal function for dispatch, used in [`gen_access_expr`](@ref).
"""
function _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol)
accessStr = ":(cache_$(to_var_name(device.id))[:$symbol])"
quoteNode = Meta.parse(accessStr)
return quoteNode
end
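To illustrate the two strategies implemented for `NumaNode`, a small sketch of the expressions they generate (`<id>` stands for the node's variable name derived from its UUID):
```julia
using UUIDs

node = NumaNode(0, 1, LocalVariables(), -1, uuid1())

gen_access_expr(node, :x7_out)      # -> :data_x7_out, a plain local variable

set_cache_strategy(node, Dictionary())
gen_cache_init_code(node)           # -> :(cache_<id> = Dict{Symbol, Any}())
gen_access_expr(node, :x7_out)      # -> :(cache_<id>[:x7_out])
```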


@ -0,0 +1,53 @@
using oneAPI
"""
oneAPIGPU <: AbstractGPU
Representation of a specific Intel GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct oneAPIGPU <: AbstractGPU
device::Any
cacheStrategy::CacheStrategy
FLOPS::Float64
end
push!(DEVICE_TYPES, oneAPIGPU)
CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]
default_strategy(::Type{T}) where {T <: oneAPIGPU} = LocalVariables()
function measure_device!(device::oneAPIGPU; verbose::Bool)
if verbose
println("Measuring oneAPI GPU $(device.device)")
end
# TODO implement
return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
devices = Vector{AbstractDevice}()
if !oneAPI.functional()
if verbose
println("oneAPI is non-functional")
end
return devices
end
oneAPIDevices = oneAPI.devices()
if verbose
println("Found $(length(oneAPIDevices)) oneAPI devices")
end
for device in oneAPIDevices
push!(devices, oneAPIGPU(device, default_strategy(oneAPIGPU), -1))
end
return devices
end

src/devices/rocm/impl.jl (new file, 53 lines)

@ -0,0 +1,53 @@
using AMDGPU
"""
ROCmGPU <: AbstractGPU
Representation of a specific AMD GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct ROCmGPU <: AbstractGPU
device::Any
cacheStrategy::CacheStrategy
FLOPS::Float64
end
push!(DEVICE_TYPES, ROCmGPU)
CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]
default_strategy(::Type{T}) where {T <: ROCmGPU} = LocalVariables()
function measure_device!(device::ROCmGPU; verbose::Bool)
if verbose
println("Measuring ROCm GPU $(device.device)")
end
# TODO implement
return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
devices = Vector{AbstractDevice}()
if !AMDGPU.functional()
if verbose
println("AMDGPU is non-functional")
end
return devices
end
AMDDevices = AMDGPU.devices()
if verbose
println("Found $(length(AMDDevices)) AMD devices")
end
for device in AMDDevices
push!(devices, ROCmGPU(device, default_strategy(ROCmGPU), -1))
end
return devices
end


@ -6,6 +6,6 @@ Pretty-print a [`Diff`](@ref). Called via print, println and co.
function show(io::IO, diff::Diff)
print(io, "Nodes: ")
print(io, length(diff.addedNodes) + length(diff.removedNodes))
print(io, " Edges: ")
print(io, ", Edges: ")
return print(io, length(diff.addedEdges) + length(diff.removedEdges))
end


@ -4,8 +4,8 @@
A named tuple representing a difference of added and removed nodes and edges on a [`DAG`](@ref).
"""
const Diff = NamedTuple{
(:addedNodes, :removedNodes, :addedEdges, :removedEdges),
Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}},
(:addedNodes, :removedNodes, :addedEdges, :removedEdges, :updatedChildren),
Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}, Vector{Tuple{Node, AbstractTask}}},
}
function Diff()
@ -14,5 +14,8 @@ function Diff()
removedNodes = Vector{Node}(),
addedEdges = Vector{Edge}(),
removedEdges = Vector{Edge}(),
# children were updated in the task, updatedChildren[x][2] is the task before the update
updatedChildren = Vector{Tuple{Node, AbstractTask}}(),
)::Diff
end


@ -38,8 +38,7 @@ end
Return `true` if [`pop_operation!`](@ref) is possible, `false` otherwise.
"""
can_pop(graph::DAG) =
!isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
can_pop(graph::DAG) = !isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
"""
reset_graph!(graph::DAG)


@ -15,12 +15,7 @@ Insert the node into the graph.
See also: [`remove_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref)
"""
function insert_node!(
graph::DAG,
node::Node,
track = true,
invalidate_cache = true,
)
function insert_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
# 1: mute
push!(graph.nodes, node)
@ -50,14 +45,8 @@ Insert the edge between node1 (child) and node2 (parent) into the graph.
See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`remove_edge!`](@ref)
"""
function insert_edge!(
graph::DAG,
node1::Node,
node2::Node,
track = true,
invalidate_cache = true,
)
# @assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists"
function insert_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
@assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists"
# 1: mute
# edge points from child to parent
@ -95,13 +84,8 @@ Remove the node from the graph.
See also: [`insert_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref)
"""
function remove_node!(
graph::DAG,
node::Node,
track = true,
invalidate_cache = true,
)
# @assert node in graph.nodes "Trying to remove a node that's not in the graph"
function remove_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
@assert node in graph.nodes "Trying to remove a node that's not in the graph"
# 1: mute
delete!(graph.nodes, node)
@ -134,13 +118,7 @@ Remove the edge between node1 (child) and node2 (parent) into the graph.
See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`insert_edge!`](@ref)
"""
function remove_edge!(
graph::DAG,
node1::Node,
node2::Node,
track = true,
invalidate_cache = true,
)
function remove_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
# 1: mute
pre_length1 = length(node1.parents)
pre_length2 = length(node2.children)
@ -149,15 +127,15 @@ function remove_edge!(
filter!(x -> x != node2, node1.parents)
filter!(x -> x != node1, node2.children)
#=@assert begin
removed = pre_length1 - length(node1.parents)
removed <= 1
end "removed more than one node from node1's parents"=#
@assert begin
removed = pre_length1 - length(node1.parents)
removed <= 1
end "removed more than one node from node1's parents"
#=@assert begin
removed = pre_length2 - length(node2.children)
removed <= 1
end "removed more than one node from node2's children"=#
@assert begin
removed = pre_length2 - length(node2.children)
removed <= 1
end "removed more than one node from node2's children"
# 2: keep track
if (track)
@ -181,6 +159,66 @@ function remove_edge!(
return nothing
end
function replace_children!(task::FusedComputeTask, before, after)
replacedIn1 = length(findall(x -> x == before, task.t1_inputs))
replacedIn2 = length(findall(x -> x == before, task.t2_inputs))
@assert replacedIn1 >= 1 || replacedIn2 >= 1 "Nothing to replace while replacing $before with $after in $(task.t1_inputs...) and $(task.t2_inputs...)"
replace!(task.t1_inputs, before => after)
replace!(task.t2_inputs, before => after)
# recursively descend down the tree, but only in the tasks where we're replacing things
if replacedIn1 > 0
replace_children!(task.first_task, before, after)
end
if replacedIn2 > 0
replace_children!(task.second_task, before, after)
end
return nothing
end
function replace_children!(task::AbstractTask, before, after)
return nothing
end
function update_child!(graph::DAG, n::Node, child_before::Symbol, child_after::Symbol; track = true)
# only need to update fused compute tasks
if !(typeof(n.task) <: FusedComputeTask)
return nothing
end
taskBefore = copy(n.task)
if !((child_before in n.task.t1_inputs) || (child_before in n.task.t2_inputs))
println("------------------ Nothing to replace!! ------------------")
child_ids = Vector{String}()
for child in n.children
push!(child_ids, "$(child.id)")
end
println("From $(child_before) to $(child_after) in $n with children $(child_ids)")
@assert false
end
replace_children!(n.task, child_before, child_after)
if !((child_after in n.task.t1_inputs) || (child_after in n.task.t2_inputs))
println("------------------ Did not replace anything!! ------------------")
child_ids = Vector{String}()
for child in n.children
push!(child_ids, "$(child.id)")
end
println("From $(child_before) to $(child_after) in $n with children $(child_ids)")
@assert false
end
# keep track
if (track)
push!(graph.diff.updatedChildren, (n, taskBefore))
end
end
"""
get_snapshot_diff(graph::DAG)


@ -62,9 +62,5 @@ function show(io::IO, graph::DAG)
properties = get_properties(graph)
println(io, " Total Compute Effort: ", properties.computeEffort)
println(io, " Total Data Transfer: ", properties.data)
return println(
io,
" Total Compute Intensity: ",
properties.computeIntensity,
)
return println(io, " Total Compute Intensity: ", properties.computeIntensity)
end


@ -34,6 +34,7 @@ end
Return a vector of the graph's entry nodes.
"""
function get_entry_nodes(graph::DAG)
apply_all!(graph)
result = Vector{Node}()
for node in graph.nodes
if (is_entry_node(node))


@ -17,7 +17,7 @@ end
The representation of the graph as a set of [`Node`](@ref)s.
A DAG can be loaded using the appropriate parse function, e.g. [`parse_abc`](@ref).
A DAG can be loaded using the appropriate parse_dag function, e.g. [`parse_dag`](@ref).
[`Operation`](@ref)s can be applied on it using [`push_operation!`](@ref) and reverted using [`pop_operation!`](@ref) like a stack.
To get the set of possible operations, use [`get_operations`](@ref).
@ -52,11 +52,7 @@ end
Construct and return an empty [`PossibleOperations`](@ref) object.
"""
function PossibleOperations()
return PossibleOperations(
Set{NodeFusion}(),
Set{NodeReduction}(),
Set{NodeSplit}(),
)
return PossibleOperations(Set{NodeFusion}(), Set{NodeReduction}(), Set{NodeSplit}())
end
"""


@ -59,3 +59,19 @@ function is_valid(graph::DAG)
return true
end
"""
is_scheduled(graph::DAG)
Validate that the entire graph has been scheduled, i.e., every [`ComputeTaskNode`](@ref) has its `.device` set.
"""
function is_scheduled(graph::DAG)
for node in graph.nodes
if (node isa DataTaskNode)
continue
end
@assert !ismissing(node.device)
end
return true
end


@ -45,6 +45,12 @@ For valid inputs, both input particles should have the same momenta at this poin
12 FLOP.
"""
function compute(::ComputeTaskS2, data1::ParticleValue, data2::ParticleValue)
#=
@assert isapprox(abs(data1.p.momentum.E), abs(data2.p.momentum.E), rtol = 0.001, atol = sqrt(eps())) "E: $(data1.p.momentum.E) vs. $(data2.p.momentum.E)"
@assert isapprox(data1.p.momentum.px, -data2.p.momentum.px, rtol = 0.001, atol = sqrt(eps())) "px: $(data1.p.momentum.px) vs. $(data2.p.momentum.px)"
@assert isapprox(data1.p.momentum.py, -data2.p.momentum.py, rtol = 0.001, atol = sqrt(eps())) "py: $(data1.p.momentum.py) vs. $(data2.p.momentum.py)"
@assert isapprox(data1.p.momentum.pz, -data2.p.momentum.pz, rtol = 0.001, atol = sqrt(eps())) "pz: $(data1.p.momentum.pz) vs. $(data2.p.momentum.pz)"
=#
return data1.v * inner_edge(data1.p) * data2.v
end
@ -71,186 +77,78 @@ function compute(::ComputeTaskSum, data::Vector{Float64})
end
"""
compute(t::FusedComputeTask, data)
get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Compute a [`FusedComputeTask`](@ref). This simply asserts false and should not be called. Fused Compute Tasks generate their expressions directly through the other tasks instead.
Generate and return code evaluating [`ComputeTaskP`](@ref) on `inSyms`, providing the output on `outSym`.
"""
function compute(t::FusedComputeTask, data)
@assert false "This is not implemented and should never be called"
function get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskP(), $(in[1]))")
end
"""
get_expression(::ComputeTaskP, inSymbol::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate and return code evaluating [`ComputeTaskP`](@ref) on `inSymbol`, providing the output on `outSymbol`.
Generate code evaluating [`ComputeTaskU`](@ref) on `inSyms`, providing the output on `outSym`.
`inSyms` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
"""
function get_expression(::ComputeTaskP, inSymbol::Symbol, outSymbol::Symbol)
return Meta.parse("$outSymbol = compute(ComputeTaskP(), $inSymbol)")
function get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskU(), $(in[1]))")
end
"""
get_expression(::ComputeTaskU, inSymbol::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate code evaluating [`ComputeTaskU`](@ref) on `inSymbol`, providing the output on `outSymbol`.
`inSymbol` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
Generate code evaluating [`ComputeTaskV`](@ref) on `inSyms`, providing the output on `outSym`.
`inSym[1]` and `inSym[2]` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
"""
function get_expression(::ComputeTaskU, inSymbol::Symbol, outSymbol::Symbol)
return Meta.parse("$outSymbol = compute(ComputeTaskU(), $inSymbol)")
function get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1]), eval(inExprs[2])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskV(), $(in[1]), $(in[2]))")
end
"""
get_expression(::ComputeTaskV, inSymbol1::Symbol, inSymbol2::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate code evaluating [`ComputeTaskV`](@ref) on `inSymbol1` and `inSymbol2`, providing the output on `outSymbol`.
`inSymbol1` and `inSymbol2` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
Generate code evaluating [`ComputeTaskS2`](@ref) on `inSyms`, providing the output on `outSym`.
`inSyms[1]` and `inSyms[2]` should be of type [`ParticleValue`](@ref), `outSym` will be of type `Float64`.
"""
function get_expression(
::ComputeTaskV,
inSymbol1::Symbol,
inSymbol2::Symbol,
outSymbol::Symbol,
)
return Meta.parse(
"$outSymbol = compute(ComputeTaskV(), $inSymbol1, $inSymbol2)",
)
function get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1]), eval(inExprs[2])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskS2(), $(in[1]), $(in[2]))")
end
"""
get_expression(::ComputeTaskS2, inSymbol1::Symbol, inSymbol2::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate code evaluating [`ComputeTaskS2`](@ref) on `inSymbol1` and `inSymbol2`, providing the output on `outSymbol`.
`inSymbol1` and `inSymbol2` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type `Float64`.
Generate code evaluating [`ComputeTaskS1`](@ref) on `inSyms`, providing the output on `outSym`.
`inSyms` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
"""
function get_expression(
::ComputeTaskS2,
inSymbol1::Symbol,
inSymbol2::Symbol,
outSymbol::Symbol,
)
return Meta.parse(
"$outSymbol = compute(ComputeTaskS2(), $inSymbol1, $inSymbol2)",
)
function get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskS1(), $(in[1]))")
end
"""
get_expression(::ComputeTaskS1, inSymbol::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate code evaluating [`ComputeTaskS1`](@ref) on `inSymbol`, providing the output on `outSymbol`.
`inSymbol` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
Generate code evaluating [`ComputeTaskSum`](@ref) on `inSyms`, providing the output on `outSym`.
`inSyms` should be of type [`Float64`], `outSym` will be of type [`Float64`].
"""
function get_expression(::ComputeTaskS1, inSymbol::Symbol, outSymbol::Symbol)
return Meta.parse("$outSymbol = compute(ComputeTaskS1(), $inSymbol)")
end
"""
get_expression(::ComputeTaskSum, inSymbols::Vector{Symbol}, outSymbol::Symbol)
Generate code evaluating [`ComputeTaskSum`](@ref) on `inSymbols`, providing the output on `outSymbol`.
`inSymbols` should be of type [`Float64`], `outSymbol` will be of type [`Float64`].
"""
function get_expression(
::ComputeTaskSum,
inSymbols::Vector{Symbol},
outSymbol::Symbol,
)
return quote
$outSymbol = compute(ComputeTaskSum(), [$(inSymbols...)])
end
end
"""
get_expression(t::FusedComputeTask, inSymbols::Vector{Symbol}, outSymbol::Symbol)
Generate code evaluating a [`FusedComputeTask`](@ref) on `inSymbols`, providing the output on `outSymbol`.
`inSymbols` should be of the correct types and may be heterogeneous. `outSymbol` will be of the type of the output of `T2` of t.
"""
function get_expression(
t::FusedComputeTask,
inSymbols::Vector{Symbol},
outSymbol::Symbol,
)
(T1, T2) = get_types(t)
c1 = children(T1())
c2 = children(T2())
expr1 = nothing
expr2 = nothing
# TODO need to figure out how to know which inputs belong to which subtask
# since we order the vectors with the child nodes we can't just split
if (c1 == 1)
expr1 = get_expression(T1(), inSymbols[begin], :intermediate)
elseif (c1 == 2)
expr1 =
get_expression(T1(), inSymbols[begin], inSymbols[2], :intermediate)
else
expr1 = get_expression(T1(), inSymbols[begin:c1], :intermediate)
end
if (c2 == 1)
expr2 = get_expression(T2(), :intermediate, outSymbol)
elseif c2 == 2
expr2 =
get_expression(T2(), :intermediate, inSymbols[c1 + 1], outSymbol)
else
expr2 = get_expression(
T2(),
:intermediate * inSymbols[(c1 + 1):end],
outSymbol,
)
end
return Expr(:block, expr1, expr2)
end
"""
get_expression(node::ComputeTaskNode)
Generate and return code for a given [`ComputeTaskNode`](@ref).
"""
function get_expression(node::ComputeTaskNode)
t = typeof(node.task)
@assert length(node.children) == children(node.task) || t <: ComputeTaskSum
if (t <: ComputeTaskU || t <: ComputeTaskP || t <: ComputeTaskS1) # single input
symbolIn = Symbol("data_$(to_var_name(node.children[1].id))")
symbolOut = Symbol("data_$(to_var_name(node.id))")
return get_expression(t(), symbolIn, symbolOut)
elseif (t <: ComputeTaskS2 || t <: ComputeTaskV) # double input
symbolIn1 = Symbol("data_$(to_var_name(node.children[1].id))")
symbolIn2 = Symbol("data_$(to_var_name(node.children[2].id))")
symbolOut = Symbol("data_$(to_var_name(node.id))")
return get_expression(t(), symbolIn1, symbolIn2, symbolOut)
elseif (t <: ComputeTaskSum || t <: FusedComputeTask) # vector input
inSymbols = Vector{Symbol}()
for child in node.children
push!(inSymbols, Symbol("data_$(to_var_name(child.id))"))
end
outSymbol = Symbol("data_$(to_var_name(node.id))")
return get_expression(t(), inSymbols, outSymbol)
else
error("Unknown compute task")
end
end
"""
get_expression(node::DataTaskNode)
Generate and return code for a given [`DataTaskNode`](@ref).
"""
function get_expression(node::DataTaskNode)
# TODO: do things to transport data from/to gpu, between numa nodes, etc.
@assert length(node.children) <= 1
inSymbol = nothing
if (length(node.children) == 1)
inSymbol = Symbol("data_$(to_var_name(node.children[1].id))")
else
inSymbol = Symbol("data_$(to_var_name(node.id))_in")
end
outSymbol = Symbol("data_$(to_var_name(node.id))")
dataTransportExp = Meta.parse("$outSymbol = $inSymbol")
return dataTransportExp
function get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector, outExpr)
in = eval.(inExprs)
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskSum(), [$(unroll_symbol_vector(in))])")
end
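A brief sketch of how the new device-aware `get_expression` works for a single task, using a `NumaNode` with the `LocalVariables` strategy (the symbol names are arbitrary examples):
```julia
using UUIDs

dev = NumaNode(0, 1, LocalVariables(), -1, uuid1())

inExpr  = gen_access_expr(dev, :a1_in)   # -> :data_a1_in
outExpr = gen_access_expr(dev, :a1)      # -> :data_a1

get_expression(ComputeTaskP(), dev, [inExpr], outExpr)
# -> :(data_a1 = compute(ComputeTaskP(), data_a1_in))
```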


@ -1,74 +1,198 @@
using QEDbase
using Random
using Roots
using ForwardDiff
ComputeTaskSum() = ComputeTaskSum(0)
"""
Particle(rng)
gen_process_input(processDescription::ABCProcessDescription)
Return a randomly generated particle.
Return an `ABCProcessInput` of randomly generated [`ABCParticle`](@ref)s from an [`ABCProcessDescription`](@ref). The process description can be created manually or parsed from a string using [`parse_process`](@ref).
Note: This uses RAMBO to create a valid process with conservation of momentum and energy.
"""
function Particle(rng, type::ParticleType)
function gen_process_input(processDescription::ABCProcessDescription)
inParticleTypes = keys(processDescription.inParticles)
outParticleTypes = keys(processDescription.outParticles)
p1 = rand(rng, Float64)
p2 = rand(rng, Float64)
p3 = rand(rng, Float64)
m = mass(type)
# keep the momenta of the particles on-shell
p4 = sqrt(p1^2 + p2^2 + p3^2 + m^2)
return Particle(p1, p2, p3, p4, type)
end
"""
gen_particles(n::Int)
Return a Vector of `n` randomly generated [`Particle`](@ref)s.
Note: This does not take into account the preservation of momenta required for an actual valid process!
"""
function gen_particles(ns::Dict{ParticleType, Int})
particles = Dict{ParticleType, Vector{Particle}}()
rng = MersenneTwister(0)
if ns == Dict((A => 2), (B => 2))
rho = 1.0
omega = rand(rng, Float64)
theta = rand(rng, Float64) * π
phi = rand(rng, Float64) * π
particles[A] = Vector{Particle}()
particles[B] = Vector{Particle}()
push!(particles[A], Particle(omega, 0, 0, omega, A))
push!(particles[B], Particle(omega, 0, 0, -omega, B))
push!(
particles[A],
Particle(
omega,
rho * cos(theta) * cos(phi),
rho * cos(theta) * sin(phi),
rho * sin(theta),
A,
),
)
push!(
particles[B],
Particle(
omega,
-rho * cos(theta) * cos(phi),
-rho * cos(theta) * sin(phi),
-rho * sin(theta),
B,
),
)
return particles
end
for (type, n) in ns
particles[type] = Vector{Particle}()
for i in 1:n
push!(particles[type], Particle(rng, type))
massSum = 0
inputMasses = Vector{Float64}()
for (particle, n) in processDescription.inParticles
for _ in 1:n
massSum += mass(particle)
push!(inputMasses, mass(particle))
end
end
return particles
outputMasses = Vector{Float64}()
for (particle, n) in processDescription.outParticles
for _ in 1:n
massSum += mass(particle)
push!(outputMasses, mass(particle))
end
end
# add some extra random mass to allow for some momentum
massSum += rand(rng[threadid()]) * (length(inputMasses) + length(outputMasses))
inputParticles = Vector{ABCParticle}()
initialMomenta = generate_initial_moms(massSum, inputMasses)
index = 1
for (particle, n) in processDescription.inParticles
for _ in 1:n
mom = initialMomenta[index]
push!(inputParticles, particle(mom))
index += 1
end
end
outputParticles = Vector{ABCParticle}()
final_momenta = generate_physical_massive_moms(rng[threadid()], massSum, outputMasses)
index = 1
for (particle, n) in processDescription.outParticles
for _ in 1:n
mom = final_momenta[index]
push!(outputParticles, particle(SFourMomentum(-mom.E, mom.px, mom.py, mom.pz)))
index += 1
end
end
processInput = ABCProcessInput(processDescription, inputParticles, outputParticles)
return processInput
end
####################
# CODE FROM HERE BORROWED FROM SOURCE: https://codebase.helmholtz.cloud/qedsandbox/QEDphasespaces.jl/
# use qedphasespaces directly once released
#
# quick and dirty implementation of the RAMBO algorithm
#
# reference:
# * https://cds.cern.ch/record/164736/files/198601282.pdf
# * https://www.sciencedirect.com/science/article/pii/0010465586901190
####################
function generate_initial_moms(ss, masses)
E1 = (ss^2 + masses[1]^2 - masses[2]^2) / (2 * ss)
E2 = (ss^2 + masses[2]^2 - masses[1]^2) / (2 * ss)
rho1 = sqrt(E1^2 - masses[1]^2)
rho2 = sqrt(E2^2 - masses[2]^2)
return [SFourMomentum(E1, 0, 0, rho1), SFourMomentum(E2, 0, 0, -rho2)]
end
Random.rand(rng::AbstractRNG, ::Random.SamplerType{SFourMomentum}) = SFourMomentum(rand(rng, 4))
Random.rand(rng::AbstractRNG, ::Random.SamplerType{NTuple{N, Float64}}) where {N} = Tuple(rand(rng, N))
function _transform_uni_to_mom(u1, u2, u3, u4)
cth = 2 * u1 - 1
sth = sqrt(1 - cth^2)
phi = 2 * pi * u2
q0 = -log(u3 * u4)
qx = q0 * sth * cos(phi)
qy = q0 * sth * sin(phi)
qz = q0 * cth
return SFourMomentum(q0, qx, qy, qz)
end
function _transform_uni_to_mom!(uni_mom, dest)
u1, u2, u3, u4 = Tuple(uni_mom)
cth = 2 * u1 - 1
sth = sqrt(1 - cth^2)
phi = 2 * pi * u2
q0 = -log(u3 * u4)
qx = q0 * sth * cos(phi)
qy = q0 * sth * sin(phi)
qz = q0 * cth
return dest = SFourMomentum(q0, qx, qy, qz)
end
_transform_uni_to_mom(u1234::Tuple) = _transform_uni_to_mom(u1234...)
_transform_uni_to_mom(u1234::SFourMomentum) = _transform_uni_to_mom(Tuple(u1234))
function generate_massless_moms(rng, n::Int)
a = Vector{SFourMomentum}(undef, n)
rand!(rng, a)
return map(_transform_uni_to_mom, a)
end
function generate_physical_massless_moms(rng, ss, n)
r_moms = generate_massless_moms(rng, n)
Q = sum(r_moms)
M = sqrt(Q * Q)
fac = -1 / M
Qx = getX(Q)
Qy = getY(Q)
Qz = getZ(Q)
bx = fac * Qx
by = fac * Qy
bz = fac * Qz
gamma = getT(Q) / M
a = 1 / (1 + gamma)
x = ss / M
i = 1
while i <= n
mom = r_moms[i]
mom0 = getT(mom)
mom1 = getX(mom)
mom2 = getY(mom)
mom3 = getZ(mom)
bq = bx * mom1 + by * mom2 + bz * mom3
p0 = x * (gamma * mom0 + bq)
px = x * (mom1 + bx * mom0 + a * bq * bx)
py = x * (mom2 + by * mom0 + a * bq * by)
pz = x * (mom3 + bz * mom0 + a * bq * bz)
r_moms[i] = SFourMomentum(p0, px, py, pz)
i += 1
end
return r_moms
end
function _to_be_solved(xi, masses, p0s, ss)
sum = 0.0
for (i, E) in enumerate(p0s)
sum += sqrt(masses[i]^2 + xi^2 * E^2)
end
return sum - ss
end
function _build_massive_momenta(xi, masses, massless_moms)
vec = SFourMomentum[]
i = 1
while i <= length(massless_moms)
massless_mom = massless_moms[i]
k0 = sqrt(getT(massless_mom)^2 * xi^2 + masses[i]^2)
kx = xi * getX(massless_mom)
ky = xi * getY(massless_mom)
kz = xi * getZ(massless_mom)
push!(vec, SFourMomentum(k0, kx, ky, kz))
i += 1
end
return vec
end
first_derivative(func) = x -> ForwardDiff.derivative(func, float(x))
function generate_physical_massive_moms(rng, ss, masses; x0 = 0.1)
n = length(masses)
massless_moms = generate_physical_massless_moms(rng, ss, n)
energies = getT.(massless_moms)
f = x -> _to_be_solved(x, masses, energies, ss)
xi = find_zero((f, first_derivative(f)), x0, Roots.Newton())
return _build_massive_momenta(xi, masses, massless_moms)
end
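A quick sketch of using the borrowed RAMBO helpers in isolation (the mass and energy values are arbitrary examples):
```julia
using Random

rng = MersenneTwister(0)
masses = [1.0, 1.0, 1.0, 0.0]   # e.g. three massive and one massless out-particle
ss = 10.0                       # total available energy, must exceed the sum of masses

moms = generate_physical_massive_moms(rng, ss, masses)
sum(moms)                       # total four-momentum, approximately (ss, 0, 0, 0)
```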


@ -32,13 +32,13 @@ function parse_edges(input::AbstractString)
end
"""
parse_abc(filename::String; verbose::Bool = false)
parse_dag(filename::String, model::ABCModel; verbose::Bool = false)
Read an abc-model process from the given file. If `verbose` is set to true, print some progress information to stdout.
Returns a valid [`DAG`](@ref).
"""
function parse_abc(filename::String, verbose::Bool = false)
function parse_dag(filename::AbstractString, model::ABCModel, verbose::Bool = false)
file = open(filename, "r")
if (verbose)
@ -63,10 +63,9 @@ function parse_abc(filename::String, verbose::Bool = false)
end
sizehint!(graph.nodes, estimate_no_nodes)
sum_node = insert_node!(graph, make_node(ComputeTaskSum()), false, false)
global_data_out =
insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), false, false)
insert_edge!(graph, sum_node, global_data_out, false, false)
sum_node = insert_node!(graph, make_node(ComputeTaskSum(0)), track = false, invalidate_cache = false)
global_data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false)
insert_edge!(graph, sum_node, global_data_out, track = false, invalidate_cache = false)
# remember the data out nodes for connection
dataOutNodes = Dict()
@ -81,10 +80,7 @@ function parse_abc(filename::String, verbose::Bool = false)
noNodes += 1
if (noNodes % 100 == 0)
if (verbose)
percent = string(
round(100.0 * noNodes / nodesToRead, digits = 2),
"%",
)
percent = string(round(100.0 * noNodes / nodesToRead, digits = 2), "%")
print("\rReading Nodes... $percent")
end
end
@ -93,30 +89,20 @@ function parse_abc(filename::String, verbose::Bool = false)
data_in = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE), string(node)),
false,
false,
track = false,
invalidate_cache = false,
) # read particle data node
compute_P =
insert_node!(graph, make_node(ComputeTaskP()), false, false) # compute P node
data_Pu = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
) # transfer data from P to u (one ParticleValue object)
compute_u =
insert_node!(graph, make_node(ComputeTaskU()), false, false) # compute U node
data_out = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
) # transfer data out from u (one ParticleValue object)
compute_P = insert_node!(graph, make_node(ComputeTaskP()), track = false, invalidate_cache = false) # compute P node
data_Pu =
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data from P to u (one ParticleValue object)
compute_u = insert_node!(graph, make_node(ComputeTaskU()), track = false, invalidate_cache = false) # compute U node
data_out =
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data out from u (one ParticleValue object)
insert_edge!(graph, data_in, compute_P, false, false)
insert_edge!(graph, compute_P, data_Pu, false, false)
insert_edge!(graph, data_Pu, compute_u, false, false)
insert_edge!(graph, compute_u, data_out, false, false)
insert_edge!(graph, data_in, compute_P, track = false, invalidate_cache = false)
insert_edge!(graph, compute_P, data_Pu, track = false, invalidate_cache = false)
insert_edge!(graph, data_Pu, compute_u, track = false, invalidate_cache = false)
insert_edge!(graph, compute_u, data_out, track = false, invalidate_cache = false)
# remember the data_out node for future edges
dataOutNodes[node] = data_out
@ -126,63 +112,48 @@ function parse_abc(filename::String, verbose::Bool = false)
in1 = capt.captures[1]
in2 = capt.captures[2]
compute_v =
insert_node!(graph, make_node(ComputeTaskV()), false, false)
data_out = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
)
compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false)
data_out =
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false)
if (occursin(regex_c, in1))
# put an S node after this input
compute_S = insert_node!(
graph,
make_node(ComputeTaskS1()),
false,
false,
)
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false)
data_S_v = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
track = false,
invalidate_cache = false,
)
insert_edge!(graph, dataOutNodes[in1], compute_S, false, false)
insert_edge!(graph, compute_S, data_S_v, false, false)
insert_edge!(graph, dataOutNodes[in1], compute_S, track = false, invalidate_cache = false)
insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false)
insert_edge!(graph, data_S_v, compute_v, false, false)
insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false)
else
insert_edge!(graph, dataOutNodes[in1], compute_v, false, false)
insert_edge!(graph, dataOutNodes[in1], compute_v, track = false, invalidate_cache = false)
end
if (occursin(regex_c, in2))
# I think the current generator only puts the combined particles in the first space, so this case might never be entered
# put an S node after this input
compute_S = insert_node!(
graph,
make_node(ComputeTaskS1()),
false,
false,
)
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false)
data_S_v = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
track = false,
invalidate_cache = false,
)
insert_edge!(graph, dataOutNodes[in2], compute_S, false, false)
insert_edge!(graph, compute_S, data_S_v, false, false)
insert_edge!(graph, dataOutNodes[in2], compute_S, track = false, invalidate_cache = false)
insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false)
insert_edge!(graph, data_S_v, compute_v, false, false)
insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false)
else
insert_edge!(graph, dataOutNodes[in2], compute_v, false, false)
insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false)
end
insert_edge!(graph, compute_v, data_out, false, false)
insert_edge!(graph, compute_v, data_out, track = false, invalidate_cache = false)
dataOutNodes[node] = data_out
elseif occursin(regex_m, node)
@ -193,43 +164,31 @@ function parse_abc(filename::String, verbose::Bool = false)
in3 = capt.captures[3]
# in2 + in3 with a v
compute_v =
insert_node!(graph, make_node(ComputeTaskV()), false, false)
data_v = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
)
compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false)
data_v =
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false)
insert_edge!(graph, dataOutNodes[in2], compute_v, false, false)
insert_edge!(graph, dataOutNodes[in3], compute_v, false, false)
insert_edge!(graph, compute_v, data_v, false, false)
insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false)
insert_edge!(graph, dataOutNodes[in3], compute_v, track = false, invalidate_cache = false)
insert_edge!(graph, compute_v, data_v, track = false, invalidate_cache = false)
# combine with the v of the combined other input
compute_S2 =
insert_node!(graph, make_node(ComputeTaskS2()), false, false)
data_out = insert_node!(
graph,
make_node(DataTask(FLOAT_SIZE)),
false,
false,
) # output of a S2 task is only a float
compute_S2 = insert_node!(graph, make_node(ComputeTaskS2()), track = false, invalidate_cache = false)
data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false) # output of a S2 task is only a float
insert_edge!(graph, data_v, compute_S2, false, false)
insert_edge!(graph, dataOutNodes[in1], compute_S2, false, false)
insert_edge!(graph, compute_S2, data_out, false, false)
insert_edge!(graph, data_v, compute_S2, track = false, invalidate_cache = false)
insert_edge!(graph, dataOutNodes[in1], compute_S2, track = false, invalidate_cache = false)
insert_edge!(graph, compute_S2, data_out, track = false, invalidate_cache = false)
insert_edge!(graph, data_out, sum_node, false, false)
insert_edge!(graph, data_out, sum_node, track = false, invalidate_cache = false)
add_child!(sum_node.task)
elseif occursin(regex_plus, node)
if (verbose)
println("\rReading Nodes Complete ")
println("Added ", length(graph.nodes), " nodes")
end
else
@assert false (
"Unknown node '$node' while reading from file $filename"
)
@assert false ("Unknown node '$node' while reading from file $filename")
end
end
@ -244,6 +203,46 @@ function parse_abc(filename::String, verbose::Bool = false)
if (verbose)
println("Done")
end
# don't actually need to read the edges
return graph
end
"""
parse_process(string::AbstractString, model::ABCModel)
Parse a string representation of a process, such as "AB->ABBB" into the corresponding [`ABCProcessDescription`](@ref).
"""
function parse_process(str::AbstractString, model::ABCModel)
inParticles = Dict{Type, Int}()
outParticles = Dict{Type, Int}()
if !(contains(str, "->"))
throw("Did not find -> while parsing process \"$str\"")
end
(inStr, outStr) = split(str, "->")
if (isempty(inStr) || isempty(outStr))
throw("Process (\"$str\") input or output part is empty!")
end
for t in types(model)
inCount = count(x -> x == String(t)[1], inStr)
outCount = count(x -> x == String(t)[1], outStr)
if inCount != 0
inParticles[t] = inCount
end
if outCount != 0
outParticles[t] = outCount
end
end
if length(inStr) != sum(values(inParticles))
throw("Encountered unknown characters in the input part of process \"$str\"")
elseif length(outStr) != sum(values(outParticles))
throw("Encountered unknown characters in the output part of process \"$str\"")
end
return ABCProcessDescription(inParticles, outParticles)
end
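For example, parsing a process string and generating a matching random input (a sketch; the process string is an arbitrary example):
```julia
process = parse_process("AB->ABBB", ABCModel())

input = gen_process_input(process)
length(input.inParticles)    # 2
length(input.outParticles)   # 4
```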


@ -1,99 +1,140 @@
"""
ParticleType
using QEDbase
A Particle Type in the ABC Model as an enum, with types `A`, `B` and `C`.
"""
@enum ParticleType A = 1 B = 2 C = 3
ABCModel <: AbstractPhysicsModel
Singleton definition for identification of the ABC-Model.
"""
struct ABCModel <: AbstractPhysicsModel end
"""
ABCParticle
Base type for all particles in the [`ABCModel`](@ref).
"""
abstract type ABCParticle <: AbstractParticle end
"""
ParticleA <: ABCParticle
An 'A' particle in the ABC Model.
"""
struct ParticleA <: ABCParticle
momentum::SFourMomentum
end
"""
ParticleB <: ABCParticle
A 'B' particle in the ABC Model.
"""
struct ParticleB <: ABCParticle
momentum::SFourMomentum
end
"""
ParticleC <: ABCParticle
A 'C' particle in the ABC Model.
"""
struct ParticleC <: ABCParticle
momentum::SFourMomentum
end
"""
ABCProcessDescription <: AbstractProcessDescription
A description of a process in the ABC-Model. Contains the input and output particles.
See also: [`in_particles`](@ref), [`out_particles`](@ref), [`parse_process`](@ref)
"""
struct ABCProcessDescription <: AbstractProcessDescription
inParticles::Dict{Type, Int}
outParticles::Dict{Type, Int}
end
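For illustration (a sketch, not from this commit), the description that parse_process builds for "AB->ABBB" could also be constructed directly:
inParts  = Dict{Type, Int}(ParticleA => 1, ParticleB => 1)   # incoming particle counts
outParts = Dict{Type, Int}(ParticleA => 1, ParticleB => 3)   # outgoing particle counts
proc = ABCProcessDescription(inParts, outParts)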
"""
ABCProcessInput <: AbstractProcessInput
Input for a ABC Process. Contains the [`ABCProcessDescription`](@ref) of the process it is an input for, and the values of the in and out particles.
See also: [`gen_process_input`](@ref)
"""
struct ABCProcessInput <: AbstractProcessInput
process::ABCProcessDescription
inParticles::Vector{ABCParticle}
outParticles::Vector{ABCParticle}
end
"""
PARTICLE_MASSES
A constant dictionary containing the masses of the different [`ParticleType`](@ref)s.
A constant dictionary containing the masses of the different [`ABCParticle`](@ref)s.
"""
const PARTICLE_MASSES =
Dict{ParticleType, Float64}(A => 1.0, B => 1.0, C => 0.0)
const PARTICLE_MASSES = Dict{Type, Float64}(ParticleA => 1.0, ParticleB => 1.0, ParticleC => 0.0)
"""
Particle
A struct describing a particle of the ABC-Model. It has the 4 momentum parts P0...P3 and a [`ParticleType`](@ref).
`sizeof(Particle())` = 40 Byte
"""
struct Particle
P0::Float64
P1::Float64
P2::Float64
P3::Float64
type::ParticleType
end
"""
ParticleValue
A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated.
`sizeof(ParticleValue())` = 48 Byte
"""
struct ParticleValue
p::Particle
v::Float64
end
"""
mass(t::ParticleType)
mass(t::Type{T}) where {T <: ABCParticle}
Return the mass (at rest) of the given particle type.
"""
mass(t::ParticleType) = PARTICLE_MASSES[t]
mass(t::Type{T}) where {T <: ABCParticle} = PARTICLE_MASSES[t]
"""
remaining_type(t1::ParticleType, t2::ParticleType)
interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
For two given (non-equal) particle types, return the third particle type of the ABC model.
"""
function remaining_type(t1::ParticleType, t2::ParticleType)
function interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
@assert t1 != t2
if t1 != A && t2 != A
return A
elseif t1 != B && t2 != B
return B
if t1 != ParticleA && t2 != ParticleA
return ParticleA
elseif t1 != ParticleB && t2 != ParticleB
return ParticleB
else
return C
return ParticleC
end
end
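As a quick sanity sketch (not part of this commit), any two distinct particle types are expected to map to the remaining third one:
@assert interaction_result(ParticleA, ParticleB) == ParticleC
@assert interaction_result(ParticleA, ParticleC) == ParticleB
@assert interaction_result(ParticleB, ParticleC) == ParticleA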
"""
square(p::Particle)
types(::ABCModel)
Return a Vector of the possible types of particle in the [`ABCModel`](@ref).
"""
function types(::ABCModel)
return [ParticleA, ParticleB, ParticleC]
end
"""
square(p::ABCParticle)
Return the square of the particle's momentum as a `Float` value.
Takes 7 effective FLOP.
"""
function square(p::Particle)
return p.P0 * p.P0 - p.P1 * p.P1 - p.P2 * p.P2 - p.P3 * p.P3
function square(p::ABCParticle)
return getMass2(p.momentum)
end
"""
inner_edge(p::Particle)
inner_edge(p::ABCParticle)
Return the factor of the inner edge with the given (virtual) particle.
Takes 10 effective FLOP. (3 here + 10 in square(p))
Takes 10 effective FLOP. (3 here + 7 in square(p))
"""
function inner_edge(p::Particle)
return 1.0 / (square(p) - mass(p.type) * mass(p.type))
function inner_edge(p::ABCParticle)
return 1.0 / (square(p) - mass(typeof(p)) * mass(typeof(p)))
end
"""
outer_edge(p::Particle)
outer_edge(p::ABCParticle)
Return the factor of the outer edge with the given (real) particle.
Takes 0 effective FLOP.
"""
function outer_edge(p::Particle)
function outer_edge(p::ABCParticle)
return 1.0
end
@ -111,20 +152,58 @@ function vertex()
end
"""
preserve_momentum(p1::Particle, p2::Particle)
preserve_momentum(p1::ABCParticle, p2::ABCParticle)
Calculate and return a new particle from two given interacting ones at a vertex.
Takes 4 effective FLOP.
"""
function preserve_momentum(p1::Particle, p2::Particle)
p3 = Particle(
p1.P0 + p2.P0,
p1.P1 + p2.P1,
p1.P2 + p2.P2,
p1.P3 + p2.P3,
remaining_type(p1.type, p2.type),
)
function preserve_momentum(p1::ABCParticle, p2::ABCParticle)
t3 = interaction_result(typeof(p1), typeof(p2))
p3 = t3(p1.momentum + p2.momentum)
return p3
end
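A hedged sketch (not part of this commit, assuming QEDbase's four-component SFourMomentum constructor) of combining two real particles at a vertex:
using QEDbase
p1 = ParticleA(SFourMomentum(1.0, 0.0, 0.0, 0.5))
p2 = ParticleB(SFourMomentum(1.0, 0.0, 0.0, -0.5))
p3 = preserve_momentum(p1, p2)   # a ParticleC carrying p1.momentum + p2.momentum
@assert typeof(p3) == ParticleC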
"""
type_from_name(name::String)
Given the name of a particle, return the particle's [`Type`].
"""
function type_from_name(name::String)
if startswith(name, "A")
return ParticleA
elseif startswith(name, "B")
return ParticleB
elseif startswith(name, "C")
return ParticleC
else
throw("Invalid name for a particle in the ABC model")
end
end
function String(::Type{ParticleA})
return "A"
end
function String(::Type{ParticleB})
return "B"
end
function String(::Type{ParticleC})
return "C"
end
function in_particles(process::ABCProcessDescription)
return process.inParticles
end
function in_particles(input::ABCProcessInput)
return input.inParticles
end
function out_particles(process::ABCProcessDescription)
return process.outParticles
end
function out_particles(input::ABCProcessInput)
return input.outParticles
end

src/models/abc/print.jl (new file)

@ -0,0 +1,58 @@
"""
show(io::IO, process::ABCProcessDescription)
Pretty print an [`ABCProcessDescription`](@ref) (no newlines).
```jldoctest
julia> using MetagraphOptimization
julia> print(parse_process("AB->ABBB", ABCModel()))
ABC Process: 'AB->ABBB'
```
"""
function show(io::IO, process::ABCProcessDescription)
# types() gives the types in order (ABC) instead of random like keys() would
print(io, "ABC Process: \'")
for type in types(ABCModel())
for _ in 1:get(process.inParticles, type, 0)
print(io, String(type))
end
end
print(io, "->")
for type in types(ABCModel())
for _ in 1:get(process.outParticles, type, 0)
print(io, String(type))
end
end
print(io, "'")
return nothing
end
"""
show(io::IO, processInput::ABCProcessInput)
Pretty print an [`ABCProcessInput`](@ref) (with newlines).
"""
function show(io::IO, processInput::ABCProcessInput)
println(io, "Input for $(processInput.process):")
println(io, " $(length(processInput.inParticles)) Incoming particles:")
for particle in processInput.inParticles
println(io, " $particle")
end
println(io, " $(length(processInput.outParticles)) Outgoing Particles:")
for particle in processInput.outParticles
println(io, " $particle")
end
return nothing
end
"""
show(io::IO, particle::T) where {T <: ABCParticle}
Pretty print an [`ABCParticle`](@ref) (no newlines).
"""
function show(io::IO, particle::T) where {T <: ABCParticle}
print(io, "$(String(typeof(particle))): $(particle.momentum)")
return nothing
end


@ -57,42 +57,42 @@ end
Print the S1 task to io.
"""
show(io::IO, t::ComputeTaskS1) = print("ComputeS1")
show(io::IO, t::ComputeTaskS1) = print(io, "ComputeS1")
"""
show(io::IO, t::ComputeTaskS2)
Print the S2 task to io.
"""
show(io::IO, t::ComputeTaskS2) = print("ComputeS2")
show(io::IO, t::ComputeTaskS2) = print(io, "ComputeS2")
"""
show(io::IO, t::ComputeTaskP)
Print the P task to io.
"""
show(io::IO, t::ComputeTaskP) = print("ComputeP")
show(io::IO, t::ComputeTaskP) = print(io, "ComputeP")
"""
show(io::IO, t::ComputeTaskU)
Print the U task to io.
"""
show(io::IO, t::ComputeTaskU) = print("ComputeU")
show(io::IO, t::ComputeTaskU) = print(io, "ComputeU")
"""
show(io::IO, t::ComputeTaskV)
Print the V task to io.
"""
show(io::IO, t::ComputeTaskV) = print("ComputeV")
show(io::IO, t::ComputeTaskV) = print(io, "ComputeV")
"""
show(io::IO, t::ComputeTaskSum)
Print the sum task to io.
"""
show(io::IO, t::ComputeTaskSum) = print("ComputeSum")
show(io::IO, t::ComputeTaskSum) = print(io, "ComputeSum")
"""
copy(t::DataTask)
@ -147,19 +147,20 @@ children(::ComputeTaskV) = 2
"""
children(::ComputeTaskSum)
Return the number of children of a ComputeTaskSum, since this is variable and the task doesn't know
how many children it will sum over, return a wildcard -1.
TODO: this is kind of bad because it means we can't fuse with a sum task
Return the number of children of a ComputeTaskSum.
"""
children(::ComputeTaskSum) = -1
children(t::ComputeTaskSum) = t.children_number
"""
children(t::FusedComputeTask)
Return the number of children of a FusedComputeTask. It's the sum of the children of both tasks minus one.
Return the number of children of a FusedComputeTask.
"""
function children(t::FusedComputeTask)
(T1, T2) = get_types(t)
return children(T1()) + children(T2()) - 1 # one of the inputs is the output of T1 and thus not a child of the node
return length(union(Set(t.t1_inputs), Set(t.t2_inputs)))
end
function add_child!(t::ComputeTaskSum)
t.children_number += 1
return nothing
end


@ -47,19 +47,13 @@ struct ComputeTaskU <: AbstractComputeTask end
Task that sums all its inputs, n children.
"""
struct ComputeTaskSum <: AbstractComputeTask end
mutable struct ComputeTaskSum <: AbstractComputeTask
children_number::Int
end
"""
ABC_TASKS
Constant vector of all tasks of the ABC-Model.
"""
ABC_TASKS = [
DataTask,
ComputeTaskS1,
ComputeTaskS2,
ComputeTaskP,
ComputeTaskV,
ComputeTaskU,
ComputeTaskSum,
]
ABC_TASKS = [DataTask, ComputeTaskS1, ComputeTaskS2, ComputeTaskP, ComputeTaskV, ComputeTaskU, ComputeTaskSum]

src/models/interface.jl (new file)

@ -0,0 +1,109 @@
"""
AbstractPhysicsModel
Base type for a model, e.g. ABC-Model or QED. This is used to dispatch many functions.
"""
abstract type AbstractPhysicsModel end
"""
AbstractParticle
Base type for particles belonging to a certain [`AbstractPhysicsModel`](@ref).
"""
abstract type AbstractParticle end
"""
ParticleValue{ParticleType <: AbstractParticle}
A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated.
`sizeof(ParticleValue())` = 48 Byte
"""
struct ParticleValue{ParticleType <: AbstractParticle}
p::ParticleType
v::Float64
end
"""
AbstractProcessDescription
Base type for process descriptions. An object of this type of a corresponding [`AbstractPhysicsModel`](@ref) should uniquely identify a process in that model.
See also: [`parse_process`](@ref)
"""
abstract type AbstractProcessDescription end
"""
AbstractProcessInput
Base type for process inputs. An object of this type contains the input values (e.g. momenta) of the particles in a process.
See also: [`gen_process_input`](@ref)
"""
abstract type AbstractProcessInput end
"""
mass(t::Type{T}) where {T <: AbstractParticle}
Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the particle's mass at rest.
"""
function mass end
"""
interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: AbstractParticle, T2 <: AbstractParticle}
Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the result particle type when the two given particles interact.
"""
function interaction_result end
"""
types(::AbstractPhysicsModel)
Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref), returning a `Vector` of the available particle types in the model.
"""
function types end
"""
in_particles(::AbstractProcessDescription)
Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref).
Returns a `<: Dict{Type{AbstractParticle}, Int}` object, representing the number of incoming particles for the process per particle type.
in_particles(::AbstractProcessInput)
Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
Returns a `<: Vector{AbstractParticle}` object with the values of all incoming particles for the corresponding `ProcessDescription`.
"""
function in_particles end
"""
out_particles(::AbstractProcessDescription)
Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref).
Returns a `<: Dict{Type{AbstractParticle}, Int}` object, representing the number of outgoing particles for the process per particle type.
out_particles(::AbstractProcessInput)
Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
Returns a `<: Vector{AbstractParticle}` object with the values of all outgoing particles for the corresponding `ProcessDescription`.
"""
function out_particles end
"""
parse_process(::AbstractString, ::AbstractPhysicsModel)
Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref).
Returns a `ProcessDescription` object.
"""
function parse_process end
"""
gen_process_input(::AbstractProcessDescription)
Interface function that must be implemented for every specific [`AbstractProcessDescription`](@ref).
Returns a randomly generated and valid corresponding `ProcessInput`.
"""
function gen_process_input end
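To make the contract concrete, here is a minimal, purely hypothetical sketch (ToyModel and ToyParticle are illustrative names, not part of the package) of a model satisfying the particle-related parts of this interface:
import MetagraphOptimization: AbstractPhysicsModel, AbstractParticle, mass, interaction_result, types
struct ToyModel <: AbstractPhysicsModel end
struct ToyParticle <: AbstractParticle
    momentum::Float64
end
# extend the interface functions for the toy types
mass(::Type{ToyParticle}) = 0.0
interaction_result(::Type{ToyParticle}, ::Type{ToyParticle}) = ToyParticle
types(::ToyModel) = [ToyParticle]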

src/models/print.jl (new file)

@ -0,0 +1,10 @@
"""
show(io::IO, particleValue::ParticleValue)
Pretty print a [`ParticleValue`](@ref), no newlines.
"""
function show(io::IO, particleValue::ParticleValue)
print(io, "($(particleValue.p), value: $(particleValue.v))")
return nothing
end


@ -1,44 +1,20 @@
DataTaskNode(t::AbstractDataTask, name = "") = DataTaskNode(
t,
Vector{Node}(),
Vector{Node}(),
UUIDs.uuid1(rng[threadid()]),
missing,
missing,
missing,
name,
)
DataTaskNode(t::AbstractDataTask, name = "") =
DataTaskNode(t, Vector{Node}(), Vector{Node}(), UUIDs.uuid1(rng[threadid()]), missing, missing, missing, name)
ComputeTaskNode(t::AbstractComputeTask) = ComputeTaskNode(
t,
Vector{Node}(),
Vector{Node}(),
UUIDs.uuid1(rng[threadid()]),
missing,
missing,
Vector{NodeFusion}(),
t, # task
Vector{Node}(), # parents
Vector{Node}(), # children
UUIDs.uuid1(rng[threadid()]), # id
missing, # node reduction
missing, # node split
Vector{NodeFusion}(), # node fusions
missing, # device
)
copy(m::Missing) = missing
copy(n::ComputeTaskNode) = ComputeTaskNode(
copy(n.task),
copy(n.parents),
copy(n.children),
UUIDs.uuid1(rng[threadid()]),
copy(n.nodeReduction),
copy(n.nodeSplit),
copy(n.nodeFusions),
)
copy(n::DataTaskNode) = DataTaskNode(
copy(n.task),
copy(n.parents),
copy(n.children),
UUIDs.uuid1(rng[threadid()]),
copy(n.nodeReduction),
copy(n.nodeSplit),
copy(n.nodeFusion),
n.name,
)
copy(n::ComputeTaskNode) = ComputeTaskNode(copy(n.task))
copy(n::DataTaskNode) = DataTaskNode(copy(n.task), n.name)
"""
make_node(t::AbstractTask)


@ -22,5 +22,6 @@ end
Return the uuid as a string usable as a variable name in code generation.
"""
function to_var_name(id::UUID)
return replace(string(id), "-" => "_")
str = "_" * replace(string(id), "-" => "_")
return str
end
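For illustration (the UUID below is made up), the generated variable names look like this:
using UUIDs
id = UUID("2a1c3b84-6d2e-11ee-0a16-6b9f42b5fb1a")
to_var_name(id)   # returns "_2a1c3b84_6d2e_11ee_0a16_6b9f42b5fb1a"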


@ -24,13 +24,14 @@ abstract type Operation end
Any node that transfers data and does no computation.
# Fields
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
`.id`: The node's id. Improves the speed of comparisons.\\
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeFusion`: Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes.
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
`.id`: The node's id. Improves the speed of comparisons and is used as a unique identifier.\\
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeFusion`: Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes.\\
`.name`: The name of this node for entry nodes into the graph ([`is_entry_node`](@ref)) to reliably assign the inputs to the correct nodes when executing.\\
"""
mutable struct DataTaskNode <: Node
task::AbstractDataTask
@ -60,16 +61,17 @@ end
"""
ComputeTaskNode <: Node
Any node that transfers data and does no computation.
Any node that computes a result from inputs using an [`AbstractComputeTask`](@ref).
# Fields
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
`.id`: The node's id. Improves the speed of comparisons.\\
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeFusion`: A vector of this node's [`NodeFusion`](@ref)s. For a ComputeTaskNode there can be any number of these, unlike the DataTaskNodes.
`.task`: The node's compute task type. A concrete subtype of [`AbstractComputeTask`](@ref).\\
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
`.id`: The node's id. Improves the speed of comparisons and is used as a unique identifier.\\
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeFusions`: A vector of this node's [`NodeFusion`](@ref)s. For a `ComputeTaskNode` there can be any number of these, unlike the [`DataTaskNode`](@ref)s.\\
`.device`: The Device this node has been scheduled on by a [`Scheduler`](@ref).
"""
mutable struct ComputeTaskNode <: Node
task::AbstractComputeTask
@ -82,6 +84,9 @@ mutable struct ComputeTaskNode <: Node
# for ComputeTasks there can be multiple fusions, unlike the DataTasks
nodeFusions::Vector{Operation}
# the device this node is assigned to execute on
device::Union{AbstractDevice, Missing}
end
"""
@ -95,8 +100,5 @@ The child is the prerequisite node of the parent.
"""
struct Edge
# edge points from child to parent
edge::Union{
Tuple{DataTaskNode, ComputeTaskNode},
Tuple{ComputeTaskNode, DataTaskNode},
}
edge::Union{Tuple{DataTaskNode, ComputeTaskNode}, Tuple{ComputeTaskNode, DataTaskNode}}
end


@ -22,12 +22,24 @@ function is_valid_node(graph::DAG, node::Node)
@assert node in child.parents "Node is not a parent of its child!"
end
if !ismissing(node.nodeReduction)
#=if !ismissing(node.nodeReduction)
@assert is_valid(graph, node.nodeReduction)
end
if !ismissing(node.nodeSplit)
@assert is_valid(graph, node.nodeSplit)
end=#
if !(typeof(node.task) <: FusedComputeTask)
# the remaining checks are only necessary for fused compute tasks
return true
end
# every child must be in some input of the task
for child in node.children
str = Symbol(to_var_name(child.id))
@assert (str in node.task.t1_inputs) || (str in node.task.t2_inputs) "$str was not in any of the tasks' inputs\nt1_inputs: $(node.task.t1_inputs)\nt2_inputs: $(node.task.t2_inputs)"
end
return true
end
@ -41,9 +53,9 @@ This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref).
function is_valid(graph::DAG, node::ComputeTaskNode)
@assert is_valid_node(graph, node)
for nf in node.nodeFusions
#=for nf in node.nodeFusions
@assert is_valid(graph, nf)
end
end=#
return true
end
@ -57,8 +69,8 @@ This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref).
function is_valid(graph::DAG, node::DataTaskNode)
@assert is_valid_node(graph, node)
if !ismissing(node.nodeFusion)
#=if !ismissing(node.nodeFusion)
@assert is_valid(graph, node.nodeFusion)
end
end=#
return true
end


@ -34,12 +34,7 @@ Apply the given [`NodeFusion`](@ref) to the graph. Generic wrapper around [`node
Return an [`AppliedNodeFusion`](@ref) object generated from the graph's [`Diff`](@ref).
"""
function apply_operation!(graph::DAG, operation::NodeFusion)
diff = node_fusion!(
graph,
operation.input[1],
operation.input[2],
operation.input[3],
)
diff = node_fusion!(graph, operation.input[1], operation.input[2], operation.input[3])
graph.properties += GraphProperties(diff)
@ -124,17 +119,24 @@ function revert_diff!(graph::DAG, diff::Diff)
# add removed nodes, remove added nodes, same for edges
# note the order
for edge in diff.addedEdges
remove_edge!(graph, edge.edge[1], edge.edge[2], false)
remove_edge!(graph, edge.edge[1], edge.edge[2], track = false)
end
for node in diff.addedNodes
remove_node!(graph, node, false)
remove_node!(graph, node, track = false)
end
for node in diff.removedNodes
insert_node!(graph, node, false)
insert_node!(graph, node, track = false)
end
for edge in diff.removedEdges
insert_edge!(graph, edge.edge[1], edge.edge[2], false)
insert_edge!(graph, edge.edge[1], edge.edge[2], track = false)
end
for (node, task) in diff.updatedChildren
# node must be fused compute task at this point
@assert typeof(node.task) <: FusedComputeTask
node.task = task
end
graph.properties -= GraphProperties(diff)
@ -149,21 +151,24 @@ Fuse nodes n1 -> n2 -> n3 together into one node, return the applied difference
For details see [`NodeFusion`](@ref).
"""
function node_fusion!(
graph::DAG,
n1::ComputeTaskNode,
n2::DataTaskNode,
n3::ComputeTaskNode,
)
# @assert is_valid_node_fusion_input(graph, n1, n2, n3)
function node_fusion!(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
@assert is_valid_node_fusion_input(graph, n1, n2, n3)
# clear snapshot
get_snapshot_diff(graph)
# save children and parents
n1_children = children(n1)
n3_parents = parents(n3)
n3_children = children(n3)
n1Children = children(n1)
n3Parents = parents(n3)
n1Task = copy(n1.task)
n3Task = copy(n3.task)
# assemble the input node vectors of n1 and n3 to save into the FusedComputeTask
n1Inputs = Vector{Symbol}()
for child in n1Children
push!(n1Inputs, Symbol(to_var_name(child.id)))
end
# remove the edges and nodes that will be replaced by the fused node
remove_edge!(graph, n1, n2)
@ -172,29 +177,38 @@ function node_fusion!(
remove_node!(graph, n2)
# get n3's children now so it automatically excludes n2
n3_children = children(n3)
n3Children = children(n3)
n3Inputs = Vector{Symbol}()
for child in n3Children
push!(n3Inputs, Symbol(to_var_name(child.id)))
end
remove_node!(graph, n3)
# create new node with the fused compute task
new_node =
ComputeTaskNode(FusedComputeTask{typeof(n1.task), typeof(n3.task)}())
insert_node!(graph, new_node)
newNode = ComputeTaskNode(FusedComputeTask(n1Task, n3Task, n1Inputs, Symbol(to_var_name(n2.id)), n3Inputs))
insert_node!(graph, newNode)
for child in n1_children
for child in n1Children
remove_edge!(graph, child, n1)
insert_edge!(graph, child, new_node)
insert_edge!(graph, child, newNode)
end
for child in n3_children
for child in n3Children
remove_edge!(graph, child, n3)
if !(child in n1_children)
insert_edge!(graph, child, new_node)
if !(child in n1Children)
insert_edge!(graph, child, newNode)
end
end
for parent in n3_parents
for parent in n3Parents
remove_edge!(graph, n3, parent)
insert_edge!(graph, new_node, parent)
insert_edge!(graph, newNode, parent)
# important! update the parent node's child names in case they are fused compute tasks
# needed for compute generation so the fused compute task can correctly match inputs to its component tasks
update_child!(graph, parent, Symbol(to_var_name(n3.id)), Symbol(to_var_name(newNode.id)))
end
return get_snapshot_diff(graph)
@ -208,21 +222,26 @@ Reduce the given nodes together into one node, return the applied difference to
For details see [`NodeReduction`](@ref).
"""
function node_reduction!(graph::DAG, nodes::Vector{Node})
# @assert is_valid_node_reduction_input(graph, nodes)
@assert is_valid_node_reduction_input(graph, nodes)
# clear snapshot
get_snapshot_diff(graph)
n1 = nodes[1]
n1_children = children(n1)
n1Children = children(n1)
n1_parents = Set(n1.parents)
new_parents = Set{Node}()
n1Parents = Set(n1.parents)
# set of the new parents of n1
newParents = Set{Node}()
# names of the previous children that n1 now replaces per parent
newParentsChildNames = Dict{Node, Symbol}()
# remove all of the nodes' parents and children and the nodes themselves (except for first node)
for i in 2:length(nodes)
n = nodes[i]
for child in n1_children
for child in n1Children
remove_edge!(graph, child, n)
end
@ -230,17 +249,23 @@ function node_reduction!(graph::DAG, nodes::Vector{Node})
remove_edge!(graph, n, parent)
# collect all parents
push!(new_parents, parent)
push!(newParents, parent)
newParentsChildNames[parent] = Symbol(to_var_name(n.id))
end
remove_node!(graph, n)
end
setdiff!(new_parents, n1_parents)
for parent in new_parents
for parent in newParents
# now add parents of all input nodes to n1 without duplicates
insert_edge!(graph, n1, parent)
if !(parent in n1Parents)
# don't double insert edges
insert_edge!(graph, n1, parent)
end
# this has to be done for all parents, even the ones of n1 because they can be duplicate
prevChild = newParentsChildNames[parent]
update_child!(graph, parent, prevChild, Symbol(to_var_name(n1.id)))
end
return get_snapshot_diff(graph)
@ -254,30 +279,33 @@ Split the given node into one node per parent, return the applied difference to
For details see [`NodeSplit`](@ref).
"""
function node_split!(graph::DAG, n1::Node)
# @assert is_valid_node_split_input(graph, n1)
@assert is_valid_node_split_input(graph, n1)
# clear snapshot
get_snapshot_diff(graph)
n1_parents = parents(n1)
n1_children = children(n1)
n1Parents = parents(n1)
n1Children = children(n1)
for parent in n1_parents
for parent in n1Parents
remove_edge!(graph, n1, parent)
end
for child in n1_children
for child in n1Children
remove_edge!(graph, child, n1)
end
remove_node!(graph, n1)
for parent in n1_parents
n_copy = copy(n1)
insert_node!(graph, n_copy)
insert_edge!(graph, n_copy, parent)
for parent in n1Parents
nCopy = copy(n1)
for child in n1_children
insert_edge!(graph, child, n_copy)
insert_node!(graph, nCopy)
insert_edge!(graph, nCopy, parent)
for child in n1Children
insert_edge!(graph, child, nCopy)
end
update_child!(graph, parent, Symbol(to_var_name(n1.id)), Symbol(to_var_name(nCopy.id)))
end
return get_snapshot_diff(graph)


@ -7,10 +7,7 @@ using Base.Threads
Insert the given node fusion into its input nodes' operation caches. For the compute nodes, locking via the given `locks` is employed to have safe multi-threading. For a large set of nodes, contention on the locks should be very small.
"""
function insert_operation!(
nf::NodeFusion,
locks::Dict{ComputeTaskNode, SpinLock},
)
function insert_operation!(nf::NodeFusion, locks::Dict{ComputeTaskNode, SpinLock})
n1 = nf.input[1]
n2 = nf.input[2]
n3 = nf.input[3]
@ -52,10 +49,7 @@ end
Insert the node reductions into the graph and the nodes' caches. Employs multithreading for speedup.
"""
function nr_insertion!(
operations::PossibleOperations,
nodeReductions::Vector{Vector{NodeReduction}},
)
function nr_insertion!(operations::PossibleOperations, nodeReductions::Vector{Vector{NodeReduction}})
total_len = 0
for vec in nodeReductions
total_len += length(vec)
@ -83,11 +77,7 @@ end
Insert the node fusions into the graph and the nodes' caches. Employs multithreading for speedup.
"""
function nf_insertion!(
graph::DAG,
operations::PossibleOperations,
nodeFusions::Vector{Vector{NodeFusion}},
)
function nf_insertion!(graph::DAG, operations::PossibleOperations, nodeFusions::Vector{Vector{NodeFusion}})
total_len = 0
for vec in nodeFusions
total_len += length(vec)
@ -122,10 +112,7 @@ end
Insert the node splits into the graph and the nodes' caches. Employs multithreading for speedup.
"""
function ns_insertion!(
operations::PossibleOperations,
nodeSplits::Vector{Vector{NodeSplit}},
)
function ns_insertion!(operations::PossibleOperations, nodeSplits::Vector{Vector{NodeSplit}})
total_len = 0
for vec in nodeSplits
total_len += length(vec)
@ -231,16 +218,12 @@ function generate_operations(graph::DAG)
continue
end
push!(
generatedFusions[threadid()],
NodeFusion((child_node, node, parent_node)),
)
push!(generatedFusions[threadid()], NodeFusion((child_node, node, parent_node)))
end
end
# launch thread for node fusion insertion
nf_task =
@task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
nf_task = @task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
schedule(nf_task)
# find possible node splits


@ -4,9 +4,7 @@
Return whether `operations` is empty, i.e. all of its fields are empty.
"""
function isempty(operations::PossibleOperations)
return isempty(operations.nodeFusions) &&
isempty(operations.nodeReductions) &&
isempty(operations.nodeSplits)
return isempty(operations.nodeFusions) && isempty(operations.nodeReductions) && isempty(operations.nodeSplits)
end
"""
@ -63,9 +61,7 @@ function can_fuse(n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
return false
end
if length(n2.parents) != 1 ||
length(n2.children) != 1 ||
length(n1.parents) != 1
if length(n2.parents) != 1 || length(n2.children) != 1 || length(n1.parents) != 1
return false
end


@ -9,24 +9,12 @@ Assert for a given node fusion input whether the nodes can be fused. For the requ
Intended for use with `@assert` or `@test`.
"""
function is_valid_node_fusion_input(
graph::DAG,
n1::ComputeTaskNode,
n2::DataTaskNode,
n3::ComputeTaskNode,
)
function is_valid_node_fusion_input(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
if !(n1 in graph) || !(n2 in graph) || !(n3 in graph)
throw(
AssertionError(
"[Node Fusion] The given nodes are not part of the given graph",
),
)
throw(AssertionError("[Node Fusion] The given nodes are not part of the given graph"))
end
if !is_child(n1, n2) ||
!is_child(n2, n3) ||
!is_parent(n3, n2) ||
!is_parent(n2, n1)
if !is_child(n1, n2) || !is_child(n2, n3) || !is_parent(n3, n2) || !is_parent(n2, n1)
throw(
AssertionError(
"[Node Fusion] The given nodes are not connected by edges which is required for node fusion",
@ -35,27 +23,19 @@ function is_valid_node_fusion_input(
end
if length(n2.parents) > 1
throw(
AssertionError(
"[Node Fusion] The given data node has more than one parent",
),
)
throw(AssertionError("[Node Fusion] The given data node has more than one parent"))
end
if length(n2.children) > 1
throw(
AssertionError(
"[Node Fusion] The given data node has more than one child",
),
)
throw(AssertionError("[Node Fusion] The given data node has more than one child"))
end
if length(n1.parents) > 1
throw(
AssertionError(
"[Node Fusion] The given n1 has more than one parent",
),
)
throw(AssertionError("[Node Fusion] The given n1 has more than one parent"))
end
@assert is_valid(graph, n1)
@assert is_valid(graph, n2)
@assert is_valid(graph, n3)
return true
end
@ -69,22 +49,21 @@ Intended for use with `@assert` or `@test`.
function is_valid_node_reduction_input(graph::DAG, nodes::Vector{Node})
for n in nodes
if n graph
throw(
AssertionError(
"[Node Reduction] The given nodes are not part of the given graph",
),
)
throw(AssertionError("[Node Reduction] The given nodes are not part of the given graph"))
end
@assert is_valid(graph, n)
end
t = typeof(nodes[1].task)
for n in nodes
if typeof(n.task) != t
throw(
AssertionError(
"[Node Reduction] The given nodes are not of the same type",
),
)
throw(AssertionError("[Node Reduction] The given nodes are not of the same type"))
end
if (typeof(n) <: DataTaskNode)
if (n.name != nodes[1].name)
throw(AssertionError("[Node Reduction] The given nodes do not have the same name"))
end
end
end
@ -111,11 +90,7 @@ Intended for use with `@assert` or `@test`.
"""
function is_valid_node_split_input(graph::DAG, n1::Node)
if n1 graph
throw(
AssertionError(
"[Node Split] The given node is not part of the given graph",
),
)
throw(AssertionError("[Node Split] The given node is not part of the given graph"))
end
if length(n1.parents) <= 1
@ -126,6 +101,8 @@ function is_valid_node_split_input(graph::DAG, n1::Node)
)
end
@assert is_valid(graph, n1)
return true
end
@ -163,12 +140,7 @@ Assert for a given [`NodeFusion`](@ref) whether it is a valid operation in the g
Intended for use with `@assert` or `@test`.
"""
function is_valid(graph::DAG, nf::NodeFusion)
@assert is_valid_node_fusion_input(
graph,
nf.input[1],
nf.input[2],
nf.input[3],
)
@assert is_valid_node_fusion_input(graph, nf.input[1], nf.input[2], nf.input[3])
@assert nf in graph.possibleOperations.nodeFusions "NodeFusion is not part of the graph's possible operations!"
return true
end


@ -11,8 +11,7 @@ function -(prop1::GraphProperties, prop2::GraphProperties)
computeIntensity = if (prop1.data - prop2.data == 0)
0.0
else
(prop1.computeEffort - prop2.computeEffort) /
(prop1.data - prop2.data)
(prop1.computeEffort - prop2.computeEffort) / (prop1.data - prop2.data)
end,
cost = prop1.cost - prop2.cost,
noNodes = prop1.noNodes - prop2.noNodes,
@ -33,8 +32,7 @@ function +(prop1::GraphProperties, prop2::GraphProperties)
computeIntensity = if (prop1.data + prop2.data == 0)
0.0
else
(prop1.computeEffort + prop2.computeEffort) /
(prop1.data + prop2.data)
(prop1.computeEffort + prop2.computeEffort) / (prop1.data + prop2.data)
end,
cost = prop1.cost + prop2.cost,
noNodes = prop1.noNodes + prop2.noNodes,

src/scheduler/greedy.jl (new file)

@ -0,0 +1,50 @@
"""
GreedyScheduler
A greedy implementation of a scheduler, creating a topological ordering of nodes and naively balancing them onto the different devices.
"""
struct GreedyScheduler end
function schedule_dag(::GreedyScheduler, graph::DAG, machine::Machine)
nodeQueue = PriorityQueue{Node, Int}()
# use a priority equal to the number of unseen children -> 0 are nodes that can be added
for node in get_entry_nodes(graph)
enqueue!(nodeQueue, node => 0)
end
schedule = Vector{Node}()
sizehint!(schedule, length(graph.nodes))
# keep an accumulated cost of things scheduled to this device so far
deviceAccCost = PriorityQueue{AbstractDevice, Int}()
for device in machine.devices
enqueue!(deviceAccCost, device => 0)
end
node = nothing
while !isempty(nodeQueue)
@assert peek(nodeQueue)[2] == 0
node = dequeue!(nodeQueue)
# assign the device with lowest accumulated cost to the node (if it's a compute node)
if (isa(node, ComputeTaskNode))
lowestDevice = peek(deviceAccCost)[1]
node.device = lowestDevice
deviceAccCost[lowestDevice] += compute_effort(node.task)
end
push!(schedule, node)
for parent in node.parents
# reduce the priority of all parents by one
if (!haskey(nodeQueue, parent))
enqueue!(nodeQueue, parent => length(parent.children) - 1)
else
nodeQueue[parent] = nodeQueue[parent] - 1
end
end
end
return schedule
end
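A hedged end-to-end sketch (the graph file path is illustrative, and the scheduler names live inside the package) of driving the greedy scheduler by hand:
machine = get_machine_info()                            # detect the local devices
graph   = parse_dag("input/AB->ABBB.txt", ABCModel())   # path is illustrative
order   = schedule_dag(GreedyScheduler(), graph, machine)
# `order` is a topological ordering; every ComputeTaskNode now carries a device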


@ -0,0 +1,18 @@
"""
Scheduler
Abstract base type for scheduler implementations. The scheduler is used to assign each node to a device and create a topological ordering of tasks.
"""
abstract type Scheduler end
"""
schedule_dag(::Scheduler, ::DAG, ::Machine)
Interface function that must be implemented for every implementation of [`Scheduler`](@ref).
The function assigns each [`ComputeTaskNode`](@ref) of the [`DAG`](@ref) to one of the devices in the given [`Machine`](@ref) and returns a `Vector{Node}` representing a topological ordering.
[`DataTaskNode`](@ref)s are not scheduled to devices since they do not compute. Instead, a data node transfers data from the [`AbstractDevice`](@ref) of its child to all [`AbstractDevice`](@ref)s of its parents.
"""
function schedule_dag end
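A purely hypothetical sketch of another scheduler implementing this interface; it reuses the greedy topological ordering and pins every compute node to the machine's first device:
struct SingleDeviceScheduler <: Scheduler end
function schedule_dag(::SingleDeviceScheduler, graph::DAG, machine::Machine)
    order = schedule_dag(GreedyScheduler(), graph, machine)
    for node in order
        if isa(node, ComputeTaskNode)
            node.device = machine.devices[1]
        end
    end
    return order
end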

src/task/compute.jl (new file)

@ -0,0 +1,89 @@
"""
compute(t::FusedComputeTask, data)
Compute a [`FusedComputeTask`](@ref). This simply asserts false and should never be called. Fused compute tasks generate their expressions directly through their component tasks instead.
"""
function compute(t::FusedComputeTask, data)
@assert false "This is not implemented and should never be called"
end
"""
get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector, outExpr)
Generate code evaluating a [`FusedComputeTask`](@ref) on `inExprs`, providing the output on `outExpr`.
`inExprs` should be of the correct types and may be heterogeneous. `outExpr` will be of the type of the output of `T2` of t.
"""
function get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector, outExpr)
inExprs1 = Vector()
for sym in t.t1_inputs
push!(inExprs1, gen_access_expr(device, sym))
end
outExpr1 = gen_access_expr(device, t.t1_output)
inExprs2 = Vector()
for sym in t.t2_inputs
push!(inExprs2, gen_access_expr(device, sym))
end
expr1 = get_expression(t.first_task, device, inExprs1, outExpr1)
expr2 = get_expression(t.second_task, device, [inExprs2..., outExpr1], outExpr)
full_expr = Expr(:block, expr1, expr2)
return full_expr
end
"""
get_expression(node::ComputeTaskNode)
Generate and return code for a given [`ComputeTaskNode`](@ref).
"""
function get_expression(node::ComputeTaskNode)
@assert length(node.children) <= children(node.task) "Node $(node) has too many children for its task: node has $(length(node.children)) versus task has $(children(node.task))\nNode's children: $(getfield.(node.children, :children))"
@assert !ismissing(node.device) "Trying to get expression for an unscheduled ComputeTaskNode\nNode: $(node)"
inExprs = Vector()
for id in getfield.(node.children, :id)
push!(inExprs, gen_access_expr(node.device, Symbol(to_var_name(id))))
end
outExpr = gen_access_expr(node.device, Symbol(to_var_name(node.id)))
return get_expression(node.task, node.device, inExprs, outExpr)
end
"""
get_expression(node::DataTaskNode)
Generate and return code for a given [`DataTaskNode`](@ref).
"""
function get_expression(node::DataTaskNode)
@assert length(node.children) == 1 "Trying to call get_expression on a data task node that has $(length(node.children)) children instead of 1"
# TODO: dispatch to device implementations generating the copy commands
child = node.children[1]
inExpr = eval(gen_access_expr(child.device, Symbol(to_var_name(child.id))))
outExpr = eval(gen_access_expr(child.device, Symbol(to_var_name(node.id))))
dataTransportExp = Meta.parse("$outExpr = $inExpr")
return dataTransportExp
end
"""
get_init_expression(node::DataTaskNode, device::AbstractDevice)
Generate and return code for the initial input reading expression for [`DataTaskNode`](@ref)s with 0 children, i.e., entry nodes.
See also: [`get_entry_nodes`](@ref)
"""
function get_init_expression(node::DataTaskNode, device::AbstractDevice)
@assert isempty(node.children) "Trying to call get_init_expression on a data task node that is not an entry node."
inExpr = eval(gen_access_expr(device, Symbol("$(to_var_name(node.id))_in")))
outExpr = eval(gen_access_expr(device, Symbol(to_var_name(node.id))))
dataTransportExp = Meta.parse("$outExpr = $inExpr")
return dataTransportExp
end


@ -3,8 +3,7 @@
Fallback implementation of the copy of an abstract data task, throwing an error.
"""
copy(t::AbstractDataTask) =
error("Need to implement copying for your data tasks!")
copy(t::AbstractDataTask) = error("Need to implement copying for your data tasks!")
"""
copy(t::AbstractComputeTask)
@ -12,3 +11,21 @@ copy(t::AbstractDataTask) =
Return a copy of the given compute task.
"""
copy(t::AbstractComputeTask) = typeof(t)()
"""
copy(t::FusedComputeTask)
Return a copy of the given [`FusedComputeTask`](@ref).
"""
function copy(t::FusedComputeTask{T1, T2}) where {T1, T2}
return FusedComputeTask{T1, T2}(
copy(t.first_task),
copy(t.second_task),
copy(t.t1_inputs),
t.t1_output,
copy(t.t2_inputs),
)
end
FusedComputeTask{T1, T2}(t1_inputs::Vector{String}, t1_output::String, t2_inputs::Vector{String}) where {T1, T2} =
FusedComputeTask{T1, T2}(T1(), T2(), t1_inputs, t1_output, t2_inputs)


@ -4,6 +4,5 @@
Print a string representation of the fused compute task to io.
"""
function show(io::IO, t::FusedComputeTask)
(T1, T2) = get_types(t)
return print(io, "ComputeFuse(", T1(), ", ", T2(), ")")
return print(io, "ComputeFuse($(t.first_task), $(t.second_task))")
end


@ -71,8 +71,7 @@ data(t::AbstractComputeTask) = 0
Return the compute effort of a fused compute task.
"""
function compute_effort(t::FusedComputeTask)
(T1, T2) = collect(typeof(t).parameters)
return compute_effort(T1()) + compute_effort(T2())
return compute_effort(t.first_task) + compute_effort(t.second_task)
end
"""
@ -81,30 +80,3 @@ end
Return a tuple of the fused compute task's components' types.
"""
get_types(::FusedComputeTask{T1, T2}) where {T1, T2} = (T1, T2)
"""
get_expression(t::AbstractTask)
Return an expression evaluating the given task on the :dataIn symbol
"""
function get_expression(t::AbstractTask)
return quote
dataOut = compute($t, dataIn)
end
end
"""
get_expression()
"""
function get_expression(
t::FusedComputeTask,
inSymbol::Symbol,
outSymbol::Symbol,
)
#TODO
computeExp = quote
$outSymbol = compute($t, $inSymbol)
end
return computeExp
end


@ -26,5 +26,13 @@ A fused compute task made up of the computation of first `T1` and then `T2`.
Also see: [`get_types`](@ref).
"""
struct FusedComputeTask{T1 <: AbstractComputeTask, T2 <: AbstractComputeTask} <:
AbstractComputeTask end
struct FusedComputeTask{T1 <: AbstractComputeTask, T2 <: AbstractComputeTask} <: AbstractComputeTask
first_task::T1
second_task::T2
# the names of the inputs for T1
t1_inputs::Vector{Symbol}
# output name of T1
t1_output::Symbol
# t2_inputs doesn't include the output of t1, that's implicit
t2_inputs::Vector{Symbol}
end
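For orientation, a hedged sketch (not part of this commit; the symbol names are placeholders for generated node ids) of what a fused task carries:
fused = FusedComputeTask(ComputeTaskP(), ComputeTaskU(), [:_in_particle], :_p_out, Symbol[])
# fused.first_task reads :_in_particle and writes :_p_out;
# fused.second_task then consumes :_p_out implicitly, plus fused.t2_inputs (empty here)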


@ -87,3 +87,19 @@ Return the memory footprint of the node in Byte. Used in [`mem(graph::DAG)`](@re
function mem(node::Node)
return Base.summarysize(node, exclude = Union{Node, Operation})
end
"""
unroll_symbol_vector(vec::Vector{Symbol})
Return the given vector as a single String without quotation marks or brackets.
"""
function unroll_symbol_vector(vec::Vector)
result = ""
for s in vec
if (result != "")
result *= ", "
end
result *= "$s"
end
return result
end
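A quick illustration of the helper's output:
unroll_symbol_vector([:a, :b, :c])   # returns "a, b, c"
unroll_symbol_vector(Symbol[])       # returns ""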