WIP Adding machine/device info and caching strategies

2023-09-29 18:02:57 +02:00
parent afb6af44ca
commit 37d645cb4e
14 changed files with 469 additions and 293 deletions

src/MetagraphOptimization.jl

@@ -57,6 +57,9 @@ export ParticleValue
export ParticleA, ParticleB, ParticleC
export ABCProcessDescription, ABCProcessInput, ABCModel
export Machine
export get_machine_info
export ==, in, show, isempty, delete!, length
export bytes_to_human_readable
@@ -127,6 +130,18 @@ include("models/abc/properties.jl")
include("models/abc/parse.jl")
include("models/abc/print.jl")
include("devices/interface.jl")
include("devices/measure.jl")
include("devices/detect.jl")
include("devices/impl.jl")
include("devices/numa/impl.jl")
include("devices/cuda/impl.jl")
# AMDGPU can currently not be used because of an incompatibility with the newest ROCm drivers
# include("devices/rocm/impl.jl")
# oneAPI also seems broken for now
# include("devices/oneapi/impl.jl")
include("code_gen/main.jl")
end # module MetagraphOptimization

src/code_gen/main.jl

@@ -25,7 +25,7 @@ function gen_code(graph::DAG)
inputSyms[node.name] = Vector{Symbol}()
end
push!(inputSyms[node.name], Symbol("data_$(to_var_name(node.id))_in"))
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
end
node = nothing
@@ -45,7 +45,7 @@ function gen_code(graph::DAG)
end
# node is now the last node we looked at -> the output node
outSym = Symbol("data_$(to_var_name(node.id))")
outSym = Symbol("$(to_var_name(node.id))")
return (code = Expr(:block, code...), inputSymbols = inputSyms, outputSymbol = outSym)
end
@@ -53,6 +53,7 @@ end
function gen_input_assignment_code(
inputSymbols::Dict{String, Vector{Symbol}},
processDescription::AbstractProcessDescription,
machine::Machine,
processInputSymbol::Symbol = :input,
)
@assert length(inputSymbols) >=
@@ -75,7 +76,11 @@ function gen_input_assignment_code(
end
for symbol in symbols
push!(assignInputs, Meta.parse("$(symbol) = ParticleValue($p, 1.0)"))
# TODO generate correct access expression
# TODO how to define cache strategies?
device = machine.devices[1]
evalExpr = eval(gen_access_expr(device, default_strategy(device), symbol))
push!(assignInputs, Meta.parse("$(evalExpr) = ParticleValue($p, 1.0)"))
end
end
@@ -83,19 +88,23 @@ function gen_input_assignment_code(
end
"""
get_compute_function(graph::DAG, process::AbstractProcessDescription)
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
"""
function get_compute_function(graph::DAG, process::AbstractProcessDescription)
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
(code, inputSymbols, outputSymbol) = gen_code(graph)
assignInputs = gen_input_assignment_code(inputSymbols, process, :input)
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
function_id = to_var_name(UUIDs.uuid1(rng[1]))
# TODO generate correct access expression
# TODO how to define cache strategies?
device = machine.devices[1]
functionId = to_var_name(UUIDs.uuid1(rng[1]))
func = eval(
Meta.parse(
"function compute_$(function_id)(input::AbstractProcessInput) $assignInputs; $code; return $outputSymbol; end",
"function compute_$(functionId)(input::AbstractProcessInput) $assignInputs; $code; return $(eval(gen_access_expr(device, default_strategy(device), outputSymbol))); end",
),
)
@@ -103,7 +112,7 @@ function get_compute_function(graph::DAG, process::AbstractProcessDescription)
end
"""
execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
Execute the code of the given `graph` on the given input particles.
@@ -115,8 +124,8 @@ This is essentially shorthand for
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
"""
function execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
func = get_compute_function(graph, process)
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
func = get_compute_function(graph, process, machine)
result = 0
try
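A usage sketch of the new machine-aware entry points (assuming `graph` and `process` already exist, e.g. via `parse_dag` and `parse_process`, and that `gen_process_input` takes the process description; neither call is shown in this diff):
machine = get_machine_info()
input = gen_process_input(process)
result = execute(graph, process, machine, input)        # one-shot execution
func = get_compute_function(graph, process, machine)    # or compile once...
result = func(input)                                    # ...and reuse for many inputs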

src/devices/cuda/impl.jl Normal file

@@ -0,0 +1,47 @@
using CUDA
mutable struct CUDAGPU <: AbstractGPU
device::Any # TODO: use the concrete CUDA device type (presumably CUDA.CuDevice)
FLOPS::Float64
end
push!(DEVICE_TYPES, CUDAGPU)
CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
default_strategy(::CUDAGPU) = LocalVariables()
function measure_device!(device::CUDAGPU; verbose::Bool)
if verbose
println("Measuring CUDA GPU $(device.device)")
end
# TODO implement
return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: CUDAGPU}
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
devices = Vector{AbstractDevice}()
if !CUDA.functional()
if verbose
println("CUDA is non-functional")
end
return devices
end
CUDADevices = CUDA.devices()
if verbose
println("Found $(length(CUDADevices)) CUDA devices")
end
for device in CUDADevices
push!(devices, CUDAGPU(device, -1))
end
return devices
end
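A usage sketch for the backend-specific query; since `CUDAGPU` stays registered in `DEVICE_TYPES` even when CUDA is not functional, callers can probe unconditionally and just get an empty vector back:
cudaDevices = get_devices(CUDAGPU, verbose = true)
for dev in cudaDevices
measure_device!(dev, verbose = true)   # still a TODO stub in this commit
end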

src/devices/detect.jl

@@ -1,127 +1,23 @@
using NumaAllocators
using CUDA
using ROCm
using oneAPI
"""
get_machine_info(; verbose::Bool)
Return the [`Machine`](@ref) the program is currently running on. The parameter `verbose` defaults to true when running interactively.
"""
function get_machine_info(verbose::Bool = Base.is_interactive())
devices = Vector{Device}()
function get_machine_info(; verbose::Bool = Base.isinteractive())
devices = Vector{AbstractDevice}()
numaDevices = get_numa_devices(verbose)
push!(devices, numaDevices)
cudaDevices = get_cuda_devices(verbose)
push!(devices, cudaDevices)
rocmDevices = get_rocm_devices(verbose)
push!(devices, rocmDevices)
oneapiDevices = get_oneapi_devices(verbose)
push!(devices, oneapiDevices)
for device in device_types()
devs = get_devices(device, verbose = verbose)
for dev in devs
push!(devices, dev)
end
end
noDevices = length(devices)
@assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"
return Machine(devices, transferRates::Matrix{Float64}(-1, noDevices, noDevices))
end
"""
get_numa_devices(verbose::Bool)
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_numa_devices(verbose::Bool)
devices = Vector{Device}()
noNumaNodes = highest_numa_node()
if (verbose)
println("Found $(noNumaNodes + 1) NUMA nodes")
end
for i in 0:noNumaNodes
push!(devices, NumaNode(i, 1, -1))
end
return devices
end
"""
get_cuda_devices(verbose::Bool)
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_cuda_devices(verbose::Bool)
devices = Vector{Device}()
if !CUDA.functional()
if verbose
println("CUDA is non-functional")
end
return devices
end
CUDADevices = CUDA.devices()
if verbose
println("Found $(length(CUDADevices)) CUDA devices")
end
for device in CUDADevices
push!(devices, CUDAGPU(device, -1))
end
return devices
end
"""
get_rocm_devices(verbose::Bool)
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_rocm_devices(verbose::Bool)
devices = Vector{Device}()
if !AMDGPU.functional()
if verbose
println("AMDGPU is non-functional")
end
return devices
end
AMDDevices = AMDGPU.devices()
if verbose
println("Found $(length(AMDDevices)) AMD devices")
end
for device in AMDDevices
push!(devices, ROCmGPU(device, -1))
end
return devices
end
"""
get_oneapi_devices(verbose::Bool)
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_oneapi_devices(verbose::Bool)
devices = Vector{Device}()
if !oneAPI.functional()
if verbose
println("oneAPI is non-functional")
end
return devices
end
oneAPIDevices = oneAPI.devices()
if verbose
println("Found $(length(oneAPIDevices)) oneAPI devices")
end
for device in oneAPIDevices
push!(devices, oneAPIGPU(device, -1))
end
return devices
transferRates = Matrix{Float64}(undef, noDevices, noDevices)
fill!(transferRates, -1)
return Machine(devices, transferRates)
end
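A sketch of the rewritten detection in use:
machine = get_machine_info(verbose = true)
length(machine.devices)    # >= 1, at least one NumaNode is always found
machine.transferRates      # n×n Matrix{Float64}, all -1 until measured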

src/devices/impl.jl Normal file

@@ -0,0 +1,25 @@
"""
device_types()
Return a vector of available and implemented device types.
See also: [`DEVICE_TYPES`](@ref)
"""
function device_types()
return DEVICE_TYPES
end
"""
strategies(t::Type{T}) where {T <: AbstractDevice}
Return a vector of available [`CacheStrategy`](@ref)s for the given [`AbstractDevice`](@ref).
The caching strategies are used in code generation.
"""
function strategies(t::Type{T}) where {T <: AbstractDevice}
if !haskey(CACHE_STRATEGIES, t)
error("Trying to get strategies for $T, but it has no strategies defined!")
end
return CACHE_STRATEGIES[t]
end
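A sketch of how the registry is meant to be consumed, mirroring the loop in the new `get_machine_info`:
for t in device_types()
println("$t supports caching strategies: $(strategies(t))")
end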

src/devices/interface.jl Normal file

@@ -0,0 +1,87 @@
abstract type AbstractDevice end
abstract type AbstractCPU <: AbstractDevice end
abstract type AbstractGPU <: AbstractDevice end
"""
Machine
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
See also: [`Scheduler`](@ref)
"""
struct Machine
devices::Vector{AbstractDevice}
transferRates::Matrix{Float64}
end
"""
CacheStrategy
Abstract base type for caching strategies.
See also: [`strategies`](@ref)
"""
abstract type CacheStrategy end
struct LocalVariables <: CacheStrategy end
struct Dictionary <: CacheStrategy end
"""
DEVICE_TYPES::Vector{Type}
Global vector of available and implemented device types. Each implementation of an [`AbstractDevice`](@ref) should add its concrete type to this vector.
See also: [`device_types`](@ref), [`get_devices`](@ref)
"""
DEVICE_TYPES = Vector{Type}()
"""
CACHE_STRATEGIES::Dict{Type, Vector{CacheStrategy}}
Global dictionary of available caching strategies per device. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary.
See also: [`strategies`](@ref)
"""
CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}()
"""
default_strategy(device::AbstractDevice)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device.
"""
function default_strategy end
"""
get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector{AbstractDevice}` of the devices of the given type available on the current machine.
"""
function get_devices end
"""
measure_device!(device::AbstractDevice; verbose::Bool)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes the result into it.
"""
function measure_device! end
"""
gen_cache_init_code(device::AbstractDevice, strategy::CacheStrategy)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache for the given strategy.
"""
function gen_cache_init_code end
"""
gen_access_expr(device::AbstractDevice, strategy::CacheStrategy, symbol::Symbol)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
Return an `Expr` accessing the variable identified by `symbol`.
"""
function gen_access_expr end
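The contract above can be condensed into a minimal sketch; `DummyCPU` is a hypothetical device for illustration only, not part of this commit:
mutable struct DummyCPU <: AbstractCPU
FLOPS::Float64
end
push!(DEVICE_TYPES, DummyCPU)
CACHE_STRATEGIES[DummyCPU] = [LocalVariables()]
default_strategy(::DummyCPU) = LocalVariables()
function get_devices(::Type{T}; verbose::Bool = false) where {T <: DummyCPU}
return AbstractDevice[DummyCPU(-1.0)]   # pretend exactly one such device exists
end
function measure_device!(device::DummyCPU; verbose::Bool)
device.FLOPS = 1.0e9                    # placeholder measurement
return nothing
end
gen_cache_init_code(::DummyCPU, ::LocalVariables) = Expr(:block)   # nothing to initialize
gen_access_expr(::DummyCPU, ::LocalVariables, symbol::Symbol) = Meta.parse(":(data_$symbol)")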

src/devices/measure.jl

@@ -17,34 +17,6 @@ end
Measure the transfer rates between devices in the machine.
"""
function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.isinteractive())
# TODO implement
return nothing
end
function measure_device!(device::NumaNode; verbose::Bool)
if verbose
println("Measuring Numa Node $(device.numaId)")
end
return nothing
end
function measure_device!(device::CUDAGPU; verbose::Bool)
if verbose
println("Measuring CUDA GPU $(device.device)")
end
return nothing
end
function measure_device!(device::ROCmGPU; verbose::Bool)
if verbose
println("Measuring ROCm GPU $(device.device)")
end
return nothing
end
function measure_device!(device::oneAPIGPU; verbose::Bool)
if verbose
println("Measuring oneAPI GPU $(device.device)")
end
# TODO implement
return nothing
end
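Taken together, detection and measurement are intended to compose as sketched below; both measure functions are still TODO stubs in this commit and only print or no-op:
machine = get_machine_info()
for dev in machine.devices
measure_device!(dev, verbose = true)
end
measure_transfer_rates!(machine)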

src/devices/numa/impl.jl Normal file

@@ -0,0 +1,61 @@
using NumaAllocators
mutable struct NumaNode <: AbstractCPU
numaId::UInt16
threads::UInt16
FLOPS::Float64
end
push!(DEVICE_TYPES, NumaNode)
CACHE_STRATEGIES[NumaNode] = [LocalVariables()]
default_strategy(::NumaNode) = LocalVariables()
function measure_device!(device::NumaNode; verbose::Bool)
if verbose
println("Measuring Numa Node $(device.numaId)")
end
# TODO implement
return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: NumaNode}
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
devices = Vector{AbstractDevice}()
noNumaNodes = highest_numa_node()
if (verbose)
println("Found $(noNumaNodes + 1) NUMA nodes")
end
for i in 0:noNumaNodes
push!(devices, NumaNode(i, 1, -1))
end
return devices
end
"""
gen_cache_init_code(device::NumaNode, strategy::LocalVariables)
Generate code for initializing the [`LocalVariables`](@ref) strategy on a [`NumaNode`](@ref).
"""
function gen_cache_init_code(::NumaNode, ::LocalVariables)
# don't need to initialize anything
return Expr(:block)
end
"""
gen_access_expr(device::NumaNode, strategy::LocalVariables, symbol::Symbol)
Generate code to access the variable designated by `symbol` using the [`LocalVariables`](@ref) [`CacheStrategy`](@ref) on a [`NumaNode`](@ref).
"""
function gen_access_expr(::NumaNode, ::LocalVariables, symbol::Symbol)
# return the access expression in quoted form; the code generator evals it once more before splicing it into the generated source
s = Symbol("data_$symbol")
return Meta.parse(":($s)")
end
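Why the extra quoting: `gen_access_expr` returns a quoted expression that the code generator `eval`s once before splicing the result into the generated source string (see `gen_input_assignment_code`). A sketch of the round trip:
expr = gen_access_expr(NumaNode(0, 1, -1), LocalVariables(), :abc)
eval(expr)                                # :data_abc, the Symbol used in generated code
"$(eval(expr)) = ParticleValue(p, 1.0)"   # the assignment line the generator builds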

src/devices/oneapi/impl.jl Normal file

@@ -0,0 +1,47 @@
using oneAPI
mutable struct oneAPIGPU <: AbstractGPU
device::Any
FLOPS::Float64
end
push!(DEVICE_TYPES, oneAPIGPU)
CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]
default_strategy(::oneAPIGPU) = LocalVariables()
function measure_device!(device::oneAPIGPU; verbose::Bool)
if verbose
println("Measuring oneAPI GPU $(device.device)")
end
# TODO implement
return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
devices = Vector{AbstractDevice}()
if !oneAPI.functional()
if verbose
println("oneAPI is non-functional")
end
return devices
end
oneAPIDevices = oneAPI.devices()
if verbose
println("Found $(length(oneAPIDevices)) oneAPI devices")
end
for device in oneAPIDevices
push!(devices, oneAPIGPU(device, -1))
end
return devices
end

src/devices/rocm/impl.jl Normal file

@@ -0,0 +1,47 @@
using AMDGPU
mutable struct ROCmGPU <: AbstractGPU
device::Any
FLOPS::Float64
end
push!(DEVICE_TYPES, ROCmGPU)
CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]
default_strategy(::ROCmGPU) = LocalVariables()
function measure_device!(device::ROCmGPU; verbose::Bool)
if verbose
println("Measuring ROCm GPU $(device.device)")
end
# TODO implement
return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
devices = Vector{AbstractDevice}()
if !AMDGPU.functional()
if verbose
println("AMDGPU is non-functional")
end
return devices
end
AMDDevices = AMDGPU.devices()
if verbose
println("Found $(length(AMDDevices)) AMD devices")
end
for device in AMDDevices
push!(devices, ROCmGPU(device, -1))
end
return devices
end


@@ -1,40 +0,0 @@
abstract type Device end
abstract type CPU <: Device end
mutable struct NumaNode <: CPU
numaId::UInt16
threads::UInt16
FLOPS::Float64
end
abstract type GPU <: Device end
mutable struct CUDAGPU <: GPU
device::Any # TODO: what's the cuda device type?
FLOPS::Float64
end
mutable struct ROCmGPU <: GPU
device::Any
FLOPS::Float64
end
mutable struct oneAPIGPU <: GPU
device::Any
FLOPS::Float64
end
"""
Machine
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
See also: [`Scheduler`](@ref)
"""
struct Machine
devices::Vector{Device}
transferRates::Matrix{Float64}
end