WIP Adding machine/device info and caching strategies
This commit is contained in:
@@ -57,6 +57,9 @@ export ParticleValue
|
||||
export ParticleA, ParticleB, ParticleC
|
||||
export ABCProcessDescription, ABCProcessInput, ABCModel
|
||||
|
||||
export Machine
|
||||
export get_machine_info
|
||||
|
||||
export ==, in, show, isempty, delete!, length
|
||||
|
||||
export bytes_to_human_readable
|
||||
@@ -127,6 +130,18 @@ include("models/abc/properties.jl")
|
||||
include("models/abc/parse.jl")
|
||||
include("models/abc/print.jl")
|
||||
|
||||
include("devices/interface.jl")
|
||||
include("devices/measure.jl")
|
||||
include("devices/detect.jl")
|
||||
include("devices/impl.jl")
|
||||
|
||||
include("devices/numa/impl.jl")
|
||||
include("devices/cuda/impl.jl")
|
||||
# can currently not use AMDGPU because of incompatability with the newest rocm drivers
|
||||
# include("devices/rocm/impl.jl")
|
||||
# oneapi seems also broken for now
|
||||
# include("devices/oneapi/impl.jl")
|
||||
|
||||
include("code_gen/main.jl")
|
||||
|
||||
end # module MetagraphOptimization
|
||||
|
@@ -25,7 +25,7 @@ function gen_code(graph::DAG)
|
||||
inputSyms[node.name] = Vector{Symbol}()
|
||||
end
|
||||
|
||||
push!(inputSyms[node.name], Symbol("data_$(to_var_name(node.id))_in"))
|
||||
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
|
||||
end
|
||||
|
||||
node = nothing
|
||||
@@ -45,7 +45,7 @@ function gen_code(graph::DAG)
|
||||
end
|
||||
|
||||
# node is now the last node we looked at -> the output node
|
||||
outSym = Symbol("data_$(to_var_name(node.id))")
|
||||
outSym = Symbol("$(to_var_name(node.id))")
|
||||
|
||||
return (code = Expr(:block, code...), inputSymbols = inputSyms, outputSymbol = outSym)
|
||||
end
|
||||
@@ -53,6 +53,7 @@ end
|
||||
function gen_input_assignment_code(
|
||||
inputSymbols::Dict{String, Vector{Symbol}},
|
||||
processDescription::AbstractProcessDescription,
|
||||
machine::Machine,
|
||||
processInputSymbol::Symbol = :input,
|
||||
)
|
||||
@assert length(inputSymbols) >=
|
||||
@@ -75,7 +76,11 @@ function gen_input_assignment_code(
|
||||
end
|
||||
|
||||
for symbol in symbols
|
||||
push!(assignInputs, Meta.parse("$(symbol) = ParticleValue($p, 1.0)"))
|
||||
# TODO generate correct access expression
|
||||
# TODO how to define cahce strategies?
|
||||
device = machine.devices[1]
|
||||
evalExpr = eval(gen_access_expr(device, default_strategy(device), symbol))
|
||||
push!(assignInputs, Meta.parse("$(evalExpr) = ParticleValue($p, 1.0)"))
|
||||
end
|
||||
end
|
||||
|
||||
@@ -83,19 +88,23 @@ function gen_input_assignment_code(
|
||||
end
|
||||
|
||||
"""
|
||||
get_compute_function(graph::DAG, process::AbstractProcessDescription)
|
||||
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
|
||||
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
|
||||
"""
|
||||
function get_compute_function(graph::DAG, process::AbstractProcessDescription)
|
||||
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
(code, inputSymbols, outputSymbol) = gen_code(graph)
|
||||
|
||||
assignInputs = gen_input_assignment_code(inputSymbols, process, :input)
|
||||
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
|
||||
|
||||
function_id = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
# TODO generate correct access expression
|
||||
# TODO how to define cahce strategies?
|
||||
device = machine.devices[1]
|
||||
|
||||
functionId = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
func = eval(
|
||||
Meta.parse(
|
||||
"function compute_$(function_id)(input::AbstractProcessInput) $assignInputs; $code; return $outputSymbol; end",
|
||||
"function compute_$(functionId)(input::AbstractProcessInput) $assignInputs; $code; return $(eval(gen_access_expr(device, default_strategy(device), outputSymbol))); end",
|
||||
),
|
||||
)
|
||||
|
||||
@@ -103,7 +112,7 @@ function get_compute_function(graph::DAG, process::AbstractProcessDescription)
|
||||
end
|
||||
|
||||
"""
|
||||
execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
|
||||
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
|
||||
Execute the code of the given `graph` on the given input particles.
|
||||
|
||||
@@ -115,8 +124,8 @@ This is essentially shorthand for
|
||||
|
||||
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
|
||||
"""
|
||||
function execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
|
||||
func = get_compute_function(graph, process)
|
||||
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
func = get_compute_function(graph, process, machine)
|
||||
|
||||
result = 0
|
||||
try
|
||||
|
47
src/devices/cuda/impl.jl
Normal file
47
src/devices/cuda/impl.jl
Normal file
@@ -0,0 +1,47 @@
|
||||
using CUDA
|
||||
|
||||
mutable struct CUDAGPU <: AbstractGPU
|
||||
device::Any # TODO: what's the cuda device type?
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, CUDAGPU)
|
||||
|
||||
CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
|
||||
|
||||
default_strategy(::CUDAGPU) = LocalVariables()
|
||||
|
||||
function measure_device!(device::CUDAGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring CUDA GPU $(device.device)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: CUDAGPU}
|
||||
|
||||
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
if !CUDA.functional()
|
||||
if verbose
|
||||
println("CUDA is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
CUDADevices = CUDA.devices()
|
||||
if verbose
|
||||
println("Found $(length(CUDADevices)) CUDA devices")
|
||||
end
|
||||
for device in CUDADevices
|
||||
push!(devices, CUDAGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
@@ -1,127 +1,23 @@
|
||||
using NumaAllocators
|
||||
using CUDA
|
||||
using ROCm
|
||||
using oneAPI
|
||||
|
||||
"""
|
||||
get_machine_info(verbose::Bool)
|
||||
|
||||
Return the [`Machine`](@ref) currently running on. The parameter `verbose` defaults to true when interactive.
|
||||
"""
|
||||
function get_machine_info(verbose::Bool = Base.is_interactive())
|
||||
devices = Vector{Device}()
|
||||
function get_machine_info(; verbose::Bool = Base.is_interactive)
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
numaDevices = get_numa_devices(verbose)
|
||||
push!(devices, numaDevices)
|
||||
|
||||
cudaDevices = get_cuda_devices(verbose)
|
||||
push!(devices, cudaDevices)
|
||||
|
||||
rocmDevices = get_rocm_devices(verbose)
|
||||
push!(devices, rocmDevices)
|
||||
|
||||
oneapiDevices = get_oneapi_devices(verbose)
|
||||
push!(devices, oneapiDevices)
|
||||
for device in device_types()
|
||||
devs = get_devices(device, verbose = verbose)
|
||||
for dev in devs
|
||||
push!(devices, dev)
|
||||
end
|
||||
end
|
||||
|
||||
noDevices = length(devices)
|
||||
@assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"
|
||||
|
||||
return Machine(devices, transferRates::Matrix{Float64}(-1, noDevices, noDevices))
|
||||
end
|
||||
|
||||
"""
|
||||
get_numa_devices(verbose::Bool)
|
||||
|
||||
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_numa_devices(verbose::Bool)
|
||||
devices = Vector{Device}()
|
||||
noNumaNodes = highest_numa_node()
|
||||
|
||||
if (verbose)
|
||||
println("Found $(noNumaNodes + 1) NUMA nodes")
|
||||
end
|
||||
for i in 0:noNumaNodes
|
||||
push!(devices, NumaNode(i, 1, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
||||
|
||||
"""
|
||||
get_cuda_devices(verbose::Bool)
|
||||
|
||||
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_cuda_devices(verbose::Bool)
|
||||
devices = Vector{Device}()
|
||||
|
||||
if !CUDA.functional()
|
||||
if verbose
|
||||
println("CUDA is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
CUDADevices = CUDA.devices()
|
||||
if verbose
|
||||
println("Found $(length(CUDADevices)) CUDA devices")
|
||||
end
|
||||
for device in CUDADevices
|
||||
push!(devices, CUDAGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
||||
|
||||
"""
|
||||
get_rocm_devices(verbose::Bool)
|
||||
|
||||
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_rocm_devices(verbose::Bool)
|
||||
devices = Vector{Device}()
|
||||
|
||||
if !AMDGPU.functional()
|
||||
if verbose
|
||||
println("AMDGPU is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
AMDDevices = AMDGPU.devices()
|
||||
if verbose
|
||||
println("Found $(length(AMDDevices)) AMD devices")
|
||||
end
|
||||
for device in AMDDevices
|
||||
push!(devices, ROCmGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
||||
|
||||
"""
|
||||
get_oneapi_devices(verbose::Bool)
|
||||
|
||||
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_oneapi_devices(verbose::Bool)
|
||||
devices = Vector{Device}()
|
||||
|
||||
if !oneAPI.functional()
|
||||
if verbose
|
||||
println("oneAPI is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
oneAPIDevices = oneAPI.devices()
|
||||
if verbose
|
||||
println("Found $(length(oneAPIDevices)) oneAPI devices")
|
||||
end
|
||||
for device in oneAPIDevices
|
||||
push!(devices, oneAPIGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
transferRates = Matrix{Float64}(undef, noDevices, noDevices)
|
||||
fill!(transferRates, -1)
|
||||
return Machine(devices, transferRates)
|
||||
end
|
||||
|
25
src/devices/impl.jl
Normal file
25
src/devices/impl.jl
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
"""
|
||||
device_types()
|
||||
|
||||
Return a vector of available and implemented device types.
|
||||
|
||||
See also: [`DEVICE_TYPES`](@ref)
|
||||
"""
|
||||
function device_types()
|
||||
return DEVICE_TYPES
|
||||
end
|
||||
|
||||
"""
|
||||
strategies(t::Type{T}) where {T <: AbstractDevice}
|
||||
|
||||
Return a vector of available [`CacheStrategy`](@ref)s for the given [`AbstarctDevice`](@ref).
|
||||
The caching strategies are used in code generation.
|
||||
"""
|
||||
function strategies(t::Type{T}) where {T <: AbstractDevice}
|
||||
if !haskey(CACHE_STRATEGIES, t)
|
||||
error("Trying to get strategies for $T, but it has no strategies defined!")
|
||||
end
|
||||
|
||||
return CACHE_STRATEGIES[t]
|
||||
end
|
87
src/devices/interface.jl
Normal file
87
src/devices/interface.jl
Normal file
@@ -0,0 +1,87 @@
|
||||
|
||||
abstract type AbstractDevice end
|
||||
|
||||
abstract type AbstractCPU <: AbstractDevice end
|
||||
|
||||
abstract type AbstractGPU <: AbstractDevice end
|
||||
|
||||
"""
|
||||
Machine
|
||||
|
||||
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
|
||||
|
||||
See also: [`Scheduler`](@ref)
|
||||
"""
|
||||
struct Machine
|
||||
devices::Vector{AbstractDevice}
|
||||
|
||||
transferRates::Matrix{Float64}
|
||||
end
|
||||
|
||||
"""
|
||||
CacheStrategy
|
||||
|
||||
Abstract base type for caching strategies.
|
||||
|
||||
See also: [`strategies`](@ref)
|
||||
"""
|
||||
abstract type CacheStrategy end
|
||||
|
||||
struct LocalVariables <: CacheStrategy end
|
||||
struct Dictionary <: CacheStrategy end
|
||||
|
||||
"""
|
||||
DEVICE_TYPES::Vector{Type}
|
||||
|
||||
Global vector of available and implemented device types. Each implementation of a [`AbstractDevice`](@ref) should add its concrete type to this vector.
|
||||
|
||||
See also: [`device_types`](@ref), [`get_devices`](@ref)
|
||||
"""
|
||||
DEVICE_TYPES = Vector{Type}()
|
||||
|
||||
"""
|
||||
CACHE_STRATEGIES::Dict{Type{AbstractDevice}, Symbol}
|
||||
|
||||
Global dictionary of available caching strategies per device. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary.
|
||||
|
||||
See also: [`strategies`](@ref)
|
||||
"""
|
||||
CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}()
|
||||
|
||||
"""
|
||||
default_strategy(device::AbstractDevice)
|
||||
|
||||
Interface function that must be implmented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device.
|
||||
"""
|
||||
function default_strategy end
|
||||
|
||||
"""
|
||||
get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector{Type}` of the devices for the given [`AbstractDevice`](@ref) Type available on the current machine.
|
||||
"""
|
||||
function get_devices end
|
||||
|
||||
"""
|
||||
measure_device!(device::AbstractDevice; verbose::Bool)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes into it.
|
||||
"""
|
||||
function measure_device! end
|
||||
|
||||
"""
|
||||
gen_cache_init_code(device::AbstractDevice, strategy::CacheStrategy)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an [`Expr`](@ref) initializing this device's variable cache.
|
||||
|
||||
The strategy is a symbol
|
||||
"""
|
||||
function gen_cache_init_code end
|
||||
|
||||
"""
|
||||
gen_access_expr(device::AbstractDevice, strategy::CacheStrategy, symbol::Symbol)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
|
||||
Return an `Expr` accessing the variable identified by [`symbol`].
|
||||
"""
|
||||
function gen_access_expr end
|
@@ -17,34 +17,6 @@ end
|
||||
Measure the transfer rates between devices in the machine.
|
||||
"""
|
||||
function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.is_interactive())
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function measure_device!(device::NumaNode; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring Numa Node $(device.numaId)")
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
function measure_device!(device::CUDAGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring CUDA GPU $(device.device)")
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
function measure_device!(device::ROCmGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring ROCm GPU $(device.device)")
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
function measure_device!(device::oneAPIGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring oneAPI GPU $(device.device)")
|
||||
end
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
61
src/devices/numa/impl.jl
Normal file
61
src/devices/numa/impl.jl
Normal file
@@ -0,0 +1,61 @@
|
||||
using NumaAllocators
|
||||
|
||||
mutable struct NumaNode <: AbstractCPU
|
||||
numaId::UInt16
|
||||
threads::UInt16
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, NumaNode)
|
||||
|
||||
CACHE_STRATEGIES[NumaNode] = [LocalVariables()]
|
||||
|
||||
default_strategy(::NumaNode) = LocalVariables()
|
||||
|
||||
function measure_device!(device::NumaNode; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring Numa Node $(device.numaId)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: NumaNode}
|
||||
|
||||
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
|
||||
devices = Vector{AbstractDevice}()
|
||||
noNumaNodes = highest_numa_node()
|
||||
|
||||
if (verbose)
|
||||
println("Found $(noNumaNodes + 1) NUMA nodes")
|
||||
end
|
||||
for i in 0:noNumaNodes
|
||||
push!(devices, NumaNode(i, 1, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
||||
|
||||
"""
|
||||
gen_cache_init_code(device::NumaNode, strategy::LocalVariables)
|
||||
|
||||
Generate code for initializing the [`LocalVariables`](@ref) strategy on a [`NumaNode`](@ref).
|
||||
"""
|
||||
function gen_cache_init_code(::NumaNode, ::LocalVariables)
|
||||
# don't need to initialize anything
|
||||
return Expr()
|
||||
end
|
||||
|
||||
"""
|
||||
gen_access_expr(device::NumaNode, strategy::LocalVariables, symbol::Symbol)
|
||||
|
||||
Generate code to access the variable designated by `symbol` using the [`LocalVariables`](@ref) [`CacheStrategy`](@ref) on a [`NumaNode`](@ref).
|
||||
"""
|
||||
function gen_access_expr(::NumaNode, ::LocalVariables, symbol::Symbol)
|
||||
s = Symbol("data_$symbol")
|
||||
return Meta.parse(":($s)")
|
||||
end
|
47
src/devices/oneapi/impl.jl
Normal file
47
src/devices/oneapi/impl.jl
Normal file
@@ -0,0 +1,47 @@
|
||||
using oneAPI
|
||||
|
||||
mutable struct oneAPIGPU <: AbstractGPU
|
||||
device::Any
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, oneAPIGPU)
|
||||
|
||||
CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]
|
||||
|
||||
default_strategy(::oneAPIGPU) = LocalVariables()
|
||||
|
||||
function measure_device!(device::oneAPIGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring oneAPI GPU $(device.device)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
|
||||
|
||||
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
if !oneAPI.functional()
|
||||
if verbose
|
||||
println("oneAPI is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
oneAPIDevices = oneAPI.devices()
|
||||
if verbose
|
||||
println("Found $(length(oneAPIDevices)) oneAPI devices")
|
||||
end
|
||||
for device in oneAPIDevices
|
||||
push!(devices, oneAPIGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
47
src/devices/rocm/impl.jl
Normal file
47
src/devices/rocm/impl.jl
Normal file
@@ -0,0 +1,47 @@
|
||||
using AMDGPU
|
||||
|
||||
mutable struct ROCmGPU <: AbstractGPU
|
||||
device::Any
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, ROCmGPU)
|
||||
|
||||
CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]
|
||||
|
||||
default_strategy(::ROCmGPU) = LocalVariables()
|
||||
|
||||
function measure_device!(device::ROCmGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring ROCm GPU $(device.device)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
|
||||
|
||||
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
if !AMDGPU.functional()
|
||||
if verbose
|
||||
println("AMDGPU is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
AMDDevices = AMDGPU.devices()
|
||||
if verbose
|
||||
println("Found $(length(AMDDevices)) AMD devices")
|
||||
end
|
||||
for device in AMDDevices
|
||||
push!(devices, ROCmGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
@@ -1,40 +0,0 @@
|
||||
|
||||
abstract type Device end
|
||||
|
||||
abstract type CPU <: Device end
|
||||
|
||||
mutable struct NumaNode <: CPU
|
||||
numaId::UInt16
|
||||
threads::UInt16
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
abstract type GPU <: Device end
|
||||
|
||||
mutable struct CUDAGPU <: GPU
|
||||
device::Any # TODO: what's the cuda device type?
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
mutable struct ROCmGPU <: GPU
|
||||
device::Any
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
mutable struct oneAPIGPU <: GPU
|
||||
device::Any
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
"""
|
||||
Machine
|
||||
|
||||
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
|
||||
|
||||
See also: [`Scheduler`](@ref)
|
||||
"""
|
||||
struct Machine
|
||||
devices::Vector{Device}
|
||||
|
||||
transferRates::Matrix{Float64}
|
||||
end
|
Reference in New Issue
Block a user