experiments #1
@ -12,7 +12,7 @@ using Base.Threads
|
||||
|
||||
"""
    log(x...)

Print a timestamped message to `stdout` and flush it immediately.

The arguments are joined with single spaces and prefixed with the current
time from `now()`. Returns `nothing`.
"""
function log(x...)
    # Pass the joined message as one string. Splatting it (`join(...)...`)
    # would hand every character to `println` as a separate argument.
    println(now(), " ", join(x, " "))
    # Flush so the line is visible right away, e.g. when output is redirected
    # to a log file.
    flush(stdout)
    return nothing
end
|
||||
|
||||
@ -29,6 +29,7 @@ df = DataFrame(
|
||||
rate = Float64[],
|
||||
cpu_chunks = Float64[],
|
||||
gpu_chunks = Float64[],
|
||||
memory_est = Float64[],
|
||||
)
|
||||
|
||||
# if they exist, read existing results and append new ones
|
||||
@ -36,7 +37,7 @@ if isfile(results_filename)
|
||||
df = CSV.read(results_filename, DataFrame)
|
||||
end
|
||||
|
||||
nInputs = 16_777_216 # 2^24
|
||||
nInputs = 2^26
|
||||
|
||||
lck = ReentrantLock()
|
||||
|
||||
@ -109,7 +110,8 @@ function gpu_worker(kernel!, inputs, chunk_size)
|
||||
cuInputs = CuVector(inputs[work_start:work_end])
|
||||
ts = 32
|
||||
bs = Int(chunk_size / 32)
|
||||
CUDA.@sync threads = ts blocks = bs always_inline = true kernel!(cuInputs, cuOutputs, chunk_size)
|
||||
@cuda threads = ts blocks = bs always_inline = true kernel!(cuInputs, cuOutputs, chunk_size)
|
||||
CUDA.device_synchronize()
|
||||
end
|
||||
|
||||
#log("GPU Worker on Device $(CUDA.device()) finished!")
|
||||
@ -156,7 +158,7 @@ function bench(compute_function, kernel!, inputs, chunk_size)
|
||||
|
||||
bench = @benchmark begin
|
||||
full_compute($compute_function, $kernel!, $inputs, $chunk_size)
|
||||
end gcsample = true seconds = 30
|
||||
end gcsample = true seconds = 60
|
||||
|
||||
time = median(bench.times) / 1e9
|
||||
s = std(bench.times) / 1e9
|
||||
@ -164,10 +166,11 @@ function bench(compute_function, kernel!, inputs, chunk_size)
|
||||
|
||||
med_cpu_chunks = median(getindex.(cpu_gpu_ratio, 1))
|
||||
med_gpu_chunks = median(getindex.(cpu_gpu_ratio, 2))
|
||||
mem_estimate = bench.memory
|
||||
|
||||
log("CPU/GPU ratios: $(cpu_gpu_ratio)")
|
||||
|
||||
return (time, rate, s, med_cpu_chunks, med_gpu_chunks)
|
||||
return (time, rate, s, med_cpu_chunks, med_gpu_chunks, mem_estimate)
|
||||
end
|
||||
|
||||
function full_node_bench(process::MetagraphOptimization.AbstractProcessDescription, func, kernel!, chunk_size, inputs)
|
||||
@ -178,7 +181,7 @@ function full_node_bench(process::MetagraphOptimization.AbstractProcessDescripti
|
||||
display.(CUDA.devices())
|
||||
|
||||
log("Benchmarking full node...")
|
||||
(time, rate, s, med_cpu_chunks, med_gpu_chunks) = bench(func, kernel!, inputs, chunk_size)
|
||||
(time, rate, s, med_cpu_chunks, med_gpu_chunks, mem_estimate) = bench(func, kernel!, inputs, chunk_size)
|
||||
log(
|
||||
"Benchmarking complete with median time $(time), $(med_cpu_chunks) cpu chunks, and $(med_gpu_chunks) gpu chunks.",
|
||||
)
|
||||
@ -196,6 +199,7 @@ function full_node_bench(process::MetagraphOptimization.AbstractProcessDescripti
|
||||
:rate => rate,
|
||||
:cpu_chunks => med_cpu_chunks,
|
||||
:gpu_chunks => med_gpu_chunks,
|
||||
:memory_est => mem_estimate,
|
||||
),
|
||||
)
|
||||
|
||||
|
@ -235,7 +235,7 @@ bench_process(process, "warmup", graph, compute_func, kernel!, gen_time, opt_tim
|
||||
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke", "ke->kkkkkke"]
|
||||
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke"]
|
||||
|
||||
for process_str in processes
|
||||
# compton
|
||||
@ -253,7 +253,7 @@ for process_str in processes
|
||||
CSV.write(results_filename, df)
|
||||
end
|
||||
|
||||
processes = ["AB->AB", "AB->ABBB", "AB->ABBBBB"]
|
||||
processes = ["AB->AB", "AB->ABBB", "AB->ABBBBB", "AB->ABBBBBBB"]
|
||||
|
||||
for process_str in processes
|
||||
# AB->AB
|
||||
|
@ -15,11 +15,11 @@ nvidia-smi > results/cuda_gpu_full_node.txt
|
||||
lsblk > results/storage_full_node.txt
|
||||
lspci > results/pci_full_node.txt
|
||||
|
||||
echo "Initiating julia..."
|
||||
julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
|
||||
julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
|
||||
julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
|
||||
julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
|
||||
#echo "Initiating julia..."
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
|
||||
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
|
||||
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
|
||||
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
|
||||
|
||||
echo "Benchmarking Full Node 128 Threads + *GPUs*"
|
||||
julia --project -O3 --threads=128 examples/full_node_bench.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
|
||||
|
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user