Full node fix

Anton Reinhard 2024-03-07 16:36:15 +01:00
parent 1d957bc128
commit 371467c2bc
4 changed files with 2361 additions and 1344 deletions

View File

@@ -12,7 +12,7 @@ using Base.Threads
function log(x...)
println(now(), " ", join(x, " ")...)
#flush(stdout)
flush(stdout)
return nothing
end
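
The change above re-enables flush(stdout), so each timestamped log line reaches the job's output immediately instead of sitting in the stream buffer. A minimal sketch of the helper as it stands after the change (assuming using Dates for now(), which the script already needs):

using Dates

function log(x...)
    # print a timestamp followed by all arguments, then flush so the line
    # shows up in the batch log right away (note: this shadows Base.log)
    println(now(), " ", join(x, " ")...)
    flush(stdout)
    return nothing
end

log("GPU workers:", 4)   # e.g. "2024-03-07T16:36:15.123 GPU workers: 4"
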
@@ -29,6 +29,7 @@ df = DataFrame(
rate = Float64[],
cpu_chunks = Float64[],
gpu_chunks = Float64[],
memory_est = Float64[],
)
# if they exist, read existing results and append new ones
@@ -36,7 +37,7 @@ if isfile(results_filename)
df = CSV.read(results_filename, DataFrame)
end
nInputs = 16_777_216 # 2^30
nInputs = 2^26
lck = ReentrantLock()
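
These two hunks add a memory_est column to the results table and shrink the input count to 2^26; note that the old literal 16_777_216 is actually 2^24, not 2^30 as its comment claimed. A reduced sketch of the pattern, showing only the columns visible in this diff and a hypothetical results file name:

using CSV, DataFrames

results_filename = "full_node_results.csv"   # hypothetical name for this sketch

# only the columns that appear in this diff; the script defines several more
df = DataFrame(rate = Float64[], cpu_chunks = Float64[], gpu_chunks = Float64[], memory_est = Float64[])

# if a previous run left results behind, continue appending to them
if isfile(results_filename)
    df = CSV.read(results_filename, DataFrame)
end

nInputs = 2^26   # 67_108_864 generated inputs
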
@@ -109,7 +110,8 @@ function gpu_worker(kernel!, inputs, chunk_size)
cuInputs = CuVector(inputs[work_start:work_end])
ts = 32
bs = Int(chunk_size / 32)
CUDA.@sync threads = ts blocks = bs always_inline = true kernel!(cuInputs, cuOutputs, chunk_size)
@cuda threads = ts blocks = bs always_inline = true kernel!(cuInputs, cuOutputs, chunk_size)
CUDA.device_synchronize()
end
#log("GPU Worker on Device $(CUDA.device()) finished!")
@@ -156,7 +158,7 @@ function bench(compute_function, kernel!, inputs, chunk_size)
bench = @benchmark begin
full_compute($compute_function, $kernel!, $inputs, $chunk_size)
end gcsample = true seconds = 30
end gcsample = true seconds = 60
time = median(bench.times) / 1e9
s = std(bench.times) / 1e9
@@ -164,10 +166,11 @@ function bench(compute_function, kernel!, inputs, chunk_size)
med_cpu_chunks = median(getindex.(cpu_gpu_ratio, 1))
med_gpu_chunks = median(getindex.(cpu_gpu_ratio, 2))
mem_estimate = bench.memory
log("CPU/GPU ratios: $(cpu_gpu_ratio)")
return (time, rate, s, med_cpu_chunks, med_gpu_chunks)
return (time, rate, s, med_cpu_chunks, med_gpu_chunks, mem_estimate)
end
function full_node_bench(process::MetagraphOptimization.AbstractProcessDescription, func, kernel!, chunk_size, inputs)
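
With these hunks the benchmark runs for up to 60 seconds per configuration and additionally records BenchmarkTools' allocation estimate, which is returned as mem_estimate and later stored in the memory_est column. A minimal sketch of the measurement pattern (assuming BenchmarkTools.jl; sum over a vector stands in for the full_compute call):

using BenchmarkTools
using Statistics: median, std

xs = rand(10_000)
bench = @benchmark sum($xs) gcsample = true seconds = 1   # the script uses seconds = 60

time_s = median(bench.times) / 1e9   # times are collected in nanoseconds
sigma_s = std(bench.times) / 1e9
mem_estimate = bench.memory          # estimated bytes allocated per evaluation
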
@@ -178,7 +181,7 @@ function full_node_bench(process::MetagraphOptimization.AbstractProcessDescripti
display.(CUDA.devices())
log("Benchmarking full node...")
(time, rate, s, med_cpu_chunks, med_gpu_chunks) = bench(func, kernel!, inputs, chunk_size)
(time, rate, s, med_cpu_chunks, med_gpu_chunks, mem_estimate) = bench(func, kernel!, inputs, chunk_size)
log(
"Benchmarking complete with median time $(time), $(med_cpu_chunks) cpu chunks, and $(med_gpu_chunks) gpu chunks.",
)
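
For reference, the display.(CUDA.devices()) call above enumerates every GPU visible to the process; a small sketch of doing the same with an explicit loop (assuming CUDA.jl):

using CUDA

for dev in CUDA.devices()
    println(dev, ": ", CUDA.name(dev), ", ", round(CUDA.totalmem(dev) / 2^30; digits = 1), " GiB")
end
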
@@ -196,6 +199,7 @@ function full_node_bench(process::MetagraphOptimization.AbstractProcessDescripti
:rate => rate,
:cpu_chunks => med_cpu_chunks,
:gpu_chunks => med_gpu_chunks,
:memory_est => mem_estimate,
),
)
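
The new :memory_est key carries the allocation estimate into the stored results row. A sketch of the push pattern with only the keys visible in this hunk (the script's Dict holds further entries, and the values below are placeholders):

using DataFrames

df = DataFrame(rate = Float64[], cpu_chunks = Float64[], gpu_chunks = Float64[], memory_est = Float64[])

push!(
    df,
    Dict(
        :rate => 1.2e6,
        :cpu_chunks => 96.0,
        :gpu_chunks => 32.0,
        :memory_est => 2.5e9,   # bytes, taken from bench.memory
    ),
)
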

View File

@@ -235,7 +235,7 @@ bench_process(process, "warmup", graph, compute_func, kernel!, gen_time, opt_tim
optimizer = ReductionOptimizer()
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke", "ke->kkkkkke"]
processes = ["ke->ke", "ke->kke", "ke->kkke", "ke->kkkke", "ke->kkkkke"]
for process_str in processes
# compton
@@ -253,7 +253,7 @@ for process_str in processes
CSV.write(results_filename, df)
end
processes = ["AB->AB", "AB->ABBB", "AB->ABBBBB"]
processes = ["AB->AB", "AB->ABBB", "AB->ABBBBB", "AB->ABBBBBBB"]
for process_str in processes
# AB->AB

View File

@@ -15,11 +15,11 @@ nvidia-smi > results/cuda_gpu_full_node.txt
lsblk > results/storage_full_node.txt
lspci > results/pci_full_node.txt
echo "Initiating julia..."
julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
#echo "Initiating julia..."
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' >> $LOG_FILE 2>&1 || exit 1 # need current dev version of QEDprocesses
#julia --threads=8 --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/AntonReinhard/QEDbase.jl/tree/fix_bs_multiplication")' >> $LOG_FILE 2>&1 || exit 1 # need a specific fix for abs*bs multiplication for gpu
#julia --threads=8 -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("CUDA"); Pkg.add("Random"); Pkg.add("BenchmarkTools"); Pkg.add("Dates")' >> $LOG_FILE 2>&1 || exit 1 # add requirements for the bench script
#julia --project -e 'using CUDA; CUDA.set_runtime_version!(VersionNumber("12.1"))' >> $LOG_FILE 2>&1 || echo "Failed to set CUDA version number"
echo "Benchmarking Full Node 128 Threads + *GPUs*"
julia --project -O3 --threads=128 examples/full_node_bench.jl >> $LOG_FILE 2>&1 || echo "-- Something went wrong, check logs --"
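
The Pkg setup lines were commented out here, presumably because the environment only needs to be prepared once rather than on every benchmark run. In Julia terms the disabled setup amounts to roughly the following sketch (the branch of the QEDbase fork is given via rev=, the usual Pkg form for the /tree/... URL used in the original line):

using Pkg

Pkg.instantiate()
Pkg.add(url = "https://github.com/QEDjl-project/QEDprocesses.jl/")                            # dev version of QEDprocesses
Pkg.add(url = "https://github.com/AntonReinhard/QEDbase.jl", rev = "fix_bs_multiplication")   # fix for abs*bs multiplication on GPU
Pkg.add(["CSV", "DataFrames", "CUDA", "Random", "BenchmarkTools", "Dates"])

using CUDA
CUDA.set_runtime_version!(VersionNumber("12.1"))   # pin the CUDA runtime used by CUDA.jl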

File diff suppressed because one or more lines are too long