WIP Adding machine/device info and caching strategies
This commit is contained in:
parent
afb6af44ca
commit
37d645cb4e
@ -15,7 +15,6 @@ QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
|
||||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
|
||||
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
|
||||
|
||||
[extras]
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
@ -5,13 +5,44 @@
|
||||
"execution_count": 1,
|
||||
"id": "20768e45-df62-4638-ba33-b0ccf239f1aa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"┌ Error: Failed to initialize oneAPI\n",
|
||||
"│ exception = (oneAPI.oneL0.ZeError(oneAPI.oneL0.ZE_RESULT_ERROR_UNINITIALIZED), Union{Ptr{Nothing}, Base.InterpreterIP}[Ptr{Nothing} @0x00007f1860405f02, Ptr{Nothing} @0x00007f18604068dc, Ptr{Nothing} @0x00007f1860406900, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e7a7a4, Ptr{Nothing} @0x00007f1967e5e3bc, Ptr{Nothing} @0x00007f1953cecc65, Ptr{Nothing} @0x00007f19480c5c72, Ptr{Nothing} @0x00007f19480cb52c, Ptr{Nothing} @0x00007f19480c79a6, Ptr{Nothing} @0x00007f19480f67de, Ptr{Nothing} @0x00007f19480f6b4d, Ptr{Nothing} @0x00007f19480d2439, Ptr{Nothing} @0x00007f19480d40e4, Ptr{Nothing} @0x00007f19480d4286, Ptr{Nothing} @0x00007f19480d0c2a, Ptr{Nothing} @0x00007f19480d1216, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e7a338, Ptr{Nothing} @0x00007f1967e7c0e8, Ptr{Nothing} @0x00007f1967e7ba79, Ptr{Nothing} @0x00007f1967e7cdfa, Ptr{Nothing} @0x00007f19480f31ff, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e51f48, Ptr{Nothing} @0x00007f19480f1eb9, Ptr{Nothing} @0x00007f19480f2c78, Ptr{Nothing} @0x00007f19480f2f21, Ptr{Nothing} @0x00007f19480f2f56, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480e8bb1, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480ddd7c, Ptr{Nothing} @0x00007f19480dde82, Ptr{Nothing} @0x00007f19480ddead, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480d5001, Ptr{Nothing} @0x00007f1967e7b5db, Ptr{Nothing} @0x00007f1967e7ba79, Ptr{Nothing} @0x00007f1967e7cdfa, Ptr{Nothing} @0x00007f1953d26667, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1953b5abfd, Ptr{Nothing} @0x00007f19533d6e93, Ptr{Nothing} @0x00007f19533d6eaf, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1953d5c06e, Ptr{Nothing} @0x00007f1953786d4f, Ptr{Nothing} @0x00007f1953786e58, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967ea7a15, Ptr{Nothing} @0x00007f1967ea845e, Ptr{Nothing} @0x0000000000401088, Ptr{Nothing} @0x00007f1968c27ccf, Ptr{Nothing} @0x00007f1968c27d89, 
Ptr{Nothing} @0x00000000004010b8])\n",
|
||||
"└ @ oneAPI.oneL0 /home/antonr/.julia/packages/oneAPI/g8ZUm/lib/level-zero/oneL0.jl:103\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"using Revise\n",
|
||||
"using MetagraphOptimization\n",
|
||||
"using BenchmarkTools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "ff5f4a49",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Machine(MetagraphOptimization.AbstractDevice[MetagraphOptimization.NumaNode(0x0000, 0x0001, -1.0)], [-1.0;;])"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get our machine's info\n",
|
||||
"machine = get_machine_info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@ -24,9 +55,8 @@
|
||||
"ABCModel()"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@ -46,9 +76,8 @@
|
||||
"ABC Process: 'AB->ABBB'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@ -59,17 +88,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"id": "043bd9e2-f89a-4362-885a-8c89d4cdd76f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Total: 280, ComputeTaskS2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
@ -81,17 +103,16 @@
|
||||
" Total Compute Intensity: 0.09822733918128655\n"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
": 24, DataTask: 143, \n",
|
||||
" ComputeTaskU: 6, ComputeTaskSum: 1, ComputeTaskS1: 36, \n",
|
||||
" ComputeTaskV: 64, ComputeTaskP: 6"
|
||||
"Total: 280, DataTask: 143, ComputeTaskV: 64, \n",
|
||||
" ComputeTaskS2: 24, ComputeTaskS1: 36, ComputeTaskP: 6, \n",
|
||||
" ComputeTaskSum: 1, ComputeTaskU: 6"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -102,7 +123,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"id": "02f01ad3-fd10-48d5-a0e0-c03dc83c80a4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -110,19 +131,18 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Input for ABC Process: 'AB->ABBB':\n",
|
||||
" Input particles:\n",
|
||||
" A: [5.77986599979293, 0.0, 0.0, 5.692701553354288]\n",
|
||||
" B: [5.77986599979293, 0.0, 0.0, -5.692701553354288]\n",
|
||||
" Output Particles:\n",
|
||||
" A: [-3.8835293143673746, -1.4292027910861678, 2.8576090179942106, 1.968057422378813]\n",
|
||||
" B: [-1.1554024905063585, -0.1464656500147254, -0.2082400426692148, 0.5197487980391896]\n",
|
||||
" B: [-2.849749730594798, -1.0177034035100576, -2.464951858896686, -0.09677625137882176]\n",
|
||||
" B: [-3.6710504641173287, 2.5933718446109513, -0.1844171164283155, -2.391029969039186]\n"
|
||||
" 2 Incoming particles:\n",
|
||||
" B: [3.6884461652243594, 0.0, 0.0, 3.5503007075117288]\n",
|
||||
" A: [3.6884461652243594, 0.0, 0.0, -3.5503007075117288]\n",
|
||||
" 4 Outgoing Particles:\n",
|
||||
" B: [-2.1242475430909664, -0.8503576743556215, -0.7677870655372816, 1.4831798515305399]\n",
|
||||
" B: [-1.7067195416668486, 1.3493031523092422, -0.2241887179000338, 0.20496832887590027]\n",
|
||||
" B: [-2.276483767911827, 0.19358394881362065, 1.325763825770123, -1.5450740689711668]\n",
|
||||
" A: [-1.2694414777790763, -0.6925294267672414, -0.3337880423328074, -0.14307411143527338]\n"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@ -132,41 +152,39 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 9,
|
||||
"id": "083fb1be-ce2a-47f9-afb9-60a6fdfaed0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"compute_2f2a5dca_5e3a_11ee_3973_bfa5c42acee3 (generic function with 1 method)"
|
||||
"compute_2430665c_5ee0_11ee_1bd6_cf00927a1f40 (generic function with 1 method)"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get the function computing the result of the process from a ProcessInput\n",
|
||||
"AB_AB3_compute = get_compute_function(graph, process)"
|
||||
"AB_AB3_compute = get_compute_function(graph, process, machine)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 10,
|
||||
"id": "a40c9500-8f79-4f04-b3c5-59b72a6b7ba9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"-1.8924431710735022e-13"
|
||||
"-1.0304553507713216e-11"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@ -176,7 +194,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 11,
|
||||
"id": "80c70010",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -189,18 +207,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 13,
|
||||
"id": "5b192b44",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The result should be the same as before (we can use execute to save having to generate the function ourselves)\n",
|
||||
"@assert result ≈ execute(graph, process, input_data)"
|
||||
"@assert result ≈ execute(graph, process, machine, input_data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 15,
|
||||
"id": "9b2f4a3f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -208,71 +226,63 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1000-element Vector{Float64}:\n",
|
||||
" -2.1491995259940396e-11\n",
|
||||
" -1.04995646459455e-11\n",
|
||||
" 5.8217606911879465e-15\n",
|
||||
" -6.556969485683647e-14\n",
|
||||
" -1.3588086164373804e-14\n",
|
||||
" -1.8789662441593714e-13\n",
|
||||
" -2.1319733018358848e-13\n",
|
||||
" -5.335975907200479e-12\n",
|
||||
" -9.053914191490206e-13\n",
|
||||
" -5.611079017069238e-13\n",
|
||||
" -5.063492275603427e-11\n",
|
||||
" 2.9168508985810955e-15\n",
|
||||
" -1.6420151378194154e-13\n",
|
||||
" -2.1503212387420805e-13\n",
|
||||
" -3.515624153345333e-15\n",
|
||||
" -9.952937612602425e-14\n",
|
||||
" -9.153444987973305e-14\n",
|
||||
" -9.219799628850346e-12\n",
|
||||
" -1.0155811880772751e-13\n",
|
||||
" -2.278823085449527e-13\n",
|
||||
" -8.070463578419918e-14\n",
|
||||
" -1.838772437827446e-13\n",
|
||||
" -4.109239393253953e-12\n",
|
||||
" ⋮\n",
|
||||
" 1.0931677247833414e-13\n",
|
||||
" -7.704755306464438e-16\n",
|
||||
" -1.8385907037491413e-12\n",
|
||||
" -6.036215596560112e-14\n",
|
||||
" -9.988724014003618e-12\n",
|
||||
" 3.4861755637292744e-13\n",
|
||||
" -1.1051119822969222e-10\n",
|
||||
" -2.4965725132161986e-12\n",
|
||||
" -3.868242784720193e-11\n",
|
||||
" 7.904149696653533e-15\n",
|
||||
" -7.606811743178718e-11\n",
|
||||
" -5.100594937480289e-13"
|
||||
" -9.249672758783865e-13\n",
|
||||
" -1.1230011623854415e-11\n",
|
||||
" 3.0412717788587913e-12\n",
|
||||
" -7.618572966754196e-12\n",
|
||||
" -1.771698295445229e-14\n",
|
||||
" -3.128112746753292e-11\n",
|
||||
" -3.9791519593567145e-12\n",
|
||||
" -4.4850922847751944e-15\n",
|
||||
" 1.0995512644879932e-14"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Now we can generate a function and use it on lots of inputs\n",
|
||||
"inputs = [gen_process_input(process) for _ in 1:1000]\n",
|
||||
"AB_AB3_reduced_compute = get_compute_function(graph, process)\n",
|
||||
"AB_AB3_reduced_compute = get_compute_function(graph, process, machine)\n",
|
||||
"\n",
|
||||
"results = AB_AB3_reduced_compute.(inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 16,
|
||||
"id": "d43e4ff0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"BenchmarkTools.Trial: 1453 samples with 1 evaluation.\n",
|
||||
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.596 ms\u001b[22m\u001b[39m … \u001b[35m6.983 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 27.07%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m2.715 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m3.443 ms\u001b[22m\u001b[39m ± \u001b[32m1.068 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m4.39% ± 8.86%\n",
|
||||
"BenchmarkTools.Trial: 1366 samples with 1 evaluation.\n",
|
||||
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.671 ms\u001b[22m\u001b[39m … \u001b[35m9.612 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 44.01%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m3.007 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m 0.00%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m3.654 ms\u001b[22m\u001b[39m ± \u001b[32m1.320 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m11.60% ± 16.57%\n",
|
||||
"\n",
|
||||
" \u001b[39m▇\u001b[39m█\u001b[34m▃\u001b[39m\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m▂\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
|
||||
" \u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▆\u001b[39m▇\u001b[32m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▅\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▆\u001b[39m▅\u001b[39m▇\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m \u001b[39m█\n",
|
||||
" 2.6 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 6.44 ms \u001b[0m\u001b[1m<\u001b[22m\n",
|
||||
" \u001b[39m▅\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▆\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\n",
|
||||
" \u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[32m▇\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m \u001b[39m█\n",
|
||||
" 2.67 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 8.27 ms \u001b[0m\u001b[1m<\u001b[22m\n",
|
||||
"\n",
|
||||
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m6.17 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m143004\u001b[39m."
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@ -281,28 +291,27 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 17,
|
||||
"id": "e18d9546",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"BenchmarkTools.Trial: 1890 samples with 1 evaluation.\n",
|
||||
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.007 ms\u001b[22m\u001b[39m … \u001b[35m 5.588 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 29.53%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m2.086 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m2.644 ms\u001b[22m\u001b[39m ± \u001b[32m840.095 μs\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m5.18% ± 10.46%\n",
|
||||
"BenchmarkTools.Trial: 1772 samples with 1 evaluation.\n",
|
||||
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.065 ms\u001b[22m\u001b[39m … \u001b[35m8.468 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 44.06%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m2.158 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m 0.00%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m2.820 ms\u001b[22m\u001b[39m ± \u001b[32m1.195 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m11.39% ± 16.96%\n",
|
||||
"\n",
|
||||
" \u001b[39m█\u001b[34m▇\u001b[39m\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
|
||||
" \u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[32m▅\u001b[39m\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▄\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▅\u001b[39m▄\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m \u001b[39m█\n",
|
||||
" 2.01 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 5.32 ms \u001b[0m\u001b[1m<\u001b[22m\n",
|
||||
" \u001b[39m█\u001b[34m▆\u001b[39m\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
|
||||
" \u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[32m▅\u001b[39m\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▁\u001b[39m▅\u001b[39m▃\u001b[39m▃\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▆\u001b[39m \u001b[39m█\n",
|
||||
" 2.06 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 7.41 ms \u001b[0m\u001b[1m<\u001b[22m\n",
|
||||
"\n",
|
||||
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m5.26 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m123004\u001b[39m."
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
|
@ -57,6 +57,9 @@ export ParticleValue
|
||||
export ParticleA, ParticleB, ParticleC
|
||||
export ABCProcessDescription, ABCProcessInput, ABCModel
|
||||
|
||||
export Machine
|
||||
export get_machine_info
|
||||
|
||||
export ==, in, show, isempty, delete!, length
|
||||
|
||||
export bytes_to_human_readable
|
||||
@ -127,6 +130,18 @@ include("models/abc/properties.jl")
|
||||
include("models/abc/parse.jl")
|
||||
include("models/abc/print.jl")
|
||||
|
||||
include("devices/interface.jl")
|
||||
include("devices/measure.jl")
|
||||
include("devices/detect.jl")
|
||||
include("devices/impl.jl")
|
||||
|
||||
include("devices/numa/impl.jl")
|
||||
include("devices/cuda/impl.jl")
|
||||
# can currently not use AMDGPU because of incompatibility with the newest rocm drivers
|
||||
# include("devices/rocm/impl.jl")
|
||||
# oneapi seems also broken for now
|
||||
# include("devices/oneapi/impl.jl")
|
||||
|
||||
include("code_gen/main.jl")
|
||||
|
||||
end # module MetagraphOptimization
|
||||
|
@ -25,7 +25,7 @@ function gen_code(graph::DAG)
|
||||
inputSyms[node.name] = Vector{Symbol}()
|
||||
end
|
||||
|
||||
push!(inputSyms[node.name], Symbol("data_$(to_var_name(node.id))_in"))
|
||||
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
|
||||
end
|
||||
|
||||
node = nothing
|
||||
@ -45,7 +45,7 @@ function gen_code(graph::DAG)
|
||||
end
|
||||
|
||||
# node is now the last node we looked at -> the output node
|
||||
outSym = Symbol("data_$(to_var_name(node.id))")
|
||||
outSym = Symbol("$(to_var_name(node.id))")
|
||||
|
||||
return (code = Expr(:block, code...), inputSymbols = inputSyms, outputSymbol = outSym)
|
||||
end
|
||||
@ -53,6 +53,7 @@ end
|
||||
function gen_input_assignment_code(
|
||||
inputSymbols::Dict{String, Vector{Symbol}},
|
||||
processDescription::AbstractProcessDescription,
|
||||
machine::Machine,
|
||||
processInputSymbol::Symbol = :input,
|
||||
)
|
||||
@assert length(inputSymbols) >=
|
||||
@ -75,7 +76,11 @@ function gen_input_assignment_code(
|
||||
end
|
||||
|
||||
for symbol in symbols
|
||||
push!(assignInputs, Meta.parse("$(symbol) = ParticleValue($p, 1.0)"))
|
||||
# TODO generate correct access expression
|
||||
# TODO how to define cache strategies?
|
||||
device = machine.devices[1]
|
||||
evalExpr = eval(gen_access_expr(device, default_strategy(device), symbol))
|
||||
push!(assignInputs, Meta.parse("$(evalExpr) = ParticleValue($p, 1.0)"))
|
||||
end
|
||||
end
|
||||
|
||||
@ -83,19 +88,23 @@ function gen_input_assignment_code(
|
||||
end
|
||||
|
||||
"""
|
||||
get_compute_function(graph::DAG, process::AbstractProcessDescription)
|
||||
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
|
||||
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
|
||||
"""
|
||||
function get_compute_function(graph::DAG, process::AbstractProcessDescription)
|
||||
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
(code, inputSymbols, outputSymbol) = gen_code(graph)
|
||||
|
||||
assignInputs = gen_input_assignment_code(inputSymbols, process, :input)
|
||||
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
|
||||
|
||||
function_id = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
# TODO generate correct access expression
|
||||
# TODO how to define cache strategies?
|
||||
device = machine.devices[1]
|
||||
|
||||
functionId = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
func = eval(
|
||||
Meta.parse(
|
||||
"function compute_$(function_id)(input::AbstractProcessInput) $assignInputs; $code; return $outputSymbol; end",
|
||||
"function compute_$(functionId)(input::AbstractProcessInput) $assignInputs; $code; return $(eval(gen_access_expr(device, default_strategy(device), outputSymbol))); end",
|
||||
),
|
||||
)
|
||||
|
||||
@ -103,7 +112,7 @@ function get_compute_function(graph::DAG, process::AbstractProcessDescription)
|
||||
end
|
||||
|
||||
"""
|
||||
execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
|
||||
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
|
||||
Execute the code of the given `graph` on the given input particles.
|
||||
|
||||
@ -115,8 +124,8 @@ This is essentially shorthand for
|
||||
|
||||
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
|
||||
"""
|
||||
function execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
|
||||
func = get_compute_function(graph, process)
|
||||
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
func = get_compute_function(graph, process, machine)
|
||||
|
||||
result = 0
|
||||
try
|
||||
|
47
src/devices/cuda/impl.jl
Normal file
47
src/devices/cuda/impl.jl
Normal file
@ -0,0 +1,47 @@
|
||||
using CUDA
|
||||
|
||||
mutable struct CUDAGPU <: AbstractGPU
|
||||
device::Any # TODO: what's the cuda device type?
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, CUDAGPU)
|
||||
|
||||
CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
|
||||
|
||||
default_strategy(::CUDAGPU) = LocalVariables()
|
||||
|
||||
function measure_device!(device::CUDAGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring CUDA GPU $(device.device)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: CUDAGPU}
|
||||
|
||||
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
if !CUDA.functional()
|
||||
if verbose
|
||||
println("CUDA is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
CUDADevices = CUDA.devices()
|
||||
if verbose
|
||||
println("Found $(length(CUDADevices)) CUDA devices")
|
||||
end
|
||||
for device in CUDADevices
|
||||
push!(devices, CUDAGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
@ -1,127 +1,23 @@
|
||||
using NumaAllocators
|
||||
using CUDA
|
||||
using ROCm
|
||||
using oneAPI
|
||||
|
||||
"""
|
||||
get_machine_info(verbose::Bool)
|
||||
|
||||
Return the [`Machine`](@ref) that the code is currently running on. The parameter `verbose` defaults to true when interactive.
|
||||
"""
|
||||
function get_machine_info(verbose::Bool = Base.is_interactive())
|
||||
devices = Vector{Device}()
|
||||
function get_machine_info(; verbose::Bool = Base.is_interactive)
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
numaDevices = get_numa_devices(verbose)
|
||||
push!(devices, numaDevices)
|
||||
|
||||
cudaDevices = get_cuda_devices(verbose)
|
||||
push!(devices, cudaDevices)
|
||||
|
||||
rocmDevices = get_rocm_devices(verbose)
|
||||
push!(devices, rocmDevices)
|
||||
|
||||
oneapiDevices = get_oneapi_devices(verbose)
|
||||
push!(devices, oneapiDevices)
|
||||
for device in device_types()
|
||||
devs = get_devices(device, verbose = verbose)
|
||||
for dev in devs
|
||||
push!(devices, dev)
|
||||
end
|
||||
end
|
||||
|
||||
noDevices = length(devices)
|
||||
@assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"
|
||||
|
||||
return Machine(devices, transferRates::Matrix{Float64}(-1, noDevices, noDevices))
|
||||
end
|
||||
|
||||
"""
|
||||
get_numa_devices(verbose::Bool)
|
||||
|
||||
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_numa_devices(verbose::Bool)
|
||||
devices = Vector{Device}()
|
||||
noNumaNodes = highest_numa_node()
|
||||
|
||||
if (verbose)
|
||||
println("Found $(noNumaNodes + 1) NUMA nodes")
|
||||
end
|
||||
for i in 0:noNumaNodes
|
||||
push!(devices, NumaNode(i, 1, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
||||
|
||||
"""
|
||||
get_cuda_devices(verbose::Bool)
|
||||
|
||||
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_cuda_devices(verbose::Bool)
|
||||
devices = Vector{Device}()
|
||||
|
||||
if !CUDA.functional()
|
||||
if verbose
|
||||
println("CUDA is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
CUDADevices = CUDA.devices()
|
||||
if verbose
|
||||
println("Found $(length(CUDADevices)) CUDA devices")
|
||||
end
|
||||
for device in CUDADevices
|
||||
push!(devices, CUDAGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
||||
|
||||
"""
|
||||
get_rocm_devices(verbose::Bool)
|
||||
|
||||
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_rocm_devices(verbose::Bool)
    devices = Vector{Device}()

    if AMDGPU.functional()
        AMDDevices = AMDGPU.devices()
        verbose && println("Found $(length(AMDDevices)) AMD devices")

        # Wrap each backend device; FLOPS is created as -1.
        foreach(AMDDevices) do device
            push!(devices, ROCmGPU(device, -1))
        end
    else
        # Without a functional AMDGPU the result stays empty.
        verbose && println("AMDGPU is non-functional")
    end

    return devices
end
|
||||
|
||||
"""
|
||||
get_oneapi_devices(verbose::Bool)
|
||||
|
||||
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_oneapi_devices(verbose::Bool)
|
||||
devices = Vector{Device}()
|
||||
|
||||
if !oneAPI.functional()
|
||||
if verbose
|
||||
println("oneAPI is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
oneAPIDevices = oneAPI.devices()
|
||||
if verbose
|
||||
println("Found $(length(oneAPIDevices)) oneAPI devices")
|
||||
end
|
||||
for device in oneAPIDevices
|
||||
push!(devices, oneAPIGPU(device, -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
transferRates = Matrix{Float64}(undef, noDevices, noDevices)
|
||||
fill!(transferRates, -1)
|
||||
return Machine(devices, transferRates)
|
||||
end
|
||||
|
25
src/devices/impl.jl
Normal file
25
src/devices/impl.jl
Normal file
@ -0,0 +1,25 @@
|
||||
|
||||
"""
|
||||
device_types()
|
||||
|
||||
Return a vector of available and implemented device types.
|
||||
|
||||
See also: [`DEVICE_TYPES`](@ref)
|
||||
"""
|
||||
device_types() = DEVICE_TYPES # returns the global registry itself, not a copy
|
||||
|
||||
"""
|
||||
strategies(t::Type{T}) where {T <: AbstractDevice}
|
||||
|
||||
Return a vector of available [`CacheStrategy`](@ref)s for the given [`AbstractDevice`](@ref) type.
|
||||
The caching strategies are used in code generation.
|
||||
"""
|
||||
function strategies(t::Type{T}) where {T <: AbstractDevice}
    # One lookup in the registry; the `do` block only runs when `t` has no entry and
    # raises the same error the caller would otherwise get.
    return get(CACHE_STRATEGIES, t) do
        error("Trying to get strategies for $T, but it has no strategies defined!")
    end
end
|
87
src/devices/interface.jl
Normal file
87
src/devices/interface.jl
Normal file
@ -0,0 +1,87 @@
|
||||
|
||||
"""
    AbstractDevice

Abstract supertype of all device types. [`AbstractCPU`](@ref) and [`AbstractGPU`](@ref) derive from it.
"""
abstract type AbstractDevice end
|
||||
|
||||
"""
    AbstractCPU <: AbstractDevice

Abstract supertype for CPU devices.
"""
abstract type AbstractCPU <: AbstractDevice end
|
||||
|
||||
"""
    AbstractGPU <: AbstractDevice

Abstract supertype for GPU devices.
"""
abstract type AbstractGPU <: AbstractDevice end
|
||||
|
||||
"""
|
||||
Machine
|
||||
|
||||
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
|
||||
|
||||
See also: [`Scheduler`](@ref)
|
||||
"""
|
||||
struct Machine
    # all devices of the machine, of any `AbstractDevice` subtype
    devices::Vector{AbstractDevice}

    # device-to-device transfer rates
    # NOTE(review): presumably indexed like `devices` on both axes — confirm where it is filled
    transferRates::Matrix{Float64}
end
|
||||
|
||||
"""
|
||||
CacheStrategy
|
||||
|
||||
Abstract base type for caching strategies.
|
||||
|
||||
See also: [`strategies`](@ref)
|
||||
"""
|
||||
abstract type CacheStrategy end # subtyped by `LocalVariables` and `Dictionary`
|
||||
|
||||
"""
    LocalVariables <: CacheStrategy

Field-less marker type selecting a caching strategy via dispatch. Presumably cached values are kept in plain local variables of the generated code — confirm against the `gen_access_expr` methods.
"""
struct LocalVariables <: CacheStrategy end
|
||||
"""
    Dictionary <: CacheStrategy

Field-less marker type selecting a caching strategy via dispatch. Presumably cached values are kept in a dictionary — no implementation is visible here, confirm before relying on it.
"""
struct Dictionary <: CacheStrategy end
|
||||
|
||||
"""
|
||||
DEVICE_TYPES::Vector{Type}
|
||||
|
||||
Global vector of available and implemented device types. Each implementation of a [`AbstractDevice`](@ref) should add its concrete type to this vector.
|
||||
|
||||
See also: [`device_types`](@ref), [`get_devices`](@ref)
|
||||
"""
|
||||
const DEVICE_TYPES = Vector{Type}() # const binding; the vector itself still grows via `push!`
|
||||
|
||||
"""
|
||||
CACHE_STRATEGIES::Dict{Type, Vector{CacheStrategy}}
|
||||
|
||||
Global dictionary of available caching strategies per device. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary.
|
||||
|
||||
See also: [`strategies`](@ref)
|
||||
"""
|
||||
const CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}() # const binding; entries are still added per device type
|
||||
|
||||
"""
|
||||
default_strategy(device::AbstractDevice)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device.
|
||||
"""
|
||||
function default_strategy end # generic function only; methods are added per device type
|
||||
|
||||
"""
|
||||
get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector{AbstractDevice}` holding one instance of the given [`AbstractDevice`](@ref) type for each such device available on the current machine.
|
||||
"""
|
||||
function get_devices end # generic function only; methods are added per device type
|
||||
|
||||
"""
|
||||
measure_device!(device::AbstractDevice; verbose::Bool)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes into it.
|
||||
"""
|
||||
function measure_device! end # generic function only; methods are added per device type
|
||||
|
||||
"""
|
||||
gen_cache_init_code(device::AbstractDevice, strategy::CacheStrategy)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an [`Expr`](@ref) initializing this device's variable cache.
|
||||
|
||||
The strategy is passed as a [`CacheStrategy`](@ref) instance (for example `LocalVariables()`), not as a symbol.
|
||||
"""
|
||||
function gen_cache_init_code end # generic function only; methods are added per device type and strategy
|
||||
|
||||
"""
|
||||
gen_access_expr(device::AbstractDevice, strategy::CacheStrategy, symbol::Symbol)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
|
||||
Return an `Expr` accessing the variable identified by `symbol`.
|
||||
"""
|
||||
function gen_access_expr end # generic function only; methods are added per device type and strategy
|
@ -17,34 +17,6 @@ end
|
||||
Measure the transfer rates between devices in the machine.
|
||||
"""
|
||||
function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.isinteractive())
    # `Base.isinteractive()` is the callable accessor; `Base.is_interactive` is the
    # underlying `Bool` and cannot be called, so it must not be used as the default.

    # TODO implement: nothing is measured yet and `machine` is left untouched.
    return nothing
end
|
||||
|
||||
function measure_device!(device::NumaNode; verbose::Bool)
    # `verbose` has no default here, so callers must pass it explicitly.
    verbose && println("Measuring Numa Node $(device.numaId)")
    return nothing
end
|
||||
|
||||
function measure_device!(device::CUDAGPU; verbose::Bool)
    # `verbose` has no default here, so callers must pass it explicitly.
    verbose && println("Measuring CUDA GPU $(device.device)")
    return nothing
end
|
||||
|
||||
function measure_device!(device::ROCmGPU; verbose::Bool)
    # `verbose` has no default here, so callers must pass it explicitly.
    verbose && println("Measuring ROCm GPU $(device.device)")
    return nothing
end
|
||||
|
||||
function measure_device!(device::oneAPIGPU; verbose::Bool)
    # `verbose` has no default here, so callers must pass it explicitly.
    verbose && println("Measuring oneAPI GPU $(device.device)")

    # TODO implement
    return nothing
end
|
||||
|
61
src/devices/numa/impl.jl
Normal file
61
src/devices/numa/impl.jl
Normal file
@ -0,0 +1,61 @@
|
||||
using NumaAllocators
|
||||
|
||||
"""
    NumaNode <: AbstractCPU

CPU device representing one NUMA node.

# Fields
- `numaId`: id of the NUMA node.
- `threads`: presumably the number of threads on the node — confirm; `get_devices` creates it as 1.
- `FLOPS`: presumably the measured compute speed — confirm; `get_devices` creates it as -1.
"""
mutable struct NumaNode <: AbstractCPU
    numaId::UInt16
    threads::UInt16
    FLOPS::Float64
end
|
||||
|
||||
# Add the type to the global registry of device types.
push!(DEVICE_TYPES, NumaNode)

# `LocalVariables` is the only strategy registered for NUMA nodes.
CACHE_STRATEGIES[NumaNode] = CacheStrategy[LocalVariables()]

function default_strategy(::NumaNode)
    return LocalVariables()
end
|
||||
|
||||
function measure_device!(device::NumaNode; verbose::Bool)
    # `verbose` has no default here, so callers must pass it explicitly.
    verbose && println("Measuring Numa Node $(device.numaId)")

    # TODO implement
    return nothing
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
|
||||
|
||||
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
    # `highest_numa_node()` is treated as the largest zero-based node id, hence the `+ 1`
    # when reporting the count.
    noNumaNodes = highest_numa_node()
    verbose && println("Found $(noNumaNodes + 1) NUMA nodes")

    # Every node is created with one thread and FLOPS = -1.
    return AbstractDevice[NumaNode(i, 1, -1) for i in 0:noNumaNodes]
end
|
||||
|
||||
"""
|
||||
gen_cache_init_code(device::NumaNode, strategy::LocalVariables)
|
||||
|
||||
Generate code for initializing the [`LocalVariables`](@ref) strategy on a [`NumaNode`](@ref).
|
||||
"""
|
||||
function gen_cache_init_code(::NumaNode, ::LocalVariables)
    # Local variables need no set-up, so return an empty block: a valid expression that
    # evaluates to `nothing`. A bare `Expr()` is not usable here because the constructor
    # requires at least a head argument and throws without one.
    return Expr(:block)
end
|
||||
|
||||
"""
|
||||
gen_access_expr(device::NumaNode, strategy::LocalVariables, symbol::Symbol)
|
||||
|
||||
Generate code to access the variable designated by `symbol` using the [`LocalVariables`](@ref) [`CacheStrategy`](@ref) on a [`NumaNode`](@ref).
|
||||
"""
|
||||
function gen_access_expr(::NumaNode, ::LocalVariables, symbol::Symbol)
    # Build the text `:(data_<symbol>)` and let the parser turn it into an expression.
    # NOTE(review): the parsed result is a quoted symbol, not the bare variable name —
    # confirm this is what the code generator expects.
    quoted = string(":(", Symbol("data_$symbol"), ")")
    return Meta.parse(quoted)
end
|
47
src/devices/oneapi/impl.jl
Normal file
47
src/devices/oneapi/impl.jl
Normal file
@ -0,0 +1,47 @@
|
||||
using oneAPI
|
||||
|
||||
"""
    oneAPIGPU <: AbstractGPU

GPU device wrapping one entry of `oneAPI.devices()`.

# Fields
- `device`: the backend device object; left untyped (`Any`).
- `FLOPS`: presumably the measured compute speed — confirm; `get_devices` creates it as -1.
"""
mutable struct oneAPIGPU <: AbstractGPU
    device
    FLOPS::Float64
end
|
||||
|
||||
# Add the type to the global registry of device types.
push!(DEVICE_TYPES, oneAPIGPU)

# `LocalVariables` is the only strategy registered for oneAPI GPUs.
CACHE_STRATEGIES[oneAPIGPU] = CacheStrategy[LocalVariables()]

function default_strategy(::oneAPIGPU)
    return LocalVariables()
end
|
||||
|
||||
function measure_device!(device::oneAPIGPU; verbose::Bool)
    # `verbose` has no default here, so callers must pass it explicitly.
    verbose && println("Measuring oneAPI GPU $(device.device)")

    # TODO implement
    return nothing
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
|
||||
|
||||
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
    devices = Vector{AbstractDevice}()

    if oneAPI.functional()
        oneAPIDevices = oneAPI.devices()
        verbose && println("Found $(length(oneAPIDevices)) oneAPI devices")

        # Wrap each backend device; FLOPS is created as -1.
        foreach(oneAPIDevices) do device
            push!(devices, oneAPIGPU(device, -1))
        end
    else
        # Without a functional oneAPI the result stays empty.
        verbose && println("oneAPI is non-functional")
    end

    return devices
end
|
47
src/devices/rocm/impl.jl
Normal file
47
src/devices/rocm/impl.jl
Normal file
@ -0,0 +1,47 @@
|
||||
using AMDGPU
|
||||
|
||||
"""
    ROCmGPU <: AbstractGPU

GPU device wrapping one entry of `AMDGPU.devices()`.

# Fields
- `device`: the backend device object; left untyped (`Any`).
- `FLOPS`: presumably the measured compute speed — confirm; `get_devices` creates it as -1.
"""
mutable struct ROCmGPU <: AbstractGPU
    device
    FLOPS::Float64
end
|
||||
|
||||
# Add the type to the global registry of device types.
push!(DEVICE_TYPES, ROCmGPU)

# `LocalVariables` is the only strategy registered for ROCm GPUs.
CACHE_STRATEGIES[ROCmGPU] = CacheStrategy[LocalVariables()]

function default_strategy(::ROCmGPU)
    return LocalVariables()
end
|
||||
|
||||
function measure_device!(device::ROCmGPU; verbose::Bool)
    # `verbose` has no default here, so callers must pass it explicitly.
    verbose && println("Measuring ROCm GPU $(device.device)")

    # TODO implement
    return nothing
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
|
||||
|
||||
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
    devices = Vector{AbstractDevice}()

    if AMDGPU.functional()
        AMDDevices = AMDGPU.devices()
        verbose && println("Found $(length(AMDDevices)) AMD devices")

        # Wrap each backend device; FLOPS is created as -1.
        foreach(AMDDevices) do device
            push!(devices, ROCmGPU(device, -1))
        end
    else
        # Without a functional AMDGPU the result stays empty.
        verbose && println("AMDGPU is non-functional")
    end

    return devices
end
|
@ -1,40 +0,0 @@
|
||||
|
||||
# Root of the device type hierarchy and its two families.
abstract type Device end
abstract type CPU <: Device end
abstract type GPU <: Device end

# CPU device: one NUMA node.
mutable struct NumaNode <: CPU
    numaId::UInt16
    threads::UInt16
    FLOPS::Float64
end

# GPU devices: each wraps a backend device object, left untyped (`Any`).
mutable struct CUDAGPU <: GPU
    device # TODO: what's the cuda device type?
    FLOPS::Float64
end

mutable struct ROCmGPU <: GPU
    device
    FLOPS::Float64
end

mutable struct oneAPIGPU <: GPU
    device
    FLOPS::Float64
end
|
||||
|
||||
"""
|
||||
Machine
|
||||
|
||||
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
|
||||
|
||||
See also: [`Scheduler`](@ref)
|
||||
"""
|
||||
struct Machine
    # all devices of the machine, of any `Device` subtype
    devices::Vector{Device}

    # device-to-device transfer rates
    # NOTE(review): presumably indexed like `devices` on both axes — confirm where it is filled
    transferRates::Matrix{Float64}
end
|
@ -5,6 +5,8 @@ using QEDbase
|
||||
include("../examples/profiling_utilities.jl")
|
||||
|
||||
@testset "Unit Tests Execution" begin
|
||||
machine = get_machine_info()
|
||||
|
||||
process_2_2 = ABCProcessDescription(
|
||||
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
|
||||
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
|
||||
@ -26,9 +28,9 @@ include("../examples/profiling_utilities.jl")
|
||||
@testset "AB->AB no optimization" begin
|
||||
for _ in 1:10 # test in a loop because graph layout should not change the result
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
|
||||
@test isapprox(execute(graph, process_2_2, particles_2_2), expected_result; rtol = 0.001)
|
||||
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = 0.001)
|
||||
|
||||
func = get_compute_function(graph, process_2_2)
|
||||
func = get_compute_function(graph, process_2_2, machine)
|
||||
@test isapprox(func(particles_2_2), expected_result; rtol = 0.001)
|
||||
end
|
||||
end
|
||||
@ -39,7 +41,7 @@ include("../examples/profiling_utilities.jl")
|
||||
random_walk!(graph, 50)
|
||||
@test is_valid(graph)
|
||||
|
||||
@test isapprox(execute(graph, process_2_2, particles_2_2), expected_result; rtol = 0.001)
|
||||
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = 0.001)
|
||||
end
|
||||
end
|
||||
|
||||
@ -49,14 +51,14 @@ include("../examples/profiling_utilities.jl")
|
||||
)
|
||||
particles_2_4 = gen_process_input(process_2_4)
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
|
||||
expected_result = execute(graph, process_2_4, particles_2_4)
|
||||
expected_result = execute(graph, process_2_4, machine, particles_2_4)
|
||||
|
||||
@testset "AB->ABBB no optimization" begin
|
||||
for _ in 1:5 # test in a loop because graph layout should not change the result
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
|
||||
@test isapprox(execute(graph, process_2_4, particles_2_4), expected_result; rtol = 0.001)
|
||||
@test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = 0.001)
|
||||
|
||||
func = get_compute_function(graph, process_2_4)
|
||||
func = get_compute_function(graph, process_2_4, machine)
|
||||
@test isapprox(func(particles_2_4), expected_result; rtol = 0.001)
|
||||
end
|
||||
end
|
||||
@ -67,7 +69,7 @@ include("../examples/profiling_utilities.jl")
|
||||
random_walk!(graph, 100)
|
||||
@test is_valid(graph)
|
||||
|
||||
@test isapprox(execute(graph, process_2_4, particles_2_4), expected_result; rtol = 0.001)
|
||||
@test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = 0.001)
|
||||
end
|
||||
end
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user