WIP Adding machine/device info and caching strategies

This commit is contained in:
Anton Reinhard 2023-09-29 18:02:57 +02:00
parent afb6af44ca
commit 37d645cb4e
14 changed files with 469 additions and 293 deletions

View File

@ -15,7 +15,6 @@ QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

View File

@ -5,13 +5,44 @@
"execution_count": 1,
"id": "20768e45-df62-4638-ba33-b0ccf239f1aa",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"┌ Error: Failed to initialize oneAPI\n",
"│ exception = (oneAPI.oneL0.ZeError(oneAPI.oneL0.ZE_RESULT_ERROR_UNINITIALIZED), Union{Ptr{Nothing}, Base.InterpreterIP}[Ptr{Nothing} @0x00007f1860405f02, Ptr{Nothing} @0x00007f18604068dc, Ptr{Nothing} @0x00007f1860406900, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e7a7a4, Ptr{Nothing} @0x00007f1967e5e3bc, Ptr{Nothing} @0x00007f1953cecc65, Ptr{Nothing} @0x00007f19480c5c72, Ptr{Nothing} @0x00007f19480cb52c, Ptr{Nothing} @0x00007f19480c79a6, Ptr{Nothing} @0x00007f19480f67de, Ptr{Nothing} @0x00007f19480f6b4d, Ptr{Nothing} @0x00007f19480d2439, Ptr{Nothing} @0x00007f19480d40e4, Ptr{Nothing} @0x00007f19480d4286, Ptr{Nothing} @0x00007f19480d0c2a, Ptr{Nothing} @0x00007f19480d1216, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e7a338, Ptr{Nothing} @0x00007f1967e7c0e8, Ptr{Nothing} @0x00007f1967e7ba79, Ptr{Nothing} @0x00007f1967e7cdfa, Ptr{Nothing} @0x00007f19480f31ff, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e51f48, Ptr{Nothing} @0x00007f19480f1eb9, Ptr{Nothing} @0x00007f19480f2c78, Ptr{Nothing} @0x00007f19480f2f21, Ptr{Nothing} @0x00007f19480f2f56, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480e8bb1, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480ddd7c, Ptr{Nothing} @0x00007f19480dde82, Ptr{Nothing} @0x00007f19480ddead, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480d5001, Ptr{Nothing} @0x00007f1967e7b5db, Ptr{Nothing} @0x00007f1967e7ba79, Ptr{Nothing} @0x00007f1967e7cdfa, Ptr{Nothing} @0x00007f1953d26667, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1953b5abfd, Ptr{Nothing} @0x00007f19533d6e93, Ptr{Nothing} @0x00007f19533d6eaf, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1953d5c06e, Ptr{Nothing} @0x00007f1953786d4f, Ptr{Nothing} @0x00007f1953786e58, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967ea7a15, Ptr{Nothing} @0x00007f1967ea845e, Ptr{Nothing} @0x0000000000401088, Ptr{Nothing} @0x00007f1968c27ccf, Ptr{Nothing} @0x00007f1968c27d89, 
Ptr{Nothing} @0x00000000004010b8])\n",
"└ @ oneAPI.oneL0 /home/antonr/.julia/packages/oneAPI/g8ZUm/lib/level-zero/oneL0.jl:103\n"
]
}
],
"source": [
"using Revise\n",
"using MetagraphOptimization\n",
"using BenchmarkTools"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ff5f4a49",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Machine(MetagraphOptimization.AbstractDevice[MetagraphOptimization.NumaNode(0x0000, 0x0001, -1.0)], [-1.0;;])"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Get our machine's info\n",
"machine = get_machine_info()"
]
},
{
"cell_type": "code",
"execution_count": 2,
@ -24,9 +55,8 @@
"ABCModel()"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
@ -46,9 +76,8 @@
"ABC Process: 'AB->ABBB'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
@ -59,17 +88,10 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"id": "043bd9e2-f89a-4362-885a-8c89d4cdd76f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total: 280, ComputeTaskS2"
]
},
{
"data": {
"text/plain": [
@ -81,17 +103,16 @@
" Total Compute Intensity: 0.09822733918128655\n"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
": 24, DataTask: 143, \n",
" ComputeTaskU: 6, ComputeTaskSum: 1, ComputeTaskS1: 36, \n",
" ComputeTaskV: 64, ComputeTaskP: 6"
"Total: 280, DataTask: 143, ComputeTaskV: 64, \n",
" ComputeTaskS2: 24, ComputeTaskS1: 36, ComputeTaskP: 6, \n",
" ComputeTaskSum: 1, ComputeTaskU: 6"
]
}
],
@ -102,7 +123,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 8,
"id": "02f01ad3-fd10-48d5-a0e0-c03dc83c80a4",
"metadata": {},
"outputs": [
@ -110,19 +131,18 @@
"data": {
"text/plain": [
"Input for ABC Process: 'AB->ABBB':\n",
" Input particles:\n",
" A: [5.77986599979293, 0.0, 0.0, 5.692701553354288]\n",
" B: [5.77986599979293, 0.0, 0.0, -5.692701553354288]\n",
" Output Particles:\n",
" A: [-3.8835293143673746, -1.4292027910861678, 2.8576090179942106, 1.968057422378813]\n",
" B: [-1.1554024905063585, -0.1464656500147254, -0.2082400426692148, 0.5197487980391896]\n",
" B: [-2.849749730594798, -1.0177034035100576, -2.464951858896686, -0.09677625137882176]\n",
" B: [-3.6710504641173287, 2.5933718446109513, -0.1844171164283155, -2.391029969039186]\n"
" 2 Incoming particles:\n",
" B: [3.6884461652243594, 0.0, 0.0, 3.5503007075117288]\n",
" A: [3.6884461652243594, 0.0, 0.0, -3.5503007075117288]\n",
" 4 Outgoing Particles:\n",
" B: [-2.1242475430909664, -0.8503576743556215, -0.7677870655372816, 1.4831798515305399]\n",
" B: [-1.7067195416668486, 1.3493031523092422, -0.2241887179000338, 0.20496832887590027]\n",
" B: [-2.276483767911827, 0.19358394881362065, 1.325763825770123, -1.5450740689711668]\n",
" A: [-1.2694414777790763, -0.6925294267672414, -0.3337880423328074, -0.14307411143527338]\n"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
@ -132,41 +152,39 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 9,
"id": "083fb1be-ce2a-47f9-afb9-60a6fdfaed0b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"compute_2f2a5dca_5e3a_11ee_3973_bfa5c42acee3 (generic function with 1 method)"
"compute_2430665c_5ee0_11ee_1bd6_cf00927a1f40 (generic function with 1 method)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
"# Get the function computing the result of the process from a ProcessInput\n",
"AB_AB3_compute = get_compute_function(graph, process)"
"AB_AB3_compute = get_compute_function(graph, process, machine)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"id": "a40c9500-8f79-4f04-b3c5-59b72a6b7ba9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1.8924431710735022e-13"
"-1.0304553507713216e-11"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
@ -176,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 11,
"id": "80c70010",
"metadata": {},
"outputs": [],
@ -189,18 +207,18 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 13,
"id": "5b192b44",
"metadata": {},
"outputs": [],
"source": [
"# The result should be the same as before (we can use execute to save having to generate the function ourselves)\n",
"@assert result ≈ execute(graph, process, input_data)"
"@assert result ≈ execute(graph, process, machine, input_data)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 15,
"id": "9b2f4a3f",
"metadata": {},
"outputs": [
@ -208,71 +226,63 @@
"data": {
"text/plain": [
"1000-element Vector{Float64}:\n",
" -2.1491995259940396e-11\n",
" -1.04995646459455e-11\n",
" 5.8217606911879465e-15\n",
" -6.556969485683647e-14\n",
" -1.3588086164373804e-14\n",
" -1.8789662441593714e-13\n",
" -2.1319733018358848e-13\n",
" -5.335975907200479e-12\n",
" -9.053914191490206e-13\n",
" -5.611079017069238e-13\n",
" -5.063492275603427e-11\n",
" 2.9168508985810955e-15\n",
" -1.6420151378194154e-13\n",
" -2.1503212387420805e-13\n",
" -3.515624153345333e-15\n",
" -9.952937612602425e-14\n",
" -9.153444987973305e-14\n",
" -9.219799628850346e-12\n",
" -1.0155811880772751e-13\n",
" -2.278823085449527e-13\n",
" -8.070463578419918e-14\n",
" -1.838772437827446e-13\n",
" -4.109239393253953e-12\n",
" ⋮\n",
" 1.0931677247833414e-13\n",
" -7.704755306464438e-16\n",
" -1.8385907037491413e-12\n",
" -6.036215596560112e-14\n",
" -9.988724014003618e-12\n",
" 3.4861755637292744e-13\n",
" -1.1051119822969222e-10\n",
" -2.4965725132161986e-12\n",
" -3.868242784720193e-11\n",
" 7.904149696653533e-15\n",
" -7.606811743178718e-11\n",
" -5.100594937480289e-13"
" -9.249672758783865e-13\n",
" -1.1230011623854415e-11\n",
" 3.0412717788587913e-12\n",
" -7.618572966754196e-12\n",
" -1.771698295445229e-14\n",
" -3.128112746753292e-11\n",
" -3.9791519593567145e-12\n",
" -4.4850922847751944e-15\n",
" 1.0995512644879932e-14"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
"# Now we can generate a function and use it on lots of inputs\n",
"inputs = [gen_process_input(process) for _ in 1:1000]\n",
"AB_AB3_reduced_compute = get_compute_function(graph, process)\n",
"AB_AB3_reduced_compute = get_compute_function(graph, process, machine)\n",
"\n",
"results = AB_AB3_reduced_compute.(inputs)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 16,
"id": "d43e4ff0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BenchmarkTools.Trial: 1453 samples with 1 evaluation.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.596 ms\u001b[22m\u001b[39m … \u001b[35m6.983 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 27.07%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m2.715 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m3.443 ms\u001b[22m\u001b[39m ± \u001b[32m1.068 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m4.39% ± 8.86%\n",
"BenchmarkTools.Trial: 1366 samples with 1 evaluation.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.671 ms\u001b[22m\u001b[39m … \u001b[35m9.612 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 44.01%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m3.007 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m 0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m3.654 ms\u001b[22m\u001b[39m ± \u001b[32m1.320 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m11.60% ± 16.57%\n",
"\n",
" \u001b[39m▇\u001b[39m█\u001b[34m▃\u001b[39m\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m▂\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
" \u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▆\u001b[39m▇\u001b[32m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▅\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▆\u001b[39m▅\u001b[39m▇\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m \u001b[39m█\n",
" 2.6 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 6.44 ms \u001b[0m\u001b[1m<\u001b[22m\n",
" \u001b[39m▅\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▆\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\n",
" \u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[32m▇\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m \u001b[39m█\n",
" 2.67 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 8.27 ms \u001b[0m\u001b[1m<\u001b[22m\n",
"\n",
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m6.17 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m143004\u001b[39m."
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
@ -281,28 +291,27 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 17,
"id": "e18d9546",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BenchmarkTools.Trial: 1890 samples with 1 evaluation.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.007 ms\u001b[22m\u001b[39m … \u001b[35m 5.588 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 29.53%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m2.086 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m2.644 ms\u001b[22m\u001b[39m ± \u001b[32m840.095 μs\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m5.18% ± 10.46%\n",
"BenchmarkTools.Trial: 1772 samples with 1 evaluation.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m2.065 ms\u001b[22m\u001b[39m … \u001b[35m8.468 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 44.06%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m2.158 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m 0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m2.820 ms\u001b[22m\u001b[39m ± \u001b[32m1.195 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m11.39% ± 16.96%\n",
"\n",
" \u001b[39m█\u001b[34m▇\u001b[39m\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
" \u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[32m▅\u001b[39m\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▄\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▅\u001b[39m▄\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m \u001b[39m█\n",
" 2.01 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 5.32 ms \u001b[0m\u001b[1m<\u001b[22m\n",
" \u001b[39m█\u001b[34m▆\u001b[39m\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
" \u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[32m▅\u001b[39m\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▁\u001b[39m▅\u001b[39m▃\u001b[39m▃\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▆\u001b[39m \u001b[39m█\n",
" 2.06 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 7.41 ms \u001b[0m\u001b[1m<\u001b[22m\n",
"\n",
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m5.26 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m123004\u001b[39m."
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [

View File

@ -57,6 +57,9 @@ export ParticleValue
export ParticleA, ParticleB, ParticleC
export ABCProcessDescription, ABCProcessInput, ABCModel
export Machine
export get_machine_info
export ==, in, show, isempty, delete!, length
export bytes_to_human_readable
@ -127,6 +130,18 @@ include("models/abc/properties.jl")
include("models/abc/parse.jl")
include("models/abc/print.jl")
include("devices/interface.jl")
include("devices/measure.jl")
include("devices/detect.jl")
include("devices/impl.jl")
include("devices/numa/impl.jl")
include("devices/cuda/impl.jl")
# cannot currently use AMDGPU because of an incompatibility with the newest ROCm drivers
# include("devices/rocm/impl.jl")
# oneapi seems also broken for now
# include("devices/oneapi/impl.jl")
include("code_gen/main.jl")
end # module MetagraphOptimization

View File

@ -25,7 +25,7 @@ function gen_code(graph::DAG)
inputSyms[node.name] = Vector{Symbol}()
end
push!(inputSyms[node.name], Symbol("data_$(to_var_name(node.id))_in"))
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
end
node = nothing
@ -45,7 +45,7 @@ function gen_code(graph::DAG)
end
# node is now the last node we looked at -> the output node
outSym = Symbol("data_$(to_var_name(node.id))")
outSym = Symbol("$(to_var_name(node.id))")
return (code = Expr(:block, code...), inputSymbols = inputSyms, outputSymbol = outSym)
end
@ -53,6 +53,7 @@ end
function gen_input_assignment_code(
inputSymbols::Dict{String, Vector{Symbol}},
processDescription::AbstractProcessDescription,
machine::Machine,
processInputSymbol::Symbol = :input,
)
@assert length(inputSymbols) >=
@ -75,7 +76,11 @@ function gen_input_assignment_code(
end
for symbol in symbols
push!(assignInputs, Meta.parse("$(symbol) = ParticleValue($p, 1.0)"))
# TODO generate correct access expression
# TODO how to define cache strategies?
device = machine.devices[1]
evalExpr = eval(gen_access_expr(device, default_strategy(device), symbol))
push!(assignInputs, Meta.parse("$(evalExpr) = ParticleValue($p, 1.0)"))
end
end
@ -83,19 +88,23 @@ function gen_input_assignment_code(
end
"""
get_compute_function(graph::DAG, process::AbstractProcessDescription)
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
"""
function get_compute_function(graph::DAG, process::AbstractProcessDescription)
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
(code, inputSymbols, outputSymbol) = gen_code(graph)
assignInputs = gen_input_assignment_code(inputSymbols, process, :input)
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
function_id = to_var_name(UUIDs.uuid1(rng[1]))
# TODO generate correct access expression
# TODO how to define cache strategies?
device = machine.devices[1]
functionId = to_var_name(UUIDs.uuid1(rng[1]))
func = eval(
Meta.parse(
"function compute_$(function_id)(input::AbstractProcessInput) $assignInputs; $code; return $outputSymbol; end",
"function compute_$(functionId)(input::AbstractProcessInput) $assignInputs; $code; return $(eval(gen_access_expr(device, default_strategy(device), outputSymbol))); end",
),
)
@ -103,7 +112,7 @@ function get_compute_function(graph::DAG, process::AbstractProcessDescription)
end
"""
execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
Execute the code of the given `graph` on the given input particles.
@ -115,8 +124,8 @@ This is essentially shorthand for
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
"""
function execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
func = get_compute_function(graph, process)
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
func = get_compute_function(graph, process, machine)
result = 0
try

47
src/devices/cuda/impl.jl Normal file
View File

@ -0,0 +1,47 @@
using CUDA
"""
    CUDAGPU <: AbstractGPU

Representation of a single CUDA GPU.

# Fields
- `device`: handle of the underlying CUDA device, as yielded by iterating `CUDA.devices()`.
- `FLOPS`: `get_devices` initializes this to `-1`. NOTE(review): presumably the compute speed that `measure_device!` is meant to fill in -- confirm.
"""
mutable struct CUDAGPU <: AbstractGPU
device::Any # TODO: what's the cuda device type? NOTE(review): presumably `CUDA.CuDevice` -- confirm
FLOPS::Float64
end
# Register the CUDA backend: announce the device type, list the caching
# strategies it supports, and name the one used by default.
push!(DEVICE_TYPES, CUDAGPU)
CACHE_STRATEGIES[CUDAGPU] = CacheStrategy[LocalVariables()]

function default_strategy(::CUDAGPU)
    return LocalVariables()
end
"""
    measure_device!(device::CUDAGPU; verbose::Bool)

Measurement hook for a CUDA GPU. Currently a stub: when `verbose` is true it prints which
device would be measured, and it always returns `nothing` without modifying `device`.
"""
function measure_device!(device::CUDAGPU; verbose::Bool)
    # TODO implement
    verbose && println("Measuring CUDA GPU $(device.device)")
    return nothing
end
"""
    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}

Wrap every device reported by `CUDA.devices()` in a [`CUDAGPU`](@ref) and return them as a
`Vector{AbstractDevice}`. The vector is empty when `CUDA.functional()` is false.
If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
    gpus = Vector{AbstractDevice}()

    if CUDA.functional()
        handles = CUDA.devices()
        verbose && println("Found $(length(handles)) CUDA devices")
        for handle in handles
            # FLOPS has not been measured at this point; -1 is the initial value
            push!(gpus, CUDAGPU(handle, -1))
        end
    elseif verbose
        println("CUDA is non-functional")
    end

    return gpus
end

View File

@ -1,127 +1,23 @@
using NumaAllocators
using CUDA
using ROCm
using oneAPI
"""
get_machine_info(verbose::Bool)
Return the [`Machine`](@ref) currently running on. The parameter `verbose` defaults to true when interactive.
"""
function get_machine_info(verbose::Bool = Base.is_interactive())
devices = Vector{Device}()
function get_machine_info(; verbose::Bool = Base.is_interactive)
devices = Vector{AbstractDevice}()
numaDevices = get_numa_devices(verbose)
push!(devices, numaDevices)
cudaDevices = get_cuda_devices(verbose)
push!(devices, cudaDevices)
rocmDevices = get_rocm_devices(verbose)
push!(devices, rocmDevices)
oneapiDevices = get_oneapi_devices(verbose)
push!(devices, oneapiDevices)
for device in device_types()
devs = get_devices(device, verbose = verbose)
for dev in devs
push!(devices, dev)
end
end
noDevices = length(devices)
@assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"
return Machine(devices, transferRates::Matrix{Float64}(-1, noDevices, noDevices))
end
"""
get_numa_devices(verbose::Bool)
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_numa_devices(verbose::Bool)
devices = Vector{Device}()
noNumaNodes = highest_numa_node()
if (verbose)
println("Found $(noNumaNodes + 1) NUMA nodes")
end
for i in 0:noNumaNodes
push!(devices, NumaNode(i, 1, -1))
end
return devices
end
"""
get_cuda_devices(verbose::Bool)
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_cuda_devices(verbose::Bool)
devices = Vector{Device}()
if !CUDA.functional()
if verbose
println("CUDA is non-functional")
end
return devices
end
CUDADevices = CUDA.devices()
if verbose
println("Found $(length(CUDADevices)) CUDA devices")
end
for device in CUDADevices
push!(devices, CUDAGPU(device, -1))
end
return devices
end
"""
get_rocm_devices(verbose::Bool)
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_rocm_devices(verbose::Bool)
devices = Vector{Device}()
if !AMDGPU.functional()
if verbose
println("AMDGPU is non-functional")
end
return devices
end
AMDDevices = AMDGPU.devices()
if verbose
println("Found $(length(AMDDevices)) AMD devices")
end
for device in AMDDevices
push!(devices, ROCmGPU(device, -1))
end
return devices
end
"""
get_oneapi_devices(verbose::Bool)
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_oneapi_devices(verbose::Bool)
devices = Vector{Device}()
if !oneAPI.functional()
if verbose
println("oneAPI is non-functional")
end
return devices
end
oneAPIDevices = oneAPI.devices()
if verbose
println("Found $(length(oneAPIDevices)) oneAPI devices")
end
for device in oneAPIDevices
push!(devices, oneAPIGPU(device, -1))
end
return devices
transferRates = Matrix{Float64}(undef, noDevices, noDevices)
fill!(transferRates, -1)
return Machine(devices, transferRates)
end

25
src/devices/impl.jl Normal file
View File

@ -0,0 +1,25 @@
"""
    device_types()

Return the global vector [`DEVICE_TYPES`](@ref), i.e. every available and implemented
device type.
"""
device_types() = DEVICE_TYPES
"""
    strategies(t::Type{T}) where {T <: AbstractDevice}

Look up the [`CacheStrategy`](@ref)s registered in `CACHE_STRATEGIES` for the device type
`t`, which must be a subtype of [`AbstractDevice`](@ref). Raises an error if `t` has no
entry.

The caching strategies are used in code generation.
"""
function strategies(t::Type{T}) where {T <: AbstractDevice}
    # single lookup; the fallback closure only runs when `t` is not registered
    return get(CACHE_STRATEGIES, t) do
        error("Trying to get strategies for $T, but it has no strategies defined!")
    end
end

87
src/devices/interface.jl Normal file
View File

@ -0,0 +1,87 @@
"""
    AbstractDevice

Abstract base type of every device a [`Machine`](@ref) can contain.
"""
abstract type AbstractDevice end

"""
    AbstractCPU <: AbstractDevice

Abstract base type for CPU-like devices.
"""
abstract type AbstractCPU <: AbstractDevice end

"""
    AbstractGPU <: AbstractDevice

Abstract base type for GPU devices.
"""
abstract type AbstractGPU <: AbstractDevice end
"""
    Machine

A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.

See also: [`Scheduler`](@ref)
"""
struct Machine
# all devices of this machine
devices::Vector{AbstractDevice}
# NOTE(review): presumably a square matrix with one row/column per entry of `devices`,
# where `-1` marks a rate that has not been measured yet -- confirm against `get_machine_info`
transferRates::Matrix{Float64}
end
"""
    CacheStrategy

Abstract base type for caching strategies.

See also: [`strategies`](@ref)
"""
abstract type CacheStrategy end

"""
    LocalVariables <: CacheStrategy

Caching strategy marker type without fields.
NOTE(review): presumably the generated code keeps its values in plain local variables -- confirm against the `gen_access_expr` implementations.
"""
struct LocalVariables <: CacheStrategy end

"""
    Dictionary <: CacheStrategy

Caching strategy marker type without fields.
NOTE(review): presumably the generated code keeps its values in a dictionary -- confirm against the `gen_access_expr` implementations.
"""
struct Dictionary <: CacheStrategy end
"""
    DEVICE_TYPES::Vector{Type}

Global vector of available and implemented device types. Each implementation of an [`AbstractDevice`](@ref) should add its concrete type to this vector with `push!`.

See also: [`device_types`](@ref), [`get_devices`](@ref)
"""
# `const` fixes the binding (not the contents): implementations can still `push!` into the
# vector, while every function reading it gets a concretely typed global.
const DEVICE_TYPES = Vector{Type}()
"""
    CACHE_STRATEGIES::Dict{Type, Vector{CacheStrategy}}

Global dictionary mapping a device type to the vector of caching strategies available on it. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary.

See also: [`strategies`](@ref)
"""
# `const` fixes the binding (not the contents): implementations can still insert entries,
# while every function reading the dictionary gets a concretely typed global.
const CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}()
"""
    default_strategy(device::AbstractDevice)

Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device.
"""
function default_strategy end
"""
    get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}

Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector{AbstractDevice}` holding the devices of the given [`AbstractDevice`](@ref) type that are available on the current machine. If `verbose` is true, implementations may print some additional information.
"""
function get_devices end
"""
    measure_device!(device::AbstractDevice; verbose::Bool)

Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes into it. If `verbose` is true, implementations may print which device is being measured.
"""
function measure_device! end
"""
    gen_cache_init_code(device::AbstractDevice, strategy::CacheStrategy)

Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache.

The strategy is passed as an instance of a [`CacheStrategy`](@ref) subtype, so that implementations can dispatch on it.
"""
function gen_cache_init_code end
"""
    gen_access_expr(device::AbstractDevice, strategy::CacheStrategy, symbol::Symbol)

Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
Return an `Expr` accessing the variable identified by `symbol`.
"""
function gen_access_expr end

View File

@ -17,34 +17,6 @@ end
Measure the transfer rates between devices in the machine.
"""
function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.isinteractive())
    # `verbose` defaults to whether Julia runs as an interactive session.
    # `Base.isinteractive()` is the accessor function; `Base.is_interactive` is the
    # underlying `Bool` global and calling it would throw a `MethodError`.
    # Currently a no-op: `machine` is left untouched and `nothing` is returned.
    return nothing
end
function measure_device!(device::NumaNode; verbose::Bool)
    # Only announces the node when `verbose` is set; the device is not modified.
    verbose && println("Measuring Numa Node $(device.numaId)")
    return nothing
end
function measure_device!(device::CUDAGPU; verbose::Bool)
    # Only announces the GPU when `verbose` is set; the device is not modified.
    verbose && println("Measuring CUDA GPU $(device.device)")
    return nothing
end
function measure_device!(device::ROCmGPU; verbose::Bool)
    # Only announces the GPU when `verbose` is set; the device is not modified.
    verbose && println("Measuring ROCm GPU $(device.device)")
    return nothing
end
function measure_device!(device::oneAPIGPU; verbose::Bool)
    # Only announces the GPU when `verbose` is set; the device is not modified.
    verbose && println("Measuring oneAPI GPU $(device.device)")
    # TODO implement
    return nothing
end

61
src/devices/numa/impl.jl Normal file
View File

@ -0,0 +1,61 @@
using NumaAllocators
"""
    NumaNode <: AbstractCPU

Device type for a NUMA node of the machine. `get_devices` creates one instance for every node id from `0` up to `highest_numa_node()`.
"""
mutable struct NumaNode <: AbstractCPU
    # id of the NUMA node; `get_devices` assigns ids starting at 0
    numaId::UInt16
    # `get_devices` initializes this to 1; presumably the number of threads on the node -- TODO confirm
    threads::UInt16
    # `get_devices` initializes this to -1; presumably the measured compute speed -- TODO confirm
    FLOPS::Float64
end
# Register `NumaNode` as an implemented device type and list the cache strategies it supports.
push!(DEVICE_TYPES, NumaNode)
CACHE_STRATEGIES[NumaNode] = [LocalVariables()]

# `LocalVariables` is the strategy used by default on a `NumaNode`.
function default_strategy(::NumaNode)
    return LocalVariables()
end
function measure_device!(device::NumaNode; verbose::Bool)
    # Only announces the node when `verbose` is set; the device is not modified.
    verbose && println("Measuring Numa Node $(device.numaId)")
    # TODO implement
    return nothing
end
"""
    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}

Return a `Vector{AbstractDevice}` holding one [`NumaNode`](@ref) for every NUMA node available on the current machine. If `verbose` is true, print the number of nodes found.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
    # Node ids run from 0 up to and including the highest node id, hence the `+ 1` in the count.
    noNumaNodes = highest_numa_node()
    verbose && println("Found $(noNumaNodes + 1) NUMA nodes")

    # Every node starts with threads = 1 and FLOPS = -1.
    return AbstractDevice[NumaNode(nodeId, 1, -1) for nodeId in 0:noNumaNodes]
end
"""
    gen_cache_init_code(device::NumaNode, strategy::LocalVariables)

Generate code for initializing the [`LocalVariables`](@ref) strategy on a [`NumaNode`](@ref).

Nothing has to be initialized for this strategy, so the result is an empty `:block` expression, which evaluates to `nothing`.
"""
function gen_cache_init_code(::NumaNode, ::LocalVariables)
    # don't need to initialize anything
    # `Expr()` without a head throws an `ArgumentError`; an empty block is the valid "no code" expression.
    return Expr(:block)
end
"""
    gen_access_expr(device::NumaNode, strategy::LocalVariables, symbol::Symbol)

Generate code to access the variable designated by `symbol` using the [`LocalVariables`](@ref) [`CacheStrategy`](@ref) on a [`NumaNode`](@ref).

The result is a `QuoteNode` wrapping the symbol `data_<symbol>`; evaluating it yields that `Symbol`.
"""
function gen_access_expr(::NumaNode, ::LocalVariables, symbol::Symbol)
    s = Symbol("data_$symbol")
    # Build the quoted symbol directly. This is what parsing the string ":(data_<symbol>)"
    # yields for a valid identifier, but it needs no parser round trip and also works for
    # symbols that are not valid identifiers.
    return QuoteNode(s)
end

View File

@ -0,0 +1,47 @@
using oneAPI
"""
    oneAPIGPU <: AbstractGPU

Device type for a GPU accessed through oneAPI. `get_devices` creates one instance per entry of `oneAPI.devices()`.
"""
mutable struct oneAPIGPU <: AbstractGPU
    # device object as yielded by iterating `oneAPI.devices()`
    device::Any
    # `get_devices` initializes this to -1; presumably the measured compute speed -- TODO confirm
    FLOPS::Float64
end
# Register `oneAPIGPU` as an implemented device type and list the cache strategies it supports.
push!(DEVICE_TYPES, oneAPIGPU)
CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]

# `LocalVariables` is the strategy used by default on a `oneAPIGPU`.
function default_strategy(::oneAPIGPU)
    return LocalVariables()
end
function measure_device!(device::oneAPIGPU; verbose::Bool)
    # Only announces the GPU when `verbose` is set; the device is not modified.
    verbose && println("Measuring oneAPI GPU $(device.device)")
    # TODO implement
    return nothing
end
"""
    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}

Return a `Vector{AbstractDevice}` of the [`oneAPIGPU`](@ref)s available on the current machine. The vector is empty when oneAPI is not functional. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
    # Guard clause: without a functional oneAPI there is nothing to enumerate.
    if !oneAPI.functional()
        verbose && println("oneAPI is non-functional")
        return Vector{AbstractDevice}()
    end

    oneAPIDevices = oneAPI.devices()
    verbose && println("Found $(length(oneAPIDevices)) oneAPI devices")

    # Wrap every device; FLOPS starts at -1.
    gpus = Vector{AbstractDevice}()
    for dev in oneAPIDevices
        push!(gpus, oneAPIGPU(dev, -1))
    end
    return gpus
end

47
src/devices/rocm/impl.jl Normal file
View File

@ -0,0 +1,47 @@
using AMDGPU
"""
    ROCmGPU <: AbstractGPU

Device type for a GPU accessed through AMDGPU. `get_devices` creates one instance per entry of `AMDGPU.devices()`.
"""
mutable struct ROCmGPU <: AbstractGPU
    # device object as yielded by iterating `AMDGPU.devices()`
    device::Any
    # `get_devices` initializes this to -1; presumably the measured compute speed -- TODO confirm
    FLOPS::Float64
end
# Register `ROCmGPU` as an implemented device type and list the cache strategies it supports.
push!(DEVICE_TYPES, ROCmGPU)
CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]

# `LocalVariables` is the strategy used by default on a `ROCmGPU`.
function default_strategy(::ROCmGPU)
    return LocalVariables()
end
function measure_device!(device::ROCmGPU; verbose::Bool)
    # Only announces the GPU when `verbose` is set; the device is not modified.
    verbose && println("Measuring ROCm GPU $(device.device)")
    # TODO implement
    return nothing
end
"""
    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}

Return a `Vector{AbstractDevice}` of the [`ROCmGPU`](@ref)s available on the current machine. The vector is empty when AMDGPU is not functional. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
    # Guard clause: without a functional AMDGPU there is nothing to enumerate.
    if !AMDGPU.functional()
        verbose && println("AMDGPU is non-functional")
        return Vector{AbstractDevice}()
    end

    AMDDevices = AMDGPU.devices()
    verbose && println("Found $(length(AMDDevices)) AMD devices")

    # Wrap every device; FLOPS starts at -1.
    gpus = Vector{AbstractDevice}()
    for dev in AMDDevices
        push!(gpus, ROCmGPU(dev, -1))
    end
    return gpus
end

View File

@ -1,40 +0,0 @@
# Root of the device type hierarchy; `Machine` stores its devices as `Vector{Device}`.
abstract type Device end
# Supertype of CPU-like devices (`NumaNode` is a subtype).
abstract type CPU <: Device end
# CPU device describing a NUMA node.
mutable struct NumaNode <: CPU
    # id of the NUMA node
    numaId::UInt16
    # presumably the number of threads on this node -- TODO confirm
    threads::UInt16
    # presumably the measured compute speed of this node -- TODO confirm
    FLOPS::Float64
end
# Supertype of GPU devices (`CUDAGPU`, `ROCmGPU` and `oneAPIGPU` are subtypes).
abstract type GPU <: Device end
# GPU device for CUDA.
mutable struct CUDAGPU <: GPU
    # backend device object; left untyped
    device::Any # TODO: what's the cuda device type?
    # presumably the measured compute speed of this GPU -- TODO confirm
    FLOPS::Float64
end
# GPU device for ROCm.
mutable struct ROCmGPU <: GPU
    # backend device object; left untyped
    device::Any
    # presumably the measured compute speed of this GPU -- TODO confirm
    FLOPS::Float64
end
# GPU device for oneAPI.
mutable struct oneAPIGPU <: GPU
    # backend device object; left untyped
    device::Any
    # presumably the measured compute speed of this GPU -- TODO confirm
    FLOPS::Float64
end
"""
    Machine

A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.

See also: [`Scheduler`](@ref)
"""
struct Machine
    # all devices of the machine
    devices::Vector{Device}
    # presumably entry (i, j) is the transfer rate between devices i and j -- TODO confirm layout and units
    transferRates::Matrix{Float64}
end

View File

@ -5,6 +5,8 @@ using QEDbase
include("../examples/profiling_utilities.jl")
@testset "Unit Tests Execution" begin
machine = get_machine_info()
process_2_2 = ABCProcessDescription(
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
@ -26,9 +28,9 @@ include("../examples/profiling_utilities.jl")
@testset "AB->AB no optimization" begin
for _ in 1:10 # test in a loop because graph layout should not change the result
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
@test isapprox(execute(graph, process_2_2, particles_2_2), expected_result; rtol = 0.001)
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = 0.001)
func = get_compute_function(graph, process_2_2)
func = get_compute_function(graph, process_2_2, machine)
@test isapprox(func(particles_2_2), expected_result; rtol = 0.001)
end
end
@ -39,7 +41,7 @@ include("../examples/profiling_utilities.jl")
random_walk!(graph, 50)
@test is_valid(graph)
@test isapprox(execute(graph, process_2_2, particles_2_2), expected_result; rtol = 0.001)
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = 0.001)
end
end
@ -49,14 +51,14 @@ include("../examples/profiling_utilities.jl")
)
particles_2_4 = gen_process_input(process_2_4)
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
expected_result = execute(graph, process_2_4, particles_2_4)
expected_result = execute(graph, process_2_4, machine, particles_2_4)
@testset "AB->ABBB no optimization" begin
for _ in 1:5 # test in a loop because graph layout should not change the result
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
@test isapprox(execute(graph, process_2_4, particles_2_4), expected_result; rtol = 0.001)
@test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = 0.001)
func = get_compute_function(graph, process_2_4)
func = get_compute_function(graph, process_2_4, machine)
@test isapprox(func(particles_2_4), expected_result; rtol = 0.001)
end
end
@ -67,7 +69,7 @@ include("../examples/profiling_utilities.jl")
random_walk!(graph, 100)
@test is_valid(graph)
@test isapprox(execute(graph, process_2_4, particles_2_4), expected_result; rtol = 0.001)
@test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = 0.001)
end
end