WIP Adding machine/device info and caching strategies

2023-09-29 18:02:57 +02:00 · 2023-09-29 18:02:57 +02:00 · 37d645cb4e
commit 37d645cb4e
parent afb6af44ca
14 changed files with 469 additions and 293 deletions
--- a/Project.toml
+++ b/Project.toml
@ -15,7 +15,6 @@ QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
-oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"

 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
--- a/notebooks/abc_model_showcase.ipynb
+++ b/notebooks/abc_model_showcase.ipynb
@ -5,13 +5,44 @@
   "execution_count": 1,
   "id": "20768e45-df62-4638-ba33-b0ccf239f1aa",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Error: Failed to initialize oneAPI\n",
+      "│   exception = (oneAPI.oneL0.ZeError(oneAPI.oneL0.ZE_RESULT_ERROR_UNINITIALIZED), Union{Ptr{Nothing}, Base.InterpreterIP}[Ptr{Nothing} @0x00007f1860405f02, Ptr{Nothing} @0x00007f18604068dc, Ptr{Nothing} @0x00007f1860406900, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e7a7a4, Ptr{Nothing} @0x00007f1967e5e3bc, Ptr{Nothing} @0x00007f1953cecc65, Ptr{Nothing} @0x00007f19480c5c72, Ptr{Nothing} @0x00007f19480cb52c, Ptr{Nothing} @0x00007f19480c79a6, Ptr{Nothing} @0x00007f19480f67de, Ptr{Nothing} @0x00007f19480f6b4d, Ptr{Nothing} @0x00007f19480d2439, Ptr{Nothing} @0x00007f19480d40e4, Ptr{Nothing} @0x00007f19480d4286, Ptr{Nothing} @0x00007f19480d0c2a, Ptr{Nothing} @0x00007f19480d1216, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e7a338, Ptr{Nothing} @0x00007f1967e7c0e8, Ptr{Nothing} @0x00007f1967e7ba79, Ptr{Nothing} @0x00007f1967e7cdfa, Ptr{Nothing} @0x00007f19480f31ff, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967e51f48, Ptr{Nothing} @0x00007f19480f1eb9, Ptr{Nothing} @0x00007f19480f2c78, Ptr{Nothing} @0x00007f19480f2f21, Ptr{Nothing} @0x00007f19480f2f56, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480e8bb1, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480ddd7c, Ptr{Nothing} @0x00007f19480dde82, Ptr{Nothing} @0x00007f19480ddead, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f19480d5001, Ptr{Nothing} @0x00007f1967e7b5db, Ptr{Nothing} @0x00007f1967e7ba79, Ptr{Nothing} @0x00007f1967e7cdfa, Ptr{Nothing} @0x00007f1953d26667, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1953b5abfd, Ptr{Nothing} @0x00007f19533d6e93, Ptr{Nothing} @0x00007f19533d6eaf, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1953d5c06e, Ptr{Nothing} @0x00007f1953786d4f, Ptr{Nothing} @0x00007f1953786e58, Ptr{Nothing} @0x00007f1967e42f4d, Ptr{Nothing} @0x00007f1967ea7a15, Ptr{Nothing} @0x00007f1967ea845e, Ptr{Nothing} @0x0000000000401088, Ptr{Nothing} @0x00007f1968c27ccf, Ptr{Nothing} 
@0x00007f1968c27d89, Ptr{Nothing} @0x00000000004010b8])\n",
+      "└ @ oneAPI.oneL0 /home/antonr/.julia/packages/oneAPI/g8ZUm/lib/level-zero/oneL0.jl:103\n"
+     ]
+    }
+   ],
   "source": [
    "using Revise\n",
    "using MetagraphOptimization\n",
    "using BenchmarkTools"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "ff5f4a49",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Machine(MetagraphOptimization.AbstractDevice[MetagraphOptimization.NumaNode(0x0000, 0x0001, -1.0)], [-1.0;;])"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Get our machine's info\n",
+    "machine = get_machine_info()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 2,
@ -24,9 +55,8 @@
       "ABCModel()"
      ]
     },
-     "execution_count": 2,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    }
   ],
   "source": [
@ -46,9 +76,8 @@
       "ABC Process: 'AB->ABBB'"
      ]
     },
-     "execution_count": 3,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    }
   ],
   "source": [
@ -59,17 +88,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
   "id": "043bd9e2-f89a-4362-885a-8c89d4cdd76f",
   "metadata": {},
   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Total: 280, ComputeTaskS2"
-     ]
-    },
    {
     "data": {
      "text/plain": [
@ -81,17 +103,16 @@
       "  Total Compute Intensity: 0.09822733918128655\n"
      ]
     },
-     "execution_count": 4,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      ": 24, DataTask: 143, \n",
-      "         ComputeTaskU: 6, ComputeTaskSum: 1, ComputeTaskS1: 36, \n",
-      "         ComputeTaskV: 64, ComputeTaskP: 6"
+      "Total: 280, DataTask: 143, ComputeTaskV: 64, \n",
+      "         ComputeTaskS2: 24, ComputeTaskS1: 36, ComputeTaskP: 6, \n",
+      "         ComputeTaskSum: 1, ComputeTaskU: 6"
     ]
    }
   ],
@ -102,7 +123,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 8,
   "id": "02f01ad3-fd10-48d5-a0e0-c03dc83c80a4",
   "metadata": {},
   "outputs": [
@ -110,19 +131,18 @@
     "data": {
      "text/plain": [
       "Input for ABC Process: 'AB->ABBB':\n",
-       "  Input particles:\n",
-       "    A: [5.77986599979293, 0.0, 0.0, 5.692701553354288]\n",
-       "    B: [5.77986599979293, 0.0, 0.0, -5.692701553354288]\n",
-       "  Output Particles:\n",
-       "    A: [-3.8835293143673746, -1.4292027910861678, 2.8576090179942106, 1.968057422378813]\n",
-       "    B: [-1.1554024905063585, -0.1464656500147254, -0.2082400426692148, 0.5197487980391896]\n",
-       "    B: [-2.849749730594798, -1.0177034035100576, -2.464951858896686, -0.09677625137882176]\n",
-       "    B: [-3.6710504641173287, 2.5933718446109513, -0.1844171164283155, -2.391029969039186]\n"
+       "  2 Incoming particles:\n",
+       "    B: [3.6884461652243594, 0.0, 0.0, 3.5503007075117288]\n",
+       "    A: [3.6884461652243594, 0.0, 0.0, -3.5503007075117288]\n",
+       "  4 Outgoing Particles:\n",
+       "    B: [-2.1242475430909664, -0.8503576743556215, -0.7677870655372816, 1.4831798515305399]\n",
+       "    B: [-1.7067195416668486, 1.3493031523092422, -0.2241887179000338, 0.20496832887590027]\n",
+       "    B: [-2.276483767911827, 0.19358394881362065, 1.325763825770123, -1.5450740689711668]\n",
+       "    A: [-1.2694414777790763, -0.6925294267672414, -0.3337880423328074, -0.14307411143527338]\n"
      ]
     },
-     "execution_count": 5,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    }
   ],
   "source": [
@ -132,41 +152,39 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 9,
   "id": "083fb1be-ce2a-47f9-afb9-60a6fdfaed0b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "compute_2f2a5dca_5e3a_11ee_3973_bfa5c42acee3 (generic function with 1 method)"
+       "compute_2430665c_5ee0_11ee_1bd6_cf00927a1f40 (generic function with 1 method)"
      ]
     },
-     "execution_count": 6,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    }
   ],
   "source": [
    "# Get the function computing the result of the process from a ProcessInput\n",
-    "AB_AB3_compute = get_compute_function(graph, process)"
+    "AB_AB3_compute = get_compute_function(graph, process, machine)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 10,
   "id": "a40c9500-8f79-4f04-b3c5-59b72a6b7ba9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "-1.8924431710735022e-13"
+       "-1.0304553507713216e-11"
      ]
     },
-     "execution_count": 7,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    }
   ],
   "source": [
@ -176,7 +194,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 11,
   "id": "80c70010",
   "metadata": {},
   "outputs": [],
@ -189,18 +207,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 13,
   "id": "5b192b44",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The result should be the same as before (we can use execute to save having to generate the function ourselves)\n",
-    "@assert result ≈ execute(graph, process, input_data)"
+    "@assert result ≈ execute(graph, process, machine, input_data)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 15,
   "id": "9b2f4a3f",
   "metadata": {},
   "outputs": [
@ -208,71 +226,63 @@
     "data": {
      "text/plain": [
       "1000-element Vector{Float64}:\n",
-       " -2.1491995259940396e-11\n",
-       " -1.04995646459455e-11\n",
-       "  5.8217606911879465e-15\n",
-       " -6.556969485683647e-14\n",
-       " -1.3588086164373804e-14\n",
-       " -1.8789662441593714e-13\n",
-       " -2.1319733018358848e-13\n",
-       " -5.335975907200479e-12\n",
-       " -9.053914191490206e-13\n",
-       " -5.611079017069238e-13\n",
-       " -5.063492275603427e-11\n",
-       "  2.9168508985810955e-15\n",
-       " -1.6420151378194154e-13\n",
+       " -2.1503212387420805e-13\n",
+       " -3.515624153345333e-15\n",
+       " -9.952937612602425e-14\n",
+       " -9.153444987973305e-14\n",
+       " -9.219799628850346e-12\n",
+       " -1.0155811880772751e-13\n",
+       " -2.278823085449527e-13\n",
+       " -8.070463578419918e-14\n",
+       " -1.838772437827446e-13\n",
+       " -4.109239393253953e-12\n",
       "  ⋮\n",
-       "  1.0931677247833414e-13\n",
-       " -7.704755306464438e-16\n",
-       " -1.8385907037491413e-12\n",
-       " -6.036215596560112e-14\n",
-       " -9.988724014003618e-12\n",
-       "  3.4861755637292744e-13\n",
-       " -1.1051119822969222e-10\n",
-       " -2.4965725132161986e-12\n",
-       " -3.868242784720193e-11\n",
-       "  7.904149696653533e-15\n",
-       " -7.606811743178718e-11\n",
-       " -5.100594937480289e-13"
+       " -9.249672758783865e-13\n",
+       " -1.1230011623854415e-11\n",
+       "  3.0412717788587913e-12\n",
+       " -7.618572966754196e-12\n",
+       " -1.771698295445229e-14\n",
+       " -3.128112746753292e-11\n",
+       " -3.9791519593567145e-12\n",
+       " -4.4850922847751944e-15\n",
+       "  1.0995512644879932e-14"
      ]
     },
-     "execution_count": 10,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    }
   ],
   "source": [
    "# Now we can generate a function and use it on lots of inputs\n",
    "inputs = [gen_process_input(process) for _ in 1:1000]\n",
-    "AB_AB3_reduced_compute = get_compute_function(graph, process)\n",
+    "AB_AB3_reduced_compute = get_compute_function(graph, process, machine)\n",
    "\n",
    "results = AB_AB3_reduced_compute.(inputs)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 16,
   "id": "d43e4ff0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "BenchmarkTools.Trial: 1453 samples with 1 evaluation.\n",
-       " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m2.596 ms\u001b[22m\u001b[39m … \u001b[35m6.983 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 27.07%\n",
-       " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m2.715 ms             \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m0.00%\n",
-       " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m3.443 ms\u001b[22m\u001b[39m ± \u001b[32m1.068 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m4.39% ±  8.86%\n",
+       "BenchmarkTools.Trial: 1366 samples with 1 evaluation.\n",
+       " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m2.671 ms\u001b[22m\u001b[39m … \u001b[35m9.612 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 44.01%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m3.007 ms             \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m 0.00%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m3.654 ms\u001b[22m\u001b[39m ± \u001b[32m1.320 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m11.60% ± 16.57%\n",
       "\n",
-       "  \u001b[39m▇\u001b[39m█\u001b[34m▃\u001b[39m\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m▂\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
-       "  \u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▆\u001b[39m▇\u001b[32m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▅\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▆\u001b[39m▅\u001b[39m▇\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m \u001b[39m█\n",
-       "  2.6 ms\u001b[90m      \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m     6.44 ms \u001b[0m\u001b[1m<\u001b[22m\n",
+       "  \u001b[39m▅\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▆\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\n",
+       "  \u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[32m▇\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m \u001b[39m█\n",
+       "  2.67 ms\u001b[90m     \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m     8.27 ms \u001b[0m\u001b[1m<\u001b[22m\n",
       "\n",
       " Memory estimate\u001b[90m: \u001b[39m\u001b[33m6.17 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m143004\u001b[39m."
      ]
     },
-     "execution_count": 11,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    }
   ],
   "source": [
@ -281,28 +291,27 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 17,
   "id": "e18d9546",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "BenchmarkTools.Trial: 1890 samples with 1 evaluation.\n",
-       " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m2.007 ms\u001b[22m\u001b[39m … \u001b[35m  5.588 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 29.53%\n",
-       " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m2.086 ms               \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m0.00%\n",
-       " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m2.644 ms\u001b[22m\u001b[39m ± \u001b[32m840.095 μs\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m5.18% ± 10.46%\n",
+       "BenchmarkTools.Trial: 1772 samples with 1 evaluation.\n",
+       " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m2.065 ms\u001b[22m\u001b[39m … \u001b[35m8.468 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 44.06%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m2.158 ms             \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m 0.00%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m2.820 ms\u001b[22m\u001b[39m ± \u001b[32m1.195 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m11.39% ± 16.96%\n",
       "\n",
-       "  \u001b[39m█\u001b[34m▇\u001b[39m\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m▃\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
-       "  \u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[32m▅\u001b[39m\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▄\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▅\u001b[39m▄\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m \u001b[39m█\n",
-       "  2.01 ms\u001b[90m      \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m      5.32 ms \u001b[0m\u001b[1m<\u001b[22m\n",
+       "  \u001b[39m█\u001b[34m▆\u001b[39m\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
+       "  \u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▇\u001b[39m▇\u001b[39m▆\u001b[32m▅\u001b[39m\u001b[39m▆\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▁\u001b[39m▅\u001b[39m▃\u001b[39m▃\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▆\u001b[39m \u001b[39m█\n",
+       "  2.06 ms\u001b[90m     \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m     7.41 ms \u001b[0m\u001b[1m<\u001b[22m\n",
       "\n",
       " Memory estimate\u001b[90m: \u001b[39m\u001b[33m5.26 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m123004\u001b[39m."
      ]
     },
-     "execution_count": 12,
     "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
    }
   ],
   "source": [
--- a/src/MetagraphOptimization.jl
+++ b/src/MetagraphOptimization.jl
@ -57,6 +57,9 @@ export ParticleValue
 export ParticleA, ParticleB, ParticleC
 export ABCProcessDescription, ABCProcessInput, ABCModel

+export Machine
+export get_machine_info
+
 export ==, in, show, isempty, delete!, length

 export bytes_to_human_readable
@ -127,6 +130,18 @@ include("models/abc/properties.jl")
 include("models/abc/parse.jl")
 include("models/abc/print.jl")

+include("devices/interface.jl")
+include("devices/measure.jl")
+include("devices/detect.jl")
+include("devices/impl.jl")
+
+include("devices/numa/impl.jl")
+include("devices/cuda/impl.jl")
+# can currently not use AMDGPU because of incompatibility with the newest rocm drivers
+# include("devices/rocm/impl.jl")
+# oneapi seems also broken for now
+# include("devices/oneapi/impl.jl")
+
 include("code_gen/main.jl")

 end # module MetagraphOptimization
--- a/src/code_gen/main.jl
+++ b/src/code_gen/main.jl
@ -25,7 +25,7 @@ function gen_code(graph::DAG)
            inputSyms[node.name] = Vector{Symbol}()
        end

-        push!(inputSyms[node.name], Symbol("data_$(to_var_name(node.id))_in"))
+        push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
    end

    node = nothing
@ -45,7 +45,7 @@ function gen_code(graph::DAG)
    end

    # node is now the last node we looked at -> the output node
-    outSym = Symbol("data_$(to_var_name(node.id))")
+    outSym = Symbol("$(to_var_name(node.id))")

    return (code = Expr(:block, code...), inputSymbols = inputSyms, outputSymbol = outSym)
 end
@ -53,6 +53,7 @@ end
 function gen_input_assignment_code(
    inputSymbols::Dict{String, Vector{Symbol}},
    processDescription::AbstractProcessDescription,
+    machine::Machine,
    processInputSymbol::Symbol = :input,
 )
    @assert length(inputSymbols) >=
@ -75,7 +76,11 @@ function gen_input_assignment_code(
        end

        for symbol in symbols
-            push!(assignInputs, Meta.parse("$(symbol) = ParticleValue($p, 1.0)"))
+            # TODO generate correct access expression
+            # TODO how to define cache strategies?
+            device = machine.devices[1]
+            evalExpr = eval(gen_access_expr(device, default_strategy(device), symbol))
+            push!(assignInputs, Meta.parse("$(evalExpr) = ParticleValue($p, 1.0)"))
        end
    end

@ -83,19 +88,23 @@ function gen_input_assignment_code(
 end

 """
-    get_compute_function(graph::DAG, process::AbstractProcessDescription)
+    get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)

 Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
 """
-function get_compute_function(graph::DAG, process::AbstractProcessDescription)
+function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
    (code, inputSymbols, outputSymbol) = gen_code(graph)

-    assignInputs = gen_input_assignment_code(inputSymbols, process, :input)
+    assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)

-    function_id = to_var_name(UUIDs.uuid1(rng[1]))
+    # TODO generate correct access expression
+    # TODO how to define cache strategies?
+    device = machine.devices[1]
+
+    functionId = to_var_name(UUIDs.uuid1(rng[1]))
    func = eval(
        Meta.parse(
-            "function compute_$(function_id)(input::AbstractProcessInput) $assignInputs; $code; return $outputSymbol; end",
+            "function compute_$(functionId)(input::AbstractProcessInput) $assignInputs; $code; return $(eval(gen_access_expr(device, default_strategy(device), outputSymbol))); end",
        ),
    )

@ -103,7 +112,7 @@ function get_compute_function(graph::DAG, process::AbstractProcessDescription)
 end

 """
-    execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
+    execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)

 Execute the code of the given `graph` on the given input particles.

@ -115,8 +124,8 @@ This is essentially shorthand for

 See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
 """
-function execute(graph::DAG, process::AbstractProcessDescription, input::AbstractProcessInput)
-    func = get_compute_function(graph, process)
+function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
+    func = get_compute_function(graph, process, machine)

    result = 0
    try
--- a/src/devices/cuda/impl.jl
+++ b/src/devices/cuda/impl.jl
@ -0,0 +1,47 @@
+using CUDA
+
+mutable struct CUDAGPU <: AbstractGPU
+    device::Any # TODO: what's the cuda device type?
+    FLOPS::Float64
+end
+
+push!(DEVICE_TYPES, CUDAGPU)
+
+CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
+
+default_strategy(::CUDAGPU) = LocalVariables()
+
+function measure_device!(device::CUDAGPU; verbose::Bool)
+    if verbose
+        println("Measuring CUDA GPU $(device.device)")
+    end
+
+    # TODO implement
+    return nothing
+end
+
+"""
+    get_devices(deviceType::Type{T}; verbose::Bool) where {T <: CUDAGPU}
+
+Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
+"""
+function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
+    devices = Vector{AbstractDevice}()
+
+    if !CUDA.functional()
+        if verbose
+            println("CUDA is non-functional")
+        end
+        return devices
+    end
+
+    CUDADevices = CUDA.devices()
+    if verbose
+        println("Found $(length(CUDADevices)) CUDA devices")
+    end
+    for device in CUDADevices
+        push!(devices, CUDAGPU(device, -1))
+    end
+
+    return devices
+end
--- a/src/devices/detect.jl
+++ b/src/devices/detect.jl
@ -1,127 +1,23 @@
-using NumaAllocators
-using CUDA
-using ROCm
-using oneAPI

 """
    get_machine_info(verbose::Bool)

 Return the [`Machine`](@ref) currently running on. The parameter `verbose` defaults to true when interactive.
 """
-function get_machine_info(verbose::Bool = Base.is_interactive())
-    devices = Vector{Device}()
+function get_machine_info(; verbose::Bool = Base.is_interactive)
+    devices = Vector{AbstractDevice}()

-    numaDevices = get_numa_devices(verbose)
-    push!(devices, numaDevices)
-
-    cudaDevices = get_cuda_devices(verbose)
-    push!(devices, cudaDevices)
-
-    rocmDevices = get_rocm_devices(verbose)
-    push!(devices, rocmDevices)
-
-    oneapiDevices = get_oneapi_devices(verbose)
-    push!(devices, oneapiDevices)
+    for device in device_types()
+        devs = get_devices(device, verbose = verbose)
+        for dev in devs
+            push!(devices, dev)
+        end
+    end

    noDevices = length(devices)
    @assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"

-    return Machine(devices, transferRates::Matrix{Float64}(-1, noDevices, noDevices))
-end
-
-"""
-    get_numa_devices(verbose::Bool)
-
-Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
-"""
-function get_numa_devices(verbose::Bool)
-    devices = Vector{Device}()
-    noNumaNodes = highest_numa_node()
-
-    if (verbose)
-        println("Found $(noNumaNodes + 1) NUMA nodes")
-    end
-    for i in 0:noNumaNodes
-        push!(devices, NumaNode(i, 1, -1))
-    end
-
-    return devices
-end
-
-"""
-    get_cuda_devices(verbose::Bool)
-
-Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
-"""
-function get_cuda_devices(verbose::Bool)
-    devices = Vector{Device}()
-
-    if !CUDA.functional()
-        if verbose
-            println("CUDA is non-functional")
-        end
-        return devices
-    end
-
-    CUDADevices = CUDA.devices()
-    if verbose
-        println("Found $(length(CUDADevices)) CUDA devices")
-    end
-    for device in CUDADevices
-        push!(devices, CUDAGPU(device, -1))
-    end
-
-    return devices
-end
-
-"""
-    get_rocm_devices(verbose::Bool)
-
-Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
-"""
-function get_rocm_devices(verbose::Bool)
-    devices = Vector{Device}()
-
-    if !AMDGPU.functional()
-        if verbose
-            println("AMDGPU is non-functional")
-        end
-        return devices
-    end
-
-    AMDDevices = AMDGPU.devices()
-    if verbose
-        println("Found $(length(AMDDevices)) AMD devices")
-    end
-    for device in AMDDevices
-        push!(devices, ROCmGPU(device, -1))
-    end
-
-    return devices
-end
-
-"""
-    get_oneapi_devices(verbose::Bool)
-
-Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
-"""
-function get_oneapi_devices(verbose::Bool)
-    devices = Vector{Device}()
-
-    if !oneAPI.functional()
-        if verbose
-            println("oneAPI is non-functional")
-        end
-        return devices
-    end
-
-    oneAPIDevices = oneAPI.devices()
-    if verbose
-        println("Found $(length(oneAPIDevices)) oneAPI devices")
-    end
-    for device in oneAPIDevices
-        push!(devices, oneAPIGPU(device, -1))
-    end
-
-    return devices
+    transferRates = Matrix{Float64}(undef, noDevices, noDevices)
+    fill!(transferRates, -1)
+    return Machine(devices, transferRates)
 end
--- a/src/devices/impl.jl
+++ b/src/devices/impl.jl
@ -0,0 +1,25 @@
+
+"""
+    device_types()
+
+Return the global vector of device types that implementations have registered.
+The registry object itself is returned rather than a copy, so types registered
+later are visible through a previously returned value.
+
+See also: [`DEVICE_TYPES`](@ref)
+"""
+device_types() = DEVICE_TYPES
+
+"""
+    strategies(t::Type{T}) where {T <: AbstractDevice}
+
+Look up the [`CacheStrategy`](@ref)s registered for the [`AbstractDevice`](@ref) type `t`.
+The caching strategies are used in code generation. Raises an error if `t` has no entry.
+"""
+function strategies(t::Type{T}) where {T <: AbstractDevice}
+    # The fallback block only runs when `t` is missing from the registry.
+    registered = get(CACHE_STRATEGIES, t) do
+        error("Trying to get strategies for $T, but it has no strategies defined!")
+    end
+    return registered
+end
--- a/src/devices/interface.jl
+++ b/src/devices/interface.jl
@ -0,0 +1,87 @@
+
+abstract type AbstractDevice end # root of the device type hierarchy
+
+abstract type AbstractCPU <: AbstractDevice end # supertype of CPU-side devices, e.g. NumaNode
+
+abstract type AbstractGPU <: AbstractDevice end # supertype of the GPU device types
+
+"""
+    Machine
+
+A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
+
+See also: [`Scheduler`](@ref)
+"""
+struct Machine
+    devices::Vector{AbstractDevice} # all devices found on the machine
+    # NOTE(review): presumably entry [i, j] is the rate between devices i and j; negative = not measured — confirm
+    transferRates::Matrix{Float64}
+end
+
+"""
+    CacheStrategy
+
+Abstract base type for caching strategies. Concrete strategies are field-less structs whose instances are passed to the code generation interface functions for dispatch.
+
+See also: [`strategies`](@ref), [`default_strategy`](@ref)
+"""
+abstract type CacheStrategy end
+# Concrete strategies; they carry no data and exist only to select methods.
+struct LocalVariables <: CacheStrategy end # NumaNode's variant accesses variables named `data_<symbol>`
+struct Dictionary <: CacheStrategy end # NOTE(review): none of the implementations visible here registers this strategy — confirm intent
+
+"""
+    DEVICE_TYPES::Vector{Type}
+
+Global vector of available and implemented device types. Each implementation of an [`AbstractDevice`](@ref) should `push!` its concrete type onto this vector. Declared `const` so the binding has a fixed type; the vector itself stays mutable.
+
+See also: [`device_types`](@ref), [`get_devices`](@ref)
+"""
+const DEVICE_TYPES = Vector{Type}()
+
+"""
+    CACHE_STRATEGIES::Dict{Type, Vector{CacheStrategy}}
+
+Global dictionary mapping each device type to the [`CacheStrategy`](@ref) instances available for it. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary. Declared `const`; entries are still added by mutation.
+
+See also: [`strategies`](@ref)
+"""
+const CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}()
+
+"""
+    default_strategy(device::AbstractDevice)
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device.
+"""
+function default_strategy end
+
+"""
+    get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector{AbstractDevice}` holding the devices of the given [`AbstractDevice`](@ref) type that are available on the current machine. If `verbose` is true, implementations may print additional information.
+"""
+function get_devices end
+
+"""
+    measure_device!(device::AbstractDevice; verbose::Bool)
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes the result into the device object. If `verbose` is true, implementations may print progress information.
+"""
+function measure_device! end
+
+"""
+    gen_cache_init_code(device::AbstractDevice, strategy::CacheStrategy)
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache.
+
+The strategy is passed as an instance of a [`CacheStrategy`](@ref) subtype, for example `LocalVariables()`.
+"""
+function gen_cache_init_code end
+
+"""
+    gen_access_expr(device::AbstractDevice, strategy::CacheStrategy, symbol::Symbol)
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
+Returns an `Expr` accessing the variable identified by `symbol`.
+"""
+function gen_access_expr end
--- a/src/devices/measure.jl
+++ b/src/devices/measure.jl
@ -17,34 +17,6 @@ end
 Measure the transfer rates between devices in the machine.
 """
 function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.is_interactive())
-
-    return nothing
-end
-
-function measure_device!(device::NumaNode; verbose::Bool)
-    if verbose
-        println("Measuring Numa Node $(device.numaId)")
-    end
-    return nothing
-end
-
-function measure_device!(device::CUDAGPU; verbose::Bool)
-    if verbose
-        println("Measuring CUDA GPU $(device.device)")
-    end
-    return nothing
-end
-
-function measure_device!(device::ROCmGPU; verbose::Bool)
-    if verbose
-        println("Measuring ROCm GPU $(device.device)")
-    end
-    return nothing
-end
-
-function measure_device!(device::oneAPIGPU; verbose::Bool)
-    if verbose
-        println("Measuring oneAPI GPU $(device.device)")
-    end
+    # TODO implement
    return nothing
 end
--- a/src/devices/numa/impl.jl
+++ b/src/devices/numa/impl.jl
@ -0,0 +1,61 @@
+using NumaAllocators
+
+mutable struct NumaNode <: AbstractCPU
+    numaId::UInt16 # NUMA node index; `get_devices` numbers nodes starting at 0
+    threads::UInt16 # NOTE(review): presumably the thread count; `get_devices` always sets 1 — confirm
+    FLOPS::Float64 # starts at -1 in `get_devices`; presumably filled in by `measure_device!` — confirm
+end
+
+push!(DEVICE_TYPES, NumaNode) # register the type in the global device type list
+
+CACHE_STRATEGIES[NumaNode] = [LocalVariables()] # strategies offered for this device type
+
+default_strategy(::NumaNode) = LocalVariables() # interface method: default strategy for NUMA nodes
+
+# Placeholder for the NUMA node benchmark: currently only reports (when `verbose`)
+# which node would be measured; the device is not modified.
+function measure_device!(device::NumaNode; verbose::Bool)
+    verbose && println("Measuring Numa Node $(device.numaId)")
+
+    # TODO implement
+    return nothing
+end
+
+"""
+    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
+
+Build one [`NumaNode`](@ref) per NUMA node id from 0 up to `highest_numa_node()` and return
+them as a `Vector{AbstractDevice}`.
+
+Every node is created with `threads = 1` and `FLOPS = -1`.
+
+# Keywords
+- `verbose`: if true, print the number of NUMA nodes that were found.
+"""
+function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
+    lastNode = highest_numa_node()
+    verbose && println("Found $(lastNode + 1) NUMA nodes")
+
+    # Node ids are zero-based, hence the range starts at 0.
+    return AbstractDevice[NumaNode(nodeId, 1, -1) for nodeId in 0:lastNode]
+end
+
+"""
+    gen_cache_init_code(device::NumaNode, strategy::LocalVariables)
+
+Generate code for initializing the [`LocalVariables`](@ref) strategy on a [`NumaNode`](@ref). Nothing needs to be set up for this strategy, so the result is an empty block expression.
+"""
+function gen_cache_init_code(::NumaNode, ::LocalVariables)
+    # `Expr()` without a head throws an ArgumentError; an empty `:block` is the valid no-op expression
+    return Expr(:block)
+end
+
+"""
+    gen_access_expr(device::NumaNode, strategy::LocalVariables, symbol::Symbol)
+
+Generate code to access the variable designated by `symbol` using the [`LocalVariables`](@ref) [`CacheStrategy`](@ref) on a [`NumaNode`](@ref). The variable name is `symbol` prefixed with `data_`.
+"""
+function gen_access_expr(::NumaNode, ::LocalVariables, symbol::Symbol)
+    s = Symbol("data_$symbol") # name of the local variable, e.g. :data_x for :x
+    return Meta.parse(":($s)") # NOTE(review): parses the *quoted* text `:(data_x)`, not the bare name — confirm callers expect the quoted form
+end
--- a/src/devices/oneapi/impl.jl
+++ b/src/devices/oneapi/impl.jl
@ -0,0 +1,47 @@
+using oneAPI
+
+mutable struct oneAPIGPU <: AbstractGPU
+    device::Any # element of `oneAPI.devices()`, stored untyped
+    FLOPS::Float64 # starts at -1 in `get_devices`; presumably filled in by `measure_device!` — confirm
+end
+
+push!(DEVICE_TYPES, oneAPIGPU) # register the type in the global device type list
+
+CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()] # strategies offered for this device type
+
+default_strategy(::oneAPIGPU) = LocalVariables() # interface method: default strategy for oneAPI GPUs
+
+# Placeholder for the oneAPI GPU benchmark: currently only reports (when `verbose`)
+# which device would be measured; the device object is not modified.
+function measure_device!(device::oneAPIGPU; verbose::Bool)
+    verbose && println("Measuring oneAPI GPU $(device.device)")
+
+    # TODO implement
+    return nothing
+end
+
+"""
+    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
+
+Wrap every device reported by `oneAPI.devices()` in a [`oneAPIGPU`](@ref) and return them
+as a `Vector{AbstractDevice}`. The vector is empty when `oneAPI.functional()` is false.
+
+Every GPU is created with `FLOPS = -1`.
+
+# Keywords
+- `verbose`: if true, print a notice when oneAPI is non-functional, otherwise the device count.
+"""
+function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
+    # Guard clause: without a working oneAPI runtime there is nothing to enumerate.
+    if !oneAPI.functional()
+        verbose && println("oneAPI is non-functional")
+        return Vector{AbstractDevice}()
+    end
+
+    handles = oneAPI.devices()
+    verbose && println("Found $(length(handles)) oneAPI devices")
+
+    gpus = Vector{AbstractDevice}()
+    foreach(handle -> push!(gpus, oneAPIGPU(handle, -1)), handles)
+    return gpus
+end
--- a/src/devices/rocm/impl.jl
+++ b/src/devices/rocm/impl.jl
@ -0,0 +1,47 @@
+using AMDGPU
+
+mutable struct ROCmGPU <: AbstractGPU
+    device::Any # element of `AMDGPU.devices()`, stored untyped
+    FLOPS::Float64 # starts at -1 in `get_devices`; presumably filled in by `measure_device!` — confirm
+end
+
+push!(DEVICE_TYPES, ROCmGPU) # register the type in the global device type list
+
+CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()] # strategies offered for this device type
+
+default_strategy(::ROCmGPU) = LocalVariables() # interface method: default strategy for ROCm GPUs
+
+# Placeholder for the ROCm GPU benchmark: currently only reports (when `verbose`)
+# which device would be measured; the device object is not modified.
+function measure_device!(device::ROCmGPU; verbose::Bool)
+    verbose && println("Measuring ROCm GPU $(device.device)")
+
+    # TODO implement
+    return nothing
+end
+
+"""
+    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
+
+Wrap every device reported by `AMDGPU.devices()` in a [`ROCmGPU`](@ref) and return them
+as a `Vector{AbstractDevice}`. The vector is empty when `AMDGPU.functional()` is false.
+
+Every GPU is created with `FLOPS = -1`.
+
+# Keywords
+- `verbose`: if true, print a notice when AMDGPU is non-functional, otherwise the device count.
+"""
+function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
+    # Guard clause: without a working AMDGPU runtime there is nothing to enumerate.
+    if !AMDGPU.functional()
+        verbose && println("AMDGPU is non-functional")
+        return Vector{AbstractDevice}()
+    end
+
+    handles = AMDGPU.devices()
+    verbose && println("Found $(length(handles)) AMD devices")
+
+    gpus = Vector{AbstractDevice}()
+    foreach(handle -> push!(gpus, ROCmGPU(handle, -1)), handles)
+    return gpus
+end
--- a/src/devices/type.jl
+++ b/src/devices/type.jl
@ -1,40 +0,0 @@
-
-abstract type Device end
-
-abstract type CPU <: Device end
-
-mutable struct NumaNode <: CPU
-    numaId::UInt16
-    threads::UInt16
-    FLOPS::Float64
-end
-
-abstract type GPU <: Device end
-
-mutable struct CUDAGPU <: GPU
-    device::Any # TODO: what's the cuda device type?
-    FLOPS::Float64
-end
-
-mutable struct ROCmGPU <: GPU
-    device::Any
-    FLOPS::Float64
-end
-
-mutable struct oneAPIGPU <: GPU
-    device::Any
-    FLOPS::Float64
-end
-
-"""
-    Machine
-
-A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
-
-See also: [`Scheduler`](@ref)
-"""
-struct Machine
-    devices::Vector{Device}
-
-    transferRates::Matrix{Float64}
-end
--- a/test/unit_tests_execution.jl
+++ b/test/unit_tests_execution.jl
@ -5,6 +5,8 @@ using QEDbase
 include("../examples/profiling_utilities.jl")

@testset "Unit Tests Execution" begin
+    machine = get_machine_info()
+
    process_2_2 = ABCProcessDescription(
        Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
        Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
@ -26,9 +28,9 @@ include("../examples/profiling_utilities.jl")
    @testset "AB->AB no optimization" begin
        for _ in 1:10   # test in a loop because graph layout should not change the result
            graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
-            @test isapprox(execute(graph, process_2_2, particles_2_2), expected_result; rtol = 0.001)
+            @test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = 0.001)

-            func = get_compute_function(graph, process_2_2)
+            func = get_compute_function(graph, process_2_2, machine)
            @test isapprox(func(particles_2_2), expected_result; rtol = 0.001)
        end
    end
@ -39,7 +41,7 @@ include("../examples/profiling_utilities.jl")
            random_walk!(graph, 50)
            @test is_valid(graph)

-            @test isapprox(execute(graph, process_2_2, particles_2_2), expected_result; rtol = 0.001)
+            @test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = 0.001)
        end
    end

@ -49,14 +51,14 @@ include("../examples/profiling_utilities.jl")
    )
    particles_2_4 = gen_process_input(process_2_4)
    graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
-    expected_result = execute(graph, process_2_4, particles_2_4)
+    expected_result = execute(graph, process_2_4, machine, particles_2_4)

    @testset "AB->ABBB no optimization" begin
        for _ in 1:5   # test in a loop because graph layout should not change the result
            graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
-            @test isapprox(execute(graph, process_2_4, particles_2_4), expected_result; rtol = 0.001)
+            @test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = 0.001)

-            func = get_compute_function(graph, process_2_4)
+            func = get_compute_function(graph, process_2_4, machine)
            @test isapprox(func(particles_2_4), expected_result; rtol = 0.001)
        end
    end
@ -67,7 +69,7 @@ include("../examples/profiling_utilities.jl")
            random_walk!(graph, 100)
            @test is_valid(graph)

-            @test isapprox(execute(graph, process_2_4, particles_2_4), expected_result; rtol = 0.001)
+            @test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = 0.001)
        end
    end