Use runner label

Add more estimator tests, add specialized implementations of operation_effect estimator
Add abcmodel tests, Add groundtruth for AB->AB process, fix calculation
2023-11-16 15:26:37 +01:00 · 2023-11-16 04:28:58 +01:00 · 2023-11-13 14:26:13 +01:00 · 2023-11-13 14:22:41 +01:00 · 2023-11-13 14:22:41 +01:00 · 2023-11-13 14:22:41 +01:00
122 changed files with 7655 additions and 1829 deletions
--- a/.JuliaFormatter.toml
+++ b/.JuliaFormatter.toml
@@ -0,0 +1,13 @@
+indent = 4
+margin = 120
+always_for_in = true
+for_in_replacement = "in"
+whitespace_typedefs = true
+whitespace_ops_in_indices = true
+long_to_short_function_def = false
+always_use_return = true
+whitespace_in_kwargs = true
+conditional_to_if = true
+normalize_line_endings = "unix"
+
+overwrite = true
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,2 @@
-examples/AB->ABBBBBBB.txt filter=lfs diff=lfs merge=lfs -text
-examples/AB->ABBBBBBBBB.txt filter=lfs diff=lfs merge=lfs -text
+input/AB->ABBBBBBBBB.txt filter=lfs diff=lfs merge=lfs -text
+input/AB->ABBBBBBB.txt filter=lfs diff=lfs merge=lfs -text
--- a/.gitea/workflows/julia-package-ci.yml
+++ b/.gitea/workflows/julia-package-ci.yml
@@ -1,33 +1,185 @@
-name: Test
+name: MetagraphOptimization_CI

 on: [push]

+env:
+  # keep the depot directly in the repository for the cache
+  JULIA_DEPOT_PATH: './.julia'
+
 jobs:
-  test:
-    runs-on: arch-latest
+  prepare:
+    runs-on: ubuntu-22.04

    steps:
-      #- name: Get git-lfs
-      #  run: apt-get update && apt-get install git-lfs
-
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

-      #- name: Checkout LFS objects
-      #  run: git lfs checkout
-
      - name: Setup Julia environment
-        uses: https://github.com/julia-actions/setup-julia@v1.9.1
+        uses: https://github.com/julia-actions/setup-julia@v1.9.2
        with:
-          version: '1.9.1'
+          version: '1.9.2'
+
+      # needed for the file hashing, should be removed when ${{ hashFiles('**/Project.toml') }} is supported in gitea
+      - name: Setup go environment
+        uses: actions/setup-go@v3
+        with:
+          go-version: '1.20'
+
+      - name: Hash files
+        uses: https://gitea.com/actions/go-hashfiles@v0.0.1
+        id: get-hash
+        with:
+          patterns: |-
+            **/Project.toml
+
+      - name: Restore Cache
+        uses: actions/cache/restore@v3
+        id: cache-restore
+        with:
+          path: |
+            .julia/artifacts
+            .julia/packages
+            .julia/registries
+          key: julia-${{ steps.get-hash.outputs.hash }}
+
+      - name: Check cache hit
+        if: steps.cache-restore.outputs.cache-hit == 'true'
+        run: exit 0

      - name: Install dependencies
-        run: julia --project=./ -e 'import Pkg; Pkg.instantiate()'
+        run: |
+          julia --project=./        -e 'import Pkg; Pkg.instantiate(); Pkg.precompile()'
+          julia --project=examples/ -e 'import Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); Pkg.precompile()'
+          julia --project=docs/     -e 'import Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); Pkg.precompile()'
+
+      - name: Cache Julia packages
+        uses: actions/cache/save@v3
+        with:
+          path: |
+            .julia/artifacts
+            .julia/packages
+            .julia/registries
+          key: julia-${{ steps.get-hash.outputs.hash }}
+
+  test:
+    needs: prepare
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Setup Julia environment
+        uses: https://github.com/julia-actions/setup-julia@v1.9.2
+        with:
+          version: '1.9.2'
+
+      # needed for the file hashing, should be removed when ${{ hashFiles('**/Project.toml') }} is supported in gitea
+      - name: Setup go environment
+        uses: actions/setup-go@v3
+        with:
+          go-version: '1.20'
+
+      - name: Hash files
+        uses: https://gitea.com/actions/go-hashfiles@v0.0.1
+        id: get-hash
+        with:
+          patterns: |-
+            **/Project.toml
+
+      - name: Restore cached Julia packages
+        uses: actions/cache/restore@v3
+        with:
+          path: |
+            .julia/artifacts
+            .julia/packages
+            .julia/registries
+          key: julia-${{ steps.get-hash.outputs.hash }}
+
+      - name: Install dependencies
+        run: |
+          julia --project=./        -e 'import Pkg; Pkg.instantiate(); Pkg.precompile()'
+          julia --project=examples/ -e 'import Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); Pkg.precompile()'
+          julia --project=docs/     -e 'import Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); Pkg.precompile()'
+
+      - name: Format check
+        run: |
+          julia --project=./ -e 'using JuliaFormatter; format(".", verbose=true, ignore=[".julia/*"])'
+          julia --project=./ -e '
+          out = Cmd(`git diff --name-only`) |> read |> String
+          if out == ""
+            exit(0)
+          else
+            @error "Some files have not been formatted !!!"
+            write(stdout, out)
+            exit(1)
+          end'

      - name: Run tests
        run: julia --project=./ -t 4 -e 'import Pkg; Pkg.test()' -O0

      - name: Run examples
-        run: julia --project=examples/ -t 4 -e 'import Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); include("examples/import_bench.jl")' -O3
+        run: julia --project=examples/ -t 4 -e 'include("examples/import_bench.jl")' -O3
+
+  docs:
+    needs: prepare
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Setup Julia environment
+        uses: https://github.com/julia-actions/setup-julia@v1.9.2
+        with:
+          version: '1.9.2'
+
+      # needed for the file hashing, should be removed when ${{ hashFiles('**/Project.toml') }} is supported in gitea
+      - name: Setup go environment
+        uses: actions/setup-go@v3
+        with:
+          go-version: '1.20'
+
+      - name: Hash files
+        uses: https://gitea.com/actions/go-hashfiles@v0.0.1
+        id: get-hash
+        with:
+          patterns: |-
+            **/Project.toml
+
+      - name: Restore cached Julia packages
+        uses: actions/cache/restore@v3
+        with:
+          path: |
+            .julia/artifacts
+            .julia/packages
+            .julia/registries
+          key: julia-${{ steps.get-hash.outputs.hash }}
+
+      - name: Install dependencies
+        run: |
+          julia --project=./        -e 'import Pkg; Pkg.instantiate(); Pkg.precompile()'
+          julia --project=examples/ -e 'import Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); Pkg.precompile()'
+          julia --project=docs/     -e 'import Pkg; Pkg.develop(Pkg.PackageSpec(path=pwd())); Pkg.instantiate(); Pkg.precompile()'
+
+      - name: Build docs
+        run: julia --project=docs/ docs/make.jl
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: web-doc
+          path: docs/build/
+
+      #- name: Webhook Trigger
+      #  uses: https://github.com/zzzze/webhook-trigger@master
+      #  continue-on-error: true
+      #  with:
+      #    data: "{\"event\":\"action_completed\", \"download_url\":\"deckardcain.local:8099/something\"}"
+      #    webhook_url: ${{ secrets.WEBHOOK_URL }}
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,5 @@ Manifest.toml

 # vscode workspace directory
 .vscode
+.julia
+**/.ipynb_checkpoints/
--- a/Project.toml
+++ b/Project.toml
@@ -4,9 +4,16 @@ authors = ["Anton Reinhard <anton.reinhard@proton.me>"]
 version = "0.1.0"

 [deps]
+AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
+NumaAllocators = "21436f30-1b4a-4f08-87af-e26101bb5379"
+QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

 [extras]
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ Problems:
 - Lots of testing required because mistakes will propagate and multiply.

 ## Other TODOs
- Reduce memory footprint of the graph, are the UUIDs too large? 
+- Reduce memory footprint of the graph
 - Memory layout of Nodes? They should lie linearly in memory, right now probably on heap?
 - Add scaling functions

@@ -53,7 +53,7 @@ For graphs AB->AB^n:
 - Number of ComputeTaskS2 should always be (n+1)!
 - Number of ComputeTaskU should always be (n+3)

-Times are from my home machine: AMD Ryzen 7900X3D, 64GB DDR5 RAM @ 6000MHz
+Times are from my home machine: AMD Ryzen 7900X3D, 64GB DDR5 RAM @ 6000MHz (not necessarily up to date, check Jupyter Notebooks in `notebooks/` instead)

 ```
 $ julia --project examples/import_bench.jl
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -0,0 +1,4 @@
+[deps]
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
+MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -0,0 +1,34 @@
+using Documenter
+using MetagraphOptimization
+
+makedocs(
+    #format = Documenter.LaTeX(platform=""),
+
+    root = "docs",
+    source = "src",
+    build = "build",
+    warnonly = true,
+    clean = true,
+    doctest = true,
+    modules = Module[MetagraphOptimization],
+    #repo = "https://code.woubery.com/Rubydragon/MetagraphOptimization.jl/src/branch/{commit}{path}#L{line}",
+    remotes = nothing,
+    sitename = "MetagraphOptimization.jl",
+    pages = [
+        "index.md",
+        "Manual" => "manual.md",
+        "Library" => [
+            "Public" => "lib/public.md",
+            "Graph" => "lib/internals/graph.md",
+            "Node" => "lib/internals/node.md",
+            "Task" => "lib/internals/task.md",
+            "Operation" => "lib/internals/operation.md",
+            "Models" => "lib/internals/models.md",
+            "Diff" => "lib/internals/diff.md",
+            "Utility" => "lib/internals/utility.md",
+            "Code Generation" => "lib/internals/code_gen.md",
+            "Devices" => "lib/internals/devices.md",
+        ],
+        "Contribution" => "contribution.md",
+    ],
+)
--- a/docs/src/contribution.md
+++ b/docs/src/contribution.md
@@ -0,0 +1,3 @@
+# Contribution
+
+This package is currently being developed as part of a diploma thesis; the repository is therefore private and not open to outside contributions.
--- a/docs/src/flowchart.drawio
+++ b/docs/src/flowchart.drawio
@@ -0,0 +1,75 @@
+<mxfile host="Electron" modified="2023-09-17T13:34:45.840Z" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/21.6.1 Chrome/114.0.5735.134 Electron/25.6.0 Safari/537.36" etag="e0c8qLevhaP_q_R2fyC9" version="21.6.1" type="device">
+  <diagram name="Page-1" id="Vy0cA1nkMPfy-3cC5ahA">
+    <mxGraphModel dx="1185" dy="707" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="500" pageHeight="900" math="0" shadow="0">
+      <root>
+        <mxCell id="0" />
+        <mxCell id="1" parent="0" />
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-1" target="yG8qeggDCLqQ8GwY7ugi-2">
+          <mxGeometry relative="1" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-1" value="Process Generator Script" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
+          <mxGeometry x="180" y="120" width="120" height="60" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-2" target="yG8qeggDCLqQ8GwY7ugi-3">
+          <mxGeometry relative="1" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-2" value="Process Parser" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
+          <mxGeometry x="180" y="220" width="120" height="60" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-8" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.25;exitDx=0;exitDy=0;entryX=0;entryY=0.25;entryDx=0;entryDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-3" target="yG8qeggDCLqQ8GwY7ugi-6">
+          <mxGeometry relative="1" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-3" target="yG8qeggDCLqQ8GwY7ugi-12">
+          <mxGeometry relative="1" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-3" value="Optimizer" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
+          <mxGeometry x="180" y="320" width="120" height="60" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.75;entryDx=0;entryDy=0;exitX=0;exitY=0.75;exitDx=0;exitDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-6" target="yG8qeggDCLqQ8GwY7ugi-3">
+          <mxGeometry relative="1" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-6" value="Fast Cost Estimator&lt;br&gt;(Global Metrics)" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
+          <mxGeometry x="340" y="320" width="120" height="60" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-12" target="yG8qeggDCLqQ8GwY7ugi-13">
+          <mxGeometry relative="1" as="geometry">
+            <Array as="points">
+              <mxPoint x="80" y="450" />
+            </Array>
+          </mxGeometry>
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-12" value="Scheduler" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
+          <mxGeometry x="180" y="420" width="120" height="60" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-13" target="yG8qeggDCLqQ8GwY7ugi-3">
+          <mxGeometry relative="1" as="geometry">
+            <mxPoint x="120" y="380" as="targetPoint" />
+            <Array as="points">
+              <mxPoint x="80" y="350" />
+            </Array>
+          </mxGeometry>
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-12" target="yG8qeggDCLqQ8GwY7ugi-18">
+          <mxGeometry relative="1" as="geometry">
+            <Array as="points">
+              <mxPoint x="240" y="500" />
+              <mxPoint x="240" y="500" />
+            </Array>
+          </mxGeometry>
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-13" value="Accurate Cost Estimator&lt;br&gt;(Machine Specific)" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
+          <mxGeometry x="20" y="370" width="120" height="60" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-18" target="yG8qeggDCLqQ8GwY7ugi-20">
+          <mxGeometry relative="1" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-18" value="Code Generator" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
+          <mxGeometry x="180" y="520" width="120" height="60" as="geometry" />
+        </mxCell>
+        <mxCell id="yG8qeggDCLqQ8GwY7ugi-20" value="Executor" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
+          <mxGeometry x="180" y="620" width="120" height="60" as="geometry" />
+        </mxCell>
+      </root>
+    </mxGraphModel>
+  </diagram>
+</mxfile>
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -0,0 +1,26 @@
+# MetagraphOptimization.jl
+
+*A domain-specific DAG-optimizer*
+
+## Package Features
+- Read a DAG from a file
+- Analyze its properties
+- Mutate the graph using the operations NodeFusion, NodeReduction and NodeSplit
+
+## Coming Soon:
+- Add Code Generation from finished DAG
+- Add optimization algorithms and strategies
+
+## Library Outline
+
+```@contents
+Pages = [
+    "lib/public.md",
+    "lib/internals.md"
+]
+```
+
+### [Index](@id main-index)
+```@index
+Pages = ["lib/public.md"]
+```
--- a/docs/src/lib/internals/code_gen.md
+++ b/docs/src/lib/internals/code_gen.md
@@ -0,0 +1,8 @@
+# Code Generation
+
+## Main
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["code_gen/main.jl"]
+Order = [:function]
+```
--- a/docs/src/lib/internals/devices.md
+++ b/docs/src/lib/internals/devices.md
@@ -0,0 +1,59 @@
+# Devices
+
+## Interface
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["devices/interface.jl"]
+Order = [:type, :constant, :function]
+```
+
+## Detect
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["devices/detect.jl"]
+Order = [:function]
+```
+
+## Measure
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["devices/measure.jl"]
+Order = [:function]
+```
+
+## Implementations
+
+### General
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["devices/impl.jl"]
+Order = [:type, :function]
+```
+
+### NUMA
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["devices/numa/impl.jl"]
+Order = [:type, :function]
+```
+
+### CUDA
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["devices/cuda/impl.jl"]
+Order = [:type, :function]
+```
+
+### ROCm
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["devices/rocm/impl.jl"]
+Order = [:type, :function]
+```
+
+### oneAPI
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["devices/oneapi/impl.jl"]
+Order = [:type, :function]
+```
--- a/docs/src/lib/internals/diff.md
+++ b/docs/src/lib/internals/diff.md
@@ -0,0 +1,22 @@
+# Diff
+
+## Type
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["diff/type.jl"]
+Order   = [:type]
+```
+
+## Properties
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["diff/properties.jl"]
+Order   = [:function]
+```
+
+## Printing
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["diff/print.jl"]
+Order   = [:function]
+```
--- a/docs/src/lib/internals/estimator.md
+++ b/docs/src/lib/internals/estimator.md
@@ -0,0 +1,21 @@
+# Estimator
+
+## Interface
+
+The interface that has to be implemented for an estimator.
+
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["estimator/interface.jl"]
+Order = [:type, :constant, :function]
+```
+
+## Global Metric Estimator
+
+Implementation of a global metric estimator. It uses the graph properties compute effort, data transfer, and compute intensity.
+
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["estimator/global_metric.jl"]
+Order = [:type, :function]
+```
--- a/docs/src/lib/internals/graph.md
+++ b/docs/src/lib/internals/graph.md
@@ -0,0 +1,50 @@
+# Graph
+
+## Type
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["graph/type.jl"]
+Order   = [:type]
+```
+
+## Interface
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["graph/interface.jl"]
+Order   = [:function]
+```
+
+## Compare
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["graph/compare.jl"]
+Order   = [:function]
+```
+
+## Mute
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["graph/mute.jl"]
+Order   = [:function]
+```
+
+## Print
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["graph/print.jl"]
+Order   = [:function]
+```
+
+## Properties
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["graph/properties.jl"]
+Order   = [:function]
+```
+
+## Validate
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["graph/validate.jl"]
+Order   = [:function]
+```
--- a/docs/src/lib/internals/models.md
+++ b/docs/src/lib/internals/models.md
@@ -0,0 +1,72 @@
+# Models
+
+## Interface
+
+The interface that has to be implemented for a model to be usable is defined in `src/models/interface.jl`.
+
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/interface.jl"]
+Order = [:type, :constant, :function]
+```
+
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/print.jl"]
+Order = [:function]
+```
+
+## ABC-Model
+
+### Types
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/abc/types.jl"]
+Order   = [:type, :constant]
+```
+
+### Particle
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/abc/particle.jl"]
+Order   = [:type, :constant, :function]
+```
+
+### Parse
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/abc/parse.jl"]
+Order   = [:function]
+```
+
+### Properties
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/abc/properties.jl"]
+Order   = [:function]
+```
+
+### Create
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/abc/create.jl"]
+Order = [:function]
+```
+
+### Compute
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/abc/compute.jl"]
+Order = [:function]
+```
+
+### Print
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["models/abc/print.jl"]
+Order = [:function]
+```
+
+## QED-Model
+
+*To be added*
--- a/docs/src/lib/internals/node.md
+++ b/docs/src/lib/internals/node.md
@@ -0,0 +1,43 @@
+# Node
+
+## Type
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["node/type.jl"]
+Order   = [:type]
+```
+
+## Create
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["node/create.jl"]
+Order   = [:function]
+```
+
+## Compare
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["node/compare.jl"]
+Order   = [:function]
+```
+
+## Properties
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["node/properties.jl"]
+Order   = [:function]
+```
+
+## Print
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["node/print.jl"]
+Order   = [:function]
+```
+
+## Validate
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["node/validate.jl"]
+Order   = [:function]
+```
--- a/docs/src/lib/internals/operation.md
+++ b/docs/src/lib/internals/operation.md
@@ -0,0 +1,57 @@
+# Operation
+
+## Types
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["operation/type.jl"]
+Order   = [:type]
+```
+
+## Find
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["operation/find.jl"]
+Order   = [:function]
+```
+
+## Apply
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["operation/apply.jl"]
+Order   = [:function]
+```
+
+## Get
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["operation/get.jl"]
+Order   = [:function]
+```
+
+## Clean
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["operation/clean.jl"]
+Order   = [:function]
+```
+
+## Utility
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["operation/utility.jl"]
+Order   = [:function]
+```
+
+## Print
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["operation/print.jl"]
+Order   = [:function]
+```
+
+## Validate
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["operation/validate.jl"]
+Order   = [:function]
+```
--- a/docs/src/lib/internals/properties.md
+++ b/docs/src/lib/internals/properties.md
@@ -0,0 +1,22 @@
+# Properties
+
+## Type
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["properties/type.jl"]
+Order   = [:type]
+```
+
+## Create
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["properties/create.jl"]
+Order   = [:function]
+```
+
+## Utility
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["properties/utility.jl"]
+Order   = [:function]
+```
--- a/docs/src/lib/internals/scheduler.md
+++ b/docs/src/lib/internals/scheduler.md
@@ -0,0 +1,15 @@
+# Scheduler
+
+## Interface
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["scheduler/interface.jl"]
+Order   = [:type, :function]
+```
+
+## Greedy
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["scheduler/greedy.jl"]
+Order   = [:type, :function]
+```
--- a/docs/src/lib/internals/task.md
+++ b/docs/src/lib/internals/task.md
@@ -0,0 +1,43 @@
+# Task
+
+## Type
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["task/type.jl"]
+Order   = [:type]
+```
+
+## Create
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["task/create.jl"]
+Order   = [:function]
+```
+
+## Compare
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["task/compare.jl"]
+Order   = [:function]
+```
+
+## Compute
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["task/compute.jl"]
+Order   = [:function]
+```
+
+## Properties
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["task/properties.jl"]
+Order   = [:function]
+```
+
+## Print
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages = ["task/print.jl"]
+Order   = [:function]
+```
--- a/docs/src/lib/internals/utility.md
+++ b/docs/src/lib/internals/utility.md
@@ -0,0 +1,17 @@
+# Utility
+
+## Helper Functions
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages   = ["./utility.jl"]
+Order   = [:type, :function]
+```
+
+## Trie Helper
+This is a simple implementation of a [Trie Data Structure](https://en.wikipedia.org/wiki/Trie) to greatly improve the performance of the Node Reduction search.
+
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages   = ["trie.jl"]
+Order   = [:type, :function]
+```
--- a/docs/src/lib/public.md
+++ b/docs/src/lib/public.md
@@ -0,0 +1,24 @@
+# Public Documentation
+
+Documentation for `MetagraphOptimization.jl`'s public interface.
+
+See the Internals section of the manual for documentation of everything else.
+
+```@autodocs
+Modules = [MetagraphOptimization]
+Pages   = ["MetagraphOptimization.jl"]
+Order   = [:module]
+```
+
+## Contents
+
+```@contents
+Pages = ["public.md"]
+Depth = 2
+```
+
+## Index
+
+```@index
+Pages = ["public.md"]
+```
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -0,0 +1,7 @@
+# Manual
+
+## Jupyter Notebooks
+
+The `notebooks` directory contains Jupyter notebooks with examples of how to use this package.
+
+- `abc_model_showcase`: A simple showcase of the intended usage of the ABC Model implementation.
--- a/examples/Project.toml
+++ b/examples/Project.toml
@@ -1,7 +1,3 @@
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
-PProf = "e4faabce-9ead-11e9-39d9-4379958e3056"
-Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
--- a/examples/ab5.jl
+++ b/examples/ab5.jl
@@ -0,0 +1,34 @@
+using MetagraphOptimization
+using BenchmarkTools
+
+println("Getting machine info")
+@time machine = get_machine_info()
+
+println("Making model")
+@time model = ABCModel()
+
+println("Making process")
+process_str = "AB->ABBBBB"
+@time process = parse_process(process_str, model)
+
+println("Parsing DAG")
+@time graph = parse_dag("input/$process_str.txt", model)
+
+println("Generating input data")
+@time input_data = [gen_process_input(process) for _ in 1:1000]
+
+include("profiling_utilities.jl")
+println("Reducing graph")
+@time reduce_all!(graph)
+
+println("Generating compute function")
+@time compute_func = get_compute_function(graph, process, machine)
+
+println("First run, single argument")
+@time compute_func(input_data[1])
+
+println("\nBenchmarking function, 1 input")
+display(@benchmark compute_func($(input_data[1])))
+
+println("\nBenchmarking function, 1000 inputs")
+display(@benchmark compute_func.($input_data))
--- a/examples/ab7.jl
+++ b/examples/ab7.jl
@@ -0,0 +1,34 @@
+using MetagraphOptimization
+using BenchmarkTools
+
+println("Getting machine info")
+@time machine = get_machine_info()
+
+println("Making model")
+@time model = ABCModel()
+
+println("Making process")
+process_str = "AB->ABBBBBBB"
+@time process = parse_process(process_str, model)
+
+println("Parsing DAG")
+@time graph = parse_dag("input/$process_str.txt", model)
+
+println("Generating input data")
+@time input_data = [gen_process_input(process) for _ in 1:1000]
+
+include("profiling_utilities.jl")
+println("Reducing graph")
+@time reduce_all!(graph)
+
+println("Generating compute function")
+@time compute_func = get_compute_function(graph, process, machine)
+
+println("First run, single argument")
+@time compute_func(input_data[1])
+
+println("\nBenchmarking function, 1 input")
+display(@benchmark compute_func($(input_data[1])))
+
+println("\nBenchmarking function, 1000 inputs")
+display(@benchmark compute_func.($input_data))
--- a/examples/import_bench.jl
+++ b/examples/import_bench.jl
@@ -13,18 +13,20 @@ function bench_txt(filepath::String, bench::Bool = true)
        return
    end

+    model = ABCModel()
+
    println(name, ":")
-    g = parse_abc(filepath)
+    g = parse_dag(filepath, model)
    print(g)
-    #println("  Graph size in memory: ", bytes_to_human_readable(Base.summarysize(g)))
+    println("  Graph size in memory: ", bytes_to_human_readable(MetagraphOptimization.mem(g)))

    if (bench)
-        @btime parse_abc($filepath)
+        @btime parse_dag($filepath, $model)
    end

    println("  Get Operations: ")
    @time get_operations(g)
-    println()
+    return println()
 end

 function import_bench()
@@ -34,7 +36,7 @@ function import_bench()
    bench_txt("AB->ABBBBBBB.txt")
    #bench_txt("AB->ABBBBBBBBB.txt")
    bench_txt("ABAB->ABAB.txt")
-    bench_txt("ABAB->ABC.txt")
+    return bench_txt("ABAB->ABC.txt")
 end

 import_bench()
--- a/examples/plot_chain.jl
+++ b/examples/plot_chain.jl
@@ -12,7 +12,7 @@ function gen_plot(filepath)
        return
    end

-    g = parse_abc(filepath)
+    g = parse_dag(filepath, ABCModel())

    Random.seed!(1)

@@ -21,7 +21,7 @@ function gen_plot(filepath)
    x = Vector{Float64}()
    y = Vector{Float64}()

-    for i = 1:30
+    for i in 1:30
        print("\r", i)
        # push
        opt = get_operations(g)
@@ -38,23 +38,23 @@ function gen_plot(filepath)
            push_operation!(g, rand(collect(opt.nodeSplits)))
            println("NS")
        else
-            i = i-1
+            i = i - 1
        end

-        props = graph_properties(g)
+        props = get_properties(g)
        push!(x, props.data)
-        push!(y, props.compute_effort)
+        push!(y, props.computeEffort)
    end

    println("\rDone.")

-    plot([x[1], x[2]], [y[1], y[2]], linestyle = :solid, linewidth = 1, color = :red, legend=false)
+    plot([x[1], x[2]], [y[1], y[2]], linestyle = :solid, linewidth = 1, color = :red, legend = false)
    # Create lines connecting the reference point to each data point
    for i in 3:length(x)
-        plot!([x[i-1], x[i]], [y[i-1], y[i]], linestyle = :solid, linewidth = 1, color = :red)
+        plot!([x[i - 1], x[i]], [y[i - 1], y[i]], linestyle = :solid, linewidth = 1, color = :red)
    end

-    gui()
+    return gui()
 end

 gen_plot("AB->ABBB.txt")
--- a/examples/plot_star.jl
+++ b/examples/plot_star.jl
@@ -12,13 +12,13 @@ function gen_plot(filepath)
        return
    end

-    g = parse_abc(filepath)
+    g = parse_dag(filepath, ABCModel())

    Random.seed!(1)

    println("Random Walking... ")

-    for i = 1:30
+    for i in 1:30
        print("\r", i)
        # push
        opt = get_operations(g)
@@ -35,7 +35,7 @@ function gen_plot(filepath)
            push_operation!(g, rand(collect(opt.nodeSplits)))
            println("NS")
        else
-            i = i-1
+            i = i - 1
        end
    end

@@ -44,9 +44,9 @@ function gen_plot(filepath)



-    props = graph_properties(g)
+    props = get_properties(g)
    x0 = props.data
-    y0 = props.compute_effort
+    y0 = props.computeEffort

    x = Vector{Float64}()
    y = Vector{Float64}()
@@ -55,33 +55,33 @@ function gen_plot(filepath)
    opt = get_operations(g)
    for op in opt.nodeFusions
        push_operation!(g, op)
-        props = graph_properties(g)
+        props = get_properties(g)
        push!(x, props.data)
-        push!(y, props.compute_effort)
+        push!(y, props.computeEffort)
        pop_operation!(g)

-        push!(names, "NF: (" * string(props.data) * ", " * string(props.compute_effort) * ")")
+        push!(names, "NF: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
    end
    for op in opt.nodeReductions
        push_operation!(g, op)
-        props = graph_properties(g)
+        props = get_properties(g)
        push!(x, props.data)
-        push!(y, props.compute_effort)
+        push!(y, props.computeEffort)
        pop_operation!(g)

-        push!(names, "NR: (" * string(props.data) * ", " * string(props.compute_effort) * ")")
+        push!(names, "NR: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
    end
    for op in opt.nodeSplits
        push_operation!(g, op)
-        props = graph_properties(g)
+        props = get_properties(g)
        push!(x, props.data)
-        push!(y, props.compute_effort)
+        push!(y, props.computeEffort)
        pop_operation!(g)

-        push!(names, "NS: (" * string(props.data) * ", " * string(props.compute_effort) * ")")
+        push!(names, "NS: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
    end
-    
-    plot([x0, x[1]], [y0, y[1]], linestyle = :solid, linewidth = 1, color = :red, legend=false)
+
+    plot([x0, x[1]], [y0, y[1]], linestyle = :solid, linewidth = 1, color = :red, legend = false)
    # Create lines connecting the reference point to each data point
    for i in 2:length(x)
        plot!([x0, x[i]], [y0, y[i]], linestyle = :solid, linewidth = 1, color = :red)
@@ -90,7 +90,7 @@ function gen_plot(filepath)

    print(names)

-    gui()
+    return gui()
 end

 gen_plot("AB->ABBB.txt")
--- a/examples/profiling_utilities.jl
+++ b/examples/profiling_utilities.jl
@@ -1,11 +1,11 @@

-function test_random_walk(g::DAG, n::Int64)
-    # the purpose here is to do "random" operations and reverse them again and validate that the graph stays the same and doesn't diverge
+function random_walk!(g::DAG, n::Int64)
+    # the purpose here is to do "random" operations on the graph to simulate an optimizer
    reset_graph!(g)

-    properties = graph_properties(g)
+    properties = get_properties(g)

-    for i = 1:n
+    for i in 1:n
        # choose push or pop
        if rand(Bool)
            # push
@@ -32,5 +32,28 @@ function test_random_walk(g::DAG, n::Int64)
        end
    end

+    return nothing
+end
+
+function reduce_all!(g::DAG) # reset g, then push node reductions until get_operations(g) offers none
    reset_graph!(g)
-end
+
+    opt = get_operations(g)
+    while (!isempty(opt.nodeReductions))
+        push_operation!(g, pop!(opt.nodeReductions)) # take one pending reduction and push it onto g
+
+        if (isempty(opt.nodeReductions)) # current batch used up: query g again for further reductions
+            opt = get_operations(g)
+        end
+    end
+    return nothing
+end
+
+function reduce_one!(g::DAG) # push at most one node reduction onto g; always returns nothing
+    opt = get_operations(g)
+    if !isempty(opt.nodeReductions) # the push is skipped when no reduction is available
+        push_operation!(g, pop!(opt.nodeReductions))
+    end
+    opt = get_operations(g) # NOTE(review): result unused; presumably called to regenerate operations - confirm
+    return nothing
+end
--- a/input/AB->ABBBBBBBBB.txt
+++ b/input/AB->ABBBBBBBBB.txt
--- a/notebooks/abc_model_large.ipynb
+++ b/notebooks/abc_model_large.ipynb
@@ -0,0 +1,637 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using MetagraphOptimization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 1 NUMA nodes\n",
+      "CUDA is non-functional\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get machine and set dictionary caching strategy\n",
+    "machine = get_machine_info()\n",
+    "MetagraphOptimization.set_cache_strategy(machine.devices[1], MetagraphOptimization.Dictionary())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Graph:\n",
+      "  Nodes: Total: 7854, ComputeTaskP: 8, ComputeTaskS2: 720, \n",
+      "         ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 1230, \n",
+      "         ComputeTaskV: 1956, DataTask: 3931\n",
+      "  Edges: 11241\n",
+      "  Total Compute Effort: 33915.0\n",
+      "  Total Data Transfer: 322464.0\n",
+      "  Total Compute Intensity: 0.10517453111044954\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = ABCModel()\n",
+    "process_str = \"AB->ABBBBB\"\n",
+    "process = parse_process(process_str, model)\n",
+    "graph = parse_dag(\"../input/$process_str.txt\", model)\n",
+    "print(graph)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "compute__ae7097a4_7bfc_11ee_2cec_190d7ced64f1 (generic function with 1 method)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_AB_AB5 = get_compute_function(graph, process, machine)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  0.140021 seconds (791.41 k allocations: 30.317 MiB, 9.74% gc time)\n",
+      "Graph:\n",
+      "  Nodes: Total: 4998, ComputeTaskP: 8, ComputeTaskS2: 720, \n",
+      "         ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 516, \n",
+      "         ComputeTaskV: 1242, DataTask: 2503\n",
+      "  Edges: 7671\n",
+      "  Total Compute Effort: 21777.0\n",
+      "  Total Data Transfer: 219648.0\n",
+      "  Total Compute Intensity: 0.09914499562937062\n"
+     ]
+    }
+   ],
+   "source": [
+    "include(\"../examples/profiling_utilities.jl\")\n",
+    "@time reduce_all!(graph)\n",
+    "print(graph)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  3.626740 seconds (1.52 M allocations: 114.358 MiB, 0.84% gc time)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "compute__bad8f2ac_7bfc_11ee_176b_b72dc8919aad (generic function with 1 method)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@time compute_AB_AB5_reduced = get_compute_function(graph, process, machine)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  2.130952 seconds (4.31 M allocations: 276.129 MiB, 4.50% gc time, 99.02% compilation time)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "1000-element Vector{ABCProcessInput}:\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [4.694213004647641, 0.0, 0.0, 4.58646222408983]\n",
+       "    B: [4.694213004647641, 0.0, 0.0, -4.58646222408983]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.1989656045893697, -0.40235742161696864, 0.06512533692021122, 0.5209469423550988]\n",
+       "    B: [-1.2555060342925868, 0.3685683194051901, 0.4785890883121294, -0.4597882997907804]\n",
+       "    B: [-2.189083660521547, 0.31663070338411387, 0.1742479621961443, -1.9134967776579581]\n",
+       "    B: [-1.0637129314000269, -0.2948512505337184, 0.0500740340487307, -0.2050378784528044]\n",
+       "    B: [-1.6149410305664367, 1.0344652685816964, -0.406159957064284, 0.6106965118475143]\n",
+       "    B: [-2.0662167479253144, -1.0224556192203134, -0.3618764644129321, 1.4466795016989296]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [5.621657834589244, 0.0, 0.0, 5.532001157736559]\n",
+       "    B: [5.621657834589244, 0.0, 0.0, -5.532001157736559]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-2.058801595505931, 0.7220299456693885, 0.22719930902793095, 1.6327024349806234]\n",
+       "    B: [-1.1826215869997767, 0.04638669502532437, -0.553508153090363, -0.30011800516629]\n",
+       "    B: [-2.3776830758041227, -0.8637209881441633, -0.22710813067439403, 1.9636152272240621]\n",
+       "    B: [-1.9086249240920268, 0.02598092498567318, -1.087715954825374, -1.2079106316365085]\n",
+       "    B: [-2.6526208210236426, 0.3117066248738638, 1.6178469805428013, -1.8225826038033035]\n",
+       "    B: [-1.0629636657529868, -0.24238320241008685, 0.023285949019398133, -0.2657064215985837]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.176284774018432, 0.0, 0.0, 6.094792335245879]\n",
+       "    B: [6.176284774018432, 0.0, 0.0, -6.094792335245879]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-3.2943110238771185, 1.9799744259594443, 2.3805040294128346, 0.5151572192390796]\n",
+       "    B: [-1.0255775134941767, 0.18009906891836583, -0.12779691496180498, 0.05514988745120904]\n",
+       "    B: [-1.7854209452644407, -0.56381615584479, -0.9572322565407875, 0.9764966468120639]\n",
+       "    B: [-3.3312939695760786, -0.5949754252793171, -2.9420979921841868, -1.0428725518649993]\n",
+       "    B: [-1.6551651824618003, -0.8748451354288965, 0.9749427327758187, -0.1539624566503731]\n",
+       "    B: [-1.260800913363249, -0.12643677832480643, 0.6716804014981268, -0.34996874498697933]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [4.747497785190141, 0.0, 0.0, 4.640984294348053]\n",
+       "    B: [4.747497785190141, 0.0, 0.0, -4.640984294348053]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.3704329562088802, 0.8292801285050307, 0.2251475790952209, 0.3737506167990253]\n",
+       "    B: [-1.352958681672649, 0.11120507604905326, 0.6088733084867489, -0.6688825902852584]\n",
+       "    B: [-1.4224569379606473, -0.25277059018918374, -0.4925475402927904, -0.84669220478242]\n",
+       "    B: [-2.4534584066229996, -0.23638988525842838, -1.4120549440785204, 1.7232756047945383]\n",
+       "    B: [-1.4378719974624208, 0.5461758322111039, 0.8131489669135029, -0.3285674953530594]\n",
+       "    B: [-1.457816590452685, -0.9975005613175758, 0.257432629875838, -0.25288393117282576]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.148648417619223, 0.0, 0.0, 6.066784763240853]\n",
+       "    B: [6.148648417619223, 0.0, 0.0, -6.066784763240853]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.5381168736188293, 0.5769721565317305, 1.0069443436143835, 0.13773066601554382]\n",
+       "    B: [-1.3178580311796126, 0.27781510267038506, -0.8083323925420551, 0.07853217328003184]\n",
+       "    B: [-1.5330954954905804, 0.4994081736550063, -1.0290017953406905, 0.20525247761163526]\n",
+       "    B: [-3.083592979398096, -2.1497728433794587, -1.2247634566690573, -1.5449844205264607]\n",
+       "    B: [-3.1391572693216845, 0.49043306139044257, 2.931865230552653, 0.13397777318202247]\n",
+       "    B: [-1.6854761862296446, 0.30514434913189475, -0.876711929615233, 0.989491330437227]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [7.422637433466136, 0.0, 0.0, 7.35496746890785]\n",
+       "    B: [7.422637433466136, 0.0, 0.0, -7.35496746890785]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-3.3788591199517355, 2.3069724486616927, -0.5016400230094518, 2.2006645271171985]\n",
+       "    B: [-2.193241133599192, -1.652465184572841, -0.691853387986234, -0.7752447184070871]\n",
+       "    B: [-2.295315825041209, 0.334376552772819, 0.5374003175214306, 1.966689593293318]\n",
+       "    B: [-2.3721558149969235, -2.0813404180022568, 0.4923496733367945, 0.22964554029865022]\n",
+       "    B: [-1.5367714331999278, 0.9008878309070798, 0.1482895506792473, -0.7266895920420517]\n",
+       "    B: [-3.068931540143284, 0.1915687702335065, 0.015453869458212284, -2.8950653502600274]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.716486802754837, 0.0, 0.0, 6.64162592830851]\n",
+       "    B: [6.716486802754837, 0.0, 0.0, -6.64162592830851]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.3263331205917814, -0.5023870926274977, 0.418137178911541, 0.5761319775467438]\n",
+       "    B: [-2.1603199304697136, -1.202627416523187, 1.024176720111292, -1.0824654936733602]\n",
+       "    B: [-1.1665818595303201, 0.5747508534091106, 0.05041215840441908, 0.16743149576984034]\n",
+       "    B: [-1.829760754209137, 0.5127529745920416, -0.17835468593467171, -1.4329334983509001]\n",
+       "    B: [-2.891550940379351, -2.652621236308268, 0.3953841214715819, 0.41029113320086874]\n",
+       "    B: [-4.05842700032937, 3.2701319174577996, -1.7097554929641623, 1.3615443855068068]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [7.700331598721008, 0.0, 0.0, 7.635123229539995]\n",
+       "    B: [7.700331598721008, 0.0, 0.0, -7.635123229539995]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-2.382743739041896, -1.410381415274026, 1.0613871843128353, 1.2496996576655786]\n",
+       "    B: [-3.021630369232257, 0.25595209564405125, -2.8389223073732714, 0.07251720968504605]\n",
+       "    B: [-2.7262381500229256, 1.0736489469437192, 2.293577756890956, 0.13839603484966886]\n",
+       "    B: [-2.222260574660266, 1.5432031708495264, -0.7055857379280247, 1.0291330339668954]\n",
+       "    B: [-1.650055097318715, -1.062833285640475, -0.34598865120359784, 0.6880109623839291]\n",
+       "    B: [-3.397735267165956, -0.3995895125227963, 0.5355317553011019, -3.1777568985511193]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [4.9341647451125334, 0.0, 0.0, 4.8317679716550375]\n",
+       "    B: [4.9341647451125334, 0.0, 0.0, -4.8317679716550375]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.834221818900379, 0.1070495973399568, 1.2695354794210922, 0.860923766155068]\n",
+       "    B: [-1.5116322118250454, 0.39753882899610743, -0.756426277560466, -0.7448584495617266]\n",
+       "    B: [-1.6588475476725886, 0.06712527283179799, 0.6875031760830096, -1.1289857249063835]\n",
+       "    B: [-1.5718164783029667, 0.4294130824657117, -0.6215317131811225, -0.9486357444151968]\n",
+       "    B: [-1.7838526603309615, -0.5732435925039472, -0.9425541080554634, 0.9824020820472578]\n",
+       "    B: [-1.5079587731931232, -0.4278831891296266, 0.36347344329295106, 0.979154070680981]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [7.099667747066588, 0.0, 0.0, 7.028889109862067]\n",
+       "    B: [7.099667747066588, 0.0, 0.0, -7.028889109862067]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-3.851129225519823, 2.5555470019017212, -2.502060728335724, 1.019837214678957]\n",
+       "    B: [-2.3860288930086897, 0.6059782347076652, 0.6711053982516709, 1.9686395814801452]\n",
+       "    B: [-1.9543999030878276, -1.5857282951514855, 0.5255033921941499, -0.17026726032362857]\n",
+       "    B: [-1.5523812781985644, -1.154244859738803, 0.03484928145183679, -0.2763909626783212]\n",
+       "    B: [-3.2795110937910716, -1.0290377989842119, 1.3607888704851536, -2.616204860580336]\n",
+       "    B: [-1.175885100527199, 0.6074857172651138, -0.09018621404708665, 0.07438628742318319]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.3653048194550985, 0.0, 0.0, 6.286263233796236]\n",
+       "    B: [6.3653048194550985, 0.0, 0.0, -6.286263233796236]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-3.274142279992413, -2.62046758782023, -1.339558866223036, 1.028950598785383]\n",
+       "    B: [-1.8502190446152251, -1.1967169760014287, 0.8476370040459147, 0.5221977611776395]\n",
+       "    B: [-1.3090919645484567, 0.8304076910302604, -0.132118345313184, 0.08178985973111547]\n",
+       "    B: [-1.7699077332157842, 0.8156249668276708, -0.2891156025546255, 1.1763254081859622]\n",
+       "    B: [-1.6671330761442815, 1.2573648831500233, 0.2190135291489001, -0.3878135096217862]\n",
+       "    B: [-2.8601155403940384, 0.913787022813704, 0.6941422808960306, -2.421450118258315]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [5.2620105860572215, 0.0, 0.0, 5.166116085395126]\n",
+       "    B: [5.2620105860572215, 0.0, 0.0, -5.166116085395126]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.9479176369516882, 0.8861257045164052, 1.1018829783040076, 0.8916379636750793]\n",
+       "    B: [-1.2433791528628988, 0.41365857789168176, 0.544699730060495, -0.27960776595565956]\n",
+       "    B: [-1.074755543453127, 0.3002469943380598, 0.01041159782849033, 0.25464253219924826]\n",
+       "    B: [-1.7453891507499704, 1.1576089006622574, 0.03134512003430503, -0.8398466551182168]\n",
+       "    B: [-1.5208938996272057, 0.008686514238768405, -1.1440782944999142, -0.06424682441800389]\n",
+       "    B: [-2.991685788469555, -2.7663266916471727, -0.544261131727384, 0.03742074961755215]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [4.439668869119513, 0.0, 0.0, 4.325582003318043]\n",
+       "    B: [4.439668869119513, 0.0, 0.0, -4.325582003318043]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.1969832203303146, 0.48265768801558717, -0.02482335564392214, 0.4463117598342591]\n",
+       "    B: [-1.7251727113760817, -1.0744400415092346, 0.6322269398265393, 0.6496834443295479]\n",
+       "    B: [-1.419669052608684, -0.4173084301546306, -0.44626125418717505, -0.8013518491074973]\n",
+       "    B: [-1.331289111993432, -0.7645577006899625, -0.3423664341778722, 0.2656453402118452]\n",
+       "    B: [-1.5156451020746182, 0.6491857388484042, 0.8955487542892042, -0.2715333876518423]\n",
+       "    B: [-1.6905785398558963, 1.1244627454898357, -0.7143246501067739, -0.2887553076163127]\n",
+       "\n",
+       " ⋮\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [5.750717080737416, 0.0, 0.0, 5.663104002460582]\n",
+       "    B: [5.750717080737416, 0.0, 0.0, -5.663104002460582]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.0362067302993534, 0.23737037129807034, 0.1316212944823847, 0.007451817649030921]\n",
+       "    B: [-3.597917991072113, -1.5787159301449987, 0.28387609057144564, 3.0613860010767477]\n",
+       "    B: [-1.0798303035395174, -0.06880694215947386, -0.2669312876106363, -0.3000779512850572]\n",
+       "    B: [-1.3394551212059678, -0.7053379424304421, 0.44160810884651497, -0.3187799976376953]\n",
+       "    B: [-3.270241523195321, 1.927780354010675, 0.003047457202140131, -2.4450221348130854]\n",
+       "    B: [-1.1777824921625586, 0.1877100894261692, -0.5932216634918489, -0.004957734989940532]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.84577391627276, 0.0, 0.0, 6.772342320993563]\n",
+       "    B: [6.84577391627276, 0.0, 0.0, -6.772342320993563]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.0594956991232163, -0.09579189209396338, 0.21296650876679918, 0.2607687021353065]\n",
+       "    B: [-1.8300488673592041, 0.8497425690197566, -0.8227483588311224, 0.9747315329664396]\n",
+       "    B: [-2.860723394379955, 0.6743651794772785, 0.1320397309862766, 2.5906631300310776]\n",
+       "    B: [-2.557528905485892, -1.3508678766931497, 1.2829278224554168, -1.4388211440218013]\n",
+       "    B: [-3.790115184858299, 0.47588521284738383, -1.0334447791446917, -3.474262262286086]\n",
+       "    B: [-1.5936357813389537, -0.553333192557306, 0.2282590757673212, 1.086920041175065]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.25909007687458, 0.0, 0.0, 6.178689876537731]\n",
+       "    B: [6.25909007687458, 0.0, 0.0, -6.178689876537731]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-2.15208406752572, -0.27987613820502405, 0.20983197963180572, -1.873260718983155]\n",
+       "    B: [-3.1436326945514232, -2.0821664144960677, -1.9679549582157083, 0.8210741885063981]\n",
+       "    B: [-2.206056617746511, 1.7689323832663284, -0.4273996865759156, -0.7449117612507478]\n",
+       "    B: [-1.8709609004510535, 0.5332842722412897, 1.48760475220818, -0.055988188078690854]\n",
+       "    B: [-1.0916331546903268, 0.018218872767661307, 0.4300802089857822, 0.07976234031782706]\n",
+       "    B: [-2.0538127187841235, 0.04160702442581186, 0.2678377039658561, 1.7733241394883685]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.8752382625158255, 0.0, 0.0, 6.802124753807565]\n",
+       "    B: [6.8752382625158255, 0.0, 0.0, -6.802124753807565]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-3.815955448364548, 1.7284392485789066, 3.22998101457395, -0.37581430702794955]\n",
+       "    B: [-3.705003390432734, 0.8773209536576554, -3.1633610279519866, -1.3966048382509024]\n",
+       "    B: [-1.4798429985544235, -0.876885056483666, -0.05155962504198175, 0.6467994303891397]\n",
+       "    B: [-1.196598159149068, -0.6492448407423084, 0.0066213036625077295, -0.10141227532326653]\n",
+       "    B: [-1.307725757451199, -0.47623875265044, -0.08939192779758245, -0.6894580410872709]\n",
+       "    B: [-2.2453507710796776, -0.6033915523601473, 0.06771026255509205, 1.91649003130025]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.591382068439754, 0.0, 0.0, 6.515083849970707]\n",
+       "    B: [6.591382068439754, 0.0, 0.0, -6.515083849970707]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-2.166341377746586, 0.738656605699622, 1.1097711420427974, -1.3841348908550482]\n",
+       "    B: [-1.9136122405957643, -1.3687809690739081, -0.8052302154690981, 0.37410528752561706]\n",
+       "    B: [-1.020282522629639, 0.01566959851558055, -0.04103060943002397, -0.1976040959992001]\n",
+       "    B: [-3.3680104240574718, -0.44221430614525714, -3.1855463435158966, -0.015336796039828009]\n",
+       "    B: [-1.1380460439601876, 0.33787512483866744, -0.3053034033656307, 0.2962752606648943]\n",
+       "    B: [-3.576471527889859, 0.7187939461652956, 3.227339429737853, 0.9266952347035636]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [7.366791305680796, 0.0, 0.0, 7.298603574756898]\n",
+       "    B: [7.366791305680796, 0.0, 0.0, -7.298603574756898]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.1161936134323496, 0.1815174250263101, -0.30155987378038246, 0.34928677273057857]\n",
+       "    B: [-1.1768168637671912, -0.488638136596838, -0.0387546058981897, 0.38030091090042567]\n",
+       "    B: [-3.8756829146246745, -0.22123631639903027, -3.6727532274395425, -0.694878606198396]\n",
+       "    B: [-1.4161987387916468, -0.42653096897021076, -0.26480462532703347, -0.8680833546784509]\n",
+       "    B: [-3.4638938410201177, 2.8217659294852746, 1.2824429941168167, 1.179634497585545]\n",
+       "    B: [-3.6847966397256138, -1.8668779325455054, 2.995429338328331, -0.346260220339702]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [4.762032860651893, 0.0, 0.0, 4.655851905497903]\n",
+       "    B: [4.762032860651893, 0.0, 0.0, -4.655851905497903]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-2.656166654414924, 2.017338594394486, -1.384735065574992, 0.2609120345236529]\n",
+       "    B: [-1.031990140619295, -0.035004877965791346, -0.20112979442869375, 0.15272561883031827]\n",
+       "    B: [-1.7319386082994335, -1.0359644740176492, 0.8025718625008718, -0.5312883934487891]\n",
+       "    B: [-1.7450617894727098, -0.49163856285061436, 1.1666756465784553, 0.6651316473275205]\n",
+       "    B: [-1.0945973465763637, -0.42438631366397905, -0.017047995524507212, 0.1332252744613839]\n",
+       "    B: [-1.2643111819210613, -0.030344365896452122, -0.3663346535511349, -0.6807061816940867]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.12211537837656, 0.0, 0.0, 6.039892110473065]\n",
+       "    B: [6.12211537837656, 0.0, 0.0, -6.039892110473065]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-2.09449973649211, -1.247911941781509, -0.776547530016726, 1.1075282684200622]\n",
+       "    B: [-2.857971140758051, 1.4507115887866229, 2.2078617054725442, 0.43449006556414854]\n",
+       "    B: [-2.068918524386865, -0.43350532192333185, 1.7407499017717505, -0.24957318745593]\n",
+       "    B: [-1.0503370840395667, 0.28162676024293815, -0.11219953076948735, 0.10632790470480236]\n",
+       "    B: [-1.6648953051752136, 0.3171875953909028, -1.2925202016854087, 0.025689195388605857]\n",
+       "    B: [-2.5076089659013125, -0.36810868071562286, -1.7673443447726724, -1.4244622466216894]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [7.431058837653249, 0.0, 0.0, 7.363466265874004]\n",
+       "    B: [7.431058837653249, 0.0, 0.0, -7.363466265874004]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.4340725727125623, 0.9525417282027518, 0.38239995291064965, -0.05476016666222433]\n",
+       "    B: [-3.5734117962040854, 2.3267511116139916, 2.49915109639257, -0.33127771922267657]\n",
+       "    B: [-2.3529075757582945, 1.185265706342765, -1.375530715171772, 1.1132091075119688]\n",
+       "    B: [-2.710381815585542, -2.1195780947035594, -1.2974231675570782, -0.4126153305389483]\n",
+       "    B: [-2.374272199256637, -1.2400410368129877, 1.6839473809113144, -0.5136028830766439]\n",
+       "    B: [-2.4170717157893766, -1.104939414642962, -1.8925445474856835, 0.1990469919885247]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [4.370360958267613, 0.0, 0.0, 4.254415930013168]\n",
+       "    B: [4.370360958267613, 0.0, 0.0, -4.254415930013168]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.0037967551530176, -0.04979456910726583, -0.007092097585518878, 0.07126098999442977]\n",
+       "    B: [-2.2427356029926337, 0.4432886498747459, -1.2315068062419472, -1.522087101319342]\n",
+       "    B: [-1.576810353663218, -0.08400160217698217, 1.025238316808337, 0.6543401378482231]\n",
+       "    B: [-1.1878570602356244, 0.3852696171578499, -0.47734716319323317, 0.18630996601909597]\n",
+       "    B: [-1.6436772930583505, -1.0018521094453126, 0.4216069097815019, 0.7212593210074284]\n",
+       "    B: [-1.0858448514323804, 0.3070900136969648, 0.26910084043086047, -0.11108331354983517]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [5.940760429560125, 0.0, 0.0, 5.855991332082674]\n",
+       "    B: [5.940760429560125, 0.0, 0.0, -5.855991332082674]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-2.5515863925730233, 0.0574036477190863, 1.9321385747234918, 1.3319678930281418]\n",
+       "    B: [-3.2707523737124977, -2.710802011299676, -1.41016923110446, -0.6006632045712658]\n",
+       "    B: [-1.6965910302662786, 0.9846458960035911, 0.9504416414719069, -0.07452697242920955]\n",
+       "    B: [-1.0283520810617242, 0.1620200166783027, 0.15874691422324994, -0.07782630689000514]\n",
+       "    B: [-1.277724475991329, 0.26836143674120055, -0.33222621981983513, -0.6709602929248032]\n",
+       "    B: [-2.0565145055153993, 1.2383710141574962, -1.298931679494354, 0.09200888378714224]\n",
+       "\n",
+       " Input for ABC Process: 'AB->ABBBBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [6.732994664701373, 0.0, 0.0, 6.65831939417877]\n",
+       "    B: [6.732994664701373, 0.0, 0.0, -6.65831939417877]\n",
+       "  6 Outgoing Particles:\n",
+       "    A: [-1.602557260532173, -0.06659157948757613, 0.9308846463293637, -0.8349904850080558]\n",
+       "    B: [-1.3205375883536927, 0.7078592481114431, -0.05631226213188625, -0.48947291677035515]\n",
+       "    B: [-1.7625153098951976, 0.12706601232750347, 0.34097061443470383, 1.405010137407617]\n",
+       "    B: [-2.7792473938949334, 1.6510422215054068, 1.7155538904747691, -1.0272051928194055]\n",
+       "    B: [-2.722083339444658, -0.5204063912580275, -2.061236049180356, -1.3748530264647703]\n",
+       "    B: [-3.279048437282091, -1.89896951119875, -0.8698608399265956, 2.3215114836549695]\n"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@time inputs = [gen_process_input(process) for _ in 1:1000]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Internal error: stack overflow in type inference of materialize(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(MetagraphOptimization.compute__bad8f2ac_7bfc_11ee_176b_b72dc8919aad), Tuple{Array{MetagraphOptimization.ABCProcessInput, 1}}}).\n",
+      "This might be caused by recursion over very long tuples or argument lists.\n"
+     ]
+    },
+    {
+     "ename": "LoadError",
+     "evalue": "StackOverflowError:",
+     "output_type": "error",
+     "traceback": [
+      "StackOverflowError:",
+      "",
+      "Stacktrace:",
+      "    [1] argtypes_to_type",
+      "      @ ./compiler/typeutils.jl:71 [inlined]",
+      "    [2] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1948",
+      "    [3] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
+      "    [4] abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1566",
+      "    [5] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1855",
+      "    [6] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
+      "    [7] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
+      "    [8] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
+      "    [9] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
+      "   [10] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2682",
+      "   [11] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
+      "   [12] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
+      "   [13] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:246",
+      "   [14] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:216",
+      "   [15] typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:932",
+      "   [16] abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:611",
+      "   [17] abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:152",
+      "   [18] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1949",
+      "--- the last 16 lines are repeated 413 more times ---",
+      " [6627] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
+      " [6628] abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1566",
+      " [6629] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1855",
+      " [6630] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
+      " [6631] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
+      " [6632] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
+      " [6633] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
+      " [6634] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2658",
+      " [6635] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
+      " [6636] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
+      " [6637] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:246",
+      " [6638] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:216",
+      " [6639] typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:932",
+      " [6640] abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:611",
+      " [6641] abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:152",
+      " [6642] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1949",
+      " [6643] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
+      " [6644] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
+      " [6645] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
+      " [6646] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
+      " [6647] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2682",
+      " [6648] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
+      " [6649] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
+      " [6650] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:246",
+      " [6651] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:216",
+      " [6652] typeinf",
+      "      @ ./compiler/typeinfer.jl:12 [inlined]",
+      " [6653] typeinf_type(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:1079",
+      " [6654] return_type(interp::Core.Compiler.NativeInterpreter, t::DataType)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:1140",
+      " [6655] return_type(f::Any, t::DataType)",
+      "      @ Core.Compiler ./compiler/typeinfer.jl:1112",
+      " [6656] combine_eltypes(f::Function, args::Tuple{Vector{ABCProcessInput}})",
+      "      @ Base.Broadcast ./broadcast.jl:730",
+      " [6657] copy(bc::Base.Broadcast.Broadcasted{Style}) where Style",
+      "      @ Base.Broadcast ./broadcast.jl:895",
+      " [6658] materialize(bc::Base.Broadcast.Broadcasted)",
+      "      @ Base.Broadcast ./broadcast.jl:873",
+      " [6659] var\"##core#302\"()",
+      "      @ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:489",
+      " [6660] var\"##sample#303\"(::Tuple{}, __params::BenchmarkTools.Parameters)",
+      "      @ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:495",
+      " [6661] _run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; verbose::Bool, pad::String, kwargs::Base.Pairs{Symbol, Integer, NTuple{4, Symbol}, NamedTuple{(:samples, :evals, :gctrial, :gcsample), Tuple{Int64, Int64, Bool, Bool}}})",
+      "      @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:99",
+      " [6662] #invokelatest#2",
+      "      @ ./essentials.jl:821 [inlined]",
+      " [6663] invokelatest",
+      "      @ ./essentials.jl:816 [inlined]",
+      " [6664] #run_result#45",
+      "      @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]",
+      " [6665] run_result",
+      "      @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]",
+      " [6666] run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; progressid::Nothing, nleaves::Float64, ndone::Float64, kwargs::Base.Pairs{Symbol, Integer, NTuple{5, Symbol}, NamedTuple{(:verbose, :samples, :evals, :gctrial, :gcsample), Tuple{Bool, Int64, Int64, Bool, Bool}}})",
+      "      @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117",
+      " [6667] run (repeats 2 times)",
+      "      @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117 [inlined]",
+      " [6668] #warmup#54",
+      "      @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:169 [inlined]",
+      " [6669] warmup(item::BenchmarkTools.Benchmark)",
+      "      @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:168"
+     ]
+    }
+   ],
+   "source": [
+    "using BenchmarkTools\n",
+    "#compute_bench = @benchmark compute_AB_AB5.(inputs)\n",
+    "compute_bench_reduced = @benchmark compute_AB_AB5_reduced.(inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia 1.9.3",
+   "language": "julia",
+   "name": "julia-1.9"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.9.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/notebooks/abc_model_showcase.ipynb
+++ b/notebooks/abc_model_showcase.ipynb
@@ -0,0 +1,409 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "20768e45-df62-4638-ba33-b0ccf239f1aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using Revise\n",
+    "using MetagraphOptimization\n",
+    "using BenchmarkTools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "ff5f4a49",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 1 NUMA nodes\n",
+      "CUDA is non-functional\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Machine(MetagraphOptimization.AbstractDevice[MetagraphOptimization.NumaNode(0x0000, 0x0001, MetagraphOptimization.LocalVariables(), -1.0, UUID(\"a89974f6-6212-11ee-0866-0f591a3b69ea\"))], [-1.0;;])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Get our machine's info\n",
+    "machine = get_machine_info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "9df482a4-ca44-44c5-9ea7-7a2977d529be",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "ABCModel()"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Create a model identifier\n",
+    "model = ABCModel()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "30b16872-07f7-4d47-8ff8-8c3a849c9d4e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "ABC Process: 'AB->ABBB'"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Create a process in our model\n",
+    "process_str = \"AB->ABBB\"\n",
+    "process = parse_process(process_str, model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "043bd9e2-f89a-4362-885a-8c89d4cdd76f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total: 280, ComputeTaskP"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Graph:\n",
+       "  Nodes: \n",
+       "  Edges: 385\n",
+       "  Total Compute Effort: 1075.0\n",
+       "  Total Data Transfer: 10944.0\n",
+       "  Total Compute Intensity: 0.09822733918128655\n"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ": 6, ComputeTaskU: 6, \n",
+      "         ComputeTaskV: 64, ComputeTaskSum: 1, ComputeTaskS2: 24, \n",
+      "         ComputeTaskS1: 36, DataTask: 143"
+     ]
+    }
+   ],
+   "source": [
+    "# Read the graph (of the same process) from a file\n",
+    "graph = parse_dag(\"../input/$process_str.txt\", model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "02f01ad3-fd10-48d5-a0e0-c03dc83c80a4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Input for ABC Process: 'AB->ABBB':\n",
+       "  2 Incoming particles:\n",
+       "    A: [5.77986599979293, 0.0, 0.0, 5.692701553354288]\n",
+       "    B: [5.77986599979293, 0.0, 0.0, -5.692701553354288]\n",
+       "  4 Outgoing Particles:\n",
+       "    A: [-3.8835293143673746, -1.4292027910861678, 2.8576090179942106, 1.968057422378813]\n",
+       "    B: [-1.1554024905063585, -0.1464656500147254, -0.2082400426692148, 0.5197487980391896]\n",
+       "    B: [-2.849749730594798, -1.0177034035100576, -2.464951858896686, -0.09677625137882176]\n",
+       "    B: [-3.6710504641173287, 2.5933718446109513, -0.1844171164283155, -2.391029969039186]\n"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Generate some random input data for our process\n",
+    "input_data = gen_process_input(process)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "083fb1be-ce2a-47f9-afb9-60a6fdfaed0b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "compute__af4450a2_6212_11ee_2601_cde7cf2aedc1 (generic function with 1 method)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Get the function computing the result of the process from a ProcessInput\n",
+    "AB_AB3_compute = get_compute_function(graph, process, machine)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "a40c9500-8f79-4f04-b3c5-59b72a6b7ba9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-1.8924431710735022e-13"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Actually compute a result using the generated function and the input data\n",
+    "result = AB_AB3_compute(input_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "80c70010",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "include(\"../examples/profiling_utilities.jl\")\n",
+    "\n",
+    "# We can also mutate the graph by applying some operations to it\n",
+    "reduce_all!(graph)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "5b192b44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The result should be the same as before (we can use execute to save having to generate the function ourselves)\n",
+    "@assert result ≈ execute(graph, process, machine, input_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "9b2f4a3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1000-element Vector{Float64}:\n",
+       " -2.1491995259940396e-11\n",
+       " -1.04995646459455e-11\n",
+       "  5.821760691187782e-15\n",
+       " -6.556969485683705e-14\n",
+       " -1.3588086164373753e-14\n",
+       " -1.8789662441593694e-13\n",
+       " -2.131973301835892e-13\n",
+       " -5.3359759072004825e-12\n",
+       " -9.053914191490223e-13\n",
+       " -5.61107901706923e-13\n",
+       " -5.063492275603428e-11\n",
+       "  2.9168508985811397e-15\n",
+       " -1.6420151378194157e-13\n",
+       "  ⋮\n",
+       "  1.0931677247833436e-13\n",
+       " -7.704755306462797e-16\n",
+       " -1.8385907037491397e-12\n",
+       " -6.036215596560059e-14\n",
+       " -9.98872401400362e-12\n",
+       "  3.4861755637292935e-13\n",
+       " -1.1051119822969222e-10\n",
+       " -2.496572513216201e-12\n",
+       " -3.8682427847201926e-11\n",
+       "  7.904149696653438e-15\n",
+       " -7.606811743178716e-11\n",
+       " -5.100594937480292e-13"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Now we can generate a function and use it on lots of inputs\n",
+    "inputs = [gen_process_input(process) for _ in 1:1000]\n",
+    "AB_AB3_reduced_compute = get_compute_function(graph, process, machine)\n",
+    "\n",
+    "results = AB_AB3_reduced_compute.(inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "d43e4ff0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BenchmarkTools.Trial: 879 samples with 1 evaluation.\n",
+       " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m4.567 ms\u001b[22m\u001b[39m … \u001b[35m14.334 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 54.51%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m4.998 ms              \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m0.00%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m5.686 ms\u001b[22m\u001b[39m ± \u001b[32m 1.414 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m9.09% ± 14.49%\n",
+       "\n",
+       "  \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m▇\u001b[39m█\u001b[34m▅\u001b[39m\u001b[39m▄\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
+       "  \u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[32m█\u001b[39m\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▄\u001b[39m▅\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▁\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▁\u001b[39m▄\u001b[39m \u001b[39m▇\n",
+       "  4.57 ms\u001b[90m      \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m       10 ms \u001b[0m\u001b[1m<\u001b[22m\n",
+       "\n",
+       " Memory estimate\u001b[90m: \u001b[39m\u001b[33m6.17 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m143006\u001b[39m."
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@benchmark results = AB_AB3_compute.($inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "e18d9546",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BenchmarkTools.Trial: 1089 samples with 1 evaluation.\n",
+       " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m3.637 ms\u001b[22m\u001b[39m … \u001b[35m10.921 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 59.52%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m4.098 ms              \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m 0.00%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m4.587 ms\u001b[22m\u001b[39m ± \u001b[32m 1.334 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m10.21% ± 15.77%\n",
+       "\n",
+       "  \u001b[39m \u001b[39m▂\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[34m█\u001b[39m\u001b[39m▆\u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
+       "  \u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[32m▆\u001b[39m\u001b[39m▅\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m \u001b[39m█\n",
+       "  3.64 ms\u001b[90m      \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m     8.78 ms \u001b[0m\u001b[1m<\u001b[22m\n",
+       "\n",
+       " Memory estimate\u001b[90m: \u001b[39m\u001b[33m5.26 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m123006\u001b[39m."
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@benchmark results = AB_AB3_reduced_compute.($inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "13efed12-3547-400b-a7a2-5dfae9a973a2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set a different caching strategy\n",
+    "MetagraphOptimization.set_cache_strategy(machine.devices[1], MetagraphOptimization.Dictionary())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "ef62716b-a219-4f6e-9150-f984d3734839",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BenchmarkTools.Trial: 331 samples with 1 evaluation.\n",
+       " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m12.148 ms\u001b[22m\u001b[39m … \u001b[35m24.164 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 13.35%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m15.412 ms              \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m17.47%\n",
+       " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m15.117 ms\u001b[22m\u001b[39m ± \u001b[32m 2.194 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m12.31% ±  8.95%\n",
+       "\n",
+       "  \u001b[39m \u001b[39m▄\u001b[39m█\u001b[39m▄\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m▄\u001b[39m\u001b[39m▄\u001b[34m▂\u001b[39m\u001b[39m \u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
+       "  \u001b[39m▅\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▅\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▅\u001b[39m▂\u001b[39m▃\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[32m█\u001b[39m\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▇\u001b[39m█\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▆\u001b[39m▅\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▃\n",
+       "  12.1 ms\u001b[90m         Histogram: frequency by time\u001b[39m          21 ms \u001b[0m\u001b[1m<\u001b[22m\n",
+       "\n",
+       " Memory estimate\u001b[90m: \u001b[39m\u001b[33m27.46 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m118013\u001b[39m."
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# ... and bench again\n",
+    "AB_AB3_reduced_dict_compute = get_compute_function(graph, process, machine)\n",
+    "@benchmark results = AB_AB3_reduced_dict_compute.($inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5461ffd4-6a0e-4f1f-b1f1-3a2854a8ae88",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia 1.9.3",
+   "language": "julia",
+   "name": "julia-1.9"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.9.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/profiling.ipynb
+++ b/notebooks/profiling.ipynb
@@ -0,0 +1,70 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using Revise; using MetagraphOptimization; using BenchmarkTools; using ProfileView\n",
+    "using Base.Threads\n",
+    "nthreads()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = ABCModel()\n",
+    "process_str = \"AB->ABBBBB\"\n",
+    "process = parse_process(process_str, model)\n",
+    "graph = parse_dag(\"../input/$process_str.txt\", model)\n",
+    "print(graph)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "include(\"../examples/profiling_utilities.jl\")\n",
+    "@ProfileView.profview reduce_all!(graph)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@ProfileView.profview comp_func = get_compute_function(graph, process)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Julia 1.9.3",
+   "language": "julia",
+   "name": "julia-1.9"
+  },
+  "language_info": {
+   "file_extension": ".jl",
+   "mimetype": "application/julia",
+   "name": "julia",
+   "version": "1.9.3"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/scripts/bench_threads.fish
+++ b/scripts/bench_threads.fish
@@ -6,20 +6,20 @@ julia --project=./examples -t 4 -e 'import Pkg; Pkg.instantiate()'

 #for i in $(seq $minthreads $maxthreads)
 #   printf "(AB->AB, $i) "
-#   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->AB.txt"))'
+#   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->AB.txt", ABCModel()))'
 #end

 #for i in $(seq $minthreads $maxthreads)
 #   printf "(AB->ABBB, $i) "
-#   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBB.txt"))'
+#   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBB.txt", ABCModel()))'
 #end

 #for i in $(seq $minthreads $maxthreads)
 #   printf "(AB->ABBBBB, $i) "
-#   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBBBB.txt"))'
+#   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBBBB.txt", ABCModel()))'
 #end

 for i in $(seq $minthreads $maxthreads)
    printf "(AB->ABBBBBBB, $i) "
-   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBBBBBB.txt"))'
+   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBBBBBB.txt", ABCModel()))'
 end
--- a/src/MetagraphOptimization.jl
+++ b/src/MetagraphOptimization.jl
@@ -1,10 +1,76 @@
+"""
+    MetagraphOptimization
+
+A module containing tools to work on DAGs.
+"""
 module MetagraphOptimization

-export Node, Edge, ComputeTaskNode, DataTaskNode, DAG
-export AbstractTask, AbstractComputeTask, AbstractDataTask, DataTask, FusedComputeTask
-export make_node, make_edge, insert_node, insert_edge, is_entry_node, is_exit_node, parents, children, compute, graph_properties, get_exit_node, is_valid
-export NodeFusion, NodeReduction, NodeSplit, push_operation!, pop_operation!, can_pop, reset_graph!, get_operations
-export parse_abc, ComputeTaskP, ComputeTaskS1, ComputeTaskS2, ComputeTaskV, ComputeTaskU, ComputeTaskSum
+# graph types
+export DAG
+export Node
+export Edge
+export ComputeTaskNode
+export DataTaskNode
+export AbstractTask
+export AbstractComputeTask
+export AbstractDataTask
+export DataTask
+export FusedComputeTask
+export PossibleOperations
+export GraphProperties
+
+# graph functions
+export make_node
+export make_edge
+export insert_node
+export insert_edge
+export is_entry_node
+export is_exit_node
+export parents
+export children
+export compute
+export data
+export compute_effort
+export get_properties
+export get_exit_node
+export is_valid, is_scheduled
+
+# graph operation related
+export Operation
+export AppliedOperation
+export NodeFusion
+export NodeReduction
+export NodeSplit
+export push_operation!
+export pop_operation!
+export can_pop
+export reset_graph!
+export get_operations
+
+# ABC model
+export ParticleValue
+export ParticleA, ParticleB, ParticleC
+export ABCProcessDescription, ABCProcessInput, ABCModel
+export ComputeTaskP
+export ComputeTaskS1
+export ComputeTaskS2
+export ComputeTaskV
+export ComputeTaskU
+export ComputeTaskSum
+
+# code generation related
+export execute
+export parse_dag, parse_process
+export gen_process_input
+export get_compute_function
+
+# estimator
+export cost_type, graph_cost, operation_effect
+export GlobalMetricEstimator, CDCost
+
+# machine info
+export Machine
+export get_machine_info

 export ==, in, show, isempty, delete!, length

@@ -13,6 +79,8 @@ export bytes_to_human_readable
 import Base.length
 import Base.show
 import Base.==
+import Base.+
+import Base.-
 import Base.in
 import Base.copy
 import Base.isempty
@@ -21,29 +89,78 @@ import Base.insert!
 import Base.collect


-include("tasks.jl")
-include("nodes.jl")
-include("graph.jl")
+include("devices/interface.jl")
+include("task/type.jl")
+include("node/type.jl")
+include("diff/type.jl")
+include("properties/type.jl")
+include("operation/type.jl")
+include("graph/type.jl")

 include("trie.jl")
 include("utility.jl")

-include("task_functions.jl")
-include("node_functions.jl")
-include("graph_functions.jl")
+include("diff/print.jl")
+include("diff/properties.jl")

-include("operations/utility.jl")
-include("operations/apply.jl")
-include("operations/clean.jl")
-include("operations/find.jl")
-include("operations/get.jl")
-include("operations/print.jl")
-include("operations/validate.jl")
+include("graph/compare.jl")
+include("graph/interface.jl")
+include("graph/mute.jl")
+include("graph/print.jl")
+include("graph/properties.jl")
+include("graph/validate.jl")

-include("graph_interface.jl")
+include("node/compare.jl")
+include("node/create.jl")
+include("node/print.jl")
+include("node/properties.jl")
+include("node/validate.jl")

-include("abc_model/tasks.jl")
-include("abc_model/task_functions.jl")
-include("abc_model/parse.jl")
+include("operation/utility.jl")
+include("operation/apply.jl")
+include("operation/clean.jl")
+include("operation/find.jl")
+include("operation/get.jl")
+include("operation/print.jl")
+include("operation/validate.jl")
+
+include("properties/create.jl")
+include("properties/utility.jl")
+
+include("task/create.jl")
+include("task/compare.jl")
+include("task/compute.jl")
+include("task/print.jl")
+include("task/properties.jl")
+
+include("estimator/interface.jl")
+include("estimator/global_metric.jl")
+
+include("models/interface.jl")
+include("models/print.jl")
+
+include("models/abc/types.jl")
+include("models/abc/particle.jl")
+include("models/abc/compute.jl")
+include("models/abc/create.jl")
+include("models/abc/properties.jl")
+include("models/abc/parse.jl")
+include("models/abc/print.jl")
+
+include("devices/measure.jl")
+include("devices/detect.jl")
+include("devices/impl.jl")
+
+include("devices/numa/impl.jl")
+include("devices/cuda/impl.jl")
+# cannot currently use AMDGPU because of an incompatibility with the newest rocm drivers
+# include("devices/rocm/impl.jl")
+# oneapi seems also broken for now
+# include("devices/oneapi/impl.jl")
+
+include("scheduler/interface.jl")
+include("scheduler/greedy.jl")
+
+include("code_gen/main.jl")

 end # module MetagraphOptimization
--- a/src/abc_model/parse.jl
+++ b/src/abc_model/parse.jl
@@ -1,152 +0,0 @@
-using Printf
-
-# functions for importing DAGs from a file
-regex_a = r"^[A-C]\d+$"                     # Regex for the initial particles
-regex_c = r"^[A-C]\(([^']*),([^']*)\)$"     # Regex for the combinations of 2 particles
-regex_m = r"^M\(([^']*),([^']*),([^']*)\)$" # Regex for the combinations of 3 particles
-regex_plus = r"^\+$"                        # Regex for the sum
-
-function parse_nodes(input::AbstractString)
-    regex = r"'([^']*)'"
-    matches = eachmatch(regex, input)
-    output = [match.captures[1] for match in matches]
-    return output
-end
-
-function parse_edges(input::AbstractString)
-    regex = r"\('([^']*)', '([^']*)'\)"
-    matches = eachmatch(regex, input)
-    output = [(match.captures[1], match.captures[2]) for match in matches]
-    return output
-end
-
-# reads an abc-model process from the given file
-function parse_abc(filename::String, verbose::Bool = false)
-    file = open(filename, "r")
-
-    if (verbose) println("Opened file") end
-    nodes_string = readline(file)
-    nodes = parse_nodes(nodes_string)
-
-    close(file)
-    if (verbose) println("Read file") end
-
-    graph = DAG()
-    
-    # estimate total number of nodes
-    # try to slightly overestimate so no resizing is necessary
-    # data nodes are not included in length(nodes) and there are a few more than compute nodes
-    estimate_no_nodes = round(Int, length(nodes) * 4)
-    if (verbose) println("Estimating ", estimate_no_nodes, " Nodes") end
-    sizehint!(graph.nodes, estimate_no_nodes)
-
-    sum_node = insert_node!(graph, make_node(ComputeTaskSum()), false, false)
-    global_data_out = insert_node!(graph, make_node(DataTask(10)), false, false)
-    insert_edge!(graph, sum_node, global_data_out, false, false)
-
-    # remember the data out nodes for connection
-    dataOutNodes = Dict()
-
-    if (verbose) println("Building graph") end
-    noNodes = 0
-    nodesToRead = length(nodes)
-    while !isempty(nodes)
-        node = popfirst!(nodes)
-        noNodes += 1
-        if (noNodes % 100 == 0)
-            if (verbose) @printf "\rReading Nodes... %.2f%%" (100. * noNodes / nodesToRead) end
-        end
-        if occursin(regex_a, node)
-            # add nodes and edges for the state reading to u(P(Particle))
-            data_in = insert_node!(graph, make_node(DataTask(4)), false, false) # read particle data node
-            compute_P = insert_node!(graph, make_node(ComputeTaskP()), false, false) # compute P node
-            data_Pu = insert_node!(graph, make_node(DataTask(6)), false, false) # transfer data from P to u
-            compute_u = insert_node!(graph, make_node(ComputeTaskU()), false, false) # compute U node
-            data_out = insert_node!(graph, make_node(DataTask(3)), false, false) # transfer data out from u
-
-            insert_edge!(graph, data_in, compute_P, false, false)
-            insert_edge!(graph, compute_P, data_Pu, false, false)
-            insert_edge!(graph, data_Pu, compute_u, false, false)
-            insert_edge!(graph, compute_u, data_out, false, false)
-            
-            # remember the data_out node for future edges
-            dataOutNodes[node] = data_out
-        elseif occursin(regex_c, node)
-            capt = match(regex_c, node)
-            
-            in1 = capt.captures[1]
-            in2 = capt.captures[2]
-
-            compute_v = insert_node!(graph, make_node(ComputeTaskV()), false, false)
-            data_out = insert_node!(graph, make_node(DataTask(5)), false, false)
-
-            if (occursin(regex_c, in1))
-                # put an S node after this input
-                compute_S = insert_node!(graph, make_node(ComputeTaskS1()), false, false)
-                data_S_v = insert_node!(graph, make_node(DataTask(5)), false, false)
-
-                insert_edge!(graph, dataOutNodes[in1], compute_S, false, false)
-                insert_edge!(graph, compute_S, data_S_v, false, false)
-
-                insert_edge!(graph, data_S_v, compute_v, false, false)
-            else
-                insert_edge!(graph, dataOutNodes[in1], compute_v, false, false)
-            end
-
-            if (occursin(regex_c, in2))
-                # i think the current generator only puts the combined particles in the first space, so this case might never be entered
-                # put an S node after this input
-                compute_S = insert_node!(graph, make_node(ComputeTaskS1()), false, false)
-                data_S_v = insert_node!(graph, make_node(DataTask(5)), false, false)
-
-                insert_edge!(graph, dataOutNodes[in2], compute_S, false, false)
-                insert_edge!(graph, compute_S, data_S_v, false, false)
-
-                insert_edge!(graph, data_S_v, compute_v, false, false)
-            else
-                insert_edge!(graph, dataOutNodes[in2], compute_v, false, false)
-            end
- 
-            insert_edge!(graph, compute_v, data_out, false, false)
-            dataOutNodes[node] = data_out
-
-        elseif occursin(regex_m, node)
-            # assume for now that only the first particle of the three is combined and the other two are "original" ones
-            capt = match(regex_m, node)
-            in1 = capt.captures[1]
-            in2 = capt.captures[2]
-            in3 = capt.captures[3]
-
-            # in2 + in3 with a v
-            compute_v = insert_node!(graph, make_node(ComputeTaskV()), false, false)
-            data_v = insert_node!(graph, make_node(DataTask(5)), false, false)
-
-            insert_edge!(graph, dataOutNodes[in2], compute_v, false, false)
-            insert_edge!(graph, dataOutNodes[in3], compute_v, false, false)
-            insert_edge!(graph, compute_v, data_v, false, false)
-
-            # combine with the v of the combined other input
-            compute_S2 = insert_node!(graph, make_node(ComputeTaskS2()), false, false)
-            data_out = insert_node!(graph, make_node(DataTask(10)), false, false)
-
-            insert_edge!(graph, data_v, compute_S2, false, false)
-            insert_edge!(graph, dataOutNodes[in1], compute_S2, false, false)
-            insert_edge!(graph, compute_S2, data_out, false, false)
-
-            insert_edge!(graph, data_out, sum_node, false, false)
-        elseif occursin(regex_plus, node)
-            if (verbose)
-                println("\rReading Nodes Complete    ") 
-                println("Added ", length(graph.nodes), " nodes")
-            end
-        else
-            @assert false ("Unknown node '$node' while reading from file $filename")
-        end
-    end
-
-    #put all nodes into dirty nodes set
-    graph.dirtyNodes = copy(graph.nodes)
-
-    # don't actually need to read the edges
-    return graph
-end
--- a/src/abc_model/task_functions.jl
+++ b/src/abc_model/task_functions.jl
@@ -1,21 +0,0 @@
-# define compute_efforts tasks computation
-# put some "random" numbers here for now
-compute_effort(t::ComputeTaskS1) = 10
-compute_effort(t::ComputeTaskS2) = 10
-compute_effort(t::ComputeTaskU) = 6
-compute_effort(t::ComputeTaskV) = 20
-compute_effort(t::ComputeTaskP) = 15
-compute_effort(t::ComputeTaskSum) = 1
-
-function show(io::IO, t::DataTask)
-    print(io, "Data", t.data)
-end
-
-show(io::IO, t::ComputeTaskS1) = print("ComputeS1")
-show(io::IO, t::ComputeTaskS2) = print("ComputeS2")
-show(io::IO, t::ComputeTaskP) = print("ComputeP")
-show(io::IO, t::ComputeTaskU) = print("ComputeU")
-show(io::IO, t::ComputeTaskV) = print("ComputeV")
-show(io::IO, t::ComputeTaskSum) = print("ComputeSum")
-
-copy(t::DataTask) = DataTask(t.data)
--- a/src/abc_model/tasks.jl
+++ b/src/abc_model/tasks.jl
@@ -1,29 +0,0 @@
-struct DataTask <: AbstractDataTask
-    data::UInt64
-end
-
-# S task with 1 child
-struct ComputeTaskS1 <: AbstractComputeTask
-end
-
-# S task with 2 children
-struct ComputeTaskS2 <: AbstractComputeTask
-end
-
-# P task with 0 children
-struct ComputeTaskP <: AbstractComputeTask
-end
-
-# v task with 2 children
-struct ComputeTaskV <: AbstractComputeTask
-end
-
-# u task with 1 child
-struct ComputeTaskU <: AbstractComputeTask
-end
-
-# task that sums all its inputs, n children
-struct ComputeTaskSum <: AbstractComputeTask
-end
-
-ABC_TASKS = [DataTask, ComputeTaskS1, ComputeTaskS2, ComputeTaskP, ComputeTaskV, ComputeTaskU, ComputeTaskSum]
--- a/src/code_gen/main.jl
+++ b/src/code_gen/main.jl
@@ -0,0 +1,158 @@
+"""
+    gen_code(graph::DAG, machine::Machine)
+
+Generate the code for a given graph, scheduled for the given machine. The return value is a named tuple of:
+
+- `code::Expr`: The julia expression containing the code for the whole graph.
+- `inputSymbols::Dict{String, Vector{Symbol}}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
+- `outputSymbol::Symbol`: The symbol of the final calculated value
+
+See also: [`execute`](@ref)
+"""
+function gen_code(graph::DAG, machine::Machine)
+    sched = schedule_dag(GreedyScheduler(), graph, machine)
+
+    codeAcc = Vector{Expr}()
+    sizehint!(codeAcc, length(graph.nodes))
+
+    for node in sched
+        # TODO: this is kind of ugly, should init nodes be scheduled differently from the rest?
+        if (node isa DataTaskNode && length(node.children) == 0)
+            push!(codeAcc, get_init_expression(node, entry_device(machine)))
+            continue
+        end
+        push!(codeAcc, get_expression(node))
+    end
+
+    # get inSymbols
+    inputSyms = Dict{String, Vector{Symbol}}()
+    for node in get_entry_nodes(graph)
+        if !haskey(inputSyms, node.name)
+            inputSyms[node.name] = Vector{Symbol}()
+        end
+
+        push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
+    end
+
+    # get outSymbol
+    outSym = Symbol(to_var_name(get_exit_node(graph).id))
+
+    return (code = Expr(:block, codeAcc...), inputSymbols = inputSyms, outputSymbol = outSym)
+end
+
+# Combine the cache initialization expressions of all devices in `machine` into one block expression.
+function gen_cache_init_code(machine::Machine)
+    # one initialization expression per device, produced by the device-specific methods
+    perDevice = Expr[gen_cache_init_code(dev) for dev in machine.devices]
+
+    # splice them into a single `:block` expression
+    blockExpr = Expr(:block, perDevice...)
+    return blockExpr
+end
+
+# Generate a block of assignments that fill every input symbol from the particles of the process input named `processInputSymbol`.
+function gen_input_assignment_code(
+    inputSymbols::Dict{String, Vector{Symbol}},
+    processDescription::AbstractProcessDescription,
+    machine::Machine,
+    processInputSymbol::Symbol = :input,
+)
+    @assert length(inputSymbols) >=
+            sum(values(in_particles(processDescription))) + sum(values(out_particles(processDescription))) "Number of input Symbols is smaller than the number of particles in the process description"
+    assignInputs = Vector{Expr}()
+    for (name, symbols) in inputSymbols
+        type = type_from_name(name)
+        index = parse(Int, name[2:end])
+        # `p` becomes the source text of an expression that picks the particle from the process input
+        p = nothing
+        # an index beyond the number of incoming particles of this type refers to an outgoing particle
+        if (index > in_particles(processDescription)[type])
+            index -= in_particles(processDescription)[type]
+            @assert index <= out_particles(processDescription)[type] "Too few particles of type $type in input particles for this process"
+
+            p = "filter(x -> typeof(x) <: $type, out_particles($(processInputSymbol)))[$(index)]"
+        else
+            p = "filter(x -> typeof(x) <: $type, in_particles($(processInputSymbol)))[$(index)]"
+        end
+        # every symbol registered under this name is assigned the same particle expression
+        for symbol in symbols
+            # TODO: how to get the "default" cpu device?
+            device = entry_device(machine)
+            evalExpr = eval(gen_access_expr(device, symbol))
+            push!(assignInputs, Meta.parse("$(evalExpr)::ParticleValue{$type} = ParticleValue($p, 1.0)"))
+        end
+    end
+
+    return Expr(:block, assignInputs...)
+end
+
+"""
+    get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
+
+Generate and define a function `compute_<id>(input::AbstractProcessInput)` that evaluates the DAG on the given input and returns its result. The defined function is returned.
+"""
+function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
+    generated = gen_code(graph, machine)
+    cacheInit = gen_cache_init_code(machine)
+    inputAssign = gen_input_assignment_code(generated.inputSymbols, process, machine, :input)
+
+    # the UUID-derived id becomes part of the generated function's name
+    id = to_var_name(UUIDs.uuid1(rng[1]))
+    result = eval(gen_access_expr(entry_device(machine), generated.outputSymbol))
+    body = generated.code
+
+    # the parts are spliced in as text and parsed back into one function definition
+    source = "function compute_$(id)(input::AbstractProcessInput) $cacheInit; $inputAssign; $body; return $result; end"
+    definition = Meta.parse(source)
+
+    return eval(definition)
+end
+
+"""
+    execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
+
+Execute the code of the given `graph` on the given input particles.
+
+This is essentially shorthand for
+```julia
+compute_graph = get_compute_function(graph, process, machine)
+result = compute_graph(input)
+```
+
+See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
+"""
+function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
+    (code, inputSymbols, outputSymbol) = gen_code(graph, machine)
+
+    initCaches = gen_cache_init_code(machine)
+    assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
+
+    # built the same way as in get_compute_function; `expr` is kept for the error report below
+    functionId = to_var_name(UUIDs.uuid1(rng[1]))
+    resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
+    expr = Meta.parse(
+        "function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
+    )
+    func = eval(expr)
+    # the call goes through `@eval`, i.e. it is evaluated at top level like the definition above
+    result = 0
+    try
+        result = @eval $func($input)
+    catch e
+        println("Error while evaluating: $e")
+
+        # if we find a uuid in the exception we can color it in so it's easier to spot
+        uuidRegex = r"[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}"
+        m = match(uuidRegex, string(e))
+
+        functionStr = string(expr)
+        if (isa(m, RegexMatch))
+            functionStr = replace(functionStr, m.match => "\033[31m$(m.match)\033[0m")
+        end
+
+        println("Function:\n$functionStr")
+        @assert false
+    end
+
+    return result
+end
--- a/src/devices/cuda/impl.jl
+++ b/src/devices/cuda/impl.jl
@@ -0,0 +1,53 @@
+using CUDA
+
+"""
+    CUDAGPU <: AbstractGPU
+
+Representation of a specific CUDA GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
+"""
+mutable struct CUDAGPU <: AbstractGPU
+    device::Any # TODO: what's the cuda device type?
+    cacheStrategy::CacheStrategy # caching strategy used for this device in code generation
+    FLOPS::Float64 # initialized to -1 by `get_devices`; `measure_device!` does not set it yet
+end
+# register the type so that `device_types()` reports it
+push!(DEVICE_TYPES, CUDAGPU)
+# cache strategies offered for this device type, looked up by `strategies`
+CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
+# strategy given to devices created by `get_devices`
+default_strategy(::Type{T}) where {T <: CUDAGPU} = LocalVariables()
+
+# Placeholder for measuring a CUDA GPU: it only announces the device when `verbose` is set.
+function measure_device!(device::CUDAGPU; verbose::Bool)
+    # `verbose` has no default and must be passed by the caller
+    verbose && println("Measuring CUDA GPU $(device.device)")
+
+    # TODO implement
+    return nothing
+end
+
+"""
+    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
+
+Detect the CUDA devices of the current machine and return them as a vector of [`CUDAGPU`](@ref)s. The vector is empty when CUDA is not functional. With `verbose` set, the findings are printed.
+"""
+function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
+    found = Vector{AbstractDevice}()
+
+    # without a functional CUDA setup there are no devices to report
+    if !CUDA.functional()
+        verbose && println("CUDA is non-functional")
+        return found
+    end
+
+    gpus = CUDA.devices()
+    verbose && println("Found $(length(gpus)) CUDA devices")
+
+    # wrap each CUDA device; FLOPS starts at -1 and the cache strategy at
+    # the default of this device type
+    for gpu in gpus
+        push!(found, CUDAGPU(gpu, default_strategy(CUDAGPU), -1))
+    end
+
+    return found
+end
--- a/src/devices/detect.jl
+++ b/src/devices/detect.jl
@@ -0,0 +1,23 @@
+
+"""
+    get_machine_info(; verbose::Bool = isinteractive())
+
+Return the [`Machine`](@ref) currently running on. The keyword argument `verbose` defaults to true in interactive sessions.
+"""
+function get_machine_info(; verbose::Bool = isinteractive())
+    devices = Vector{AbstractDevice}()
+
+    # ask every registered device type for the devices it finds on this machine
+    for device in device_types()
+        append!(devices, get_devices(device, verbose = verbose))
+    end
+
+    noDevices = length(devices)
+    # the message below states the expectation that a NUMA node is always found
+    @assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"
+
+    # no transfer rates are measured here; every entry starts out as -1
+    transferRates = Matrix{Float64}(undef, noDevices, noDevices)
+    fill!(transferRates, -1)
+    return Machine(devices, transferRates)
+end
--- a/src/devices/impl.jl
+++ b/src/devices/impl.jl
@@ -0,0 +1,52 @@
+"""
+    device_types()
+
+Return the vector of available and implemented device types. This is the global [`DEVICE_TYPES`](@ref) itself, not a copy.
+
+See also: [`DEVICE_TYPES`](@ref)
+"""
+function device_types()
+    return DEVICE_TYPES
+end
+
+"""
+    entry_device(machine::Machine)
+
+Return the "entry" device of the machine, which is the first one in its device list. It is the device that starts CPU threads and GPU kernels, takes the input values and returns the output value.
+"""
+function entry_device(machine::Machine)
+    return first(machine.devices)
+end
+
+"""
+    strategies(t::Type{T}) where {T <: AbstractDevice}
+
+Look up the [`CacheStrategy`](@ref)s registered for the given [`AbstractDevice`](@ref) type and return them as a vector.
+Throws an error if nothing was registered for the type. The strategies are used in code generation.
+"""
+function strategies(t::Type{T}) where {T <: AbstractDevice}
+    # a device type without an entry never registered its strategies
+    haskey(CACHE_STRATEGIES, t) || error("Trying to get strategies for $T, but it has no strategies defined!")
+
+    registered = CACHE_STRATEGIES[t]
+    return registered
+end
+
+"""
+    cache_strategy(device::AbstractDevice)
+
+Return the [`CacheStrategy`](@ref) currently set for this device, i.e. the value of its `cacheStrategy` member.
+"""
+function cache_strategy(device::AbstractDevice)
+    return device.cacheStrategy
+end
+
+"""
+    set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy)
+
+Set the device's cache strategy by writing its `cacheStrategy` member, and return `nothing`. After this call, [`cache_strategy`](@ref) returns `cacheStrategy` for the given device.
+"""
+function set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy)
+    device.cacheStrategy = cacheStrategy
+    return nothing
+end
--- a/src/devices/interface.jl
+++ b/src/devices/interface.jl
@@ -0,0 +1,108 @@
+"""
+    AbstractDevice
+
+Abstract base type for every device, like GPUs, CPUs or any other compute devices.
+Every implementation needs to implement various functions and needs a member `cacheStrategy`.
+"""
+abstract type AbstractDevice end
+# supertype of CPU devices such as `NumaNode`
+abstract type AbstractCPU <: AbstractDevice end
+# supertype of GPU devices such as `CUDAGPU`
+abstract type AbstractGPU <: AbstractDevice end
+
+"""
+    Machine
+
+A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
+
+See also: [`Scheduler`](@ref)
+"""
+struct Machine
+    devices::Vector{AbstractDevice} # `entry_device` returns the first element of this vector
+    # created by `get_machine_info` with one row and one column per device, every entry set to -1
+    transferRates::Matrix{Float64}
+end
+
+"""
+    CacheStrategy
+
+Abstract base type for caching strategies. The strategy of a device selects how generated code stores and accesses values on it.
+
+See also: [`strategies`](@ref)
+"""
+abstract type CacheStrategy end
+
+"""
+    LocalVariables <: CacheStrategy
+
+A caching strategy relying solely on local variables for every input and output. On a `NumaNode`, these variables are named `data_<symbol>`.
+
+Implements the [`CacheStrategy`](@ref) interface.
+"""
+struct LocalVariables <: CacheStrategy end
+
+"""
+    Dictionary <: CacheStrategy
+
+A caching strategy relying on a dictionary of Symbols to store every input and output. On a `NumaNode`, this is a `Dict{Symbol, Any}` named after the device's id.
+
+Implements the [`CacheStrategy`](@ref) interface.
+"""
+struct Dictionary <: CacheStrategy end
+
+"""
+    DEVICE_TYPES::Vector{Type}
+
+Global vector of available and implemented device types. Each implementation of an [`AbstractDevice`](@ref) should add its concrete type to this vector.
+
+See also: [`device_types`](@ref), [`get_devices`](@ref)
+"""
+DEVICE_TYPES = Vector{Type}()
+
+"""
+    CACHE_STRATEGIES::Dict{Type, Vector{CacheStrategy}}
+
+Global dictionary mapping each device type to its available caching strategies. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary.
+
+See also: [`strategies`](@ref)
+"""
+CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}()
+
+"""
+    default_strategy(deviceType::Type{T}) where {T <: AbstractDevice}
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device type. The `get_devices` implementations use it to set the `cacheStrategy` member of the devices they create.
+See also: [`cache_strategy`](@ref), [`set_cache_strategy`](@ref)
+"""
+function default_strategy end
+
+"""
+    get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a vector of the devices of the given [`AbstractDevice`](@ref) type that are available on the current machine; the implementations in this package return a `Vector{AbstractDevice}`.
+"""
+function get_devices end
+
+"""
+    measure_device!(device::AbstractDevice; verbose::Bool)
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes into it. The implementations in this package declare `verbose` as a keyword argument without a default value.
+"""
+function measure_device! end
+
+"""
+    gen_cache_init_code(device::AbstractDevice)
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache.
+
+The strategy to generate code for is the one stored in the device's `cacheStrategy` member.
+"""
+function gen_cache_init_code end
+
+"""
+    gen_access_expr(device::AbstractDevice, symbol::Symbol)
+
+Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
+Return an `Expr` or `QuoteNode` accessing the variable identified by `symbol`.
+"""
+function gen_access_expr end
--- a/src/devices/measure.jl
+++ b/src/devices/measure.jl
@@ -0,0 +1,22 @@
+"""
+    measure_devices!(machine::Machine; verbose::Bool = isinteractive())
+
+Run [`measure_device!`](@ref) on every device of the given machine, so that each device can measure and record its own properties. `verbose` defaults to true in interactive sessions.
+"""
+function measure_devices!(machine::Machine; verbose::Bool = isinteractive())
+    for device in machine.devices
+        measure_device!(device; verbose = verbose)
+    end
+
+    return nothing
+end
+
+"""
+    measure_transfer_rates!(machine::Machine; verbose::Bool = isinteractive())
+
+Measure the transfer rates between devices in the machine. Not implemented yet: nothing is measured or written, and `nothing` is returned.
+"""
+function measure_transfer_rates!(machine::Machine; verbose::Bool = isinteractive())
+    # TODO implement
+    return nothing
+end
--- a/src/devices/numa/impl.jl
+++ b/src/devices/numa/impl.jl
@@ -0,0 +1,96 @@
+using NumaAllocators
+
+"""
+    NumaNode <: AbstractCPU
+
+Representation of a specific CPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
+"""
+mutable struct NumaNode <: AbstractCPU
+    numaId::UInt16 # index of the NUMA node; `get_devices` enumerates them starting at 0
+    threads::UInt16 # `get_devices` currently always sets this to 1
+    cacheStrategy::CacheStrategy # selects how generated code stores values on this node
+    FLOPS::Float64 # initialized to -1 by `get_devices`; `measure_device!` does not set it yet
+    id::UUID # used to name this node's cache variable under the `Dictionary` strategy
+end
+# register the type so that `device_types()` reports it
+push!(DEVICE_TYPES, NumaNode)
+# cache strategies offered for this device type, looked up by `strategies`
+CACHE_STRATEGIES[NumaNode] = [LocalVariables()]
+# strategy given to devices created by `get_devices`
+default_strategy(::Type{T}) where {T <: NumaNode} = LocalVariables()
+
+# Placeholder for measuring a NUMA node: it only announces the node when `verbose` is set.
+function measure_device!(device::NumaNode; verbose::Bool)
+    # `verbose` has no default and must be passed by the caller
+    verbose && println("Measuring Numa Node $(device.numaId)")
+
+    # TODO implement
+    return nothing
+end
+
+"""
+    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
+
+Create one [`NumaNode`](@ref) for each NUMA node of the current machine and return them as a vector. With `verbose` set, the number of nodes found is printed.
+"""
+function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
+    # node ids are zero-based, hence the node count is the highest id plus one
+    lastNumaId = highest_numa_node()
+    verbose && println("Found $(lastNumaId + 1) NUMA nodes")
+
+    nodes = Vector{AbstractDevice}()
+    for numaId in 0:lastNumaId
+        uuid = UUIDs.uuid1(rng[1])
+        push!(nodes, NumaNode(numaId, 1, default_strategy(NumaNode), -1, uuid))
+    end
+
+    return nodes
+end
+
+"""
+    gen_cache_init_code(device::NumaNode)
+
+Generate the code that initializes the cache of a [`NumaNode`](@ref) for the cache strategy set in the device. Throws an error for strategies other than [`LocalVariables`](@ref) and [`Dictionary`](@ref).
+"""
+function gen_cache_init_code(device::NumaNode)
+    strategy = device.cacheStrategy
+
+    # local variables need no setup, so an empty block is generated
+    strategy isa LocalVariables && return Expr(:block)
+
+    # TODO: sizehint?
+    strategy isa Dictionary && return Meta.parse("cache_$(to_var_name(device.id)) = Dict{Symbol, Any}()")
+
+    return error("Unimplemented cache strategy \"$(device.cacheStrategy)\" for device \"$(device)\"")
+end
+
+"""
+    gen_access_expr(device::NumaNode, symbol::Symbol)
+
+Build the code that accesses the variable designated by `symbol` on a [`NumaNode`](@ref). The form of the access is selected by the [`CacheStrategy`](@ref) set in the device.
+"""
+function gen_access_expr(device::NumaNode, symbol::Symbol)
+    return _gen_access_expr(device, cache_strategy(device), symbol)
+end
+
+"""
+    _gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol)
+
+Dispatch target of [`gen_access_expr`](@ref) for the [`LocalVariables`](@ref) strategy. Internal function.
+"""
+function _gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol)
+    # the value lives in a local variable called `data_<symbol>`;
+    # parsing the quoted name yields the quoted form of that variable name
+    return Meta.parse(":(data_$symbol)")
+end
+
+"""
+    _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol)
+
+Dispatch target of [`gen_access_expr`](@ref) for the [`Dictionary`](@ref) strategy. Internal function.
+"""
+function _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol)
+    # the value is an entry of the dictionary `cache_<device id>`, keyed by `symbol`
+    cacheName = "cache_$(to_var_name(device.id))"
+    return Meta.parse(":($(cacheName)[:$symbol])")
+end
--- a/src/devices/oneapi/impl.jl
+++ b/src/devices/oneapi/impl.jl
@@ -0,0 +1,53 @@
+using oneAPI
+
+"""
+    oneAPIGPU <: AbstractGPU
+
+Representation of a specific Intel GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
+"""
+mutable struct oneAPIGPU <: AbstractGPU
+    # device handle, filled with an element of `oneAPI.devices()` by `get_devices`
+    device::Any
+    # cache strategy used when generating code for this device
+    cacheStrategy::CacheStrategy
+    # initialized to -1 by `get_devices`; presumably the measured performance, to be set by `measure_device!` (TODO confirm)
+    FLOPS::Float64
+end
+
+# add this device type to the global list of device types
+push!(DEVICE_TYPES, oneAPIGPU)
+
+# the only cache strategy listed for this device type is LocalVariables
+CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]
+
+# strategy used by `get_devices` when constructing a new oneAPIGPU
+default_strategy(::Type{T}) where {T <: oneAPIGPU} = LocalVariables()
+
+"""
+    measure_device!(device::oneAPIGPU; verbose::Bool)
+
+Placeholder for measuring a [`oneAPIGPU`](@ref). Currently only prints a message if `verbose` is true and returns `nothing`.
+"""
+function measure_device!(device::oneAPIGPU; verbose::Bool)
+    verbose && println("Measuring oneAPI GPU $(device.device)")
+
+    # TODO implement
+    return nothing
+end
+
+"""
+    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
+
+Collect all [`oneAPIGPU`](@ref)s available on the current machine into a `Vector{AbstractDevice}`.
+The vector is empty if oneAPI reports itself as non-functional. If `verbose` is true, print some additional information.
+"""
+function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
+    found = Vector{AbstractDevice}()
+
+    if oneAPI.functional()
+        handles = oneAPI.devices()
+        verbose && println("Found $(length(handles)) oneAPI devices")
+
+        for handle in handles
+            push!(found, oneAPIGPU(handle, default_strategy(oneAPIGPU), -1))
+        end
+    elseif verbose
+        println("oneAPI is non-functional")
+    end
+
+    return found
+end
--- a/src/devices/rocm/impl.jl
+++ b/src/devices/rocm/impl.jl
@@ -0,0 +1,53 @@
+using AMDGPU
+
+"""
+    ROCmGPU <: AbstractGPU
+
+Representation of a specific AMD GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
+"""
+mutable struct ROCmGPU <: AbstractGPU
+    # device handle, filled with an element of `AMDGPU.devices()` by `get_devices`
+    device::Any
+    # cache strategy used when generating code for this device
+    cacheStrategy::CacheStrategy
+    # initialized to -1 by `get_devices`; presumably the measured performance, to be set by `measure_device!` (TODO confirm)
+    FLOPS::Float64
+end
+
+# add this device type to the global list of device types
+push!(DEVICE_TYPES, ROCmGPU)
+
+# the only cache strategy listed for this device type is LocalVariables
+CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]
+
+# strategy used by `get_devices` when constructing a new ROCmGPU
+default_strategy(::Type{T}) where {T <: ROCmGPU} = LocalVariables()
+
+"""
+    measure_device!(device::ROCmGPU; verbose::Bool)
+
+Placeholder for measuring a [`ROCmGPU`](@ref). Currently only prints a message if `verbose` is true and returns `nothing`.
+"""
+function measure_device!(device::ROCmGPU; verbose::Bool)
+    verbose && println("Measuring ROCm GPU $(device.device)")
+
+    # TODO implement
+    return nothing
+end
+
+"""
+    get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
+
+Collect all [`ROCmGPU`](@ref)s available on the current machine into a `Vector{AbstractDevice}`.
+The vector is empty if AMDGPU reports itself as non-functional. If `verbose` is true, print some additional information.
+"""
+function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
+    found = Vector{AbstractDevice}()
+
+    if AMDGPU.functional()
+        handles = AMDGPU.devices()
+        verbose && println("Found $(length(handles)) AMD devices")
+
+        for handle in handles
+            push!(found, ROCmGPU(handle, default_strategy(ROCmGPU), -1))
+        end
+    elseif verbose
+        println("AMDGPU is non-functional")
+    end
+
+    return found
+end
--- a/src/diff/print.jl
+++ b/src/diff/print.jl
@@ -0,0 +1,11 @@
+"""
+    show(io::IO, diff::Diff)
+
+Pretty-print a [`Diff`](@ref) as the number of changed (added plus removed) nodes and edges.
+Called via print, println and co.
+"""
+function show(io::IO, diff::Diff)
+    nodeChanges = length(diff.addedNodes) + length(diff.removedNodes)
+    edgeChanges = length(diff.addedEdges) + length(diff.removedEdges)
+
+    return print(io, "Nodes: ", nodeChanges, ", Edges: ", edgeChanges)
+end
--- a/src/diff/properties.jl
+++ b/src/diff/properties.jl
@@ -0,0 +1,14 @@
+"""
+    length(diff::Diff)
+
+Return a named tuple holding the number of added/removed nodes/edges of the diff.
+The fields are `.addedNodes`, `.removedNodes`, `.addedEdges` and `.removedEdges`.
+"""
+function length(diff::Diff)
+    nAddedNodes = length(diff.addedNodes)
+    nRemovedNodes = length(diff.removedNodes)
+    nAddedEdges = length(diff.addedEdges)
+    nRemovedEdges = length(diff.removedEdges)
+
+    return (
+        addedNodes = nAddedNodes,
+        removedNodes = nRemovedNodes,
+        addedEdges = nAddedEdges,
+        removedEdges = nRemovedEdges,
+    )
+end
--- a/src/diff/type.jl
+++ b/src/diff/type.jl
@@ -0,0 +1,21 @@
+"""
+    Diff
+
+A named tuple type recording changes made to a [`DAG`](@ref): added and removed nodes, added and removed edges,
+and nodes whose task had its children updated.
+"""
+const Diff = NamedTuple{
+    (:addedNodes, :removedNodes, :addedEdges, :removedEdges, :updatedChildren),
+    Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}, Vector{Tuple{Node, AbstractTask}}},
+}
+
+"""
+    Diff()
+
+Construct and return an empty [`Diff`](@ref).
+"""
+function Diff()
+    # children were updated in the task, updatedChildren[x][2] is the task before the update
+    updated = Vector{Tuple{Node, AbstractTask}}()
+
+    return (
+        addedNodes = Node[],
+        removedNodes = Node[],
+        addedEdges = Edge[],
+        removedEdges = Edge[],
+        updatedChildren = updated,
+    )::Diff
+end
--- a/src/estimator/global_metric.jl
+++ b/src/estimator/global_metric.jl
@@ -0,0 +1,65 @@
+
+"""
+    CDCost
+
+Representation of a [`DAG`](@ref)'s cost as estimated by the [`GlobalMetricEstimator`](@ref).
+
+# Fields:
+`.data`: The total data transfer.\\
+`.computeEffort`: The total compute effort.\\
+`.computeIntensity`: The compute intensity, will always equal `.computeEffort / .data`.
+
+
+!!! note
+    Note that the `computeIntensity` doesn't necessarily make sense in the context of only operation costs. 
+    For example, for node fusions this will always be 0, since the computeEffort is zero. 
+    It will still work as intended when adding/subtracting to/from a `graph_cost` estimate.
+"""
+const CDCost = NamedTuple{(:data, :computeEffort, :computeIntensity), Tuple{Float64, Float64, Float64}}
+
+"""
+    +(cost1::CDCost, cost2::CDCost)
+
+Add two [`CDCost`](@ref)s. Data and compute effort are summed, the compute intensity is recomputed
+from the sums as `computeEffort / data` (plain floating point division, no special case for `data == 0`).
+"""
+function +(cost1::CDCost, cost2::CDCost)::CDCost
+    d = cost1.data + cost2.data
+    ce = cost1.computeEffort + cost2.computeEffort
+    return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
+end
+
+"""
+    -(cost1::CDCost, cost2::CDCost)
+
+Subtract `cost2` from `cost1`. Data and compute effort are subtracted, the compute intensity is recomputed
+from the differences as `computeEffort / data` (plain floating point division, no special case for `data == 0`).
+"""
+function -(cost1::CDCost, cost2::CDCost)::CDCost
+    d = cost1.data - cost2.data
+    ce = cost1.computeEffort - cost2.computeEffort
+    return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
+end
+
+"""
+    GlobalMetricEstimator <: AbstractEstimator
+
+Estimator working on the global metrics of a graph. Its cost type is [`CDCost`](@ref).
+"""
+struct GlobalMetricEstimator <: AbstractEstimator end
+
+"""
+    cost_type(estimator::GlobalMetricEstimator)
+
+Return [`CDCost`](@ref), the cost type of the [`GlobalMetricEstimator`](@ref).
+"""
+function cost_type(estimator::GlobalMetricEstimator)
+    costType = CDCost
+    return costType
+end
+
+"""
+    graph_cost(estimator::GlobalMetricEstimator, graph::DAG)
+
+Return the graph's cost as a [`CDCost`](@ref), copied from the graph's properties as returned by `get_properties`.
+"""
+function graph_cost(estimator::GlobalMetricEstimator, graph::DAG)
+    props = get_properties(graph)
+
+    d = props.data
+    ce = props.computeEffort
+    ci = props.computeIntensity
+    return (data = d, computeEffort = ce, computeIntensity = ci)::CDCost
+end
+
+"""
+    operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeFusion)
+
+Estimated effect of a [`NodeFusion`](@ref): the data of the task of the second input node is subtracted,
+compute effort and compute intensity are reported as zero.
+"""
+function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeFusion)
+    fusedData = data(operation.input[2].task)
+    return (data = -fusedData, computeEffort = 0.0, computeIntensity = 0.0)::CDCost
+end
+
+"""
+    operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeReduction)
+
+Estimated effect of a [`NodeReduction`](@ref): all but one of the input nodes are removed, so the data and
+compute effort of the first input's task are subtracted `length(operation.input) - 1` times.
+
+The compute intensity is `0.0` when the reduced nodes are [`DataTaskNode`](@ref)s and `Inf` otherwise.
+"""
+function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeReduction)
+    s = length(operation.input) - 1
+    representative = operation.input[1]
+    return (
+        data = s * -data(representative.task),
+        computeEffort = s * -compute_effort(representative.task),
+        # check the node itself: the type of the input collection is never a DataTaskNode
+        computeIntensity = representative isa DataTaskNode ? 0.0 : Inf,
+    )::CDCost
+end
+
+"""
+    operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeSplit)
+
+Estimated effect of a [`NodeSplit`](@ref): the data and compute effort of the input node's task are added
+once for every parent of the input node beyond the first.
+"""
+function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeSplit)
+    splitNode = operation.input
+    copies = length(splitNode.parents) - 1
+
+    addedData = copies * data(splitNode.task)
+    addedEffort = copies * compute_effort(splitNode.task)
+    return (data = addedData, computeEffort = addedEffort, computeIntensity = addedEffort / addedData)::CDCost
+end
--- a/src/estimator/interface.jl
+++ b/src/estimator/interface.jl
@@ -0,0 +1,44 @@
+
+"""
+    AbstractEstimator
+
+Abstract base type for an estimator. An estimator estimates the cost of a graph or the difference an operation applied to a graph will make to its cost.
+
+Interface functions are
+- [`cost_type`](@ref)
+- [`graph_cost`](@ref)
+- [`operation_effect`](@ref)
+"""
+abstract type AbstractEstimator end
+
+"""
+    cost_type(estimator::AbstractEstimator)
+
+Interface function returning a specific estimator's cost type, i.e., the type returned by its implementation of [`graph_cost`](@ref) and [`operation_effect`](@ref).
+"""
+function cost_type end
+
+"""
+    graph_cost(estimator::AbstractEstimator, graph::DAG)
+
+Get the total estimated cost of the graph. The cost's data type can be chosen by the implementation, but should have usable comparison operators (<, <=, >, >=, ==) and basic math operators (+, -, *, /).
+"""
+function graph_cost end
+
+"""
+    operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation)
+
+Get the estimated effect on the cost of the graph, such that `graph_cost(estimator, graph) + operation_effect(estimator, graph, operation) ~= graph_cost(estimator, graph_with_operation_applied)`. There is no hard requirement for this, but the better the estimate, the better an optimization algorithm will be.
+
+!!! note
+    There is a default implementation of this function, applying the operation, calling [`graph_cost`](@ref), then popping the operation again.
+    The operation is popped again even if the cost calculation throws.
+
+    It can be much faster to overload this function for a specific estimator and directly compute the effects from the operation if possible.
+"""
+function operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation)
+    # This is currently not stably working, see issue #16
+    cost = graph_cost(estimator, graph)
+    push_operation!(graph, operation)
+    try
+        return graph_cost(estimator, graph) - cost
+    finally
+        # always restore the graph, also when graph_cost throws
+        pop_operation!(graph)
+    end
+end
--- a/src/graph.jl
+++ b/src/graph.jl
@@ -1,90 +0,0 @@
-using DataStructures
-
-const Diff = NamedTuple{
-   (:addedNodes, :removedNodes, :addedEdges, :removedEdges),
-   Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}}
-}
-
-function Diff()
-   return (
-      addedNodes = Vector{Node}(), 
-      removedNodes = Vector{Node}(), 
-      addedEdges = Vector{Edge}(), 
-      removedEdges = Vector{Edge}()
-   )::Diff
-end
-
-# An abstract base class for operations
-# an operation can be applied to a DAG
-abstract type Operation end
-
-# An abstract base class for already applied operations
-# an applied operation can be reversed iff it is the last applied operation on the DAG
-abstract type AppliedOperation end
-
-struct NodeFusion <: Operation
-   input::Tuple{ComputeTaskNode, DataTaskNode, ComputeTaskNode}
-end
-
-struct AppliedNodeFusion <: AppliedOperation
-   operation::NodeFusion
-   diff::Diff
-end
-
-struct NodeReduction <: Operation
-   input::Vector{Node}
-end
-
-struct AppliedNodeReduction <: AppliedOperation
-   operation::NodeReduction
-   diff::Diff
-end
-
-struct NodeSplit <: Operation
-   input::Node
-end
-
-struct AppliedNodeSplit <: AppliedOperation
-   operation::NodeSplit
-   diff::Diff
-end
-
-mutable struct PossibleOperations
-   nodeFusions::Set{NodeFusion}
-   nodeReductions::Set{NodeReduction}
-   nodeSplits::Set{NodeSplit}
-end
-
-function PossibleOperations()
-   return PossibleOperations(
-      Set{NodeFusion}(),
-      Set{NodeReduction}(),
-      Set{NodeSplit}()
-   )
-end
-
-# The actual state of the DAG is the initial state given by the set of nodes 
-# but with all the operations in appliedChain applied in order
-mutable struct DAG
-   nodes::Set{Node}
-
-   # The operations currently applied to the set of nodes
-   appliedOperations::Stack{AppliedOperation}
-
-   # The operations not currently applied but part of the current state of the DAG
-   operationsToApply::Deque{Operation}
-
-   # The possible operations at the current state of the DAG
-   possibleOperations::PossibleOperations
-
-   # The set of nodes whose possible operations need to be reevaluated
-   dirtyNodes::Set{Node}
-
-   # "snapshot" system: keep track of added/removed nodes/edges since last snapshot
-   # these are muted in insert_node! etc.
-   diff::Diff
-end
-
-function DAG()
-   return DAG(Set{Node}(), Stack{AppliedOperation}(), Deque{Operation}(), PossibleOperations(), Set{Node}(), Diff())
-end
--- a/src/graph/compare.jl
+++ b/src/graph/compare.jl
@@ -0,0 +1,37 @@
+"""
+    in(node::Node, graph::DAG)
+
+Return whether the node is contained in the graph's set of nodes.
+"""
+function in(node::Node, graph::DAG)
+    return node in graph.nodes
+end
+
+"""
+    in(edge::Edge, graph::DAG)
+
+Return whether the edge is part of the graph, i.e., both of its nodes are in the graph and
+the edge's first node is a child of its second node.
+"""
+function in(edge::Edge, graph::DAG)
+    child = edge.edge[1]
+    parent = edge.edge[2]
+
+    return (child in graph) && (parent in graph) && (child in parent.children)
+end
+
+"""
+    ==(n1::Node, n2::Node, g::DAG)
+
+Check equality of two nodes in a graph. Two nodes are equal if they have the same type, are both part
+of the graph, and have equal tasks and equal children.
+"""
+function ==(n1::Node, n2::Node, g::DAG)
+    typeof(n1) == typeof(n2) || return false
+    ((n1 in g) && (n2 in g)) || return false
+
+    return n1.task == n2.task && children(n1) == children(n2)
+end
--- a/src/graph/interface.jl
+++ b/src/graph/interface.jl
@@ -0,0 +1,54 @@
+"""
+    push_operation!(graph::DAG, operation::Operation)
+
+Apply a new operation to the graph. The operation is only queued in the graph's `operationsToApply` here.
+
+See also: [`DAG`](@ref), [`pop_operation!`](@ref)
+"""
+function push_operation!(graph::DAG, operation::Operation)
+    pending = graph.operationsToApply
+    push!(pending, operation)
+
+    return nothing
+end
+
+"""
+    pop_operation!(graph::DAG)
+
+Revert the latest operation on the graph. A queued but not yet applied operation is simply dropped,
+otherwise the latest applied operation is reverted. Throws an error if there is no operation left.
+
+See also: [`DAG`](@ref), [`push_operation!`](@ref)
+"""
+function pop_operation!(graph::DAG)
+    if !isempty(graph.operationsToApply)
+        # queued operations were never applied, so there is nothing to revert
+        pop!(graph.operationsToApply)
+        return nothing
+    end
+
+    if isempty(graph.appliedOperations)
+        error("No more operations to pop!")
+    end
+
+    revert_operation!(graph, pop!(graph.appliedOperations))
+    return nothing
+end
+
+"""
+    can_pop(graph::DAG)
+
+Return `true` if the graph has any queued or applied operation left, i.e., if [`pop_operation!`](@ref) is possible.
+"""
+function can_pop(graph::DAG)
+    return !isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
+end
+
+"""
+    reset_graph!(graph::DAG)
+
+Reset the graph to its initial state by popping operations until none are left.
+"""
+function reset_graph!(graph::DAG)
+    while !isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
+        pop_operation!(graph)
+    end
+
+    return nothing
+end
--- a/src/graph/mute.jl
+++ b/src/graph/mute.jl
@@ -0,0 +1,321 @@
+# graph mutating functions need to do a few things:
+# 1: mutate the graph
+# 2: keep track of what was changed for the diff (if track == true)
+# 3: invalidate operation caches (if invalidate_cache == true)
+
+"""
+    insert_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
+
+Insert the node into the graph and return the node.
+
+## Keyword Arguments
+`track::Bool`: Whether to record the change in the [`DAG`](@ref)'s [`Diff`](@ref). Should be set `false` in parsing or graph creation functions for performance.
+
+`invalidate_cache::Bool`: Whether to mark the node as dirty. Should also be turned off for graph creation or parsing.
+
+See also: [`remove_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref)
+"""
+function insert_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
+    push!(graph.nodes, node)
+
+    # record the change for the snapshot diff
+    track && push!(graph.diff.addedNodes, node)
+
+    # a new node needs its possible operations evaluated
+    invalidate_cache && push!(graph.dirtyNodes, node)
+
+    return node
+end
+
+"""
+    insert_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
+
+Insert the edge between node1 (child) and node2 (parent) into the graph. Asserts that the edge does not exist yet.
+
+## Keyword Arguments
+`track::Bool`: Whether to record the change in the [`DAG`](@ref)'s [`Diff`](@ref). Should be set `false` in parsing or graph creation functions for performance.
+
+`invalidate_cache::Bool`: Whether to invalidate the operation caches of both nodes and mark them dirty. Should also be turned off for graph creation or parsing.
+
+See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`remove_edge!`](@ref)
+"""
+function insert_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
+    @assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists"
+
+    # the edge points from child (node1) to parent (node2)
+    push!(node1.parents, node2)
+    push!(node2.children, node1)
+
+    track && push!(graph.diff.addedEdges, make_edge(node1, node2))
+
+    if invalidate_cache
+        for endpoint in (node1, node2)
+            invalidate_operation_caches!(graph, endpoint)
+        end
+        for endpoint in (node1, node2)
+            push!(graph.dirtyNodes, endpoint)
+        end
+    end
+
+    return nothing
+end
+
+"""
+    remove_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
+
+Remove the node from the graph. Asserts that the node is part of the graph.
+
+## Keyword Arguments
+`track::Bool`: Whether to record the change in the [`DAG`](@ref)'s [`Diff`](@ref). Should be set `false` in parsing or graph creation functions for performance.
+
+`invalidate_cache::Bool`: Whether to invalidate the node's operation caches and drop it from the dirty nodes. Should also be turned off for graph creation or parsing.
+
+See also: [`insert_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref)
+"""
+function remove_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
+    @assert node in graph.nodes "Trying to remove a node that's not in the graph"
+
+    delete!(graph.nodes, node)
+
+    track && push!(graph.diff.removedNodes, node)
+
+    if invalidate_cache
+        invalidate_operation_caches!(graph, node)
+        # a removed node no longer needs to be reevaluated
+        delete!(graph.dirtyNodes, node)
+    end
+
+    return nothing
+end
+
+"""
+    remove_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
+
+Remove the edge between node1 (child) and node2 (parent) from the graph.
+
+## Keyword Arguments
+`track::Bool`: Whether to record the change in the [`DAG`](@ref)'s [`Diff`](@ref). Should be set `false` in parsing or graph creation functions for performance.
+
+`invalidate_cache::Bool`: Whether to invalidate the operation caches of both nodes and mark those still in the graph as dirty. Should also be turned off for graph creation or parsing.
+
+See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`insert_edge!`](@ref)
+"""
+function remove_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
+    parentsBefore = length(node1.parents)
+    childrenBefore = length(node2.children)
+
+    #TODO: filter is very slow
+    filter!(!=(node2), node1.parents)
+    filter!(!=(node1), node2.children)
+
+    # sanity check: an edge should exist at most once
+    @assert parentsBefore - length(node1.parents) <= 1 "removed more than one node from node1's parents"
+    @assert childrenBefore - length(node2.children) <= 1 "removed more than one node from node2's children"
+
+    track && push!(graph.diff.removedEdges, make_edge(node1, node2))
+
+    if invalidate_cache
+        invalidate_operation_caches!(graph, node1)
+        invalidate_operation_caches!(graph, node2)
+
+        # only nodes that are still part of the graph can be dirty
+        (node1 in graph) && push!(graph.dirtyNodes, node1)
+        (node2 in graph) && push!(graph.dirtyNodes, node2)
+    end
+
+    return nothing
+end
+
+"""
+    replace_children!(task::FusedComputeTask, before, after)
+
+Replace `before` with `after` in the input lists of the fused task, and recursively in those
+of its two sub-tasks whose input list contained `before`. Asserts that there is something to replace.
+"""
+function replace_children!(task::FusedComputeTask, before, after)
+    hitsInFirst = count(==(before), task.t1_inputs)
+    hitsInSecond = count(==(before), task.t2_inputs)
+
+    @assert replacedIn1 >= 1 || replacedIn2 >= 1 "Nothing to replace while replacing $before with $after in $(task.t1_inputs...) and $(task.t2_inputs...)"
+
+    replace!(task.t1_inputs, before => after)
+    replace!(task.t2_inputs, before => after)
+
+    # recursively descend down the tree, but only in the tasks where we're replacing things
+    hitsInFirst > 0 && replace_children!(task.first_task, before, after)
+    hitsInSecond > 0 && replace_children!(task.second_task, before, after)
+
+    return nothing
+end
+
+"""
+    replace_children!(task::AbstractTask, before, after)
+
+Fallback for tasks that are not fused: there is nothing to replace.
+"""
+function replace_children!(task::AbstractTask, before, after)
+    return nothing
+end
+
+"""
+    update_child!(graph::DAG, n::Node, child_before::Symbol, child_after::Symbol; track = true)
+
+Replace the input symbol `child_before` with `child_after` in the task of `n`. Only has an effect
+if the task of `n` is a `FusedComputeTask`, otherwise nothing is done.
+
+Prints diagnostics and fails an assertion if `child_before` is not among the task's inputs, or if
+`child_after` is not among them after the replacement.
+
+## Keyword Arguments
+`track::Bool`: Whether to record the node together with a copy of its task from before the update in the [`DAG`](@ref)'s [`Diff`](@ref).
+
+Always returns `nothing`.
+"""
+function update_child!(graph::DAG, n::Node, child_before::Symbol, child_after::Symbol; track = true)
+    # only need to update fused compute tasks
+    if !(typeof(n.task) <: FusedComputeTask)
+        return nothing
+    end
+
+    # keep the state from before the update for the diff
+    taskBefore = copy(n.task)
+
+    if !((child_before in n.task.t1_inputs) || (child_before in n.task.t2_inputs))
+        println("------------------ Nothing to replace!! ------------------")
+        child_ids = Vector{String}()
+        for child in n.children
+            push!(child_ids, "$(child.id)")
+        end
+        println("From $(child_before) to $(child_after) in $n with children $(child_ids)")
+        @assert false
+    end
+
+    replace_children!(n.task, child_before, child_after)
+
+    if !((child_after in n.task.t1_inputs) || (child_after in n.task.t2_inputs))
+        println("------------------ Did not replace anything!! ------------------")
+        child_ids = Vector{String}()
+        for child in n.children
+            push!(child_ids, "$(child.id)")
+        end
+        println("From $(child_before) to $(child_after) in $n with children $(child_ids)")
+        @assert false
+    end
+
+    # keep track
+    if (track)
+        push!(graph.diff.updatedChildren, (n, taskBefore))
+    end
+
+    # return nothing on every path instead of leaking the result of push!
+    return nothing
+end
+
+"""
+    get_snapshot_diff(graph::DAG)
+
+Return the [`Diff`](@ref) currently stored in the graph and replace it with a fresh, empty one.
+The returned diff therefore holds the changes since the last time this function was called.
+
+See also: [`revert_diff!`](@ref), [`AppliedOperation`](@ref) and [`revert_operation!`](@ref)
+"""
+function get_snapshot_diff(graph::DAG)
+    freshDiff = Diff()
+    return swapfield!(graph, :diff, freshDiff)
+end
+
+"""
+    invalidate_caches!(graph::DAG, operation::NodeFusion)
+
+Invalidate the operation caches for a given [`NodeFusion`](@ref).
+
+The operation is deleted from the graph's possible operations, removed from the `nodeFusions` of its
+first and third input node, and the `nodeFusion` of its second input node is set to `missing`.
+"""
+function invalidate_caches!(graph::DAG, operation::NodeFusion)
+    delete!(graph.possibleOperations, operation)
+
+    # TODO: filter is very slow
+    for outerNode in (operation.input[1], operation.input[3])
+        filter!(!=(operation), outerNode.nodeFusions)
+    end
+
+    operation.input[2].nodeFusion = missing
+
+    return nothing
+end
+
+"""
+    invalidate_caches!(graph::DAG, operation::NodeReduction)
+
+Invalidate the operation caches for a given [`NodeReduction`](@ref).
+
+The operation is deleted from the graph's possible operations and the `nodeReduction` of every input node is set to `missing`.
+"""
+function invalidate_caches!(graph::DAG, operation::NodeReduction)
+    delete!(graph.possibleOperations, operation)
+
+    for reducedNode in operation.input
+        reducedNode.nodeReduction = missing
+    end
+
+    return nothing
+end
+
+"""
+    invalidate_caches!(graph::DAG, operation::NodeSplit)
+
+Invalidate the operation caches for a given [`NodeSplit`](@ref).
+
+The operation is deleted from the graph's possible operations and the `nodeSplit` of its input node is set to `missing`.
+"""
+function invalidate_caches!(graph::DAG, operation::NodeSplit)
+    delete!(graph.possibleOperations, operation)
+
+    # a node split involves only a single node
+    splitNode = operation.input
+    splitNode.nodeSplit = missing
+
+    return nothing
+end
+
+"""
+    invalidate_operation_caches!(graph::DAG, node::ComputeTaskNode)
+
+Invalidate all operations cached in the given compute node (its node reduction, its node split and all of
+its node fusions) through calls to the respective [`invalidate_caches!`](@ref) functions.
+"""
+function invalidate_operation_caches!(graph::DAG, node::ComputeTaskNode)
+    ismissing(node.nodeReduction) || invalidate_caches!(graph, node.nodeReduction)
+    ismissing(node.nodeSplit) || invalidate_caches!(graph, node.nodeSplit)
+
+    # a compute node can be part of several node fusions
+    while !isempty(node.nodeFusions)
+        invalidate_caches!(graph, pop!(node.nodeFusions))
+    end
+
+    return nothing
+end
+
+"""
+    invalidate_operation_caches!(graph::DAG, node::DataTaskNode)
+
+Invalidate all operations cached in the given data node (its node reduction, its node split and its
+node fusion) through calls to the respective [`invalidate_caches!`](@ref) functions.
+"""
+function invalidate_operation_caches!(graph::DAG, node::DataTaskNode)
+    ismissing(node.nodeReduction) || invalidate_caches!(graph, node.nodeReduction)
+    ismissing(node.nodeSplit) || invalidate_caches!(graph, node.nodeSplit)
+    ismissing(node.nodeFusion) || invalidate_caches!(graph, node.nodeFusion)
+
+    return nothing
+end
--- a/src/graph/print.jl
+++ b/src/graph/print.jl
@@ -0,0 +1,66 @@
+"""
+    show_nodes(io::IO, graph::DAG)
+
+Print all of a graph's nodes as a comma separated list in square brackets.
+Should only be used for small graphs as it prints every node.
+"""
+function show_nodes(io::IO, graph::DAG)
+    print(io, "[")
+    join(io, graph.nodes, ", ")
+    return print(io, "]")
+end
+
+"""
+    show(io::IO, graph::DAG)
+
+Print the given graph to io. If there are more than 20 nodes, only a summary (total number and number per task type) is printed instead of every node.
+
+Note that this applies all pending operations of the graph before printing.
+"""
+function show(io::IO, graph::DAG)
+    apply_all!(graph)
+    println(io, "Graph:")
+    print(io, "  Nodes: ")
+
+    # count the nodes per task type and sum up the edges
+    nodeDict = Dict{Type, Int64}()
+    noEdges = 0
+    for node in graph.nodes
+        taskType = typeof(node.task)
+        nodeDict[taskType] = get(nodeDict, taskType, 0) + 1
+        noEdges += length(parents(node))
+    end
+
+    if length(graph.nodes) <= 20
+        show_nodes(io, graph)
+    else
+        # everything has to be written to io, not to stdout
+        print(io, "Total: ", length(graph.nodes), ", ")
+        for (i, (taskType, number)) in enumerate(nodeDict)
+            if i > 1
+                print(io, ", ")
+            end
+            # line break before every third entry
+            if (i % 3 == 0)
+                print(io, "\n         ")
+            end
+            print(io, taskType, ": ", number)
+        end
+    end
+    println(io)
+    println(io, "  Edges: ", noEdges)
+    properties = get_properties(graph)
+    println(io, "  Total Compute Effort: ", properties.computeEffort)
+    println(io, "  Total Data Transfer: ", properties.data)
+    return println(io, "  Total Compute Intensity: ", properties.computeIntensity)
+end
--- a/src/graph/properties.jl
+++ b/src/graph/properties.jl
@@ -0,0 +1,45 @@
+"""
+    get_properties(graph::DAG)
+
+Return the graph's [`GraphProperties`](@ref). All pending operations are applied first, and the
+properties are recomputed if the cached compute effort is zero.
+"""
+function get_properties(graph::DAG)
+    # make sure the graph is fully generated
+    apply_all!(graph)
+
+    # a cached compute effort of zero triggers a recalculation
+    needsUpdate = graph.properties.computeEffort == 0.0
+    if needsUpdate
+        graph.properties = GraphProperties(graph)
+    end
+
+    return graph.properties
+end
+
+"""
+    get_exit_node(graph::DAG)
+
+Return the graph's exit node. A single exit node is assumed; if the graph has several, the one encountered
+first while iterating the graph's nodes is returned. Fails an assertion if there is no exit node.
+"""
+function get_exit_node(graph::DAG)
+    for candidate in graph.nodes
+        is_exit_node(candidate) && return candidate
+    end
+    @assert false "The given graph has no exit node! It is either empty or not acyclic!"
+end
+
+"""
+    get_entry_nodes(graph::DAG)
+
+Return a `Vector{Node}` of the graph's entry nodes. All pending operations are applied first.
+"""
+function get_entry_nodes(graph::DAG)
+    apply_all!(graph)
+    return Node[node for node in graph.nodes if is_entry_node(node)]
+end
--- a/src/graph/type.jl
+++ b/src/graph/type.jl
@@ -0,0 +1,73 @@
+using DataStructures
+
+"""
+    PossibleOperations
+
+A struct storing all possible operations on a [`DAG`](@ref).
+To get the [`PossibleOperations`](@ref) on a [`DAG`](@ref), use [`get_operations`](@ref).
+"""
+mutable struct PossibleOperations
+    # one set per kind of operation
+    nodeFusions::Set{NodeFusion}
+    nodeReductions::Set{NodeReduction}
+    nodeSplits::Set{NodeSplit}
+end
+
+""" 
+    DAG
+
+The representation of the graph as a set of [`Node`](@ref)s.
+
+A DAG can be loaded using the appropriate parse_dag function, e.g. [`parse_dag`](@ref).
+
+[`Operation`](@ref)s can be applied on it using [`push_operation!`](@ref) and reverted using [`pop_operation!`](@ref) like a stack.
+To get the set of possible operations, use [`get_operations`](@ref).
+The members of the object should not be manually accessed, instead always use the provided interface functions.
+"""
+mutable struct DAG
+    # The nodes of the graph; edges are stored in the nodes' parents/children
+    nodes::Set{Node}
+
+    # The operations currently applied to the set of nodes
+    appliedOperations::Stack{AppliedOperation}
+
+    # The operations not currently applied but part of the current state of the DAG
+    operationsToApply::Deque{Operation}
+
+    # The possible operations at the current state of the DAG
+    possibleOperations::PossibleOperations
+
+    # The set of nodes whose possible operations need to be reevaluated
+    dirtyNodes::Set{Node}
+
+    # "snapshot" system: keep track of added/removed nodes/edges since last snapshot
+    # these are mutated in insert_node! etc.
+    diff::Diff
+
+    # the cached properties of the DAG
+    properties::GraphProperties
+end
+
+"""
+    PossibleOperations()
+
+Construct and return a [`PossibleOperations`](@ref) object holding no operations.
+"""
+function PossibleOperations()
+    fusions = Set{NodeFusion}()
+    reductions = Set{NodeReduction}()
+    splits = Set{NodeSplit}()
+
+    return PossibleOperations(fusions, reductions, splits)
+end
+
+"""
+    DAG()
+
+Construct and return a [`DAG`](@ref) without nodes or operations, with an empty diff and default properties.
+"""
+function DAG()
+    nodes = Set{Node}()
+    applied = Stack{AppliedOperation}()
+    pending = Deque{Operation}()
+    possible = PossibleOperations()
+    dirty = Set{Node}()
+
+    return DAG(nodes, applied, pending, possible, dirty, Diff(), GraphProperties())
+end
--- a/src/graph/validate.jl
+++ b/src/graph/validate.jl
@@ -0,0 +1,77 @@
+"""
+    is_connected(graph::DAG)
+
+Return whether the given graph is connected, i.e., whether every node of the graph can be reached
+from the exit node by following `children`.
+
+Every node is expanded at most once, so nodes shared between several parents do not lead to repeated traversals.
+"""
+function is_connected(graph::DAG)
+    nodeQueue = Deque{Node}()
+    push!(nodeQueue, get_exit_node(graph))
+    seenNodes = Set{Node}()
+
+    while !isempty(nodeQueue)
+        current = pop!(nodeQueue)
+        # a node can be queued by several parents before it is handled
+        if current in seenNodes
+            continue
+        end
+        push!(seenNodes, current)
+
+        for child in current.children
+            if !(child in seenNodes)
+                push!(nodeQueue, child)
+            end
+        end
+    end
+
+    return length(seenNodes) == length(graph.nodes)
+end
+
+"""
+    is_valid(graph::DAG)
+
+Validate the entire graph using asserts: every node, every operation still to apply, every possible
+operation, the state of the dirty nodes, and connectedness. Returns `true` if no assertion fails.
+Intended for testing with `@assert is_valid(graph)`.
+"""
+function is_valid(graph::DAG)
+    for node in graph.nodes
+        @assert is_valid(graph, node)
+    end
+
+    for op in graph.operationsToApply
+        @assert is_valid(graph, op)
+    end
+
+    possible = graph.possibleOperations
+    for nr in possible.nodeReductions
+        @assert is_valid(graph, nr)
+    end
+    for ns in possible.nodeSplits
+        @assert is_valid(graph, ns)
+    end
+    for nf in possible.nodeFusions
+        @assert is_valid(graph, nf)
+    end
+
+    # dirty nodes must be in the graph and must not have any cached operations
+    for node in graph.dirtyNodes
+        @assert node in graph "Dirty Node is not part of the graph!"
+        @assert ismissing(node.nodeReduction) "Dirty Node has a NodeReduction!"
+        @assert ismissing(node.nodeSplit) "Dirty Node has a NodeSplit!"
+        if node isa DataTaskNode
+            @assert ismissing(node.nodeFusion) "Dirty DataTaskNode has a Node Fusion!"
+        elseif node isa ComputeTaskNode
+            @assert isempty(node.nodeFusions) "Dirty ComputeTaskNode has Node Fusions!"
+        end
+    end
+
+    @assert is_connected(graph) "Graph is not connected!"
+
+    return true
+end
+
+"""
+    is_scheduled(graph::DAG)
+
+Validate that the entire graph has been scheduled, i.e., assert for every node that is not a
+[`DataTaskNode`](@ref) that its `.device` is set. Returns `true` if no assertion fails.
+"""
+function is_scheduled(graph::DAG)
+    for node in graph.nodes
+        # data nodes are skipped
+        if !(node isa DataTaskNode)
+            @assert !ismissing(node.device)
+        end
+    end
+
+    return true
+end
--- a/src/graph_functions.jl
+++ b/src/graph_functions.jl
@@ -1,354 +0,0 @@
-using DataStructures
-
-in(node::Node, graph::DAG) = node in graph.nodes
-in(edge::Edge, graph::DAG) = edge in graph.edges
-
-function is_parent(potential_parent, node)
-   return potential_parent in node.parents
-end
-
-function is_child(potential_child, node)
-   return potential_child in node.children
-end
-
-function ==(n1::Node, n2::Node, g::DAG)
-   if typeof(n1) != typeof(n2)
-      return false
-   end
-   if !(n1 in g) || !(n2 in g)
-      return false
-   end
-
-   return n1.task == n2.task && children(n1) == children(n2)
-end
-
-# children = prerequisite nodes, nodes that need to execute before the task, edges point into this task
-function children(node::Node)
-   return copy(node.children)
-end
-
-# parents = subsequent nodes, nodes that need this node to execute, edges point from this task
-function parents(node::Node)
-   return copy(node.parents)
-end
-
-# siblings = all children of any parents, no duplicates, includes the node itself
-function siblings(node::Node)
-   result = Set{Node}()
-   push!(result, node)
-   for parent in node.parents
-      union!(result, parent.children)
-   end
-
-   return result
-end
-
-# partners = all parents of any children, no duplicates, includes the node itself
-function partners(node::Node)
-   result = Set{Node}()
-   push!(result, node)
-   for child in node.children
-      union!(result, child.parents)
-   end
-
-   return result
-end
-
-# alternative version to partners(Node), avoiding allocation of a new set
-# works on the given set and returns nothing
-function partners(node::Node, set::Set{Node})
-   push!(set, node)
-   for child in node.children
-      union!(set, child.parents)
-   end
-   return nothing
-end
-
-is_entry_node(node::Node) = length(node.children) == 0
-is_exit_node(node::Node) = length(node.parents) == 0
-
-# function to invalidate the operation caches for a given NodeFusion
-function invalidate_caches!(graph::DAG, operation::NodeFusion)
-   delete!(graph.possibleOperations, operation)
-
-   # delete the operation from all caches of nodes involved in the operation
-   filter!(!=(operation), operation.input[1].nodeFusions)
-   filter!(!=(operation), operation.input[3].nodeFusions)
-   
-   operation.input[2].nodeFusion = missing
-
-   return nothing
-end
-
-# function to invalidate the operation caches for a given NodeReduction
-function invalidate_caches!(graph::DAG, operation::NodeReduction)
-   delete!(graph.possibleOperations, operation)
-
-   for node in operation.input
-      node.nodeReduction = missing
-   end
-
-   return nothing
-end
-
-# function to invalidate the operation caches for a given NodeSplit
-function invalidate_caches!(graph::DAG, operation::NodeSplit)
-   delete!(graph.possibleOperations, operation)
-
-   # delete the operation from all caches of nodes involved in the operation
-   # for node split there is only one node
-   operation.input.nodeSplit = missing
-   
-   return nothing
-end
-
-# function to invalidate the operation caches of a ComputeTaskNode
-function invalidate_operation_caches!(graph::DAG, node::ComputeTaskNode)
-   if !ismissing(node.nodeReduction)
-      invalidate_caches!(graph, node.nodeReduction)
-   end
-   if !ismissing(node.nodeSplit)
-      invalidate_caches!(graph, node.nodeSplit)
-   end
-   while !isempty(node.nodeFusions)
-      invalidate_caches!(graph, pop!(node.nodeFusions))
-   end
-   return nothing
-end
-
-# function to invalidate the operation caches of a DataTaskNode
-function invalidate_operation_caches!(graph::DAG, node::DataTaskNode)
-   if !ismissing(node.nodeReduction)
-      invalidate_caches!(graph, node.nodeReduction)
-   end
-   if !ismissing(node.nodeSplit)
-      invalidate_caches!(graph, node.nodeSplit)
-   end
-   if !ismissing(node.nodeFusion)
-      invalidate_caches!(graph, node.nodeFusion)
-   end
-   return nothing
-end
-
-# for graph mutating functions we need to do a few things
-# 1: mute the graph (duh)
-# 2: keep track of what was changed for the diff (if track == true)
-# 3: invalidate operation caches
-
-function insert_node!(graph::DAG, node::Node, track=true, invalidate_cache=true)
-   # 1: mute
-   push!(graph.nodes, node)
-
-   # 2: keep track
-   if (track) push!(graph.diff.addedNodes, node) end
-
-   # 3: invalidate caches
-   if (!invalidate_cache) return node end
-   push!(graph.dirtyNodes, node)
-
-   return node
-end
-
-function insert_edge!(graph::DAG, node1::Node, node2::Node, track=true, invalidate_cache=true)
-   # @assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists"
-
-   # 1: mute
-   # edge points from child to parent
-   push!(node1.parents, node2)
-   push!(node2.children, node1)
-
-   # 2: keep track
-   if (track) push!(graph.diff.addedEdges, make_edge(node1, node2)) end
-   
-   # 3: invalidate caches
-   if (!invalidate_cache) return nothing end
-
-   invalidate_operation_caches!(graph, node1)
-   invalidate_operation_caches!(graph, node2)
-
-   push!(graph.dirtyNodes, node1)
-   push!(graph.dirtyNodes, node2)
-
-   return nothing
-end
-
-function remove_node!(graph::DAG, node::Node, track=true, invalidate_cache=true)
-   # @assert node in graph.nodes "Trying to remove a node that's not in the graph"
-
-   # 1: mute
-   delete!(graph.nodes, node)
-
-   # 2: keep track
-   if (track) push!(graph.diff.removedNodes, node) end
-
-   # 3: invalidate caches
-   if (!invalidate_cache) return nothing end
-
-   invalidate_operation_caches!(graph, node)
-   delete!(graph.dirtyNodes, node)
-
-   return nothing
-end
-
-function remove_edge!(graph::DAG, node1::Node, node2::Node, track=true, invalidate_cache=true)
-   # 1: mute
-   pre_length1 = length(node1.parents)
-   pre_length2 = length(node2.children)
-   filter!(x -> x != node2, node1.parents)
-   filter!(x -> x != node1, node2.children)
-
-   #=@assert begin
-      removed = pre_length1 - length(node1.parents)
-      removed <= 1
-   end "removed more than one node from node1's parents"=#
-
-   #=@assert begin
-      removed = pre_length2 - length(node2.children)
-      removed <= 1
-   end "removed more than one node from node2's children"=#
-
-   # 2: keep track
-   if (track) push!(graph.diff.removedEdges, make_edge(node1, node2)) end
-
-   # 3: invalidate caches
-   if (!invalidate_cache) return nothing end
-
-   invalidate_operation_caches!(graph, node1)
-   invalidate_operation_caches!(graph, node2)
-   if (node1 in graph)
-      push!(graph.dirtyNodes, node1)
-   end
-   if (node2 in graph) 
-      push!(graph.dirtyNodes, node2)
-   end
-
-   return nothing
-end
-
-# return the graph "difference" since last time this function was called
-function get_snapshot_diff(graph::DAG)
-   return swapfield!(graph, :diff, Diff())
-end
-
-function graph_properties(graph::DAG)
-   # make sure the graph is fully generated
-   apply_all!(graph)
-
-   d = 0
-   ce = 0
-   ed = 0
-   for node in graph.nodes
-      d += data(node.task) * length(node.parents)
-      ce += compute_effort(node.task)
-      ed += length(node.parents)
-   end
-
-   ci = ce / d
-
-   result = (data = d,
-             compute_effort = ce,
-             compute_intensity = ci,
-             nodes = length(graph.nodes),
-             edges = ed)
-   return result
-end
-
-function get_exit_node(graph::DAG)
-   for node in graph.nodes
-      if (is_exit_node(node))
-         return node
-      end
-   end
-   @assert false "The given graph has no exit node! It is either empty or not acyclic!"
-end
-
-# check whether the given graph is connected
-function is_valid(graph::DAG)
-   nodeQueue = Deque{Node}()
-   push!(nodeQueue, get_exit_node(graph))
-   seenNodes = Set{Node}()
-
-   while !isempty(nodeQueue)
-      current = pop!(nodeQueue)
-      push!(seenNodes, current)
-
-      for child in current.chlidren
-         push!(nodeQueue, child)
-      end
-   end
-
-   return length(seenNodes) == length(graph.nodes)
-end
-
-function show_nodes(io, graph::DAG)
-   print(io, "[")
-   first = true
-   for n in graph.nodes
-      if first
-         first = false
-      else
-         print(io, ", ")
-      end
-      print(io, n)
-   end
-   print(io, "]")
-end
-
-function show(io::IO, graph::DAG)
-   println(io, "Graph:")
-   print(io, "  Nodes: ")
-
-   nodeDict = Dict{Type, Int64}()
-   noEdges = 0
-   for node in graph.nodes
-      if haskey(nodeDict, typeof(node.task))
-         nodeDict[typeof(node.task)] = nodeDict[typeof(node.task)] + 1
-      else
-         nodeDict[typeof(node.task)] = 1
-      end
-      noEdges += length(parents(node))
-   end
-
-   if length(graph.nodes) <= 20
-      show_nodes(io, graph)
-   else
-      print("Total: ", length(graph.nodes), ", ")
-      first = true
-      i = 0
-      for (type, number) in zip(keys(nodeDict), values(nodeDict))
-         i += 1
-         if first
-            first = false
-         else
-            print(", ")
-         end
-         if (i % 3 == 0)
-            print("\n         ")
-         end
-         print(type, ": ", number)
-      end
-   end
-   println(io)
-   println(io, "  Edges: ", noEdges)
-   properties = graph_properties(graph)
-   println(io, "  Total Compute Effort: ", properties.compute_effort)
-   println(io, "  Total Data Transfer: ", properties.data)
-   println(io, "  Total Compute Intensity: ", properties.compute_intensity)
-end
-
-function show(io::IO, diff::Diff)
-   print(io, "Nodes: ")
-   print(io, length(diff.addedNodes) + length(diff.removedNodes))
-   print(io, " Edges: ")
-   print(io, length(diff.addedEdges) + length(diff.removedEdges))
-end
-
-# return a namedtuple of the lengths of the added/removed nodes/edges
-function length(diff::Diff)
-   return (
-      addedNodes = length(diff.addedNodes), 
-      removedNodes = length(diff.removedNodes), 
-      addedEdges = length(diff.addedEdges), 
-      removedEdges = length(diff.removedEdges)
-   )
-end
--- a/src/graph_interface.jl
+++ b/src/graph_interface.jl
@@ -1,34 +0,0 @@
-# user interface on the DAG
-
-# applies a new operation to the end of the graph
-function push_operation!(graph::DAG, operation::Operation)
-   # 1.: Add the operation to the DAG
-   push!(graph.operationsToApply, operation)
-
-   return nothing
-end
-
-# reverts the latest applied operation, essentially like a ctrl+z for
-function pop_operation!(graph::DAG)
-   # 1.: Remove the operation from the appliedChain of the DAG
-   if !isempty(graph.operationsToApply)
-      pop!(graph.operationsToApply)
-   elseif !isempty(graph.appliedOperations)
-      appliedOp = pop!(graph.appliedOperations)
-      revert_operation!(graph, appliedOp)
-   else
-      error("No more operations to pop!")
-   end
-   return nothing
-end
-
-can_pop(graph::DAG) = !isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
-
-# reset the graph to its initial state with no operations applied
-function reset_graph!(graph::DAG)
-   while (can_pop(graph))
-      pop_operation!(graph)
-   end
-
-   return nothing
-end
--- a/src/models/abc/compute.jl
+++ b/src/models/abc/compute.jl
@@ -0,0 +1,159 @@
+using AccurateArithmetic
+
+"""
+    compute(::ComputeTaskP, data::ParticleValue)
+
+Identity task: return the given particle and value unchanged.
+
+0 FLOP.
+"""
+function compute(::ComputeTaskP, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
+    return data
+end
+
+"""
+    compute(::ComputeTaskU, data::ParticleValue)
+
+Compute an outer edge. Return a new particle value holding the same particle `data.p` and the value `data.v` multiplied by the factor `outer_edge(data.p)`.
+
+1 FLOP.
+"""
+function compute(::ComputeTaskU, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
+    return ParticleValue(data.p, data.v * outer_edge(data.p))
+end
+
+"""
+    compute(::ComputeTaskV, data1::ParticleValue, data2::ParticleValue)
+
+Compute a vertex. The resulting particle is created by `preserve_momentum` from the two input particles (AB->C etc.); its value is the product of both input values and the `vertex()` factor.
+
+6 FLOP.
+"""
+function compute(
+    ::ComputeTaskV,
+    data1::ParticleValue{P1},
+    data2::ParticleValue{P2},
+)::ParticleValue where {P1 <: ABCParticle, P2 <: ABCParticle}
+    combined = preserve_momentum(data1.p, data2.p)
+    value = data1.v * vertex() * data2.v
+    return ParticleValue(combined, value)
+end
+
+"""
+    compute(::ComputeTaskS2, data1::ParticleValue, data2::ParticleValue)
+
+Compute a final inner edge (2 input particles, no output particle) and return the resulting number.
+
+For valid inputs, both input particles should have the same momenta at this point.
+
+12 FLOP.
+"""
+function compute(::ComputeTaskS2, data1::ParticleValue{P}, data2::ParticleValue{P})::Float64 where {P <: ABCParticle}
+    #=
+    @assert isapprox(abs(data1.p.momentum.E), abs(data2.p.momentum.E), rtol = 0.001, atol = sqrt(eps())) "E: $(data1.p.momentum.E) vs. $(data2.p.momentum.E)"
+    @assert isapprox(data1.p.momentum.px, -data2.p.momentum.px, rtol = 0.001, atol = sqrt(eps())) "px: $(data1.p.momentum.px) vs. $(data2.p.momentum.px)"
+    @assert isapprox(data1.p.momentum.py, -data2.p.momentum.py, rtol = 0.001, atol = sqrt(eps())) "py: $(data1.p.momentum.py) vs. $(data2.p.momentum.py)"
+    @assert isapprox(data1.p.momentum.pz, -data2.p.momentum.pz, rtol = 0.001, atol = sqrt(eps())) "pz: $(data1.p.momentum.pz) vs. $(data2.p.momentum.pz)"
+    =#
+    # the inner-edge factor is computed from the first particle only
+    return data1.v * inner_edge(data1.p) * data2.v
+end
+
+"""
+    compute(::ComputeTaskS1, data::ParticleValue)
+
+Compute an inner edge (1 input particle, 1 output particle). Return a new particle value holding the same particle `data.p` and the value `data.v` multiplied by `inner_edge(data.p)`.
+
+11 FLOP.
+"""
+function compute(::ComputeTaskS1, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
+    return ParticleValue(data.p, data.v * inner_edge(data.p))
+end
+
+"""
+    compute(::ComputeTaskSum, data::Vector{Float64})
+
+Compute the sum over the vector using `sum_kbn`, a summation that accounts for accumulated errors in long sums with potentially large differences in magnitude of the summands.
+
+Linearly many FLOP with growing data.
+"""
+function compute(::ComputeTaskSum, data::Vector{Float64})::Float64
+    return sum_kbn(data)
+end
+
+"""
+    get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector, outExpr)
+
+Generate and return code evaluating [`ComputeTaskP`](@ref) on the input given by `inExprs[1]`, providing the output on the target given by `outExpr`. Both are evaluated with `eval` and the results are interpolated into the generated code.
+"""
+function get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector, outExpr)
+    inSym = eval(inExprs[1])
+    outSym = eval(outExpr)
+
+    return Meta.parse("$outSym = compute(ComputeTaskP(), $inSym)")
+end
+
+"""
+    get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector, outExpr)
+
+Generate code evaluating [`ComputeTaskU`](@ref) on the input given by `inExprs[1]`, providing the output on the target given by `outExpr`.
+The input should be of type [`ParticleValue`](@ref), the output will be of type [`ParticleValue`](@ref).
+"""
+function get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector, outExpr)
+    inSym = eval(inExprs[1])
+    outSym = eval(outExpr)
+
+    return Meta.parse("$outSym = compute(ComputeTaskU(), $inSym)")
+end
+
+"""
+    get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector, outExpr)
+
+Generate code evaluating [`ComputeTaskV`](@ref) on the inputs given by `inExprs[1]` and `inExprs[2]`, providing the output on the target given by `outExpr`.
+Both inputs should be of type [`ParticleValue`](@ref), the output will be of type [`ParticleValue`](@ref).
+"""
+function get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector, outExpr)
+    inSym1, inSym2 = eval(inExprs[1]), eval(inExprs[2])
+    outSym = eval(outExpr)
+
+    return Meta.parse("$outSym = compute(ComputeTaskV(), $inSym1, $inSym2)")
+end
+
+"""
+    get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector, outExpr)
+
+Generate code evaluating [`ComputeTaskS2`](@ref) on the inputs given by `inExprs[1]` and `inExprs[2]`, providing the output on the target given by `outExpr`.
+Both inputs should be of type [`ParticleValue`](@ref), the output will be of type `Float64`.
+"""
+function get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector, outExpr)
+    inSym1, inSym2 = eval(inExprs[1]), eval(inExprs[2])
+    outSym = eval(outExpr)
+
+    return Meta.parse("$outSym = compute(ComputeTaskS2(), $inSym1, $inSym2)")
+end
+
+"""
+    get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector, outExpr)
+
+Generate code evaluating [`ComputeTaskS1`](@ref) on the input given by `inExprs[1]`, providing the output on the target given by `outExpr`.
+The input should be of type [`ParticleValue`](@ref), the output will be of type [`ParticleValue`](@ref).
+"""
+function get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector, outExpr)
+    inSym = eval(inExprs[1])
+    outSym = eval(outExpr)
+
+    return Meta.parse("$outSym = compute(ComputeTaskS1(), $inSym)")
+end
+
+"""
+    get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector, outExpr)
+
+Generate code evaluating [`ComputeTaskSum`](@ref) on all inputs given by `inExprs`, providing the output on the target given by `outExpr`.
+The inputs should be of type `Float64`, the output will be of type `Float64`.
+"""
+function get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector, outExpr)
+    inSyms = eval.(inExprs)
+    outSym = eval(outExpr)
+
+    return Meta.parse("$outSym = compute(ComputeTaskSum(), [$(unroll_symbol_vector(inSyms))])")
+end
--- a/src/models/abc/create.jl
+++ b/src/models/abc/create.jl
@@ -0,0 +1,198 @@
+using QEDbase
+using Random
+using Roots
+using ForwardDiff
+
+ComputeTaskSum() = ComputeTaskSum(0) # zero-argument convenience constructor, forwards 0 to the one-argument form
+
+"""
+    gen_process_input(processDescription::ABCProcessDescription)
+
+Return an [`ABCProcessInput`](@ref) of randomly generated [`ABCParticle`](@ref)s for an [`ABCProcessDescription`](@ref). The process description can be created manually or parsed from a string using [`parse_process`](@ref).
+
+Note: This uses RAMBO to create a valid process with conservation of momentum and energy. The incoming momenta come from `generate_initial_moms`, which only reads the first two input masses.
+"""
+function gen_process_input(processDescription::ABCProcessDescription)
+    # Float64 accumulator from the start, so the variable never changes its type
+    massSum = 0.0
+
+    # one mass entry per individual particle; the total rest mass is summed up on the way
+    inputMasses = Vector{Float64}()
+    for (particle, n) in processDescription.inParticles
+        for _ in 1:n
+            massSum += mass(particle)
+            push!(inputMasses, mass(particle))
+        end
+    end
+    outputMasses = Vector{Float64}()
+    for (particle, n) in processDescription.outParticles
+        for _ in 1:n
+            massSum += mass(particle)
+            push!(outputMasses, mass(particle))
+        end
+    end
+
+    # add some extra random mass to allow for some momentum
+    massSum += rand(rng[threadid()]) * (length(inputMasses) + length(outputMasses))
+
+    # the dictionaries are iterated in the same order as above, so masses and momenta line up by index
+    inputParticles = Vector{ABCParticle}()
+    initialMomenta = generate_initial_moms(massSum, inputMasses)
+    index = 1
+    for (particle, n) in processDescription.inParticles
+        for _ in 1:n
+            mom = initialMomenta[index]
+            push!(inputParticles, particle(mom))
+            index += 1
+        end
+    end
+
+    outputParticles = Vector{ABCParticle}()
+    final_momenta = generate_physical_massive_moms(rng[threadid()], massSum, outputMasses)
+    index = 1
+    for (particle, n) in processDescription.outParticles
+        for _ in 1:n
+            mom = final_momenta[index]
+            push!(outputParticles, particle(SFourMomentum(-mom.E, mom.px, mom.py, mom.pz)))
+            index += 1
+        end
+    end
+
+    processInput = ABCProcessInput(processDescription, inputParticles, outputParticles)
+
+    return processInput
+end
+
+####################
+# CODE FROM HERE BORROWED FROM SOURCE: https://codebase.helmholtz.cloud/qedsandbox/QEDphasespaces.jl/
+# use qedphasespaces directly once released
+#
+# quick and dirty implementation of the RAMBO algorithm
+#
+# reference: 
+# * https://cds.cern.ch/record/164736/files/198601282.pdf
+# * https://www.sciencedirect.com/science/article/pii/0010465586901190
+####################
+
+function generate_initial_moms(ss, masses) # only masses[1] and masses[2] are read
+    E1 = (ss^2 + masses[1]^2 - masses[2]^2) / (2 * ss) # E1 + E2 == ss
+    E2 = (ss^2 + masses[2]^2 - masses[1]^2) / (2 * ss)
+
+    rho1 = sqrt(E1^2 - masses[1]^2) # size of the z component of the first momentum
+    rho2 = sqrt(E2^2 - masses[2]^2)
+
+    return [SFourMomentum(E1, 0, 0, rho1), SFourMomentum(E2, 0, 0, -rho2)] # opposite signs along z
+end
+
+# sampler methods so that `rand`/`rand!` can create SFourMomentum and NTuple{N, Float64} values from `rand(rng, ...)`
+Random.rand(rng::AbstractRNG, ::Random.SamplerType{SFourMomentum}) = SFourMomentum(rand(rng, 4))
+Random.rand(rng::AbstractRNG, ::Random.SamplerType{NTuple{N, Float64}}) where {N} = Tuple(rand(rng, N))
+
+# map four numbers u1..u4 (drawn with `rand` by the callers) to a momentum with energy q0 and direction (cth, phi)
+function _transform_uni_to_mom(u1, u2, u3, u4)
+    cth = 2 * u1 - 1 # cosine of the polar angle
+    sth = sqrt(1 - cth^2) # matching sine
+    phi = 2 * pi * u2 # azimuthal angle
+    q0 = -log(u3 * u4)
+    qx = q0 * sth * cos(phi)
+    qy = q0 * sth * sin(phi)
+    qz = q0 * cth
+
+    return SFourMomentum(q0, qx, qy, qz)
+end
+
+# Convert `uni_mom` (anything `Tuple` can split into at least four values) into a momentum
+# and return it.
+#
+# NOTE: despite the `!` in the name, `dest` is never modified. Assigning to it inside this
+# function would only rebind the local name, so the result is available through the return
+# value only.
+function _transform_uni_to_mom!(uni_mom, dest)
+    u1, u2, u3, u4 = Tuple(uni_mom)
+
+    # delegate to the four-argument method instead of keeping a second copy of its formulas
+    return _transform_uni_to_mom(u1, u2, u3, u4)
+end
+
+_transform_uni_to_mom(u1234::Tuple) = _transform_uni_to_mom(u1234...) # splat into the multi-argument method
+_transform_uni_to_mom(u1234::SFourMomentum) = _transform_uni_to_mom(Tuple(u1234)) # via the Tuple method
+
+function generate_massless_moms(rng, n::Int)
+    # fill a fresh vector with random samples, then convert every sample into a momentum
+    uniforms = rand!(rng, Vector{SFourMomentum}(undef, n))
+    return map(_transform_uni_to_mom, uniforms)
+end
+
+# draw n momenta with generate_massless_moms and transform each one using their sum Q and the target value ss
+function generate_physical_massless_moms(rng, ss, n)
+    r_moms = generate_massless_moms(rng, n)
+    Q = sum(r_moms)
+    M = sqrt(Q * Q)
+
+    # transformation parameters derived from the summed momentum Q
+    fac = -1 / M
+    bx = fac * getX(Q)
+    by = fac * getY(Q)
+    bz = fac * getZ(Q)
+    gamma = getT(Q) / M
+    a = 1 / (1 + gamma)
+    x = ss / M # overall scale factor
+
+    # overwrite every momentum with its transformed version
+    for i in 1:n
+        q = r_moms[i]
+        q0 = getT(q)
+        q1 = getX(q)
+        q2 = getY(q)
+        q3 = getZ(q)
+
+        bq = bx * q1 + by * q2 + bz * q3
+
+        r_moms[i] = SFourMomentum(
+            x * (gamma * q0 + bq),
+            x * (q1 + bx * q0 + a * bq * bx),
+            x * (q2 + by * q0 + a * bq * by),
+            x * (q3 + bz * q0 + a * bq * bz),
+        )
+    end
+
+    return r_moms
+end
+
+function _to_be_solved(xi, masses, p0s, ss)
+    total = 0.0 # sum of sqrt(m_i^2 + xi^2 * E_i^2) over all entries of p0s
+    for (idx, energy) in enumerate(p0s)
+        total += sqrt(masses[idx]^2 + xi^2 * energy^2)
+    end
+    return total - ss
+end
+
+function _build_massive_momenta(xi, masses, massless_moms)
+    result = SFourMomentum[]
+
+    for i in 1:length(massless_moms)
+        massless_mom = massless_moms[i]
+
+        # energy component: sqrt(E^2 * xi^2 + m_i^2)
+        k0 = sqrt(getT(massless_mom)^2 * xi^2 + masses[i]^2)
+        # spatial components are scaled by xi
+        kx = xi * getX(massless_mom)
+        ky = xi * getY(massless_mom)
+        kz = xi * getZ(massless_mom)
+
+        push!(result, SFourMomentum(k0, kx, ky, kz))
+    end
+    return result
+end
+
+first_derivative(func) = x -> ForwardDiff.derivative(func, float(x)) # returns a function evaluating func'(x)
+
+# scale massless momenta by the factor xi for which the energies sqrt(m_i^2 + xi^2 * E_i^2) sum up to ss
+function generate_physical_massive_moms(rng, ss, masses; x0 = 0.1)
+    massless_moms = generate_physical_massless_moms(rng, ss, length(masses))
+    energies = getT.(massless_moms)
+
+    energy_mismatch = x -> _to_be_solved(x, masses, energies, ss)
+    xi = find_zero((energy_mismatch, first_derivative(energy_mismatch)), x0, Roots.Newton())
+    return _build_massive_momenta(xi, masses, massless_moms)
+end
--- a/src/models/abc/parse.jl
+++ b/src/models/abc/parse.jl
@@ -0,0 +1,248 @@
+# functions for importing DAGs from a file; the patterns are `const` so the parsing loop does not read untyped globals
+const regex_a = r"^[A-C]\d+$"                     # Regex for the initial particles
+const regex_c = r"^[A-C]\(([^']*),([^']*)\)$"     # Regex for the combinations of 2 particles
+const regex_m = r"^M\(([^']*),([^']*),([^']*)\)$" # Regex for the combinations of 3 particles
+const regex_plus = r"^\+$"                        # Regex for the sum
+
+const PARTICLE_VALUE_SIZE::Int = 48
+const FLOAT_SIZE::Int = 8
+
+"""
+    parse_nodes(input::AbstractString)
+
+Extract every single-quoted substring from `input` and return them, in order of appearance, as a vector with one entry per node.
+"""
+function parse_nodes(input::AbstractString)
+    # one capture group: everything between a pair of single quotes
+    quoted = r"'([^']*)'"
+
+    return [m.captures[1] for m in eachmatch(quoted, input)]
+end
+
+"""
+    parse_edges(input::AbstractString)
+
+Extract every `('from', 'to')` pair from `input` and return them as a vector of 2-tuples, one per edge. Currently unused since the entire graph can be read from just the node names.
+"""
+function parse_edges(input::AbstractString)
+    # two capture groups: the two quoted names inside one parenthesized pair
+    pair = r"\('([^']*)', '([^']*)'\)"
+
+    return [(m.captures[1], m.captures[2]) for m in eachmatch(pair, input)]
+end
+
+"""
+    parse_dag(filename::AbstractString, model::ABCModel, verbose::Bool = false)
+
+Read an abc-model process from the first line of the given file and build its graph. `verbose` is a positional argument; if it is set to true, print some progress information to stdout.
+
+Returns a valid [`DAG`](@ref).
+"""
+function parse_dag(filename::AbstractString, model::ABCModel, verbose::Bool = false)
+    # the do-block form closes the file again even if reading the line throws
+    nodes_string = open(filename, "r") do file
+        if (verbose)
+            println("Opened file")
+        end
+        return readline(file)
+    end
+    nodes = parse_nodes(nodes_string)
+
+    if (verbose)
+        println("Read file")
+    end
+
+    graph = DAG()
+
+    # estimate total number of nodes
+    # try to slightly overestimate so no resizing is necessary
+    # data nodes are not included in length(nodes) and there are a few more than compute nodes
+    estimate_no_nodes = 4 * length(nodes)
+    if (verbose)
+        println("Estimating ", estimate_no_nodes, " Nodes")
+    end
+    sizehint!(graph.nodes, estimate_no_nodes)
+
+    sum_node = insert_node!(graph, make_node(ComputeTaskSum(0)), track = false, invalidate_cache = false)
+    global_data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false)
+    insert_edge!(graph, sum_node, global_data_out, track = false, invalidate_cache = false)
+
+    # remember the data out nodes for connection
+    dataOutNodes = Dict()
+
+    if (verbose)
+        println("Building graph")
+    end
+    noNodes = 0
+    nodesToRead = length(nodes)
+    while !isempty(nodes)
+        node = popfirst!(nodes)
+        noNodes += 1
+        if (noNodes % 100 == 0)
+            if (verbose)
+                percent = string(round(100.0 * noNodes / nodesToRead, digits = 2), "%")
+                print("\rReading Nodes... $percent")
+            end
+        end
+        if occursin(regex_a, node)
+            # add nodes and edges for the state reading to u(P(Particle))
+            data_in = insert_node!(
+                graph,
+                make_node(DataTask(PARTICLE_VALUE_SIZE), string(node)),
+                track = false,
+                invalidate_cache = false,
+            ) # read particle data node
+            compute_P = insert_node!(graph, make_node(ComputeTaskP()), track = false, invalidate_cache = false) # compute P node
+            data_Pu =
+                insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data from P to u (one ParticleValue object)
+            compute_u = insert_node!(graph, make_node(ComputeTaskU()), track = false, invalidate_cache = false) # compute U node
+            data_out =
+                insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data out from u (one ParticleValue object)
+
+            insert_edge!(graph, data_in, compute_P, track = false, invalidate_cache = false)
+            insert_edge!(graph, compute_P, data_Pu, track = false, invalidate_cache = false)
+            insert_edge!(graph, data_Pu, compute_u, track = false, invalidate_cache = false)
+            insert_edge!(graph, compute_u, data_out, track = false, invalidate_cache = false)
+
+            # remember the data_out node for future edges
+            dataOutNodes[node] = data_out
+        elseif occursin(regex_c, node)
+            capt = match(regex_c, node)
+
+            in1 = capt.captures[1]
+            in2 = capt.captures[2]
+
+            compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false)
+            data_out =
+                insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false)
+
+            if (occursin(regex_c, in1))
+                # put an S node after this input
+                compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false)
+                data_S_v = insert_node!(
+                    graph,
+                    make_node(DataTask(PARTICLE_VALUE_SIZE)),
+                    track = false,
+                    invalidate_cache = false,
+                )
+
+                insert_edge!(graph, dataOutNodes[in1], compute_S, track = false, invalidate_cache = false)
+                insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false)
+
+                insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false)
+            else
+                insert_edge!(graph, dataOutNodes[in1], compute_v, track = false, invalidate_cache = false)
+            end
+
+            if (occursin(regex_c, in2))
+                # i think the current generator only puts the combined particles in the first space, so this case might never be entered
+                # put an S node after this input
+                compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false)
+                data_S_v = insert_node!(
+                    graph,
+                    make_node(DataTask(PARTICLE_VALUE_SIZE)),
+                    track = false,
+                    invalidate_cache = false,
+                )
+
+                insert_edge!(graph, dataOutNodes[in2], compute_S, track = false, invalidate_cache = false)
+                insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false)
+
+                insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false)
+            else
+                insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false)
+            end
+
+            insert_edge!(graph, compute_v, data_out, track = false, invalidate_cache = false)
+            dataOutNodes[node] = data_out
+
+        elseif occursin(regex_m, node)
+            # assume for now that only the first particle of the three is combined and the other two are "original" ones
+            capt = match(regex_m, node)
+            in1 = capt.captures[1]
+            in2 = capt.captures[2]
+            in3 = capt.captures[3]
+
+            # in2 + in3 with a v
+            compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false)
+            data_v =
+                insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false)
+
+            insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false)
+            insert_edge!(graph, dataOutNodes[in3], compute_v, track = false, invalidate_cache = false)
+            insert_edge!(graph, compute_v, data_v, track = false, invalidate_cache = false)
+
+            # combine with the v of the combined other input
+            compute_S2 = insert_node!(graph, make_node(ComputeTaskS2()), track = false, invalidate_cache = false)
+            data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false) # output of a S2 task is only a float
+
+            insert_edge!(graph, data_v, compute_S2, track = false, invalidate_cache = false)
+            insert_edge!(graph, dataOutNodes[in1], compute_S2, track = false, invalidate_cache = false)
+            insert_edge!(graph, compute_S2, data_out, track = false, invalidate_cache = false)
+
+            insert_edge!(graph, data_out, sum_node, track = false, invalidate_cache = false)
+            add_child!(sum_node.task)
+        elseif occursin(regex_plus, node)
+            if (verbose)
+                println("\rReading Nodes Complete    ")
+                println("Added ", length(graph.nodes), " nodes")
+            end
+        else
+            @assert false ("Unknown node '$node' while reading from file $filename")
+        end
+    end
+
+    #put all nodes into dirty nodes set
+    graph.dirtyNodes = copy(graph.nodes)
+
+    if (verbose)
+        println("Generating the graph's properties")
+    end
+    graph.properties = GraphProperties(graph)
+
+    if (verbose)
+        println("Done")
+    end
+
+    # don't actually need to read the edges
+    return graph
+end
+
+"""
+    parse_process(str::AbstractString, model::ABCModel)
+
+Parse a string representation of a process, such as "AB->ABBB" into the corresponding [`ABCProcessDescription`](@ref). Throws a `String` describing the problem if the arrow is missing, one side is empty, or a side contains unknown characters (including a second "->").
+"""
+function parse_process(str::AbstractString, model::ABCModel)
+    inParticles = Dict{Type, Int}()
+    outParticles = Dict{Type, Int}()
+
+    if !(contains(str, "->"))
+        throw("Did not find -> while parsing process \"$str\"")
+    end
+    # split at the first arrow only: a further "->" stays in outStr and is rejected as unknown characters below
+    (inStr, outStr) = split(str, "->", limit = 2)
+
+    if (isempty(inStr) || isempty(outStr))
+        throw("Process (\"$str\") input or output part is empty!")
+    end
+
+    for t in types(model)
+        inCount = count(x -> x == String(t)[1], inStr)
+        outCount = count(x -> x == String(t)[1], outStr)
+        if inCount != 0
+            inParticles[t] = inCount
+        end
+        if outCount != 0
+            outParticles[t] = outCount
+        end
+    end
+
+    if length(inStr) != sum(values(inParticles))
+        throw("Encountered unknown characters in the input part of process \"$str\"")
+    elseif length(outStr) != sum(values(outParticles))
+        throw("Encountered unknown characters in the output part of process \"$str\"")
+    end
+
+    return ABCProcessDescription(inParticles, outParticles)
+end
--- a/src/models/abc/particle.jl
+++ b/src/models/abc/particle.jl
@@ -0,0 +1,210 @@
+using QEDbase
+
+import QEDbase.mass
+
+"""
+    ABCModel <: AbstractPhysicsModel
+
+Singleton type identifying the ABC-Model. It has no fields and is only used to select the ABC-specific methods
+by dispatch, e.g. in [`types`](@ref) and [`parse_process`](@ref).
+"""
+struct ABCModel <: AbstractPhysicsModel end
+
+"""
+    ABCParticle
+
+Abstract base type for all particles in the [`ABCModel`](@ref).
+"""
+abstract type ABCParticle <: AbstractParticle end
+
+"""
+    ParticleA <: ABCParticle
+
+An 'A' particle in the ABC Model, holding its four-momentum.
+"""
+struct ParticleA <: ABCParticle
+    momentum::SFourMomentum
+end
+
+"""
+    ParticleB <: ABCParticle
+
+A 'B' particle in the ABC Model, holding its four-momentum.
+"""
+struct ParticleB <: ABCParticle
+    momentum::SFourMomentum
+end
+
+"""
+    ParticleC <: ABCParticle
+
+A 'C' particle in the ABC Model, holding its four-momentum.
+"""
+struct ParticleC <: ABCParticle
+    momentum::SFourMomentum
+end
+
+"""
+    ABCProcessDescription <: AbstractProcessDescription
+
+A description of a process in the ABC-Model. Stores, per particle type, the number of incoming and outgoing particles.
+
+See also: [`in_particles`](@ref), [`out_particles`](@ref), [`parse_process`](@ref)
+"""
+struct ABCProcessDescription <: AbstractProcessDescription
+    # particle type => number of incoming particles of that type
+    inParticles::Dict{Type, Int}
+    # particle type => number of outgoing particles of that type
+    outParticles::Dict{Type, Int}
+end
+
+"""
+    ABCProcessInput <: AbstractProcessInput
+
+Input for an ABC process. Contains the [`ABCProcessDescription`](@ref) of the process it is an input for, and the values of the in and out particles.
+
+See also: [`gen_process_input`](@ref)
+"""
+struct ABCProcessInput <: AbstractProcessInput
+    process::ABCProcessDescription
+    inParticles::Vector{ABCParticle}
+    outParticles::Vector{ABCParticle}
+end
+
+"""
+    PARTICLE_MASSES
+
+Constant lookup table from each concrete [`ABCParticle`](@ref) type to its mass.
+"""
+const PARTICLE_MASSES = Dict{Type, Float64}(
+    ParticleA => 1.0,
+    ParticleB => 1.0,
+    ParticleC => 0.0,
+)
+
+"""
+    mass(t::Type{T}) where {T <: ABCParticle}
+
+Look up the mass (at rest) of the given particle type in [`PARTICLE_MASSES`](@ref).
+"""
+function mass(t::Type{T}) where {T <: ABCParticle}
+    return PARTICLE_MASSES[t]
+end
+
+"""
+    interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
+
+Given two different particle types of the ABC model, return the one remaining type that is neither of them.
+"""
+function interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
+    @assert t1 != t2
+    # the result is the first of A, B, C that does not appear among the two inputs
+    if !(ParticleA in (t1, t2))
+        return ParticleA
+    end
+    if !(ParticleB in (t1, t2))
+        return ParticleB
+    end
+    return ParticleC
+end
+
+"""
+    types(::ABCModel)
+
+Return the particle types of the [`ABCModel`](@ref) as a Vector, in the order A, B, C.
+"""
+types(::ABCModel) = [ParticleA, ParticleB, ParticleC]
+
+"""
+    square(p::ABCParticle)
+
+Return the square of the particle's momentum, computed by `getMass2` on `p.momentum`.
+
+Takes 7 effective FLOP.
+"""
+square(p::ABCParticle) = getMass2(p.momentum)
+
+"""
+    inner_edge(p::ABCParticle)
+
+Return the factor of the inner edge with the given (virtual) particle: `1 / (square(p) - m^2)`, where `m` is the
+mass of the particle's type.
+
+Takes 10 effective FLOP. (3 here + 7 in square(p))
+"""
+function inner_edge(p::ABCParticle)
+    m = mass(typeof(p))
+    return 1.0 / (square(p) - m * m)
+end
+
+"""
+    outer_edge(p::ABCParticle)
+
+Return the factor of the outer edge with the given (real) particle. This is the constant `1.0`, independent of `p`.
+
+Takes 0 effective FLOP.
+"""
+outer_edge(p::ABCParticle) = 1.0
+
+"""
+    vertex()
+
+Return the constant factor contributed by a vertex.
+
+Takes 0 effective FLOP since it's constant.
+"""
+function vertex()
+    coupling = 1.0 / 137.0
+    unit = 1.0
+    return unit * coupling
+end
+
+"""
+    preserve_momentum(p1::ABCParticle, p2::ABCParticle)
+
+Create the particle resulting from two particles interacting at a vertex. Its type is given by
+[`interaction_result`](@ref) and its momentum is the sum of both input momenta.
+
+Takes 4 effective FLOP.
+"""
+function preserve_momentum(p1::ABCParticle, p2::ABCParticle)
+    resultType = interaction_result(typeof(p1), typeof(p2))
+    totalMomentum = p1.momentum + p2.momentum
+    return resultType(totalMomentum)
+end
+
+"""
+    type_from_name(name::String)
+
+For a name of a particle, return the particle's `Type`.
+
+The type is chosen by the prefix of `name` ("A", "B" or "C"). Throws a `String` message for any other name.
+"""
+function type_from_name(name::String)
+    for (prefix, particleType) in ("A" => ParticleA, "B" => ParticleB, "C" => ParticleC)
+        if startswith(name, prefix)
+            return particleType
+        end
+    end
+    throw("Invalid name for a particle in the ABC model")
+end
+
+# One-letter names of the particle types. `parse_process` matches the first character of these names.
+String(::Type{ParticleA}) = "A"
+String(::Type{ParticleB}) = "B"
+String(::Type{ParticleC}) = "C"
+
+# Accessors for the incoming particles: per-type counts for a description, particle values for an input.
+in_particles(process::ABCProcessDescription) = process.inParticles
+in_particles(input::ABCProcessInput) = input.inParticles
+
+# Accessors for the outgoing particles: per-type counts for a description, particle values for an input.
+out_particles(process::ABCProcessDescription) = process.outParticles
+out_particles(input::ABCProcessInput) = input.outParticles
--- a/src/models/abc/print.jl
+++ b/src/models/abc/print.jl
@@ -0,0 +1,58 @@
+
+"""
+    show(io::IO, process::ABCProcessDescription)
+
+Pretty print an [`ABCProcessDescription`](@ref) (no newlines).
+
+```jldoctest
+julia> using MetagraphOptimization
+
+julia> print(parse_process("AB->ABBB", ABCModel()))
+ABC Process: 'AB->ABBB'
+```
+"""
+function show(io::IO, process::ABCProcessDescription)
+    # iterate types() rather than keys() so the particles are always printed in the order A, B, C
+    particleTypes = types(ABCModel())
+    print(io, "ABC Process: \'")
+    # each side is followed by its terminator: "->" after the input part, the closing quote after the output part
+    for (counts, terminator) in ((process.inParticles, "->"), (process.outParticles, "'"))
+        for particleType in particleTypes, _ in 1:get(counts, particleType, 0)
+            print(io, String(particleType))
+        end
+        print(io, terminator)
+    end
+    return nothing
+end
+
+"""
+    show(io::IO, processInput::ABCProcessInput)
+
+Pretty print an [`ABCProcessInput`](@ref) over multiple lines: a header naming the process, followed by the
+incoming and the outgoing particles, one per line.
+"""
+function show(io::IO, processInput::ABCProcessInput)
+    incoming = processInput.inParticles
+    outgoing = processInput.outParticles
+
+    println(io, "Input for $(processInput.process):")
+    println(io, "  $(length(incoming)) Incoming particles:")
+    foreach(particle -> println(io, "    $particle"), incoming)
+    println(io, "  $(length(outgoing)) Outgoing Particles:")
+    foreach(particle -> println(io, "    $particle"), outgoing)
+    return nothing
+end
+
+"""
+    show(io::IO, particle::T) where {T <: ABCParticle}
+
+Pretty print an [`ABCParticle`](@ref) as its type name followed by its momentum (no newlines).
+"""
+function show(io::IO, particle::T) where {T <: ABCParticle}
+    typeName = String(typeof(particle))
+    print(io, "$(typeName): $(particle.momentum)")
+    return nothing
+end
--- a/src/models/abc/properties.jl
+++ b/src/models/abc/properties.jl
@@ -0,0 +1,166 @@
+"""
+    compute_effort(::ComputeTaskS1)
+
+Return the (constant) compute effort of an S1 task.
+"""
+compute_effort(::ComputeTaskS1) = 11.0
+
+"""
+    compute_effort(::ComputeTaskS2)
+
+Return the (constant) compute effort of an S2 task.
+"""
+compute_effort(::ComputeTaskS2) = 12.0
+
+"""
+    compute_effort(::ComputeTaskU)
+
+Return the (constant) compute effort of a U task.
+"""
+compute_effort(::ComputeTaskU) = 1.0
+
+"""
+    compute_effort(::ComputeTaskV)
+
+Return the (constant) compute effort of a V task.
+"""
+compute_effort(::ComputeTaskV) = 6.0
+
+"""
+    compute_effort(::ComputeTaskP)
+
+Return the (constant) compute effort of a P task.
+"""
+compute_effort(::ComputeTaskP) = 0.0
+
+"""
+    compute_effort(::ComputeTaskSum)
+
+Return the compute effort of a Sum task.
+
+Note: This is a constant compute effort, even though sum scales with the number of its inputs. Since there is only ever a single sum node in a graph generated from the ABC-Model,
+this doesn't matter.
+"""
+compute_effort(::ComputeTaskSum) = 1.0
+
+"""
+    show(io::IO, t::DataTask)
+
+Print the data task to io as "Data" followed by its `data` value.
+"""
+show(io::IO, t::DataTask) = print(io, "Data", t.data)
+
+"""
+    show(io::IO, ::ComputeTaskS1)
+
+Print the S1 task to io.
+"""
+show(io::IO, ::ComputeTaskS1) = print(io, "ComputeS1")
+
+"""
+    show(io::IO, ::ComputeTaskS2)
+
+Print the S2 task to io.
+"""
+show(io::IO, ::ComputeTaskS2) = print(io, "ComputeS2")
+
+"""
+    show(io::IO, ::ComputeTaskP)
+
+Print the P task to io.
+"""
+show(io::IO, ::ComputeTaskP) = print(io, "ComputeP")
+
+"""
+    show(io::IO, ::ComputeTaskU)
+
+Print the U task to io.
+"""
+show(io::IO, ::ComputeTaskU) = print(io, "ComputeU")
+
+"""
+    show(io::IO, ::ComputeTaskV)
+
+Print the V task to io.
+"""
+show(io::IO, ::ComputeTaskV) = print(io, "ComputeV")
+
+"""
+    show(io::IO, ::ComputeTaskSum)
+
+Print the sum task to io.
+"""
+show(io::IO, ::ComputeTaskSum) = print(io, "ComputeSum")
+
+"""
+    copy(t::DataTask)
+
+Return a new [`DataTask`](@ref) carrying the same `data` value as `t`.
+"""
+function copy(t::DataTask)
+    return DataTask(t.data)
+end
+
+"""
+    children(::DataTask)
+
+Return the number of children of a data task (always 1).
+"""
+children(::DataTask) = 1
+
+"""
+    children(::ComputeTaskS1)
+
+Return the number of children of a ComputeTaskS1 (always 1).
+"""
+children(::ComputeTaskS1) = 1
+
+"""
+    children(::ComputeTaskS2)
+
+Return the number of children of a ComputeTaskS2 (always 2).
+"""
+children(::ComputeTaskS2) = 2
+
+"""
+    children(::ComputeTaskP)
+
+Return the number of children of a ComputeTaskP (always 1).
+"""
+children(::ComputeTaskP) = 1
+
+"""
+    children(::ComputeTaskU)
+
+Return the number of children of a ComputeTaskU (always 1).
+"""
+children(::ComputeTaskU) = 1
+
+"""
+    children(::ComputeTaskV)
+
+Return the number of children of a ComputeTaskV (always 2).
+"""
+children(::ComputeTaskV) = 2
+
+
+"""
+    children(t::ComputeTaskSum)
+
+Return the number of children of a ComputeTaskSum, as stored in its `children_number` field.
+"""
+children(t::ComputeTaskSum) = t.children_number
+
+"""
+    children(t::FusedComputeTask)
+
+Return the number of children of a FusedComputeTask, i.e. the number of distinct inputs among the inputs of its two tasks.
+"""
+function children(t::FusedComputeTask)
+    # collect into a set so that inputs used by both tasks are only counted once
+    distinctInputs = Set(t.t1_inputs)
+    union!(distinctInputs, Set(t.t2_inputs))
+    return length(distinctInputs)
+end
+
+"""
+    add_child!(t::ComputeTaskSum)
+
+Increase the stored number of children of the sum task by one. Returns `nothing`.
+"""
+function add_child!(t::ComputeTaskSum)
+    t.children_number = t.children_number + 1
+    return nothing
+end
--- a/src/models/abc/types.jl
+++ b/src/models/abc/types.jl
@@ -0,0 +1,59 @@
+"""
+    DataTask <: AbstractDataTask
+
+Task representing a specific data transfer in the ABC Model. Holds a single `Float64` value in `data`.
+"""
+struct DataTask <: AbstractDataTask
+    data::Float64
+end
+
+"""
+    ComputeTaskS1 <: AbstractComputeTask
+
+S task with a single child.
+"""
+struct ComputeTaskS1 <: AbstractComputeTask end
+
+"""
+    ComputeTaskS2 <: AbstractComputeTask
+
+S task with two children.
+"""
+struct ComputeTaskS2 <: AbstractComputeTask end
+
+"""
+    ComputeTaskP <: AbstractComputeTask
+
+P task with no children.
+
+NOTE(review): `children(::ComputeTaskP)` in `src/models/abc/properties.jl` returns 1, which contradicts "no children"
+here. Confirm which of the two is intended.
+"""
+struct ComputeTaskP <: AbstractComputeTask end
+
+"""
+    ComputeTaskV <: AbstractComputeTask
+
+V task with two children.
+"""
+struct ComputeTaskV <: AbstractComputeTask end
+
+"""
+    ComputeTaskU <: AbstractComputeTask
+
+U task with a single child.
+"""
+struct ComputeTaskU <: AbstractComputeTask end
+
+"""
+    ComputeTaskSum <: AbstractComputeTask
+
+Task that sums all its inputs, n children. Mutable because the number of children is stored in the task
+and incremented by `add_child!`.
+"""
+mutable struct ComputeTaskSum <: AbstractComputeTask
+    children_number::Int
+end
+
+"""
+    ABC_TASKS
+
+Constant vector of all tasks of the ABC-Model.
+
+Declared `const` so that it really is constant, as documented: the binding cannot be reassigned and accesses
+to it are type-stable.
+"""
+const ABC_TASKS = [DataTask, ComputeTaskS1, ComputeTaskS2, ComputeTaskP, ComputeTaskV, ComputeTaskU, ComputeTaskSum]
--- a/src/models/interface.jl
+++ b/src/models/interface.jl
@@ -0,0 +1,109 @@
+
+"""
+    AbstractPhysicsModel
+
+Base type for a model, e.g. ABC-Model or QED. This is used to dispatch many functions.
+"""
+abstract type AbstractPhysicsModel end
+
+"""
+    AbstractParticle
+
+Base type for particles belonging to a certain [`AbstractPhysicsModel`](@ref).
+"""
+abstract type AbstractParticle end
+
+"""
+    ParticleValue{ParticleType <: AbstractParticle}
+
+A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated.
+
+`sizeof(ParticleValue())` = 48 Byte
+
+NOTE(review): the size above depends on the concrete `ParticleType` and cannot be checked from this file. Confirm.
+"""
+struct ParticleValue{ParticleType <: AbstractParticle}
+    # the particle itself
+    p::ParticleType
+    # the value calculated so far for this particle
+    v::Float64
+end
+
+"""
+    AbstractProcessDescription
+
+Base type for process descriptions. An object of this type of a corresponding [`AbstractPhysicsModel`](@ref) should uniquely identify a process in that model.
+
+See also: [`parse_process`](@ref)
+"""
+abstract type AbstractProcessDescription end
+
+"""
+    AbstractProcessInput
+
+Base type for process inputs. An object of this type contains the input values (e.g. momenta) of the particles in a process.
+
+See also: [`gen_process_input`](@ref)
+"""
+abstract type AbstractProcessInput end
+
+"""
+    mass(t::Type{T}) where {T <: AbstractParticle}
+
+Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the particle's mass at rest.
+"""
+function mass end
+
+"""
+    interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: AbstractParticle, T2 <: AbstractParticle}
+
+Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the result particle type when the two given particles interact.
+"""
+function interaction_result end
+
+"""
+    types(::AbstractPhysicsModel)
+
+Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref), returning a `Vector` of the available particle types in the model.
+"""
+function types end
+
+"""
+    in_particles(::AbstractProcessDescription)
+
+Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref).
+Returns a `Dict` mapping each particle type to the number (`Int`) of incoming particles of that type in the process.
+
+
+    in_particles(::AbstractProcessInput)
+
+Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
+Returns a `Vector` of particles with the values of all incoming particles for the corresponding process description.
+"""
+function in_particles end
+
+"""
+    out_particles(::AbstractProcessDescription)
+
+Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref).
+Returns a `Dict` mapping each particle type to the number (`Int`) of outgoing particles of that type in the process.
+
+
+    out_particles(::AbstractProcessInput)
+
+Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
+Returns a `Vector` of particles with the values of all outgoing particles for the corresponding process description.
+"""
+function out_particles end
+
+"""
+    parse_process(::AbstractString, ::AbstractPhysicsModel)
+
+Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref).
+Returns an object of the model's [`AbstractProcessDescription`](@ref) subtype.
+"""
+function parse_process end
+
+"""
+    gen_process_input(::AbstractProcessDescription)
+
+Interface function that must be implemented for every specific [`AbstractProcessDescription`](@ref).
+Returns a randomly generated and valid corresponding [`AbstractProcessInput`](@ref).
+"""
+function gen_process_input end
--- a/src/models/print.jl
+++ b/src/models/print.jl
@@ -0,0 +1,10 @@
+
+"""
+    show(io::IO, particleValue::ParticleValue)
+
+Pretty print a [`ParticleValue`](@ref) as its particle followed by its value, no newlines.
+"""
+function show(io::IO, particleValue::ParticleValue)
+    (p, v) = (particleValue.p, particleValue.v)
+    print(io, "($(p), value: $(v))")
+    return nothing
+end
--- a/src/node/compare.jl
+++ b/src/node/compare.jl
@@ -0,0 +1,35 @@
+"""
+    ==(e1::Edge, e2::Edge)
+
+Equality comparison between two edges. Two edges are equal if both their children and their parents are equal.
+"""
+function ==(e1::Edge, e2::Edge)
+    (child1, parent1) = e1.edge
+    (child2, parent2) = e2.edge
+    return child1 == child2 && parent1 == parent2
+end
+
+"""
+    ==(n1::Node, n2::Node)
+
+Fallback equality comparison between two nodes, always `false`. For equal node types, the more specific versions of this function will be called.
+"""
+==(n1::Node, n2::Node) = false
+
+"""
+    ==(n1::ComputeTaskNode, n2::ComputeTaskNode)
+
+Equality comparison between two [`ComputeTaskNode`](@ref)s, based on their ids.
+"""
+==(n1::ComputeTaskNode, n2::ComputeTaskNode) = n1.id == n2.id
+
+"""
+    ==(n1::DataTaskNode, n2::DataTaskNode)
+
+Equality comparison between two [`DataTaskNode`](@ref)s, based on their ids.
+"""
+==(n1::DataTaskNode, n2::DataTaskNode) = n1.id == n2.id
--- a/src/node/create.jl
+++ b/src/node/create.jl
@@ -0,0 +1,71 @@
+
+# Convenience constructor: a fresh data node without any edges or operations and with a newly generated id.
+function DataTaskNode(t::AbstractDataTask, name = "")
+    return DataTaskNode(
+        t,                              # task
+        Vector{Node}(),                 # parents
+        Vector{Node}(),                 # children
+        UUIDs.uuid1(rng[threadid()]),   # id
+        missing,                        # node reduction
+        missing,                        # node split
+        missing,                        # node fusion
+        name,                           # name
+    )
+end
+
+# Convenience constructor: a fresh compute node without any edges, operations or device and with a newly generated id.
+function ComputeTaskNode(t::AbstractComputeTask)
+    return ComputeTaskNode(
+        t,                              # task
+        Vector{Node}(),                 # parents
+        Vector{Node}(),                 # children
+        UUIDs.uuid1(rng[threadid()]),   # id
+        missing,                        # node reduction
+        missing,                        # node split
+        Vector{NodeFusion}(),           # node fusions
+        missing,                        # device
+    )
+end
+
+# copying `missing` yields `missing`
+function copy(m::Missing)
+    return missing
+end
+
+# Copying a node only copies its task (and the name for data nodes). The copy is built by the convenience
+# constructors, so it gets a new id and starts without parents, children or operations.
+function copy(n::ComputeTaskNode)
+    return ComputeTaskNode(copy(n.task))
+end
+function copy(n::DataTaskNode)
+    return DataTaskNode(copy(n.task), n.name)
+end
+
+"""
+    make_node(t::AbstractTask)
+
+Fallback implementation of `make_node` for an [`AbstractTask`](@ref), throwing an error.
+"""
+make_node(t::AbstractTask) = error("Cannot make a node from this task type")
+
+"""
+    make_node(t::AbstractDataTask, name::String = "")
+
+Construct and return a new [`DataTaskNode`](@ref) with the given task and (optional) name.
+"""
+make_node(t::AbstractDataTask, name::String = "") = DataTaskNode(t, name)
+
+"""
+    make_node(t::AbstractComputeTask)
+
+Construct and return a new [`ComputeTaskNode`](@ref) with the given task.
+"""
+make_node(t::AbstractComputeTask) = ComputeTaskNode(t)
+
+"""
+    make_edge(n1::Node, n2::Node)
+
+Fallback implementation of `make_edge` throwing an error. If you got this error it likely means you tried to construct an edge between two nodes of the same type.
+"""
+make_edge(n1::Node, n2::Node) = error("Can only create edges from compute to data node or reverse")
+
+"""
+    make_edge(n1::ComputeTaskNode, n2::DataTaskNode)
+
+Construct and return a new [`Edge`](@ref) pointing from `n1` (child) to `n2` (parent).
+"""
+make_edge(n1::ComputeTaskNode, n2::DataTaskNode) = Edge((n1, n2))
+
+"""
+    make_edge(n1::DataTaskNode, n2::ComputeTaskNode)
+
+Construct and return a new [`Edge`](@ref) pointing from `n1` (child) to `n2` (parent).
+"""
+make_edge(n1::DataTaskNode, n2::ComputeTaskNode) = Edge((n1, n2))
--- a/src/node/print.jl
+++ b/src/node/print.jl
@@ -0,0 +1,27 @@
+"""
+    show(io::IO, n::Node)
+
+Print a short string representation of the node to io, consisting of "Node(...)" around its task.
+"""
+show(io::IO, n::Node) = print(io, "Node(", n.task, ")")
+
+"""
+    show(io::IO, e::Edge)
+
+Print a short string representation of the edge to io, consisting of "Edge(...)" around its child and parent.
+"""
+function show(io::IO, e::Edge)
+    (child, parent) = e.edge
+    return print(io, "Edge(", child, ", ", parent, ")")
+end
+
+"""
+    to_var_name(id::UUID)
+
+Return the uuid as a string usable as a variable name in code generation: every "-" is replaced
+by "_" and a leading "_" is prepended.
+"""
+function to_var_name(id::UUID)
+    return "_" * replace(string(id), "-" => "_")
+end
--- a/src/node/properties.jl
+++ b/src/node/properties.jl
@@ -0,0 +1,115 @@
+"""
+    is_entry_node(node::Node)
+
+Return whether this node is an entry node in its graph, i.e., it has no children.
+"""
+function is_entry_node(node::Node)
+    return isempty(node.children)
+end
+
+"""
+    is_exit_node(node::Node)
+
+Return whether this node is an exit node of its graph, i.e., it has no parents.
+"""
+function is_exit_node(node::Node)
+    return isempty(node.parents)
+end
+
+"""
+    data(edge::Edge)
+
+Return the data transferred by this edge, i.e., 0 if the child is a [`ComputeTaskNode`](@ref), otherwise the `data()` of the child's task.
+"""
+function data(edge::Edge)
+    child = edge.edge[1]
+    if child isa DataTaskNode
+        return data(child.task)
+    else
+        return 0.0
+    end
+end
+
+"""
+    children(node::Node)
+
+Return a copy of the node's children so it can safely be mutated without changing the node in the graph.
+
+A node's children are its prerequisite nodes, nodes that need to execute before the task of this node.
+"""
+children(node::Node) = copy(node.children)
+
+"""
+    parents(node::Node)
+
+Return a copy of the node's parents so it can safely be mutated without changing the node in the graph.
+
+A node's parents are its subsequent nodes, nodes that need this node to execute.
+"""
+parents(node::Node) = copy(node.parents)
+
+"""
+    siblings(node::Node)
+
+Return a `Set` of all siblings of this node.
+
+A node's siblings are all children of any of its parents. The result contains no duplicates and includes the node itself.
+"""
+function siblings(node::Node)
+    # start with the node itself, then merge in the children of every parent
+    result = Set{Node}((node,))
+    for parent in node.parents
+        union!(result, parent.children)
+    end
+    return result
+end
+
+"""
+    partners(node::Node)
+
+Return a `Set` of all partners of this node.
+
+A node's partners are all parents of any of its children. The result contains no duplicates and includes the node itself.
+
+Note: This is very slow when there are multiple children with many parents.
+This is less of a problem in [`siblings(node::Node)`](@ref) because (depending on the model) there are no nodes with a large number of children, or only a single one.
+"""
+function partners(node::Node)
+    # start with the node itself, then merge in the parents of every child
+    result = Set{Node}((node,))
+    for child in node.children
+        union!(result, child.parents)
+    end
+    return result
+end
+
+"""
+    partners(node::Node, set::Set{Node})
+
+Alternative version to [`partners(node::Node)`](@ref), avoiding allocation of a new set. Adds the node and its partners to the given set and returns `nothing`.
+"""
+function partners(node::Node, set::Set{Node})
+    push!(set, node)
+    foreach(child -> union!(set, child.parents), node.children)
+    return nothing
+end
+
+"""
+    is_parent(potential_parent::Node, node::Node)
+
+Return whether the `potential_parent` is contained in the parents of `node`.
+"""
+is_parent(potential_parent::Node, node::Node) = potential_parent in node.parents
+
+"""
+    is_child(potential_child::Node, node::Node)
+
+Return whether the `potential_child` is contained in the children of `node`.
+"""
+is_child(potential_child::Node, node::Node) = potential_child in node.children
--- a/src/node/type.jl
+++ b/src/node/type.jl
@@ -0,0 +1,104 @@
+using Random
+using UUIDs
+using Base.Threads
+
+# One random number generator per thread, used to generate node ids via `UUIDs.uuid1(rng[threadid()])`.
+# TODO: reliably find out how many threads we're running with (nthreads() returns 1 when precompiling :/)
+# NOTE(review): the vector has a fixed size of 32 and is indexed by `threadid()`, so a thread id above 32 would be
+# out of bounds. Confirm that this limit is acceptable.
+rng = [Random.MersenneTwister(0) for _ in 1:32]
+
+"""
+    Node
+
+The abstract base type of every node.
+
+See [`DataTaskNode`](@ref), [`ComputeTaskNode`](@ref) and [`make_node`](@ref).
+"""
+abstract type Node end
+
+# Abstract base type of the operations that node fields such as `nodeReduction` and `nodeSplit` refer to.
+# declare this type here because it's needed
+# the specific operations are declared in graph.jl
+abstract type Operation end
+
+"""
+    DataTaskNode <: Node
+    
+Any node that transfers data and does no computation.
+
+# Fields
+`.task`:            The node's data task type. Usually [`DataTask`](@ref).\\
+`.parents`:         A vector of the node's parents (i.e. nodes that depend on this one).\\
+`.children`:        A vector of the node's children (i.e. nodes that this one depends on).\\
+`.id`:              The node's id. Improves the speed of comparisons and is used as a unique identifier.\\
+`.nodeReduction`:   Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
+`.nodeSplit`:       Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
+`.nodeFusion`:      Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes.\\
+`.name`:            The name of this node for entry nodes into the graph ([`is_entry_node`](@ref)) to reliably assign the inputs to the correct nodes when executing.\\
+"""
+mutable struct DataTaskNode <: Node
+    task::AbstractDataTask
+
+    # use vectors as sets have way too much memory overhead
+    parents::Vector{Node}
+    children::Vector{Node}
+
+    # need a unique identifier unique to every *constructed* node
+    # however, it can be copied when splitting a node
+    id::Base.UUID
+
+    # the NodeReduction involving this node, if it exists
+    # Can't use the NodeReduction type here because it's not yet defined
+    nodeReduction::Union{Operation, Missing}
+
+    # the NodeSplit involving this node, if it exists
+    nodeSplit::Union{Operation, Missing}
+
+    # the node fusion involving this node, if it exists
+    nodeFusion::Union{Operation, Missing}
+
+    # for input nodes we need a name for the node to distinguish between them
+    name::String
+end
+
+"""
+    ComputeTaskNode <: Node
+
+Any node that computes a result from inputs using an [`AbstractComputeTask`](@ref).
+
+# Fields
+`.task`:            The node's compute task type. A concrete subtype of [`AbstractComputeTask`](@ref).\\
+`.parents`:         A vector of the node's parents (i.e. nodes that depend on this one).\\
+`.children`:        A vector of the node's children (i.e. nodes that this one depends on).\\
+`.id`:              The node's id. Improves the speed of comparisons and is used as a unique identifier.\\
+`.nodeReduction`:   Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
+`.nodeSplit`:       Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
+`.nodeFusions`:     A vector of this node's [`NodeFusion`](@ref)s. For a `ComputeTaskNode` there can be any number of these, unlike the [`DataTaskNode`](@ref)s.\\
+`.device`:          The Device this node has been scheduled on by a [`Scheduler`](@ref).
+"""
+mutable struct ComputeTaskNode <: Node
+    # the compute task executed by this node
+    task::AbstractComputeTask
+    # nodes that depend on this one
+    parents::Vector{Node}
+    # nodes that this one depends on
+    children::Vector{Node}
+    # unique identifier, compared by `==`
+    id::Base.UUID
+
+    # the NodeReduction and NodeSplit involving this node, `missing` if there is none
+    nodeReduction::Union{Operation, Missing}
+    nodeSplit::Union{Operation, Missing}
+
+    # for ComputeTasks there can be multiple fusions, unlike the DataTasks
+    nodeFusions::Vector{Operation}
+
+    # the device this node is assigned to execute on
+    device::Union{AbstractDevice, Missing}
+end
+
+"""
+    Edge
+
+Type of an edge in the graph. Edges can only exist between a [`DataTaskNode`](@ref) and a [`ComputeTaskNode`](@ref) or vice versa, not between two of the same type of node.
+
+An edge always points from child to parent: `child = e.edge[1]` and `parent = e.edge[2]`.
+
+The child is the prerequisite node of the parent.
+"""
+struct Edge
+    # edge points from child to parent
+    edge::Union{Tuple{DataTaskNode, ComputeTaskNode}, Tuple{ComputeTaskNode, DataTaskNode}}
+end
--- a/src/node/validate.jl
+++ b/src/node/validate.jl
@@ -0,0 +1,76 @@
+"""
+    is_valid_node(graph::DAG, node::Node)
+
+Verify that a given node is valid in the graph. Call like `@test is_valid_node(g, n)`. Uses `@assert` to fail if something is invalid but also provide an error message.
+
+The checks are: the node, its parents and its children are all part of `graph`, every parent/child is of the other node type,
+and every parent/child relation is recorded on both sides. For nodes with a `FusedComputeTask`, every child must additionally
+appear in the inputs of one of the two fused tasks.
+
+Returns `true` if no assertion failed.
+
+This function is very performance intensive and should only be used when testing or debugging.
+
+See also this function's specific versions for the concrete Node types [`is_valid(graph::DAG, node::ComputeTaskNode)`](@ref) and [`is_valid(graph::DAG, node::DataTaskNode)`](@ref).
+"""
+function is_valid_node(graph::DAG, node::Node)
+    @assert node in graph "Node is not part of the given graph!"
+
+    # parents must be of the other node type, be in the graph and list this node as their child
+    for parent in node.parents
+        @assert typeof(parent) != typeof(node) "Node's type is the same as its parent's!"
+        @assert parent in graph "Node's parent is not in the same graph!"
+        @assert node in parent.children "Node is not a child of its parent!"
+    end
+
+    # children must be of the other node type, be in the graph and list this node as their parent
+    for child in node.children
+        @assert typeof(child) != typeof(node) "Node's type is the same as its child's!"
+        @assert child in graph "Node's child is not in the same graph!"
+        @assert node in child.parents "Node is not a parent of its child!"
+    end
+
+    # validation of the node's operations is currently disabled
+    #=if !ismissing(node.nodeReduction)
+        @assert is_valid(graph, node.nodeReduction)
+    end
+    if !ismissing(node.nodeSplit)
+        @assert is_valid(graph, node.nodeSplit)
+    end=#
+
+    if !(typeof(node.task) <: FusedComputeTask)
+        # the remaining checks are only necessary for fused compute tasks
+        return true
+    end
+
+    # every child must be in some input of the task
+    for child in node.children
+        # the inputs are compared against the child's id converted to a variable name symbol
+        str = Symbol(to_var_name(child.id))
+        @assert (str in node.task.t1_inputs) || (str in node.task.t2_inputs) "$str was not in any of the tasks' inputs\nt1_inputs: $(node.task.t1_inputs)\nt2_inputs: $(node.task.t2_inputs)"
+    end
+
+    return true
+end
+
+"""
+    is_valid(graph::DAG, node::ComputeTaskNode)
+
+Verify that the given compute node is valid in the graph. Call with `@assert` or `@test` when testing or debugging.
+Returns `true` if no assertion failed.
+
+This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref).
+"""
+function is_valid(graph::DAG, node::ComputeTaskNode)
+    @assert is_valid_node(graph, node)
+
+    # validation of the node's fusions is currently disabled
+    #=for nf in node.nodeFusions
+        @assert is_valid(graph, nf)
+    end=#
+    return true
+end
+
+"""
+    is_valid(graph::DAG, node::DataTaskNode)
+
+Verify that the given data node is valid in the graph. Call with `@assert` or `@test` when testing or debugging.
+Returns `true` if no assertion failed.
+
+This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref).
+"""
+function is_valid(graph::DAG, node::DataTaskNode)
+    @assert is_valid_node(graph, node)
+
+    # validation of the node's fusion is currently disabled
+    #=if !ismissing(node.nodeFusion)
+        @assert is_valid(graph, node.nodeFusion)
+    end=#
+    return true
+end
--- a/src/node_functions.jl
+++ b/src/node_functions.jl
@@ -1,51 +0,0 @@
-function make_node(t::AbstractTask)
-    error("Cannot make a node from this task type")
-end
-
-function make_node(t::AbstractDataTask)
-    return DataTaskNode(t)
-end
-
-function make_node(t::AbstractComputeTask)
-    return ComputeTaskNode(t)
-end
-
-function make_edge(n1::Node, n2::Node)
-    error("Can only create edges from compute to data node or reverse")
-end
-
-function make_edge(n1::ComputeTaskNode, n2::DataTaskNode)
-    return Edge((n1, n2))
-end
-
-function make_edge(n1::DataTaskNode, n2::ComputeTaskNode)
-    return Edge((n1, n2))
-end
-
-function show(io::IO, n::Node)
-    print(io, "Node(", n.task, ")")
-end
-
-function show(io::IO, e::Edge)
-    print(io, "Edge(", e.edge[1], ", ", e.edge[2], ")")
-end
-
-function ==(e1::Edge, e2::Edge)
-    return e1.edge[1] == e2.edge[1] && e1.edge[2] == e2.edge[2]
-end
-
-function ==(n1::Node, n2::Node)
-    return false
-end
-
-function ==(n1::ComputeTaskNode, n2::ComputeTaskNode)
-    return n1.id == n2.id
-end
-
-function ==(n1::DataTaskNode, n2::DataTaskNode)
-    return n1.id == n2.id
-end
-
-copy(m::Missing) = missing
-copy(n::ComputeTaskNode) =  ComputeTaskNode(copy(n.task), copy(n.parents), copy(n.children), UUIDs.uuid1(rng[threadid()]), copy(n.nodeReduction), copy(n.nodeSplit), copy(n.nodeFusions))
-copy(n::DataTaskNode) = DataTaskNode(copy(n.task), copy(n.parents), copy(n.children), UUIDs.uuid1(rng[threadid()]), copy(n.nodeReduction), copy(n.nodeSplit), copy(n.nodeFusion))
--- a/src/nodes.jl
+++ b/src/nodes.jl
@@ -1,56 +0,0 @@
-using Random
-using UUIDs
-using Base.Threads
-
-# TODO: reliably find out how many threads we're running with (nthreads() returns 1 when precompiling :/)
-rng = [Random.MersenneTwister(0) for _ in 1:32]
-
-abstract type Node end
-
-# declare this type here because it's needed
-# the specific operations are declared in graph.jl
-abstract type Operation end
-
-mutable struct DataTaskNode <: Node
-   task::AbstractDataTask
-   
-   # use vectors as sets have way too much memory overhead
-   parents::Vector{Node}
-   children::Vector{Node}
-
-   # need a unique identifier unique to every *constructed* node
-   # however, it can be copied when splitting a node
-   id::Base.UUID
-
-   # the NodeReduction involving this node, if it exists
-   # Can't use the NodeReduction type here because it's not yet defined
-   nodeReduction::Union{Operation, Missing}
-
-   # the NodeSplit involving this node, if it exists
-   nodeSplit::Union{Operation, Missing}
-
-   # the node fusion involving this node, if it exists
-   nodeFusion::Union{Operation, Missing}
-end
-
-# same as DataTaskNode
-mutable struct ComputeTaskNode <: Node
-   task::AbstractComputeTask
-   parents::Vector{Node}
-   children::Vector{Node}
-   id::Base.UUID
-
-   nodeReduction::Union{Operation, Missing}
-   nodeSplit::Union{Operation, Missing}
-
-   # for ComputeTasks there can be multiple fusions, unlike the DataTasks
-   nodeFusions::Vector{Operation}
-end
-
-DataTaskNode(t::AbstractDataTask) = DataTaskNode(t, Vector{Node}(), Vector{Node}(), UUIDs.uuid1(rng[threadid()]), missing, missing, missing)
-ComputeTaskNode(t::AbstractComputeTask) = ComputeTaskNode(t, Vector{Node}(), Vector{Node}(), UUIDs.uuid1(rng[threadid()]), missing, missing, Vector{NodeFusion}())
-
-struct Edge
-   # edge points from child to parent
-   edge::Union{Tuple{DataTaskNode, ComputeTaskNode}, Tuple{ComputeTaskNode, DataTaskNode}}
-end
--- a/src/operation/apply.jl
+++ b/src/operation/apply.jl
@@ -0,0 +1,312 @@
+"""
+    apply_all!(graph::DAG)
+
+Apply every operation still queued in `graph.operationsToApply`, front to back, and append each resulting applied operation to `graph.appliedOperations`. Is automatically called in all functions that require the latest state of the [`DAG`](@ref).
+
+Returns `nothing`.
+"""
+function apply_all!(graph::DAG)
+    while true
+        # stop as soon as the queue of pending operations is drained
+        isempty(graph.operationsToApply) && break
+
+        # take the next operation from the front of the deque and apply it
+        pendingOp = popfirst!(graph.operationsToApply)
+        applied = apply_operation!(graph, pendingOp)
+
+        # remember it at the end of the applied-operations deque
+        push!(graph.appliedOperations, applied)
+    end
+    return nothing
+end
+
+"""
+    apply_operation!(graph::DAG, operation::Operation)
+
+Catch-all method of `apply_operation!` for operation types that have no dedicated method. Always throws an `ErrorException`.
+"""
+function apply_operation!(graph::DAG, operation::Operation)
+    return throw(ErrorException("Unknown operation type!"))
+end
+
+"""
+    apply_operation!(graph::DAG, operation::NodeFusion)
+
+Apply the given [`NodeFusion`](@ref) to the graph by passing its three input nodes to [`node_fusion!`](@ref), then add the resulting [`Diff`](@ref)'s `GraphProperties` to `graph.properties`.
+
+Return an [`AppliedNodeFusion`](@ref) holding the operation and that diff.
+"""
+function apply_operation!(graph::DAG, operation::NodeFusion)
+    (n1, n2, n3) = operation.input
+    fusionDiff = node_fusion!(graph, n1, n2, n3)
+
+    # account for the change in the graph's properties
+    graph.properties = graph.properties + GraphProperties(fusionDiff)
+
+    return AppliedNodeFusion(operation, fusionDiff)
+end
+
+"""
+    apply_operation!(graph::DAG, operation::NodeReduction)
+
+Apply the given [`NodeReduction`](@ref) to the graph by passing its input nodes to [`node_reduction!`](@ref), then add the resulting [`Diff`](@ref)'s `GraphProperties` to `graph.properties`.
+
+Return an [`AppliedNodeReduction`](@ref) holding the operation and that diff.
+"""
+function apply_operation!(graph::DAG, operation::NodeReduction)
+    reductionDiff = node_reduction!(graph, operation.input)
+
+    # account for the change in the graph's properties
+    graph.properties = graph.properties + GraphProperties(reductionDiff)
+
+    return AppliedNodeReduction(operation, reductionDiff)
+end
+
+"""
+    apply_operation!(graph::DAG, operation::NodeSplit)
+
+Apply the given [`NodeSplit`](@ref) to the graph by passing its input node to [`node_split!`](@ref), then add the resulting [`Diff`](@ref)'s `GraphProperties` to `graph.properties`.
+
+Return an [`AppliedNodeSplit`](@ref) holding the operation and that diff.
+"""
+function apply_operation!(graph::DAG, operation::NodeSplit)
+    splitDiff = node_split!(graph, operation.input)
+
+    # account for the change in the graph's properties
+    graph.properties = graph.properties + GraphProperties(splitDiff)
+
+    return AppliedNodeSplit(operation, splitDiff)
+end
+
+"""
+    revert_operation!(graph::DAG, operation::AppliedOperation)
+
+Catch-all method of `revert_operation!` for applied operation types that have no dedicated method. Always throws an `ErrorException`.
+"""
+function revert_operation!(graph::DAG, operation::AppliedOperation)
+    return throw(ErrorException("Unknown operation type!"))
+end
+
+"""
+    revert_operation!(graph::DAG, operation::AppliedNodeFusion)
+
+Undo the applied node fusion by reverting its stored diff on the graph. Return the original [`NodeFusion`](@ref) operation.
+"""
+function revert_operation!(graph::DAG, operation::AppliedNodeFusion)
+    originalOp = operation.operation
+    revert_diff!(graph, operation.diff)
+    return originalOp
+end
+
+"""
+    revert_operation!(graph::DAG, operation::AppliedNodeReduction)
+
+Undo the applied node reduction by reverting its stored diff on the graph. Return the original [`NodeReduction`](@ref) operation.
+"""
+function revert_operation!(graph::DAG, operation::AppliedNodeReduction)
+    originalOp = operation.operation
+    revert_diff!(graph, operation.diff)
+    return originalOp
+end
+
+"""
+    revert_operation!(graph::DAG, operation::AppliedNodeSplit)
+
+Undo the applied node split by reverting its stored diff on the graph. Return the original [`NodeSplit`](@ref) operation.
+"""
+function revert_operation!(graph::DAG, operation::AppliedNodeSplit)
+    originalOp = operation.operation
+    revert_diff!(graph, operation.diff)
+    return originalOp
+end
+
+"""
+    revert_diff!(graph::DAG, diff::Diff)
+
+Undo the given diff on the graph: everything the diff added is removed, everything it removed is re-inserted, tasks recorded in `diff.updatedChildren` are restored, and the diff's `GraphProperties` are subtracted from `graph.properties`. Used to revert the individual [`AppliedOperation`](@ref)s with [`revert_operation!`](@ref).
+
+Returns `nothing`.
+"""
+function revert_diff!(graph::DAG, diff::Diff)
+    # the order matters: added edges go before added nodes, removed nodes come back before removed edges
+    for added in diff.addedEdges
+        (from, to) = (added.edge[1], added.edge[2])
+        remove_edge!(graph, from, to, track = false)
+    end
+    for added in diff.addedNodes
+        remove_node!(graph, added, track = false)
+    end
+
+    for removed in diff.removedNodes
+        insert_node!(graph, removed, track = false)
+    end
+    for removed in diff.removedEdges
+        (from, to) = (removed.edge[1], removed.edge[2])
+        insert_edge!(graph, from, to, track = false)
+    end
+
+    # restore the tasks that were stored for nodes whose children were updated
+    for (node, task) in diff.updatedChildren
+        # node must be fused compute task at this point
+        @assert typeof(node.task) <: FusedComputeTask
+
+        node.task = task
+    end
+
+    graph.properties = graph.properties - GraphProperties(diff)
+
+    return nothing
+end
+
+"""
+    node_fusion!(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
+
+Fuse nodes n1 -> n2 -> n3 together into one node, return the applied difference to the graph.
+
+All three nodes are removed from the graph and replaced by a single new `ComputeTaskNode` carrying a `FusedComputeTask` built from copies of n1's and n3's tasks. The new node takes over n1's children, n3's remaining children and n3's parents.
+
+The input is checked with [`is_valid_node_fusion_input`](@ref) via `@assert`.
+
+For details see [`NodeFusion`](@ref).
+"""
+function node_fusion!(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
+    @assert is_valid_node_fusion_input(graph, n1, n2, n3)
+
+    # clear snapshot, so the diff returned at the end only contains this operation's changes
+    get_snapshot_diff(graph)
+
+    # save children and parents
+    # NOTE(review): these are still iterated after the edges are removed below, so `children`/`parents` presumably return copies - confirm
+    n1Children = children(n1)
+    n3Parents = parents(n3)
+
+    n1Task = copy(n1.task)
+    n3Task = copy(n3.task)
+
+    # assemble the input node vectors of n1 and n3 to save into the FusedComputeTask
+    n1Inputs = Vector{Symbol}()
+    for child in n1Children
+        push!(n1Inputs, Symbol(to_var_name(child.id)))
+    end
+
+    # remove the edges and nodes that will be replaced by the fused node
+    remove_edge!(graph, n1, n2)
+    remove_edge!(graph, n2, n3)
+    remove_node!(graph, n1)
+    remove_node!(graph, n2)
+
+    # get n3's children now so it automatically excludes n2
+    n3Children = children(n3)
+
+    n3Inputs = Vector{Symbol}()
+    for child in n3Children
+        push!(n3Inputs, Symbol(to_var_name(child.id)))
+    end
+
+    remove_node!(graph, n3)
+
+    # create new node with the fused compute task
+    # the variable name of n2 is passed between the two input lists
+    newNode = ComputeTaskNode(FusedComputeTask(n1Task, n3Task, n1Inputs, Symbol(to_var_name(n2.id)), n3Inputs))
+    insert_node!(graph, newNode)
+
+    # move the edges from n1's children over to the new node
+    for child in n1Children
+        remove_edge!(graph, child, n1)
+        insert_edge!(graph, child, newNode)
+    end
+
+    # same for n3's children, but skip children shared with n1 so no edge is inserted twice
+    for child in n3Children
+        remove_edge!(graph, child, n3)
+        if !(child in n1Children)
+            insert_edge!(graph, child, newNode)
+        end
+    end
+
+    # the new node takes n3's place below all of n3's parents
+    for parent in n3Parents
+        remove_edge!(graph, n3, parent)
+        insert_edge!(graph, newNode, parent)
+
+        # important! update the parent node's child names in case they are fused compute tasks
+        # needed for compute generation so the fused compute task can correctly match inputs to its component tasks
+        update_child!(graph, parent, Symbol(to_var_name(n3.id)), Symbol(to_var_name(newNode.id)))
+    end
+
+    return get_snapshot_diff(graph)
+end
+
+"""
+    node_reduction!(graph::DAG, nodes::Vector{Node})
+
+Reduce the given nodes together into one node, return the applied difference to the graph.
+
+The first node of `nodes` is kept. All other nodes are removed from the graph and their parents are attached to the first node instead.
+
+The input is checked with [`is_valid_node_reduction_input`](@ref) via `@assert`.
+
+For details see [`NodeReduction`](@ref).
+"""
+function node_reduction!(graph::DAG, nodes::Vector{Node})
+    @assert is_valid_node_reduction_input(graph, nodes)
+
+    # clear snapshot, so the diff returned at the end only contains this operation's changes
+    get_snapshot_diff(graph)
+
+    # n1 is the node that survives the reduction
+    n1 = nodes[1]
+    n1Children = children(n1)
+
+    n1Parents = Set(n1.parents)
+
+    # set of the new parents of n1
+    newParents = Set{Node}()
+
+    # names of the previous children that n1 now replaces per parent
+    # NOTE(review): only one name is kept per parent; a parent shared by several removed nodes keeps the last one seen - confirm this is intended
+    newParentsChildNames = Dict{Node, Symbol}()
+
+    # remove all of the nodes' parents and children and the nodes themselves (except for first node)
+    for i in 2:length(nodes)
+        n = nodes[i]
+        # n1's children are used here for every n (NodeReduction requires all input nodes to have the same set of children)
+        for child in n1Children
+            remove_edge!(graph, child, n)
+        end
+
+        for parent in parents(n)
+            remove_edge!(graph, n, parent)
+
+            # collect all parents
+            push!(newParents, parent)
+            newParentsChildNames[parent] = Symbol(to_var_name(n.id))
+        end
+
+        remove_node!(graph, n)
+    end
+
+    for parent in newParents
+        # now add parents of all input nodes to n1 without duplicates
+        if !(parent in n1Parents)
+            # don't double insert edges
+            insert_edge!(graph, n1, parent)
+        end
+
+        # this has to be done for all parents, even the ones of n1 because they can be duplicate
+        prevChild = newParentsChildNames[parent]
+        update_child!(graph, parent, prevChild, Symbol(to_var_name(n1.id)))
+    end
+
+    return get_snapshot_diff(graph)
+end
+
+"""
+    node_split!(graph::DAG, n1::Node)
+
+Split the given node into one node per parent, return the applied difference to the graph.
+
+`n1` itself is removed from the graph. For each of its former parents a `copy` of `n1` is inserted, connected to that one parent and to all of `n1`'s former children.
+
+The input is checked with [`is_valid_node_split_input`](@ref) via `@assert`.
+
+For details see [`NodeSplit`](@ref).
+"""
+function node_split!(graph::DAG, n1::Node)
+    @assert is_valid_node_split_input(graph, n1)
+
+    # clear snapshot, so the diff returned at the end only contains this operation's changes
+    get_snapshot_diff(graph)
+
+    # NOTE(review): both are iterated again after the edges are removed, so `parents`/`children` presumably return copies - confirm
+    n1Parents = parents(n1)
+    n1Children = children(n1)
+
+    # detach n1 completely, then remove it
+    for parent in n1Parents
+        remove_edge!(graph, n1, parent)
+    end
+    for child in n1Children
+        remove_edge!(graph, child, n1)
+    end
+    remove_node!(graph, n1)
+
+    # insert one copy of n1 per former parent
+    for parent in n1Parents
+        nCopy = copy(n1)
+
+        insert_node!(graph, nCopy)
+        insert_edge!(graph, nCopy, parent)
+
+        # every copy is connected to all of n1's former children
+        for child in n1Children
+            insert_edge!(graph, child, nCopy)
+        end
+
+        # the parent now has to refer to the copy instead of n1
+        update_child!(graph, parent, Symbol(to_var_name(n1.id)), Symbol(to_var_name(nCopy.id)))
+    end
+
+    return get_snapshot_diff(graph)
+end
--- a/src/operation/clean.jl
+++ b/src/operation/clean.jl
@@ -0,0 +1,134 @@
+# These are functions for "cleaning" nodes, i.e. regenerating the possible operations for a node
+
+"""
+    find_fusions!(graph::DAG, node::DataTaskNode)
+
+Look for the node fusion around the given data node. If one is found, the new [`NodeFusion`](@ref) is pushed to the graph's possible operations and to the caches of all three involved nodes. Returns nothing.
+
+Does nothing if the node already has a node fusion set. Since it's a data node, only one node fusion can be possible with it.
+
+Throws an error if the data node's single child or single parent is not part of the graph.
+"""
+function find_fusions!(graph::DAG, node::DataTaskNode)
+    # an already cached fusion must not be duplicated
+    ismissing(node.nodeFusion) || return nothing
+
+    # the data node needs exactly one parent and exactly one child
+    (length(node.parents) == 1 && length(node.children) == 1) || return nothing
+
+    computeBelow = first(node.children)
+    computeAbove = first(node.parents)
+
+    if !(computeBelow in graph && computeAbove in graph)
+        error("Parents/Children that are not in the graph!!!")
+    end
+
+    # the child compute node must not have any parent besides this data node
+    length(computeBelow.parents) == 1 || return nothing
+
+    fusion = NodeFusion((computeBelow, node, computeAbove))
+    push!(graph.possibleOperations.nodeFusions, fusion)
+    push!(computeBelow.nodeFusions, fusion)
+    node.nodeFusion = fusion
+    push!(computeAbove.nodeFusions, fusion)
+
+    return nothing
+end
+
+"""
+    find_fusions!(graph::DAG, node::ComputeTaskNode)
+
+Find node fusions involving the given compute node by calling `find_fusions!` on each of its children first and then on each of its parents. Returns nothing.
+"""
+function find_fusions!(graph::DAG, node::ComputeTaskNode)
+    # the fusions themselves are found from the neighbouring nodes
+    foreach(node.children) do below
+        return find_fusions!(graph, below)
+    end
+
+    foreach(node.parents) do above
+        return find_fusions!(graph, above)
+    end
+
+    return nothing
+end
+
+"""
+    find_reductions!(graph::DAG, node::Node)
+
+Find the node reduction involving the given node. If the node has at least one partner it can be reduced with, a single [`NodeReduction`](@ref) containing the node and all such partners is pushed to the graph's possible operations and cached in every involved node. Returns nothing.
+
+Does nothing if the node already has a node reduction set.
+"""
+function find_reductions!(graph::DAG, node::Node)
+    # there can only be one reduction per node, avoid adding duplicates
+    ismissing(node.nodeReduction) || return nothing
+
+    # possible reductions are with nodes that are partners, i.e. parents of children
+    candidates = partners(node)
+    delete!(candidates, node)
+
+    # the node itself always comes first, followed by every partner it can be reduced with
+    group = Node[node]
+    for partner in candidates
+        @assert partner in graph.nodes
+        if can_reduce(node, partner)
+            push!(group, partner)
+        end
+    end
+
+    # without any reducible partner there is no reduction to insert
+    if length(group) == 1
+        return nothing
+    end
+
+    nr = NodeReduction(group)
+    push!(graph.possibleOperations.nodeReductions, nr)
+    for member in group
+        if !ismissing(member.nodeReduction)
+            # it can happen that the dirty node becomes part of an existing NodeReduction and overrides those ones now
+            # this is only a problem insofar the existing NodeReduction has to be deleted and replaced also in the possibleOperations
+            invalidate_caches!(graph, member.nodeReduction)
+        end
+        member.nodeReduction = nr
+    end
+
+    return nothing
+end
+
+"""
+    find_splits!(graph::DAG, node::Node)
+
+Find the node split of the given node. If the node has no split cached yet and [`can_split`](@ref) holds, a new [`NodeSplit`](@ref) is pushed to the graph's possible operations and cached in the node. Returns nothing.
+"""
+function find_splits!(graph::DAG, node::Node)
+    if ismissing(node.nodeSplit) && can_split(node)
+        split = NodeSplit(node)
+        push!(graph.possibleOperations.nodeSplits, split)
+        node.nodeSplit = split
+    end
+
+    return nothing
+end
+
+"""
+    clean_node!(graph::DAG, node::Node)
+
+Call `sort_node!` on the node, then find fusions, reductions and splits involving it, in that order. Needs to be called after the node was changed in some way.
+"""
+function clean_node!(graph::DAG, node::Node)
+    sort_node!(node)
+
+    # regenerate the possible operations of each kind
+    for find! in (find_fusions!, find_reductions!, find_splits!)
+        find!(graph, node)
+    end
+
+    return nothing
+end
--- a/src/operation/find.jl
+++ b/src/operation/find.jl
@@ -0,0 +1,247 @@
+# functions that find operations on the initial graph
+
+using Base.Threads
+
+"""
+    insert_operation!(nf::NodeFusion, locks::Dict{ComputeTaskNode, SpinLock})
+
+Insert the given node fusion into the operation caches of its three input nodes. The pushes onto the two compute nodes' `nodeFusions` are each done while holding that node's lock from `locks`; the data node's `nodeFusion` field is assigned without a lock. For a large set of nodes, contention on the locks should be very small.
+"""
+function insert_operation!(nf::NodeFusion, locks::Dict{ComputeTaskNode, SpinLock})
+    (firstCompute, dataNode, secondCompute) = nf.input
+
+    lock(() -> push!(firstCompute.nodeFusions, nf), locks[firstCompute])
+    dataNode.nodeFusion = nf
+    lock(() -> push!(secondCompute.nodeFusions, nf), locks[secondCompute])
+
+    return nothing
+end
+
+"""
+    insert_operation!(nr::NodeReduction)
+
+Insert the given node reduction into the operation cache of each of its input nodes. No locks are taken; each node's `nodeReduction` field is simply overwritten.
+"""
+function insert_operation!(nr::NodeReduction)
+    foreach(nr.input) do member
+        member.nodeReduction = nr
+        return nothing
+    end
+    return nothing
+end
+
+"""
+    insert_operation!(ns::NodeSplit)
+
+Insert the given node split into the operation cache of its input node. No locks are taken; the node's `nodeSplit` field is simply overwritten.
+"""
+function insert_operation!(ns::NodeSplit)
+    target = ns.input
+    target.nodeSplit = ns
+    return nothing
+end
+
+"""
+    nr_insertion!(operations::PossibleOperations, nodeReductions::Vector{Vector{NodeReduction}})
+
+Insert the node reductions into `operations` and into the caches of their input nodes. The set union runs in a separate task while the caches are filled in a `@threads` loop. Returns nothing.
+"""
+function nr_insertion!(operations::PossibleOperations, nodeReductions::Vector{Vector{NodeReduction}})
+    # reserve space for all reductions up front
+    sizehint!(operations.nodeReductions, sum(length, nodeReductions; init = 0))
+
+    # merge the reductions into the possible operations in a background task ...
+    mergeTask = @task begin
+        for chunk in nodeReductions
+            union!(operations.nodeReductions, Set(chunk))
+        end
+    end
+    schedule(mergeTask)
+
+    # ... while the nodes' caches are filled
+    @threads for chunk in nodeReductions
+        for reduction in chunk
+            insert_operation!(reduction)
+        end
+    end
+
+    wait(mergeTask)
+
+    return nothing
+end
+
+"""
+    nf_insertion!(graph::DAG, operations::PossibleOperations, nodeFusions::Vector{Vector{NodeFusion}})
+
+Insert the node fusions into `operations` and into the caches of their input nodes. The set union runs in a separate task while the caches are filled in a `@threads` loop, using one `SpinLock` per compute node of the graph. Returns nothing.
+"""
+function nf_insertion!(graph::DAG, operations::PossibleOperations, nodeFusions::Vector{Vector{NodeFusion}})
+    # reserve space for all fusions up front
+    sizehint!(operations.nodeFusions, sum(length, nodeFusions; init = 0))
+
+    # merge the fusions into the possible operations in a background task ...
+    mergeTask = @task begin
+        for chunk in nodeFusions
+            union!(operations.nodeFusions, Set(chunk))
+        end
+    end
+    schedule(mergeTask)
+
+    # one lock per compute node, since several fusions can push to the same compute node
+    locks = Dict{ComputeTaskNode, SpinLock}()
+    for candidate in graph.nodes
+        if candidate isa ComputeTaskNode
+            locks[candidate] = SpinLock()
+        end
+    end
+
+    # ... while the nodes' caches are filled
+    @threads for chunk in nodeFusions
+        for fusion in chunk
+            insert_operation!(fusion, locks)
+        end
+    end
+
+    wait(mergeTask)
+
+    return nothing
+end
+
+"""
+    ns_insertion!(operations::PossibleOperations, nodeSplits::Vector{Vector{NodeSplit}})
+
+Insert the node splits into `operations` and into the caches of their input nodes. The set union runs in a separate task while the caches are filled in a `@threads` loop. Returns nothing.
+"""
+function ns_insertion!(operations::PossibleOperations, nodeSplits::Vector{Vector{NodeSplit}})
+    # reserve space for all splits up front
+    sizehint!(operations.nodeSplits, sum(length, nodeSplits; init = 0))
+
+    # merge the splits into the possible operations in a background task ...
+    mergeTask = @task begin
+        for chunk in nodeSplits
+            union!(operations.nodeSplits, Set(chunk))
+        end
+    end
+    schedule(mergeTask)
+
+    # ... while the nodes' caches are filled
+    @threads for chunk in nodeSplits
+        for split in chunk
+            insert_operation!(split)
+        end
+    end
+
+    wait(mergeTask)
+
+    return nothing
+end
+
+"""
+    generate_operations(graph::DAG)
+
+Generate all possible operations on the graph. Used initially when the graph is freshly assembled or parsed. Uses multithreading for speedup.
+
+Pending operations are applied first via [`apply_all!`](@ref). Node reductions, node fusions and node splits are then searched one kind after the other; the insertion of each kind runs in its own task while the next kind is being searched. `graph.dirtyNodes` is emptied.
+
+Safely inserts all the found operations into the graph and its nodes. Returns nothing.
+"""
+function generate_operations(graph::DAG)
+    # one result buffer per thread, indexed by threadid() in the loops below
+    # NOTE(review): this relies on threadid() staying within 1:nthreads() and on tasks of `@threads` not changing
+    # threads between reading threadid() and the push! - confirm for the Julia version and thread pools in use
+    generatedFusions = [Vector{NodeFusion}() for _ in 1:nthreads()]
+    generatedReductions = [Vector{NodeReduction}() for _ in 1:nthreads()]
+    generatedSplits = [Vector{NodeSplit}() for _ in 1:nthreads()]
+
+    # make sure the graph is fully generated through
+    apply_all!(graph)
+
+    nodeArray = collect(graph.nodes)
+
+    # sort all nodes
+    @threads for node in nodeArray
+        sort_node!(node)
+    end
+
+    # first elements of the reductions generated so far, shared between threads and guarded by the lock
+    checkedNodes = Set{Node}()
+    checkedNodesLock = SpinLock()
+    # --- find possible node reductions ---
+    @threads for node in nodeArray
+        # we're looking for nodes with multiple parents, those parents can then potentially reduce with one another
+        if (length(node.parents) <= 1)
+            continue
+        end
+
+        candidates = node.parents
+
+        # sort into equivalence classes
+        trie = NodeTrie()
+
+        for candidate in candidates
+            # insert into trie
+            insert!(trie, candidate)
+        end
+
+        nodeReductions = collect(trie)
+
+        for nrVec in nodeReductions
+            # parent sets are ordered and any node can only be part of one nodeReduction, so a NodeReduction is uniquely identifiable by its first element
+            # this prevents duplicate nodeReductions being generated
+            lock(checkedNodesLock)
+            if (nrVec[1] in checkedNodes)
+                unlock(checkedNodesLock)
+                continue
+            else
+                push!(checkedNodes, nrVec[1])
+            end
+            unlock(checkedNodesLock)
+
+            push!(generatedReductions[threadid()], NodeReduction(nrVec))
+        end
+    end
+
+
+    # launch thread for node reduction insertion
+    # (duplicates were already filtered out above via checkedNodes)
+    nr_task = @task nr_insertion!(graph.possibleOperations, generatedReductions)
+    schedule(nr_task)
+
+    # --- find possible node fusions ---
+    @threads for node in nodeArray
+        if (typeof(node) <: DataTaskNode)
+            if length(node.parents) != 1
+                # data node can only have a single parent
+                continue
+            end
+            parent_node = first(node.parents)
+
+            if length(node.children) != 1
+                # this node is an entry node or has multiple children which should not be possible
+                continue
+            end
+            child_node = first(node.children)
+            # the child compute node must not have another parent besides this data node
+            if (length(child_node.parents) != 1)
+                continue
+            end
+
+            push!(generatedFusions[threadid()], NodeFusion((child_node, node, parent_node)))
+        end
+    end
+
+    # launch thread for node fusion insertion
+    nf_task = @task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
+    schedule(nf_task)
+
+    # --- find possible node splits ---
+    @threads for node in nodeArray
+        if (can_split(node))
+            push!(generatedSplits[threadid()], NodeSplit(node))
+        end
+    end
+
+    # launch thread for node split insertion
+    ns_task = @task ns_insertion!(graph.possibleOperations, generatedSplits)
+    schedule(ns_task)
+
+    # all nodes were just processed, so none of them is dirty anymore
+    empty!(graph.dirtyNodes)
+
+    # wait for all three insertion tasks before returning
+    wait(nr_task)
+    wait(nf_task)
+    wait(ns_task)
+
+    return nothing
+end
--- a/src/operation/get.jl
+++ b/src/operation/get.jl
@@ -0,0 +1,23 @@
+# function to return the possible operations of a graph
+
+using Base.Threads
+
+"""
+    get_operations(graph::DAG)
+
+Return the [`PossibleOperations`](@ref) of the graph at the current state.
+
+Pending operations are applied first. If no possible operations are cached at all, they are generated from scratch with [`generate_operations`](@ref). Afterwards every node in `graph.dirtyNodes` is cleaned with [`clean_node!`](@ref) and the set of dirty nodes is emptied.
+"""
+function get_operations(graph::DAG)
+    apply_all!(graph)
+
+    # nothing cached yet: generate everything
+    isempty(graph.possibleOperations) && generate_operations(graph)
+
+    # regenerate the operations of all nodes that changed since the last call
+    foreach(graph.dirtyNodes) do dirty
+        return clean_node!(graph, dirty)
+    end
+    empty!(graph.dirtyNodes)
+
+    return graph.possibleOperations
+end
--- a/src/operations/print.jl
+++ b/src/operations/print.jl
@@ -1,3 +1,8 @@
+"""
+    show(io::IO, ops::PossibleOperations)
+
+Print a string representation of the set of possible operations to io.
+"""
 function show(io::IO, ops::PossibleOperations)
    print(io, length(ops.nodeFusions))
    println(io, " Node Fusions: ")
@@ -16,23 +21,38 @@ function show(io::IO, ops::PossibleOperations)
    end
 end

+"""
+    show(io::IO, op::NodeReduction)
+
+Print a string representation of the node reduction to io.
+"""
 function show(io::IO, op::NodeReduction)
    print(io, "NR: ")
    print(io, length(op.input))
    print(io, "x")
-    print(io, op.input[1].task)
+    return print(io, op.input[1].task)
 end

+"""
+    show(io::IO, op::NodeSplit)
+
+Print a string representation of the node split to io.
+"""
 function show(io::IO, op::NodeSplit)
    print(io, "NS: ")
-    print(io, op.input.task)
+    return print(io, op.input.task)
 end

+"""
+    show(io::IO, op::NodeFusion)
+
+Print a string representation of the node fusion to io.
+"""
 function show(io::IO, op::NodeFusion)
    print(io, "NF: ")
    print(io, op.input[1].task)
    print(io, "->")
    print(io, op.input[2].task)
    print(io, "->")
-    print(io, op.input[3].task)
+    return print(io, op.input[3].task)
 end
--- a/src/operation/type.jl
+++ b/src/operation/type.jl
@@ -0,0 +1,117 @@
+"""
+    Operation
+
+An abstract base type for operations. An operation can be applied to a [`DAG`](@ref), changing its nodes and edges.
+
+Possible operations on a [`DAG`](@ref) can be retrieved using [`get_operations`](@ref).
+
+See also: [`push_operation!`](@ref), [`pop_operation!`](@ref)
+"""
+abstract type Operation end
+
+"""
+    AppliedOperation
+
+An abstract base type for already applied operations.
+An applied operation can be reversed iff it is the last applied operation on the DAG.
+Every applied operation stores a [`Diff`](@ref) from when it was initially applied to be able to revert the operation.
+
+See also: [`revert_operation!`](@ref).
+"""
+abstract type AppliedOperation end
+
+"""
+    NodeFusion <: Operation
+
+The NodeFusion operation. Represents the fusing of a chain of compute node -> data node -> compute node.
+
+After the node fusion is applied, the graph has 2 fewer nodes and edges, and a new [`FusedComputeTask`](@ref) with the two input compute nodes as parts.
+
+# Requirements for successful application
+
+A chain of (n1, n2, n3) can be fused if:
+- All nodes are in the graph.
+- (n1, n2) is an edge in the graph.
+- (n2, n3) is an edge in the graph.
+- n2 has exactly one parent (n3) and exactly one child (n1).
+- n1 has exactly one parent (n2).
+
+[`is_valid_node_fusion_input`](@ref) can be used to `@assert` these requirements.
+
+See also: [`can_fuse`](@ref)
+"""
+struct NodeFusion <: Operation
+    # the chain (n1, n2, n3): child compute node, data node in between, parent compute node
+    input::Tuple{ComputeTaskNode, DataTaskNode, ComputeTaskNode}
+end
+
+"""
+    AppliedNodeFusion <: AppliedOperation
+
+The applied version of the [`NodeFusion`](@ref).
+"""
+struct AppliedNodeFusion <: AppliedOperation
+    # the operation that was applied; returned again by `revert_operation!`
+    operation::NodeFusion
+    # the changes made to the graph when applying; passed to `revert_diff!` on revert
+    diff::Diff
+end
+
+"""
+    NodeReduction <: Operation
+
+The NodeReduction operation. Represents the reduction of two or more nodes with one another.
+Only one of the input nodes is kept, while all others are deleted and their parents are accumulated in the kept node's parents instead.
+
+After the node reduction is applied, the graph has `length(nr.input) - 1` fewer nodes.
+
+# Requirements for successful application
+
+A vector of nodes can be reduced if:
+- All nodes are in the graph.
+- All nodes have the same task type.
+- All nodes have the same set of children.
+
+[`is_valid_node_reduction_input`](@ref) can be used to `@assert` these requirements.
+
+See also: [`can_reduce`](@ref)
+"""
+struct NodeReduction <: Operation
+    # the nodes to reduce; `node_reduction!` keeps the first one and removes the others
+    input::Vector{Node}
+end
+
+"""
+    AppliedNodeReduction <: AppliedOperation
+
+The applied version of the [`NodeReduction`](@ref).
+"""
+struct AppliedNodeReduction <: AppliedOperation
+    # the operation that was applied; returned again by `revert_operation!`
+    operation::NodeReduction
+    # the changes made to the graph when applying; passed to `revert_diff!` on revert
+    diff::Diff
+end
+
+"""
+    NodeSplit <: Operation
+
+The NodeSplit operation. Represents the split of its input node into one node for each of its parents. It is the reverse operation to the [`NodeReduction`](@ref).
+
+# Requirements for successful application
+
+A node can be split if:
+- It is in the graph.
+- It has at least 2 parents.
+
+[`is_valid_node_split_input`](@ref) can be used to `@assert` these requirements.
+
+See also: [`can_split`](@ref)
+"""
+struct NodeSplit <: Operation
+    # the node to split
+    input::Node
+end
+
+"""
+    AppliedNodeSplit <: AppliedOperation
+
+The applied version of the [`NodeSplit`](@ref).
+"""
+struct AppliedNodeSplit <: AppliedOperation
+    # the operation that was applied; returned again by `revert_operation!`
+    operation::NodeSplit
+    # the changes made to the graph when applying; passed to `revert_diff!` on revert
+    diff::Diff
+end
--- a/src/operation/utility.jl
+++ b/src/operation/utility.jl
@@ -0,0 +1,163 @@
+"""
+    isempty(operations::PossibleOperations)
+
+Return `true` if `operations` holds no node fusions, no node reductions and no node splits, `false` otherwise.
+"""
+function isempty(operations::PossibleOperations)
+    return all(isempty, (operations.nodeFusions, operations.nodeReductions, operations.nodeSplits))
+end
+
+"""
+    length(operations::PossibleOperations)
+
+Return a named tuple with the number of operations of each type. The fields are named the same as the fields of [`PossibleOperations`](@ref): `nodeFusions`, `nodeReductions` and `nodeSplits`.
+"""
+function length(operations::PossibleOperations)
+    fusionCount = length(operations.nodeFusions)
+    reductionCount = length(operations.nodeReductions)
+    splitCount = length(operations.nodeSplits)
+
+    return (nodeFusions = fusionCount, nodeReductions = reductionCount, nodeSplits = splitCount)
+end
+
+"""
+    delete!(operations::PossibleOperations, op::NodeFusion)
+
+Remove the given node fusion from `operations.nodeFusions`. Return `operations`.
+"""
+function delete!(operations::PossibleOperations, op::NodeFusion)
+    fusions = operations.nodeFusions
+    delete!(fusions, op)
+    return operations
+end
+
+"""
+    delete!(operations::PossibleOperations, op::NodeReduction)
+
+Remove the given node reduction from `operations.nodeReductions`. Return `operations`.
+"""
+function delete!(operations::PossibleOperations, op::NodeReduction)
+    reductions = operations.nodeReductions
+    delete!(reductions, op)
+    return operations
+end
+
+"""
+    delete!(operations::PossibleOperations, op::NodeSplit)
+
+Remove the given node split from `operations.nodeSplits`. Return `operations`.
+"""
+function delete!(operations::PossibleOperations, op::NodeSplit)
+    splits = operations.nodeSplits
+    delete!(splits, op)
+    return operations
+end
+
+"""
+    can_fuse(n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
+
+Return whether the given nodes can be fused: n1 must be a child of n2, n2 a child of n3, n2 must have exactly one parent and one child, and n1 exactly one parent. See [`NodeFusion`](@ref) for the requirements.
+"""
+function can_fuse(n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
+    # the checks are redundant but maybe a good sanity check
+    isChain = is_child(n1, n2) && is_child(n2, n3)
+    isChain || return false
+
+    return length(n2.parents) == 1 && length(n2.children) == 1 && length(n1.parents) == 1
+end
+
+"""
+    can_reduce(n1::Node, n2::Node)
+
+Return whether the given two nodes can be reduced, i.e. whether their tasks compare equal and they have the same children. See [`NodeReduction`](@ref) for the requirements.
+"""
+function can_reduce(n1::Node, n2::Node)
+    if (n1.task != n2.task)
+        return false
+    end
+
+    childCount = length(n1.children)
+    if (childCount != length(n2.children))
+        return false
+    end
+
+    # this is simple
+    if (childCount == 1)
+        return n1.children[1] == n2.children[1]
+    end
+
+    # two children seems to be the most common case
+    # doing it manually is a lot faster than using the sets for a general solution
+    if (childCount == 2)
+        if (n1.children[1] != n2.children[1])
+            # the first children differ, so the children have to match crosswise
+            if (n1.children[1] != n2.children[2])
+                return false
+            end
+            return !(n1.children[2] != n2.children[1])
+        end
+
+        # the first children match, so the second ones have to match as well
+        return !(n1.children[2] != n2.children[2])
+    end
+
+    # general case, this takes a long time
+    return Set(n1.children) == Set(n2.children)
+end
+
+"""
+    can_split(n::Node)
+
+Return whether the given node can be split, i.e. whether it has at least 2 parents. See [`NodeSplit`](@ref) for the requirements.
+"""
+function can_split(n::Node)
+    parentCount = length(parents(n))
+    return parentCount >= 2
+end
+
+"""
+    ==(op1::Operation, op2::Operation)
+
+Fallback implementation of operation equality. Always returns `false`; actual comparisons are done by the more specific methods for two operations of the same type.
+"""
+function ==(op1::Operation, op2::Operation)
+    return false
+end
+
+"""
+    ==(op1::NodeFusion, op2::NodeFusion)
+
+Equality comparison between two node fusions. Only the data nodes (the second input of each fusion) are compared.
+"""
+function ==(op1::NodeFusion, op2::NodeFusion)
+    # there can only be one node fusion on a given data task, so if the data task is the same, the fusion is the same
+    dataNode1 = op1.input[2]
+    dataNode2 = op2.input[2]
+    return dataNode1 == dataNode2
+end
+
+"""
+    ==(op1::NodeReduction, op2::NodeReduction)
+
+Equality comparison between two node reductions. Only the ids of the first input node of each reduction are compared.
+"""
+function ==(op1::NodeReduction, op2::NodeReduction)
+    # node reductions are equal exactly if their first input is the same
+    firstId1 = op1.input[1].id
+    firstId2 = op2.input[1].id
+    return firstId1 == firstId2
+end
+
+"""
+    ==(op1::NodeSplit, op2::NodeSplit)
+
+Equality comparison between two node splits. Two node splits are considered equal if their input nodes compare equal.
+"""
+function ==(op1::NodeSplit, op2::NodeSplit)
+    node1 = op1.input
+    node2 = op2.input
+    return node1 == node2
+end
--- a/src/operation/validate.jl
+++ b/src/operation/validate.jl
@@ -0,0 +1,146 @@
+# functions to throw assertion errors for inconsistent or wrong node operations
+# should be called with @assert
+# the functions throw their own errors though, to still have helpful error messages
+
+"""
+    is_valid_node_fusion_input(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
+
+Assert for a given node fusion input whether the nodes can be fused. For the requirements of a node fusion see [`NodeFusion`](@ref).
+
+Throw an `AssertionError` with a descriptive message if one of the checks fails, otherwise return `true`.
+
+Intended for use with `@assert` or `@test`.
+"""
+function is_valid_node_fusion_input(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
+    # all three nodes have to be members of the graph
+    if n1 ∉ graph || n2 ∉ graph || n3 ∉ graph
+        throw(AssertionError("[Node Fusion] The given nodes are not part of the given graph"))
+    end
+
+    # the chain n1 -> n2 -> n3 has to be linked, checked in both directions
+    connected = is_child(n1, n2) && is_child(n2, n3) && is_parent(n3, n2) && is_parent(n2, n1)
+    if !connected
+        throw(
+            AssertionError(
+                "[Node Fusion] The given nodes are not connected by edges which is required for node fusion",
+            ),
+        )
+    end
+
+    # the data node must sit between exactly n1 and n3, and n1 must not feed anything else
+    if length(n2.parents) > 1
+        throw(AssertionError("[Node Fusion] The given data node has more than one parent"))
+    end
+    if length(n2.children) > 1
+        throw(AssertionError("[Node Fusion] The given data node has more than one child"))
+    end
+    if length(n1.parents) > 1
+        throw(AssertionError("[Node Fusion] The given n1 has more than one parent"))
+    end
+
+    # finally validate each node on its own
+    @assert is_valid(graph, n1)
+    @assert is_valid(graph, n2)
+    @assert is_valid(graph, n3)
+
+    return true
+end
+
+"""
+    is_valid_node_reduction_input(graph::DAG, nodes::Vector{Node})
+
+Assert for a given node reduction input whether the nodes can be reduced. For the requirements of a node reduction see [`NodeReduction`](@ref).
+
+Throw an `AssertionError` with a descriptive message if `nodes` is empty or one of the checks fails, otherwise return `true`.
+
+Intended for use with `@assert` or `@test`.
+"""
+function is_valid_node_reduction_input(graph::DAG, nodes::Vector{Node})
+    # guard against empty input, which would otherwise surface as an unhelpful BoundsError on `nodes[1]` below
+    if isempty(nodes)
+        throw(AssertionError("[Node Reduction] The given node vector is empty"))
+    end
+
+    for n in nodes
+        if n ∉ graph
+            throw(AssertionError("[Node Reduction] The given nodes are not part of the given graph"))
+        end
+        @assert is_valid(graph, n)
+    end
+
+    # every node has to carry the same task type as the first one; data nodes additionally need the same name
+    t = typeof(nodes[1].task)
+    for n in nodes
+        if typeof(n.task) != t
+            throw(AssertionError("[Node Reduction] The given nodes are not of the same type"))
+        end
+
+        if (typeof(n) <: DataTaskNode)
+            if (n.name != nodes[1].name)
+                throw(AssertionError("[Node Reduction] The given nodes do not have the same name"))
+            end
+        end
+    end
+
+    # build the reference child set once instead of once per compared node
+    n1_children = Set(nodes[1].children)
+    for n in nodes
+        if n1_children != Set(n.children)
+            throw(
+                AssertionError(
+                    "[Node Reduction] The given nodes do not have equal prerequisite nodes which is required for node reduction",
+                ),
+            )
+        end
+    end
+
+    return true
+end
+
+"""
+    is_valid_node_split_input(graph::DAG, n1::Node)
+
+Assert for a given node split input whether the node can be split. For the requirements of a node split see [`NodeSplit`](@ref).
+
+Throw an `AssertionError` with a descriptive message if one of the checks fails, otherwise return `true`.
+
+Intended for use with `@assert` or `@test`.
+"""
+function is_valid_node_split_input(graph::DAG, n1::Node)
+    # the node has to be a member of the graph
+    if !(n1 in graph)
+        throw(AssertionError("[Node Split] The given node is not part of the given graph"))
+    end
+
+    # splitting requires two or more parents
+    if length(n1.parents) < 2
+        throw(
+            AssertionError(
+                "[Node Split] The given node does not have multiple parents which is required for node split",
+            ),
+        )
+    end
+
+    @assert is_valid(graph, n1)
+
+    return true
+end
+
+"""
+    is_valid(graph::DAG, nr::NodeReduction)
+
+Assert for a given [`NodeReduction`](@ref) whether it is a valid operation in the graph.
+The operation's input has to pass `is_valid_node_reduction_input` and the operation has to be contained in `graph.possibleOperations.nodeReductions`.
+
+Intended for use with `@assert` or `@test`.
+"""
+function is_valid(graph::DAG, nr::NodeReduction)
+    @assert is_valid_node_reduction_input(graph, nr.input)
+    @assert nr in graph.possibleOperations.nodeReductions "NodeReduction is not part of the graph's possible operations!"
+    return true
+end
+
+"""
+    is_valid(graph::DAG, ns::NodeSplit)
+
+Assert for a given [`NodeSplit`](@ref) whether it is a valid operation in the graph.
+The operation's input has to pass `is_valid_node_split_input` and the operation has to be contained in `graph.possibleOperations.nodeSplits`.
+
+Intended for use with `@assert` or `@test`.
+"""
+function is_valid(graph::DAG, ns::NodeSplit)
+    @assert is_valid_node_split_input(graph, ns.input)
+    @assert ns in graph.possibleOperations.nodeSplits "NodeSplit is not part of the graph's possible operations!"
+    return true
+end
+
+"""
+    is_valid(graph::DAG, nf::NodeFusion)
+
+Assert for a given [`NodeFusion`](@ref) whether it is a valid operation in the graph.
+The operation's three inputs have to pass `is_valid_node_fusion_input` and the operation has to be contained in `graph.possibleOperations.nodeFusions`.
+
+Intended for use with `@assert` or `@test`.
+"""
+function is_valid(graph::DAG, nf::NodeFusion)
+    @assert is_valid_node_fusion_input(graph, nf.input[1], nf.input[2], nf.input[3])
+    @assert nf in graph.possibleOperations.nodeFusions "NodeFusion is not part of the graph's possible operations!"
+    return true
+end
--- a/src/operations/apply.jl
+++ b/src/operations/apply.jl
@@ -1,198 +0,0 @@
-# functions that apply graph operations
-
-# applies all unapplied operations in the DAG
-function apply_all!(graph::DAG)
-    while !isempty(graph.operationsToApply)
-        # get next operation to apply from front of the deque
-        op = popfirst!(graph.operationsToApply)
-
-        # apply it
-        appliedOp = apply_operation!(graph, op)
-
-        # push to the end of the appliedOperations deque
-        push!(graph.appliedOperations, appliedOp)
-    end
-    return nothing
-end
-
-function apply_operation!(graph::DAG, operation::Operation)
-    error("Unknown operation type!")
-end
-
-function apply_operation!(graph::DAG, operation::NodeFusion)
-    diff = node_fusion!(graph, operation.input[1], operation.input[2], operation.input[3])
-    return AppliedNodeFusion(operation, diff)
-end
-
-function apply_operation!(graph::DAG, operation::NodeReduction)
-    diff = node_reduction!(graph, operation.input)
-    return AppliedNodeReduction(operation, diff)
-end
-
-function apply_operation!(graph::DAG, operation::NodeSplit)
-    diff = node_split!(graph, operation.input)
-    return AppliedNodeSplit(operation, diff)
-end
-
-
-function revert_operation!(graph::DAG, operation::AppliedOperation)
-    error("Unknown operation type!")
-end
-
-function revert_operation!(graph::DAG, operation::AppliedNodeFusion)
-    revert_diff!(graph, operation.diff)
-    return operation.operation
-end
-
-function revert_operation!(graph::DAG, operation::AppliedNodeReduction)
-    revert_diff!(graph, operation.diff)
-    return operation.operation
-end
-
-function revert_operation!(graph::DAG, operation::AppliedNodeSplit)
-    revert_diff!(graph, operation.diff)
-    return operation.operation
-end
-
-
-function revert_diff!(graph::DAG, diff::Diff)
-    # add removed nodes, remove added nodes, same for edges
-    # note the order
-    for edge in diff.addedEdges
-        remove_edge!(graph, edge.edge[1], edge.edge[2], false)
-    end
-    for node in diff.addedNodes
-        remove_node!(graph, node, false)
-    end
-
-    for node in diff.removedNodes
-        insert_node!(graph, node, false)
-    end
-    for edge in diff.removedEdges
-        insert_edge!(graph, edge.edge[1], edge.edge[2], false)
-    end
-end
-
-# Fuse nodes n1 -> n2 -> n3 together into one node, return the applied difference to the graph
-function node_fusion!(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
-    # @assert is_valid_node_fusion_input(graph, n1, n2, n3)
-
-    # clear snapshot
-    get_snapshot_diff(graph)
-
-
-    # save children and parents
-    n1_children = children(n1)
-    n3_parents = parents(n3)
-    n3_children = children(n3)
-
-    # remove the edges and nodes that will be replaced by the fused node
-    remove_edge!(graph, n1, n2)
-    remove_edge!(graph, n2, n3)
-    remove_node!(graph, n1)
-    remove_node!(graph, n2)
-
-    # get n3's children now so it automatically excludes n2
-    n3_children = children(n3)
-    remove_node!(graph, n3)
-
-    # create new node with the fused compute task
-    new_node = ComputeTaskNode(FusedComputeTask{typeof(n1.task),typeof(n3.task)}())
-    insert_node!(graph, new_node)
-
-    # use a set for combined children of n1 and n3 to not get duplicates
-    n1and3_children = Set{Node}()
-
-    # remove edges from n1 children to n1
-    for child in n1_children
-        remove_edge!(graph, child, n1)
-        push!(n1and3_children, child)
-    end
-
-    # remove edges from n3 children to n3
-    for child in n3_children
-        remove_edge!(graph, child, n3)
-        push!(n1and3_children, child)
-    end
-
-    for child in n1and3_children
-        insert_edge!(graph, child, new_node)
-    end
-
-    # "repoint" parents of n3 from new node
-    for parent in n3_parents
-        remove_edge!(graph, n3, parent)
-        insert_edge!(graph, new_node, parent)
-    end
-
-    return get_snapshot_diff(graph)
-end
-
-function node_reduction!(graph::DAG, nodes::Vector{Node})
-    # @assert is_valid_node_reduction_input(graph, nodes)
-
-    # clear snapshot
-    get_snapshot_diff(graph)
-
-    n1 = nodes[1]
-    n1_children = children(n1)
-    
-    n1_parents = Set(n1.parents)
-    new_parents = Set{Node}()
-
-    # remove all of the nodes' parents and children and the nodes themselves (except for first node)
-    for i in 2:length(nodes)
-        n = nodes[i]
-        for child in n1_children
-            remove_edge!(graph, child, n)
-        end
-
-        for parent in parents(n)
-            remove_edge!(graph, n, parent)
-
-            # collect all parents
-            push!(new_parents, parent)
-        end
-
-        remove_node!(graph, n)
-    end
-
-    setdiff!(new_parents, n1_parents)
-
-    for parent in new_parents
-        # now add parents of all input nodes to n1 without duplicates
-        insert_edge!(graph, n1, parent)
-    end
-
-    return get_snapshot_diff(graph)
-end
-
-function node_split!(graph::DAG, n1::Node)
-    # @assert is_valid_node_split_input(graph, n1)
-
-    # clear snapshot
-    get_snapshot_diff(graph)
-
-    n1_parents = parents(n1)
-    n1_children = children(n1)
-
-    for parent in n1_parents
-        remove_edge!(graph, n1, parent)
-    end
-    for child in n1_children
-        remove_edge!(graph, child, n1)
-    end
-    remove_node!(graph, n1)
-
-    for parent in n1_parents
-        n_copy = copy(n1)
-        insert_node!(graph, n_copy)
-        insert_edge!(graph, n_copy, parent)
-
-        for child in n1_children
-            insert_edge!(graph, child, n_copy)
-        end
-    end
-
-    return get_snapshot_diff(graph)
-end
--- a/src/operations/clean.jl
+++ b/src/operations/clean.jl
@@ -1,115 +0,0 @@
-# functions for "cleaning" nodes, i.e. regenerating the possible operations for a node
-
-# function to find node fusions involving the given node if it's a data node
-# pushes the found fusion everywhere it needs to be and returns nothing
-function find_fusions!(graph::DAG, node::DataTaskNode)
-   # if there is already a fusion here, skip
-   if !ismissing(node.nodeFusion)
-      return nothing
-   end
-
-   if length(node.parents) != 1 || length(node.children) != 1
-      return nothing
-   end
-
-   child_node = first(node.children)
-   parent_node = first(node.parents)
-
-   if !(child_node in graph) || !(parent_node in graph)
-      error("Parents/Children that are not in the graph!!!")
-   end
-
-   if length(child_node.parents) != 1
-      return nothing
-   end
-
-   nf = NodeFusion((child_node, node, parent_node))
-   push!(graph.possibleOperations.nodeFusions, nf)
-   push!(child_node.nodeFusions, nf)
-   node.nodeFusion = nf
-   push!(parent_node.nodeFusions, nf)
-
-   return nothing
-end
-
-
-function find_fusions!(graph::DAG, node::ComputeTaskNode)
-   # just find fusions in neighbouring DataTaskNodes
-   for child in node.children
-      find_fusions!(graph, child)
-   end
-
-   for parent in node.parents
-      find_fusions!(graph, parent)
-   end
-
-   return nothing
-end
-
-function find_reductions!(graph::DAG, node::Node)
-   # there can only be one reduction per node, avoid adding duplicates
-   if !ismissing(node.nodeReduction)
-      return nothing
-   end
-
-   reductionVector = nothing
-   # possible reductions are with nodes that are partners, i.e. parents of children
-   partners_ = partners(node)
-   delete!(partners_, node)
-   for partner in partners_
-      if partner ∉ graph.nodes
-         error("Partner is not part of the graph")
-      end
-
-      if can_reduce(node, partner)
-         if Set(node.children) != Set(partner.children)
-            error("Not equal children")
-         end
-         if reductionVector === nothing
-            # only when there's at least one reduction partner, insert the vector
-            reductionVector = Vector{Node}()
-            push!(reductionVector, node)
-         end
-
-         push!(reductionVector, partner)
-      end
-   end
-
-   if reductionVector !== nothing
-      nr = NodeReduction(reductionVector)
-      push!(graph.possibleOperations.nodeReductions, nr)
-      for node in reductionVector
-         if !ismissing(node.nodeReduction)
-            # it can happen that the dirty node becomes part of an existing NodeReduction and overrides those ones now
-            # this is only a problem insofar the existing NodeReduction has to be deleted and replaced also in the possibleOperations
-            invalidate_caches!(graph, node.nodeReduction)
-         end
-         node.nodeReduction = nr
-      end
-   end
-
-   return nothing
-end
-
-function find_splits!(graph::DAG, node::Node)
-   if !ismissing(node.nodeSplit)
-      return nothing
-   end
-
-   if (can_split(node))
-      ns = NodeSplit(node)
-      push!(graph.possibleOperations.nodeSplits, ns)
-      node.nodeSplit = ns
-   end
-
-   return nothing
-end
-
-# "clean" the operations on a dirty node
-function clean_node!(graph::DAG, node::Node)
-   sort_node!(node)
-   
-   find_fusions!(graph, node)
-   find_reductions!(graph, node)
-   find_splits!(graph, node)
-end
--- a/src/operations/find.jl
+++ b/src/operations/find.jl
@@ -1,205 +0,0 @@
-# functions that find operations on the inital graph
-
-using Base.Threads
-
-function insert_operation!(nf::NodeFusion, locks::Dict{ComputeTaskNode, SpinLock})
-   n1 = nf.input[1]; n2 = nf.input[2]; n3 = nf.input[3]
-
-   lock(locks[n1]) do; push!(nf.input[1].nodeFusions, nf); end
-   nf.input[2].nodeFusion = nf
-   lock(locks[n3]) do; push!(nf.input[3].nodeFusions, nf); end
-   return nothing
-end
-
-function insert_operation!(nr::NodeReduction)
-   for n in nr.input
-      n.nodeReduction = nr
-   end
-   return nothing
-end
-
-function insert_operation!(ns::NodeSplit)
-   ns.input.nodeSplit = ns
-   return nothing
-end
-
-function nr_insertion!(operations::PossibleOperations, nodeReductions::Vector{Vector{NodeReduction}})
-   total_len = 0
-   for vec in nodeReductions
-      total_len += length(vec)
-   end
-   sizehint!(operations.nodeReductions, total_len)
-
-   t = @task for vec in nodeReductions
-      union!(operations.nodeReductions, Set(vec))
-   end
-   schedule(t)
-
-   @threads for vec in nodeReductions
-      for op in vec
-         insert_operation!(op)
-      end
-   end
-
-   wait(t)
-
-   return nothing
-end
-
-function nf_insertion!(graph::DAG, operations::PossibleOperations, nodeFusions::Vector{Vector{NodeFusion}})
-   total_len = 0
-   for vec in nodeFusions
-      total_len += length(vec)
-   end
-   sizehint!(operations.nodeFusions, total_len)
-   
-   t = @task for vec in nodeFusions
-      union!(operations.nodeFusions, Set(vec))
-   end
-   schedule(t)
-
-   locks = Dict{ComputeTaskNode, SpinLock}()
-   for n in graph.nodes
-      if (typeof(n) <: ComputeTaskNode)
-         locks[n] = SpinLock()
-      end
-   end
-
-   @threads for vec in nodeFusions 
-      for op in vec
-         insert_operation!(op, locks)
-      end
-   end
-
-   wait(t)
-
-   return nothing
-end
-
-function ns_insertion!(operations::PossibleOperations, nodeSplits::Vector{Vector{NodeSplit}})
-   total_len = 0
-   for vec in nodeSplits
-      total_len += length(vec)
-   end
-   sizehint!(operations.nodeSplits, total_len)
-
-   t = @task for vec in nodeSplits
-      union!(operations.nodeSplits, Set(vec))
-   end
-   schedule(t)
-
-   @threads for vec in nodeSplits
-      for op in vec
-         insert_operation!(op)
-      end
-   end
-
-   wait(t)
-
-   return nothing
-end
-
-# function to generate all possible operations on the graph
-function generate_options(graph::DAG)
-   generatedFusions = [Vector{NodeFusion}() for _ in 1:nthreads()]
-   generatedReductions = [Vector{NodeReduction}() for _ in 1:nthreads()]
-   generatedSplits = [Vector{NodeSplit}() for _ in 1:nthreads()]
-
-   # make sure the graph is fully generated through
-   apply_all!(graph)
-
-   nodeArray = collect(graph.nodes)
-
-   # sort all nodes
-   @threads for node in nodeArray
-      sort_node!(node)
-   end
-
-   checkedNodes = Set{Node}()
-   checkedNodesLock = SpinLock()
-   # --- find possible node reductions ---
-   @threads for node in nodeArray
-      # we're looking for nodes with multiple parents, those parents can then potentially reduce with one another
-      if (length(node.parents) <= 1)
-         continue
-      end
-
-      candidates = node.parents
-
-      # sort into equivalence classes
-      trie = NodeTrie()
-
-      for candidate in candidates
-         # insert into trie
-         insert!(trie, candidate)
-      end
-
-      nodeReductions = collect(trie)
-
-      for nrVec in nodeReductions
-         # parent sets are ordered and any node can only be part of one nodeReduction, so a NodeReduction is uniquely identifiable by its first element
-         # this prevents duplicate nodeReductions being generated
-         lock(checkedNodesLock)
-         if (nrVec[1] in checkedNodes)
-            unlock(checkedNodesLock)
-            continue
-         else
-            push!(checkedNodes, nrVec[1])
-         end
-         unlock(checkedNodesLock)
-
-         push!(generatedReductions[threadid()], NodeReduction(nrVec))
-      end
-   end
-
-   
-   # launch thread for node reduction insertion
-   # remove duplicates
-   nr_task = @task nr_insertion!(graph.possibleOperations, generatedReductions)
-   schedule(nr_task)
-
-   # --- find possible node fusions ---
-   @threads for node in nodeArray
-      if (typeof(node) <: DataTaskNode)
-         if length(node.parents) != 1
-            # data node can only have a single parent
-            continue
-         end
-         parent_node = first(node.parents)
-
-         if length(node.children) != 1
-            # this node is an entry node or has multiple children which should not be possible
-            continue
-         end
-         child_node = first(node.children)
-         if (length(child_node.parents) != 1)
-            continue
-         end
-
-         push!(generatedFusions[threadid()], NodeFusion((child_node, node, parent_node)))
-      end
-   end
-
-   # launch thread for node fusion insertion
-   nf_task = @task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
-   schedule(nf_task)
-
-   # find possible node splits
-   @threads for node in nodeArray
-      if (can_split(node))
-         push!(generatedSplits[threadid()], NodeSplit(node))
-      end
-   end
-
-   # launch thread for node split insertion
-   ns_task = @task ns_insertion!(graph.possibleOperations, generatedSplits)
-   schedule(ns_task)
-
-   empty!(graph.dirtyNodes)
-
-   wait(nr_task)
-   wait(nf_task)
-   wait(ns_task)
-
-   return nothing
-end
--- a/src/operations/get.jl
+++ b/src/operations/get.jl
@@ -1,18 +0,0 @@
-# function to return the possible operations of a graph
-
-using Base.Threads
-
-function get_operations(graph::DAG)
-   apply_all!(graph)
-
-   if isempty(graph.possibleOperations)
-      generate_options(graph)
-   end
-
-   for node in graph.dirtyNodes
-      clean_node!(graph, node)
-   end
-   empty!(graph.dirtyNodes)
-
-   return graph.possibleOperations
-end
--- a/src/operations/utility.jl
+++ b/src/operations/utility.jl
@@ -1,107 +0,0 @@
-
-function isempty(operations::PossibleOperations)
-   return isempty(operations.nodeFusions) && 
-          isempty(operations.nodeReductions) && 
-          isempty(operations.nodeSplits)
-end
-
-function length(operations::PossibleOperations)
-   return (nodeFusions = length(operations.nodeFusions),
-           nodeReductions = length(operations.nodeReductions),
-           nodeSplits = length(operations.nodeSplits))
-end
-
-function delete!(operations::PossibleOperations, op::NodeFusion)
-   delete!(operations.nodeFusions, op)
-   return operations
-end
-
-function delete!(operations::PossibleOperations, op::NodeReduction)
-   delete!(operations.nodeReductions, op)
-   return operations
-end
-
-function delete!(operations::PossibleOperations, op::NodeSplit)
-   delete!(operations.nodeSplits, op)
-   return operations
-end
-
-
-function can_fuse(n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
-   if !is_child(n1, n2) || !is_child(n2, n3)
-      # the checks are redundant but maybe a good sanity check
-      return false
-   end
-
-   if length(n2.parents) != 1 || length(n2.children) != 1 || length(n1.parents) != 1
-      return false
-   end
-
-   return true
-end
-
-function can_reduce(n1::Node, n2::Node)
-   if (n1.task != n2.task)
-      return false
-   end
-   
-   n1_length = length(n1.children)
-   n2_length = length(n2.children)
-
-   if (n1_length != n2_length)
-      return false
-   end
-
-   # this seems to be the most common case so do this first
-   # doing it manually is a lot faster than using the sets for a general solution
-   if (n1_length == 2)
-      if (n1.children[1] != n2.children[1])
-         if (n1.children[1] != n2.children[2])
-            return false
-         end
-         # 1_1 == 2_2
-         if (n1.children[2] != n2.children[1])
-            return false
-         end
-         return true
-      end
-
-      # 1_1 == 2_1
-      if (n1.children[2] != n2.children[2])
-         return false
-      end
-      return true
-   end
-
-   # this is simple
-   if (n1_length == 1)
-      return n1.children[1] == n2.children[1]
-   end
-   
-   # this takes a long time
-   return Set(n1.children) == Set(n2.children)
-end
-
-function can_split(n::Node)
-   return length(parents(n)) > 1
-end
-
-function ==(op1::Operation, op2::Operation)
-   return false
-end
-
-function ==(op1::NodeFusion, op2::NodeFusion)
-   # there can only be one node fusion on a given data task, so if the data task is the same, the fusion is the same
-   return op1.input[2] == op2.input[2]
-end
-
-function ==(op1::NodeReduction, op2::NodeReduction)
-   # node reductions are equal exactly if their first input is the same
-   return op1.input[1].id == op2.input[1].id
-end
-
-function ==(op1::NodeSplit, op2::NodeSplit)
-   return op1.input == op2.input
-end
-
-copy(id::UUID) = UUID(id.value)
--- a/src/operations/validate.jl
+++ b/src/operations/validate.jl
@@ -1,61 +0,0 @@
-# functions to throw assertion errors for inconsistent or wrong node operations
-# should be called with @assert
-# the functions throw their own errors though, to still have helpful error messages
-
-function is_valid_node_fusion_input(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
-    if !(n1 in graph) || !(n2 in graph) || !(n3 in graph)
-        throw(AssertionError("[Node Fusion] The given nodes are not part of the given graph"))
-    end
-
-    if !is_child(n1, n2) || !is_child(n2, n3) || !is_parent(n3, n2) || !is_parent(n2, n1)
-        throw(AssertionError("[Node Fusion] The given nodes are not connected by edges which is required for node fusion"))
-    end
-
-    if length(n2.parents) > 1
-        throw(AssertionError("[Node Fusion] The given data node has more than one parent"))
-    end
-    if length(n2.children) > 1
-        throw(AssertionError("[Node Fusion] The given data node has more than one child"))
-    end
-    if length(n1.parents) > 1
-        throw(AssertionError("[Node Fusion] The given n1 has more than one parent"))
-    end
-
-    return true
-end
-
-function is_valid_node_reduction_input(graph::DAG, nodes::Vector{Node})
-    for n in nodes
-        if n ∉ graph
-            throw(AssertionError("[Node Reduction] The given nodes are not part of the given graph"))
-        end
-    end
-
-    t = typeof(nodes[1].task)
-    for n in nodes
-        if typeof(n.task) != t
-            throw(AssertionError("[Node Reduction] The given nodes are not of the same type"))
-        end
-    end
-
-    n1_children = nodes[1].children
-    for n in nodes
-        if Set(n1_children) != Set(n.children)
-            throw(AssertionError("[Node Reduction] The given nodes do not have equal prerequisite nodes which is required for node reduction"))
-        end
-    end
-
-    return true
-end
-
-function is_valid_node_split_input(graph::DAG, n1::Node)
-    if n1 ∉ graph
-        throw(AssertionError("[Node Split] The given node is not part of the given graph"))
-    end
-
-    if length(n1.parents) <= 1
-        throw(AssertionError("[Node Split] The given node does not have multiple parents which is required for node split"))
-    end
-
-    return true
-end
--- a/src/optimization/greedy.jl
+++ b/src/optimization/greedy.jl
@@ -0,0 +1,8 @@
+"""
+    GreedyOptimizer{EstimatorType <: AbstractEstimator}
+
+An implementation of the greedy optimization algorithm, simply choosing the best next option evaluated with the given estimator.
+
+The estimator's concrete type is a type parameter, so the `estimator` field is concretely typed instead of being stored as an abstract `AbstractEstimator`. `GreedyOptimizer(estimator)` still constructs an instance; the parameter is inferred from the argument.
+"""
+struct GreedyOptimizer{EstimatorType <: AbstractEstimator}
+    estimator::EstimatorType
+end
--- a/src/properties/create.jl
+++ b/src/properties/create.jl
@@ -0,0 +1,73 @@
+"""
+    GraphProperties()
+
+Create an empty [`GraphProperties`](@ref) object, i.e. one with all fields set to zero.
+"""
+function GraphProperties()
+    return (
+        data = 0.0,
+        computeEffort = 0.0,
+        computeIntensity = 0.0,
+        cost = 0.0,
+        noNodes = 0,
+        noEdges = 0,
+    )::GraphProperties
+end
+
+"""
+    GraphProperties(graph::DAG)
+
+Calculate the graph's properties and return the constructed [`GraphProperties`](@ref) object.
+
+`apply_all!` is called on the graph first. Each node then contributes its task's data once per parent and its task's compute effort once; the number of edges is the sum of all parent counts.
+"""
+function GraphProperties(graph::DAG)
+    # make sure the graph is fully generated
+    apply_all!(graph)
+
+    totalData = 0.0
+    totalEffort = 0.0
+    edgeCount = 0
+    for node in graph.nodes
+        parentCount = length(node.parents)
+        totalData += data(node.task) * parentCount
+        totalEffort += compute_effort(node.task)
+        edgeCount += parentCount
+    end
+
+    # avoid dividing by zero when no data is transferred
+    intensity = if (totalData == 0)
+        0.0
+    else
+        totalEffort / totalData
+    end
+
+    return (
+        data = totalData,
+        computeEffort = totalEffort,
+        computeIntensity = intensity,
+        cost = 0.0, # TODO
+        noNodes = length(graph.nodes),
+        noEdges = edgeCount,
+    )::GraphProperties
+end
+
+"""
+    GraphProperties(diff::Diff)
+
+Create the graph properties difference from a given [`Diff`](@ref).
+The graph's properties after applying the [`Diff`](@ref) will be `get_properties(graph) + GraphProperties(diff)`.
+For reverting a diff, it's `get_properties(graph) - GraphProperties(diff)`.
+
+Compute effort is summed over the added nodes minus the removed nodes, data over the added edges minus the removed edges.
+"""
+function GraphProperties(diff::Diff)
+    c = 0.0 # TODO
+
+    # net compute effort: added nodes minus removed nodes
+    ce =
+        reduce(+, compute_effort(n.task) for n in diff.addedNodes; init = 0.0) -
+        reduce(+, compute_effort(n.task) for n in diff.removedNodes; init = 0.0)
+
+    # net data transfer: added edges minus removed edges
+    d =
+        reduce(+, data(e) for e in diff.addedEdges; init = 0.0) -
+        reduce(+, data(e) for e in diff.removedEdges; init = 0.0)
+
+    return (
+        data = d,
+        computeEffort = ce,
+        computeIntensity = (d == 0) ? 0.0 : ce / d,
+        cost = c,
+        noNodes = length(diff.addedNodes) - length(diff.removedNodes),
+        noEdges = length(diff.addedEdges) - length(diff.removedEdges),
+    )::GraphProperties
+end
--- a/src/properties/type.jl
+++ b/src/properties/type.jl
@@ -0,0 +1,17 @@
+"""
+    GraphProperties
+
+Representation of a [`DAG`](@ref)'s properties.
+
+# Fields:
+`.data`: The total data transfer.\\
+`.computeEffort`: The total compute effort.\\
+`.computeIntensity`: The compute intensity, equal to `.computeEffort / .data`, or `0.0` when `.data` is zero.\\
+`.cost`: The estimated cost.\\
+`.noNodes`: Number of [`Node`](@ref)s.\\
+`.noEdges`: Number of [`Edge`](@ref)s.
+"""
+const GraphProperties = NamedTuple{
+    (:data, :computeEffort, :computeIntensity, :cost, :noNodes, :noEdges),
+    Tuple{Float64, Float64, Float64, Float64, Int, Int},
+}
--- a/src/properties/utility.jl
+++ b/src/properties/utility.jl
@@ -0,0 +1,57 @@
+"""
+    -(prop1::GraphProperties, prop2::GraphProperties)
+
+Subtract `prop2` from `prop1` and return the result as a new [`GraphProperties`](@ref).
+Also take care to keep consistent compute intensity.
+"""
+function -(prop1::GraphProperties, prop2::GraphProperties)
+    # compute the differences once; the compute intensity is derived from exactly these two values
+    d = prop1.data - prop2.data
+    ce = prop1.computeEffort - prop2.computeEffort
+
+    return (
+        data = d,
+        computeEffort = ce,
+        computeIntensity = (d == 0) ? 0.0 : ce / d, # 0.0 instead of dividing by zero
+        cost = prop1.cost - prop2.cost,
+        noNodes = prop1.noNodes - prop2.noNodes,
+        noEdges = prop1.noEdges - prop2.noEdges,
+    )::GraphProperties
+end
+
+"""
+    +(prop1::GraphProperties, prop2::GraphProperties)
+
+Return a new [`GraphProperties`](@ref) holding the field-wise sum of `prop1` and `prop2`.
+The compute intensity is recomputed from the summed compute effort and data instead of being added.
+"""
+function +(prop1::GraphProperties, prop2::GraphProperties)
+    totalData = prop1.data + prop2.data
+    totalEffort = prop1.computeEffort + prop2.computeEffort
+    # a summed data transfer of zero yields an intensity of 0.0 rather than a division by zero
+    intensity = (totalData == 0) ? 0.0 : totalEffort / totalData
+    return (
+        data = totalData,
+        computeEffort = totalEffort,
+        computeIntensity = intensity,
+        cost = prop1.cost + prop2.cost,
+        noNodes = prop1.noNodes + prop2.noNodes,
+        noEdges = prop1.noEdges + prop2.noEdges,
+    )::GraphProperties
+end
+
+"""
+    -(prop::GraphProperties)
+
+Return the negated graph properties. `.computeIntensity` is carried over unchanged, since negating both `.data` and `.computeEffort` leaves their ratio the same.
+"""
+function -(prop::GraphProperties)
+    # `merge` keeps `.computeIntensity` from `prop` and replaces every other field with its negation
+    return Base.merge(prop, (
+        data = -prop.data,
+        computeEffort = -prop.computeEffort,
+        cost = -prop.cost,
+        noNodes = -prop.noNodes,
+        noEdges = -prop.noEdges,
+    ))::GraphProperties
+end
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Anton Reinhard	0c74a3f30e	Use runner label	2023-11-16 15:26:37 +01:00
Anton Reinhard	241fd227c1	Add more estimator tests, add specialized implementations of operation_effect estimator	2023-11-16 04:28:58 +01:00
Anton Reinhard	a97a878881	Add abcmodel tests, Add groundtruth for AB->AB process, fix calculation	2023-11-13 14:26:13 +01:00
Anton Reinhard	1cf8ae643c	Start adding estimator tests	2023-11-13 14:22:41 +01:00
Rubydragon	8b4e2b5ef7	Add estimator interface and global metric estimator implementation	2023-11-13 14:22:41 +01:00
Rubydragon	61b6541b81	Add estimator interface	2023-11-13 14:22:41 +01:00
Anton Reinhard	2709eeb3dc	Fix the types, add some profiling examples (#15 ) Reviewed-on: Rubydragon/MetagraphOptimization.jl#15 Co-authored-by: Anton Reinhard <anton.reinhard@proton.me> Co-committed-by: Anton Reinhard <anton.reinhard@proton.me>	2023-11-13 12:55:02 +01:00
Anton Reinhard	5a30f57e1f	Add scheduling, machine info, caching strategies and devices (#9 ) Some checks failed MetagraphOptimization_CI / prepare (push) Has been cancelled Details MetagraphOptimization_CI / test (push) Has been cancelled Details MetagraphOptimization_CI / docs (push) Has been cancelled Details Reviewed-on: Rubydragon/MetagraphOptimization.jl#9 Co-authored-by: Anton Reinhard <anton.reinhard@proton.me> Co-committed-by: Anton Reinhard <anton.reinhard@proton.me>	2023-10-12 17:51:03 +02:00
Anton Reinhard	bd6c54c1ae	Merge pull request 'Code Generation' (#8 ) from code-gen into main Reviewed-on: Rubydragon/MetagraphOptimization.jl#8	2023-09-17 14:35:46 +02:00
Anton Reinhard	62791ab422	Fix docs Some checks failed MetagraphOptimization_CI / prepare (push) Has been cancelled Details MetagraphOptimization_CI / test (push) Has been cancelled Details MetagraphOptimization_CI / docs (push) Has been cancelled Details	2023-09-17 12:40:11 +02:00
Anton Reinhard	4c452dce98	Add execution test	2023-09-17 10:32:43 +02:00
Anton Reinhard	27c4b8ba34	Use real ABC-Model compute effort and data transfer numbers	2023-09-07 18:46:41 +02:00
Anton Reinhard	e59d24ebe5	Add code gen documentation	2023-09-07 18:23:36 +02:00
Anton Reinhard	d1666de432	Add accurate arithmetic for summation, fix order of input particles	2023-09-07 16:49:44 +02:00
Anton Reinhard	0f78053ccf	Fix topoligical ordering on the graph	2023-09-05 12:14:41 +02:00
Anton Reinhard	7a1a97dac8	Add basic execution function	2023-09-01 16:22:16 +02:00
Anton Reinhard	f1edce258a	Start adding code generation	2023-08-31 18:24:48 +02:00
Anton Reinhard	32fcd069d7	Merge pull request 'Property Caching' (#7 ) from feature/property-tracking into main Reviewed-on: Rubydragon/MetagraphOptimization.jl#7	2023-08-29 15:35:51 +02:00
Anton Reinhard	e09ab7c77b	Add tests	2023-08-29 13:09:33 +02:00
Anton Reinhard	7387fa86b1	Add GraphProperties and property caching	2023-08-29 13:08:02 +02:00
Anton Reinhard	065236be22	Add documentation to every function and automatic doc html building (#6 ) Reviewed-on: Rubydragon/MetagraphOptimization.jl#6 Co-authored-by: Anton Reinhard <anton.reinhard@proton.me> Co-committed-by: Anton Reinhard <anton.reinhard@proton.me>	2023-08-29 12:57:46 +02:00
Anton Reinhard	8014bbffcd	Merge pull request 'More Validation' (#5 ) from test into main Reviewed-on: Rubydragon/MetagraphOptimization.jl#5	2023-08-25 11:05:17 +02:00
Anton Reinhard	ae1345d547	Add formatter Some checks failed Test / test (push) Has been cancelled Details	2023-08-25 10:48:22 +02:00
Anton Reinhard	dbcd569967	Update Julia in CI and dependencies	2023-08-25 10:24:37 +02:00
Anton Reinhard	0f5f475cb4	Shuffle files and functions around for more consistent naming and smaller files	2023-08-24 15:11:54 +02:00
Anton Reinhard	1b4030d633	Add validity checks to tests	2023-08-24 14:44:21 +02:00
Anton Reinhard	383c92ec47	Merge pull request 'Performance Improvements' (#4 ) from performance into main Reviewed-on: Rubydragon/MetagraphOptimization.jl#4	2023-08-24 11:33:06 +02:00