9 Commits

Author SHA1 Message Date
0c74a3f30e Use runner label 2023-11-16 15:26:37 +01:00
241fd227c1 Add more estimator tests, add specialized implementations of operation_effect estimator 2023-11-16 04:28:58 +01:00
a97a878881 Add abcmodel tests, Add groundtruth for AB->AB process, fix calculation 2023-11-13 14:26:13 +01:00
1cf8ae643c Start adding estimator tests 2023-11-13 14:22:41 +01:00
8b4e2b5ef7 Add estimator interface and global metric estimator implementation 2023-11-13 14:22:41 +01:00
61b6541b81 Add estimator interface 2023-11-13 14:22:41 +01:00
2709eeb3dc Fix the types, add some profiling examples (#15)
Reviewed-on: Rubydragon/MetagraphOptimization.jl#15
Co-authored-by: Anton Reinhard <anton.reinhard@proton.me>
Co-committed-by: Anton Reinhard <anton.reinhard@proton.me>
2023-11-13 12:55:02 +01:00
5a30f57e1f Add scheduling, machine info, caching strategies and devices (#9)
Some checks failed
MetagraphOptimization_CI / prepare (push) Has been cancelled
MetagraphOptimization_CI / test (push) Has been cancelled
MetagraphOptimization_CI / docs (push) Has been cancelled
Reviewed-on: Rubydragon/MetagraphOptimization.jl#9
Co-authored-by: Anton Reinhard <anton.reinhard@proton.me>
Co-committed-by: Anton Reinhard <anton.reinhard@proton.me>
2023-10-12 17:51:03 +02:00
bd6c54c1ae Merge pull request 'Code Generation' (#8) from code-gen into main
Reviewed-on: Rubydragon/MetagraphOptimization.jl#8
2023-09-17 14:35:46 +02:00
81 changed files with 3775 additions and 1008 deletions

View File

@ -1,5 +1,5 @@
indent = 4
margin = 80
margin = 120
always_for_in = true
for_in_replacement = "in"
whitespace_typedefs = true

View File

@ -8,7 +8,7 @@ env:
jobs:
prepare:
runs-on: arch-latest
runs-on: ubuntu-22.04
steps:
- name: Checkout repository
@ -65,7 +65,7 @@ jobs:
test:
needs: prepare
runs-on: arch-latest
runs-on: ubuntu-22.04
steps:
- name: Checkout repository
@ -108,7 +108,7 @@ jobs:
- name: Format check
run: |
julia --project=./ -e 'using JuliaFormatter; format(".", verbose=true)'
julia --project=./ -e 'using JuliaFormatter; format(".", verbose=true, ignore=[".julia/*"])'
julia --project=./ -e '
out = Cmd(`git diff --name-only`) |> read |> String
if out == ""
@ -127,7 +127,7 @@ jobs:
docs:
needs: prepare
runs-on: arch-latest
runs-on: ubuntu-22.04
steps:
- name: Checkout repository

2
.gitignore vendored
View File

@ -26,3 +26,5 @@ Manifest.toml
# vscode workspace directory
.vscode
.julia
**/.ipynb_checkpoints/

View File

@ -5,9 +5,15 @@ version = "0.1.0"
[deps]
AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
NumaAllocators = "21436f30-1b4a-4f08-87af-e26101bb5379"
QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[extras]

View File

@ -42,7 +42,7 @@ Problems:
- Lots of testing required because mistakes will propagate and multiply.
## Other TODOs
- Reduce memory footprint of the graph, are the UUIDs too large?
- Reduce memory footprint of the graph
- Memory layout of Nodes? They should lie linearly in memory, right now probably on heap?
- Add scaling functions
@ -53,7 +53,7 @@ For graphs AB->AB^n:
- Number of ComputeTaskS2 should always be (n+1)!
- Number of ComputeTaskU should always be (n+3)
Times are from my home machine: AMD Ryzen 7900X3D, 64GB DDR5 RAM @ 6000MHz
Times are from my home machine: AMD Ryzen 7900X3D, 64GB DDR5 RAM @ 6000MHz (not necessarily up to date, check Jupyter Notebooks in `notebooks/` instead)
```
$ julia --project examples/import_bench.jl

View File

@ -27,6 +27,7 @@ makedocs(
"Diff" => "lib/internals/diff.md",
"Utility" => "lib/internals/utility.md",
"Code Generation" => "lib/internals/code_gen.md",
"Devices" => "lib/internals/devices.md",
],
"Contribution" => "contribution.md",
],

75
docs/src/flowchart.drawio Normal file
View File

@ -0,0 +1,75 @@
<mxfile host="Electron" modified="2023-09-17T13:34:45.840Z" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/21.6.1 Chrome/114.0.5735.134 Electron/25.6.0 Safari/537.36" etag="e0c8qLevhaP_q_R2fyC9" version="21.6.1" type="device">
<diagram name="Page-1" id="Vy0cA1nkMPfy-3cC5ahA">
<mxGraphModel dx="1185" dy="707" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="500" pageHeight="900" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="yG8qeggDCLqQ8GwY7ugi-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-1" target="yG8qeggDCLqQ8GwY7ugi-2">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-1" value="Process Generator Script" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
<mxGeometry x="180" y="120" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-2" target="yG8qeggDCLqQ8GwY7ugi-3">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-2" value="Process Parser" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
<mxGeometry x="180" y="220" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-8" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.25;exitDx=0;exitDy=0;entryX=0;entryY=0.25;entryDx=0;entryDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-3" target="yG8qeggDCLqQ8GwY7ugi-6">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-3" target="yG8qeggDCLqQ8GwY7ugi-12">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-3" value="Optimizer" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
<mxGeometry x="180" y="320" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.75;entryDx=0;entryDy=0;exitX=0;exitY=0.75;exitDx=0;exitDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-6" target="yG8qeggDCLqQ8GwY7ugi-3">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-6" value="Fast Cost Estimator&lt;br&gt;(Global Metrics)" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
<mxGeometry x="340" y="320" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-12" target="yG8qeggDCLqQ8GwY7ugi-13">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="80" y="450" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-12" value="Scheduler" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
<mxGeometry x="180" y="420" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-13" target="yG8qeggDCLqQ8GwY7ugi-3">
<mxGeometry relative="1" as="geometry">
<mxPoint x="120" y="380" as="targetPoint" />
<Array as="points">
<mxPoint x="80" y="350" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-12" target="yG8qeggDCLqQ8GwY7ugi-18">
<mxGeometry relative="1" as="geometry">
<Array as="points">
<mxPoint x="240" y="500" />
<mxPoint x="240" y="500" />
</Array>
</mxGeometry>
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-13" value="Accurate Cost Estimator&lt;br&gt;(Machine Specific)" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
<mxGeometry x="20" y="370" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-18" target="yG8qeggDCLqQ8GwY7ugi-20">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-18" value="Code Generator" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
<mxGeometry x="180" y="520" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="yG8qeggDCLqQ8GwY7ugi-20" value="Executor" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
<mxGeometry x="180" y="620" width="120" height="60" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>

View File

@ -0,0 +1,59 @@
# Devices
## Interface
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["devices/interface.jl"]
Order = [:type, :constant, :function]
```
## Detect
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["devices/detect.jl"]
Order = [:function]
```
## Measure
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["devices/measure.jl"]
Order = [:function]
```
## Implementations
### General
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["devices/impl.jl"]
Order = [:type, :function]
```
### NUMA
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["devices/numa/impl.jl"]
Order = [:type, :function]
```
### CUDA
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["devices/cuda/impl.jl"]
Order = [:type, :function]
```
### ROCm
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["devices/rocm/impl.jl"]
Order = [:type, :function]
```
### oneAPI
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["devices/oneapi/impl.jl"]
Order = [:type, :function]
```

View File

@ -0,0 +1,21 @@
# Estimation
## Interface
The interface that has to be implemented for an estimator.
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["estimator/interface.jl"]
Order = [:type, :constant, :function]
```
## Global Metric Estimator
Implementation of a global metric estimator. It uses the graph properties compute effort, data transfer, and compute intensity.
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["estimator/global_metric.jl"]
Order = [:type, :function]
```

View File

@ -1,5 +1,21 @@
# Models
## Interface
The interface that has to be implemented for a model to be usable is defined in `src/models/interface.jl`.
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["models/interface.jl"]
Order = [:type, :constant, :function]
```
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["models/print.jl"]
Order = [:function]
```
## ABC-Model
### Types
@ -44,6 +60,13 @@ Pages = ["models/abc/compute.jl"]
Order = [:function]
```
### Print
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["models/abc/print.jl"]
Order = [:function]
```
## QED-Model
*To be added*

View File

@ -0,0 +1,15 @@
# Scheduler
## Interface
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["scheduler/interface.jl"]
Order = [:type, :function]
```
## Greedy
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["scheduler/greedy.jl"]
Order = [:type, :function]
```

View File

@ -21,6 +21,13 @@ Pages = ["task/compare.jl"]
Order = [:function]
```
## Compute
```@autodocs
Modules = [MetagraphOptimization]
Pages = ["task/compute.jl"]
Order = [:function]
```
## Properties
```@autodocs
Modules = [MetagraphOptimization]

View File

@ -1,3 +1,7 @@
# Manual
This will become a manual.
## Jupyter Notebooks
The `notebooks` directory contains Jupyter notebooks with examples of how to use this repository.
- `abc_model_showcase`: A simple showcase of the intended usage of the ABC Model implementation.

34
examples/ab5.jl Normal file
View File

@ -0,0 +1,34 @@
# Profiling example for the ABC-model process "AB->ABBBBB".
#
# The script times each stage with `@time`: querying the machine info, building
# the model, parsing the process string and the DAG file, generating 1000 process
# inputs, reducing the graph, and generating the compute function. It then
# benchmarks the generated function on a single input and on all 1000 inputs.
using MetagraphOptimization
using BenchmarkTools

println("Getting machine info")
@time machine = get_machine_info()

println("Making model")
@time model = ABCModel()

println("Making process")
process_str = "AB->ABBBBB"
@time process = parse_process(process_str, model)

println("Parsing DAG")
# NOTE(review): the input path is relative; presumably the script is meant to be
# started from the repository root so that `input/` resolves - confirm.
@time graph = parse_dag("input/$process_str.txt", model)

println("Generating input data")
@time input_data = [gen_process_input(process) for _ in 1:1000]

# Expected to provide `reduce_all!`, which is called below.
include("profiling_utilities.jl")

println("Reducing graph")
@time reduce_all!(graph)

println("Generating compute function")
@time compute_func = get_compute_function(graph, process, machine)

println("First run, single argument")
@time compute_func(input_data[1])

# `compute_func` and `input_data` are non-constant globals. Interpolating them
# with `$` makes BenchmarkTools measure the call itself rather than the global
# variable lookup and the dynamic dispatch that comes with it.
println("\nBenchmarking function, 1 input")
display(@benchmark $compute_func($(input_data[1])))

println("\nBenchmarking function, 1000 inputs")
display(@benchmark $compute_func.($input_data))

34
examples/ab7.jl Normal file
View File

@ -0,0 +1,34 @@
# Profiling example for the ABC-model process "AB->ABBBBBBB".
#
# The script times each stage with `@time`: querying the machine info, building
# the model, parsing the process string and the DAG file, generating 1000 process
# inputs, reducing the graph, and generating the compute function. It then
# benchmarks the generated function on a single input and on all 1000 inputs.
using MetagraphOptimization
using BenchmarkTools

println("Getting machine info")
@time machine = get_machine_info()

println("Making model")
@time model = ABCModel()

println("Making process")
process_str = "AB->ABBBBBBB"
@time process = parse_process(process_str, model)

println("Parsing DAG")
# NOTE(review): the input path is relative; presumably the script is meant to be
# started from the repository root so that `input/` resolves - confirm.
@time graph = parse_dag("input/$process_str.txt", model)

println("Generating input data")
@time input_data = [gen_process_input(process) for _ in 1:1000]

# Expected to provide `reduce_all!`, which is called below.
include("profiling_utilities.jl")

println("Reducing graph")
@time reduce_all!(graph)

println("Generating compute function")
@time compute_func = get_compute_function(graph, process, machine)

println("First run, single argument")
@time compute_func(input_data[1])

# `compute_func` and `input_data` are non-constant globals. Interpolating them
# with `$` makes BenchmarkTools measure the call itself rather than the global
# variable lookup and the dynamic dispatch that comes with it.
println("\nBenchmarking function, 1 input")
display(@benchmark $compute_func($(input_data[1])))

println("\nBenchmarking function, 1000 inputs")
display(@benchmark $compute_func.($input_data))

View File

@ -13,16 +13,15 @@ function bench_txt(filepath::String, bench::Bool = true)
return
end
model = ABCModel()
println(name, ":")
g = parse_abc(filepath)
g = parse_dag(filepath, model)
print(g)
println(
" Graph size in memory: ",
bytes_to_human_readable(MetagraphOptimization.mem(g)),
)
println(" Graph size in memory: ", bytes_to_human_readable(MetagraphOptimization.mem(g)))
if (bench)
@btime parse_abc($filepath)
@btime parse_dag($filepath, $model)
end
println(" Get Operations: ")

View File

@ -12,7 +12,7 @@ function gen_plot(filepath)
return
end
g = parse_abc(filepath)
g = parse_dag(filepath, ABCModel())
Random.seed!(1)
@ -48,23 +48,10 @@ function gen_plot(filepath)
println("\rDone.")
plot(
[x[1], x[2]],
[y[1], y[2]],
linestyle = :solid,
linewidth = 1,
color = :red,
legend = false,
)
plot([x[1], x[2]], [y[1], y[2]], linestyle = :solid, linewidth = 1, color = :red, legend = false)
# Create lines connecting the reference point to each data point
for i in 3:length(x)
plot!(
[x[i - 1], x[i]],
[y[i - 1], y[i]],
linestyle = :solid,
linewidth = 1,
color = :red,
)
plot!([x[i - 1], x[i]], [y[i - 1], y[i]], linestyle = :solid, linewidth = 1, color = :red)
end
return gui()

View File

@ -12,7 +12,7 @@ function gen_plot(filepath)
return
end
g = parse_abc(filepath)
g = parse_dag(filepath, ABCModel())
Random.seed!(1)
@ -60,14 +60,7 @@ function gen_plot(filepath)
push!(y, props.computeEffort)
pop_operation!(g)
push!(
names,
"NF: (" *
string(props.data) *
", " *
string(props.computeEffort) *
")",
)
push!(names, "NF: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
end
for op in opt.nodeReductions
push_operation!(g, op)
@ -76,14 +69,7 @@ function gen_plot(filepath)
push!(y, props.computeEffort)
pop_operation!(g)
push!(
names,
"NR: (" *
string(props.data) *
", " *
string(props.computeEffort) *
")",
)
push!(names, "NR: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
end
for op in opt.nodeSplits
push_operation!(g, op)
@ -92,33 +78,13 @@ function gen_plot(filepath)
push!(y, props.computeEffort)
pop_operation!(g)
push!(
names,
"NS: (" *
string(props.data) *
", " *
string(props.computeEffort) *
")",
)
push!(names, "NS: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
end
plot(
[x0, x[1]],
[y0, y[1]],
linestyle = :solid,
linewidth = 1,
color = :red,
legend = false,
)
plot([x0, x[1]], [y0, y[1]], linestyle = :solid, linewidth = 1, color = :red, legend = false)
# Create lines connecting the reference point to each data point
for i in 2:length(x)
plot!(
[x0, x[i]],
[y0, y[i]],
linestyle = :solid,
linewidth = 1,
color = :red,
)
plot!([x0, x[i]], [y0, y[i]], linestyle = :solid, linewidth = 1, color = :red)
end
#scatter!(x, y, label=names)

View File

@ -1,6 +1,6 @@
function test_random_walk(g::DAG, n::Int64)
# the purpose here is to do "random" operations and reverse them again and validate that the graph stays the same and doesn't diverge
function random_walk!(g::DAG, n::Int64)
# the purpose here is to do "random" operations on the graph to simulate an optimizer
reset_graph!(g)
properties = get_properties(g)
@ -32,7 +32,7 @@ function test_random_walk(g::DAG, n::Int64)
end
end
return reset_graph!(g)
return nothing
end
function reduce_all!(g::DAG)

Binary file not shown.

View File

@ -0,0 +1,637 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"using MetagraphOptimization"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1 NUMA nodes\n",
"CUDA is non-functional\n"
]
}
],
"source": [
"# Get machine and set dictionary caching strategy\n",
"machine = get_machine_info()\n",
"MetagraphOptimization.set_cache_strategy(machine.devices[1], MetagraphOptimization.Dictionary())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Graph:\n",
" Nodes: Total: 7854, ComputeTaskP: 8, ComputeTaskS2: 720, \n",
" ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 1230, \n",
" ComputeTaskV: 1956, DataTask: 3931\n",
" Edges: 11241\n",
" Total Compute Effort: 33915.0\n",
" Total Data Transfer: 322464.0\n",
" Total Compute Intensity: 0.10517453111044954\n"
]
}
],
"source": [
"model = ABCModel()\n",
"process_str = \"AB->ABBBBB\"\n",
"process = parse_process(process_str, model)\n",
"graph = parse_dag(\"../input/$process_str.txt\", model)\n",
"print(graph)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"compute__ae7097a4_7bfc_11ee_2cec_190d7ced64f1 (generic function with 1 method)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compute_AB_AB5 = get_compute_function(graph, process, machine)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0.140021 seconds (791.41 k allocations: 30.317 MiB, 9.74% gc time)\n",
"Graph:\n",
" Nodes: Total: 4998, ComputeTaskP: 8, ComputeTaskS2: 720, \n",
" ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 516, \n",
" ComputeTaskV: 1242, DataTask: 2503\n",
" Edges: 7671\n",
" Total Compute Effort: 21777.0\n",
" Total Data Transfer: 219648.0\n",
" Total Compute Intensity: 0.09914499562937062\n"
]
}
],
"source": [
"include(\"../examples/profiling_utilities.jl\")\n",
"@time reduce_all!(graph)\n",
"print(graph)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 3.626740 seconds (1.52 M allocations: 114.358 MiB, 0.84% gc time)\n"
]
},
{
"data": {
"text/plain": [
"compute__bad8f2ac_7bfc_11ee_176b_b72dc8919aad (generic function with 1 method)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@time compute_AB_AB5_reduced = get_compute_function(graph, process, machine)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 2.130952 seconds (4.31 M allocations: 276.129 MiB, 4.50% gc time, 99.02% compilation time)\n"
]
},
{
"data": {
"text/plain": [
"1000-element Vector{ABCProcessInput}:\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [4.694213004647641, 0.0, 0.0, 4.58646222408983]\n",
" B: [4.694213004647641, 0.0, 0.0, -4.58646222408983]\n",
" 6 Outgoing Particles:\n",
" A: [-1.1989656045893697, -0.40235742161696864, 0.06512533692021122, 0.5209469423550988]\n",
" B: [-1.2555060342925868, 0.3685683194051901, 0.4785890883121294, -0.4597882997907804]\n",
" B: [-2.189083660521547, 0.31663070338411387, 0.1742479621961443, -1.9134967776579581]\n",
" B: [-1.0637129314000269, -0.2948512505337184, 0.0500740340487307, -0.2050378784528044]\n",
" B: [-1.6149410305664367, 1.0344652685816964, -0.406159957064284, 0.6106965118475143]\n",
" B: [-2.0662167479253144, -1.0224556192203134, -0.3618764644129321, 1.4466795016989296]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [5.621657834589244, 0.0, 0.0, 5.532001157736559]\n",
" B: [5.621657834589244, 0.0, 0.0, -5.532001157736559]\n",
" 6 Outgoing Particles:\n",
" A: [-2.058801595505931, 0.7220299456693885, 0.22719930902793095, 1.6327024349806234]\n",
" B: [-1.1826215869997767, 0.04638669502532437, -0.553508153090363, -0.30011800516629]\n",
" B: [-2.3776830758041227, -0.8637209881441633, -0.22710813067439403, 1.9636152272240621]\n",
" B: [-1.9086249240920268, 0.02598092498567318, -1.087715954825374, -1.2079106316365085]\n",
" B: [-2.6526208210236426, 0.3117066248738638, 1.6178469805428013, -1.8225826038033035]\n",
" B: [-1.0629636657529868, -0.24238320241008685, 0.023285949019398133, -0.2657064215985837]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.176284774018432, 0.0, 0.0, 6.094792335245879]\n",
" B: [6.176284774018432, 0.0, 0.0, -6.094792335245879]\n",
" 6 Outgoing Particles:\n",
" A: [-3.2943110238771185, 1.9799744259594443, 2.3805040294128346, 0.5151572192390796]\n",
" B: [-1.0255775134941767, 0.18009906891836583, -0.12779691496180498, 0.05514988745120904]\n",
" B: [-1.7854209452644407, -0.56381615584479, -0.9572322565407875, 0.9764966468120639]\n",
" B: [-3.3312939695760786, -0.5949754252793171, -2.9420979921841868, -1.0428725518649993]\n",
" B: [-1.6551651824618003, -0.8748451354288965, 0.9749427327758187, -0.1539624566503731]\n",
" B: [-1.260800913363249, -0.12643677832480643, 0.6716804014981268, -0.34996874498697933]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [4.747497785190141, 0.0, 0.0, 4.640984294348053]\n",
" B: [4.747497785190141, 0.0, 0.0, -4.640984294348053]\n",
" 6 Outgoing Particles:\n",
" A: [-1.3704329562088802, 0.8292801285050307, 0.2251475790952209, 0.3737506167990253]\n",
" B: [-1.352958681672649, 0.11120507604905326, 0.6088733084867489, -0.6688825902852584]\n",
" B: [-1.4224569379606473, -0.25277059018918374, -0.4925475402927904, -0.84669220478242]\n",
" B: [-2.4534584066229996, -0.23638988525842838, -1.4120549440785204, 1.7232756047945383]\n",
" B: [-1.4378719974624208, 0.5461758322111039, 0.8131489669135029, -0.3285674953530594]\n",
" B: [-1.457816590452685, -0.9975005613175758, 0.257432629875838, -0.25288393117282576]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.148648417619223, 0.0, 0.0, 6.066784763240853]\n",
" B: [6.148648417619223, 0.0, 0.0, -6.066784763240853]\n",
" 6 Outgoing Particles:\n",
" A: [-1.5381168736188293, 0.5769721565317305, 1.0069443436143835, 0.13773066601554382]\n",
" B: [-1.3178580311796126, 0.27781510267038506, -0.8083323925420551, 0.07853217328003184]\n",
" B: [-1.5330954954905804, 0.4994081736550063, -1.0290017953406905, 0.20525247761163526]\n",
" B: [-3.083592979398096, -2.1497728433794587, -1.2247634566690573, -1.5449844205264607]\n",
" B: [-3.1391572693216845, 0.49043306139044257, 2.931865230552653, 0.13397777318202247]\n",
" B: [-1.6854761862296446, 0.30514434913189475, -0.876711929615233, 0.989491330437227]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [7.422637433466136, 0.0, 0.0, 7.35496746890785]\n",
" B: [7.422637433466136, 0.0, 0.0, -7.35496746890785]\n",
" 6 Outgoing Particles:\n",
" A: [-3.3788591199517355, 2.3069724486616927, -0.5016400230094518, 2.2006645271171985]\n",
" B: [-2.193241133599192, -1.652465184572841, -0.691853387986234, -0.7752447184070871]\n",
" B: [-2.295315825041209, 0.334376552772819, 0.5374003175214306, 1.966689593293318]\n",
" B: [-2.3721558149969235, -2.0813404180022568, 0.4923496733367945, 0.22964554029865022]\n",
" B: [-1.5367714331999278, 0.9008878309070798, 0.1482895506792473, -0.7266895920420517]\n",
" B: [-3.068931540143284, 0.1915687702335065, 0.015453869458212284, -2.8950653502600274]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.716486802754837, 0.0, 0.0, 6.64162592830851]\n",
" B: [6.716486802754837, 0.0, 0.0, -6.64162592830851]\n",
" 6 Outgoing Particles:\n",
" A: [-1.3263331205917814, -0.5023870926274977, 0.418137178911541, 0.5761319775467438]\n",
" B: [-2.1603199304697136, -1.202627416523187, 1.024176720111292, -1.0824654936733602]\n",
" B: [-1.1665818595303201, 0.5747508534091106, 0.05041215840441908, 0.16743149576984034]\n",
" B: [-1.829760754209137, 0.5127529745920416, -0.17835468593467171, -1.4329334983509001]\n",
" B: [-2.891550940379351, -2.652621236308268, 0.3953841214715819, 0.41029113320086874]\n",
" B: [-4.05842700032937, 3.2701319174577996, -1.7097554929641623, 1.3615443855068068]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [7.700331598721008, 0.0, 0.0, 7.635123229539995]\n",
" B: [7.700331598721008, 0.0, 0.0, -7.635123229539995]\n",
" 6 Outgoing Particles:\n",
" A: [-2.382743739041896, -1.410381415274026, 1.0613871843128353, 1.2496996576655786]\n",
" B: [-3.021630369232257, 0.25595209564405125, -2.8389223073732714, 0.07251720968504605]\n",
" B: [-2.7262381500229256, 1.0736489469437192, 2.293577756890956, 0.13839603484966886]\n",
" B: [-2.222260574660266, 1.5432031708495264, -0.7055857379280247, 1.0291330339668954]\n",
" B: [-1.650055097318715, -1.062833285640475, -0.34598865120359784, 0.6880109623839291]\n",
" B: [-3.397735267165956, -0.3995895125227963, 0.5355317553011019, -3.1777568985511193]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [4.9341647451125334, 0.0, 0.0, 4.8317679716550375]\n",
" B: [4.9341647451125334, 0.0, 0.0, -4.8317679716550375]\n",
" 6 Outgoing Particles:\n",
" A: [-1.834221818900379, 0.1070495973399568, 1.2695354794210922, 0.860923766155068]\n",
" B: [-1.5116322118250454, 0.39753882899610743, -0.756426277560466, -0.7448584495617266]\n",
" B: [-1.6588475476725886, 0.06712527283179799, 0.6875031760830096, -1.1289857249063835]\n",
" B: [-1.5718164783029667, 0.4294130824657117, -0.6215317131811225, -0.9486357444151968]\n",
" B: [-1.7838526603309615, -0.5732435925039472, -0.9425541080554634, 0.9824020820472578]\n",
" B: [-1.5079587731931232, -0.4278831891296266, 0.36347344329295106, 0.979154070680981]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [7.099667747066588, 0.0, 0.0, 7.028889109862067]\n",
" B: [7.099667747066588, 0.0, 0.0, -7.028889109862067]\n",
" 6 Outgoing Particles:\n",
" A: [-3.851129225519823, 2.5555470019017212, -2.502060728335724, 1.019837214678957]\n",
" B: [-2.3860288930086897, 0.6059782347076652, 0.6711053982516709, 1.9686395814801452]\n",
" B: [-1.9543999030878276, -1.5857282951514855, 0.5255033921941499, -0.17026726032362857]\n",
" B: [-1.5523812781985644, -1.154244859738803, 0.03484928145183679, -0.2763909626783212]\n",
" B: [-3.2795110937910716, -1.0290377989842119, 1.3607888704851536, -2.616204860580336]\n",
" B: [-1.175885100527199, 0.6074857172651138, -0.09018621404708665, 0.07438628742318319]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.3653048194550985, 0.0, 0.0, 6.286263233796236]\n",
" B: [6.3653048194550985, 0.0, 0.0, -6.286263233796236]\n",
" 6 Outgoing Particles:\n",
" A: [-3.274142279992413, -2.62046758782023, -1.339558866223036, 1.028950598785383]\n",
" B: [-1.8502190446152251, -1.1967169760014287, 0.8476370040459147, 0.5221977611776395]\n",
" B: [-1.3090919645484567, 0.8304076910302604, -0.132118345313184, 0.08178985973111547]\n",
" B: [-1.7699077332157842, 0.8156249668276708, -0.2891156025546255, 1.1763254081859622]\n",
" B: [-1.6671330761442815, 1.2573648831500233, 0.2190135291489001, -0.3878135096217862]\n",
" B: [-2.8601155403940384, 0.913787022813704, 0.6941422808960306, -2.421450118258315]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [5.2620105860572215, 0.0, 0.0, 5.166116085395126]\n",
" B: [5.2620105860572215, 0.0, 0.0, -5.166116085395126]\n",
" 6 Outgoing Particles:\n",
" A: [-1.9479176369516882, 0.8861257045164052, 1.1018829783040076, 0.8916379636750793]\n",
" B: [-1.2433791528628988, 0.41365857789168176, 0.544699730060495, -0.27960776595565956]\n",
" B: [-1.074755543453127, 0.3002469943380598, 0.01041159782849033, 0.25464253219924826]\n",
" B: [-1.7453891507499704, 1.1576089006622574, 0.03134512003430503, -0.8398466551182168]\n",
" B: [-1.5208938996272057, 0.008686514238768405, -1.1440782944999142, -0.06424682441800389]\n",
" B: [-2.991685788469555, -2.7663266916471727, -0.544261131727384, 0.03742074961755215]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [4.439668869119513, 0.0, 0.0, 4.325582003318043]\n",
" B: [4.439668869119513, 0.0, 0.0, -4.325582003318043]\n",
" 6 Outgoing Particles:\n",
" A: [-1.1969832203303146, 0.48265768801558717, -0.02482335564392214, 0.4463117598342591]\n",
" B: [-1.7251727113760817, -1.0744400415092346, 0.6322269398265393, 0.6496834443295479]\n",
" B: [-1.419669052608684, -0.4173084301546306, -0.44626125418717505, -0.8013518491074973]\n",
" B: [-1.331289111993432, -0.7645577006899625, -0.3423664341778722, 0.2656453402118452]\n",
" B: [-1.5156451020746182, 0.6491857388484042, 0.8955487542892042, -0.2715333876518423]\n",
" B: [-1.6905785398558963, 1.1244627454898357, -0.7143246501067739, -0.2887553076163127]\n",
"\n",
" ⋮\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [5.750717080737416, 0.0, 0.0, 5.663104002460582]\n",
" B: [5.750717080737416, 0.0, 0.0, -5.663104002460582]\n",
" 6 Outgoing Particles:\n",
" A: [-1.0362067302993534, 0.23737037129807034, 0.1316212944823847, 0.007451817649030921]\n",
" B: [-3.597917991072113, -1.5787159301449987, 0.28387609057144564, 3.0613860010767477]\n",
" B: [-1.0798303035395174, -0.06880694215947386, -0.2669312876106363, -0.3000779512850572]\n",
" B: [-1.3394551212059678, -0.7053379424304421, 0.44160810884651497, -0.3187799976376953]\n",
" B: [-3.270241523195321, 1.927780354010675, 0.003047457202140131, -2.4450221348130854]\n",
" B: [-1.1777824921625586, 0.1877100894261692, -0.5932216634918489, -0.004957734989940532]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.84577391627276, 0.0, 0.0, 6.772342320993563]\n",
" B: [6.84577391627276, 0.0, 0.0, -6.772342320993563]\n",
" 6 Outgoing Particles:\n",
" A: [-1.0594956991232163, -0.09579189209396338, 0.21296650876679918, 0.2607687021353065]\n",
" B: [-1.8300488673592041, 0.8497425690197566, -0.8227483588311224, 0.9747315329664396]\n",
" B: [-2.860723394379955, 0.6743651794772785, 0.1320397309862766, 2.5906631300310776]\n",
" B: [-2.557528905485892, -1.3508678766931497, 1.2829278224554168, -1.4388211440218013]\n",
" B: [-3.790115184858299, 0.47588521284738383, -1.0334447791446917, -3.474262262286086]\n",
" B: [-1.5936357813389537, -0.553333192557306, 0.2282590757673212, 1.086920041175065]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.25909007687458, 0.0, 0.0, 6.178689876537731]\n",
" B: [6.25909007687458, 0.0, 0.0, -6.178689876537731]\n",
" 6 Outgoing Particles:\n",
" A: [-2.15208406752572, -0.27987613820502405, 0.20983197963180572, -1.873260718983155]\n",
" B: [-3.1436326945514232, -2.0821664144960677, -1.9679549582157083, 0.8210741885063981]\n",
" B: [-2.206056617746511, 1.7689323832663284, -0.4273996865759156, -0.7449117612507478]\n",
" B: [-1.8709609004510535, 0.5332842722412897, 1.48760475220818, -0.055988188078690854]\n",
" B: [-1.0916331546903268, 0.018218872767661307, 0.4300802089857822, 0.07976234031782706]\n",
" B: [-2.0538127187841235, 0.04160702442581186, 0.2678377039658561, 1.7733241394883685]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.8752382625158255, 0.0, 0.0, 6.802124753807565]\n",
" B: [6.8752382625158255, 0.0, 0.0, -6.802124753807565]\n",
" 6 Outgoing Particles:\n",
" A: [-3.815955448364548, 1.7284392485789066, 3.22998101457395, -0.37581430702794955]\n",
" B: [-3.705003390432734, 0.8773209536576554, -3.1633610279519866, -1.3966048382509024]\n",
" B: [-1.4798429985544235, -0.876885056483666, -0.05155962504198175, 0.6467994303891397]\n",
" B: [-1.196598159149068, -0.6492448407423084, 0.0066213036625077295, -0.10141227532326653]\n",
" B: [-1.307725757451199, -0.47623875265044, -0.08939192779758245, -0.6894580410872709]\n",
" B: [-2.2453507710796776, -0.6033915523601473, 0.06771026255509205, 1.91649003130025]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.591382068439754, 0.0, 0.0, 6.515083849970707]\n",
" B: [6.591382068439754, 0.0, 0.0, -6.515083849970707]\n",
" 6 Outgoing Particles:\n",
" A: [-2.166341377746586, 0.738656605699622, 1.1097711420427974, -1.3841348908550482]\n",
" B: [-1.9136122405957643, -1.3687809690739081, -0.8052302154690981, 0.37410528752561706]\n",
" B: [-1.020282522629639, 0.01566959851558055, -0.04103060943002397, -0.1976040959992001]\n",
" B: [-3.3680104240574718, -0.44221430614525714, -3.1855463435158966, -0.015336796039828009]\n",
" B: [-1.1380460439601876, 0.33787512483866744, -0.3053034033656307, 0.2962752606648943]\n",
" B: [-3.576471527889859, 0.7187939461652956, 3.227339429737853, 0.9266952347035636]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [7.366791305680796, 0.0, 0.0, 7.298603574756898]\n",
" B: [7.366791305680796, 0.0, 0.0, -7.298603574756898]\n",
" 6 Outgoing Particles:\n",
" A: [-1.1161936134323496, 0.1815174250263101, -0.30155987378038246, 0.34928677273057857]\n",
" B: [-1.1768168637671912, -0.488638136596838, -0.0387546058981897, 0.38030091090042567]\n",
" B: [-3.8756829146246745, -0.22123631639903027, -3.6727532274395425, -0.694878606198396]\n",
" B: [-1.4161987387916468, -0.42653096897021076, -0.26480462532703347, -0.8680833546784509]\n",
" B: [-3.4638938410201177, 2.8217659294852746, 1.2824429941168167, 1.179634497585545]\n",
" B: [-3.6847966397256138, -1.8668779325455054, 2.995429338328331, -0.346260220339702]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [4.762032860651893, 0.0, 0.0, 4.655851905497903]\n",
" B: [4.762032860651893, 0.0, 0.0, -4.655851905497903]\n",
" 6 Outgoing Particles:\n",
" A: [-2.656166654414924, 2.017338594394486, -1.384735065574992, 0.2609120345236529]\n",
" B: [-1.031990140619295, -0.035004877965791346, -0.20112979442869375, 0.15272561883031827]\n",
" B: [-1.7319386082994335, -1.0359644740176492, 0.8025718625008718, -0.5312883934487891]\n",
" B: [-1.7450617894727098, -0.49163856285061436, 1.1666756465784553, 0.6651316473275205]\n",
" B: [-1.0945973465763637, -0.42438631366397905, -0.017047995524507212, 0.1332252744613839]\n",
" B: [-1.2643111819210613, -0.030344365896452122, -0.3663346535511349, -0.6807061816940867]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.12211537837656, 0.0, 0.0, 6.039892110473065]\n",
" B: [6.12211537837656, 0.0, 0.0, -6.039892110473065]\n",
" 6 Outgoing Particles:\n",
" A: [-2.09449973649211, -1.247911941781509, -0.776547530016726, 1.1075282684200622]\n",
" B: [-2.857971140758051, 1.4507115887866229, 2.2078617054725442, 0.43449006556414854]\n",
" B: [-2.068918524386865, -0.43350532192333185, 1.7407499017717505, -0.24957318745593]\n",
" B: [-1.0503370840395667, 0.28162676024293815, -0.11219953076948735, 0.10632790470480236]\n",
" B: [-1.6648953051752136, 0.3171875953909028, -1.2925202016854087, 0.025689195388605857]\n",
" B: [-2.5076089659013125, -0.36810868071562286, -1.7673443447726724, -1.4244622466216894]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [7.431058837653249, 0.0, 0.0, 7.363466265874004]\n",
" B: [7.431058837653249, 0.0, 0.0, -7.363466265874004]\n",
" 6 Outgoing Particles:\n",
" A: [-1.4340725727125623, 0.9525417282027518, 0.38239995291064965, -0.05476016666222433]\n",
" B: [-3.5734117962040854, 2.3267511116139916, 2.49915109639257, -0.33127771922267657]\n",
" B: [-2.3529075757582945, 1.185265706342765, -1.375530715171772, 1.1132091075119688]\n",
" B: [-2.710381815585542, -2.1195780947035594, -1.2974231675570782, -0.4126153305389483]\n",
" B: [-2.374272199256637, -1.2400410368129877, 1.6839473809113144, -0.5136028830766439]\n",
" B: [-2.4170717157893766, -1.104939414642962, -1.8925445474856835, 0.1990469919885247]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [4.370360958267613, 0.0, 0.0, 4.254415930013168]\n",
" B: [4.370360958267613, 0.0, 0.0, -4.254415930013168]\n",
" 6 Outgoing Particles:\n",
" A: [-1.0037967551530176, -0.04979456910726583, -0.007092097585518878, 0.07126098999442977]\n",
" B: [-2.2427356029926337, 0.4432886498747459, -1.2315068062419472, -1.522087101319342]\n",
" B: [-1.576810353663218, -0.08400160217698217, 1.025238316808337, 0.6543401378482231]\n",
" B: [-1.1878570602356244, 0.3852696171578499, -0.47734716319323317, 0.18630996601909597]\n",
" B: [-1.6436772930583505, -1.0018521094453126, 0.4216069097815019, 0.7212593210074284]\n",
" B: [-1.0858448514323804, 0.3070900136969648, 0.26910084043086047, -0.11108331354983517]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [5.940760429560125, 0.0, 0.0, 5.855991332082674]\n",
" B: [5.940760429560125, 0.0, 0.0, -5.855991332082674]\n",
" 6 Outgoing Particles:\n",
" A: [-2.5515863925730233, 0.0574036477190863, 1.9321385747234918, 1.3319678930281418]\n",
" B: [-3.2707523737124977, -2.710802011299676, -1.41016923110446, -0.6006632045712658]\n",
" B: [-1.6965910302662786, 0.9846458960035911, 0.9504416414719069, -0.07452697242920955]\n",
" B: [-1.0283520810617242, 0.1620200166783027, 0.15874691422324994, -0.07782630689000514]\n",
" B: [-1.277724475991329, 0.26836143674120055, -0.33222621981983513, -0.6709602929248032]\n",
" B: [-2.0565145055153993, 1.2383710141574962, -1.298931679494354, 0.09200888378714224]\n",
"\n",
" Input for ABC Process: 'AB->ABBBBB':\n",
" 2 Incoming particles:\n",
" A: [6.732994664701373, 0.0, 0.0, 6.65831939417877]\n",
" B: [6.732994664701373, 0.0, 0.0, -6.65831939417877]\n",
" 6 Outgoing Particles:\n",
" A: [-1.602557260532173, -0.06659157948757613, 0.9308846463293637, -0.8349904850080558]\n",
" B: [-1.3205375883536927, 0.7078592481114431, -0.05631226213188625, -0.48947291677035515]\n",
" B: [-1.7625153098951976, 0.12706601232750347, 0.34097061443470383, 1.405010137407617]\n",
" B: [-2.7792473938949334, 1.6510422215054068, 1.7155538904747691, -1.0272051928194055]\n",
" B: [-2.722083339444658, -0.5204063912580275, -2.061236049180356, -1.3748530264647703]\n",
" B: [-3.279048437282091, -1.89896951119875, -0.8698608399265956, 2.3215114836549695]\n"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@time inputs = [gen_process_input(process) for _ in 1:1000]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Internal error: stack overflow in type inference of materialize(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(MetagraphOptimization.compute__bad8f2ac_7bfc_11ee_176b_b72dc8919aad), Tuple{Array{MetagraphOptimization.ABCProcessInput, 1}}}).\n",
"This might be caused by recursion over very long tuples or argument lists.\n"
]
},
{
"ename": "LoadError",
"evalue": "StackOverflowError:",
"output_type": "error",
"traceback": [
"StackOverflowError:",
"",
"Stacktrace:",
" [1] argtypes_to_type",
" @ ./compiler/typeutils.jl:71 [inlined]",
" [2] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1948",
" [3] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
" [4] abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1566",
" [5] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1855",
" [6] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
" [7] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
" [8] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
" [9] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
" [10] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2682",
" [11] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
" [12] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
" [13] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/typeinfer.jl:246",
" [14] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/typeinfer.jl:216",
" [15] typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/typeinfer.jl:932",
" [16] abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:611",
" [17] abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:152",
" [18] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1949",
"--- the last 16 lines are repeated 413 more times ---",
" [6627] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
" [6628] abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1566",
" [6629] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1855",
" [6630] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
" [6631] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
" [6632] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
" [6633] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
" [6634] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2658",
" [6635] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
" [6636] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
" [6637] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/typeinfer.jl:246",
" [6638] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/typeinfer.jl:216",
" [6639] typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/typeinfer.jl:932",
" [6640] abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:611",
" [6641] abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:152",
" [6642] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1949",
" [6643] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
" [6644] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
" [6645] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
" [6646] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
" [6647] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2682",
" [6648] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
" [6649] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
" [6650] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/typeinfer.jl:246",
" [6651] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
" @ Core.Compiler ./compiler/typeinfer.jl:216",
" [6652] typeinf",
" @ ./compiler/typeinfer.jl:12 [inlined]",
" [6653] typeinf_type(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector)",
" @ Core.Compiler ./compiler/typeinfer.jl:1079",
" [6654] return_type(interp::Core.Compiler.NativeInterpreter, t::DataType)",
" @ Core.Compiler ./compiler/typeinfer.jl:1140",
" [6655] return_type(f::Any, t::DataType)",
" @ Core.Compiler ./compiler/typeinfer.jl:1112",
" [6656] combine_eltypes(f::Function, args::Tuple{Vector{ABCProcessInput}})",
" @ Base.Broadcast ./broadcast.jl:730",
" [6657] copy(bc::Base.Broadcast.Broadcasted{Style}) where Style",
" @ Base.Broadcast ./broadcast.jl:895",
" [6658] materialize(bc::Base.Broadcast.Broadcasted)",
" @ Base.Broadcast ./broadcast.jl:873",
" [6659] var\"##core#302\"()",
" @ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:489",
" [6660] var\"##sample#303\"(::Tuple{}, __params::BenchmarkTools.Parameters)",
" @ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:495",
" [6661] _run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; verbose::Bool, pad::String, kwargs::Base.Pairs{Symbol, Integer, NTuple{4, Symbol}, NamedTuple{(:samples, :evals, :gctrial, :gcsample), Tuple{Int64, Int64, Bool, Bool}}})",
" @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:99",
" [6662] #invokelatest#2",
" @ ./essentials.jl:821 [inlined]",
" [6663] invokelatest",
" @ ./essentials.jl:816 [inlined]",
" [6664] #run_result#45",
" @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]",
" [6665] run_result",
" @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]",
" [6666] run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; progressid::Nothing, nleaves::Float64, ndone::Float64, kwargs::Base.Pairs{Symbol, Integer, NTuple{5, Symbol}, NamedTuple{(:verbose, :samples, :evals, :gctrial, :gcsample), Tuple{Bool, Int64, Int64, Bool, Bool}}})",
" @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117",
" [6667] run (repeats 2 times)",
" @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117 [inlined]",
" [6668] #warmup#54",
" @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:169 [inlined]",
" [6669] warmup(item::BenchmarkTools.Benchmark)",
" @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:168"
]
}
],
"source": [
"using BenchmarkTools\n",
"#compute_bench = @benchmark compute_AB_AB5.(inputs)\n",
"compute_bench_reduced = @benchmark compute_AB_AB5_reduced.(inputs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.9.3",
"language": "julia",
"name": "julia-1.9"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.9.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,409 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "20768e45-df62-4638-ba33-b0ccf239f1aa",
"metadata": {},
"outputs": [],
"source": [
"using Revise\n",
"using MetagraphOptimization\n",
"using BenchmarkTools"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ff5f4a49",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1 NUMA nodes\n",
"CUDA is non-functional\n"
]
},
{
"data": {
"text/plain": [
"Machine(MetagraphOptimization.AbstractDevice[MetagraphOptimization.NumaNode(0x0000, 0x0001, MetagraphOptimization.LocalVariables(), -1.0, UUID(\"a89974f6-6212-11ee-0866-0f591a3b69ea\"))], [-1.0;;])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get our machine's info\n",
"machine = get_machine_info()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "9df482a4-ca44-44c5-9ea7-7a2977d529be",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ABCModel()"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a model identifier\n",
"model = ABCModel()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "30b16872-07f7-4d47-8ff8-8c3a849c9d4e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ABC Process: 'AB->ABBB'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a process in our model\n",
"process_str = \"AB->ABBB\"\n",
"process = parse_process(process_str, model)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "043bd9e2-f89a-4362-885a-8c89d4cdd76f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total: 280, ComputeTaskP"
]
},
{
"data": {
"text/plain": [
"Graph:\n",
" Nodes: \n",
" Edges: 385\n",
" Total Compute Effort: 1075.0\n",
" Total Data Transfer: 10944.0\n",
" Total Compute Intensity: 0.09822733918128655\n"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
": 6, ComputeTaskU: 6, \n",
" ComputeTaskV: 64, ComputeTaskSum: 1, ComputeTaskS2: 24, \n",
" ComputeTaskS1: 36, DataTask: 143"
]
}
],
"source": [
"# Read the graph (of the same process) from a file\n",
"graph = parse_dag(\"../input/$process_str.txt\", model)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "02f01ad3-fd10-48d5-a0e0-c03dc83c80a4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Input for ABC Process: 'AB->ABBB':\n",
" 2 Incoming particles:\n",
" A: [5.77986599979293, 0.0, 0.0, 5.692701553354288]\n",
" B: [5.77986599979293, 0.0, 0.0, -5.692701553354288]\n",
" 4 Outgoing Particles:\n",
" A: [-3.8835293143673746, -1.4292027910861678, 2.8576090179942106, 1.968057422378813]\n",
" B: [-1.1554024905063585, -0.1464656500147254, -0.2082400426692148, 0.5197487980391896]\n",
" B: [-2.849749730594798, -1.0177034035100576, -2.464951858896686, -0.09677625137882176]\n",
" B: [-3.6710504641173287, 2.5933718446109513, -0.1844171164283155, -2.391029969039186]\n"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Generate some random input data for our process\n",
"input_data = gen_process_input(process)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "083fb1be-ce2a-47f9-afb9-60a6fdfaed0b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"compute__af4450a2_6212_11ee_2601_cde7cf2aedc1 (generic function with 1 method)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Get the function computing the result of the process from a ProcessInput\n",
"AB_AB3_compute = get_compute_function(graph, process, machine)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "a40c9500-8f79-4f04-b3c5-59b72a6b7ba9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-1.8924431710735022e-13"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Actually compute a result using the generated function and the input data\n",
"result = AB_AB3_compute(input_data)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "80c70010",
"metadata": {},
"outputs": [],
"source": [
"include(\"../examples/profiling_utilities.jl\")\n",
"\n",
"# We can also mutate the graph by applying some operations to it\n",
"reduce_all!(graph)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "5b192b44",
"metadata": {},
"outputs": [],
"source": [
"# The result should be the same as before (we can use execute to save having to generate the function ourselves)\n",
"@assert result ≈ execute(graph, process, machine, input_data)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9b2f4a3f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1000-element Vector{Float64}:\n",
" -2.1491995259940396e-11\n",
" -1.04995646459455e-11\n",
" 5.821760691187782e-15\n",
" -6.556969485683705e-14\n",
" -1.3588086164373753e-14\n",
" -1.8789662441593694e-13\n",
" -2.131973301835892e-13\n",
" -5.3359759072004825e-12\n",
" -9.053914191490223e-13\n",
" -5.61107901706923e-13\n",
" -5.063492275603428e-11\n",
" 2.9168508985811397e-15\n",
" -1.6420151378194157e-13\n",
" ⋮\n",
" 1.0931677247833436e-13\n",
" -7.704755306462797e-16\n",
" -1.8385907037491397e-12\n",
" -6.036215596560059e-14\n",
" -9.98872401400362e-12\n",
" 3.4861755637292935e-13\n",
" -1.1051119822969222e-10\n",
" -2.496572513216201e-12\n",
" -3.8682427847201926e-11\n",
" 7.904149696653438e-15\n",
" -7.606811743178716e-11\n",
" -5.100594937480292e-13"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Now we can generate a function and use it on lots of inputs\n",
"inputs = [gen_process_input(process) for _ in 1:1000]\n",
"AB_AB3_reduced_compute = get_compute_function(graph, process, machine)\n",
"\n",
"results = AB_AB3_reduced_compute.(inputs)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d43e4ff0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BenchmarkTools.Trial: 879 samples with 1 evaluation.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m4.567 ms\u001b[22m\u001b[39m … \u001b[35m14.334 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 54.51%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m4.998 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m5.686 ms\u001b[22m\u001b[39m ± \u001b[32m 1.414 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m9.09% ± 14.49%\n",
"\n",
" \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m▇\u001b[39m█\u001b[34m▅\u001b[39m\u001b[39m▄\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
" \u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[32m█\u001b[39m\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▄\u001b[39m▅\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▁\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▁\u001b[39m▄\u001b[39m \u001b[39m▇\n",
" 4.57 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 10 ms \u001b[0m\u001b[1m<\u001b[22m\n",
"\n",
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m6.17 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m143006\u001b[39m."
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@benchmark results = AB_AB3_compute.($inputs)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e18d9546",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BenchmarkTools.Trial: 1089 samples with 1 evaluation.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m3.637 ms\u001b[22m\u001b[39m … \u001b[35m10.921 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 59.52%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m4.098 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m 0.00%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m4.587 ms\u001b[22m\u001b[39m ± \u001b[32m 1.334 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m10.21% ± 15.77%\n",
"\n",
" \u001b[39m \u001b[39m▂\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[34m█\u001b[39m\u001b[39m▆\u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
" \u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[32m▆\u001b[39m\u001b[39m▅\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m \u001b[39m█\n",
" 3.64 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 8.78 ms \u001b[0m\u001b[1m<\u001b[22m\n",
"\n",
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m5.26 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m123006\u001b[39m."
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@benchmark results = AB_AB3_reduced_compute.($inputs)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "13efed12-3547-400b-a7a2-5dfae9a973a2",
"metadata": {},
"outputs": [],
"source": [
"# Set a different caching strategy\n",
"MetagraphOptimization.set_cache_strategy(machine.devices[1], MetagraphOptimization.Dictionary())"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "ef62716b-a219-4f6e-9150-f984d3734839",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BenchmarkTools.Trial: 331 samples with 1 evaluation.\n",
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m12.148 ms\u001b[22m\u001b[39m … \u001b[35m24.164 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 13.35%\n",
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m15.412 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m17.47%\n",
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m15.117 ms\u001b[22m\u001b[39m ± \u001b[32m 2.194 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m12.31% ± 8.95%\n",
"\n",
" \u001b[39m \u001b[39m▄\u001b[39m█\u001b[39m▄\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m▄\u001b[39m\u001b[39m▄\u001b[34m▂\u001b[39m\u001b[39m \u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
" \u001b[39m▅\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▅\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▅\u001b[39m▂\u001b[39m▃\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[32m█\u001b[39m\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▇\u001b[39m█\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▆\u001b[39m▅\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▃\n",
" 12.1 ms\u001b[90m Histogram: frequency by time\u001b[39m 21 ms \u001b[0m\u001b[1m<\u001b[22m\n",
"\n",
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m27.46 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m118013\u001b[39m."
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ... and bench again\n",
"AB_AB3_reduced_dict_compute = get_compute_function(graph, process, machine)\n",
"@benchmark results = AB_AB3_reduced_dict_compute.($inputs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5461ffd4-6a0e-4f1f-b1f1-3a2854a8ae88",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.9.3",
"language": "julia",
"name": "julia-1.9"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.9.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

70
notebooks/profiling.ipynb Normal file
View File

@ -0,0 +1,70 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"using Revise; using MetagraphOptimization; using BenchmarkTools; using ProfileView\n",
"using Base.Threads\n",
"nthreads()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = ABCModel()\n",
"process_str = \"AB->ABBBBB\"\n",
"process = parse_process(process_str, model)\n",
"graph = parse_dag(\"../input/$process_str.txt\", model)\n",
"print(graph)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"include(\"../examples/profiling_utilities.jl\")\n",
"@ProfileView.profview reduce_all!(graph)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"@ProfileView.profview comp_func = get_compute_function(graph, process)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.9.3",
"language": "julia",
"name": "julia-1.9"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.9.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -6,20 +6,20 @@ julia --project=./examples -t 4 -e 'import Pkg; Pkg.instantiate()'
#for i in $(seq $minthreads $maxthreads)
# printf "(AB->AB, $i) "
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->AB.txt"))'
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->AB.txt"), ABCModel())'
#end
#for i in $(seq $minthreads $maxthreads)
# printf "(AB->ABBB, $i) "
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBB.txt"))'
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBB.txt"), ABCModel())'
#end
#for i in $(seq $minthreads $maxthreads)
# printf "(AB->ABBBBB, $i) "
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBBBB.txt"))'
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBBBB.txt"), ABCModel())'
#end
for i in $(seq $minthreads $maxthreads)
printf "(AB->ABBBBBBB, $i) "
julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBBBBBB.txt"))'
# parse_dag takes the model as its second argument, so ABCModel() belongs inside the call.
julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBBBBBB.txt", ABCModel()))'
end

View File

@ -5,6 +5,7 @@ A module containing tools to work on DAGs.
"""
module MetagraphOptimization
# graph types
export DAG
export Node
export Edge
@ -18,6 +19,7 @@ export FusedComputeTask
export PossibleOperations
export GraphProperties
# graph functions
export make_node
export make_edge
export insert_node
@ -27,10 +29,13 @@ export is_exit_node
export parents
export children
export compute
export data
export compute_effort
export get_properties
export get_exit_node
export is_valid
export is_valid, is_scheduled
# graph operation related
export Operation
export AppliedOperation
export NodeFusion
@ -42,7 +47,10 @@ export can_pop
export reset_graph!
export get_operations
export parse_abc
# ABC model
export ParticleValue
export ParticleA, ParticleB, ParticleC
export ABCProcessDescription, ABCProcessInput, ABCModel
export ComputeTaskP
export ComputeTaskS1
export ComputeTaskS2
@ -50,10 +58,19 @@ export ComputeTaskV
export ComputeTaskU
export ComputeTaskSum
# code generation related
export execute
export gen_particles
export ParticleValue
export Particle
export parse_dag, parse_process
export gen_process_input
export get_compute_function
# estimator
export cost_type, graph_cost, operation_effect
export GlobalMetricEstimator, CDCost
# machine info
export Machine
export get_machine_info
export ==, in, show, isempty, delete!, length
@ -72,6 +89,7 @@ import Base.insert!
import Base.collect
include("devices/interface.jl")
include("task/type.jl")
include("node/type.jl")
include("diff/type.jl")
@ -111,15 +129,37 @@ include("properties/utility.jl")
include("task/create.jl")
include("task/compare.jl")
include("task/compute.jl")
include("task/print.jl")
include("task/properties.jl")
include("estimator/interface.jl")
include("estimator/global_metric.jl")
include("models/interface.jl")
include("models/print.jl")
include("models/abc/types.jl")
include("models/abc/particle.jl")
include("models/abc/compute.jl")
include("models/abc/create.jl")
include("models/abc/properties.jl")
include("models/abc/parse.jl")
include("models/abc/print.jl")
include("devices/measure.jl")
include("devices/detect.jl")
include("devices/impl.jl")
include("devices/numa/impl.jl")
include("devices/cuda/impl.jl")
# AMDGPU can currently not be used because of an incompatibility with the newest ROCm drivers
# include("devices/rocm/impl.jl")
# oneapi seems also broken for now
# include("devices/oneapi/impl.jl")
include("scheduler/interface.jl")
include("scheduler/greedy.jl")
include("code_gen/main.jl")

View File

@ -1,126 +1,158 @@
using DataStructures
"""
gen_code(graph::DAG)
Generate the code for a given graph. The return value is a tuple of:
Generate the code for a given graph. The return value is a named tuple of:
- `code::Expr`: The julia expression containing the code for the whole graph.
- `inputSymbols::Dict{String, Symbol}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
- `inputSymbols::Dict{String, Vector{Symbol}}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
- `outputSymbol::Symbol`: The symbol of the final calculated value
See also: [`execute`](@ref)
"""
function gen_code(graph::DAG)
code = Vector{Expr}()
sizehint!(code, length(graph.nodes))
function gen_code(graph::DAG, machine::Machine)
sched = schedule_dag(GreedyScheduler(), graph, machine)
nodeQueue = PriorityQueue{Node, Int}()
inputSyms = Dict{String, Symbol}()
codeAcc = Vector{Expr}()
sizehint!(codeAcc, length(graph.nodes))
# use a priority equal to the number of unseen children -> 0 are nodes that can be added
for node in sched
# TODO: this is kind of ugly, should init nodes be scheduled differently from the rest?
if (node isa DataTaskNode && length(node.children) == 0)
push!(codeAcc, get_init_expression(node, entry_device(machine)))
continue
end
push!(codeAcc, get_expression(node))
end
# get inSymbols
inputSyms = Dict{String, Vector{Symbol}}()
for node in get_entry_nodes(graph)
enqueue!(nodeQueue, node => 0)
push!(inputSyms, node.name => Symbol("data_$(to_var_name(node.id))_in"))
if !haskey(inputSyms, node.name)
inputSyms[node.name] = Vector{Symbol}()
end
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
end
node = nothing
while !isempty(nodeQueue)
@assert peek(nodeQueue)[2] == 0
node = dequeue!(nodeQueue)
# get outSymbol
outSym = Symbol(to_var_name(get_exit_node(graph).id))
push!(code, get_expression(node))
for parent in node.parents
# reduce the priority of all parents by one
if (!haskey(nodeQueue, parent))
enqueue!(nodeQueue, parent => length(parent.children) - 1)
else
nodeQueue[parent] = nodeQueue[parent] - 1
end
return (code = Expr(:block, codeAcc...), inputSymbols = inputSyms, outputSymbol = outSym)
end
function gen_cache_init_code(machine::Machine)
initializeCaches = Vector{Expr}()
for device in machine.devices
push!(initializeCaches, gen_cache_init_code(device))
end
return Expr(:block, initializeCaches...)
end
function gen_input_assignment_code(
inputSymbols::Dict{String, Vector{Symbol}},
processDescription::AbstractProcessDescription,
machine::Machine,
processInputSymbol::Symbol = :input,
)
@assert length(inputSymbols) >=
sum(values(in_particles(processDescription))) + sum(values(out_particles(processDescription))) "Number of input Symbols is smaller than the number of particles in the process description"
assignInputs = Vector{Expr}()
for (name, symbols) in inputSymbols
type = type_from_name(name)
index = parse(Int, name[2:end])
p = nothing
if (index > in_particles(processDescription)[type])
index -= in_particles(processDescription)[type]
@assert index <= out_particles(processDescription)[type] "Too few particles of type $type in input particles for this process"
p = "filter(x -> typeof(x) <: $type, out_particles($(processInputSymbol)))[$(index)]"
else
p = "filter(x -> typeof(x) <: $type, in_particles($(processInputSymbol)))[$(index)]"
end
for symbol in symbols
# TODO: how to get the "default" cpu device?
device = entry_device(machine)
evalExpr = eval(gen_access_expr(device, symbol))
push!(assignInputs, Meta.parse("$(evalExpr)::ParticleValue{$type} = ParticleValue($p, 1.0)"))
end
end
# node is now the last node we looked at -> the output node
outSym = Symbol("data_$(to_var_name(node.id))")
return Expr(:block, assignInputs...)
end
return (
code = Expr(:block, code...),
inputSymbols = inputSyms,
outputSymbol = outSym,
"""
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
"""
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
(code, inputSymbols, outputSymbol) = gen_code(graph, machine)
initCaches = gen_cache_init_code(machine)
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
functionId = to_var_name(UUIDs.uuid1(rng[1]))
resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
expr = Meta.parse(
"function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
)
func = eval(expr)
return func
end
"""
execute(generated_code, input::Dict{ParticleType, Vector{Particle}})
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
Execute the given `generated_code` (as returned by [`gen_code`](@ref)) on the given input particles.
Execute the code of the given `graph` on the given input particles.
This is essentially shorthand for
```julia
compute_graph = get_compute_function(graph, process)
result = compute_graph(particles)
```
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
"""
function execute(generated_code, input::Dict{ParticleType, Vector{Particle}})
(code, inputSymbols, outputSymbol) = generated_code
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
(code, inputSymbols, outputSymbol) = gen_code(graph, machine)
assignInputs = Vector{Expr}()
for (name, symbol) in inputSymbols
type = nothing
if startswith(name, "A")
type = A
elseif startswith(name, "B")
type = B
else
type = C
initCaches = gen_cache_init_code(machine)
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
functionId = to_var_name(UUIDs.uuid1(rng[1]))
resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
expr = Meta.parse(
"function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
)
func = eval(expr)
result = 0
try
result = @eval $func($input)
catch e
println("Error while evaluating: $e")
# if we find a uuid in the exception we can color it in so it's easier to spot
uuidRegex = r"[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}"
m = match(uuidRegex, string(e))
functionStr = string(expr)
if (isa(m, RegexMatch))
functionStr = replace(functionStr, m.match => "\033[31m$(m.match)\033[0m")
end
index = parse(Int, name[2:end])
push!(
assignInputs,
Meta.parse(
"$(symbol) = ParticleValue(Particle($(input[type][index]).P0, $(input[type][index]).P1, $(input[type][index]).P2, $(input[type][index]).P3, $(type)), 1.0)",
),
)
println("Function:\n$functionStr")
@assert false
end
assignInputs = Expr(:block, assignInputs...)
eval(assignInputs)
eval(code)
eval(Meta.parse("result = $outputSymbol"))
return result
end
"""
execute(graph::DAG, input::Dict{ParticleType, Vector{Particle}})
Execute the given `generated_code` (as returned by [`gen_code`](@ref)) on the given input particles.
The input particles should be sorted correctly into the dictionary to their according [`ParticleType`](@ref)s.
See also: [`gen_particles`](@ref)
"""
function execute(graph::DAG, input::Dict{ParticleType, Vector{Particle}})
(code, inputSymbols, outputSymbol) = gen_code(graph)
assignInputs = Vector{Expr}()
for (name, symbol) in inputSymbols
type = nothing
if startswith(name, "A")
type = A
elseif startswith(name, "B")
type = B
else
type = C
end
index = parse(Int, name[2:end])
push!(
assignInputs,
Meta.parse(
"$(symbol) = ParticleValue(Particle($(input[type][index]).P0, $(input[type][index]).P1, $(input[type][index]).P2, $(input[type][index]).P3, $(type)), 1.0)",
),
)
end
assignInputs = Expr(:block, assignInputs...)
eval(assignInputs)
eval(code)
eval(Meta.parse("result = $outputSymbol"))
return result
end

53
src/devices/cuda/impl.jl Normal file
View File

@ -0,0 +1,53 @@
using CUDA
"""
CUDAGPU <: AbstractGPU
Representation of a specific CUDA GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct CUDAGPU <: AbstractGPU
    # Handle of the underlying device, one element of `CUDA.devices()`.
    device::Any # TODO: what's the cuda device type?
    # Caching strategy set for this device.
    cacheStrategy::CacheStrategy
    # `get_devices` stores -1 here.
    FLOPS::Float64
end

# Register the device type in the global list of implemented device types.
push!(DEVICE_TYPES, CUDAGPU)

# Only the local-variable strategy is registered for CUDA GPUs.
CACHE_STRATEGIES[CUDAGPU] = CacheStrategy[LocalVariables()]

default_strategy(::Type{<:CUDAGPU}) = LocalVariables()
function measure_device!(device::CUDAGPU; verbose::Bool)
    # Placeholder: nothing is measured yet and the device is not modified.
    verbose && println("Measuring CUDA GPU $(device.device)")

    # TODO implement
    return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: CUDAGPU}
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
    # When CUDA reports itself as non-functional, no devices are returned.
    if !CUDA.functional()
        verbose && println("CUDA is non-functional")
        return Vector{AbstractDevice}()
    end

    found = CUDA.devices()
    verbose && println("Found $(length(found)) CUDA devices")

    # Every device starts with the default strategy and a FLOPS value of -1.
    return AbstractDevice[CUDAGPU(gpu, default_strategy(CUDAGPU), -1) for gpu in found]
end

23
src/devices/detect.jl Normal file
View File

@ -0,0 +1,23 @@
"""
get_machine_info(verbose::Bool)
Return the [`Machine`](@ref) currently running on. The parameter `verbose` defaults to true when interactive.
"""
function get_machine_info(; verbose::Bool = Base.is_interactive)
devices = Vector{AbstractDevice}()
for device in device_types()
devs = get_devices(device, verbose = verbose)
for dev in devs
push!(devices, dev)
end
end
noDevices = length(devices)
@assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"
transferRates = Matrix{Float64}(undef, noDevices, noDevices)
fill!(transferRates, -1)
return Machine(devices, transferRates)
end

52
src/devices/impl.jl Normal file
View File

@ -0,0 +1,52 @@
"""
device_types()
Return a vector of available and implemented device types.
See also: [`DEVICE_TYPES`](@ref)
"""
device_types() = DEVICE_TYPES
"""
entry_device(machine::Machine)
Return the "entry" device, i.e., the device that starts CPU threads and GPU kernels, and takes input values and returns the output value.
"""
entry_device(machine::Machine) = first(machine.devices)
"""
strategies(t::Type{T}) where {T <: AbstractDevice}
Return a vector of available [`CacheStrategy`](@ref)s for the given [`AbstractDevice`](@ref).
The caching strategies are used in code generation.
"""
function strategies(t::Type{T}) where {T <: AbstractDevice}
    # The fallback closure only runs when no strategies were registered for `t`.
    return get(CACHE_STRATEGIES, t) do
        error("Trying to get strategies for $T, but it has no strategies defined!")
    end
end
"""
cache_strategy(device::AbstractDevice)
Returns the cache strategy set for this device.
"""
cache_strategy(device::AbstractDevice) = device.cacheStrategy
"""
set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy)
Sets the device's cache strategy. After this call, [`cache_strategy`](@ref) should return `cacheStrategy` on the given device.
"""
function set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy)
    # Overwrites the `cacheStrategy` field of the device in place.
    setproperty!(device, :cacheStrategy, cacheStrategy)
    return nothing
end

108
src/devices/interface.jl Normal file
View File

@ -0,0 +1,108 @@
"""
AbstractDevice
Abstract base type for every device, like GPUs, CPUs or any other compute devices.
Every implementation needs to implement various functions and needs a member `cacheStrategy`.
"""
abstract type AbstractDevice end
# Supertype of CPU devices (e.g. `NumaNode`).
abstract type AbstractCPU <: AbstractDevice end
# Supertype of GPU devices (e.g. `CUDAGPU`).
abstract type AbstractGPU <: AbstractDevice end
"""
Machine
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
See also: [`Scheduler`](@ref)
"""
struct Machine
    # All devices of the machine; `entry_device` returns the first element.
    devices::Vector{AbstractDevice}
    # One row and column per device; `get_machine_info` fills every entry with -1.
    transferRates::Matrix{Float64}
end
"""
CacheStrategy
Abstract base type for caching strategies.
See also: [`strategies`](@ref)
"""
abstract type CacheStrategy end
"""
LocalVariables <: CacheStrategy
A caching strategy relying solely on local variables for every input and output.
Implements the [`CacheStrategy`](@ref) interface.
"""
struct LocalVariables <: CacheStrategy end
"""
Dictionary <: CacheStrategy
A caching strategy relying on a dictionary of Symbols to store every input and output.
Implements the [`CacheStrategy`](@ref) interface.
"""
struct Dictionary <: CacheStrategy end
"""
DEVICE_TYPES::Vector{Type}
Global vector of available and implemented device types. Each implementation of a [`AbstractDevice`](@ref) should add its concrete type to this vector.
See also: [`device_types`](@ref), [`get_devices`](@ref)
"""
DEVICE_TYPES = Vector{Type}()
"""
CACHE_STRATEGIES::Dict{Type{AbstractDevice}, Symbol}
Global dictionary of available caching strategies per device. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary.
See also: [`strategies`](@ref)
"""
CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}()
"""
default_strategy(deviceType::Type{T}) where {T <: AbstractDevice}
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device type.
See also: [`cache_strategy`](@ref), [`set_cache_strategy`](@ref)
"""
function default_strategy end
"""
get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector{Type}` of the devices for the given [`AbstractDevice`](@ref) Type available on the current machine.
"""
function get_devices end
"""
measure_device!(device::AbstractDevice; verbose::Bool)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes into it.
"""
function measure_device! end
"""
gen_cache_init_code(device::AbstractDevice)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache.
The strategy is a symbol
"""
function gen_cache_init_code end
"""
gen_access_expr(device::AbstractDevice, symbol::Symbol)
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
Return an `Expr` or `QuoteNode` accessing the variable identified by [`symbol`].
"""
function gen_access_expr end

22
src/devices/measure.jl Normal file
View File

@ -0,0 +1,22 @@
"""
measure_devices(machine::Machine; verbose::Bool)
Measure FLOPS, RAM, cache sizes and what other properties can be extracted for the devices in the given machine.
"""
function measure_devices!(machine::Machine; verbose::Bool = Base.is_interactive())
for device in machine.devices
measure_device!(device; verbose = verbose)
end
return nothing
end
"""
measure_transfer_rates(machine::Machine; verbose::Bool)
Measure the transfer rates between devices in the machine.
"""
function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.is_interactive())
# TODO implement
return nothing
end

96
src/devices/numa/impl.jl Normal file
View File

@ -0,0 +1,96 @@
using NumaAllocators
"""
NumaNode <: AbstractCPU
Representation of a specific CPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct NumaNode <: AbstractCPU
    # Index of the NUMA node; `get_devices` counts them from 0.
    numaId::UInt16
    # `get_devices` currently always stores 1 here.
    threads::UInt16
    # Caching strategy set for this device.
    cacheStrategy::CacheStrategy
    # `get_devices` stores -1 here.
    FLOPS::Float64
    # Unique id, used to name this device's cache variable in generated code.
    id::UUID
end

# Register the device type in the global list of implemented device types.
push!(DEVICE_TYPES, NumaNode)

CACHE_STRATEGIES[NumaNode] = CacheStrategy[LocalVariables()]

default_strategy(::Type{<:NumaNode}) = LocalVariables()
function measure_device!(device::NumaNode; verbose::Bool)
    # Placeholder: nothing is measured yet and the device is not modified.
    verbose && println("Measuring Numa Node $(device.numaId)")

    # TODO implement
    return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: NumaNode}
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
    # Ids run from 0 to the highest id, so the node count is one more than that id.
    highestId = highest_numa_node()
    verbose && println("Found $(highestId + 1) NUMA nodes")

    return AbstractDevice[
        NumaNode(numaId, 1, default_strategy(NumaNode), -1, UUIDs.uuid1(rng[1])) for numaId in 0:highestId
    ]
end
"""
gen_cache_init_code(device::NumaNode)
Generate code for initializing the [`LocalVariables`](@ref) strategy on a [`NumaNode`](@ref).
"""
function gen_cache_init_code(device::NumaNode)
if typeof(device.cacheStrategy) <: LocalVariables
# don't need to initialize anything
return Expr(:block)
elseif typeof(device.cacheStrategy) <: Dictionary
return Meta.parse("cache_$(to_var_name(device.id)) = Dict{Symbol, Any}()")
# TODO: sizehint?
end
return error("Unimplemented cache strategy \"$(device.cacheStrategy)\" for device \"$(device)\"")
end
"""
gen_access_expr(device::NumaNode, symbol::Symbol)
Generate code to access the variable designated by `symbol` on a [`NumaNode`](@ref), using the [`CacheStrategy`](@ref) set in the device.
"""
gen_access_expr(device::NumaNode, symbol::Symbol) = _gen_access_expr(device, device.cacheStrategy, symbol)
"""
_gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol)
Internal function for dispatch, used in [`gen_access_expr`](@ref).
"""
function _gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol)
    # the variable carries the symbol's name with a `data_` prefix
    varName = Symbol("data_$symbol")
    return Meta.parse(":($varName)")
end
"""
_gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol)
Internal function for dispatch, used in [`gen_access_expr`](@ref).
"""
function _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol)
    # index into this device's cache variable, using the symbol itself as key
    cacheName = "cache_$(to_var_name(device.id))"
    return Meta.parse(":($(cacheName)[:$symbol])")
end

View File

@ -0,0 +1,53 @@
using oneAPI
"""
oneAPIGPU <: AbstractGPU
Representation of a specific Intel GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct oneAPIGPU <: AbstractGPU
    # Handle of the underlying device, one element of `oneAPI.devices()`.
    device::Any
    # Caching strategy set for this device.
    cacheStrategy::CacheStrategy
    # `get_devices` stores -1 here.
    FLOPS::Float64
end

# Register the device type in the global list of implemented device types.
push!(DEVICE_TYPES, oneAPIGPU)

# Only the local-variable strategy is registered for oneAPI GPUs.
CACHE_STRATEGIES[oneAPIGPU] = CacheStrategy[LocalVariables()]

default_strategy(::Type{<:oneAPIGPU}) = LocalVariables()
function measure_device!(device::oneAPIGPU; verbose::Bool)
    # Placeholder: nothing is measured yet and the device is not modified.
    verbose && println("Measuring oneAPI GPU $(device.device)")

    # TODO implement
    return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
    # When oneAPI reports itself as non-functional, no devices are returned.
    if !oneAPI.functional()
        verbose && println("oneAPI is non-functional")
        return Vector{AbstractDevice}()
    end

    found = oneAPI.devices()
    verbose && println("Found $(length(found)) oneAPI devices")

    # Every device starts with the default strategy and a FLOPS value of -1.
    return AbstractDevice[oneAPIGPU(gpu, default_strategy(oneAPIGPU), -1) for gpu in found]
end

53
src/devices/rocm/impl.jl Normal file
View File

@ -0,0 +1,53 @@
using AMDGPU
"""
ROCmGPU <: AbstractGPU
Representation of a specific AMD GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct ROCmGPU <: AbstractGPU
    # Handle of the underlying device, one element of `AMDGPU.devices()`.
    device::Any
    # Caching strategy set for this device.
    cacheStrategy::CacheStrategy
    # `get_devices` stores -1 here.
    FLOPS::Float64
end

# Register the device type in the global list of implemented device types.
push!(DEVICE_TYPES, ROCmGPU)

# Only the local-variable strategy is registered for ROCm GPUs.
CACHE_STRATEGIES[ROCmGPU] = CacheStrategy[LocalVariables()]

default_strategy(::Type{<:ROCmGPU}) = LocalVariables()
function measure_device!(device::ROCmGPU; verbose::Bool)
    # Placeholder: nothing is measured yet and the device is not modified.
    verbose && println("Measuring ROCm GPU $(device.device)")

    # TODO implement
    return nothing
end
"""
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
    # When AMDGPU reports itself as non-functional, no devices are returned.
    if !AMDGPU.functional()
        verbose && println("AMDGPU is non-functional")
        return Vector{AbstractDevice}()
    end

    found = AMDGPU.devices()
    verbose && println("Found $(length(found)) AMD devices")

    # Every device starts with the default strategy and a FLOPS value of -1.
    return AbstractDevice[ROCmGPU(gpu, default_strategy(ROCmGPU), -1) for gpu in found]
end

View File

@ -6,6 +6,6 @@ Pretty-print a [`Diff`](@ref). Called via print, println and co.
function show(io::IO, diff::Diff)
print(io, "Nodes: ")
print(io, length(diff.addedNodes) + length(diff.removedNodes))
print(io, " Edges: ")
print(io, ", Edges: ")
return print(io, length(diff.addedEdges) + length(diff.removedEdges))
end

View File

@ -4,8 +4,8 @@
A named tuple representing a difference of added and removed nodes and edges on a [`DAG`](@ref).
"""
const Diff = NamedTuple{
(:addedNodes, :removedNodes, :addedEdges, :removedEdges),
Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}},
(:addedNodes, :removedNodes, :addedEdges, :removedEdges, :updatedChildren),
Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}, Vector{Tuple{Node, AbstractTask}}},
}
function Diff()
@ -14,5 +14,8 @@ function Diff()
removedNodes = Vector{Node}(),
addedEdges = Vector{Edge}(),
removedEdges = Vector{Edge}(),
# children were updated in the task, updatedChildren[x][2] is the task before the update
updatedChildren = Vector{Tuple{Node, AbstractTask}}(),
)::Diff
end

View File

@ -0,0 +1,65 @@
"""
    CDCost

Representation of a [`DAG`](@ref)'s cost as estimated by the [`GlobalMetricEstimator`](@ref).

# Fields:
`.data`: The total data transfer.\\
`.computeEffort`: The total compute effort.\\
`.computeIntensity`: The compute intensity, will always equal `.computeEffort / .data`.

!!! note
    Note that the `computeIntensity` doesn't necessarily make sense in the context of only operation costs.
    For example, for node fusions this will always be 0, since the computeEffort is zero.
    It will still work as intended when adding/subtracting to/from a `graph_cost` estimate.
"""
const CDCost = NamedTuple{(:data, :computeEffort, :computeIntensity), Tuple{Float64, Float64, Float64}}

"""
    +(cost1::CDCost, cost2::CDCost)

Add two [`CDCost`](@ref)s field-wise. The `computeIntensity` of the result is recomputed from the summed
`computeEffort` and `data` instead of being added, so it follows IEEE division rules when the summed `data` is zero.
"""
function +(cost1::CDCost, cost2::CDCost)::CDCost
    d = cost1.data + cost2.data
    ce = cost1.computeEffort + cost2.computeEffort
    return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
end

"""
    -(cost1::CDCost, cost2::CDCost)

Subtract `cost2` from `cost1` field-wise. The `computeIntensity` of the result is recomputed from the resulting
`computeEffort` and `data` instead of being subtracted, so it follows IEEE division rules when the resulting `data` is zero.
"""
function -(cost1::CDCost, cost2::CDCost)::CDCost
    d = cost1.data - cost2.data
    ce = cost1.computeEffort - cost2.computeEffort
    return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
end
"""
    GlobalMetricEstimator <: AbstractEstimator

Field-less estimator type. Its methods in this file report costs as `CDCost` named tuples
(`data`, `computeEffort`, `computeIntensity`).
"""
struct GlobalMetricEstimator <: AbstractEstimator end
# The cost type produced by the `GlobalMetricEstimator` methods is always `CDCost`.
cost_type(estimator::GlobalMetricEstimator) = CDCost
# Report the graph's cost by copying the `data`, `computeEffort` and `computeIntensity`
# entries of `get_properties(graph)` into a `CDCost`.
function graph_cost(estimator::GlobalMetricEstimator, graph::DAG)
    props = get_properties(graph)
    cost = (data = props.data, computeEffort = props.computeEffort, computeIntensity = props.computeIntensity)
    # type assertion only, no conversion: throws if a property is not a Float64
    return cost::CDCost
end
# Estimated effect of a node fusion: the data of the task of the operation's second input is
# subtracted, the compute effort is unchanged.
# NOTE(review): presumably `operation.input[2]` is the data node that the fusion removes; confirm.
function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeFusion)
    removed_data = data(operation.input[2].task)
    return (data = -removed_data, computeEffort = 0.0, computeIntensity = 0.0)::CDCost
end
# Estimated effect of a node reduction: all inputs but one disappear, so `length(input) - 1`
# times the data and compute effort of the first input's task are subtracted.
# The compute intensity is reported as 0.0 when data task nodes are reduced and Inf otherwise.
function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeReduction)
    s = length(operation.input) - 1
    return (
        data = s * -data(operation.input[1].task),
        computeEffort = s * -compute_effort(operation.input[1].task),
        # Inspect the node itself: `typeof(operation.input)` is the type of the indexable
        # container of nodes and can therefore never be a subtype of `DataTaskNode`.
        computeIntensity = operation.input[1] isa DataTaskNode ? 0.0 : Inf,
    )::CDCost
end
# Estimated effect of a node split: the data and compute effort of the input node's task are
# added once for every parent beyond the first.
function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeSplit)
    node = operation.input
    extra_copies = length(node.parents) - 1
    added_data = extra_copies * data(node.task)
    added_effort = extra_copies * compute_effort(node.task)
    return (data = added_data, computeEffort = added_effort, computeIntensity = added_effort / added_data)::CDCost
end

View File

@ -0,0 +1,44 @@
"""
    AbstractEstimator

Abstract base type for an estimator. An estimator estimates the cost of a graph or the difference an operation applied to a graph will make to its cost.

Interface functions are
- [`cost_type`](@ref)
- [`graph_cost`](@ref)
- [`operation_effect`](@ref)
"""
abstract type AbstractEstimator end
"""
    cost_type(estimator::AbstractEstimator)

Interface function returning a specific estimator's cost type, i.e., the type returned by its implementation of [`graph_cost`](@ref) and [`operation_effect`](@ref).

This is only the generic function declaration; each estimator adds its own method.
"""
function cost_type end
"""
    graph_cost(estimator::AbstractEstimator, graph::DAG)

Get the total estimated cost of the graph. The cost's data type can be chosen by the implementation, but should have usable comparison operators (<, <=, >, >=, ==) and basic math operators (+, -, *, /).

This is only the generic function declaration; each estimator adds its own method.
"""
function graph_cost end
"""
    operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation)

Get the estimated effect on the cost of the graph, such that `graph_cost(estimator, graph) + operation_effect(estimator, graph, operation) ~= graph_cost(estimator, graph_with_operation_applied)`. There is no hard requirement for this, but the better the estimate, the better an optimization algorithm will be.

!!! note
    There is a default implementation of this function, applying the operation, calling [`graph_cost`](@ref), then popping the operation again.
    The operation is popped even if the second [`graph_cost`](@ref) call throws.

    It can be much faster to overload this function for a specific estimator and directly compute the effects from the operation if possible.
"""
function operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation)
    # This is currently not stably working, see issue #16
    cost = graph_cost(estimator, graph)
    push_operation!(graph, operation)
    cost_after = try
        graph_cost(estimator, graph)
    finally
        # always undo the temporary push, even when the estimate fails
        pop_operation!(graph)
    end
    return cost_after - cost
end

View File

@ -38,8 +38,7 @@ end
Return `true` if [`pop_operation!`](@ref) is possible, `false` otherwise.
"""
can_pop(graph::DAG) =
!isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
can_pop(graph::DAG) = !isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
"""
reset_graph!(graph::DAG)

View File

@ -15,12 +15,7 @@ Insert the node into the graph.
See also: [`remove_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref)
"""
function insert_node!(
graph::DAG,
node::Node,
track = true,
invalidate_cache = true,
)
function insert_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
# 1: mute
push!(graph.nodes, node)
@ -50,14 +45,8 @@ Insert the edge between node1 (child) and node2 (parent) into the graph.
See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`remove_edge!`](@ref)
"""
function insert_edge!(
graph::DAG,
node1::Node,
node2::Node,
track = true,
invalidate_cache = true,
)
# @assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists"
function insert_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
@assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists"
# 1: mute
# edge points from child to parent
@ -95,13 +84,8 @@ Remove the node from the graph.
See also: [`insert_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref)
"""
function remove_node!(
graph::DAG,
node::Node,
track = true,
invalidate_cache = true,
)
# @assert node in graph.nodes "Trying to remove a node that's not in the graph"
function remove_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
@assert node in graph.nodes "Trying to remove a node that's not in the graph"
# 1: mute
delete!(graph.nodes, node)
@ -134,13 +118,7 @@ Remove the edge between node1 (child) and node2 (parent) into the graph.
See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`insert_edge!`](@ref)
"""
function remove_edge!(
graph::DAG,
node1::Node,
node2::Node,
track = true,
invalidate_cache = true,
)
function remove_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
# 1: mute
pre_length1 = length(node1.parents)
pre_length2 = length(node2.children)
@ -149,15 +127,15 @@ function remove_edge!(
filter!(x -> x != node2, node1.parents)
filter!(x -> x != node1, node2.children)
#=@assert begin
removed = pre_length1 - length(node1.parents)
removed <= 1
end "removed more than one node from node1's parents"=#
@assert begin
removed = pre_length1 - length(node1.parents)
removed <= 1
end "removed more than one node from node1's parents"
#=@assert begin
removed = pre_length2 - length(node2.children)
removed <= 1
end "removed more than one node from node2's children"=#
@assert begin
removed = pre_length2 - length(node2.children)
removed <= 1
end "removed more than one node from node2's children"
# 2: keep track
if (track)
@ -181,6 +159,66 @@ function remove_edge!(
return nothing
end
# Replace every occurrence of `before` by `after` in the input lists of a fused task, then
# recurse into whichever sub-task had `before` among its inputs.
# Fails an assertion if `before` occurs in neither `t1_inputs` nor `t2_inputs`.
function replace_children!(task::FusedComputeTask, before, after)
    # count the occurrences before mutating, they decide where to recurse afterwards
    hits_first = count(x -> x == before, task.t1_inputs)
    hits_second = count(x -> x == before, task.t2_inputs)

    @assert hits_first >= 1 || hits_second >= 1 "Nothing to replace while replacing $before with $after in $(task.t1_inputs...) and $(task.t2_inputs...)"

    replace!(task.t1_inputs, before => after)
    replace!(task.t2_inputs, before => after)

    # only descend into the sub-tasks that actually referenced `before`
    hits_first > 0 && replace_children!(task.first_task, before, after)
    hits_second > 0 && replace_children!(task.second_task, before, after)

    return nothing
end
# Fallback for every other `AbstractTask`: nothing is replaced.
replace_children!(task::AbstractTask, before, after) = nothing
# Print a diagnostic for a failed child update of `n` and abort with a failed assertion.
function _fail_child_update(header::String, n::Node, child_before::Symbol, child_after::Symbol)
    println(header)
    child_ids = Vector{String}()
    for child in n.children
        push!(child_ids, "$(child.id)")
    end
    println("From $(child_before) to $(child_after) in $n with children $(child_ids)")
    @assert false
end

# Whether `child` is listed among the inputs of either sub-task of the fused task.
_is_fused_input(task, child::Symbol) = (child in task.t1_inputs) || (child in task.t2_inputs)

"""
    update_child!(graph::DAG, n::Node, child_before::Symbol, child_after::Symbol; track = true)

Rename the input `child_before` to `child_after` in the task of `n`, using [`replace_children!`](@ref).
Nodes whose task is not a [`FusedComputeTask`](@ref) are left untouched.

If `track` is `true`, the node and a copy of its task from before the update are appended to `graph.diff.updatedChildren`.

Prints a diagnostic and fails an assertion if `child_before` is not an input of the task, or if `child_after` is not an input after the replacement.

Always returns `nothing`.
"""
function update_child!(graph::DAG, n::Node, child_before::Symbol, child_after::Symbol; track = true)
    # only need to update fused compute tasks
    if !(n.task isa FusedComputeTask)
        return nothing
    end

    # snapshot taken before the mutation below, recorded in the diff when tracking
    taskBefore = copy(n.task)

    if !_is_fused_input(n.task, child_before)
        _fail_child_update("------------------ Nothing to replace!! ------------------", n, child_before, child_after)
    end

    replace_children!(n.task, child_before, child_after)

    if !_is_fused_input(n.task, child_after)
        _fail_child_update(
            "------------------ Did not replace anything!! ------------------",
            n,
            child_before,
            child_after,
        )
    end

    # keep track
    if track
        push!(graph.diff.updatedChildren, (n, taskBefore))
    end

    return nothing
end
"""
get_snapshot_diff(graph::DAG)

View File

@ -62,9 +62,5 @@ function show(io::IO, graph::DAG)
properties = get_properties(graph)
println(io, " Total Compute Effort: ", properties.computeEffort)
println(io, " Total Data Transfer: ", properties.data)
return println(
io,
" Total Compute Intensity: ",
properties.computeIntensity,
)
return println(io, " Total Compute Intensity: ", properties.computeIntensity)
end

View File

@ -34,6 +34,7 @@ end
Return a vector of the graph's entry nodes.
"""
function get_entry_nodes(graph::DAG)
apply_all!(graph)
result = Vector{Node}()
for node in graph.nodes
if (is_entry_node(node))

View File

@ -17,7 +17,7 @@ end
The representation of the graph as a set of [`Node`](@ref)s.
A DAG can be loaded using the appropriate parse function, e.g. [`parse_abc`](@ref).
A DAG can be loaded using the appropriate parse_dag function, e.g. [`parse_dag`](@ref).
[`Operation`](@ref)s can be applied on it using [`push_operation!`](@ref) and reverted using [`pop_operation!`](@ref) like a stack.
To get the set of possible operations, use [`get_operations`](@ref).
@ -52,11 +52,7 @@ end
Construct and return an empty [`PossibleOperations`](@ref) object.
"""
function PossibleOperations()
return PossibleOperations(
Set{NodeFusion}(),
Set{NodeReduction}(),
Set{NodeSplit}(),
)
return PossibleOperations(Set{NodeFusion}(), Set{NodeReduction}(), Set{NodeSplit}())
end
"""

View File

@ -59,3 +59,19 @@ function is_valid(graph::DAG)
return true
end
"""
    is_scheduled(graph::DAG)

Check that the whole graph has been scheduled: every node that is not a [`DataTaskNode`](@ref) must have a non-`missing` `.device`.

Returns `true` if so; otherwise the check fails with an assertion error.
"""
function is_scheduled(graph::DAG)
    for node in graph.nodes
        # data task nodes are not checked
        node isa DataTaskNode && continue
        @assert !ismissing(node.device)
    end

    return true
end

View File

@ -7,7 +7,7 @@ Return the particle and value as is.
0 FLOP.
"""
function compute(::ComputeTaskP, data::ParticleValue)
function compute(::ComputeTaskP, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
return data
end
@ -18,7 +18,7 @@ Compute an outer edge. Return the particle value with the same particle and the
1 FLOP.
"""
function compute(::ComputeTaskU, data::ParticleValue)
function compute(::ComputeTaskU, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
return ParticleValue(data.p, data.v * outer_edge(data.p))
end
@ -29,7 +29,11 @@ Compute a vertex. Preserve momentum and particle types (AB->C etc.) to create re
6 FLOP.
"""
function compute(::ComputeTaskV, data1::ParticleValue, data2::ParticleValue)
function compute(
::ComputeTaskV,
data1::ParticleValue{P1},
data2::ParticleValue{P2},
)::ParticleValue where {P1 <: ABCParticle, P2 <: ABCParticle}
p3 = preserve_momentum(data1.p, data2.p)
dataOut = ParticleValue(p3, data1.v * vertex() * data2.v)
return dataOut
@ -44,8 +48,15 @@ For valid inputs, both input particles should have the same momenta at this poin
12 FLOP.
"""
function compute(::ComputeTaskS2, data1::ParticleValue, data2::ParticleValue)
return data1.v * inner_edge(data1.p) * data2.v
function compute(::ComputeTaskS2, data1::ParticleValue{P}, data2::ParticleValue{P})::Float64 where {P <: ABCParticle}
#=
@assert isapprox(abs(data1.p.momentum.E), abs(data2.p.momentum.E), rtol = 0.001, atol = sqrt(eps())) "E: $(data1.p.momentum.E) vs. $(data2.p.momentum.E)"
@assert isapprox(data1.p.momentum.px, -data2.p.momentum.px, rtol = 0.001, atol = sqrt(eps())) "px: $(data1.p.momentum.px) vs. $(data2.p.momentum.px)"
@assert isapprox(data1.p.momentum.py, -data2.p.momentum.py, rtol = 0.001, atol = sqrt(eps())) "py: $(data1.p.momentum.py) vs. $(data2.p.momentum.py)"
@assert isapprox(data1.p.momentum.pz, -data2.p.momentum.pz, rtol = 0.001, atol = sqrt(eps())) "pz: $(data1.p.momentum.pz) vs. $(data2.p.momentum.pz)"
=#
inner = inner_edge(data1.p)
return data1.v * inner * data2.v
end
"""
@ -55,7 +66,7 @@ Compute inner edge (1 input particle, 1 output particle).
11 FLOP.
"""
function compute(::ComputeTaskS1, data::ParticleValue)
function compute(::ComputeTaskS1, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
return ParticleValue(data.p, data.v * inner_edge(data.p))
end
@ -66,191 +77,83 @@ Compute a sum over the vector. Use an algorithm that accounts for accumulated er
Linearly many FLOP with growing data.
"""
function compute(::ComputeTaskSum, data::Vector{Float64})
function compute(::ComputeTaskSum, data::Vector{Float64})::Float64
return sum_kbn(data)
end
"""
compute(t::FusedComputeTask, data)
get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Compute a [`FusedComputeTask`](@ref). This simply asserts false and should not be called. Fused Compute Tasks generate their expressions directly through the other tasks instead.
Generate and return code evaluating [`ComputeTaskP`](@ref) on `inSyms`, providing the output on `outSym`.
"""
function compute(t::FusedComputeTask, data)
@assert false "This is not implemented and should never be called"
function get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskP(), $(in[1]))")
end
"""
get_expression(::ComputeTaskP, inSymbol::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate and return code evaluating [`ComputeTaskP`](@ref) on `inSymbol`, providing the output on `outSymbol`.
Generate code evaluating [`ComputeTaskU`](@ref) on `inSyms`, providing the output on `outSym`.
`inSyms` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
"""
function get_expression(::ComputeTaskP, inSymbol::Symbol, outSymbol::Symbol)
return Meta.parse("$outSymbol = compute(ComputeTaskP(), $inSymbol)")
function get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskU(), $(in[1]))")
end
"""
get_expression(::ComputeTaskU, inSymbol::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate code evaluating [`ComputeTaskU`](@ref) on `inSymbol`, providing the output on `outSymbol`.
`inSymbol` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
Generate code evaluating [`ComputeTaskV`](@ref) on `inSyms`, providing the output on `outSym`.
`inSym[1]` and `inSym[2]` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
"""
function get_expression(::ComputeTaskU, inSymbol::Symbol, outSymbol::Symbol)
return Meta.parse("$outSymbol = compute(ComputeTaskU(), $inSymbol)")
function get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1]), eval(inExprs[2])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskV(), $(in[1]), $(in[2]))")
end
"""
get_expression(::ComputeTaskV, inSymbol1::Symbol, inSymbol2::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate code evaluating [`ComputeTaskV`](@ref) on `inSymbol1` and `inSymbol2`, providing the output on `outSymbol`.
`inSymbol1` and `inSymbol2` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
Generate code evaluating [`ComputeTaskS2`](@ref) on `inSyms`, providing the output on `outSym`.
`inSyms[1]` and `inSyms[2]` should be of type [`ParticleValue`](@ref), `outSym` will be of type `Float64`.
"""
function get_expression(
::ComputeTaskV,
inSymbol1::Symbol,
inSymbol2::Symbol,
outSymbol::Symbol,
)
return Meta.parse(
"$outSymbol = compute(ComputeTaskV(), $inSymbol1, $inSymbol2)",
)
function get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1]), eval(inExprs[2])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskS2(), $(in[1]), $(in[2]))")
end
"""
get_expression(::ComputeTaskS2, inSymbol1::Symbol, inSymbol2::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate code evaluating [`ComputeTaskS2`](@ref) on `inSymbol1` and `inSymbol2`, providing the output on `outSymbol`.
`inSymbol1` and `inSymbol2` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type `Float64`.
Generate code evaluating [`ComputeTaskS1`](@ref) on `inSyms`, providing the output on `outSym`.
`inSyms` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
"""
function get_expression(
::ComputeTaskS2,
inSymbol1::Symbol,
inSymbol2::Symbol,
outSymbol::Symbol,
)
return Meta.parse(
"$outSymbol = compute(ComputeTaskS2(), $inSymbol1, $inSymbol2)",
)
function get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector, outExpr)
in = [eval(inExprs[1])]
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskS1(), $(in[1]))")
end
"""
get_expression(::ComputeTaskS1, inSymbol::Symbol, outSymbol::Symbol)
get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
Generate code evaluating [`ComputeTaskS1`](@ref) on `inSymbol`, providing the output on `outSymbol`.
`inSymbol` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
Generate code evaluating [`ComputeTaskSum`](@ref) on `inSyms`, providing the output on `outSym`.
`inSyms` should be of type [`Float64`], `outSym` will be of type [`Float64`].
"""
function get_expression(::ComputeTaskS1, inSymbol::Symbol, outSymbol::Symbol)
return Meta.parse("$outSymbol = compute(ComputeTaskS1(), $inSymbol)")
end
"""
get_expression(::ComputeTaskSum, inSymbols::Vector{Symbol}, outSymbol::Symbol)
Generate code evaluating [`ComputeTaskSum`](@ref) on `inSymbols`, providing the output on `outSymbol`.
`inSymbols` should be of type [`Float64`], `outSymbol` will be of type [`Float64`].
"""
function get_expression(
::ComputeTaskSum,
inSymbols::Vector{Symbol},
outSymbol::Symbol,
)
return quote
$outSymbol = compute(ComputeTaskSum(), [$(inSymbols...)])
end
end
"""
get_expression(t::FusedComputeTask, inSymbols::Vector{Symbol}, outSymbol::Symbol)
Generate code evaluating a [`FusedComputeTask`](@ref) on `inSymbols`, providing the output on `outSymbol`.
`inSymbols` should be of the correct types and may be heterogeneous. `outSymbol` will be of the type of the output of `T2` of t.
"""
function get_expression(
t::FusedComputeTask,
inSymbols::Vector{Symbol},
outSymbol::Symbol,
)
(T1, T2) = get_types(t)
c1 = children(T1())
c2 = children(T2())
expr1 = nothing
expr2 = nothing
# TODO need to figure out how to know which inputs belong to which subtask
# since we order the vectors with the child nodes we can't just split
if (c1 == 1)
expr1 = get_expression(T1(), inSymbols[begin], :intermediate)
elseif (c1 == 2)
expr1 =
get_expression(T1(), inSymbols[begin], inSymbols[2], :intermediate)
else
expr1 = get_expression(T1(), inSymbols[begin:c1], :intermediate)
end
if (c2 == 1)
expr2 = get_expression(T2(), :intermediate, outSymbol)
elseif c2 == 2
expr2 =
get_expression(T2(), :intermediate, inSymbols[c1 + 1], outSymbol)
else
expr2 = get_expression(
T2(),
:intermediate * inSymbols[(c1 + 1):end],
outSymbol,
)
end
return Expr(:block, expr1, expr2)
end
"""
get_expression(node::ComputeTaskNode)
Generate and return code for a given [`ComputeTaskNode`](@ref).
"""
function get_expression(node::ComputeTaskNode)
t = typeof(node.task)
@assert length(node.children) == children(node.task) || t <: ComputeTaskSum
if (t <: ComputeTaskU || t <: ComputeTaskP || t <: ComputeTaskS1) # single input
symbolIn = Symbol("data_$(to_var_name(node.children[1].id))")
symbolOut = Symbol("data_$(to_var_name(node.id))")
return get_expression(t(), symbolIn, symbolOut)
elseif (t <: ComputeTaskS2 || t <: ComputeTaskV) # double input
symbolIn1 = Symbol("data_$(to_var_name(node.children[1].id))")
symbolIn2 = Symbol("data_$(to_var_name(node.children[2].id))")
symbolOut = Symbol("data_$(to_var_name(node.id))")
return get_expression(t(), symbolIn1, symbolIn2, symbolOut)
elseif (t <: ComputeTaskSum || t <: FusedComputeTask) # vector input
inSymbols = Vector{Symbol}()
for child in node.children
push!(inSymbols, Symbol("data_$(to_var_name(child.id))"))
end
outSymbol = Symbol("data_$(to_var_name(node.id))")
return get_expression(t(), inSymbols, outSymbol)
else
error("Unknown compute task")
end
end
"""
get_expression(node::DataTaskNode)
Generate and return code for a given [`DataTaskNode`](@ref).
"""
function get_expression(node::DataTaskNode)
# TODO: do things to transport data from/to gpu, between numa nodes, etc.
@assert length(node.children) <= 1
inSymbol = nothing
if (length(node.children) == 1)
inSymbol = Symbol("data_$(to_var_name(node.children[1].id))")
else
inSymbol = Symbol("data_$(to_var_name(node.id))_in")
end
outSymbol = Symbol("data_$(to_var_name(node.id))")
dataTransportExp = Meta.parse("$outSymbol = $inSymbol")
return dataTransportExp
function get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector, outExpr)
in = eval.(inExprs)
out = eval(outExpr)
return Meta.parse("$out = compute(ComputeTaskSum(), [$(unroll_symbol_vector(in))])")
end

View File

@ -1,74 +1,198 @@
using QEDbase
using Random
using Roots
using ForwardDiff
ComputeTaskSum() = ComputeTaskSum(0)
"""
Particle(rng)
gen_process_input(processDescription::ABCProcessDescription)
Return a randomly generated particle.
Return a ProcessInput of randomly generated [`ABCParticle`](@ref)s from a [`ABCProcessDescription`](@ref). The process description can be created manually or parsed from a string using [`parse_process`](@ref).
Note: This uses RAMBO to create a valid process with conservation of momentum and energy.
"""
function Particle(rng, type::ParticleType)
function gen_process_input(processDescription::ABCProcessDescription)
inParticleTypes = keys(processDescription.inParticles)
outParticleTypes = keys(processDescription.outParticles)
p1 = rand(rng, Float64)
p2 = rand(rng, Float64)
p3 = rand(rng, Float64)
m = mass(type)
# keep the momenta of the particles on-shell
p4 = sqrt(p1^2 + p2^2 + p3^2 + m^2)
return Particle(p1, p2, p3, p4, type)
end
"""
gen_particles(n::Int)
Return a Vector of `n` randomly generated [`Particle`](@ref)s.
Note: This does not take into account the preservation of momenta required for an actual valid process!
"""
function gen_particles(ns::Dict{ParticleType, Int})
particles = Dict{ParticleType, Vector{Particle}}()
rng = MersenneTwister(0)
if ns == Dict((A => 2), (B => 2))
rho = 1.0
omega = rand(rng, Float64)
theta = rand(rng, Float64) * π
phi = rand(rng, Float64) * π
particles[A] = Vector{Particle}()
particles[B] = Vector{Particle}()
push!(particles[A], Particle(omega, 0, 0, omega, A))
push!(particles[B], Particle(omega, 0, 0, -omega, B))
push!(
particles[A],
Particle(
omega,
rho * cos(theta) * cos(phi),
rho * cos(theta) * sin(phi),
rho * sin(theta),
A,
),
)
push!(
particles[B],
Particle(
omega,
-rho * cos(theta) * cos(phi),
-rho * cos(theta) * sin(phi),
-rho * sin(theta),
B,
),
)
return particles
end
for (type, n) in ns
particles[type] = Vector{Particle}()
for i in 1:n
push!(particles[type], Particle(rng, type))
massSum = 0
inputMasses = Vector{Float64}()
for (particle, n) in processDescription.inParticles
for _ in 1:n
massSum += mass(particle)
push!(inputMasses, mass(particle))
end
end
return particles
outputMasses = Vector{Float64}()
for (particle, n) in processDescription.outParticles
for _ in 1:n
massSum += mass(particle)
push!(outputMasses, mass(particle))
end
end
# add some extra random mass to allow for some momentum
massSum += rand(rng[threadid()]) * (length(inputMasses) + length(outputMasses))
inputParticles = Vector{ABCParticle}()
initialMomenta = generate_initial_moms(massSum, inputMasses)
index = 1
for (particle, n) in processDescription.inParticles
for _ in 1:n
mom = initialMomenta[index]
push!(inputParticles, particle(mom))
index += 1
end
end
outputParticles = Vector{ABCParticle}()
final_momenta = generate_physical_massive_moms(rng[threadid()], massSum, outputMasses)
index = 1
for (particle, n) in processDescription.outParticles
for _ in 1:n
mom = final_momenta[index]
push!(outputParticles, particle(SFourMomentum(-mom.E, mom.px, mom.py, mom.pz)))
index += 1
end
end
processInput = ABCProcessInput(processDescription, inputParticles, outputParticles)
return return processInput
end
####################
# CODE FROM HERE BORROWED FROM SOURCE: https://codebase.helmholtz.cloud/qedsandbox/QEDphasespaces.jl/
# use qedphasespaces directly once released
#
# quick and dirty implementation of the RAMBO algorithm
#
# reference:
# * https://cds.cern.ch/record/164736/files/198601282.pdf
# * https://www.sciencedirect.com/science/article/pii/0010465586901190
####################
# Build the momenta of two incoming particles with masses `masses[1]` and `masses[2]`:
# energies E1, E2 and opposite z-components +rho1, -rho2, x- and y-components zero.
# NOTE(review): presumably `ss` is the total centre-of-momentum energy; confirm against callers.
function generate_initial_moms(ss, masses)
    m1_sq = masses[1]^2
    m2_sq = masses[2]^2

    E1 = (ss^2 + m1_sq - m2_sq) / (2 * ss)
    E2 = (ss^2 + m2_sq - m1_sq) / (2 * ss)

    rho1 = sqrt(E1^2 - m1_sq)
    rho2 = sqrt(E2^2 - m2_sq)

    first_mom = SFourMomentum(E1, 0, 0, rho1)
    second_mom = SFourMomentum(E2, 0, 0, -rho2)
    return [first_mom, second_mom]
end
# Sampler method for `SFourMomentum`: builds the momentum from four numbers drawn with `rand(rng, 4)`.
# `generate_massless_moms` relies on this when it fills a `Vector{SFourMomentum}` with `rand!`.
Random.rand(rng::AbstractRNG, ::Random.SamplerType{SFourMomentum}) = SFourMomentum(rand(rng, 4))
# Sampler method for `NTuple{N, Float64}`: a tuple of `N` numbers drawn with `rand(rng, N)`.
# NOTE(review): neither `Random.rand` nor `NTuple{N, Float64}` is defined in this file, so this looks like
# type piracy; confirm it is still needed.
Random.rand(rng::AbstractRNG, ::Random.SamplerType{NTuple{N, Float64}}) where {N} = Tuple(rand(rng, N))
# Map four numbers `u1..u4` to a four-momentum whose spatial part has the same magnitude
# as its first component `-log(u3 * u4)`; the direction is given by cos(theta) = 2 * u1 - 1
# and phi = 2 * pi * u2.
# NOTE(review): presumably the inputs are uniform random numbers in (0, 1); confirm.
function _transform_uni_to_mom(u1, u2, u3, u4)
    cos_theta = 2 * u1 - 1
    sin_theta = sqrt(1 - cos_theta^2)
    azimuth = 2 * pi * u2
    energy = -log(u3 * u4)

    return SFourMomentum(
        energy,
        energy * sin_theta * cos(azimuth),
        energy * sin_theta * sin(azimuth),
        energy * cos_theta,
    )
end
# Transform the first four components of `uni_mom` with `_transform_uni_to_mom` and return the
# resulting momentum.
#
# Despite the `!` in the name, `dest` is NOT modified: the previous implementation only rebound
# its local `dest` variable, which callers could never observe. The parameter is kept so that
# existing call sites keep working; callers have to use the return value.
function _transform_uni_to_mom!(uni_mom, dest)
    u1, u2, u3, u4 = Tuple(uni_mom)
    return _transform_uni_to_mom(u1, u2, u3, u4)
end
# Convenience method: splat a tuple into the four-argument method.
_transform_uni_to_mom(u1234::Tuple) = _transform_uni_to_mom(u1234...)
# Convenience method: treat the components of an `SFourMomentum` as the four input numbers.
_transform_uni_to_mom(u1234::SFourMomentum) = _transform_uni_to_mom(Tuple(u1234))
# Draw `n` random `SFourMomentum` samples from `rng` and pass each one through
# `_transform_uni_to_mom`.
function generate_massless_moms(rng, n::Int)
    samples = rand!(rng, Vector{SFourMomentum}(undef, n))
    return map(_transform_uni_to_mom, samples)
end
# Generate `n` momenta with `generate_massless_moms` and transform each of them using the
# summed momentum `Q`: with `M = sqrt(Q * Q)`, the momenta are transformed with the vector
# `b = -Q_spatial / M` and `gamma = Q_t / M`, and rescaled by `x = ss / M`.
# NOTE(review): presumably this is the boost/rescale step of RAMBO that makes the momenta sum to
# (ss, 0, 0, 0); confirm against the references.
function generate_physical_massless_moms(rng, ss, n)
    r_moms = generate_massless_moms(rng, n)

    Q = sum(r_moms)
    M = sqrt(Q * Q)
    fac = -1 / M

    bx = fac * getX(Q)
    by = fac * getY(Q)
    bz = fac * getZ(Q)

    gamma = getT(Q) / M
    a = 1 / (1 + gamma)
    x = ss / M

    for i in 1:n
        mom = r_moms[i]
        mom0, mom1, mom2, mom3 = getT(mom), getX(mom), getY(mom), getZ(mom)

        bq = bx * mom1 + by * mom2 + bz * mom3

        r_moms[i] = SFourMomentum(
            x * (gamma * mom0 + bq),
            x * (mom1 + bx * mom0 + a * bq * bx),
            x * (mom2 + by * mom0 + a * bq * by),
            x * (mom3 + bz * mom0 + a * bq * bz),
        )
    end

    return r_moms
end
# Residual whose root in `xi` is searched by `generate_physical_massive_moms`:
# the sum over all `i` of `sqrt(masses[i]^2 + xi^2 * p0s[i]^2)`, minus `ss`.
function _to_be_solved(xi, masses, p0s, ss)
    total = 0.0
    for (idx, energy) in enumerate(p0s)
        total += sqrt(masses[idx]^2 + xi^2 * energy^2)
    end
    return total - ss
end
# Turn each momentum in `massless_moms` into a massive one: the spatial components are scaled
# by `xi` and the first component becomes `sqrt(t^2 * xi^2 + masses[i]^2)`.
function _build_massive_momenta(xi, masses, massless_moms)
    massive = SFourMomentum[]
    for (i, q) in enumerate(massless_moms)
        k0 = sqrt(getT(q)^2 * xi^2 + masses[i]^2)
        push!(massive, SFourMomentum(k0, xi * getX(q), xi * getY(q), xi * getZ(q)))
    end
    return massive
end
# Return a function that evaluates the derivative of `func` at `x` via `ForwardDiff.derivative`;
# `x` is converted with `float` first.
first_derivative(func) = x -> ForwardDiff.derivative(func, float(x))
# Generate one momentum per entry of `masses`: create momenta with
# `generate_physical_massless_moms`, find the root `xi` of `_to_be_solved` with Newton's method
# starting at `x0`, then build the massive momenta with `_build_massive_momenta`.
function generate_physical_massive_moms(rng, ss, masses; x0 = 0.1)
    massless_moms = generate_physical_massless_moms(rng, ss, length(masses))
    energies = getT.(massless_moms)

    residual = x -> _to_be_solved(x, masses, energies, ss)
    xi = find_zero((residual, first_derivative(residual)), x0, Roots.Newton())

    return _build_massive_momenta(xi, masses, massless_moms)
end

View File

@ -32,13 +32,13 @@ function parse_edges(input::AbstractString)
end
"""
parse_abc(filename::String; verbose::Bool = false)
parse_dag(filename::String, model::ABCModel; verbose::Bool = false)
Read an abc-model process from the given file. If `verbose` is set to true, print some progress information to stdout.
Returns a valid [`DAG`](@ref).
"""
function parse_abc(filename::String, verbose::Bool = false)
function parse_dag(filename::AbstractString, model::ABCModel, verbose::Bool = false)
file = open(filename, "r")
if (verbose)
@ -63,10 +63,9 @@ function parse_abc(filename::String, verbose::Bool = false)
end
sizehint!(graph.nodes, estimate_no_nodes)
sum_node = insert_node!(graph, make_node(ComputeTaskSum()), false, false)
global_data_out =
insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), false, false)
insert_edge!(graph, sum_node, global_data_out, false, false)
sum_node = insert_node!(graph, make_node(ComputeTaskSum(0)), track = false, invalidate_cache = false)
global_data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false)
insert_edge!(graph, sum_node, global_data_out, track = false, invalidate_cache = false)
# remember the data out nodes for connection
dataOutNodes = Dict()
@ -81,10 +80,7 @@ function parse_abc(filename::String, verbose::Bool = false)
noNodes += 1
if (noNodes % 100 == 0)
if (verbose)
percent = string(
round(100.0 * noNodes / nodesToRead, digits = 2),
"%",
)
percent = string(round(100.0 * noNodes / nodesToRead, digits = 2), "%")
print("\rReading Nodes... $percent")
end
end
@ -93,30 +89,20 @@ function parse_abc(filename::String, verbose::Bool = false)
data_in = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE), string(node)),
false,
false,
track = false,
invalidate_cache = false,
) # read particle data node
compute_P =
insert_node!(graph, make_node(ComputeTaskP()), false, false) # compute P node
data_Pu = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
) # transfer data from P to u (one ParticleValue object)
compute_u =
insert_node!(graph, make_node(ComputeTaskU()), false, false) # compute U node
data_out = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
) # transfer data out from u (one ParticleValue object)
compute_P = insert_node!(graph, make_node(ComputeTaskP()), track = false, invalidate_cache = false) # compute P node
data_Pu =
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data from P to u (one ParticleValue object)
compute_u = insert_node!(graph, make_node(ComputeTaskU()), track = false, invalidate_cache = false) # compute U node
data_out =
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data out from u (one ParticleValue object)
insert_edge!(graph, data_in, compute_P, false, false)
insert_edge!(graph, compute_P, data_Pu, false, false)
insert_edge!(graph, data_Pu, compute_u, false, false)
insert_edge!(graph, compute_u, data_out, false, false)
insert_edge!(graph, data_in, compute_P, track = false, invalidate_cache = false)
insert_edge!(graph, compute_P, data_Pu, track = false, invalidate_cache = false)
insert_edge!(graph, data_Pu, compute_u, track = false, invalidate_cache = false)
insert_edge!(graph, compute_u, data_out, track = false, invalidate_cache = false)
# remember the data_out node for future edges
dataOutNodes[node] = data_out
@ -126,63 +112,48 @@ function parse_abc(filename::String, verbose::Bool = false)
in1 = capt.captures[1]
in2 = capt.captures[2]
compute_v =
insert_node!(graph, make_node(ComputeTaskV()), false, false)
data_out = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
)
compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false)
data_out =
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false)
if (occursin(regex_c, in1))
# put an S node after this input
compute_S = insert_node!(
graph,
make_node(ComputeTaskS1()),
false,
false,
)
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false)
data_S_v = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
track = false,
invalidate_cache = false,
)
insert_edge!(graph, dataOutNodes[in1], compute_S, false, false)
insert_edge!(graph, compute_S, data_S_v, false, false)
insert_edge!(graph, dataOutNodes[in1], compute_S, track = false, invalidate_cache = false)
insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false)
insert_edge!(graph, data_S_v, compute_v, false, false)
insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false)
else
insert_edge!(graph, dataOutNodes[in1], compute_v, false, false)
insert_edge!(graph, dataOutNodes[in1], compute_v, track = false, invalidate_cache = false)
end
if (occursin(regex_c, in2))
# i think the current generator only puts the combined particles in the first space, so this case might never be entered
# put an S node after this input
compute_S = insert_node!(
graph,
make_node(ComputeTaskS1()),
false,
false,
)
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false)
data_S_v = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
track = false,
invalidate_cache = false,
)
insert_edge!(graph, dataOutNodes[in2], compute_S, false, false)
insert_edge!(graph, compute_S, data_S_v, false, false)
insert_edge!(graph, dataOutNodes[in2], compute_S, track = false, invalidate_cache = false)
insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false)
insert_edge!(graph, data_S_v, compute_v, false, false)
insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false)
else
insert_edge!(graph, dataOutNodes[in2], compute_v, false, false)
insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false)
end
insert_edge!(graph, compute_v, data_out, false, false)
insert_edge!(graph, compute_v, data_out, track = false, invalidate_cache = false)
dataOutNodes[node] = data_out
elseif occursin(regex_m, node)
@ -193,43 +164,31 @@ function parse_abc(filename::String, verbose::Bool = false)
in3 = capt.captures[3]
# in2 + in3 with a v
compute_v =
insert_node!(graph, make_node(ComputeTaskV()), false, false)
data_v = insert_node!(
graph,
make_node(DataTask(PARTICLE_VALUE_SIZE)),
false,
false,
)
compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false)
data_v =
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false)
insert_edge!(graph, dataOutNodes[in2], compute_v, false, false)
insert_edge!(graph, dataOutNodes[in3], compute_v, false, false)
insert_edge!(graph, compute_v, data_v, false, false)
insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false)
insert_edge!(graph, dataOutNodes[in3], compute_v, track = false, invalidate_cache = false)
insert_edge!(graph, compute_v, data_v, track = false, invalidate_cache = false)
# combine with the v of the combined other input
compute_S2 =
insert_node!(graph, make_node(ComputeTaskS2()), false, false)
data_out = insert_node!(
graph,
make_node(DataTask(FLOAT_SIZE)),
false,
false,
) # output of a S2 task is only a float
compute_S2 = insert_node!(graph, make_node(ComputeTaskS2()), track = false, invalidate_cache = false)
data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false) # output of a S2 task is only a float
insert_edge!(graph, data_v, compute_S2, false, false)
insert_edge!(graph, dataOutNodes[in1], compute_S2, false, false)
insert_edge!(graph, compute_S2, data_out, false, false)
insert_edge!(graph, data_v, compute_S2, track = false, invalidate_cache = false)
insert_edge!(graph, dataOutNodes[in1], compute_S2, track = false, invalidate_cache = false)
insert_edge!(graph, compute_S2, data_out, track = false, invalidate_cache = false)
insert_edge!(graph, data_out, sum_node, false, false)
insert_edge!(graph, data_out, sum_node, track = false, invalidate_cache = false)
add_child!(sum_node.task)
elseif occursin(regex_plus, node)
if (verbose)
println("\rReading Nodes Complete ")
println("Added ", length(graph.nodes), " nodes")
end
else
@assert false (
"Unknown node '$node' while reading from file $filename"
)
@assert false ("Unknown node '$node' while reading from file $filename")
end
end
@ -244,6 +203,46 @@ function parse_abc(filename::String, verbose::Bool = false)
if (verbose)
println("Done")
end
# don't actually need to read the edges
return graph
end
"""
parse_process(string::AbstractString, model::ABCModel)
Parse a string representation of a process, such as "AB->ABBB" into the corresponding [`ABCProcessDescription`](@ref).
"""
function parse_process(str::AbstractString, model::ABCModel)
inParticles = Dict{Type, Int}()
outParticles = Dict{Type, Int}()
if !(contains(str, "->"))
throw("Did not find -> while parsing process \"$str\"")
end
(inStr, outStr) = split(str, "->")
if (isempty(inStr) || isempty(outStr))
throw("Process (\"$str\") input or output part is empty!")
end
for t in types(model)
inCount = count(x -> x == String(t)[1], inStr)
outCount = count(x -> x == String(t)[1], outStr)
if inCount != 0
inParticles[t] = inCount
end
if outCount != 0
outParticles[t] = outCount
end
end
if length(inStr) != sum(values(inParticles))
throw("Encountered unknown characters in the input part of process \"$str\"")
elseif length(outStr) != sum(values(outParticles))
throw("Encountered unknown characters in the output part of process \"$str\"")
end
return ABCProcessDescription(inParticles, outParticles)
end

View File

@ -1,99 +1,142 @@
"""
ParticleType
using QEDbase
import QEDbase.mass
A Particle Type in the ABC Model as an enum, with types `A`, `B` and `C`.
"""
@enum ParticleType A = 1 B = 2 C = 3
ABCModel <: AbstractPhysicsModel
Singleton definition for identification of the ABC-Model.
"""
struct ABCModel <: AbstractPhysicsModel end
"""
ABCParticle
Base type for all particles in the [`ABCModel`](@ref).
"""
abstract type ABCParticle <: AbstractParticle end
"""
ParticleA <: ABCParticle
An 'A' particle in the ABC Model.
"""
struct ParticleA <: ABCParticle
momentum::SFourMomentum
end
"""
ParticleB <: ABCParticle
A 'B' particle in the ABC Model.
"""
struct ParticleB <: ABCParticle
momentum::SFourMomentum
end
"""
ParticleC <: ABCParticle
A 'C' particle in the ABC Model.
"""
struct ParticleC <: ABCParticle
momentum::SFourMomentum
end
"""
ABCProcessDescription <: AbstractProcessDescription
A description of a process in the ABC-Model. Contains the input and output particles.
See also: [`in_particles`](@ref), [`out_particles`](@ref), [`parse_process`](@ref)
"""
struct ABCProcessDescription <: AbstractProcessDescription
inParticles::Dict{Type, Int}
outParticles::Dict{Type, Int}
end
"""
ABCProcessInput <: AbstractProcessInput
Input for a ABC Process. Contains the [`ABCProcessDescription`](@ref) of the process it is an input for, and the values of the in and out particles.
See also: [`gen_process_input`](@ref)
"""
struct ABCProcessInput <: AbstractProcessInput
process::ABCProcessDescription
inParticles::Vector{ABCParticle}
outParticles::Vector{ABCParticle}
end
"""
PARTICLE_MASSES
A constant dictionary containing the masses of the different [`ParticleType`](@ref)s.
A constant dictionary containing the masses of the different [`ABCParticle`](@ref)s.
"""
const PARTICLE_MASSES =
Dict{ParticleType, Float64}(A => 1.0, B => 1.0, C => 0.0)
const PARTICLE_MASSES = Dict{Type, Float64}(ParticleA => 1.0, ParticleB => 1.0, ParticleC => 0.0)
"""
Particle
A struct describing a particle of the ABC-Model. It has the 4 momentum parts P0...P3 and a [`ParticleType`](@ref).
`sizeof(Particle())` = 40 Byte
"""
struct Particle
P0::Float64
P1::Float64
P2::Float64
P3::Float64
type::ParticleType
end
"""
ParticleValue
A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated.
`sizeof(ParticleValue())` = 48 Byte
"""
struct ParticleValue
p::Particle
v::Float64
end
"""
mass(t::ParticleType)
mass(t::Type{T}) where {T <: ABCParticle}
Return the mass (at rest) of the given particle type.
"""
mass(t::ParticleType) = PARTICLE_MASSES[t]
mass(t::Type{T}) where {T <: ABCParticle} = PARTICLE_MASSES[t]
"""
remaining_type(t1::ParticleType, t2::ParticleType)
interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
For 2 given (non-equal) particle types, return the third of ABC.
"""
function remaining_type(t1::ParticleType, t2::ParticleType)
function interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
@assert t1 != t2
if t1 != A && t2 != A
return A
elseif t1 != B && t2 != B
return B
if t1 != ParticleA && t2 != ParticleA
return ParticleA
elseif t1 != ParticleB && t2 != ParticleB
return ParticleB
else
return C
return ParticleC
end
end
"""
square(p::Particle)
types(::ABCModel)
Return a Vector of the possible types of particle in the [`ABCModel`](@ref).
"""
types(::ABCModel) = [ParticleA, ParticleB, ParticleC] # always listed in the order A, B, C
"""
square(p::ABCParticle)
Return the square of the particle's momentum as a `Float` value.
Takes 7 effective FLOP.
"""
function square(p::Particle)
return p.P0 * p.P0 - p.P1 * p.P1 - p.P2 * p.P2 - p.P3 * p.P3
function square(p::ABCParticle)
return getMass2(p.momentum)
end
"""
inner_edge(p::Particle)
inner_edge(p::ABCParticle)
Return the factor of the inner edge with the given (virtual) particle.
Takes 10 effective FLOP. (3 here + 10 in square(p))
Takes 10 effective FLOP. (3 here + 7 in square(p))
"""
function inner_edge(p::Particle)
return 1.0 / (square(p) - mass(p.type) * mass(p.type))
function inner_edge(p::ABCParticle)
return 1.0 / (square(p) - mass(typeof(p)) * mass(typeof(p)))
end
"""
outer_edge(p::Particle)
outer_edge(p::ABCParticle)
Return the factor of the outer edge with the given (real) particle.
Takes 0 effective FLOP.
"""
function outer_edge(p::Particle)
function outer_edge(p::ABCParticle)
return 1.0
end
@ -111,20 +154,57 @@ function vertex()
end
"""
preserve_momentum(p1::Particle, p2::Particle)
preserve_momentum(p1::ABCParticle, p2::ABCParticle)
Calculate and return a new particle from two given interacting ones at a vertex.
Takes 4 effective FLOP.
"""
function preserve_momentum(p1::Particle, p2::Particle)
p3 = Particle(
p1.P0 + p2.P0,
p1.P1 + p2.P1,
p1.P2 + p2.P2,
p1.P3 + p2.P3,
remaining_type(p1.type, p2.type),
)
function preserve_momentum(p1::ABCParticle, p2::ABCParticle)
t3 = interaction_result(typeof(p1), typeof(p2))
p3 = t3(p1.momentum + p2.momentum)
return p3
end
"""
type_from_name(name::String)
For a name of a particle, return the particle's [`Type`].
"""
function type_from_name(name::String)
if startswith(name, "A")
return ParticleA
elseif startswith(name, "B")
return ParticleB
elseif startswith(name, "C")
return ParticleC
else
throw("Invalid name for a particle in the ABC model")
end
end
String(::Type{ParticleA}) = "A" # one-letter name of the A particle type
String(::Type{ParticleB}) = "B" # one-letter name of the B particle type
String(::Type{ParticleC}) = "C" # one-letter name of the C particle type
in_particles(process::ABCProcessDescription) = process.inParticles # per-type counts of the incoming particles
in_particles(input::ABCProcessInput) = input.inParticles # values of the incoming particles
out_particles(process::ABCProcessDescription) = process.outParticles # per-type counts of the outgoing particles
out_particles(input::ABCProcessInput) = input.outParticles # values of the outgoing particles

58
src/models/abc/print.jl Normal file
View File

@ -0,0 +1,58 @@
"""
show(io::IO, process::ABCProcessDescription)
Pretty print an [`ABCProcessDescription`](@ref) (no newlines).
```jldoctest
julia> using MetagraphOptimization
julia> print(parse_process("AB->ABBB", ABCModel()))
ABC Process: 'AB->ABBB'
```
"""
function show(io::IO, process::ABCProcessDescription)
# types() gives the types in order (ABC) instead of random like keys() would
print(io, "ABC Process: \'")
for type in types(ABCModel())
for _ in 1:get(process.inParticles, type, 0)
print(io, String(type))
end
end
print(io, "->")
for type in types(ABCModel())
for _ in 1:get(process.outParticles, type, 0)
print(io, String(type))
end
end
print(io, "'")
return nothing
end
"""
show(io::IO, processInput::ABCProcessInput)
Pretty print an [`ABCProcessInput`](@ref) (with newlines).
"""
function show(io::IO, processInput::ABCProcessInput)
println(io, "Input for $(processInput.process):")
println(io, " $(length(processInput.inParticles)) Incoming particles:")
for particle in processInput.inParticles
println(io, " $particle")
end
println(io, " $(length(processInput.outParticles)) Outgoing Particles:")
for particle in processInput.outParticles
println(io, " $particle")
end
return nothing
end
"""
show(io::IO, particle::T) where {T <: ABCParticle}
Pretty print an [`ABCParticle`](@ref) (no newlines).
"""
function show(io::IO, particle::T) where {T <: ABCParticle}
print(io, "$(String(typeof(particle))): $(particle.momentum)")
return nothing
end

View File

@ -3,35 +3,35 @@
Return the compute effort of an S1 task.
"""
compute_effort(t::ComputeTaskS1) = 11
compute_effort(t::ComputeTaskS1) = 11.0
"""
compute_effort(t::ComputeTaskS2)
Return the compute effort of an S2 task.
"""
compute_effort(t::ComputeTaskS2) = 12
compute_effort(t::ComputeTaskS2) = 12.0
"""
compute_effort(t::ComputeTaskU)
Return the compute effort of a U task.
"""
compute_effort(t::ComputeTaskU) = 1
compute_effort(t::ComputeTaskU) = 1.0
"""
compute_effort(t::ComputeTaskV)
Return the compute effort of a V task.
"""
compute_effort(t::ComputeTaskV) = 6
compute_effort(t::ComputeTaskV) = 6.0
"""
compute_effort(t::ComputeTaskP)
Return the compute effort of a P task.
"""
compute_effort(t::ComputeTaskP) = 0
compute_effort(t::ComputeTaskP) = 0.0
"""
compute_effort(t::ComputeTaskSum)
@ -41,7 +41,7 @@ Return the compute effort of a Sum task.
Note: This is a constant compute effort, even though sum scales with the number of its inputs. Since there is only ever a single sum node in a graph generated from the ABC-Model,
this doesn't matter.
"""
compute_effort(t::ComputeTaskSum) = 1
compute_effort(t::ComputeTaskSum) = 1.0
"""
show(io::IO, t::DataTask)
@ -57,42 +57,42 @@ end
Print the S1 task to io.
"""
show(io::IO, t::ComputeTaskS1) = print("ComputeS1")
show(io::IO, t::ComputeTaskS1) = print(io, "ComputeS1")
"""
show(io::IO, t::ComputeTaskS2)
Print the S2 task to io.
"""
show(io::IO, t::ComputeTaskS2) = print("ComputeS2")
show(io::IO, t::ComputeTaskS2) = print(io, "ComputeS2")
"""
show(io::IO, t::ComputeTaskP)
Print the P task to io.
"""
show(io::IO, t::ComputeTaskP) = print("ComputeP")
show(io::IO, t::ComputeTaskP) = print(io, "ComputeP")
"""
show(io::IO, t::ComputeTaskU)
Print the U task to io.
"""
show(io::IO, t::ComputeTaskU) = print("ComputeU")
show(io::IO, t::ComputeTaskU) = print(io, "ComputeU")
"""
show(io::IO, t::ComputeTaskV)
Print the V task to io.
"""
show(io::IO, t::ComputeTaskV) = print("ComputeV")
show(io::IO, t::ComputeTaskV) = print(io, "ComputeV")
"""
show(io::IO, t::ComputeTaskSum)
Print the sum task to io.
"""
show(io::IO, t::ComputeTaskSum) = print("ComputeSum")
show(io::IO, t::ComputeTaskSum) = print(io, "ComputeSum")
"""
copy(t::DataTask)
@ -147,19 +147,20 @@ children(::ComputeTaskV) = 2
"""
children(::ComputeTaskSum)
Return the number of children of a ComputeTaskSum, since this is variable and the task doesn't know
how many children it will sum over, return a wildcard -1.
TODO: this is kind of bad because it means we can't fuse with a sum task
Return the number of children of a ComputeTaskSum.
"""
children(::ComputeTaskSum) = -1
children(t::ComputeTaskSum) = t.children_number
"""
children(t::FusedComputeTask)
Return the number of children of a FusedComputeTask. It's the sum of the children of both tasks minus one.
Return the number of children of a FusedComputeTask.
"""
function children(t::FusedComputeTask)
(T1, T2) = get_types(t)
return children(T1()) + children(T2()) - 1 # one of the inputs is the output of T1 and thus not a child of the node
return length(union(Set(t.t1_inputs), Set(t.t2_inputs)))
end
function add_child!(t::ComputeTaskSum)
    # the sum task keeps its own count of children; register one more
    t.children_number = t.children_number + 1
    return nothing
end

View File

@ -4,7 +4,7 @@
Task representing a specific data transfer in the ABC Model.
"""
struct DataTask <: AbstractDataTask
data::UInt64
data::Float64
end
"""
@ -47,19 +47,13 @@ struct ComputeTaskU <: AbstractComputeTask end
Task that sums all its inputs, n children.
"""
struct ComputeTaskSum <: AbstractComputeTask end
mutable struct ComputeTaskSum <: AbstractComputeTask
children_number::Int
end
"""
ABC_TASKS
Constant vector of all tasks of the ABC-Model.
"""
ABC_TASKS = [
DataTask,
ComputeTaskS1,
ComputeTaskS2,
ComputeTaskP,
ComputeTaskV,
ComputeTaskU,
ComputeTaskSum,
]
ABC_TASKS = [DataTask, ComputeTaskS1, ComputeTaskS2, ComputeTaskP, ComputeTaskV, ComputeTaskU, ComputeTaskSum]

109
src/models/interface.jl Normal file
View File

@ -0,0 +1,109 @@
"""
AbstractPhysicsModel
Base type for a model, e.g. ABC-Model or QED. This is used to dispatch many functions.
"""
abstract type AbstractPhysicsModel end
"""
AbstractParticle
Base type for particles belonging to a certain [`AbstractPhysicsModel`](@ref).
"""
abstract type AbstractParticle end
"""
ParticleValue{ParticleType <: AbstractParticle}
A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated.
`sizeof(ParticleValue())` = 48 Byte
"""
struct ParticleValue{ParticleType <: AbstractParticle}
p::ParticleType
v::Float64
end
"""
AbstractProcessDescription
Base type for process descriptions. An object of this type of a corresponding [`AbstractPhysicsModel`](@ref) should uniquely identify a process in that model.
See also: [`parse_process`](@ref)
"""
abstract type AbstractProcessDescription end
"""
AbstractProcessInput
Base type for process inputs. An object of this type contains the input values (e.g. momenta) of the particles in a process.
See also: [`gen_process_input`](@ref)
"""
abstract type AbstractProcessInput end
"""
mass(t::Type{T}) where {T <: AbstractParticle}
Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the particles mass at rest.
"""
function mass end
"""
interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: AbstractParticle, T2 <: AbstractParticle}
Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the result particle type when the two given particles interact.
"""
function interaction_result end
"""
types(::AbstractPhysicsModel)
Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref), returning a `Vector` of the available particle types in the model.
"""
function types end
"""
in_particles(::AbstractProcessDescription)
Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref).
Returns a `<: Dict{Type{AbstractParticle}, Int}` object, representing the number of incoming particles for the process per particle type.
in_particles(::AbstractProcessInput)
Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
Returns a `<: Vector{AbstractParticle}` object with the values of all incoming particles for the corresponding `ProcessDescription`.
"""
function in_particles end
"""
out_particles(::AbstractProcessDescription)
Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref).
Returns a `<: Dict{Type{AbstractParticle}, Int}` object, representing the number of outgoing particles for the process per particle type.
out_particles(::AbstractProcessInput)
Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
Returns a `<: Vector{AbstractParticle}` object with the values of all outgoing particles for the corresponding `ProcessDescription`.
"""
function out_particles end
"""
parse_process(::AbstractString, ::AbstractPhysicsModel)
Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref).
Returns a `ProcessDescription` object.
"""
function parse_process end
"""
gen_process_input(::AbstractProcessDescription)
Interface function that must be implemented for every specific [`AbstractProcessDescription`](@ref).
Returns a randomly generated and valid corresponding `ProcessInput`.
"""
function gen_process_input end

10
src/models/print.jl Normal file
View File

@ -0,0 +1,10 @@
"""
show(io::IO, particleValue::ParticleValue)
Pretty print a [`ParticleValue`](@ref), no newlines.
"""
function show(io::IO, particleValue::ParticleValue)
print(io, "($(particleValue.p), value: $(particleValue.v))")
return nothing
end

View File

@ -1,44 +1,20 @@
DataTaskNode(t::AbstractDataTask, name = "") = DataTaskNode(
t,
Vector{Node}(),
Vector{Node}(),
UUIDs.uuid1(rng[threadid()]),
missing,
missing,
missing,
name,
)
DataTaskNode(t::AbstractDataTask, name = "") =
DataTaskNode(t, Vector{Node}(), Vector{Node}(), UUIDs.uuid1(rng[threadid()]), missing, missing, missing, name)
ComputeTaskNode(t::AbstractComputeTask) = ComputeTaskNode(
t,
Vector{Node}(),
Vector{Node}(),
UUIDs.uuid1(rng[threadid()]),
missing,
missing,
Vector{NodeFusion}(),
t, # task
Vector{Node}(), # parents
Vector{Node}(), # children
UUIDs.uuid1(rng[threadid()]), # id
missing, # node reduction
missing, # node split
Vector{NodeFusion}(), # node fusions
missing, # device
)
copy(m::Missing) = missing
copy(n::ComputeTaskNode) = ComputeTaskNode(
copy(n.task),
copy(n.parents),
copy(n.children),
UUIDs.uuid1(rng[threadid()]),
copy(n.nodeReduction),
copy(n.nodeSplit),
copy(n.nodeFusions),
)
copy(n::DataTaskNode) = DataTaskNode(
copy(n.task),
copy(n.parents),
copy(n.children),
UUIDs.uuid1(rng[threadid()]),
copy(n.nodeReduction),
copy(n.nodeSplit),
copy(n.nodeFusion),
n.name,
)
copy(n::ComputeTaskNode) = ComputeTaskNode(copy(n.task))
copy(n::DataTaskNode) = DataTaskNode(copy(n.task), n.name)
"""
make_node(t::AbstractTask)

View File

@ -22,5 +22,6 @@ end
Return the uuid as a string usable as a variable name in code generation.
"""
function to_var_name(id::UUID)
return replace(string(id), "-" => "_")
str = "_" * replace(string(id), "-" => "_")
return str
end

View File

@ -24,13 +24,14 @@ abstract type Operation end
Any node that transfers data and does no computation.
# Fields
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
`.id`: The node's id. Improves the speed of comparisons.\\
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeFusion`: Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes.
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
`.id`: The node's id. Improves the speed of comparisons and is used as a unique identifier.\\
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeFusion`: Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes.\\
`.name`: The name of this node for entry nodes into the graph ([`is_entry_node`](@ref)) to reliably assign the inputs to the correct nodes when executing.\\
"""
mutable struct DataTaskNode <: Node
task::AbstractDataTask
@ -60,16 +61,17 @@ end
"""
ComputeTaskNode <: Node
Any node that transfers data and does no computation.
Any node that computes a result from inputs using an [`AbstractComputeTask`](@ref).
# Fields
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
`.id`: The node's id. Improves the speed of comparisons.\\
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeFusion`: A vector of this node's [`NodeFusion`](@ref)s. For a ComputeTaskNode there can be any number of these, unlike the DataTaskNodes.
`.task`: The node's compute task type. A concrete subtype of [`AbstractComputeTask`](@ref).\\
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
`.id`: The node's id. Improves the speed of comparisons and is used as a unique identifier.\\
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
`.nodeFusions`: A vector of this node's [`NodeFusion`](@ref)s. For a `ComputeTaskNode` there can be any number of these, unlike the [`DataTaskNode`](@ref)s.\\
`.device`: The Device this node has been scheduled on by a [`Scheduler`](@ref).
"""
mutable struct ComputeTaskNode <: Node
task::AbstractComputeTask
@ -82,6 +84,9 @@ mutable struct ComputeTaskNode <: Node
# for ComputeTasks there can be multiple fusions, unlike the DataTasks
nodeFusions::Vector{Operation}
# the device this node is assigned to execute on
device::Union{AbstractDevice, Missing}
end
"""
@ -95,8 +100,5 @@ The child is the prerequisite node of the parent.
"""
struct Edge
# edge points from child to parent
edge::Union{
Tuple{DataTaskNode, ComputeTaskNode},
Tuple{ComputeTaskNode, DataTaskNode},
}
edge::Union{Tuple{DataTaskNode, ComputeTaskNode}, Tuple{ComputeTaskNode, DataTaskNode}}
end

View File

@ -22,12 +22,24 @@ function is_valid_node(graph::DAG, node::Node)
@assert node in child.parents "Node is not a parent of its child!"
end
if !ismissing(node.nodeReduction)
#=if !ismissing(node.nodeReduction)
@assert is_valid(graph, node.nodeReduction)
end
if !ismissing(node.nodeSplit)
@assert is_valid(graph, node.nodeSplit)
end=#
if !(typeof(node.task) <: FusedComputeTask)
# the remaining checks are only necessary for fused compute tasks
return true
end
# every child must be in some input of the task
for child in node.children
str = Symbol(to_var_name(child.id))
@assert (str in node.task.t1_inputs) || (str in node.task.t2_inputs) "$str was not in any of the tasks' inputs\nt1_inputs: $(node.task.t1_inputs)\nt2_inputs: $(node.task.t2_inputs)"
end
return true
end
@ -41,9 +53,9 @@ This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref).
function is_valid(graph::DAG, node::ComputeTaskNode)
@assert is_valid_node(graph, node)
for nf in node.nodeFusions
#=for nf in node.nodeFusions
@assert is_valid(graph, nf)
end
end=#
return true
end
@ -57,8 +69,8 @@ This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref).
function is_valid(graph::DAG, node::DataTaskNode)
@assert is_valid_node(graph, node)
if !ismissing(node.nodeFusion)
#=if !ismissing(node.nodeFusion)
@assert is_valid(graph, node.nodeFusion)
end
end=#
return true
end

View File

@ -34,12 +34,7 @@ Apply the given [`NodeFusion`](@ref) to the graph. Generic wrapper around [`node
Return an [`AppliedNodeFusion`](@ref) object generated from the graph's [`Diff`](@ref).
"""
function apply_operation!(graph::DAG, operation::NodeFusion)
diff = node_fusion!(
graph,
operation.input[1],
operation.input[2],
operation.input[3],
)
diff = node_fusion!(graph, operation.input[1], operation.input[2], operation.input[3])
graph.properties += GraphProperties(diff)
@ -124,17 +119,24 @@ function revert_diff!(graph::DAG, diff::Diff)
# add removed nodes, remove added nodes, same for edges
# note the order
for edge in diff.addedEdges
remove_edge!(graph, edge.edge[1], edge.edge[2], false)
remove_edge!(graph, edge.edge[1], edge.edge[2], track = false)
end
for node in diff.addedNodes
remove_node!(graph, node, false)
remove_node!(graph, node, track = false)
end
for node in diff.removedNodes
insert_node!(graph, node, false)
insert_node!(graph, node, track = false)
end
for edge in diff.removedEdges
insert_edge!(graph, edge.edge[1], edge.edge[2], false)
insert_edge!(graph, edge.edge[1], edge.edge[2], track = false)
end
for (node, task) in diff.updatedChildren
# node must be fused compute task at this point
@assert typeof(node.task) <: FusedComputeTask
node.task = task
end
graph.properties -= GraphProperties(diff)
@ -149,21 +151,24 @@ Fuse nodes n1 -> n2 -> n3 together into one node, return the applied difference
For details see [`NodeFusion`](@ref).
"""
function node_fusion!(
graph::DAG,
n1::ComputeTaskNode,
n2::DataTaskNode,
n3::ComputeTaskNode,
)
# @assert is_valid_node_fusion_input(graph, n1, n2, n3)
function node_fusion!(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
@assert is_valid_node_fusion_input(graph, n1, n2, n3)
# clear snapshot
get_snapshot_diff(graph)
# save children and parents
n1_children = children(n1)
n3_parents = parents(n3)
n3_children = children(n3)
n1Children = children(n1)
n3Parents = parents(n3)
n1Task = copy(n1.task)
n3Task = copy(n3.task)
# assemble the input node vectors of n1 and n3 to save into the FusedComputeTask
n1Inputs = Vector{Symbol}()
for child in n1Children
push!(n1Inputs, Symbol(to_var_name(child.id)))
end
# remove the edges and nodes that will be replaced by the fused node
remove_edge!(graph, n1, n2)
@ -172,29 +177,38 @@ function node_fusion!(
remove_node!(graph, n2)
# get n3's children now so it automatically excludes n2
n3_children = children(n3)
n3Children = children(n3)
n3Inputs = Vector{Symbol}()
for child in n3Children
push!(n3Inputs, Symbol(to_var_name(child.id)))
end
remove_node!(graph, n3)
# create new node with the fused compute task
new_node =
ComputeTaskNode(FusedComputeTask{typeof(n1.task), typeof(n3.task)}())
insert_node!(graph, new_node)
newNode = ComputeTaskNode(FusedComputeTask(n1Task, n3Task, n1Inputs, Symbol(to_var_name(n2.id)), n3Inputs))
insert_node!(graph, newNode)
for child in n1_children
for child in n1Children
remove_edge!(graph, child, n1)
insert_edge!(graph, child, new_node)
insert_edge!(graph, child, newNode)
end
for child in n3_children
for child in n3Children
remove_edge!(graph, child, n3)
if !(child in n1_children)
insert_edge!(graph, child, new_node)
if !(child in n1Children)
insert_edge!(graph, child, newNode)
end
end
for parent in n3_parents
for parent in n3Parents
remove_edge!(graph, n3, parent)
insert_edge!(graph, new_node, parent)
insert_edge!(graph, newNode, parent)
# important! update the parent node's child names in case they are fused compute tasks
# needed for compute generation so the fused compute task can correctly match inputs to its component tasks
update_child!(graph, parent, Symbol(to_var_name(n3.id)), Symbol(to_var_name(newNode.id)))
end
return get_snapshot_diff(graph)
@ -208,21 +222,26 @@ Reduce the given nodes together into one node, return the applied difference to
For details see [`NodeReduction`](@ref).
"""
function node_reduction!(graph::DAG, nodes::Vector{Node})
# @assert is_valid_node_reduction_input(graph, nodes)
@assert is_valid_node_reduction_input(graph, nodes)
# clear snapshot
get_snapshot_diff(graph)
n1 = nodes[1]
n1_children = children(n1)
n1Children = children(n1)
n1_parents = Set(n1.parents)
new_parents = Set{Node}()
n1Parents = Set(n1.parents)
# set of the new parents of n1
newParents = Set{Node}()
# names of the previous children that n1 now replaces per parent
newParentsChildNames = Dict{Node, Symbol}()
# remove all of the nodes' parents and children and the nodes themselves (except for first node)
for i in 2:length(nodes)
n = nodes[i]
for child in n1_children
for child in n1Children
remove_edge!(graph, child, n)
end
@ -230,17 +249,23 @@ function node_reduction!(graph::DAG, nodes::Vector{Node})
remove_edge!(graph, n, parent)
# collect all parents
push!(new_parents, parent)
push!(newParents, parent)
newParentsChildNames[parent] = Symbol(to_var_name(n.id))
end
remove_node!(graph, n)
end
setdiff!(new_parents, n1_parents)
for parent in new_parents
for parent in newParents
# now add parents of all input nodes to n1 without duplicates
insert_edge!(graph, n1, parent)
if !(parent in n1Parents)
# don't double insert edges
insert_edge!(graph, n1, parent)
end
# this has to be done for all parents, even the ones of n1 because they can be duplicate
prevChild = newParentsChildNames[parent]
update_child!(graph, parent, prevChild, Symbol(to_var_name(n1.id)))
end
return get_snapshot_diff(graph)
@ -254,30 +279,33 @@ Split the given node into one node per parent, return the applied difference to
For details see [`NodeSplit`](@ref).
"""
function node_split!(graph::DAG, n1::Node)
# @assert is_valid_node_split_input(graph, n1)
@assert is_valid_node_split_input(graph, n1)
# clear snapshot
get_snapshot_diff(graph)
n1_parents = parents(n1)
n1_children = children(n1)
n1Parents = parents(n1)
n1Children = children(n1)
for parent in n1_parents
for parent in n1Parents
remove_edge!(graph, n1, parent)
end
for child in n1_children
for child in n1Children
remove_edge!(graph, child, n1)
end
remove_node!(graph, n1)
for parent in n1_parents
n_copy = copy(n1)
insert_node!(graph, n_copy)
insert_edge!(graph, n_copy, parent)
for parent in n1Parents
nCopy = copy(n1)
for child in n1_children
insert_edge!(graph, child, n_copy)
insert_node!(graph, nCopy)
insert_edge!(graph, nCopy, parent)
for child in n1Children
insert_edge!(graph, child, nCopy)
end
update_child!(graph, parent, Symbol(to_var_name(n1.id)), Symbol(to_var_name(nCopy.id)))
end
return get_snapshot_diff(graph)

View File

@ -7,10 +7,7 @@ using Base.Threads
Insert the given node fusion into its input nodes' operation caches. For the compute nodes, locking via the given `locks` is employed to have safe multi-threading. For a large set of nodes, contention on the locks should be very small.
"""
function insert_operation!(
nf::NodeFusion,
locks::Dict{ComputeTaskNode, SpinLock},
)
function insert_operation!(nf::NodeFusion, locks::Dict{ComputeTaskNode, SpinLock})
n1 = nf.input[1]
n2 = nf.input[2]
n3 = nf.input[3]
@ -52,10 +49,7 @@ end
Insert the node reductions into the graph and the nodes' caches. Employs multithreading for speedup.
"""
function nr_insertion!(
operations::PossibleOperations,
nodeReductions::Vector{Vector{NodeReduction}},
)
function nr_insertion!(operations::PossibleOperations, nodeReductions::Vector{Vector{NodeReduction}})
total_len = 0
for vec in nodeReductions
total_len += length(vec)
@ -83,11 +77,7 @@ end
Insert the node fusions into the graph and the nodes' caches. Employs multithreading for speedup.
"""
function nf_insertion!(
graph::DAG,
operations::PossibleOperations,
nodeFusions::Vector{Vector{NodeFusion}},
)
function nf_insertion!(graph::DAG, operations::PossibleOperations, nodeFusions::Vector{Vector{NodeFusion}})
total_len = 0
for vec in nodeFusions
total_len += length(vec)
@ -122,10 +112,7 @@ end
Insert the node splits into the graph and the nodes' caches. Employs multithreading for speedup.
"""
function ns_insertion!(
operations::PossibleOperations,
nodeSplits::Vector{Vector{NodeSplit}},
)
function ns_insertion!(operations::PossibleOperations, nodeSplits::Vector{Vector{NodeSplit}})
total_len = 0
for vec in nodeSplits
total_len += length(vec)
@ -231,16 +218,12 @@ function generate_operations(graph::DAG)
continue
end
push!(
generatedFusions[threadid()],
NodeFusion((child_node, node, parent_node)),
)
push!(generatedFusions[threadid()], NodeFusion((child_node, node, parent_node)))
end
end
# launch thread for node fusion insertion
nf_task =
@task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
nf_task = @task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
schedule(nf_task)
# find possible node splits

View File

@ -4,9 +4,7 @@
Return whether `operations` is empty, i.e. all of its fields are empty.
"""
function isempty(operations::PossibleOperations)
return isempty(operations.nodeFusions) &&
isempty(operations.nodeReductions) &&
isempty(operations.nodeSplits)
return isempty(operations.nodeFusions) && isempty(operations.nodeReductions) && isempty(operations.nodeSplits)
end
"""
@ -63,9 +61,7 @@ function can_fuse(n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
return false
end
if length(n2.parents) != 1 ||
length(n2.children) != 1 ||
length(n1.parents) != 1
if length(n2.parents) != 1 || length(n2.children) != 1 || length(n1.parents) != 1
return false
end

View File

@ -9,24 +9,12 @@ Assert for a gven node fusion input whether the nodes can be fused. For the requ
Intended for use with `@assert` or `@test`.
"""
function is_valid_node_fusion_input(
graph::DAG,
n1::ComputeTaskNode,
n2::DataTaskNode,
n3::ComputeTaskNode,
)
function is_valid_node_fusion_input(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
if !(n1 in graph) || !(n2 in graph) || !(n3 in graph)
throw(
AssertionError(
"[Node Fusion] The given nodes are not part of the given graph",
),
)
throw(AssertionError("[Node Fusion] The given nodes are not part of the given graph"))
end
if !is_child(n1, n2) ||
!is_child(n2, n3) ||
!is_parent(n3, n2) ||
!is_parent(n2, n1)
if !is_child(n1, n2) || !is_child(n2, n3) || !is_parent(n3, n2) || !is_parent(n2, n1)
throw(
AssertionError(
"[Node Fusion] The given nodes are not connected by edges which is required for node fusion",
@ -35,27 +23,19 @@ function is_valid_node_fusion_input(
end
if length(n2.parents) > 1
throw(
AssertionError(
"[Node Fusion] The given data node has more than one parent",
),
)
throw(AssertionError("[Node Fusion] The given data node has more than one parent"))
end
if length(n2.children) > 1
throw(
AssertionError(
"[Node Fusion] The given data node has more than one child",
),
)
throw(AssertionError("[Node Fusion] The given data node has more than one child"))
end
if length(n1.parents) > 1
throw(
AssertionError(
"[Node Fusion] The given n1 has more than one parent",
),
)
throw(AssertionError("[Node Fusion] The given n1 has more than one parent"))
end
@assert is_valid(graph, n1)
@assert is_valid(graph, n2)
@assert is_valid(graph, n3)
return true
end
@ -69,22 +49,21 @@ Intended for use with `@assert` or `@test`.
function is_valid_node_reduction_input(graph::DAG, nodes::Vector{Node})
for n in nodes
if n graph
throw(
AssertionError(
"[Node Reduction] The given nodes are not part of the given graph",
),
)
throw(AssertionError("[Node Reduction] The given nodes are not part of the given graph"))
end
@assert is_valid(graph, n)
end
t = typeof(nodes[1].task)
for n in nodes
if typeof(n.task) != t
throw(
AssertionError(
"[Node Reduction] The given nodes are not of the same type",
),
)
throw(AssertionError("[Node Reduction] The given nodes are not of the same type"))
end
if (typeof(n) <: DataTaskNode)
if (n.name != nodes[1].name)
throw(AssertionError("[Node Reduction] The given nodes do not have the same name"))
end
end
end
@ -111,11 +90,7 @@ Intended for use with `@assert` or `@test`.
"""
function is_valid_node_split_input(graph::DAG, n1::Node)
if n1 graph
throw(
AssertionError(
"[Node Split] The given node is not part of the given graph",
),
)
throw(AssertionError("[Node Split] The given node is not part of the given graph"))
end
if length(n1.parents) <= 1
@ -126,6 +101,8 @@ function is_valid_node_split_input(graph::DAG, n1::Node)
)
end
@assert is_valid(graph, n1)
return true
end
@ -163,12 +140,7 @@ Assert for a given [`NodeFusion`](@ref) whether it is a valid operation in the g
Intended for use with `@assert` or `@test`.
"""
function is_valid(graph::DAG, nf::NodeFusion)
@assert is_valid_node_fusion_input(
graph,
nf.input[1],
nf.input[2],
nf.input[3],
)
@assert is_valid_node_fusion_input(graph, nf.input[1], nf.input[2], nf.input[3])
@assert nf in graph.possibleOperations.nodeFusions "NodeFusion is not part of the graph's possible operations!"
return true
end

View File

@ -0,0 +1,8 @@
"""
    GreedyOptimizer{EstimatorType <: AbstractEstimator}

An implementation of the greedy optimization algorithm, simply choosing the best next option evaluated with the given estimator.

The concrete type of the estimator is a type parameter of the optimizer, so the `estimator` field is
concretely typed. Construction is unchanged: `GreedyOptimizer(estimator)` infers the parameter.
"""
struct GreedyOptimizer{EstimatorType <: AbstractEstimator}
    estimator::EstimatorType
end

View File

@ -11,8 +11,7 @@ function -(prop1::GraphProperties, prop2::GraphProperties)
computeIntensity = if (prop1.data - prop2.data == 0)
0.0
else
(prop1.computeEffort - prop2.computeEffort) /
(prop1.data - prop2.data)
(prop1.computeEffort - prop2.computeEffort) / (prop1.data - prop2.data)
end,
cost = prop1.cost - prop2.cost,
noNodes = prop1.noNodes - prop2.noNodes,
@ -33,8 +32,7 @@ function +(prop1::GraphProperties, prop2::GraphProperties)
computeIntensity = if (prop1.data + prop2.data == 0)
0.0
else
(prop1.computeEffort + prop2.computeEffort) /
(prop1.data + prop2.data)
(prop1.computeEffort + prop2.computeEffort) / (prop1.data + prop2.data)
end,
cost = prop1.cost + prop2.cost,
noNodes = prop1.noNodes + prop2.noNodes,

50
src/scheduler/greedy.jl Normal file
View File

@ -0,0 +1,50 @@
"""
    GreedyScheduler

A greedy implementation of a scheduler, creating a topological ordering of nodes and naively balancing them onto the different devices.

The scheduling itself is implemented by the `schedule_dag(::GreedyScheduler, graph, machine)` method.
"""
struct GreedyScheduler end
"""
    schedule_dag(::GreedyScheduler, graph::DAG, machine::Machine)

Greedily schedule `graph` onto the devices of `machine`.

Nodes are emitted in a topological order, starting from the entry nodes of the graph; a node becomes
available once all of its children have been emitted. Every `ComputeTaskNode` is assigned (via its
`device` field) to the device with the lowest accumulated compute effort at the time it is emitted.

Returns the `Vector{Node}` holding the nodes in the order they were scheduled.
"""
function schedule_dag(::GreedyScheduler, graph::DAG, machine::Machine)
    nodeQueue = PriorityQueue{Node, Int}()

    # use a priority equal to the number of unseen children -> 0 are nodes that can be added
    for node in get_entry_nodes(graph)
        enqueue!(nodeQueue, node => 0)
    end

    schedule = Vector{Node}()
    sizehint!(schedule, length(graph.nodes))

    # keep an accumulated cost of things scheduled to each device so far
    # (Float64 so that non-integer compute efforts can be accumulated without an InexactError)
    deviceAccCost = PriorityQueue{AbstractDevice, Float64}()
    for device in machine.devices
        enqueue!(deviceAccCost, device => 0.0)
    end

    while !isempty(nodeQueue)
        # the queue head must always be a node without unseen children
        @assert peek(nodeQueue)[2] == 0
        node = dequeue!(nodeQueue)

        # assign the device with lowest accumulated cost to the node (if it's a compute node)
        if isa(node, ComputeTaskNode)
            lowestDevice = peek(deviceAccCost)[1]
            node.device = lowestDevice
            # accumulate instead of overwrite, otherwise only the last task's effort would count
            deviceAccCost[lowestDevice] += compute_effort(node.task)
        end

        push!(schedule, node)

        for parent in node.parents
            # reduce the priority of all parents by one
            if !haskey(nodeQueue, parent)
                enqueue!(nodeQueue, parent => length(parent.children) - 1)
            else
                nodeQueue[parent] -= 1
            end
        end
    end

    return schedule
end

View File

@ -0,0 +1,18 @@
"""
    Scheduler

Abstract base type for scheduler implementations. The scheduler is used to assign each node to a device and create a topological ordering of tasks.

Implementations are expected to provide a method of `schedule_dag`.
"""
abstract type Scheduler end
"""
    schedule_dag(::Scheduler, ::DAG, ::Machine)

Interface function that must be implemented for implementations of [`Scheduler`](@ref).

The function assigns each [`ComputeTaskNode`](@ref) of the [`DAG`](@ref) to one of the devices in the given [`Machine`](@ref) and returns a `Vector{Node}` representing a topological ordering.

[`DataTaskNode`](@ref)s are not scheduled to devices since they do not compute. Instead, a data node transfers data from the [`AbstractDevice`](@ref) of its child to all [`AbstractDevice`](@ref)s of its parents.
"""
function schedule_dag end

89
src/task/compute.jl Normal file
View File

@ -0,0 +1,89 @@
"""
    compute(t::FusedComputeTask, data)

Compute a [`FusedComputeTask`](@ref). This method is intentionally not implemented and must not be called:
it always throws an `AssertionError`. Fused compute tasks generate their expressions directly through their
component tasks instead.
"""
function compute(t::FusedComputeTask, data)
    # thrown explicitly rather than through `@assert false`, because assertions may be disabled
    throw(AssertionError("This is not implemented and should never be called"))
end
"""
    get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector, outExpr)

Generate code evaluating a [`FusedComputeTask`](@ref), providing the output on `outExpr`.

The expression of the first component task is generated on the access expressions of `t.t1_inputs`, writing to
the access expression of `t.t1_output`. The expression of the second component task is generated on the access
expressions of `t.t2_inputs` followed by the first task's output, writing to `outExpr`. Both expressions are
returned together in one `:block` expression.

Note that the `inExprs` argument is not read by this method; the inputs are taken from the names stored in `t`.
"""
function get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector, outExpr)
    # access expressions for the inputs of the first task and for its (intermediate) output
    firstInputs = Any[gen_access_expr(device, sym) for sym in t.t1_inputs]
    intermediate = gen_access_expr(device, t.t1_output)

    # access expressions for the explicit inputs of the second task
    secondInputs = Any[gen_access_expr(device, sym) for sym in t.t2_inputs]

    firstExpr = get_expression(t.first_task, device, firstInputs, intermediate)
    # the output of the first task is passed as the last input of the second task
    secondExpr = get_expression(t.second_task, device, [secondInputs..., intermediate], outExpr)

    return Expr(:block, firstExpr, secondExpr)
end
"""
    get_expression(node::ComputeTaskNode)

Generate and return code for a given [`ComputeTaskNode`](@ref).

The node must already have a device assigned and must not have more children than its task allows; both
conditions are asserted. The input access expressions are generated from the ids of the node's children, the
output access expression from the node's own id, all on the node's device.
"""
function get_expression(node::ComputeTaskNode)
    @assert length(node.children) <= children(node.task) "Node $(node) has too many children for its task: node has $(length(node.children)) versus task has $(children(node.task))\nNode's children: $(getfield.(node.children, :children))"
    @assert !ismissing(node.device) "Trying to get expression for an unscheduled ComputeTaskNode\nNode: $(node)"

    childIds = getfield.(node.children, :id)
    inputAccess = Any[gen_access_expr(node.device, Symbol(to_var_name(id))) for id in childIds]
    outputAccess = gen_access_expr(node.device, Symbol(to_var_name(node.id)))

    return get_expression(node.task, node.device, inputAccess, outputAccess)
end
"""
    get_expression(node::DataTaskNode)

Generate and return code for a given [`DataTaskNode`](@ref).

The node must have exactly one child (asserted). The returned expression assigns the child's value to the
node's own variable; both access expressions are generated on the device of the child.
"""
function get_expression(node::DataTaskNode)
    @assert length(node.children) == 1 "Trying to call get_expression on a data task node that has $(length(node.children)) children instead of 1"

    # TODO: dispatch to device implementations generating the copy commands

    source = node.children[1]

    # right-hand side: the child's value, left-hand side: this node's variable
    rhs = eval(gen_access_expr(source.device, Symbol(to_var_name(source.id))))
    lhs = eval(gen_access_expr(source.device, Symbol(to_var_name(node.id))))

    return Meta.parse("$lhs = $rhs")
end
"""
    get_init_expression(node::DataTaskNode, device::AbstractDevice)

Generate and return code for the initial input reading expression for [`DataTaskNode`](@ref)s with 0 children, i.e., entry nodes.

The returned expression assigns the value accessed under the node's variable name suffixed with `_in` to the
node's own variable, both on the given `device`.

See also: [`get_entry_nodes`](@ref)
"""
function get_init_expression(node::DataTaskNode, device::AbstractDevice)
    @assert isempty(node.children) "Trying to call get_init_expression on a data task node that is not an entry node."

    # right-hand side: the externally provided input, left-hand side: this node's variable
    rhs = eval(gen_access_expr(device, Symbol("$(to_var_name(node.id))_in")))
    lhs = eval(gen_access_expr(device, Symbol(to_var_name(node.id))))

    return Meta.parse("$lhs = $rhs")
end

View File

@ -3,8 +3,7 @@
Fallback implementation of the copy of an abstract data task, throwing an error.
"""
copy(t::AbstractDataTask) =
error("Need to implement copying for your data tasks!")
copy(t::AbstractDataTask) = error("Need to implement copying for your data tasks!")
"""
copy(t::AbstractComputeTask)
@ -12,3 +11,21 @@ copy(t::AbstractDataTask) =
Return a copy of the given compute task.
"""
copy(t::AbstractComputeTask) = typeof(t)()
"""
    copy(t::FusedComputeTask)

Return a copy of the given [`FusedComputeTask`](@ref).

Both component tasks and both input name vectors are copied; the output name of the first task is reused as is.
"""
function copy(t::FusedComputeTask{T1, T2}) where {T1, T2}
    firstCopy = copy(t.first_task)
    secondCopy = copy(t.second_task)
    return FusedComputeTask{T1, T2}(firstCopy, secondCopy, copy(t.t1_inputs), t.t1_output, copy(t.t2_inputs))
end
"""
    FusedComputeTask{T1, T2}(t1_inputs, t1_output, t2_inputs)

Construct a [`FusedComputeTask`](@ref) from the input/output names only, default-constructing both component
tasks as `T1()` and `T2()`.

The names are stored as `Symbol`s. They can be given either as `Symbol`s or, for convenience, as `String`s,
which are converted to `Symbol`s.
"""
FusedComputeTask{T1, T2}(t1_inputs::Vector{Symbol}, t1_output::Symbol, t2_inputs::Vector{Symbol}) where {T1, T2} =
    FusedComputeTask{T1, T2}(T1(), T2(), t1_inputs, t1_output, t2_inputs)

# String variant: the struct's fields are Symbols and Strings are not implicitly convertible to Symbols,
# so the names are converted explicitly before construction.
FusedComputeTask{T1, T2}(t1_inputs::Vector{String}, t1_output::String, t2_inputs::Vector{String}) where {T1, T2} =
    FusedComputeTask{T1, T2}(
        T1(),
        T2(),
        Symbol[Symbol(name) for name in t1_inputs],
        Symbol(t1_output),
        Symbol[Symbol(name) for name in t2_inputs],
    )

View File

@ -4,6 +4,5 @@
Print a string representation of the fused compute task to io.
"""
function show(io::IO, t::FusedComputeTask)
(T1, T2) = get_types(t)
return print(io, "ComputeFuse(", T1(), ", ", T2(), ")")
return print(io, "ComputeFuse($(t.first_task), $(t.second_task))")
end

View File

@ -49,7 +49,7 @@ end
Return the compute effort of a data task, always zero, regardless of the specific task.
"""
compute_effort(t::AbstractDataTask) = 0
compute_effort(t::AbstractDataTask) = 0.0
"""
data(t::AbstractDataTask)
@ -63,7 +63,7 @@ data(t::AbstractDataTask) = getfield(t, :data)
Return the data of a compute task, always zero, regardless of the specific task.
"""
data(t::AbstractComputeTask) = 0
data(t::AbstractComputeTask) = 0.0
"""
compute_effort(t::FusedComputeTask)
@ -71,8 +71,7 @@ data(t::AbstractComputeTask) = 0
Return the compute effort of a fused compute task.
"""
function compute_effort(t::FusedComputeTask)
(T1, T2) = collect(typeof(t).parameters)
return compute_effort(T1()) + compute_effort(T2())
return compute_effort(t.first_task) + compute_effort(t.second_task)
end
"""
@ -81,30 +80,3 @@ end
Return a tuple of a the fused compute task's components' types.
"""
get_types(::FusedComputeTask{T1, T2}) where {T1, T2} = (T1, T2)
"""
get_expression(t::AbstractTask)
Return an expression evaluating the given task on the :dataIn symbol
"""
function get_expression(t::AbstractTask)
return quote
dataOut = compute($t, dataIn)
end
end
"""
get_expression()
"""
function get_expression(
t::FusedComputeTask,
inSymbol::Symbol,
outSymbol::Symbol,
)
#TODO
computeExp = quote
$outSymbol = compute($t, $inSymbol)
end
return computeExp
end

View File

@ -26,5 +26,13 @@ A fused compute task made up of the computation of first `T1` and then `T2`.
Also see: [`get_types`](@ref).
"""
struct FusedComputeTask{T1 <: AbstractComputeTask, T2 <: AbstractComputeTask} <:
AbstractComputeTask end
struct FusedComputeTask{T1 <: AbstractComputeTask, T2 <: AbstractComputeTask} <: AbstractComputeTask
first_task::T1
second_task::T2
# the names of the inputs for T1
t1_inputs::Vector{Symbol}
# output name of T1
t1_output::Symbol
# t2_inputs doesn't include the output of t1, that's implicit
t2_inputs::Vector{Symbol}
end

View File

@ -87,3 +87,19 @@ Return the memory footprint of the node in Byte. Used in [`mem(graph::DAG)`](@re
function mem(node::Node)
    # objects of these types are excluded from the size computation
    excluded = Union{Node, Operation}
    return Base.summarysize(node; exclude = excluded)
end
"""
    unroll_symbol_vector(vec::Vector{Symbol})

Return the given vector as single String without quotation marks or brackets.

The elements are printed in order and separated by `", "`. An empty vector results in an empty string.
"""
function unroll_symbol_vector(vec::Vector)
    # `join` places the separator between every pair of elements, also when an element prints as an empty string
    return join(vec, ", ")
end

View File

@ -1,3 +1,5 @@
[deps]
AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753"
QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

View File

@ -2,7 +2,7 @@ using Random
function test_known_graph(name::String, n, fusion_test = true)
@testset "Test $name Graph ($n)" begin
graph = parse_abc(joinpath(@__DIR__, "..", "input", "$name.txt"))
graph = parse_dag(joinpath(@__DIR__, "..", "input", "$name.txt"), ABCModel())
props = get_properties(graph)
if (fusion_test)

View File

@ -5,51 +5,51 @@ import MetagraphOptimization.make_node
@testset "Unit Tests Node Reduction" begin
graph = MetagraphOptimization.DAG()
d_exit = insert_node!(graph, make_node(DataTask(10)), false)
d_exit = insert_node!(graph, make_node(DataTask(10)), track = false)
s0 = insert_node!(graph, make_node(ComputeTaskS2()), false)
s0 = insert_node!(graph, make_node(ComputeTaskS2()), track = false)
ED = insert_node!(graph, make_node(DataTask(3)), false)
FD = insert_node!(graph, make_node(DataTask(3)), false)
ED = insert_node!(graph, make_node(DataTask(3)), track = false)
FD = insert_node!(graph, make_node(DataTask(3)), track = false)
EC = insert_node!(graph, make_node(ComputeTaskV()), false)
FC = insert_node!(graph, make_node(ComputeTaskV()), false)
EC = insert_node!(graph, make_node(ComputeTaskV()), track = false)
FC = insert_node!(graph, make_node(ComputeTaskV()), track = false)
A1D = insert_node!(graph, make_node(DataTask(4)), false)
B1D_1 = insert_node!(graph, make_node(DataTask(4)), false)
B1D_2 = insert_node!(graph, make_node(DataTask(4)), false)
C1D = insert_node!(graph, make_node(DataTask(4)), false)
A1D = insert_node!(graph, make_node(DataTask(4)), track = false)
B1D_1 = insert_node!(graph, make_node(DataTask(4)), track = false)
B1D_2 = insert_node!(graph, make_node(DataTask(4)), track = false)
C1D = insert_node!(graph, make_node(DataTask(4)), track = false)
A1C = insert_node!(graph, make_node(ComputeTaskU()), false)
B1C_1 = insert_node!(graph, make_node(ComputeTaskU()), false)
B1C_2 = insert_node!(graph, make_node(ComputeTaskU()), false)
C1C = insert_node!(graph, make_node(ComputeTaskU()), false)
A1C = insert_node!(graph, make_node(ComputeTaskU()), track = false)
B1C_1 = insert_node!(graph, make_node(ComputeTaskU()), track = false)
B1C_2 = insert_node!(graph, make_node(ComputeTaskU()), track = false)
C1C = insert_node!(graph, make_node(ComputeTaskU()), track = false)
AD = insert_node!(graph, make_node(DataTask(5)), false)
BD = insert_node!(graph, make_node(DataTask(5)), false)
CD = insert_node!(graph, make_node(DataTask(5)), false)
AD = insert_node!(graph, make_node(DataTask(5)), track = false)
BD = insert_node!(graph, make_node(DataTask(5)), track = false)
CD = insert_node!(graph, make_node(DataTask(5)), track = false)
insert_edge!(graph, s0, d_exit, false)
insert_edge!(graph, ED, s0, false)
insert_edge!(graph, FD, s0, false)
insert_edge!(graph, EC, ED, false)
insert_edge!(graph, FC, FD, false)
insert_edge!(graph, s0, d_exit, track = false)
insert_edge!(graph, ED, s0, track = false)
insert_edge!(graph, FD, s0, track = false)
insert_edge!(graph, EC, ED, track = false)
insert_edge!(graph, FC, FD, track = false)
insert_edge!(graph, A1D, EC, false)
insert_edge!(graph, B1D_1, EC, false)
insert_edge!(graph, A1D, EC, track = false)
insert_edge!(graph, B1D_1, EC, track = false)
insert_edge!(graph, B1D_2, FC, false)
insert_edge!(graph, C1D, FC, false)
insert_edge!(graph, B1D_2, FC, track = false)
insert_edge!(graph, C1D, FC, track = false)
insert_edge!(graph, A1C, A1D, false)
insert_edge!(graph, B1C_1, B1D_1, false)
insert_edge!(graph, B1C_2, B1D_2, false)
insert_edge!(graph, C1C, C1D, false)
insert_edge!(graph, A1C, A1D, track = false)
insert_edge!(graph, B1C_1, B1D_1, track = false)
insert_edge!(graph, B1C_2, B1D_2, track = false)
insert_edge!(graph, C1C, C1D, track = false)
insert_edge!(graph, AD, A1C, false)
insert_edge!(graph, BD, B1C_1, false)
insert_edge!(graph, BD, B1C_2, false)
insert_edge!(graph, CD, C1C, false)
insert_edge!(graph, AD, A1C, track = false)
insert_edge!(graph, BD, B1C_1, track = false)
insert_edge!(graph, BD, B1C_2, track = false)
insert_edge!(graph, CD, C1C, track = false)
@test is_valid(graph)

View File

@ -6,6 +6,8 @@ using Test
include("unit_tests_tasks.jl")
include("unit_tests_nodes.jl")
include("unit_tests_properties.jl")
include("unit_tests_estimator.jl")
include("unit_tests_abcmodel.jl")
include("node_reduction.jl")
include("unit_tests_graph.jl")
include("unit_tests_execution.jl")

View File

@ -0,0 +1,26 @@
using MetagraphOptimization
using QEDbase
import MetagraphOptimization.interaction_result
def_momentum = SFourMomentum(1.0, 0.0, 0.0, 0.0)

testparticleTypes = [ParticleA, ParticleB, ParticleC]
testparticles = [ParticleA(def_momentum), ParticleB(def_momentum), ParticleC(def_momentum)]

@testset "Unit Tests ABC-Model" begin
    @testset "Interaction Result" begin
        # check every ordered pair of particle types
        for p1 in testparticleTypes
            for p2 in testparticleTypes
                if p1 == p2
                    # two particles of the same type are expected to fail the assertion
                    @test_throws AssertionError interaction_result(p1, p2)
                    continue
                end

                # for two different types the result has to be the one remaining type
                remaining = setdiff(testparticleTypes, [p1, p2])
                @test interaction_result(p1, p2) == remaining[1]
            end
        end
    end

    @testset "Vertex" begin
        @test MetagraphOptimization.vertex() ≈ 1 / 137.0
    end
end

println("ABC-Model Unit Tests Complete!")

View File

@ -0,0 +1,99 @@
"""
    test_op_specific(estimator, graph, nf::NodeFusion)

Test the estimated effect of the node fusion `nf`: the data is expected to change by the negative data of the
task of the fusion's second input, while compute effort and compute intensity are expected to stay at zero.
"""
function test_op_specific(estimator, graph, nf::NodeFusion)
    effect = operation_effect(estimator, graph, nf)

    removedData = data(nf.input[2].task)
    tolerance = eps(Float64)

    @test isapprox(effect.data, -removedData)
    @test isapprox(effect.computeEffort, 0; atol = tolerance)
    @test isapprox(effect.computeIntensity, 0; atol = tolerance)

    return nothing
end
"""
    test_op_specific(estimator, graph, nr::NodeReduction)

Test the estimated effect of the node reduction `nr`: data and compute effort are expected to decrease by the
values of the first input's task, once for every input except the first. The compute intensity is expected to
be the ratio of the two.
"""
function test_op_specific(estimator, graph, nr::NodeReduction)
    effect = operation_effect(estimator, graph, nr)

    # all inputs except the first are counted
    removedCount = length(nr.input) - 1
    removedData = data(nr.input[1].task) * removedCount
    removedEffort = compute_effort(nr.input[1].task) * removedCount

    @test isapprox(effect.data, -removedData; atol = eps(Float64))
    @test isapprox(effect.computeEffort, -removedEffort)
    @test isapprox(effect.computeIntensity, removedEffort / removedData)

    return nothing
end
"""
    test_op_specific(estimator, graph, ns::NodeSplit)

Test the estimated effect of the node split `ns`: data and compute effort are expected to increase by the
values of the input's task, once for every parent of the input except one. The compute intensity is expected
to be the ratio of the two.
"""
function test_op_specific(estimator, graph, ns::NodeSplit)
    effect = operation_effect(estimator, graph, ns)

    # one copy per parent, minus the already existing node
    addedCount = length(ns.input.parents) - 1
    addedData = data(ns.input.task) * addedCount
    addedEffort = compute_effort(ns.input.task) * addedCount

    @test isapprox(effect.data, addedData; atol = eps(Float64))
    @test isapprox(effect.computeEffort, addedEffort)
    @test isapprox(effect.computeIntensity, addedEffort / addedData)

    return nothing
end
"""
    test_op(estimator, graph, op)

Test the given operation `op` on `graph` with the given `estimator`.

Currently this only forwards to the operation specific checks in `test_op_specific`. The comparison of the
estimated effect against the graph cost before and after applying the operation is disabled, see the
commented-out block in the function body.
"""
function test_op(estimator, graph, op)
    # disabled generic check: estimate before + operation effect should equal the estimate after applying
    #=
    See issue #16
    estimate_before = graph_cost(estimator, graph)
    estimate = operation_effect(estimator, graph, op)
    push_operation!(graph, op)
    estimate_after_apply = graph_cost(estimator, graph)
    reset_graph!(graph)
    @test isapprox((estimate_before + estimate).data, estimate_after_apply.data)
    @test isapprox((estimate_before + estimate).computeEffort, estimate_after_apply.computeEffort)
    @test isapprox((estimate_before + estimate).computeIntensity, estimate_after_apply.computeIntensity)
    =#
    # dispatches on the operation type
    test_op_specific(estimator, graph, op)
    return nothing
end
@testset "Unit Tests Estimator" begin
    # one entry per tested graph: (graph name, expected data, expected compute effort)
    cases = [("AB->AB", 976, 53), ("AB->ABBB", 10944, 1075)]

    @testset "Global Metric Estimator" for (graph_string, exp_data, exp_computeEffort) in cases
        estimator = GlobalMetricEstimator()

        @test cost_type(estimator) == CDCost

        graph = parse_dag(joinpath(@__DIR__, "..", "input", "$(graph_string).txt"), ABCModel())

        @testset "Graph Cost" begin
            estimate = graph_cost(estimator, graph)

            @test estimate.data == exp_data
            @test estimate.computeEffort == exp_computeEffort
            @test isapprox(estimate.computeIntensity, exp_computeEffort / exp_data)
        end

        @testset "Operation Cost" begin
            ops = get_operations(graph)

            # iterate over copies of the operation collections
            fusions = copy(ops.nodeFusions)
            reductions = copy(ops.nodeReductions)
            splits = copy(ops.nodeSplits)

            println(
                "Testing $(length(ops.nodeFusions))xNF, $(length(ops.nodeReductions))xNR, $(length(ops.nodeSplits))xNS",
            )

            # fusions first, then reductions, then splits
            for candidates in (fusions, reductions, splits), op in candidates
                test_op(estimator, graph, op)
            end
        end
    end
end

println("Estimator Unit Tests Complete!")

View File

@ -1,31 +1,219 @@
import MetagraphOptimization.A
import MetagraphOptimization.B
import MetagraphOptimization.ParticleType
import MetagraphOptimization.ABCParticle
import MetagraphOptimization.interaction_result
@testset "Unit Tests Graph" begin
particles = Dict{ParticleType, Vector{Particle}}(
(
A => [
Particle(0.823648, 0.0, 0.0, 0.823648, A),
Particle(0.823648, -0.835061, -0.474802, 0.277915, A),
]
),
(
B => [
Particle(0.823648, 0.0, 0.0, -0.823648, B),
Particle(0.823648, 0.835061, 0.474802, -0.277915, B),
]
),
using QEDbase
using AccurateArithmetic
include("../examples/profiling_utilities.jl")
const RTOL = sqrt(eps(Float64))
"""
    check_particle_reverse_moment(p1::SFourMomentum, p2::SFourMomentum)

Test that the two given momenta are the reverse of each other: the absolute values of the energies have to
be approximately equal, and the `px`, `py` and `pz` components approximately each other's negatives.
"""
function check_particle_reverse_moment(p1::SFourMomentum, p2::SFourMomentum)
    @test abs(p1.E) ≈ abs(p2.E)
    @test p1.px ≈ -p2.px
    @test p1.py ≈ -p2.py
    @test p1.pz ≈ -p2.pz
    return nothing
end
"""
    ground_truth_graph_result(input::ABCProcessInput)

Calculate the expected result for the given process input directly, without a graph. The first two in- and
out-particles of `input` are used to build the inner particle C of two diagrams; the inner edge values of
both are scaled by the squared coupling constant and summed up with `sum_kbn`.

Additionally tests that the inner particle built from the opposite particle pair of each diagram has the
reversed momentum and the same squared mass.
"""
function ground_truth_graph_result(input::ABCProcessInput)
    # formula for one diagram:
    # u_Bp * iλ * u_Ap * S_C * u_B * iλ * u_A
    # for the second diagram:
    # u_B * iλ * u_Ap * S_C * u_Bp * iλ * u_Ap
    # the "u"s are all 1, we ignore the i, λ is 1/137.
    couplingSquared = (1 / 137.0)^2

    in1 = input.inParticles[1].momentum
    in2 = input.inParticles[2].momentum
    out1 = input.outParticles[1].momentum
    out2 = input.outParticles[2].momentum

    # calculate particle C in both diagrams, each from both of its sides
    diagram1_C = ParticleC(in1 + in2)
    diagram2_C = ParticleC(in1 + out2)
    diagram1_Cp = ParticleC(out1 + out2)
    diagram2_Cp = ParticleC(out1 + in2)

    # both sides of a diagram have to agree on the inner particle
    check_particle_reverse_moment(diagram1_Cp.momentum, diagram1_C.momentum)
    check_particle_reverse_moment(diagram2_Cp.momentum, diagram2_C.momentum)
    @test isapprox(getMass2(diagram1_C.momentum), getMass2(diagram1_Cp.momentum))
    @test isapprox(getMass2(diagram2_C.momentum), getMass2(diagram2_Cp.momentum))

    diagram1_result = MetagraphOptimization.inner_edge(diagram1_C) * couplingSquared
    diagram2_result = MetagraphOptimization.inner_edge(diagram2_C) * couplingSquared

    return sum_kbn([diagram1_result, diagram2_result])
end
@testset "Unit Tests Execution" begin
machine = get_machine_info()
process_2_2 = ABCProcessDescription(
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
)
expected_result = 5.5320567694746876e-5
particles_2_2 = ABCProcessInput(
process_2_2,
ABCParticle[
ParticleA(SFourMomentum(0.823648, 0.0, 0.0, 0.823648)),
ParticleB(SFourMomentum(0.823648, 0.0, 0.0, -0.823648)),
],
ABCParticle[
ParticleA(SFourMomentum(0.823648, -0.835061, -0.474802, 0.277915)),
ParticleB(SFourMomentum(0.823648, 0.835061, 0.474802, -0.277915)),
],
)
expected_result = ground_truth_graph_result(particles_2_2)
for _ in 1:10 # test in a loop because graph layout should not change the result
graph = parse_abc(joinpath(@__DIR__, "..", "input", "AB->AB.txt"))
@test isapprox(execute(graph, particles), expected_result; rtol = 0.001)
@testset "AB->AB no optimization" begin
for _ in 1:10 # test in a loop because graph layout should not change the result
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL)
code = MetagraphOptimization.gen_code(graph)
@test isapprox(execute(code, particles), expected_result; rtol = 0.001)
# graph should be fully scheduled after being executed
@test is_scheduled(graph)
func = get_compute_function(graph, process_2_2, machine)
@test isapprox(func(particles_2_2), expected_result; rtol = RTOL)
end
end
@testset "AB->AB after random walk" begin
for i in 1:200
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
random_walk!(graph, 50)
@test is_valid(graph)
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL)
# graph should be fully scheduled after being executed
@test is_scheduled(graph)
end
end
process_2_4 = ABCProcessDescription(
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
Dict{Type, Int64}(ParticleA => 1, ParticleB => 3),
)
particles_2_4 = gen_process_input(process_2_4)
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
expected_result = execute(graph, process_2_4, machine, particles_2_4)
@testset "AB->ABBB no optimization" begin
for _ in 1:5 # test in a loop because graph layout should not change the result
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
@test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = RTOL)
func = get_compute_function(graph, process_2_4, machine)
@test isapprox(func(particles_2_4), expected_result; rtol = RTOL)
end
end
# Apply a 100-step random walk to a freshly parsed AB->ABBB graph, 50 times over,
# and check each time that the graph is valid and still yields `expected_result`.
@testset "AB->ABBB after random walk" begin
    abbb_file = joinpath(@__DIR__, "..", "input", "AB->ABBB.txt")

    for _ in 1:50
        graph = parse_dag(abbb_file, ABCModel())
        random_walk!(graph, 100)

        @test is_valid(graph)
        @test execute(graph, process_2_4, machine, particles_2_4) ≈ expected_result rtol = RTOL
    end
end
# Fuse into the sum node, keep extending the resulting fused node, and check that
# the graph stays valid and still matches the ground truth.
@testset "AB->AB large sum fusion" for _ in 1:20
    graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())

    # apply the first offered fusion whose third input carries the sum task
    for fusion in get_operations(graph).nodeFusions
        fusion.input[3].task isa ComputeTaskSum || continue
        push_operation!(graph, fusion)
        break
    end

    # 15 further rounds: each round re-queries the operations and applies the first
    # fusion whose third input is already a fused task (a round without a match is a no-op)
    for _ in 1:15
        for fusion in get_operations(graph).nodeFusions
            fusion.input[3].task isa FusedComputeTask || continue
            push_operation!(graph, fusion)
            break
        end
    end

    # the fused graph must still be valid and executable with the correct result
    @test is_valid(graph)
    expected_result = ground_truth_graph_result(particles_2_2)
    @test execute(graph, process_2_2, machine, particles_2_2) ≈ expected_result rtol = RTOL
end
# Fuse into the sum node, keep extending the resulting fused node, and check that
# the graph stays valid and still matches the ground truth.
# NOTE(review): this testset has the same name and performs the same steps as the
# testset directly before it — confirm whether the duplication is intentional.
@testset "AB->AB large sum fusion" for _ in 1:20
    graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())

    # apply the first offered fusion whose third input carries the sum task
    for fusion in get_operations(graph).nodeFusions
        fusion.input[3].task isa ComputeTaskSum || continue
        push_operation!(graph, fusion)
        break
    end

    # 15 further rounds: each round re-queries the operations and applies the first
    # fusion whose third input is already a fused task (a round without a match is a no-op)
    for _ in 1:15
        for fusion in get_operations(graph).nodeFusions
            fusion.input[3].task isa FusedComputeTask || continue
            push_operation!(graph, fusion)
            break
        end
    end

    # the fused graph must still be valid and executable with the correct result
    @test is_valid(graph)
    expected_result = ground_truth_graph_result(particles_2_2)
    @test execute(graph, process_2_2, machine, particles_2_2) ≈ expected_result rtol = RTOL
end
# Start fusing at ComputeTaskV nodes, then keep fusing into the fused node until no
# such fusion is offered any more, and check the graph is valid and correct.
@testset "AB->AB fusion edge case" for _ in 1:20
    graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())

    # two rounds: apply the first offered fusion whose first input is a ComputeTaskV
    for _ in 1:2
        for fusion in get_operations(graph).nodeFusions
            fusion.input[1].task isa ComputeTaskV || continue
            push_operation!(graph, fusion)
            break
        end
    end

    # keep applying fusions whose first input is already a fused task;
    # stop after the first round in which none was found
    while true
        applied = false
        for fusion in get_operations(graph).nodeFusions
            fusion.input[1].task isa FusedComputeTask || continue
            push_operation!(graph, fusion)
            applied = true
            break
        end
        applied || break
    end

    # the fused graph must still be valid and executable with the correct result
    @test is_valid(graph)
    expected_result = ground_truth_graph_result(particles_2_2)
    @test execute(graph, process_2_2, machine, particles_2_2) ≈ expected_result rtol = RTOL
end
end
# Print a completion marker to stdout once the execution unit tests above have run.
println("Execution Unit Tests Complete!")

View File

@ -11,104 +11,101 @@ import MetagraphOptimization.partners
@test length(graph.appliedOperations) == 0
@test length(graph.operationsToApply) == 0
@test length(graph.dirtyNodes) == 0
@test length(graph.diff) ==
(addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
@test length(get_operations(graph)) ==
(nodeFusions = 0, nodeReductions = 0, nodeSplits = 0)
@test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
@test length(get_operations(graph)) == (nodeFusions = 0, nodeReductions = 0, nodeSplits = 0)
# s to output (exit node)
d_exit = insert_node!(graph, make_node(DataTask(10)), false)
d_exit = insert_node!(graph, make_node(DataTask(10)), track = false)
@test length(graph.nodes) == 1
@test length(graph.dirtyNodes) == 1
# final s compute
s0 = insert_node!(graph, make_node(ComputeTaskS2()), false)
s0 = insert_node!(graph, make_node(ComputeTaskS2()), track = false)
@test length(graph.nodes) == 2
@test length(graph.dirtyNodes) == 2
# data from v0 and v1 to s0
d_v0_s0 = insert_node!(graph, make_node(DataTask(5)), false)
d_v1_s0 = insert_node!(graph, make_node(DataTask(5)), false)
d_v0_s0 = insert_node!(graph, make_node(DataTask(5)), track = false)
d_v1_s0 = insert_node!(graph, make_node(DataTask(5)), track = false)
# v0 and v1 compute
v0 = insert_node!(graph, make_node(ComputeTaskV()), false)
v1 = insert_node!(graph, make_node(ComputeTaskV()), false)
v0 = insert_node!(graph, make_node(ComputeTaskV()), track = false)
v1 = insert_node!(graph, make_node(ComputeTaskV()), track = false)
# data from uB, uA, uBp and uAp to v0 and v1
d_uB_v0 = insert_node!(graph, make_node(DataTask(3)), false)
d_uA_v0 = insert_node!(graph, make_node(DataTask(3)), false)
d_uBp_v1 = insert_node!(graph, make_node(DataTask(3)), false)
d_uAp_v1 = insert_node!(graph, make_node(DataTask(3)), false)
d_uB_v0 = insert_node!(graph, make_node(DataTask(3)), track = false)
d_uA_v0 = insert_node!(graph, make_node(DataTask(3)), track = false)
d_uBp_v1 = insert_node!(graph, make_node(DataTask(3)), track = false)
d_uAp_v1 = insert_node!(graph, make_node(DataTask(3)), track = false)
# uB, uA, uBp and uAp computes
uB = insert_node!(graph, make_node(ComputeTaskU()), false)
uA = insert_node!(graph, make_node(ComputeTaskU()), false)
uBp = insert_node!(graph, make_node(ComputeTaskU()), false)
uAp = insert_node!(graph, make_node(ComputeTaskU()), false)
uB = insert_node!(graph, make_node(ComputeTaskU()), track = false)
uA = insert_node!(graph, make_node(ComputeTaskU()), track = false)
uBp = insert_node!(graph, make_node(ComputeTaskU()), track = false)
uAp = insert_node!(graph, make_node(ComputeTaskU()), track = false)
# data from PB, PA, PBp and PAp to uB, uA, uBp and uAp
d_PB_uB = insert_node!(graph, make_node(DataTask(6)), false)
d_PA_uA = insert_node!(graph, make_node(DataTask(6)), false)
d_PBp_uBp = insert_node!(graph, make_node(DataTask(6)), false)
d_PAp_uAp = insert_node!(graph, make_node(DataTask(6)), false)
d_PB_uB = insert_node!(graph, make_node(DataTask(6)), track = false)
d_PA_uA = insert_node!(graph, make_node(DataTask(6)), track = false)
d_PBp_uBp = insert_node!(graph, make_node(DataTask(6)), track = false)
d_PAp_uAp = insert_node!(graph, make_node(DataTask(6)), track = false)
# P computes PB, PA, PBp and PAp
PB = insert_node!(graph, make_node(ComputeTaskP()), false)
PA = insert_node!(graph, make_node(ComputeTaskP()), false)
PBp = insert_node!(graph, make_node(ComputeTaskP()), false)
PAp = insert_node!(graph, make_node(ComputeTaskP()), false)
PB = insert_node!(graph, make_node(ComputeTaskP()), track = false)
PA = insert_node!(graph, make_node(ComputeTaskP()), track = false)
PBp = insert_node!(graph, make_node(ComputeTaskP()), track = false)
PAp = insert_node!(graph, make_node(ComputeTaskP()), track = false)
# entry nodes getting data for P computes
d_PB = insert_node!(graph, make_node(DataTask(4)), false)
d_PA = insert_node!(graph, make_node(DataTask(4)), false)
d_PBp = insert_node!(graph, make_node(DataTask(4)), false)
d_PAp = insert_node!(graph, make_node(DataTask(4)), false)
d_PB = insert_node!(graph, make_node(DataTask(4)), track = false)
d_PA = insert_node!(graph, make_node(DataTask(4)), track = false)
d_PBp = insert_node!(graph, make_node(DataTask(4)), track = false)
d_PAp = insert_node!(graph, make_node(DataTask(4)), track = false)
@test length(graph.nodes) == 26
@test length(graph.dirtyNodes) == 26
# now for all the edges
insert_edge!(graph, d_PB, PB, false)
insert_edge!(graph, d_PA, PA, false)
insert_edge!(graph, d_PBp, PBp, false)
insert_edge!(graph, d_PAp, PAp, false)
insert_edge!(graph, d_PB, PB, track = false)
insert_edge!(graph, d_PA, PA, track = false)
insert_edge!(graph, d_PBp, PBp, track = false)
insert_edge!(graph, d_PAp, PAp, track = false)
insert_edge!(graph, PB, d_PB_uB, false)
insert_edge!(graph, PA, d_PA_uA, false)
insert_edge!(graph, PBp, d_PBp_uBp, false)
insert_edge!(graph, PAp, d_PAp_uAp, false)
insert_edge!(graph, PB, d_PB_uB, track = false)
insert_edge!(graph, PA, d_PA_uA, track = false)
insert_edge!(graph, PBp, d_PBp_uBp, track = false)
insert_edge!(graph, PAp, d_PAp_uAp, track = false)
insert_edge!(graph, d_PB_uB, uB, false)
insert_edge!(graph, d_PA_uA, uA, false)
insert_edge!(graph, d_PBp_uBp, uBp, false)
insert_edge!(graph, d_PAp_uAp, uAp, false)
insert_edge!(graph, d_PB_uB, uB, track = false)
insert_edge!(graph, d_PA_uA, uA, track = false)
insert_edge!(graph, d_PBp_uBp, uBp, track = false)
insert_edge!(graph, d_PAp_uAp, uAp, track = false)
insert_edge!(graph, uB, d_uB_v0, false)
insert_edge!(graph, uA, d_uA_v0, false)
insert_edge!(graph, uBp, d_uBp_v1, false)
insert_edge!(graph, uAp, d_uAp_v1, false)
insert_edge!(graph, uB, d_uB_v0, track = false)
insert_edge!(graph, uA, d_uA_v0, track = false)
insert_edge!(graph, uBp, d_uBp_v1, track = false)
insert_edge!(graph, uAp, d_uAp_v1, track = false)
insert_edge!(graph, d_uB_v0, v0, false)
insert_edge!(graph, d_uA_v0, v0, false)
insert_edge!(graph, d_uBp_v1, v1, false)
insert_edge!(graph, d_uAp_v1, v1, false)
insert_edge!(graph, d_uB_v0, v0, track = false)
insert_edge!(graph, d_uA_v0, v0, track = false)
insert_edge!(graph, d_uBp_v1, v1, track = false)
insert_edge!(graph, d_uAp_v1, v1, track = false)
insert_edge!(graph, v0, d_v0_s0, false)
insert_edge!(graph, v1, d_v1_s0, false)
insert_edge!(graph, v0, d_v0_s0, track = false)
insert_edge!(graph, v1, d_v1_s0, track = false)
insert_edge!(graph, d_v0_s0, s0, false)
insert_edge!(graph, d_v1_s0, s0, false)
insert_edge!(graph, d_v0_s0, s0, track = false)
insert_edge!(graph, d_v1_s0, s0, track = false)
insert_edge!(graph, s0, d_exit, false)
insert_edge!(graph, s0, d_exit, track = false)
@test length(graph.nodes) == 26
@test length(graph.appliedOperations) == 0
@test length(graph.operationsToApply) == 0
@test length(graph.dirtyNodes) == 26
@test length(graph.diff) ==
(addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
@test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
@test is_valid(graph)
@ -135,8 +132,7 @@ import MetagraphOptimization.partners
@test length(siblings(s0)) == 1
operations = get_operations(graph)
@test length(operations) ==
(nodeFusions = 10, nodeReductions = 0, nodeSplits = 0)
@test length(operations) == (nodeFusions = 10, nodeReductions = 0, nodeSplits = 0)
@test length(graph.dirtyNodes) == 0
@test operations == get_operations(graph)
@ -157,8 +153,7 @@ import MetagraphOptimization.partners
@test length(graph.operationsToApply) == 1
@test first(graph.operationsToApply) == nf
@test length(graph.dirtyNodes) == 0
@test length(graph.diff) ==
(addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
@test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
# this applies pending operations
properties = get_properties(graph)
@ -176,8 +171,7 @@ import MetagraphOptimization.partners
operations = get_operations(graph)
@test length(graph.dirtyNodes) == 0
@test length(operations) ==
(nodeFusions = 9, nodeReductions = 0, nodeSplits = 0)
@test length(operations) == (nodeFusions = 9, nodeReductions = 0, nodeSplits = 0)
@test !isempty(operations)
possibleNF = 9
@ -185,14 +179,12 @@ import MetagraphOptimization.partners
push_operation!(graph, first(operations.nodeFusions))
operations = get_operations(graph)
possibleNF = possibleNF - 1
@test length(operations) ==
(nodeFusions = possibleNF, nodeReductions = 0, nodeSplits = 0)
@test length(operations) == (nodeFusions = possibleNF, nodeReductions = 0, nodeSplits = 0)
end
@test isempty(operations)
@test length(operations) ==
(nodeFusions = 0, nodeReductions = 0, nodeSplits = 0)
@test length(operations) == (nodeFusions = 0, nodeReductions = 0, nodeSplits = 0)
@test length(graph.dirtyNodes) == 0
@test length(graph.nodes) == 6
@test length(graph.appliedOperations) == 10
@ -213,8 +205,7 @@ import MetagraphOptimization.partners
# Approximate comparison: the compute intensity is a floating-point ratio, so it is
# checked with `≈` rather than exact equality.
@test properties.computeIntensity ≈ 28 / 62
operations = get_operations(graph)
@test length(operations) ==
(nodeFusions = 10, nodeReductions = 0, nodeSplits = 0)
@test length(operations) == (nodeFusions = 10, nodeReductions = 0, nodeSplits = 0)
@test is_valid(graph)
end

View File

@ -3,8 +3,7 @@
nC1 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskU())
nC2 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskV())
nC3 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskP())
nC4 =
MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskSum())
nC4 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskSum())
nD1 = MetagraphOptimization.make_node(MetagraphOptimization.DataTask(10))
nD2 = MetagraphOptimization.make_node(MetagraphOptimization.DataTask(20))

View File

@ -5,9 +5,7 @@
@test MetagraphOptimization.bytes_to_human_readable(1025) == "1.001 KiB"
@test MetagraphOptimization.bytes_to_human_readable(684235) == "668.2 KiB"
@test MetagraphOptimization.bytes_to_human_readable(86214576) == "82.22 MiB"
@test MetagraphOptimization.bytes_to_human_readable(9241457698) ==
"8.607 GiB"
@test MetagraphOptimization.bytes_to_human_readable(3218598654367) ==
"2.927 TiB"
@test MetagraphOptimization.bytes_to_human_readable(9241457698) == "8.607 GiB"
@test MetagraphOptimization.bytes_to_human_readable(3218598654367) == "2.927 TiB"
end
# Print a completion marker to stdout once the utility unit tests above have run.
println("Utility Unit Tests Complete!")