Compare commits
9 Commits
Author | SHA1 | Date | |
---|---|---|---|
0c74a3f30e | |||
241fd227c1 | |||
a97a878881 | |||
1cf8ae643c | |||
8b4e2b5ef7 | |||
61b6541b81 | |||
2709eeb3dc | |||
5a30f57e1f | |||
bd6c54c1ae |
@ -1,5 +1,5 @@
|
||||
indent = 4
|
||||
margin = 80
|
||||
margin = 120
|
||||
always_for_in = true
|
||||
for_in_replacement = "in"
|
||||
whitespace_typedefs = true
|
||||
|
@ -8,7 +8,7 @@ env:
|
||||
|
||||
jobs:
|
||||
prepare:
|
||||
runs-on: arch-latest
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@ -65,7 +65,7 @@ jobs:
|
||||
|
||||
test:
|
||||
needs: prepare
|
||||
runs-on: arch-latest
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@ -108,7 +108,7 @@ jobs:
|
||||
|
||||
- name: Format check
|
||||
run: |
|
||||
julia --project=./ -e 'using JuliaFormatter; format(".", verbose=true)'
|
||||
julia --project=./ -e 'using JuliaFormatter; format(".", verbose=true, ignore=[".julia/*"])'
|
||||
julia --project=./ -e '
|
||||
out = Cmd(`git diff --name-only`) |> read |> String
|
||||
if out == ""
|
||||
@ -127,7 +127,7 @@ jobs:
|
||||
|
||||
docs:
|
||||
needs: prepare
|
||||
runs-on: arch-latest
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -26,3 +26,5 @@ Manifest.toml
|
||||
|
||||
# vscode workspace directory
|
||||
.vscode
|
||||
.julia
|
||||
**/.ipynb_checkpoints/
|
||||
|
@ -5,9 +5,15 @@ version = "0.1.0"
|
||||
|
||||
[deps]
|
||||
AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753"
|
||||
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
|
||||
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
|
||||
JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
|
||||
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
|
||||
NumaAllocators = "21436f30-1b4a-4f08-87af-e26101bb5379"
|
||||
QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
|
||||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
|
||||
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
||||
[extras]
|
||||
|
@ -42,7 +42,7 @@ Problems:
|
||||
- Lots of testing required because mistakes will propagate and multiply.
|
||||
|
||||
## Other TODOs
|
||||
- Reduce memory footprint of the graph, are the UUIDs too large?
|
||||
- Reduce memory footprint of the graph
|
||||
- Memory layout of Nodes? They should lie linearly in memory, right now probably on heap?
|
||||
- Add scaling functions
|
||||
|
||||
@ -53,7 +53,7 @@ For graphs AB->AB^n:
|
||||
- Number of ComputeTaskS2 should always be (n+1)!
|
||||
- Number of ComputeTaskU should always be (n+3)
|
||||
|
||||
Times are from my home machine: AMD Ryzen 7900X3D, 64GB DDR5 RAM @ 6000MHz
|
||||
Times are from my home machine: AMD Ryzen 7900X3D, 64GB DDR5 RAM @ 6000MHz (not necessarily up to date, check Jupyter Notebooks in `notebooks/` instead)
|
||||
|
||||
```
|
||||
$ julia --project examples/import_bench.jl
|
||||
|
@ -27,6 +27,7 @@ makedocs(
|
||||
"Diff" => "lib/internals/diff.md",
|
||||
"Utility" => "lib/internals/utility.md",
|
||||
"Code Generation" => "lib/internals/code_gen.md",
|
||||
"Devices" => "lib/internals/devices.md",
|
||||
],
|
||||
"Contribution" => "contribution.md",
|
||||
],
|
||||
|
75
docs/src/flowchart.drawio
Normal file
75
docs/src/flowchart.drawio
Normal file
@ -0,0 +1,75 @@
|
||||
<mxfile host="Electron" modified="2023-09-17T13:34:45.840Z" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/21.6.1 Chrome/114.0.5735.134 Electron/25.6.0 Safari/537.36" etag="e0c8qLevhaP_q_R2fyC9" version="21.6.1" type="device">
|
||||
<diagram name="Page-1" id="Vy0cA1nkMPfy-3cC5ahA">
|
||||
<mxGraphModel dx="1185" dy="707" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="500" pageHeight="900" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0" />
|
||||
<mxCell id="1" parent="0" />
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-1" target="yG8qeggDCLqQ8GwY7ugi-2">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-1" value="Process Generator Script" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="180" y="120" width="120" height="60" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-2" target="yG8qeggDCLqQ8GwY7ugi-3">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-2" value="Process Parser" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="180" y="220" width="120" height="60" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-8" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.25;exitDx=0;exitDy=0;entryX=0;entryY=0.25;entryDx=0;entryDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-3" target="yG8qeggDCLqQ8GwY7ugi-6">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-3" target="yG8qeggDCLqQ8GwY7ugi-12">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-3" value="Optimizer" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="180" y="320" width="120" height="60" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.75;entryDx=0;entryDy=0;exitX=0;exitY=0.75;exitDx=0;exitDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-6" target="yG8qeggDCLqQ8GwY7ugi-3">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-6" value="Fast Cost Estimator<br>(Global Metrics)" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="340" y="320" width="120" height="60" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-12" target="yG8qeggDCLqQ8GwY7ugi-13">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<Array as="points">
|
||||
<mxPoint x="80" y="450" />
|
||||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-12" value="Scheduler" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="180" y="420" width="120" height="60" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-13" target="yG8qeggDCLqQ8GwY7ugi-3">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<mxPoint x="120" y="380" as="targetPoint" />
|
||||
<Array as="points">
|
||||
<mxPoint x="80" y="350" />
|
||||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-12" target="yG8qeggDCLqQ8GwY7ugi-18">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<Array as="points">
|
||||
<mxPoint x="240" y="500" />
|
||||
<mxPoint x="240" y="500" />
|
||||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-13" value="Accurate Cost Estimator<br>(Machine Specific)" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="20" y="370" width="120" height="60" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-18" target="yG8qeggDCLqQ8GwY7ugi-20">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-18" value="Code Generator" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="180" y="520" width="120" height="60" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="yG8qeggDCLqQ8GwY7ugi-20" value="Executor" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="180" y="620" width="120" height="60" as="geometry" />
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
</mxfile>
|
59
docs/src/lib/internals/devices.md
Normal file
59
docs/src/lib/internals/devices.md
Normal file
@ -0,0 +1,59 @@
|
||||
# Devices
|
||||
|
||||
## Interface
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["devices/interface.jl"]
|
||||
Order = [:type, :constant, :function]
|
||||
```
|
||||
|
||||
## Detect
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["devices/detect.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
## Measure
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["devices/measure.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
## Implementations
|
||||
|
||||
### General
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["devices/impl.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
||||
|
||||
### NUMA
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["devices/numa/impl.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
||||
|
||||
### CUDA
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["devices/cuda/impl.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
||||
|
||||
### ROCm
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["devices/rocm/impl.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
||||
|
||||
### oneAPI
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["devices/oneapi/impl.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
21
docs/src/lib/internals/estimator.md
Normal file
21
docs/src/lib/internals/estimator.md
Normal file
@ -0,0 +1,21 @@
|
||||
# Models
|
||||
|
||||
## Interface
|
||||
|
||||
The interface that has to be implemented for an estimator.
|
||||
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["estimator/interafce.jl"]
|
||||
Order = [:type, :constant, :function]
|
||||
```
|
||||
|
||||
## Global Metric Estimator
|
||||
|
||||
Implementation of a global metric estimator. It uses the graph properties compute effort, data transfer, and compute intensity.
|
||||
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["estimator/global_metric.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
@ -1,5 +1,21 @@
|
||||
# Models
|
||||
|
||||
## Interface
|
||||
|
||||
The interface that has to be implemented for a model to be usable is defined in `src/models/interface.jl`.
|
||||
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["models/interface.jl"]
|
||||
Order = [:type, :constant, :function]
|
||||
```
|
||||
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["models/print.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
## ABC-Model
|
||||
|
||||
### Types
|
||||
@ -44,6 +60,13 @@ Pages = ["models/abc/compute.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
### Print
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["models/abc/print.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
## QED-Model
|
||||
|
||||
*To be added*
|
||||
|
15
docs/src/lib/internals/scheduler.md
Normal file
15
docs/src/lib/internals/scheduler.md
Normal file
@ -0,0 +1,15 @@
|
||||
# Scheduler
|
||||
|
||||
## Interface
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["scheduler/interface.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
||||
|
||||
## Greedy
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["scheduler/greedy.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
@ -21,6 +21,13 @@ Pages = ["task/compare.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
## Compute
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["task/compute.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
## Properties
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
|
@ -1,3 +1,7 @@
|
||||
# Manual
|
||||
|
||||
This will become a manual.
|
||||
## Jupyter Notebooks
|
||||
|
||||
In the `notebooks` directory are notebooks containing some examples of the usage of this repository.
|
||||
|
||||
- `abc_model_showcase`: A simple showcase of the intended usage of the ABC Model implementation.
|
||||
|
34
examples/ab5.jl
Normal file
34
examples/ab5.jl
Normal file
@ -0,0 +1,34 @@
|
||||
using MetagraphOptimization
|
||||
using BenchmarkTools
|
||||
|
||||
println("Getting machine info")
|
||||
@time machine = get_machine_info()
|
||||
|
||||
println("Making model")
|
||||
@time model = ABCModel()
|
||||
|
||||
println("Making process")
|
||||
process_str = "AB->ABBBBB"
|
||||
@time process = parse_process(process_str, model)
|
||||
|
||||
println("Parsing DAG")
|
||||
@time graph = parse_dag("input/$process_str.txt", model)
|
||||
|
||||
println("Generating input data")
|
||||
@time input_data = [gen_process_input(process) for _ in 1:1000]
|
||||
|
||||
include("profiling_utilities.jl")
|
||||
println("Reducing graph")
|
||||
@time reduce_all!(graph)
|
||||
|
||||
println("Generating compute function")
|
||||
@time compute_func = get_compute_function(graph, process, machine)
|
||||
|
||||
println("First run, single argument")
|
||||
@time compute_func(input_data[1])
|
||||
|
||||
println("\nBenchmarking function, 1 input")
|
||||
display(@benchmark compute_func($(input_data[1])))
|
||||
|
||||
println("\nBenchmarking function, 1000 inputs")
|
||||
display(@benchmark compute_func.($input_data))
|
34
examples/ab7.jl
Normal file
34
examples/ab7.jl
Normal file
@ -0,0 +1,34 @@
|
||||
using MetagraphOptimization
|
||||
using BenchmarkTools
|
||||
|
||||
println("Getting machine info")
|
||||
@time machine = get_machine_info()
|
||||
|
||||
println("Making model")
|
||||
@time model = ABCModel()
|
||||
|
||||
println("Making process")
|
||||
process_str = "AB->ABBBBBBB"
|
||||
@time process = parse_process(process_str, model)
|
||||
|
||||
println("Parsing DAG")
|
||||
@time graph = parse_dag("input/$process_str.txt", model)
|
||||
|
||||
println("Generating input data")
|
||||
@time input_data = [gen_process_input(process) for _ in 1:1000]
|
||||
|
||||
include("profiling_utilities.jl")
|
||||
println("Reducing graph")
|
||||
@time reduce_all!(graph)
|
||||
|
||||
println("Generating compute function")
|
||||
@time compute_func = get_compute_function(graph, process, machine)
|
||||
|
||||
println("First run, single argument")
|
||||
@time compute_func(input_data[1])
|
||||
|
||||
println("\nBenchmarking function, 1 input")
|
||||
display(@benchmark compute_func($(input_data[1])))
|
||||
|
||||
println("\nBenchmarking function, 1000 inputs")
|
||||
display(@benchmark compute_func.($input_data))
|
@ -13,16 +13,15 @@ function bench_txt(filepath::String, bench::Bool = true)
|
||||
return
|
||||
end
|
||||
|
||||
model = ABCModel()
|
||||
|
||||
println(name, ":")
|
||||
g = parse_abc(filepath)
|
||||
g = parse_dag(filepath, model)
|
||||
print(g)
|
||||
println(
|
||||
" Graph size in memory: ",
|
||||
bytes_to_human_readable(MetagraphOptimization.mem(g)),
|
||||
)
|
||||
println(" Graph size in memory: ", bytes_to_human_readable(MetagraphOptimization.mem(g)))
|
||||
|
||||
if (bench)
|
||||
@btime parse_abc($filepath)
|
||||
@btime parse_dag($filepath, $model)
|
||||
end
|
||||
|
||||
println(" Get Operations: ")
|
||||
|
@ -12,7 +12,7 @@ function gen_plot(filepath)
|
||||
return
|
||||
end
|
||||
|
||||
g = parse_abc(filepath)
|
||||
g = parse_dag(filepath, ABCModel())
|
||||
|
||||
Random.seed!(1)
|
||||
|
||||
@ -48,23 +48,10 @@ function gen_plot(filepath)
|
||||
|
||||
println("\rDone.")
|
||||
|
||||
plot(
|
||||
[x[1], x[2]],
|
||||
[y[1], y[2]],
|
||||
linestyle = :solid,
|
||||
linewidth = 1,
|
||||
color = :red,
|
||||
legend = false,
|
||||
)
|
||||
plot([x[1], x[2]], [y[1], y[2]], linestyle = :solid, linewidth = 1, color = :red, legend = false)
|
||||
# Create lines connecting the reference point to each data point
|
||||
for i in 3:length(x)
|
||||
plot!(
|
||||
[x[i - 1], x[i]],
|
||||
[y[i - 1], y[i]],
|
||||
linestyle = :solid,
|
||||
linewidth = 1,
|
||||
color = :red,
|
||||
)
|
||||
plot!([x[i - 1], x[i]], [y[i - 1], y[i]], linestyle = :solid, linewidth = 1, color = :red)
|
||||
end
|
||||
|
||||
return gui()
|
||||
|
@ -12,7 +12,7 @@ function gen_plot(filepath)
|
||||
return
|
||||
end
|
||||
|
||||
g = parse_abc(filepath)
|
||||
g = parse_dag(filepath, ABCModel())
|
||||
|
||||
Random.seed!(1)
|
||||
|
||||
@ -60,14 +60,7 @@ function gen_plot(filepath)
|
||||
push!(y, props.computeEffort)
|
||||
pop_operation!(g)
|
||||
|
||||
push!(
|
||||
names,
|
||||
"NF: (" *
|
||||
string(props.data) *
|
||||
", " *
|
||||
string(props.computeEffort) *
|
||||
")",
|
||||
)
|
||||
push!(names, "NF: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
|
||||
end
|
||||
for op in opt.nodeReductions
|
||||
push_operation!(g, op)
|
||||
@ -76,14 +69,7 @@ function gen_plot(filepath)
|
||||
push!(y, props.computeEffort)
|
||||
pop_operation!(g)
|
||||
|
||||
push!(
|
||||
names,
|
||||
"NR: (" *
|
||||
string(props.data) *
|
||||
", " *
|
||||
string(props.computeEffort) *
|
||||
")",
|
||||
)
|
||||
push!(names, "NR: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
|
||||
end
|
||||
for op in opt.nodeSplits
|
||||
push_operation!(g, op)
|
||||
@ -92,33 +78,13 @@ function gen_plot(filepath)
|
||||
push!(y, props.computeEffort)
|
||||
pop_operation!(g)
|
||||
|
||||
push!(
|
||||
names,
|
||||
"NS: (" *
|
||||
string(props.data) *
|
||||
", " *
|
||||
string(props.computeEffort) *
|
||||
")",
|
||||
)
|
||||
push!(names, "NS: (" * string(props.data) * ", " * string(props.computeEffort) * ")")
|
||||
end
|
||||
|
||||
plot(
|
||||
[x0, x[1]],
|
||||
[y0, y[1]],
|
||||
linestyle = :solid,
|
||||
linewidth = 1,
|
||||
color = :red,
|
||||
legend = false,
|
||||
)
|
||||
plot([x0, x[1]], [y0, y[1]], linestyle = :solid, linewidth = 1, color = :red, legend = false)
|
||||
# Create lines connecting the reference point to each data point
|
||||
for i in 2:length(x)
|
||||
plot!(
|
||||
[x0, x[i]],
|
||||
[y0, y[i]],
|
||||
linestyle = :solid,
|
||||
linewidth = 1,
|
||||
color = :red,
|
||||
)
|
||||
plot!([x0, x[i]], [y0, y[i]], linestyle = :solid, linewidth = 1, color = :red)
|
||||
end
|
||||
#scatter!(x, y, label=names)
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
function test_random_walk(g::DAG, n::Int64)
|
||||
# the purpose here is to do "random" operations and reverse them again and validate that the graph stays the same and doesn't diverge
|
||||
function random_walk!(g::DAG, n::Int64)
|
||||
# the purpose here is to do "random" operations on the graph to simulate an optimizer
|
||||
reset_graph!(g)
|
||||
|
||||
properties = get_properties(g)
|
||||
@ -32,7 +32,7 @@ function test_random_walk(g::DAG, n::Int64)
|
||||
end
|
||||
end
|
||||
|
||||
return reset_graph!(g)
|
||||
return nothing
|
||||
end
|
||||
|
||||
function reduce_all!(g::DAG)
|
||||
|
Binary file not shown.
637
notebooks/abc_model_large.ipynb
Normal file
637
notebooks/abc_model_large.ipynb
Normal file
@ -0,0 +1,637 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"using MetagraphOptimization"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found 1 NUMA nodes\n",
|
||||
"CUDA is non-functional\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get machine and set dictionary caching strategy\n",
|
||||
"machine = get_machine_info()\n",
|
||||
"MetagraphOptimization.set_cache_strategy(machine.devices[1], MetagraphOptimization.Dictionary())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Graph:\n",
|
||||
" Nodes: Total: 7854, ComputeTaskP: 8, ComputeTaskS2: 720, \n",
|
||||
" ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 1230, \n",
|
||||
" ComputeTaskV: 1956, DataTask: 3931\n",
|
||||
" Edges: 11241\n",
|
||||
" Total Compute Effort: 33915.0\n",
|
||||
" Total Data Transfer: 322464.0\n",
|
||||
" Total Compute Intensity: 0.10517453111044954\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = ABCModel()\n",
|
||||
"process_str = \"AB->ABBBBB\"\n",
|
||||
"process = parse_process(process_str, model)\n",
|
||||
"graph = parse_dag(\"../input/$process_str.txt\", model)\n",
|
||||
"print(graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"compute__ae7097a4_7bfc_11ee_2cec_190d7ced64f1 (generic function with 1 method)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"compute_AB_AB5 = get_compute_function(graph, process, machine)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 0.140021 seconds (791.41 k allocations: 30.317 MiB, 9.74% gc time)\n",
|
||||
"Graph:\n",
|
||||
" Nodes: Total: 4998, ComputeTaskP: 8, ComputeTaskS2: 720, \n",
|
||||
" ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 516, \n",
|
||||
" ComputeTaskV: 1242, DataTask: 2503\n",
|
||||
" Edges: 7671\n",
|
||||
" Total Compute Effort: 21777.0\n",
|
||||
" Total Data Transfer: 219648.0\n",
|
||||
" Total Compute Intensity: 0.09914499562937062\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"include(\"../examples/profiling_utilities.jl\")\n",
|
||||
"@time reduce_all!(graph)\n",
|
||||
"print(graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 3.626740 seconds (1.52 M allocations: 114.358 MiB, 0.84% gc time)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"compute__bad8f2ac_7bfc_11ee_176b_b72dc8919aad (generic function with 1 method)"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"@time compute_AB_AB5_reduced = get_compute_function(graph, process, machine)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" 2.130952 seconds (4.31 M allocations: 276.129 MiB, 4.50% gc time, 99.02% compilation time)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1000-element Vector{ABCProcessInput}:\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [4.694213004647641, 0.0, 0.0, 4.58646222408983]\n",
|
||||
" B: [4.694213004647641, 0.0, 0.0, -4.58646222408983]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.1989656045893697, -0.40235742161696864, 0.06512533692021122, 0.5209469423550988]\n",
|
||||
" B: [-1.2555060342925868, 0.3685683194051901, 0.4785890883121294, -0.4597882997907804]\n",
|
||||
" B: [-2.189083660521547, 0.31663070338411387, 0.1742479621961443, -1.9134967776579581]\n",
|
||||
" B: [-1.0637129314000269, -0.2948512505337184, 0.0500740340487307, -0.2050378784528044]\n",
|
||||
" B: [-1.6149410305664367, 1.0344652685816964, -0.406159957064284, 0.6106965118475143]\n",
|
||||
" B: [-2.0662167479253144, -1.0224556192203134, -0.3618764644129321, 1.4466795016989296]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [5.621657834589244, 0.0, 0.0, 5.532001157736559]\n",
|
||||
" B: [5.621657834589244, 0.0, 0.0, -5.532001157736559]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-2.058801595505931, 0.7220299456693885, 0.22719930902793095, 1.6327024349806234]\n",
|
||||
" B: [-1.1826215869997767, 0.04638669502532437, -0.553508153090363, -0.30011800516629]\n",
|
||||
" B: [-2.3776830758041227, -0.8637209881441633, -0.22710813067439403, 1.9636152272240621]\n",
|
||||
" B: [-1.9086249240920268, 0.02598092498567318, -1.087715954825374, -1.2079106316365085]\n",
|
||||
" B: [-2.6526208210236426, 0.3117066248738638, 1.6178469805428013, -1.8225826038033035]\n",
|
||||
" B: [-1.0629636657529868, -0.24238320241008685, 0.023285949019398133, -0.2657064215985837]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.176284774018432, 0.0, 0.0, 6.094792335245879]\n",
|
||||
" B: [6.176284774018432, 0.0, 0.0, -6.094792335245879]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-3.2943110238771185, 1.9799744259594443, 2.3805040294128346, 0.5151572192390796]\n",
|
||||
" B: [-1.0255775134941767, 0.18009906891836583, -0.12779691496180498, 0.05514988745120904]\n",
|
||||
" B: [-1.7854209452644407, -0.56381615584479, -0.9572322565407875, 0.9764966468120639]\n",
|
||||
" B: [-3.3312939695760786, -0.5949754252793171, -2.9420979921841868, -1.0428725518649993]\n",
|
||||
" B: [-1.6551651824618003, -0.8748451354288965, 0.9749427327758187, -0.1539624566503731]\n",
|
||||
" B: [-1.260800913363249, -0.12643677832480643, 0.6716804014981268, -0.34996874498697933]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [4.747497785190141, 0.0, 0.0, 4.640984294348053]\n",
|
||||
" B: [4.747497785190141, 0.0, 0.0, -4.640984294348053]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.3704329562088802, 0.8292801285050307, 0.2251475790952209, 0.3737506167990253]\n",
|
||||
" B: [-1.352958681672649, 0.11120507604905326, 0.6088733084867489, -0.6688825902852584]\n",
|
||||
" B: [-1.4224569379606473, -0.25277059018918374, -0.4925475402927904, -0.84669220478242]\n",
|
||||
" B: [-2.4534584066229996, -0.23638988525842838, -1.4120549440785204, 1.7232756047945383]\n",
|
||||
" B: [-1.4378719974624208, 0.5461758322111039, 0.8131489669135029, -0.3285674953530594]\n",
|
||||
" B: [-1.457816590452685, -0.9975005613175758, 0.257432629875838, -0.25288393117282576]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.148648417619223, 0.0, 0.0, 6.066784763240853]\n",
|
||||
" B: [6.148648417619223, 0.0, 0.0, -6.066784763240853]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.5381168736188293, 0.5769721565317305, 1.0069443436143835, 0.13773066601554382]\n",
|
||||
" B: [-1.3178580311796126, 0.27781510267038506, -0.8083323925420551, 0.07853217328003184]\n",
|
||||
" B: [-1.5330954954905804, 0.4994081736550063, -1.0290017953406905, 0.20525247761163526]\n",
|
||||
" B: [-3.083592979398096, -2.1497728433794587, -1.2247634566690573, -1.5449844205264607]\n",
|
||||
" B: [-3.1391572693216845, 0.49043306139044257, 2.931865230552653, 0.13397777318202247]\n",
|
||||
" B: [-1.6854761862296446, 0.30514434913189475, -0.876711929615233, 0.989491330437227]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [7.422637433466136, 0.0, 0.0, 7.35496746890785]\n",
|
||||
" B: [7.422637433466136, 0.0, 0.0, -7.35496746890785]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-3.3788591199517355, 2.3069724486616927, -0.5016400230094518, 2.2006645271171985]\n",
|
||||
" B: [-2.193241133599192, -1.652465184572841, -0.691853387986234, -0.7752447184070871]\n",
|
||||
" B: [-2.295315825041209, 0.334376552772819, 0.5374003175214306, 1.966689593293318]\n",
|
||||
" B: [-2.3721558149969235, -2.0813404180022568, 0.4923496733367945, 0.22964554029865022]\n",
|
||||
" B: [-1.5367714331999278, 0.9008878309070798, 0.1482895506792473, -0.7266895920420517]\n",
|
||||
" B: [-3.068931540143284, 0.1915687702335065, 0.015453869458212284, -2.8950653502600274]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.716486802754837, 0.0, 0.0, 6.64162592830851]\n",
|
||||
" B: [6.716486802754837, 0.0, 0.0, -6.64162592830851]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.3263331205917814, -0.5023870926274977, 0.418137178911541, 0.5761319775467438]\n",
|
||||
" B: [-2.1603199304697136, -1.202627416523187, 1.024176720111292, -1.0824654936733602]\n",
|
||||
" B: [-1.1665818595303201, 0.5747508534091106, 0.05041215840441908, 0.16743149576984034]\n",
|
||||
" B: [-1.829760754209137, 0.5127529745920416, -0.17835468593467171, -1.4329334983509001]\n",
|
||||
" B: [-2.891550940379351, -2.652621236308268, 0.3953841214715819, 0.41029113320086874]\n",
|
||||
" B: [-4.05842700032937, 3.2701319174577996, -1.7097554929641623, 1.3615443855068068]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [7.700331598721008, 0.0, 0.0, 7.635123229539995]\n",
|
||||
" B: [7.700331598721008, 0.0, 0.0, -7.635123229539995]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-2.382743739041896, -1.410381415274026, 1.0613871843128353, 1.2496996576655786]\n",
|
||||
" B: [-3.021630369232257, 0.25595209564405125, -2.8389223073732714, 0.07251720968504605]\n",
|
||||
" B: [-2.7262381500229256, 1.0736489469437192, 2.293577756890956, 0.13839603484966886]\n",
|
||||
" B: [-2.222260574660266, 1.5432031708495264, -0.7055857379280247, 1.0291330339668954]\n",
|
||||
" B: [-1.650055097318715, -1.062833285640475, -0.34598865120359784, 0.6880109623839291]\n",
|
||||
" B: [-3.397735267165956, -0.3995895125227963, 0.5355317553011019, -3.1777568985511193]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [4.9341647451125334, 0.0, 0.0, 4.8317679716550375]\n",
|
||||
" B: [4.9341647451125334, 0.0, 0.0, -4.8317679716550375]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.834221818900379, 0.1070495973399568, 1.2695354794210922, 0.860923766155068]\n",
|
||||
" B: [-1.5116322118250454, 0.39753882899610743, -0.756426277560466, -0.7448584495617266]\n",
|
||||
" B: [-1.6588475476725886, 0.06712527283179799, 0.6875031760830096, -1.1289857249063835]\n",
|
||||
" B: [-1.5718164783029667, 0.4294130824657117, -0.6215317131811225, -0.9486357444151968]\n",
|
||||
" B: [-1.7838526603309615, -0.5732435925039472, -0.9425541080554634, 0.9824020820472578]\n",
|
||||
" B: [-1.5079587731931232, -0.4278831891296266, 0.36347344329295106, 0.979154070680981]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [7.099667747066588, 0.0, 0.0, 7.028889109862067]\n",
|
||||
" B: [7.099667747066588, 0.0, 0.0, -7.028889109862067]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-3.851129225519823, 2.5555470019017212, -2.502060728335724, 1.019837214678957]\n",
|
||||
" B: [-2.3860288930086897, 0.6059782347076652, 0.6711053982516709, 1.9686395814801452]\n",
|
||||
" B: [-1.9543999030878276, -1.5857282951514855, 0.5255033921941499, -0.17026726032362857]\n",
|
||||
" B: [-1.5523812781985644, -1.154244859738803, 0.03484928145183679, -0.2763909626783212]\n",
|
||||
" B: [-3.2795110937910716, -1.0290377989842119, 1.3607888704851536, -2.616204860580336]\n",
|
||||
" B: [-1.175885100527199, 0.6074857172651138, -0.09018621404708665, 0.07438628742318319]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.3653048194550985, 0.0, 0.0, 6.286263233796236]\n",
|
||||
" B: [6.3653048194550985, 0.0, 0.0, -6.286263233796236]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-3.274142279992413, -2.62046758782023, -1.339558866223036, 1.028950598785383]\n",
|
||||
" B: [-1.8502190446152251, -1.1967169760014287, 0.8476370040459147, 0.5221977611776395]\n",
|
||||
" B: [-1.3090919645484567, 0.8304076910302604, -0.132118345313184, 0.08178985973111547]\n",
|
||||
" B: [-1.7699077332157842, 0.8156249668276708, -0.2891156025546255, 1.1763254081859622]\n",
|
||||
" B: [-1.6671330761442815, 1.2573648831500233, 0.2190135291489001, -0.3878135096217862]\n",
|
||||
" B: [-2.8601155403940384, 0.913787022813704, 0.6941422808960306, -2.421450118258315]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [5.2620105860572215, 0.0, 0.0, 5.166116085395126]\n",
|
||||
" B: [5.2620105860572215, 0.0, 0.0, -5.166116085395126]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.9479176369516882, 0.8861257045164052, 1.1018829783040076, 0.8916379636750793]\n",
|
||||
" B: [-1.2433791528628988, 0.41365857789168176, 0.544699730060495, -0.27960776595565956]\n",
|
||||
" B: [-1.074755543453127, 0.3002469943380598, 0.01041159782849033, 0.25464253219924826]\n",
|
||||
" B: [-1.7453891507499704, 1.1576089006622574, 0.03134512003430503, -0.8398466551182168]\n",
|
||||
" B: [-1.5208938996272057, 0.008686514238768405, -1.1440782944999142, -0.06424682441800389]\n",
|
||||
" B: [-2.991685788469555, -2.7663266916471727, -0.544261131727384, 0.03742074961755215]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [4.439668869119513, 0.0, 0.0, 4.325582003318043]\n",
|
||||
" B: [4.439668869119513, 0.0, 0.0, -4.325582003318043]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.1969832203303146, 0.48265768801558717, -0.02482335564392214, 0.4463117598342591]\n",
|
||||
" B: [-1.7251727113760817, -1.0744400415092346, 0.6322269398265393, 0.6496834443295479]\n",
|
||||
" B: [-1.419669052608684, -0.4173084301546306, -0.44626125418717505, -0.8013518491074973]\n",
|
||||
" B: [-1.331289111993432, -0.7645577006899625, -0.3423664341778722, 0.2656453402118452]\n",
|
||||
" B: [-1.5156451020746182, 0.6491857388484042, 0.8955487542892042, -0.2715333876518423]\n",
|
||||
" B: [-1.6905785398558963, 1.1244627454898357, -0.7143246501067739, -0.2887553076163127]\n",
|
||||
"\n",
|
||||
" ⋮\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [5.750717080737416, 0.0, 0.0, 5.663104002460582]\n",
|
||||
" B: [5.750717080737416, 0.0, 0.0, -5.663104002460582]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.0362067302993534, 0.23737037129807034, 0.1316212944823847, 0.007451817649030921]\n",
|
||||
" B: [-3.597917991072113, -1.5787159301449987, 0.28387609057144564, 3.0613860010767477]\n",
|
||||
" B: [-1.0798303035395174, -0.06880694215947386, -0.2669312876106363, -0.3000779512850572]\n",
|
||||
" B: [-1.3394551212059678, -0.7053379424304421, 0.44160810884651497, -0.3187799976376953]\n",
|
||||
" B: [-3.270241523195321, 1.927780354010675, 0.003047457202140131, -2.4450221348130854]\n",
|
||||
" B: [-1.1777824921625586, 0.1877100894261692, -0.5932216634918489, -0.004957734989940532]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.84577391627276, 0.0, 0.0, 6.772342320993563]\n",
|
||||
" B: [6.84577391627276, 0.0, 0.0, -6.772342320993563]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.0594956991232163, -0.09579189209396338, 0.21296650876679918, 0.2607687021353065]\n",
|
||||
" B: [-1.8300488673592041, 0.8497425690197566, -0.8227483588311224, 0.9747315329664396]\n",
|
||||
" B: [-2.860723394379955, 0.6743651794772785, 0.1320397309862766, 2.5906631300310776]\n",
|
||||
" B: [-2.557528905485892, -1.3508678766931497, 1.2829278224554168, -1.4388211440218013]\n",
|
||||
" B: [-3.790115184858299, 0.47588521284738383, -1.0334447791446917, -3.474262262286086]\n",
|
||||
" B: [-1.5936357813389537, -0.553333192557306, 0.2282590757673212, 1.086920041175065]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.25909007687458, 0.0, 0.0, 6.178689876537731]\n",
|
||||
" B: [6.25909007687458, 0.0, 0.0, -6.178689876537731]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-2.15208406752572, -0.27987613820502405, 0.20983197963180572, -1.873260718983155]\n",
|
||||
" B: [-3.1436326945514232, -2.0821664144960677, -1.9679549582157083, 0.8210741885063981]\n",
|
||||
" B: [-2.206056617746511, 1.7689323832663284, -0.4273996865759156, -0.7449117612507478]\n",
|
||||
" B: [-1.8709609004510535, 0.5332842722412897, 1.48760475220818, -0.055988188078690854]\n",
|
||||
" B: [-1.0916331546903268, 0.018218872767661307, 0.4300802089857822, 0.07976234031782706]\n",
|
||||
" B: [-2.0538127187841235, 0.04160702442581186, 0.2678377039658561, 1.7733241394883685]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.8752382625158255, 0.0, 0.0, 6.802124753807565]\n",
|
||||
" B: [6.8752382625158255, 0.0, 0.0, -6.802124753807565]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-3.815955448364548, 1.7284392485789066, 3.22998101457395, -0.37581430702794955]\n",
|
||||
" B: [-3.705003390432734, 0.8773209536576554, -3.1633610279519866, -1.3966048382509024]\n",
|
||||
" B: [-1.4798429985544235, -0.876885056483666, -0.05155962504198175, 0.6467994303891397]\n",
|
||||
" B: [-1.196598159149068, -0.6492448407423084, 0.0066213036625077295, -0.10141227532326653]\n",
|
||||
" B: [-1.307725757451199, -0.47623875265044, -0.08939192779758245, -0.6894580410872709]\n",
|
||||
" B: [-2.2453507710796776, -0.6033915523601473, 0.06771026255509205, 1.91649003130025]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.591382068439754, 0.0, 0.0, 6.515083849970707]\n",
|
||||
" B: [6.591382068439754, 0.0, 0.0, -6.515083849970707]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-2.166341377746586, 0.738656605699622, 1.1097711420427974, -1.3841348908550482]\n",
|
||||
" B: [-1.9136122405957643, -1.3687809690739081, -0.8052302154690981, 0.37410528752561706]\n",
|
||||
" B: [-1.020282522629639, 0.01566959851558055, -0.04103060943002397, -0.1976040959992001]\n",
|
||||
" B: [-3.3680104240574718, -0.44221430614525714, -3.1855463435158966, -0.015336796039828009]\n",
|
||||
" B: [-1.1380460439601876, 0.33787512483866744, -0.3053034033656307, 0.2962752606648943]\n",
|
||||
" B: [-3.576471527889859, 0.7187939461652956, 3.227339429737853, 0.9266952347035636]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [7.366791305680796, 0.0, 0.0, 7.298603574756898]\n",
|
||||
" B: [7.366791305680796, 0.0, 0.0, -7.298603574756898]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.1161936134323496, 0.1815174250263101, -0.30155987378038246, 0.34928677273057857]\n",
|
||||
" B: [-1.1768168637671912, -0.488638136596838, -0.0387546058981897, 0.38030091090042567]\n",
|
||||
" B: [-3.8756829146246745, -0.22123631639903027, -3.6727532274395425, -0.694878606198396]\n",
|
||||
" B: [-1.4161987387916468, -0.42653096897021076, -0.26480462532703347, -0.8680833546784509]\n",
|
||||
" B: [-3.4638938410201177, 2.8217659294852746, 1.2824429941168167, 1.179634497585545]\n",
|
||||
" B: [-3.6847966397256138, -1.8668779325455054, 2.995429338328331, -0.346260220339702]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [4.762032860651893, 0.0, 0.0, 4.655851905497903]\n",
|
||||
" B: [4.762032860651893, 0.0, 0.0, -4.655851905497903]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-2.656166654414924, 2.017338594394486, -1.384735065574992, 0.2609120345236529]\n",
|
||||
" B: [-1.031990140619295, -0.035004877965791346, -0.20112979442869375, 0.15272561883031827]\n",
|
||||
" B: [-1.7319386082994335, -1.0359644740176492, 0.8025718625008718, -0.5312883934487891]\n",
|
||||
" B: [-1.7450617894727098, -0.49163856285061436, 1.1666756465784553, 0.6651316473275205]\n",
|
||||
" B: [-1.0945973465763637, -0.42438631366397905, -0.017047995524507212, 0.1332252744613839]\n",
|
||||
" B: [-1.2643111819210613, -0.030344365896452122, -0.3663346535511349, -0.6807061816940867]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.12211537837656, 0.0, 0.0, 6.039892110473065]\n",
|
||||
" B: [6.12211537837656, 0.0, 0.0, -6.039892110473065]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-2.09449973649211, -1.247911941781509, -0.776547530016726, 1.1075282684200622]\n",
|
||||
" B: [-2.857971140758051, 1.4507115887866229, 2.2078617054725442, 0.43449006556414854]\n",
|
||||
" B: [-2.068918524386865, -0.43350532192333185, 1.7407499017717505, -0.24957318745593]\n",
|
||||
" B: [-1.0503370840395667, 0.28162676024293815, -0.11219953076948735, 0.10632790470480236]\n",
|
||||
" B: [-1.6648953051752136, 0.3171875953909028, -1.2925202016854087, 0.025689195388605857]\n",
|
||||
" B: [-2.5076089659013125, -0.36810868071562286, -1.7673443447726724, -1.4244622466216894]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [7.431058837653249, 0.0, 0.0, 7.363466265874004]\n",
|
||||
" B: [7.431058837653249, 0.0, 0.0, -7.363466265874004]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.4340725727125623, 0.9525417282027518, 0.38239995291064965, -0.05476016666222433]\n",
|
||||
" B: [-3.5734117962040854, 2.3267511116139916, 2.49915109639257, -0.33127771922267657]\n",
|
||||
" B: [-2.3529075757582945, 1.185265706342765, -1.375530715171772, 1.1132091075119688]\n",
|
||||
" B: [-2.710381815585542, -2.1195780947035594, -1.2974231675570782, -0.4126153305389483]\n",
|
||||
" B: [-2.374272199256637, -1.2400410368129877, 1.6839473809113144, -0.5136028830766439]\n",
|
||||
" B: [-2.4170717157893766, -1.104939414642962, -1.8925445474856835, 0.1990469919885247]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [4.370360958267613, 0.0, 0.0, 4.254415930013168]\n",
|
||||
" B: [4.370360958267613, 0.0, 0.0, -4.254415930013168]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.0037967551530176, -0.04979456910726583, -0.007092097585518878, 0.07126098999442977]\n",
|
||||
" B: [-2.2427356029926337, 0.4432886498747459, -1.2315068062419472, -1.522087101319342]\n",
|
||||
" B: [-1.576810353663218, -0.08400160217698217, 1.025238316808337, 0.6543401378482231]\n",
|
||||
" B: [-1.1878570602356244, 0.3852696171578499, -0.47734716319323317, 0.18630996601909597]\n",
|
||||
" B: [-1.6436772930583505, -1.0018521094453126, 0.4216069097815019, 0.7212593210074284]\n",
|
||||
" B: [-1.0858448514323804, 0.3070900136969648, 0.26910084043086047, -0.11108331354983517]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [5.940760429560125, 0.0, 0.0, 5.855991332082674]\n",
|
||||
" B: [5.940760429560125, 0.0, 0.0, -5.855991332082674]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-2.5515863925730233, 0.0574036477190863, 1.9321385747234918, 1.3319678930281418]\n",
|
||||
" B: [-3.2707523737124977, -2.710802011299676, -1.41016923110446, -0.6006632045712658]\n",
|
||||
" B: [-1.6965910302662786, 0.9846458960035911, 0.9504416414719069, -0.07452697242920955]\n",
|
||||
" B: [-1.0283520810617242, 0.1620200166783027, 0.15874691422324994, -0.07782630689000514]\n",
|
||||
" B: [-1.277724475991329, 0.26836143674120055, -0.33222621981983513, -0.6709602929248032]\n",
|
||||
" B: [-2.0565145055153993, 1.2383710141574962, -1.298931679494354, 0.09200888378714224]\n",
|
||||
"\n",
|
||||
" Input for ABC Process: 'AB->ABBBBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [6.732994664701373, 0.0, 0.0, 6.65831939417877]\n",
|
||||
" B: [6.732994664701373, 0.0, 0.0, -6.65831939417877]\n",
|
||||
" 6 Outgoing Particles:\n",
|
||||
" A: [-1.602557260532173, -0.06659157948757613, 0.9308846463293637, -0.8349904850080558]\n",
|
||||
" B: [-1.3205375883536927, 0.7078592481114431, -0.05631226213188625, -0.48947291677035515]\n",
|
||||
" B: [-1.7625153098951976, 0.12706601232750347, 0.34097061443470383, 1.405010137407617]\n",
|
||||
" B: [-2.7792473938949334, 1.6510422215054068, 1.7155538904747691, -1.0272051928194055]\n",
|
||||
" B: [-2.722083339444658, -0.5204063912580275, -2.061236049180356, -1.3748530264647703]\n",
|
||||
" B: [-3.279048437282091, -1.89896951119875, -0.8698608399265956, 2.3215114836549695]\n"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"@time inputs = [gen_process_input(process) for _ in 1:1000]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Internal error: stack overflow in type inference of materialize(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(MetagraphOptimization.compute__bad8f2ac_7bfc_11ee_176b_b72dc8919aad), Tuple{Array{MetagraphOptimization.ABCProcessInput, 1}}}).\n",
|
||||
"This might be caused by recursion over very long tuples or argument lists.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "LoadError",
|
||||
"evalue": "StackOverflowError:",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"StackOverflowError:",
|
||||
"",
|
||||
"Stacktrace:",
|
||||
" [1] argtypes_to_type",
|
||||
" @ ./compiler/typeutils.jl:71 [inlined]",
|
||||
" [2] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1948",
|
||||
" [3] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
|
||||
" [4] abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1566",
|
||||
" [5] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1855",
|
||||
" [6] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
|
||||
" [7] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
|
||||
" [8] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
|
||||
" [9] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
|
||||
" [10] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2682",
|
||||
" [11] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
|
||||
" [12] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
|
||||
" [13] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:246",
|
||||
" [14] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:216",
|
||||
" [15] typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:932",
|
||||
" [16] abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:611",
|
||||
" [17] abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:152",
|
||||
" [18] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1949",
|
||||
"--- the last 16 lines are repeated 413 more times ---",
|
||||
" [6627] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
|
||||
" [6628] abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1566",
|
||||
" [6629] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1855",
|
||||
" [6630] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
|
||||
" [6631] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
|
||||
" [6632] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
|
||||
" [6633] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
|
||||
" [6634] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2658",
|
||||
" [6635] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
|
||||
" [6636] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
|
||||
" [6637] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:246",
|
||||
" [6638] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:216",
|
||||
" [6639] typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:932",
|
||||
" [6640] abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:611",
|
||||
" [6641] abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:152",
|
||||
" [6642] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1949",
|
||||
" [6643] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2020",
|
||||
" [6644] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:1999",
|
||||
" [6645] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2183",
|
||||
" [6646] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2396",
|
||||
" [6647] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2682",
|
||||
" [6648] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2867",
|
||||
" [6649] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/abstractinterpretation.jl:2955",
|
||||
" [6650] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:246",
|
||||
" [6651] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:216",
|
||||
" [6652] typeinf",
|
||||
" @ ./compiler/typeinfer.jl:12 [inlined]",
|
||||
" [6653] typeinf_type(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:1079",
|
||||
" [6654] return_type(interp::Core.Compiler.NativeInterpreter, t::DataType)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:1140",
|
||||
" [6655] return_type(f::Any, t::DataType)",
|
||||
" @ Core.Compiler ./compiler/typeinfer.jl:1112",
|
||||
" [6656] combine_eltypes(f::Function, args::Tuple{Vector{ABCProcessInput}})",
|
||||
" @ Base.Broadcast ./broadcast.jl:730",
|
||||
" [6657] copy(bc::Base.Broadcast.Broadcasted{Style}) where Style",
|
||||
" @ Base.Broadcast ./broadcast.jl:895",
|
||||
" [6658] materialize(bc::Base.Broadcast.Broadcasted)",
|
||||
" @ Base.Broadcast ./broadcast.jl:873",
|
||||
" [6659] var\"##core#302\"()",
|
||||
" @ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:489",
|
||||
" [6660] var\"##sample#303\"(::Tuple{}, __params::BenchmarkTools.Parameters)",
|
||||
" @ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:495",
|
||||
" [6661] _run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; verbose::Bool, pad::String, kwargs::Base.Pairs{Symbol, Integer, NTuple{4, Symbol}, NamedTuple{(:samples, :evals, :gctrial, :gcsample), Tuple{Int64, Int64, Bool, Bool}}})",
|
||||
" @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:99",
|
||||
" [6662] #invokelatest#2",
|
||||
" @ ./essentials.jl:821 [inlined]",
|
||||
" [6663] invokelatest",
|
||||
" @ ./essentials.jl:816 [inlined]",
|
||||
" [6664] #run_result#45",
|
||||
" @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]",
|
||||
" [6665] run_result",
|
||||
" @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]",
|
||||
" [6666] run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; progressid::Nothing, nleaves::Float64, ndone::Float64, kwargs::Base.Pairs{Symbol, Integer, NTuple{5, Symbol}, NamedTuple{(:verbose, :samples, :evals, :gctrial, :gcsample), Tuple{Bool, Int64, Int64, Bool, Bool}}})",
|
||||
" @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117",
|
||||
" [6667] run (repeats 2 times)",
|
||||
" @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117 [inlined]",
|
||||
" [6668] #warmup#54",
|
||||
" @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:169 [inlined]",
|
||||
" [6669] warmup(item::BenchmarkTools.Benchmark)",
|
||||
" @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:168"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"using BenchmarkTools\n",
|
||||
"#compute_bench = @benchmark compute_AB_AB5.(inputs)\n",
|
||||
"compute_bench_reduced = @benchmark compute_AB_AB5_reduced.(inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Julia 1.9.3",
|
||||
"language": "julia",
|
||||
"name": "julia-1.9"
|
||||
},
|
||||
"language_info": {
|
||||
"file_extension": ".jl",
|
||||
"mimetype": "application/julia",
|
||||
"name": "julia",
|
||||
"version": "1.9.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
409
notebooks/abc_model_showcase.ipynb
Normal file
409
notebooks/abc_model_showcase.ipynb
Normal file
@ -0,0 +1,409 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "20768e45-df62-4638-ba33-b0ccf239f1aa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"using Revise\n",
|
||||
"using MetagraphOptimization\n",
|
||||
"using BenchmarkTools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "ff5f4a49",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found 1 NUMA nodes\n",
|
||||
"CUDA is non-functional\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Machine(MetagraphOptimization.AbstractDevice[MetagraphOptimization.NumaNode(0x0000, 0x0001, MetagraphOptimization.LocalVariables(), -1.0, UUID(\"a89974f6-6212-11ee-0866-0f591a3b69ea\"))], [-1.0;;])"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get our machine's info\n",
|
||||
"machine = get_machine_info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9df482a4-ca44-44c5-9ea7-7a2977d529be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"ABCModel()"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a model identifier\n",
|
||||
"model = ABCModel()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "30b16872-07f7-4d47-8ff8-8c3a849c9d4e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"ABC Process: 'AB->ABBB'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a process in our model\n",
|
||||
"process_str = \"AB->ABBB\"\n",
|
||||
"process = parse_process(process_str, model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "043bd9e2-f89a-4362-885a-8c89d4cdd76f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Total: 280, ComputeTaskP"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Graph:\n",
|
||||
" Nodes: \n",
|
||||
" Edges: 385\n",
|
||||
" Total Compute Effort: 1075.0\n",
|
||||
" Total Data Transfer: 10944.0\n",
|
||||
" Total Compute Intensity: 0.09822733918128655\n"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
": 6, ComputeTaskU: 6, \n",
|
||||
" ComputeTaskV: 64, ComputeTaskSum: 1, ComputeTaskS2: 24, \n",
|
||||
" ComputeTaskS1: 36, DataTask: 143"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Read the graph (of the same process) from a file\n",
|
||||
"graph = parse_dag(\"../input/$process_str.txt\", model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "02f01ad3-fd10-48d5-a0e0-c03dc83c80a4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Input for ABC Process: 'AB->ABBB':\n",
|
||||
" 2 Incoming particles:\n",
|
||||
" A: [5.77986599979293, 0.0, 0.0, 5.692701553354288]\n",
|
||||
" B: [5.77986599979293, 0.0, 0.0, -5.692701553354288]\n",
|
||||
" 4 Outgoing Particles:\n",
|
||||
" A: [-3.8835293143673746, -1.4292027910861678, 2.8576090179942106, 1.968057422378813]\n",
|
||||
" B: [-1.1554024905063585, -0.1464656500147254, -0.2082400426692148, 0.5197487980391896]\n",
|
||||
" B: [-2.849749730594798, -1.0177034035100576, -2.464951858896686, -0.09677625137882176]\n",
|
||||
" B: [-3.6710504641173287, 2.5933718446109513, -0.1844171164283155, -2.391029969039186]\n"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Generate some random input data for our process\n",
|
||||
"input_data = gen_process_input(process)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "083fb1be-ce2a-47f9-afb9-60a6fdfaed0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"compute__af4450a2_6212_11ee_2601_cde7cf2aedc1 (generic function with 1 method)"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get the function computing the result of the process from a ProcessInput\n",
|
||||
"AB_AB3_compute = get_compute_function(graph, process, machine)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "a40c9500-8f79-4f04-b3c5-59b72a6b7ba9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"-1.8924431710735022e-13"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Actually compute a result using the generated function and the input data\n",
|
||||
"result = AB_AB3_compute(input_data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "80c70010",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"include(\"../examples/profiling_utilities.jl\")\n",
|
||||
"\n",
|
||||
"# We can also mute the graph by applying some operations to it\n",
|
||||
"reduce_all!(graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "5b192b44",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The result should be the same as before (we can use execute to save having to generate the function ourselves)\n",
|
||||
"@assert result ≈ execute(graph, process, machine, input_data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "9b2f4a3f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1000-element Vector{Float64}:\n",
|
||||
" -2.1491995259940396e-11\n",
|
||||
" -1.04995646459455e-11\n",
|
||||
" 5.821760691187782e-15\n",
|
||||
" -6.556969485683705e-14\n",
|
||||
" -1.3588086164373753e-14\n",
|
||||
" -1.8789662441593694e-13\n",
|
||||
" -2.131973301835892e-13\n",
|
||||
" -5.3359759072004825e-12\n",
|
||||
" -9.053914191490223e-13\n",
|
||||
" -5.61107901706923e-13\n",
|
||||
" -5.063492275603428e-11\n",
|
||||
" 2.9168508985811397e-15\n",
|
||||
" -1.6420151378194157e-13\n",
|
||||
" ⋮\n",
|
||||
" 1.0931677247833436e-13\n",
|
||||
" -7.704755306462797e-16\n",
|
||||
" -1.8385907037491397e-12\n",
|
||||
" -6.036215596560059e-14\n",
|
||||
" -9.98872401400362e-12\n",
|
||||
" 3.4861755637292935e-13\n",
|
||||
" -1.1051119822969222e-10\n",
|
||||
" -2.496572513216201e-12\n",
|
||||
" -3.8682427847201926e-11\n",
|
||||
" 7.904149696653438e-15\n",
|
||||
" -7.606811743178716e-11\n",
|
||||
" -5.100594937480292e-13"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Now we can generate a function and use it on lots of inputs\n",
|
||||
"inputs = [gen_process_input(process) for _ in 1:1000]\n",
|
||||
"AB_AB3_reduced_compute = get_compute_function(graph, process, machine)\n",
|
||||
"\n",
|
||||
"results = AB_AB3_reduced_compute.(inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "d43e4ff0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"BenchmarkTools.Trial: 879 samples with 1 evaluation.\n",
|
||||
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m4.567 ms\u001b[22m\u001b[39m … \u001b[35m14.334 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 54.51%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m4.998 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m5.686 ms\u001b[22m\u001b[39m ± \u001b[32m 1.414 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m9.09% ± 14.49%\n",
|
||||
"\n",
|
||||
" \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m▇\u001b[39m█\u001b[34m▅\u001b[39m\u001b[39m▄\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
|
||||
" \u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[32m█\u001b[39m\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▄\u001b[39m▅\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▁\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▁\u001b[39m▄\u001b[39m \u001b[39m▇\n",
|
||||
" 4.57 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 10 ms \u001b[0m\u001b[1m<\u001b[22m\n",
|
||||
"\n",
|
||||
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m6.17 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m143006\u001b[39m."
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"@benchmark results = AB_AB3_compute.($inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "e18d9546",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"BenchmarkTools.Trial: 1089 samples with 1 evaluation.\n",
|
||||
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m3.637 ms\u001b[22m\u001b[39m … \u001b[35m10.921 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 59.52%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m4.098 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m 0.00%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m4.587 ms\u001b[22m\u001b[39m ± \u001b[32m 1.334 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m10.21% ± 15.77%\n",
|
||||
"\n",
|
||||
" \u001b[39m \u001b[39m▂\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[34m█\u001b[39m\u001b[39m▆\u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
|
||||
" \u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[32m▆\u001b[39m\u001b[39m▅\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m \u001b[39m█\n",
|
||||
" 3.64 ms\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 8.78 ms \u001b[0m\u001b[1m<\u001b[22m\n",
|
||||
"\n",
|
||||
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m5.26 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m123006\u001b[39m."
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"@benchmark results = AB_AB3_reduced_compute.($inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "13efed12-3547-400b-a7a2-5dfae9a973a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set a different caching strategy\n",
|
||||
"MetagraphOptimization.set_cache_strategy(machine.devices[1], MetagraphOptimization.Dictionary())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "ef62716b-a219-4f6e-9150-f984d3734839",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"BenchmarkTools.Trial: 331 samples with 1 evaluation.\n",
|
||||
" Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m12.148 ms\u001b[22m\u001b[39m … \u001b[35m24.164 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 13.35%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m15.412 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m17.47%\n",
|
||||
" Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m15.117 ms\u001b[22m\u001b[39m ± \u001b[32m 2.194 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m12.31% ± 8.95%\n",
|
||||
"\n",
|
||||
" \u001b[39m \u001b[39m▄\u001b[39m█\u001b[39m▄\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m▄\u001b[39m\u001b[39m▄\u001b[34m▂\u001b[39m\u001b[39m \u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n",
|
||||
" \u001b[39m▅\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▅\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▅\u001b[39m▂\u001b[39m▃\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[32m█\u001b[39m\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▇\u001b[39m█\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▆\u001b[39m▅\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▃\n",
|
||||
" 12.1 ms\u001b[90m Histogram: frequency by time\u001b[39m 21 ms \u001b[0m\u001b[1m<\u001b[22m\n",
|
||||
"\n",
|
||||
" Memory estimate\u001b[90m: \u001b[39m\u001b[33m27.46 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m118013\u001b[39m."
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# ... and bench again\n",
|
||||
"AB_AB3_reduced_dict_compute = get_compute_function(graph, process, machine)\n",
|
||||
"@benchmark results = AB_AB3_reduced_dict_compute.($inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5461ffd4-6a0e-4f1f-b1f1-3a2854a8ae88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Julia 1.9.3",
|
||||
"language": "julia",
|
||||
"name": "julia-1.9"
|
||||
},
|
||||
"language_info": {
|
||||
"file_extension": ".jl",
|
||||
"mimetype": "application/julia",
|
||||
"name": "julia",
|
||||
"version": "1.9.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
70
notebooks/profiling.ipynb
Normal file
70
notebooks/profiling.ipynb
Normal file
@ -0,0 +1,70 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"using Revise; using MetagraphOptimization; using BenchmarkTools; using ProfileView\n",
|
||||
"using Base.Threads\n",
|
||||
"nthreads()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ABCModel()\n",
|
||||
"process_str = \"AB->ABBBBB\"\n",
|
||||
"process = parse_process(process_str, model)\n",
|
||||
"graph = parse_dag(\"../input/$process_str.txt\", model)\n",
|
||||
"print(graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"include(\"../examples/profiling_utilities.jl\")\n",
|
||||
"@ProfileView.profview reduce_all!(graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@ProfileView.profview comp_func = get_compute_function(graph, process)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Julia 1.9.3",
|
||||
"language": "julia",
|
||||
"name": "julia-1.9"
|
||||
},
|
||||
"language_info": {
|
||||
"file_extension": ".jl",
|
||||
"mimetype": "application/julia",
|
||||
"name": "julia",
|
||||
"version": "1.9.3"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -6,20 +6,20 @@ julia --project=./examples -t 4 -e 'import Pkg; Pkg.instantiate()'
|
||||
|
||||
#for i in $(seq $minthreads $maxthreads)
|
||||
# printf "(AB->AB, $i) "
|
||||
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->AB.txt"))'
|
||||
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->AB.txt"), ABCModel())'
|
||||
#end
|
||||
|
||||
#for i in $(seq $minthreads $maxthreads)
|
||||
# printf "(AB->ABBB, $i) "
|
||||
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBB.txt"))'
|
||||
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBB.txt"), ABCModel())'
|
||||
#end
|
||||
|
||||
#for i in $(seq $minthreads $maxthreads)
|
||||
# printf "(AB->ABBBBB, $i) "
|
||||
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBBBB.txt"))'
|
||||
# julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBBBB.txt"), ABCModel())'
|
||||
#end
|
||||
|
||||
for i in $(seq $minthreads $maxthreads)
|
||||
printf "(AB->ABBBBBBB, $i) "
|
||||
julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBBBBBB.txt"))'
|
||||
julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBBBBBB.txt"), ABCModel())'
|
||||
end
|
||||
|
@ -5,6 +5,7 @@ A module containing tools to work on DAGs.
|
||||
"""
|
||||
module MetagraphOptimization
|
||||
|
||||
# graph types
|
||||
export DAG
|
||||
export Node
|
||||
export Edge
|
||||
@ -18,6 +19,7 @@ export FusedComputeTask
|
||||
export PossibleOperations
|
||||
export GraphProperties
|
||||
|
||||
# graph functions
|
||||
export make_node
|
||||
export make_edge
|
||||
export insert_node
|
||||
@ -27,10 +29,13 @@ export is_exit_node
|
||||
export parents
|
||||
export children
|
||||
export compute
|
||||
export data
|
||||
export compute_effort
|
||||
export get_properties
|
||||
export get_exit_node
|
||||
export is_valid
|
||||
export is_valid, is_scheduled
|
||||
|
||||
# graph operation related
|
||||
export Operation
|
||||
export AppliedOperation
|
||||
export NodeFusion
|
||||
@ -42,7 +47,10 @@ export can_pop
|
||||
export reset_graph!
|
||||
export get_operations
|
||||
|
||||
export parse_abc
|
||||
# ABC model
|
||||
export ParticleValue
|
||||
export ParticleA, ParticleB, ParticleC
|
||||
export ABCProcessDescription, ABCProcessInput, ABCModel
|
||||
export ComputeTaskP
|
||||
export ComputeTaskS1
|
||||
export ComputeTaskS2
|
||||
@ -50,10 +58,19 @@ export ComputeTaskV
|
||||
export ComputeTaskU
|
||||
export ComputeTaskSum
|
||||
|
||||
# code generation related
|
||||
export execute
|
||||
export gen_particles
|
||||
export ParticleValue
|
||||
export Particle
|
||||
export parse_dag, parse_process
|
||||
export gen_process_input
|
||||
export get_compute_function
|
||||
|
||||
# estimator
|
||||
export cost_type, graph_cost, operation_effect
|
||||
export GlobalMetricEstimator, CDCost
|
||||
|
||||
# machine info
|
||||
export Machine
|
||||
export get_machine_info
|
||||
|
||||
export ==, in, show, isempty, delete!, length
|
||||
|
||||
@ -72,6 +89,7 @@ import Base.insert!
|
||||
import Base.collect
|
||||
|
||||
|
||||
include("devices/interface.jl")
|
||||
include("task/type.jl")
|
||||
include("node/type.jl")
|
||||
include("diff/type.jl")
|
||||
@ -111,15 +129,37 @@ include("properties/utility.jl")
|
||||
|
||||
include("task/create.jl")
|
||||
include("task/compare.jl")
|
||||
include("task/compute.jl")
|
||||
include("task/print.jl")
|
||||
include("task/properties.jl")
|
||||
|
||||
include("estimator/interface.jl")
|
||||
include("estimator/global_metric.jl")
|
||||
|
||||
include("models/interface.jl")
|
||||
include("models/print.jl")
|
||||
|
||||
include("models/abc/types.jl")
|
||||
include("models/abc/particle.jl")
|
||||
include("models/abc/compute.jl")
|
||||
include("models/abc/create.jl")
|
||||
include("models/abc/properties.jl")
|
||||
include("models/abc/parse.jl")
|
||||
include("models/abc/print.jl")
|
||||
|
||||
include("devices/measure.jl")
|
||||
include("devices/detect.jl")
|
||||
include("devices/impl.jl")
|
||||
|
||||
include("devices/numa/impl.jl")
|
||||
include("devices/cuda/impl.jl")
|
||||
# can currently not use AMDGPU because of incompatability with the newest rocm drivers
|
||||
# include("devices/rocm/impl.jl")
|
||||
# oneapi seems also broken for now
|
||||
# include("devices/oneapi/impl.jl")
|
||||
|
||||
include("scheduler/interface.jl")
|
||||
include("scheduler/greedy.jl")
|
||||
|
||||
include("code_gen/main.jl")
|
||||
|
||||
|
@ -1,126 +1,158 @@
|
||||
using DataStructures
|
||||
|
||||
"""
|
||||
gen_code(graph::DAG)
|
||||
|
||||
Generate the code for a given graph. The return value is a tuple of:
|
||||
Generate the code for a given graph. The return value is a named tuple of:
|
||||
|
||||
- `code::Expr`: The julia expression containing the code for the whole graph.
|
||||
- `inputSymbols::Dict{String, Symbol}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
|
||||
- `inputSymbols::Dict{String, Vector{Symbol}}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
|
||||
- `outputSymbol::Symbol`: The symbol of the final calculated value
|
||||
|
||||
See also: [`execute`](@ref)
|
||||
"""
|
||||
function gen_code(graph::DAG)
|
||||
code = Vector{Expr}()
|
||||
sizehint!(code, length(graph.nodes))
|
||||
function gen_code(graph::DAG, machine::Machine)
|
||||
sched = schedule_dag(GreedyScheduler(), graph, machine)
|
||||
|
||||
nodeQueue = PriorityQueue{Node, Int}()
|
||||
inputSyms = Dict{String, Symbol}()
|
||||
codeAcc = Vector{Expr}()
|
||||
sizehint!(codeAcc, length(graph.nodes))
|
||||
|
||||
# use a priority equal to the number of unseen children -> 0 are nodes that can be added
|
||||
for node in sched
|
||||
# TODO: this is kind of ugly, should init nodes be scheduled differently from the rest?
|
||||
if (node isa DataTaskNode && length(node.children) == 0)
|
||||
push!(codeAcc, get_init_expression(node, entry_device(machine)))
|
||||
continue
|
||||
end
|
||||
push!(codeAcc, get_expression(node))
|
||||
end
|
||||
|
||||
# get inSymbols
|
||||
inputSyms = Dict{String, Vector{Symbol}}()
|
||||
for node in get_entry_nodes(graph)
|
||||
enqueue!(nodeQueue, node => 0)
|
||||
push!(inputSyms, node.name => Symbol("data_$(to_var_name(node.id))_in"))
|
||||
if !haskey(inputSyms, node.name)
|
||||
inputSyms[node.name] = Vector{Symbol}()
|
||||
end
|
||||
|
||||
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
|
||||
end
|
||||
|
||||
node = nothing
|
||||
while !isempty(nodeQueue)
|
||||
@assert peek(nodeQueue)[2] == 0
|
||||
node = dequeue!(nodeQueue)
|
||||
# get outSymbol
|
||||
outSym = Symbol(to_var_name(get_exit_node(graph).id))
|
||||
|
||||
push!(code, get_expression(node))
|
||||
for parent in node.parents
|
||||
# reduce the priority of all parents by one
|
||||
if (!haskey(nodeQueue, parent))
|
||||
enqueue!(nodeQueue, parent => length(parent.children) - 1)
|
||||
else
|
||||
nodeQueue[parent] = nodeQueue[parent] - 1
|
||||
end
|
||||
return (code = Expr(:block, codeAcc...), inputSymbols = inputSyms, outputSymbol = outSym)
|
||||
end
|
||||
|
||||
function gen_cache_init_code(machine::Machine)
|
||||
initializeCaches = Vector{Expr}()
|
||||
|
||||
for device in machine.devices
|
||||
push!(initializeCaches, gen_cache_init_code(device))
|
||||
end
|
||||
|
||||
return Expr(:block, initializeCaches...)
|
||||
end
|
||||
|
||||
function gen_input_assignment_code(
|
||||
inputSymbols::Dict{String, Vector{Symbol}},
|
||||
processDescription::AbstractProcessDescription,
|
||||
machine::Machine,
|
||||
processInputSymbol::Symbol = :input,
|
||||
)
|
||||
@assert length(inputSymbols) >=
|
||||
sum(values(in_particles(processDescription))) + sum(values(out_particles(processDescription))) "Number of input Symbols is smaller than the number of particles in the process description"
|
||||
|
||||
assignInputs = Vector{Expr}()
|
||||
for (name, symbols) in inputSymbols
|
||||
type = type_from_name(name)
|
||||
index = parse(Int, name[2:end])
|
||||
|
||||
p = nothing
|
||||
|
||||
if (index > in_particles(processDescription)[type])
|
||||
index -= in_particles(processDescription)[type]
|
||||
@assert index <= out_particles(processDescription)[type] "Too few particles of type $type in input particles for this process"
|
||||
|
||||
p = "filter(x -> typeof(x) <: $type, out_particles($(processInputSymbol)))[$(index)]"
|
||||
else
|
||||
p = "filter(x -> typeof(x) <: $type, in_particles($(processInputSymbol)))[$(index)]"
|
||||
end
|
||||
|
||||
for symbol in symbols
|
||||
# TODO: how to get the "default" cpu device?
|
||||
device = entry_device(machine)
|
||||
evalExpr = eval(gen_access_expr(device, symbol))
|
||||
push!(assignInputs, Meta.parse("$(evalExpr)::ParticleValue{$type} = ParticleValue($p, 1.0)"))
|
||||
end
|
||||
end
|
||||
|
||||
# node is now the last node we looked at -> the output node
|
||||
outSym = Symbol("data_$(to_var_name(node.id))")
|
||||
return Expr(:block, assignInputs...)
|
||||
end
|
||||
|
||||
return (
|
||||
code = Expr(:block, code...),
|
||||
inputSymbols = inputSyms,
|
||||
outputSymbol = outSym,
|
||||
"""
|
||||
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
|
||||
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
|
||||
"""
|
||||
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
(code, inputSymbols, outputSymbol) = gen_code(graph, machine)
|
||||
|
||||
initCaches = gen_cache_init_code(machine)
|
||||
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
|
||||
|
||||
functionId = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
|
||||
expr = Meta.parse(
|
||||
"function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
|
||||
)
|
||||
|
||||
func = eval(expr)
|
||||
|
||||
return func
|
||||
end
|
||||
|
||||
"""
|
||||
execute(generated_code, input::Dict{ParticleType, Vector{Particle}})
|
||||
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
|
||||
Execute the given `generated_code` (as returned by [`gen_code`](@ref)) on the given input particles.
|
||||
Execute the code of the given `graph` on the given input particles.
|
||||
|
||||
This is essentially shorthand for
|
||||
```julia
|
||||
compute_graph = get_compute_function(graph, process)
|
||||
result = compute_graph(particles)
|
||||
```
|
||||
|
||||
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
|
||||
"""
|
||||
function execute(generated_code, input::Dict{ParticleType, Vector{Particle}})
|
||||
(code, inputSymbols, outputSymbol) = generated_code
|
||||
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
(code, inputSymbols, outputSymbol) = gen_code(graph, machine)
|
||||
|
||||
assignInputs = Vector{Expr}()
|
||||
for (name, symbol) in inputSymbols
|
||||
type = nothing
|
||||
if startswith(name, "A")
|
||||
type = A
|
||||
elseif startswith(name, "B")
|
||||
type = B
|
||||
else
|
||||
type = C
|
||||
initCaches = gen_cache_init_code(machine)
|
||||
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
|
||||
|
||||
|
||||
functionId = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
|
||||
expr = Meta.parse(
|
||||
"function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
|
||||
)
|
||||
func = eval(expr)
|
||||
|
||||
result = 0
|
||||
try
|
||||
result = @eval $func($input)
|
||||
catch e
|
||||
println("Error while evaluating: $e")
|
||||
|
||||
# if we find a uuid in the exception we can color it in so it's easier to spot
|
||||
uuidRegex = r"[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}"
|
||||
m = match(uuidRegex, string(e))
|
||||
|
||||
functionStr = string(expr)
|
||||
if (isa(m, RegexMatch))
|
||||
functionStr = replace(functionStr, m.match => "\033[31m$(m.match)\033[0m")
|
||||
end
|
||||
index = parse(Int, name[2:end])
|
||||
|
||||
push!(
|
||||
assignInputs,
|
||||
Meta.parse(
|
||||
"$(symbol) = ParticleValue(Particle($(input[type][index]).P0, $(input[type][index]).P1, $(input[type][index]).P2, $(input[type][index]).P3, $(type)), 1.0)",
|
||||
),
|
||||
)
|
||||
println("Function:\n$functionStr")
|
||||
@assert false
|
||||
end
|
||||
|
||||
assignInputs = Expr(:block, assignInputs...)
|
||||
eval(assignInputs)
|
||||
eval(code)
|
||||
|
||||
eval(Meta.parse("result = $outputSymbol"))
|
||||
return result
|
||||
end
|
||||
|
||||
"""
|
||||
execute(graph::DAG, input::Dict{ParticleType, Vector{Particle}})
|
||||
|
||||
Execute the given `generated_code` (as returned by [`gen_code`](@ref)) on the given input particles.
|
||||
The input particles should be sorted correctly into the dictionary to their according [`ParticleType`](@ref)s.
|
||||
|
||||
See also: [`gen_particles`](@ref)
|
||||
"""
|
||||
function execute(graph::DAG, input::Dict{ParticleType, Vector{Particle}})
|
||||
(code, inputSymbols, outputSymbol) = gen_code(graph)
|
||||
|
||||
assignInputs = Vector{Expr}()
|
||||
for (name, symbol) in inputSymbols
|
||||
type = nothing
|
||||
if startswith(name, "A")
|
||||
type = A
|
||||
elseif startswith(name, "B")
|
||||
type = B
|
||||
else
|
||||
type = C
|
||||
end
|
||||
index = parse(Int, name[2:end])
|
||||
|
||||
push!(
|
||||
assignInputs,
|
||||
Meta.parse(
|
||||
"$(symbol) = ParticleValue(Particle($(input[type][index]).P0, $(input[type][index]).P1, $(input[type][index]).P2, $(input[type][index]).P3, $(type)), 1.0)",
|
||||
),
|
||||
)
|
||||
end
|
||||
|
||||
assignInputs = Expr(:block, assignInputs...)
|
||||
eval(assignInputs)
|
||||
eval(code)
|
||||
|
||||
eval(Meta.parse("result = $outputSymbol"))
|
||||
return result
|
||||
end
|
||||
|
53
src/devices/cuda/impl.jl
Normal file
53
src/devices/cuda/impl.jl
Normal file
@ -0,0 +1,53 @@
|
||||
using CUDA
|
||||
|
||||
"""
|
||||
CUDAGPU <: AbstractGPU
|
||||
|
||||
Representation of a specific CUDA GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
|
||||
"""
|
||||
mutable struct CUDAGPU <: AbstractGPU
|
||||
device::Any # TODO: what's the cuda device type?
|
||||
cacheStrategy::CacheStrategy
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, CUDAGPU)
|
||||
|
||||
CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
|
||||
|
||||
default_strategy(::Type{T}) where {T <: CUDAGPU} = LocalVariables()
|
||||
|
||||
function measure_device!(device::CUDAGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring CUDA GPU $(device.device)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: CUDAGPU}
|
||||
|
||||
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU}
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
if !CUDA.functional()
|
||||
if verbose
|
||||
println("CUDA is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
CUDADevices = CUDA.devices()
|
||||
if verbose
|
||||
println("Found $(length(CUDADevices)) CUDA devices")
|
||||
end
|
||||
for device in CUDADevices
|
||||
push!(devices, CUDAGPU(device, default_strategy(CUDAGPU), -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
23
src/devices/detect.jl
Normal file
23
src/devices/detect.jl
Normal file
@ -0,0 +1,23 @@
|
||||
|
||||
"""
|
||||
get_machine_info(verbose::Bool)
|
||||
|
||||
Return the [`Machine`](@ref) currently running on. The parameter `verbose` defaults to true when interactive.
|
||||
"""
|
||||
function get_machine_info(; verbose::Bool = Base.is_interactive)
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
for device in device_types()
|
||||
devs = get_devices(device, verbose = verbose)
|
||||
for dev in devs
|
||||
push!(devices, dev)
|
||||
end
|
||||
end
|
||||
|
||||
noDevices = length(devices)
|
||||
@assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!"
|
||||
|
||||
transferRates = Matrix{Float64}(undef, noDevices, noDevices)
|
||||
fill!(transferRates, -1)
|
||||
return Machine(devices, transferRates)
|
||||
end
|
52
src/devices/impl.jl
Normal file
52
src/devices/impl.jl
Normal file
@ -0,0 +1,52 @@
|
||||
"""
|
||||
device_types()
|
||||
|
||||
Return a vector of available and implemented device types.
|
||||
|
||||
See also: [`DEVICE_TYPES`](@ref)
|
||||
"""
|
||||
function device_types()
|
||||
return DEVICE_TYPES
|
||||
end
|
||||
|
||||
"""
|
||||
entry_device(machine::Machine)
|
||||
|
||||
Return the "entry" device, i.e., the device that starts CPU threads and GPU kernels, and takes input values and returns the output value.
|
||||
"""
|
||||
function entry_device(machine::Machine)
|
||||
return machine.devices[1]
|
||||
end
|
||||
|
||||
"""
|
||||
strategies(t::Type{T}) where {T <: AbstractDevice}
|
||||
|
||||
Return a vector of available [`CacheStrategy`](@ref)s for the given [`AbstractDevice`](@ref).
|
||||
The caching strategies are used in code generation.
|
||||
"""
|
||||
function strategies(t::Type{T}) where {T <: AbstractDevice}
|
||||
if !haskey(CACHE_STRATEGIES, t)
|
||||
error("Trying to get strategies for $T, but it has no strategies defined!")
|
||||
end
|
||||
|
||||
return CACHE_STRATEGIES[t]
|
||||
end
|
||||
|
||||
"""
|
||||
cache_strategy(device::AbstractDevice)
|
||||
|
||||
Returns the cache strategy set for this device.
|
||||
"""
|
||||
function cache_strategy(device::AbstractDevice)
|
||||
return device.cacheStrategy
|
||||
end
|
||||
|
||||
"""
|
||||
set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy)
|
||||
|
||||
Sets the device's cache strategy. After this call, [`cache_strategy`](@ref) should return `cacheStrategy` on the given device.
|
||||
"""
|
||||
function set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy)
|
||||
device.cacheStrategy = cacheStrategy
|
||||
return nothing
|
||||
end
|
108
src/devices/interface.jl
Normal file
108
src/devices/interface.jl
Normal file
@ -0,0 +1,108 @@
|
||||
"""
|
||||
AbstractDevice
|
||||
|
||||
Abstract base type for every device, like GPUs, CPUs or any other compute devices.
|
||||
Every implementation needs to implement various functions and needs a member `cacheStrategy`.
|
||||
"""
|
||||
abstract type AbstractDevice end
|
||||
|
||||
abstract type AbstractCPU <: AbstractDevice end
|
||||
|
||||
abstract type AbstractGPU <: AbstractDevice end
|
||||
|
||||
"""
|
||||
Machine
|
||||
|
||||
A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state.
|
||||
|
||||
See also: [`Scheduler`](@ref)
|
||||
"""
|
||||
struct Machine
|
||||
devices::Vector{AbstractDevice}
|
||||
|
||||
transferRates::Matrix{Float64}
|
||||
end
|
||||
|
||||
"""
|
||||
CacheStrategy
|
||||
|
||||
Abstract base type for caching strategies.
|
||||
|
||||
See also: [`strategies`](@ref)
|
||||
"""
|
||||
abstract type CacheStrategy end
|
||||
|
||||
"""
|
||||
LocalVariables <: CacheStrategy
|
||||
|
||||
A caching strategy relying solely on local variables for every input and output.
|
||||
|
||||
Implements the [`CacheStrategy`](@ref) interface.
|
||||
"""
|
||||
struct LocalVariables <: CacheStrategy end
|
||||
|
||||
"""
|
||||
Dictionary <: CacheStrategy
|
||||
|
||||
A caching strategy relying on a dictionary of Symbols to store every input and output.
|
||||
|
||||
Implements the [`CacheStrategy`](@ref) interface.
|
||||
"""
|
||||
struct Dictionary <: CacheStrategy end
|
||||
|
||||
"""
|
||||
DEVICE_TYPES::Vector{Type}
|
||||
|
||||
Global vector of available and implemented device types. Each implementation of a [`AbstractDevice`](@ref) should add its concrete type to this vector.
|
||||
|
||||
See also: [`device_types`](@ref), [`get_devices`](@ref)
|
||||
"""
|
||||
DEVICE_TYPES = Vector{Type}()
|
||||
|
||||
"""
|
||||
CACHE_STRATEGIES::Dict{Type{AbstractDevice}, Symbol}
|
||||
|
||||
Global dictionary of available caching strategies per device. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary.
|
||||
|
||||
See also: [`strategies`](@ref)
|
||||
"""
|
||||
CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}()
|
||||
|
||||
"""
|
||||
default_strategy(deviceType::Type{T}) where {T <: AbstractDevice}
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device type.
|
||||
See also: [`cache_strategy`](@ref), [`set_cache_strategy`](@ref)
|
||||
"""
|
||||
function default_strategy end
|
||||
|
||||
"""
|
||||
get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice}
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector{Type}` of the devices for the given [`AbstractDevice`](@ref) Type available on the current machine.
|
||||
"""
|
||||
function get_devices end
|
||||
|
||||
"""
|
||||
measure_device!(device::AbstractDevice; verbose::Bool)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes into it.
|
||||
"""
|
||||
function measure_device! end
|
||||
|
||||
"""
|
||||
gen_cache_init_code(device::AbstractDevice)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache.
|
||||
|
||||
The strategy is a symbol
|
||||
"""
|
||||
function gen_cache_init_code end
|
||||
|
||||
"""
|
||||
gen_access_expr(device::AbstractDevice, symbol::Symbol)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref).
|
||||
Return an `Expr` or `QuoteNode` accessing the variable identified by [`symbol`].
|
||||
"""
|
||||
function gen_access_expr end
|
22
src/devices/measure.jl
Normal file
22
src/devices/measure.jl
Normal file
@ -0,0 +1,22 @@
|
||||
"""
|
||||
measure_devices(machine::Machine; verbose::Bool)
|
||||
|
||||
Measure FLOPS, RAM, cache sizes and what other properties can be extracted for the devices in the given machine.
|
||||
"""
|
||||
function measure_devices!(machine::Machine; verbose::Bool = Base.is_interactive())
|
||||
for device in machine.devices
|
||||
measure_device!(device; verbose = verbose)
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
measure_transfer_rates(machine::Machine; verbose::Bool)
|
||||
|
||||
Measure the transfer rates between devices in the machine.
|
||||
"""
|
||||
function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.is_interactive())
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
96
src/devices/numa/impl.jl
Normal file
96
src/devices/numa/impl.jl
Normal file
@ -0,0 +1,96 @@
|
||||
using NumaAllocators
|
||||
|
||||
"""
|
||||
NumaNode <: AbstractCPU
|
||||
|
||||
Representation of a specific CPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
|
||||
"""
|
||||
mutable struct NumaNode <: AbstractCPU
|
||||
numaId::UInt16
|
||||
threads::UInt16
|
||||
cacheStrategy::CacheStrategy
|
||||
FLOPS::Float64
|
||||
id::UUID
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, NumaNode)
|
||||
|
||||
CACHE_STRATEGIES[NumaNode] = [LocalVariables()]
|
||||
|
||||
default_strategy(::Type{T}) where {T <: NumaNode} = LocalVariables()
|
||||
|
||||
function measure_device!(device::NumaNode; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring Numa Node $(device.numaId)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool) where {T <: NumaNode}
|
||||
|
||||
Return a Vector of [`NumaNode`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode}
|
||||
devices = Vector{AbstractDevice}()
|
||||
noNumaNodes = highest_numa_node()
|
||||
|
||||
if (verbose)
|
||||
println("Found $(noNumaNodes + 1) NUMA nodes")
|
||||
end
|
||||
for i in 0:noNumaNodes
|
||||
push!(devices, NumaNode(i, 1, default_strategy(NumaNode), -1, UUIDs.uuid1(rng[1])))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
||||
|
||||
"""
|
||||
gen_cache_init_code(device::NumaNode)
|
||||
|
||||
Generate code for initializing the [`LocalVariables`](@ref) strategy on a [`NumaNode`](@ref).
|
||||
"""
|
||||
function gen_cache_init_code(device::NumaNode)
|
||||
if typeof(device.cacheStrategy) <: LocalVariables
|
||||
# don't need to initialize anything
|
||||
return Expr(:block)
|
||||
elseif typeof(device.cacheStrategy) <: Dictionary
|
||||
return Meta.parse("cache_$(to_var_name(device.id)) = Dict{Symbol, Any}()")
|
||||
# TODO: sizehint?
|
||||
end
|
||||
|
||||
return error("Unimplemented cache strategy \"$(device.cacheStrategy)\" for device \"$(device)\"")
|
||||
end
|
||||
|
||||
"""
|
||||
gen_access_expr(device::NumaNode, symbol::Symbol)
|
||||
|
||||
Generate code to access the variable designated by `symbol` on a [`NumaNode`](@ref), using the [`CacheStrategy`](@ref) set in the device.
|
||||
"""
|
||||
function gen_access_expr(device::NumaNode, symbol::Symbol)
|
||||
return _gen_access_expr(device, device.cacheStrategy, symbol)
|
||||
end
|
||||
|
||||
"""
|
||||
_gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol)
|
||||
|
||||
Internal function for dispatch, used in [`gen_access_expr`](@ref).
|
||||
"""
|
||||
function _gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol)
|
||||
s = Symbol("data_$symbol")
|
||||
quoteNode = Meta.parse(":($s)")
|
||||
return quoteNode
|
||||
end
|
||||
|
||||
"""
|
||||
_gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol)
|
||||
|
||||
Internal function for dispatch, used in [`gen_access_expr`](@ref).
|
||||
"""
|
||||
function _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol)
|
||||
accessStr = ":(cache_$(to_var_name(device.id))[:$symbol])"
|
||||
quoteNode = Meta.parse(accessStr)
|
||||
return quoteNode
|
||||
end
|
53
src/devices/oneapi/impl.jl
Normal file
53
src/devices/oneapi/impl.jl
Normal file
@ -0,0 +1,53 @@
|
||||
using oneAPI
|
||||
|
||||
"""
|
||||
oneAPIGPU <: AbstractGPU
|
||||
|
||||
Representation of a specific Intel GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
|
||||
"""
|
||||
mutable struct oneAPIGPU <: AbstractGPU
|
||||
device::Any
|
||||
cacheStrategy::CacheStrategy
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, oneAPIGPU)
|
||||
|
||||
CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]
|
||||
|
||||
default_strategy(::Type{T}) where {T <: oneAPIGPU} = LocalVariables()
|
||||
|
||||
function measure_device!(device::oneAPIGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring oneAPI GPU $(device.device)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
|
||||
|
||||
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU}
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
if !oneAPI.functional()
|
||||
if verbose
|
||||
println("oneAPI is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
oneAPIDevices = oneAPI.devices()
|
||||
if verbose
|
||||
println("Found $(length(oneAPIDevices)) oneAPI devices")
|
||||
end
|
||||
for device in oneAPIDevices
|
||||
push!(devices, oneAPIGPU(device, default_strategy(oneAPIGPU), -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
53
src/devices/rocm/impl.jl
Normal file
53
src/devices/rocm/impl.jl
Normal file
@ -0,0 +1,53 @@
|
||||
using AMDGPU
|
||||
|
||||
"""
|
||||
ROCmGPU <: AbstractGPU
|
||||
|
||||
Representation of a specific AMD GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
|
||||
"""
|
||||
mutable struct ROCmGPU <: AbstractGPU
|
||||
device::Any
|
||||
cacheStrategy::CacheStrategy
|
||||
FLOPS::Float64
|
||||
end
|
||||
|
||||
push!(DEVICE_TYPES, ROCmGPU)
|
||||
|
||||
CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]
|
||||
|
||||
default_strategy(::Type{T}) where {T <: ROCmGPU} = LocalVariables()
|
||||
|
||||
function measure_device!(device::ROCmGPU; verbose::Bool)
|
||||
if verbose
|
||||
println("Measuring ROCm GPU $(device.device)")
|
||||
end
|
||||
|
||||
# TODO implement
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
|
||||
|
||||
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
|
||||
"""
|
||||
function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU}
|
||||
devices = Vector{AbstractDevice}()
|
||||
|
||||
if !AMDGPU.functional()
|
||||
if verbose
|
||||
println("AMDGPU is non-functional")
|
||||
end
|
||||
return devices
|
||||
end
|
||||
|
||||
AMDDevices = AMDGPU.devices()
|
||||
if verbose
|
||||
println("Found $(length(AMDDevices)) AMD devices")
|
||||
end
|
||||
for device in AMDDevices
|
||||
push!(devices, ROCmGPU(device, default_strategy(ROCmGPU), -1))
|
||||
end
|
||||
|
||||
return devices
|
||||
end
|
@ -6,6 +6,6 @@ Pretty-print a [`Diff`](@ref). Called via print, println and co.
|
||||
function show(io::IO, diff::Diff)
|
||||
print(io, "Nodes: ")
|
||||
print(io, length(diff.addedNodes) + length(diff.removedNodes))
|
||||
print(io, " Edges: ")
|
||||
print(io, ", Edges: ")
|
||||
return print(io, length(diff.addedEdges) + length(diff.removedEdges))
|
||||
end
|
||||
|
@ -4,8 +4,8 @@
|
||||
A named tuple representing a difference of added and removed nodes and edges on a [`DAG`](@ref).
|
||||
"""
|
||||
const Diff = NamedTuple{
|
||||
(:addedNodes, :removedNodes, :addedEdges, :removedEdges),
|
||||
Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}},
|
||||
(:addedNodes, :removedNodes, :addedEdges, :removedEdges, :updatedChildren),
|
||||
Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}, Vector{Tuple{Node, AbstractTask}}},
|
||||
}
|
||||
|
||||
function Diff()
|
||||
@ -14,5 +14,8 @@ function Diff()
|
||||
removedNodes = Vector{Node}(),
|
||||
addedEdges = Vector{Edge}(),
|
||||
removedEdges = Vector{Edge}(),
|
||||
|
||||
# children were updated in the task, updatedChildren[x][2] is the task before the update
|
||||
updatedChildren = Vector{Tuple{Node, AbstractTask}}(),
|
||||
)::Diff
|
||||
end
|
||||
|
65
src/estimator/global_metric.jl
Normal file
65
src/estimator/global_metric.jl
Normal file
@ -0,0 +1,65 @@
|
||||
|
||||
"""
|
||||
CDCost
|
||||
|
||||
Representation of a [`DAG`](@ref)'s cost as estimated by the [`GlobalMetricEstimator`](@ref).
|
||||
|
||||
# Fields:
|
||||
`.data`: The total data transfer.\\
|
||||
`.computeEffort`: The total compute effort.\\
|
||||
`.computeIntensity`: The compute intensity, will always equal `.computeEffort / .data`.
|
||||
|
||||
|
||||
!!! note
|
||||
Note that the `computeIntensity` doesn't necessarily make sense in the context of only operation costs.
|
||||
For example, for node fusions this will always be 0, since the computeEffort is zero.
|
||||
It will still work as intended when adding/subtracting to/from a `graph_cost` estimate.
|
||||
"""
|
||||
const CDCost = NamedTuple{(:data, :computeEffort, :computeIntensity), Tuple{Float64, Float64, Float64}}
|
||||
|
||||
function +(cost1::CDCost, cost2::CDCost)::CDCost
|
||||
d = cost1.data + cost2.data
|
||||
ce = computeEffort = cost1.computeEffort + cost2.computeEffort
|
||||
return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
|
||||
end
|
||||
|
||||
function -(cost1::CDCost, cost2::CDCost)::CDCost
|
||||
d = cost1.data - cost2.data
|
||||
ce = computeEffort = cost1.computeEffort - cost2.computeEffort
|
||||
return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
|
||||
end
|
||||
|
||||
struct GlobalMetricEstimator <: AbstractEstimator end
|
||||
|
||||
function cost_type(estimator::GlobalMetricEstimator)
|
||||
return CDCost
|
||||
end
|
||||
|
||||
function graph_cost(estimator::GlobalMetricEstimator, graph::DAG)
|
||||
properties = get_properties(graph)
|
||||
return (
|
||||
data = properties.data,
|
||||
computeEffort = properties.computeEffort,
|
||||
computeIntensity = properties.computeIntensity,
|
||||
)::CDCost
|
||||
end
|
||||
|
||||
function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeFusion)
|
||||
return (data = -data(operation.input[2].task), computeEffort = 0.0, computeIntensity = 0.0)::CDCost
|
||||
end
|
||||
|
||||
function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeReduction)
|
||||
s = length(operation.input) - 1
|
||||
return (
|
||||
data = s * -data(operation.input[1].task),
|
||||
computeEffort = s * -compute_effort(operation.input[1].task),
|
||||
computeIntensity = typeof(operation.input) <: DataTaskNode ? 0.0 : Inf,
|
||||
)::CDCost
|
||||
end
|
||||
|
||||
function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeSplit)
|
||||
s = length(operation.input.parents) - 1
|
||||
d = s * data(operation.input.task)
|
||||
ce = s * compute_effort(operation.input.task)
|
||||
return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
|
||||
end
|
44
src/estimator/interface.jl
Normal file
44
src/estimator/interface.jl
Normal file
@ -0,0 +1,44 @@
|
||||
|
||||
"""
|
||||
AbstractEstimator
|
||||
|
||||
Abstract base type for an estimator. An estimator estimates the cost of a graph or the difference an operation applied to a graph will make to its cost.
|
||||
|
||||
Interface functions are
|
||||
- [`graph_cost`](@ref)
|
||||
- [`operation_effect`](@ref)
|
||||
"""
|
||||
abstract type AbstractEstimator end
|
||||
|
||||
"""
|
||||
cost_type(estimator::AbstractEstimator)
|
||||
|
||||
Interface function returning a specific estimator's cost type, i.e., the type returned by its implementation of [`graph_cost`](@ref) and [`operation_effect`](@ref).
|
||||
"""
|
||||
function cost_type end
|
||||
|
||||
"""
|
||||
graph_cost(estimator::AbstractEstimator, graph::DAG)
|
||||
|
||||
Get the total estimated cost of the graph. The cost's data type can be chosen by the implementation, but should have usable comparison operators (<, <=, >, >=, ==) and basic math operators (+, -, *, /).
|
||||
"""
|
||||
function graph_cost end
|
||||
|
||||
"""
|
||||
operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation)
|
||||
|
||||
Get the estimated effect on the cost of the graph, such that `graph_cost(estimator, graph) + operation_effect(estimator, graph, operation) ~= graph_cost(estimator, graph_with_operation_applied)`. There is no hard requirement for this, but the better the estimate, the better an optimization algorithm will be.
|
||||
|
||||
!!! note
|
||||
There is a default implementation of this function, applying the operation, calling [`graph_cost`](@ref), then popping the operation again.
|
||||
|
||||
It can be much faster to overload this function for a specific estimator and directly compute the effects from the operation if possible.
|
||||
"""
|
||||
function operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation)
|
||||
# This is currently not stably working, see issue #16
|
||||
cost = graph_cost(estimator, graph)
|
||||
push_operation!(graph, operation)
|
||||
cost_after = graph_cost(estimator, graph)
|
||||
pop_operation!(graph)
|
||||
return cost_after - cost
|
||||
end
|
@ -38,8 +38,7 @@ end
|
||||
|
||||
Return `true` if [`pop_operation!`](@ref) is possible, `false` otherwise.
|
||||
"""
|
||||
can_pop(graph::DAG) =
|
||||
!isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
|
||||
can_pop(graph::DAG) = !isempty(graph.operationsToApply) || !isempty(graph.appliedOperations)
|
||||
|
||||
"""
|
||||
reset_graph!(graph::DAG)
|
||||
|
@ -15,12 +15,7 @@ Insert the node into the graph.
|
||||
|
||||
See also: [`remove_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref)
|
||||
"""
|
||||
function insert_node!(
|
||||
graph::DAG,
|
||||
node::Node,
|
||||
track = true,
|
||||
invalidate_cache = true,
|
||||
)
|
||||
function insert_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
|
||||
# 1: mute
|
||||
push!(graph.nodes, node)
|
||||
|
||||
@ -50,14 +45,8 @@ Insert the edge between node1 (child) and node2 (parent) into the graph.
|
||||
|
||||
See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`remove_edge!`](@ref)
|
||||
"""
|
||||
function insert_edge!(
|
||||
graph::DAG,
|
||||
node1::Node,
|
||||
node2::Node,
|
||||
track = true,
|
||||
invalidate_cache = true,
|
||||
)
|
||||
# @assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists"
|
||||
function insert_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
|
||||
@assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists"
|
||||
|
||||
# 1: mute
|
||||
# edge points from child to parent
|
||||
@ -95,13 +84,8 @@ Remove the node from the graph.
|
||||
|
||||
See also: [`insert_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref)
|
||||
"""
|
||||
function remove_node!(
|
||||
graph::DAG,
|
||||
node::Node,
|
||||
track = true,
|
||||
invalidate_cache = true,
|
||||
)
|
||||
# @assert node in graph.nodes "Trying to remove a node that's not in the graph"
|
||||
function remove_node!(graph::DAG, node::Node; track = true, invalidate_cache = true)
|
||||
@assert node in graph.nodes "Trying to remove a node that's not in the graph"
|
||||
|
||||
# 1: mute
|
||||
delete!(graph.nodes, node)
|
||||
@ -134,13 +118,7 @@ Remove the edge between node1 (child) and node2 (parent) into the graph.
|
||||
|
||||
See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`insert_edge!`](@ref)
|
||||
"""
|
||||
function remove_edge!(
|
||||
graph::DAG,
|
||||
node1::Node,
|
||||
node2::Node,
|
||||
track = true,
|
||||
invalidate_cache = true,
|
||||
)
|
||||
function remove_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true)
|
||||
# 1: mute
|
||||
pre_length1 = length(node1.parents)
|
||||
pre_length2 = length(node2.children)
|
||||
@ -149,15 +127,15 @@ function remove_edge!(
|
||||
filter!(x -> x != node2, node1.parents)
|
||||
filter!(x -> x != node1, node2.children)
|
||||
|
||||
#=@assert begin
|
||||
removed = pre_length1 - length(node1.parents)
|
||||
removed <= 1
|
||||
end "removed more than one node from node1's parents"=#
|
||||
@assert begin
|
||||
removed = pre_length1 - length(node1.parents)
|
||||
removed <= 1
|
||||
end "removed more than one node from node1's parents"
|
||||
|
||||
#=@assert begin
|
||||
removed = pre_length2 - length(node2.children)
|
||||
removed <= 1
|
||||
end "removed more than one node from node2's children"=#
|
||||
@assert begin
|
||||
removed = pre_length2 - length(node2.children)
|
||||
removed <= 1
|
||||
end "removed more than one node from node2's children"
|
||||
|
||||
# 2: keep track
|
||||
if (track)
|
||||
@ -181,6 +159,66 @@ function remove_edge!(
|
||||
return nothing
|
||||
end
|
||||
|
||||
function replace_children!(task::FusedComputeTask, before, after)
|
||||
replacedIn1 = length(findall(x -> x == before, task.t1_inputs))
|
||||
replacedIn2 = length(findall(x -> x == before, task.t2_inputs))
|
||||
|
||||
@assert replacedIn1 >= 1 || replacedIn2 >= 1 "Nothing to replace while replacing $before with $after in $(task.t1_inputs...) and $(task.t2_inputs...)"
|
||||
|
||||
replace!(task.t1_inputs, before => after)
|
||||
replace!(task.t2_inputs, before => after)
|
||||
|
||||
# recursively descend down the tree, but only in the tasks where we're replacing things
|
||||
if replacedIn1 > 0
|
||||
replace_children!(task.first_task, before, after)
|
||||
end
|
||||
if replacedIn2 > 0
|
||||
replace_children!(task.second_task, before, after)
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function replace_children!(task::AbstractTask, before, after)
|
||||
return nothing
|
||||
end
|
||||
|
||||
function update_child!(graph::DAG, n::Node, child_before::Symbol, child_after::Symbol; track = true)
|
||||
# only need to update fused compute tasks
|
||||
if !(typeof(n.task) <: FusedComputeTask)
|
||||
return nothing
|
||||
end
|
||||
|
||||
taskBefore = copy(n.task)
|
||||
|
||||
if !((child_before in n.task.t1_inputs) || (child_before in n.task.t2_inputs))
|
||||
println("------------------ Nothing to replace!! ------------------")
|
||||
child_ids = Vector{String}()
|
||||
for child in n.children
|
||||
push!(child_ids, "$(child.id)")
|
||||
end
|
||||
println("From $(child_before) to $(child_after) in $n with children $(child_ids)")
|
||||
@assert false
|
||||
end
|
||||
|
||||
replace_children!(n.task, child_before, child_after)
|
||||
|
||||
if !((child_after in n.task.t1_inputs) || (child_after in n.task.t2_inputs))
|
||||
println("------------------ Did not replace anything!! ------------------")
|
||||
child_ids = Vector{String}()
|
||||
for child in n.children
|
||||
push!(child_ids, "$(child.id)")
|
||||
end
|
||||
println("From $(child_before) to $(child_after) in $n with children $(child_ids)")
|
||||
@assert false
|
||||
end
|
||||
|
||||
# keep track
|
||||
if (track)
|
||||
push!(graph.diff.updatedChildren, (n, taskBefore))
|
||||
end
|
||||
end
|
||||
|
||||
"""
|
||||
get_snapshot_diff(graph::DAG)
|
||||
|
||||
|
@ -62,9 +62,5 @@ function show(io::IO, graph::DAG)
|
||||
properties = get_properties(graph)
|
||||
println(io, " Total Compute Effort: ", properties.computeEffort)
|
||||
println(io, " Total Data Transfer: ", properties.data)
|
||||
return println(
|
||||
io,
|
||||
" Total Compute Intensity: ",
|
||||
properties.computeIntensity,
|
||||
)
|
||||
return println(io, " Total Compute Intensity: ", properties.computeIntensity)
|
||||
end
|
||||
|
@ -34,6 +34,7 @@ end
|
||||
Return a vector of the graph's entry nodes.
|
||||
"""
|
||||
function get_entry_nodes(graph::DAG)
|
||||
apply_all!(graph)
|
||||
result = Vector{Node}()
|
||||
for node in graph.nodes
|
||||
if (is_entry_node(node))
|
||||
|
@ -17,7 +17,7 @@ end
|
||||
|
||||
The representation of the graph as a set of [`Node`](@ref)s.
|
||||
|
||||
A DAG can be loaded using the appropriate parse function, e.g. [`parse_abc`](@ref).
|
||||
A DAG can be loaded using the appropriate parse_dag function, e.g. [`parse_dag`](@ref).
|
||||
|
||||
[`Operation`](@ref)s can be applied on it using [`push_operation!`](@ref) and reverted using [`pop_operation!`](@ref) like a stack.
|
||||
To get the set of possible operations, use [`get_operations`](@ref).
|
||||
@ -52,11 +52,7 @@ end
|
||||
Construct and return an empty [`PossibleOperations`](@ref) object.
|
||||
"""
|
||||
function PossibleOperations()
|
||||
return PossibleOperations(
|
||||
Set{NodeFusion}(),
|
||||
Set{NodeReduction}(),
|
||||
Set{NodeSplit}(),
|
||||
)
|
||||
return PossibleOperations(Set{NodeFusion}(), Set{NodeReduction}(), Set{NodeSplit}())
|
||||
end
|
||||
|
||||
"""
|
||||
|
@ -59,3 +59,19 @@ function is_valid(graph::DAG)
|
||||
|
||||
return true
|
||||
end
|
||||
|
||||
"""
|
||||
is_scheduled(graph::DAG)
|
||||
|
||||
Validate that the entire graph has been scheduled, i.e., every [`ComputeTaskNode`](@ref) has its `.device` set.
|
||||
"""
|
||||
function is_scheduled(graph::DAG)
|
||||
for node in graph.nodes
|
||||
if (node isa DataTaskNode)
|
||||
continue
|
||||
end
|
||||
@assert !ismissing(node.device)
|
||||
end
|
||||
|
||||
return true
|
||||
end
|
||||
|
@ -7,7 +7,7 @@ Return the particle and value as is.
|
||||
|
||||
0 FLOP.
|
||||
"""
|
||||
function compute(::ComputeTaskP, data::ParticleValue)
|
||||
function compute(::ComputeTaskP, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
|
||||
return data
|
||||
end
|
||||
|
||||
@ -18,7 +18,7 @@ Compute an outer edge. Return the particle value with the same particle and the
|
||||
|
||||
1 FLOP.
|
||||
"""
|
||||
function compute(::ComputeTaskU, data::ParticleValue)
|
||||
function compute(::ComputeTaskU, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
|
||||
return ParticleValue(data.p, data.v * outer_edge(data.p))
|
||||
end
|
||||
|
||||
@ -29,7 +29,11 @@ Compute a vertex. Preserve momentum and particle types (AB->C etc.) to create re
|
||||
|
||||
6 FLOP.
|
||||
"""
|
||||
function compute(::ComputeTaskV, data1::ParticleValue, data2::ParticleValue)
|
||||
function compute(
|
||||
::ComputeTaskV,
|
||||
data1::ParticleValue{P1},
|
||||
data2::ParticleValue{P2},
|
||||
)::ParticleValue where {P1 <: ABCParticle, P2 <: ABCParticle}
|
||||
p3 = preserve_momentum(data1.p, data2.p)
|
||||
dataOut = ParticleValue(p3, data1.v * vertex() * data2.v)
|
||||
return dataOut
|
||||
@ -44,8 +48,15 @@ For valid inputs, both input particles should have the same momenta at this poin
|
||||
|
||||
12 FLOP.
|
||||
"""
|
||||
function compute(::ComputeTaskS2, data1::ParticleValue, data2::ParticleValue)
|
||||
return data1.v * inner_edge(data1.p) * data2.v
|
||||
function compute(::ComputeTaskS2, data1::ParticleValue{P}, data2::ParticleValue{P})::Float64 where {P <: ABCParticle}
|
||||
#=
|
||||
@assert isapprox(abs(data1.p.momentum.E), abs(data2.p.momentum.E), rtol = 0.001, atol = sqrt(eps())) "E: $(data1.p.momentum.E) vs. $(data2.p.momentum.E)"
|
||||
@assert isapprox(data1.p.momentum.px, -data2.p.momentum.px, rtol = 0.001, atol = sqrt(eps())) "px: $(data1.p.momentum.px) vs. $(data2.p.momentum.px)"
|
||||
@assert isapprox(data1.p.momentum.py, -data2.p.momentum.py, rtol = 0.001, atol = sqrt(eps())) "py: $(data1.p.momentum.py) vs. $(data2.p.momentum.py)"
|
||||
@assert isapprox(data1.p.momentum.pz, -data2.p.momentum.pz, rtol = 0.001, atol = sqrt(eps())) "pz: $(data1.p.momentum.pz) vs. $(data2.p.momentum.pz)"
|
||||
=#
|
||||
inner = inner_edge(data1.p)
|
||||
return data1.v * inner * data2.v
|
||||
end
|
||||
|
||||
"""
|
||||
@ -55,7 +66,7 @@ Compute inner edge (1 input particle, 1 output particle).
|
||||
|
||||
11 FLOP.
|
||||
"""
|
||||
function compute(::ComputeTaskS1, data::ParticleValue)
|
||||
function compute(::ComputeTaskS1, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle}
|
||||
return ParticleValue(data.p, data.v * inner_edge(data.p))
|
||||
end
|
||||
|
||||
@ -66,191 +77,83 @@ Compute a sum over the vector. Use an algorithm that accounts for accumulated er
|
||||
|
||||
Linearly many FLOP with growing data.
|
||||
"""
|
||||
function compute(::ComputeTaskSum, data::Vector{Float64})
|
||||
function compute(::ComputeTaskSum, data::Vector{Float64})::Float64
|
||||
return sum_kbn(data)
|
||||
end
|
||||
|
||||
"""
|
||||
compute(t::FusedComputeTask, data)
|
||||
get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Compute a [`FusedComputeTask`](@ref). This simply asserts false and should not be called. Fused Compute Tasks generate their expressions directly through the other tasks instead.
|
||||
Generate and return code evaluating [`ComputeTaskP`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
"""
|
||||
function compute(t::FusedComputeTask, data)
|
||||
@assert false "This is not implemented and should never be called"
|
||||
function get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskP(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskP, inSymbol::Symbol, outSymbol::Symbol)
|
||||
get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate and return code evaluating [`ComputeTaskP`](@ref) on `inSymbol`, providing the output on `outSymbol`.
|
||||
Generate code evaluating [`ComputeTaskU`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(::ComputeTaskP, inSymbol::Symbol, outSymbol::Symbol)
|
||||
return Meta.parse("$outSymbol = compute(ComputeTaskP(), $inSymbol)")
|
||||
function get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskU(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskU, inSymbol::Symbol, outSymbol::Symbol)
|
||||
get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskU`](@ref) on `inSymbol`, providing the output on `outSymbol`.
|
||||
`inSymbol` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
|
||||
Generate code evaluating [`ComputeTaskV`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSym[1]` and `inSym[2]` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(::ComputeTaskU, inSymbol::Symbol, outSymbol::Symbol)
|
||||
return Meta.parse("$outSymbol = compute(ComputeTaskU(), $inSymbol)")
|
||||
function get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1]), eval(inExprs[2])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskV(), $(in[1]), $(in[2]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskV, inSymbol1::Symbol, inSymbol2::Symbol, outSymbol::Symbol)
|
||||
get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskV`](@ref) on `inSymbol1` and `inSymbol2`, providing the output on `outSymbol`.
|
||||
`inSymbol1` and `inSymbol2` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
|
||||
Generate code evaluating [`ComputeTaskS2`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms[1]` and `inSyms[2]` should be of type [`ParticleValue`](@ref), `outSym` will be of type `Float64`.
|
||||
"""
|
||||
function get_expression(
|
||||
::ComputeTaskV,
|
||||
inSymbol1::Symbol,
|
||||
inSymbol2::Symbol,
|
||||
outSymbol::Symbol,
|
||||
)
|
||||
return Meta.parse(
|
||||
"$outSymbol = compute(ComputeTaskV(), $inSymbol1, $inSymbol2)",
|
||||
)
|
||||
function get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1]), eval(inExprs[2])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskS2(), $(in[1]), $(in[2]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskS2, inSymbol1::Symbol, inSymbol2::Symbol, outSymbol::Symbol)
|
||||
get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskS2`](@ref) on `inSymbol1` and `inSymbol2`, providing the output on `outSymbol`.
|
||||
`inSymbol1` and `inSymbol2` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type `Float64`.
|
||||
Generate code evaluating [`ComputeTaskS1`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(
|
||||
::ComputeTaskS2,
|
||||
inSymbol1::Symbol,
|
||||
inSymbol2::Symbol,
|
||||
outSymbol::Symbol,
|
||||
)
|
||||
return Meta.parse(
|
||||
"$outSymbol = compute(ComputeTaskS2(), $inSymbol1, $inSymbol2)",
|
||||
)
|
||||
function get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskS1(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskS1, inSymbol::Symbol, outSymbol::Symbol)
|
||||
get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskS1`](@ref) on `inSymbol`, providing the output on `outSymbol`.
|
||||
`inSymbol` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref).
|
||||
Generate code evaluating [`ComputeTaskSum`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`Float64`], `outSym` will be of type [`Float64`].
|
||||
"""
|
||||
function get_expression(::ComputeTaskS1, inSymbol::Symbol, outSymbol::Symbol)
|
||||
return Meta.parse("$outSymbol = compute(ComputeTaskS1(), $inSymbol)")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskSum, inSymbols::Vector{Symbol}, outSymbol::Symbol)
|
||||
|
||||
Generate code evaluating [`ComputeTaskSum`](@ref) on `inSymbols`, providing the output on `outSymbol`.
|
||||
`inSymbols` should be of type [`Float64`], `outSymbol` will be of type [`Float64`].
|
||||
"""
|
||||
function get_expression(
|
||||
::ComputeTaskSum,
|
||||
inSymbols::Vector{Symbol},
|
||||
outSymbol::Symbol,
|
||||
)
|
||||
return quote
|
||||
$outSymbol = compute(ComputeTaskSum(), [$(inSymbols...)])
|
||||
end
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(t::FusedComputeTask, inSymbols::Vector{Symbol}, outSymbol::Symbol)
|
||||
|
||||
Generate code evaluating a [`FusedComputeTask`](@ref) on `inSymbols`, providing the output on `outSymbol`.
|
||||
`inSymbols` should be of the correct types and may be heterogeneous. `outSymbol` will be of the type of the output of `T2` of t.
|
||||
"""
|
||||
function get_expression(
|
||||
t::FusedComputeTask,
|
||||
inSymbols::Vector{Symbol},
|
||||
outSymbol::Symbol,
|
||||
)
|
||||
(T1, T2) = get_types(t)
|
||||
c1 = children(T1())
|
||||
c2 = children(T2())
|
||||
|
||||
expr1 = nothing
|
||||
expr2 = nothing
|
||||
|
||||
# TODO need to figure out how to know which inputs belong to which subtask
|
||||
# since we order the vectors with the child nodes we can't just split
|
||||
if (c1 == 1)
|
||||
expr1 = get_expression(T1(), inSymbols[begin], :intermediate)
|
||||
elseif (c1 == 2)
|
||||
expr1 =
|
||||
get_expression(T1(), inSymbols[begin], inSymbols[2], :intermediate)
|
||||
else
|
||||
expr1 = get_expression(T1(), inSymbols[begin:c1], :intermediate)
|
||||
end
|
||||
|
||||
if (c2 == 1)
|
||||
expr2 = get_expression(T2(), :intermediate, outSymbol)
|
||||
elseif c2 == 2
|
||||
expr2 =
|
||||
get_expression(T2(), :intermediate, inSymbols[c1 + 1], outSymbol)
|
||||
else
|
||||
expr2 = get_expression(
|
||||
T2(),
|
||||
:intermediate * inSymbols[(c1 + 1):end],
|
||||
outSymbol,
|
||||
)
|
||||
end
|
||||
|
||||
return Expr(:block, expr1, expr2)
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(node::ComputeTaskNode)
|
||||
|
||||
Generate and return code for a given [`ComputeTaskNode`](@ref).
|
||||
"""
|
||||
function get_expression(node::ComputeTaskNode)
|
||||
t = typeof(node.task)
|
||||
@assert length(node.children) == children(node.task) || t <: ComputeTaskSum
|
||||
|
||||
if (t <: ComputeTaskU || t <: ComputeTaskP || t <: ComputeTaskS1) # single input
|
||||
symbolIn = Symbol("data_$(to_var_name(node.children[1].id))")
|
||||
symbolOut = Symbol("data_$(to_var_name(node.id))")
|
||||
return get_expression(t(), symbolIn, symbolOut)
|
||||
elseif (t <: ComputeTaskS2 || t <: ComputeTaskV) # double input
|
||||
symbolIn1 = Symbol("data_$(to_var_name(node.children[1].id))")
|
||||
symbolIn2 = Symbol("data_$(to_var_name(node.children[2].id))")
|
||||
symbolOut = Symbol("data_$(to_var_name(node.id))")
|
||||
return get_expression(t(), symbolIn1, symbolIn2, symbolOut)
|
||||
elseif (t <: ComputeTaskSum || t <: FusedComputeTask) # vector input
|
||||
inSymbols = Vector{Symbol}()
|
||||
for child in node.children
|
||||
push!(inSymbols, Symbol("data_$(to_var_name(child.id))"))
|
||||
end
|
||||
outSymbol = Symbol("data_$(to_var_name(node.id))")
|
||||
return get_expression(t(), inSymbols, outSymbol)
|
||||
else
|
||||
error("Unknown compute task")
|
||||
end
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(node::DataTaskNode)
|
||||
|
||||
Generate and return code for a given [`DataTaskNode`](@ref).
|
||||
"""
|
||||
function get_expression(node::DataTaskNode)
|
||||
# TODO: do things to transport data from/to gpu, between numa nodes, etc.
|
||||
@assert length(node.children) <= 1
|
||||
|
||||
inSymbol = nothing
|
||||
if (length(node.children) == 1)
|
||||
inSymbol = Symbol("data_$(to_var_name(node.children[1].id))")
|
||||
else
|
||||
inSymbol = Symbol("data_$(to_var_name(node.id))_in")
|
||||
end
|
||||
outSymbol = Symbol("data_$(to_var_name(node.id))")
|
||||
|
||||
dataTransportExp = Meta.parse("$outSymbol = $inSymbol")
|
||||
|
||||
return dataTransportExp
|
||||
function get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = eval.(inExprs)
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskSum(), [$(unroll_symbol_vector(in))])")
|
||||
end
|
||||
|
@ -1,74 +1,198 @@
|
||||
using QEDbase
|
||||
using Random
|
||||
using Roots
|
||||
using ForwardDiff
|
||||
|
||||
ComputeTaskSum() = ComputeTaskSum(0)
|
||||
|
||||
"""
|
||||
Particle(rng)
|
||||
gen_process_input(processDescription::ABCProcessDescription)
|
||||
|
||||
Return a randomly generated particle.
|
||||
Return a ProcessInput of randomly generated [`ABCParticle`](@ref)s from a [`ABCProcessDescription`](@ref). The process description can be created manually or parsed from a string using [`parse_process`](@ref).
|
||||
|
||||
Note: This uses RAMBO to create a valid process with conservation of momentum and energy.
|
||||
"""
|
||||
function Particle(rng, type::ParticleType)
|
||||
function gen_process_input(processDescription::ABCProcessDescription)
|
||||
inParticleTypes = keys(processDescription.inParticles)
|
||||
outParticleTypes = keys(processDescription.outParticles)
|
||||
|
||||
p1 = rand(rng, Float64)
|
||||
p2 = rand(rng, Float64)
|
||||
p3 = rand(rng, Float64)
|
||||
m = mass(type)
|
||||
|
||||
# keep the momenta of the particles on-shell
|
||||
p4 = sqrt(p1^2 + p2^2 + p3^2 + m^2)
|
||||
|
||||
return Particle(p1, p2, p3, p4, type)
|
||||
end
|
||||
|
||||
"""
|
||||
gen_particles(n::Int)
|
||||
|
||||
Return a Vector of `n` randomly generated [`Particle`](@ref)s.
|
||||
|
||||
Note: This does not take into account the preservation of momenta required for an actual valid process!
|
||||
"""
|
||||
function gen_particles(ns::Dict{ParticleType, Int})
|
||||
particles = Dict{ParticleType, Vector{Particle}}()
|
||||
rng = MersenneTwister(0)
|
||||
|
||||
|
||||
if ns == Dict((A => 2), (B => 2))
|
||||
rho = 1.0
|
||||
|
||||
omega = rand(rng, Float64)
|
||||
theta = rand(rng, Float64) * π
|
||||
phi = rand(rng, Float64) * π
|
||||
|
||||
particles[A] = Vector{Particle}()
|
||||
particles[B] = Vector{Particle}()
|
||||
|
||||
push!(particles[A], Particle(omega, 0, 0, omega, A))
|
||||
push!(particles[B], Particle(omega, 0, 0, -omega, B))
|
||||
push!(
|
||||
particles[A],
|
||||
Particle(
|
||||
omega,
|
||||
rho * cos(theta) * cos(phi),
|
||||
rho * cos(theta) * sin(phi),
|
||||
rho * sin(theta),
|
||||
A,
|
||||
),
|
||||
)
|
||||
push!(
|
||||
particles[B],
|
||||
Particle(
|
||||
omega,
|
||||
-rho * cos(theta) * cos(phi),
|
||||
-rho * cos(theta) * sin(phi),
|
||||
-rho * sin(theta),
|
||||
B,
|
||||
),
|
||||
)
|
||||
return particles
|
||||
end
|
||||
|
||||
for (type, n) in ns
|
||||
particles[type] = Vector{Particle}()
|
||||
for i in 1:n
|
||||
push!(particles[type], Particle(rng, type))
|
||||
massSum = 0
|
||||
inputMasses = Vector{Float64}()
|
||||
for (particle, n) in processDescription.inParticles
|
||||
for _ in 1:n
|
||||
massSum += mass(particle)
|
||||
push!(inputMasses, mass(particle))
|
||||
end
|
||||
end
|
||||
return particles
|
||||
outputMasses = Vector{Float64}()
|
||||
for (particle, n) in processDescription.outParticles
|
||||
for _ in 1:n
|
||||
massSum += mass(particle)
|
||||
push!(outputMasses, mass(particle))
|
||||
end
|
||||
end
|
||||
|
||||
# add some extra random mass to allow for some momentum
|
||||
massSum += rand(rng[threadid()]) * (length(inputMasses) + length(outputMasses))
|
||||
|
||||
|
||||
inputParticles = Vector{ABCParticle}()
|
||||
initialMomenta = generate_initial_moms(massSum, inputMasses)
|
||||
index = 1
|
||||
for (particle, n) in processDescription.inParticles
|
||||
for _ in 1:n
|
||||
mom = initialMomenta[index]
|
||||
push!(inputParticles, particle(mom))
|
||||
index += 1
|
||||
end
|
||||
end
|
||||
|
||||
outputParticles = Vector{ABCParticle}()
|
||||
final_momenta = generate_physical_massive_moms(rng[threadid()], massSum, outputMasses)
|
||||
index = 1
|
||||
for (particle, n) in processDescription.outParticles
|
||||
for _ in 1:n
|
||||
mom = final_momenta[index]
|
||||
push!(outputParticles, particle(SFourMomentum(-mom.E, mom.px, mom.py, mom.pz)))
|
||||
index += 1
|
||||
end
|
||||
end
|
||||
|
||||
processInput = ABCProcessInput(processDescription, inputParticles, outputParticles)
|
||||
|
||||
return return processInput
|
||||
end
|
||||
|
||||
####################
|
||||
# CODE FROM HERE BORROWED FROM SOURCE: https://codebase.helmholtz.cloud/qedsandbox/QEDphasespaces.jl/
|
||||
# use qedphasespaces directly once released
|
||||
#
|
||||
# quick and dirty implementation of the RAMBO algorithm
|
||||
#
|
||||
# reference:
|
||||
# * https://cds.cern.ch/record/164736/files/198601282.pdf
|
||||
# * https://www.sciencedirect.com/science/article/pii/0010465586901190
|
||||
####################
|
||||
|
||||
function generate_initial_moms(ss, masses)
|
||||
E1 = (ss^2 + masses[1]^2 - masses[2]^2) / (2 * ss)
|
||||
E2 = (ss^2 + masses[2]^2 - masses[1]^2) / (2 * ss)
|
||||
|
||||
rho1 = sqrt(E1^2 - masses[1]^2)
|
||||
rho2 = sqrt(E2^2 - masses[2]^2)
|
||||
|
||||
return [SFourMomentum(E1, 0, 0, rho1), SFourMomentum(E2, 0, 0, -rho2)]
|
||||
end
|
||||
|
||||
|
||||
Random.rand(rng::AbstractRNG, ::Random.SamplerType{SFourMomentum}) = SFourMomentum(rand(rng, 4))
|
||||
Random.rand(rng::AbstractRNG, ::Random.SamplerType{NTuple{N, Float64}}) where {N} = Tuple(rand(rng, N))
|
||||
|
||||
|
||||
function _transform_uni_to_mom(u1, u2, u3, u4)
|
||||
cth = 2 * u1 - 1
|
||||
sth = sqrt(1 - cth^2)
|
||||
phi = 2 * pi * u2
|
||||
q0 = -log(u3 * u4)
|
||||
qx = q0 * sth * cos(phi)
|
||||
qy = q0 * sth * sin(phi)
|
||||
qz = q0 * cth
|
||||
|
||||
return SFourMomentum(q0, qx, qy, qz)
|
||||
end
|
||||
|
||||
function _transform_uni_to_mom!(uni_mom, dest)
|
||||
u1, u2, u3, u4 = Tuple(uni_mom)
|
||||
cth = 2 * u1 - 1
|
||||
sth = sqrt(1 - cth^2)
|
||||
phi = 2 * pi * u2
|
||||
q0 = -log(u3 * u4)
|
||||
qx = q0 * sth * cos(phi)
|
||||
qy = q0 * sth * sin(phi)
|
||||
qz = q0 * cth
|
||||
|
||||
return dest = SFourMomentum(q0, qx, qy, qz)
|
||||
end
|
||||
|
||||
_transform_uni_to_mom(u1234::Tuple) = _transform_uni_to_mom(u1234...)
|
||||
_transform_uni_to_mom(u1234::SFourMomentum) = _transform_uni_to_mom(Tuple(u1234))
|
||||
|
||||
function generate_massless_moms(rng, n::Int)
|
||||
a = Vector{SFourMomentum}(undef, n)
|
||||
rand!(rng, a)
|
||||
return map(_transform_uni_to_mom, a)
|
||||
end
|
||||
|
||||
function generate_physical_massless_moms(rng, ss, n)
|
||||
r_moms = generate_massless_moms(rng, n)
|
||||
Q = sum(r_moms)
|
||||
M = sqrt(Q * Q)
|
||||
fac = -1 / M
|
||||
Qx = getX(Q)
|
||||
Qy = getY(Q)
|
||||
Qz = getZ(Q)
|
||||
bx = fac * Qx
|
||||
by = fac * Qy
|
||||
bz = fac * Qz
|
||||
gamma = getT(Q) / M
|
||||
a = 1 / (1 + gamma)
|
||||
x = ss / M
|
||||
|
||||
i = 1
|
||||
while i <= n
|
||||
mom = r_moms[i]
|
||||
mom0 = getT(mom)
|
||||
mom1 = getX(mom)
|
||||
mom2 = getY(mom)
|
||||
mom3 = getZ(mom)
|
||||
|
||||
bq = bx * mom1 + by * mom2 + bz * mom3
|
||||
|
||||
p0 = x * (gamma * mom0 + bq)
|
||||
px = x * (mom1 + bx * mom0 + a * bq * bx)
|
||||
py = x * (mom2 + by * mom0 + a * bq * by)
|
||||
pz = x * (mom3 + bz * mom0 + a * bq * bz)
|
||||
|
||||
r_moms[i] = SFourMomentum(p0, px, py, pz)
|
||||
i += 1
|
||||
end
|
||||
return r_moms
|
||||
end
|
||||
|
||||
function _to_be_solved(xi, masses, p0s, ss)
|
||||
sum = 0.0
|
||||
for (i, E) in enumerate(p0s)
|
||||
sum += sqrt(masses[i]^2 + xi^2 * E^2)
|
||||
end
|
||||
return sum - ss
|
||||
end
|
||||
|
||||
function _build_massive_momenta(xi, masses, massless_moms)
|
||||
vec = SFourMomentum[]
|
||||
i = 1
|
||||
while i <= length(massless_moms)
|
||||
massless_mom = massless_moms[i]
|
||||
k0 = sqrt(getT(massless_mom)^2 * xi^2 + masses[i]^2)
|
||||
|
||||
kx = xi * getX(massless_mom)
|
||||
ky = xi * getY(massless_mom)
|
||||
kz = xi * getZ(massless_mom)
|
||||
|
||||
push!(vec, SFourMomentum(k0, kx, ky, kz))
|
||||
|
||||
i += 1
|
||||
end
|
||||
return vec
|
||||
end
|
||||
|
||||
first_derivative(func) = x -> ForwardDiff.derivative(func, float(x))
|
||||
|
||||
|
||||
function generate_physical_massive_moms(rng, ss, masses; x0 = 0.1)
|
||||
n = length(masses)
|
||||
massless_moms = generate_physical_massless_moms(rng, ss, n)
|
||||
energies = getT.(massless_moms)
|
||||
f = x -> _to_be_solved(x, masses, energies, ss)
|
||||
xi = find_zero((f, first_derivative(f)), x0, Roots.Newton())
|
||||
return _build_massive_momenta(xi, masses, massless_moms)
|
||||
end
|
||||
|
@ -32,13 +32,13 @@ function parse_edges(input::AbstractString)
|
||||
end
|
||||
|
||||
"""
|
||||
parse_abc(filename::String; verbose::Bool = false)
|
||||
parse_dag(filename::String, model::ABCModel; verbose::Bool = false)
|
||||
|
||||
Read an abc-model process from the given file. If `verbose` is set to true, print some progress information to stdout.
|
||||
|
||||
Returns a valid [`DAG`](@ref).
|
||||
"""
|
||||
function parse_abc(filename::String, verbose::Bool = false)
|
||||
function parse_dag(filename::AbstractString, model::ABCModel, verbose::Bool = false)
|
||||
file = open(filename, "r")
|
||||
|
||||
if (verbose)
|
||||
@ -63,10 +63,9 @@ function parse_abc(filename::String, verbose::Bool = false)
|
||||
end
|
||||
sizehint!(graph.nodes, estimate_no_nodes)
|
||||
|
||||
sum_node = insert_node!(graph, make_node(ComputeTaskSum()), false, false)
|
||||
global_data_out =
|
||||
insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), false, false)
|
||||
insert_edge!(graph, sum_node, global_data_out, false, false)
|
||||
sum_node = insert_node!(graph, make_node(ComputeTaskSum(0)), track = false, invalidate_cache = false)
|
||||
global_data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, sum_node, global_data_out, track = false, invalidate_cache = false)
|
||||
|
||||
# remember the data out nodes for connection
|
||||
dataOutNodes = Dict()
|
||||
@ -81,10 +80,7 @@ function parse_abc(filename::String, verbose::Bool = false)
|
||||
noNodes += 1
|
||||
if (noNodes % 100 == 0)
|
||||
if (verbose)
|
||||
percent = string(
|
||||
round(100.0 * noNodes / nodesToRead, digits = 2),
|
||||
"%",
|
||||
)
|
||||
percent = string(round(100.0 * noNodes / nodesToRead, digits = 2), "%")
|
||||
print("\rReading Nodes... $percent")
|
||||
end
|
||||
end
|
||||
@ -93,30 +89,20 @@ function parse_abc(filename::String, verbose::Bool = false)
|
||||
data_in = insert_node!(
|
||||
graph,
|
||||
make_node(DataTask(PARTICLE_VALUE_SIZE), string(node)),
|
||||
false,
|
||||
false,
|
||||
track = false,
|
||||
invalidate_cache = false,
|
||||
) # read particle data node
|
||||
compute_P =
|
||||
insert_node!(graph, make_node(ComputeTaskP()), false, false) # compute P node
|
||||
data_Pu = insert_node!(
|
||||
graph,
|
||||
make_node(DataTask(PARTICLE_VALUE_SIZE)),
|
||||
false,
|
||||
false,
|
||||
) # transfer data from P to u (one ParticleValue object)
|
||||
compute_u =
|
||||
insert_node!(graph, make_node(ComputeTaskU()), false, false) # compute U node
|
||||
data_out = insert_node!(
|
||||
graph,
|
||||
make_node(DataTask(PARTICLE_VALUE_SIZE)),
|
||||
false,
|
||||
false,
|
||||
) # transfer data out from u (one ParticleValue object)
|
||||
compute_P = insert_node!(graph, make_node(ComputeTaskP()), track = false, invalidate_cache = false) # compute P node
|
||||
data_Pu =
|
||||
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data from P to u (one ParticleValue object)
|
||||
compute_u = insert_node!(graph, make_node(ComputeTaskU()), track = false, invalidate_cache = false) # compute U node
|
||||
data_out =
|
||||
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data out from u (one ParticleValue object)
|
||||
|
||||
insert_edge!(graph, data_in, compute_P, false, false)
|
||||
insert_edge!(graph, compute_P, data_Pu, false, false)
|
||||
insert_edge!(graph, data_Pu, compute_u, false, false)
|
||||
insert_edge!(graph, compute_u, data_out, false, false)
|
||||
insert_edge!(graph, data_in, compute_P, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, compute_P, data_Pu, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, data_Pu, compute_u, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, compute_u, data_out, track = false, invalidate_cache = false)
|
||||
|
||||
# remember the data_out node for future edges
|
||||
dataOutNodes[node] = data_out
|
||||
@ -126,63 +112,48 @@ function parse_abc(filename::String, verbose::Bool = false)
|
||||
in1 = capt.captures[1]
|
||||
in2 = capt.captures[2]
|
||||
|
||||
compute_v =
|
||||
insert_node!(graph, make_node(ComputeTaskV()), false, false)
|
||||
data_out = insert_node!(
|
||||
graph,
|
||||
make_node(DataTask(PARTICLE_VALUE_SIZE)),
|
||||
false,
|
||||
false,
|
||||
)
|
||||
compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false)
|
||||
data_out =
|
||||
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false)
|
||||
|
||||
if (occursin(regex_c, in1))
|
||||
# put an S node after this input
|
||||
compute_S = insert_node!(
|
||||
graph,
|
||||
make_node(ComputeTaskS1()),
|
||||
false,
|
||||
false,
|
||||
)
|
||||
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false)
|
||||
data_S_v = insert_node!(
|
||||
graph,
|
||||
make_node(DataTask(PARTICLE_VALUE_SIZE)),
|
||||
false,
|
||||
false,
|
||||
track = false,
|
||||
invalidate_cache = false,
|
||||
)
|
||||
|
||||
insert_edge!(graph, dataOutNodes[in1], compute_S, false, false)
|
||||
insert_edge!(graph, compute_S, data_S_v, false, false)
|
||||
insert_edge!(graph, dataOutNodes[in1], compute_S, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false)
|
||||
|
||||
insert_edge!(graph, data_S_v, compute_v, false, false)
|
||||
insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false)
|
||||
else
|
||||
insert_edge!(graph, dataOutNodes[in1], compute_v, false, false)
|
||||
insert_edge!(graph, dataOutNodes[in1], compute_v, track = false, invalidate_cache = false)
|
||||
end
|
||||
|
||||
if (occursin(regex_c, in2))
|
||||
# i think the current generator only puts the combined particles in the first space, so this case might never be entered
|
||||
# put an S node after this input
|
||||
compute_S = insert_node!(
|
||||
graph,
|
||||
make_node(ComputeTaskS1()),
|
||||
false,
|
||||
false,
|
||||
)
|
||||
compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false)
|
||||
data_S_v = insert_node!(
|
||||
graph,
|
||||
make_node(DataTask(PARTICLE_VALUE_SIZE)),
|
||||
false,
|
||||
false,
|
||||
track = false,
|
||||
invalidate_cache = false,
|
||||
)
|
||||
|
||||
insert_edge!(graph, dataOutNodes[in2], compute_S, false, false)
|
||||
insert_edge!(graph, compute_S, data_S_v, false, false)
|
||||
insert_edge!(graph, dataOutNodes[in2], compute_S, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false)
|
||||
|
||||
insert_edge!(graph, data_S_v, compute_v, false, false)
|
||||
insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false)
|
||||
else
|
||||
insert_edge!(graph, dataOutNodes[in2], compute_v, false, false)
|
||||
insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false)
|
||||
end
|
||||
|
||||
insert_edge!(graph, compute_v, data_out, false, false)
|
||||
insert_edge!(graph, compute_v, data_out, track = false, invalidate_cache = false)
|
||||
dataOutNodes[node] = data_out
|
||||
|
||||
elseif occursin(regex_m, node)
|
||||
@ -193,43 +164,31 @@ function parse_abc(filename::String, verbose::Bool = false)
|
||||
in3 = capt.captures[3]
|
||||
|
||||
# in2 + in3 with a v
|
||||
compute_v =
|
||||
insert_node!(graph, make_node(ComputeTaskV()), false, false)
|
||||
data_v = insert_node!(
|
||||
graph,
|
||||
make_node(DataTask(PARTICLE_VALUE_SIZE)),
|
||||
false,
|
||||
false,
|
||||
)
|
||||
compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false)
|
||||
data_v =
|
||||
insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false)
|
||||
|
||||
insert_edge!(graph, dataOutNodes[in2], compute_v, false, false)
|
||||
insert_edge!(graph, dataOutNodes[in3], compute_v, false, false)
|
||||
insert_edge!(graph, compute_v, data_v, false, false)
|
||||
insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, dataOutNodes[in3], compute_v, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, compute_v, data_v, track = false, invalidate_cache = false)
|
||||
|
||||
# combine with the v of the combined other input
|
||||
compute_S2 =
|
||||
insert_node!(graph, make_node(ComputeTaskS2()), false, false)
|
||||
data_out = insert_node!(
|
||||
graph,
|
||||
make_node(DataTask(FLOAT_SIZE)),
|
||||
false,
|
||||
false,
|
||||
) # output of a S2 task is only a float
|
||||
compute_S2 = insert_node!(graph, make_node(ComputeTaskS2()), track = false, invalidate_cache = false)
|
||||
data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false) # output of a S2 task is only a float
|
||||
|
||||
insert_edge!(graph, data_v, compute_S2, false, false)
|
||||
insert_edge!(graph, dataOutNodes[in1], compute_S2, false, false)
|
||||
insert_edge!(graph, compute_S2, data_out, false, false)
|
||||
insert_edge!(graph, data_v, compute_S2, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, dataOutNodes[in1], compute_S2, track = false, invalidate_cache = false)
|
||||
insert_edge!(graph, compute_S2, data_out, track = false, invalidate_cache = false)
|
||||
|
||||
insert_edge!(graph, data_out, sum_node, false, false)
|
||||
insert_edge!(graph, data_out, sum_node, track = false, invalidate_cache = false)
|
||||
add_child!(sum_node.task)
|
||||
elseif occursin(regex_plus, node)
|
||||
if (verbose)
|
||||
println("\rReading Nodes Complete ")
|
||||
println("Added ", length(graph.nodes), " nodes")
|
||||
end
|
||||
else
|
||||
@assert false (
|
||||
"Unknown node '$node' while reading from file $filename"
|
||||
)
|
||||
@assert false ("Unknown node '$node' while reading from file $filename")
|
||||
end
|
||||
end
|
||||
|
||||
@ -244,6 +203,46 @@ function parse_abc(filename::String, verbose::Bool = false)
|
||||
if (verbose)
|
||||
println("Done")
|
||||
end
|
||||
|
||||
# don't actually need to read the edges
|
||||
return graph
|
||||
end
|
||||
|
||||
"""
|
||||
parse_process(string::AbstractString, model::ABCModel)
|
||||
|
||||
Parse a string representation of a process, such as "AB->ABBB" into the corresponding [`ABCProcessDescription`](@ref).
|
||||
"""
|
||||
function parse_process(str::AbstractString, model::ABCModel)
|
||||
inParticles = Dict{Type, Int}()
|
||||
outParticles = Dict{Type, Int}()
|
||||
|
||||
if !(contains(str, "->"))
|
||||
throw("Did not find -> while parsing process \"$str\"")
|
||||
end
|
||||
|
||||
(inStr, outStr) = split(str, "->")
|
||||
|
||||
if (isempty(inStr) || isempty(outStr))
|
||||
throw("Process (\"$str\") input or output part is empty!")
|
||||
end
|
||||
|
||||
for t in types(model)
|
||||
inCount = count(x -> x == String(t)[1], inStr)
|
||||
outCount = count(x -> x == String(t)[1], outStr)
|
||||
if inCount != 0
|
||||
inParticles[t] = inCount
|
||||
end
|
||||
if outCount != 0
|
||||
outParticles[t] = outCount
|
||||
end
|
||||
end
|
||||
|
||||
if length(inStr) != sum(values(inParticles))
|
||||
throw("Encountered unknown characters in the input part of process \"$str\"")
|
||||
elseif length(outStr) != sum(values(outParticles))
|
||||
throw("Encountered unknown characters in the output part of process \"$str\"")
|
||||
end
|
||||
|
||||
return ABCProcessDescription(inParticles, outParticles)
|
||||
end
|
||||
|
@ -1,99 +1,142 @@
|
||||
"""
|
||||
ParticleType
|
||||
using QEDbase
|
||||
|
||||
import QEDbase.mass
|
||||
|
||||
A Particle Type in the ABC Model as an enum, with types `A`, `B` and `C`.
|
||||
"""
|
||||
@enum ParticleType A = 1 B = 2 C = 3
|
||||
ABCModel <: AbstractPhysicsModel
|
||||
|
||||
Singleton definition for identification of the ABC-Model.
|
||||
"""
|
||||
struct ABCModel <: AbstractPhysicsModel end
|
||||
|
||||
"""
|
||||
ABCParticle
|
||||
|
||||
Base type for all particles in the [`ABCModel`](@ref).
|
||||
"""
|
||||
abstract type ABCParticle <: AbstractParticle end
|
||||
|
||||
"""
|
||||
ParticleA <: ABCParticle
|
||||
|
||||
An 'A' particle in the ABC Model.
|
||||
"""
|
||||
struct ParticleA <: ABCParticle
|
||||
momentum::SFourMomentum
|
||||
end
|
||||
|
||||
"""
|
||||
ParticleB <: ABCParticle
|
||||
|
||||
A 'B' particle in the ABC Model.
|
||||
"""
|
||||
struct ParticleB <: ABCParticle
|
||||
momentum::SFourMomentum
|
||||
end
|
||||
|
||||
"""
|
||||
ParticleC <: ABCParticle
|
||||
|
||||
A 'C' particle in the ABC Model.
|
||||
"""
|
||||
struct ParticleC <: ABCParticle
|
||||
momentum::SFourMomentum
|
||||
end
|
||||
|
||||
"""
|
||||
ABCProcessDescription <: AbstractProcessDescription
|
||||
|
||||
A description of a process in the ABC-Model. Contains the input and output particles.
|
||||
|
||||
See also: [`in_particles`](@ref), [`out_particles`](@ref), [`parse_process`](@ref)
|
||||
"""
|
||||
struct ABCProcessDescription <: AbstractProcessDescription
|
||||
inParticles::Dict{Type, Int}
|
||||
outParticles::Dict{Type, Int}
|
||||
end
|
||||
|
||||
"""
|
||||
ABCProcessInput <: AbstractProcessInput
|
||||
|
||||
Input for a ABC Process. Contains the [`ABCProcessDescription`](@ref) of the process it is an input for, and the values of the in and out particles.
|
||||
|
||||
See also: [`gen_process_input`](@ref)
|
||||
"""
|
||||
struct ABCProcessInput <: AbstractProcessInput
|
||||
process::ABCProcessDescription
|
||||
inParticles::Vector{ABCParticle}
|
||||
outParticles::Vector{ABCParticle}
|
||||
end
|
||||
|
||||
"""
|
||||
PARTICLE_MASSES
|
||||
|
||||
A constant dictionary containing the masses of the different [`ParticleType`](@ref)s.
|
||||
A constant dictionary containing the masses of the different [`ABCParticle`](@ref)s.
|
||||
"""
|
||||
const PARTICLE_MASSES =
|
||||
Dict{ParticleType, Float64}(A => 1.0, B => 1.0, C => 0.0)
|
||||
const PARTICLE_MASSES = Dict{Type, Float64}(ParticleA => 1.0, ParticleB => 1.0, ParticleC => 0.0)
|
||||
|
||||
"""
|
||||
Particle
|
||||
|
||||
A struct describing a particle of the ABC-Model. It has the 4 momentum parts P0...P3 and a [`ParticleType`](@ref).
|
||||
|
||||
`sizeof(Particle())` = 40 Byte
|
||||
"""
|
||||
struct Particle
|
||||
P0::Float64
|
||||
P1::Float64
|
||||
P2::Float64
|
||||
P3::Float64
|
||||
|
||||
type::ParticleType
|
||||
end
|
||||
|
||||
"""
|
||||
ParticleValue
|
||||
|
||||
A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated.
|
||||
|
||||
`sizeof(ParticleValue())` = 48 Byte
|
||||
"""
|
||||
struct ParticleValue
|
||||
p::Particle
|
||||
v::Float64
|
||||
end
|
||||
|
||||
"""
|
||||
mass(t::ParticleType)
|
||||
mass(t::Type{T}) where {T <: ABCParticle}
|
||||
|
||||
Return the mass (at rest) of the given particle type.
|
||||
"""
|
||||
mass(t::ParticleType) = PARTICLE_MASSES[t]
|
||||
mass(t::Type{T}) where {T <: ABCParticle} = PARTICLE_MASSES[t]
|
||||
|
||||
"""
|
||||
remaining_type(t1::ParticleType, t2::ParticleType)
|
||||
interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
|
||||
|
||||
For 2 given (non-equal) particle types, return the third of ABC.
|
||||
"""
|
||||
function remaining_type(t1::ParticleType, t2::ParticleType)
|
||||
function interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
|
||||
@assert t1 != t2
|
||||
if t1 != A && t2 != A
|
||||
return A
|
||||
elseif t1 != B && t2 != B
|
||||
return B
|
||||
if t1 != ParticleA && t2 != ParticleA
|
||||
return ParticleA
|
||||
elseif t1 != ParticleB && t2 != ParticleB
|
||||
return ParticleB
|
||||
else
|
||||
return C
|
||||
return ParticleC
|
||||
end
|
||||
end
|
||||
|
||||
"""
|
||||
square(p::Particle)
|
||||
types(::ABCModel)
|
||||
|
||||
Return a Vector of the possible types of particle in the [`ABCModel`](@ref).
|
||||
"""
|
||||
function types(::ABCModel)
|
||||
return [ParticleA, ParticleB, ParticleC]
|
||||
end
|
||||
|
||||
"""
|
||||
square(p::ABCParticle)
|
||||
|
||||
Return the square of the particle's momentum as a `Float` value.
|
||||
|
||||
Takes 7 effective FLOP.
|
||||
"""
|
||||
function square(p::Particle)
|
||||
return p.P0 * p.P0 - p.P1 * p.P1 - p.P2 * p.P2 - p.P3 * p.P3
|
||||
function square(p::ABCParticle)
|
||||
return getMass2(p.momentum)
|
||||
end
|
||||
|
||||
"""
|
||||
inner_edge(p::Particle)
|
||||
inner_edge(p::ABCParticle)
|
||||
|
||||
Return the factor of the inner edge with the given (virtual) particle.
|
||||
|
||||
Takes 10 effective FLOP. (3 here + 10 in square(p))
|
||||
Takes 10 effective FLOP. (3 here + 7 in square(p))
|
||||
"""
|
||||
function inner_edge(p::Particle)
|
||||
return 1.0 / (square(p) - mass(p.type) * mass(p.type))
|
||||
function inner_edge(p::ABCParticle)
|
||||
return 1.0 / (square(p) - mass(typeof(p)) * mass(typeof(p)))
|
||||
end
|
||||
|
||||
"""
|
||||
outer_edge(p::Particle)
|
||||
outer_edge(p::ABCParticle)
|
||||
|
||||
Return the factor of the outer edge with the given (real) particle.
|
||||
|
||||
Takes 0 effective FLOP.
|
||||
"""
|
||||
function outer_edge(p::Particle)
|
||||
function outer_edge(p::ABCParticle)
|
||||
return 1.0
|
||||
end
|
||||
|
||||
@ -111,20 +154,57 @@ function vertex()
|
||||
end
|
||||
|
||||
"""
|
||||
preserve_momentum(p1::Particle, p2::Particle)
|
||||
preserve_momentum(p1::ABCParticle, p2::ABCParticle)
|
||||
|
||||
Calculate and return a new particle from two given interacting ones at a vertex.
|
||||
|
||||
Takes 4 effective FLOP.
|
||||
"""
|
||||
function preserve_momentum(p1::Particle, p2::Particle)
|
||||
p3 = Particle(
|
||||
p1.P0 + p2.P0,
|
||||
p1.P1 + p2.P1,
|
||||
p1.P2 + p2.P2,
|
||||
p1.P3 + p2.P3,
|
||||
remaining_type(p1.type, p2.type),
|
||||
)
|
||||
|
||||
function preserve_momentum(p1::ABCParticle, p2::ABCParticle)
|
||||
t3 = interaction_result(typeof(p1), typeof(p2))
|
||||
p3 = t3(p1.momentum + p2.momentum)
|
||||
return p3
|
||||
end
|
||||
|
||||
"""
|
||||
type_from_name(name::String)
|
||||
|
||||
For a name of a particle, return the particle's [`Type`].
|
||||
"""
|
||||
function type_from_name(name::String)
|
||||
if startswith(name, "A")
|
||||
return ParticleA
|
||||
elseif startswith(name, "B")
|
||||
return ParticleB
|
||||
elseif startswith(name, "C")
|
||||
return ParticleC
|
||||
else
|
||||
throw("Invalid name for a particle in the ABC model")
|
||||
end
|
||||
end
|
||||
|
||||
function String(::Type{ParticleA})
|
||||
return "A"
|
||||
end
|
||||
function String(::Type{ParticleB})
|
||||
return "B"
|
||||
end
|
||||
function String(::Type{ParticleC})
|
||||
return "C"
|
||||
end
|
||||
|
||||
function in_particles(process::ABCProcessDescription)
|
||||
return process.inParticles
|
||||
end
|
||||
|
||||
function in_particles(input::ABCProcessInput)
|
||||
return input.inParticles
|
||||
end
|
||||
|
||||
function out_particles(process::ABCProcessDescription)
|
||||
return process.outParticles
|
||||
end
|
||||
|
||||
function out_particles(input::ABCProcessInput)
|
||||
return input.outParticles
|
||||
end
|
||||
|
58
src/models/abc/print.jl
Normal file
58
src/models/abc/print.jl
Normal file
@ -0,0 +1,58 @@
|
||||
|
||||
"""
|
||||
show(io::IO, process::ABCProcessDescription)
|
||||
|
||||
Pretty print an [`ABCProcessDescription`](@ref) (no newlines).
|
||||
|
||||
```jldoctest
|
||||
julia> using MetagraphOptimization
|
||||
|
||||
julia> print(parse_process("AB->ABBB", ABCModel()))
|
||||
ABC Process: 'AB->ABBB'
|
||||
```
|
||||
"""
|
||||
function show(io::IO, process::ABCProcessDescription)
|
||||
# types() gives the types in order (ABC) instead of random like keys() would
|
||||
print(io, "ABC Process: \'")
|
||||
for type in types(ABCModel())
|
||||
for _ in 1:get(process.inParticles, type, 0)
|
||||
print(io, String(type))
|
||||
end
|
||||
end
|
||||
print(io, "->")
|
||||
for type in types(ABCModel())
|
||||
for _ in 1:get(process.outParticles, type, 0)
|
||||
print(io, String(type))
|
||||
end
|
||||
end
|
||||
print(io, "'")
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
show(io::IO, processInput::ABCProcessInput)
|
||||
|
||||
Pretty print an [`ABCProcessInput`](@ref) (with newlines).
|
||||
"""
|
||||
function show(io::IO, processInput::ABCProcessInput)
|
||||
println(io, "Input for $(processInput.process):")
|
||||
println(io, " $(length(processInput.inParticles)) Incoming particles:")
|
||||
for particle in processInput.inParticles
|
||||
println(io, " $particle")
|
||||
end
|
||||
println(io, " $(length(processInput.outParticles)) Outgoing Particles:")
|
||||
for particle in processInput.outParticles
|
||||
println(io, " $particle")
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
show(io::IO, particle::T) where {T <: ABCParticle}
|
||||
|
||||
Pretty print an [`ABCParticle`](@ref) (no newlines).
|
||||
"""
|
||||
function show(io::IO, particle::T) where {T <: ABCParticle}
|
||||
print(io, "$(String(typeof(particle))): $(particle.momentum)")
|
||||
return nothing
|
||||
end
|
@ -3,35 +3,35 @@
|
||||
|
||||
Return the compute effort of an S1 task.
|
||||
"""
|
||||
compute_effort(t::ComputeTaskS1) = 11
|
||||
compute_effort(t::ComputeTaskS1) = 11.0
|
||||
|
||||
"""
|
||||
compute_effort(t::ComputeTaskS2)
|
||||
|
||||
Return the compute effort of an S2 task.
|
||||
"""
|
||||
compute_effort(t::ComputeTaskS2) = 12
|
||||
compute_effort(t::ComputeTaskS2) = 12.0
|
||||
|
||||
"""
|
||||
compute_effort(t::ComputeTaskU)
|
||||
|
||||
Return the compute effort of a U task.
|
||||
"""
|
||||
compute_effort(t::ComputeTaskU) = 1
|
||||
compute_effort(t::ComputeTaskU) = 1.0
|
||||
|
||||
"""
|
||||
compute_effort(t::ComputeTaskV)
|
||||
|
||||
Return the compute effort of a V task.
|
||||
"""
|
||||
compute_effort(t::ComputeTaskV) = 6
|
||||
compute_effort(t::ComputeTaskV) = 6.0
|
||||
|
||||
"""
|
||||
compute_effort(t::ComputeTaskP)
|
||||
|
||||
Return the compute effort of a P task.
|
||||
"""
|
||||
compute_effort(t::ComputeTaskP) = 0
|
||||
compute_effort(t::ComputeTaskP) = 0.0
|
||||
|
||||
"""
|
||||
compute_effort(t::ComputeTaskSum)
|
||||
@ -41,7 +41,7 @@ Return the compute effort of a Sum task.
|
||||
Note: This is a constant compute effort, even though sum scales with the number of its inputs. Since there is only ever a single sum node in a graph generated from the ABC-Model,
|
||||
this doesn't matter.
|
||||
"""
|
||||
compute_effort(t::ComputeTaskSum) = 1
|
||||
compute_effort(t::ComputeTaskSum) = 1.0
|
||||
|
||||
"""
|
||||
show(io::IO, t::DataTask)
|
||||
@ -57,42 +57,42 @@ end
|
||||
|
||||
Print the S1 task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskS1) = print("ComputeS1")
|
||||
show(io::IO, t::ComputeTaskS1) = print(io, "ComputeS1")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskS2)
|
||||
|
||||
Print the S2 task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskS2) = print("ComputeS2")
|
||||
show(io::IO, t::ComputeTaskS2) = print(io, "ComputeS2")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskP)
|
||||
|
||||
Print the P task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskP) = print("ComputeP")
|
||||
show(io::IO, t::ComputeTaskP) = print(io, "ComputeP")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskU)
|
||||
|
||||
Print the U task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskU) = print("ComputeU")
|
||||
show(io::IO, t::ComputeTaskU) = print(io, "ComputeU")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskV)
|
||||
|
||||
Print the V task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskV) = print("ComputeV")
|
||||
show(io::IO, t::ComputeTaskV) = print(io, "ComputeV")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskSum)
|
||||
|
||||
Print the sum task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskSum) = print("ComputeSum")
|
||||
show(io::IO, t::ComputeTaskSum) = print(io, "ComputeSum")
|
||||
|
||||
"""
|
||||
copy(t::DataTask)
|
||||
@ -147,19 +147,20 @@ children(::ComputeTaskV) = 2
|
||||
"""
|
||||
children(::ComputeTaskSum)
|
||||
|
||||
Return the number of children of a ComputeTaskSum, since this is variable and the task doesn't know
|
||||
how many children it will sum over, return a wildcard -1.
|
||||
|
||||
TODO: this is kind of bad because it means we can't fuse with a sum task
|
||||
Return the number of children of a ComputeTaskSum.
|
||||
"""
|
||||
children(::ComputeTaskSum) = -1
|
||||
children(t::ComputeTaskSum) = t.children_number
|
||||
|
||||
"""
|
||||
children(t::FusedComputeTask)
|
||||
|
||||
Return the number of children of a FusedComputeTask. It's the sum of the children of both tasks minus one.
|
||||
Return the number of children of a FusedComputeTask.
|
||||
"""
|
||||
function children(t::FusedComputeTask)
|
||||
(T1, T2) = get_types(t)
|
||||
return children(T1()) + children(T2()) - 1 # one of the inputs is the output of T1 and thus not a child of the node
|
||||
return length(union(Set(t.t1_inputs), Set(t.t2_inputs)))
|
||||
end
|
||||
|
||||
function add_child!(t::ComputeTaskSum)
|
||||
t.children_number += 1
|
||||
return nothing
|
||||
end
|
||||
|
@ -4,7 +4,7 @@
|
||||
Task representing a specific data transfer in the ABC Model.
|
||||
"""
|
||||
struct DataTask <: AbstractDataTask
|
||||
data::UInt64
|
||||
data::Float64
|
||||
end
|
||||
|
||||
"""
|
||||
@ -47,19 +47,13 @@ struct ComputeTaskU <: AbstractComputeTask end
|
||||
|
||||
Task that sums all its inputs, n children.
|
||||
"""
|
||||
struct ComputeTaskSum <: AbstractComputeTask end
|
||||
mutable struct ComputeTaskSum <: AbstractComputeTask
|
||||
children_number::Int
|
||||
end
|
||||
|
||||
"""
|
||||
ABC_TASKS
|
||||
|
||||
Constant vector of all tasks of the ABC-Model.
|
||||
"""
|
||||
ABC_TASKS = [
|
||||
DataTask,
|
||||
ComputeTaskS1,
|
||||
ComputeTaskS2,
|
||||
ComputeTaskP,
|
||||
ComputeTaskV,
|
||||
ComputeTaskU,
|
||||
ComputeTaskSum,
|
||||
]
|
||||
ABC_TASKS = [DataTask, ComputeTaskS1, ComputeTaskS2, ComputeTaskP, ComputeTaskV, ComputeTaskU, ComputeTaskSum]
|
||||
|
109
src/models/interface.jl
Normal file
109
src/models/interface.jl
Normal file
@ -0,0 +1,109 @@
|
||||
|
||||
"""
|
||||
AbstractPhysicsModel
|
||||
|
||||
Base type for a model, e.g. ABC-Model or QED. This is used to dispatch many functions.
|
||||
"""
|
||||
abstract type AbstractPhysicsModel end
|
||||
|
||||
"""
|
||||
AbstractParticle
|
||||
|
||||
Base type for particles belonging to a certain [`AbstractPhysicsModel`](@ref).
|
||||
"""
|
||||
abstract type AbstractParticle end
|
||||
|
||||
"""
|
||||
ParticleValue{ParticleType <: AbstractParticle}
|
||||
|
||||
A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated.
|
||||
|
||||
`sizeof(ParticleValue())` = 48 Byte
|
||||
"""
|
||||
struct ParticleValue{ParticleType <: AbstractParticle}
|
||||
p::ParticleType
|
||||
v::Float64
|
||||
end
|
||||
|
||||
"""
|
||||
AbstractProcessDescription
|
||||
|
||||
Base type for process descriptions. An object of this type of a corresponding [`AbstractPhysicsModel`](@ref) should uniquely identify a process in that model.
|
||||
|
||||
See also: [`parse_process`](@ref)
|
||||
"""
|
||||
abstract type AbstractProcessDescription end
|
||||
|
||||
"""
|
||||
AbstractProcessInput
|
||||
|
||||
Base type for process inputs. An object of this type contains the input values (e.g. momenta) of the particles in a process.
|
||||
|
||||
See also: [`gen_process_input`](@ref)
|
||||
"""
|
||||
abstract type AbstractProcessInput end
|
||||
|
||||
"""
|
||||
mass(t::Type{T}) where {T <: AbstractParticle}
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the particles mass at rest.
|
||||
"""
|
||||
function mass end
|
||||
|
||||
"""
|
||||
interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: AbstractParticle, T2 <: AbstractParticle}
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the result particle type when the two given particles interact.
|
||||
"""
|
||||
function interaction_result end
|
||||
|
||||
"""
|
||||
types(::AbstractPhysicsModel)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref), returning a `Vector` of the available particle types in the model.
|
||||
"""
|
||||
function types end
|
||||
|
||||
"""
|
||||
in_particles(::AbstractProcessDescription)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref).
|
||||
Returns a `<: Dict{Type{AbstractParticle}, Int}` object, representing the number of incoming particles for the process per particle type.
|
||||
|
||||
|
||||
in_particles(::AbstractProcessInput)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
|
||||
Returns a `<: Vector{AbstractParticle}` object with the values of all incoming particles for the corresponding `ProcessDescription`.
|
||||
"""
|
||||
function in_particles end
|
||||
|
||||
"""
|
||||
out_particles(::AbstractProcessDescription)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref).
|
||||
Returns a `<: Dict{Type{AbstractParticle}, Int}` object, representing the number of outgoing particles for the process per particle type.
|
||||
|
||||
|
||||
out_particles(::AbstractProcessInput)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
|
||||
Returns a `<: Vector{AbstractParticle}` object with the values of all outgoing particles for the corresponding `ProcessDescription`.
|
||||
"""
|
||||
function out_particles end
|
||||
|
||||
"""
|
||||
parse_process(::AbstractString, ::AbstractPhysicsModel)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref).
|
||||
Returns a `ProcessDescription` object.
|
||||
"""
|
||||
function parse_process end
|
||||
|
||||
"""
|
||||
gen_process_input(::AbstractProcessDescription)
|
||||
|
||||
Interface function that must be implemented for every specific [`AbstractProcessDescription`](@ref).
|
||||
Returns a randomly generated and valid corresponding `ProcessInput`.
|
||||
"""
|
||||
function gen_process_input end
|
10
src/models/print.jl
Normal file
10
src/models/print.jl
Normal file
@ -0,0 +1,10 @@
|
||||
|
||||
"""
|
||||
show(io::IO, particleValue::ParticleValue)
|
||||
|
||||
Pretty print a [`ParticleValue`](@ref), no newlines.
|
||||
"""
|
||||
function show(io::IO, particleValue::ParticleValue)
|
||||
print(io, "($(particleValue.p), value: $(particleValue.v))")
|
||||
return nothing
|
||||
end
|
@ -1,44 +1,20 @@
|
||||
|
||||
DataTaskNode(t::AbstractDataTask, name = "") = DataTaskNode(
|
||||
t,
|
||||
Vector{Node}(),
|
||||
Vector{Node}(),
|
||||
UUIDs.uuid1(rng[threadid()]),
|
||||
missing,
|
||||
missing,
|
||||
missing,
|
||||
name,
|
||||
)
|
||||
DataTaskNode(t::AbstractDataTask, name = "") =
|
||||
DataTaskNode(t, Vector{Node}(), Vector{Node}(), UUIDs.uuid1(rng[threadid()]), missing, missing, missing, name)
|
||||
ComputeTaskNode(t::AbstractComputeTask) = ComputeTaskNode(
|
||||
t,
|
||||
Vector{Node}(),
|
||||
Vector{Node}(),
|
||||
UUIDs.uuid1(rng[threadid()]),
|
||||
missing,
|
||||
missing,
|
||||
Vector{NodeFusion}(),
|
||||
t, # task
|
||||
Vector{Node}(), # parents
|
||||
Vector{Node}(), # children
|
||||
UUIDs.uuid1(rng[threadid()]), # id
|
||||
missing, # node reduction
|
||||
missing, # node split
|
||||
Vector{NodeFusion}(), # node fusions
|
||||
missing, # device
|
||||
)
|
||||
|
||||
copy(m::Missing) = missing
|
||||
copy(n::ComputeTaskNode) = ComputeTaskNode(
|
||||
copy(n.task),
|
||||
copy(n.parents),
|
||||
copy(n.children),
|
||||
UUIDs.uuid1(rng[threadid()]),
|
||||
copy(n.nodeReduction),
|
||||
copy(n.nodeSplit),
|
||||
copy(n.nodeFusions),
|
||||
)
|
||||
copy(n::DataTaskNode) = DataTaskNode(
|
||||
copy(n.task),
|
||||
copy(n.parents),
|
||||
copy(n.children),
|
||||
UUIDs.uuid1(rng[threadid()]),
|
||||
copy(n.nodeReduction),
|
||||
copy(n.nodeSplit),
|
||||
copy(n.nodeFusion),
|
||||
n.name,
|
||||
)
|
||||
copy(n::ComputeTaskNode) = ComputeTaskNode(copy(n.task))
|
||||
copy(n::DataTaskNode) = DataTaskNode(copy(n.task), n.name)
|
||||
|
||||
"""
|
||||
make_node(t::AbstractTask)
|
||||
|
@ -22,5 +22,6 @@ end
|
||||
Return the uuid as a string usable as a variable name in code generation.
|
||||
"""
|
||||
function to_var_name(id::UUID)
|
||||
return replace(string(id), "-" => "_")
|
||||
str = "_" * replace(string(id), "-" => "_")
|
||||
return str
|
||||
end
|
||||
|
@ -24,13 +24,14 @@ abstract type Operation end
|
||||
Any node that transfers data and does no computation.
|
||||
|
||||
# Fields
|
||||
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
|
||||
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
|
||||
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
|
||||
`.id`: The node's id. Improves the speed of comparisons.\\
|
||||
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
|
||||
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
|
||||
`.nodeFusion`: Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes.
|
||||
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
|
||||
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
|
||||
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
|
||||
`.id`: The node's id. Improves the speed of comparisons and is used as a unique identifier.\\
|
||||
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
|
||||
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
|
||||
`.nodeFusion`: Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes.\\
|
||||
`.name`: The name of this node for entry nodes into the graph ([`is_entry_node`](@ref)) to reliably assign the inputs to the correct nodes when executing.\\
|
||||
"""
|
||||
mutable struct DataTaskNode <: Node
|
||||
task::AbstractDataTask
|
||||
@ -60,16 +61,17 @@ end
|
||||
"""
|
||||
ComputeTaskNode <: Node
|
||||
|
||||
Any node that transfers data and does no computation.
|
||||
Any node that computes a result from inputs using an [`AbstractComputeTask`](@ref).
|
||||
|
||||
# Fields
|
||||
`.task`: The node's data task type. Usually [`DataTask`](@ref).\\
|
||||
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
|
||||
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
|
||||
`.id`: The node's id. Improves the speed of comparisons.\\
|
||||
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
|
||||
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
|
||||
`.nodeFusion`: A vector of this node's [`NodeFusion`](@ref)s. For a ComputeTaskNode there can be any number of these, unlike the DataTaskNodes.
|
||||
`.task`: The node's compute task type. A concrete subtype of [`AbstractComputeTask`](@ref).\\
|
||||
`.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\
|
||||
`.children`: A vector of the node's children (i.e. nodes that this one depends on).\\
|
||||
`.id`: The node's id. Improves the speed of comparisons and is used as a unique identifier.\\
|
||||
`.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\
|
||||
`.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\
|
||||
`.nodeFusions`: A vector of this node's [`NodeFusion`](@ref)s. For a `ComputeTaskNode` there can be any number of these, unlike the [`DataTaskNode`](@ref)s.\\
|
||||
`.device`: The Device this node has been scheduled on by a [`Scheduler`](@ref).
|
||||
"""
|
||||
mutable struct ComputeTaskNode <: Node
|
||||
task::AbstractComputeTask
|
||||
@ -82,6 +84,9 @@ mutable struct ComputeTaskNode <: Node
|
||||
|
||||
# for ComputeTasks there can be multiple fusions, unlike the DataTasks
|
||||
nodeFusions::Vector{Operation}
|
||||
|
||||
# the device this node is assigned to execute on
|
||||
device::Union{AbstractDevice, Missing}
|
||||
end
|
||||
|
||||
"""
|
||||
@ -95,8 +100,5 @@ The child is the prerequisite node of the parent.
|
||||
"""
|
||||
struct Edge
|
||||
# edge points from child to parent
|
||||
edge::Union{
|
||||
Tuple{DataTaskNode, ComputeTaskNode},
|
||||
Tuple{ComputeTaskNode, DataTaskNode},
|
||||
}
|
||||
edge::Union{Tuple{DataTaskNode, ComputeTaskNode}, Tuple{ComputeTaskNode, DataTaskNode}}
|
||||
end
|
||||
|
@ -22,12 +22,24 @@ function is_valid_node(graph::DAG, node::Node)
|
||||
@assert node in child.parents "Node is not a parent of its child!"
|
||||
end
|
||||
|
||||
if !ismissing(node.nodeReduction)
|
||||
#=if !ismissing(node.nodeReduction)
|
||||
@assert is_valid(graph, node.nodeReduction)
|
||||
end
|
||||
if !ismissing(node.nodeSplit)
|
||||
@assert is_valid(graph, node.nodeSplit)
|
||||
end=#
|
||||
|
||||
if !(typeof(node.task) <: FusedComputeTask)
|
||||
# the remaining checks are only necessary for fused compute tasks
|
||||
return true
|
||||
end
|
||||
|
||||
# every child must be in some input of the task
|
||||
for child in node.children
|
||||
str = Symbol(to_var_name(child.id))
|
||||
@assert (str in node.task.t1_inputs) || (str in node.task.t2_inputs) "$str was not in any of the tasks' inputs\nt1_inputs: $(node.task.t1_inputs)\nt2_inputs: $(node.task.t2_inputs)"
|
||||
end
|
||||
|
||||
return true
|
||||
end
|
||||
|
||||
@ -41,9 +53,9 @@ This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref).
|
||||
function is_valid(graph::DAG, node::ComputeTaskNode)
|
||||
@assert is_valid_node(graph, node)
|
||||
|
||||
for nf in node.nodeFusions
|
||||
#=for nf in node.nodeFusions
|
||||
@assert is_valid(graph, nf)
|
||||
end
|
||||
end=#
|
||||
return true
|
||||
end
|
||||
|
||||
@ -57,8 +69,8 @@ This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref).
|
||||
function is_valid(graph::DAG, node::DataTaskNode)
|
||||
@assert is_valid_node(graph, node)
|
||||
|
||||
if !ismissing(node.nodeFusion)
|
||||
#=if !ismissing(node.nodeFusion)
|
||||
@assert is_valid(graph, node.nodeFusion)
|
||||
end
|
||||
end=#
|
||||
return true
|
||||
end
|
||||
|
@ -34,12 +34,7 @@ Apply the given [`NodeFusion`](@ref) to the graph. Generic wrapper around [`node
|
||||
Return an [`AppliedNodeFusion`](@ref) object generated from the graph's [`Diff`](@ref).
|
||||
"""
|
||||
function apply_operation!(graph::DAG, operation::NodeFusion)
|
||||
diff = node_fusion!(
|
||||
graph,
|
||||
operation.input[1],
|
||||
operation.input[2],
|
||||
operation.input[3],
|
||||
)
|
||||
diff = node_fusion!(graph, operation.input[1], operation.input[2], operation.input[3])
|
||||
|
||||
graph.properties += GraphProperties(diff)
|
||||
|
||||
@ -124,17 +119,24 @@ function revert_diff!(graph::DAG, diff::Diff)
|
||||
# add removed nodes, remove added nodes, same for edges
|
||||
# note the order
|
||||
for edge in diff.addedEdges
|
||||
remove_edge!(graph, edge.edge[1], edge.edge[2], false)
|
||||
remove_edge!(graph, edge.edge[1], edge.edge[2], track = false)
|
||||
end
|
||||
for node in diff.addedNodes
|
||||
remove_node!(graph, node, false)
|
||||
remove_node!(graph, node, track = false)
|
||||
end
|
||||
|
||||
for node in diff.removedNodes
|
||||
insert_node!(graph, node, false)
|
||||
insert_node!(graph, node, track = false)
|
||||
end
|
||||
for edge in diff.removedEdges
|
||||
insert_edge!(graph, edge.edge[1], edge.edge[2], false)
|
||||
insert_edge!(graph, edge.edge[1], edge.edge[2], track = false)
|
||||
end
|
||||
|
||||
for (node, task) in diff.updatedChildren
|
||||
# node must be fused compute task at this point
|
||||
@assert typeof(node.task) <: FusedComputeTask
|
||||
|
||||
node.task = task
|
||||
end
|
||||
|
||||
graph.properties -= GraphProperties(diff)
|
||||
@ -149,21 +151,24 @@ Fuse nodes n1 -> n2 -> n3 together into one node, return the applied difference
|
||||
|
||||
For details see [`NodeFusion`](@ref).
|
||||
"""
|
||||
function node_fusion!(
|
||||
graph::DAG,
|
||||
n1::ComputeTaskNode,
|
||||
n2::DataTaskNode,
|
||||
n3::ComputeTaskNode,
|
||||
)
|
||||
# @assert is_valid_node_fusion_input(graph, n1, n2, n3)
|
||||
function node_fusion!(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
|
||||
@assert is_valid_node_fusion_input(graph, n1, n2, n3)
|
||||
|
||||
# clear snapshot
|
||||
get_snapshot_diff(graph)
|
||||
|
||||
# save children and parents
|
||||
n1_children = children(n1)
|
||||
n3_parents = parents(n3)
|
||||
n3_children = children(n3)
|
||||
n1Children = children(n1)
|
||||
n3Parents = parents(n3)
|
||||
|
||||
n1Task = copy(n1.task)
|
||||
n3Task = copy(n3.task)
|
||||
|
||||
# assemble the input node vectors of n1 and n3 to save into the FusedComputeTask
|
||||
n1Inputs = Vector{Symbol}()
|
||||
for child in n1Children
|
||||
push!(n1Inputs, Symbol(to_var_name(child.id)))
|
||||
end
|
||||
|
||||
# remove the edges and nodes that will be replaced by the fused node
|
||||
remove_edge!(graph, n1, n2)
|
||||
@ -172,29 +177,38 @@ function node_fusion!(
|
||||
remove_node!(graph, n2)
|
||||
|
||||
# get n3's children now so it automatically excludes n2
|
||||
n3_children = children(n3)
|
||||
n3Children = children(n3)
|
||||
|
||||
n3Inputs = Vector{Symbol}()
|
||||
for child in n3Children
|
||||
push!(n3Inputs, Symbol(to_var_name(child.id)))
|
||||
end
|
||||
|
||||
remove_node!(graph, n3)
|
||||
|
||||
# create new node with the fused compute task
|
||||
new_node =
|
||||
ComputeTaskNode(FusedComputeTask{typeof(n1.task), typeof(n3.task)}())
|
||||
insert_node!(graph, new_node)
|
||||
newNode = ComputeTaskNode(FusedComputeTask(n1Task, n3Task, n1Inputs, Symbol(to_var_name(n2.id)), n3Inputs))
|
||||
insert_node!(graph, newNode)
|
||||
|
||||
for child in n1_children
|
||||
for child in n1Children
|
||||
remove_edge!(graph, child, n1)
|
||||
insert_edge!(graph, child, new_node)
|
||||
insert_edge!(graph, child, newNode)
|
||||
end
|
||||
|
||||
for child in n3_children
|
||||
for child in n3Children
|
||||
remove_edge!(graph, child, n3)
|
||||
if !(child in n1_children)
|
||||
insert_edge!(graph, child, new_node)
|
||||
if !(child in n1Children)
|
||||
insert_edge!(graph, child, newNode)
|
||||
end
|
||||
end
|
||||
|
||||
for parent in n3_parents
|
||||
for parent in n3Parents
|
||||
remove_edge!(graph, n3, parent)
|
||||
insert_edge!(graph, new_node, parent)
|
||||
insert_edge!(graph, newNode, parent)
|
||||
|
||||
# important! update the parent node's child names in case they are fused compute tasks
|
||||
# needed for compute generation so the fused compute task can correctly match inputs to its component tasks
|
||||
update_child!(graph, parent, Symbol(to_var_name(n3.id)), Symbol(to_var_name(newNode.id)))
|
||||
end
|
||||
|
||||
return get_snapshot_diff(graph)
|
||||
@ -208,21 +222,26 @@ Reduce the given nodes together into one node, return the applied difference to
|
||||
For details see [`NodeReduction`](@ref).
|
||||
"""
|
||||
function node_reduction!(graph::DAG, nodes::Vector{Node})
|
||||
# @assert is_valid_node_reduction_input(graph, nodes)
|
||||
@assert is_valid_node_reduction_input(graph, nodes)
|
||||
|
||||
# clear snapshot
|
||||
get_snapshot_diff(graph)
|
||||
|
||||
n1 = nodes[1]
|
||||
n1_children = children(n1)
|
||||
n1Children = children(n1)
|
||||
|
||||
n1_parents = Set(n1.parents)
|
||||
new_parents = Set{Node}()
|
||||
n1Parents = Set(n1.parents)
|
||||
|
||||
# set of the new parents of n1
|
||||
newParents = Set{Node}()
|
||||
|
||||
# names of the previous children that n1 now replaces per parent
|
||||
newParentsChildNames = Dict{Node, Symbol}()
|
||||
|
||||
# remove all of the nodes' parents and children and the nodes themselves (except for first node)
|
||||
for i in 2:length(nodes)
|
||||
n = nodes[i]
|
||||
for child in n1_children
|
||||
for child in n1Children
|
||||
remove_edge!(graph, child, n)
|
||||
end
|
||||
|
||||
@ -230,17 +249,23 @@ function node_reduction!(graph::DAG, nodes::Vector{Node})
|
||||
remove_edge!(graph, n, parent)
|
||||
|
||||
# collect all parents
|
||||
push!(new_parents, parent)
|
||||
push!(newParents, parent)
|
||||
newParentsChildNames[parent] = Symbol(to_var_name(n.id))
|
||||
end
|
||||
|
||||
remove_node!(graph, n)
|
||||
end
|
||||
|
||||
setdiff!(new_parents, n1_parents)
|
||||
|
||||
for parent in new_parents
|
||||
for parent in newParents
|
||||
# now add parents of all input nodes to n1 without duplicates
|
||||
insert_edge!(graph, n1, parent)
|
||||
if !(parent in n1Parents)
|
||||
# don't double insert edges
|
||||
insert_edge!(graph, n1, parent)
|
||||
end
|
||||
|
||||
# this has to be done for all parents, even the ones of n1 because they can be duplicate
|
||||
prevChild = newParentsChildNames[parent]
|
||||
update_child!(graph, parent, prevChild, Symbol(to_var_name(n1.id)))
|
||||
end
|
||||
|
||||
return get_snapshot_diff(graph)
|
||||
@ -254,30 +279,33 @@ Split the given node into one node per parent, return the applied difference to
|
||||
For details see [`NodeSplit`](@ref).
|
||||
"""
|
||||
function node_split!(graph::DAG, n1::Node)
|
||||
# @assert is_valid_node_split_input(graph, n1)
|
||||
@assert is_valid_node_split_input(graph, n1)
|
||||
|
||||
# clear snapshot
|
||||
get_snapshot_diff(graph)
|
||||
|
||||
n1_parents = parents(n1)
|
||||
n1_children = children(n1)
|
||||
n1Parents = parents(n1)
|
||||
n1Children = children(n1)
|
||||
|
||||
for parent in n1_parents
|
||||
for parent in n1Parents
|
||||
remove_edge!(graph, n1, parent)
|
||||
end
|
||||
for child in n1_children
|
||||
for child in n1Children
|
||||
remove_edge!(graph, child, n1)
|
||||
end
|
||||
remove_node!(graph, n1)
|
||||
|
||||
for parent in n1_parents
|
||||
n_copy = copy(n1)
|
||||
insert_node!(graph, n_copy)
|
||||
insert_edge!(graph, n_copy, parent)
|
||||
for parent in n1Parents
|
||||
nCopy = copy(n1)
|
||||
|
||||
for child in n1_children
|
||||
insert_edge!(graph, child, n_copy)
|
||||
insert_node!(graph, nCopy)
|
||||
insert_edge!(graph, nCopy, parent)
|
||||
|
||||
for child in n1Children
|
||||
insert_edge!(graph, child, nCopy)
|
||||
end
|
||||
|
||||
update_child!(graph, parent, Symbol(to_var_name(n1.id)), Symbol(to_var_name(nCopy.id)))
|
||||
end
|
||||
|
||||
return get_snapshot_diff(graph)
|
||||
|
@ -7,10 +7,7 @@ using Base.Threads
|
||||
|
||||
Insert the given node fusion into its input nodes' operation caches. For the compute nodes, locking via the given `locks` is employed to have safe multi-threading. For a large set of nodes, contention on the locks should be very small.
|
||||
"""
|
||||
function insert_operation!(
|
||||
nf::NodeFusion,
|
||||
locks::Dict{ComputeTaskNode, SpinLock},
|
||||
)
|
||||
function insert_operation!(nf::NodeFusion, locks::Dict{ComputeTaskNode, SpinLock})
|
||||
n1 = nf.input[1]
|
||||
n2 = nf.input[2]
|
||||
n3 = nf.input[3]
|
||||
@ -52,10 +49,7 @@ end
|
||||
|
||||
Insert the node reductions into the graph and the nodes' caches. Employs multithreading for speedup.
|
||||
"""
|
||||
function nr_insertion!(
|
||||
operations::PossibleOperations,
|
||||
nodeReductions::Vector{Vector{NodeReduction}},
|
||||
)
|
||||
function nr_insertion!(operations::PossibleOperations, nodeReductions::Vector{Vector{NodeReduction}})
|
||||
total_len = 0
|
||||
for vec in nodeReductions
|
||||
total_len += length(vec)
|
||||
@ -83,11 +77,7 @@ end
|
||||
|
||||
Insert the node fusions into the graph and the nodes' caches. Employs multithreading for speedup.
|
||||
"""
|
||||
function nf_insertion!(
|
||||
graph::DAG,
|
||||
operations::PossibleOperations,
|
||||
nodeFusions::Vector{Vector{NodeFusion}},
|
||||
)
|
||||
function nf_insertion!(graph::DAG, operations::PossibleOperations, nodeFusions::Vector{Vector{NodeFusion}})
|
||||
total_len = 0
|
||||
for vec in nodeFusions
|
||||
total_len += length(vec)
|
||||
@ -122,10 +112,7 @@ end
|
||||
|
||||
Insert the node splits into the graph and the nodes' caches. Employs multithreading for speedup.
|
||||
"""
|
||||
function ns_insertion!(
|
||||
operations::PossibleOperations,
|
||||
nodeSplits::Vector{Vector{NodeSplit}},
|
||||
)
|
||||
function ns_insertion!(operations::PossibleOperations, nodeSplits::Vector{Vector{NodeSplit}})
|
||||
total_len = 0
|
||||
for vec in nodeSplits
|
||||
total_len += length(vec)
|
||||
@ -231,16 +218,12 @@ function generate_operations(graph::DAG)
|
||||
continue
|
||||
end
|
||||
|
||||
push!(
|
||||
generatedFusions[threadid()],
|
||||
NodeFusion((child_node, node, parent_node)),
|
||||
)
|
||||
push!(generatedFusions[threadid()], NodeFusion((child_node, node, parent_node)))
|
||||
end
|
||||
end
|
||||
|
||||
# launch thread for node fusion insertion
|
||||
nf_task =
|
||||
@task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
|
||||
nf_task = @task nf_insertion!(graph, graph.possibleOperations, generatedFusions)
|
||||
schedule(nf_task)
|
||||
|
||||
# find possible node splits
|
||||
|
@ -4,9 +4,7 @@
|
||||
Return whether `operations` is empty, i.e. all of its fields are empty.
|
||||
"""
|
||||
function isempty(operations::PossibleOperations)
|
||||
return isempty(operations.nodeFusions) &&
|
||||
isempty(operations.nodeReductions) &&
|
||||
isempty(operations.nodeSplits)
|
||||
return isempty(operations.nodeFusions) && isempty(operations.nodeReductions) && isempty(operations.nodeSplits)
|
||||
end
|
||||
|
||||
"""
|
||||
@ -63,9 +61,7 @@ function can_fuse(n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
|
||||
return false
|
||||
end
|
||||
|
||||
if length(n2.parents) != 1 ||
|
||||
length(n2.children) != 1 ||
|
||||
length(n1.parents) != 1
|
||||
if length(n2.parents) != 1 || length(n2.children) != 1 || length(n1.parents) != 1
|
||||
return false
|
||||
end
|
||||
|
||||
|
@ -9,24 +9,12 @@ Assert for a gven node fusion input whether the nodes can be fused. For the requ
|
||||
|
||||
Intended for use with `@assert` or `@test`.
|
||||
"""
|
||||
function is_valid_node_fusion_input(
|
||||
graph::DAG,
|
||||
n1::ComputeTaskNode,
|
||||
n2::DataTaskNode,
|
||||
n3::ComputeTaskNode,
|
||||
)
|
||||
function is_valid_node_fusion_input(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode)
|
||||
if !(n1 in graph) || !(n2 in graph) || !(n3 in graph)
|
||||
throw(
|
||||
AssertionError(
|
||||
"[Node Fusion] The given nodes are not part of the given graph",
|
||||
),
|
||||
)
|
||||
throw(AssertionError("[Node Fusion] The given nodes are not part of the given graph"))
|
||||
end
|
||||
|
||||
if !is_child(n1, n2) ||
|
||||
!is_child(n2, n3) ||
|
||||
!is_parent(n3, n2) ||
|
||||
!is_parent(n2, n1)
|
||||
if !is_child(n1, n2) || !is_child(n2, n3) || !is_parent(n3, n2) || !is_parent(n2, n1)
|
||||
throw(
|
||||
AssertionError(
|
||||
"[Node Fusion] The given nodes are not connected by edges which is required for node fusion",
|
||||
@ -35,27 +23,19 @@ function is_valid_node_fusion_input(
|
||||
end
|
||||
|
||||
if length(n2.parents) > 1
|
||||
throw(
|
||||
AssertionError(
|
||||
"[Node Fusion] The given data node has more than one parent",
|
||||
),
|
||||
)
|
||||
throw(AssertionError("[Node Fusion] The given data node has more than one parent"))
|
||||
end
|
||||
if length(n2.children) > 1
|
||||
throw(
|
||||
AssertionError(
|
||||
"[Node Fusion] The given data node has more than one child",
|
||||
),
|
||||
)
|
||||
throw(AssertionError("[Node Fusion] The given data node has more than one child"))
|
||||
end
|
||||
if length(n1.parents) > 1
|
||||
throw(
|
||||
AssertionError(
|
||||
"[Node Fusion] The given n1 has more than one parent",
|
||||
),
|
||||
)
|
||||
throw(AssertionError("[Node Fusion] The given n1 has more than one parent"))
|
||||
end
|
||||
|
||||
@assert is_valid(graph, n1)
|
||||
@assert is_valid(graph, n2)
|
||||
@assert is_valid(graph, n3)
|
||||
|
||||
return true
|
||||
end
|
||||
|
||||
@ -69,22 +49,21 @@ Intended for use with `@assert` or `@test`.
|
||||
function is_valid_node_reduction_input(graph::DAG, nodes::Vector{Node})
|
||||
for n in nodes
|
||||
if n ∉ graph
|
||||
throw(
|
||||
AssertionError(
|
||||
"[Node Reduction] The given nodes are not part of the given graph",
|
||||
),
|
||||
)
|
||||
throw(AssertionError("[Node Reduction] The given nodes are not part of the given graph"))
|
||||
end
|
||||
@assert is_valid(graph, n)
|
||||
end
|
||||
|
||||
t = typeof(nodes[1].task)
|
||||
for n in nodes
|
||||
if typeof(n.task) != t
|
||||
throw(
|
||||
AssertionError(
|
||||
"[Node Reduction] The given nodes are not of the same type",
|
||||
),
|
||||
)
|
||||
throw(AssertionError("[Node Reduction] The given nodes are not of the same type"))
|
||||
end
|
||||
|
||||
if (typeof(n) <: DataTaskNode)
|
||||
if (n.name != nodes[1].name)
|
||||
throw(AssertionError("[Node Reduction] The given nodes do not have the same name"))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@ -111,11 +90,7 @@ Intended for use with `@assert` or `@test`.
|
||||
"""
|
||||
function is_valid_node_split_input(graph::DAG, n1::Node)
|
||||
if n1 ∉ graph
|
||||
throw(
|
||||
AssertionError(
|
||||
"[Node Split] The given node is not part of the given graph",
|
||||
),
|
||||
)
|
||||
throw(AssertionError("[Node Split] The given node is not part of the given graph"))
|
||||
end
|
||||
|
||||
if length(n1.parents) <= 1
|
||||
@ -126,6 +101,8 @@ function is_valid_node_split_input(graph::DAG, n1::Node)
|
||||
)
|
||||
end
|
||||
|
||||
@assert is_valid(graph, n1)
|
||||
|
||||
return true
|
||||
end
|
||||
|
||||
@ -163,12 +140,7 @@ Assert for a given [`NodeFusion`](@ref) whether it is a valid operation in the g
|
||||
Intended for use with `@assert` or `@test`.
|
||||
"""
|
||||
function is_valid(graph::DAG, nf::NodeFusion)
|
||||
@assert is_valid_node_fusion_input(
|
||||
graph,
|
||||
nf.input[1],
|
||||
nf.input[2],
|
||||
nf.input[3],
|
||||
)
|
||||
@assert is_valid_node_fusion_input(graph, nf.input[1], nf.input[2], nf.input[3])
|
||||
@assert nf in graph.possibleOperations.nodeFusions "NodeFusion is not part of the graph's possible operations!"
|
||||
return true
|
||||
end
|
||||
|
8
src/optimization/greedy.jl
Normal file
8
src/optimization/greedy.jl
Normal file
@ -0,0 +1,8 @@
|
||||
"""
|
||||
GreedyOptimizer
|
||||
|
||||
An implementation of the greedy optimization algorithm, simply choosing the best next option evaluated with the given estimator.
|
||||
"""
|
||||
struct GreedyOptimizer
|
||||
estimator::AbstractEstimator
|
||||
end
|
@ -11,8 +11,7 @@ function -(prop1::GraphProperties, prop2::GraphProperties)
|
||||
computeIntensity = if (prop1.data - prop2.data == 0)
|
||||
0.0
|
||||
else
|
||||
(prop1.computeEffort - prop2.computeEffort) /
|
||||
(prop1.data - prop2.data)
|
||||
(prop1.computeEffort - prop2.computeEffort) / (prop1.data - prop2.data)
|
||||
end,
|
||||
cost = prop1.cost - prop2.cost,
|
||||
noNodes = prop1.noNodes - prop2.noNodes,
|
||||
@ -33,8 +32,7 @@ function +(prop1::GraphProperties, prop2::GraphProperties)
|
||||
computeIntensity = if (prop1.data + prop2.data == 0)
|
||||
0.0
|
||||
else
|
||||
(prop1.computeEffort + prop2.computeEffort) /
|
||||
(prop1.data + prop2.data)
|
||||
(prop1.computeEffort + prop2.computeEffort) / (prop1.data + prop2.data)
|
||||
end,
|
||||
cost = prop1.cost + prop2.cost,
|
||||
noNodes = prop1.noNodes + prop2.noNodes,
|
||||
|
50
src/scheduler/greedy.jl
Normal file
50
src/scheduler/greedy.jl
Normal file
@ -0,0 +1,50 @@
|
||||
|
||||
"""
|
||||
GreedyScheduler
|
||||
|
||||
A greedy implementation of a scheduler, creating a topological ordering of nodes and naively balancing them onto the different devices.
|
||||
"""
|
||||
struct GreedyScheduler end
|
||||
|
||||
function schedule_dag(::GreedyScheduler, graph::DAG, machine::Machine)
|
||||
nodeQueue = PriorityQueue{Node, Int}()
|
||||
|
||||
# use a priority equal to the number of unseen children -> 0 are nodes that can be added
|
||||
for node in get_entry_nodes(graph)
|
||||
enqueue!(nodeQueue, node => 0)
|
||||
end
|
||||
|
||||
schedule = Vector{Node}()
|
||||
sizehint!(schedule, length(graph.nodes))
|
||||
|
||||
# keep an accumulated cost of things scheduled to this device so far
|
||||
deviceAccCost = PriorityQueue{AbstractDevice, Int}()
|
||||
for device in machine.devices
|
||||
enqueue!(deviceAccCost, device => 0)
|
||||
end
|
||||
|
||||
node = nothing
|
||||
while !isempty(nodeQueue)
|
||||
@assert peek(nodeQueue)[2] == 0
|
||||
node = dequeue!(nodeQueue)
|
||||
|
||||
# assign the device with lowest accumulated cost to the node (if it's a compute node)
|
||||
if (isa(node, ComputeTaskNode))
|
||||
lowestDevice = peek(deviceAccCost)[1]
|
||||
node.device = lowestDevice
|
||||
deviceAccCost[lowestDevice] = compute_effort(node.task)
|
||||
end
|
||||
|
||||
push!(schedule, node)
|
||||
for parent in node.parents
|
||||
# reduce the priority of all parents by one
|
||||
if (!haskey(nodeQueue, parent))
|
||||
enqueue!(nodeQueue, parent => length(parent.children) - 1)
|
||||
else
|
||||
nodeQueue[parent] = nodeQueue[parent] - 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return schedule
|
||||
end
|
18
src/scheduler/interface.jl
Normal file
18
src/scheduler/interface.jl
Normal file
@ -0,0 +1,18 @@
|
||||
|
||||
"""
|
||||
Scheduler
|
||||
|
||||
Abstract base type for scheduler implementations. The scheduler is used to assign each node to a device and create a topological ordering of tasks.
|
||||
"""
|
||||
abstract type Scheduler end
|
||||
|
||||
"""
|
||||
schedule_dag(::Scheduler, ::DAG, ::Machine)
|
||||
|
||||
Interface functions that must be implemented for implementations of [`Scheduler`](@ref).
|
||||
|
||||
The function assigns each [`ComputeTaskNode`](@ref) of the [`DAG`](@ref) to one of the devices in the given [`Machine`](@ref) and returns a `Vector{Node}` representing a topological ordering.
|
||||
|
||||
[`DataTaskNode`](@ref)s are not scheduled to devices since they do not compute. Instead, a data node transfers data from the [`AbstractDevice`](@ref) of their child to all [`AbstractDevice`](@ref)s of its parents.
|
||||
"""
|
||||
function schedule_dag end
|
89
src/task/compute.jl
Normal file
89
src/task/compute.jl
Normal file
@ -0,0 +1,89 @@
|
||||
|
||||
"""
|
||||
compute(t::FusedComputeTask, data)
|
||||
|
||||
Compute a [`FusedComputeTask`](@ref). This simply asserts false and should not be called. Fused Compute Tasks generate their expressions directly through the other tasks instead.
|
||||
"""
|
||||
function compute(t::FusedComputeTask, data)
|
||||
@assert false "This is not implemented and should never be called"
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector{String}, outExpr::String)
|
||||
|
||||
Generate code evaluating a [`FusedComputeTask`](@ref) on `inExprs`, providing the output on `outExpr`.
|
||||
`inExprs` should be of the correct types and may be heterogeneous. `outExpr` will be of the type of the output of `T2` of t.
|
||||
"""
|
||||
function get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
inExprs1 = Vector()
|
||||
for sym in t.t1_inputs
|
||||
push!(inExprs1, gen_access_expr(device, sym))
|
||||
end
|
||||
|
||||
outExpr1 = gen_access_expr(device, t.t1_output)
|
||||
|
||||
inExprs2 = Vector()
|
||||
for sym in t.t2_inputs
|
||||
push!(inExprs2, gen_access_expr(device, sym))
|
||||
end
|
||||
|
||||
expr1 = get_expression(t.first_task, device, inExprs1, outExpr1)
|
||||
expr2 = get_expression(t.second_task, device, [inExprs2..., outExpr1], outExpr)
|
||||
|
||||
full_expr = Expr(:block, expr1, expr2)
|
||||
|
||||
return full_expr
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(node::ComputeTaskNode)
|
||||
|
||||
Generate and return code for a given [`ComputeTaskNode`](@ref).
|
||||
"""
|
||||
function get_expression(node::ComputeTaskNode)
|
||||
@assert length(node.children) <= children(node.task) "Node $(node) has too many children for its task: node has $(length(node.children)) versus task has $(children(node.task))\nNode's children: $(getfield.(node.children, :children))"
|
||||
@assert !ismissing(node.device) "Trying to get expression for an unscheduled ComputeTaskNode\nNode: $(node)"
|
||||
|
||||
inExprs = Vector()
|
||||
for id in getfield.(node.children, :id)
|
||||
push!(inExprs, gen_access_expr(node.device, Symbol(to_var_name(id))))
|
||||
end
|
||||
outExpr = gen_access_expr(node.device, Symbol(to_var_name(node.id)))
|
||||
|
||||
return get_expression(node.task, node.device, inExprs, outExpr)
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(node::DataTaskNode)
|
||||
|
||||
Generate and return code for a given [`DataTaskNode`](@ref).
|
||||
"""
|
||||
function get_expression(node::DataTaskNode)
|
||||
@assert length(node.children) == 1 "Trying to call get_expression on a data task node that has $(length(node.children)) children instead of 1"
|
||||
|
||||
# TODO: dispatch to device implementations generating the copy commands
|
||||
|
||||
child = node.children[1]
|
||||
inExpr = eval(gen_access_expr(child.device, Symbol(to_var_name(child.id))))
|
||||
outExpr = eval(gen_access_expr(child.device, Symbol(to_var_name(node.id))))
|
||||
dataTransportExp = Meta.parse("$outExpr = $inExpr")
|
||||
|
||||
return dataTransportExp
|
||||
end
|
||||
|
||||
"""
|
||||
get_init_expression(node::DataTaskNode, device::AbstractDevice)
|
||||
|
||||
Generate and return code for the initial input reading expression for [`DataTaskNode`](@ref)s with 0 children, i.e., entry nodes.
|
||||
|
||||
See also: [`get_entry_nodes`](@ref)
|
||||
"""
|
||||
function get_init_expression(node::DataTaskNode, device::AbstractDevice)
|
||||
@assert isempty(node.children) "Trying to call get_init_expression on a data task node that is not an entry node."
|
||||
|
||||
inExpr = eval(gen_access_expr(device, Symbol("$(to_var_name(node.id))_in")))
|
||||
outExpr = eval(gen_access_expr(device, Symbol(to_var_name(node.id))))
|
||||
dataTransportExp = Meta.parse("$outExpr = $inExpr")
|
||||
|
||||
return dataTransportExp
|
||||
end
|
@ -3,8 +3,7 @@
|
||||
|
||||
Fallback implementation of the copy of an abstract data task, throwing an error.
|
||||
"""
|
||||
copy(t::AbstractDataTask) =
|
||||
error("Need to implement copying for your data tasks!")
|
||||
copy(t::AbstractDataTask) = error("Need to implement copying for your data tasks!")
|
||||
|
||||
"""
|
||||
copy(t::AbstractComputeTask)
|
||||
@ -12,3 +11,21 @@ copy(t::AbstractDataTask) =
|
||||
Return a copy of the given compute task.
|
||||
"""
|
||||
copy(t::AbstractComputeTask) = typeof(t)()
|
||||
|
||||
"""
|
||||
copy(t::FusedComputeTask)
|
||||
|
||||
Return a copy of th egiven [`FusedComputeTask`](@ref).
|
||||
"""
|
||||
function copy(t::FusedComputeTask{T1, T2}) where {T1, T2}
|
||||
return FusedComputeTask{T1, T2}(
|
||||
copy(t.first_task),
|
||||
copy(t.second_task),
|
||||
copy(t.t1_inputs),
|
||||
t.t1_output,
|
||||
copy(t.t2_inputs),
|
||||
)
|
||||
end
|
||||
|
||||
FusedComputeTask{T1, T2}(t1_inputs::Vector{String}, t1_output::String, t2_inputs::Vector{String}) where {T1, T2} =
|
||||
FusedComputeTask{T1, T2}(T1(), T2(), t1_inputs, t1_output, t2_inputs)
|
||||
|
@ -4,6 +4,5 @@
|
||||
Print a string representation of the fused compute task to io.
|
||||
"""
|
||||
function show(io::IO, t::FusedComputeTask)
|
||||
(T1, T2) = get_types(t)
|
||||
return print(io, "ComputeFuse(", T1(), ", ", T2(), ")")
|
||||
return print(io, "ComputeFuse($(t.first_task), $(t.second_task))")
|
||||
end
|
||||
|
@ -49,7 +49,7 @@ end
|
||||
|
||||
Return the compute effort of a data task, always zero, regardless of the specific task.
|
||||
"""
|
||||
compute_effort(t::AbstractDataTask) = 0
|
||||
compute_effort(t::AbstractDataTask) = 0.0
|
||||
|
||||
"""
|
||||
data(t::AbstractDataTask)
|
||||
@ -63,7 +63,7 @@ data(t::AbstractDataTask) = getfield(t, :data)
|
||||
|
||||
Return the data of a compute task, always zero, regardless of the specific task.
|
||||
"""
|
||||
data(t::AbstractComputeTask) = 0
|
||||
data(t::AbstractComputeTask) = 0.0
|
||||
|
||||
"""
|
||||
compute_effort(t::FusedComputeTask)
|
||||
@ -71,8 +71,7 @@ data(t::AbstractComputeTask) = 0
|
||||
Return the compute effort of a fused compute task.
|
||||
"""
|
||||
function compute_effort(t::FusedComputeTask)
|
||||
(T1, T2) = collect(typeof(t).parameters)
|
||||
return compute_effort(T1()) + compute_effort(T2())
|
||||
return compute_effort(t.first_task) + compute_effort(t.second_task)
|
||||
end
|
||||
|
||||
"""
|
||||
@ -81,30 +80,3 @@ end
|
||||
Return a tuple of a the fused compute task's components' types.
|
||||
"""
|
||||
get_types(::FusedComputeTask{T1, T2}) where {T1, T2} = (T1, T2)
|
||||
|
||||
"""
|
||||
get_expression(t::AbstractTask)
|
||||
|
||||
Return an expression evaluating the given task on the :dataIn symbol
|
||||
"""
|
||||
function get_expression(t::AbstractTask)
|
||||
return quote
|
||||
dataOut = compute($t, dataIn)
|
||||
end
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression()
|
||||
"""
|
||||
function get_expression(
|
||||
t::FusedComputeTask,
|
||||
inSymbol::Symbol,
|
||||
outSymbol::Symbol,
|
||||
)
|
||||
#TODO
|
||||
computeExp = quote
|
||||
$outSymbol = compute($t, $inSymbol)
|
||||
end
|
||||
|
||||
return computeExp
|
||||
end
|
||||
|
@ -26,5 +26,13 @@ A fused compute task made up of the computation of first `T1` and then `T2`.
|
||||
|
||||
Also see: [`get_types`](@ref).
|
||||
"""
|
||||
struct FusedComputeTask{T1 <: AbstractComputeTask, T2 <: AbstractComputeTask} <:
|
||||
AbstractComputeTask end
|
||||
struct FusedComputeTask{T1 <: AbstractComputeTask, T2 <: AbstractComputeTask} <: AbstractComputeTask
|
||||
first_task::T1
|
||||
second_task::T2
|
||||
# the names of the inputs for T1
|
||||
t1_inputs::Vector{Symbol}
|
||||
# output name of T1
|
||||
t1_output::Symbol
|
||||
# t2_inputs doesn't include the output of t1, that's implicit
|
||||
t2_inputs::Vector{Symbol}
|
||||
end
|
||||
|
@ -87,3 +87,19 @@ Return the memory footprint of the node in Byte. Used in [`mem(graph::DAG)`](@re
|
||||
function mem(node::Node)
|
||||
return Base.summarysize(node, exclude = Union{Node, Operation})
|
||||
end
|
||||
|
||||
"""
|
||||
unroll_symbol_vector(vec::Vector{Symbol})
|
||||
|
||||
Return the given vector as single String without quotation marks or brackets.
|
||||
"""
|
||||
function unroll_symbol_vector(vec::Vector)
|
||||
result = ""
|
||||
for s in vec
|
||||
if (result != "")
|
||||
result *= ", "
|
||||
end
|
||||
result *= "$s"
|
||||
end
|
||||
return result
|
||||
end
|
||||
|
@ -1,3 +1,5 @@
|
||||
[deps]
|
||||
AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753"
|
||||
QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
|
||||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
@ -2,7 +2,7 @@ using Random
|
||||
|
||||
function test_known_graph(name::String, n, fusion_test = true)
|
||||
@testset "Test $name Graph ($n)" begin
|
||||
graph = parse_abc(joinpath(@__DIR__, "..", "input", "$name.txt"))
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "$name.txt"), ABCModel())
|
||||
props = get_properties(graph)
|
||||
|
||||
if (fusion_test)
|
||||
|
@ -5,51 +5,51 @@ import MetagraphOptimization.make_node
|
||||
@testset "Unit Tests Node Reduction" begin
|
||||
graph = MetagraphOptimization.DAG()
|
||||
|
||||
d_exit = insert_node!(graph, make_node(DataTask(10)), false)
|
||||
d_exit = insert_node!(graph, make_node(DataTask(10)), track = false)
|
||||
|
||||
s0 = insert_node!(graph, make_node(ComputeTaskS2()), false)
|
||||
s0 = insert_node!(graph, make_node(ComputeTaskS2()), track = false)
|
||||
|
||||
ED = insert_node!(graph, make_node(DataTask(3)), false)
|
||||
FD = insert_node!(graph, make_node(DataTask(3)), false)
|
||||
ED = insert_node!(graph, make_node(DataTask(3)), track = false)
|
||||
FD = insert_node!(graph, make_node(DataTask(3)), track = false)
|
||||
|
||||
EC = insert_node!(graph, make_node(ComputeTaskV()), false)
|
||||
FC = insert_node!(graph, make_node(ComputeTaskV()), false)
|
||||
EC = insert_node!(graph, make_node(ComputeTaskV()), track = false)
|
||||
FC = insert_node!(graph, make_node(ComputeTaskV()), track = false)
|
||||
|
||||
A1D = insert_node!(graph, make_node(DataTask(4)), false)
|
||||
B1D_1 = insert_node!(graph, make_node(DataTask(4)), false)
|
||||
B1D_2 = insert_node!(graph, make_node(DataTask(4)), false)
|
||||
C1D = insert_node!(graph, make_node(DataTask(4)), false)
|
||||
A1D = insert_node!(graph, make_node(DataTask(4)), track = false)
|
||||
B1D_1 = insert_node!(graph, make_node(DataTask(4)), track = false)
|
||||
B1D_2 = insert_node!(graph, make_node(DataTask(4)), track = false)
|
||||
C1D = insert_node!(graph, make_node(DataTask(4)), track = false)
|
||||
|
||||
A1C = insert_node!(graph, make_node(ComputeTaskU()), false)
|
||||
B1C_1 = insert_node!(graph, make_node(ComputeTaskU()), false)
|
||||
B1C_2 = insert_node!(graph, make_node(ComputeTaskU()), false)
|
||||
C1C = insert_node!(graph, make_node(ComputeTaskU()), false)
|
||||
A1C = insert_node!(graph, make_node(ComputeTaskU()), track = false)
|
||||
B1C_1 = insert_node!(graph, make_node(ComputeTaskU()), track = false)
|
||||
B1C_2 = insert_node!(graph, make_node(ComputeTaskU()), track = false)
|
||||
C1C = insert_node!(graph, make_node(ComputeTaskU()), track = false)
|
||||
|
||||
AD = insert_node!(graph, make_node(DataTask(5)), false)
|
||||
BD = insert_node!(graph, make_node(DataTask(5)), false)
|
||||
CD = insert_node!(graph, make_node(DataTask(5)), false)
|
||||
AD = insert_node!(graph, make_node(DataTask(5)), track = false)
|
||||
BD = insert_node!(graph, make_node(DataTask(5)), track = false)
|
||||
CD = insert_node!(graph, make_node(DataTask(5)), track = false)
|
||||
|
||||
insert_edge!(graph, s0, d_exit, false)
|
||||
insert_edge!(graph, ED, s0, false)
|
||||
insert_edge!(graph, FD, s0, false)
|
||||
insert_edge!(graph, EC, ED, false)
|
||||
insert_edge!(graph, FC, FD, false)
|
||||
insert_edge!(graph, s0, d_exit, track = false)
|
||||
insert_edge!(graph, ED, s0, track = false)
|
||||
insert_edge!(graph, FD, s0, track = false)
|
||||
insert_edge!(graph, EC, ED, track = false)
|
||||
insert_edge!(graph, FC, FD, track = false)
|
||||
|
||||
insert_edge!(graph, A1D, EC, false)
|
||||
insert_edge!(graph, B1D_1, EC, false)
|
||||
insert_edge!(graph, A1D, EC, track = false)
|
||||
insert_edge!(graph, B1D_1, EC, track = false)
|
||||
|
||||
insert_edge!(graph, B1D_2, FC, false)
|
||||
insert_edge!(graph, C1D, FC, false)
|
||||
insert_edge!(graph, B1D_2, FC, track = false)
|
||||
insert_edge!(graph, C1D, FC, track = false)
|
||||
|
||||
insert_edge!(graph, A1C, A1D, false)
|
||||
insert_edge!(graph, B1C_1, B1D_1, false)
|
||||
insert_edge!(graph, B1C_2, B1D_2, false)
|
||||
insert_edge!(graph, C1C, C1D, false)
|
||||
insert_edge!(graph, A1C, A1D, track = false)
|
||||
insert_edge!(graph, B1C_1, B1D_1, track = false)
|
||||
insert_edge!(graph, B1C_2, B1D_2, track = false)
|
||||
insert_edge!(graph, C1C, C1D, track = false)
|
||||
|
||||
insert_edge!(graph, AD, A1C, false)
|
||||
insert_edge!(graph, BD, B1C_1, false)
|
||||
insert_edge!(graph, BD, B1C_2, false)
|
||||
insert_edge!(graph, CD, C1C, false)
|
||||
insert_edge!(graph, AD, A1C, track = false)
|
||||
insert_edge!(graph, BD, B1C_1, track = false)
|
||||
insert_edge!(graph, BD, B1C_2, track = false)
|
||||
insert_edge!(graph, CD, C1C, track = false)
|
||||
|
||||
@test is_valid(graph)
|
||||
|
||||
|
@ -6,6 +6,8 @@ using Test
|
||||
include("unit_tests_tasks.jl")
|
||||
include("unit_tests_nodes.jl")
|
||||
include("unit_tests_properties.jl")
|
||||
include("unit_tests_estimator.jl")
|
||||
include("unit_tests_abcmodel.jl")
|
||||
include("node_reduction.jl")
|
||||
include("unit_tests_graph.jl")
|
||||
include("unit_tests_execution.jl")
|
||||
|
26
test/unit_tests_abcmodel.jl
Normal file
26
test/unit_tests_abcmodel.jl
Normal file
@ -0,0 +1,26 @@
|
||||
using MetagraphOptimization
|
||||
using QEDbase
|
||||
|
||||
import MetagraphOptimization.interaction_result
|
||||
|
||||
def_momentum = SFourMomentum(1.0, 0.0, 0.0, 0.0)
|
||||
|
||||
testparticleTypes = [ParticleA, ParticleB, ParticleC]
|
||||
testparticles = [ParticleA(def_momentum), ParticleB(def_momentum), ParticleC(def_momentum)]
|
||||
|
||||
@testset "Unit Tests ABC-Model" begin
|
||||
@testset "Interaction Result" begin
|
||||
for p1 in testparticleTypes, p2 in testparticleTypes
|
||||
if (p1 == p2)
|
||||
@test_throws AssertionError interaction_result(p1, p2)
|
||||
else
|
||||
@test interaction_result(p1, p2) == setdiff(testparticleTypes, [p1, p2])[1]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@testset "Vertex" begin
|
||||
@test isapprox(MetagraphOptimization.vertex(), 1 / 137.0)
|
||||
end
|
||||
end
|
||||
println("ABC-Model Unit Tests Complete!")
|
99
test/unit_tests_estimator.jl
Normal file
99
test/unit_tests_estimator.jl
Normal file
@ -0,0 +1,99 @@
|
||||
function test_op_specific(estimator, graph, nf::NodeFusion)
|
||||
estimate = operation_effect(estimator, graph, nf)
|
||||
data_reduce = data(nf.input[2].task)
|
||||
|
||||
@test isapprox(estimate.data, -data_reduce)
|
||||
@test isapprox(estimate.computeEffort, 0; atol = eps(Float64))
|
||||
@test isapprox(estimate.computeIntensity, 0; atol = eps(Float64))
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function test_op_specific(estimator, graph, nr::NodeReduction)
|
||||
estimate = operation_effect(estimator, graph, nr)
|
||||
|
||||
data_reduce = data(nr.input[1].task) * (length(nr.input) - 1)
|
||||
compute_effort_reduce = compute_effort(nr.input[1].task) * (length(nr.input) - 1)
|
||||
|
||||
@test isapprox(estimate.data, -data_reduce; atol = eps(Float64))
|
||||
@test isapprox(estimate.computeEffort, -compute_effort_reduce)
|
||||
@test isapprox(estimate.computeIntensity, compute_effort_reduce / data_reduce)
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function test_op_specific(estimator, graph, ns::NodeSplit)
|
||||
estimate = operation_effect(estimator, graph, ns)
|
||||
|
||||
copies = length(ns.input.parents) - 1
|
||||
|
||||
data_increase = data(ns.input.task) * copies
|
||||
compute_effort_increase = compute_effort(ns.input.task) * copies
|
||||
|
||||
@test isapprox(estimate.data, data_increase; atol = eps(Float64))
|
||||
@test isapprox(estimate.computeEffort, compute_effort_increase)
|
||||
@test isapprox(estimate.computeIntensity, compute_effort_increase / data_increase)
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function test_op(estimator, graph, op)
|
||||
#=
|
||||
See issue #16
|
||||
|
||||
estimate_before = graph_cost(estimator, graph)
|
||||
|
||||
estimate = operation_effect(estimator, graph, op)
|
||||
|
||||
push_operation!(graph, op)
|
||||
estimate_after_apply = graph_cost(estimator, graph)
|
||||
reset_graph!(graph)
|
||||
|
||||
@test isapprox((estimate_before + estimate).data, estimate_after_apply.data)
|
||||
@test isapprox((estimate_before + estimate).computeEffort, estimate_after_apply.computeEffort)
|
||||
@test isapprox((estimate_before + estimate).computeIntensity, estimate_after_apply.computeIntensity)
|
||||
=#
|
||||
|
||||
test_op_specific(estimator, graph, op)
|
||||
return nothing
|
||||
end
|
||||
|
||||
@testset "Unit Tests Estimator" begin
|
||||
@testset "Global Metric Estimator" for (graph_string, exp_data, exp_computeEffort) in
|
||||
zip(["AB->AB", "AB->ABBB"], [976, 10944], [53, 1075])
|
||||
estimator = GlobalMetricEstimator()
|
||||
|
||||
@test cost_type(estimator) == CDCost
|
||||
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "$(graph_string).txt"), ABCModel())
|
||||
|
||||
@testset "Graph Cost" begin
|
||||
estimate = graph_cost(estimator, graph)
|
||||
|
||||
@test estimate.data == exp_data
|
||||
@test estimate.computeEffort == exp_computeEffort
|
||||
@test isapprox(estimate.computeIntensity, exp_computeEffort / exp_data)
|
||||
end
|
||||
|
||||
@testset "Operation Cost" begin
|
||||
ops = get_operations(graph)
|
||||
nfs = copy(ops.nodeFusions)
|
||||
nrs = copy(ops.nodeReductions)
|
||||
nss = copy(ops.nodeSplits)
|
||||
|
||||
println(
|
||||
"Testing $(length(ops.nodeFusions))xNF, $(length(ops.nodeReductions))xNR, $(length(ops.nodeSplits))xNS",
|
||||
)
|
||||
for nf in nfs
|
||||
test_op(estimator, graph, nf)
|
||||
end
|
||||
for nr in nrs
|
||||
test_op(estimator, graph, nr)
|
||||
end
|
||||
for ns in nss
|
||||
test_op(estimator, graph, ns)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
println("Estimator Unit Tests Complete!")
|
@ -1,31 +1,219 @@
|
||||
import MetagraphOptimization.A
|
||||
import MetagraphOptimization.B
|
||||
import MetagraphOptimization.ParticleType
|
||||
import MetagraphOptimization.ABCParticle
|
||||
import MetagraphOptimization.interaction_result
|
||||
|
||||
@testset "Unit Tests Graph" begin
|
||||
particles = Dict{ParticleType, Vector{Particle}}(
|
||||
(
|
||||
A => [
|
||||
Particle(0.823648, 0.0, 0.0, 0.823648, A),
|
||||
Particle(0.823648, -0.835061, -0.474802, 0.277915, A),
|
||||
]
|
||||
),
|
||||
(
|
||||
B => [
|
||||
Particle(0.823648, 0.0, 0.0, -0.823648, B),
|
||||
Particle(0.823648, 0.835061, 0.474802, -0.277915, B),
|
||||
]
|
||||
),
|
||||
using QEDbase
|
||||
using AccurateArithmetic
|
||||
|
||||
include("../examples/profiling_utilities.jl")
|
||||
|
||||
const RTOL = sqrt(eps(Float64))
|
||||
|
||||
function check_particle_reverse_moment(p1::SFourMomentum, p2::SFourMomentum)
|
||||
@test isapprox(abs(p1.E), abs(p2.E))
|
||||
@test isapprox(p1.px, -p2.px)
|
||||
@test isapprox(p1.py, -p2.py)
|
||||
@test isapprox(p1.pz, -p2.pz)
|
||||
return nothing
|
||||
end
|
||||
|
||||
function ground_truth_graph_result(input::ABCProcessInput)
|
||||
# formula for one diagram:
|
||||
# u_Bp * iλ * u_Ap * S_C * u_B * iλ * u_A
|
||||
# for the second diagram:
|
||||
# u_B * iλ * u_Ap * S_C * u_Bp * iλ * u_Ap
|
||||
# the "u"s are all 1, we ignore the i, λ is 1/137.
|
||||
|
||||
constant = (1 / 137.0)^2
|
||||
|
||||
# calculate particle C in diagram 1
|
||||
diagram1_C = ParticleC(input.inParticles[1].momentum + input.inParticles[2].momentum)
|
||||
diagram2_C = ParticleC(input.inParticles[1].momentum + input.outParticles[2].momentum)
|
||||
|
||||
diagram1_Cp = ParticleC(input.outParticles[1].momentum + input.outParticles[2].momentum)
|
||||
diagram2_Cp = ParticleC(input.outParticles[1].momentum + input.inParticles[2].momentum)
|
||||
|
||||
check_particle_reverse_moment(diagram1_Cp.momentum, diagram1_C.momentum)
|
||||
check_particle_reverse_moment(diagram2_Cp.momentum, diagram2_C.momentum)
|
||||
@test isapprox(getMass2(diagram1_C.momentum), getMass2(diagram1_Cp.momentum))
|
||||
@test isapprox(getMass2(diagram2_C.momentum), getMass2(diagram2_Cp.momentum))
|
||||
|
||||
inner1 = MetagraphOptimization.inner_edge(diagram1_C)
|
||||
inner2 = MetagraphOptimization.inner_edge(diagram2_C)
|
||||
|
||||
diagram1_result = inner1 * constant
|
||||
diagram2_result = inner2 * constant
|
||||
|
||||
return sum_kbn([diagram1_result, diagram2_result])
|
||||
end
|
||||
|
||||
@testset "Unit Tests Execution" begin
|
||||
machine = get_machine_info()
|
||||
|
||||
process_2_2 = ABCProcessDescription(
|
||||
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
|
||||
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
|
||||
)
|
||||
|
||||
expected_result = 5.5320567694746876e-5
|
||||
particles_2_2 = ABCProcessInput(
|
||||
process_2_2,
|
||||
ABCParticle[
|
||||
ParticleA(SFourMomentum(0.823648, 0.0, 0.0, 0.823648)),
|
||||
ParticleB(SFourMomentum(0.823648, 0.0, 0.0, -0.823648)),
|
||||
],
|
||||
ABCParticle[
|
||||
ParticleA(SFourMomentum(0.823648, -0.835061, -0.474802, 0.277915)),
|
||||
ParticleB(SFourMomentum(0.823648, 0.835061, 0.474802, -0.277915)),
|
||||
],
|
||||
)
|
||||
expected_result = ground_truth_graph_result(particles_2_2)
|
||||
|
||||
for _ in 1:10 # test in a loop because graph layout should not change the result
|
||||
graph = parse_abc(joinpath(@__DIR__, "..", "input", "AB->AB.txt"))
|
||||
@test isapprox(execute(graph, particles), expected_result; rtol = 0.001)
|
||||
@testset "AB->AB no optimization" begin
|
||||
for _ in 1:10 # test in a loop because graph layout should not change the result
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
|
||||
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL)
|
||||
|
||||
code = MetagraphOptimization.gen_code(graph)
|
||||
@test isapprox(execute(code, particles), expected_result; rtol = 0.001)
|
||||
# graph should be fully scheduled after being executed
|
||||
@test is_scheduled(graph)
|
||||
|
||||
func = get_compute_function(graph, process_2_2, machine)
|
||||
@test isapprox(func(particles_2_2), expected_result; rtol = RTOL)
|
||||
end
|
||||
end
|
||||
|
||||
@testset "AB->AB after random walk" begin
|
||||
for i in 1:200
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
|
||||
random_walk!(graph, 50)
|
||||
|
||||
@test is_valid(graph)
|
||||
|
||||
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL)
|
||||
|
||||
# graph should be fully scheduled after being executed
|
||||
@test is_scheduled(graph)
|
||||
end
|
||||
end
|
||||
|
||||
process_2_4 = ABCProcessDescription(
|
||||
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
|
||||
Dict{Type, Int64}(ParticleA => 1, ParticleB => 3),
|
||||
)
|
||||
particles_2_4 = gen_process_input(process_2_4)
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
|
||||
expected_result = execute(graph, process_2_4, machine, particles_2_4)
|
||||
|
||||
@testset "AB->ABBB no optimization" begin
|
||||
for _ in 1:5 # test in a loop because graph layout should not change the result
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
|
||||
@test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = RTOL)
|
||||
|
||||
func = get_compute_function(graph, process_2_4, machine)
|
||||
@test isapprox(func(particles_2_4), expected_result; rtol = RTOL)
|
||||
end
|
||||
end
|
||||
|
||||
@testset "AB->ABBB after random walk" begin
|
||||
for i in 1:50
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel())
|
||||
random_walk!(graph, 100)
|
||||
@test is_valid(graph)
|
||||
|
||||
@test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = RTOL)
|
||||
end
|
||||
end
|
||||
|
||||
@testset "AB->AB large sum fusion" for _ in 1:20
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
|
||||
|
||||
# push a fusion with the sum node
|
||||
ops = get_operations(graph)
|
||||
for fusion in ops.nodeFusions
|
||||
if isa(fusion.input[3].task, ComputeTaskSum)
|
||||
push_operation!(graph, fusion)
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
# push two more fusions with the fused node
|
||||
for _ in 1:15
|
||||
ops = get_operations(graph)
|
||||
for fusion in ops.nodeFusions
|
||||
if isa(fusion.input[3].task, FusedComputeTask)
|
||||
push_operation!(graph, fusion)
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# try execute
|
||||
@test is_valid(graph)
|
||||
expected_result = ground_truth_graph_result(particles_2_2)
|
||||
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL)
|
||||
end
|
||||
|
||||
|
||||
@testset "AB->AB large sum fusion" for _ in 1:20
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
|
||||
|
||||
# push a fusion with the sum node
|
||||
ops = get_operations(graph)
|
||||
for fusion in ops.nodeFusions
|
||||
if isa(fusion.input[3].task, ComputeTaskSum)
|
||||
push_operation!(graph, fusion)
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
# push two more fusions with the fused node
|
||||
for _ in 1:15
|
||||
ops = get_operations(graph)
|
||||
for fusion in ops.nodeFusions
|
||||
if isa(fusion.input[3].task, FusedComputeTask)
|
||||
push_operation!(graph, fusion)
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# try execute
|
||||
@test is_valid(graph)
|
||||
expected_result = ground_truth_graph_result(particles_2_2)
|
||||
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL)
|
||||
end
|
||||
|
||||
@testset "AB->AB fusion edge case" for _ in 1:20
|
||||
graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel())
|
||||
|
||||
# push two fusions with ComputeTaskV
|
||||
for _ in 1:2
|
||||
ops = get_operations(graph)
|
||||
for fusion in ops.nodeFusions
|
||||
if isa(fusion.input[1].task, ComputeTaskV)
|
||||
push_operation!(graph, fusion)
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# push fusions until the end
|
||||
cont = true
|
||||
while cont
|
||||
cont = false
|
||||
ops = get_operations(graph)
|
||||
for fusion in ops.nodeFusions
|
||||
if isa(fusion.input[1].task, FusedComputeTask)
|
||||
push_operation!(graph, fusion)
|
||||
cont = true
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# try execute
|
||||
@test is_valid(graph)
|
||||
expected_result = ground_truth_graph_result(particles_2_2)
|
||||
@test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL)
|
||||
end
|
||||
|
||||
end
|
||||
println("Execution Unit Tests Complete!")
|
||||
|
@ -11,104 +11,101 @@ import MetagraphOptimization.partners
|
||||
@test length(graph.appliedOperations) == 0
|
||||
@test length(graph.operationsToApply) == 0
|
||||
@test length(graph.dirtyNodes) == 0
|
||||
@test length(graph.diff) ==
|
||||
(addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
|
||||
@test length(get_operations(graph)) ==
|
||||
(nodeFusions = 0, nodeReductions = 0, nodeSplits = 0)
|
||||
@test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
|
||||
@test length(get_operations(graph)) == (nodeFusions = 0, nodeReductions = 0, nodeSplits = 0)
|
||||
|
||||
# s to output (exit node)
|
||||
d_exit = insert_node!(graph, make_node(DataTask(10)), false)
|
||||
d_exit = insert_node!(graph, make_node(DataTask(10)), track = false)
|
||||
|
||||
@test length(graph.nodes) == 1
|
||||
@test length(graph.dirtyNodes) == 1
|
||||
|
||||
# final s compute
|
||||
s0 = insert_node!(graph, make_node(ComputeTaskS2()), false)
|
||||
s0 = insert_node!(graph, make_node(ComputeTaskS2()), track = false)
|
||||
|
||||
@test length(graph.nodes) == 2
|
||||
@test length(graph.dirtyNodes) == 2
|
||||
|
||||
# data from v0 and v1 to s0
|
||||
d_v0_s0 = insert_node!(graph, make_node(DataTask(5)), false)
|
||||
d_v1_s0 = insert_node!(graph, make_node(DataTask(5)), false)
|
||||
d_v0_s0 = insert_node!(graph, make_node(DataTask(5)), track = false)
|
||||
d_v1_s0 = insert_node!(graph, make_node(DataTask(5)), track = false)
|
||||
|
||||
# v0 and v1 compute
|
||||
v0 = insert_node!(graph, make_node(ComputeTaskV()), false)
|
||||
v1 = insert_node!(graph, make_node(ComputeTaskV()), false)
|
||||
v0 = insert_node!(graph, make_node(ComputeTaskV()), track = false)
|
||||
v1 = insert_node!(graph, make_node(ComputeTaskV()), track = false)
|
||||
|
||||
# data from uB, uA, uBp and uAp to v0 and v1
|
||||
d_uB_v0 = insert_node!(graph, make_node(DataTask(3)), false)
|
||||
d_uA_v0 = insert_node!(graph, make_node(DataTask(3)), false)
|
||||
d_uBp_v1 = insert_node!(graph, make_node(DataTask(3)), false)
|
||||
d_uAp_v1 = insert_node!(graph, make_node(DataTask(3)), false)
|
||||
d_uB_v0 = insert_node!(graph, make_node(DataTask(3)), track = false)
|
||||
d_uA_v0 = insert_node!(graph, make_node(DataTask(3)), track = false)
|
||||
d_uBp_v1 = insert_node!(graph, make_node(DataTask(3)), track = false)
|
||||
d_uAp_v1 = insert_node!(graph, make_node(DataTask(3)), track = false)
|
||||
|
||||
# uB, uA, uBp and uAp computes
|
||||
uB = insert_node!(graph, make_node(ComputeTaskU()), false)
|
||||
uA = insert_node!(graph, make_node(ComputeTaskU()), false)
|
||||
uBp = insert_node!(graph, make_node(ComputeTaskU()), false)
|
||||
uAp = insert_node!(graph, make_node(ComputeTaskU()), false)
|
||||
uB = insert_node!(graph, make_node(ComputeTaskU()), track = false)
|
||||
uA = insert_node!(graph, make_node(ComputeTaskU()), track = false)
|
||||
uBp = insert_node!(graph, make_node(ComputeTaskU()), track = false)
|
||||
uAp = insert_node!(graph, make_node(ComputeTaskU()), track = false)
|
||||
|
||||
# data from PB, PA, PBp and PAp to uB, uA, uBp and uAp
|
||||
d_PB_uB = insert_node!(graph, make_node(DataTask(6)), false)
|
||||
d_PA_uA = insert_node!(graph, make_node(DataTask(6)), false)
|
||||
d_PBp_uBp = insert_node!(graph, make_node(DataTask(6)), false)
|
||||
d_PAp_uAp = insert_node!(graph, make_node(DataTask(6)), false)
|
||||
d_PB_uB = insert_node!(graph, make_node(DataTask(6)), track = false)
|
||||
d_PA_uA = insert_node!(graph, make_node(DataTask(6)), track = false)
|
||||
d_PBp_uBp = insert_node!(graph, make_node(DataTask(6)), track = false)
|
||||
d_PAp_uAp = insert_node!(graph, make_node(DataTask(6)), track = false)
|
||||
|
||||
# P computes PB, PA, PBp and PAp
|
||||
PB = insert_node!(graph, make_node(ComputeTaskP()), false)
|
||||
PA = insert_node!(graph, make_node(ComputeTaskP()), false)
|
||||
PBp = insert_node!(graph, make_node(ComputeTaskP()), false)
|
||||
PAp = insert_node!(graph, make_node(ComputeTaskP()), false)
|
||||
PB = insert_node!(graph, make_node(ComputeTaskP()), track = false)
|
||||
PA = insert_node!(graph, make_node(ComputeTaskP()), track = false)
|
||||
PBp = insert_node!(graph, make_node(ComputeTaskP()), track = false)
|
||||
PAp = insert_node!(graph, make_node(ComputeTaskP()), track = false)
|
||||
|
||||
# entry nodes getting data for P computes
|
||||
d_PB = insert_node!(graph, make_node(DataTask(4)), false)
|
||||
d_PA = insert_node!(graph, make_node(DataTask(4)), false)
|
||||
d_PBp = insert_node!(graph, make_node(DataTask(4)), false)
|
||||
d_PAp = insert_node!(graph, make_node(DataTask(4)), false)
|
||||
d_PB = insert_node!(graph, make_node(DataTask(4)), track = false)
|
||||
d_PA = insert_node!(graph, make_node(DataTask(4)), track = false)
|
||||
d_PBp = insert_node!(graph, make_node(DataTask(4)), track = false)
|
||||
d_PAp = insert_node!(graph, make_node(DataTask(4)), track = false)
|
||||
|
||||
@test length(graph.nodes) == 26
|
||||
@test length(graph.dirtyNodes) == 26
|
||||
|
||||
# now for all the edges
|
||||
insert_edge!(graph, d_PB, PB, false)
|
||||
insert_edge!(graph, d_PA, PA, false)
|
||||
insert_edge!(graph, d_PBp, PBp, false)
|
||||
insert_edge!(graph, d_PAp, PAp, false)
|
||||
insert_edge!(graph, d_PB, PB, track = false)
|
||||
insert_edge!(graph, d_PA, PA, track = false)
|
||||
insert_edge!(graph, d_PBp, PBp, track = false)
|
||||
insert_edge!(graph, d_PAp, PAp, track = false)
|
||||
|
||||
insert_edge!(graph, PB, d_PB_uB, false)
|
||||
insert_edge!(graph, PA, d_PA_uA, false)
|
||||
insert_edge!(graph, PBp, d_PBp_uBp, false)
|
||||
insert_edge!(graph, PAp, d_PAp_uAp, false)
|
||||
insert_edge!(graph, PB, d_PB_uB, track = false)
|
||||
insert_edge!(graph, PA, d_PA_uA, track = false)
|
||||
insert_edge!(graph, PBp, d_PBp_uBp, track = false)
|
||||
insert_edge!(graph, PAp, d_PAp_uAp, track = false)
|
||||
|
||||
insert_edge!(graph, d_PB_uB, uB, false)
|
||||
insert_edge!(graph, d_PA_uA, uA, false)
|
||||
insert_edge!(graph, d_PBp_uBp, uBp, false)
|
||||
insert_edge!(graph, d_PAp_uAp, uAp, false)
|
||||
insert_edge!(graph, d_PB_uB, uB, track = false)
|
||||
insert_edge!(graph, d_PA_uA, uA, track = false)
|
||||
insert_edge!(graph, d_PBp_uBp, uBp, track = false)
|
||||
insert_edge!(graph, d_PAp_uAp, uAp, track = false)
|
||||
|
||||
insert_edge!(graph, uB, d_uB_v0, false)
|
||||
insert_edge!(graph, uA, d_uA_v0, false)
|
||||
insert_edge!(graph, uBp, d_uBp_v1, false)
|
||||
insert_edge!(graph, uAp, d_uAp_v1, false)
|
||||
insert_edge!(graph, uB, d_uB_v0, track = false)
|
||||
insert_edge!(graph, uA, d_uA_v0, track = false)
|
||||
insert_edge!(graph, uBp, d_uBp_v1, track = false)
|
||||
insert_edge!(graph, uAp, d_uAp_v1, track = false)
|
||||
|
||||
insert_edge!(graph, d_uB_v0, v0, false)
|
||||
insert_edge!(graph, d_uA_v0, v0, false)
|
||||
insert_edge!(graph, d_uBp_v1, v1, false)
|
||||
insert_edge!(graph, d_uAp_v1, v1, false)
|
||||
insert_edge!(graph, d_uB_v0, v0, track = false)
|
||||
insert_edge!(graph, d_uA_v0, v0, track = false)
|
||||
insert_edge!(graph, d_uBp_v1, v1, track = false)
|
||||
insert_edge!(graph, d_uAp_v1, v1, track = false)
|
||||
|
||||
insert_edge!(graph, v0, d_v0_s0, false)
|
||||
insert_edge!(graph, v1, d_v1_s0, false)
|
||||
insert_edge!(graph, v0, d_v0_s0, track = false)
|
||||
insert_edge!(graph, v1, d_v1_s0, track = false)
|
||||
|
||||
insert_edge!(graph, d_v0_s0, s0, false)
|
||||
insert_edge!(graph, d_v1_s0, s0, false)
|
||||
insert_edge!(graph, d_v0_s0, s0, track = false)
|
||||
insert_edge!(graph, d_v1_s0, s0, track = false)
|
||||
|
||||
insert_edge!(graph, s0, d_exit, false)
|
||||
insert_edge!(graph, s0, d_exit, track = false)
|
||||
|
||||
@test length(graph.nodes) == 26
|
||||
@test length(graph.appliedOperations) == 0
|
||||
@test length(graph.operationsToApply) == 0
|
||||
@test length(graph.dirtyNodes) == 26
|
||||
@test length(graph.diff) ==
|
||||
(addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
|
||||
@test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
|
||||
|
||||
@test is_valid(graph)
|
||||
|
||||
@ -135,8 +132,7 @@ import MetagraphOptimization.partners
|
||||
@test length(siblings(s0)) == 1
|
||||
|
||||
operations = get_operations(graph)
|
||||
@test length(operations) ==
|
||||
(nodeFusions = 10, nodeReductions = 0, nodeSplits = 0)
|
||||
@test length(operations) == (nodeFusions = 10, nodeReductions = 0, nodeSplits = 0)
|
||||
@test length(graph.dirtyNodes) == 0
|
||||
|
||||
@test operations == get_operations(graph)
|
||||
@ -157,8 +153,7 @@ import MetagraphOptimization.partners
|
||||
@test length(graph.operationsToApply) == 1
|
||||
@test first(graph.operationsToApply) == nf
|
||||
@test length(graph.dirtyNodes) == 0
|
||||
@test length(graph.diff) ==
|
||||
(addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
|
||||
@test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0)
|
||||
|
||||
# this applies pending operations
|
||||
properties = get_properties(graph)
|
||||
@ -176,8 +171,7 @@ import MetagraphOptimization.partners
|
||||
operations = get_operations(graph)
|
||||
@test length(graph.dirtyNodes) == 0
|
||||
|
||||
@test length(operations) ==
|
||||
(nodeFusions = 9, nodeReductions = 0, nodeSplits = 0)
|
||||
@test length(operations) == (nodeFusions = 9, nodeReductions = 0, nodeSplits = 0)
|
||||
@test !isempty(operations)
|
||||
|
||||
possibleNF = 9
|
||||
@ -185,14 +179,12 @@ import MetagraphOptimization.partners
|
||||
push_operation!(graph, first(operations.nodeFusions))
|
||||
operations = get_operations(graph)
|
||||
possibleNF = possibleNF - 1
|
||||
@test length(operations) ==
|
||||
(nodeFusions = possibleNF, nodeReductions = 0, nodeSplits = 0)
|
||||
@test length(operations) == (nodeFusions = possibleNF, nodeReductions = 0, nodeSplits = 0)
|
||||
end
|
||||
|
||||
@test isempty(operations)
|
||||
|
||||
@test length(operations) ==
|
||||
(nodeFusions = 0, nodeReductions = 0, nodeSplits = 0)
|
||||
@test length(operations) == (nodeFusions = 0, nodeReductions = 0, nodeSplits = 0)
|
||||
@test length(graph.dirtyNodes) == 0
|
||||
@test length(graph.nodes) == 6
|
||||
@test length(graph.appliedOperations) == 10
|
||||
@ -213,8 +205,7 @@ import MetagraphOptimization.partners
|
||||
@test properties.computeIntensity ≈ 28 / 62
|
||||
|
||||
operations = get_operations(graph)
|
||||
@test length(operations) ==
|
||||
(nodeFusions = 10, nodeReductions = 0, nodeSplits = 0)
|
||||
@test length(operations) == (nodeFusions = 10, nodeReductions = 0, nodeSplits = 0)
|
||||
|
||||
@test is_valid(graph)
|
||||
end
|
||||
|
@ -3,8 +3,7 @@
|
||||
nC1 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskU())
|
||||
nC2 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskV())
|
||||
nC3 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskP())
|
||||
nC4 =
|
||||
MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskSum())
|
||||
nC4 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskSum())
|
||||
|
||||
nD1 = MetagraphOptimization.make_node(MetagraphOptimization.DataTask(10))
|
||||
nD2 = MetagraphOptimization.make_node(MetagraphOptimization.DataTask(20))
|
||||
|
@ -5,9 +5,7 @@
|
||||
@test MetagraphOptimization.bytes_to_human_readable(1025) == "1.001 KiB"
|
||||
@test MetagraphOptimization.bytes_to_human_readable(684235) == "668.2 KiB"
|
||||
@test MetagraphOptimization.bytes_to_human_readable(86214576) == "82.22 MiB"
|
||||
@test MetagraphOptimization.bytes_to_human_readable(9241457698) ==
|
||||
"8.607 GiB"
|
||||
@test MetagraphOptimization.bytes_to_human_readable(3218598654367) ==
|
||||
"2.927 TiB"
|
||||
@test MetagraphOptimization.bytes_to_human_readable(9241457698) == "8.607 GiB"
|
||||
@test MetagraphOptimization.bytes_to_human_readable(3218598654367) == "2.927 TiB"
|
||||
end
|
||||
println("Utility Unit Tests Complete!")
|
||||
|
Reference in New Issue
Block a user