Compare commits
	
		
			13 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 43e1866988 | |||
| 4d1dc27f4f | |||
| 968f6856de | |||
| 7d31f61e5f | |||
| 705bfb30fe | |||
| 9d947a49ce | |||
| 1e0e699e6d | |||
| c73053f991 | |||
| 992450374c | |||
| 16274919e4 | |||
| 2709eeb3dc | |||
| 5a30f57e1f | |||
|  | bd6c54c1ae | 
| @@ -1,5 +1,5 @@ | ||||
| indent = 4 | ||||
| margin = 80 | ||||
| margin = 120 | ||||
| always_for_in = true | ||||
| for_in_replacement = "in" | ||||
| whitespace_typedefs = true | ||||
|   | ||||
| @@ -8,7 +8,7 @@ env: | ||||
|  | ||||
| jobs: | ||||
|   prepare: | ||||
|     runs-on: arch-latest | ||||
|     runs-on: ubuntu-22.04 | ||||
|  | ||||
|     steps: | ||||
|       - name: Checkout repository | ||||
| @@ -65,7 +65,7 @@ jobs: | ||||
|  | ||||
|   test: | ||||
|     needs: prepare | ||||
|     runs-on: arch-latest | ||||
|     runs-on: ubuntu-22.04 | ||||
|  | ||||
|     steps: | ||||
|       - name: Checkout repository | ||||
| @@ -108,7 +108,7 @@ jobs: | ||||
|  | ||||
|       - name: Format check | ||||
|         run: | | ||||
|           julia --project=./ -e 'using JuliaFormatter; format(".", verbose=true)' | ||||
|           julia --project=./ -e 'using JuliaFormatter; format(".", verbose=true, ignore=[".julia/*"])' | ||||
|           julia --project=./ -e ' | ||||
|           out = Cmd(`git diff --name-only`) |> read |> String | ||||
|           if out == "" | ||||
| @@ -127,7 +127,7 @@ jobs: | ||||
|  | ||||
|   docs: | ||||
|     needs: prepare | ||||
|     runs-on: arch-latest | ||||
|     runs-on: ubuntu-22.04 | ||||
|  | ||||
|     steps: | ||||
|       - name: Checkout repository | ||||
|   | ||||
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -26,3 +26,5 @@ Manifest.toml | ||||
|  | ||||
| # vscode workspace directory | ||||
| .vscode | ||||
| .julia | ||||
| **/.ipynb_checkpoints/ | ||||
|   | ||||
| @@ -5,9 +5,15 @@ version = "0.1.0" | ||||
|  | ||||
| [deps] | ||||
| AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753" | ||||
| CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" | ||||
| DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" | ||||
| ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" | ||||
| JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" | ||||
| KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" | ||||
| NumaAllocators = "21436f30-1b4a-4f08-87af-e26101bb5379" | ||||
| QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93" | ||||
| Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | ||||
| Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" | ||||
| UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" | ||||
|  | ||||
| [extras] | ||||
|   | ||||
| @@ -42,7 +42,7 @@ Problems: | ||||
| - Lots of testing required because mistakes will propagate and multiply. | ||||
|  | ||||
| ## Other TODOs | ||||
| - Reduce memory footprint of the graph, are the UUIDs too large?  | ||||
| - Reduce memory footprint of the graph | ||||
| - Memory layout of Nodes? They should lie linearly in memory, right now probably on heap? | ||||
| - Add scaling functions | ||||
|  | ||||
| @@ -53,7 +53,7 @@ For graphs AB->AB^n: | ||||
| - Number of ComputeTaskS2 should always be (n+1)! | ||||
| - Number of ComputeTaskU should always be (n+3) | ||||
|  | ||||
| Times are from my home machine: AMD Ryzen 7900X3D, 64GB DDR5 RAM @ 6000MHz | ||||
| Times are from my home machine: AMD Ryzen 7900X3D, 64GB DDR5 RAM @ 6000MHz (not necessarily up to date; check the Jupyter notebooks in `notebooks/` instead) | ||||
|  | ||||
| ``` | ||||
| $ julia --project examples/import_bench.jl | ||||
|   | ||||
| @@ -27,6 +27,7 @@ makedocs( | ||||
|             "Diff" => "lib/internals/diff.md", | ||||
|             "Utility" => "lib/internals/utility.md", | ||||
|             "Code Generation" => "lib/internals/code_gen.md", | ||||
|             "Devices" => "lib/internals/devices.md", | ||||
|         ], | ||||
|         "Contribution" => "contribution.md", | ||||
|     ], | ||||
|   | ||||
							
								
								
									
										75
									
								
								docs/src/flowchart.drawio
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								docs/src/flowchart.drawio
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| <mxfile host="Electron" modified="2023-09-17T13:34:45.840Z" agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/21.6.1 Chrome/114.0.5735.134 Electron/25.6.0 Safari/537.36" etag="e0c8qLevhaP_q_R2fyC9" version="21.6.1" type="device"> | ||||
|   <diagram name="Page-1" id="Vy0cA1nkMPfy-3cC5ahA"> | ||||
|     <mxGraphModel dx="1185" dy="707" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="500" pageHeight="900" math="0" shadow="0"> | ||||
|       <root> | ||||
|         <mxCell id="0" /> | ||||
|         <mxCell id="1" parent="0" /> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-1" target="yG8qeggDCLqQ8GwY7ugi-2"> | ||||
|           <mxGeometry relative="1" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-1" value="Process Generator Script" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1"> | ||||
|           <mxGeometry x="180" y="120" width="120" height="60" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-11" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-2" target="yG8qeggDCLqQ8GwY7ugi-3"> | ||||
|           <mxGeometry relative="1" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-2" value="Process Parser" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1"> | ||||
|           <mxGeometry x="180" y="220" width="120" height="60" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-8" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.25;exitDx=0;exitDy=0;entryX=0;entryY=0.25;entryDx=0;entryDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-3" target="yG8qeggDCLqQ8GwY7ugi-6"> | ||||
|           <mxGeometry relative="1" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-3" target="yG8qeggDCLqQ8GwY7ugi-12"> | ||||
|           <mxGeometry relative="1" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-3" value="Optimizer" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1"> | ||||
|           <mxGeometry x="180" y="320" width="120" height="60" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.75;entryDx=0;entryDy=0;exitX=0;exitY=0.75;exitDx=0;exitDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-6" target="yG8qeggDCLqQ8GwY7ugi-3"> | ||||
|           <mxGeometry relative="1" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-6" value="Fast Cost Estimator<br>(Global Metrics)" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1"> | ||||
|           <mxGeometry x="340" y="320" width="120" height="60" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-12" target="yG8qeggDCLqQ8GwY7ugi-13"> | ||||
|           <mxGeometry relative="1" as="geometry"> | ||||
|             <Array as="points"> | ||||
|               <mxPoint x="80" y="450" /> | ||||
|             </Array> | ||||
|           </mxGeometry> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-12" value="Scheduler" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1"> | ||||
|           <mxGeometry x="180" y="420" width="120" height="60" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0;entryY=0.5;entryDx=0;entryDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-13" target="yG8qeggDCLqQ8GwY7ugi-3"> | ||||
|           <mxGeometry relative="1" as="geometry"> | ||||
|             <mxPoint x="120" y="380" as="targetPoint" /> | ||||
|             <Array as="points"> | ||||
|               <mxPoint x="80" y="350" /> | ||||
|             </Array> | ||||
|           </mxGeometry> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-12" target="yG8qeggDCLqQ8GwY7ugi-18"> | ||||
|           <mxGeometry relative="1" as="geometry"> | ||||
|             <Array as="points"> | ||||
|               <mxPoint x="240" y="500" /> | ||||
|               <mxPoint x="240" y="500" /> | ||||
|             </Array> | ||||
|           </mxGeometry> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-13" value="Accurate Cost Estimator<br>(Machine Specific)" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1"> | ||||
|           <mxGeometry x="20" y="370" width="120" height="60" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-21" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;shadow=1;" edge="1" parent="1" source="yG8qeggDCLqQ8GwY7ugi-18" target="yG8qeggDCLqQ8GwY7ugi-20"> | ||||
|           <mxGeometry relative="1" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-18" value="Code Generator" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1"> | ||||
|           <mxGeometry x="180" y="520" width="120" height="60" as="geometry" /> | ||||
|         </mxCell> | ||||
|         <mxCell id="yG8qeggDCLqQ8GwY7ugi-20" value="Executor" style="rounded=1;whiteSpace=wrap;html=1;shadow=1;" vertex="1" parent="1"> | ||||
|           <mxGeometry x="180" y="620" width="120" height="60" as="geometry" /> | ||||
|         </mxCell> | ||||
|       </root> | ||||
|     </mxGraphModel> | ||||
|   </diagram> | ||||
| </mxfile> | ||||
							
								
								
									
										59
									
								
								docs/src/lib/internals/devices.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								docs/src/lib/internals/devices.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | ||||
| # Devices | ||||
|  | ||||
| ## Interface | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["devices/interface.jl"] | ||||
| Order = [:type, :constant, :function] | ||||
| ``` | ||||
|  | ||||
| ## Detect | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["devices/detect.jl"] | ||||
| Order = [:function] | ||||
| ``` | ||||
|  | ||||
| ## Measure | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["devices/measure.jl"] | ||||
| Order = [:function] | ||||
| ``` | ||||
|  | ||||
| ## Implementations | ||||
|  | ||||
| ### General | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["devices/impl.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
|  | ||||
| ### NUMA | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["devices/numa/impl.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
|  | ||||
| ### CUDA | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["devices/cuda/impl.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
|  | ||||
| ### ROCm | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["devices/rocm/impl.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
|  | ||||
| ### oneAPI | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["devices/oneapi/impl.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
							
								
								
									
										21
									
								
								docs/src/lib/internals/estimator.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								docs/src/lib/internals/estimator.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | ||||
| # Estimation | ||||
|  | ||||
| ## Interface | ||||
|  | ||||
| The interface that has to be implemented for an estimator. | ||||
|  | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["estimator/interface.jl"] | ||||
| Order = [:type, :constant, :function] | ||||
| ``` | ||||
|  | ||||
| ## Global Metric Estimator | ||||
|  | ||||
| Implementation of a global metric estimator. It uses the graph properties compute effort, data transfer, and compute intensity. | ||||
|  | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["estimator/global_metric.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
| @@ -1,5 +1,21 @@ | ||||
| # Models | ||||
|  | ||||
| ## Interface | ||||
|  | ||||
| The interface that has to be implemented for a model to be usable is defined in `src/models/interface.jl`. | ||||
|  | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["models/interface.jl"] | ||||
| Order = [:type, :constant, :function] | ||||
| ``` | ||||
|  | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["models/print.jl"] | ||||
| Order = [:function] | ||||
| ``` | ||||
|  | ||||
| ## ABC-Model | ||||
|  | ||||
| ### Types | ||||
| @@ -44,6 +60,13 @@ Pages = ["models/abc/compute.jl"] | ||||
| Order = [:function] | ||||
| ``` | ||||
|  | ||||
| ### Print | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["models/abc/print.jl"] | ||||
| Order = [:function] | ||||
| ``` | ||||
|  | ||||
| ## QED-Model | ||||
|  | ||||
| *To be added* | ||||
|   | ||||
							
								
								
									
										41
									
								
								docs/src/lib/internals/optimization.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								docs/src/lib/internals/optimization.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | ||||
| # Optimization | ||||
|  | ||||
| ## Interface | ||||
|  | ||||
| The interface that has to be implemented for an optimization algorithm. | ||||
|  | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["optimization/interface.jl"] | ||||
| Order = [:type, :constant, :function] | ||||
| ``` | ||||
|  | ||||
| ## Random Walk Optimizer | ||||
|  | ||||
| Implementation of a random walk algorithm. | ||||
|  | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["optimization/random_walk.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
|  | ||||
| ## Reduction Optimizer | ||||
|  | ||||
| Implementation of an optimizer that reduces as far as possible. | ||||
|  | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["optimization/reduce.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
|  | ||||
| ## Greedy Optimizer | ||||
|  | ||||
| Implementation of a greedy optimization algorithm. | ||||
|  | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["optimization/greedy.jl"] | ||||
| Order = [:type, :function] | ||||
| ``` | ||||
							
								
								
									
										15
									
								
								docs/src/lib/internals/scheduler.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								docs/src/lib/internals/scheduler.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| # Scheduler | ||||
|  | ||||
| ## Interface | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["scheduler/interface.jl"] | ||||
| Order   = [:type, :function] | ||||
| ``` | ||||
|  | ||||
| ## Greedy | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["scheduler/greedy.jl"] | ||||
| Order   = [:type, :function] | ||||
| ``` | ||||
| @@ -21,6 +21,13 @@ Pages = ["task/compare.jl"] | ||||
| Order   = [:function] | ||||
| ``` | ||||
|  | ||||
| ## Compute | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
| Pages = ["task/compute.jl"] | ||||
| Order   = [:function] | ||||
| ``` | ||||
|  | ||||
| ## Properties | ||||
| ```@autodocs | ||||
| Modules = [MetagraphOptimization] | ||||
|   | ||||
| @@ -1,3 +1,7 @@ | ||||
| # Manual | ||||
|  | ||||
| This will become a manual. | ||||
| ## Jupyter Notebooks | ||||
|  | ||||
| The `notebooks` directory contains Jupyter notebooks with examples of how to use this repository. | ||||
|  | ||||
| - `abc_model_showcase`: A simple showcase of the intended usage of the ABC Model implementation. | ||||
|   | ||||
							
								
								
									
										33
									
								
								examples/ab5.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								examples/ab5.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| using MetagraphOptimization | ||||
| using BenchmarkTools | ||||
|  | ||||
| println("Getting machine info") | ||||
| @time machine = get_machine_info() | ||||
|  | ||||
| println("Making model") | ||||
| @time model = ABCModel() | ||||
|  | ||||
| println("Making process") | ||||
| process_str = "AB->ABBBBB" | ||||
| @time process = parse_process(process_str, model) | ||||
|  | ||||
| println("Parsing DAG") | ||||
| @time graph = parse_dag("input/$process_str.txt", model) | ||||
|  | ||||
| println("Generating input data") | ||||
| @time input_data = [gen_process_input(process) for _ in 1:1000] | ||||
|  | ||||
| println("Reducing graph") | ||||
| @time optimize_to_fixpoint!(ReductionOptimizer(), graph) | ||||
|  | ||||
| println("Generating compute function") | ||||
| @time compute_func = get_compute_function(graph, process, machine) | ||||
|  | ||||
| println("First run, single argument") | ||||
| @time compute_func(input_data[1]) | ||||
|  | ||||
| println("\nBenchmarking function, 1 input") | ||||
| display(@benchmark compute_func($(input_data[1]))) | ||||
|  | ||||
| println("\nBenchmarking function, 1000 inputs") | ||||
| display(@benchmark compute_func.($input_data)) | ||||
							
								
								
									
										33
									
								
								examples/ab7.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								examples/ab7.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| using MetagraphOptimization | ||||
| using BenchmarkTools | ||||
|  | ||||
| println("Getting machine info") | ||||
| @time machine = get_machine_info() | ||||
|  | ||||
| println("Making model") | ||||
| @time model = ABCModel() | ||||
|  | ||||
| println("Making process") | ||||
| process_str = "AB->ABBBBBBB" | ||||
| @time process = parse_process(process_str, model) | ||||
|  | ||||
| println("Parsing DAG") | ||||
| @time graph = parse_dag("input/$process_str.txt", model) | ||||
|  | ||||
| println("Generating input data") | ||||
| @time input_data = [gen_process_input(process) for _ in 1:1000] | ||||
|  | ||||
| println("Reducing graph") | ||||
| @time optimize_to_fixpoint!(ReductionOptimizer(), graph) | ||||
|  | ||||
| println("Generating compute function") | ||||
| @time compute_func = get_compute_function(graph, process, machine) | ||||
|  | ||||
| println("First run, single argument") | ||||
| @time compute_func(input_data[1]) | ||||
|  | ||||
| println("\nBenchmarking function, 1 input") | ||||
| display(@benchmark compute_func($(input_data[1]))) | ||||
|  | ||||
| println("\nBenchmarking function, 1000 inputs") | ||||
| display(@benchmark compute_func.($input_data)) | ||||
| @@ -13,16 +13,15 @@ function bench_txt(filepath::String, bench::Bool = true) | ||||
|         return | ||||
|     end | ||||
|  | ||||
|     model = ABCModel() | ||||
|  | ||||
|     println(name, ":") | ||||
|     g = parse_abc(filepath) | ||||
|     g = parse_dag(filepath, model) | ||||
|     print(g) | ||||
|     println( | ||||
|         "  Graph size in memory: ", | ||||
|         bytes_to_human_readable(MetagraphOptimization.mem(g)), | ||||
|     ) | ||||
|     println("  Graph size in memory: ", bytes_to_human_readable(MetagraphOptimization.mem(g))) | ||||
|  | ||||
|     if (bench) | ||||
|         @btime parse_abc($filepath) | ||||
|         @btime parse_dag($filepath, $model) | ||||
|     end | ||||
|  | ||||
|     println("  Get Operations: ") | ||||
|   | ||||
| @@ -12,7 +12,7 @@ function gen_plot(filepath) | ||||
|         return | ||||
|     end | ||||
|  | ||||
|     g = parse_abc(filepath) | ||||
|     g = parse_dag(filepath, ABCModel()) | ||||
|  | ||||
|     Random.seed!(1) | ||||
|  | ||||
| @@ -48,23 +48,10 @@ function gen_plot(filepath) | ||||
|  | ||||
|     println("\rDone.") | ||||
|  | ||||
|     plot( | ||||
|         [x[1], x[2]], | ||||
|         [y[1], y[2]], | ||||
|         linestyle = :solid, | ||||
|         linewidth = 1, | ||||
|         color = :red, | ||||
|         legend = false, | ||||
|     ) | ||||
|     plot([x[1], x[2]], [y[1], y[2]], linestyle = :solid, linewidth = 1, color = :red, legend = false) | ||||
|     # Create lines connecting the reference point to each data point | ||||
|     for i in 3:length(x) | ||||
|         plot!( | ||||
|             [x[i - 1], x[i]], | ||||
|             [y[i - 1], y[i]], | ||||
|             linestyle = :solid, | ||||
|             linewidth = 1, | ||||
|             color = :red, | ||||
|         ) | ||||
|         plot!([x[i - 1], x[i]], [y[i - 1], y[i]], linestyle = :solid, linewidth = 1, color = :red) | ||||
|     end | ||||
|  | ||||
|     return gui() | ||||
|   | ||||
| @@ -12,7 +12,7 @@ function gen_plot(filepath) | ||||
|         return | ||||
|     end | ||||
|  | ||||
|     g = parse_abc(filepath) | ||||
|     g = parse_dag(filepath, ABCModel()) | ||||
|  | ||||
|     Random.seed!(1) | ||||
|  | ||||
| @@ -60,14 +60,7 @@ function gen_plot(filepath) | ||||
|         push!(y, props.computeEffort) | ||||
|         pop_operation!(g) | ||||
|  | ||||
|         push!( | ||||
|             names, | ||||
|             "NF: (" * | ||||
|             string(props.data) * | ||||
|             ", " * | ||||
|             string(props.computeEffort) * | ||||
|             ")", | ||||
|         ) | ||||
|         push!(names, "NF: (" * string(props.data) * ", " * string(props.computeEffort) * ")") | ||||
|     end | ||||
|     for op in opt.nodeReductions | ||||
|         push_operation!(g, op) | ||||
| @@ -76,14 +69,7 @@ function gen_plot(filepath) | ||||
|         push!(y, props.computeEffort) | ||||
|         pop_operation!(g) | ||||
|  | ||||
|         push!( | ||||
|             names, | ||||
|             "NR: (" * | ||||
|             string(props.data) * | ||||
|             ", " * | ||||
|             string(props.computeEffort) * | ||||
|             ")", | ||||
|         ) | ||||
|         push!(names, "NR: (" * string(props.data) * ", " * string(props.computeEffort) * ")") | ||||
|     end | ||||
|     for op in opt.nodeSplits | ||||
|         push_operation!(g, op) | ||||
| @@ -92,33 +78,13 @@ function gen_plot(filepath) | ||||
|         push!(y, props.computeEffort) | ||||
|         pop_operation!(g) | ||||
|  | ||||
|         push!( | ||||
|             names, | ||||
|             "NS: (" * | ||||
|             string(props.data) * | ||||
|             ", " * | ||||
|             string(props.computeEffort) * | ||||
|             ")", | ||||
|         ) | ||||
|         push!(names, "NS: (" * string(props.data) * ", " * string(props.computeEffort) * ")") | ||||
|     end | ||||
|  | ||||
|     plot( | ||||
|         [x0, x[1]], | ||||
|         [y0, y[1]], | ||||
|         linestyle = :solid, | ||||
|         linewidth = 1, | ||||
|         color = :red, | ||||
|         legend = false, | ||||
|     ) | ||||
|     plot([x0, x[1]], [y0, y[1]], linestyle = :solid, linewidth = 1, color = :red, legend = false) | ||||
|     # Create lines connecting the reference point to each data point | ||||
|     for i in 2:length(x) | ||||
|         plot!( | ||||
|             [x0, x[i]], | ||||
|             [y0, y[i]], | ||||
|             linestyle = :solid, | ||||
|             linewidth = 1, | ||||
|             color = :red, | ||||
|         ) | ||||
|         plot!([x0, x[i]], [y0, y[i]], linestyle = :solid, linewidth = 1, color = :red) | ||||
|     end | ||||
|     #scatter!(x, y, label=names) | ||||
|  | ||||
|   | ||||
| @@ -1,59 +0,0 @@ | ||||
|  | ||||
| function test_random_walk(g::DAG, n::Int64) | ||||
|     # the purpose here is to do "random" operations and reverse them again and validate that the graph stays the same and doesn't diverge | ||||
|     reset_graph!(g) | ||||
|  | ||||
|     properties = get_properties(g) | ||||
|  | ||||
|     for i in 1:n | ||||
|         # choose push or pop | ||||
|         if rand(Bool) | ||||
|             # push | ||||
|             opt = get_operations(g) | ||||
|  | ||||
|             # choose one of fuse/split/reduce | ||||
|             option = rand(1:3) | ||||
|             if option == 1 && !isempty(opt.nodeFusions) | ||||
|                 push_operation!(g, rand(collect(opt.nodeFusions))) | ||||
|             elseif option == 2 && !isempty(opt.nodeReductions) | ||||
|                 push_operation!(g, rand(collect(opt.nodeReductions))) | ||||
|             elseif option == 3 && !isempty(opt.nodeSplits) | ||||
|                 push_operation!(g, rand(collect(opt.nodeSplits))) | ||||
|             else | ||||
|                 i = i - 1 | ||||
|             end | ||||
|         else | ||||
|             # pop | ||||
|             if (can_pop(g)) | ||||
|                 pop_operation!(g) | ||||
|             else | ||||
|                 i = i - 1 | ||||
|             end | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     return reset_graph!(g) | ||||
| end | ||||
|  | ||||
| function reduce_all!(g::DAG) | ||||
|     reset_graph!(g) | ||||
|  | ||||
|     opt = get_operations(g) | ||||
|     while (!isempty(opt.nodeReductions)) | ||||
|         push_operation!(g, pop!(opt.nodeReductions)) | ||||
|  | ||||
|         if (isempty(opt.nodeReductions)) | ||||
|             opt = get_operations(g) | ||||
|         end | ||||
|     end | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| function reduce_one!(g::DAG) | ||||
|     opt = get_operations(g) | ||||
|     if !isempty(opt.nodeReductions) | ||||
|         push_operation!(g, pop!(opt.nodeReductions)) | ||||
|     end | ||||
|     opt = get_operations(g) | ||||
|     return nothing | ||||
| end | ||||
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										636
									
								
								notebooks/abc_model_large.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										636
									
								
								notebooks/abc_model_large.ipynb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,636 @@ | ||||
| { | ||||
|  "cells": [ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 1, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "using MetagraphOptimization" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Found 1 NUMA nodes\n", | ||||
|       "CUDA is non-functional\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Get machine and set dictionary caching strategy\n", | ||||
|     "machine = get_machine_info()\n", | ||||
|     "MetagraphOptimization.set_cache_strategy(machine.devices[1], MetagraphOptimization.Dictionary())" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Graph:\n", | ||||
|       "  Nodes: Total: 7854, ComputeTaskP: 8, ComputeTaskS2: 720, \n", | ||||
|       "         ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 1230, \n", | ||||
|       "         ComputeTaskV: 1956, DataTask: 3931\n", | ||||
|       "  Edges: 11241\n", | ||||
|       "  Total Compute Effort: 33915.0\n", | ||||
|       "  Total Data Transfer: 322464.0\n", | ||||
|       "  Total Compute Intensity: 0.10517453111044954\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "model = ABCModel()\n", | ||||
|     "process_str = \"AB->ABBBBB\"\n", | ||||
|     "process = parse_process(process_str, model)\n", | ||||
|     "graph = parse_dag(\"../input/$process_str.txt\", model)\n", | ||||
|     "print(graph)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "compute__ae7097a4_7bfc_11ee_2cec_190d7ced64f1 (generic function with 1 method)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 7, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "compute_AB_AB5 = get_compute_function(graph, process, machine)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "  0.140021 seconds (791.41 k allocations: 30.317 MiB, 9.74% gc time)\n", | ||||
|       "Graph:\n", | ||||
|       "  Nodes: Total: 4998, ComputeTaskP: 8, ComputeTaskS2: 720, \n", | ||||
|       "         ComputeTaskU: 8, ComputeTaskSum: 1, ComputeTaskS1: 516, \n", | ||||
|       "         ComputeTaskV: 1242, DataTask: 2503\n", | ||||
|       "  Edges: 7671\n", | ||||
|       "  Total Compute Effort: 21777.0\n", | ||||
|       "  Total Data Transfer: 219648.0\n", | ||||
|       "  Total Compute Intensity: 0.09914499562937062\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "@time optimize_to_fixpoint!(ReductionOptimizer(), graph)\n", | ||||
|     "print(graph)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "  3.626740 seconds (1.52 M allocations: 114.358 MiB, 0.84% gc time)\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "compute__bad8f2ac_7bfc_11ee_176b_b72dc8919aad (generic function with 1 method)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 9, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "@time compute_AB_AB5_reduced = get_compute_function(graph, process, machine)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "  2.130952 seconds (4.31 M allocations: 276.129 MiB, 4.50% gc time, 99.02% compilation time)\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "1000-element Vector{ABCProcessInput}:\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [4.694213004647641, 0.0, 0.0, 4.58646222408983]\n", | ||||
|        "    B: [4.694213004647641, 0.0, 0.0, -4.58646222408983]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.1989656045893697, -0.40235742161696864, 0.06512533692021122, 0.5209469423550988]\n", | ||||
|        "    B: [-1.2555060342925868, 0.3685683194051901, 0.4785890883121294, -0.4597882997907804]\n", | ||||
|        "    B: [-2.189083660521547, 0.31663070338411387, 0.1742479621961443, -1.9134967776579581]\n", | ||||
|        "    B: [-1.0637129314000269, -0.2948512505337184, 0.0500740340487307, -0.2050378784528044]\n", | ||||
|        "    B: [-1.6149410305664367, 1.0344652685816964, -0.406159957064284, 0.6106965118475143]\n", | ||||
|        "    B: [-2.0662167479253144, -1.0224556192203134, -0.3618764644129321, 1.4466795016989296]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [5.621657834589244, 0.0, 0.0, 5.532001157736559]\n", | ||||
|        "    B: [5.621657834589244, 0.0, 0.0, -5.532001157736559]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-2.058801595505931, 0.7220299456693885, 0.22719930902793095, 1.6327024349806234]\n", | ||||
|        "    B: [-1.1826215869997767, 0.04638669502532437, -0.553508153090363, -0.30011800516629]\n", | ||||
|        "    B: [-2.3776830758041227, -0.8637209881441633, -0.22710813067439403, 1.9636152272240621]\n", | ||||
|        "    B: [-1.9086249240920268, 0.02598092498567318, -1.087715954825374, -1.2079106316365085]\n", | ||||
|        "    B: [-2.6526208210236426, 0.3117066248738638, 1.6178469805428013, -1.8225826038033035]\n", | ||||
|        "    B: [-1.0629636657529868, -0.24238320241008685, 0.023285949019398133, -0.2657064215985837]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.176284774018432, 0.0, 0.0, 6.094792335245879]\n", | ||||
|        "    B: [6.176284774018432, 0.0, 0.0, -6.094792335245879]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-3.2943110238771185, 1.9799744259594443, 2.3805040294128346, 0.5151572192390796]\n", | ||||
|        "    B: [-1.0255775134941767, 0.18009906891836583, -0.12779691496180498, 0.05514988745120904]\n", | ||||
|        "    B: [-1.7854209452644407, -0.56381615584479, -0.9572322565407875, 0.9764966468120639]\n", | ||||
|        "    B: [-3.3312939695760786, -0.5949754252793171, -2.9420979921841868, -1.0428725518649993]\n", | ||||
|        "    B: [-1.6551651824618003, -0.8748451354288965, 0.9749427327758187, -0.1539624566503731]\n", | ||||
|        "    B: [-1.260800913363249, -0.12643677832480643, 0.6716804014981268, -0.34996874498697933]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [4.747497785190141, 0.0, 0.0, 4.640984294348053]\n", | ||||
|        "    B: [4.747497785190141, 0.0, 0.0, -4.640984294348053]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.3704329562088802, 0.8292801285050307, 0.2251475790952209, 0.3737506167990253]\n", | ||||
|        "    B: [-1.352958681672649, 0.11120507604905326, 0.6088733084867489, -0.6688825902852584]\n", | ||||
|        "    B: [-1.4224569379606473, -0.25277059018918374, -0.4925475402927904, -0.84669220478242]\n", | ||||
|        "    B: [-2.4534584066229996, -0.23638988525842838, -1.4120549440785204, 1.7232756047945383]\n", | ||||
|        "    B: [-1.4378719974624208, 0.5461758322111039, 0.8131489669135029, -0.3285674953530594]\n", | ||||
|        "    B: [-1.457816590452685, -0.9975005613175758, 0.257432629875838, -0.25288393117282576]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.148648417619223, 0.0, 0.0, 6.066784763240853]\n", | ||||
|        "    B: [6.148648417619223, 0.0, 0.0, -6.066784763240853]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.5381168736188293, 0.5769721565317305, 1.0069443436143835, 0.13773066601554382]\n", | ||||
|        "    B: [-1.3178580311796126, 0.27781510267038506, -0.8083323925420551, 0.07853217328003184]\n", | ||||
|        "    B: [-1.5330954954905804, 0.4994081736550063, -1.0290017953406905, 0.20525247761163526]\n", | ||||
|        "    B: [-3.083592979398096, -2.1497728433794587, -1.2247634566690573, -1.5449844205264607]\n", | ||||
|        "    B: [-3.1391572693216845, 0.49043306139044257, 2.931865230552653, 0.13397777318202247]\n", | ||||
|        "    B: [-1.6854761862296446, 0.30514434913189475, -0.876711929615233, 0.989491330437227]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [7.422637433466136, 0.0, 0.0, 7.35496746890785]\n", | ||||
|        "    B: [7.422637433466136, 0.0, 0.0, -7.35496746890785]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-3.3788591199517355, 2.3069724486616927, -0.5016400230094518, 2.2006645271171985]\n", | ||||
|        "    B: [-2.193241133599192, -1.652465184572841, -0.691853387986234, -0.7752447184070871]\n", | ||||
|        "    B: [-2.295315825041209, 0.334376552772819, 0.5374003175214306, 1.966689593293318]\n", | ||||
|        "    B: [-2.3721558149969235, -2.0813404180022568, 0.4923496733367945, 0.22964554029865022]\n", | ||||
|        "    B: [-1.5367714331999278, 0.9008878309070798, 0.1482895506792473, -0.7266895920420517]\n", | ||||
|        "    B: [-3.068931540143284, 0.1915687702335065, 0.015453869458212284, -2.8950653502600274]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.716486802754837, 0.0, 0.0, 6.64162592830851]\n", | ||||
|        "    B: [6.716486802754837, 0.0, 0.0, -6.64162592830851]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.3263331205917814, -0.5023870926274977, 0.418137178911541, 0.5761319775467438]\n", | ||||
|        "    B: [-2.1603199304697136, -1.202627416523187, 1.024176720111292, -1.0824654936733602]\n", | ||||
|        "    B: [-1.1665818595303201, 0.5747508534091106, 0.05041215840441908, 0.16743149576984034]\n", | ||||
|        "    B: [-1.829760754209137, 0.5127529745920416, -0.17835468593467171, -1.4329334983509001]\n", | ||||
|        "    B: [-2.891550940379351, -2.652621236308268, 0.3953841214715819, 0.41029113320086874]\n", | ||||
|        "    B: [-4.05842700032937, 3.2701319174577996, -1.7097554929641623, 1.3615443855068068]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [7.700331598721008, 0.0, 0.0, 7.635123229539995]\n", | ||||
|        "    B: [7.700331598721008, 0.0, 0.0, -7.635123229539995]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-2.382743739041896, -1.410381415274026, 1.0613871843128353, 1.2496996576655786]\n", | ||||
|        "    B: [-3.021630369232257, 0.25595209564405125, -2.8389223073732714, 0.07251720968504605]\n", | ||||
|        "    B: [-2.7262381500229256, 1.0736489469437192, 2.293577756890956, 0.13839603484966886]\n", | ||||
|        "    B: [-2.222260574660266, 1.5432031708495264, -0.7055857379280247, 1.0291330339668954]\n", | ||||
|        "    B: [-1.650055097318715, -1.062833285640475, -0.34598865120359784, 0.6880109623839291]\n", | ||||
|        "    B: [-3.397735267165956, -0.3995895125227963, 0.5355317553011019, -3.1777568985511193]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [4.9341647451125334, 0.0, 0.0, 4.8317679716550375]\n", | ||||
|        "    B: [4.9341647451125334, 0.0, 0.0, -4.8317679716550375]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.834221818900379, 0.1070495973399568, 1.2695354794210922, 0.860923766155068]\n", | ||||
|        "    B: [-1.5116322118250454, 0.39753882899610743, -0.756426277560466, -0.7448584495617266]\n", | ||||
|        "    B: [-1.6588475476725886, 0.06712527283179799, 0.6875031760830096, -1.1289857249063835]\n", | ||||
|        "    B: [-1.5718164783029667, 0.4294130824657117, -0.6215317131811225, -0.9486357444151968]\n", | ||||
|        "    B: [-1.7838526603309615, -0.5732435925039472, -0.9425541080554634, 0.9824020820472578]\n", | ||||
|        "    B: [-1.5079587731931232, -0.4278831891296266, 0.36347344329295106, 0.979154070680981]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [7.099667747066588, 0.0, 0.0, 7.028889109862067]\n", | ||||
|        "    B: [7.099667747066588, 0.0, 0.0, -7.028889109862067]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-3.851129225519823, 2.5555470019017212, -2.502060728335724, 1.019837214678957]\n", | ||||
|        "    B: [-2.3860288930086897, 0.6059782347076652, 0.6711053982516709, 1.9686395814801452]\n", | ||||
|        "    B: [-1.9543999030878276, -1.5857282951514855, 0.5255033921941499, -0.17026726032362857]\n", | ||||
|        "    B: [-1.5523812781985644, -1.154244859738803, 0.03484928145183679, -0.2763909626783212]\n", | ||||
|        "    B: [-3.2795110937910716, -1.0290377989842119, 1.3607888704851536, -2.616204860580336]\n", | ||||
|        "    B: [-1.175885100527199, 0.6074857172651138, -0.09018621404708665, 0.07438628742318319]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.3653048194550985, 0.0, 0.0, 6.286263233796236]\n", | ||||
|        "    B: [6.3653048194550985, 0.0, 0.0, -6.286263233796236]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-3.274142279992413, -2.62046758782023, -1.339558866223036, 1.028950598785383]\n", | ||||
|        "    B: [-1.8502190446152251, -1.1967169760014287, 0.8476370040459147, 0.5221977611776395]\n", | ||||
|        "    B: [-1.3090919645484567, 0.8304076910302604, -0.132118345313184, 0.08178985973111547]\n", | ||||
|        "    B: [-1.7699077332157842, 0.8156249668276708, -0.2891156025546255, 1.1763254081859622]\n", | ||||
|        "    B: [-1.6671330761442815, 1.2573648831500233, 0.2190135291489001, -0.3878135096217862]\n", | ||||
|        "    B: [-2.8601155403940384, 0.913787022813704, 0.6941422808960306, -2.421450118258315]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [5.2620105860572215, 0.0, 0.0, 5.166116085395126]\n", | ||||
|        "    B: [5.2620105860572215, 0.0, 0.0, -5.166116085395126]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.9479176369516882, 0.8861257045164052, 1.1018829783040076, 0.8916379636750793]\n", | ||||
|        "    B: [-1.2433791528628988, 0.41365857789168176, 0.544699730060495, -0.27960776595565956]\n", | ||||
|        "    B: [-1.074755543453127, 0.3002469943380598, 0.01041159782849033, 0.25464253219924826]\n", | ||||
|        "    B: [-1.7453891507499704, 1.1576089006622574, 0.03134512003430503, -0.8398466551182168]\n", | ||||
|        "    B: [-1.5208938996272057, 0.008686514238768405, -1.1440782944999142, -0.06424682441800389]\n", | ||||
|        "    B: [-2.991685788469555, -2.7663266916471727, -0.544261131727384, 0.03742074961755215]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [4.439668869119513, 0.0, 0.0, 4.325582003318043]\n", | ||||
|        "    B: [4.439668869119513, 0.0, 0.0, -4.325582003318043]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.1969832203303146, 0.48265768801558717, -0.02482335564392214, 0.4463117598342591]\n", | ||||
|        "    B: [-1.7251727113760817, -1.0744400415092346, 0.6322269398265393, 0.6496834443295479]\n", | ||||
|        "    B: [-1.419669052608684, -0.4173084301546306, -0.44626125418717505, -0.8013518491074973]\n", | ||||
|        "    B: [-1.331289111993432, -0.7645577006899625, -0.3423664341778722, 0.2656453402118452]\n", | ||||
|        "    B: [-1.5156451020746182, 0.6491857388484042, 0.8955487542892042, -0.2715333876518423]\n", | ||||
|        "    B: [-1.6905785398558963, 1.1244627454898357, -0.7143246501067739, -0.2887553076163127]\n", | ||||
|        "\n", | ||||
|        " ⋮\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [5.750717080737416, 0.0, 0.0, 5.663104002460582]\n", | ||||
|        "    B: [5.750717080737416, 0.0, 0.0, -5.663104002460582]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.0362067302993534, 0.23737037129807034, 0.1316212944823847, 0.007451817649030921]\n", | ||||
|        "    B: [-3.597917991072113, -1.5787159301449987, 0.28387609057144564, 3.0613860010767477]\n", | ||||
|        "    B: [-1.0798303035395174, -0.06880694215947386, -0.2669312876106363, -0.3000779512850572]\n", | ||||
|        "    B: [-1.3394551212059678, -0.7053379424304421, 0.44160810884651497, -0.3187799976376953]\n", | ||||
|        "    B: [-3.270241523195321, 1.927780354010675, 0.003047457202140131, -2.4450221348130854]\n", | ||||
|        "    B: [-1.1777824921625586, 0.1877100894261692, -0.5932216634918489, -0.004957734989940532]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.84577391627276, 0.0, 0.0, 6.772342320993563]\n", | ||||
|        "    B: [6.84577391627276, 0.0, 0.0, -6.772342320993563]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.0594956991232163, -0.09579189209396338, 0.21296650876679918, 0.2607687021353065]\n", | ||||
|        "    B: [-1.8300488673592041, 0.8497425690197566, -0.8227483588311224, 0.9747315329664396]\n", | ||||
|        "    B: [-2.860723394379955, 0.6743651794772785, 0.1320397309862766, 2.5906631300310776]\n", | ||||
|        "    B: [-2.557528905485892, -1.3508678766931497, 1.2829278224554168, -1.4388211440218013]\n", | ||||
|        "    B: [-3.790115184858299, 0.47588521284738383, -1.0334447791446917, -3.474262262286086]\n", | ||||
|        "    B: [-1.5936357813389537, -0.553333192557306, 0.2282590757673212, 1.086920041175065]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.25909007687458, 0.0, 0.0, 6.178689876537731]\n", | ||||
|        "    B: [6.25909007687458, 0.0, 0.0, -6.178689876537731]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-2.15208406752572, -0.27987613820502405, 0.20983197963180572, -1.873260718983155]\n", | ||||
|        "    B: [-3.1436326945514232, -2.0821664144960677, -1.9679549582157083, 0.8210741885063981]\n", | ||||
|        "    B: [-2.206056617746511, 1.7689323832663284, -0.4273996865759156, -0.7449117612507478]\n", | ||||
|        "    B: [-1.8709609004510535, 0.5332842722412897, 1.48760475220818, -0.055988188078690854]\n", | ||||
|        "    B: [-1.0916331546903268, 0.018218872767661307, 0.4300802089857822, 0.07976234031782706]\n", | ||||
|        "    B: [-2.0538127187841235, 0.04160702442581186, 0.2678377039658561, 1.7733241394883685]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.8752382625158255, 0.0, 0.0, 6.802124753807565]\n", | ||||
|        "    B: [6.8752382625158255, 0.0, 0.0, -6.802124753807565]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-3.815955448364548, 1.7284392485789066, 3.22998101457395, -0.37581430702794955]\n", | ||||
|        "    B: [-3.705003390432734, 0.8773209536576554, -3.1633610279519866, -1.3966048382509024]\n", | ||||
|        "    B: [-1.4798429985544235, -0.876885056483666, -0.05155962504198175, 0.6467994303891397]\n", | ||||
|        "    B: [-1.196598159149068, -0.6492448407423084, 0.0066213036625077295, -0.10141227532326653]\n", | ||||
|        "    B: [-1.307725757451199, -0.47623875265044, -0.08939192779758245, -0.6894580410872709]\n", | ||||
|        "    B: [-2.2453507710796776, -0.6033915523601473, 0.06771026255509205, 1.91649003130025]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.591382068439754, 0.0, 0.0, 6.515083849970707]\n", | ||||
|        "    B: [6.591382068439754, 0.0, 0.0, -6.515083849970707]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-2.166341377746586, 0.738656605699622, 1.1097711420427974, -1.3841348908550482]\n", | ||||
|        "    B: [-1.9136122405957643, -1.3687809690739081, -0.8052302154690981, 0.37410528752561706]\n", | ||||
|        "    B: [-1.020282522629639, 0.01566959851558055, -0.04103060943002397, -0.1976040959992001]\n", | ||||
|        "    B: [-3.3680104240574718, -0.44221430614525714, -3.1855463435158966, -0.015336796039828009]\n", | ||||
|        "    B: [-1.1380460439601876, 0.33787512483866744, -0.3053034033656307, 0.2962752606648943]\n", | ||||
|        "    B: [-3.576471527889859, 0.7187939461652956, 3.227339429737853, 0.9266952347035636]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [7.366791305680796, 0.0, 0.0, 7.298603574756898]\n", | ||||
|        "    B: [7.366791305680796, 0.0, 0.0, -7.298603574756898]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.1161936134323496, 0.1815174250263101, -0.30155987378038246, 0.34928677273057857]\n", | ||||
|        "    B: [-1.1768168637671912, -0.488638136596838, -0.0387546058981897, 0.38030091090042567]\n", | ||||
|        "    B: [-3.8756829146246745, -0.22123631639903027, -3.6727532274395425, -0.694878606198396]\n", | ||||
|        "    B: [-1.4161987387916468, -0.42653096897021076, -0.26480462532703347, -0.8680833546784509]\n", | ||||
|        "    B: [-3.4638938410201177, 2.8217659294852746, 1.2824429941168167, 1.179634497585545]\n", | ||||
|        "    B: [-3.6847966397256138, -1.8668779325455054, 2.995429338328331, -0.346260220339702]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [4.762032860651893, 0.0, 0.0, 4.655851905497903]\n", | ||||
|        "    B: [4.762032860651893, 0.0, 0.0, -4.655851905497903]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-2.656166654414924, 2.017338594394486, -1.384735065574992, 0.2609120345236529]\n", | ||||
|        "    B: [-1.031990140619295, -0.035004877965791346, -0.20112979442869375, 0.15272561883031827]\n", | ||||
|        "    B: [-1.7319386082994335, -1.0359644740176492, 0.8025718625008718, -0.5312883934487891]\n", | ||||
|        "    B: [-1.7450617894727098, -0.49163856285061436, 1.1666756465784553, 0.6651316473275205]\n", | ||||
|        "    B: [-1.0945973465763637, -0.42438631366397905, -0.017047995524507212, 0.1332252744613839]\n", | ||||
|        "    B: [-1.2643111819210613, -0.030344365896452122, -0.3663346535511349, -0.6807061816940867]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.12211537837656, 0.0, 0.0, 6.039892110473065]\n", | ||||
|        "    B: [6.12211537837656, 0.0, 0.0, -6.039892110473065]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-2.09449973649211, -1.247911941781509, -0.776547530016726, 1.1075282684200622]\n", | ||||
|        "    B: [-2.857971140758051, 1.4507115887866229, 2.2078617054725442, 0.43449006556414854]\n", | ||||
|        "    B: [-2.068918524386865, -0.43350532192333185, 1.7407499017717505, -0.24957318745593]\n", | ||||
|        "    B: [-1.0503370840395667, 0.28162676024293815, -0.11219953076948735, 0.10632790470480236]\n", | ||||
|        "    B: [-1.6648953051752136, 0.3171875953909028, -1.2925202016854087, 0.025689195388605857]\n", | ||||
|        "    B: [-2.5076089659013125, -0.36810868071562286, -1.7673443447726724, -1.4244622466216894]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [7.431058837653249, 0.0, 0.0, 7.363466265874004]\n", | ||||
|        "    B: [7.431058837653249, 0.0, 0.0, -7.363466265874004]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.4340725727125623, 0.9525417282027518, 0.38239995291064965, -0.05476016666222433]\n", | ||||
|        "    B: [-3.5734117962040854, 2.3267511116139916, 2.49915109639257, -0.33127771922267657]\n", | ||||
|        "    B: [-2.3529075757582945, 1.185265706342765, -1.375530715171772, 1.1132091075119688]\n", | ||||
|        "    B: [-2.710381815585542, -2.1195780947035594, -1.2974231675570782, -0.4126153305389483]\n", | ||||
|        "    B: [-2.374272199256637, -1.2400410368129877, 1.6839473809113144, -0.5136028830766439]\n", | ||||
|        "    B: [-2.4170717157893766, -1.104939414642962, -1.8925445474856835, 0.1990469919885247]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [4.370360958267613, 0.0, 0.0, 4.254415930013168]\n", | ||||
|        "    B: [4.370360958267613, 0.0, 0.0, -4.254415930013168]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.0037967551530176, -0.04979456910726583, -0.007092097585518878, 0.07126098999442977]\n", | ||||
|        "    B: [-2.2427356029926337, 0.4432886498747459, -1.2315068062419472, -1.522087101319342]\n", | ||||
|        "    B: [-1.576810353663218, -0.08400160217698217, 1.025238316808337, 0.6543401378482231]\n", | ||||
|        "    B: [-1.1878570602356244, 0.3852696171578499, -0.47734716319323317, 0.18630996601909597]\n", | ||||
|        "    B: [-1.6436772930583505, -1.0018521094453126, 0.4216069097815019, 0.7212593210074284]\n", | ||||
|        "    B: [-1.0858448514323804, 0.3070900136969648, 0.26910084043086047, -0.11108331354983517]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [5.940760429560125, 0.0, 0.0, 5.855991332082674]\n", | ||||
|        "    B: [5.940760429560125, 0.0, 0.0, -5.855991332082674]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-2.5515863925730233, 0.0574036477190863, 1.9321385747234918, 1.3319678930281418]\n", | ||||
|        "    B: [-3.2707523737124977, -2.710802011299676, -1.41016923110446, -0.6006632045712658]\n", | ||||
|        "    B: [-1.6965910302662786, 0.9846458960035911, 0.9504416414719069, -0.07452697242920955]\n", | ||||
|        "    B: [-1.0283520810617242, 0.1620200166783027, 0.15874691422324994, -0.07782630689000514]\n", | ||||
|        "    B: [-1.277724475991329, 0.26836143674120055, -0.33222621981983513, -0.6709602929248032]\n", | ||||
|        "    B: [-2.0565145055153993, 1.2383710141574962, -1.298931679494354, 0.09200888378714224]\n", | ||||
|        "\n", | ||||
|        " Input for ABC Process: 'AB->ABBBBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [6.732994664701373, 0.0, 0.0, 6.65831939417877]\n", | ||||
|        "    B: [6.732994664701373, 0.0, 0.0, -6.65831939417877]\n", | ||||
|        "  6 Outgoing Particles:\n", | ||||
|        "    A: [-1.602557260532173, -0.06659157948757613, 0.9308846463293637, -0.8349904850080558]\n", | ||||
|        "    B: [-1.3205375883536927, 0.7078592481114431, -0.05631226213188625, -0.48947291677035515]\n", | ||||
|        "    B: [-1.7625153098951976, 0.12706601232750347, 0.34097061443470383, 1.405010137407617]\n", | ||||
|        "    B: [-2.7792473938949334, 1.6510422215054068, 1.7155538904747691, -1.0272051928194055]\n", | ||||
|        "    B: [-2.722083339444658, -0.5204063912580275, -2.061236049180356, -1.3748530264647703]\n", | ||||
|        "    B: [-3.279048437282091, -1.89896951119875, -0.8698608399265956, 2.3215114836549695]\n" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 10, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "@time inputs = [gen_process_input(process) for _ in 1:1000]" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 14, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Internal error: stack overflow in type inference of materialize(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(MetagraphOptimization.compute__bad8f2ac_7bfc_11ee_176b_b72dc8919aad), Tuple{Array{MetagraphOptimization.ABCProcessInput, 1}}}).\n", | ||||
|       "This might be caused by recursion over very long tuples or argument lists.\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "ename": "LoadError", | ||||
|      "evalue": "StackOverflowError:", | ||||
|      "output_type": "error", | ||||
|      "traceback": [ | ||||
|       "StackOverflowError:", | ||||
|       "", | ||||
|       "Stacktrace:", | ||||
|       "    [1] argtypes_to_type", | ||||
|       "      @ ./compiler/typeutils.jl:71 [inlined]", | ||||
|       "    [2] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1948", | ||||
|       "    [3] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020", | ||||
|       "    [4] abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1566", | ||||
|       "    [5] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1855", | ||||
|       "    [6] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020", | ||||
|       "    [7] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1999", | ||||
|       "    [8] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2183", | ||||
|       "    [9] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2396", | ||||
|       "   [10] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2682", | ||||
|       "   [11] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2867", | ||||
|       "   [12] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2955", | ||||
|       "   [13] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:246", | ||||
|       "   [14] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:216", | ||||
|       "   [15] typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:932", | ||||
|       "   [16] abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:611", | ||||
|       "   [17] abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:152", | ||||
|       "   [18] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1949", | ||||
|       "--- the last 16 lines are repeated 413 more times ---", | ||||
|       " [6627] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020", | ||||
|       " [6628] abstract_apply(interp::Core.Compiler.NativeInterpreter, argtypes::Vector{Any}, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1566", | ||||
|       " [6629] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1855", | ||||
|       " [6630] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020", | ||||
|       " [6631] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1999", | ||||
|       " [6632] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2183", | ||||
|       " [6633] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2396", | ||||
|       " [6634] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2658", | ||||
|       " [6635] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2867", | ||||
|       " [6636] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2955", | ||||
|       " [6637] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:246", | ||||
|       " [6638] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:216", | ||||
|       " [6639] typeinf_edge(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector, caller::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:932", | ||||
|       " [6640] abstract_call_method(interp::Core.Compiler.NativeInterpreter, method::Method, sig::Any, sparams::Core.SimpleVector, hardlimit::Bool, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:611", | ||||
|       " [6641] abstract_call_gf_by_type(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, atype::Any, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:152", | ||||
|       " [6642] abstract_call_known(interp::Core.Compiler.NativeInterpreter, f::Any, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Int64)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1949", | ||||
|       " [6643] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState, max_methods::Nothing)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2020", | ||||
|       " [6644] abstract_call(interp::Core.Compiler.NativeInterpreter, arginfo::Core.Compiler.ArgInfo, si::Core.Compiler.StmtInfo, sv::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:1999", | ||||
|       " [6645] abstract_eval_statement_expr(interp::Core.Compiler.NativeInterpreter, e::Expr, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState, mi::Nothing)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2183", | ||||
|       " [6646] abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Core.Compiler.VarState}, sv::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2396", | ||||
|       " [6647] abstract_eval_basic_statement(interp::Core.Compiler.NativeInterpreter, stmt::Any, pc_vartable::Vector{Core.Compiler.VarState}, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2682", | ||||
|       " [6648] typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2867", | ||||
|       " [6649] typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/abstractinterpretation.jl:2955", | ||||
|       " [6650] _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:246", | ||||
|       " [6651] typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:216", | ||||
|       " [6652] typeinf", | ||||
|       "      @ ./compiler/typeinfer.jl:12 [inlined]", | ||||
|       " [6653] typeinf_type(interp::Core.Compiler.NativeInterpreter, method::Method, atype::Any, sparams::Core.SimpleVector)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:1079", | ||||
|       " [6654] return_type(interp::Core.Compiler.NativeInterpreter, t::DataType)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:1140", | ||||
|       " [6655] return_type(f::Any, t::DataType)", | ||||
|       "      @ Core.Compiler ./compiler/typeinfer.jl:1112", | ||||
|       " [6656] combine_eltypes(f::Function, args::Tuple{Vector{ABCProcessInput}})", | ||||
|       "      @ Base.Broadcast ./broadcast.jl:730", | ||||
|       " [6657] copy(bc::Base.Broadcast.Broadcasted{Style}) where Style", | ||||
|       "      @ Base.Broadcast ./broadcast.jl:895", | ||||
|       " [6658] materialize(bc::Base.Broadcast.Broadcasted)", | ||||
|       "      @ Base.Broadcast ./broadcast.jl:873", | ||||
|       " [6659] var\"##core#302\"()", | ||||
|       "      @ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:489", | ||||
|       " [6660] var\"##sample#303\"(::Tuple{}, __params::BenchmarkTools.Parameters)", | ||||
|       "      @ Main ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:495", | ||||
|       " [6661] _run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; verbose::Bool, pad::String, kwargs::Base.Pairs{Symbol, Integer, NTuple{4, Symbol}, NamedTuple{(:samples, :evals, :gctrial, :gcsample), Tuple{Int64, Int64, Bool, Bool}}})", | ||||
|       "      @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:99", | ||||
|       " [6662] #invokelatest#2", | ||||
|       "      @ ./essentials.jl:821 [inlined]", | ||||
|       " [6663] invokelatest", | ||||
|       "      @ ./essentials.jl:816 [inlined]", | ||||
|       " [6664] #run_result#45", | ||||
|       "      @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]", | ||||
|       " [6665] run_result", | ||||
|       "      @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:34 [inlined]", | ||||
|       " [6666] run(b::BenchmarkTools.Benchmark, p::BenchmarkTools.Parameters; progressid::Nothing, nleaves::Float64, ndone::Float64, kwargs::Base.Pairs{Symbol, Integer, NTuple{5, Symbol}, NamedTuple{(:verbose, :samples, :evals, :gctrial, :gcsample), Tuple{Bool, Int64, Int64, Bool, Bool}}})", | ||||
|       "      @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117", | ||||
|       " [6667] run (repeats 2 times)", | ||||
|       "      @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:117 [inlined]", | ||||
|       " [6668] #warmup#54", | ||||
|       "      @ ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:169 [inlined]", | ||||
|       " [6669] warmup(item::BenchmarkTools.Benchmark)", | ||||
|       "      @ BenchmarkTools ~/.julia/packages/BenchmarkTools/0owsb/src/execution.jl:168" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "using BenchmarkTools\n", | ||||
|     "#compute_bench = @benchmark compute_AB_AB5.(inputs)\n", | ||||
|     "compute_bench_reduced = @benchmark compute_AB_AB5_reduced.(inputs)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   } | ||||
|  ], | ||||
|  "metadata": { | ||||
|   "kernelspec": { | ||||
|    "display_name": "Julia 1.9.3", | ||||
|    "language": "julia", | ||||
|    "name": "julia-1.9" | ||||
|   }, | ||||
|   "language_info": { | ||||
|    "file_extension": ".jl", | ||||
|    "mimetype": "application/julia", | ||||
|    "name": "julia", | ||||
|    "version": "1.9.3" | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 4 | ||||
| } | ||||
							
								
								
									
										407
									
								
								notebooks/abc_model_showcase.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										407
									
								
								notebooks/abc_model_showcase.ipynb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,407 @@ | ||||
| { | ||||
|  "cells": [ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "id": "20768e45-df62-4638-ba33-b0ccf239f1aa", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "using Revise\n", | ||||
|     "using MetagraphOptimization\n", | ||||
|     "using BenchmarkTools" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "id": "ff5f4a49", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Found 1 NUMA nodes\n", | ||||
|       "CUDA is non-functional\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Machine(MetagraphOptimization.AbstractDevice[MetagraphOptimization.NumaNode(0x0000, 0x0001, MetagraphOptimization.LocalVariables(), -1.0, UUID(\"a89974f6-6212-11ee-0866-0f591a3b69ea\"))], [-1.0;;])" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 3, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Get our machine's info\n", | ||||
|     "machine = get_machine_info()" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "id": "9df482a4-ca44-44c5-9ea7-7a2977d529be", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "ABCModel()" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 4, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Create a model identifier\n", | ||||
|     "model = ABCModel()" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "id": "30b16872-07f7-4d47-8ff8-8c3a849c9d4e", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "ABC Process: 'AB->ABBB'" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 5, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Create a process in our model\n", | ||||
|     "process_str = \"AB->ABBB\"\n", | ||||
|     "process = parse_process(process_str, model)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "id": "043bd9e2-f89a-4362-885a-8c89d4cdd76f", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Total: 280, ComputeTaskP" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Graph:\n", | ||||
|        "  Nodes: \n", | ||||
|        "  Edges: 385\n", | ||||
|        "  Total Compute Effort: 1075.0\n", | ||||
|        "  Total Data Transfer: 10944.0\n", | ||||
|        "  Total Compute Intensity: 0.09822733918128655\n" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 6, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     }, | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       ": 6, ComputeTaskU: 6, \n", | ||||
|       "         ComputeTaskV: 64, ComputeTaskSum: 1, ComputeTaskS2: 24, \n", | ||||
|       "         ComputeTaskS1: 36, DataTask: 143" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Read the graph (of the same process) from a file\n", | ||||
|     "graph = parse_dag(\"../input/$process_str.txt\", model)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|    "id": "02f01ad3-fd10-48d5-a0e0-c03dc83c80a4", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "Input for ABC Process: 'AB->ABBB':\n", | ||||
|        "  2 Incoming particles:\n", | ||||
|        "    A: [5.77986599979293, 0.0, 0.0, 5.692701553354288]\n", | ||||
|        "    B: [5.77986599979293, 0.0, 0.0, -5.692701553354288]\n", | ||||
|        "  4 Outgoing Particles:\n", | ||||
|        "    A: [-3.8835293143673746, -1.4292027910861678, 2.8576090179942106, 1.968057422378813]\n", | ||||
|        "    B: [-1.1554024905063585, -0.1464656500147254, -0.2082400426692148, 0.5197487980391896]\n", | ||||
|        "    B: [-2.849749730594798, -1.0177034035100576, -2.464951858896686, -0.09677625137882176]\n", | ||||
|        "    B: [-3.6710504641173287, 2.5933718446109513, -0.1844171164283155, -2.391029969039186]\n" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 7, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Generate some random input data for our process\n", | ||||
|     "input_data = gen_process_input(process)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "id": "083fb1be-ce2a-47f9-afb9-60a6fdfaed0b", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "compute__af4450a2_6212_11ee_2601_cde7cf2aedc1 (generic function with 1 method)" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 8, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Get the function computing the result of the process from a ProcessInput\n", | ||||
|     "AB_AB3_compute = get_compute_function(graph, process, machine)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "id": "a40c9500-8f79-4f04-b3c5-59b72a6b7ba9", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "-1.8924431710735022e-13" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 9, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Actually compute a result using the generated function and the input data\n", | ||||
|     "result = AB_AB3_compute(input_data)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "id": "80c70010", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# We can also mutate the graph by applying some operations to it\n", | ||||
|     "optimize_to_fixpoint!(ReductionOptimizer(), graph)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 11, | ||||
|    "id": "5b192b44", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# The result should be the same as before (we can use execute to save having to generate the function ourselves)\n", | ||||
|     "@assert result ≈ execute(graph, process, machine, input_data)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 12, | ||||
|    "id": "9b2f4a3f", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "1000-element Vector{Float64}:\n", | ||||
|        " -2.1491995259940396e-11\n", | ||||
|        " -1.04995646459455e-11\n", | ||||
|        "  5.821760691187782e-15\n", | ||||
|        " -6.556969485683705e-14\n", | ||||
|        " -1.3588086164373753e-14\n", | ||||
|        " -1.8789662441593694e-13\n", | ||||
|        " -2.131973301835892e-13\n", | ||||
|        " -5.3359759072004825e-12\n", | ||||
|        " -9.053914191490223e-13\n", | ||||
|        " -5.61107901706923e-13\n", | ||||
|        " -5.063492275603428e-11\n", | ||||
|        "  2.9168508985811397e-15\n", | ||||
|        " -1.6420151378194157e-13\n", | ||||
|        "  ⋮\n", | ||||
|        "  1.0931677247833436e-13\n", | ||||
|        " -7.704755306462797e-16\n", | ||||
|        " -1.8385907037491397e-12\n", | ||||
|        " -6.036215596560059e-14\n", | ||||
|        " -9.98872401400362e-12\n", | ||||
|        "  3.4861755637292935e-13\n", | ||||
|        " -1.1051119822969222e-10\n", | ||||
|        " -2.496572513216201e-12\n", | ||||
|        " -3.8682427847201926e-11\n", | ||||
|        "  7.904149696653438e-15\n", | ||||
|        " -7.606811743178716e-11\n", | ||||
|        " -5.100594937480292e-13" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 12, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# Now we can generate a function and use it on lots of inputs\n", | ||||
|     "inputs = [gen_process_input(process) for _ in 1:1000]\n", | ||||
|     "AB_AB3_reduced_compute = get_compute_function(graph, process, machine)\n", | ||||
|     "\n", | ||||
|     "results = AB_AB3_reduced_compute.(inputs)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 13, | ||||
|    "id": "d43e4ff0", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "BenchmarkTools.Trial: 879 samples with 1 evaluation.\n", | ||||
|        " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m4.567 ms\u001b[22m\u001b[39m … \u001b[35m14.334 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 54.51%\n", | ||||
|        " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m4.998 ms              \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m0.00%\n", | ||||
|        " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m5.686 ms\u001b[22m\u001b[39m ± \u001b[32m 1.414 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m9.09% ± 14.49%\n", | ||||
|        "\n", | ||||
|        "  \u001b[39m \u001b[39m \u001b[39m▃\u001b[39m▇\u001b[39m█\u001b[34m▅\u001b[39m\u001b[39m▄\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", | ||||
|        "  \u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[39m▇\u001b[32m█\u001b[39m\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▄\u001b[39m▁\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▆\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▄\u001b[39m▅\u001b[39m▆\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▁\u001b[39m▅\u001b[39m▄\u001b[39m▄\u001b[39m▅\u001b[39m▁\u001b[39m▄\u001b[39m \u001b[39m▇\n", | ||||
|        "  4.57 ms\u001b[90m      \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m       10 ms \u001b[0m\u001b[1m<\u001b[22m\n", | ||||
|        "\n", | ||||
|        " Memory estimate\u001b[90m: \u001b[39m\u001b[33m6.17 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m143006\u001b[39m." | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 13, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "@benchmark results = AB_AB3_compute.($inputs)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 14, | ||||
|    "id": "e18d9546", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "BenchmarkTools.Trial: 1089 samples with 1 evaluation.\n", | ||||
|        " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m3.637 ms\u001b[22m\u001b[39m … \u001b[35m10.921 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 59.52%\n", | ||||
|        " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m4.098 ms              \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m 0.00%\n", | ||||
|        " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m4.587 ms\u001b[22m\u001b[39m ± \u001b[32m 1.334 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m10.21% ± 15.77%\n", | ||||
|        "\n", | ||||
|        "  \u001b[39m \u001b[39m▂\u001b[39m▆\u001b[39m▆\u001b[39m▇\u001b[34m█\u001b[39m\u001b[39m▆\u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m▁\u001b[39m▁\u001b[39m \u001b[39m▁\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", | ||||
|        "  \u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m▇\u001b[32m▆\u001b[39m\u001b[39m▅\u001b[39m▇\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▅\u001b[39m▆\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m█\u001b[39m█\u001b[39m▆\u001b[39m▆\u001b[39m▆\u001b[39m█\u001b[39m█\u001b[39m▇\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m \u001b[39m█\n", | ||||
|        "  3.64 ms\u001b[90m      \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m     8.78 ms \u001b[0m\u001b[1m<\u001b[22m\n", | ||||
|        "\n", | ||||
|        " Memory estimate\u001b[90m: \u001b[39m\u001b[33m5.26 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m123006\u001b[39m." | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 14, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "@benchmark results = AB_AB3_reduced_compute.($inputs)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 16, | ||||
|    "id": "13efed12-3547-400b-a7a2-5dfae9a973a2", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "# Set a different caching strategy\n", | ||||
|     "MetagraphOptimization.set_cache_strategy(machine.devices[1], MetagraphOptimization.Dictionary())" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 18, | ||||
|    "id": "ef62716b-a219-4f6e-9150-f984d3734839", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "BenchmarkTools.Trial: 331 samples with 1 evaluation.\n", | ||||
|        " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m):  \u001b[39m\u001b[36m\u001b[1m12.148 ms\u001b[22m\u001b[39m … \u001b[35m24.164 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 0.00% … 13.35%\n", | ||||
|        " Time  \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m):     \u001b[39m\u001b[34m\u001b[1m15.412 ms              \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m):    \u001b[39m17.47%\n", | ||||
|        " Time  \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m):   \u001b[39m\u001b[32m\u001b[1m15.117 ms\u001b[22m\u001b[39m ± \u001b[32m 2.194 ms\u001b[39m  \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m):  \u001b[39m12.31% ±  8.95%\n", | ||||
|        "\n", | ||||
|        "  \u001b[39m \u001b[39m▄\u001b[39m█\u001b[39m▄\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m▄\u001b[39m\u001b[39m▄\u001b[34m▂\u001b[39m\u001b[39m \u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", | ||||
|        "  \u001b[39m▅\u001b[39m█\u001b[39m█\u001b[39m█\u001b[39m▅\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▅\u001b[39m▂\u001b[39m▃\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▃\u001b[32m█\u001b[39m\u001b[39m█\u001b[34m█\u001b[39m\u001b[39m▇\u001b[39m█\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▆\u001b[39m▄\u001b[39m▄\u001b[39m▆\u001b[39m▅\u001b[39m▄\u001b[39m▃\u001b[39m▄\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▂\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m▁\u001b[39m▃\u001b[39m▃\u001b[39m▂\u001b[39m▂\u001b[39m▁\u001b[39m▂\u001b[39m \u001b[39m▃\n", | ||||
|        "  12.1 ms\u001b[90m         Histogram: frequency by time\u001b[39m          21 ms \u001b[0m\u001b[1m<\u001b[22m\n", | ||||
|        "\n", | ||||
|        " Memory estimate\u001b[90m: \u001b[39m\u001b[33m27.46 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m118013\u001b[39m." | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 18, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "# ... and bench again\n", | ||||
|     "AB_AB3_reduced_dict_compute = get_compute_function(graph, process, machine)\n", | ||||
|     "@benchmark results = AB_AB3_reduced_dict_compute.($inputs)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "id": "5461ffd4-6a0e-4f1f-b1f1-3a2854a8ae88", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   } | ||||
|  ], | ||||
|  "metadata": { | ||||
|   "kernelspec": { | ||||
|    "display_name": "Julia 1.9.3", | ||||
|    "language": "julia", | ||||
|    "name": "julia-1.9" | ||||
|   }, | ||||
|   "language_info": { | ||||
|    "file_extension": ".jl", | ||||
|    "mimetype": "application/julia", | ||||
|    "name": "julia", | ||||
|    "version": "1.9.3" | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 5 | ||||
| } | ||||
							
								
								
									
										69
									
								
								notebooks/profiling.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								notebooks/profiling.ipynb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | ||||
| { | ||||
|  "cells": [ | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "using Revise; using MetagraphOptimization; using BenchmarkTools; using ProfileView\n", | ||||
|     "using Base.Threads\n", | ||||
|     "nthreads()" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "model = ABCModel()\n", | ||||
|     "process_str = \"AB->ABBBBB\"\n", | ||||
|     "process = parse_process(process_str, model)\n", | ||||
|     "graph = parse_dag(\"../input/$process_str.txt\", model)\n", | ||||
|     "print(graph)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "@ProfileView.profview optimize_to_fixpoint!(ReductionOptimizer(), graph)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "@ProfileView.profview comp_func = get_compute_function(graph, process)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [] | ||||
|   } | ||||
|  ], | ||||
|  "metadata": { | ||||
|   "kernelspec": { | ||||
|    "display_name": "Julia 1.9.3", | ||||
|    "language": "julia", | ||||
|    "name": "julia-1.9" | ||||
|   }, | ||||
|   "language_info": { | ||||
|    "file_extension": ".jl", | ||||
|    "mimetype": "application/julia", | ||||
|    "name": "julia", | ||||
|    "version": "1.9.3" | ||||
|   }, | ||||
|   "orig_nbformat": 4 | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  "nbformat_minor": 2 | ||||
| } | ||||
| @@ -6,20 +6,20 @@ julia --project=./examples -t 4 -e 'import Pkg; Pkg.instantiate()' | ||||
|  | ||||
| #for i in $(seq $minthreads $maxthreads) | ||||
| #   printf "(AB->AB, $i) " | ||||
| #   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->AB.txt"))' | ||||
| #   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->AB.txt"), ABCModel())' | ||||
| #end | ||||
|  | ||||
| #for i in $(seq $minthreads $maxthreads) | ||||
| #   printf "(AB->ABBB, $i) " | ||||
| #   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBB.txt"))' | ||||
| #   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBB.txt"), ABCModel())' | ||||
| #end | ||||
|  | ||||
| #for i in $(seq $minthreads $maxthreads) | ||||
| #   printf "(AB->ABBBBB, $i) " | ||||
| #   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBBBB.txt"))' | ||||
| #   julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBBBB.txt"), ABCModel())' | ||||
| #end | ||||
|  | ||||
| for i in $(seq $minthreads $maxthreads) | ||||
|    printf "(AB->ABBBBBBB, $i) " | ||||
|    julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_abc("input/AB->ABBBBBBB.txt"))' | ||||
|    # parse_dag takes the model as its second argument, so ABCModel() has to be inside the call | ||||
|    julia --project=./examples -t $i -O3 -e 'using MetagraphOptimization; using BenchmarkTools; @btime get_operations(graph) setup=(graph = parse_dag("input/AB->ABBBBBBB.txt", ABCModel()))' | ||||
| end | ||||
|   | ||||
| @@ -5,6 +5,7 @@ A module containing tools to work on DAGs. | ||||
| """ | ||||
| module MetagraphOptimization | ||||
|  | ||||
| # graph types | ||||
| export DAG | ||||
| export Node | ||||
| export Edge | ||||
| @@ -18,6 +19,7 @@ export FusedComputeTask | ||||
| export PossibleOperations | ||||
| export GraphProperties | ||||
|  | ||||
| # graph functions | ||||
| export make_node | ||||
| export make_edge | ||||
| export insert_node | ||||
| @@ -27,10 +29,15 @@ export is_exit_node | ||||
| export parents | ||||
| export children | ||||
| export compute | ||||
| export data | ||||
| export compute_effort | ||||
| export task | ||||
| export get_properties | ||||
| export get_exit_node | ||||
| export is_valid | ||||
| export operation_stack_length | ||||
| export is_valid, is_scheduled | ||||
|  | ||||
| # graph operation related | ||||
| export Operation | ||||
| export AppliedOperation | ||||
| export NodeFusion | ||||
| @@ -42,7 +49,10 @@ export can_pop | ||||
| export reset_graph! | ||||
| export get_operations | ||||
|  | ||||
| export parse_abc | ||||
| # ABC model | ||||
| export ParticleValue | ||||
| export ParticleA, ParticleB, ParticleC | ||||
| export ABCProcessDescription, ABCProcessInput, ABCModel | ||||
| export ComputeTaskP | ||||
| export ComputeTaskS1 | ||||
| export ComputeTaskS2 | ||||
| @@ -50,10 +60,24 @@ export ComputeTaskV | ||||
| export ComputeTaskU | ||||
| export ComputeTaskSum | ||||
|  | ||||
| # code generation related | ||||
| export execute | ||||
| export gen_particles | ||||
| export ParticleValue | ||||
| export Particle | ||||
| export parse_dag, parse_process | ||||
| export gen_process_input | ||||
| export get_compute_function | ||||
|  | ||||
| # estimator | ||||
| export cost_type, graph_cost, operation_effect | ||||
| export GlobalMetricEstimator, CDCost | ||||
|  | ||||
| # optimization | ||||
| export AbstractOptimizer, GreedyOptimizer, ReductionOptimizer, RandomWalkOptimizer | ||||
| export optimize_step!, optimize! | ||||
| export fixpoint_reached, optimize_to_fixpoint! | ||||
|  | ||||
| # machine info | ||||
| export Machine | ||||
| export get_machine_info | ||||
|  | ||||
| export ==, in, show, isempty, delete!, length | ||||
|  | ||||
| @@ -72,6 +96,7 @@ import Base.insert! | ||||
| import Base.collect | ||||
|  | ||||
|  | ||||
| include("devices/interface.jl") | ||||
| include("task/type.jl") | ||||
| include("node/type.jl") | ||||
| include("diff/type.jl") | ||||
| @@ -99,6 +124,7 @@ include("node/properties.jl") | ||||
| include("node/validate.jl") | ||||
|  | ||||
| include("operation/utility.jl") | ||||
| include("operation/iterate.jl") | ||||
| include("operation/apply.jl") | ||||
| include("operation/clean.jl") | ||||
| include("operation/find.jl") | ||||
| @@ -111,15 +137,42 @@ include("properties/utility.jl") | ||||
|  | ||||
| include("task/create.jl") | ||||
| include("task/compare.jl") | ||||
| include("task/compute.jl") | ||||
| include("task/print.jl") | ||||
| include("task/properties.jl") | ||||
|  | ||||
| include("estimator/interface.jl") | ||||
| include("estimator/global_metric.jl") | ||||
|  | ||||
| include("optimization/interface.jl") | ||||
| include("optimization/greedy.jl") | ||||
| include("optimization/random_walk.jl") | ||||
| include("optimization/reduce.jl") | ||||
|  | ||||
| include("models/interface.jl") | ||||
| include("models/print.jl") | ||||
|  | ||||
| include("models/abc/types.jl") | ||||
| include("models/abc/particle.jl") | ||||
| include("models/abc/compute.jl") | ||||
| include("models/abc/create.jl") | ||||
| include("models/abc/properties.jl") | ||||
| include("models/abc/parse.jl") | ||||
| include("models/abc/print.jl") | ||||
|  | ||||
| include("devices/measure.jl") | ||||
| include("devices/detect.jl") | ||||
| include("devices/impl.jl") | ||||
|  | ||||
| include("devices/numa/impl.jl") | ||||
| include("devices/cuda/impl.jl") | ||||
| # cannot currently use AMDGPU because of an incompatibility with the newest ROCm drivers | ||||
| # include("devices/rocm/impl.jl") | ||||
| # oneapi seems also broken for now | ||||
| # include("devices/oneapi/impl.jl") | ||||
|  | ||||
| include("scheduler/interface.jl") | ||||
| include("scheduler/greedy.jl") | ||||
|  | ||||
| include("code_gen/main.jl") | ||||
|  | ||||
|   | ||||
| @@ -1,126 +1,158 @@ | ||||
| using DataStructures | ||||
|  | ||||
| """ | ||||
|     gen_code(graph::DAG) | ||||
|  | ||||
| Generate the code for a given graph. The return value is a tuple of: | ||||
| Generate the code for a given graph. The return value is a named tuple of: | ||||
|  | ||||
| - `code::Expr`: The julia expression containing the code for the whole graph. | ||||
| - `inputSymbols::Dict{String, Symbol}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on. | ||||
| - `inputSymbols::Dict{String, Vector{Symbol}}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on. | ||||
| - `outputSymbol::Symbol`: The symbol of the final calculated value | ||||
|  | ||||
| See also: [`execute`](@ref) | ||||
| """ | ||||
| function gen_code(graph::DAG) | ||||
|     code = Vector{Expr}() | ||||
|     sizehint!(code, length(graph.nodes)) | ||||
| function gen_code(graph::DAG, machine::Machine) | ||||
|     sched = schedule_dag(GreedyScheduler(), graph, machine) | ||||
|  | ||||
|     nodeQueue = PriorityQueue{Node, Int}() | ||||
|     inputSyms = Dict{String, Symbol}() | ||||
|     codeAcc = Vector{Expr}() | ||||
|     sizehint!(codeAcc, length(graph.nodes)) | ||||
|  | ||||
|     # use a priority equal to the number of unseen children -> 0 are nodes that can be added | ||||
|     for node in sched | ||||
|         # TODO: this is kind of ugly, should init nodes be scheduled differently from the rest? | ||||
|         if (node isa DataTaskNode && length(node.children) == 0) | ||||
|             push!(codeAcc, get_init_expression(node, entry_device(machine))) | ||||
|             continue | ||||
|         end | ||||
|         push!(codeAcc, get_expression(node)) | ||||
|     end | ||||
|  | ||||
|     # get inSymbols | ||||
|     inputSyms = Dict{String, Vector{Symbol}}() | ||||
|     for node in get_entry_nodes(graph) | ||||
|         enqueue!(nodeQueue, node => 0) | ||||
|         push!(inputSyms, node.name => Symbol("data_$(to_var_name(node.id))_in")) | ||||
|         if !haskey(inputSyms, node.name) | ||||
|             inputSyms[node.name] = Vector{Symbol}() | ||||
|         end | ||||
|  | ||||
|         push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in")) | ||||
|     end | ||||
|  | ||||
|     node = nothing | ||||
|     while !isempty(nodeQueue) | ||||
|         @assert peek(nodeQueue)[2] == 0 | ||||
|         node = dequeue!(nodeQueue) | ||||
|     # get outSymbol | ||||
|     outSym = Symbol(to_var_name(get_exit_node(graph).id)) | ||||
|  | ||||
|         push!(code, get_expression(node)) | ||||
|         for parent in node.parents | ||||
|             # reduce the priority of all parents by one | ||||
|             if (!haskey(nodeQueue, parent)) | ||||
|                 enqueue!(nodeQueue, parent => length(parent.children) - 1) | ||||
|             else | ||||
|                 nodeQueue[parent] = nodeQueue[parent] - 1 | ||||
|             end | ||||
|     return (code = Expr(:block, codeAcc...), inputSymbols = inputSyms, outputSymbol = outSym) | ||||
| end | ||||
|  | ||||
| # Collect the cache-initialization expression of every device in `machine.devices` | ||||
| # (in device order) and wrap them into a single `:block` expression. | ||||
| function gen_cache_init_code(machine::Machine) | ||||
|     perDeviceInit = Expr[gen_cache_init_code(dev) for dev in machine.devices] | ||||
|  | ||||
|     return Expr(:block, perDeviceInit...) | ||||
| end | ||||
|  | ||||
| function gen_input_assignment_code( | ||||
|     inputSymbols::Dict{String, Vector{Symbol}}, | ||||
|     processDescription::AbstractProcessDescription, | ||||
|     machine::Machine, | ||||
|     processInputSymbol::Symbol = :input, | ||||
| ) | ||||
|     @assert length(inputSymbols) >= | ||||
|             sum(values(in_particles(processDescription))) + sum(values(out_particles(processDescription))) "Number of input Symbols is smaller than the number of particles in the process description" | ||||
|  | ||||
|     assignInputs = Vector{Expr}() | ||||
|     for (name, symbols) in inputSymbols | ||||
|         type = type_from_name(name) | ||||
|         index = parse(Int, name[2:end]) | ||||
|  | ||||
|         p = nothing | ||||
|  | ||||
|         if (index > in_particles(processDescription)[type]) | ||||
|             index -= in_particles(processDescription)[type] | ||||
|             @assert index <= out_particles(processDescription)[type] "Too few particles of type $type in input particles for this process" | ||||
|  | ||||
|             p = "filter(x -> typeof(x) <: $type, out_particles($(processInputSymbol)))[$(index)]" | ||||
|         else | ||||
|             p = "filter(x -> typeof(x) <: $type, in_particles($(processInputSymbol)))[$(index)]" | ||||
|         end | ||||
|  | ||||
|         for symbol in symbols | ||||
|             # TODO: how to get the "default" cpu device? | ||||
|             device = entry_device(machine) | ||||
|             evalExpr = eval(gen_access_expr(device, symbol)) | ||||
|             push!(assignInputs, Meta.parse("$(evalExpr)::ParticleValue{$type} = ParticleValue($p, 1.0)")) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     # node is now the last node we looked at -> the output node | ||||
|     outSym = Symbol("data_$(to_var_name(node.id))") | ||||
|     return Expr(:block, assignInputs...) | ||||
| end | ||||
|  | ||||
|     return ( | ||||
|         code = Expr(:block, code...), | ||||
|         inputSymbols = inputSyms, | ||||
|         outputSymbol = outSym, | ||||
| """ | ||||
|     get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine) | ||||
|  | ||||
| Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input. | ||||
|  | ||||
| The function is generated on every call: the device cache initialization, the input assignments and the graph code are | ||||
| spliced into the text of one function definition, which is parsed and `eval`ed. `<id>` is built from a fresh `uuid1`. | ||||
| """ | ||||
| function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine) | ||||
|     # gen_code returns the named tuple (code, inputSymbols, outputSymbol); it is destructured by position here | ||||
|     (code, inputSymbols, outputSymbol) = gen_code(graph, machine) | ||||
|  | ||||
|     initCaches = gen_cache_init_code(machine) | ||||
|     # :input is the name of the generated function's parameter (see the function text below) | ||||
|     assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input) | ||||
|  | ||||
|     functionId = to_var_name(UUIDs.uuid1(rng[1])) | ||||
|     # the access expression is evaluated first, and its value is what gets interpolated into the function text | ||||
|     resSym = eval(gen_access_expr(entry_device(machine), outputSymbol)) | ||||
|     # the Expr blocks are interpolated as text and re-parsed, so statement order in this string is the execution order | ||||
|     expr = Meta.parse( | ||||
|         "function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end", | ||||
|     ) | ||||
|  | ||||
|     func = eval(expr) | ||||
|  | ||||
|     return func | ||||
| end | ||||
|  | ||||
| """ | ||||
|     execute(generated_code, input::Dict{ParticleType, Vector{Particle}}) | ||||
|     execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput) | ||||
|  | ||||
| Execute the given `generated_code` (as returned by [`gen_code`](@ref)) on the given input particles. | ||||
| Execute the code of the given `graph` on the given input particles. | ||||
|  | ||||
| This is essentially shorthand for | ||||
|     ```julia | ||||
|     compute_graph = get_compute_function(graph, process, machine) | ||||
|     result = compute_graph(input) | ||||
|     ``` | ||||
|  | ||||
| See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref) | ||||
| """ | ||||
| function execute(generated_code, input::Dict{ParticleType, Vector{Particle}}) | ||||
|     (code, inputSymbols, outputSymbol) = generated_code | ||||
| function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput) | ||||
|     (code, inputSymbols, outputSymbol) = gen_code(graph, machine) | ||||
|  | ||||
|     assignInputs = Vector{Expr}() | ||||
|     for (name, symbol) in inputSymbols | ||||
|         type = nothing | ||||
|         if startswith(name, "A") | ||||
|             type = A | ||||
|         elseif startswith(name, "B") | ||||
|             type = B | ||||
|         else | ||||
|             type = C | ||||
|     initCaches = gen_cache_init_code(machine) | ||||
|     assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input) | ||||
|  | ||||
|  | ||||
|     functionId = to_var_name(UUIDs.uuid1(rng[1])) | ||||
|     resSym = eval(gen_access_expr(entry_device(machine), outputSymbol)) | ||||
|     expr = Meta.parse( | ||||
|         "function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end", | ||||
|     ) | ||||
|     func = eval(expr) | ||||
|  | ||||
|     result = 0 | ||||
|     try | ||||
|         result = @eval $func($input) | ||||
|     catch e | ||||
|         println("Error while evaluating: $e") | ||||
|  | ||||
|         # if we find a uuid in the exception we can color it in so it's easier to spot | ||||
|         uuidRegex = r"[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}" | ||||
|         m = match(uuidRegex, string(e)) | ||||
|  | ||||
|         functionStr = string(expr) | ||||
|         if (isa(m, RegexMatch)) | ||||
|             functionStr = replace(functionStr, m.match => "\033[31m$(m.match)\033[0m") | ||||
|         end | ||||
|         index = parse(Int, name[2:end]) | ||||
|  | ||||
|         push!( | ||||
|             assignInputs, | ||||
|             Meta.parse( | ||||
|                 "$(symbol) = ParticleValue(Particle($(input[type][index]).P0, $(input[type][index]).P1, $(input[type][index]).P2, $(input[type][index]).P3, $(type)), 1.0)", | ||||
|             ), | ||||
|         ) | ||||
|         println("Function:\n$functionStr") | ||||
|         @assert false | ||||
|     end | ||||
|  | ||||
|     assignInputs = Expr(:block, assignInputs...) | ||||
|     eval(assignInputs) | ||||
|     eval(code) | ||||
|  | ||||
|     eval(Meta.parse("result = $outputSymbol")) | ||||
|     return result | ||||
| end | ||||
|  | ||||
| """ | ||||
|     execute(graph::DAG, input::Dict{ParticleType, Vector{Particle}}) | ||||
|  | ||||
| Execute the given `generated_code` (as returned by [`gen_code`](@ref)) on the given input particles. | ||||
| The input particles should be sorted correctly into the dictionary to their according [`ParticleType`](@ref)s. | ||||
|  | ||||
| See also: [`gen_particles`](@ref) | ||||
| """ | ||||
| function execute(graph::DAG, input::Dict{ParticleType, Vector{Particle}}) | ||||
|     (code, inputSymbols, outputSymbol) = gen_code(graph) | ||||
|  | ||||
|     assignInputs = Vector{Expr}() | ||||
|     for (name, symbol) in inputSymbols | ||||
|         type = nothing | ||||
|         if startswith(name, "A") | ||||
|             type = A | ||||
|         elseif startswith(name, "B") | ||||
|             type = B | ||||
|         else | ||||
|             type = C | ||||
|         end | ||||
|         index = parse(Int, name[2:end]) | ||||
|  | ||||
|         push!( | ||||
|             assignInputs, | ||||
|             Meta.parse( | ||||
|                 "$(symbol) = ParticleValue(Particle($(input[type][index]).P0, $(input[type][index]).P1, $(input[type][index]).P2, $(input[type][index]).P3, $(type)), 1.0)", | ||||
|             ), | ||||
|         ) | ||||
|     end | ||||
|  | ||||
|     assignInputs = Expr(:block, assignInputs...) | ||||
|     eval(assignInputs) | ||||
|     eval(code) | ||||
|  | ||||
|     eval(Meta.parse("result = $outputSymbol")) | ||||
|     return result | ||||
| end | ||||
|   | ||||
							
								
								
									
										53
									
								
								src/devices/cuda/impl.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								src/devices/cuda/impl.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| using CUDA | ||||
|  | ||||
| """ | ||||
|     CUDAGPU <: AbstractGPU | ||||
|  | ||||
| Representation of a specific CUDA GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface. | ||||
| """ | ||||
| mutable struct CUDAGPU <: AbstractGPU | ||||
|     # one element of `CUDA.devices()`, stored untyped (see get_devices below) | ||||
|     device::Any # TODO: what's the cuda device type? | ||||
|     # strategy used when generating code for this device; read and written via cache_strategy / set_cache_strategy | ||||
|     cacheStrategy::CacheStrategy | ||||
|     # initialised to -1 by get_devices; presumably filled in by measure_device! once implemented — TODO confirm | ||||
|     FLOPS::Float64 | ||||
| end | ||||
|  | ||||
| # register the type so that device_types() reports it | ||||
| push!(DEVICE_TYPES, CUDAGPU) | ||||
|  | ||||
| # the only cache strategy offered for CUDA GPUs is LocalVariables | ||||
| CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()] | ||||
|  | ||||
| # implementation of the default_strategy interface function for CUDAGPU | ||||
| default_strategy(::Type{T}) where {T <: CUDAGPU} = LocalVariables() | ||||
|  | ||||
| # Placeholder measurement hook for a CUDA GPU: announces the device when `verbose` is set, measures nothing yet. | ||||
| function measure_device!(device::CUDAGPU; verbose::Bool) | ||||
|     verbose && println("Measuring CUDA GPU $(device.device)") | ||||
|  | ||||
|     # TODO implement | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_devices(deviceType::Type{T}; verbose::Bool) where {T <: CUDAGPU} | ||||
|  | ||||
| Collect one [`CUDAGPU`](@ref) per entry of `CUDA.devices()` and return them as a `Vector{AbstractDevice}`. | ||||
| The vector is empty when `CUDA.functional()` is false. With `verbose` set, a short status line is printed. | ||||
| """ | ||||
| function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: CUDAGPU} | ||||
|     found = AbstractDevice[] | ||||
|  | ||||
|     if CUDA.functional() | ||||
|         cudaDevs = CUDA.devices() | ||||
|         verbose && println("Found $(length(cudaDevs)) CUDA devices") | ||||
|  | ||||
|         for cudaDev in cudaDevs | ||||
|             # FLOPS is not measured at this point, -1 is stored instead | ||||
|             push!(found, CUDAGPU(cudaDev, default_strategy(CUDAGPU), -1)) | ||||
|         end | ||||
|     elseif verbose | ||||
|         println("CUDA is non-functional") | ||||
|     end | ||||
|  | ||||
|     return found | ||||
| end | ||||
							
								
								
									
										23
									
								
								src/devices/detect.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								src/devices/detect.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
|  | ||||
| """ | ||||
|     get_machine_info(; verbose::Bool = isinteractive()) | ||||
|  | ||||
| Return the [`Machine`](@ref) currently running on. It contains the devices returned by [`get_devices`](@ref) for | ||||
| every type in [`device_types`](@ref), in that order. The keyword argument `verbose` defaults to true when interactive. | ||||
|  | ||||
| Fails with an `AssertionError` if no device at all was found. | ||||
| """ | ||||
| function get_machine_info(; verbose::Bool = isinteractive()) | ||||
|     devices = Vector{AbstractDevice}() | ||||
|  | ||||
|     for deviceType in device_types() | ||||
|         append!(devices, get_devices(deviceType; verbose = verbose)) | ||||
|     end | ||||
|  | ||||
|     noDevices = length(devices) | ||||
|     @assert noDevices > 0 "No devices were found, but at least one NUMA node should always be available!" | ||||
|  | ||||
|     # nothing has been measured yet: every device pair starts with the placeholder rate -1 | ||||
|     transferRates = fill(-1.0, noDevices, noDevices) | ||||
|     return Machine(devices, transferRates) | ||||
| end | ||||
							
								
								
									
										52
									
								
								src/devices/impl.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								src/devices/impl.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| """ | ||||
|     device_types() | ||||
|  | ||||
| Accessor for the global registry of device types: returns the [`DEVICE_TYPES`](@ref) vector itself, not a copy. | ||||
|  | ||||
| See also: [`DEVICE_TYPES`](@ref) | ||||
| """ | ||||
| device_types() = DEVICE_TYPES | ||||
|  | ||||
| """ | ||||
|     entry_device(machine::Machine) | ||||
|  | ||||
| Return the "entry" device of `machine`, which is the first element of `machine.devices`. It is the device that starts | ||||
| CPU threads and GPU kernels, and takes input values and returns the output value. | ||||
| """ | ||||
| entry_device(machine::Machine) = machine.devices[1] | ||||
|  | ||||
| """ | ||||
|     strategies(t::Type{T}) where {T <: AbstractDevice} | ||||
|  | ||||
| Look up the [`CacheStrategy`](@ref)s registered for the device type `t` in [`CACHE_STRATEGIES`](@ref). | ||||
| These strategies are used in code generation. Throws an error if nothing is registered for `t`. | ||||
| """ | ||||
| function strategies(t::Type{T}) where {T <: AbstractDevice} | ||||
|     haskey(CACHE_STRATEGIES, t) || error("Trying to get strategies for $T, but it has no strategies defined!") | ||||
|  | ||||
|     return CACHE_STRATEGIES[t] | ||||
| end | ||||
|  | ||||
| """ | ||||
|     cache_strategy(device::AbstractDevice) | ||||
|  | ||||
| Getter for the `cacheStrategy` field of `device`. | ||||
| """ | ||||
| cache_strategy(device::AbstractDevice) = device.cacheStrategy | ||||
|  | ||||
| """ | ||||
|     set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy) | ||||
|  | ||||
| Store `cacheStrategy` in the device's `cacheStrategy` field, so that a following [`cache_strategy`](@ref) call on | ||||
| the same device returns it. Returns `nothing`. | ||||
| """ | ||||
| function set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy) | ||||
|     setproperty!(device, :cacheStrategy, cacheStrategy) | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
							
								
								
									
										108
									
								
								src/devices/interface.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								src/devices/interface.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,108 @@ | ||||
| """ | ||||
|     AbstractDevice | ||||
|  | ||||
| Abstract base type for every device, like GPUs, CPUs or any other compute devices. | ||||
| Every implementation needs to implement various functions and needs a member `cacheStrategy`. | ||||
| """ | ||||
| abstract type AbstractDevice end | ||||
|  | ||||
| # Abstract supertype for CPU-like devices (e.g. NumaNode). | ||||
| abstract type AbstractCPU <: AbstractDevice end | ||||
|  | ||||
| # Abstract supertype for GPU devices (e.g. CUDAGPU). | ||||
| abstract type AbstractGPU <: AbstractDevice end | ||||
|  | ||||
| """ | ||||
|     Machine | ||||
|  | ||||
| A representation of a machine to execute on. Contains information about its architecture (CPUs, GPUs, maybe more). This representation can be used to make a more accurate cost prediction of a [`DAG`](@ref) state. | ||||
|  | ||||
| See also: [`Scheduler`](@ref) | ||||
| """ | ||||
| struct Machine | ||||
|     # all devices of the machine; the first entry is the one returned by entry_device | ||||
|     devices::Vector{AbstractDevice} | ||||
|  | ||||
|     # square matrix with one row and column per device; get_machine_info fills it with -1 | ||||
|     # NOTE(review): presumably indexed by position in `devices`; units and direction are not defined here — confirm | ||||
|     transferRates::Matrix{Float64} | ||||
| end | ||||
|  | ||||
| """ | ||||
|     CacheStrategy | ||||
|  | ||||
| Abstract base type for caching strategies. | ||||
|  | ||||
| See also: [`strategies`](@ref) | ||||
| """ | ||||
| abstract type CacheStrategy end | ||||
|  | ||||
| # The concrete strategies below are field-less structs; devices store an instance in their `cacheStrategy` member. | ||||
|  | ||||
| """ | ||||
|     LocalVariables <: CacheStrategy | ||||
|  | ||||
| A caching strategy relying solely on local variables for every input and output. | ||||
|  | ||||
| Implements the [`CacheStrategy`](@ref) interface. | ||||
| """ | ||||
| struct LocalVariables <: CacheStrategy end | ||||
|  | ||||
| """ | ||||
|     Dictionary <: CacheStrategy | ||||
|  | ||||
| A caching strategy relying on a dictionary of Symbols to store every input and output. | ||||
|  | ||||
| Implements the [`CacheStrategy`](@ref) interface. | ||||
| """ | ||||
| struct Dictionary <: CacheStrategy end | ||||
|  | ||||
| """ | ||||
|     DEVICE_TYPES::Vector{Type} | ||||
|  | ||||
| Global vector of available and implemented device types. Each implementation of a [`AbstractDevice`](@ref) should add its concrete type to this vector. | ||||
|  | ||||
| See also: [`device_types`](@ref), [`get_devices`](@ref) | ||||
| """ | ||||
| # NOTE(review): this is a non-const global that is only mutated via push!; consider declaring it `const` — confirm nothing rebinds it | ||||
| DEVICE_TYPES = Vector{Type}() | ||||
|  | ||||
| """ | ||||
|     CACHE_STRATEGIES::Dict{Type, Vector{CacheStrategy}} | ||||
|  | ||||
| Global dictionary of available caching strategies per device type. Each implementation of [`AbstractDevice`](@ref) should add a vector of its available [`CacheStrategy`](@ref) instances under its concrete type. | ||||
|  | ||||
| See also: [`strategies`](@ref) | ||||
| """ | ||||
| CACHE_STRATEGIES = Dict{Type, Vector{CacheStrategy}}() | ||||
|  | ||||
| """ | ||||
|     default_strategy(deviceType::Type{T}) where {T <: AbstractDevice} | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device type. | ||||
|  | ||||
| See also: [`cache_strategy`](@ref), [`set_cache_strategy`](@ref) | ||||
| """ | ||||
| function default_strategy end | ||||
|  | ||||
| """ | ||||
|     get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice} | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns a `Vector{AbstractDevice}` holding one device object of the given [`AbstractDevice`](@ref) type for each such device available on the current machine. The vector may be empty. | ||||
| """ | ||||
| function get_devices end | ||||
|  | ||||
| """ | ||||
|     measure_device!(device::AbstractDevice; verbose::Bool) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Measures the compute speed of the given device and writes into it. | ||||
|  | ||||
| Called once per device by [`measure_devices!`](@ref), which always passes the `verbose` keyword. | ||||
| """ | ||||
| function measure_device! end | ||||
|  | ||||
| """ | ||||
|     gen_cache_init_code(device::AbstractDevice) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache. | ||||
|  | ||||
| The strategy is presumably the one currently set on the device (a [`CacheStrategy`](@ref) instance, see [`cache_strategy`](@ref)); confirm against the implementations. | ||||
| """ | ||||
| function gen_cache_init_code end | ||||
|  | ||||
| """ | ||||
|     gen_access_expr(device::AbstractDevice, symbol::Symbol) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). | ||||
| Return an `Expr` or `QuoteNode` accessing the variable identified by `symbol`. | ||||
| """ | ||||
| function gen_access_expr end | ||||
							
								
								
									
										22
									
								
								src/devices/measure.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								src/devices/measure.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| """ | ||||
|     measure_devices!(machine::Machine; verbose::Bool = isinteractive()) | ||||
|  | ||||
| Measure FLOPS, RAM, cache sizes and what other properties can be extracted for the devices in the given machine. | ||||
| This calls [`measure_device!`](@ref) on every device in `machine.devices` and returns `nothing`. | ||||
| The keyword argument `verbose` defaults to true when interactive. | ||||
| """ | ||||
| function measure_devices!(machine::Machine; verbose::Bool = isinteractive()) | ||||
|     # `Base.is_interactive` is a Bool global and cannot be called; `isinteractive()` is the public accessor | ||||
|     for device in machine.devices | ||||
|         measure_device!(device; verbose = verbose) | ||||
|     end | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     measure_transfer_rates!(machine::Machine; verbose::Bool = Base.isinteractive()) | ||||
|  | ||||
| Measure the transfer rates between devices in the machine. | ||||
|  | ||||
| !!! note | ||||
|     This is currently a stub: it performs no measurement and returns `nothing`. | ||||
| """ | ||||
| function measure_transfer_rates!(machine::Machine; verbose::Bool = Base.isinteractive()) | ||||
|     # Note on the default: `Base.is_interactive` is a `Bool` global and cannot be called; | ||||
|     # `Base.isinteractive()` is the function that returns it. | ||||
|     # TODO implement | ||||
|     return nothing | ||||
| end | ||||
							
								
								
									
										96
									
								
								src/devices/numa/impl.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								src/devices/numa/impl.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,96 @@ | ||||
| using NumaAllocators | ||||
|  | ||||
| """ | ||||
|     NumaNode <: AbstractCPU | ||||
|  | ||||
| Representation of a specific CPU that code can run on. Implements the [`AbstractDevice`](@ref) interface. | ||||
| """ | ||||
| mutable struct NumaNode <: AbstractCPU | ||||
|     # index of the NUMA node; `get_devices` assigns the values `0:highest_numa_node()` | ||||
|     numaId::UInt16 | ||||
|     # `get_devices` initializes this to 1; presumably the number of threads on the node -- TODO confirm | ||||
|     threads::UInt16 | ||||
|     # strategy that `gen_cache_init_code` and `gen_access_expr` dispatch on | ||||
|     cacheStrategy::CacheStrategy | ||||
|     # `get_devices` initializes this to -1; presumably meant to be filled in by `measure_device!` -- TODO confirm | ||||
|     FLOPS::Float64 | ||||
|     # unique id of the device; used to build the cache variable name for the `Dictionary` strategy | ||||
|     id::UUID | ||||
| end | ||||
|  | ||||
| # add NumaNode to the collection of known device types | ||||
| push!(DEVICE_TYPES, NumaNode) | ||||
|  | ||||
| # cache strategies registered for NumaNode | ||||
| CACHE_STRATEGIES[NumaNode] = [LocalVariables()] | ||||
|  | ||||
| # strategy given to NumaNodes created by `get_devices` | ||||
| default_strategy(::Type{T}) where {T <: NumaNode} = LocalVariables() | ||||
|  | ||||
| # Placeholder implementation: optionally announces the node, measures nothing yet. | ||||
| function measure_device!(device::NumaNode; verbose::Bool) | ||||
|     verbose && println("Measuring Numa Node $(device.numaId)") | ||||
|  | ||||
|     # TODO implement | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_devices(deviceType::Type{T}; verbose::Bool) where {T <: NumaNode} | ||||
|  | ||||
| Return a `Vector{AbstractDevice}` holding one [`NumaNode`](@ref) for every NUMA node id from 0 up to `highest_numa_node()`. If `verbose` is true, print how many nodes were found. | ||||
| """ | ||||
| function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: NumaNode} | ||||
|     # node ids start at 0, so the node count is the highest id plus one | ||||
|     lastNodeId = highest_numa_node() | ||||
|     verbose && println("Found $(lastNodeId + 1) NUMA nodes") | ||||
|  | ||||
|     numaNodes = Vector{AbstractDevice}() | ||||
|     for nodeId in 0:lastNodeId | ||||
|         node = NumaNode(nodeId, 1, default_strategy(NumaNode), -1, UUIDs.uuid1(rng[1])) | ||||
|         push!(numaNodes, node) | ||||
|     end | ||||
|  | ||||
|     return numaNodes | ||||
| end | ||||
|  | ||||
| """ | ||||
|     gen_cache_init_code(device::NumaNode) | ||||
|  | ||||
| Generate the cache initialization code for a [`NumaNode`](@ref), depending on the device's cache strategy: an empty block for [`LocalVariables`](@ref), a `Dict{Symbol, Any}` assignment for `Dictionary`. Any other strategy raises an error. | ||||
| """ | ||||
| function gen_cache_init_code(device::NumaNode) | ||||
|     strategy = device.cacheStrategy | ||||
|  | ||||
|     # local variables need no setup, so an empty block is emitted | ||||
|     strategy isa LocalVariables && return Expr(:block) | ||||
|  | ||||
|     if strategy isa Dictionary | ||||
|         # TODO: sizehint? | ||||
|         return Meta.parse("cache_$(to_var_name(device.id)) = Dict{Symbol, Any}()") | ||||
|     end | ||||
|  | ||||
|     return error("Unimplemented cache strategy \"$(device.cacheStrategy)\" for device \"$(device)\"") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     gen_access_expr(device::NumaNode, symbol::Symbol) | ||||
|  | ||||
| Generate code to access the variable designated by `symbol` on a [`NumaNode`](@ref). The work is delegated to `_gen_access_expr`, dispatching on the [`CacheStrategy`](@ref) stored in the device. | ||||
| """ | ||||
| gen_access_expr(device::NumaNode, symbol::Symbol) = _gen_access_expr(device, device.cacheStrategy, symbol) | ||||
|  | ||||
| """ | ||||
|     _gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol) | ||||
|  | ||||
| Internal dispatch target of [`gen_access_expr`](@ref): returns the parsed, quoted name `data_<symbol>`. | ||||
| """ | ||||
| function _gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol) | ||||
|     localName = Symbol("data_$symbol") | ||||
|     return Meta.parse(":($localName)") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol) | ||||
|  | ||||
| Internal dispatch target of [`gen_access_expr`](@ref): returns the parsed, quoted lookup of `symbol` in the device's `cache_<id>` variable. | ||||
| """ | ||||
| function _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol) | ||||
|     cacheName = "cache_$(to_var_name(device.id))" | ||||
|     return Meta.parse(":($(cacheName)[:$symbol])") | ||||
| end | ||||
							
								
								
									
										53
									
								
								src/devices/oneapi/impl.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								src/devices/oneapi/impl.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| using oneAPI | ||||
|  | ||||
| """ | ||||
|     oneAPIGPU <: AbstractGPU | ||||
|  | ||||
| Representation of a specific Intel GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface. | ||||
| """ | ||||
| mutable struct oneAPIGPU <: AbstractGPU | ||||
|     # device handle; `get_devices` stores an element of `oneAPI.devices()` here | ||||
|     device::Any | ||||
|     # cache strategy of this device; `get_devices` sets it to `default_strategy(oneAPIGPU)` | ||||
|     cacheStrategy::CacheStrategy | ||||
|     # `get_devices` initializes this to -1; presumably meant to be filled in by `measure_device!` -- TODO confirm | ||||
|     FLOPS::Float64 | ||||
| end | ||||
|  | ||||
| # add oneAPIGPU to the collection of known device types | ||||
| push!(DEVICE_TYPES, oneAPIGPU) | ||||
|  | ||||
| # cache strategies registered for oneAPIGPU | ||||
| CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()] | ||||
|  | ||||
| # strategy given to oneAPIGPUs created by `get_devices` | ||||
| default_strategy(::Type{T}) where {T <: oneAPIGPU} = LocalVariables() | ||||
|  | ||||
| # Placeholder implementation: optionally announces the GPU, measures nothing yet. | ||||
| function measure_device!(device::oneAPIGPU; verbose::Bool) | ||||
|     verbose && println("Measuring oneAPI GPU $(device.device)") | ||||
|  | ||||
|     # TODO implement | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU} | ||||
|  | ||||
| Return a `Vector{AbstractDevice}` with one [`oneAPIGPU`](@ref) per entry of `oneAPI.devices()`, or an empty vector if `oneAPI.functional()` is false. If `verbose` is true, print some additional information. | ||||
| """ | ||||
| function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: oneAPIGPU} | ||||
|     gpus = Vector{AbstractDevice}() | ||||
|  | ||||
|     # nothing to enumerate when oneAPI reports itself as non-functional | ||||
|     if !oneAPI.functional() | ||||
|         verbose && println("oneAPI is non-functional") | ||||
|         return gpus | ||||
|     end | ||||
|  | ||||
|     handles = oneAPI.devices() | ||||
|     verbose && println("Found $(length(handles)) oneAPI devices") | ||||
|  | ||||
|     for handle in handles | ||||
|         push!(gpus, oneAPIGPU(handle, default_strategy(oneAPIGPU), -1)) | ||||
|     end | ||||
|  | ||||
|     return gpus | ||||
| end | ||||
							
								
								
									
										53
									
								
								src/devices/rocm/impl.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								src/devices/rocm/impl.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| using AMDGPU | ||||
|  | ||||
| """ | ||||
|     ROCmGPU <: AbstractGPU | ||||
|  | ||||
| Representation of a specific AMD GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface. | ||||
| """ | ||||
| mutable struct ROCmGPU <: AbstractGPU | ||||
|     # device handle; `get_devices` stores an element of `AMDGPU.devices()` here | ||||
|     device::Any | ||||
|     # cache strategy of this device; `get_devices` sets it to `default_strategy(ROCmGPU)` | ||||
|     cacheStrategy::CacheStrategy | ||||
|     # `get_devices` initializes this to -1; presumably meant to be filled in by `measure_device!` -- TODO confirm | ||||
|     FLOPS::Float64 | ||||
| end | ||||
|  | ||||
| # add ROCmGPU to the collection of known device types | ||||
| push!(DEVICE_TYPES, ROCmGPU) | ||||
|  | ||||
| # cache strategies registered for ROCmGPU | ||||
| CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()] | ||||
|  | ||||
| # strategy given to ROCmGPUs created by `get_devices` | ||||
| default_strategy(::Type{T}) where {T <: ROCmGPU} = LocalVariables() | ||||
|  | ||||
| # Placeholder implementation: optionally announces the GPU, measures nothing yet. | ||||
| function measure_device!(device::ROCmGPU; verbose::Bool) | ||||
|     verbose && println("Measuring ROCm GPU $(device.device)") | ||||
|  | ||||
|     # TODO implement | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU} | ||||
|  | ||||
| Return a `Vector{AbstractDevice}` with one [`ROCmGPU`](@ref) per entry of `AMDGPU.devices()`, or an empty vector if `AMDGPU.functional()` is false. If `verbose` is true, print some additional information. | ||||
| """ | ||||
| function get_devices(deviceType::Type{T}; verbose::Bool = false) where {T <: ROCmGPU} | ||||
|     gpus = Vector{AbstractDevice}() | ||||
|  | ||||
|     # nothing to enumerate when AMDGPU reports itself as non-functional | ||||
|     if !AMDGPU.functional() | ||||
|         verbose && println("AMDGPU is non-functional") | ||||
|         return gpus | ||||
|     end | ||||
|  | ||||
|     handles = AMDGPU.devices() | ||||
|     verbose && println("Found $(length(handles)) AMD devices") | ||||
|  | ||||
|     for handle in handles | ||||
|         push!(gpus, ROCmGPU(handle, default_strategy(ROCmGPU), -1)) | ||||
|     end | ||||
|  | ||||
|     return gpus | ||||
| end | ||||
| @@ -6,6 +6,6 @@ Pretty-print a [`Diff`](@ref). Called via print, println and co. | ||||
| function show(io::IO, diff::Diff) | ||||
|     print(io, "Nodes: ") | ||||
|     print(io, length(diff.addedNodes) + length(diff.removedNodes)) | ||||
|     print(io, " Edges: ") | ||||
|     print(io, ", Edges: ") | ||||
|     return print(io, length(diff.addedEdges) + length(diff.removedEdges)) | ||||
| end | ||||
|   | ||||
| @@ -4,8 +4,8 @@ | ||||
| A named tuple representing a difference of added and removed nodes and edges on a [`DAG`](@ref). | ||||
| """ | ||||
| const Diff = NamedTuple{ | ||||
|     (:addedNodes, :removedNodes, :addedEdges, :removedEdges), | ||||
|     Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}}, | ||||
|     (:addedNodes, :removedNodes, :addedEdges, :removedEdges, :updatedChildren), | ||||
|     Tuple{Vector{Node}, Vector{Node}, Vector{Edge}, Vector{Edge}, Vector{Tuple{Node, AbstractTask}}}, | ||||
| } | ||||
|  | ||||
| function Diff() | ||||
| @@ -14,5 +14,8 @@ function Diff() | ||||
|         removedNodes = Vector{Node}(), | ||||
|         addedEdges = Vector{Edge}(), | ||||
|         removedEdges = Vector{Edge}(), | ||||
|  | ||||
|         # children were updated in the task, updatedChildren[x][2] is the task before the update | ||||
|         updatedChildren = Vector{Tuple{Node, AbstractTask}}(), | ||||
|     )::Diff | ||||
| end | ||||
|   | ||||
							
								
								
									
										77
									
								
								src/estimator/global_metric.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								src/estimator/global_metric.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,77 @@ | ||||
|  | ||||
| """ | ||||
|     CDCost | ||||
|  | ||||
| Representation of a [`DAG`](@ref)'s cost as estimated by the [`GlobalMetricEstimator`](@ref). | ||||
|  | ||||
| # Fields: | ||||
| `.data`: The total data transfer.\\ | ||||
| `.computeEffort`: The total compute effort.\\ | ||||
| `.computeIntensity`: The compute intensity, normally `.computeEffort / .data`. Some values set it explicitly instead, e.g. `zero(CDCost)` and `typemax(CDCost)` both use `0.0`. | ||||
|  | ||||
|  | ||||
| !!! note | ||||
|     Note that the `computeIntensity` doesn't necessarily make sense in the context of only operation costs.  | ||||
|     For example, for node fusions this will always be 0, since the computeEffort is zero.  | ||||
|     It will still work as intended when adding/subtracting to/from a `graph_cost` estimate. | ||||
| """ | ||||
| const CDCost = NamedTuple{(:data, :computeEffort, :computeIntensity), Tuple{Float64, Float64, Float64}} | ||||
|  | ||||
| # Add two costs component-wise; the compute intensity is recomputed from the summed components. | ||||
| function +(cost1::CDCost, cost2::CDCost)::CDCost | ||||
|     d = cost1.data + cost2.data | ||||
|     ce = cost1.computeEffort + cost2.computeEffort | ||||
|     # 0 / 0 would be NaN; use 0.0 instead so that zero(CDCost) + zero(CDCost) == zero(CDCost). | ||||
|     # A non-zero effort over zero data still yields +/-Inf, exactly as before. | ||||
|     intensity = (d == 0.0 && ce == 0.0) ? 0.0 : ce / d | ||||
|     return (data = d, computeEffort = ce, computeIntensity = intensity)::CDCost | ||||
| end | ||||
|  | ||||
| # Subtract two costs component-wise; the compute intensity is recomputed from the resulting components. | ||||
| function -(cost1::CDCost, cost2::CDCost)::CDCost | ||||
|     d = cost1.data - cost2.data | ||||
|     ce = cost1.computeEffort - cost2.computeEffort | ||||
|     # 0 / 0 would be NaN; use 0.0 instead so that subtracting a cost from itself gives a zero intensity. | ||||
|     # A non-zero effort over zero data still yields +/-Inf, exactly as before. | ||||
|     intensity = (d == 0.0 && ce == 0.0) ? 0.0 : ce / d | ||||
|     return (data = d, computeEffort = ce, computeIntensity = intensity)::CDCost | ||||
| end | ||||
|  | ||||
| # Order costs by the sum of data and compute effort; the compute intensity is not considered. | ||||
| function isless(cost1::CDCost, cost2::CDCost)::Bool | ||||
|     total1 = cost1.data + cost1.computeEffort | ||||
|     total2 = cost2.data + cost2.computeEffort | ||||
|     return total1 < total2 | ||||
| end | ||||
|  | ||||
| # The zero cost: all three components are 0.0. | ||||
| zero(::Type{CDCost}) = (data = 0.0, computeEffort = 0.0, computeIntensity = 0.0)::CDCost | ||||
|  | ||||
| # The maximum cost: infinite data and compute effort, with the intensity fixed at 0.0. | ||||
| typemax(::Type{CDCost}) = (data = Inf, computeEffort = Inf, computeIntensity = 0.0)::CDCost | ||||
|  | ||||
| """ | ||||
|     GlobalMetricEstimator <: AbstractEstimator | ||||
|  | ||||
| Estimator whose cost type is [`CDCost`](@ref). Its `graph_cost` is built from the graph's properties (data, compute effort and compute intensity). | ||||
| """ | ||||
| struct GlobalMetricEstimator <: AbstractEstimator end | ||||
|  | ||||
| # The GlobalMetricEstimator reports its costs as CDCost. | ||||
| cost_type(estimator::GlobalMetricEstimator)::Type{CDCost} = CDCost | ||||
|  | ||||
| # Build the graph's cost from the properties reported by get_properties. | ||||
| function graph_cost(estimator::GlobalMetricEstimator, graph::DAG) | ||||
|     props = get_properties(graph) | ||||
|     d = props.data | ||||
|     ce = props.computeEffort | ||||
|     ci = props.computeIntensity | ||||
|     return (data = d, computeEffort = ce, computeIntensity = ci)::CDCost | ||||
| end | ||||
|  | ||||
| # Estimated effect of a node fusion: the data of the task at `operation.input[2]` is subtracted, | ||||
| # compute effort and compute intensity are reported as 0.0. | ||||
| # NOTE(review): `input[2]` is presumably the data node between the two fused compute nodes -- confirm. | ||||
| function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeFusion) | ||||
|     return (data = -data(operation.input[2].task), computeEffort = 0.0, computeIntensity = 0.0)::CDCost | ||||
| end | ||||
|  | ||||
| # Estimated effect of a node reduction: all but one of the input nodes disappear, so the data and | ||||
| # compute effort of the first input's task are subtracted once per removed node. | ||||
| function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeReduction) | ||||
|     s = length(operation.input) - 1 | ||||
|     return ( | ||||
|         data = s * -data(task(operation.input[1])), | ||||
|         computeEffort = s * -compute_effort(task(operation.input[1])), | ||||
|         # check the node itself: `operation.input` is the collection of nodes and is never a DataTaskNode, | ||||
|         # so testing its type always selected the `Inf` branch | ||||
|         computeIntensity = operation.input[1] isa DataTaskNode ? 0.0 : Inf, | ||||
|     )::CDCost | ||||
| end | ||||
|  | ||||
| # Estimated effect of a node split: the input node's task is counted once more for every parent beyond the first. | ||||
| function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operation::NodeSplit) | ||||
|     splitNode = operation.input | ||||
|     factor = convert(Float64, length(parents(splitNode)) - 1)::Float64 | ||||
|     addedData = convert(Float64, factor * data(task(splitNode)))::Float64 | ||||
|     addedEffort = convert(Float64, factor * compute_effort(task(splitNode)))::Float64 | ||||
|     return (data = addedData, computeEffort = addedEffort, computeIntensity = addedEffort / addedData)::CDCost | ||||
| end | ||||
							
								
								
									
										44
									
								
								src/estimator/interface.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								src/estimator/interface.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | ||||
|  | ||||
| """ | ||||
|     AbstractEstimator | ||||
|  | ||||
| Abstract base type for an estimator. An estimator estimates the cost of a graph or the difference an operation applied to a graph will make to its cost. | ||||
|  | ||||
| Interface functions are | ||||
| - [`cost_type`](@ref) | ||||
| - [`graph_cost`](@ref) | ||||
| - [`operation_effect`](@ref) | ||||
| """ | ||||
| abstract type AbstractEstimator end | ||||
|  | ||||
| """ | ||||
|     cost_type(estimator::AbstractEstimator) | ||||
|  | ||||
| Interface function returning a specific estimator's cost type, i.e., the type returned by its implementations of [`graph_cost`](@ref) and [`operation_effect`](@ref). | ||||
| """ | ||||
| function cost_type end | ||||
|  | ||||
| """ | ||||
|     graph_cost(estimator::AbstractEstimator, graph::DAG) | ||||
|  | ||||
| Get the total estimated cost of the graph. The cost's data type can be chosen by the implementation, but it must have a usable less-than comparison operator (`<`), the basic math operators (`+`, `-`), and implementations of `zero()` and `typemax()`. | ||||
| """ | ||||
| function graph_cost end | ||||
|  | ||||
| """ | ||||
|     operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation) | ||||
|  | ||||
| Get the estimated effect on the cost of the graph, such that `graph_cost(estimator, graph) + operation_effect(estimator, graph, operation) ~= graph_cost(estimator, graph_with_operation_applied)`. There is no hard requirement for this, but the better the estimate, the better an optimization algorithm will be. | ||||
|  | ||||
| !!! note | ||||
|     There is a default implementation of this function, applying the operation, calling [`graph_cost`](@ref), then popping the operation again. | ||||
|     The default therefore temporarily mutates `graph`; if an error is thrown in between, the operation is not popped again. | ||||
|      | ||||
|     It can be much faster to overload this function for a specific estimator and directly compute the effects from the operation if possible. | ||||
| """ | ||||
| function operation_effect(estimator::AbstractEstimator, graph::DAG, operation::Operation) | ||||
|     # This is currently not stably working, see issue #16 | ||||
|     # cost of the graph as it is now | ||||
|     cost = graph_cost(estimator, graph) | ||||
|     # temporarily apply the operation and measure again | ||||
|     push_operation!(graph, operation) | ||||
|     cost_after = graph_cost(estimator, graph) | ||||
|     # revert the graph to its previous state | ||||
|     pop_operation!(graph) | ||||
|     return cost_after - cost | ||||
| end | ||||
| @@ -17,21 +17,5 @@ function in(edge::Edge, graph::DAG) | ||||
|         return false | ||||
|     end | ||||
|  | ||||
|     return n1 in n2.children | ||||
| end | ||||
|  | ||||
| """ | ||||
|     ==(n1::Node, n2::Node, g::DAG) | ||||
|  | ||||
| Check equality of two nodes in a graph. | ||||
| """ | ||||
| function ==(n1::Node, n2::Node, g::DAG) | ||||
|     if typeof(n1) != typeof(n2) | ||||
|         return false | ||||
|     end | ||||
|     if !(n1 in g) || !(n2 in g) | ||||
|         return false | ||||
|     end | ||||
|  | ||||
|     return n1.task == n2.task && children(n1) == children(n2) | ||||
|     return n1 in children(n2) | ||||
| end | ||||
|   | ||||
| @@ -38,8 +38,7 @@ end | ||||
|  | ||||
| Return `true` if [`pop_operation!`](@ref) is possible, `false` otherwise. | ||||
| """ | ||||
| can_pop(graph::DAG) = | ||||
|     !isempty(graph.operationsToApply) || !isempty(graph.appliedOperations) | ||||
| can_pop(graph::DAG) = !isempty(graph.operationsToApply) || !isempty(graph.appliedOperations) | ||||
|  | ||||
| """ | ||||
|     reset_graph!(graph::DAG) | ||||
|   | ||||
| @@ -15,12 +15,7 @@ Insert the node into the graph. | ||||
|  | ||||
| See also: [`remove_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref) | ||||
| """ | ||||
| function insert_node!( | ||||
|     graph::DAG, | ||||
|     node::Node, | ||||
|     track = true, | ||||
|     invalidate_cache = true, | ||||
| ) | ||||
| function insert_node!(graph::DAG, node::Node; track = true, invalidate_cache = true) | ||||
|     # 1: mute | ||||
|     push!(graph.nodes, node) | ||||
|  | ||||
| @@ -50,14 +45,8 @@ Insert the edge between node1 (child) and node2 (parent) into the graph. | ||||
|  | ||||
| See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`remove_edge!`](@ref) | ||||
| """ | ||||
| function insert_edge!( | ||||
|     graph::DAG, | ||||
|     node1::Node, | ||||
|     node2::Node, | ||||
|     track = true, | ||||
|     invalidate_cache = true, | ||||
| ) | ||||
|     # @assert (node2 ∉ node1.parents) && (node1 ∉ node2.children) "Edge to insert already exists" | ||||
| function insert_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true) | ||||
|     #@assert (node2 ∉ parents(node1)) && (node1 ∉ children(node2)) "Edge to insert already exists" | ||||
|  | ||||
|     # 1: mute | ||||
|     # edge points from child to parent | ||||
| @@ -95,13 +84,8 @@ Remove the node from the graph. | ||||
|  | ||||
| See also: [`insert_node!`](@ref), [`insert_edge!`](@ref), [`remove_edge!`](@ref) | ||||
| """ | ||||
| function remove_node!( | ||||
|     graph::DAG, | ||||
|     node::Node, | ||||
|     track = true, | ||||
|     invalidate_cache = true, | ||||
| ) | ||||
|     # @assert node in graph.nodes "Trying to remove a node that's not in the graph" | ||||
| function remove_node!(graph::DAG, node::Node; track = true, invalidate_cache = true) | ||||
|     #@assert node in graph.nodes "Trying to remove a node that's not in the graph" | ||||
|  | ||||
|     # 1: mute | ||||
|     delete!(graph.nodes, node) | ||||
| @@ -134,29 +118,34 @@ Remove the edge between node1 (child) and node2 (parent) into the graph. | ||||
|  | ||||
| See also: [`insert_node!`](@ref), [`remove_node!`](@ref), [`insert_edge!`](@ref) | ||||
| """ | ||||
| function remove_edge!( | ||||
|     graph::DAG, | ||||
|     node1::Node, | ||||
|     node2::Node, | ||||
|     track = true, | ||||
|     invalidate_cache = true, | ||||
| ) | ||||
| function remove_edge!(graph::DAG, node1::Node, node2::Node; track = true, invalidate_cache = true) | ||||
|     # 1: mute | ||||
|     pre_length1 = length(node1.parents) | ||||
|     pre_length2 = length(node2.children) | ||||
|  | ||||
|     #TODO: filter is very slow | ||||
|     filter!(x -> x != node2, node1.parents) | ||||
|     filter!(x -> x != node1, node2.children) | ||||
|     for i in eachindex(node1.parents) | ||||
|         if (node1.parents[i] == node2) | ||||
|             splice!(node1.parents, i) | ||||
|             break | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     for i in eachindex(node2.children) | ||||
|         if (node2.children[i] == node1) | ||||
|             splice!(node2.children, i) | ||||
|             break | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     #=@assert begin | ||||
|        removed = pre_length1 - length(node1.parents) | ||||
|        removed <= 1 | ||||
|         removed = pre_length1 - length(node1.parents) | ||||
|         removed <= 1 | ||||
|     end "removed more than one node from node1's parents"=# | ||||
|  | ||||
|     #=@assert begin | ||||
|        removed = pre_length2 - length(node2.children) | ||||
|        removed <= 1 | ||||
|         removed = pre_length2 - length(children(node2)) | ||||
|         removed <= 1 | ||||
|     end "removed more than one node from node2's children"=# | ||||
|  | ||||
|     # 2: keep track | ||||
| @@ -181,6 +170,66 @@ function remove_edge!( | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| # Replace every occurrence of `before` by `after` in the inputs of a fused task, then recurse into | ||||
| # whichever of the two sub-tasks had at least one occurrence. | ||||
| function replace_children!(task::FusedComputeTask, before, after) | ||||
|     # count the occurrences first, since the replacement below removes them | ||||
|     hitsInFirst = count(==(before), task.t1_inputs) | ||||
|     hitsInSecond = count(==(before), task.t2_inputs) | ||||
|  | ||||
|     #@assert hitsInFirst >= 1 || hitsInSecond >= 1 "Nothing to replace while replacing $before with $after in $(task.t1_inputs...) and $(task.t2_inputs...)" | ||||
|  | ||||
|     replace!(task.t1_inputs, before => after) | ||||
|     replace!(task.t2_inputs, before => after) | ||||
|  | ||||
|     # recursively descend down the tree, but only in the tasks where we're replacing things | ||||
|     hitsInFirst > 0 && replace_children!(task.first_task, before, after) | ||||
|     hitsInSecond > 0 && replace_children!(task.second_task, before, after) | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| # Fallback for all other task types: nothing is replaced. This also ends the recursion of the | ||||
| # FusedComputeTask method when it reaches a task that is not fused. | ||||
| function replace_children!(task::AbstractTask, before, after) | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     update_child!(graph::DAG, n::Node, child_before::Symbol, child_after::Symbol; track = true) | ||||
|  | ||||
| Replace `child_before` by `child_after` in the inputs of `n`'s task. Only nodes holding a [`FusedComputeTask`](@ref) are changed; for all other nodes this is a no-op. | ||||
|  | ||||
| If `track` is true, the node and a copy of its task from before the update are recorded in `graph.diff.updatedChildren`. Always returns `nothing`. | ||||
| """ | ||||
| function update_child!(graph::DAG, n::Node, child_before::Symbol, child_after::Symbol; track = true) | ||||
|     # only need to update fused compute tasks | ||||
|     if !(task(n) isa FusedComputeTask) | ||||
|         return nothing | ||||
|     end | ||||
|  | ||||
|     # copy before mutating, so the diff can hold the task's previous state | ||||
|     taskBefore = copy(task(n)) | ||||
|  | ||||
|     #=if !((child_before in task(n).t1_inputs) || (child_before in task(n).t2_inputs)) | ||||
|         println("------------------ Nothing to replace!! ------------------") | ||||
|         child_ids = Vector{String}() | ||||
|         for child in children(n) | ||||
|             push!(child_ids, "$(child.id)") | ||||
|         end | ||||
|         println("From $(child_before) to $(child_after) in $n with children $(child_ids)") | ||||
|         @assert false | ||||
|     end=# | ||||
|  | ||||
|     replace_children!(task(n), child_before, child_after) | ||||
|  | ||||
|     #=if !((child_after in task(n).t1_inputs) || (child_after in task(n).t2_inputs)) | ||||
|         println("------------------ Did not replace anything!! ------------------") | ||||
|         child_ids = Vector{String}() | ||||
|         for child in children(n) | ||||
|             push!(child_ids, "$(child.id)") | ||||
|         end | ||||
|         println("From $(child_before) to $(child_after) in $n with children $(child_ids)") | ||||
|         @assert false | ||||
|     end=# | ||||
|  | ||||
|     # keep track | ||||
|     if (track) | ||||
|         push!(graph.diff.updatedChildren, (n, taskBefore)) | ||||
|     end | ||||
|  | ||||
|     # return `nothing` on every path instead of leaking the result of `push!` when tracking | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_snapshot_diff(graph::DAG) | ||||
|  | ||||
| @@ -204,8 +253,14 @@ function invalidate_caches!(graph::DAG, operation::NodeFusion) | ||||
|  | ||||
|     # delete the operation from all caches of nodes involved in the operation | ||||
|     # TODO: filter is very slow | ||||
|     filter!(!=(operation), operation.input[1].nodeFusions) | ||||
|     filter!(!=(operation), operation.input[3].nodeFusions) | ||||
|     for n in [1, 3] | ||||
|         for i in eachindex(operation.input[n].nodeFusions) | ||||
|             if operation == operation.input[n].nodeFusions[i] | ||||
|                 splice!(operation.input[n].nodeFusions, i) | ||||
|                 break | ||||
|             end | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     operation.input[2].nodeFusion = missing | ||||
|  | ||||
|   | ||||
| @@ -30,10 +30,10 @@ function show(io::IO, graph::DAG) | ||||
|     nodeDict = Dict{Type, Int64}() | ||||
|     noEdges = 0 | ||||
|     for node in graph.nodes | ||||
|         if haskey(nodeDict, typeof(node.task)) | ||||
|             nodeDict[typeof(node.task)] = nodeDict[typeof(node.task)] + 1 | ||||
|         if haskey(nodeDict, typeof(task(node))) | ||||
|             nodeDict[typeof(task(node))] = nodeDict[typeof(task(node))] + 1 | ||||
|         else | ||||
|             nodeDict[typeof(node.task)] = 1 | ||||
|             nodeDict[typeof(task(node))] = 1 | ||||
|         end | ||||
|         noEdges += length(parents(node)) | ||||
|     end | ||||
| @@ -62,9 +62,5 @@ function show(io::IO, graph::DAG) | ||||
|     properties = get_properties(graph) | ||||
|     println(io, "  Total Compute Effort: ", properties.computeEffort) | ||||
|     println(io, "  Total Data Transfer: ", properties.data) | ||||
|     return println( | ||||
|         io, | ||||
|         "  Total Compute Intensity: ", | ||||
|         properties.computeIntensity, | ||||
|     ) | ||||
|     return println(io, "  Total Compute Intensity: ", properties.computeIntensity) | ||||
| end | ||||
|   | ||||
| @@ -34,6 +34,7 @@ end | ||||
| Return a vector of the graph's entry nodes. | ||||
| """ | ||||
| function get_entry_nodes(graph::DAG) | ||||
|     apply_all!(graph) | ||||
|     result = Vector{Node}() | ||||
|     for node in graph.nodes | ||||
|         if (is_entry_node(node)) | ||||
| @@ -42,3 +43,12 @@ function get_entry_nodes(graph::DAG) | ||||
|     end | ||||
|     return result | ||||
| end | ||||
|  | ||||
| """ | ||||
|     operation_stack_length(graph::DAG) | ||||
|  | ||||
| Return the number of operations on the graph's operation stack, i.e., the operations in `appliedOperations` plus those in `operationsToApply`. | ||||
| """ | ||||
| operation_stack_length(graph::DAG) = length(graph.appliedOperations) + length(graph.operationsToApply) | ||||
|   | ||||
| @@ -17,14 +17,14 @@ end | ||||
|  | ||||
| The representation of the graph as a set of [`Node`](@ref)s. | ||||
|  | ||||
| A DAG can be loaded using the appropriate parse function, e.g. [`parse_abc`](@ref). | ||||
| A DAG can be loaded using the appropriate parse_dag function, e.g. [`parse_dag`](@ref). | ||||
|  | ||||
| [`Operation`](@ref)s can be applied on it using [`push_operation!`](@ref) and reverted using [`pop_operation!`](@ref) like a stack. | ||||
| To get the set of possible operations, use [`get_operations`](@ref). | ||||
| The members of the object should not be manually accessed, instead always use the provided interface functions. | ||||
| """ | ||||
| mutable struct DAG | ||||
|     nodes::Set{Node} | ||||
|     nodes::Set{Union{DataTaskNode, ComputeTaskNode}} | ||||
|  | ||||
|     # The operations currently applied to the set of nodes | ||||
|     appliedOperations::Stack{AppliedOperation} | ||||
| @@ -36,7 +36,7 @@ mutable struct DAG | ||||
|     possibleOperations::PossibleOperations | ||||
|  | ||||
|     # The set of nodes whose possible operations need to be reevaluated | ||||
|     dirtyNodes::Set{Node} | ||||
|     dirtyNodes::Set{Union{DataTaskNode, ComputeTaskNode}} | ||||
|  | ||||
|     # "snapshot" system: keep track of added/removed nodes/edges since last snapshot | ||||
|     # these are muted in insert_node! etc. | ||||
| @@ -52,11 +52,7 @@ end | ||||
| Construct and return an empty [`PossibleOperations`](@ref) object. | ||||
| """ | ||||
| function PossibleOperations() | ||||
|     return PossibleOperations( | ||||
|         Set{NodeFusion}(), | ||||
|         Set{NodeReduction}(), | ||||
|         Set{NodeSplit}(), | ||||
|     ) | ||||
|     return PossibleOperations(Set{NodeFusion}(), Set{NodeReduction}(), Set{NodeSplit}()) | ||||
| end | ||||
|  | ||||
| """ | ||||
|   | ||||
| @@ -59,3 +59,19 @@ function is_valid(graph::DAG) | ||||
|  | ||||
|     return true | ||||
| end | ||||
|  | ||||
| """ | ||||
|     is_scheduled(graph::DAG) | ||||
|  | ||||
| Validate that the entire graph has been scheduled, i.e., every [`ComputeTaskNode`](@ref) has its `.device` set. | ||||
|  | ||||
| Returns `true` if so; a node without a device fails an `@assert` instead of producing `false`. | ||||
| """ | ||||
| function is_scheduled(graph::DAG) | ||||
|     for node in graph.nodes | ||||
|         # data nodes are skipped, only the remaining nodes are checked for a device | ||||
|         node isa DataTaskNode && continue | ||||
|         @assert !ismissing(node.device) | ||||
|     end | ||||
|  | ||||
|     return true | ||||
| end | ||||
|   | ||||
| @@ -7,7 +7,7 @@ Return the particle and value as is. | ||||
|  | ||||
| 0 FLOP. | ||||
| """ | ||||
| function compute(::ComputeTaskP, data::ParticleValue) | ||||
| function compute(::ComputeTaskP, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle} | ||||
|     return data | ||||
| end | ||||
|  | ||||
| @@ -18,7 +18,7 @@ Compute an outer edge. Return the particle value with the same particle and the | ||||
|  | ||||
| 1 FLOP. | ||||
| """ | ||||
| function compute(::ComputeTaskU, data::ParticleValue) | ||||
| function compute(::ComputeTaskU, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle} | ||||
|     return ParticleValue(data.p, data.v * outer_edge(data.p)) | ||||
| end | ||||
|  | ||||
| @@ -29,7 +29,11 @@ Compute a vertex. Preserve momentum and particle types (AB->C etc.) to create re | ||||
|  | ||||
| 6 FLOP. | ||||
| """ | ||||
| function compute(::ComputeTaskV, data1::ParticleValue, data2::ParticleValue) | ||||
| function compute( | ||||
|     ::ComputeTaskV, | ||||
|     data1::ParticleValue{P1}, | ||||
|     data2::ParticleValue{P2}, | ||||
| )::ParticleValue where {P1 <: ABCParticle, P2 <: ABCParticle} | ||||
|     p3 = preserve_momentum(data1.p, data2.p) | ||||
|     dataOut = ParticleValue(p3, data1.v * vertex() * data2.v) | ||||
|     return dataOut | ||||
| @@ -44,8 +48,15 @@ For valid inputs, both input particles should have the same momenta at this poin | ||||
|  | ||||
| 12 FLOP. | ||||
| """ | ||||
| function compute(::ComputeTaskS2, data1::ParticleValue, data2::ParticleValue) | ||||
|     return data1.v * inner_edge(data1.p) * data2.v | ||||
| function compute(::ComputeTaskS2, data1::ParticleValue{P}, data2::ParticleValue{P})::Float64 where {P <: ABCParticle} | ||||
|     #= | ||||
|     @assert isapprox(abs(data1.p.momentum.E), abs(data2.p.momentum.E), rtol = 0.001, atol = sqrt(eps())) "E: $(data1.p.momentum.E) vs. $(data2.p.momentum.E)" | ||||
|     @assert isapprox(data1.p.momentum.px, -data2.p.momentum.px, rtol = 0.001, atol = sqrt(eps())) "px: $(data1.p.momentum.px) vs. $(data2.p.momentum.px)" | ||||
|     @assert isapprox(data1.p.momentum.py, -data2.p.momentum.py, rtol = 0.001, atol = sqrt(eps())) "py: $(data1.p.momentum.py) vs. $(data2.p.momentum.py)" | ||||
|     @assert isapprox(data1.p.momentum.pz, -data2.p.momentum.pz, rtol = 0.001, atol = sqrt(eps())) "pz: $(data1.p.momentum.pz) vs. $(data2.p.momentum.pz)" | ||||
|     =# | ||||
|     inner = inner_edge(data1.p) | ||||
|     return data1.v * inner * data2.v | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -55,7 +66,7 @@ Compute inner edge (1 input particle, 1 output particle). | ||||
|  | ||||
| 11 FLOP. | ||||
| """ | ||||
| function compute(::ComputeTaskS1, data::ParticleValue) | ||||
| function compute(::ComputeTaskS1, data::ParticleValue{P})::ParticleValue{P} where {P <: ABCParticle} | ||||
|     return ParticleValue(data.p, data.v * inner_edge(data.p)) | ||||
| end | ||||
|  | ||||
| @@ -66,191 +77,83 @@ Compute a sum over the vector. Use an algorithm that accounts for accumulated er | ||||
|  | ||||
| Linearly many FLOP with growing data. | ||||
| """ | ||||
| function compute(::ComputeTaskSum, data::Vector{Float64}) | ||||
| function compute(::ComputeTaskSum, data::Vector{Float64})::Float64 | ||||
|     return sum_kbn(data) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     compute(t::FusedComputeTask, data) | ||||
|     get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr) | ||||
|  | ||||
| Compute a [`FusedComputeTask`](@ref). This simply asserts false and should not be called. Fused Compute Tasks generate their expressions directly through the other tasks instead. | ||||
| Generate and return code evaluating [`ComputeTaskP`](@ref) on `inSyms`, providing the output on `outSym`. | ||||
| """ | ||||
| function compute(t::FusedComputeTask, data) | ||||
|     @assert false "This is not implemented and should never be called" | ||||
| function get_expression(::ComputeTaskP, device::AbstractDevice, inExprs::Vector, outExpr) | ||||
|     in = [eval(inExprs[1])] | ||||
|     out = eval(outExpr) | ||||
|  | ||||
|     return Meta.parse("$out = compute(ComputeTaskP(), $(in[1]))") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(::ComputeTaskP, inSymbol::Symbol, outSymbol::Symbol) | ||||
|     get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr) | ||||
|  | ||||
| Generate and return code evaluating [`ComputeTaskP`](@ref) on `inSymbol`, providing the output on `outSymbol`. | ||||
| Generate code evaluating [`ComputeTaskU`](@ref) on `inSyms`, providing the output on `outSym`. | ||||
| `inSyms` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref). | ||||
| """ | ||||
| function get_expression(::ComputeTaskP, inSymbol::Symbol, outSymbol::Symbol) | ||||
|     return Meta.parse("$outSymbol = compute(ComputeTaskP(), $inSymbol)") | ||||
| function get_expression(::ComputeTaskU, device::AbstractDevice, inExprs::Vector, outExpr) | ||||
|     in = [eval(inExprs[1])] | ||||
|     out = eval(outExpr) | ||||
|  | ||||
|     return Meta.parse("$out = compute(ComputeTaskU(), $(in[1]))") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(::ComputeTaskU, inSymbol::Symbol, outSymbol::Symbol) | ||||
|     get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr) | ||||
|  | ||||
| Generate code evaluating [`ComputeTaskU`](@ref) on `inSymbol`, providing the output on `outSymbol`. | ||||
| `inSymbol` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref). | ||||
| Generate code evaluating [`ComputeTaskV`](@ref) on `inSyms`, providing the output on `outSym`. | ||||
| `inSyms[1]` and `inSyms[2]` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref). | ||||
| """ | ||||
| function get_expression(::ComputeTaskU, inSymbol::Symbol, outSymbol::Symbol) | ||||
|     return Meta.parse("$outSymbol = compute(ComputeTaskU(), $inSymbol)") | ||||
| function get_expression(::ComputeTaskV, device::AbstractDevice, inExprs::Vector, outExpr) | ||||
|     in = [eval(inExprs[1]), eval(inExprs[2])] | ||||
|     out = eval(outExpr) | ||||
|  | ||||
|     return Meta.parse("$out = compute(ComputeTaskV(), $(in[1]), $(in[2]))") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(::ComputeTaskV, inSymbol1::Symbol, inSymbol2::Symbol, outSymbol::Symbol) | ||||
|     get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr) | ||||
|  | ||||
| Generate code evaluating [`ComputeTaskV`](@ref) on `inSymbol1` and `inSymbol2`, providing the output on `outSymbol`. | ||||
| `inSymbol1` and `inSymbol2` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref). | ||||
| Generate code evaluating [`ComputeTaskS2`](@ref) on `inSyms`, providing the output on `outSym`. | ||||
| `inSyms[1]` and `inSyms[2]` should be of type [`ParticleValue`](@ref), `outSym` will be of type `Float64`. | ||||
| """ | ||||
| function get_expression( | ||||
|     ::ComputeTaskV, | ||||
|     inSymbol1::Symbol, | ||||
|     inSymbol2::Symbol, | ||||
|     outSymbol::Symbol, | ||||
| ) | ||||
|     return Meta.parse( | ||||
|         "$outSymbol = compute(ComputeTaskV(), $inSymbol1, $inSymbol2)", | ||||
|     ) | ||||
| function get_expression(::ComputeTaskS2, device::AbstractDevice, inExprs::Vector, outExpr) | ||||
|     in = [eval(inExprs[1]), eval(inExprs[2])] | ||||
|     out = eval(outExpr) | ||||
|  | ||||
|     return Meta.parse("$out = compute(ComputeTaskS2(), $(in[1]), $(in[2]))") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(::ComputeTaskS2, inSymbol1::Symbol, inSymbol2::Symbol, outSymbol::Symbol) | ||||
|     get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr) | ||||
|  | ||||
| Generate code evaluating [`ComputeTaskS2`](@ref) on `inSymbol1` and `inSymbol2`, providing the output on `outSymbol`. | ||||
| `inSymbol1` and `inSymbol2` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type `Float64`. | ||||
| Generate code evaluating [`ComputeTaskS1`](@ref) on `inSyms`, providing the output on `outSym`. | ||||
| `inSyms` should be of type [`ParticleValue`](@ref), `outSym` will be of type [`ParticleValue`](@ref). | ||||
| """ | ||||
| function get_expression( | ||||
|     ::ComputeTaskS2, | ||||
|     inSymbol1::Symbol, | ||||
|     inSymbol2::Symbol, | ||||
|     outSymbol::Symbol, | ||||
| ) | ||||
|     return Meta.parse( | ||||
|         "$outSymbol = compute(ComputeTaskS2(), $inSymbol1, $inSymbol2)", | ||||
|     ) | ||||
| function get_expression(::ComputeTaskS1, device::AbstractDevice, inExprs::Vector, outExpr) | ||||
|     in = [eval(inExprs[1])] | ||||
|     out = eval(outExpr) | ||||
|  | ||||
|     return Meta.parse("$out = compute(ComputeTaskS1(), $(in[1]))") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(::ComputeTaskS1, inSymbol::Symbol, outSymbol::Symbol) | ||||
|     get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr) | ||||
|  | ||||
| Generate code evaluating [`ComputeTaskS1`](@ref) on `inSymbol`, providing the output on `outSymbol`. | ||||
| `inSymbol` should be of type [`ParticleValue`](@ref), `outSymbol` will be of type [`ParticleValue`](@ref). | ||||
| Generate code evaluating [`ComputeTaskSum`](@ref) on `inSyms`, providing the output on `outSym`. | ||||
| The elements of `inSyms` should be of type `Float64`, `outSym` will be of type `Float64`. | ||||
| """ | ||||
| function get_expression(::ComputeTaskS1, inSymbol::Symbol, outSymbol::Symbol) | ||||
|     return Meta.parse("$outSymbol = compute(ComputeTaskS1(), $inSymbol)") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(::ComputeTaskSum, inSymbols::Vector{Symbol}, outSymbol::Symbol) | ||||
|  | ||||
| Generate code evaluating [`ComputeTaskSum`](@ref) on `inSymbols`, providing the output on `outSymbol`. | ||||
| `inSymbols` should be of type [`Float64`], `outSymbol` will be of type [`Float64`]. | ||||
| """ | ||||
| function get_expression( | ||||
|     ::ComputeTaskSum, | ||||
|     inSymbols::Vector{Symbol}, | ||||
|     outSymbol::Symbol, | ||||
| ) | ||||
|     return quote | ||||
|         $outSymbol = compute(ComputeTaskSum(), [$(inSymbols...)]) | ||||
|     end | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(t::FusedComputeTask, inSymbols::Vector{Symbol}, outSymbol::Symbol) | ||||
|  | ||||
| Generate code evaluating a [`FusedComputeTask`](@ref) on `inSymbols`, providing the output on `outSymbol`. | ||||
| `inSymbols` should be of the correct types and may be heterogeneous. `outSymbol` will be of the type of the output of `T2` of t. | ||||
| """ | ||||
| function get_expression( | ||||
|     t::FusedComputeTask, | ||||
|     inSymbols::Vector{Symbol}, | ||||
|     outSymbol::Symbol, | ||||
| ) | ||||
|     (T1, T2) = get_types(t) | ||||
|     c1 = children(T1()) | ||||
|     c2 = children(T2()) | ||||
|  | ||||
|     expr1 = nothing | ||||
|     expr2 = nothing | ||||
|  | ||||
|     # TODO need to figure out how to know which inputs belong to which subtask | ||||
|     # since we order the vectors with the child nodes we can't just split | ||||
|     if (c1 == 1) | ||||
|         expr1 = get_expression(T1(), inSymbols[begin], :intermediate) | ||||
|     elseif (c1 == 2) | ||||
|         expr1 = | ||||
|             get_expression(T1(), inSymbols[begin], inSymbols[2], :intermediate) | ||||
|     else | ||||
|         expr1 = get_expression(T1(), inSymbols[begin:c1], :intermediate) | ||||
|     end | ||||
|  | ||||
|     if (c2 == 1) | ||||
|         expr2 = get_expression(T2(), :intermediate, outSymbol) | ||||
|     elseif c2 == 2 | ||||
|         expr2 = | ||||
|             get_expression(T2(), :intermediate, inSymbols[c1 + 1], outSymbol) | ||||
|     else | ||||
|         expr2 = get_expression( | ||||
|             T2(), | ||||
|             :intermediate * inSymbols[(c1 + 1):end], | ||||
|             outSymbol, | ||||
|         ) | ||||
|     end | ||||
|  | ||||
|     return Expr(:block, expr1, expr2) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(node::ComputeTaskNode) | ||||
|  | ||||
| Generate and return code for a given [`ComputeTaskNode`](@ref). | ||||
| """ | ||||
| function get_expression(node::ComputeTaskNode) | ||||
|     t = typeof(node.task) | ||||
|     @assert length(node.children) == children(node.task) || t <: ComputeTaskSum | ||||
|  | ||||
|     if (t <: ComputeTaskU || t <: ComputeTaskP || t <: ComputeTaskS1) # single input | ||||
|         symbolIn = Symbol("data_$(to_var_name(node.children[1].id))") | ||||
|         symbolOut = Symbol("data_$(to_var_name(node.id))") | ||||
|         return get_expression(t(), symbolIn, symbolOut) | ||||
|     elseif (t <: ComputeTaskS2 || t <: ComputeTaskV) # double input | ||||
|         symbolIn1 = Symbol("data_$(to_var_name(node.children[1].id))") | ||||
|         symbolIn2 = Symbol("data_$(to_var_name(node.children[2].id))") | ||||
|         symbolOut = Symbol("data_$(to_var_name(node.id))") | ||||
|         return get_expression(t(), symbolIn1, symbolIn2, symbolOut) | ||||
|     elseif (t <: ComputeTaskSum || t <: FusedComputeTask) # vector input | ||||
|         inSymbols = Vector{Symbol}() | ||||
|         for child in node.children | ||||
|             push!(inSymbols, Symbol("data_$(to_var_name(child.id))")) | ||||
|         end | ||||
|         outSymbol = Symbol("data_$(to_var_name(node.id))") | ||||
|         return get_expression(t(), inSymbols, outSymbol) | ||||
|     else | ||||
|         error("Unknown compute task") | ||||
|     end | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(node::DataTaskNode) | ||||
|  | ||||
| Generate and return code for a given [`DataTaskNode`](@ref). | ||||
| """ | ||||
| function get_expression(node::DataTaskNode) | ||||
|     # TODO: do things to transport data from/to gpu, between numa nodes, etc. | ||||
|     @assert length(node.children) <= 1 | ||||
|  | ||||
|     inSymbol = nothing | ||||
|     if (length(node.children) == 1) | ||||
|         inSymbol = Symbol("data_$(to_var_name(node.children[1].id))") | ||||
|     else | ||||
|         inSymbol = Symbol("data_$(to_var_name(node.id))_in") | ||||
|     end | ||||
|     outSymbol = Symbol("data_$(to_var_name(node.id))") | ||||
|  | ||||
|     dataTransportExp = Meta.parse("$outSymbol = $inSymbol") | ||||
|  | ||||
|     return dataTransportExp | ||||
| function get_expression(::ComputeTaskSum, device::AbstractDevice, inExprs::Vector, outExpr) | ||||
|     in = eval.(inExprs) | ||||
|     out = eval(outExpr) | ||||
|  | ||||
|     return Meta.parse("$out = compute(ComputeTaskSum(), [$(unroll_symbol_vector(in))])") | ||||
| end | ||||
|   | ||||
| @@ -1,74 +1,198 @@ | ||||
| using QEDbase | ||||
| using Random | ||||
| using Roots | ||||
| using ForwardDiff | ||||
|  | ||||
| # Zero-argument convenience constructor: forwards to `ComputeTaskSum(0)`. | ||||
| function ComputeTaskSum() | ||||
|     return ComputeTaskSum(0) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     Particle(rng) | ||||
|     gen_process_input(processDescription::ABCProcessDescription) | ||||
|  | ||||
| Return a randomly generated particle. | ||||
| Return a ProcessInput of randomly generated [`ABCParticle`](@ref)s from an [`ABCProcessDescription`](@ref). The process description can be created manually or parsed from a string using [`parse_process`](@ref). | ||||
|  | ||||
| Note: This uses RAMBO to create a valid process with conservation of momentum and energy. | ||||
| """ | ||||
| function Particle(rng, type::ParticleType) | ||||
| function gen_process_input(processDescription::ABCProcessDescription) | ||||
|     inParticleTypes = keys(processDescription.inParticles) | ||||
|     outParticleTypes = keys(processDescription.outParticles) | ||||
|  | ||||
|     p1 = rand(rng, Float64) | ||||
|     p2 = rand(rng, Float64) | ||||
|     p3 = rand(rng, Float64) | ||||
|     m = mass(type) | ||||
|  | ||||
|     # keep the momenta of the particles on-shell | ||||
|     p4 = sqrt(p1^2 + p2^2 + p3^2 + m^2) | ||||
|  | ||||
|     return Particle(p1, p2, p3, p4, type) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     gen_particles(n::Int) | ||||
|  | ||||
| Return a Vector of `n` randomly generated [`Particle`](@ref)s. | ||||
|  | ||||
| Note: This does not take into account the preservation of momenta required for an actual valid process! | ||||
| """ | ||||
| function gen_particles(ns::Dict{ParticleType, Int}) | ||||
|     particles = Dict{ParticleType, Vector{Particle}}() | ||||
|     rng = MersenneTwister(0) | ||||
|  | ||||
|  | ||||
|     if ns == Dict((A => 2), (B => 2)) | ||||
|         rho = 1.0 | ||||
|  | ||||
|         omega = rand(rng, Float64) | ||||
|         theta = rand(rng, Float64) * π | ||||
|         phi = rand(rng, Float64) * π | ||||
|  | ||||
|         particles[A] = Vector{Particle}() | ||||
|         particles[B] = Vector{Particle}() | ||||
|  | ||||
|         push!(particles[A], Particle(omega, 0, 0, omega, A)) | ||||
|         push!(particles[B], Particle(omega, 0, 0, -omega, B)) | ||||
|         push!( | ||||
|             particles[A], | ||||
|             Particle( | ||||
|                 omega, | ||||
|                 rho * cos(theta) * cos(phi), | ||||
|                 rho * cos(theta) * sin(phi), | ||||
|                 rho * sin(theta), | ||||
|                 A, | ||||
|             ), | ||||
|         ) | ||||
|         push!( | ||||
|             particles[B], | ||||
|             Particle( | ||||
|                 omega, | ||||
|                 -rho * cos(theta) * cos(phi), | ||||
|                 -rho * cos(theta) * sin(phi), | ||||
|                 -rho * sin(theta), | ||||
|                 B, | ||||
|             ), | ||||
|         ) | ||||
|         return particles | ||||
|     end | ||||
|  | ||||
|     for (type, n) in ns | ||||
|         particles[type] = Vector{Particle}() | ||||
|         for i in 1:n | ||||
|             push!(particles[type], Particle(rng, type)) | ||||
|     massSum = 0 | ||||
|     inputMasses = Vector{Float64}() | ||||
|     for (particle, n) in processDescription.inParticles | ||||
|         for _ in 1:n | ||||
|             massSum += mass(particle) | ||||
|             push!(inputMasses, mass(particle)) | ||||
|         end | ||||
|     end | ||||
|     return particles | ||||
|     outputMasses = Vector{Float64}() | ||||
|     for (particle, n) in processDescription.outParticles | ||||
|         for _ in 1:n | ||||
|             massSum += mass(particle) | ||||
|             push!(outputMasses, mass(particle)) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     # add some extra random mass to allow for some momentum | ||||
|     massSum += rand(rng[threadid()]) * (length(inputMasses) + length(outputMasses)) | ||||
|  | ||||
|  | ||||
|     inputParticles = Vector{ABCParticle}() | ||||
|     initialMomenta = generate_initial_moms(massSum, inputMasses) | ||||
|     index = 1 | ||||
|     for (particle, n) in processDescription.inParticles | ||||
|         for _ in 1:n | ||||
|             mom = initialMomenta[index] | ||||
|             push!(inputParticles, particle(mom)) | ||||
|             index += 1 | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     outputParticles = Vector{ABCParticle}() | ||||
|     final_momenta = generate_physical_massive_moms(rng[threadid()], massSum, outputMasses) | ||||
|     index = 1 | ||||
|     for (particle, n) in processDescription.outParticles | ||||
|         for _ in 1:n | ||||
|             mom = final_momenta[index] | ||||
|             push!(outputParticles, particle(SFourMomentum(-mom.E, mom.px, mom.py, mom.pz))) | ||||
|             index += 1 | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     processInput = ABCProcessInput(processDescription, inputParticles, outputParticles) | ||||
|  | ||||
|     return return processInput | ||||
| end | ||||
|  | ||||
| #################### | ||||
| # CODE FROM HERE BORROWED FROM SOURCE: https://codebase.helmholtz.cloud/qedsandbox/QEDphasespaces.jl/ | ||||
| # use qedphasespaces directly once released | ||||
| # | ||||
| # quick and dirty implementation of the RAMBO algorithm | ||||
| # | ||||
| # reference:  | ||||
| # * https://cds.cern.ch/record/164736/files/198601282.pdf | ||||
| # * https://www.sciencedirect.com/science/article/pii/0010465586901190 | ||||
| #################### | ||||
|  | ||||
| """ | ||||
|     generate_initial_moms(ss, masses) | ||||
|  | ||||
| Return the momenta of two incoming particles with masses `masses[1]` and `masses[2]` whose energies add up to `ss`. | ||||
|  | ||||
| Both momenta point along the last spatial component with equal magnitude and opposite sign, so their spatial parts cancel. | ||||
| Only two-particle initial states are supported; any other number of masses throws an `ArgumentError`. | ||||
| `sqrt` throws a `DomainError` when `E^2 - m^2` is negative for either particle, e.g. when `ss` is slightly below | ||||
| `masses[1] + masses[2]`. | ||||
| """ | ||||
| function generate_initial_moms(ss, masses) | ||||
|     # the closed-form energies below only exist for exactly two particles; fail loudly instead of | ||||
|     # silently ignoring additional masses or hitting a bare BoundsError | ||||
|     length(masses) == 2 || | ||||
|         throw(ArgumentError("generate_initial_moms supports exactly two incoming particles, got $(length(masses))")) | ||||
|  | ||||
|     # energies of the two particles; by construction E1 + E2 == ss | ||||
|     E1 = (ss^2 + masses[1]^2 - masses[2]^2) / (2 * ss) | ||||
|     E2 = (ss^2 + masses[2]^2 - masses[1]^2) / (2 * ss) | ||||
|  | ||||
|     # magnitude of the spatial momentum from the on-shell condition E^2 = m^2 + rho^2 | ||||
|     rho1 = sqrt(E1^2 - masses[1]^2) | ||||
|     rho2 = sqrt(E2^2 - masses[2]^2) | ||||
|  | ||||
|     return [SFourMomentum(E1, 0, 0, rho1), SFourMomentum(E2, 0, 0, -rho2)] | ||||
| end | ||||
|  | ||||
|  | ||||
| # Sampling hook: `rand(rng, SFourMomentum)` builds a momentum from four random numbers drawn from `rng`. | ||||
| function Random.rand(rng::AbstractRNG, ::Random.SamplerType{SFourMomentum}) | ||||
|     return SFourMomentum(rand(rng, 4)) | ||||
| end | ||||
|  | ||||
| # Sampling hook: `rand(rng, NTuple{N, Float64})` returns a tuple of `N` random numbers drawn from `rng`. | ||||
| function Random.rand(rng::AbstractRNG, ::Random.SamplerType{NTuple{N, Float64}}) where {N} | ||||
|     return Tuple(rand(rng, N)) | ||||
| end | ||||
|  | ||||
|  | ||||
| """ | ||||
|     _transform_uni_to_mom(u1, u2, u3, u4) | ||||
|  | ||||
| Turn four numbers into a massless `SFourMomentum`: `u1` fixes the cosine of the polar angle (`2 * u1 - 1`), | ||||
| `u2` the azimuth (`2 * pi * u2`) and `u3 * u4` the energy (`-log(u3 * u4)`). | ||||
| The inputs are presumably uniform random numbers in (0, 1). | ||||
| """ | ||||
| function _transform_uni_to_mom(u1, u2, u3, u4) | ||||
|     cos_theta = 2 * u1 - 1 | ||||
|     sin_theta = sqrt(1 - cos_theta^2) | ||||
|     azimuth = 2 * pi * u2 | ||||
|     energy = -log(u3 * u4) | ||||
|  | ||||
|     # spatial part has length `energy`, so the resulting momentum is light-like | ||||
|     return SFourMomentum( | ||||
|         energy, | ||||
|         energy * sin_theta * cos(azimuth), | ||||
|         energy * sin_theta * sin(azimuth), | ||||
|         energy * cos_theta, | ||||
|     ) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     _transform_uni_to_mom!(uni_mom, dest) | ||||
|  | ||||
| Convert the four components of `uni_mom` into a momentum with `_transform_uni_to_mom` and return it. | ||||
|  | ||||
| Despite the `!` in the name, `dest` is **not** modified: assigning to the argument would only rebind the local | ||||
| name. Callers must use the return value. The `dest` parameter is kept so existing call sites keep working. | ||||
| """ | ||||
| function _transform_uni_to_mom!(uni_mom, dest) | ||||
|     # destructure first so that inputs with more than four components behave as before | ||||
|     u1, u2, u3, u4 = Tuple(uni_mom) | ||||
|  | ||||
|     # single source of truth for the transformation lives in the four-argument method | ||||
|     return _transform_uni_to_mom(u1, u2, u3, u4) | ||||
| end | ||||
|  | ||||
| # Convenience methods: accept the four input numbers packed into a tuple or into an `SFourMomentum`. | ||||
| function _transform_uni_to_mom(u1234::Tuple) | ||||
|     return _transform_uni_to_mom(u1234...) | ||||
| end | ||||
|  | ||||
| function _transform_uni_to_mom(u1234::SFourMomentum) | ||||
|     return _transform_uni_to_mom(Tuple(u1234)) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     generate_massless_moms(rng, n::Int) | ||||
|  | ||||
| Draw `n` random `SFourMomentum` samples from `rng` and map each of them through `_transform_uni_to_mom`. | ||||
| """ | ||||
| function generate_massless_moms(rng, n::Int) | ||||
|     # `rand!` fills the uninitialised buffer in place and returns it | ||||
|     uniform_samples = rand!(rng, Vector{SFourMomentum}(undef, n)) | ||||
|     return map(_transform_uni_to_mom, uniform_samples) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     generate_physical_massless_moms(rng, ss, n) | ||||
|  | ||||
| Generate `n` momenta with `generate_massless_moms` and transform each one using the summed momentum `Q`: | ||||
| a boost built from `-Q / M` and `getT(Q) / M` followed by a rescaling with `ss / M`, where `M = sqrt(Q * Q)`. | ||||
|  | ||||
| This is presumably the boost-and-rescale step of RAMBO, after which the momenta add up to energy `ss` with | ||||
| vanishing spatial momentum; that property is not checked here. The generated vector is overwritten and returned. | ||||
| """ | ||||
| function generate_physical_massless_moms(rng, ss, n) | ||||
|     moms = generate_massless_moms(rng, n) | ||||
|     total = sum(moms) | ||||
|     total_mass = sqrt(total * total) | ||||
|  | ||||
|     # boost parameters derived from the summed momentum | ||||
|     boost_scale = -1 / total_mass | ||||
|     bx = boost_scale * getX(total) | ||||
|     by = boost_scale * getY(total) | ||||
|     bz = boost_scale * getZ(total) | ||||
|     gamma = getT(total) / total_mass | ||||
|     a = 1 / (1 + gamma) | ||||
|     rescale = ss / total_mass | ||||
|  | ||||
|     for i in 1:n | ||||
|         q = moms[i] | ||||
|         q0 = getT(q) | ||||
|         qx = getX(q) | ||||
|         qy = getY(q) | ||||
|         qz = getZ(q) | ||||
|  | ||||
|         bq = bx * qx + by * qy + bz * qz | ||||
|  | ||||
|         moms[i] = SFourMomentum( | ||||
|             rescale * (gamma * q0 + bq), | ||||
|             rescale * (qx + bx * q0 + a * bq * bx), | ||||
|             rescale * (qy + by * q0 + a * bq * by), | ||||
|             rescale * (qz + bz * q0 + a * bq * bz), | ||||
|         ) | ||||
|     end | ||||
|     return moms | ||||
| end | ||||
|  | ||||
| # Residual whose root in `xi` is searched for: the sum over all particles of | ||||
| # sqrt(masses[i]^2 + xi^2 * p0s[i]^2), minus the target value `ss`. | ||||
| function _to_be_solved(xi, masses, p0s, ss) | ||||
|     energy_total = 0.0 | ||||
|     for (idx, p0) in enumerate(p0s) | ||||
|         energy_total += sqrt(masses[idx]^2 + xi^2 * p0^2) | ||||
|     end | ||||
|     return energy_total - ss | ||||
| end | ||||
|  | ||||
| # Build one massive momentum per entry of `massless_moms`: the spatial components are scaled by `xi` and the | ||||
| # energy is recomputed as sqrt(getT(q)^2 * xi^2 + masses[i]^2). | ||||
| function _build_massive_momenta(xi, masses, massless_moms) | ||||
|     massive_moms = SFourMomentum[] | ||||
|     for i in 1:length(massless_moms) | ||||
|         q = massless_moms[i] | ||||
|         energy = sqrt(getT(q)^2 * xi^2 + masses[i]^2) | ||||
|  | ||||
|         push!(massive_moms, SFourMomentum(energy, xi * getX(q), xi * getY(q), xi * getZ(q))) | ||||
|     end | ||||
|     return massive_moms | ||||
| end | ||||
|  | ||||
| # Return a closure that evaluates the derivative of `func` at `x` using `ForwardDiff.derivative`. | ||||
| # The argument is passed through `float` before differentiation. | ||||
| function first_derivative(func) | ||||
|     return x -> ForwardDiff.derivative(func, float(x)) | ||||
| end | ||||
|  | ||||
|  | ||||
| """ | ||||
|     generate_physical_massive_moms(rng, ss, masses; x0 = 0.1) | ||||
|  | ||||
| Generate one momentum per entry of `masses`: create massless momenta with `generate_physical_massless_moms`, | ||||
| find the root `xi` of `_to_be_solved` with Newton's method starting at `x0`, and build the result with | ||||
| `_build_massive_momenta`. | ||||
| """ | ||||
| function generate_physical_massive_moms(rng, ss, masses; x0 = 0.1) | ||||
|     massless_moms = generate_physical_massless_moms(rng, ss, length(masses)) | ||||
|     massless_energies = getT.(massless_moms) | ||||
|  | ||||
|     # residual(x) == 0 exactly when the rescaled energies add up to `ss` | ||||
|     residual = x -> _to_be_solved(x, masses, massless_energies, ss) | ||||
|     xi = find_zero((residual, first_derivative(residual)), x0, Roots.Newton()) | ||||
|  | ||||
|     return _build_massive_momenta(xi, masses, massless_moms) | ||||
| end | ||||
|   | ||||
| @@ -32,13 +32,13 @@ function parse_edges(input::AbstractString) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     parse_abc(filename::String; verbose::Bool = false) | ||||
|     parse_dag(filename::AbstractString, model::ABCModel, verbose::Bool = false) | ||||
|  | ||||
| Read an abc-model process from the given file. If `verbose` is set to true, print some progress information to stdout. | ||||
|  | ||||
| Returns a valid [`DAG`](@ref). | ||||
| """ | ||||
| function parse_abc(filename::String, verbose::Bool = false) | ||||
| function parse_dag(filename::AbstractString, model::ABCModel, verbose::Bool = false) | ||||
|     file = open(filename, "r") | ||||
|  | ||||
|     if (verbose) | ||||
| @@ -63,10 +63,9 @@ function parse_abc(filename::String, verbose::Bool = false) | ||||
|     end | ||||
|     sizehint!(graph.nodes, estimate_no_nodes) | ||||
|  | ||||
|     sum_node = insert_node!(graph, make_node(ComputeTaskSum()), false, false) | ||||
|     global_data_out = | ||||
|         insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), false, false) | ||||
|     insert_edge!(graph, sum_node, global_data_out, false, false) | ||||
|     sum_node = insert_node!(graph, make_node(ComputeTaskSum(0)), track = false, invalidate_cache = false) | ||||
|     global_data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false) | ||||
|     insert_edge!(graph, sum_node, global_data_out, track = false, invalidate_cache = false) | ||||
|  | ||||
|     # remember the data out nodes for connection | ||||
|     dataOutNodes = Dict() | ||||
| @@ -81,10 +80,7 @@ function parse_abc(filename::String, verbose::Bool = false) | ||||
|         noNodes += 1 | ||||
|         if (noNodes % 100 == 0) | ||||
|             if (verbose) | ||||
|                 percent = string( | ||||
|                     round(100.0 * noNodes / nodesToRead, digits = 2), | ||||
|                     "%", | ||||
|                 ) | ||||
|                 percent = string(round(100.0 * noNodes / nodesToRead, digits = 2), "%") | ||||
|                 print("\rReading Nodes... $percent") | ||||
|             end | ||||
|         end | ||||
| @@ -93,30 +89,20 @@ function parse_abc(filename::String, verbose::Bool = false) | ||||
|             data_in = insert_node!( | ||||
|                 graph, | ||||
|                 make_node(DataTask(PARTICLE_VALUE_SIZE), string(node)), | ||||
|                 false, | ||||
|                 false, | ||||
|                 track = false, | ||||
|                 invalidate_cache = false, | ||||
|             ) # read particle data node | ||||
|             compute_P = | ||||
|                 insert_node!(graph, make_node(ComputeTaskP()), false, false) # compute P node | ||||
|             data_Pu = insert_node!( | ||||
|                 graph, | ||||
|                 make_node(DataTask(PARTICLE_VALUE_SIZE)), | ||||
|                 false, | ||||
|                 false, | ||||
|             ) # transfer data from P to u (one ParticleValue object) | ||||
|             compute_u = | ||||
|                 insert_node!(graph, make_node(ComputeTaskU()), false, false) # compute U node | ||||
|             data_out = insert_node!( | ||||
|                 graph, | ||||
|                 make_node(DataTask(PARTICLE_VALUE_SIZE)), | ||||
|                 false, | ||||
|                 false, | ||||
|             ) # transfer data out from u (one ParticleValue object) | ||||
|             compute_P = insert_node!(graph, make_node(ComputeTaskP()), track = false, invalidate_cache = false) # compute P node | ||||
|             data_Pu = | ||||
|                 insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data from P to u (one ParticleValue object) | ||||
|             compute_u = insert_node!(graph, make_node(ComputeTaskU()), track = false, invalidate_cache = false) # compute U node | ||||
|             data_out = | ||||
|                 insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) # transfer data out from u (one ParticleValue object) | ||||
|  | ||||
|             insert_edge!(graph, data_in, compute_P, false, false) | ||||
|             insert_edge!(graph, compute_P, data_Pu, false, false) | ||||
|             insert_edge!(graph, data_Pu, compute_u, false, false) | ||||
|             insert_edge!(graph, compute_u, data_out, false, false) | ||||
|             insert_edge!(graph, data_in, compute_P, track = false, invalidate_cache = false) | ||||
|             insert_edge!(graph, compute_P, data_Pu, track = false, invalidate_cache = false) | ||||
|             insert_edge!(graph, data_Pu, compute_u, track = false, invalidate_cache = false) | ||||
|             insert_edge!(graph, compute_u, data_out, track = false, invalidate_cache = false) | ||||
|  | ||||
|             # remember the data_out node for future edges | ||||
|             dataOutNodes[node] = data_out | ||||
| @@ -126,63 +112,48 @@ function parse_abc(filename::String, verbose::Bool = false) | ||||
|             in1 = capt.captures[1] | ||||
|             in2 = capt.captures[2] | ||||
|  | ||||
|             compute_v = | ||||
|                 insert_node!(graph, make_node(ComputeTaskV()), false, false) | ||||
|             data_out = insert_node!( | ||||
|                 graph, | ||||
|                 make_node(DataTask(PARTICLE_VALUE_SIZE)), | ||||
|                 false, | ||||
|                 false, | ||||
|             ) | ||||
|             compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false) | ||||
|             data_out = | ||||
|                 insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) | ||||
|  | ||||
|             if (occursin(regex_c, in1)) | ||||
|                 # put an S node after this input | ||||
|                 compute_S = insert_node!( | ||||
|                     graph, | ||||
|                     make_node(ComputeTaskS1()), | ||||
|                     false, | ||||
|                     false, | ||||
|                 ) | ||||
|                 compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false) | ||||
|                 data_S_v = insert_node!( | ||||
|                     graph, | ||||
|                     make_node(DataTask(PARTICLE_VALUE_SIZE)), | ||||
|                     false, | ||||
|                     false, | ||||
|                     track = false, | ||||
|                     invalidate_cache = false, | ||||
|                 ) | ||||
|  | ||||
|                 insert_edge!(graph, dataOutNodes[in1], compute_S, false, false) | ||||
|                 insert_edge!(graph, compute_S, data_S_v, false, false) | ||||
|                 insert_edge!(graph, dataOutNodes[in1], compute_S, track = false, invalidate_cache = false) | ||||
|                 insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false) | ||||
|  | ||||
|                 insert_edge!(graph, data_S_v, compute_v, false, false) | ||||
|                 insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false) | ||||
|             else | ||||
|                 insert_edge!(graph, dataOutNodes[in1], compute_v, false, false) | ||||
|                 insert_edge!(graph, dataOutNodes[in1], compute_v, track = false, invalidate_cache = false) | ||||
|             end | ||||
|  | ||||
|             if (occursin(regex_c, in2)) | ||||
|                 # i think the current generator only puts the combined particles in the first space, so this case might never be entered | ||||
|                 # put an S node after this input | ||||
|                 compute_S = insert_node!( | ||||
|                     graph, | ||||
|                     make_node(ComputeTaskS1()), | ||||
|                     false, | ||||
|                     false, | ||||
|                 ) | ||||
|                 compute_S = insert_node!(graph, make_node(ComputeTaskS1()), track = false, invalidate_cache = false) | ||||
|                 data_S_v = insert_node!( | ||||
|                     graph, | ||||
|                     make_node(DataTask(PARTICLE_VALUE_SIZE)), | ||||
|                     false, | ||||
|                     false, | ||||
|                     track = false, | ||||
|                     invalidate_cache = false, | ||||
|                 ) | ||||
|  | ||||
|                 insert_edge!(graph, dataOutNodes[in2], compute_S, false, false) | ||||
|                 insert_edge!(graph, compute_S, data_S_v, false, false) | ||||
|                 insert_edge!(graph, dataOutNodes[in2], compute_S, track = false, invalidate_cache = false) | ||||
|                 insert_edge!(graph, compute_S, data_S_v, track = false, invalidate_cache = false) | ||||
|  | ||||
|                 insert_edge!(graph, data_S_v, compute_v, false, false) | ||||
|                 insert_edge!(graph, data_S_v, compute_v, track = false, invalidate_cache = false) | ||||
|             else | ||||
|                 insert_edge!(graph, dataOutNodes[in2], compute_v, false, false) | ||||
|                 insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false) | ||||
|             end | ||||
|  | ||||
|             insert_edge!(graph, compute_v, data_out, false, false) | ||||
|             insert_edge!(graph, compute_v, data_out, track = false, invalidate_cache = false) | ||||
|             dataOutNodes[node] = data_out | ||||
|  | ||||
|         elseif occursin(regex_m, node) | ||||
| @@ -193,43 +164,31 @@ function parse_abc(filename::String, verbose::Bool = false) | ||||
|             in3 = capt.captures[3] | ||||
|  | ||||
|             # in2 + in3 with a v | ||||
|             compute_v = | ||||
|                 insert_node!(graph, make_node(ComputeTaskV()), false, false) | ||||
|             data_v = insert_node!( | ||||
|                 graph, | ||||
|                 make_node(DataTask(PARTICLE_VALUE_SIZE)), | ||||
|                 false, | ||||
|                 false, | ||||
|             ) | ||||
|             compute_v = insert_node!(graph, make_node(ComputeTaskV()), track = false, invalidate_cache = false) | ||||
|             data_v = | ||||
|                 insert_node!(graph, make_node(DataTask(PARTICLE_VALUE_SIZE)), track = false, invalidate_cache = false) | ||||
|  | ||||
|             insert_edge!(graph, dataOutNodes[in2], compute_v, false, false) | ||||
|             insert_edge!(graph, dataOutNodes[in3], compute_v, false, false) | ||||
|             insert_edge!(graph, compute_v, data_v, false, false) | ||||
|             insert_edge!(graph, dataOutNodes[in2], compute_v, track = false, invalidate_cache = false) | ||||
|             insert_edge!(graph, dataOutNodes[in3], compute_v, track = false, invalidate_cache = false) | ||||
|             insert_edge!(graph, compute_v, data_v, track = false, invalidate_cache = false) | ||||
|  | ||||
|             # combine with the v of the combined other input | ||||
|             compute_S2 = | ||||
|                 insert_node!(graph, make_node(ComputeTaskS2()), false, false) | ||||
|             data_out = insert_node!( | ||||
|                 graph, | ||||
|                 make_node(DataTask(FLOAT_SIZE)), | ||||
|                 false, | ||||
|                 false, | ||||
|             ) # output of a S2 task is only a float | ||||
|             compute_S2 = insert_node!(graph, make_node(ComputeTaskS2()), track = false, invalidate_cache = false) | ||||
|             data_out = insert_node!(graph, make_node(DataTask(FLOAT_SIZE)), track = false, invalidate_cache = false) # output of a S2 task is only a float | ||||
|  | ||||
|             insert_edge!(graph, data_v, compute_S2, false, false) | ||||
|             insert_edge!(graph, dataOutNodes[in1], compute_S2, false, false) | ||||
|             insert_edge!(graph, compute_S2, data_out, false, false) | ||||
|             insert_edge!(graph, data_v, compute_S2, track = false, invalidate_cache = false) | ||||
|             insert_edge!(graph, dataOutNodes[in1], compute_S2, track = false, invalidate_cache = false) | ||||
|             insert_edge!(graph, compute_S2, data_out, track = false, invalidate_cache = false) | ||||
|  | ||||
|             insert_edge!(graph, data_out, sum_node, false, false) | ||||
|             insert_edge!(graph, data_out, sum_node, track = false, invalidate_cache = false) | ||||
|             add_child!(task(sum_node)) | ||||
|         elseif occursin(regex_plus, node) | ||||
|             if (verbose) | ||||
|                 println("\rReading Nodes Complete    ") | ||||
|                 println("Added ", length(graph.nodes), " nodes") | ||||
|             end | ||||
|         else | ||||
|             @assert false ( | ||||
|                 "Unknown node '$node' while reading from file $filename" | ||||
|             ) | ||||
|             @assert false ("Unknown node '$node' while reading from file $filename") | ||||
|         end | ||||
|     end | ||||
|  | ||||
| @@ -244,6 +203,46 @@ function parse_abc(filename::String, verbose::Bool = false) | ||||
|     if (verbose) | ||||
|         println("Done") | ||||
|     end | ||||
|  | ||||
|     # don't actually need to read the edges | ||||
|     return graph | ||||
| end | ||||
|  | ||||
| """ | ||||
|     parse_process(str::AbstractString, model::ABCModel) | ||||
|  | ||||
| Parse a string representation of a process, such as "AB->ABBB" into the corresponding [`ABCProcessDescription`](@ref). | ||||
|  | ||||
| The part before the first `->` lists the incoming particles and the part after it the outgoing particles, one character per particle. | ||||
| Each character must equal the first character of `String(t)` for one of the `types(model)`. | ||||
|  | ||||
| Throws a `String` message if `->` is missing, if either side is empty, or if either side contains characters that match no | ||||
| particle type (this includes any additional `->`). | ||||
| """ | ||||
| function parse_process(str::AbstractString, model::ABCModel) | ||||
|     inParticles = Dict{Type, Int}() | ||||
|     outParticles = Dict{Type, Int}() | ||||
|  | ||||
|     if !(contains(str, "->")) | ||||
|         throw("Did not find -> while parsing process \"$str\"") | ||||
|     end | ||||
|  | ||||
|     # limit = 2 splits at the first arrow only, so a surplus "->" stays in outStr and is rejected by the | ||||
|     # unknown-character check below instead of being silently dropped | ||||
|     (inStr, outStr) = split(str, "->", limit = 2) | ||||
|  | ||||
|     if (isempty(inStr) || isempty(outStr)) | ||||
|         throw("Process (\"$str\") input or output part is empty!") | ||||
|     end | ||||
|  | ||||
|     for t in types(model) | ||||
|         # a particle is identified by the first character of its type's name | ||||
|         symbol = String(t)[1] | ||||
|         inCount = count(==(symbol), inStr) | ||||
|         outCount = count(==(symbol), outStr) | ||||
|         if inCount != 0 | ||||
|             inParticles[t] = inCount | ||||
|         end | ||||
|         if outCount != 0 | ||||
|             outParticles[t] = outCount | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     # every character must have been counted for some type; a mismatch means an unknown character is present | ||||
|     if length(inStr) != sum(values(inParticles)) | ||||
|         throw("Encountered unknown characters in the input part of process \"$str\"") | ||||
|     elseif length(outStr) != sum(values(outParticles)) | ||||
|         throw("Encountered unknown characters in the output part of process \"$str\"") | ||||
|     end | ||||
|  | ||||
|     return ABCProcessDescription(inParticles, outParticles) | ||||
| end | ||||
|   | ||||
| @@ -1,99 +1,142 @@ | ||||
| """ | ||||
|     ParticleType | ||||
| using QEDbase | ||||
|  | ||||
| import QEDbase.mass | ||||
|  | ||||
| A Particle Type in the ABC Model as an enum, with types `A`, `B` and `C`. | ||||
| """ | ||||
| @enum ParticleType A = 1 B = 2 C = 3 | ||||
|     ABCModel <: AbstractPhysicsModel | ||||
|  | ||||
| Singleton definition for identification of the ABC-Model. | ||||
| """ | ||||
| struct ABCModel <: AbstractPhysicsModel end | ||||
|  | ||||
| """ | ||||
|     ABCParticle | ||||
|  | ||||
| Base type for all particles in the [`ABCModel`](@ref). | ||||
| """ | ||||
| abstract type ABCParticle <: AbstractParticle end | ||||
|  | ||||
| """ | ||||
|     ParticleA <: ABCParticle | ||||
|  | ||||
| An 'A' particle in the ABC Model. | ||||
| """ | ||||
| struct ParticleA <: ABCParticle | ||||
|     momentum::SFourMomentum | ||||
| end | ||||
|  | ||||
| """ | ||||
|     ParticleB <: ABCParticle | ||||
|  | ||||
| A 'B' particle in the ABC Model. | ||||
| """ | ||||
| struct ParticleB <: ABCParticle | ||||
|     momentum::SFourMomentum | ||||
| end | ||||
|  | ||||
| """ | ||||
|     ParticleC <: ABCParticle | ||||
|  | ||||
| A 'C' particle in the ABC Model. | ||||
| """ | ||||
| struct ParticleC <: ABCParticle | ||||
|     momentum::SFourMomentum | ||||
| end | ||||
|  | ||||
| """ | ||||
|     ABCProcessDescription <: AbstractProcessDescription | ||||
|  | ||||
| A description of a process in the ABC-Model. Contains the input and output particles. | ||||
|  | ||||
| See also: [`in_particles`](@ref), [`out_particles`](@ref), [`parse_process`](@ref) | ||||
| """ | ||||
| struct ABCProcessDescription <: AbstractProcessDescription | ||||
|     inParticles::Dict{Type, Int} | ||||
|     outParticles::Dict{Type, Int} | ||||
| end | ||||
|  | ||||
| """ | ||||
|     ABCProcessInput <: AbstractProcessInput | ||||
|  | ||||
| Input for an ABC Process. Contains the [`ABCProcessDescription`](@ref) of the process it is an input for, and the values of the in and out particles. | ||||
|  | ||||
| See also: [`gen_process_input`](@ref) | ||||
| """ | ||||
| struct ABCProcessInput <: AbstractProcessInput | ||||
|     process::ABCProcessDescription | ||||
|     inParticles::Vector{ABCParticle} | ||||
|     outParticles::Vector{ABCParticle} | ||||
| end | ||||
|  | ||||
| """ | ||||
|     PARTICLE_MASSES | ||||
|  | ||||
| A constant dictionary containing the masses of the different [`ParticleType`](@ref)s. | ||||
| A constant dictionary containing the masses of the different [`ABCParticle`](@ref)s. | ||||
| """ | ||||
| const PARTICLE_MASSES = | ||||
|     Dict{ParticleType, Float64}(A => 1.0, B => 1.0, C => 0.0) | ||||
| const PARTICLE_MASSES = Dict{Type, Float64}(ParticleA => 1.0, ParticleB => 1.0, ParticleC => 0.0) | ||||
|  | ||||
| """ | ||||
|     Particle | ||||
|  | ||||
| A struct describing a particle of the ABC-Model. It has the 4 momentum parts P0...P3 and a [`ParticleType`](@ref). | ||||
|  | ||||
| `sizeof(Particle())` = 40 Byte | ||||
| """ | ||||
| struct Particle | ||||
|     P0::Float64 | ||||
|     P1::Float64 | ||||
|     P2::Float64 | ||||
|     P3::Float64 | ||||
|  | ||||
|     type::ParticleType | ||||
| end | ||||
|  | ||||
| """ | ||||
|     ParticleValue | ||||
|  | ||||
| A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated. | ||||
|  | ||||
| `sizeof(ParticleValue())` = 48 Byte | ||||
| """ | ||||
| struct ParticleValue | ||||
|     p::Particle | ||||
|     v::Float64 | ||||
| end | ||||
|  | ||||
| """ | ||||
|     mass(t::ParticleType) | ||||
|     mass(t::Type{T}) where {T <: ABCParticle} | ||||
|      | ||||
| Return the mass (at rest) of the given particle type. | ||||
| """ | ||||
| mass(t::ParticleType) = PARTICLE_MASSES[t] | ||||
| mass(t::Type{T}) where {T <: ABCParticle} = PARTICLE_MASSES[t] | ||||
|  | ||||
| """ | ||||
|     remaining_type(t1::ParticleType, t2::ParticleType) | ||||
|     interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle} | ||||
|  | ||||
| For 2 given (non-equal) particle types, return the third of ABC. | ||||
| """ | ||||
| function remaining_type(t1::ParticleType, t2::ParticleType) | ||||
| function interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle} | ||||
|     @assert t1 != t2 | ||||
|     if t1 != A && t2 != A | ||||
|         return A | ||||
|     elseif t1 != B && t2 != B | ||||
|         return B | ||||
|     if t1 != ParticleA && t2 != ParticleA | ||||
|         return ParticleA | ||||
|     elseif t1 != ParticleB && t2 != ParticleB | ||||
|         return ParticleB | ||||
|     else | ||||
|         return C | ||||
|         return ParticleC | ||||
|     end | ||||
| end | ||||
|  | ||||
| """ | ||||
|     square(p::Particle) | ||||
|     types(::ABCModel) | ||||
|  | ||||
| Return a Vector of the possible types of particle in the [`ABCModel`](@ref). | ||||
| """ | ||||
| function types(::ABCModel) | ||||
|     return [ParticleA, ParticleB, ParticleC] | ||||
| end | ||||
|  | ||||
| """ | ||||
|     square(p::ABCParticle) | ||||
|  | ||||
| Return the square of the particle's momentum as a `Float` value. | ||||
|  | ||||
| Takes 7 effective FLOP. | ||||
| """ | ||||
| function square(p::Particle) | ||||
|     return p.P0 * p.P0 - p.P1 * p.P1 - p.P2 * p.P2 - p.P3 * p.P3 | ||||
| function square(p::ABCParticle) | ||||
|     return getMass2(p.momentum) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     inner_edge(p::Particle) | ||||
|     inner_edge(p::ABCParticle) | ||||
|  | ||||
| Return the factor of the inner edge with the given (virtual) particle. | ||||
|  | ||||
| Takes 10 effective FLOP. (3 here + 10 in square(p)) | ||||
| Takes 10 effective FLOP. (3 here + 7 in square(p)) | ||||
| """ | ||||
| function inner_edge(p::Particle) | ||||
|     return 1.0 / (square(p) - mass(p.type) * mass(p.type)) | ||||
| function inner_edge(p::ABCParticle) | ||||
|     return 1.0 / (square(p) - mass(typeof(p)) * mass(typeof(p))) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     outer_edge(p::Particle) | ||||
|     outer_edge(p::ABCParticle) | ||||
|  | ||||
| Return the factor of the outer edge with the given (real) particle. | ||||
|  | ||||
| Takes 0 effective FLOP. | ||||
| """ | ||||
| function outer_edge(p::Particle) | ||||
| function outer_edge(p::ABCParticle) | ||||
|     return 1.0 | ||||
| end | ||||
|  | ||||
| @@ -111,20 +154,57 @@ function vertex() | ||||
| end | ||||
|  | ||||
| """ | ||||
|     preserve_momentum(p1::Particle, p2::Particle) | ||||
|     preserve_momentum(p1::ABCParticle, p2::ABCParticle) | ||||
|  | ||||
| Calculate and return a new particle from two given interacting ones at a vertex. | ||||
|  | ||||
| Takes 4 effective FLOP. | ||||
| """ | ||||
| function preserve_momentum(p1::Particle, p2::Particle) | ||||
|     p3 = Particle( | ||||
|         p1.P0 + p2.P0, | ||||
|         p1.P1 + p2.P1, | ||||
|         p1.P2 + p2.P2, | ||||
|         p1.P3 + p2.P3, | ||||
|         remaining_type(p1.type, p2.type), | ||||
|     ) | ||||
|  | ||||
| function preserve_momentum(p1::ABCParticle, p2::ABCParticle) | ||||
|     t3 = interaction_result(typeof(p1), typeof(p2)) | ||||
|     p3 = t3(p1.momentum + p2.momentum) | ||||
|     return p3 | ||||
| end | ||||
|  | ||||
| """ | ||||
|     type_from_name(name::String) | ||||
|  | ||||
| Map a particle name to its particle type: a name starting with "A", "B" or "C" gives [`ParticleA`](@ref), | ||||
| [`ParticleB`](@ref) or [`ParticleC`](@ref) respectively. Any other name throws a `String` error message. | ||||
| """ | ||||
| function type_from_name(name::String) | ||||
|     # only the leading character decides the type | ||||
|     startswith(name, "A") && return ParticleA | ||||
|     startswith(name, "B") && return ParticleB | ||||
|     startswith(name, "C") && return ParticleC | ||||
|  | ||||
|     throw("Invalid name for a particle in the ABC model") | ||||
| end | ||||
|  | ||||
| # Single-letter names of the three ABC particle types. | ||||
| String(::Type{ParticleA}) = "A" | ||||
| String(::Type{ParticleB}) = "B" | ||||
| String(::Type{ParticleC}) = "C" | ||||
|  | ||||
| # Accessor: the `inParticles` field of a process description. | ||||
| in_particles(process::ABCProcessDescription) = process.inParticles | ||||
|  | ||||
| # Accessor: the `inParticles` field of a process input. | ||||
| in_particles(input::ABCProcessInput) = input.inParticles | ||||
|  | ||||
| # Accessor: the `outParticles` field of a process description. | ||||
| out_particles(process::ABCProcessDescription) = process.outParticles | ||||
|  | ||||
| # Accessor: the `outParticles` field of a process input. | ||||
| out_particles(input::ABCProcessInput) = input.outParticles | ||||
|   | ||||
							
								
								
									
										58
									
								
								src/models/abc/print.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								src/models/abc/print.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
|  | ||||
| """ | ||||
|     show(io::IO, process::ABCProcessDescription) | ||||
|  | ||||
| Write a single-line representation of an [`ABCProcessDescription`](@ref) to `io`: the incoming particles, an arrow, | ||||
| then the outgoing particles, each side listed in the order given by `types(ABCModel())`. | ||||
|  | ||||
| ```jldoctest | ||||
| julia> using MetagraphOptimization | ||||
|  | ||||
| julia> print(parse_process("AB->ABBB", ABCModel())) | ||||
| ABC Process: 'AB->ABBB' | ||||
| ``` | ||||
| """ | ||||
| function show(io::IO, process::ABCProcessDescription) | ||||
|     # walk types() instead of keys(): it yields the particle types in a fixed order (ABC), a Dict's keys do not | ||||
|     particleTypes = types(ABCModel()) | ||||
|  | ||||
|     print(io, "ABC Process: \'") | ||||
|     # the incoming side is closed by the arrow, the outgoing side by the final quote | ||||
|     for (counts, closing) in ((process.inParticles, "->"), (process.outParticles, "'")) | ||||
|         for type in particleTypes | ||||
|             foreach(_ -> print(io, String(type)), 1:get(counts, type, 0)) | ||||
|         end | ||||
|         print(io, closing) | ||||
|     end | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     show(io::IO, processInput::ABCProcessInput) | ||||
|  | ||||
| Write a multi-line representation of an [`ABCProcessInput`](@ref) to `io`: a header naming the process, followed by | ||||
| the incoming and the outgoing particles, one particle per line. | ||||
| """ | ||||
| function show(io::IO, processInput::ABCProcessInput) | ||||
|     incoming = processInput.inParticles | ||||
|     outgoing = processInput.outParticles | ||||
|  | ||||
|     println(io, "Input for $(processInput.process):") | ||||
|  | ||||
|     println(io, "  $(length(incoming)) Incoming particles:") | ||||
|     foreach(p -> println(io, "    $p"), incoming) | ||||
|  | ||||
|     println(io, "  $(length(outgoing)) Outgoing Particles:") | ||||
|     foreach(p -> println(io, "    $p"), outgoing) | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     show(io::IO, particle::T) where {T <: ABCParticle} | ||||
|  | ||||
| Write an [`ABCParticle`](@ref) to `io` on a single line, as its type's name followed by its momentum. | ||||
| """ | ||||
| function show(io::IO, particle::T) where {T <: ABCParticle} | ||||
|     # T is bound to the type of `particle`, so it can stand in for typeof(particle) | ||||
|     typeName = String(T) | ||||
|     print(io, "$(typeName): $(particle.momentum)") | ||||
|     return nothing | ||||
| end | ||||
| @@ -3,35 +3,35 @@ | ||||
|  | ||||
| Return the compute effort of an S1 task. | ||||
| """ | ||||
| compute_effort(t::ComputeTaskS1) = 11 | ||||
| compute_effort(t::ComputeTaskS1)::Float64 = 11.0 | ||||
|  | ||||
| """ | ||||
|     compute_effort(t::ComputeTaskS2) | ||||
|  | ||||
| Return the compute effort of an S2 task. | ||||
| """ | ||||
| compute_effort(t::ComputeTaskS2) = 12 | ||||
| compute_effort(t::ComputeTaskS2)::Float64 = 12.0 | ||||
|  | ||||
| """ | ||||
|     compute_effort(t::ComputeTaskU) | ||||
|  | ||||
| Return the compute effort of a U task. | ||||
| """ | ||||
| compute_effort(t::ComputeTaskU) = 1 | ||||
| compute_effort(t::ComputeTaskU)::Float64 = 1.0 | ||||
|  | ||||
| """ | ||||
|     compute_effort(t::ComputeTaskV) | ||||
|  | ||||
| Return the compute effort of a V task. | ||||
| """ | ||||
| compute_effort(t::ComputeTaskV) = 6 | ||||
| compute_effort(t::ComputeTaskV)::Float64 = 6.0 | ||||
|  | ||||
| """ | ||||
|     compute_effort(t::ComputeTaskP) | ||||
|  | ||||
| Return the compute effort of a P task. | ||||
| """ | ||||
| compute_effort(t::ComputeTaskP) = 0 | ||||
| compute_effort(t::ComputeTaskP)::Float64 = 0.0 | ||||
|  | ||||
| """ | ||||
|     compute_effort(t::ComputeTaskSum) | ||||
| @@ -41,7 +41,7 @@ Return the compute effort of a Sum task. | ||||
| Note: This is a constant compute effort, even though sum scales with the number of its inputs. Since there is only ever a single sum node in a graph generated from the ABC-Model, | ||||
| this doesn't matter. | ||||
| """ | ||||
| compute_effort(t::ComputeTaskSum) = 1 | ||||
| compute_effort(t::ComputeTaskSum)::Float64 = 1.0 | ||||
|  | ||||
| """ | ||||
|     show(io::IO, t::DataTask) | ||||
| @@ -57,42 +57,42 @@ end | ||||
|  | ||||
| Print the S1 task to io. | ||||
| """ | ||||
| show(io::IO, t::ComputeTaskS1) = print("ComputeS1") | ||||
| show(io::IO, t::ComputeTaskS1) = print(io, "ComputeS1") | ||||
|  | ||||
| """ | ||||
|     show(io::IO, t::ComputeTaskS2) | ||||
|  | ||||
| Print the S2 task to io. | ||||
| """ | ||||
| show(io::IO, t::ComputeTaskS2) = print("ComputeS2") | ||||
| show(io::IO, t::ComputeTaskS2) = print(io, "ComputeS2") | ||||
|  | ||||
| """ | ||||
|     show(io::IO, t::ComputeTaskP) | ||||
|  | ||||
| Print the P task to io. | ||||
| """ | ||||
| show(io::IO, t::ComputeTaskP) = print("ComputeP") | ||||
| show(io::IO, t::ComputeTaskP) = print(io, "ComputeP") | ||||
|  | ||||
| """ | ||||
|     show(io::IO, t::ComputeTaskU) | ||||
|  | ||||
| Print the U task to io. | ||||
| """ | ||||
| show(io::IO, t::ComputeTaskU) = print("ComputeU") | ||||
| show(io::IO, t::ComputeTaskU) = print(io, "ComputeU") | ||||
|  | ||||
| """ | ||||
|     show(io::IO, t::ComputeTaskV) | ||||
|  | ||||
| Print the V task to io. | ||||
| """ | ||||
| show(io::IO, t::ComputeTaskV) = print("ComputeV") | ||||
| show(io::IO, t::ComputeTaskV) = print(io, "ComputeV") | ||||
|  | ||||
| """ | ||||
|     show(io::IO, t::ComputeTaskSum) | ||||
|  | ||||
| Print the sum task to io. | ||||
| """ | ||||
| show(io::IO, t::ComputeTaskSum) = print("ComputeSum") | ||||
| show(io::IO, t::ComputeTaskSum) = print(io, "ComputeSum") | ||||
|  | ||||
| """ | ||||
|     copy(t::DataTask) | ||||
| @@ -147,19 +147,20 @@ children(::ComputeTaskV) = 2 | ||||
| """ | ||||
|     children(::ComputeTaskSum) | ||||
|  | ||||
| Return the number of children of a ComputeTaskSum, since this is variable and the task doesn't know | ||||
| how many children it will sum over, return a wildcard -1. | ||||
|  | ||||
| TODO: this is kind of bad because it means we can't fuse with a sum task | ||||
| Return the number of children of a ComputeTaskSum. | ||||
| """ | ||||
| children(::ComputeTaskSum) = -1 | ||||
| children(t::ComputeTaskSum) = t.children_number | ||||
|  | ||||
| """ | ||||
|     children(t::FusedComputeTask) | ||||
|  | ||||
| Return the number of children of a FusedComputeTask. It's the sum of the children of both tasks minus one. | ||||
| Return the number of children of a FusedComputeTask. | ||||
| """ | ||||
| function children(t::FusedComputeTask) | ||||
|     (T1, T2) = get_types(t) | ||||
|     return children(T1()) + children(T2()) - 1 # one of the inputs is the output of T1 and thus not a child of the node | ||||
|     return length(union(Set(t.t1_inputs), Set(t.t2_inputs))) | ||||
| end | ||||
|  | ||||
| # Increment the child count stored in the sum task by one. | ||||
| function add_child!(t::ComputeTaskSum) | ||||
|     t.children_number = t.children_number + 1 | ||||
|     return nothing | ||||
| end | ||||
|   | ||||
| @@ -4,7 +4,7 @@ | ||||
| Task representing a specific data transfer in the ABC Model. | ||||
| """ | ||||
| struct DataTask <: AbstractDataTask | ||||
|     data::UInt64 | ||||
|     data::Float64 | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -47,19 +47,13 @@ struct ComputeTaskU <: AbstractComputeTask end | ||||
|  | ||||
| Task that sums all its inputs, n children. | ||||
| """ | ||||
| struct ComputeTaskSum <: AbstractComputeTask end | ||||
| mutable struct ComputeTaskSum <: AbstractComputeTask | ||||
|     children_number::Int | ||||
| end | ||||
|  | ||||
| """ | ||||
|     ABC_TASKS | ||||
|  | ||||
| Constant vector of all tasks of the ABC-Model. | ||||
| """ | ||||
| ABC_TASKS = [ | ||||
|     DataTask, | ||||
|     ComputeTaskS1, | ||||
|     ComputeTaskS2, | ||||
|     ComputeTaskP, | ||||
|     ComputeTaskV, | ||||
|     ComputeTaskU, | ||||
|     ComputeTaskSum, | ||||
| ] | ||||
| ABC_TASKS = [DataTask, ComputeTaskS1, ComputeTaskS2, ComputeTaskP, ComputeTaskV, ComputeTaskU, ComputeTaskSum] | ||||
|   | ||||
							
								
								
									
										109
									
								
								src/models/interface.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								src/models/interface.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,109 @@ | ||||
|  | ||||
| """ | ||||
|     AbstractPhysicsModel | ||||
|  | ||||
| Base type for a model, e.g. ABC-Model or QED. This is used to dispatch many functions. | ||||
| """ | ||||
| abstract type AbstractPhysicsModel end | ||||
|  | ||||
| """ | ||||
|     AbstractParticle | ||||
|  | ||||
| Base type for particles belonging to a certain [`AbstractPhysicsModel`](@ref). | ||||
| """ | ||||
| abstract type AbstractParticle end | ||||
|  | ||||
| """ | ||||
|     ParticleValue{ParticleType <: AbstractParticle} | ||||
|  | ||||
| A struct describing a particle during a calculation of a Feynman Diagram, together with the value that's being calculated. | ||||
|  | ||||
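| `sizeof(ParticleValue())` = 48 Byte (NOTE(review): this figure was carried over from the earlier non-parametric struct; the size now depends on `ParticleType` — confirm) | ||||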
| """ | ||||
| struct ParticleValue{ParticleType <: AbstractParticle} | ||||
|     p::ParticleType | ||||
|     v::Float64 | ||||
| end | ||||
|  | ||||
| """ | ||||
|     AbstractProcessDescription | ||||
|  | ||||
| Base type for process descriptions. An object of this type of a corresponding [`AbstractPhysicsModel`](@ref) should uniquely identify a process in that model. | ||||
|  | ||||
| See also: [`parse_process`](@ref) | ||||
| """ | ||||
| abstract type AbstractProcessDescription end | ||||
|  | ||||
| """ | ||||
|     AbstractProcessInput | ||||
|  | ||||
| Base type for process inputs. An object of this type contains the input values (e.g. momenta) of the particles in a process. | ||||
|  | ||||
| See also: [`gen_process_input`](@ref) | ||||
| """ | ||||
| abstract type AbstractProcessInput end | ||||
|  | ||||
| """ | ||||
|     mass(t::Type{T}) where {T <: AbstractParticle} | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the particle's mass at rest. | ||||
| """ | ||||
| function mass end | ||||
|  | ||||
| """ | ||||
|     interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: AbstractParticle, T2 <: AbstractParticle} | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractParticle`](@ref), returning the result particle type when the two given particles interact. | ||||
| """ | ||||
| function interaction_result end | ||||
|  | ||||
| """ | ||||
|     types(::AbstractPhysicsModel) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref), returning a `Vector` of the available particle types in the model. | ||||
| """ | ||||
| function types end | ||||
|  | ||||
| """ | ||||
|     in_particles(::AbstractProcessDescription) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref). | ||||
| Returns a `<: Dict{Type{AbstractParticle}, Int}` object, representing the number of incoming particles for the process per particle type. | ||||
|  | ||||
|  | ||||
|     in_particles(::AbstractProcessInput) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref). | ||||
| Returns a `<: Vector{AbstractParticle}` object with the values of all incoming particles for the corresponding `ProcessDescription`. | ||||
| """ | ||||
| function in_particles end | ||||
|  | ||||
| """ | ||||
|     out_particles(::AbstractProcessDescription) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractProcessDescription`](@ref). | ||||
| Returns a `<: Dict{Type{AbstractParticle}, Int}` object, representing the number of outgoing particles for the process per particle type. | ||||
|  | ||||
|  | ||||
|     out_particles(::AbstractProcessInput) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref). | ||||
| Returns a `<: Vector{AbstractParticle}` object with the values of all outgoing particles for the corresponding `ProcessDescription`. | ||||
| """ | ||||
| function out_particles end | ||||
|  | ||||
| """ | ||||
|     parse_process(::AbstractString, ::AbstractPhysicsModel) | ||||
|  | ||||
| Interface function that must be implemented for every subtype of [`AbstractPhysicsModel`](@ref). | ||||
| Returns a `ProcessDescription` object. | ||||
| """ | ||||
| function parse_process end | ||||
|  | ||||
| """ | ||||
|     gen_process_input(::AbstractProcessDescription) | ||||
|  | ||||
| Interface function that must be implemented for every specific [`AbstractProcessDescription`](@ref). | ||||
| Returns a randomly generated and valid corresponding `ProcessInput`. | ||||
| """ | ||||
| function gen_process_input end | ||||
							
								
								
									
										10
									
								
								src/models/print.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								src/models/print.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
|  | ||||
| """ | ||||
|     show(io::IO, particleValue::ParticleValue) | ||||
|  | ||||
| Write a [`ParticleValue`](@ref) to `io` on a single line, as the particle followed by its value, in parentheses. | ||||
| """ | ||||
| function show(io::IO, particleValue::ParticleValue) | ||||
|     particle = particleValue.p | ||||
|     value = particleValue.v | ||||
|     print(io, "($(particle), value: $(value))") | ||||
|     return nothing | ||||
| end | ||||
| @@ -21,7 +21,7 @@ end | ||||
|  | ||||
| Equality comparison between two [`ComputeTaskNode`](@ref)s. | ||||
| """ | ||||
| function ==(n1::ComputeTaskNode, n2::ComputeTaskNode) | ||||
| function ==(n1::ComputeTaskNode{TaskType}, n2::ComputeTaskNode{TaskType}) where {TaskType <: AbstractComputeTask} | ||||
|     return n1.id == n2.id | ||||
| end | ||||
|  | ||||
| @@ -30,6 +30,6 @@ end | ||||
|  | ||||
| Equality comparison between two [`DataTaskNode`](@ref)s. | ||||
| """ | ||||
| function ==(n1::DataTaskNode, n2::DataTaskNode) | ||||
| function ==(n1::DataTaskNode{TaskType}, n2::DataTaskNode{TaskType}) where {TaskType <: AbstractDataTask} | ||||
|     return n1.id == n2.id | ||||
| end | ||||
|   | ||||
| @@ -1,44 +1,20 @@ | ||||
|  | ||||
| DataTaskNode(t::AbstractDataTask, name = "") = DataTaskNode( | ||||
|     t, | ||||
|     Vector{Node}(), | ||||
|     Vector{Node}(), | ||||
|     UUIDs.uuid1(rng[threadid()]), | ||||
|     missing, | ||||
|     missing, | ||||
|     missing, | ||||
|     name, | ||||
| ) | ||||
| DataTaskNode(t::AbstractDataTask, name = "") = | ||||
|     DataTaskNode(t, Vector{Node}(), Vector{Node}(), UUIDs.uuid1(rng[threadid()]), missing, missing, missing, name) | ||||
| ComputeTaskNode(t::AbstractComputeTask) = ComputeTaskNode( | ||||
|     t, | ||||
|     Vector{Node}(), | ||||
|     Vector{Node}(), | ||||
|     UUIDs.uuid1(rng[threadid()]), | ||||
|     missing, | ||||
|     missing, | ||||
|     Vector{NodeFusion}(), | ||||
|     t,                              # task | ||||
|     Vector{Node}(),                 # parents | ||||
|     Vector{Node}(),                 # children | ||||
|     UUIDs.uuid1(rng[threadid()]),   # id | ||||
|     missing,                        # node reduction | ||||
|     missing,                        # node split | ||||
|     Vector{NodeFusion}(),           # node fusions | ||||
|     missing,                        # device | ||||
| ) | ||||
|  | ||||
| copy(m::Missing) = missing | ||||
| copy(n::ComputeTaskNode) = ComputeTaskNode( | ||||
|     copy(n.task), | ||||
|     copy(n.parents), | ||||
|     copy(n.children), | ||||
|     UUIDs.uuid1(rng[threadid()]), | ||||
|     copy(n.nodeReduction), | ||||
|     copy(n.nodeSplit), | ||||
|     copy(n.nodeFusions), | ||||
| ) | ||||
| copy(n::DataTaskNode) = DataTaskNode( | ||||
|     copy(n.task), | ||||
|     copy(n.parents), | ||||
|     copy(n.children), | ||||
|     UUIDs.uuid1(rng[threadid()]), | ||||
|     copy(n.nodeReduction), | ||||
|     copy(n.nodeSplit), | ||||
|     copy(n.nodeFusion), | ||||
|     n.name, | ||||
| ) | ||||
| copy(n::ComputeTaskNode) = ComputeTaskNode(copy(task(n))) | ||||
| copy(n::DataTaskNode) = DataTaskNode(copy(task(n)), n.name) | ||||
|  | ||||
| """ | ||||
|     make_node(t::AbstractTask) | ||||
|   | ||||
| @@ -4,7 +4,7 @@ | ||||
| Print a short string representation of the node to io. | ||||
| """ | ||||
| function show(io::IO, n::Node) | ||||
|     return print(io, "Node(", n.task, ")") | ||||
|     return print(io, "Node(", task(n), ")") | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -22,5 +22,6 @@ end | ||||
| Return the uuid as a string usable as a variable name in code generation. | ||||
| """ | ||||
| function to_var_name(id::UUID) | ||||
|     return replace(string(id), "-" => "_") | ||||
|     str = "_" * replace(string(id), "-" => "_") | ||||
|     return str | ||||
| end | ||||
|   | ||||
| @@ -3,25 +3,27 @@ | ||||
|  | ||||
| Return whether this node is an entry node in its graph, i.e., it has no children. | ||||
| """ | ||||
| is_entry_node(node::Node) = length(node.children) == 0 | ||||
| is_entry_node(node::Node) = length(children(node)) == 0 | ||||
|  | ||||
| """ | ||||
|     is_exit_node(node::Node) | ||||
|  | ||||
| Return whether this node is an exit node of its graph, i.e., it has no parents. | ||||
| """ | ||||
| is_exit_node(node::Node) = length(node.parents) == 0 | ||||
| is_exit_node(node::Node)::Bool = length(parents(node)) == 0 | ||||
|  | ||||
| """ | ||||
|     data(edge::Edge) | ||||
|     task(node::Node) | ||||
|  | ||||
| Return the data transfered by this edge, i.e., 0 if the child is a [`ComputeTaskNode`](@ref), otherwise the child's `data()`. | ||||
| Return the node's task. | ||||
| """ | ||||
| function data(edge::Edge) | ||||
|     if typeof(edge.edge[1]) <: DataTaskNode | ||||
|         return data(edge.edge[1].task) | ||||
|     end | ||||
|     return 0.0 | ||||
| function task(node::DataTaskNode{TaskType})::TaskType where {TaskType <: Union{AbstractDataTask, AbstractComputeTask}} | ||||
|     return node.task | ||||
| end | ||||
| function task( | ||||
|     node::ComputeTaskNode{TaskType}, | ||||
| )::TaskType where {TaskType <: Union{AbstractDataTask, AbstractComputeTask}} | ||||
|     return node.task | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -31,8 +33,11 @@ Return a copy of the node's children so it can safely be muted without changing | ||||
|  | ||||
| A node's children are its prerequisite nodes, nodes that need to execute before the task of this node. | ||||
| """ | ||||
| function children(node::Node) | ||||
|     return copy(node.children) | ||||
| function children(node::DataTaskNode)::Vector{ComputeTaskNode} | ||||
|     return node.children | ||||
| end | ||||
| function children(node::ComputeTaskNode)::Vector{DataTaskNode} | ||||
|     return node.children | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -42,8 +47,11 @@ Return a copy of the node's parents so it can safely be muted without changing t | ||||
|  | ||||
| A node's parents are its subsequent nodes, nodes that need this node to execute. | ||||
| """ | ||||
| function parents(node::Node) | ||||
|     return copy(node.parents) | ||||
| function parents(node::DataTaskNode)::Vector{ComputeTaskNode} | ||||
|     return node.parents | ||||
| end | ||||
| function parents(node::ComputeTaskNode)::Vector{DataTaskNode} | ||||
|     return node.parents | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -53,11 +61,11 @@ Return a vector of all siblings of this node. | ||||
|  | ||||
| A node's siblings are all children of any of its parents. The result contains no duplicates and includes the node itself. | ||||
| """ | ||||
| function siblings(node::Node) | ||||
| function siblings(node::Node)::Set{Node} | ||||
|     result = Set{Node}() | ||||
|     push!(result, node) | ||||
|     for parent in node.parents | ||||
|         union!(result, parent.children) | ||||
|     for parent in parents(node) | ||||
|         union!(result, children(parent)) | ||||
|     end | ||||
|  | ||||
|     return result | ||||
| @@ -73,11 +81,11 @@ A node's partners are all parents of any of its children. The result contains no | ||||
| Note: This is very slow when there are multiple children with many parents.  | ||||
| This is less of a problem in [`siblings(node::Node)`](@ref) because (depending on the model) there are no nodes with a large number of children, or only a single one. | ||||
| """ | ||||
| function partners(node::Node) | ||||
| function partners(node::Node)::Set{Node} | ||||
|     result = Set{Node}() | ||||
|     push!(result, node) | ||||
|     for child in node.children | ||||
|         union!(result, child.parents) | ||||
|     for child in children(node) | ||||
|         union!(result, parents(child)) | ||||
|     end | ||||
|  | ||||
|     return result | ||||
| @@ -90,8 +98,8 @@ Alternative version to [`partners(node::Node)`](@ref), avoiding allocation of a | ||||
| """ | ||||
| function partners(node::Node, set::Set{Node}) | ||||
|     push!(set, node) | ||||
|     for child in node.children | ||||
|         union!(set, child.parents) | ||||
|     for child in children(node) | ||||
|         union!(set, parents(child)) | ||||
|     end | ||||
|     return nothing | ||||
| end | ||||
| @@ -101,8 +109,8 @@ end | ||||
|  | ||||
| Return whether the `potential_parent` is a parent of `node`. | ||||
| """ | ||||
| function is_parent(potential_parent::Node, node::Node) | ||||
|     return potential_parent in node.parents | ||||
| function is_parent(potential_parent::Node, node::Node)::Bool | ||||
|     return potential_parent in parents(node) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -110,6 +118,6 @@ end | ||||
|  | ||||
| Return whether the `potential_child` is a child of `node`. | ||||
| """ | ||||
| function is_child(potential_child::Node, node::Node) | ||||
|     return potential_child in node.children | ||||
| function is_child(potential_child::Node, node::Node)::Bool | ||||
|     return potential_child in children(node) | ||||
| end | ||||
|   | ||||
| @@ -24,16 +24,17 @@ abstract type Operation end | ||||
| Any node that transfers data and does no computation. | ||||
|  | ||||
| # Fields | ||||
| `.task`: The node's data task type. Usually [`DataTask`](@ref).\\ | ||||
| `.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\ | ||||
| `.children`: A vector of the node's children (i.e. nodes that this one depends on).\\ | ||||
| `.id`: The node's id. Improves the speed of comparisons.\\ | ||||
| `.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\ | ||||
| `.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\ | ||||
| `.nodeFusion`: Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes. | ||||
| `.task`:            The node's data task type. Usually [`DataTask`](@ref).\\ | ||||
| `.parents`:         A vector of the node's parents (i.e. nodes that depend on this one).\\ | ||||
| `.children`:        A vector of the node's children (i.e. nodes that this one depends on).\\ | ||||
| `.id`:              The node's id. Improves the speed of comparisons and is used as a unique identifier.\\ | ||||
| `.nodeReduction`:   Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\ | ||||
| `.nodeSplit`:       Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\ | ||||
| `.nodeFusion`:      Either this node's [`NodeFusion`](@ref) or `missing`, if none. There can only be at most one for DataTaskNodes.\\ | ||||
| `.name`:            For entry nodes of the graph ([`is_entry_node`](@ref)), the name of this node, used to reliably assign the inputs to the correct nodes when executing.\\ | ||||
| """ | ||||
| mutable struct DataTaskNode <: Node | ||||
|     task::AbstractDataTask | ||||
| mutable struct DataTaskNode{TaskType <: AbstractDataTask} <: Node | ||||
|     task::TaskType | ||||
|  | ||||
|     # use vectors as sets have way too much memory overhead | ||||
|     parents::Vector{Node} | ||||
| @@ -60,19 +61,20 @@ end | ||||
| """ | ||||
|     ComputeTaskNode <: Node | ||||
|      | ||||
| Any node that transfers data and does no computation. | ||||
| Any node that computes a result from inputs using an [`AbstractComputeTask`](@ref). | ||||
|  | ||||
| # Fields | ||||
| `.task`: The node's data task type. Usually [`DataTask`](@ref).\\ | ||||
| `.parents`: A vector of the node's parents (i.e. nodes that depend on this one).\\ | ||||
| `.children`: A vector of the node's children (i.e. nodes that this one depends on).\\ | ||||
| `.id`: The node's id. Improves the speed of comparisons.\\ | ||||
| `.nodeReduction`: Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\ | ||||
| `.nodeSplit`: Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\ | ||||
| `.nodeFusion`: A vector of this node's [`NodeFusion`](@ref)s. For a ComputeTaskNode there can be any number of these, unlike the DataTaskNodes. | ||||
| `.task`:            The node's compute task type. A concrete subtype of [`AbstractComputeTask`](@ref).\\ | ||||
| `.parents`:         A vector of the node's parents (i.e. nodes that depend on this one).\\ | ||||
| `.children`:        A vector of the node's children (i.e. nodes that this one depends on).\\ | ||||
| `.id`:              The node's id. Improves the speed of comparisons and is used as a unique identifier.\\ | ||||
| `.nodeReduction`:   Either this node's [`NodeReduction`](@ref) or `missing`, if none. There can only be at most one.\\ | ||||
| `.nodeSplit`:       Either this node's [`NodeSplit`](@ref) or `missing`, if none. There can only be at most one.\\ | ||||
| `.nodeFusions`:     A vector of this node's [`NodeFusion`](@ref)s. For a `ComputeTaskNode` there can be any number of these, unlike the [`DataTaskNode`](@ref)s.\\ | ||||
| `.device`:          The Device this node has been scheduled on by a [`Scheduler`](@ref). | ||||
| """ | ||||
| mutable struct ComputeTaskNode <: Node | ||||
|     task::AbstractComputeTask | ||||
| mutable struct ComputeTaskNode{TaskType <: AbstractComputeTask} <: Node | ||||
|     task::TaskType | ||||
|     parents::Vector{Node} | ||||
|     children::Vector{Node} | ||||
|     id::Base.UUID | ||||
| @@ -81,7 +83,10 @@ mutable struct ComputeTaskNode <: Node | ||||
|     nodeSplit::Union{Operation, Missing} | ||||
|  | ||||
|     # for ComputeTasks there can be multiple fusions, unlike the DataTasks | ||||
|     nodeFusions::Vector{Operation} | ||||
|     nodeFusions::Vector{<:Operation} | ||||
|  | ||||
|     # the device this node is assigned to execute on | ||||
|     device::Union{AbstractDevice, Missing} | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -95,8 +100,5 @@ The child is the prerequisite node of the parent. | ||||
| """ | ||||
| struct Edge | ||||
|     # edge points from child to parent | ||||
|     edge::Union{ | ||||
|         Tuple{DataTaskNode, ComputeTaskNode}, | ||||
|         Tuple{ComputeTaskNode, DataTaskNode}, | ||||
|     } | ||||
|     edge::Union{Tuple{DataTaskNode, ComputeTaskNode}, Tuple{ComputeTaskNode, DataTaskNode}} | ||||
| end | ||||
|   | ||||
| @@ -22,12 +22,24 @@ function is_valid_node(graph::DAG, node::Node) | ||||
|         @assert node in child.parents "Node is not a parent of its child!" | ||||
|     end | ||||
|  | ||||
|     if !ismissing(node.nodeReduction) | ||||
|     #=if !ismissing(node.nodeReduction) | ||||
|         @assert is_valid(graph, node.nodeReduction) | ||||
|     end | ||||
|     if !ismissing(node.nodeSplit) | ||||
|         @assert is_valid(graph, node.nodeSplit) | ||||
|     end=# | ||||
|  | ||||
|     if !(typeof(task(node)) <: FusedComputeTask) | ||||
|         # the remaining checks are only necessary for fused compute tasks | ||||
|         return true | ||||
|     end | ||||
|  | ||||
|     # every child must be in some input of the task | ||||
|     for child in node.children | ||||
|         str = Symbol(to_var_name(child.id)) | ||||
|         @assert (str in task(node).t1_inputs) || (str in task(node).t2_inputs) "$str was not in any of the tasks' inputs\nt1_inputs: $(task(node).t1_inputs)\nt2_inputs: $(task(node).t2_inputs)" | ||||
|     end | ||||
|  | ||||
|     return true | ||||
| end | ||||
|  | ||||
| @@ -41,9 +53,9 @@ This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref). | ||||
| function is_valid(graph::DAG, node::ComputeTaskNode) | ||||
|     @assert is_valid_node(graph, node) | ||||
|  | ||||
|     for nf in node.nodeFusions | ||||
|     #=for nf in node.nodeFusions | ||||
|         @assert is_valid(graph, nf) | ||||
|     end | ||||
|     end=# | ||||
|     return true | ||||
| end | ||||
|  | ||||
| @@ -57,8 +69,8 @@ This also calls [`is_valid_node(graph::DAG, node::Node)`](@ref). | ||||
| function is_valid(graph::DAG, node::DataTaskNode) | ||||
|     @assert is_valid_node(graph, node) | ||||
|  | ||||
|     if !ismissing(node.nodeFusion) | ||||
|     #=if !ismissing(node.nodeFusion) | ||||
|         @assert is_valid(graph, node.nodeFusion) | ||||
|     end | ||||
|     end=# | ||||
|     return true | ||||
| end | ||||
|   | ||||
| @@ -34,12 +34,7 @@ Apply the given [`NodeFusion`](@ref) to the graph. Generic wrapper around [`node | ||||
| Return an [`AppliedNodeFusion`](@ref) object generated from the graph's [`Diff`](@ref). | ||||
| """ | ||||
| function apply_operation!(graph::DAG, operation::NodeFusion) | ||||
|     diff = node_fusion!( | ||||
|         graph, | ||||
|         operation.input[1], | ||||
|         operation.input[2], | ||||
|         operation.input[3], | ||||
|     ) | ||||
|     diff = node_fusion!(graph, operation.input[1], operation.input[2], operation.input[3]) | ||||
|  | ||||
|     graph.properties += GraphProperties(diff) | ||||
|  | ||||
| @@ -124,17 +119,24 @@ function revert_diff!(graph::DAG, diff::Diff) | ||||
|     # add removed nodes, remove added nodes, same for edges | ||||
|     # note the order | ||||
|     for edge in diff.addedEdges | ||||
|         remove_edge!(graph, edge.edge[1], edge.edge[2], false) | ||||
|         remove_edge!(graph, edge.edge[1], edge.edge[2], track = false) | ||||
|     end | ||||
|     for node in diff.addedNodes | ||||
|         remove_node!(graph, node, false) | ||||
|         remove_node!(graph, node, track = false) | ||||
|     end | ||||
|  | ||||
|     for node in diff.removedNodes | ||||
|         insert_node!(graph, node, false) | ||||
|         insert_node!(graph, node, track = false) | ||||
|     end | ||||
|     for edge in diff.removedEdges | ||||
|         insert_edge!(graph, edge.edge[1], edge.edge[2], false) | ||||
|         insert_edge!(graph, edge.edge[1], edge.edge[2], track = false) | ||||
|     end | ||||
|  | ||||
|     for (node, t) in diff.updatedChildren | ||||
|         # node must be fused compute task at this point | ||||
|         @assert typeof(task(node)) <: FusedComputeTask | ||||
|  | ||||
|         node.task = t | ||||
|     end | ||||
|  | ||||
|     graph.properties -= GraphProperties(diff) | ||||
| @@ -149,21 +151,24 @@ Fuse nodes n1 -> n2 -> n3 together into one node, return the applied difference | ||||
|  | ||||
| For details see [`NodeFusion`](@ref). | ||||
| """ | ||||
| function node_fusion!( | ||||
|     graph::DAG, | ||||
|     n1::ComputeTaskNode, | ||||
|     n2::DataTaskNode, | ||||
|     n3::ComputeTaskNode, | ||||
| ) | ||||
|     # @assert is_valid_node_fusion_input(graph, n1, n2, n3) | ||||
| function node_fusion!(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode) | ||||
|     @assert is_valid_node_fusion_input(graph, n1, n2, n3) | ||||
|  | ||||
|     # clear snapshot | ||||
|     get_snapshot_diff(graph) | ||||
|  | ||||
|     # save children and parents | ||||
|     n1_children = children(n1) | ||||
|     n3_parents = parents(n3) | ||||
|     n3_children = children(n3) | ||||
|     n1Children = copy(children(n1)) | ||||
|     n3Parents = copy(parents(n3)) | ||||
|  | ||||
|     n1Task = copy(task(n1)) | ||||
|     n3Task = copy(task(n3)) | ||||
|  | ||||
|     # assemble the input node vectors of n1 and n3 to save into the FusedComputeTask | ||||
|     n1Inputs = Vector{Symbol}() | ||||
|     for child in n1Children | ||||
|         push!(n1Inputs, Symbol(to_var_name(child.id))) | ||||
|     end | ||||
|  | ||||
|     # remove the edges and nodes that will be replaced by the fused node | ||||
|     remove_edge!(graph, n1, n2) | ||||
| @@ -172,29 +177,38 @@ function node_fusion!( | ||||
|     remove_node!(graph, n2) | ||||
|  | ||||
|     # get n3's children now so it automatically excludes n2 | ||||
|     n3_children = children(n3) | ||||
|     n3Children = copy(children(n3)) | ||||
|  | ||||
|     n3Inputs = Vector{Symbol}() | ||||
|     for child in n3Children | ||||
|         push!(n3Inputs, Symbol(to_var_name(child.id))) | ||||
|     end | ||||
|  | ||||
|     remove_node!(graph, n3) | ||||
|  | ||||
|     # create new node with the fused compute task | ||||
|     new_node = | ||||
|         ComputeTaskNode(FusedComputeTask{typeof(n1.task), typeof(n3.task)}()) | ||||
|     insert_node!(graph, new_node) | ||||
|     newNode = ComputeTaskNode(FusedComputeTask(n1Task, n3Task, n1Inputs, Symbol(to_var_name(n2.id)), n3Inputs)) | ||||
|     insert_node!(graph, newNode) | ||||
|  | ||||
|     for child in n1_children | ||||
|     for child in n1Children | ||||
|         remove_edge!(graph, child, n1) | ||||
|         insert_edge!(graph, child, new_node) | ||||
|         insert_edge!(graph, child, newNode) | ||||
|     end | ||||
|  | ||||
|     for child in n3_children | ||||
|     for child in n3Children | ||||
|         remove_edge!(graph, child, n3) | ||||
|         if !(child in n1_children) | ||||
|             insert_edge!(graph, child, new_node) | ||||
|         if !(child in n1Children) | ||||
|             insert_edge!(graph, child, newNode) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     for parent in n3_parents | ||||
|     for parent in n3Parents | ||||
|         remove_edge!(graph, n3, parent) | ||||
|         insert_edge!(graph, new_node, parent) | ||||
|         insert_edge!(graph, newNode, parent) | ||||
|  | ||||
|         # important! update the parent node's child names in case they are fused compute tasks | ||||
|         # needed for compute generation so the fused compute task can correctly match inputs to its component tasks | ||||
|         update_child!(graph, parent, Symbol(to_var_name(n3.id)), Symbol(to_var_name(newNode.id))) | ||||
|     end | ||||
|  | ||||
|     return get_snapshot_diff(graph) | ||||
| @@ -208,39 +222,50 @@ Reduce the given nodes together into one node, return the applied difference to | ||||
| For details see [`NodeReduction`](@ref). | ||||
| """ | ||||
| function node_reduction!(graph::DAG, nodes::Vector{Node}) | ||||
|     # @assert is_valid_node_reduction_input(graph, nodes) | ||||
|     @assert is_valid_node_reduction_input(graph, nodes) | ||||
|  | ||||
|     # clear snapshot | ||||
|     get_snapshot_diff(graph) | ||||
|  | ||||
|     n1 = nodes[1] | ||||
|     n1_children = children(n1) | ||||
|     n1Children = copy(children(n1)) | ||||
|  | ||||
|     n1_parents = Set(n1.parents) | ||||
|     new_parents = Set{Node}() | ||||
|     n1Parents = Set(n1.parents) | ||||
|  | ||||
|     # set of the new parents of n1 | ||||
|     newParents = Set{Node}() | ||||
|  | ||||
|     # names of the previous children that n1 now replaces per parent | ||||
|     newParentsChildNames = Dict{Node, Symbol}() | ||||
|  | ||||
|     # remove all of the nodes' parents and children and the nodes themselves (except for first node) | ||||
|     for i in 2:length(nodes) | ||||
|         n = nodes[i] | ||||
|         for child in n1_children | ||||
|         for child in n1Children | ||||
|             remove_edge!(graph, child, n) | ||||
|         end | ||||
|  | ||||
|         for parent in parents(n) | ||||
|         for parent in copy(parents(n)) | ||||
|             remove_edge!(graph, n, parent) | ||||
|  | ||||
|             # collect all parents | ||||
|             push!(new_parents, parent) | ||||
|             push!(newParents, parent) | ||||
|             newParentsChildNames[parent] = Symbol(to_var_name(n.id)) | ||||
|         end | ||||
|  | ||||
|         remove_node!(graph, n) | ||||
|     end | ||||
|  | ||||
|     setdiff!(new_parents, n1_parents) | ||||
|  | ||||
|     for parent in new_parents | ||||
|     for parent in newParents | ||||
|         # now add parents of all input nodes to n1 without duplicates | ||||
|         insert_edge!(graph, n1, parent) | ||||
|         if !(parent in n1Parents) | ||||
|             # don't double insert edges | ||||
|             insert_edge!(graph, n1, parent) | ||||
|         end | ||||
|  | ||||
|         # this has to be done for all parents, even the ones of n1 because they can be duplicate | ||||
|         prevChild = newParentsChildNames[parent] | ||||
|         update_child!(graph, parent, prevChild, Symbol(to_var_name(n1.id))) | ||||
|     end | ||||
|  | ||||
|     return get_snapshot_diff(graph) | ||||
| @@ -253,31 +278,37 @@ Split the given node into one node per parent, return the applied difference to | ||||
|  | ||||
| For details see [`NodeSplit`](@ref). | ||||
| """ | ||||
| function node_split!(graph::DAG, n1::Node) | ||||
|     # @assert is_valid_node_split_input(graph, n1) | ||||
| function node_split!( | ||||
|     graph::DAG, | ||||
|     n1::Union{DataTaskNode{TaskType}, ComputeTaskNode{TaskType}}, | ||||
| ) where {TaskType <: AbstractTask} | ||||
|     @assert is_valid_node_split_input(graph, n1) | ||||
|  | ||||
|     # clear snapshot | ||||
|     get_snapshot_diff(graph) | ||||
|  | ||||
|     n1_parents = parents(n1) | ||||
|     n1_children = children(n1) | ||||
|     n1Parents = copy(parents(n1)) | ||||
|     n1Children = copy(children(n1)) | ||||
|  | ||||
|     for parent in n1_parents | ||||
|     for parent in n1Parents | ||||
|         remove_edge!(graph, n1, parent) | ||||
|     end | ||||
|     for child in n1_children | ||||
|     for child in n1Children | ||||
|         remove_edge!(graph, child, n1) | ||||
|     end | ||||
|     remove_node!(graph, n1) | ||||
|  | ||||
|     for parent in n1_parents | ||||
|         n_copy = copy(n1) | ||||
|         insert_node!(graph, n_copy) | ||||
|         insert_edge!(graph, n_copy, parent) | ||||
|     for parent in n1Parents | ||||
|         nCopy = copy(n1) | ||||
|  | ||||
|         for child in n1_children | ||||
|             insert_edge!(graph, child, n_copy) | ||||
|         insert_node!(graph, nCopy) | ||||
|         insert_edge!(graph, nCopy, parent) | ||||
|  | ||||
|         for child in n1Children | ||||
|             insert_edge!(graph, child, nCopy) | ||||
|         end | ||||
|  | ||||
|         update_child!(graph, parent, Symbol(to_var_name(n1.id)), Symbol(to_var_name(nCopy.id))) | ||||
|     end | ||||
|  | ||||
|     return get_snapshot_diff(graph) | ||||
|   | ||||
| @@ -13,18 +13,18 @@ function find_fusions!(graph::DAG, node::DataTaskNode) | ||||
|         return nothing | ||||
|     end | ||||
|  | ||||
|     if length(node.parents) != 1 || length(node.children) != 1 | ||||
|     if length(parents(node)) != 1 || length(children(node)) != 1 | ||||
|         return nothing | ||||
|     end | ||||
|  | ||||
|     child_node = first(node.children) | ||||
|     parent_node = first(node.parents) | ||||
|     child_node = first(children(node)) | ||||
|     parent_node = first(parents(node)) | ||||
|  | ||||
|     if !(child_node in graph) || !(parent_node in graph) | ||||
|         error("Parents/Children that are not in the graph!!!") | ||||
|     end | ||||
|  | ||||
|     if length(child_node.parents) != 1 | ||||
|     if length(parents(child_node)) != 1 | ||||
|         return nothing | ||||
|     end | ||||
|  | ||||
| @@ -44,11 +44,11 @@ Find node fusions involving the given compute node. The function pushes the foun | ||||
| """ | ||||
| function find_fusions!(graph::DAG, node::ComputeTaskNode) | ||||
|     # just find fusions in neighbouring DataTaskNodes | ||||
|     for child in node.children | ||||
|     for child in children(node) | ||||
|         find_fusions!(graph, child) | ||||
|     end | ||||
|  | ||||
|     for parent in node.parents | ||||
|     for parent in parents(node) | ||||
|         find_fusions!(graph, parent) | ||||
|     end | ||||
|  | ||||
| @@ -123,7 +123,10 @@ end | ||||
|  | ||||
| Sort this node's parent and child sets, then find fusions, reductions and splits involving it. Needs to be called after the node was changed in some way. | ||||
| """ | ||||
| function clean_node!(graph::DAG, node::Node) | ||||
| function clean_node!( | ||||
|     graph::DAG, | ||||
|     node::Union{DataTaskNode{TaskType}, ComputeTaskNode{TaskType}}, | ||||
| ) where {TaskType <: AbstractTask} | ||||
|     sort_node!(node) | ||||
|  | ||||
|     find_fusions!(graph, node) | ||||
|   | ||||
| @@ -7,10 +7,7 @@ using Base.Threads | ||||
|  | ||||
| Insert the given node fusion into its input nodes' operation caches. For the compute nodes, locking via the given `locks` is employed to have safe multi-threading. For a large set of nodes, contention on the locks should be very small. | ||||
| """ | ||||
| function insert_operation!( | ||||
|     nf::NodeFusion, | ||||
|     locks::Dict{ComputeTaskNode, SpinLock}, | ||||
| ) | ||||
| function insert_operation!(nf::NodeFusion, locks::Dict{ComputeTaskNode, SpinLock}) | ||||
|     n1 = nf.input[1] | ||||
|     n2 = nf.input[2] | ||||
|     n3 = nf.input[3] | ||||
| @@ -52,10 +49,7 @@ end | ||||
|  | ||||
| Insert the node reductions into the graph and the nodes' caches. Employs multithreading for speedup. | ||||
| """ | ||||
| function nr_insertion!( | ||||
|     operations::PossibleOperations, | ||||
|     nodeReductions::Vector{Vector{NodeReduction}}, | ||||
| ) | ||||
| function nr_insertion!(operations::PossibleOperations, nodeReductions::Vector{Vector{NodeReduction}}) | ||||
|     total_len = 0 | ||||
|     for vec in nodeReductions | ||||
|         total_len += length(vec) | ||||
| @@ -83,11 +77,7 @@ end | ||||
|  | ||||
| Insert the node fusions into the graph and the nodes' caches. Employs multithreading for speedup. | ||||
| """ | ||||
| function nf_insertion!( | ||||
|     graph::DAG, | ||||
|     operations::PossibleOperations, | ||||
|     nodeFusions::Vector{Vector{NodeFusion}}, | ||||
| ) | ||||
| function nf_insertion!(graph::DAG, operations::PossibleOperations, nodeFusions::Vector{Vector{NodeFusion}}) | ||||
|     total_len = 0 | ||||
|     for vec in nodeFusions | ||||
|         total_len += length(vec) | ||||
| @@ -122,10 +112,7 @@ end | ||||
|  | ||||
| Insert the node splits into the graph and the nodes' caches. Employs multithreading for speedup. | ||||
| """ | ||||
| function ns_insertion!( | ||||
|     operations::PossibleOperations, | ||||
|     nodeSplits::Vector{Vector{NodeSplit}}, | ||||
| ) | ||||
| function ns_insertion!(operations::PossibleOperations, nodeSplits::Vector{Vector{NodeSplit}}) | ||||
|     total_len = 0 | ||||
|     for vec in nodeSplits | ||||
|         total_len += length(vec) | ||||
| @@ -216,31 +203,27 @@ function generate_operations(graph::DAG) | ||||
|     # --- find possible node fusions --- | ||||
|     @threads for node in nodeArray | ||||
|         if (typeof(node) <: DataTaskNode) | ||||
|             if length(node.parents) != 1 | ||||
|             if length(parents(node)) != 1 | ||||
|                 # data node can only have a single parent | ||||
|                 continue | ||||
|             end | ||||
|             parent_node = first(node.parents) | ||||
|             parent_node = first(parents(node)) | ||||
|  | ||||
|             if length(node.children) != 1 | ||||
|             if length(children(node)) != 1 | ||||
|                 # this node is an entry node or has multiple children which should not be possible | ||||
|                 continue | ||||
|             end | ||||
|             child_node = first(node.children) | ||||
|             if (length(child_node.parents) != 1) | ||||
|             child_node = first(children(node)) | ||||
|             if (length(parents(child_node)) != 1) | ||||
|                 continue | ||||
|             end | ||||
|  | ||||
|             push!( | ||||
|                 generatedFusions[threadid()], | ||||
|                 NodeFusion((child_node, node, parent_node)), | ||||
|             ) | ||||
|             push!(generatedFusions[threadid()], NodeFusion((child_node, node, parent_node))) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     # launch thread for node fusion insertion | ||||
|     nf_task = | ||||
|         @task nf_insertion!(graph, graph.possibleOperations, generatedFusions) | ||||
|     nf_task = @task nf_insertion!(graph, graph.possibleOperations, generatedFusions) | ||||
|     schedule(nf_task) | ||||
|  | ||||
|     # find possible node splits | ||||
|   | ||||
| @@ -14,9 +14,7 @@ function get_operations(graph::DAG) | ||||
|         generate_operations(graph) | ||||
|     end | ||||
|  | ||||
|     for node in graph.dirtyNodes | ||||
|         clean_node!(graph, node) | ||||
|     end | ||||
|     clean_node!.(Ref(graph), graph.dirtyNodes) | ||||
|     empty!(graph.dirtyNodes) | ||||
|  | ||||
|     return graph.possibleOperations | ||||
|   | ||||
							
								
								
									
										39
									
								
								src/operation/iterate.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								src/operation/iterate.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | ||||
| import Base.iterate | ||||
|  | ||||
| const _POSSIBLE_OPERATIONS_FIELDS = fieldnames(PossibleOperations) | ||||
|  | ||||
| # Value returned by `iterate` on a `PossibleOperations`: the yielded operation (`result`) and the | ||||
| # resume state, a pair of (name of the field currently being iterated, that field's own iterator state). | ||||
| # Declared `const` because it is used in the return-type annotations of the `iterate` methods below; | ||||
| # a non-constant global there has to be looked up dynamically on every call. | ||||
| const _POIteratorStateType = | ||||
|     NamedTuple{(:result, :state), Tuple{Union{NodeFusion, NodeReduction, NodeSplit}, Tuple{Symbol, Int64}}} | ||||
|  | ||||
| """ | ||||
|     iterate(possibleOperations::PossibleOperations) | ||||
|  | ||||
| Start iterating over all operations stored in `possibleOperations`. | ||||
|  | ||||
| The fields listed in `_POSSIBLE_OPERATIONS_FIELDS` are visited in order; the first element of the first | ||||
| non-empty field is returned as `result`, together with a `state` of the form `(fieldname, innerState)`. | ||||
| Return `nothing` if every field is empty. | ||||
| """ | ||||
| @inline function iterate(possibleOperations::PossibleOperations)::Union{Nothing, _POIteratorStateType} | ||||
|     for name in _POSSIBLE_OPERATIONS_FIELDS | ||||
|         firstStep = iterate(getfield(possibleOperations, name)) | ||||
|         if firstStep === nothing | ||||
|             # this field holds no operations, try the next one | ||||
|             continue | ||||
|         end | ||||
|  | ||||
|         (operation, innerState) = firstStep | ||||
|         return (result = operation, state = (name, innerState)) | ||||
|     end | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| """ | ||||
|     iterate(possibleOperations::PossibleOperations, state) | ||||
|  | ||||
| Continue iterating over `possibleOperations` from `state`, a `(fieldname, innerState)` pair as produced | ||||
| by a previous call to `iterate`. | ||||
|  | ||||
| First advance within the field named by `state[1]`. If that field is exhausted, move on to the first | ||||
| element of the next non-empty field in `_POSSIBLE_OPERATIONS_FIELDS`. Return `nothing` once no field | ||||
| has elements left. | ||||
| """ | ||||
| @inline function iterate(possibleOperations::PossibleOperations, state)::Union{Nothing, _POIteratorStateType} | ||||
|     currentField = state[1] | ||||
|     step = iterate(getfield(possibleOperations, currentField), state[2]) | ||||
|     if step !== nothing | ||||
|         return (result = step[1], state = (currentField, step[2])) | ||||
|     end | ||||
|  | ||||
|     # the current field is exhausted: scan the fields that come after it | ||||
|     nextIndex = findfirst(x -> x == currentField, _POSSIBLE_OPERATIONS_FIELDS) + 1 | ||||
|  | ||||
|     for index in nextIndex:length(_POSSIBLE_OPERATIONS_FIELDS) | ||||
|         currentField = _POSSIBLE_OPERATIONS_FIELDS[index] | ||||
|         step = iterate(getfield(possibleOperations, currentField)) | ||||
|         if step !== nothing | ||||
|             return (result = step[1], state = (currentField, step[2])) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
| @@ -30,7 +30,7 @@ function show(io::IO, op::NodeReduction) | ||||
|     print(io, "NR: ") | ||||
|     print(io, length(op.input)) | ||||
|     print(io, "x") | ||||
|     return print(io, op.input[1].task) | ||||
|     return print(io, task(op.input[1])) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -40,7 +40,7 @@ Print a string representation of the node split to io. | ||||
| """ | ||||
| function show(io::IO, op::NodeSplit) | ||||
|     print(io, "NS: ") | ||||
|     return print(io, op.input.task) | ||||
|     return print(io, task(op.input)) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -50,9 +50,9 @@ Print a string representation of the node fusion to io. | ||||
| """ | ||||
| function show(io::IO, op::NodeFusion) | ||||
|     print(io, "NF: ") | ||||
|     print(io, op.input[1].task) | ||||
|     print(io, task(op.input[1])) | ||||
|     print(io, "->") | ||||
|     print(io, op.input[2].task) | ||||
|     print(io, task(op.input[2])) | ||||
|     print(io, "->") | ||||
|     return print(io, op.input[3].task) | ||||
|     return print(io, task(op.input[3])) | ||||
| end | ||||
|   | ||||
| @@ -40,8 +40,9 @@ A chain of (n1, n2, n3) can be fused if: | ||||
|  | ||||
| See also: [`can_fuse`](@ref) | ||||
| """ | ||||
| struct NodeFusion <: Operation | ||||
|     input::Tuple{ComputeTaskNode, DataTaskNode, ComputeTaskNode} | ||||
| struct NodeFusion{TaskType1 <: AbstractComputeTask, TaskType2 <: AbstractDataTask, TaskType3 <: AbstractComputeTask} <: | ||||
|        Operation | ||||
|     input::Tuple{ComputeTaskNode{TaskType1}, DataTaskNode{TaskType2}, ComputeTaskNode{TaskType3}} | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -49,8 +50,12 @@ end | ||||
|  | ||||
| The applied version of the [`NodeFusion`](@ref). | ||||
| """ | ||||
| struct AppliedNodeFusion <: AppliedOperation | ||||
|     operation::NodeFusion | ||||
| struct AppliedNodeFusion{ | ||||
|     TaskType1 <: AbstractComputeTask, | ||||
|     TaskType2 <: AbstractDataTask, | ||||
|     TaskType3 <: AbstractComputeTask, | ||||
| } <: AppliedOperation | ||||
|     operation::NodeFusion{TaskType1, TaskType2, TaskType3} | ||||
|     diff::Diff | ||||
| end | ||||
|  | ||||
| @@ -73,8 +78,8 @@ A vector of nodes can be reduced if: | ||||
|  | ||||
| See also: [`can_reduce`](@ref) | ||||
| """ | ||||
| struct NodeReduction <: Operation | ||||
|     input::Vector{Node} | ||||
| struct NodeReduction{NodeType <: Node} <: Operation | ||||
|     input::Vector{NodeType} | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -82,8 +87,8 @@ end | ||||
|  | ||||
| The applied version of the [`NodeReduction`](@ref). | ||||
| """ | ||||
| struct AppliedNodeReduction <: AppliedOperation | ||||
|     operation::NodeReduction | ||||
| struct AppliedNodeReduction{NodeType <: Node} <: AppliedOperation | ||||
|     operation::NodeReduction{NodeType} | ||||
|     diff::Diff | ||||
| end | ||||
|  | ||||
| @@ -102,8 +107,8 @@ A node can be split if: | ||||
|  | ||||
| See also: [`can_split`](@ref) | ||||
| """ | ||||
| struct NodeSplit <: Operation | ||||
|     input::Node | ||||
| struct NodeSplit{NodeType <: Node} <: Operation | ||||
|     input::NodeType | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -111,7 +116,7 @@ end | ||||
|  | ||||
| The applied version of the [`NodeSplit`](@ref). | ||||
| """ | ||||
| struct AppliedNodeSplit <: AppliedOperation | ||||
|     operation::NodeSplit | ||||
| struct AppliedNodeSplit{NodeType <: Node} <: AppliedOperation | ||||
|     operation::NodeSplit{NodeType} | ||||
|     diff::Diff | ||||
| end | ||||
|   | ||||
| @@ -4,9 +4,7 @@ | ||||
| Return whether `operations` is empty, i.e. all of its fields are empty. | ||||
| """ | ||||
| function isempty(operations::PossibleOperations) | ||||
|     return isempty(operations.nodeFusions) && | ||||
|            isempty(operations.nodeReductions) && | ||||
|            isempty(operations.nodeSplits) | ||||
|     return isempty(operations.nodeFusions) && isempty(operations.nodeReductions) && isempty(operations.nodeSplits) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -63,9 +61,7 @@ function can_fuse(n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode) | ||||
|         return false | ||||
|     end | ||||
|  | ||||
|     if length(n2.parents) != 1 || | ||||
|        length(n2.children) != 1 || | ||||
|        length(n1.parents) != 1 | ||||
|     if length(parents(n2)) != 1 || length(children(n2)) != 1 || length(parents(n1)) != 1 | ||||
|         return false | ||||
|     end | ||||
|  | ||||
| @@ -78,12 +74,15 @@ end | ||||
| Return whether the given two nodes can be reduced. See [`NodeReduction`](@ref) for the requirements. | ||||
| """ | ||||
| function can_reduce(n1::Node, n2::Node) | ||||
|     if (n1.task != n2.task) | ||||
|         return false | ||||
|     end | ||||
|     return false | ||||
| end | ||||
|  | ||||
|     n1_length = length(n1.children) | ||||
|     n2_length = length(n2.children) | ||||
| function can_reduce( | ||||
|     n1::NodeType, | ||||
|     n2::NodeType, | ||||
| ) where {TaskType <: AbstractTask, NodeType <: Union{DataTaskNode{TaskType}, ComputeTaskNode{TaskType}}} | ||||
|     n1_length = length(children(n1)) | ||||
|     n2_length = length(children(n2)) | ||||
|  | ||||
|     if (n1_length != n2_length) | ||||
|         return false | ||||
| @@ -92,19 +91,19 @@ function can_reduce(n1::Node, n2::Node) | ||||
|     # this seems to be the most common case so do this first | ||||
|     # doing it manually is a lot faster than using the sets for a general solution | ||||
|     if (n1_length == 2) | ||||
|         if (n1.children[1] != n2.children[1]) | ||||
|             if (n1.children[1] != n2.children[2]) | ||||
|         if (children(n1)[1] != children(n2)[1]) | ||||
|             if (children(n1)[1] != children(n2)[2]) | ||||
|                 return false | ||||
|             end | ||||
|             # 1_1 == 2_2 | ||||
|             if (n1.children[2] != n2.children[1]) | ||||
|             if (children(n1)[2] != children(n2)[1]) | ||||
|                 return false | ||||
|             end | ||||
|             return true | ||||
|         end | ||||
|  | ||||
|         # 1_1 == 2_1 | ||||
|         if (n1.children[2] != n2.children[2]) | ||||
|         if (children(n1)[2] != children(n2)[2]) | ||||
|             return false | ||||
|         end | ||||
|         return true | ||||
| @@ -112,11 +111,11 @@ function can_reduce(n1::Node, n2::Node) | ||||
|  | ||||
|     # this is simple | ||||
|     if (n1_length == 1) | ||||
|         return n1.children[1] == n2.children[1] | ||||
|         return children(n1)[1] == children(n2)[1] | ||||
|     end | ||||
|  | ||||
|     # this takes a long time | ||||
|     return Set(n1.children) == Set(n2.children) | ||||
|     return Set(children(n1)) == Set(children(n2)) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -142,7 +141,14 @@ end | ||||
|  | ||||
| Equality comparison between two node fusions. Two node fusions are considered equal if they have the same inputs. | ||||
| """ | ||||
| function ==(op1::NodeFusion, op2::NodeFusion) | ||||
| function ==( | ||||
|     op1::NodeFusion{ComputeTaskType1, DataTaskType, ComputeTaskType2}, | ||||
|     op2::NodeFusion{ComputeTaskType1, DataTaskType, ComputeTaskType2}, | ||||
| ) where { | ||||
|     ComputeTaskType1 <: AbstractComputeTask, | ||||
|     DataTaskType <: AbstractDataTask, | ||||
|     ComputeTaskType2 <: AbstractComputeTask, | ||||
| } | ||||
|     # there can only be one node fusion on a given data task, so if the data task is the same, the fusion is the same | ||||
|     return op1.input[2] == op2.input[2] | ||||
| end | ||||
|   | ||||
| @@ -9,24 +9,12 @@ Assert for a gven node fusion input whether the nodes can be fused. For the requ | ||||
|  | ||||
| Intended for use with `@assert` or `@test`. | ||||
| """ | ||||
| function is_valid_node_fusion_input( | ||||
|     graph::DAG, | ||||
|     n1::ComputeTaskNode, | ||||
|     n2::DataTaskNode, | ||||
|     n3::ComputeTaskNode, | ||||
| ) | ||||
| function is_valid_node_fusion_input(graph::DAG, n1::ComputeTaskNode, n2::DataTaskNode, n3::ComputeTaskNode) | ||||
|     if !(n1 in graph) || !(n2 in graph) || !(n3 in graph) | ||||
|         throw( | ||||
|             AssertionError( | ||||
|                 "[Node Fusion] The given nodes are not part of the given graph", | ||||
|             ), | ||||
|         ) | ||||
|         throw(AssertionError("[Node Fusion] The given nodes are not part of the given graph")) | ||||
|     end | ||||
|  | ||||
|     if !is_child(n1, n2) || | ||||
|        !is_child(n2, n3) || | ||||
|        !is_parent(n3, n2) || | ||||
|        !is_parent(n2, n1) | ||||
|     if !is_child(n1, n2) || !is_child(n2, n3) || !is_parent(n3, n2) || !is_parent(n2, n1) | ||||
|         throw( | ||||
|             AssertionError( | ||||
|                 "[Node Fusion] The given nodes are not connected by edges which is required for node fusion", | ||||
| @@ -35,27 +23,19 @@ function is_valid_node_fusion_input( | ||||
|     end | ||||
|  | ||||
|     if length(n2.parents) > 1 | ||||
|         throw( | ||||
|             AssertionError( | ||||
|                 "[Node Fusion] The given data node has more than one parent", | ||||
|             ), | ||||
|         ) | ||||
|         throw(AssertionError("[Node Fusion] The given data node has more than one parent")) | ||||
|     end | ||||
|     if length(n2.children) > 1 | ||||
|         throw( | ||||
|             AssertionError( | ||||
|                 "[Node Fusion] The given data node has more than one child", | ||||
|             ), | ||||
|         ) | ||||
|         throw(AssertionError("[Node Fusion] The given data node has more than one child")) | ||||
|     end | ||||
|     if length(n1.parents) > 1 | ||||
|         throw( | ||||
|             AssertionError( | ||||
|                 "[Node Fusion] The given n1 has more than one parent", | ||||
|             ), | ||||
|         ) | ||||
|         throw(AssertionError("[Node Fusion] The given n1 has more than one parent")) | ||||
|     end | ||||
|  | ||||
|     @assert is_valid(graph, n1) | ||||
|     @assert is_valid(graph, n2) | ||||
|     @assert is_valid(graph, n3) | ||||
|  | ||||
|     return true | ||||
| end | ||||
|  | ||||
| @@ -69,22 +49,21 @@ Intended for use with `@assert` or `@test`. | ||||
| function is_valid_node_reduction_input(graph::DAG, nodes::Vector{Node}) | ||||
|     for n in nodes | ||||
|         if n ∉ graph | ||||
|             throw( | ||||
|                 AssertionError( | ||||
|                     "[Node Reduction] The given nodes are not part of the given graph", | ||||
|                 ), | ||||
|             ) | ||||
|             throw(AssertionError("[Node Reduction] The given nodes are not part of the given graph")) | ||||
|         end | ||||
|         @assert is_valid(graph, n) | ||||
|     end | ||||
|  | ||||
|     t = typeof(nodes[1].task) | ||||
|     t = typeof(task(nodes[1])) | ||||
|     for n in nodes | ||||
|         if typeof(n.task) != t | ||||
|             throw( | ||||
|                 AssertionError( | ||||
|                     "[Node Reduction] The given nodes are not of the same type", | ||||
|                 ), | ||||
|             ) | ||||
|         if typeof(task(n)) != t | ||||
|             throw(AssertionError("[Node Reduction] The given nodes are not of the same type")) | ||||
|         end | ||||
|  | ||||
|         if (typeof(n) <: DataTaskNode) | ||||
|             if (n.name != nodes[1].name) | ||||
|                 throw(AssertionError("[Node Reduction] The given nodes do not have the same name")) | ||||
|             end | ||||
|         end | ||||
|     end | ||||
|  | ||||
| @@ -111,11 +90,7 @@ Intended for use with `@assert` or `@test`. | ||||
| """ | ||||
| function is_valid_node_split_input(graph::DAG, n1::Node) | ||||
|     if n1 ∉ graph | ||||
|         throw( | ||||
|             AssertionError( | ||||
|                 "[Node Split] The given node is not part of the given graph", | ||||
|             ), | ||||
|         ) | ||||
|         throw(AssertionError("[Node Split] The given node is not part of the given graph")) | ||||
|     end | ||||
|  | ||||
|     if length(n1.parents) <= 1 | ||||
| @@ -126,6 +101,8 @@ function is_valid_node_split_input(graph::DAG, n1::Node) | ||||
|         ) | ||||
|     end | ||||
|  | ||||
|     @assert is_valid(graph, n1) | ||||
|  | ||||
|     return true | ||||
| end | ||||
|  | ||||
| @@ -138,7 +115,7 @@ Intended for use with `@assert` or `@test`. | ||||
| """ | ||||
| function is_valid(graph::DAG, nr::NodeReduction) | ||||
|     @assert is_valid_node_reduction_input(graph, nr.input) | ||||
|     @assert nr in graph.possibleOperations.nodeReductions "NodeReduction is not part of the graph's possible operations!" | ||||
|     #@assert nr in graph.possibleOperations.nodeReductions "NodeReduction is not part of the graph's possible operations!" | ||||
|     return true | ||||
| end | ||||
|  | ||||
| @@ -151,7 +128,7 @@ Intended for use with `@assert` or `@test`. | ||||
| """ | ||||
| function is_valid(graph::DAG, ns::NodeSplit) | ||||
|     @assert is_valid_node_split_input(graph, ns.input) | ||||
|     @assert ns in graph.possibleOperations.nodeSplits "NodeSplit is not part of the graph's possible operations!" | ||||
|     #@assert ns in graph.possibleOperations.nodeSplits "NodeSplit is not part of the graph's possible operations!" | ||||
|     return true | ||||
| end | ||||
|  | ||||
| @@ -163,12 +140,7 @@ Assert for a given [`NodeFusion`](@ref) whether it is a valid operation in the g | ||||
| Intended for use with `@assert` or `@test`. | ||||
| """ | ||||
| function is_valid(graph::DAG, nf::NodeFusion) | ||||
|     @assert is_valid_node_fusion_input( | ||||
|         graph, | ||||
|         nf.input[1], | ||||
|         nf.input[2], | ||||
|         nf.input[3], | ||||
|     ) | ||||
|     @assert nf in graph.possibleOperations.nodeFusions "NodeFusion is not part of the graph's possible operations!" | ||||
|     @assert is_valid_node_fusion_input(graph, nf.input[1], nf.input[2], nf.input[3]) | ||||
|     #@assert nf in graph.possibleOperations.nodeFusions "NodeFusion is not part of the graph's possible operations!" | ||||
|     return true | ||||
| end | ||||
|   | ||||
							
								
								
									
										73
									
								
								src/optimization/greedy.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								src/optimization/greedy.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| """ | ||||
|     GreedyOptimizer | ||||
|  | ||||
| An implementation of the greedy optimization algorithm, simply choosing the best next option evaluated with the given estimator. | ||||
|  | ||||
| The fixpoint is reached when any leftover operation would increase the graph's total cost according to the given estimator. | ||||
| """ | ||||
| struct GreedyOptimizer{E <: AbstractEstimator} <: AbstractOptimizer | ||||
|     # estimator used to evaluate the effect of each candidate operation | ||||
|     estimator::E | ||||
| end | ||||
|  | ||||
| function optimize_step!(optimizer::GreedyOptimizer, graph::DAG) | ||||
|     # collect every operation currently possible on the graph | ||||
|     candidates = get_operations(graph) | ||||
|     isempty(candidates) && return false | ||||
|  | ||||
|     estimator = optimizer.estimator | ||||
|     bestOp = nothing | ||||
|     bestCost = typemax(cost_type(estimator)) | ||||
|  | ||||
|     # linear scan that keeps the first operation with the strictly lowest estimated effect | ||||
|     for op in candidates | ||||
|         effect = operation_effect(estimator, graph, op) | ||||
|         if effect < bestCost | ||||
|             bestOp = op | ||||
|             bestCost = effect | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     # a strictly positive effect means even the best candidate is rejected | ||||
|     if bestCost > zero(cost_type(estimator)) | ||||
|         return false | ||||
|     end | ||||
|  | ||||
|     push_operation!(graph, bestOp) | ||||
|  | ||||
|     return true | ||||
| end | ||||
|  | ||||
| function fixpoint_reached(optimizer::GreedyOptimizer, graph::DAG) | ||||
|     # collect every operation currently possible on the graph | ||||
|     candidates = get_operations(graph) | ||||
|     isempty(candidates) && return true | ||||
|  | ||||
|     estimator = optimizer.estimator | ||||
|     bestCost = typemax(cost_type(estimator)) | ||||
|  | ||||
|     # find the lowest estimated effect among all candidates | ||||
|     for op in candidates | ||||
|         effect = operation_effect(estimator, graph, op) | ||||
|         if effect < bestCost | ||||
|             bestCost = effect | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     # the fixpoint is reached exactly when the lowest effect is strictly positive | ||||
|     return bestCost > zero(cost_type(estimator)) | ||||
| end | ||||
|  | ||||
| function optimize_to_fixpoint!(optimizer::GreedyOptimizer, graph::DAG) | ||||
|     # keep stepping until `optimize_step!` reports that nothing was applied | ||||
|     while true | ||||
|         optimize_step!(optimizer, graph) || break | ||||
|     end | ||||
|     return nothing | ||||
| end | ||||
							
								
								
									
										60
									
								
								src/optimization/interface.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								src/optimization/interface.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
|  | ||||
| """ | ||||
|     AbstractOptimizer | ||||
|  | ||||
| Abstract base type for optimizer implementations. | ||||
| """ | ||||
| abstract type AbstractOptimizer end | ||||
|  | ||||
| """ | ||||
|     optimize_step!(optimizer::AbstractOptimizer, graph::DAG) | ||||
|  | ||||
| Interface function that must be implemented by implementations of [`AbstractOptimizer`](@ref). Returns `true` if an operation has been applied, `false` if not, usually when a fixpoint of the algorithm has been reached. | ||||
|  | ||||
| It should do one smallest logical step on the given [`DAG`](@ref), mutating the graph and, if necessary, the optimizer's state. | ||||
| """ | ||||
| function optimize_step! end | ||||
|  | ||||
| """ | ||||
|     optimize!(optimizer::AbstractOptimizer, graph::DAG, n::Int) | ||||
|  | ||||
| Function calling the given optimizer `n` times, mutating the graph. Returns `true` if the requested number of operations has been applied, `false` if not, usually when a fixpoint of the algorithm has been reached. | ||||
|  | ||||
| If a more efficient method exists, this can be overloaded for a specific optimizer. | ||||
| """ | ||||
| function optimize!(optimizer::AbstractOptimizer, graph::DAG, n::Int) | ||||
|     stepsDone = 0 | ||||
|     while stepsDone < n | ||||
|         # stop early as soon as a step could not apply anything | ||||
|         optimize_step!(optimizer, graph) || return false | ||||
|         stepsDone += 1 | ||||
|     end | ||||
|     return true | ||||
| end | ||||
|  | ||||
| """ | ||||
|     fixpoint_reached(optimizer::AbstractOptimizer, graph::DAG) | ||||
|  | ||||
| Interface function that can be implemented by optimization algorithms that can reach a fixpoint, returning as a `Bool` whether it has been reached. The default implementation returns `false`. | ||||
|  | ||||
| See also: [`optimize_to_fixpoint!`](@ref) | ||||
| """ | ||||
| fixpoint_reached(optimizer::AbstractOptimizer, graph::DAG) = false | ||||
|  | ||||
| """ | ||||
|     optimize_to_fixpoint!(optimizer::AbstractOptimizer, graph::DAG) | ||||
|  | ||||
| Interface function that can be implemented by optimization algorithms that can reach a fixpoint. The algorithm will be run until that fixpoint is reached, at which point [`fixpoint_reached`](@ref) should return true. | ||||
|  | ||||
| A usual implementation might look like this: | ||||
| ```julia | ||||
|     function optimize_to_fixpoint!(optimizer::MyOptimizer, graph::DAG) | ||||
|         while !fixpoint_reached(optimizer, graph) | ||||
|             optimize_step!(optimizer, graph) | ||||
|         end | ||||
|         return nothing | ||||
|     end | ||||
| ``` | ||||
| """ | ||||
| function optimize_to_fixpoint! end | ||||
							
								
								
									
										49
									
								
								src/optimization/random_walk.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								src/optimization/random_walk.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| using Random | ||||
|  | ||||
| """ | ||||
|     RandomWalkOptimizer | ||||
|  | ||||
| An optimizer that randomly pushes or pops operations. It doesn't optimize in any direction and is useful mainly for testing purposes. | ||||
|  | ||||
| This algorithm never reaches a fixpoint, so it does not implement [`optimize_to_fixpoint!`](@ref). | ||||
| """ | ||||
| struct RandomWalkOptimizer{RNGType <: AbstractRNG} <: AbstractOptimizer | ||||
|     # random number generator driving every push/pop decision; concretely typed via the | ||||
|     # type parameter so that field access is type-stable. `RandomWalkOptimizer(rng)` keeps working. | ||||
|     rng::RNGType | ||||
| end | ||||
|  | ||||
| function optimize_step!(optimizer::RandomWalkOptimizer, graph::DAG) | ||||
|     operations = get_operations(graph) | ||||
|  | ||||
|     # bail out when there are zero operations possible at all on the graph | ||||
|     noPush = sum(length(operations)) == 0 | ||||
|     if noPush && length(graph.appliedOperations) + length(graph.operationsToApply) == 0 | ||||
|         return false | ||||
|     end | ||||
|  | ||||
|     rng = optimizer.rng | ||||
|     # retry until an operation was either pushed or popped | ||||
|     while true | ||||
|         if !rand(rng, Bool) | ||||
|             # pop branch | ||||
|             if (can_pop(graph)) | ||||
|                 pop_operation!(graph) | ||||
|                 return true | ||||
|             end | ||||
|             continue | ||||
|         end | ||||
|  | ||||
|         # push branch: draw which kind to try (1 = fusion, 2 = reduction, 3 = split) | ||||
|         choice = rand(rng, 1:3) | ||||
|         pool = if choice == 1 | ||||
|             operations.nodeFusions | ||||
|         elseif choice == 2 | ||||
|             operations.nodeReductions | ||||
|         else | ||||
|             operations.nodeSplits | ||||
|         end | ||||
|  | ||||
|         # an empty pool means this draw is wasted and the loop tries again | ||||
|         if !isempty(pool) | ||||
|             push_operation!(graph, rand(rng, collect(pool))) | ||||
|             return true | ||||
|         end | ||||
|     end | ||||
| end | ||||
							
								
								
									
										30
									
								
								src/optimization/reduce.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								src/optimization/reduce.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| """ | ||||
|     ReductionOptimizer | ||||
|  | ||||
| An optimizer that simply applies an available [`NodeReduction`](@ref) on each step. It implements [`optimize_to_fixpoint!`](@ref). The fixpoint is reached when there are no more possible [`NodeReduction`](@ref)s in the graph. | ||||
| """ | ||||
| struct ReductionOptimizer <: AbstractOptimizer end | ||||
|  | ||||
| function optimize_step!(optimizer::ReductionOptimizer, graph::DAG) | ||||
|     # generate all options once and reuse them for both the fixpoint check and the push; | ||||
|     # this is the same emptiness check `fixpoint_reached` performs, without a second `get_operations` call | ||||
|     operations = get_operations(graph) | ||||
|     reductions = operations.nodeReductions | ||||
|     if isempty(reductions) | ||||
|         return false | ||||
|     end | ||||
|  | ||||
|     push_operation!(graph, first(reductions)) | ||||
|  | ||||
|     return true | ||||
| end | ||||
|  | ||||
| function fixpoint_reached(optimizer::ReductionOptimizer, graph::DAG) | ||||
|     # the fixpoint is reached when no node reduction is left among the possible operations | ||||
|     return isempty(get_operations(graph).nodeReductions) | ||||
| end | ||||
|  | ||||
| function optimize_to_fixpoint!(optimizer::ReductionOptimizer, graph::DAG) | ||||
|     # apply one reduction per iteration until none is left | ||||
|     while true | ||||
|         fixpoint_reached(optimizer, graph) && break | ||||
|         optimize_step!(optimizer, graph) | ||||
|     end | ||||
|     return nothing | ||||
| end | ||||
| @@ -4,14 +4,18 @@ | ||||
| Create an empty [`GraphProperties`](@ref) object. | ||||
| """ | ||||
| function GraphProperties() | ||||
|     return ( | ||||
|         data = 0.0, | ||||
|         computeEffort = 0.0, | ||||
|         computeIntensity = 0.0, | ||||
|         cost = 0.0, | ||||
|         noNodes = 0, | ||||
|         noEdges = 0, | ||||
|     )::GraphProperties | ||||
|     return (data = 0.0, computeEffort = 0.0, computeIntensity = 0.0, noNodes = 0, noEdges = 0)::GraphProperties | ||||
| end | ||||
|  | ||||
| @inline function _props( | ||||
|     node::DataTaskNode{TaskType}, | ||||
| )::Tuple{Float64, Float64, Int64} where {TaskType <: AbstractDataTask} | ||||
|     return (data(task(node)) * length(parents(node)), 0.0, length(parents(node))) | ||||
| end | ||||
| @inline function _props( | ||||
|     node::ComputeTaskNode{TaskType}, | ||||
| )::Tuple{Float64, Float64, Int64} where {TaskType <: AbstractComputeTask} | ||||
|     return (0.0, compute_effort(task(node)), length(parents(node))) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -27,16 +31,16 @@ function GraphProperties(graph::DAG) | ||||
|     ce = 0.0 | ||||
|     ed = 0 | ||||
|     for node in graph.nodes | ||||
|         d += data(node.task) * length(node.parents) | ||||
|         ce += compute_effort(node.task) | ||||
|         ed += length(node.parents) | ||||
|         props = _props(node) | ||||
|         d += props[1] | ||||
|         ce += props[2] | ||||
|         ed += props[3] | ||||
|     end | ||||
|  | ||||
|     return ( | ||||
|         data = d, | ||||
|         computeEffort = ce, | ||||
|         computeIntensity = (d == 0) ? 0.0 : ce / d, | ||||
|         cost = 0.0, # TODO | ||||
|         noNodes = length(graph.nodes), | ||||
|         noEdges = ed, | ||||
|     )::GraphProperties | ||||
| @@ -50,23 +54,18 @@ The graph's properties after applying the [`Diff`](@ref) will be `get_properties | ||||
| For reverting a diff, it's `get_properties(graph) - GraphProperties(diff)`. | ||||
| """ | ||||
| function GraphProperties(diff::Diff) | ||||
|     d = 0.0 | ||||
|     ce = 0.0 | ||||
|     c = 0.0 # TODO | ||||
|  | ||||
|     ce = | ||||
|         reduce(+, compute_effort(n.task) for n in diff.addedNodes; init = 0.0) - | ||||
|         reduce(+, compute_effort(n.task) for n in diff.removedNodes; init = 0.0) | ||||
|         reduce(+, compute_effort(task(n)) for n in diff.addedNodes; init = 0.0) - | ||||
|         reduce(+, compute_effort(task(n)) for n in diff.removedNodes; init = 0.0) | ||||
|  | ||||
|     d = | ||||
|         reduce(+, data(e) for e in diff.addedEdges; init = 0.0) - | ||||
|         reduce(+, data(e) for e in diff.removedEdges; init = 0.0) | ||||
|         reduce(+, data(task(n)) for n in diff.addedNodes; init = 0.0) - | ||||
|         reduce(+, data(task(n)) for n in diff.removedNodes; init = 0.0) | ||||
|  | ||||
|     return ( | ||||
|         data = d, | ||||
|         computeEffort = ce, | ||||
|         computeIntensity = (d == 0) ? 0.0 : ce / d, | ||||
|         cost = c, | ||||
|         noNodes = length(diff.addedNodes) - length(diff.removedNodes), | ||||
|         noEdges = length(diff.addedEdges) - length(diff.removedEdges), | ||||
|     )::GraphProperties | ||||
|   | ||||
| @@ -7,11 +7,10 @@ Representation of a [`DAG`](@ref)'s properties. | ||||
| `.data`: The total data transfer.\\ | ||||
| `.computeEffort`: The total compute effort.\\ | ||||
| `.computeIntensity`: The compute intensity, will always equal `.computeEffort / .data`.\\ | ||||
| `.cost`: The estimated cost.\\ | ||||
| `.noNodes`: Number of [`Node`](@ref)s.\\ | ||||
| `.noEdges`: Number of [`Edge`](@ref)s. | ||||
| """ | ||||
| const GraphProperties = NamedTuple{ | ||||
|     (:data, :computeEffort, :computeIntensity, :cost, :noNodes, :noEdges), | ||||
|     Tuple{Float64, Float64, Float64, Float64, Int, Int}, | ||||
|     (:data, :computeEffort, :computeIntensity, :noNodes, :noEdges), | ||||
|     Tuple{Float64, Float64, Float64, Int, Int}, | ||||
| } | ||||
|   | ||||
| @@ -11,10 +11,8 @@ function -(prop1::GraphProperties, prop2::GraphProperties) | ||||
|         computeIntensity = if (prop1.data - prop2.data == 0) | ||||
|             0.0 | ||||
|         else | ||||
|             (prop1.computeEffort - prop2.computeEffort) / | ||||
|             (prop1.data - prop2.data) | ||||
|             (prop1.computeEffort - prop2.computeEffort) / (prop1.data - prop2.data) | ||||
|         end, | ||||
|         cost = prop1.cost - prop2.cost, | ||||
|         noNodes = prop1.noNodes - prop2.noNodes, | ||||
|         noEdges = prop1.noEdges - prop2.noEdges, | ||||
|     )::GraphProperties | ||||
| @@ -33,10 +31,8 @@ function +(prop1::GraphProperties, prop2::GraphProperties) | ||||
|         computeIntensity = if (prop1.data + prop2.data == 0) | ||||
|             0.0 | ||||
|         else | ||||
|             (prop1.computeEffort + prop2.computeEffort) / | ||||
|             (prop1.data + prop2.data) | ||||
|             (prop1.computeEffort + prop2.computeEffort) / (prop1.data + prop2.data) | ||||
|         end, | ||||
|         cost = prop1.cost + prop2.cost, | ||||
|         noNodes = prop1.noNodes + prop2.noNodes, | ||||
|         noEdges = prop1.noEdges + prop2.noEdges, | ||||
|     )::GraphProperties | ||||
| @@ -52,7 +48,6 @@ function -(prop::GraphProperties) | ||||
|         data = -prop.data, | ||||
|         computeEffort = -prop.computeEffort, | ||||
|         computeIntensity = prop.computeIntensity,   # no negation here! | ||||
|         cost = -prop.cost, | ||||
|         noNodes = -prop.noNodes, | ||||
|         noEdges = -prop.noEdges, | ||||
|     )::GraphProperties | ||||
|   | ||||
							
								
								
									
										50
									
								
								src/scheduler/greedy.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								src/scheduler/greedy.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
|  | ||||
| """ | ||||
|     GreedyScheduler | ||||
|  | ||||
| A greedy implementation of a scheduler, creating a topological ordering of nodes and naively balancing them onto the different devices. | ||||
| """ | ||||
| struct GreedyScheduler <: Scheduler end | ||||
|  | ||||
| function schedule_dag(::GreedyScheduler, graph::DAG, machine::Machine) | ||||
|     nodeQueue = PriorityQueue{Node, Int}() | ||||
|  | ||||
|     # use a priority equal to the number of unseen children -> 0 are nodes that can be added | ||||
|     for node in get_entry_nodes(graph) | ||||
|         enqueue!(nodeQueue, node => 0) | ||||
|     end | ||||
|  | ||||
|     schedule = Vector{Node}() | ||||
|     sizehint!(schedule, length(graph.nodes)) | ||||
|  | ||||
|     # keep an accumulated cost of things scheduled to each device so far; | ||||
|     # stored as Float64, the type used for `computeEffort` in `GraphProperties` | ||||
|     deviceAccCost = PriorityQueue{AbstractDevice, Float64}() | ||||
|     for device in machine.devices | ||||
|         enqueue!(deviceAccCost, device => 0.0) | ||||
|     end | ||||
|  | ||||
|     while !isempty(nodeQueue) | ||||
|         # only nodes whose children have all been scheduled may be at the front | ||||
|         @assert peek(nodeQueue)[2] == 0 | ||||
|         node = dequeue!(nodeQueue) | ||||
|  | ||||
|         # assign the device with lowest accumulated cost to the node (if it's a compute node) | ||||
|         if (isa(node, ComputeTaskNode)) | ||||
|             lowestDevice = peek(deviceAccCost)[1] | ||||
|             node.device = lowestDevice | ||||
|             # add to the device's running total instead of overwriting it, | ||||
|             # otherwise only the most recently scheduled task would count | ||||
|             deviceAccCost[lowestDevice] += compute_effort(task(node)) | ||||
|         end | ||||
|  | ||||
|         push!(schedule, node) | ||||
|         for parent in parents(node) | ||||
|             # reduce the priority of all parents by one | ||||
|             if (!haskey(nodeQueue, parent)) | ||||
|                 # first time this parent is seen: one of its children (this node) is already scheduled | ||||
|                 enqueue!(nodeQueue, parent => length(children(parent)) - 1) | ||||
|             else | ||||
|                 nodeQueue[parent] = nodeQueue[parent] - 1 | ||||
|             end | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     return schedule | ||||
| end | ||||
							
								
								
									
										18
									
								
								src/scheduler/interface.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								src/scheduler/interface.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
|  | ||||
| """ | ||||
|     Scheduler | ||||
|  | ||||
| Abstract base type for scheduler implementations. The scheduler is used to assign each node to a device and create a topological ordering of tasks. | ||||
| """ | ||||
| abstract type Scheduler end | ||||
|  | ||||
| """ | ||||
|     schedule_dag(::Scheduler, ::DAG, ::Machine) | ||||
|  | ||||
| Interface function that must be implemented for implementations of [`Scheduler`](@ref). | ||||
|  | ||||
| The function assigns each [`ComputeTaskNode`](@ref) of the [`DAG`](@ref) to one of the devices in the given [`Machine`](@ref) and returns a `Vector{Node}` representing a topological ordering. | ||||
|  | ||||
| [`DataTaskNode`](@ref)s are not scheduled to devices since they do not compute. Instead, a data node transfers data from the [`AbstractDevice`](@ref) of their child to all [`AbstractDevice`](@ref)s of its parents. | ||||
| """ | ||||
| function schedule_dag end | ||||
							
								
								
									
										89
									
								
								src/task/compute.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								src/task/compute.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,89 @@ | ||||
|  | ||||
| """ | ||||
|     compute(t::FusedComputeTask, data) | ||||
|  | ||||
| Placeholder method for [`FusedComputeTask`](@ref)s. It always fails its assertion and must not be called: the code of a fused task is generated from its two sub-tasks instead. | ||||
| """ | ||||
| compute(t::FusedComputeTask, data) = @assert false "This is not implemented and should never be called" | ||||
|  | ||||
| """ | ||||
|     get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector, outExpr) | ||||
|  | ||||
| Generate code evaluating a [`FusedComputeTask`](@ref) on `device`: a block that first evaluates `t.first_task` and then `t.second_task`. | ||||
|  | ||||
| The input access expressions of both sub-tasks are generated from the symbols stored in `t.t1_inputs` and `t.t2_inputs`; the `inExprs` argument itself is not read. The output of the first task (`t.t1_output`) is passed as the last input of the second task, whose result is written to `outExpr`. | ||||
| """ | ||||
| function get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector, outExpr) | ||||
|     # access expressions for the first task's inputs, its output, and the second task's explicit inputs | ||||
|     firstInputs = Any[gen_access_expr(device, sym) for sym in t.t1_inputs] | ||||
|     intermediate = gen_access_expr(device, t.t1_output) | ||||
|     secondInputs = Any[gen_access_expr(device, sym) for sym in t.t2_inputs] | ||||
|  | ||||
|     firstExpr = get_expression(t.first_task, device, firstInputs, intermediate) | ||||
|     # the first task's output is the implicit last input of the second task | ||||
|     secondExpr = get_expression(t.second_task, device, [secondInputs..., intermediate], outExpr) | ||||
|  | ||||
|     return Expr(:block, firstExpr, secondExpr) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(node::ComputeTaskNode) | ||||
|  | ||||
| Generate and return code for a given [`ComputeTaskNode`](@ref). | ||||
| """ | ||||
| function get_expression(node::ComputeTaskNode) | ||||
|     @assert length(children(node)) <= children(task(node)) "Node $(node) has too many children for its task: node has $(length(node.children)) versus task has $(children(task(node)))\nNode's children: $(getfield.(node.children, :children))" | ||||
|     @assert !ismissing(node.device) "Trying to get expression for an unscheduled ComputeTaskNode\nNode: $(node)" | ||||
|  | ||||
|     # one access expression per child (the inputs), named after the child's id | ||||
|     inExprs = Any[ | ||||
|         gen_access_expr(node.device, Symbol(to_var_name(childId))) for childId in getfield.(children(node), :id) | ||||
|     ] | ||||
|     # the node's own result is stored under a name derived from its id | ||||
|     outExpr = gen_access_expr(node.device, Symbol(to_var_name(node.id))) | ||||
|  | ||||
|     return get_expression(task(node), node.device, inExprs, outExpr) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression(node::DataTaskNode) | ||||
|  | ||||
| Generate and return code for a given [`DataTaskNode`](@ref). | ||||
| """ | ||||
| function get_expression(node::DataTaskNode) | ||||
|     @assert length(children(node)) == 1 "Trying to call get_expression on a data task node that has $(length(node.children)) children instead of 1" | ||||
|  | ||||
|     # TODO: dispatch to device implementations generating the copy commands | ||||
|  | ||||
|     source = children(node)[1] | ||||
|     # both sides of the assignment are accessed on the device of the single child | ||||
|     from = eval(gen_access_expr(source.device, Symbol(to_var_name(source.id)))) | ||||
|     to = eval(gen_access_expr(source.device, Symbol(to_var_name(node.id)))) | ||||
|  | ||||
|     # the data "transport" is currently a plain assignment `to = from` | ||||
|     return Meta.parse("$to = $from") | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_init_expression(node::DataTaskNode, device::AbstractDevice) | ||||
|  | ||||
| Generate and return code for the initial input reading expression for [`DataTaskNode`](@ref)s with 0 children, i.e., entry nodes. | ||||
|  | ||||
| See also: [`get_entry_nodes`](@ref) | ||||
| """ | ||||
| function get_init_expression(node::DataTaskNode, device::AbstractDevice) | ||||
|     @assert isempty(children(node)) "Trying to call get_init_expression on a data task node that is not an entry node." | ||||
|  | ||||
|     varName = to_var_name(node.id) | ||||
|     # the input is read from the node's variable name with an "_in" suffix | ||||
|     from = eval(gen_access_expr(device, Symbol("$(varName)_in"))) | ||||
|     to = eval(gen_access_expr(device, Symbol(varName))) | ||||
|  | ||||
|     return Meta.parse("$to = $from") | ||||
| end | ||||
| @@ -3,8 +3,7 @@ | ||||
|  | ||||
| Fallback implementation of the copy of an abstract data task, throwing an error. | ||||
| """ | ||||
| copy(t::AbstractDataTask) = | ||||
|     error("Need to implement copying for your data tasks!") | ||||
| copy(t::AbstractDataTask) = error("Need to implement copying for your data tasks!") | ||||
|  | ||||
| """ | ||||
|     copy(t::AbstractComputeTask) | ||||
| @@ -12,3 +11,22 @@ copy(t::AbstractDataTask) = | ||||
| Return a copy of the given compute task. | ||||
| """ | ||||
| copy(t::AbstractComputeTask) = typeof(t)() | ||||
|  | ||||
| """ | ||||
|     copy(t::FusedComputeTask) | ||||
|  | ||||
| Return a copy of the given [`FusedComputeTask`](@ref). Both sub-tasks and both input name vectors are copied as well. | ||||
| """ | ||||
| function copy(t::FusedComputeTask) | ||||
|     return FusedComputeTask(copy(t.first_task), copy(t.second_task), copy(t.t1_inputs), t.t1_output, copy(t.t2_inputs)) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     FusedComputeTask(T1, T2, t1_inputs, t1_output, t2_inputs) | ||||
|  | ||||
| Convenience constructor creating a [`FusedComputeTask`](@ref) from the two task *types* `T1` and `T2`, which are instantiated with their zero-argument constructors. | ||||
|  | ||||
| The input and output names may be given as `Symbol`s or as strings; they are stored as `Symbol`s. | ||||
| """ | ||||
| function FusedComputeTask( | ||||
|     T1::Type{<:AbstractComputeTask}, | ||||
|     T2::Type{<:AbstractComputeTask}, | ||||
|     t1_inputs::AbstractVector, | ||||
|     t1_output::Union{Symbol, AbstractString}, | ||||
|     t2_inputs::AbstractVector, | ||||
| ) | ||||
|     # The struct stores Symbols and there is no `convert(Symbol, ::String)`, so the names are | ||||
|     # converted explicitly; passing strings straight to the default constructor would throw. | ||||
|     return FusedComputeTask( | ||||
|         T1(), | ||||
|         T2(), | ||||
|         Symbol[Symbol(name) for name in t1_inputs], | ||||
|         Symbol(t1_output), | ||||
|         Symbol[Symbol(name) for name in t2_inputs], | ||||
|     ) | ||||
| end | ||||
|   | ||||
| @@ -4,6 +4,5 @@ | ||||
| Print a string representation of the fused compute task to io. | ||||
| """ | ||||
| function show(io::IO, t::FusedComputeTask) | ||||
|     (T1, T2) = get_types(t) | ||||
|     return print(io, "ComputeFuse(", T1(), ", ", T2(), ")") | ||||
|     return print(io, "ComputeFuse($(t.first_task), $(t.second_task))") | ||||
| end | ||||
|   | ||||
| @@ -30,7 +30,7 @@ compute(t::AbstractDataTask; data...) = data | ||||
|  | ||||
| Fallback implementation of the compute effort of a task, throwing an error. | ||||
| """ | ||||
| function compute_effort(t::AbstractTask) | ||||
| function compute_effort(t::AbstractTask)::Float64 | ||||
|     # default implementation using compute | ||||
|     return error("Need to implement compute_effort()") | ||||
| end | ||||
| @@ -40,7 +40,7 @@ end | ||||
|  | ||||
| Fallback implementation of the data of a task, throwing an error. | ||||
| """ | ||||
| function data(t::AbstractTask) | ||||
| function data(t::AbstractTask)::Float64 | ||||
|     return error("Need to implement data()") | ||||
| end | ||||
|  | ||||
| @@ -49,30 +49,29 @@ end | ||||
|  | ||||
| Return the compute effort of a data task, always zero, regardless of the specific task. | ||||
| """ | ||||
| compute_effort(t::AbstractDataTask) = 0 | ||||
| compute_effort(t::AbstractDataTask)::Float64 = 0.0 | ||||
|  | ||||
| """ | ||||
|     data(t::AbstractDataTask) | ||||
|  | ||||
| Return the data of a data task. Given by the task's `.data` field. | ||||
| """ | ||||
| data(t::AbstractDataTask) = getfield(t, :data) | ||||
| data(t::AbstractDataTask)::Float64 = getfield(t, :data) | ||||
|  | ||||
| """ | ||||
|     data(t::AbstractComputeTask) | ||||
|  | ||||
| Return the data of a compute task, always zero, regardless of the specific task. | ||||
| """ | ||||
| data(t::AbstractComputeTask) = 0 | ||||
| data(t::AbstractComputeTask)::Float64 = 0.0 | ||||
|  | ||||
| """ | ||||
|     compute_effort(t::FusedComputeTask) | ||||
|  | ||||
| Return the compute effort of a fused compute task.  | ||||
| """ | ||||
| function compute_effort(t::FusedComputeTask) | ||||
|     (T1, T2) = collect(typeof(t).parameters) | ||||
|     return compute_effort(T1()) + compute_effort(T2()) | ||||
| function compute_effort(t::FusedComputeTask)::Float64 | ||||
|     return compute_effort(t.first_task) + compute_effort(t.second_task) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -80,31 +79,4 @@ end | ||||
|  | ||||
| Return a tuple of the fused compute task's components' types. | ||||
| """ | ||||
| get_types(::FusedComputeTask{T1, T2}) where {T1, T2} = (T1, T2) | ||||
|  | ||||
| """ | ||||
|     get_expression(t::AbstractTask) | ||||
|  | ||||
| Return an expression evaluating the given task on the :dataIn symbol | ||||
| """ | ||||
| function get_expression(t::AbstractTask) | ||||
|     return quote | ||||
|         dataOut = compute($t, dataIn) | ||||
|     end | ||||
| end | ||||
|  | ||||
| """ | ||||
|     get_expression() | ||||
| """ | ||||
| function get_expression( | ||||
|     t::FusedComputeTask, | ||||
|     inSymbol::Symbol, | ||||
|     outSymbol::Symbol, | ||||
| ) | ||||
|     #TODO | ||||
|     computeExp = quote | ||||
|         $outSymbol = compute($t, $inSymbol) | ||||
|     end | ||||
|  | ||||
|     return computeExp | ||||
| end | ||||
| get_types(t::FusedComputeTask) = (typeof(t.first_task), typeof(t.second_task)) | ||||
|   | ||||
| @@ -26,5 +26,13 @@ A fused compute task made up of the computation of first `T1` and then `T2`. | ||||
|  | ||||
| Also see: [`get_types`](@ref). | ||||
| """ | ||||
| struct FusedComputeTask{T1 <: AbstractComputeTask, T2 <: AbstractComputeTask} <: | ||||
|        AbstractComputeTask end | ||||
| struct FusedComputeTask <: AbstractComputeTask | ||||
|     first_task::AbstractComputeTask | ||||
|     second_task::AbstractComputeTask | ||||
|     # the names of the inputs for T1 | ||||
|     t1_inputs::Vector{Symbol} | ||||
|     # output name of T1 | ||||
|     t1_output::Symbol | ||||
|     # t2_inputs doesn't include the output of t1, that's implicit | ||||
|     t2_inputs::Vector{Symbol} | ||||
| end | ||||
|   | ||||
							
								
								
									
										32
									
								
								src/trie.jl
									
									
									
									
									
								
							
							
						
						
									
										32
									
								
								src/trie.jl
									
									
									
									
									
								
							| @@ -3,9 +3,9 @@ | ||||
|  | ||||
| Helper struct for [`NodeTrie`](@ref). After the Trie's first level, every Trie level contains the vector of nodes that had children up to that level, and the TrieNode's children by UUID of the node's children. | ||||
| """ | ||||
| mutable struct NodeIdTrie | ||||
|     value::Vector{Node} | ||||
|     children::Dict{UUID, NodeIdTrie} | ||||
| mutable struct NodeIdTrie{NodeType <: Node} | ||||
|     value::Vector{NodeType} | ||||
|     children::Dict{UUID, NodeIdTrie{NodeType}} | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -35,8 +35,8 @@ end | ||||
|  | ||||
| Constructor for an empty [`NodeIdTrie`](@ref). | ||||
| """ | ||||
| function NodeIdTrie() | ||||
|     return NodeIdTrie(Vector{Node}(), Dict{UUID, NodeIdTrie}()) | ||||
| function NodeIdTrie{NodeType}() where {NodeType <: Node} | ||||
|     return NodeIdTrie(Vector{NodeType}(), Dict{UUID, NodeIdTrie{NodeType}}()) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -44,8 +44,12 @@ end | ||||
|  | ||||
| Insert the given node into the trie. The depth is used to iterate through the trie layers, while the function calls itself recursively until it ran through all children of the node. | ||||
| """ | ||||
| function insert_helper!(trie::NodeIdTrie, node::Node, depth::Int) | ||||
|     if (length(node.children) == depth) | ||||
| function insert_helper!( | ||||
|     trie::NodeIdTrie{NodeType}, | ||||
|     node::NodeType, | ||||
|     depth::Int, | ||||
| ) where {TaskType <: AbstractTask, NodeType <: Union{DataTaskNode{TaskType}, ComputeTaskNode{TaskType}}} | ||||
|     if (length(children(node)) == depth) | ||||
|         push!(trie.value, node) | ||||
|         return nothing | ||||
|     end | ||||
| @@ -54,7 +58,7 @@ function insert_helper!(trie::NodeIdTrie, node::Node, depth::Int) | ||||
|     id = node.children[depth].id | ||||
|  | ||||
|     if (!haskey(trie.children, id)) | ||||
|         trie.children[id] = NodeIdTrie() | ||||
|         trie.children[id] = NodeIdTrie{NodeType}() | ||||
|     end | ||||
|     return insert_helper!(trie.children[id], node, depth) | ||||
| end | ||||
| @@ -64,12 +68,14 @@ end | ||||
|  | ||||
| Insert the given node into the trie. It's sorted by its type in the first layer, then by its children in the following layers. | ||||
| """ | ||||
| function insert!(trie::NodeTrie, node::Node) | ||||
|     t = typeof(node.task) | ||||
|     if (!haskey(trie.children, t)) | ||||
|         trie.children[t] = NodeIdTrie() | ||||
| function insert!( | ||||
|     trie::NodeTrie, | ||||
|     node::NodeType, | ||||
| ) where {TaskType <: AbstractTask, NodeType <: Union{DataTaskNode{TaskType}, ComputeTaskNode{TaskType}}} | ||||
|     if (!haskey(trie.children, NodeType)) | ||||
|         trie.children[NodeType] = NodeIdTrie{NodeType}() | ||||
|     end | ||||
|     return insert_helper!(trie.children[typeof(node.task)], node, 0) | ||||
|     return insert_helper!(trie.children[NodeType], node, 0) | ||||
| end | ||||
|  | ||||
| """ | ||||
|   | ||||
| @@ -36,8 +36,8 @@ Sort the nodes' parents and children vectors. The vectors are mostly very short | ||||
| Sorted nodes are required to make the finding of [`NodeReduction`](@ref)s a lot faster using the [`NodeTrie`](@ref) data structure. | ||||
| """ | ||||
| function sort_node!(node::Node) | ||||
|     sort!(node.children, lt = lt_nodes) | ||||
|     return sort!(node.parents, lt = lt_nodes) | ||||
|     sort!(children(node), lt = lt_nodes) | ||||
|     return sort!(parents(node), lt = lt_nodes) | ||||
| end | ||||
|  | ||||
| """ | ||||
| @@ -87,3 +87,19 @@ Return the memory footprint of the node in Byte. Used in [`mem(graph::DAG)`](@re | ||||
| function mem(node::Node) | ||||
|     return Base.summarysize(node, exclude = Union{Node, Operation}) | ||||
| end | ||||
|  | ||||
| """ | ||||
|     unroll_symbol_vector(vec::Vector) | ||||
|  | ||||
| Return the elements of the given vector as a single `String`, separated by `", "`, without quotation marks or brackets. | ||||
|  | ||||
| Each element is converted to its printed form, e.g. `[:a, :b]` becomes `"a, b"`. An empty vector yields `""`. | ||||
| """ | ||||
| function unroll_symbol_vector(vec::Vector) | ||||
|     # `join` places the separator between all elements, including after an element that prints as an empty string | ||||
|     return join(vec, ", ") | ||||
| end | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| [deps] | ||||
| AccurateArithmetic = "22286c92-06ac-501d-9306-4abd417d9753" | ||||
| QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93" | ||||
| Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | ||||
| Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | ||||
|   | ||||
| @@ -2,7 +2,7 @@ using Random | ||||
|  | ||||
| function test_known_graph(name::String, n, fusion_test = true) | ||||
|     @testset "Test $name Graph ($n)" begin | ||||
|         graph = parse_abc(joinpath(@__DIR__, "..", "input", "$name.txt")) | ||||
|         graph = parse_dag(joinpath(@__DIR__, "..", "input", "$name.txt"), ABCModel()) | ||||
|         props = get_properties(graph) | ||||
|  | ||||
|         if (fusion_test) | ||||
|   | ||||
| @@ -5,51 +5,51 @@ import MetagraphOptimization.make_node | ||||
| @testset "Unit Tests Node Reduction" begin | ||||
|     graph = MetagraphOptimization.DAG() | ||||
|  | ||||
|     d_exit = insert_node!(graph, make_node(DataTask(10)), false) | ||||
|     d_exit = insert_node!(graph, make_node(DataTask(10)), track = false) | ||||
|  | ||||
|     s0 = insert_node!(graph, make_node(ComputeTaskS2()), false) | ||||
|     s0 = insert_node!(graph, make_node(ComputeTaskS2()), track = false) | ||||
|  | ||||
|     ED = insert_node!(graph, make_node(DataTask(3)), false) | ||||
|     FD = insert_node!(graph, make_node(DataTask(3)), false) | ||||
|     ED = insert_node!(graph, make_node(DataTask(3)), track = false) | ||||
|     FD = insert_node!(graph, make_node(DataTask(3)), track = false) | ||||
|  | ||||
|     EC = insert_node!(graph, make_node(ComputeTaskV()), false) | ||||
|     FC = insert_node!(graph, make_node(ComputeTaskV()), false) | ||||
|     EC = insert_node!(graph, make_node(ComputeTaskV()), track = false) | ||||
|     FC = insert_node!(graph, make_node(ComputeTaskV()), track = false) | ||||
|  | ||||
|     A1D = insert_node!(graph, make_node(DataTask(4)), false) | ||||
|     B1D_1 = insert_node!(graph, make_node(DataTask(4)), false) | ||||
|     B1D_2 = insert_node!(graph, make_node(DataTask(4)), false) | ||||
|     C1D = insert_node!(graph, make_node(DataTask(4)), false) | ||||
|     A1D = insert_node!(graph, make_node(DataTask(4)), track = false) | ||||
|     B1D_1 = insert_node!(graph, make_node(DataTask(4)), track = false) | ||||
|     B1D_2 = insert_node!(graph, make_node(DataTask(4)), track = false) | ||||
|     C1D = insert_node!(graph, make_node(DataTask(4)), track = false) | ||||
|  | ||||
|     A1C = insert_node!(graph, make_node(ComputeTaskU()), false) | ||||
|     B1C_1 = insert_node!(graph, make_node(ComputeTaskU()), false) | ||||
|     B1C_2 = insert_node!(graph, make_node(ComputeTaskU()), false) | ||||
|     C1C = insert_node!(graph, make_node(ComputeTaskU()), false) | ||||
|     A1C = insert_node!(graph, make_node(ComputeTaskU()), track = false) | ||||
|     B1C_1 = insert_node!(graph, make_node(ComputeTaskU()), track = false) | ||||
|     B1C_2 = insert_node!(graph, make_node(ComputeTaskU()), track = false) | ||||
|     C1C = insert_node!(graph, make_node(ComputeTaskU()), track = false) | ||||
|  | ||||
|     AD = insert_node!(graph, make_node(DataTask(5)), false) | ||||
|     BD = insert_node!(graph, make_node(DataTask(5)), false) | ||||
|     CD = insert_node!(graph, make_node(DataTask(5)), false) | ||||
|     AD = insert_node!(graph, make_node(DataTask(5)), track = false) | ||||
|     BD = insert_node!(graph, make_node(DataTask(5)), track = false) | ||||
|     CD = insert_node!(graph, make_node(DataTask(5)), track = false) | ||||
|  | ||||
|     insert_edge!(graph, s0, d_exit, false) | ||||
|     insert_edge!(graph, ED, s0, false) | ||||
|     insert_edge!(graph, FD, s0, false) | ||||
|     insert_edge!(graph, EC, ED, false) | ||||
|     insert_edge!(graph, FC, FD, false) | ||||
|     insert_edge!(graph, s0, d_exit, track = false) | ||||
|     insert_edge!(graph, ED, s0, track = false) | ||||
|     insert_edge!(graph, FD, s0, track = false) | ||||
|     insert_edge!(graph, EC, ED, track = false) | ||||
|     insert_edge!(graph, FC, FD, track = false) | ||||
|  | ||||
|     insert_edge!(graph, A1D, EC, false) | ||||
|     insert_edge!(graph, B1D_1, EC, false) | ||||
|     insert_edge!(graph, A1D, EC, track = false) | ||||
|     insert_edge!(graph, B1D_1, EC, track = false) | ||||
|  | ||||
|     insert_edge!(graph, B1D_2, FC, false) | ||||
|     insert_edge!(graph, C1D, FC, false) | ||||
|     insert_edge!(graph, B1D_2, FC, track = false) | ||||
|     insert_edge!(graph, C1D, FC, track = false) | ||||
|  | ||||
|     insert_edge!(graph, A1C, A1D, false) | ||||
|     insert_edge!(graph, B1C_1, B1D_1, false) | ||||
|     insert_edge!(graph, B1C_2, B1D_2, false) | ||||
|     insert_edge!(graph, C1C, C1D, false) | ||||
|     insert_edge!(graph, A1C, A1D, track = false) | ||||
|     insert_edge!(graph, B1C_1, B1D_1, track = false) | ||||
|     insert_edge!(graph, B1C_2, B1D_2, track = false) | ||||
|     insert_edge!(graph, C1C, C1D, track = false) | ||||
|  | ||||
|     insert_edge!(graph, AD, A1C, false) | ||||
|     insert_edge!(graph, BD, B1C_1, false) | ||||
|     insert_edge!(graph, BD, B1C_2, false) | ||||
|     insert_edge!(graph, CD, C1C, false) | ||||
|     insert_edge!(graph, AD, A1C, track = false) | ||||
|     insert_edge!(graph, BD, B1C_1, track = false) | ||||
|     insert_edge!(graph, BD, B1C_2, track = false) | ||||
|     insert_edge!(graph, CD, C1C, track = false) | ||||
|  | ||||
|     @test is_valid(graph) | ||||
|  | ||||
|   | ||||
| @@ -6,9 +6,12 @@ using Test | ||||
|     include("unit_tests_tasks.jl") | ||||
|     include("unit_tests_nodes.jl") | ||||
|     include("unit_tests_properties.jl") | ||||
|     include("unit_tests_estimator.jl") | ||||
|     include("unit_tests_abcmodel.jl") | ||||
|     include("node_reduction.jl") | ||||
|     include("unit_tests_graph.jl") | ||||
|     include("unit_tests_execution.jl") | ||||
|     include("unit_tests_optimization.jl") | ||||
|  | ||||
|     include("known_graphs.jl") | ||||
| end | ||||
|   | ||||
							
								
								
									
										26
									
								
								test/unit_tests_abcmodel.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								test/unit_tests_abcmodel.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| using MetagraphOptimization | ||||
| using QEDbase | ||||
|  | ||||
| import MetagraphOptimization.interaction_result | ||||
|  | ||||
| # default momentum used to construct the test particles | ||||
| def_momentum = SFourMomentum(1.0, 0.0, 0.0, 0.0) | ||||
|  | ||||
| testparticleTypes = [ParticleA, ParticleB, ParticleC] | ||||
| # NOTE(review): `testparticles` is not used by the tests below - confirm whether it is still needed | ||||
| testparticles = [ParticleA(def_momentum), ParticleB(def_momentum), ParticleC(def_momentum)] | ||||
|  | ||||
| @testset "Unit Tests ABC-Model" begin | ||||
|     @testset "Interaction Result" begin | ||||
|         # check every ordered pair of particle types | ||||
|         for p1 in testparticleTypes, p2 in testparticleTypes | ||||
|             if (p1 == p2) | ||||
|                 # two particles of the same type are expected to fail an assertion | ||||
|                 @test_throws AssertionError interaction_result(p1, p2) | ||||
|             else | ||||
|                 # two different types must result in the one remaining type | ||||
|                 @test interaction_result(p1, p2) == setdiff(testparticleTypes, [p1, p2])[1] | ||||
|             end | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "Vertex" begin | ||||
|         @test isapprox(MetagraphOptimization.vertex(), 1 / 137.0) | ||||
|     end | ||||
| end | ||||
| println("ABC-Model Unit Tests Complete!") | ||||
							
								
								
									
										92
									
								
								test/unit_tests_estimator.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								test/unit_tests_estimator.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,92 @@ | ||||
| # NodeFusion: the estimated data must drop by the data of the fusion's second input node, | ||||
| # while compute effort and compute intensity are expected to stay at zero. | ||||
| function test_op_specific(estimator, graph, nf::NodeFusion) | ||||
|     effect = operation_effect(estimator, graph, nf) | ||||
|     removedData = data(nf.input[2].task) | ||||
|  | ||||
|     @test isapprox(effect.data, -removedData) | ||||
|     @test isapprox(effect.computeEffort, 0; atol = eps(Float64)) | ||||
|     @test isapprox(effect.computeIntensity, 0; atol = eps(Float64)) | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| # NodeReduction: all but one of the input nodes are removed, so data and compute effort | ||||
| # must drop by the first input's cost times the number of removed nodes. | ||||
| function test_op_specific(estimator, graph, nr::NodeReduction) | ||||
|     effect = operation_effect(estimator, graph, nr) | ||||
|  | ||||
|     removedNodes = length(nr.input) - 1 | ||||
|     referenceTask = nr.input[1].task | ||||
|     removedData = data(referenceTask) * removedNodes | ||||
|     removedEffort = compute_effort(referenceTask) * removedNodes | ||||
|  | ||||
|     @test isapprox(effect.data, -removedData; atol = eps(Float64)) | ||||
|     @test isapprox(effect.computeEffort, -removedEffort) | ||||
|     @test isapprox(effect.computeIntensity, removedEffort / removedData) | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| # NodeSplit: the input node is duplicated once per additional parent, so data and compute | ||||
| # effort must grow by the node's cost times the number of copies. | ||||
| function test_op_specific(estimator, graph, ns::NodeSplit) | ||||
|     effect = operation_effect(estimator, graph, ns) | ||||
|  | ||||
|     addedCopies = length(ns.input.parents) - 1 | ||||
|     splitTask = ns.input.task | ||||
|     addedData = data(splitTask) * addedCopies | ||||
|     addedEffort = compute_effort(splitTask) * addedCopies | ||||
|  | ||||
|     @test isapprox(effect.data, addedData; atol = eps(Float64)) | ||||
|     @test isapprox(effect.computeEffort, addedEffort) | ||||
|     @test isapprox(effect.computeIntensity, addedEffort / addedData) | ||||
|  | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| # Check that the estimator's predicted effect of `op` matches the actual change of the | ||||
| # graph cost when `op` is applied, then run the operation-specific checks. | ||||
| function test_op(estimator, graph, op) | ||||
|     costBefore = graph_cost(estimator, graph) | ||||
|     predictedEffect = operation_effect(estimator, graph, op) | ||||
|  | ||||
|     # apply the operation, measure the resulting cost, and restore the graph again | ||||
|     push_operation!(graph, op) | ||||
|     costAfter = graph_cost(estimator, graph) | ||||
|     reset_graph!(graph) | ||||
|  | ||||
|     predictedCost = costBefore + predictedEffect | ||||
|     @test isapprox(predictedCost.data, costAfter.data) | ||||
|     @test isapprox(predictedCost.computeEffort, costAfter.computeEffort) | ||||
|     @test isapprox(predictedCost.computeIntensity, costAfter.computeIntensity) | ||||
|  | ||||
|     test_op_specific(estimator, graph, op) | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| @testset "Unit Tests Estimator" begin | ||||
|     # one run per input graph, each with its expected total data and compute effort | ||||
|     @testset "Global Metric Estimator" for (graph_string, exp_data, exp_computeEffort) in | ||||
|                                            zip(["AB->AB", "AB->ABBB"], [976, 10944], [53, 1075]) | ||||
|         estimator = GlobalMetricEstimator() | ||||
|  | ||||
|         @test cost_type(estimator) == CDCost | ||||
|  | ||||
|         graph = parse_dag(joinpath(@__DIR__, "..", "input", "$(graph_string).txt"), ABCModel()) | ||||
|  | ||||
|         @testset "Graph Cost" begin | ||||
|             estimate = graph_cost(estimator, graph) | ||||
|  | ||||
|             @test estimate.data == exp_data | ||||
|             @test estimate.computeEffort == exp_computeEffort | ||||
|             @test isapprox(estimate.computeIntensity, exp_computeEffort / exp_data) | ||||
|         end | ||||
|  | ||||
|         @testset "Operation Cost" begin | ||||
|             ops = get_operations(graph) | ||||
|             # iterate over copies; presumably because test_op applies and resets operations on the graph, | ||||
|             # which may change the graph's own operation collections - TODO confirm | ||||
|             nfs = copy(ops.nodeFusions) | ||||
|             nrs = copy(ops.nodeReductions) | ||||
|             nss = copy(ops.nodeSplits) | ||||
|  | ||||
|             for nf in nfs | ||||
|                 test_op(estimator, graph, nf) | ||||
|             end | ||||
|             for nr in nrs | ||||
|                 test_op(estimator, graph, nr) | ||||
|             end | ||||
|             for ns in nss | ||||
|                 test_op(estimator, graph, ns) | ||||
|             end | ||||
|         end | ||||
|     end | ||||
| end | ||||
| println("Estimator Unit Tests Complete!") | ||||
| @@ -1,31 +1,219 @@ | ||||
| import MetagraphOptimization.A | ||||
| import MetagraphOptimization.B | ||||
| import MetagraphOptimization.ParticleType | ||||
| import MetagraphOptimization.ABCParticle | ||||
| import MetagraphOptimization.interaction_result | ||||
|  | ||||
| @testset "Unit Tests Graph" begin | ||||
|     particles = Dict{ParticleType, Vector{Particle}}( | ||||
|         ( | ||||
|             A => [ | ||||
|                 Particle(0.823648, 0.0, 0.0, 0.823648, A), | ||||
|                 Particle(0.823648, -0.835061, -0.474802, 0.277915, A), | ||||
|             ] | ||||
|         ), | ||||
|         ( | ||||
|             B => [ | ||||
|                 Particle(0.823648, 0.0, 0.0, -0.823648, B), | ||||
|                 Particle(0.823648, 0.835061, 0.474802, -0.277915, B), | ||||
|             ] | ||||
|         ), | ||||
| using QEDbase | ||||
| using AccurateArithmetic | ||||
| using Random | ||||
|  | ||||
| const RTOL = sqrt(eps(Float64)) | ||||
| RNG = Random.default_rng() | ||||
|  | ||||
| # Test that two four-momenta have the same absolute energy and opposite spatial components. | ||||
| function check_particle_reverse_moment(p1::SFourMomentum, p2::SFourMomentum) | ||||
|     @test isapprox(abs(p1.E), abs(p2.E)) | ||||
|     for component in (:px, :py, :pz) | ||||
|         @test isapprox(getproperty(p1, component), -getproperty(p2, component)) | ||||
|     end | ||||
|     return nothing | ||||
| end | ||||
|  | ||||
| # Compute the reference result for the given process input by hand, as the sum of two diagrams. | ||||
| # Also runs consistency checks (via @test) on the intermediate C particles. | ||||
| function ground_truth_graph_result(input::ABCProcessInput) | ||||
|     # formula for one diagram: | ||||
|     # u_Bp * iλ * u_Ap * S_C * u_B * iλ * u_A | ||||
|     # for the second diagram: | ||||
|     # u_B * iλ * u_Ap * S_C * u_Bp * iλ * u_Ap | ||||
|     # NOTE(review): the second formula lists u_Ap twice and no u_A - possibly a typo, confirm | ||||
|     # the "u"s are all 1, we ignore the i, λ is 1/137. | ||||
|  | ||||
|     constant = (1 / 137.0)^2 | ||||
|  | ||||
|     # intermediate particle C of both diagrams: sum of the first incoming particle's momentum | ||||
|     # and the second incoming (diagram 1) or second outgoing (diagram 2) particle's momentum | ||||
|     diagram1_C = ParticleC(input.inParticles[1].momentum + input.inParticles[2].momentum) | ||||
|     diagram2_C = ParticleC(input.inParticles[1].momentum + input.outParticles[2].momentum) | ||||
|  | ||||
|     # the same intermediate particles, built from the remaining two particles of each diagram | ||||
|     diagram1_Cp = ParticleC(input.outParticles[1].momentum + input.outParticles[2].momentum) | ||||
|     diagram2_Cp = ParticleC(input.outParticles[1].momentum + input.inParticles[2].momentum) | ||||
|  | ||||
|     # both constructions must agree up to the sign of the momentum components and have the same mass squared | ||||
|     check_particle_reverse_moment(diagram1_Cp.momentum, diagram1_C.momentum) | ||||
|     check_particle_reverse_moment(diagram2_Cp.momentum, diagram2_C.momentum) | ||||
|     @test isapprox(getMass2(diagram1_C.momentum), getMass2(diagram1_Cp.momentum)) | ||||
|     @test isapprox(getMass2(diagram2_C.momentum), getMass2(diagram2_Cp.momentum)) | ||||
|  | ||||
|     inner1 = MetagraphOptimization.inner_edge(diagram1_C) | ||||
|     inner2 = MetagraphOptimization.inner_edge(diagram2_C) | ||||
|  | ||||
|     diagram1_result = inner1 * constant | ||||
|     diagram2_result = inner2 * constant | ||||
|  | ||||
|     # sum both diagrams with sum_kbn from AccurateArithmetic | ||||
|     return sum_kbn([diagram1_result, diagram2_result]) | ||||
| end | ||||
|  | ||||
| @testset "Unit Tests Execution" begin | ||||
|     machine = get_machine_info() | ||||
|  | ||||
|     process_2_2 = ABCProcessDescription( | ||||
|         Dict{Type, Int64}(ParticleA => 1, ParticleB => 1), | ||||
|         Dict{Type, Int64}(ParticleA => 1, ParticleB => 1), | ||||
|     ) | ||||
|  | ||||
|     expected_result = 5.5320567694746876e-5 | ||||
|     particles_2_2 = ABCProcessInput( | ||||
|         process_2_2, | ||||
|         ABCParticle[ | ||||
|             ParticleA(SFourMomentum(0.823648, 0.0, 0.0, 0.823648)), | ||||
|             ParticleB(SFourMomentum(0.823648, 0.0, 0.0, -0.823648)), | ||||
|         ], | ||||
|         ABCParticle[ | ||||
|             ParticleA(SFourMomentum(0.823648, -0.835061, -0.474802, 0.277915)), | ||||
|             ParticleB(SFourMomentum(0.823648, 0.835061, 0.474802, -0.277915)), | ||||
|         ], | ||||
|     ) | ||||
|     expected_result = ground_truth_graph_result(particles_2_2) | ||||
|  | ||||
|     for _ in 1:10   # test in a loop because graph layout should not change the result | ||||
|         graph = parse_abc(joinpath(@__DIR__, "..", "input", "AB->AB.txt")) | ||||
|         @test isapprox(execute(graph, particles), expected_result; rtol = 0.001) | ||||
|     @testset "AB->AB no optimization" begin | ||||
|         for _ in 1:10   # test in a loop because graph layout should not change the result | ||||
|             graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel()) | ||||
|             @test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL) | ||||
|  | ||||
|         code = MetagraphOptimization.gen_code(graph) | ||||
|         @test isapprox(execute(code, particles), expected_result; rtol = 0.001) | ||||
|             # graph should be fully scheduled after being executed | ||||
|             @test is_scheduled(graph) | ||||
|  | ||||
|             func = get_compute_function(graph, process_2_2, machine) | ||||
|             @test isapprox(func(particles_2_2), expected_result; rtol = RTOL) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "AB->AB after random walk" begin | ||||
|         for i in 1:200 | ||||
|             graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel()) | ||||
|             optimize!(RandomWalkOptimizer(RNG), graph, 50) | ||||
|  | ||||
|             @test is_valid(graph) | ||||
|  | ||||
|             @test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL) | ||||
|  | ||||
|             # graph should be fully scheduled after being executed | ||||
|             @test is_scheduled(graph) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     process_2_4 = ABCProcessDescription( | ||||
|         Dict{Type, Int64}(ParticleA => 1, ParticleB => 1), | ||||
|         Dict{Type, Int64}(ParticleA => 1, ParticleB => 3), | ||||
|     ) | ||||
|     particles_2_4 = gen_process_input(process_2_4) | ||||
|     graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel()) | ||||
|     expected_result = execute(graph, process_2_4, machine, particles_2_4) | ||||
|  | ||||
|     @testset "AB->ABBB no optimization" begin | ||||
|         for _ in 1:5   # test in a loop because graph layout should not change the result | ||||
|             graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel()) | ||||
|             @test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = RTOL) | ||||
|  | ||||
|             func = get_compute_function(graph, process_2_4, machine) | ||||
|             @test isapprox(func(particles_2_4), expected_result; rtol = RTOL) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "AB->ABBB after random walk" begin | ||||
|         for i in 1:50 | ||||
|             graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel()) | ||||
|             optimize!(RandomWalkOptimizer(RNG), graph, 100) | ||||
|             @test is_valid(graph) | ||||
|  | ||||
|             @test isapprox(execute(graph, process_2_4, machine, particles_2_4), expected_result; rtol = RTOL) | ||||
|         end | ||||
|     end | ||||
|  | ||||
|     @testset "AB->AB large sum fusion" for _ in 1:20 | ||||
|         graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel()) | ||||
|  | ||||
|         # push a fusion with the sum node | ||||
|         ops = get_operations(graph) | ||||
|         for fusion in ops.nodeFusions | ||||
|             if isa(fusion.input[3].task, ComputeTaskSum) | ||||
|                 push_operation!(graph, fusion) | ||||
|                 break | ||||
|             end | ||||
|         end | ||||
|  | ||||
|         # push up to 15 more fusions with the fused node (at most one per iteration) | ||||
|         for _ in 1:15 | ||||
|             ops = get_operations(graph) | ||||
|             for fusion in ops.nodeFusions | ||||
|                 if isa(fusion.input[3].task, FusedComputeTask) | ||||
|                     push_operation!(graph, fusion) | ||||
|                     break | ||||
|                 end | ||||
|             end | ||||
|         end | ||||
|  | ||||
|         # try execute | ||||
|         @test is_valid(graph) | ||||
|         expected_result = ground_truth_graph_result(particles_2_2) | ||||
|         @test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL) | ||||
|     end | ||||
|  | ||||
|  | ||||
|     @testset "AB->AB large sum fusion" for _ in 1:20 | ||||
|         graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel()) | ||||
|  | ||||
|         # push a fusion with the sum node | ||||
|         ops = get_operations(graph) | ||||
|         for fusion in ops.nodeFusions | ||||
|             if isa(fusion.input[3].task, ComputeTaskSum) | ||||
|                 push_operation!(graph, fusion) | ||||
|                 break | ||||
|             end | ||||
|         end | ||||
|  | ||||
|         # push up to 15 more fusions with the fused node (at most one per iteration) | ||||
|         for _ in 1:15 | ||||
|             ops = get_operations(graph) | ||||
|             for fusion in ops.nodeFusions | ||||
|                 if isa(fusion.input[3].task, FusedComputeTask) | ||||
|                     push_operation!(graph, fusion) | ||||
|                     break | ||||
|                 end | ||||
|             end | ||||
|         end | ||||
|  | ||||
|         # try execute | ||||
|         @test is_valid(graph) | ||||
|         expected_result = ground_truth_graph_result(particles_2_2) | ||||
|         @test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL) | ||||
|     end | ||||
|  | ||||
|     @testset "AB->AB fusion edge case" for _ in 1:20 | ||||
|         graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->AB.txt"), ABCModel()) | ||||
|  | ||||
|         # push two fusions with ComputeTaskV | ||||
|         for _ in 1:2 | ||||
|             ops = get_operations(graph) | ||||
|             for fusion in ops.nodeFusions | ||||
|                 if isa(fusion.input[1].task, ComputeTaskV) | ||||
|                     push_operation!(graph, fusion) | ||||
|                     break | ||||
|                 end | ||||
|             end | ||||
|         end | ||||
|  | ||||
|         # push fusions until the end | ||||
|         cont = true | ||||
|         while cont | ||||
|             cont = false | ||||
|             ops = get_operations(graph) | ||||
|             for fusion in ops.nodeFusions | ||||
|                 if isa(fusion.input[1].task, FusedComputeTask) | ||||
|                     push_operation!(graph, fusion) | ||||
|                     cont = true | ||||
|                     break | ||||
|                 end | ||||
|             end | ||||
|         end | ||||
|  | ||||
|         # try execute | ||||
|         @test is_valid(graph) | ||||
|         expected_result = ground_truth_graph_result(particles_2_2) | ||||
|         @test isapprox(execute(graph, process_2_2, machine, particles_2_2), expected_result; rtol = RTOL) | ||||
|     end | ||||
|  | ||||
| end | ||||
| println("Execution Unit Tests Complete!") | ||||
|   | ||||
| @@ -11,104 +11,101 @@ import MetagraphOptimization.partners | ||||
|     @test length(graph.appliedOperations) == 0 | ||||
|     @test length(graph.operationsToApply) == 0 | ||||
|     @test length(graph.dirtyNodes) == 0 | ||||
|     @test length(graph.diff) == | ||||
|           (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0) | ||||
|     @test length(get_operations(graph)) == | ||||
|           (nodeFusions = 0, nodeReductions = 0, nodeSplits = 0) | ||||
|     @test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0) | ||||
|     @test length(get_operations(graph)) == (nodeFusions = 0, nodeReductions = 0, nodeSplits = 0) | ||||
|  | ||||
|     # data from s0 to output (exit node) | ||||
|     d_exit = insert_node!(graph, make_node(DataTask(10)), false) | ||||
|     d_exit = insert_node!(graph, make_node(DataTask(10)), track = false) | ||||
|  | ||||
|     @test length(graph.nodes) == 1 | ||||
|     @test length(graph.dirtyNodes) == 1 | ||||
|  | ||||
|     # final s compute | ||||
|     s0 = insert_node!(graph, make_node(ComputeTaskS2()), false) | ||||
|     s0 = insert_node!(graph, make_node(ComputeTaskS2()), track = false) | ||||
|  | ||||
|     @test length(graph.nodes) == 2 | ||||
|     @test length(graph.dirtyNodes) == 2 | ||||
|  | ||||
|     # data from v0 and v1 to s0 | ||||
|     d_v0_s0 = insert_node!(graph, make_node(DataTask(5)), false) | ||||
|     d_v1_s0 = insert_node!(graph, make_node(DataTask(5)), false) | ||||
|     d_v0_s0 = insert_node!(graph, make_node(DataTask(5)), track = false) | ||||
|     d_v1_s0 = insert_node!(graph, make_node(DataTask(5)), track = false) | ||||
|  | ||||
|     # v0 and v1 compute | ||||
|     v0 = insert_node!(graph, make_node(ComputeTaskV()), false) | ||||
|     v1 = insert_node!(graph, make_node(ComputeTaskV()), false) | ||||
|     v0 = insert_node!(graph, make_node(ComputeTaskV()), track = false) | ||||
|     v1 = insert_node!(graph, make_node(ComputeTaskV()), track = false) | ||||
|  | ||||
|     # data from uB, uA, uBp and uAp to v0 and v1 | ||||
|     d_uB_v0 = insert_node!(graph, make_node(DataTask(3)), false) | ||||
|     d_uA_v0 = insert_node!(graph, make_node(DataTask(3)), false) | ||||
|     d_uBp_v1 = insert_node!(graph, make_node(DataTask(3)), false) | ||||
|     d_uAp_v1 = insert_node!(graph, make_node(DataTask(3)), false) | ||||
|     d_uB_v0 = insert_node!(graph, make_node(DataTask(3)), track = false) | ||||
|     d_uA_v0 = insert_node!(graph, make_node(DataTask(3)), track = false) | ||||
|     d_uBp_v1 = insert_node!(graph, make_node(DataTask(3)), track = false) | ||||
|     d_uAp_v1 = insert_node!(graph, make_node(DataTask(3)), track = false) | ||||
|  | ||||
|     # uB, uA, uBp and uAp computes | ||||
|     uB = insert_node!(graph, make_node(ComputeTaskU()), false) | ||||
|     uA = insert_node!(graph, make_node(ComputeTaskU()), false) | ||||
|     uBp = insert_node!(graph, make_node(ComputeTaskU()), false) | ||||
|     uAp = insert_node!(graph, make_node(ComputeTaskU()), false) | ||||
|     uB = insert_node!(graph, make_node(ComputeTaskU()), track = false) | ||||
|     uA = insert_node!(graph, make_node(ComputeTaskU()), track = false) | ||||
|     uBp = insert_node!(graph, make_node(ComputeTaskU()), track = false) | ||||
|     uAp = insert_node!(graph, make_node(ComputeTaskU()), track = false) | ||||
|  | ||||
|     # data from PB, PA, PBp and PAp to uB, uA, uBp and uAp | ||||
|     d_PB_uB = insert_node!(graph, make_node(DataTask(6)), false) | ||||
|     d_PA_uA = insert_node!(graph, make_node(DataTask(6)), false) | ||||
|     d_PBp_uBp = insert_node!(graph, make_node(DataTask(6)), false) | ||||
|     d_PAp_uAp = insert_node!(graph, make_node(DataTask(6)), false) | ||||
|     d_PB_uB = insert_node!(graph, make_node(DataTask(6)), track = false) | ||||
|     d_PA_uA = insert_node!(graph, make_node(DataTask(6)), track = false) | ||||
|     d_PBp_uBp = insert_node!(graph, make_node(DataTask(6)), track = false) | ||||
|     d_PAp_uAp = insert_node!(graph, make_node(DataTask(6)), track = false) | ||||
|  | ||||
|     # P computes PB, PA, PBp and PAp | ||||
|     PB = insert_node!(graph, make_node(ComputeTaskP()), false) | ||||
|     PA = insert_node!(graph, make_node(ComputeTaskP()), false) | ||||
|     PBp = insert_node!(graph, make_node(ComputeTaskP()), false) | ||||
|     PAp = insert_node!(graph, make_node(ComputeTaskP()), false) | ||||
|     PB = insert_node!(graph, make_node(ComputeTaskP()), track = false) | ||||
|     PA = insert_node!(graph, make_node(ComputeTaskP()), track = false) | ||||
|     PBp = insert_node!(graph, make_node(ComputeTaskP()), track = false) | ||||
|     PAp = insert_node!(graph, make_node(ComputeTaskP()), track = false) | ||||
|  | ||||
|     # entry nodes getting data for P computes | ||||
|     d_PB = insert_node!(graph, make_node(DataTask(4)), false) | ||||
|     d_PA = insert_node!(graph, make_node(DataTask(4)), false) | ||||
|     d_PBp = insert_node!(graph, make_node(DataTask(4)), false) | ||||
|     d_PAp = insert_node!(graph, make_node(DataTask(4)), false) | ||||
|     d_PB = insert_node!(graph, make_node(DataTask(4)), track = false) | ||||
|     d_PA = insert_node!(graph, make_node(DataTask(4)), track = false) | ||||
|     d_PBp = insert_node!(graph, make_node(DataTask(4)), track = false) | ||||
|     d_PAp = insert_node!(graph, make_node(DataTask(4)), track = false) | ||||
|  | ||||
|     @test length(graph.nodes) == 26 | ||||
|     @test length(graph.dirtyNodes) == 26 | ||||
|  | ||||
|     # now for all the edges | ||||
|     insert_edge!(graph, d_PB, PB, false) | ||||
|     insert_edge!(graph, d_PA, PA, false) | ||||
|     insert_edge!(graph, d_PBp, PBp, false) | ||||
|     insert_edge!(graph, d_PAp, PAp, false) | ||||
|     insert_edge!(graph, d_PB, PB, track = false) | ||||
|     insert_edge!(graph, d_PA, PA, track = false) | ||||
|     insert_edge!(graph, d_PBp, PBp, track = false) | ||||
|     insert_edge!(graph, d_PAp, PAp, track = false) | ||||
|  | ||||
|     insert_edge!(graph, PB, d_PB_uB, false) | ||||
|     insert_edge!(graph, PA, d_PA_uA, false) | ||||
|     insert_edge!(graph, PBp, d_PBp_uBp, false) | ||||
|     insert_edge!(graph, PAp, d_PAp_uAp, false) | ||||
|     insert_edge!(graph, PB, d_PB_uB, track = false) | ||||
|     insert_edge!(graph, PA, d_PA_uA, track = false) | ||||
|     insert_edge!(graph, PBp, d_PBp_uBp, track = false) | ||||
|     insert_edge!(graph, PAp, d_PAp_uAp, track = false) | ||||
|  | ||||
|     insert_edge!(graph, d_PB_uB, uB, false) | ||||
|     insert_edge!(graph, d_PA_uA, uA, false) | ||||
|     insert_edge!(graph, d_PBp_uBp, uBp, false) | ||||
|     insert_edge!(graph, d_PAp_uAp, uAp, false) | ||||
|     insert_edge!(graph, d_PB_uB, uB, track = false) | ||||
|     insert_edge!(graph, d_PA_uA, uA, track = false) | ||||
|     insert_edge!(graph, d_PBp_uBp, uBp, track = false) | ||||
|     insert_edge!(graph, d_PAp_uAp, uAp, track = false) | ||||
|  | ||||
|     insert_edge!(graph, uB, d_uB_v0, false) | ||||
|     insert_edge!(graph, uA, d_uA_v0, false) | ||||
|     insert_edge!(graph, uBp, d_uBp_v1, false) | ||||
|     insert_edge!(graph, uAp, d_uAp_v1, false) | ||||
|     insert_edge!(graph, uB, d_uB_v0, track = false) | ||||
|     insert_edge!(graph, uA, d_uA_v0, track = false) | ||||
|     insert_edge!(graph, uBp, d_uBp_v1, track = false) | ||||
|     insert_edge!(graph, uAp, d_uAp_v1, track = false) | ||||
|  | ||||
|     insert_edge!(graph, d_uB_v0, v0, false) | ||||
|     insert_edge!(graph, d_uA_v0, v0, false) | ||||
|     insert_edge!(graph, d_uBp_v1, v1, false) | ||||
|     insert_edge!(graph, d_uAp_v1, v1, false) | ||||
|     insert_edge!(graph, d_uB_v0, v0, track = false) | ||||
|     insert_edge!(graph, d_uA_v0, v0, track = false) | ||||
|     insert_edge!(graph, d_uBp_v1, v1, track = false) | ||||
|     insert_edge!(graph, d_uAp_v1, v1, track = false) | ||||
|  | ||||
|     insert_edge!(graph, v0, d_v0_s0, false) | ||||
|     insert_edge!(graph, v1, d_v1_s0, false) | ||||
|     insert_edge!(graph, v0, d_v0_s0, track = false) | ||||
|     insert_edge!(graph, v1, d_v1_s0, track = false) | ||||
|  | ||||
|     insert_edge!(graph, d_v0_s0, s0, false) | ||||
|     insert_edge!(graph, d_v1_s0, s0, false) | ||||
|     insert_edge!(graph, d_v0_s0, s0, track = false) | ||||
|     insert_edge!(graph, d_v1_s0, s0, track = false) | ||||
|  | ||||
|     insert_edge!(graph, s0, d_exit, false) | ||||
|     insert_edge!(graph, s0, d_exit, track = false) | ||||
|  | ||||
|     @test length(graph.nodes) == 26 | ||||
|     @test length(graph.appliedOperations) == 0 | ||||
|     @test length(graph.operationsToApply) == 0 | ||||
|     @test length(graph.dirtyNodes) == 26 | ||||
|     @test length(graph.diff) == | ||||
|           (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0) | ||||
|     @test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0) | ||||
|  | ||||
|     @test is_valid(graph) | ||||
|  | ||||
| @@ -135,10 +132,15 @@ import MetagraphOptimization.partners | ||||
|     @test length(siblings(s0)) == 1 | ||||
|  | ||||
|     operations = get_operations(graph) | ||||
|     @test length(operations) == | ||||
|           (nodeFusions = 10, nodeReductions = 0, nodeSplits = 0) | ||||
|     @test length(operations) == (nodeFusions = 10, nodeReductions = 0, nodeSplits = 0) | ||||
|     @test length(graph.dirtyNodes) == 0 | ||||
|  | ||||
|     i = 0 | ||||
|     for op in operations | ||||
|         i += 1 | ||||
|     end | ||||
|     @test i == 10 | ||||
|  | ||||
|     @test operations == get_operations(graph) | ||||
|     nf = first(operations.nodeFusions) | ||||
|  | ||||
| @@ -157,8 +159,7 @@ import MetagraphOptimization.partners | ||||
|     @test length(graph.operationsToApply) == 1 | ||||
|     @test first(graph.operationsToApply) == nf | ||||
|     @test length(graph.dirtyNodes) == 0 | ||||
|     @test length(graph.diff) == | ||||
|           (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0) | ||||
|     @test length(graph.diff) == (addedNodes = 0, removedNodes = 0, addedEdges = 0, removedEdges = 0) | ||||
|  | ||||
|     # this applies pending operations | ||||
|     properties = get_properties(graph) | ||||
| @@ -176,8 +177,7 @@ import MetagraphOptimization.partners | ||||
|     operations = get_operations(graph) | ||||
|     @test length(graph.dirtyNodes) == 0 | ||||
|  | ||||
|     @test length(operations) == | ||||
|           (nodeFusions = 9, nodeReductions = 0, nodeSplits = 0) | ||||
|     @test length(operations) == (nodeFusions = 9, nodeReductions = 0, nodeSplits = 0) | ||||
|     @test !isempty(operations) | ||||
|  | ||||
|     possibleNF = 9 | ||||
| @@ -185,14 +185,12 @@ import MetagraphOptimization.partners | ||||
|         push_operation!(graph, first(operations.nodeFusions)) | ||||
|         operations = get_operations(graph) | ||||
|         possibleNF = possibleNF - 1 | ||||
|         @test length(operations) == | ||||
|               (nodeFusions = possibleNF, nodeReductions = 0, nodeSplits = 0) | ||||
|         @test length(operations) == (nodeFusions = possibleNF, nodeReductions = 0, nodeSplits = 0) | ||||
|     end | ||||
|  | ||||
|     @test isempty(operations) | ||||
|  | ||||
|     @test length(operations) == | ||||
|           (nodeFusions = 0, nodeReductions = 0, nodeSplits = 0) | ||||
|     @test length(operations) == (nodeFusions = 0, nodeReductions = 0, nodeSplits = 0) | ||||
|     @test length(graph.dirtyNodes) == 0 | ||||
|     @test length(graph.nodes) == 6 | ||||
|     @test length(graph.appliedOperations) == 10 | ||||
| @@ -213,8 +211,7 @@ import MetagraphOptimization.partners | ||||
|     @test properties.computeIntensity ≈ 28 / 62 | ||||
|  | ||||
|     operations = get_operations(graph) | ||||
|     @test length(operations) == | ||||
|           (nodeFusions = 10, nodeReductions = 0, nodeSplits = 0) | ||||
|     @test length(operations) == (nodeFusions = 10, nodeReductions = 0, nodeSplits = 0) | ||||
|  | ||||
|     @test is_valid(graph) | ||||
| end | ||||
|   | ||||
| @@ -3,8 +3,7 @@ | ||||
|     nC1 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskU()) | ||||
|     nC2 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskV()) | ||||
|     nC3 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskP()) | ||||
|     nC4 = | ||||
|         MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskSum()) | ||||
|     nC4 = MetagraphOptimization.make_node(MetagraphOptimization.ComputeTaskSum()) | ||||
|  | ||||
|     nD1 = MetagraphOptimization.make_node(MetagraphOptimization.DataTask(10)) | ||||
|     nD2 = MetagraphOptimization.make_node(MetagraphOptimization.DataTask(20)) | ||||
|   | ||||
							
								
								
									
										42
									
								
								test/unit_tests_optimization.jl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								test/unit_tests_optimization.jl
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| using Random | ||||
|  | ||||
| RNG = Random.default_rng() | ||||
|  | ||||
| @testset "Unit Tests Optimization" begin | ||||
|     graph = parse_dag(joinpath(@__DIR__, "..", "input", "AB->ABBB.txt"), ABCModel()) | ||||
|  | ||||
|     # create the optimizers | ||||
|     FIXPOINT_OPTIMIZERS = [GreedyOptimizer(GlobalMetricEstimator()), ReductionOptimizer()] | ||||
|     NO_FIXPOINT_OPTIMIZERS = [RandomWalkOptimizer(RNG)] | ||||
|  | ||||
|     @testset "Optimizer $optimizer" for optimizer in vcat(NO_FIXPOINT_OPTIMIZERS, FIXPOINT_OPTIMIZERS) | ||||
|         @test operation_stack_length(graph) == 0 | ||||
|         @test optimize_step!(optimizer, graph) | ||||
|  | ||||
|         @test !fixpoint_reached(optimizer, graph) | ||||
|         @test operation_stack_length(graph) == 1 | ||||
|  | ||||
|         @test optimize!(optimizer, graph, 10) | ||||
|  | ||||
|         @test !fixpoint_reached(optimizer, graph) | ||||
|  | ||||
|         reset_graph!(graph) | ||||
|     end | ||||
|  | ||||
|     @testset "Fixpoint optimizer $optimizer" for optimizer in FIXPOINT_OPTIMIZERS | ||||
|         @test operation_stack_length(graph) == 0 | ||||
|  | ||||
|         optimize_to_fixpoint!(optimizer, graph) | ||||
|  | ||||
|         @test fixpoint_reached(optimizer, graph) | ||||
|         @test !optimize_step!(optimizer, graph) | ||||
|         @test !optimize!(optimizer, graph, 10) | ||||
|  | ||||
|         reset_graph!(graph) | ||||
|     end | ||||
|  | ||||
|     @testset "No fixpoint optimizer $optimizer" for optimizer in NO_FIXPOINT_OPTIMIZERS | ||||
|         @test_throws MethodError optimize_to_fixpoint!(optimizer, graph) | ||||
|     end | ||||
| end | ||||
| println("Optimization Unit Tests Complete!") | ||||
| @@ -5,18 +5,10 @@ | ||||
|     @test prop.data == 0.0 | ||||
|     @test prop.computeEffort == 0.0 | ||||
|     @test prop.computeIntensity == 0.0 | ||||
|     @test prop.cost == 0.0 | ||||
|     @test prop.noNodes == 0.0 | ||||
|     @test prop.noEdges == 0.0 | ||||
|  | ||||
|     prop2 = ( | ||||
|         data = 5.0, | ||||
|         computeEffort = 6.0, | ||||
|         computeIntensity = 6.0 / 5.0, | ||||
|         cost = 0.0, | ||||
|         noNodes = 2, | ||||
|         noEdges = 3, | ||||
|     )::GraphProperties | ||||
|     prop2 = (data = 5.0, computeEffort = 6.0, computeIntensity = 6.0 / 5.0, noNodes = 2, noEdges = 3)::GraphProperties | ||||
|  | ||||
|     @test prop + prop2 == prop2 | ||||
|     @test prop2 - prop == prop2 | ||||
| @@ -25,27 +17,18 @@ | ||||
|     @test negProp.data == -5.0 | ||||
|     @test negProp.computeEffort == -6.0 | ||||
|     @test negProp.computeIntensity == 6.0 / 5.0 | ||||
|     @test negProp.cost == 0.0 | ||||
|     @test negProp.noNodes == -2 | ||||
|     @test negProp.noEdges == -3 | ||||
|  | ||||
|     @test negProp + prop2 == GraphProperties() | ||||
|  | ||||
|     prop3 = ( | ||||
|         data = 7.0, | ||||
|         computeEffort = 3.0, | ||||
|         computeIntensity = 7.0 / 3.0, | ||||
|         cost = 0.0, | ||||
|         noNodes = -3, | ||||
|         noEdges = 2, | ||||
|     )::GraphProperties | ||||
|     prop3 = (data = 7.0, computeEffort = 3.0, computeIntensity = 7.0 / 3.0, noNodes = -3, noEdges = 2)::GraphProperties | ||||
|  | ||||
|     propSum = prop2 + prop3 | ||||
|  | ||||
|     @test propSum.data == 12.0 | ||||
|     @test propSum.computeEffort == 9.0 | ||||
|     @test propSum.computeIntensity == 9.0 / 12.0 | ||||
|     @test propSum.cost == 0.0 | ||||
|     @test propSum.noNodes == -1 | ||||
|     @test propSum.noEdges == 5 | ||||
| end | ||||
|   | ||||
| @@ -5,9 +5,7 @@ | ||||
|     @test MetagraphOptimization.bytes_to_human_readable(1025) == "1.001 KiB" | ||||
|     @test MetagraphOptimization.bytes_to_human_readable(684235) == "668.2 KiB" | ||||
|     @test MetagraphOptimization.bytes_to_human_readable(86214576) == "82.22 MiB" | ||||
|     @test MetagraphOptimization.bytes_to_human_readable(9241457698) == | ||||
|           "8.607 GiB" | ||||
|     @test MetagraphOptimization.bytes_to_human_readable(3218598654367) == | ||||
|           "2.927 TiB" | ||||
|     @test MetagraphOptimization.bytes_to_human_readable(9241457698) == "8.607 GiB" | ||||
|     @test MetagraphOptimization.bytes_to_human_readable(3218598654367) == "2.927 TiB" | ||||
| end | ||||
| println("Utility Unit Tests Complete!") | ||||
|   | ||||
		Reference in New Issue
	
	Block a user