Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
9cd2bcdba2 | |||
82ed774b7e | |||
92e0eeaaef | |||
c90346e948 |
@ -22,7 +22,9 @@ jobs:
|
||||
version: '1.9.2'
|
||||
|
||||
- name: Instantiate
|
||||
run: julia --project=./ -e 'using Pkg; Pkg.instantiate()'
|
||||
run: |
|
||||
julia --project=./ -e 'using Pkg; Pkg.instantiate()'
|
||||
julia --project=./ -e 'using Pkg; Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")'
|
||||
|
||||
- name: Format check
|
||||
run: |
|
||||
@ -32,7 +34,7 @@ jobs:
|
||||
if out == ""
|
||||
exit(0)
|
||||
else
|
||||
@error "Some files have not been formatted !!!"
|
||||
@error "Some files have not been formatted!!!"
|
||||
write(stdout, out)
|
||||
exit(1)
|
||||
end'
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -5,6 +5,7 @@
|
||||
|
||||
# Files generated by invoking Julia with --track-allocation
|
||||
*.mem
|
||||
*.pb.gz
|
||||
|
||||
# System-specific files and directories generated by the BinaryProvider and BinDeps packages
|
||||
# They contain absolute paths specific to the host computer, and so should not be committed
|
||||
|
@ -15,6 +15,7 @@ QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
|
||||
QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"
|
||||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665"
|
||||
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
|
||||
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
||||
[extras]
|
||||
|
16
data/qed_ke-kke_reduction_optimizer.csv
Normal file
16
data/qed_ke-kke_reduction_optimizer.csv
Normal file
@ -0,0 +1,16 @@
|
||||
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
|
||||
0,77,101,252.0,6240.0,0.04038461538461539,0.02087051,8.691e-6,3.405098066,0.244763721,1.565749515,0.936213163
|
||||
1,76,99,246.0,6240.0,0.03942307692307692,0.020658734,9.36e-6,3.244313848,0.230460257,1.548012602,0.887605389
|
||||
2,75,97,240.0,6240.0,0.038461538461538464,0.045333482,8.74e-6,3.163679857,0.217614064,1.52780456,0.816496837
|
||||
3,74,95,234.0,6240.0,0.0375,0.020314034,9.081e-6,2.956421016,0.183415997,1.524262179,0.793770075
|
||||
4,73,93,228.0,6240.0,0.03653846153846154,0.033579409,8.52e-6,2.845414866,0.19168374,1.50907807,0.742734411
|
||||
5,72,92,228.0,6144.0,0.037109375,0.019736718,8.87e-6,2.827109937,0.207452606,1.497203204,0.719774022
|
||||
6,71,90,222.0,6144.0,0.0361328125,0.043612693,1.01e-5,2.62776692,0.166492497,1.602060948,0.668929854
|
||||
7,70,89,222.0,6048.0,0.03670634920634921,0.042731148,1.053e-5,2.631288029,0.185812224,1.514154792,0.694503947
|
||||
8,69,87,216.0,6048.0,0.03571428571428571,0.042148711,8.19e-6,2.493343257,0.183595081,1.506478504,0.652420896
|
||||
9,68,86,216.0,5952.0,0.036290322580645164,0.041568955,8.571e-6,2.487317627,0.147773078,1.472141844,0.653143947
|
||||
10,67,85,216.0,5856.0,0.036885245901639344,0.041307868,9.13e-6,2.491634709,0.175728138,1.482162906,0.63058774
|
||||
11,66,84,216.0,5760.0,0.0375,0.041265756,8.43e-6,2.516916643,0.180420842,1.463053866,0.650627815
|
||||
12,65,83,205.0,5760.0,0.035590277777777776,0.039711293,9.22e-6,2.479664249,0.178013433,1.459566956,0.652477867
|
||||
13,64,82,205.0,5664.0,0.03619350282485876,0.030866093,8.87e-6,2.485424881,0.179983608,1.564961227,0.647932468
|
||||
14,63,81,205.0,5568.0,0.03681752873563218,0.029946916,8.93e-6,2.469922022,0.179443854,1.485935831,0.651804318
|
|
176
data/qed_ke-kkke_greedy_optimizer_GlobalMetricEstimator().csv
Normal file
176
data/qed_ke-kkke_greedy_optimizer_GlobalMetricEstimator().csv
Normal file
@ -0,0 +1,176 @@
|
||||
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
|
||||
0,356,493,1399.0,30528.0,0.0458267819706499,0.077070556,2.6761e-5,17.804336617,0.960385595,10.618577031,4.95440474
|
||||
1,354,491,1399.0,30432.0,0.04597134595162986,1.030851104,2.37e-5,17.726472964,0.933074463,2.174912444,4.959474851
|
||||
2,352,489,1399.0,30336.0,0.04611682489451477,0.376282553,2.3861e-5,17.935912907,0.968087391,2.238665483,4.912705328
|
||||
3,350,487,1399.0,30240.0,0.04626322751322751,0.076651194,4.2451e-5,17.976779783,0.977130996,2.246167674,4.954520005
|
||||
4,348,485,1399.0,30144.0,0.04641056263269639,0.223709216,2.8031e-5,17.67129111,0.97799748,2.175788856,4.923999491
|
||||
5,346,483,1399.0,30048.0,0.04655883919062833,0.076034997,4.3191e-5,17.766336956,0.967055891,2.187609178,4.922574669
|
||||
6,344,481,1399.0,29952.0,0.04670806623931624,0.398917781,4.3422e-5,17.709032771,0.971142926,2.170963978,4.917191185
|
||||
7,342,479,1399.0,29856.0,0.04685825294748124,0.352569343,4.3801e-5,17.690255833,0.952966242,2.159295978,4.945842152
|
||||
8,340,477,1399.0,29760.0,0.04700940860215054,0.117620751,4.2992e-5,17.905787431,0.749896479,2.19940915,4.922882222
|
||||
9,338,475,1399.0,29664.0,0.04716154261057174,0.318053898,2.3481e-5,17.522775542,0.745113955,2.202366151,4.928734427
|
||||
10,336,473,1399.0,29568.0,0.047314664502164504,0.184069985,2.3381e-5,17.529935879,0.74637911,2.238397648,4.919919125
|
||||
11,334,471,1399.0,29472.0,0.047468783930510315,0.086029218,2.365e-5,17.560859257,0.75559668,2.249242933,4.956561058
|
||||
12,332,469,1399.0,29376.0,0.04762391067538126,0.077326472,2.4361e-5,17.559317648,0.746726769,2.1818156,4.938490196
|
||||
13,330,467,1399.0,29280.0,0.047780054644808743,0.169738661,2.342e-5,17.517109121,0.751453942,2.187781478,4.923659727
|
||||
14,328,465,1399.0,29184.0,0.047937225877192985,0.077817676,2.315e-5,17.533304215,0.745481303,2.209343496,4.960503415
|
||||
15,326,463,1399.0,29088.0,0.04809543454345434,0.171584444,2.352e-5,17.579912576,0.754778436,2.210370024,4.934281254
|
||||
16,324,461,1399.0,28992.0,0.04825469094922737,0.084223667,2.305e-5,17.570464754,0.751290178,2.22797709,4.939806799
|
||||
17,322,459,1399.0,28896.0,0.04841500553709856,0.123005102,2.3661e-5,17.605650973,0.756929676,2.269940175,4.937928844
|
||||
18,320,457,1399.0,28800.0,0.04857638888888889,0.086677986,2.37e-5,17.5539199,0.746367967,2.264938904,4.959258096
|
||||
19,318,455,1399.0,28704.0,0.04873885172798216,0.12293158,2.3711e-5,17.609395222,0.755783994,2.264754078,4.92827168
|
||||
20,316,453,1399.0,28608.0,0.04890240492170023,0.124475123,2.4281e-5,17.597716228,0.75106304,2.20218749,4.933120236
|
||||
21,314,451,1399.0,28512.0,0.04906705948372615,0.112172177,2.6391e-5,17.623178954,0.755694751,2.186417905,4.921509117
|
||||
22,312,449,1399.0,28416.0,0.04923282657657658,0.219362642,2.321e-5,17.593459902,0.747914841,2.168628993,4.952994795
|
||||
23,310,447,1399.0,28320.0,0.049399717514124294,0.080729209,2.358e-5,17.571675834,0.755489634,2.209531477,4.951190234
|
||||
24,308,445,1399.0,28224.0,0.049567743764172334,0.080235835,2.3271e-5,17.615791747,0.750314688,2.21464245,4.949496195
|
||||
25,306,443,1399.0,28128.0,0.049736916951080776,0.124106403,2.374e-5,17.60716179,0.753826187,2.186184237,4.920128786
|
||||
26,304,441,1399.0,28032.0,0.04990724885844749,0.080715608,2.3781e-5,17.581988477,0.750266997,2.209826064,4.937813884
|
||||
27,302,439,1399.0,27936.0,0.05007875143184422,0.080606465,2.4071e-5,17.633096607,0.749125265,2.198599437,4.935320693
|
||||
28,300,437,1399.0,27840.0,0.0502514367816092,0.081056137,2.3781e-5,17.564695624,0.746230293,2.225110355,4.939656214
|
||||
29,298,435,1399.0,27744.0,0.05042531718569781,0.096545225,2.379e-5,17.58144781,0.747458632,2.263551336,4.924245431
|
||||
30,296,433,1399.0,27648.0,0.050600405092592594,0.120638697,2.383e-5,17.574370836,0.748933285,2.234417803,4.915183371
|
||||
31,294,431,1399.0,27552.0,0.0507767131242741,0.125073582,2.393e-5,17.627352699,0.754384428,2.214199106,4.938130459
|
||||
32,292,429,1399.0,27456.0,0.05095425407925408,0.12314953,2.468e-5,17.697160429,0.796488763,2.261473826,4.956976138
|
||||
33,290,427,1399.0,27360.0,0.051133040935672516,0.125481487,2.354e-5,17.636971006,0.748416796,2.222200724,4.948970096
|
||||
34,288,425,1399.0,27264.0,0.051313086854460094,0.094052012,2.4301e-5,17.62971842,0.805139938,2.205015347,4.959455536
|
||||
35,286,423,1399.0,27168.0,0.051494405182567725,0.08136377,2.4041e-5,17.621304482,0.747718686,2.244362062,4.941432169
|
||||
36,284,421,1399.0,27072.0,0.05167700945626478,0.080217839,2.3921e-5,17.61427713,0.747754586,2.212103901,4.933185029
|
||||
37,282,417,1399.0,26976.0,0.051860913404507714,0.126372199,2.376e-5,17.601417663,0.750036789,2.163344775,4.926698186
|
||||
38,280,414,1399.0,26880.0,0.052046130952380955,0.125444544,2.476e-5,17.612452443,0.748155225,2.195259021,4.91594575
|
||||
39,278,412,1399.0,26784.0,0.05223267622461171,0.083158944,2.4551e-5,17.599589645,0.741671021,2.208064301,4.9351555
|
||||
40,276,410,1399.0,26688.0,0.05242056354916067,0.083321959,2.4101e-5,17.567124159,0.748238012,2.197233222,4.954754226
|
||||
41,274,408,1399.0,26592.0,0.052609807460890494,0.084803792,2.3901e-5,17.549365204,0.754817994,2.229499405,4.94957165
|
||||
42,272,405,1399.0,26496.0,0.05280042270531401,0.127648261,2.3851e-5,17.582852416,0.750759497,2.230398721,4.937220319
|
||||
43,270,401,1399.0,26400.0,0.052992424242424244,0.128445184,2.428e-5,17.596647819,0.75777713,2.160922996,4.937371146
|
||||
44,268,399,1399.0,26304.0,0.053185827250608275,0.129526096,2.5081e-5,17.594476326,0.746906342,2.219401891,4.93357998
|
||||
45,266,397,1399.0,26208.0,0.05338064713064713,0.129819495,2.4731e-5,17.568331366,0.750368555,2.18948505,4.922275732
|
||||
46,264,394,1399.0,26112.0,0.05357689950980392,0.087649075,2.462e-5,17.585414218,0.751605626,2.198684054,4.941424565
|
||||
47,262,391,1399.0,26016.0,0.05377460024600246,0.089110637,2.4551e-5,17.614139291,0.750622403,2.168793662,4.953321773
|
||||
48,260,389,1399.0,25920.0,0.053973765432098766,0.090307061,2.45e-5,17.633806293,0.749096576,2.224521298,4.930813246
|
||||
49,258,387,1399.0,25824.0,0.054174411400247834,0.133480181,2.461e-5,17.634768586,0.756613261,2.201452177,4.972809945
|
||||
50,256,385,1399.0,25728.0,0.05437655472636816,0.134254424,2.425e-5,17.606323938,0.748779206,2.216818872,4.939295094
|
||||
51,254,382,1399.0,25632.0,0.05458021223470662,0.134016868,2.4531e-5,17.5926305,0.75625873,2.227679889,4.968213894
|
||||
52,252,379,1399.0,25536.0,0.054785401002506263,0.135650945,2.4601e-5,17.642803637,0.751975585,2.226011125,4.9285844
|
||||
53,250,375,1399.0,25440.0,0.054992138364779876,0.136647933,2.4161e-5,17.799738254,0.76667472,2.165144989,4.930427128
|
||||
54,248,373,1399.0,25344.0,0.05520044191919192,0.123103164,2.4461e-5,17.745879754,0.760526742,2.161495227,4.940492285
|
||||
55,246,370,1399.0,25248.0,0.05541032953105197,0.09476826,2.3511e-5,17.596131758,0.756924114,2.180021837,4.954121771
|
||||
56,244,365,1399.0,25152.0,0.05562181933842239,0.095345787,2.4171e-5,17.612023424,0.747989147,2.215139082,4.945396527
|
||||
57,242,362,1399.0,25056.0,0.05583492975734355,0.139570128,2.3801e-5,17.630922372,0.750668446,2.186529739,4.961981394
|
||||
58,240,359,1399.0,24960.0,0.05604967948717949,0.097466916,2.4451e-5,17.61078772,0.7485922,2.217673752,4.95291513
|
||||
59,238,357,1399.0,24864.0,0.05626608751608752,0.138599302,2.3601e-5,17.586404505,0.756929027,2.233374301,4.935342135
|
||||
60,236,352,1399.0,24768.0,0.05648417312661499,0.147210964,2.4911e-5,17.650436019,0.74908103,2.157077946,4.937714591
|
||||
61,234,350,1399.0,24672.0,0.05670395590142672,0.099491094,2.3601e-5,17.608002511,0.756924473,2.165309665,4.932434479
|
||||
62,232,348,1399.0,24576.0,0.056925455729166664,0.141929827,2.454e-5,17.605756917,0.749178717,2.234082435,4.957629943
|
||||
63,230,344,1399.0,24480.0,0.057148692810457515,0.142483983,2.4211e-5,17.623883273,0.758216784,2.210078838,4.930940098
|
||||
64,228,341,1399.0,24384.0,0.057373687664041995,0.101524943,2.4371e-5,17.662312587,0.751128917,2.22449657,4.96708528
|
||||
65,226,339,1399.0,24288.0,0.05760046113306983,0.102619253,2.3831e-5,17.610112922,0.758167777,2.187456785,4.957519684
|
||||
66,224,337,1399.0,24192.0,0.05782903439153439,0.10351088,2.3401e-5,17.611932402,0.749178457,2.236980212,4.933450322
|
||||
67,222,335,1399.0,24096.0,0.05805942895086321,0.148780402,2.3711e-5,17.636035095,0.75707833,2.252138664,4.951632995
|
||||
68,220,333,1399.0,24000.0,0.058291666666666665,0.148311059,2.4851e-5,17.617252052,0.750104986,2.22330739,4.9243139
|
||||
69,218,329,1399.0,23904.0,0.05852576974564926,0.151678794,2.4181e-5,17.627742278,0.755299894,2.248062201,4.951401482
|
||||
70,216,326,1399.0,23808.0,0.05876176075268817,0.15082361,2.3851e-5,17.647410652,0.752445605,2.240948426,4.949599133
|
||||
71,214,323,1399.0,23712.0,0.05899966261808367,0.153382492,2.4011e-5,17.654743596,0.752802907,2.253819342,4.966250371
|
||||
72,212,320,1399.0,23616.0,0.05923949864498645,0.151516131,2.3931e-5,17.672908543,0.750257716,2.220003155,4.944782327
|
||||
73,210,317,1399.0,23520.0,0.059481292517006804,0.154244628,2.386e-5,17.60330678,0.750422813,2.211295295,4.943727837
|
||||
74,208,313,1399.0,23424.0,0.05972506830601093,0.153767234,2.4291e-5,17.640950842,0.74988433,2.24794966,4.952712228
|
||||
75,206,311,1399.0,23328.0,0.05997085048010974,0.155927375,2.406e-5,17.589128666,0.749120129,2.253801308,4.953014816
|
||||
76,204,306,1399.0,23232.0,0.06021866391184573,0.15464184,2.4521e-5,17.662616581,0.750484429,2.227511412,4.924026259
|
||||
77,202,304,1399.0,23136.0,0.06046853388658368,0.157807248,2.4041e-5,17.611953814,0.755679546,2.178734374,4.943974526
|
||||
78,200,301,1399.0,23040.0,0.06072048611111111,0.155978707,2.4051e-5,17.624250437,0.794935481,2.247188963,4.940403894
|
||||
79,198,298,1399.0,22944.0,0.06097454672245467,0.158377905,2.5091e-5,17.634938402,0.754743461,2.245248812,4.919902064
|
||||
80,196,296,1399.0,22848.0,0.061230742296918765,0.158750786,2.4511e-5,17.6360904,0.750867213,2.200032233,4.942215648
|
||||
81,194,293,1399.0,22752.0,0.061489099859353025,0.161152794,2.4831e-5,17.780761042,0.765338482,2.204873372,4.939655562
|
||||
82,192,290,1399.0,22656.0,0.061749646892655365,0.160175486,2.318e-5,17.798147683,0.76168194,2.230891056,4.955801153
|
||||
83,190,287,1399.0,22560.0,0.06201241134751773,0.159868767,2.4791e-5,17.764165058,0.796377137,2.239618185,4.928054627
|
||||
84,188,283,1399.0,22464.0,0.06227742165242165,0.160933577,2.4221e-5,17.798426962,0.848255338,2.218112612,4.932433146
|
||||
85,186,280,1399.0,22368.0,0.06254470672389127,0.163393917,2.4371e-5,17.808464853,0.765692696,2.213490844,4.943298137
|
||||
86,184,277,1399.0,22272.0,0.06281429597701149,0.163792118,2.4261e-5,17.805783627,0.761027705,2.232891092,4.919454211
|
||||
87,182,275,1399.0,22176.0,0.06308621933621934,0.162177953,2.43e-5,17.797665375,0.761040026,2.236586089,4.951072155
|
||||
88,180,271,1399.0,22080.0,0.06336050724637682,0.165377424,2.557e-5,17.805099359,0.763146286,2.212611436,4.921150887
|
||||
89,178,268,1399.0,21984.0,0.06363719068413391,0.166754373,2.5141e-5,17.770997205,0.764361801,2.199943181,4.934748884
|
||||
90,176,266,1399.0,21888.0,0.06391630116959064,0.167241957,2.4571e-5,17.770223198,0.759580227,2.247867501,4.935730147
|
||||
91,174,264,1399.0,21792.0,0.06419787077826726,0.169623073,2.5e-5,17.771153368,0.750276145,2.243455929,4.939933808
|
||||
92,172,261,1399.0,21696.0,0.06448193215339233,0.168358288,2.5181e-5,17.799224982,0.760906435,2.210000929,4.943923374
|
||||
93,170,259,1399.0,21600.0,0.06476851851851852,0.170287483,2.529e-5,17.79271252,0.763151029,2.205444892,4.924953813
|
||||
94,168,254,1399.0,21504.0,0.06505766369047619,0.168986856,2.5021e-5,17.775583682,0.760237647,2.222811993,4.951301097
|
||||
95,166,250,1399.0,21408.0,0.06534940209267563,0.171662521,2.4401e-5,17.636022254,0.749599438,2.234944605,4.958431762
|
||||
96,164,246,1399.0,21312.0,0.06564376876876876,0.170911431,2.4481e-5,17.633556045,0.788097892,2.198060879,4.922871993
|
||||
97,162,244,1399.0,21216.0,0.06594079939668175,0.172387252,2.4781e-5,17.620254381,0.799269067,2.202436673,4.936411908
|
||||
98,160,241,1399.0,21120.0,0.0662405303030303,0.171830017,2.581e-5,17.656653806,0.750275098,2.200933622,4.94776375
|
||||
99,158,238,1399.0,21024.0,0.06654299847792998,0.174560093,2.447e-5,17.625724723,0.756745741,2.249721096,4.958786002
|
||||
100,156,235,1399.0,20928.0,0.06684824159021406,0.178996759,2.453e-5,17.669194606,0.749422535,2.218089817,4.960858653
|
||||
101,154,231,1399.0,20832.0,0.0671562980030722,0.175032127,2.3871e-5,17.642586975,0.754643863,2.194675279,4.944134534
|
||||
102,152,229,1399.0,20736.0,0.06746720679012345,0.176393906,2.4731e-5,17.592973556,0.749943551,2.229565622,4.927935661
|
||||
103,150,225,1399.0,20640.0,0.06778100775193799,0.178017631,2.412e-5,17.630568322,0.755272802,2.221125776,4.952348991
|
||||
104,148,223,1399.0,20544.0,0.0680977414330218,0.175897841,2.36e-5,17.661766307,0.749293633,2.2201698,4.963634779
|
||||
105,146,221,1399.0,20448.0,0.06841744913928012,0.178367362,2.5001e-5,17.654508999,0.755361234,2.185187066,4.938710949
|
||||
106,144,218,1399.0,20352.0,0.06874017295597484,0.178791594,2.502e-5,17.649520916,0.749748217,2.238645461,4.955141284
|
||||
107,142,216,1399.0,20256.0,0.06906595576619273,0.175900502,2.3291e-5,17.648252045,0.755157659,2.250102545,4.948078116
|
||||
108,140,212,1399.0,20160.0,0.06939484126984127,0.180050739,2.3901e-5,17.642556024,0.751139061,2.195233955,4.92102672
|
||||
109,138,210,1399.0,20064.0,0.06972687400318979,0.182587052,2.492e-5,17.631301401,0.754040144,2.177296385,4.948297571
|
||||
110,136,207,1399.0,19968.0,0.07006209935897435,0.181449712,2.4401e-5,17.618787463,0.748940439,2.251932822,4.950366155
|
||||
111,134,203,1399.0,19872.0,0.07040056360708534,0.183466877,2.407e-5,17.658532693,0.756589176,2.240568188,4.97337861
|
||||
112,132,201,1399.0,19776.0,0.0707423139158576,0.181545084,2.485e-5,17.63441504,0.751343023,2.183033772,4.975534251
|
||||
113,130,199,1399.0,19680.0,0.07108739837398374,0.177809314,2.417e-5,17.627163359,0.754577307,2.211080446,4.977438563
|
||||
114,128,195,1399.0,19584.0,0.07143586601307189,0.183038393,2.5541e-5,17.63366534,0.751510139,2.237832092,4.969644912
|
||||
115,126,191,1399.0,19488.0,0.07178776683087028,0.186344151,2.4971e-5,17.711808739,0.759177,2.236586017,4.951292022
|
||||
116,124,187,1399.0,19392.0,0.07214315181518152,0.184833587,2.475e-5,17.648467279,0.749564641,2.179772409,4.97017709
|
||||
117,122,183,1399.0,19296.0,0.07250207296849089,0.193249355,2.3811e-5,17.639230223,0.755564354,2.195109482,4.982434629
|
||||
118,120,180,1399.0,19200.0,0.07286458333333333,0.186818046,2.372e-5,17.635977046,0.750626058,2.243877912,4.972608068
|
||||
119,118,177,1399.0,19104.0,0.07323073701842546,0.189204719,2.4961e-5,17.791522288,0.766082656,2.242948358,4.980365418
|
||||
120,116,173,1399.0,19008.0,0.07360058922558922,0.186391669,2.4181e-5,17.645956891,0.750893368,2.197914806,4.98745469
|
||||
121,114,171,1399.0,18912.0,0.07397419627749577,0.19060573,2.4701e-5,17.771140583,0.765197694,2.20643796,4.959618561
|
||||
122,112,169,1399.0,18816.0,0.0743516156462585,0.188466188,2.381e-5,17.795228145,0.759434429,2.26208531,4.965068853
|
||||
123,110,165,1399.0,18720.0,0.07473290598290598,0.191524927,2.3841e-5,17.779734215,0.767242896,2.242967333,4.950554681
|
||||
124,108,161,1399.0,18624.0,0.07511812714776632,0.189450326,2.3601e-5,17.807849571,0.762371273,2.196711688,4.966122065
|
||||
125,106,157,1399.0,18528.0,0.0755073402417962,0.191473057,2.357e-5,17.632877767,0.755845465,2.188474891,4.977562868
|
||||
126,104,153,1399.0,18432.0,0.0759006076388889,0.191382079,2.3851e-5,17.775729988,0.758861116,2.278116886,4.979965119
|
||||
127,102,151,1399.0,18336.0,0.07629799301919721,0.192296369,2.394e-5,17.777918793,0.764981303,2.224818047,4.949944943
|
||||
128,100,149,1399.0,18240.0,0.07669956140350877,0.191424719,2.4331e-5,17.856475915,0.76057459,2.201588049,4.941974925
|
||||
129,98,146,1399.0,18144.0,0.07710537918871252,0.194280932,2.3951e-5,17.779963845,0.766401736,2.223182601,4.961465017
|
||||
130,96,142,1399.0,18048.0,0.07751551418439716,0.192850597,2.3861e-5,17.765033828,0.760509569,2.250897799,4.967399083
|
||||
131,94,138,1399.0,17952.0,0.07793003565062388,0.194741823,2.38e-5,17.778261696,0.764271609,2.248898068,4.975998565
|
||||
132,92,136,1399.0,17856.0,0.07834901433691756,0.193567295,2.5281e-5,17.791322862,0.759809249,2.216694812,4.962092553
|
||||
133,90,132,1399.0,17760.0,0.07877252252252252,0.196949912,2.4641e-5,17.775924767,0.766636532,2.192664527,4.943809886
|
||||
134,88,129,1399.0,17664.0,0.07920063405797101,0.19423328,2.4491e-5,17.775940481,0.759698903,2.241454301,4.965419114
|
||||
135,86,125,1399.0,17568.0,0.07963342440801457,0.196021362,2.4541e-5,17.749824568,0.77002309,2.244133161,4.973507276
|
||||
136,84,123,1399.0,17472.0,0.08007097069597069,0.195945063,2.4791e-5,17.793381264,0.758984676,2.223761942,4.967845004
|
||||
137,82,120,1399.0,17376.0,0.0805133517495396,0.196404909,2.5491e-5,17.781126567,0.76777764,2.208548873,4.942758101
|
||||
138,80,116,1399.0,17280.0,0.08096064814814814,0.197313346,2.469e-5,17.785944557,0.814271788,2.200296465,4.939179018
|
||||
139,78,114,1399.0,17184.0,0.08141294227188083,0.155633427,2.5181e-5,17.79491891,0.767423131,2.233213884,4.963944358
|
||||
140,76,111,1399.0,17088.0,0.08187031835205992,0.194686919,2.4311e-5,17.835512877,0.761171578,2.216772786,4.968370761
|
||||
141,74,108,1399.0,16992.0,0.0823328625235405,0.19895497,2.4301e-5,17.80769545,0.768202031,2.212642548,4.971369432
|
||||
142,72,106,1399.0,16896.0,0.08280066287878787,0.197589165,2.4241e-5,17.817799582,0.760097766,2.219367009,4.967751237
|
||||
143,70,102,1399.0,16800.0,0.08327380952380953,0.200103786,2.425e-5,17.804210307,0.767108387,2.264925155,4.965506236
|
||||
144,68,99,1399.0,16704.0,0.08375239463601533,0.196633322,2.5371e-5,17.822197608,0.762852947,2.20877412,4.971541033
|
||||
145,66,97,1399.0,16608.0,0.08423651252408478,0.200144552,2.4801e-5,17.823667792,0.766965999,2.209992675,4.969252216
|
||||
146,64,93,1399.0,16512.0,0.08472625968992248,0.199816644,2.4901e-5,17.838429006,0.764432365,2.241092809,4.961995819
|
||||
147,62,89,1399.0,16416.0,0.08522173489278752,0.187325579,2.5321e-5,17.811923957,0.767393244,2.227406228,4.960056608
|
||||
148,60,85,1399.0,16320.0,0.08572303921568628,0.198893612,2.4451e-5,17.82940565,0.760747136,2.209815727,4.971563658
|
||||
149,58,83,1399.0,16224.0,0.08623027613412229,0.201039293,2.4651e-5,17.817639935,0.767607352,2.210546374,4.97066195
|
||||
150,56,81,1399.0,16128.0,0.08674355158730158,0.199841932,2.414e-5,17.82203287,0.760048809,2.243550629,4.954439346
|
||||
151,54,79,1399.0,16032.0,0.0872629740518962,0.2011596,2.4741e-5,17.804574042,0.767800679,2.250206119,4.955980994
|
||||
152,52,75,1399.0,15936.0,0.08778865461847389,0.19971389,2.4331e-5,17.829821975,0.762018993,2.205143141,4.970086548
|
||||
153,50,73,1399.0,15840.0,0.08832070707070708,0.201368798,2.4881e-5,17.836101646,0.767371477,2.218711432,4.96364023
|
||||
154,48,71,1399.0,15744.0,0.08885924796747967,0.200798594,2.4491e-5,17.830384655,0.765407907,2.286796949,4.939295093
|
||||
155,46,67,1399.0,15648.0,0.08940439672801637,0.202551163,2.5121e-5,17.827221721,0.768466657,2.262575248,4.943430916
|
||||
156,44,65,1399.0,15552.0,0.08995627572016461,0.198816901,2.578e-5,17.840506569,0.760760306,2.220630133,4.952844324
|
||||
157,42,63,1399.0,15456.0,0.09051501035196688,0.201424744,2.5021e-5,17.814439397,0.767553139,2.196934945,4.958506547
|
||||
158,40,59,1399.0,15360.0,0.09108072916666667,0.202145126,2.565e-5,17.808712307,0.76137146,2.235801178,4.949559042
|
||||
159,38,55,1399.0,15264.0,0.0916535639412998,0.201663393,2.4591e-5,17.784477195,0.766209648,2.249329555,4.964028527
|
||||
160,36,53,1399.0,15168.0,0.09223364978902954,0.199579456,2.5461e-5,17.900752023,0.761934363,2.209582978,4.950507063
|
||||
161,34,48,1399.0,15072.0,0.09282112526539278,0.159541692,2.5211e-5,17.769415534,0.935609132,2.216664395,4.962977201
|
||||
162,32,44,1399.0,14976.0,0.09341613247863248,0.201979445,2.5581e-5,17.802148727,0.758630938,2.257162782,4.954367291
|
||||
163,30,40,1399.0,14880.0,0.09401881720430108,0.203381244,2.5411e-5,17.808584074,0.768160516,2.239967841,4.949515694
|
||||
164,28,35,1399.0,14784.0,0.09462932900432901,0.200707381,2.5071e-5,17.811958674,0.765546396,2.222827481,4.962523474
|
||||
165,26,31,1399.0,14688.0,0.09524782135076253,0.203476579,2.4431e-5,17.791537057,0.759747517,2.210172596,4.96717851
|
||||
166,24,29,1399.0,14592.0,0.09587445175438597,0.38619058,2.5161e-5,17.784565893,0.765981903,2.205094732,4.970469758
|
||||
167,22,25,1399.0,14496.0,0.09650938189845475,0.209174268,2.6071e-5,17.886396985,0.762283972,2.251379768,4.9348063
|
||||
168,20,21,1399.0,14400.0,0.09715277777777778,0.184182012,2.5331e-5,17.791795342,0.760972528,2.229551257,4.941190792
|
||||
169,18,17,1399.0,14304.0,0.09780480984340045,0.203935864,2.572e-5,17.823665061,0.762353868,2.199132836,4.965200905
|
||||
170,16,15,1399.0,14208.0,0.09846565315315316,0.200164969,2.4631e-5,17.792385586,0.76804392,2.174965407,4.972074439
|
||||
171,14,13,1399.0,14112.0,0.09913548752834467,0.204567903,2.5071e-5,17.806154396,0.759505453,2.2340466,4.972671228
|
||||
172,12,11,1399.0,14016.0,0.09981449771689498,0.201861418,2.5971e-5,18.529840195,0.789347616,2.23167521,4.947890089
|
||||
173,10,9,1399.0,13920.0,0.1005028735632184,0.202902727,2.4951e-5,17.865867105,0.761004999,2.194876208,4.93177029
|
||||
174,8,7,1399.0,13824.0,0.10120081018518519,0.198079003,2.4651e-5,17.791197743,0.767399089,2.226370372,4.951979965
|
|
82
data/qed_ke-kkke_reduction_optimizer.csv
Normal file
82
data/qed_ke-kkke_reduction_optimizer.csv
Normal file
@ -0,0 +1,82 @@
|
||||
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
|
||||
0,356,493,1399.0,30528.0,0.0458267819706499,0.084389903,2.4971e-5,17.802549835,0.960409581,2.406448706,4.927079076
|
||||
1,351,483,1369.0,30528.0,0.044844077568134175,0.126855933,2.9211e-5,16.868735557,0.927387188,2.257632484,4.697683068
|
||||
2,346,478,1369.0,30048.0,0.04556043663471779,0.08319682,3.5431e-5,16.871399152,0.834869326,2.264361993,4.701280771
|
||||
3,341,473,1314.0,30048.0,0.04373003194888179,0.124422234,2.392e-5,16.454231193,0.856669072,2.271991539,4.68580348
|
||||
4,336,463,1284.0,30048.0,0.042731629392971246,0.121696991,2.2921e-5,15.881542683,0.816430136,2.213686135,4.449106524
|
||||
5,331,458,1284.0,29568.0,0.04342532467532467,0.124024888,2.314e-5,15.879200155,0.799333453,2.194093083,4.435654931
|
||||
6,326,448,1254.0,29568.0,0.04241071428571429,0.121610951,2.2e-5,15.325702423,0.833341953,2.203843882,4.199677306
|
||||
7,321,438,1224.0,29568.0,0.041396103896103896,0.118972208,2.1631e-5,14.367273685,0.711553932,2.16189756,3.948872646
|
||||
8,316,433,1224.0,29088.0,0.04207920792079208,0.074826839,2.2031e-5,14.367107152,0.792981221,2.169096496,3.961630969
|
||||
9,311,428,1169.0,29088.0,0.04018839383938394,0.116237162,2.15e-5,14.416973472,0.788583102,2.092186151,3.946339564
|
||||
10,306,418,1139.0,29088.0,0.03915704070407041,0.114647398,2.031e-5,13.671420757,0.745657392,2.037551329,3.657411205
|
||||
11,301,408,1109.0,29088.0,0.03812568756875687,0.11434652,1.951e-5,13.093103664,0.686554396,2.065489584,3.441139671
|
||||
12,296,403,1109.0,28608.0,0.03876538031319911,0.112282663,1.8991e-5,13.11525848,0.705183633,2.0639299,3.422598036
|
||||
13,291,398,1109.0,28128.0,0.039426905574516495,0.111549203,1.9661e-5,13.08100601,0.700772882,2.065935946,3.41679234
|
||||
14,286,388,1079.0,28128.0,0.0383603526734926,0.109881396,1.907e-5,11.871746271,0.665244638,2.063828106,3.187580585
|
||||
15,281,378,1049.0,28128.0,0.037293799772468716,0.108444747,1.7961e-5,10.963517612,0.62180291,2.037926216,2.935137574
|
||||
16,276,373,1049.0,27648.0,0.03794126157407408,0.107959773,1.874e-5,11.021594456,0.541779823,2.003876106,2.931304737
|
||||
17,271,368,1049.0,27168.0,0.03861160188457008,0.105629068,1.8241e-5,11.017450178,0.581974375,2.017201027,2.952118903
|
||||
18,266,363,1049.0,26688.0,0.0393060551558753,0.107303406,1.8301e-5,11.028597789,0.556078309,2.037535226,2.911405619
|
||||
19,261,358,994.0,26688.0,0.03724520383693045,0.106584986,1.7111e-5,10.789192026,0.525275525,2.011931363,2.931360979
|
||||
20,256,353,939.0,26688.0,0.035184352517985615,0.105743463,1.7521e-5,10.50283261,0.535253087,1.962456949,2.941274646
|
||||
21,255,351,933.0,26688.0,0.03495953237410072,0.105189187,1.7471e-5,10.739591259,0.555102576,2.013201521,2.896175037
|
||||
22,254,350,933.0,26592.0,0.035085740072202165,0.105895137,1.6631e-5,10.68514711,0.571809578,1.974934611,2.890503396
|
||||
23,253,348,927.0,26592.0,0.0348601083032491,0.104181459,1.817e-5,10.344271645,0.572483889,2.002875753,2.842241926
|
||||
24,252,347,927.0,26496.0,0.034986413043478264,0.103568232,1.7471e-5,10.363216025,0.602207417,1.943794016,2.811132729
|
||||
25,247,342,927.0,26016.0,0.035631918819188195,0.102006829,1.669e-5,10.360319761,0.588967585,1.942523675,2.838431844
|
||||
26,246,340,921.0,26016.0,0.03540129151291513,0.103244544,1.672e-5,10.140255758,0.565172778,1.980058606,2.776594151
|
||||
27,245,339,921.0,25920.0,0.03553240740740741,0.102991317,1.723e-5,10.166352736,0.588556746,2.025713505,2.754827976
|
||||
28,244,337,915.0,25920.0,0.03530092592592592,0.102527335,1.6261e-5,9.965044496,0.527648944,1.966870364,2.708992883
|
||||
29,243,335,909.0,25920.0,0.035069444444444445,0.101020632,1.6541e-5,9.899918186,0.530837495,1.99964346,2.686936268
|
||||
30,242,334,909.0,25824.0,0.03519981412639405,0.099846559,1.614e-5,9.924451078,0.532149983,1.992832633,2.667590089
|
||||
31,241,333,909.0,25728.0,0.035331156716417914,0.103293156,1.634e-5,9.893503718,0.500188044,1.971455575,2.661440862
|
||||
32,236,328,909.0,25248.0,0.036002851711026615,0.110948742,1.5851e-5,9.916889596,0.515528547,2.014256204,2.691654688
|
||||
33,235,326,903.0,25248.0,0.03576520912547528,0.099799239,1.658e-5,9.667648582,0.561210643,1.981308261,2.647665444
|
||||
34,234,324,897.0,25248.0,0.035527566539923956,0.099455409,1.6561e-5,9.588166052,0.544847505,1.932560182,2.56349283
|
||||
35,233,323,897.0,25152.0,0.035663167938931296,0.103335368,1.6271e-5,9.590387462,0.542413718,1.965145602,2.559435691
|
||||
36,232,321,891.0,25152.0,0.03542461832061069,0.097770562,1.6571e-5,9.362808632,0.543288523,2.017894491,2.498672404
|
||||
37,231,320,891.0,25056.0,0.03556034482758621,0.100428616,1.5941e-5,9.340302395,0.548822639,1.994799194,2.525394
|
||||
38,230,319,891.0,24960.0,0.03569711538461538,0.056667955,1.5341e-5,9.356871677,0.537041949,1.921246656,2.507595034
|
||||
39,225,314,891.0,24480.0,0.036397058823529414,0.099323026,1.636e-5,9.383625024,0.506403697,1.972101141,2.529248938
|
||||
40,220,309,836.0,24480.0,0.03415032679738562,0.096789665,1.645e-5,9.524601658,0.473707387,1.980933173,2.524768525
|
||||
41,215,304,836.0,24000.0,0.034833333333333334,0.053463925,1.671e-5,9.520567128,0.487585179,1.942542795,2.535491481
|
||||
42,214,302,830.0,24000.0,0.034583333333333334,0.096303802,1.6011e-5,9.137262758,0.4297148,1.950560163,2.478408276
|
||||
43,213,301,830.0,23904.0,0.034722222222222224,0.070596338,1.6901e-5,9.143790565,0.492842898,1.949332161,2.476752284
|
||||
44,212,299,824.0,23904.0,0.034471218206157964,0.09696925,1.612e-5,9.089211511,0.456930617,2.022026121,2.419473874
|
||||
45,211,297,818.0,23904.0,0.03422021419009371,0.052526649,1.536e-5,8.807671694,0.471203239,1.970488502,2.372441242
|
||||
46,210,296,818.0,23808.0,0.03435819892473118,0.096716114,1.5701e-5,8.806210783,0.451452844,1.960073481,2.387451098
|
||||
47,209,295,818.0,23712.0,0.034497300944669365,0.05145174,1.6061e-5,8.867215342,0.450895098,1.968012818,2.394204111
|
||||
48,204,290,818.0,23232.0,0.03521005509641873,0.093248236,1.9521e-5,8.844517253,0.476030278,1.963827031,2.389413849
|
||||
49,203,288,812.0,23232.0,0.034951790633608815,0.093881584,1.527e-5,8.849095772,0.446415074,1.974782212,2.332439097
|
||||
50,202,287,812.0,23136.0,0.03509681881051176,0.050473481,1.5851e-5,8.784636116,0.469233287,1.953068913,2.321316886
|
||||
51,201,285,806.0,23136.0,0.034837482710926695,0.092750242,1.5541e-5,8.632088328,0.491467054,1.945455141,2.29300329
|
||||
52,200,284,806.0,23040.0,0.03498263888888889,0.092540087,1.7161e-5,8.637677414,0.471865872,1.975464118,2.259260411
|
||||
53,199,282,800.0,23040.0,0.034722222222222224,0.092944049,1.5261e-5,8.624992966,0.478249573,1.931707577,2.232058939
|
||||
54,198,281,800.0,22944.0,0.03486750348675035,0.091660013,1.575e-5,8.680034605,0.429976994,2.022314921,2.224544849
|
||||
55,197,279,794.0,22944.0,0.03460599721059972,0.092591389,1.582e-5,8.266084761,0.442472956,1.949268775,2.165130527
|
||||
56,196,278,794.0,22848.0,0.03475140056022409,0.090376966,1.529e-5,8.26930839,0.438461132,1.960119483,2.169387658
|
||||
57,191,273,739.0,22848.0,0.03234418767507003,0.090398736,1.589e-5,8.061516101,0.468233752,1.825342557,2.144808638
|
||||
58,186,268,739.0,22368.0,0.03303826895565093,0.090566151,1.5781e-5,8.051685873,0.472555774,1.827021946,2.175475243
|
||||
59,185,266,733.0,22368.0,0.03277002861230329,0.046301524,1.4931e-5,7.809555195,0.466519375,1.819191936,2.095906173
|
||||
60,184,264,727.0,22368.0,0.03250178826895565,0.087977349,1.4771e-5,7.825535183,0.452072238,1.820734702,2.06485156
|
||||
61,183,263,727.0,22272.0,0.032641882183908046,0.08908488,1.4591e-5,7.77560322,0.445728609,1.804235078,2.06763398
|
||||
62,182,262,727.0,22176.0,0.03278318903318903,0.076517376,1.461e-5,7.754359737,0.421063625,1.812681957,2.076417548
|
||||
63,181,260,721.0,22176.0,0.032512626262626264,0.088983767,1.4091e-5,7.616158878,0.422402602,1.868182992,2.016601005
|
||||
64,180,259,721.0,22080.0,0.03265398550724638,0.089172453,1.467e-5,7.63910266,0.402654247,1.844390793,2.031385412
|
||||
65,175,254,666.0,22080.0,0.03016304347826087,0.091971222,1.3851e-5,7.35822511,0.443635961,1.719023302,2.007792679
|
||||
66,170,249,666.0,21600.0,0.030833333333333334,0.073480651,1.3871e-5,7.291999508,0.434965958,1.750073777,1.999358953
|
||||
67,169,247,660.0,21600.0,0.030555555555555555,0.085309774,1.7211e-5,7.245192983,0.412650069,1.744681817,1.962798523
|
||||
68,168,245,654.0,21600.0,0.03027777777777778,0.089043539,1.367e-5,7.024436477,0.421292773,1.722710908,1.890918459
|
||||
69,167,243,648.0,21600.0,0.03,0.084353527,1.428e-5,6.8832018,0.415786727,1.715216258,1.830282141
|
||||
70,166,242,648.0,21504.0,0.030133928571428572,0.084367977,1.3441e-5,6.899982477,0.419080281,1.707637056,1.843529005
|
||||
71,165,241,648.0,21408.0,0.030269058295964126,0.085701815,1.4031e-5,6.936174291,0.377346024,1.704252961,1.85218872
|
||||
72,164,240,648.0,21312.0,0.030405405405405407,0.083910355,1.3601e-5,6.9051589,0.389477478,1.75740328,1.867258596
|
||||
73,159,235,593.0,21312.0,0.0278246996996997,0.082135195,1.3351e-5,7.031037571,0.356084586,1.631072,1.797434919
|
||||
74,154,230,593.0,20832.0,0.028465821812596007,0.080356395,1.358e-5,7.040766129,0.405151789,1.620631997,1.781269114
|
||||
75,153,228,587.0,20832.0,0.02817780337941628,0.066967517,1.3391e-5,6.644186555,0.395240289,1.641155866,1.743666486
|
||||
76,152,226,581.0,20832.0,0.02788978494623656,0.080763676,1.298e-5,6.633937959,0.388869331,1.630064054,1.701302723
|
||||
77,151,225,581.0,20736.0,0.028018904320987654,0.080671833,1.2781e-5,6.622133299,0.392564435,1.625932508,1.711411428
|
||||
78,150,224,581.0,20640.0,0.02814922480620155,0.080368195,1.358e-5,6.599986437,0.397419271,1.657700695,1.694756709
|
||||
79,149,222,575.0,20640.0,0.027858527131782947,0.080015475,1.298e-5,6.281191715,0.37819019,1.622522233,1.656839741
|
||||
80,148,221,575.0,20544.0,0.027988707165109036,0.065331671,1.334e-5,6.313635402,0.380955078,1.627111603,1.638795233
|
|
79
data/qed_ke-kkkkke_reduction_optimizer.csv
Normal file
79
data/qed_ke-kkkkke_reduction_optimizer.csv
Normal file
@ -0,0 +1,79 @@
|
||||
operations,graph_nodes,graph_edges,graph_ce,graph_dt,graph_ci,gen_func_t,cpu_compile_t,cpu_st_t,cpu_mt_t,gpu_compile_t,gpu_t
|
||||
0,15866,21617,66249.0,1.314048e6,0.050415966540035065,6.468999136,0.001398329,8.478099553,0.43958521,0.0,0.0
|
||||
10,14676,19713,60656.0,1.279776e6,0.0473957942639962,5.993535435,0.000745961,7.192805963,0.417393835,0.0,0.0
|
||||
20,13774,18527,56334.0,1.243296e6,0.04531020770596865,5.489738392,0.000682889,6.652182167,0.336339503,0.0,0.0
|
||||
30,13352,17940,53276.0,1.236672e6,0.04308013765978368,5.169906767,0.000675318,6.370526843,0.313517861,0.0,0.0
|
||||
40,12714,17168,51163.0,1.199712e6,0.042646068389746876,4.845906388,0.000634457,6.124306725,0.311820244,0.0,0.0
|
||||
50,12004,16270,48473.0,1.163232e6,0.04167096503534978,4.433653313,0.000596017,5.760561483,0.320897852,0.0,0.0
|
||||
60,11750,15983,48022.0,1.144224e6,0.04196905501020779,4.316924709,0.000596237,5.738809149,0.283214404,0.0,0.0
|
||||
70,11538,15697,47325.0,1.133184e6,0.04176285581158929,4.201152631,0.000554855,5.438337093,0.313985744,0.0,0.0
|
||||
80,11434,15550,46814.0,1.129536e6,0.04144533684628024,4.216359254,0.000553545,5.429706297,0.268223845,0.0,0.0
|
||||
90,11066,15085,46232.0,1.10352e6,0.041895026823256486,3.924567625,0.000560535,5.412444055,0.274917428,0.0,0.0
|
||||
100,10848,14847,44297.0,1.100352e6,0.04025711772232885,3.848048388,0.000527955,5.127227854,0.294706757,0.0,0.0
|
||||
110,10462,14382,42261.0,1.084512e6,0.038967756926617685,3.674674179,0.000509054,4.922064369,0.276530272,0.0,0.0
|
||||
120,10304,14191,41810.0,1.07472e6,0.038903156170909635,3.58233155,0.000516074,5.02371138,0.266906519,0.0,0.0
|
||||
130,10200,14067,41437.0,1.068864e6,0.03876732680677804,3.529160319,0.000501634,4.863804478,0.24639169,0.0,0.0
|
||||
140,10042,13871,40956.0,1.059552e6,0.03865407266467337,3.346890818,0.000488403,4.753116119,0.254509861,0.0,0.0
|
||||
150,9956,13765,40583.0,1.055424e6,0.038451844945727974,3.41847396,0.000500654,4.756966153,0.255966291,0.0,0.0
|
||||
160,9906,13690,40433.0,1.053024e6,0.03839703558513386,3.405093274,0.000496774,4.812050085,0.24421971,0.0,0.0
|
||||
170,9838,13597,40283.0,1.048896e6,0.038405142168527674,3.348340057,0.000481363,4.669473296,0.234701411,0.0,0.0
|
||||
180,9242,12790,37708.0,1.02336e6,0.03684724828017511,3.063089187,0.000449352,4.335668832,0.228471471,0.0,0.0
|
||||
190,9120,12648,37082.0,1.017984e6,0.03642689865459575,2.994073054,0.000429002,4.181894908,0.224361729,0.0,0.0
|
||||
200,9052,12555,36932.0,1.013856e6,0.03642726383233911,3.046147594,0.000427282,4.151250123,0.212513705,0.0,0.0
|
||||
210,8912,12405,36366.0,1.005792e6,0.03615658108237091,2.937579863,0.000433982,4.261727394,0.214012817,0.0,0.0
|
||||
220,8808,12281,35993.0,999936.0,0.035995303699436765,2.892146284,0.000432382,4.198423468,0.219749812,0.0,0.0
|
||||
230,8626,12061,35765.0,986112.0,0.03626869970145379,2.752333211,0.000414672,4.035044142,0.241721263,0.0,0.0
|
||||
240,8426,11841,34336.0,980256.0,0.03502758463095355,2.714773746,0.000414522,4.036870861,0.235365769,0.0,0.0
|
||||
250,8118,11464,33416.0,961728.0,0.03474579090969588,2.579966689,0.000402461,3.870568035,0.20937257,0.0,0.0
|
||||
260,7942,11242,32634.0,953664.0,0.034219599355747934,2.520293442,0.000391581,3.72881432,0.191238985,0.0,0.0
|
||||
270,7838,11100,32153.0,949536.0,0.0338618019748593,2.456319106,0.000383211,3.635092003,0.187908484,0.0,0.0
|
||||
280,7716,10940,31672.0,943680.0,0.033562224482875554,2.402192681,0.00037687,3.594882506,0.194062713,0.0,0.0
|
||||
290,7576,10772,30745.0,939552.0,0.032723042471305475,2.338714319,0.00037334,3.556085038,0.194369971,0.0,0.0
|
||||
300,7376,10529,30487.0,924480.0,0.0329774575977847,2.279512925,0.00036552,3.504723807,0.191079171,0.0,0.0
|
||||
310,7218,10310,29868.0,917376.0,0.03255807869401423,2.207692656,0.000355539,3.30937664,0.181261073,0.0,0.0
|
||||
320,7078,10137,29417.0,909312.0,0.03235083227759009,2.147511905,0.000352659,3.30461376,0.18005858,0.0,0.0
|
||||
330,6860,9848,28991.0,895200.0,0.032384941912421805,2.078259266,0.00033941,3.211808988,0.172834084,0.0,0.0
|
||||
340,6702,9611,28264.0,889824.0,0.03176358470888625,2.069880378,0.000318959,3.033092324,0.154811992,0.0,0.0
|
||||
350,6616,9505,27891.0,885696.0,0.03149048883589855,2.005510172,0.000326369,3.008426711,0.173417779,0.0,0.0
|
||||
360,6512,9391,27325.0,881088.0,0.03101279327377061,1.968347618,0.000315789,2.921325386,0.168873786,0.0,0.0
|
||||
370,6426,9280,27175.0,875232.0,0.03104891046031224,1.92734893,0.000315548,2.990437001,0.181187901,0.0,0.0
|
||||
380,6358,9187,27025.0,871104.0,0.031023850194695467,1.889258172,0.000308689,2.846738111,0.181651873,0.0,0.0
|
||||
390,6272,9081,26652.0,866976.0,0.030741335400287898,1.840892272,0.000329279,2.825270586,0.177422669,0.0,0.0
|
||||
400,6204,8993,26532.0,862368.0,0.03076644773460982,1.820608708,0.000296329,2.759355249,0.175583708,0.0,0.0
|
||||
410,6118,8864,26274.0,858240.0,0.030613814317673377,1.783961229,0.000290708,2.707626007,0.172954176,0.0,0.0
|
||||
420,6014,8740,25901.0,852384.0,0.030386539400082593,1.774576254,0.000288998,2.694176581,0.173939173,0.0,0.0
|
||||
430,5928,8629,25498.0,848736.0,0.030042321758473777,1.7065974,0.000284277,2.675798329,0.170062674,0.0,0.0
|
||||
440,5842,8523,25125.0,844608.0,0.029747527847238008,1.685087395,0.000287118,2.688215586,0.166480549,0.0,0.0
|
||||
450,5738,8399,24752.0,838752.0,0.02951051085422151,1.673553823,0.000274969,2.523253333,0.167824913,0.0,0.0
|
||||
460,5670,8316,24662.0,833664.0,0.02958266159987717,1.625105871,0.000272178,2.52817126,0.164730041,0.0,0.0
|
||||
470,5548,8161,24211.0,827328.0,0.029264088729016785,1.583826656,0.000262318,2.419247276,0.160768733,0.0,0.0
|
||||
480,5426,8006,23760.0,820992.0,0.028940598690364826,1.58433006,0.000264708,2.454129792,0.155746163,0.0,0.0
|
||||
490,5358,7918,23640.0,816384.0,0.028956961429915332,1.520887155,0.000253268,2.329551174,0.153813499,0.0,0.0
|
||||
500,5272,7807,23237.0,812736.0,0.02859108000629971,1.488167166,0.000248837,2.282665244,0.154234105,0.0,0.0
|
||||
510,5150,7647,22756.0,806880.0,0.028202458853856832,1.448681065,0.000247727,2.275316917,0.149501885,0.0,0.0
|
||||
520,5028,7487,22022.0,803232.0,0.02741673638500458,1.43939862,0.000236057,2.14942739,0.146771977,0.0,0.0
|
||||
530,4906,7350,21679.0,795168.0,0.02726342106322186,1.367826149,0.000242258,2.188588822,0.148076932,0.0,0.0
|
||||
540,4838,7257,21529.0,791040.0,0.027216069983818772,1.341798982,0.000230357,2.096237881,0.141709174,0.0,0.0
|
||||
550,4752,7151,21156.0,786912.0,0.02688483591557887,1.339939443,0.000227267,2.062687036,0.13782156,0.0,0.0
|
||||
560,4684,7068,21066.0,781824.0,0.026944683202357565,1.327848904,0.000222317,2.00294804,0.139508498,0.0,0.0
|
||||
570,4634,6993,20916.0,779424.0,0.02683520137948023,1.276183945,0.000224717,2.021180753,0.13573571,0.0,0.0
|
||||
580,4548,6882,20766.0,773568.0,0.026844440307768676,1.235522514,0.000212457,1.917354147,0.128401984,0.0,0.0
|
||||
590,4498,6807,20616.0,771168.0,0.026733474418025645,1.267249751,0.000212506,1.899792552,0.133449083,0.0,0.0
|
||||
600,4376,6657,20195.0,764352.0,0.0264210730134807,1.209891149,0.000205326,1.850663451,0.129490109,0.0,0.0
|
||||
610,4326,6582,20045.0,761952.0,0.026307431439250767,1.18887911,0.000203196,1.819359467,0.129183977,0.0,0.0
|
||||
620,4204,6422,19564.0,756096.0,0.02587502116133401,1.172245936,0.000212366,1.757557943,0.125887084,0.0,0.0
|
||||
630,3836,5980,17558.0,741504.0,0.02367890126014155,1.043747354,0.000175996,1.554965777,0.115650062,0.0,0.0
|
||||
640,3732,5856,17438.0,733440.0,0.023775632635253053,1.010298683,0.000174715,1.562411059,0.113877446,0.0,0.0
|
||||
650,3628,5714,16957.0,729312.0,0.023250680093019175,0.985957627,0.000170445,1.474744854,0.110990727,0.0,0.0
|
||||
660,3506,5549,16446.0,723936.0,0.022717477788091765,0.948042334,0.000161975,1.420057878,0.106426767,0.0,0.0
|
||||
670,3420,5448,16103.0,719328.0,0.0223861715378798,0.921840457,0.000156765,1.356400004,0.10491163,0.0,0.0
|
||||
680,3316,5319,15700.0,713952.0,0.021990273855945496,0.892707383,0.000162605,1.335548894,0.100909488,0.0,0.0
|
||||
690,3212,5200,15357.0,707616.0,0.02170244878578212,0.89578919,0.000149085,1.299462304,0.099173414,0.0,0.0
|
||||
700,2916,4871,13850.0,693792.0,0.019962755407960886,0.781393124,0.000134984,1.179737113,0.096642976,0.0,0.0
|
||||
710,2722,4598,13123.0,684960.0,0.019158782994627425,0.725161332,0.000122213,1.056813282,0.08619269,0.0,0.0
|
||||
720,2636,4492,12750.0,680832.0,0.018727086858432038,0.701632434,0.000128984,1.019551067,0.085388434,0.0,0.0
|
||||
730,2532,4373,12407.0,674496.0,0.018394475282284845,0.675037355,0.000119134,0.993660466,0.082709493,0.0,0.0
|
||||
740,2428,4231,11926.0,670368.0,0.017790228650532244,0.6435086,0.000109403,0.927737064,0.078423743,0.0,0.0
|
||||
750,2342,4125,11553.0,666240.0,0.017340597982708934,0.619218823,0.000106693,0.883708241,0.075467284,0.0,0.0
|
||||
760,2274,4032,11403.0,662112.0,0.017222161809482384,0.635081649,0.000103493,0.919860114,0.074058132,0.0,0.0
|
||||
770,2234,3977,11313.0,659712.0,0.017148392025611175,0.593953439,0.000110543,0.84404911,0.077019298,0.0,0.0
|
|
@ -1,8 +1,23 @@
|
||||
# Code Generation
|
||||
|
||||
## Main
|
||||
## Types
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["code_gen/main.jl"]
|
||||
Pages = ["code_gen/type.jl"]
|
||||
Order = [:type, :constant, :function]
|
||||
```
|
||||
|
||||
## Function Generation
|
||||
Implementations for generation of a callable function. A function generated this way cannot immediately be called. One Julia World Age has to pass before this is possible, which happens when the global Julia scope advances. If the DAG and therefore the generated function becomes too large, use the tape machine instead, since compiling large functions becomes infeasible.
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["code_gen/function.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
## Tape Machine
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["code_gen/tabe_machine.jl"]
|
||||
Order = [:function]
|
||||
```
|
@ -7,6 +7,13 @@ Pages = ["scheduler/interface.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
||||
|
||||
## Types
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["scheduler/type.jl"]
|
||||
Order = [:type, :function]
|
||||
```
|
||||
|
||||
## Greedy
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
|
@ -34,10 +34,3 @@ Modules = [MetagraphOptimization]
|
||||
Pages = ["task/properties.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
||||
## Print
|
||||
```@autodocs
|
||||
Modules = [MetagraphOptimization]
|
||||
Pages = ["task/print.jl"]
|
||||
Order = [:function]
|
||||
```
|
||||
|
@ -1,3 +1,9 @@
|
||||
[deps]
|
||||
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
|
||||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||
MetagraphOptimization = "3e869610-d48d-4942-ba70-c1b702a33ca4"
|
||||
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
|
||||
QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"
|
||||
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
|
||||
|
148
examples/qed_bench.jl
Normal file
148
examples/qed_bench.jl
Normal file
@ -0,0 +1,148 @@
|
||||
using MetagraphOptimization
|
||||
using LIKWID
|
||||
using CUDA
|
||||
using UUIDs
|
||||
|
||||
function cpu_bench(compute_function, inputs)
|
||||
compute_function.(inputs[begin:10]) # make sure it's compiled
|
||||
|
||||
time = @elapsed Threads.@threads for i in eachindex(inputs)
|
||||
@invokelatest compute_function(inputs[i])
|
||||
end
|
||||
rate = length(inputs) / time
|
||||
return (time, rate)
|
||||
end
|
||||
|
||||
function gpu_bench(compute_function, inputs)
|
||||
CUDA.@sync compute_function.(inputs[begin:10]) # make sure it's compiled
|
||||
|
||||
time = @elapsed CUDA.@sync compute_function.(inputs)
|
||||
rate = length(inputs) / time
|
||||
|
||||
return (time, rate)
|
||||
end
|
||||
|
||||
function bench_process(
|
||||
process::MetagraphOptimization.AbstractProcessDescription,
|
||||
func,
|
||||
io::IO = stdout;
|
||||
use_likwid = true,
|
||||
)
|
||||
println(io, "\n--- Benchmarking $(process) ---")
|
||||
|
||||
NFLOPs = GraphProperties(graph).computeEffort
|
||||
if use_likwid
|
||||
input = gen_process_input(process)
|
||||
func(input) # compile first
|
||||
_, events = @perfmon "FLOPS_DP" func(input)
|
||||
NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
|
||||
end
|
||||
|
||||
nInputs = 10000000 # ten million
|
||||
println(io, "Generating $nInputs inputs with $(Threads.nthreads()) threads...")
|
||||
|
||||
inputs = Vector{typeof(gen_process_input(process))}()
|
||||
resize!(inputs, nInputs)
|
||||
processes = Vector{typeof(process)}()
|
||||
for i in 1:Threads.nthreads()
|
||||
push!(processes, copy(process))
|
||||
end
|
||||
|
||||
Threads.@threads for i in eachindex(inputs)
|
||||
inputs[i] = gen_process_input(processes[Threads.nthreads()])
|
||||
end
|
||||
|
||||
println(io, "Benchmarking CPU with $(Threads.nthreads()) threads...")
|
||||
(time_cpu, rate_cpu) = cpu_bench(func, inputs)
|
||||
flops_cpu = (rate_cpu * NFLOPs) / 1024^3
|
||||
|
||||
println(io, "Benchmarking GPU...")
|
||||
cuInputs = CuArray(inputs)
|
||||
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
|
||||
flops_gpu = (rate_gpu * NFLOPs) / 1024^3
|
||||
|
||||
println(io, "\nBenchmark Summary for $(process):")
|
||||
|
||||
if use_likwid
|
||||
println(io, "Measured FLOPS by LIKWID: $NFLOPs")
|
||||
else
|
||||
println(io, "Total graph compute effort: $NFLOPs")
|
||||
end
|
||||
println(io, "Total input size: $(bytes_to_human_readable(Base.summarysize(inputs)))")
|
||||
println(io, "CPU, $(Threads.nthreads()) threads")
|
||||
println(io, " Time: $time_cpu")
|
||||
println(io, " Rate: $rate_cpu")
|
||||
println(io, " GFLOPS: $flops_cpu")
|
||||
println(io, "GPU, $(name(first(CUDA.devices())))")
|
||||
println(io, " Time: $time_gpu")
|
||||
println(io, " Rate: $rate_gpu")
|
||||
return println(io, " GFLOPS: $flops_gpu")
|
||||
end
|
||||
|
||||
# use "mock" machine that only uses cpu
|
||||
machine = Machine(
|
||||
[
|
||||
MetagraphOptimization.NumaNode(
|
||||
0,
|
||||
1,
|
||||
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
|
||||
-1.0,
|
||||
UUIDs.uuid1(),
|
||||
),
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# sadly cannot put these in functions because the world age must increase after the function is created which happens only in the global scope
|
||||
|
||||
# compton
|
||||
process = parse_process("ke->ke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
# 2-photon compton
|
||||
process = parse_process("ke->kke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
# 3-photon compton
|
||||
process = parse_process("ke->kkke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
# AB->AB
|
||||
process = parse_process("AB->AB", ABCModel())
|
||||
graph = parse_dag("input/AB->AB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
# AB->AB^3
|
||||
process = parse_process("AB->ABBB", ABCModel())
|
||||
graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
exit(0)
|
||||
|
||||
# 4-photon compton
|
||||
process = parse_process("ke->kkkke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
# AB->AB^5
|
||||
process = parse_process("AB->ABBBBB", ABCModel())
|
||||
graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
542
examples/reduction.ipynb
Normal file
542
examples/reduction.ipynb
Normal file
File diff suppressed because one or more lines are too long
BIN
images/qed_ke-kke_exec_1000000_inputs.pdf
Normal file
BIN
images/qed_ke-kke_exec_1000000_inputs.pdf
Normal file
Binary file not shown.
BIN
images/qed_ke-kke_graph_properties.pdf
Normal file
BIN
images/qed_ke-kke_graph_properties.pdf
Normal file
Binary file not shown.
BIN
images/qed_ke-kkke_exec_1000000_inputs.pdf
Normal file
BIN
images/qed_ke-kkke_exec_1000000_inputs.pdf
Normal file
Binary file not shown.
BIN
images/qed_ke-kkke_graph_properties.pdf
Normal file
BIN
images/qed_ke-kkke_graph_properties.pdf
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
images/qed_ke-kkkke_exec_100000_inputs.pdf
Normal file
BIN
images/qed_ke-kkkke_exec_100000_inputs.pdf
Normal file
Binary file not shown.
BIN
images/qed_ke-kkkke_graph_properties.pdf
Normal file
BIN
images/qed_ke-kkkke_graph_properties.pdf
Normal file
Binary file not shown.
BIN
images/qed_ke-kkkkke_exec_10000_inputs.pdf
Normal file
BIN
images/qed_ke-kkkkke_exec_10000_inputs.pdf
Normal file
Binary file not shown.
BIN
images/qed_ke-kkkkke_graph_properties.pdf
Normal file
BIN
images/qed_ke-kkkkke_graph_properties.pdf
Normal file
Binary file not shown.
@ -391,7 +391,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Julia 1.9.3",
|
||||
"display_name": "Julia 1.9.4",
|
||||
"language": "julia",
|
||||
"name": "julia-1.9"
|
||||
},
|
||||
@ -399,7 +399,7 @@
|
||||
"file_extension": ".jl",
|
||||
"mimetype": "application/julia",
|
||||
"name": "julia",
|
||||
"version": "1.9.3"
|
||||
"version": "1.9.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -31,13 +31,13 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Graph:\n",
|
||||
" Nodes: Total: 131069, DataTask: 65539, ComputeTaskQED_Sum: 1, \n",
|
||||
" ComputeTaskQED_V: 35280, ComputeTaskQED_S2: 5040, ComputeTaskQED_U: 9, \n",
|
||||
" ComputeTaskQED_S1: 25200\n",
|
||||
" Edges: 176419\n",
|
||||
" Total Compute Effort: 549370.0\n",
|
||||
" Total Data Transfer: 1.0645344e7\n",
|
||||
" Total Compute Intensity: 0.05160659909158408\n"
|
||||
" Nodes: Total: 15866, DataTask: 7937, ComputeTaskQED_S2: 720, \n",
|
||||
" ComputeTaskQED_Sum: 1, ComputeTaskQED_V: 4320, ComputeTaskQED_S1: 2880, \n",
|
||||
" ComputeTaskQED_U: 8\n",
|
||||
" Edges: 21617\n",
|
||||
" Total Compute Effort: 66249.0\n",
|
||||
" Total Data Transfer: 1.314048e6\n",
|
||||
" Total Compute Intensity: 0.050415966540035065\n"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
@ -47,7 +47,7 @@
|
||||
"source": [
|
||||
"machine = get_machine_info()\n",
|
||||
"model = QEDModel()\n",
|
||||
"process = parse_process(\"ke->kkkkkke\", model)\n",
|
||||
"process = parse_process(\"ke->kkkkke\", model)\n",
|
||||
"\n",
|
||||
"inputs = [gen_process_input(process) for _ in 1:1e3];\n",
|
||||
"graph = gen_graph(process)"
|
||||
@ -62,13 +62,13 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Graph:\n",
|
||||
" Nodes: Total: 14783, DataTask: 7396, ComputeTaskQED_Sum: 1, \n",
|
||||
" ComputeTaskQED_V: 1819, ComputeTaskQED_S2: 5040, ComputeTaskQED_U: 9, \n",
|
||||
" ComputeTaskQED_S1: 518\n",
|
||||
" Edges: 26672\n",
|
||||
" Total Compute Effort: 77102.0\n",
|
||||
" Total Data Transfer: 5.063616e6\n",
|
||||
" Total Compute Intensity: 0.015226668056977465\n"
|
||||
" Nodes: Total: 2234, DataTask: 1121, ComputeTaskQED_S2: 720, \n",
|
||||
" ComputeTaskQED_Sum: 1, ComputeTaskQED_V: 312, ComputeTaskQED_S1: 72, \n",
|
||||
" ComputeTaskQED_U: 8\n",
|
||||
" Edges: 3977\n",
|
||||
" Total Compute Effort: 11313.0\n",
|
||||
" Total Data Transfer: 659712.0\n",
|
||||
" Total Compute Intensity: 0.017148392025611175\n"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
@ -78,6 +78,7 @@
|
||||
"source": [
|
||||
"optimizer = ReductionOptimizer()\n",
|
||||
"\n",
|
||||
"compute_compton = get_compute_function(graph, process, machine)\n",
|
||||
"optimize_to_fixpoint!(optimizer, graph)\n",
|
||||
"graph"
|
||||
]
|
||||
@ -91,7 +92,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Calculated 15537.0 results/s, 1295.0 results/s per thread for QED Process: 'ke->kkkkkke' (12 threads)\n"
|
||||
"Calculated 133942.0 results/s, 11162.0 results/s per thread for QED Process: 'ke->kkkkke' (12 threads)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -109,6 +110,31 @@
|
||||
"rate_per_thread = rate / Threads.nthreads()\n",
|
||||
"println(\"Calculated $(round(rate)) results/s, $(round(rate_per_thread)) results/s per thread for $(process) ($(Threads.nthreads()) threads)\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Calculated 17124.0 results/s, 1427.0 results/s per thread for QED Process: 'ke->kkkkke' (12 threads)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"bench_result = @benchmark begin\n",
|
||||
" Threads.@threads :static for i in eachindex(inputs)\n",
|
||||
" outputs[i] = compute_compton(inputs[i])\n",
|
||||
" end\n",
|
||||
"end\n",
|
||||
"\n",
|
||||
"rate = length(inputs) / (mean(bench_result.times) / 1.0e9)\n",
|
||||
"rate_per_thread = rate / Threads.nthreads()\n",
|
||||
"println(\"Calculated $(round(rate)) results/s, $(round(rate_per_thread)) results/s per thread for $(process) ($(Threads.nthreads()) threads)\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
62
results/FWK8999_QED.txt
Normal file
62
results/FWK8999_QED.txt
Normal file
@ -0,0 +1,62 @@
|
||||
CPU: AMD EPYC 7452 32 Cores, 64 Threads | 122.8 GFLOPS (?, source: https://www.cpubenchmark.net/cpu.php?cpu=AMD+EPYC+7452)
|
||||
GPU: A30 24GB | 5.161 TFLOPS (source: https://www.techpowerup.com/gpu-specs/a30-pcie.c3792)
|
||||
|
||||
Benchmark Summary for QED Process: 'ke->ke':
|
||||
Measured FLOPS by LIKWID: 5657.0
|
||||
Total input size: 1.394 GiB
|
||||
CPU, 64 threads
|
||||
Time: 0.594810558
|
||||
Rate: 1.681207548437632e7
|
||||
GFLOPS: 88.57428190774921
|
||||
GPU, NVIDIA A30
|
||||
Time: 1.547648257
|
||||
Rate: 6.461416510353748e6
|
||||
GFLOPS: 34.041919930904314
|
||||
|
||||
Benchmark Summary for QED Process: 'ke->kke':
|
||||
Measured FLOPS by LIKWID: 16256.0
|
||||
Total input size: 1.768 GiB
|
||||
CPU, 64 threads
|
||||
Time: 1.294064702
|
||||
Rate: 7.7275888790914565e6
|
||||
GFLOPS: 116.99244828756034
|
||||
GPU, NVIDIA A30
|
||||
Time: 4.973188906
|
||||
Rate: 2.0107822544072892e6
|
||||
GFLOPS: 30.442398346629826
|
||||
|
||||
Benchmark Summary for QED Process: 'ke->kkke':
|
||||
Measured FLOPS by LIKWID: 43433.0
|
||||
Total input size: 2.632 GiB
|
||||
CPU, 64 threads
|
||||
Time: 3.232029091
|
||||
Rate: 3.094031556784648e6
|
||||
GFLOPS: 125.15398916399816
|
||||
GPU, NVIDIA A30
|
||||
Time: 14.597070187
|
||||
Rate: 685068.9810963502
|
||||
GFLOPS: 27.711131662091034
|
||||
|
||||
Benchmark Summary for ABC Process: 'AB->AB':
|
||||
Measured FLOPS by LIKWID: 41.0
|
||||
Total input size: 2.201 GiB
|
||||
CPU, 64 threads
|
||||
Time: 0.688079611
|
||||
Rate: 1.453320203089116e7
|
||||
GFLOPS: 0.5549390644454747
|
||||
GPU, NVIDIA A30
|
||||
Time: 0.013803574
|
||||
Rate: 7.244500590933913e8
|
||||
GFLOPS: 27.662564462822903
|
||||
|
||||
Benchmark Summary for ABC Process: 'AB->ABBB':
|
||||
Measured FLOPS by LIKWID: 899.0
|
||||
Total input size: 3.079 GiB
|
||||
CPU, 64 threads
|
||||
Time: 0.855687624
|
||||
Rate: 1.1686507692204276e7
|
||||
GFLOPS: 9.784633680518386
|
||||
GPU, NVIDIA A30
|
||||
Time: 0.014804518
|
||||
Rate: 6.754694749265056e8
|
||||
GFLOPS: 565.542893445984
|
@ -79,6 +79,7 @@ export execute
|
||||
export parse_dag, parse_process
|
||||
export gen_process_input
|
||||
export get_compute_function
|
||||
export gen_tape, execute_tape
|
||||
|
||||
# estimator
|
||||
export cost_type, graph_cost, operation_effect
|
||||
@ -120,6 +121,7 @@ include("diff/type.jl")
|
||||
include("properties/type.jl")
|
||||
include("operation/type.jl")
|
||||
include("graph/type.jl")
|
||||
include("scheduler/type.jl")
|
||||
|
||||
include("trie.jl")
|
||||
include("utility.jl")
|
||||
@ -155,7 +157,6 @@ include("properties/utility.jl")
|
||||
include("task/create.jl")
|
||||
include("task/compare.jl")
|
||||
include("task/compute.jl")
|
||||
include("task/print.jl")
|
||||
include("task/properties.jl")
|
||||
|
||||
include("estimator/interface.jl")
|
||||
@ -200,6 +201,8 @@ include("devices/cuda/impl.jl")
|
||||
include("scheduler/interface.jl")
|
||||
include("scheduler/greedy.jl")
|
||||
|
||||
include("code_gen/main.jl")
|
||||
include("code_gen/type.jl")
|
||||
include("code_gen/tape_machine.jl")
|
||||
include("code_gen/function.jl")
|
||||
|
||||
end # module MetagraphOptimization
|
||||
|
40
src/code_gen/function.jl
Normal file
40
src/code_gen/function.jl
Normal file
@ -0,0 +1,40 @@
|
||||
"""
|
||||
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
|
||||
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
|
||||
"""
|
||||
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
tape = gen_tape(graph, process, machine)
|
||||
|
||||
initCaches = Expr(:block, tape.initCachesCode...)
|
||||
assignInputs = Expr(:block, expr_from_fc.(tape.inputAssignCode)...)
|
||||
code = Expr(:block, expr_from_fc.(tape.computeCode)...)
|
||||
|
||||
functionId = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
resSym = eval(gen_access_expr(entry_device(tape.machine), tape.outputSymbol))
|
||||
expr = Meta.parse(
|
||||
"function compute_$(functionId)(data_input::AbstractProcessInput) $(initCaches); $(assignInputs); $code; return $resSym; end",
|
||||
)
|
||||
|
||||
func = eval(expr)
|
||||
|
||||
return func
|
||||
end
|
||||
|
||||
"""
|
||||
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
|
||||
Execute the code of the given `graph` on the given input particles.
|
||||
|
||||
This is essentially shorthand for
|
||||
```julia
|
||||
tape = gen_tape(graph, process, machine)
|
||||
return execute_tape(tape, input)
|
||||
```
|
||||
|
||||
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
|
||||
"""
|
||||
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
tape = gen_tape(graph, process, machine)
|
||||
return execute_tape(tape, input)
|
||||
end
|
@ -1,155 +0,0 @@
|
||||
"""
|
||||
gen_code(graph::DAG)
|
||||
|
||||
Generate the code for a given graph. The return value is a named tuple of:
|
||||
|
||||
- `code::Expr`: The julia expression containing the code for the whole graph.
|
||||
- `inputSymbols::Dict{String, Vector{Symbol}}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
|
||||
- `outputSymbol::Symbol`: The symbol of the final calculated value
|
||||
|
||||
See also: [`execute`](@ref)
|
||||
"""
|
||||
function gen_code(graph::DAG, machine::Machine)
|
||||
sched = schedule_dag(GreedyScheduler(), graph, machine)
|
||||
|
||||
codeAcc = Vector{Expr}()
|
||||
sizehint!(codeAcc, length(graph.nodes))
|
||||
|
||||
for node in sched
|
||||
# TODO: this is kind of ugly, should init nodes be scheduled differently from the rest?
|
||||
if (node isa DataTaskNode && length(node.children) == 0)
|
||||
push!(codeAcc, get_init_expression(node, entry_device(machine)))
|
||||
continue
|
||||
end
|
||||
push!(codeAcc, get_expression(node))
|
||||
end
|
||||
|
||||
# get inSymbols
|
||||
inputSyms = Dict{String, Vector{Symbol}}()
|
||||
for node in get_entry_nodes(graph)
|
||||
if !haskey(inputSyms, node.name)
|
||||
inputSyms[node.name] = Vector{Symbol}()
|
||||
end
|
||||
|
||||
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
|
||||
end
|
||||
|
||||
# get outSymbol
|
||||
outSym = Symbol(to_var_name(get_exit_node(graph).id))
|
||||
|
||||
return (code = Expr(:block, codeAcc...), inputSymbols = inputSyms, outputSymbol = outSym)
|
||||
end
|
||||
|
||||
function gen_cache_init_code(machine::Machine)
|
||||
initializeCaches = Vector{Expr}()
|
||||
|
||||
for device in machine.devices
|
||||
push!(initializeCaches, gen_cache_init_code(device))
|
||||
end
|
||||
|
||||
return Expr(:block, initializeCaches...)
|
||||
end
|
||||
|
||||
function gen_input_assignment_code(
|
||||
inputSymbols::Dict{String, Vector{Symbol}},
|
||||
processDescription::AbstractProcessDescription,
|
||||
machine::Machine,
|
||||
processInputSymbol::Symbol = :input,
|
||||
)
|
||||
@assert length(inputSymbols) >=
|
||||
sum(values(in_particles(processDescription))) + sum(values(out_particles(processDescription))) "Number of input Symbols is smaller than the number of particles in the process description"
|
||||
|
||||
assignInputs = Vector{Expr}()
|
||||
for (name, symbols) in inputSymbols
|
||||
(type, index) = type_index_from_name(model(processDescription), name)
|
||||
p = nothing
|
||||
|
||||
if (index > get(in_particles(processDescription), type, 0))
|
||||
index -= get(in_particles(processDescription), type, 0)
|
||||
@assert index <= out_particles(processDescription)[type] "Too few particles of type $type in input particles for this process"
|
||||
|
||||
p = "filter(x -> typeof(x) <: $type, out_particles($(processInputSymbol)))[$(index)]"
|
||||
else
|
||||
p = "filter(x -> typeof(x) <: $type, in_particles($(processInputSymbol)))[$(index)]"
|
||||
end
|
||||
|
||||
for symbol in symbols
|
||||
device = entry_device(machine)
|
||||
evalExpr = eval(gen_access_expr(device, symbol))
|
||||
push!(assignInputs, Meta.parse("$(evalExpr)::ParticleValue{$type} = ParticleValue($p, one(ComplexF64))"))
|
||||
end
|
||||
end
|
||||
|
||||
return Expr(:block, assignInputs...)
|
||||
end
|
||||
|
||||
"""
|
||||
get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
|
||||
Return a function of signature `compute_<id>(input::AbstractProcessInput)`, which will return the result of the DAG computation on the given input.
|
||||
"""
|
||||
function get_compute_function(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
(code, inputSymbols, outputSymbol) = gen_code(graph, machine)
|
||||
|
||||
initCaches = gen_cache_init_code(machine)
|
||||
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
|
||||
|
||||
functionId = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
|
||||
expr = Meta.parse(
|
||||
"function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
|
||||
)
|
||||
|
||||
func = eval(expr)
|
||||
|
||||
return func
|
||||
end
|
||||
|
||||
"""
|
||||
execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
|
||||
Execute the code of the given `graph` on the given input particles.
|
||||
|
||||
This is essentially shorthand for
|
||||
```julia
|
||||
compute_graph = get_compute_function(graph, process)
|
||||
result = compute_graph(particles)
|
||||
```
|
||||
|
||||
See also: [`parse_dag`](@ref), [`parse_process`](@ref), [`gen_process_input`](@ref)
|
||||
"""
|
||||
function execute(graph::DAG, process::AbstractProcessDescription, machine::Machine, input::AbstractProcessInput)
|
||||
(code, inputSymbols, outputSymbol) = gen_code(graph, machine)
|
||||
|
||||
initCaches = gen_cache_init_code(machine)
|
||||
assignInputs = gen_input_assignment_code(inputSymbols, process, machine, :input)
|
||||
|
||||
|
||||
functionId = to_var_name(UUIDs.uuid1(rng[1]))
|
||||
resSym = eval(gen_access_expr(entry_device(machine), outputSymbol))
|
||||
expr = Meta.parse(
|
||||
"function compute_$(functionId)(input::AbstractProcessInput) $initCaches; $assignInputs; $code; return $resSym; end",
|
||||
)
|
||||
func = eval(expr)
|
||||
|
||||
result = 0
|
||||
try
|
||||
result = @eval $func($input)
|
||||
catch e
|
||||
println("Error while evaluating: $e")
|
||||
|
||||
# if we find a uuid in the exception we can color it in so it's easier to spot
|
||||
uuidRegex = r"[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}"
|
||||
m = match(uuidRegex, string(e))
|
||||
|
||||
functionStr = string(expr)
|
||||
if (isa(m, RegexMatch))
|
||||
functionStr = replace(functionStr, m.match => "\033[31m$(m.match)\033[0m")
|
||||
end
|
||||
|
||||
println("Function:\n$functionStr")
|
||||
@assert false
|
||||
end
|
||||
|
||||
return result
|
||||
end
|
182
src/code_gen/tape_machine.jl
Normal file
182
src/code_gen/tape_machine.jl
Normal file
@ -0,0 +1,182 @@
|
||||
function call_fc(fc::FunctionCall{VectorT, 0}, cache::Dict{Symbol, Any}) where {VectorT <: SVector{1}}
|
||||
cache[fc.return_symbol] = fc.func(cache[fc.arguments[1]])
|
||||
return nothing
|
||||
end
|
||||
|
||||
function call_fc(fc::FunctionCall{VectorT, 1}, cache::Dict{Symbol, Any}) where {VectorT <: SVector{1}}
|
||||
cache[fc.return_symbol] = fc.func(fc.additional_arguments[1], cache[fc.arguments[1]])
|
||||
return nothing
|
||||
end
|
||||
|
||||
function call_fc(fc::FunctionCall{VectorT, 0}, cache::Dict{Symbol, Any}) where {VectorT <: SVector{2}}
|
||||
cache[fc.return_symbol] = fc.func(cache[fc.arguments[1]], cache[fc.arguments[2]])
|
||||
return nothing
|
||||
end
|
||||
|
||||
function call_fc(fc::FunctionCall{VectorT, 1}, cache::Dict{Symbol, Any}) where {VectorT <: SVector{2}}
|
||||
cache[fc.return_symbol] = fc.func(fc.additional_arguments[1], cache[fc.arguments[1]], cache[fc.arguments[2]])
|
||||
return nothing
|
||||
end
|
||||
|
||||
function call_fc(fc::FunctionCall{VectorT, 1}, cache::Dict{Symbol, Any}) where {VectorT}
|
||||
cache[fc.return_symbol] = fc.func(fc.additional_arguments[1], getindex.(Ref(cache), fc.arguments)...)
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
call_fc(fc::FunctionCall, cache::Dict{Symbol, Any})
|
||||
|
||||
Execute the given [`FunctionCall`](@ref) on the dictionary.
|
||||
|
||||
Several more specialized versions of this function exist to reduce vector unrolling work for common cases.
|
||||
"""
|
||||
function call_fc(fc::FunctionCall{VectorT, M}, cache::Dict{Symbol, Any}) where {VectorT, M}
|
||||
cache[fc.return_symbol] = fc.func(fc.additional_arguments..., getindex.(Ref(cache), fc.arguments)...)
|
||||
return nothing
|
||||
end
|
||||
|
||||
function expr_from_fc(fc::FunctionCall{VectorT, 0}) where {VectorT}
|
||||
return Meta.parse(
|
||||
"$(eval(gen_access_expr(fc.device, fc.return_symbol))) = $(fc.func)($(unroll_symbol_vector(eval.(gen_access_expr.(Ref(fc.device), fc.arguments)))))",
|
||||
)
|
||||
end
|
||||
|
||||
"""
|
||||
expr_from_fc(fc::FunctionCall)
|
||||
|
||||
For a given function call, return an expression evaluating it.
|
||||
"""
|
||||
function expr_from_fc(fc::FunctionCall{VectorT, M}) where {VectorT, M}
|
||||
func_call = Expr(
|
||||
:call,
|
||||
Symbol(fc.func),
|
||||
fc.additional_arguments...,
|
||||
eval.(gen_access_expr.(Ref(fc.device), fc.arguments))...,
|
||||
)
|
||||
|
||||
expr = :($(eval(gen_access_expr(fc.device, fc.return_symbol))) = $func_call)
|
||||
return expr
|
||||
end
|
||||
|
||||
"""
|
||||
gen_cache_init_code(machine::Machine)
|
||||
|
||||
For each [`AbstractDevice`](@ref) in the given [`Machine`](@ref), returning a `Vector{Expr}` doing the initialization.
|
||||
"""
|
||||
function gen_cache_init_code(machine::Machine)
|
||||
initializeCaches = Vector{Expr}()
|
||||
|
||||
for device in machine.devices
|
||||
push!(initializeCaches, gen_cache_init_code(device))
|
||||
end
|
||||
|
||||
return initializeCaches
|
||||
end
|
||||
|
||||
"""
|
||||
part_from_x(type::Type, index::Int, x::AbstractProcessInput)
|
||||
|
||||
Return the [`ParticleValue`](@ref) of the given type of particle with the given `index` from the given process input.
|
||||
|
||||
Function is wrapped into a [`FunctionCall`](@ref) in [`gen_input_assignment_code`](@ref).
|
||||
"""
|
||||
part_from_x(type::Type, index::Int, x::AbstractProcessInput) =
|
||||
ParticleValue{type, ComplexF64}(get_particle(x, type, index), one(ComplexF64))
|
||||
|
||||
"""
|
||||
gen_input_assignment_code(
|
||||
inputSymbols::Dict{String, Vector{Symbol}},
|
||||
processDescription::AbstractProcessDescription,
|
||||
machine::Machine,
|
||||
processInputSymbol::Symbol = :input,
|
||||
)
|
||||
|
||||
Return a `Vector{Expr}` doing the input assignments from the given `processInputSymbol` onto the `inputSymbols`.
|
||||
"""
|
||||
function gen_input_assignment_code(
|
||||
inputSymbols::Dict{String, Vector{Symbol}},
|
||||
processDescription::AbstractProcessDescription,
|
||||
machine::Machine,
|
||||
processInputSymbol::Symbol = :input,
|
||||
)
|
||||
@assert length(inputSymbols) >=
|
||||
sum(values(in_particles(processDescription))) + sum(values(out_particles(processDescription))) "Number of input Symbols is smaller than the number of particles in the process description"
|
||||
|
||||
assignInputs = Vector{FunctionCall}()
|
||||
for (name, symbols) in inputSymbols
|
||||
(type, index) = type_index_from_name(model(processDescription), name)
|
||||
# make a function for this, since we can't use anonymous functions in the FunctionCall
|
||||
|
||||
for symbol in symbols
|
||||
device = entry_device(machine)
|
||||
push!(
|
||||
assignInputs,
|
||||
FunctionCall(
|
||||
# x is the process input
|
||||
part_from_x,
|
||||
SVector{1, Symbol}(processInputSymbol),
|
||||
SVector{2, Any}(type, index),
|
||||
symbol,
|
||||
device,
|
||||
),
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
return assignInputs
|
||||
end
|
||||
|
||||
"""
|
||||
gen_tape(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
|
||||
Generate the code for a given graph. The return value is a [`Tape`](@ref).
|
||||
|
||||
See also: [`execute`](@ref), [`execute_tape`](@ref)
|
||||
"""
|
||||
function gen_tape(graph::DAG, process::AbstractProcessDescription, machine::Machine)
|
||||
schedule = schedule_dag(GreedyScheduler(), graph, machine)
|
||||
|
||||
# get inSymbols
|
||||
inputSyms = Dict{String, Vector{Symbol}}()
|
||||
for node in get_entry_nodes(graph)
|
||||
if !haskey(inputSyms, node.name)
|
||||
inputSyms[node.name] = Vector{Symbol}()
|
||||
end
|
||||
|
||||
push!(inputSyms[node.name], Symbol("$(to_var_name(node.id))_in"))
|
||||
end
|
||||
|
||||
# get outSymbol
|
||||
outSym = Symbol(to_var_name(get_exit_node(graph).id))
|
||||
|
||||
initCaches = gen_cache_init_code(machine)
|
||||
assignInputs = gen_input_assignment_code(inputSyms, process, machine, :input)
|
||||
|
||||
return Tape(initCaches, assignInputs, schedule, inputSyms, outSym, Dict(), process, machine)
|
||||
end
|
||||
|
||||
"""
|
||||
execute_tape(tape::Tape, input::AbstractProcessInput)
|
||||
|
||||
Execute the given tape with the given input.
|
||||
|
||||
For implementation reasons, this disregards the set [`CacheStrategy`](@ref) of the devices and always uses a dictionary.
|
||||
"""
|
||||
function execute_tape(tape::Tape, input::AbstractProcessInput)
|
||||
cache = Dict{Symbol, Any}()
|
||||
cache[:input] = input
|
||||
# simply execute all the code snippets here
|
||||
# TODO: `@assert` that process input fits the tape.process
|
||||
for expr in tape.initCachesCode
|
||||
@eval $expr
|
||||
end
|
||||
|
||||
for function_call in tape.inputAssignCode
|
||||
call_fc(function_call, cache)
|
||||
end
|
||||
for function_call in tape.computeCode
|
||||
call_fc(function_call, cache)
|
||||
end
|
||||
|
||||
return cache[tape.outputSymbol]
|
||||
end
|
19
src/code_gen/type.jl
Normal file
19
src/code_gen/type.jl
Normal file
@ -0,0 +1,19 @@
|
||||
|
||||
"""
|
||||
Tape
|
||||
|
||||
TODO: update docs
|
||||
- `code::Vector{Expr}`: The julia expression containing the code for the whole graph.
|
||||
- `inputSymbols::Dict{String, Vector{Symbol}}`: A dictionary of symbols mapping the names of the input nodes of the graph to the symbols their inputs should be provided on.
|
||||
- `outputSymbol::Symbol`: The symbol of the final calculated value
|
||||
"""
|
||||
struct Tape
|
||||
initCachesCode::Vector{Expr}
|
||||
inputAssignCode::Vector{FunctionCall}
|
||||
computeCode::Vector{FunctionCall}
|
||||
inputSymbols::Dict{String, Vector{Symbol}}
|
||||
outputSymbol::Symbol
|
||||
cache::Dict{Symbol, Any}
|
||||
process::AbstractProcessDescription
|
||||
machine::Machine
|
||||
end
|
@ -75,3 +75,7 @@ function operation_effect(estimator::GlobalMetricEstimator, graph::DAG, operatio
|
||||
ce::Float64 = s * compute_effort(task(operation.input))
|
||||
return (data = d, computeEffort = ce, computeIntensity = ce / d)::CDCost
|
||||
end
|
||||
|
||||
function String(::GlobalMetricEstimator)
|
||||
return "global_metric"
|
||||
end
|
||||
|
14
src/estimator/likwid.jl
Normal file
14
src/estimator/likwid.jl
Normal file
@ -0,0 +1,14 @@
|
||||
using LIKWID
|
||||
|
||||
"""
|
||||
LIKWIDEstimator <: AbstractEstimator
|
||||
|
||||
An estimator using LIKWID.jl to measure the total FLOPS needed to execute the graph.
|
||||
"""
|
||||
struct LIKWIDEstimator <: AbstractEstimator end
|
||||
|
||||
cost_type(::LIKWIDEstimator) = Float64
|
||||
|
||||
function graph_cost(::LIKWIDEstimator, graph::DAG)
|
||||
|
||||
end
|
@ -1,4 +1,5 @@
|
||||
using AccurateArithmetic
|
||||
using StaticArrays
|
||||
|
||||
"""
|
||||
compute(::ComputeTaskABC_P, data::ABCParticleValue)
|
||||
@ -75,89 +76,17 @@ function compute(::ComputeTaskABC_S1, data::ABCParticleValue{P})::ABCParticleVal
|
||||
end
|
||||
|
||||
"""
|
||||
compute(::ComputeTaskABC_Sum, data::Vector{Float64})
|
||||
compute(::ComputeTaskABC_Sum, data...)
|
||||
compute(::ComputeTaskABC_Sum, data::AbstractArray)
|
||||
|
||||
Compute a sum over the vector. Use an algorithm that accounts for accumulated errors in long sums with potentially large differences in magnitude of the summands.
|
||||
|
||||
Linearly many FLOP with growing data.
|
||||
"""
|
||||
function compute(::ComputeTaskABC_Sum, data::Vector{Float64})::Float64
|
||||
return sum_kbn(data)
|
||||
function compute(::ComputeTaskABC_Sum, data...)::Float64
|
||||
return sum(data)
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskABC_P, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate and return code evaluating [`ComputeTaskABC_P`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
"""
|
||||
function get_expression(::ComputeTaskABC_P, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskABC_P(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskABC_U, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskABC_U`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`ABCParticleValue`](@ref), `outSym` will be of type [`ABCParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(::ComputeTaskABC_U, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskABC_U(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskABC_V, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskABC_V`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSym[1]` and `inSym[2]` should be of type [`ABCParticleValue`](@ref), `outSym` will be of type [`ABCParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(::ComputeTaskABC_V, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1]), eval(inExprs[2])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskABC_V(), $(in[1]), $(in[2]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskABC_S2, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskABC_S2`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms[1]` and `inSyms[2]` should be of type [`ABCParticleValue`](@ref), `outSym` will be of type `Float64`.
|
||||
"""
|
||||
function get_expression(::ComputeTaskABC_S2, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1]), eval(inExprs[2])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskABC_S2(), $(in[1]), $(in[2]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskABC_S1, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskABC_S1`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`ABCParticleValue`](@ref), `outSym` will be of type [`ABCParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(::ComputeTaskABC_S1, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskABC_S1(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskABC_Sum, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskABC_Sum`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`Float64`], `outSym` will be of type [`Float64`].
|
||||
"""
|
||||
function get_expression(::ComputeTaskABC_Sum, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = eval.(inExprs)
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskABC_Sum(), [$(unroll_symbol_vector(in))])")
|
||||
function compute(::ComputeTaskABC_Sum, data::AbstractArray)::Float64
|
||||
return sum(data)
|
||||
end
|
||||
|
@ -5,6 +5,20 @@ using ForwardDiff
|
||||
|
||||
ComputeTaskABC_Sum() = ComputeTaskABC_Sum(0)
|
||||
|
||||
function _svector_from_type_in(processDescription::ABCProcessDescription, type, particles)
|
||||
if haskey(in_particles(processDescription), type)
|
||||
return SVector{in_particles(processDescription)[type], type}(filter(x -> typeof(x) <: type, particles))
|
||||
end
|
||||
return SVector{0, type}()
|
||||
end
|
||||
|
||||
function _svector_from_type_out(processDescription::ABCProcessDescription, type, particles)
|
||||
if haskey(out_particles(processDescription), type)
|
||||
return SVector{out_particles(processDescription)[type], type}(filter(x -> typeof(x) <: type, particles))
|
||||
end
|
||||
return SVector{0, type}()
|
||||
end
|
||||
|
||||
"""
|
||||
gen_process_input(processDescription::ABCProcessDescription)
|
||||
|
||||
@ -58,7 +72,15 @@ function gen_process_input(processDescription::ABCProcessDescription)
|
||||
end
|
||||
end
|
||||
|
||||
processInput = ABCProcessInput(processDescription, inputParticles, outputParticles)
|
||||
inA = _svector_from_type_in(processDescription, ParticleA, inputParticles)
|
||||
inB = _svector_from_type_in(processDescription, ParticleB, inputParticles)
|
||||
inC = _svector_from_type_in(processDescription, ParticleC, inputParticles)
|
||||
|
||||
outA = _svector_from_type_out(processDescription, ParticleA, outputParticles)
|
||||
outB = _svector_from_type_out(processDescription, ParticleB, outputParticles)
|
||||
outC = _svector_from_type_out(processDescription, ParticleC, outputParticles)
|
||||
|
||||
processInput = ABCProcessInput(processDescription, inA, inB, inC, outA, outB, outC)
|
||||
|
||||
return return processInput
|
||||
end
|
||||
|
@ -1,3 +1,5 @@
|
||||
using StaticArrays
|
||||
|
||||
import QEDbase.mass
|
||||
|
||||
"""
|
||||
@ -60,27 +62,30 @@ Input for a ABC Process. Contains the [`ABCProcessDescription`](@ref) of the pro
|
||||
|
||||
See also: [`gen_process_input`](@ref)
|
||||
"""
|
||||
struct ABCProcessInput <: AbstractProcessInput
|
||||
struct ABCProcessInput{N1, N2, N3, N4, N5, N6} <: AbstractProcessInput
|
||||
process::ABCProcessDescription
|
||||
inParticles::Vector{ABCParticle}
|
||||
outParticles::Vector{ABCParticle}
|
||||
inA::SVector{N1, ParticleA}
|
||||
inB::SVector{N2, ParticleB}
|
||||
inC::SVector{N3, ParticleC}
|
||||
outA::SVector{N4, ParticleA}
|
||||
outB::SVector{N5, ParticleB}
|
||||
outC::SVector{N6, ParticleC}
|
||||
end
|
||||
|
||||
ABCParticleValue{ParticleType <: ABCParticle} = ParticleValue{ParticleType, ComplexF64}
|
||||
|
||||
"""
|
||||
PARTICLE_MASSES
|
||||
|
||||
A constant dictionary containing the masses of the different [`ABCParticle`](@ref)s.
|
||||
"""
|
||||
const PARTICLE_MASSES = Dict{Type, Float64}(ParticleA => 1.0, ParticleB => 1.0, ParticleC => 0.0)
|
||||
|
||||
"""
|
||||
mass(t::Type{T}) where {T <: ABCParticle}
|
||||
|
||||
Return the mass (at rest) of the given particle type.
|
||||
"""
|
||||
mass(t::Type{T}) where {T <: ABCParticle} = PARTICLE_MASSES[t]
|
||||
mass(::ParticleA) = 1.0
|
||||
mass(::ParticleB) = 1.0
|
||||
mass(::ParticleC) = 0.0
|
||||
|
||||
mass(::Type{ParticleA}) = 1.0
|
||||
mass(::Type{ParticleB}) = 1.0
|
||||
mass(::Type{ParticleC}) = 0.0
|
||||
|
||||
"""
|
||||
interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: ABCParticle, T2 <: ABCParticle}
|
||||
@ -126,7 +131,7 @@ Return the factor of the inner edge with the given (virtual) particle.
|
||||
Takes 10 effective FLOP. (3 here + 7 in square(p))
|
||||
"""
|
||||
function ABC_inner_edge(p::ABCParticle)
|
||||
return 1.0 / (square(p) - mass(typeof(p)) * mass(typeof(p)))
|
||||
return 1.0 / (square(p) - mass(p)^2)
|
||||
end
|
||||
|
||||
"""
|
||||
@ -166,6 +171,10 @@ function ABC_conserve_momentum(p1::ABCParticle, p2::ABCParticle)
|
||||
return p3
|
||||
end
|
||||
|
||||
function copy(process::ABCProcessDescription)
|
||||
return ABCProcessDescription(copy(process.inParticles), copy(process.outParticles))
|
||||
end
|
||||
|
||||
model(::ABCProcessDescription) = ABCModel()
|
||||
model(::ABCProcessInput) = ABCModel()
|
||||
|
||||
@ -195,14 +204,29 @@ function in_particles(process::ABCProcessDescription)
|
||||
return process.inParticles
|
||||
end
|
||||
|
||||
function in_particles(input::ABCProcessInput)
|
||||
return input.inParticles
|
||||
end
|
||||
|
||||
function out_particles(process::ABCProcessDescription)
|
||||
return process.outParticles
|
||||
end
|
||||
|
||||
function out_particles(input::ABCProcessInput)
|
||||
return input.outParticles
|
||||
function get_particle(input::ABCProcessInput, t::Type{Particle}, n::Int)::Particle where {Particle}
|
||||
if (t <: ParticleA)
|
||||
if (n > length(input.inA))
|
||||
return input.outA[n - length(input.inA)]
|
||||
else
|
||||
return input.inA[n]
|
||||
end
|
||||
elseif (t <: ParticleB)
|
||||
if (n > length(input.inB))
|
||||
return input.outB[n - length(input.inB)]
|
||||
else
|
||||
return input.inB[n]
|
||||
end
|
||||
elseif (t <: ParticleC)
|
||||
if (n > length(input.inC))
|
||||
return input.outC[n - length(input.inC)]
|
||||
else
|
||||
return input.inC[n]
|
||||
end
|
||||
end
|
||||
@assert false "Invalid type given"
|
||||
end
|
||||
|
@ -36,15 +36,26 @@ Pretty print an [`ABCProcessInput`](@ref) (with newlines).
|
||||
"""
|
||||
function show(io::IO, processInput::ABCProcessInput)
|
||||
println(io, "Input for $(processInput.process):")
|
||||
println(io, " $(length(processInput.inParticles)) Incoming particles:")
|
||||
for particle in processInput.inParticles
|
||||
println(io, " $particle")
|
||||
println(io, "Incoming particles:")
|
||||
if !isempty(processInput.inA)
|
||||
println(io, " $(processInput.inA)")
|
||||
end
|
||||
println(io, " $(length(processInput.outParticles)) Outgoing Particles:")
|
||||
for particle in processInput.outParticles
|
||||
println(io, " $particle")
|
||||
if !isempty(processInput.inB)
|
||||
println(io, " $(processInput.inB)")
|
||||
end
|
||||
if !isempty(processInput.inC)
|
||||
println(io, " $(processInput.inC)")
|
||||
end
|
||||
println(io, "Outgoing particles:")
|
||||
if !isempty(processInput.outA)
|
||||
println(io, " $(processInput.outA)")
|
||||
end
|
||||
if !isempty(processInput.outB)
|
||||
println(io, " $(processInput.outB)")
|
||||
end
|
||||
if !isempty(processInput.outC)
|
||||
println(io, " $(processInput.outC)")
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
|
@ -43,48 +43,6 @@ this doesn't matter.
|
||||
"""
|
||||
compute_effort(t::ComputeTaskABC_Sum)::Float64 = 1.0
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_S1)
|
||||
|
||||
Print the S1 task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_S1) = print(io, "ComputeS1")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_S2)
|
||||
|
||||
Print the S2 task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_S2) = print(io, "ComputeS2")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_P)
|
||||
|
||||
Print the P task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_P) = print(io, "ComputeP")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_U)
|
||||
|
||||
Print the U task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_U) = print(io, "ComputeU")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_V)
|
||||
|
||||
Print the V task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_V) = print(io, "ComputeV")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_Sum)
|
||||
|
||||
Print the sum task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskABC_Sum) = print(io, "ComputeSum")
|
||||
|
||||
"""
|
||||
children(::ComputeTaskABC_S1)
|
||||
|
||||
|
@ -80,6 +80,14 @@ Returns a `<: Vector{AbstractParticle}` object with the values of all outgoing p
|
||||
"""
|
||||
function out_particles end
|
||||
|
||||
"""
|
||||
get_particle(::AbstractProcessInput, t::Type, n::Int)
|
||||
|
||||
Interface function that must be implemented for every subtype of [`AbstractProcessInput`](@ref).
|
||||
Returns the `n`th particle of type `t`.
|
||||
"""
|
||||
function get_particle end
|
||||
|
||||
"""
|
||||
parse_process(::AbstractString, ::AbstractPhysicsModel)
|
||||
|
||||
|
@ -1,12 +1,13 @@
|
||||
using StaticArrays
|
||||
|
||||
"""
|
||||
compute(::ComputeTaskQED_P, data::QEDParticleValue)
|
||||
|
||||
Return the particle as is and initialize the Value.
|
||||
"""
|
||||
function compute(::ComputeTaskQED_P, data::QEDParticleValue{P})::QEDParticleValue{P} where {P <: QEDParticle}
|
||||
function compute(::ComputeTaskQED_P, data::QEDParticleValue{P}) where {P <: QEDParticle}
|
||||
# TODO do we actually need this for anything?
|
||||
return QEDParticleValue{P}(data.p, one(DiracMatrix))
|
||||
return ParticleValue{P, DiracMatrix}(data.p, one(DiracMatrix))
|
||||
end
|
||||
|
||||
"""
|
||||
@ -15,7 +16,8 @@ end
|
||||
Compute an outer edge. Return the particle value with the same particle and the value multiplied by an outer_edge factor.
|
||||
"""
|
||||
function compute(::ComputeTaskQED_U, data::PV) where {P <: QEDParticle, PV <: QEDParticleValue{P}}
|
||||
state = base_state(particle(data.p), direction(data.p), momentum(data.p), spin_or_pol(data.p))
|
||||
part::P = data.p
|
||||
state = base_state(particle(part), direction(part), momentum(part), spin_or_pol(part))
|
||||
return ParticleValue{P, typeof(state)}(
|
||||
data.p,
|
||||
state, # will return a SLorentzVector{ComplexF64}, BiSpinor or AdjointBiSpinor
|
||||
@ -34,7 +36,6 @@ function compute(
|
||||
) where {P1 <: QEDParticle, P2 <: QEDParticle, PV1 <: QEDParticleValue{P1}, PV2 <: QEDParticleValue{P2}}
|
||||
p3 = QED_conserve_momentum(data1.p, data2.p)
|
||||
P3 = interaction_result(P1, P2)
|
||||
|
||||
state = QED_vertex()
|
||||
if (typeof(data1.v) <: AdjointBiSpinor)
|
||||
state = data1.v * state
|
||||
@ -47,7 +48,7 @@ function compute(
|
||||
state = state * data2.v
|
||||
end
|
||||
|
||||
dataOut = ParticleValue{P3, typeof(state)}(P3(p3), state)
|
||||
dataOut = ParticleValue{P3, typeof(state)}(P3(momentum(p3)), state)
|
||||
return dataOut
|
||||
end
|
||||
|
||||
@ -64,13 +65,10 @@ function compute(
|
||||
::ComputeTaskQED_S2,
|
||||
data1::ParticleValue{P1},
|
||||
data2::ParticleValue{P2},
|
||||
)::ComplexF64 where {
|
||||
P1 <: Union{AntiFermionStateful, FermionStateful},
|
||||
P2 <: Union{AntiFermionStateful, FermionStateful},
|
||||
}
|
||||
@assert isapprox(data1.p.momentum, data2.p.momentum, rtol = sqrt(eps()), atol = sqrt(eps())) "$(data1.p.momentum) vs. $(data2.p.momentum)"
|
||||
) where {P1 <: Union{AntiFermionStateful, FermionStateful}, P2 <: Union{AntiFermionStateful, FermionStateful}}
|
||||
#@assert isapprox(data1.p.momentum, data2.p.momentum, rtol = sqrt(eps()), atol = sqrt(eps())) "$(data1.p.momentum) vs. $(data2.p.momentum)"
|
||||
|
||||
inner = QED_inner_edge(propagation_result(P1)(data1.p))
|
||||
inner = QED_inner_edge(propagation_result(P1)(momentum(data1.p)))
|
||||
|
||||
# inner edge is just a "scalar", data1 and data2 are bispinor/adjointbispinnor, need to keep correct order
|
||||
if typeof(data1.v) <: BiSpinor
|
||||
@ -80,12 +78,11 @@ function compute(
|
||||
end
|
||||
end
|
||||
|
||||
# TODO: S2 when the particles are photons?
|
||||
function compute(
|
||||
::ComputeTaskQED_S2,
|
||||
data1::ParticleValue{P1},
|
||||
data2::ParticleValue{P2},
|
||||
)::ComplexF64 where {P1 <: PhotonStateful, P2 <: PhotonStateful}
|
||||
) where {P1 <: PhotonStateful, P2 <: PhotonStateful}
|
||||
# TODO: assert that data1 and data2 are opposites
|
||||
inner = QED_inner_edge(data1.p)
|
||||
# inner edge is just a scalar, data1 and data2 are photon states that are just Complex numbers here
|
||||
@ -97,9 +94,9 @@ end
|
||||
|
||||
Compute inner edge (1 input particle, 1 output particle).
|
||||
"""
|
||||
function compute(::ComputeTaskQED_S1, data::QEDParticleValue{P})::QEDParticleValue where {P <: QEDParticle}
|
||||
function compute(::ComputeTaskQED_S1, data::QEDParticleValue{P}) where {P <: QEDParticle}
|
||||
newP = propagation_result(P)
|
||||
new_p = newP(data.p)
|
||||
new_p = newP(momentum(data.p))
|
||||
# inner edge is just a scalar, can multiply from either side
|
||||
if typeof(data.v) <: BiSpinor
|
||||
return ParticleValue(new_p, QED_inner_edge(new_p) * data.v)
|
||||
@ -109,90 +106,19 @@ function compute(::ComputeTaskQED_S1, data::QEDParticleValue{P})::QEDParticleVal
|
||||
end
|
||||
|
||||
"""
|
||||
compute(::ComputeTaskQED_Sum, data::Vector{ComplexF64})
|
||||
compute(::ComputeTaskQED_Sum, data...)
|
||||
compute(::ComputeTaskQED_Sum, data::AbstractArray)
|
||||
|
||||
Compute a sum over the vector. Use an algorithm that accounts for accumulated errors in long sums with potentially large differences in magnitude of the summands.
|
||||
|
||||
Linearly many FLOP with growing data.
|
||||
"""
|
||||
function compute(::ComputeTaskQED_Sum, data::Vector{ComplexF64})::ComplexF64
|
||||
function compute(::ComputeTaskQED_Sum, data...)::ComplexF64
|
||||
# TODO: want to use sum_kbn here but it doesn't seem to support ComplexF64, do it element-wise?
|
||||
return sum(data)
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskQED_P, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate and return code evaluating [`ComputeTaskQED_P`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
"""
|
||||
function get_expression(::ComputeTaskQED_P, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskQED_P(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskQED_U, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskQED_U`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`QEDParticleValue`](@ref), `outSym` will be of type [`QEDParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(::ComputeTaskQED_U, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskQED_U(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskQED_V, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskQED_V`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSym[1]` and `inSym[2]` should be of type [`QEDParticleValue`](@ref), `outSym` will be of type [`QEDParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(::ComputeTaskQED_V, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1]), eval(inExprs[2])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskQED_V(), $(in[1]), $(in[2]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskQED_S2, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskQED_S2`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms[1]` and `inSyms[2]` should be of type [`QEDParticleValue`](@ref), `outSym` will be of type `Float64`.
|
||||
"""
|
||||
function get_expression(::ComputeTaskQED_S2, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1]), eval(inExprs[2])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskQED_S2(), $(in[1]), $(in[2]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskQED_S1, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskQED_S1`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`QEDParticleValue`](@ref), `outSym` will be of type [`QEDParticleValue`](@ref).
|
||||
"""
|
||||
function get_expression(::ComputeTaskQED_S1, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = [eval(inExprs[1])]
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskQED_S1(), $(in[1]))")
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(::ComputeTaskQED_Sum, device::AbstractDevice, inExprs::Vector{Expr}, outExpr::Expr)
|
||||
|
||||
Generate code evaluating [`ComputeTaskQED_Sum`](@ref) on `inSyms`, providing the output on `outSym`.
|
||||
`inSyms` should be of type [`Float64`], `outSym` will be of type [`Float64`].
|
||||
"""
|
||||
function get_expression(::ComputeTaskQED_Sum, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
in = eval.(inExprs)
|
||||
out = eval(outExpr)
|
||||
|
||||
return Meta.parse("$out = compute(ComputeTaskQED_Sum(), [$(unroll_symbol_vector(in))])")
|
||||
function compute(::ComputeTaskQED_Sum, data::AbstractArray)::ComplexF64
|
||||
# TODO: want to use sum_kbn here but it doesn't seem to support ComplexF64, do it element-wise?
|
||||
return sum(data)
|
||||
end
|
||||
|
@ -1,6 +1,16 @@
|
||||
|
||||
ComputeTaskQED_Sum() = ComputeTaskQED_Sum(0)
|
||||
|
||||
function _svector_from_type(processDescription::QEDProcessDescription, type, particles)
|
||||
if haskey(in_particles(processDescription), type)
|
||||
return SVector{in_particles(processDescription)[type], type}(filter(x -> typeof(x) <: type, particles))
|
||||
end
|
||||
if haskey(out_particles(processDescription), type)
|
||||
return SVector{out_particles(processDescription)[type], type}(filter(x -> typeof(x) <: type, particles))
|
||||
end
|
||||
return SVector{0, type}()
|
||||
end
|
||||
|
||||
"""
|
||||
gen_process_input(processDescription::QEDProcessDescription)
|
||||
|
||||
@ -29,30 +39,37 @@ function gen_process_input(processDescription::QEDProcessDescription)
|
||||
massSum += rand(rng[threadid()]) * (length(inputMasses) + length(outputMasses))
|
||||
|
||||
|
||||
inputParticles = Vector{QEDParticle}()
|
||||
particles = Vector{QEDParticle}()
|
||||
initialMomenta = generate_initial_moms(massSum, inputMasses)
|
||||
index = 1
|
||||
for (particle, n) in processDescription.inParticles
|
||||
for _ in 1:n
|
||||
mom = initialMomenta[index]
|
||||
push!(inputParticles, particle(mom))
|
||||
push!(particles, particle(mom))
|
||||
index += 1
|
||||
end
|
||||
end
|
||||
|
||||
outputParticles = Vector{QEDParticle}()
|
||||
final_momenta = generate_physical_massive_moms(rng[threadid()], massSum, outputMasses)
|
||||
index = 1
|
||||
for (particle, n) in processDescription.outParticles
|
||||
for _ in 1:n
|
||||
push!(outputParticles, particle(final_momenta[index]))
|
||||
push!(particles, particle(final_momenta[index]))
|
||||
index += 1
|
||||
end
|
||||
end
|
||||
|
||||
processInput = QEDProcessInput(processDescription, inputParticles, outputParticles)
|
||||
inFerms = _svector_from_type(processDescription, FermionStateful{Incoming, SpinUp}, particles)
|
||||
outFerms = _svector_from_type(processDescription, FermionStateful{Outgoing, SpinUp}, particles)
|
||||
inAntiferms = _svector_from_type(processDescription, AntiFermionStateful{Incoming, SpinUp}, particles)
|
||||
outAntiferms = _svector_from_type(processDescription, AntiFermionStateful{Outgoing, SpinUp}, particles)
|
||||
inPhotons = _svector_from_type(processDescription, PhotonStateful{Incoming, PolX}, particles)
|
||||
outPhotons = _svector_from_type(processDescription, PhotonStateful{Outgoing, PolX}, particles)
|
||||
|
||||
return return processInput
|
||||
processInput =
|
||||
QEDProcessInput(processDescription, inFerms, outFerms, inAntiferms, outAntiferms, inPhotons, outPhotons)
|
||||
|
||||
return processInput
|
||||
end
|
||||
|
||||
"""
|
||||
|
@ -82,7 +82,7 @@ end
|
||||
|
||||
function particle_after_tie(p::FeynmanParticle, t::FeynmanTie)
|
||||
if p == t.in1 || p == t.in2
|
||||
return FeynmanParticle(FermionStateful{Incoming}, -1) # placeholder particle and id for tied particles
|
||||
return FeynmanParticle(FermionStateful{Incoming, SpinUp}, -1) # placeholder particle and id for tied particles
|
||||
end
|
||||
return p
|
||||
end
|
||||
|
@ -1,4 +1,5 @@
|
||||
using QEDprocesses
|
||||
using StaticArrays
|
||||
import QEDbase.mass
|
||||
|
||||
# TODO check
|
||||
@ -34,19 +35,6 @@ struct QEDProcessDescription <: AbstractProcessDescription
|
||||
outParticles::Dict{Type{<:QEDParticle{Outgoing}}, Int}
|
||||
end
|
||||
|
||||
"""
|
||||
QEDProcessInput <: AbstractProcessInput
|
||||
|
||||
Input for a QED Process. Contains the [`QEDProcessDescription`](@ref) of the process it is an input for, and the values of the in and out particles.
|
||||
|
||||
See also: [`gen_process_input`](@ref)
|
||||
"""
|
||||
struct QEDProcessInput <: AbstractProcessInput
|
||||
process::QEDProcessDescription
|
||||
inParticles::Vector{QEDParticle}
|
||||
outParticles::Vector{QEDParticle}
|
||||
end
|
||||
|
||||
QEDParticleValue{ParticleType <: QEDParticle} = Union{
|
||||
ParticleValue{ParticleType, BiSpinor},
|
||||
ParticleValue{ParticleType, AdjointBiSpinor},
|
||||
@ -60,51 +48,44 @@ QEDParticleValue{ParticleType <: QEDParticle} = Union{
|
||||
|
||||
A photon of the [`QEDModel`](@ref) with its state.
|
||||
"""
|
||||
struct PhotonStateful{Direction <: ParticleDirection} <: QEDParticle{Direction}
|
||||
struct PhotonStateful{Direction <: ParticleDirection, Pol <: AbstractDefinitePolarization} <: QEDParticle{Direction}
|
||||
momentum::SFourMomentum
|
||||
# this will maybe change to the full polarization vector? or do i need both
|
||||
polarization::AbstractDefinitePolarization
|
||||
end
|
||||
|
||||
PhotonStateful{Direction}(mom::SFourMomentum) where {Direction <: ParticleDirection} =
|
||||
PhotonStateful{Direction}(mom, PolX()) # TODO: make allpol possible
|
||||
PhotonStateful{Direction, PolX}(mom)
|
||||
|
||||
PhotonStateful{Dir1}(ph::PhotonStateful{Dir2}) where {Dir1 <: ParticleDirection, Dir2 <: ParticleDirection} =
|
||||
PhotonStateful{Dir1}(ph.momentum, ph.polarization)
|
||||
PhotonStateful{Dir, Pol}(ph::PhotonStateful) where {Dir, Pol} = PhotonStateful{Dir, Pol}(ph.momentum)
|
||||
|
||||
"""
|
||||
FermionStateful <: QEDParticle
|
||||
|
||||
A fermion of the [`QEDModel`](@ref) with its state.
|
||||
"""
|
||||
struct FermionStateful{Direction <: ParticleDirection} <: QEDParticle{Direction}
|
||||
struct FermionStateful{Direction <: ParticleDirection, Spin <: AbstractDefiniteSpin} <: QEDParticle{Direction}
|
||||
momentum::SFourMomentum
|
||||
spin::AbstractDefiniteSpin
|
||||
# TODO: mass for electron/muon/tauon representation?
|
||||
end
|
||||
|
||||
FermionStateful{Direction}(mom::SFourMomentum) where {Direction <: ParticleDirection} =
|
||||
FermionStateful{Direction}(mom, SpinUp()) # TODO: make allspin possible
|
||||
FermionStateful{Direction, SpinUp}(mom)
|
||||
|
||||
FermionStateful{Dir1}(f::FermionStateful{Dir2}) where {Dir1 <: ParticleDirection, Dir2 <: ParticleDirection} =
|
||||
FermionStateful{Dir1}(f.momentum, f.spin)
|
||||
FermionStateful{Dir, Spin}(f::FermionStateful) where {Dir, Spin} = FermionStateful{Dir, Spin}(f.momentum)
|
||||
|
||||
"""
|
||||
AntiFermionStateful <: QEDParticle
|
||||
|
||||
An anti-fermion of the [`QEDModel`](@ref) with its state.
|
||||
"""
|
||||
struct AntiFermionStateful{Direction <: ParticleDirection} <: QEDParticle{Direction}
|
||||
struct AntiFermionStateful{Direction <: ParticleDirection, Spin <: AbstractDefiniteSpin} <: QEDParticle{Direction}
|
||||
momentum::SFourMomentum
|
||||
spin::AbstractDefiniteSpin
|
||||
# TODO: mass for electron/muon/tauon representation?
|
||||
end
|
||||
|
||||
AntiFermionStateful{Direction}(mom::SFourMomentum) where {Direction <: ParticleDirection} =
|
||||
AntiFermionStateful{Direction}(mom, SpinUp()) # TODO: make allspin possible
|
||||
AntiFermionStateful{Direction, SpinUp}(mom)
|
||||
|
||||
AntiFermionStateful{Dir1}(f::AntiFermionStateful{Dir2}) where {Dir1 <: ParticleDirection, Dir2 <: ParticleDirection} =
|
||||
AntiFermionStateful{Dir1}(f.momentum, f.spin)
|
||||
AntiFermionStateful{Dir, Spin}(f::AntiFermionStateful) where {Dir, Spin} = AntiFermionStateful{Dir, Spin}(f.momentum)
|
||||
|
||||
"""
|
||||
interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: QEDParticle, T2 <: QEDParticle}
|
||||
@ -115,19 +96,33 @@ function interaction_result(t1::Type{T1}, t2::Type{T2}) where {T1 <: QEDParticle
|
||||
@assert false "Invalid interaction between particles of types $t1 and $t2"
|
||||
end
|
||||
|
||||
interaction_result(::Type{FermionStateful{Incoming}}, ::Type{FermionStateful{Outgoing}}) = PhotonStateful{Incoming}
|
||||
interaction_result(::Type{FermionStateful{Incoming}}, ::Type{AntiFermionStateful{Incoming}}) = PhotonStateful{Incoming}
|
||||
interaction_result(::Type{FermionStateful{Incoming}}, ::Type{<:PhotonStateful}) = FermionStateful{Outgoing}
|
||||
interaction_result(
|
||||
::Type{FermionStateful{Incoming, Spin1}},
|
||||
::Type{FermionStateful{Outgoing, Spin2}},
|
||||
) where {Spin1, Spin2} = PhotonStateful{Incoming, PolX}
|
||||
interaction_result(
|
||||
::Type{FermionStateful{Incoming, Spin1}},
|
||||
::Type{AntiFermionStateful{Incoming, Spin2}},
|
||||
) where {Spin1, Spin2} = PhotonStateful{Incoming, PolX}
|
||||
interaction_result(::Type{FermionStateful{Incoming, Spin1}}, ::Type{<:PhotonStateful}) where {Spin1} =
|
||||
FermionStateful{Outgoing, SpinUp}
|
||||
|
||||
interaction_result(::Type{FermionStateful{Outgoing}}, ::Type{FermionStateful{Incoming}}) = PhotonStateful{Incoming}
|
||||
interaction_result(::Type{FermionStateful{Outgoing}}, ::Type{AntiFermionStateful{Outgoing}}) = PhotonStateful{Incoming}
|
||||
interaction_result(::Type{FermionStateful{Outgoing}}, ::Type{<:PhotonStateful}) = FermionStateful{Incoming}
|
||||
interaction_result(
|
||||
::Type{FermionStateful{Outgoing, Spin1}},
|
||||
::Type{FermionStateful{Incoming, Spin2}},
|
||||
) where {Spin1, Spin2} = PhotonStateful{Incoming, PolX}
|
||||
interaction_result(
|
||||
::Type{FermionStateful{Outgoing, Spin1}},
|
||||
::Type{AntiFermionStateful{Outgoing, Spin2}},
|
||||
) where {Spin1, Spin2} = PhotonStateful{Incoming, PolX}
|
||||
interaction_result(::Type{FermionStateful{Outgoing, Spin1}}, ::Type{<:PhotonStateful}) where {Spin1} =
|
||||
FermionStateful{Incoming, SpinUp}
|
||||
|
||||
# antifermion mirror
|
||||
interaction_result(::Type{AntiFermionStateful{Incoming}}, t2::Type{<:QEDParticle}) =
|
||||
interaction_result(FermionStateful{Outgoing}, t2)
|
||||
interaction_result(::Type{AntiFermionStateful{Outgoing}}, t2::Type{<:QEDParticle}) =
|
||||
interaction_result(FermionStateful{Incoming}, t2)
|
||||
interaction_result(::Type{AntiFermionStateful{Incoming, Spin}}, t2::Type{<:QEDParticle}) where {Spin} =
|
||||
interaction_result(FermionStateful{Outgoing, Spin}, t2)
|
||||
interaction_result(::Type{AntiFermionStateful{Outgoing, Spin}}, t2::Type{<:QEDParticle}) where {Spin} =
|
||||
interaction_result(FermionStateful{Incoming, Spin}, t2)
|
||||
|
||||
# photon commutativity
|
||||
interaction_result(t1::Type{<:PhotonStateful}, t2::Type{<:QEDParticle}) = interaction_result(t2, t1)
|
||||
@ -142,12 +137,18 @@ end
|
||||
|
||||
Return the type of the inverted direction. E.g.
|
||||
"""
|
||||
propagation_result(::Type{FermionStateful{Incoming}}) = FermionStateful{Outgoing}
|
||||
propagation_result(::Type{FermionStateful{Outgoing}}) = FermionStateful{Incoming}
|
||||
propagation_result(::Type{AntiFermionStateful{Incoming}}) = AntiFermionStateful{Outgoing}
|
||||
propagation_result(::Type{AntiFermionStateful{Outgoing}}) = AntiFermionStateful{Incoming}
|
||||
propagation_result(::Type{PhotonStateful{Incoming}}) = PhotonStateful{Outgoing}
|
||||
propagation_result(::Type{PhotonStateful{Outgoing}}) = PhotonStateful{Incoming}
|
||||
propagation_result(::Type{FermionStateful{Incoming, Spin}}) where {Spin <: AbstractDefiniteSpin} =
|
||||
FermionStateful{Outgoing, Spin}
|
||||
propagation_result(::Type{FermionStateful{Outgoing, Spin}}) where {Spin <: AbstractDefiniteSpin} =
|
||||
FermionStateful{Incoming, Spin}
|
||||
propagation_result(::Type{AntiFermionStateful{Incoming, Spin}}) where {Spin <: AbstractDefiniteSpin} =
|
||||
AntiFermionStateful{Outgoing, Spin}
|
||||
propagation_result(::Type{AntiFermionStateful{Outgoing, Spin}}) where {Spin <: AbstractDefiniteSpin} =
|
||||
AntiFermionStateful{Incoming, Spin}
|
||||
propagation_result(::Type{PhotonStateful{Incoming, Pol}}) where {Pol <: AbstractDefinitePolarization} =
|
||||
PhotonStateful{Outgoing, Pol}
|
||||
propagation_result(::Type{PhotonStateful{Outgoing, Pol}}) where {Pol <: AbstractDefinitePolarization} =
|
||||
PhotonStateful{Incoming, Pol}
|
||||
|
||||
"""
|
||||
types(::QEDModel)
|
||||
@ -156,12 +157,12 @@ Return a Vector of the possible types of particle in the [`QEDModel`](@ref).
|
||||
"""
|
||||
function types(::QEDModel)
|
||||
return [
|
||||
PhotonStateful{Incoming},
|
||||
PhotonStateful{Outgoing},
|
||||
FermionStateful{Incoming},
|
||||
FermionStateful{Outgoing},
|
||||
AntiFermionStateful{Incoming},
|
||||
AntiFermionStateful{Outgoing},
|
||||
PhotonStateful{Incoming, PolX},
|
||||
PhotonStateful{Outgoing, PolX},
|
||||
FermionStateful{Incoming, SpinUp},
|
||||
FermionStateful{Outgoing, SpinUp},
|
||||
AntiFermionStateful{Incoming, SpinUp},
|
||||
AntiFermionStateful{Outgoing, SpinUp},
|
||||
]
|
||||
end
|
||||
|
||||
@ -169,6 +170,12 @@ end
|
||||
String(::Type{Incoming}) = "Incoming"
|
||||
String(::Type{Outgoing}) = "Outgoing"
|
||||
|
||||
String(::Type{PolX}) = "polx"
|
||||
String(::Type{PolY}) = "poly"
|
||||
|
||||
String(::Type{SpinUp}) = "spinup"
|
||||
String(::Type{SpinDown}) = "spindown"
|
||||
|
||||
String(::Incoming) = "i"
|
||||
String(::Outgoing) = "o"
|
||||
|
||||
@ -182,6 +189,16 @@ function String(::Type{<:AntiFermionStateful})
|
||||
return "p"
|
||||
end
|
||||
|
||||
function unique_name(::Type{PhotonStateful{Dir, Pol}}) where {Dir, Pol}
|
||||
return String(PhotonStateful) * String(Dir) * String(Pol)
|
||||
end
|
||||
function unique_name(::Type{FermionStateful{Dir, Spin}}) where {Dir, Spin}
|
||||
return String(FermionStateful) * String(Dir) * String(Spin)
|
||||
end
|
||||
function unique_name(::Type{AntiFermionStateful{Dir, Spin}}) where {Dir, Spin}
|
||||
return String(AntiFermionStateful) * String(Dir) * String(Spin)
|
||||
end
|
||||
|
||||
@inline particle(::PhotonStateful) = Photon()
|
||||
@inline particle(::FermionStateful) = Electron()
|
||||
@inline particle(::AntiFermionStateful) = Positron()
|
||||
@ -190,17 +207,23 @@ end
|
||||
@inline momentum(p::FermionStateful)::SFourMomentum = p.momentum
|
||||
@inline momentum(p::AntiFermionStateful)::SFourMomentum = p.momentum
|
||||
|
||||
@inline spin_or_pol(p::PhotonStateful)::AbstractPolarization = p.polarization
|
||||
@inline spin_or_pol(p::FermionStateful)::AbstractSpin = p.spin
|
||||
@inline spin_or_pol(p::AntiFermionStateful)::AbstractSpin = p.spin
|
||||
@inline spin_or_pol(p::PhotonStateful{Dir, Pol}) where {Dir, Pol <: AbstractDefinitePolarization} = Pol()
|
||||
@inline spin_or_pol(p::FermionStateful{Dir, Spin}) where {Dir, Spin <: AbstractDefiniteSpin} = Spin()
|
||||
@inline spin_or_pol(p::AntiFermionStateful{Dir, Spin}) where {Dir, Spin <: AbstractDefiniteSpin} = Spin()
|
||||
|
||||
@inline direction(::PhotonStateful{Dir}) where {Dir <: ParticleDirection} = Dir()
|
||||
@inline direction(::FermionStateful{Dir}) where {Dir <: ParticleDirection} = Dir()
|
||||
@inline direction(::AntiFermionStateful{Dir}) where {Dir <: ParticleDirection} = Dir()
|
||||
@inline direction(
|
||||
::Type{P},
|
||||
) where {P <: Union{FermionStateful{Incoming}, AntiFermionStateful{Incoming}, PhotonStateful{Incoming}}} = Incoming()
|
||||
@inline direction(
|
||||
::Type{P},
|
||||
) where {P <: Union{FermionStateful{Outgoing}, AntiFermionStateful{Outgoing}, PhotonStateful{Outgoing}}} = Outgoing()
|
||||
|
||||
@inline direction(::Type{PhotonStateful{Dir}}) where {Dir <: ParticleDirection} = Dir()
|
||||
@inline direction(::Type{FermionStateful{Dir}}) where {Dir <: ParticleDirection} = Dir()
|
||||
@inline direction(::Type{AntiFermionStateful{Dir}}) where {Dir <: ParticleDirection} = Dir()
|
||||
@inline direction(
|
||||
::P,
|
||||
) where {P <: Union{FermionStateful{Incoming}, AntiFermionStateful{Incoming}, PhotonStateful{Incoming}}} = Incoming()
|
||||
@inline direction(
|
||||
::P,
|
||||
) where {P <: Union{FermionStateful{Outgoing}, AntiFermionStateful{Outgoing}, PhotonStateful{Outgoing}}} = Outgoing()
|
||||
|
||||
@inline isincoming(::QEDParticle{Incoming}) = true
|
||||
@inline isincoming(::QEDParticle{Outgoing}) = false
|
||||
@ -216,12 +239,12 @@ end
|
||||
@inline mass(::Type{<:AntiFermionStateful}) = 1.0
|
||||
@inline mass(::Type{<:PhotonStateful}) = 0.0
|
||||
|
||||
@inline invert_momentum(p::FermionStateful{Dir}) where {Dir <: ParticleDirection} =
|
||||
FermionStateful{Dir}(-p.momentum, p.spin)
|
||||
@inline invert_momentum(p::AntiFermionStateful{Dir}) where {Dir <: ParticleDirection} =
|
||||
AntiFermionStateful{Dir}(-p.momentum, p.spin)
|
||||
@inline invert_momentum(k::PhotonStateful{Dir}) where {Dir <: ParticleDirection} =
|
||||
PhotonStateful{Dir}(-k.momentum, k.polarization)
|
||||
@inline invert_momentum(p::FermionStateful{Dir, Spin}) where {Dir, Spin} =
|
||||
FermionStateful{Dir, Spin}(-p.momentum, p.spin)
|
||||
@inline invert_momentum(p::AntiFermionStateful{Dir, Spin}) where {Dir, Spin} =
|
||||
AntiFermionStateful{Dir, Spin}(-p.momentum, p.spin)
|
||||
@inline invert_momentum(k::PhotonStateful{Dir, Spin}) where {Dir, Spin} =
|
||||
PhotonStateful{Dir, Spin}(-k.momentum, k.polarization)
|
||||
|
||||
|
||||
"""
|
||||
@ -240,10 +263,10 @@ function caninteract(T1::Type{<:QEDParticle}, T2::Type{<:QEDParticle})
|
||||
end
|
||||
|
||||
for (P1, P2) in [(T1, T2), (T2, T1)]
|
||||
if (P1 == FermionStateful{Incoming} && P2 == AntiFermionStateful{Outgoing})
|
||||
if (P1 <: FermionStateful{Incoming} && P2 <: AntiFermionStateful{Outgoing})
|
||||
return false
|
||||
end
|
||||
if (P1 == FermionStateful{Outgoing} && P2 == AntiFermionStateful{Incoming})
|
||||
if (P1 <: FermionStateful{Outgoing} && P2 <: AntiFermionStateful{Incoming})
|
||||
return false
|
||||
end
|
||||
end
|
||||
@ -253,17 +276,17 @@ end
|
||||
|
||||
function type_index_from_name(::QEDModel, name::String)
|
||||
if startswith(name, "ki")
|
||||
return (PhotonStateful{Incoming}, parse(Int, name[3:end]))
|
||||
return (PhotonStateful{Incoming, PolX}, parse(Int, name[3:end]))
|
||||
elseif startswith(name, "ko")
|
||||
return (PhotonStateful{Outgoing}, parse(Int, name[3:end]))
|
||||
return (PhotonStateful{Outgoing, PolX}, parse(Int, name[3:end]))
|
||||
elseif startswith(name, "ei")
|
||||
return (FermionStateful{Incoming}, parse(Int, name[3:end]))
|
||||
return (FermionStateful{Incoming, SpinUp}, parse(Int, name[3:end]))
|
||||
elseif startswith(name, "eo")
|
||||
return (FermionStateful{Outgoing}, parse(Int, name[3:end]))
|
||||
return (FermionStateful{Outgoing, SpinUp}, parse(Int, name[3:end]))
|
||||
elseif startswith(name, "pi")
|
||||
return (AntiFermionStateful{Incoming}, parse(Int, name[3:end]))
|
||||
return (AntiFermionStateful{Incoming, SpinUp}, parse(Int, name[3:end]))
|
||||
elseif startswith(name, "po")
|
||||
return (AntiFermionStateful{Outgoing}, parse(Int, name[3:end]))
|
||||
return (AntiFermionStateful{Outgoing, SpinUp}, parse(Int, name[3:end]))
|
||||
else
|
||||
throw("Invalid name for a particle in the QED model")
|
||||
end
|
||||
@ -291,8 +314,7 @@ Return the factor of a vertex in a QED feynman diagram.
|
||||
end
|
||||
|
||||
@inline function QED_inner_edge(p::QEDParticle)
|
||||
pos_mom = p.momentum
|
||||
return propagator(particle(p), pos_mom)
|
||||
return propagator(particle(p), p.momentum)
|
||||
end
|
||||
|
||||
"""
|
||||
@ -300,24 +322,49 @@ end
|
||||
|
||||
Calculate and return a new particle from two given interacting ones at a vertex.
|
||||
"""
|
||||
function QED_conserve_momentum(p1::QEDParticle, p2::QEDParticle)
|
||||
#println("Conserving momentum of \n$(direction(p1)) $(p1)\n and \n$(direction(p2)) $(p2)")
|
||||
T3 = interaction_result(typeof(p1), typeof(p2))
|
||||
# TODO: probably also need to do something about the spin/pol
|
||||
function QED_conserve_momentum(
|
||||
p1::P1,
|
||||
p2::P2,
|
||||
) where {
|
||||
Dir1 <: ParticleDirection,
|
||||
Dir2 <: ParticleDirection,
|
||||
SpinPol1 <: AbstractSpinOrPolarization,
|
||||
SpinPol2 <: AbstractSpinOrPolarization,
|
||||
P1 <: Union{FermionStateful{Dir1, SpinPol1}, AntiFermionStateful{Dir1, SpinPol1}, PhotonStateful{Dir1, SpinPol1}},
|
||||
P2 <: Union{FermionStateful{Dir2, SpinPol2}, AntiFermionStateful{Dir2, SpinPol2}, PhotonStateful{Dir2, SpinPol2}},
|
||||
}
|
||||
P3 = interaction_result(P1, P2)
|
||||
p1_mom = p1.momentum
|
||||
if (typeof(direction(p1)) <: Outgoing)
|
||||
if (Dir1 <: Outgoing)
|
||||
p1_mom *= -1
|
||||
end
|
||||
p2_mom = p2.momentum
|
||||
if (typeof(direction(p2)) <: Outgoing)
|
||||
if (Dir2 <: Outgoing)
|
||||
p2_mom *= -1
|
||||
end
|
||||
|
||||
p3_mom = p1_mom + p2_mom
|
||||
if (typeof(direction(T3)) <: Incoming)
|
||||
return T3(-p3_mom)
|
||||
if (typeof(direction(P3)) <: Incoming)
|
||||
return P3(-p3_mom)
|
||||
end
|
||||
return T3(p3_mom)
|
||||
return P3(p3_mom)
|
||||
end
|
||||
|
||||
"""
|
||||
QEDProcessInput <: AbstractProcessInput
|
||||
|
||||
Input for a QED Process. Contains the [`QEDProcessDescription`](@ref) of the process it is an input for, and the values of the in and out particles.
|
||||
|
||||
See also: [`gen_process_input`](@ref)
|
||||
"""
|
||||
struct QEDProcessInput{N1, N2, N3, N4, N5, N6} <: AbstractProcessInput
|
||||
process::QEDProcessDescription
|
||||
inFerms::SVector{N1, FermionStateful{Incoming, SpinUp}}
|
||||
outFerms::SVector{N2, FermionStateful{Outgoing, SpinUp}}
|
||||
inAntiferms::SVector{N3, AntiFermionStateful{Incoming, SpinUp}}
|
||||
outAntiferms::SVector{N4, AntiFermionStateful{Outgoing, SpinUp}}
|
||||
inPhotons::SVector{N5, PhotonStateful{Incoming, PolX}}
|
||||
outPhotons::SVector{N6, PhotonStateful{Outgoing, PolX}}
|
||||
end
|
||||
|
||||
"""
|
||||
@ -328,6 +375,10 @@ Return the model of this process description.
|
||||
model(::QEDProcessDescription) = QEDModel()
|
||||
model(::QEDProcessInput) = QEDModel()
|
||||
|
||||
function copy(process::QEDProcessDescription)
|
||||
return QEDProcessDescription(copy(process.inParticles), copy(process.outParticles))
|
||||
end
|
||||
|
||||
==(p1::QEDProcessDescription, p2::QEDProcessDescription) =
|
||||
p1.inParticles == p2.inParticles && p1.outParticles == p2.outParticles
|
||||
|
||||
@ -335,14 +386,23 @@ function in_particles(process::QEDProcessDescription)
|
||||
return process.inParticles
|
||||
end
|
||||
|
||||
function in_particles(input::QEDProcessInput)
|
||||
return input.inParticles
|
||||
end
|
||||
|
||||
function out_particles(process::QEDProcessDescription)
|
||||
return process.outParticles
|
||||
end
|
||||
|
||||
function out_particles(input::QEDProcessInput)
|
||||
return input.outParticles
|
||||
function get_particle(input::QEDProcessInput, t::Type{Particle}, n::Int)::Particle where {Particle}
|
||||
if (t <: FermionStateful{Incoming})
|
||||
return input.inFerms[n]
|
||||
elseif (t <: FermionStateful{Outgoing})
|
||||
return input.outFerms[n]
|
||||
elseif (t <: AntiFermionStateful{Incoming})
|
||||
return input.inAntiferms[n]
|
||||
elseif (t <: AntiFermionStateful{Outgoing})
|
||||
return input.outAntiferms[n]
|
||||
elseif (t <: PhotonStateful{Incoming})
|
||||
return input.inPhotons[n]
|
||||
elseif (t <: PhotonStateful{Outgoing})
|
||||
return input.outPhotons[n]
|
||||
end
|
||||
@assert false "Invalid type given"
|
||||
end
|
||||
|
@ -32,20 +32,63 @@ function show(io::IO, process::QEDProcessDescription)
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
"""
|
||||
String(process::QEDProcessDescription)
|
||||
|
||||
Create a short string suitable as a filename or similar, describing the given process.
|
||||
|
||||
```jldoctest
|
||||
julia> using MetagraphOptimization
|
||||
|
||||
julia> String(parse_process("ke->ke", QEDModel()))
|
||||
qed_ke-ke
|
||||
|
||||
julia> print(parse_process("kk->ep", QEDModel()))
|
||||
qed_kk-ep
|
||||
```
|
||||
"""
|
||||
function String(process::QEDProcessDescription)
|
||||
# types() gives the types in order (QED) instead of random like keys() would
|
||||
str = "qed_"
|
||||
for type in types(QEDModel())
|
||||
for _ in 1:get(process.inParticles, type, 0)
|
||||
str = str * String(type)
|
||||
end
|
||||
end
|
||||
str = str * "-"
|
||||
for type in types(QEDModel())
|
||||
for _ in 1:get(process.outParticles, type, 0)
|
||||
str = str * String(type)
|
||||
end
|
||||
end
|
||||
return str
|
||||
end
|
||||
|
||||
"""
|
||||
show(io::IO, processInput::QEDProcessInput)
|
||||
|
||||
Pretty print an [`QEDProcessInput`](@ref) (with newlines).
|
||||
Pretty print a [`QEDProcessInput`](@ref) (with newlines).
|
||||
"""
|
||||
function show(io::IO, processInput::QEDProcessInput)
|
||||
println(io, "Input for $(processInput.process):")
|
||||
println(io, " $(length(processInput.inParticles)) Incoming particles:")
|
||||
for particle in processInput.inParticles
|
||||
println(io, " $particle")
|
||||
if !isempty(processInput.inFerms)
|
||||
println(io, " $(processInput.inFerms)")
|
||||
end
|
||||
println(io, " $(length(processInput.outParticles)) Outgoing Particles:")
|
||||
for particle in processInput.outParticles
|
||||
println(io, " $particle")
|
||||
if !isempty(processInput.outFerms)
|
||||
println(io, " $(processInput.outFerms)")
|
||||
end
|
||||
if !isempty(processInput.inAntiferms)
|
||||
println(io, " $(processInput.inAntiferms)")
|
||||
end
|
||||
if !isempty(processInput.outAntiferms)
|
||||
println(io, " $(processInput.outAntiferms)")
|
||||
end
|
||||
if !isempty(processInput.inPhotons)
|
||||
println(io, " $(processInput.inPhotons)")
|
||||
end
|
||||
if !isempty(processInput.outPhotons)
|
||||
println(io, " $(processInput.outPhotons)")
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
@ -53,7 +96,7 @@ end
|
||||
"""
|
||||
show(io::IO, particle::T) where {T <: QEDParticle}
|
||||
|
||||
Pretty print an [`QEDParticle`](@ref) (no newlines).
|
||||
Pretty print a [`QEDParticle`](@ref) (no newlines).
|
||||
"""
|
||||
function show(io::IO, particle::T) where {T <: QEDParticle}
|
||||
print(io, "$(String(typeof(particle))): $(particle.momentum)")
|
||||
|
@ -45,48 +45,6 @@ this doesn't matter.
|
||||
"""
|
||||
compute_effort(t::ComputeTaskQED_Sum)::Float64 = 1.0
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_S1)
|
||||
|
||||
Print the S1 task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_S1) = print(io, "ComputeS1")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_S2)
|
||||
|
||||
Print the S2 task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_S2) = print(io, "ComputeS2")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_P)
|
||||
|
||||
Print the P task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_P) = print(io, "ComputeP")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_U)
|
||||
|
||||
Print the U task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_U) = print(io, "ComputeU")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_V)
|
||||
|
||||
Print the V task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_V) = print(io, "ComputeV")
|
||||
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_Sum)
|
||||
|
||||
Print the sum task to io.
|
||||
"""
|
||||
show(io::IO, t::ComputeTaskQED_Sum) = print(io, "ComputeSum")
|
||||
|
||||
"""
|
||||
children(::ComputeTaskQED_S1)
|
||||
|
||||
|
@ -3,7 +3,7 @@ using UUIDs
|
||||
using Base.Threads
|
||||
|
||||
# TODO: reliably find out how many threads we're running with (nthreads() returns 1 when precompiling :/)
|
||||
rng = [Random.MersenneTwister(0) for _ in 1:32]
|
||||
rng = [Random.MersenneTwister(0) for _ in 1:64]
|
||||
|
||||
"""
|
||||
Node
|
||||
|
@ -71,3 +71,7 @@ function optimize_to_fixpoint!(optimizer::GreedyOptimizer, graph::DAG)
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
function String(optimizer::GreedyOptimizer)
|
||||
return "greedy_optimizer_$(optimizer.estimator)"
|
||||
end
|
||||
|
@ -47,3 +47,7 @@ function optimize_step!(optimizer::RandomWalkOptimizer, graph::DAG)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function String(::RandomWalkOptimizer)
|
||||
return "random_walker"
|
||||
end
|
||||
|
@ -28,3 +28,7 @@ function optimize_to_fixpoint!(optimizer::ReductionOptimizer, graph::DAG)
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
function String(::ReductionOptimizer)
|
||||
return "reduction_optimizer"
|
||||
end
|
||||
|
@ -14,7 +14,7 @@ function schedule_dag(::GreedyScheduler, graph::DAG, machine::Machine)
|
||||
enqueue!(nodeQueue, node => 0)
|
||||
end
|
||||
|
||||
schedule = Vector{Node}()
|
||||
schedule = Vector{FunctionCall}()
|
||||
sizehint!(schedule, length(graph.nodes))
|
||||
|
||||
# keep an accumulated cost of things scheduled to this device so far
|
||||
@ -35,7 +35,12 @@ function schedule_dag(::GreedyScheduler, graph::DAG, machine::Machine)
|
||||
deviceAccCost[lowestDevice] = compute_effort(task(node))
|
||||
end
|
||||
|
||||
push!(schedule, node)
|
||||
if (node isa DataTaskNode && length(node.children) == 0)
|
||||
push!(schedule, get_init_function_call(node, entry_device(machine)))
|
||||
else
|
||||
push!(schedule, get_function_call(node)...)
|
||||
end
|
||||
|
||||
for parent in parents(node)
|
||||
# reduce the priority of all parents by one
|
||||
if (!haskey(nodeQueue, parent))
|
||||
|
@ -14,5 +14,7 @@ Interface functions that must be implemented for implementations of [`Scheduler`
|
||||
The function assigns each [`ComputeTaskNode`](@ref) of the [`DAG`](@ref) to one of the devices in the given [`Machine`](@ref) and returns a `Vector{Node}` representing a topological ordering.
|
||||
|
||||
[`DataTaskNode`](@ref)s are not scheduled to devices since they do not compute. Instead, a data node transfers data from the [`AbstractDevice`](@ref) of their child to all [`AbstractDevice`](@ref)s of its parents.
|
||||
|
||||
Return a `Vector{FunctionCall}`. See [`FunctionCall`](@ref)
|
||||
"""
|
||||
function schedule_dag end
|
||||
|
14
src/scheduler/type.jl
Normal file
14
src/scheduler/type.jl
Normal file
@ -0,0 +1,14 @@
|
||||
using StaticArrays
|
||||
|
||||
"""
|
||||
FunctionCall{N}
|
||||
|
||||
Type representing a function call with `N` parameters. Contains the function to call, argument symbols, the return symbol and the device to execute on.
|
||||
"""
|
||||
struct FunctionCall{VectorType <: AbstractVector, M}
|
||||
func::Function
|
||||
arguments::VectorType
|
||||
additional_arguments::SVector{M, Any} # additional arguments (as values) for the function call, will be prepended to the other arguments
|
||||
return_symbol::Symbol
|
||||
device::AbstractDevice
|
||||
end
|
@ -1,89 +1,85 @@
|
||||
using StaticArrays
|
||||
|
||||
"""
|
||||
compute(t::FusedComputeTask, data)
|
||||
|
||||
Compute a [`FusedComputeTask`](@ref). This simply asserts false and should not be called. Fused Compute Tasks generate their expressions directly through the other tasks instead.
|
||||
"""
|
||||
function compute(t::FusedComputeTask, data)
|
||||
@assert false "This is not implemented and should never be called"
|
||||
function compute(t::FusedComputeTask, data...)
|
||||
inter = compute(t.first_task)
|
||||
return compute(t.second_task, inter, data2...)
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector{String}, outExpr::String)
|
||||
get_function_call(n::Node)
|
||||
get_function_call(t::AbstractTask, device::AbstractDevice, inSymbols::AbstractVector, outSymbol::Symbol)
|
||||
|
||||
Generate code evaluating a [`FusedComputeTask`](@ref) on `inExprs`, providing the output on `outExpr`.
|
||||
`inExprs` should be of the correct types and may be heterogeneous. `outExpr` will be of the type of the output of `T2` of t.
|
||||
For a node or a task together with necessary information, return a vector of [`FunctionCall`](@ref)s for the computation of the node or task.
|
||||
|
||||
For ordinary compute or data tasks the vector will contain exactly one element, for a [`FusedComputeTask`](@ref) there can be any number of tasks greater 1.
|
||||
"""
|
||||
function get_expression(t::FusedComputeTask, device::AbstractDevice, inExprs::Vector, outExpr)
|
||||
inExprs1 = Vector()
|
||||
for sym in t.t1_inputs
|
||||
push!(inExprs1, gen_access_expr(device, sym))
|
||||
end
|
||||
|
||||
outExpr1 = gen_access_expr(device, t.t1_output)
|
||||
|
||||
inExprs2 = Vector()
|
||||
for sym in t.t2_inputs
|
||||
push!(inExprs2, gen_access_expr(device, sym))
|
||||
end
|
||||
|
||||
expr1 = get_expression(t.first_task, device, inExprs1, outExpr1)
|
||||
expr2 = get_expression(t.second_task, device, [inExprs2..., outExpr1], outExpr)
|
||||
|
||||
full_expr = Expr(:block, expr1, expr2)
|
||||
|
||||
return full_expr
|
||||
function get_function_call(t::FusedComputeTask, device::AbstractDevice, inSymbols::AbstractVector, outSymbol::Symbol)
|
||||
# sort out the symbols to the correct tasks
|
||||
return [
|
||||
get_function_call(t.first_task, device, t.t1_inputs, t.t1_output)...,
|
||||
get_function_call(t.second_task, device, [t.t2_inputs..., t.t1_output], outSymbol)...,
|
||||
]
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(node::ComputeTaskNode)
|
||||
function get_function_call(
|
||||
t::CompTask,
|
||||
device::AbstractDevice,
|
||||
inSymbols::AbstractVector,
|
||||
outSymbol::Symbol,
|
||||
) where {CompTask <: AbstractComputeTask}
|
||||
return [FunctionCall(compute, inSymbols, SVector{1, Any}(t), outSymbol, device)]
|
||||
end
|
||||
|
||||
Generate and return code for a given [`ComputeTaskNode`](@ref).
|
||||
"""
|
||||
function get_expression(node::ComputeTaskNode)
|
||||
function get_function_call(node::ComputeTaskNode)
|
||||
@assert length(children(node)) <= children(task(node)) "Node $(node) has too many children for its task: node has $(length(node.children)) versus task has $(children(task(node)))\nNode's children: $(getfield.(node.children, :children))"
|
||||
@assert !ismissing(node.device) "Trying to get expression for an unscheduled ComputeTaskNode\nNode: $(node)"
|
||||
|
||||
inExprs = Vector()
|
||||
for id in getfield.(children(node), :id)
|
||||
push!(inExprs, gen_access_expr(node.device, Symbol(to_var_name(id))))
|
||||
if (length(node.children) <= 50)
|
||||
#only use an SVector when there are few children
|
||||
return get_function_call(
|
||||
node.task,
|
||||
node.device,
|
||||
SVector{length(node.children), Symbol}(Symbol.(to_var_name.(getfield.(children(node), :id)))...),
|
||||
Symbol(to_var_name(node.id)),
|
||||
)
|
||||
else
|
||||
return get_function_call(
|
||||
node.task,
|
||||
node.device,
|
||||
Symbol.(to_var_name.(getfield.(children(node), :id))),
|
||||
Symbol(to_var_name(node.id)),
|
||||
)
|
||||
end
|
||||
outExpr = gen_access_expr(node.device, Symbol(to_var_name(node.id)))
|
||||
|
||||
return get_expression(task(node), node.device, inExprs, outExpr)
|
||||
end
|
||||
|
||||
"""
|
||||
get_expression(node::DataTaskNode)
|
||||
|
||||
Generate and return code for a given [`DataTaskNode`](@ref).
|
||||
"""
|
||||
function get_expression(node::DataTaskNode)
|
||||
function get_function_call(node::DataTaskNode)
|
||||
@assert length(children(node)) == 1 "Trying to call get_expression on a data task node that has $(length(node.children)) children instead of 1"
|
||||
|
||||
# TODO: dispatch to device implementations generating the copy commands
|
||||
|
||||
child = children(node)[1]
|
||||
inExpr = eval(gen_access_expr(child.device, Symbol(to_var_name(child.id))))
|
||||
outExpr = eval(gen_access_expr(child.device, Symbol(to_var_name(node.id))))
|
||||
dataTransportExp = Meta.parse("$outExpr = $inExpr")
|
||||
|
||||
return dataTransportExp
|
||||
return [
|
||||
FunctionCall(
|
||||
unpack_identity,
|
||||
SVector{1, Symbol}(Symbol(to_var_name(first(children(node)).id))),
|
||||
SVector{0, Any}(),
|
||||
Symbol(to_var_name(node.id)),
|
||||
first(children(node)).device,
|
||||
),
|
||||
]
|
||||
end
|
||||
|
||||
"""
|
||||
get_init_expression(node::DataTaskNode, device::AbstractDevice)
|
||||
|
||||
Generate and return code for the initial input reading expression for [`DataTaskNode`](@ref)s with 0 children, i.e., entry nodes.
|
||||
|
||||
See also: [`get_entry_nodes`](@ref)
|
||||
"""
|
||||
function get_init_expression(node::DataTaskNode, device::AbstractDevice)
|
||||
function get_init_function_call(node::DataTaskNode, device::AbstractDevice)
|
||||
@assert isempty(children(node)) "Trying to call get_init_expression on a data task node that is not an entry node."
|
||||
|
||||
inExpr = eval(gen_access_expr(device, Symbol("$(to_var_name(node.id))_in")))
|
||||
outExpr = eval(gen_access_expr(device, Symbol(to_var_name(node.id))))
|
||||
dataTransportExp = Meta.parse("$outExpr = $inExpr")
|
||||
|
||||
return dataTransportExp
|
||||
return FunctionCall(
|
||||
unpack_identity,
|
||||
SVector{1, Symbol}(Symbol("$(to_var_name(node.id))_in")),
|
||||
SVector{0, Any}(),
|
||||
Symbol(to_var_name(node.id)),
|
||||
device,
|
||||
)
|
||||
end
|
||||
|
@ -1,17 +0,0 @@
|
||||
"""
|
||||
show(io::IO, t::FusedComputeTask)
|
||||
|
||||
Print a string representation of the fused compute task to io.
|
||||
"""
|
||||
function show(io::IO, t::FusedComputeTask)
|
||||
return print(io, "ComputeFuse($(t.first_task), $(t.second_task))")
|
||||
end
|
||||
|
||||
"""
|
||||
show(io::IO, t::DataTask)
|
||||
|
||||
Print the data task to io.
|
||||
"""
|
||||
function show(io::IO, t::DataTask)
|
||||
return print(io, "Data", t.data)
|
||||
end
|
@ -3,28 +3,10 @@
|
||||
|
||||
Fallback implementation of the compute function of a compute task, throwing an error.
|
||||
"""
|
||||
function compute(t::AbstractTask; data...)
|
||||
function compute(t::AbstractTask, data...)
|
||||
return error("Need to implement compute()")
|
||||
end
|
||||
|
||||
"""
|
||||
compute(t::FusedComputeTask; data...)
|
||||
|
||||
Compute a fused compute task.
|
||||
"""
|
||||
function compute(t::FusedComputeTask; data...)
|
||||
(T1, T2) = collect(typeof(t).parameters)
|
||||
|
||||
return compute(T2(), compute(T1(), data))
|
||||
end
|
||||
|
||||
"""
|
||||
compute(t::AbstractDataTask; data...)
|
||||
|
||||
The compute function of a data task, always the identity function, regardless of the specific task.
|
||||
"""
|
||||
compute(t::AbstractDataTask; data...) = data
|
||||
|
||||
"""
|
||||
compute_effort(t::AbstractTask)
|
||||
|
||||
|
@ -1,3 +1,18 @@
|
||||
"""
|
||||
noop()
|
||||
|
||||
Function with no arguments, returns nothing, does nothing. Useful for noop [`FunctionCall`](@ref)s.
|
||||
"""
|
||||
@inline noop() = nothing
|
||||
|
||||
"""
|
||||
unpack_identity(x::SVector)
|
||||
|
||||
Function taking an `SVector`, returning it unpacked.
|
||||
"""
|
||||
@inline unpack_identity(x::SVector{1, <:Any}) = x[1]
|
||||
@inline unpack_identity(x) = x
|
||||
|
||||
"""
|
||||
bytes_to_human_readable(bytes)
|
||||
|
||||
@ -104,6 +119,10 @@ function unroll_symbol_vector(vec::Vector)
|
||||
return result
|
||||
end
|
||||
|
||||
function unroll_symbol_vector(vec::SVector)
|
||||
return unroll_symbol_vector(Vector(vec))
|
||||
end
|
||||
|
||||
|
||||
|
||||
####################
|
||||
|
@ -4,5 +4,7 @@ QEDbase = "10e22c08-3ccb-4172-bfcf-7d7aa3d04d93"
|
||||
QEDprocesses = "46de9c38-1bb3-4547-a1ec-da24d767fdad"
|
||||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
|
||||
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
|
||||
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
@ -18,12 +18,12 @@ end
|
||||
@safetestset "ABC-Model Unit Tests " begin
|
||||
include("unit_tests_abcmodel.jl")
|
||||
end
|
||||
@safetestset "QED Feynman Diagram Generation Tests" begin
|
||||
include("unit_tests_qed_diagrams.jl")
|
||||
end
|
||||
@safetestset "QED-Model Unit Tests " begin
|
||||
include("unit_tests_qedmodel.jl")
|
||||
end
|
||||
@safetestset "QED Feynman Diagram Generation Tests" begin
|
||||
include("unit_tests_qed_diagrams.jl")
|
||||
end
|
||||
@safetestset "Node Reduction Unit Tests " begin
|
||||
include("node_reduction.jl")
|
||||
end
|
||||
|
@ -2,6 +2,8 @@ using MetagraphOptimization
|
||||
using QEDbase
|
||||
using AccurateArithmetic
|
||||
using Random
|
||||
using UUIDs
|
||||
using StaticArrays
|
||||
|
||||
import MetagraphOptimization.ABCParticle
|
||||
import MetagraphOptimization.interaction_result
|
||||
@ -27,11 +29,11 @@ function ground_truth_graph_result(input::ABCProcessInput)
|
||||
constant = (1 / 137.0)^2
|
||||
|
||||
# calculate particle C in diagram 1
|
||||
diagram1_C = ParticleC(input.inParticles[1].momentum + input.inParticles[2].momentum)
|
||||
diagram2_C = ParticleC(input.inParticles[1].momentum + input.outParticles[2].momentum)
|
||||
diagram1_C = ParticleC(input.inA[1].momentum + input.inB[1].momentum)
|
||||
diagram2_C = ParticleC(input.inA[1].momentum + input.outB[1].momentum)
|
||||
|
||||
diagram1_Cp = ParticleC(input.outParticles[1].momentum + input.outParticles[2].momentum)
|
||||
diagram2_Cp = ParticleC(input.outParticles[1].momentum + input.inParticles[2].momentum)
|
||||
diagram1_Cp = ParticleC(input.outA[1].momentum + input.outB[1].momentum)
|
||||
diagram2_Cp = ParticleC(input.outA[1].momentum + input.inB[1].momentum)
|
||||
|
||||
check_particle_reverse_moment(diagram1_Cp.momentum, diagram1_C.momentum)
|
||||
check_particle_reverse_moment(diagram2_Cp.momentum, diagram2_C.momentum)
|
||||
@ -47,7 +49,18 @@ function ground_truth_graph_result(input::ABCProcessInput)
|
||||
return sum_kbn([diagram1_result, diagram2_result])
|
||||
end
|
||||
|
||||
machine = get_machine_info()
|
||||
machine = Machine(
|
||||
[
|
||||
MetagraphOptimization.NumaNode(
|
||||
0,
|
||||
1,
|
||||
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
|
||||
-1.0,
|
||||
UUIDs.uuid1(),
|
||||
),
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
|
||||
process_2_2 = ABCProcessDescription(
|
||||
Dict{Type, Int64}(ParticleA => 1, ParticleB => 1),
|
||||
@ -56,14 +69,12 @@ process_2_2 = ABCProcessDescription(
|
||||
|
||||
particles_2_2 = ABCProcessInput(
|
||||
process_2_2,
|
||||
ABCParticle[
|
||||
ParticleA(SFourMomentum(0.823648, 0.0, 0.0, 0.823648)),
|
||||
ParticleB(SFourMomentum(0.823648, 0.0, 0.0, -0.823648)),
|
||||
],
|
||||
ABCParticle[
|
||||
ParticleA(SFourMomentum(0.823648, -0.835061, -0.474802, 0.277915)),
|
||||
ParticleB(SFourMomentum(0.823648, 0.835061, 0.474802, -0.277915)),
|
||||
],
|
||||
SVector{1}(ParticleA(SFourMomentum(0.823648, 0.0, 0.0, 0.823648))),
|
||||
SVector{1}(ParticleB(SFourMomentum(0.823648, 0.0, 0.0, -0.823648))),
|
||||
SVector{0, ParticleC}(),
|
||||
SVector{1}(ParticleA(SFourMomentum(0.823648, -0.835061, -0.474802, 0.277915))),
|
||||
SVector{1}(ParticleB(SFourMomentum(0.823648, 0.835061, 0.474802, -0.277915))),
|
||||
SVector{0, ParticleC}(),
|
||||
)
|
||||
expected_result = ground_truth_graph_result(particles_2_2)
|
||||
|
||||
|
@ -3,6 +3,7 @@ using QEDbase
|
||||
using QEDprocesses
|
||||
using StatsBase # for countmap
|
||||
using Random
|
||||
using UUIDs
|
||||
|
||||
import MetagraphOptimization.caninteract
|
||||
import MetagraphOptimization.issame
|
||||
@ -17,32 +18,32 @@ def_momentum = SFourMomentum(1.0, 0.0, 0.0, 0.0)
|
||||
RNG = Random.default_rng()
|
||||
|
||||
testparticleTypes = [
|
||||
PhotonStateful{Incoming},
|
||||
PhotonStateful{Outgoing},
|
||||
FermionStateful{Incoming},
|
||||
FermionStateful{Outgoing},
|
||||
AntiFermionStateful{Incoming},
|
||||
AntiFermionStateful{Outgoing},
|
||||
PhotonStateful{Incoming, PolX},
|
||||
PhotonStateful{Outgoing, PolX},
|
||||
FermionStateful{Incoming, SpinUp},
|
||||
FermionStateful{Outgoing, SpinUp},
|
||||
AntiFermionStateful{Incoming, SpinUp},
|
||||
AntiFermionStateful{Outgoing, SpinUp},
|
||||
]
|
||||
|
||||
testparticleTypesPropagated = [
|
||||
PhotonStateful{Outgoing},
|
||||
PhotonStateful{Incoming},
|
||||
FermionStateful{Outgoing},
|
||||
FermionStateful{Incoming},
|
||||
AntiFermionStateful{Outgoing},
|
||||
AntiFermionStateful{Incoming},
|
||||
PhotonStateful{Outgoing, PolX},
|
||||
PhotonStateful{Incoming, PolX},
|
||||
FermionStateful{Outgoing, SpinUp},
|
||||
FermionStateful{Incoming, SpinUp},
|
||||
AntiFermionStateful{Outgoing, SpinUp},
|
||||
AntiFermionStateful{Incoming, SpinUp},
|
||||
]
|
||||
|
||||
function compton_groundtruth(input::QEDProcessInput)
|
||||
# p1k1 -> p2k2
|
||||
# formula: −(ie)^2 (u(p2) slashed(ε1) S(p2 − k1) slashed(ε2) u(p1) + u(p2) slashed(ε2) S(p1 + k1) slashed(ε1) u(p1))
|
||||
|
||||
p1 = input.inParticles[findfirst(x -> typeof(x) <: FermionStateful, input.inParticles)]
|
||||
p2 = input.outParticles[findfirst(x -> typeof(x) <: FermionStateful, input.outParticles)]
|
||||
p1 = input.inFerms[1]
|
||||
p2 = input.outFerms[1]
|
||||
|
||||
k1 = input.inParticles[findfirst(x -> typeof(x) <: PhotonStateful, input.inParticles)]
|
||||
k2 = input.outParticles[findfirst(x -> typeof(x) <: PhotonStateful, input.outParticles)]
|
||||
k1 = input.inPhotons[1]
|
||||
k2 = input.outPhotons[1]
|
||||
|
||||
u_p1 = base_state(Electron(), Incoming(), p1.momentum, spin_or_pol(p1))
|
||||
u_p2 = base_state(Electron(), Outgoing(), p2.momentum, spin_or_pol(p2))
|
||||
@ -117,36 +118,36 @@ end
|
||||
|
||||
@testset "Known processes" begin
|
||||
compton_process = QEDProcessDescription(
|
||||
Dict{Type, Int}(PhotonStateful{Incoming} => 1, FermionStateful{Incoming} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Outgoing} => 1, FermionStateful{Outgoing} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Incoming, PolX} => 1, FermionStateful{Incoming, SpinUp} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Outgoing, PolX} => 1, FermionStateful{Outgoing, SpinUp} => 1),
|
||||
)
|
||||
|
||||
@test parse_process("ke->ke", QEDModel()) == compton_process
|
||||
|
||||
positron_compton_process = QEDProcessDescription(
|
||||
Dict{Type, Int}(PhotonStateful{Incoming} => 1, AntiFermionStateful{Incoming} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Outgoing} => 1, AntiFermionStateful{Outgoing} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Incoming, PolX} => 1, AntiFermionStateful{Incoming, SpinUp} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Outgoing, PolX} => 1, AntiFermionStateful{Outgoing, SpinUp} => 1),
|
||||
)
|
||||
|
||||
@test parse_process("kp->kp", QEDModel()) == positron_compton_process
|
||||
|
||||
trident_process = QEDProcessDescription(
|
||||
Dict{Type, Int}(PhotonStateful{Incoming} => 1, FermionStateful{Incoming} => 1),
|
||||
Dict{Type, Int}(FermionStateful{Outgoing} => 2, AntiFermionStateful{Outgoing} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Incoming, PolX} => 1, FermionStateful{Incoming, SpinUp} => 1),
|
||||
Dict{Type, Int}(FermionStateful{Outgoing, SpinUp} => 2, AntiFermionStateful{Outgoing, SpinUp} => 1),
|
||||
)
|
||||
|
||||
@test parse_process("ke->eep", QEDModel()) == trident_process
|
||||
|
||||
pair_production_process = QEDProcessDescription(
|
||||
Dict{Type, Int}(PhotonStateful{Incoming} => 2),
|
||||
Dict{Type, Int}(FermionStateful{Outgoing} => 1, AntiFermionStateful{Outgoing} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Incoming, PolX} => 2),
|
||||
Dict{Type, Int}(FermionStateful{Outgoing, SpinUp} => 1, AntiFermionStateful{Outgoing, SpinUp} => 1),
|
||||
)
|
||||
|
||||
@test parse_process("kk->pe", QEDModel()) == pair_production_process
|
||||
|
||||
pair_annihilation_process = QEDProcessDescription(
|
||||
Dict{Type, Int}(FermionStateful{Incoming} => 1, AntiFermionStateful{Incoming} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Outgoing} => 2),
|
||||
Dict{Type, Int}(FermionStateful{Incoming, SpinUp} => 1, AntiFermionStateful{Incoming, SpinUp} => 1),
|
||||
Dict{Type, Int}(PhotonStateful{Outgoing, PolX} => 2),
|
||||
)
|
||||
|
||||
@test parse_process("pe->kk", QEDModel()) == pair_annihilation_process
|
||||
@ -160,12 +161,24 @@ end
|
||||
|
||||
for i in 1:100
|
||||
input = gen_process_input(process)
|
||||
@test countmap(typeof.(input.inParticles)) == process.inParticles
|
||||
@test countmap(typeof.(input.outParticles)) == process.outParticles
|
||||
@test length(input.inFerms) == get(process.inParticles, FermionStateful{Incoming, SpinUp}, 0)
|
||||
@test length(input.inAntiferms) == get(process.inParticles, AntiFermionStateful{Incoming, SpinUp}, 0)
|
||||
@test length(input.inPhotons) == get(process.inParticles, PhotonStateful{Incoming, PolX}, 0)
|
||||
@test length(input.outFerms) == get(process.outParticles, FermionStateful{Outgoing, SpinUp}, 0)
|
||||
@test length(input.outAntiferms) == get(process.outParticles, AntiFermionStateful{Outgoing, SpinUp}, 0)
|
||||
@test length(input.outPhotons) == get(process.outParticles, PhotonStateful{Outgoing, PolX}, 0)
|
||||
|
||||
@test isapprox(
|
||||
sum(getfield.(input.inParticles, :momentum)),
|
||||
sum(getfield.(input.outParticles, :momentum));
|
||||
sum([
|
||||
getfield.(input.inFerms, :momentum)...,
|
||||
getfield.(input.inAntiferms, :momentum)...,
|
||||
getfield.(input.inPhotons, :momentum)...,
|
||||
]),
|
||||
sum([
|
||||
getfield.(input.outFerms, :momentum)...,
|
||||
getfield.(input.outAntiferms, :momentum)...,
|
||||
getfield.(input.outPhotons, :momentum)...,
|
||||
]);
|
||||
atol = sqrt(eps()),
|
||||
)
|
||||
end
|
||||
@ -179,7 +192,18 @@ end
|
||||
|
||||
model = QEDModel()
|
||||
process = parse_process("ke->ke", model)
|
||||
machine = get_machine_info()
|
||||
machine = Machine(
|
||||
[
|
||||
MetagraphOptimization.NumaNode(
|
||||
0,
|
||||
1,
|
||||
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
|
||||
-1.0,
|
||||
UUIDs.uuid1(),
|
||||
),
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
|
||||
graph = MetagraphOptimization.DAG()
|
||||
|
||||
@ -289,3 +313,37 @@ end
|
||||
compton_function = get_compute_function(graph_generated, process, machine)
|
||||
@test isapprox(compton_function.(input), compton_groundtruth.(input))
|
||||
end
|
||||
|
||||
@testset "Equal results after optimization" for optimizer in
|
||||
[ReductionOptimizer(), RandomWalkOptimizer(MersenneTwister(0))]
|
||||
@testset "Process $proc_str" for proc_str in ["ke->ke", "kp->kp", "kk->ep", "ep->kk", "ke->kke", "ke->kkke"]
|
||||
model = QEDModel()
|
||||
process = parse_process(proc_str, model)
|
||||
machine = Machine(
|
||||
[
|
||||
MetagraphOptimization.NumaNode(
|
||||
0,
|
||||
1,
|
||||
MetagraphOptimization.default_strategy(MetagraphOptimization.NumaNode),
|
||||
-1.0,
|
||||
UUIDs.uuid1(),
|
||||
),
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
graph = gen_graph(process)
|
||||
|
||||
compute_function = get_compute_function(graph, process, machine)
|
||||
|
||||
if (typeof(optimizer) <: RandomWalkOptimizer)
|
||||
optimize!(optimizer, graph, 100)
|
||||
elseif (typeof(optimizer) <: ReductionOptimizer)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
end
|
||||
reduced_compute_function = get_compute_function(graph, process, machine)
|
||||
|
||||
input = [gen_process_input(process) for _ in 1:100]
|
||||
|
||||
@test isapprox(compute_function.(input), reduced_compute_function.(input))
|
||||
end
|
||||
end
|
||||
|
Reference in New Issue
Block a user