experiments #1

Merged
rubydragon merged 39 commits from experiments into main 2024-05-08 12:03:28 +02:00
6 changed files with 415 additions and 51 deletions
Showing only changes of commit 7098d1801a - Show all commits

2
.gitignore vendored
View File

@ -30,3 +30,5 @@ Manifest.toml
.julia
**/.ipynb_checkpoints/
*.bkp
*.sif

View File

@ -0,0 +1,97 @@
process_name,graph_gen_time,optimization_time,function_generation_time,graph_nodes,graph_edges,graph_mem,cpu_threads,n_inputs,nflops_likwid,cpu_time,cpu_rate,cpu_gflops,gpu_name,gpu_time,gpu_rate,gpu_gflops
QED Process: 'ke->ke' no optimization,0.000221518,0.0,0.076346293,26,29,6532.0,1,100000,5683,0.086186253,1.1602778461664878e6,6.59385899976415,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000221518,0.000117106,0.003614977,26,29,6948.0,1,100000,5683,0.086626002,1.1543878014825156e6,6.560385875825136,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.00066327,0.0,0.084021406,77,101,19175.0,1,100000,23078,0.270498122,369688.333732683,8.531667365882857,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.00066327,0.029167501,0.006483402,61,79,16863.0,1,100000,16308,0.181935913,549644.0936320252,8.963595878951066,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.006063462,0.0,0.035553646,356,493,85898.0,1,100000,120271,1.199812544,83346.35314497928,10.024149239099803,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.006063462,0.002954241,0.013902962,148,221,43962.0,1,100000,43511,0.435115663,229823.9491323483,9.999869850697607,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000235685,0.0,0.075679887,26,29,6532.0,2,100000,5683,0.055355402,1.8065084235139329e6,10.26638737082968,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000235685,9.8981e-5,0.003594457,26,29,6948.0,2,100000,5683,0.055994886,1.7858773745873864e6,10.149141119780117,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.00065845,0.0,0.082042796,77,101,19175.0,2,100000,23078,0.160231685,624096.2890704169,14.402894159167081,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.00065845,0.028895496,0.006503071,61,79,16863.0,2,100000,16308,0.106436305,939529.0450941528,15.321839667395443,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.005661736,0.0,0.034808678,356,493,85898.0,2,100000,120271,0.620175294,161244.73349304366,19.393065341941853,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.005661736,0.002755388,0.013644252,148,221,43962.0,2,100000,43511,0.228774759,437111.15875331336,19.01914362851542,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000218101,0.0,0.076264355,26,29,6532.0,3,100000,5683,0.050665491,1.9737300088535608e6,11.216707640314786,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000218101,0.000128448,0.003242828,26,29,6948.0,3,100000,5683,0.072984976,1.3701450008012608e6,7.786534039553565,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000695342,0.0,0.097129092,77,101,19175.0,3,100000,23078,0.119935587,833780.8860684527,19.24199528868775,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000695342,0.029709036,0.006407135,61,79,16863.0,3,100000,16308,0.087404054,1.1441116907460608e6,18.65817345268676,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.022064122,0.0,0.03663888,356,493,85898.0,3,100000,120271,0.476353198,209928.2641952579,25.248282263027868,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.022064122,0.010551003,0.013701032,148,221,43962.0,3,100000,43511,0.170405831,586834.3789245099,25.53375066138435,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.00021267,0.0,0.086773717,26,29,6532.0,4,100000,5683,0.041951463,2.3837070950302733e6,13.546607421057043,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.00021267,0.000121484,0.003464937,26,29,6948.0,4,100000,5683,0.043205825,2.314502731981162e6,13.153319025848944,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000684641,0.0,0.085586085,77,101,19175.0,4,100000,23078,0.091246099,1.0959372630275406e6,25.292040156149582,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000684641,0.05730277,0.006948781,61,79,16863.0,4,100000,16308,0.068751105,1.4545220764088081e6,23.720346022074846,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.005607071,0.0,0.036123215,356,493,85898.0,4,100000,120271,0.335336275,298208.1195957699,35.86578875190284,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.005607071,0.002691243,0.013817787,148,221,43962.0,4,100000,43511,0.134833158,741657.3303133639,32.27025209926478,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.00023815,0.0,0.080547541,26,29,6532.0,5,100000,5683,0.041168092,2.429065694859018e6,13.8043803438838,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.00023815,0.000153696,0.003356137,26,29,6948.0,5,100000,5683,0.045756827,2.185466225619185e6,12.420004560193826,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000897943,0.0,0.098053657,77,101,19175.0,5,100000,23078,0.080404746,1.2437076786487205e6,28.702285807855173,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000897943,0.000581051,0.006567074,61,79,16863.0,5,100000,16308,0.08312713,1.202976693649835e6,19.618143920041508,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.00574661,0.0,0.037455126,356,493,85898.0,5,100000,120271,0.283356036,352912.89859800274,42.445187227280385,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.00574661,0.003115262,0.014315688,148,221,43962.0,5,100000,43511,0.115244153,867722.9811390084,37.75549463233939,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000224744,0.0,0.076885463,26,29,6532.0,6,100000,5683,0.03953493,2.529408803809694e6,14.37463023205049,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000224744,0.000138508,0.003362109,26,29,6948.0,6,100000,5683,0.084441256,1.184255241300532e6,6.7301225363109225,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000718106,0.0,0.094876895,77,101,19175.0,6,100000,23078,0.076512764,1.306971474720218e6,30.16228769359319,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000718106,0.029791235,0.006595418,61,79,16863.0,6,100000,16308,0.056293132,1.7764156380568768e6,28.969786225431548,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.005668259,0.0,0.036226264,356,493,85898.0,6,100000,120271,0.24165661,413810.32366546895,49.76938143756962,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.005668259,0.002705742,0.01385029,148,221,43962.0,6,100000,43511,0.102296354,977551.9467683081,42.534262755835854,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000224073,0.0,0.076151638,26,29,6532.0,7,100000,5683,0.039683036,2.5199684822501987e6,14.32098088462788,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000224073,0.000137366,0.003257647,26,29,6948.0,7,100000,5683,0.044023983,2.2714891562628485e6,12.90887287504177,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000693579,0.0,0.117181072,77,101,19175.0,7,100000,23078,0.08102353,1.2342093710308599e6,28.483083864650183,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000693579,0.052699947,0.00657028,61,79,16863.0,7,100000,16308,0.062383365,1.6029914385028766e6,26.141584379104913,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.005981163,0.0,0.03714658,356,493,85898.0,7,100000,120271,0.284440029,351567.9574058826,42.2834298051629,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.005981163,0.002827226,0.013832877,148,221,43962.0,7,100000,43511,0.12095232,826772.0701843504,35.97367954579127,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000227358,0.0,0.087982831,26,29,6532.0,8,100000,5683,0.038577702,2.592170990381957e6,14.731307738340663,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000227358,0.000131283,0.00358018,26,29,6948.0,8,100000,5683,0.037625264,2.6577886603001645e6,15.104212956485835,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000686625,0.0,0.086321324,77,101,19175.0,8,100000,23078,0.071813645,1.3924930283095923e6,32.13595410732877,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000686625,0.032879545,0.006435109,61,79,16863.0,8,100000,16308,0.058067196,1.722142739594314e6,28.084703797304073,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.006248709,0.0,0.036577372,356,493,85898.0,8,100000,120271,0.256489446,389879.5898214073,46.891208147410474,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.006248709,0.002962728,0.014268867,148,221,43962.0,8,100000,43511,0.108013889,925806.8654485721,40.28278252253282,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000211057,0.0,0.07707584,26,29,6532.0,9,100000,5683,0.036530301,2.7374534910073695e6,15.55694818939488,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000211057,0.000162875,0.003619465,26,29,6948.0,9,100000,5683,0.036196266,2.762715911083204e6,15.70051452268585,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000669602,0.0,0.110030151,77,101,19175.0,9,100000,23078,0.059213688,1.6887987115411558e6,38.97409666494679,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000669602,0.000557426,0.006374502,61,79,16863.0,9,100000,16308,0.047210683,2.118164653538268e6,34.54302916990208,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.005898744,0.0,0.036433113,356,493,85898.0,9,100000,120271,0.171036178,584671.6242688724,70.31904092244156,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.005898744,0.002558788,0.01376731,148,221,43962.0,9,100000,43511,0.082465833,1.2126234145964426e6,52.762457392505816,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000224924,0.0,0.077441757,26,29,6532.0,10,100000,5683,0.036579646,2.73376073677695e6,15.535962267103406,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000224924,0.000139469,0.003645977,26,29,6948.0,10,100000,5683,0.106126326,942273.2678035042,5.354938980927315,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000679782,0.0,0.102684554,77,101,19175.0,10,100000,23078,0.063933026,1.5641368203031716e6,36.09714953895659,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000679782,0.02384501,0.006682196,61,79,16863.0,10,100000,16308,0.051995087,1.923258634032096e6,31.36450180379542,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.018473242,0.0,0.035702944,356,493,85898.0,10,100000,120271,0.209342666,477685.70980174677,57.451738003565886,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.018473242,0.002601901,0.013714899,148,221,43962.0,10,100000,43511,0.091342875,1.0947761388066665e6,47.63480457561687,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000226858,0.0,0.093169541,26,29,6532.0,11,100000,5683,0.033549598,2.9806616460799323e6,16.939100134672255,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000226858,0.000146002,0.003515996,26,29,6948.0,11,100000,5683,0.03517745,2.8427302149530454e6,16.15523581157816,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000706774,0.0,0.084382302,77,101,19175.0,11,100000,23078,0.060738871,1.6463921431796122e6,37.995437880299086,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000706774,0.023541413,0.006270281,61,79,16863.0,11,100000,16308,0.050268545,1.9893155849249267e6,32.441758558955705,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.005735379,0.0,0.039783449,356,493,85898.0,11,100000,120271,0.195017069,512775.62786055414,61.67203753841671,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.005735379,0.00275681,0.013533609,148,221,43962.0,11,100000,43511,0.085348426,1.171667770416762e6,50.98043635860373,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000227249,0.0,0.077273713,26,29,6532.0,12,100000,5683,0.034006851,2.9405839429237363e6,16.711338547635595,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000227249,0.000162082,0.003808429,26,29,6948.0,12,100000,5683,0.034761157,2.876774210938951e6,16.348707840766057,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000671376,0.0,0.086502994,77,101,19175.0,12,100000,23078,0.060896495,1.6421306349404838e6,37.89709079315649,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000671376,0.0005545,0.00638866,61,79,16863.0,12,100000,16308,0.053085842,1.8837414314724442e6,30.720055264452622,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.00589155,0.0,0.036595917,356,493,85898.0,12,100000,120271,0.183184673,545897.1995981345,65.65560209286723,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.00589155,0.002941467,0.0140628,148,221,43962.0,12,100000,43511,0.082717269,1.2089373985497516e6,52.60207514829824,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.00020243,0.0,0.07351055,26,29,6532.0,13,100000,5683,0.03426045,2.9188174702900867e6,16.587639683658562,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.00020243,0.00018666,0.003180458,26,29,6948.0,13,100000,5683,0.036528437,2.73759317980126e6,15.557742040810561,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000649463,0.0,0.085458859,77,101,19175.0,13,100000,23078,0.056745574,1.7622519775727354e6,40.66925113842358,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000649463,0.023744455,0.006343052,61,79,16863.0,13,100000,16308,0.048610134,2.0571842077209663e6,33.54856005951352,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.016563426,0.0,0.035803778,356,493,85898.0,13,100000,120271,0.171455937,583240.2292374396,70.1468856106161,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.016563426,0.002889105,0.013906849,148,221,43962.0,13,100000,43511,0.079703702,1.2546468669673586e6,54.59093982861675,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000215617,0.0,0.075539676,26,29,6532.0,14,100000,5683,0.034475014,2.9006514689160096e6,16.484402297849684,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000215617,0.000175659,0.003260683,26,29,6948.0,14,100000,5683,0.035111653,2.8480573102041082e6,16.185509693889948,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000659363,0.0,0.084012087,77,101,19175.0,14,100000,23078,0.053867069,1.8564217778398155e6,42.842501788987256,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000659363,0.024033773,0.006425781,61,79,16863.0,14,100000,16308,0.046639671,2.1440974572912403e6,34.96594133350555,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.006857645,0.0,0.036011529,356,493,85898.0,14,100000,120271,0.158956374,629103.4293472245,75.66289855102004,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.006857645,0.010993406,0.013796697,148,221,43962.0,14,100000,43511,0.076985656,1.2989432732767777e6,56.51832076354587,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000236356,0.0,0.075072915,26,29,6532.0,15,100000,5683,0.036551101,2.735895698463365e6,15.548095254367304,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000236356,0.000311852,0.003495446,26,29,6948.0,15,100000,5683,0.036288895,2.755663957251936e6,15.660438269062753,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000688499,0.0,0.083327476,77,101,19175.0,15,100000,23078,0.05833869,1.714128308331915e6,39.55865309968393,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000688499,0.031409148,0.00639916,61,79,16863.0,15,100000,16308,0.048503318,2.0617146233171101e6,33.62244207705543,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.044098897,0.0,0.035616548,356,493,85898.0,15,100000,120271,0.174533677,572955.3271257788,68.90991014874454,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.044098897,0.002820964,0.013448835,148,221,43962.0,15,100000,43511,0.082111068,1.217862615061833e6,52.99042024395542,none,0.0,0.0,0.0
QED Process: 'ke->ke' no optimization,0.000206569,0.0,0.081030603,26,29,6532.0,16,100000,5683,0.035064231,2.8519091150180935e6,16.207399500647824,none,0.0,0.0,0.0
QED Process: 'ke->ke' reduced,0.000206569,0.000323635,0.0034872,26,29,6948.0,16,100000,5683,0.03428592,2.916649166771666e6,16.575317214763377,none,0.0,0.0,0.0
QED Process: 'ke->kke' no optimization,0.000683149,0.0,0.083208997,77,101,19175.0,16,100000,23078,0.054342847,1.8401685874131697e6,42.46741066032113,none,0.0,0.0,0.0
QED Process: 'ke->kke' reduced,0.000683149,0.028225894,0.00650278,61,79,16863.0,16,100000,16308,0.046233237,2.1629461073642755e6,35.27332511889661,none,0.0,0.0,0.0
QED Process: 'ke->kkke' no optimization,0.020724006,0.0,0.036160978,356,493,85898.0,16,100000,120271,0.237323357,421366.0267750215,50.67811340625861,none,0.0,0.0,0.0
QED Process: 'ke->kkke' reduced,0.020724006,0.00273602,0.013774424,148,221,43962.0,16,100000,43511,0.077327366,1.293203236742863e6,56.26856603391871,none,0.0,0.0,0.0
1 process_name graph_gen_time optimization_time function_generation_time graph_nodes graph_edges graph_mem cpu_threads n_inputs nflops_likwid cpu_time cpu_rate cpu_gflops gpu_name gpu_time gpu_rate gpu_gflops
2 QED Process: 'ke->ke' no optimization 0.000221518 0.0 0.076346293 26 29 6532.0 1 100000 5683 0.086186253 1.1602778461664878e6 6.59385899976415 none 0.0 0.0 0.0
3 QED Process: 'ke->ke' reduced 0.000221518 0.000117106 0.003614977 26 29 6948.0 1 100000 5683 0.086626002 1.1543878014825156e6 6.560385875825136 none 0.0 0.0 0.0
4 QED Process: 'ke->kke' no optimization 0.00066327 0.0 0.084021406 77 101 19175.0 1 100000 23078 0.270498122 369688.333732683 8.531667365882857 none 0.0 0.0 0.0
5 QED Process: 'ke->kke' reduced 0.00066327 0.029167501 0.006483402 61 79 16863.0 1 100000 16308 0.181935913 549644.0936320252 8.963595878951066 none 0.0 0.0 0.0
6 QED Process: 'ke->kkke' no optimization 0.006063462 0.0 0.035553646 356 493 85898.0 1 100000 120271 1.199812544 83346.35314497928 10.024149239099803 none 0.0 0.0 0.0
7 QED Process: 'ke->kkke' reduced 0.006063462 0.002954241 0.013902962 148 221 43962.0 1 100000 43511 0.435115663 229823.9491323483 9.999869850697607 none 0.0 0.0 0.0
8 QED Process: 'ke->ke' no optimization 0.000235685 0.0 0.075679887 26 29 6532.0 2 100000 5683 0.055355402 1.8065084235139329e6 10.26638737082968 none 0.0 0.0 0.0
9 QED Process: 'ke->ke' reduced 0.000235685 9.8981e-5 0.003594457 26 29 6948.0 2 100000 5683 0.055994886 1.7858773745873864e6 10.149141119780117 none 0.0 0.0 0.0
10 QED Process: 'ke->kke' no optimization 0.00065845 0.0 0.082042796 77 101 19175.0 2 100000 23078 0.160231685 624096.2890704169 14.402894159167081 none 0.0 0.0 0.0
11 QED Process: 'ke->kke' reduced 0.00065845 0.028895496 0.006503071 61 79 16863.0 2 100000 16308 0.106436305 939529.0450941528 15.321839667395443 none 0.0 0.0 0.0
12 QED Process: 'ke->kkke' no optimization 0.005661736 0.0 0.034808678 356 493 85898.0 2 100000 120271 0.620175294 161244.73349304366 19.393065341941853 none 0.0 0.0 0.0
13 QED Process: 'ke->kkke' reduced 0.005661736 0.002755388 0.013644252 148 221 43962.0 2 100000 43511 0.228774759 437111.15875331336 19.01914362851542 none 0.0 0.0 0.0
14 QED Process: 'ke->ke' no optimization 0.000218101 0.0 0.076264355 26 29 6532.0 3 100000 5683 0.050665491 1.9737300088535608e6 11.216707640314786 none 0.0 0.0 0.0
15 QED Process: 'ke->ke' reduced 0.000218101 0.000128448 0.003242828 26 29 6948.0 3 100000 5683 0.072984976 1.3701450008012608e6 7.786534039553565 none 0.0 0.0 0.0
16 QED Process: 'ke->kke' no optimization 0.000695342 0.0 0.097129092 77 101 19175.0 3 100000 23078 0.119935587 833780.8860684527 19.24199528868775 none 0.0 0.0 0.0
17 QED Process: 'ke->kke' reduced 0.000695342 0.029709036 0.006407135 61 79 16863.0 3 100000 16308 0.087404054 1.1441116907460608e6 18.65817345268676 none 0.0 0.0 0.0
18 QED Process: 'ke->kkke' no optimization 0.022064122 0.0 0.03663888 356 493 85898.0 3 100000 120271 0.476353198 209928.2641952579 25.248282263027868 none 0.0 0.0 0.0
19 QED Process: 'ke->kkke' reduced 0.022064122 0.010551003 0.013701032 148 221 43962.0 3 100000 43511 0.170405831 586834.3789245099 25.53375066138435 none 0.0 0.0 0.0
20 QED Process: 'ke->ke' no optimization 0.00021267 0.0 0.086773717 26 29 6532.0 4 100000 5683 0.041951463 2.3837070950302733e6 13.546607421057043 none 0.0 0.0 0.0
21 QED Process: 'ke->ke' reduced 0.00021267 0.000121484 0.003464937 26 29 6948.0 4 100000 5683 0.043205825 2.314502731981162e6 13.153319025848944 none 0.0 0.0 0.0
22 QED Process: 'ke->kke' no optimization 0.000684641 0.0 0.085586085 77 101 19175.0 4 100000 23078 0.091246099 1.0959372630275406e6 25.292040156149582 none 0.0 0.0 0.0
23 QED Process: 'ke->kke' reduced 0.000684641 0.05730277 0.006948781 61 79 16863.0 4 100000 16308 0.068751105 1.4545220764088081e6 23.720346022074846 none 0.0 0.0 0.0
24 QED Process: 'ke->kkke' no optimization 0.005607071 0.0 0.036123215 356 493 85898.0 4 100000 120271 0.335336275 298208.1195957699 35.86578875190284 none 0.0 0.0 0.0
25 QED Process: 'ke->kkke' reduced 0.005607071 0.002691243 0.013817787 148 221 43962.0 4 100000 43511 0.134833158 741657.3303133639 32.27025209926478 none 0.0 0.0 0.0
26 QED Process: 'ke->ke' no optimization 0.00023815 0.0 0.080547541 26 29 6532.0 5 100000 5683 0.041168092 2.429065694859018e6 13.8043803438838 none 0.0 0.0 0.0
27 QED Process: 'ke->ke' reduced 0.00023815 0.000153696 0.003356137 26 29 6948.0 5 100000 5683 0.045756827 2.185466225619185e6 12.420004560193826 none 0.0 0.0 0.0
28 QED Process: 'ke->kke' no optimization 0.000897943 0.0 0.098053657 77 101 19175.0 5 100000 23078 0.080404746 1.2437076786487205e6 28.702285807855173 none 0.0 0.0 0.0
29 QED Process: 'ke->kke' reduced 0.000897943 0.000581051 0.006567074 61 79 16863.0 5 100000 16308 0.08312713 1.202976693649835e6 19.618143920041508 none 0.0 0.0 0.0
30 QED Process: 'ke->kkke' no optimization 0.00574661 0.0 0.037455126 356 493 85898.0 5 100000 120271 0.283356036 352912.89859800274 42.445187227280385 none 0.0 0.0 0.0
31 QED Process: 'ke->kkke' reduced 0.00574661 0.003115262 0.014315688 148 221 43962.0 5 100000 43511 0.115244153 867722.9811390084 37.75549463233939 none 0.0 0.0 0.0
32 QED Process: 'ke->ke' no optimization 0.000224744 0.0 0.076885463 26 29 6532.0 6 100000 5683 0.03953493 2.529408803809694e6 14.37463023205049 none 0.0 0.0 0.0
33 QED Process: 'ke->ke' reduced 0.000224744 0.000138508 0.003362109 26 29 6948.0 6 100000 5683 0.084441256 1.184255241300532e6 6.7301225363109225 none 0.0 0.0 0.0
34 QED Process: 'ke->kke' no optimization 0.000718106 0.0 0.094876895 77 101 19175.0 6 100000 23078 0.076512764 1.306971474720218e6 30.16228769359319 none 0.0 0.0 0.0
35 QED Process: 'ke->kke' reduced 0.000718106 0.029791235 0.006595418 61 79 16863.0 6 100000 16308 0.056293132 1.7764156380568768e6 28.969786225431548 none 0.0 0.0 0.0
36 QED Process: 'ke->kkke' no optimization 0.005668259 0.0 0.036226264 356 493 85898.0 6 100000 120271 0.24165661 413810.32366546895 49.76938143756962 none 0.0 0.0 0.0
37 QED Process: 'ke->kkke' reduced 0.005668259 0.002705742 0.01385029 148 221 43962.0 6 100000 43511 0.102296354 977551.9467683081 42.534262755835854 none 0.0 0.0 0.0
38 QED Process: 'ke->ke' no optimization 0.000224073 0.0 0.076151638 26 29 6532.0 7 100000 5683 0.039683036 2.5199684822501987e6 14.32098088462788 none 0.0 0.0 0.0
39 QED Process: 'ke->ke' reduced 0.000224073 0.000137366 0.003257647 26 29 6948.0 7 100000 5683 0.044023983 2.2714891562628485e6 12.90887287504177 none 0.0 0.0 0.0
40 QED Process: 'ke->kke' no optimization 0.000693579 0.0 0.117181072 77 101 19175.0 7 100000 23078 0.08102353 1.2342093710308599e6 28.483083864650183 none 0.0 0.0 0.0
41 QED Process: 'ke->kke' reduced 0.000693579 0.052699947 0.00657028 61 79 16863.0 7 100000 16308 0.062383365 1.6029914385028766e6 26.141584379104913 none 0.0 0.0 0.0
42 QED Process: 'ke->kkke' no optimization 0.005981163 0.0 0.03714658 356 493 85898.0 7 100000 120271 0.284440029 351567.9574058826 42.2834298051629 none 0.0 0.0 0.0
43 QED Process: 'ke->kkke' reduced 0.005981163 0.002827226 0.013832877 148 221 43962.0 7 100000 43511 0.12095232 826772.0701843504 35.97367954579127 none 0.0 0.0 0.0
44 QED Process: 'ke->ke' no optimization 0.000227358 0.0 0.087982831 26 29 6532.0 8 100000 5683 0.038577702 2.592170990381957e6 14.731307738340663 none 0.0 0.0 0.0
45 QED Process: 'ke->ke' reduced 0.000227358 0.000131283 0.00358018 26 29 6948.0 8 100000 5683 0.037625264 2.6577886603001645e6 15.104212956485835 none 0.0 0.0 0.0
46 QED Process: 'ke->kke' no optimization 0.000686625 0.0 0.086321324 77 101 19175.0 8 100000 23078 0.071813645 1.3924930283095923e6 32.13595410732877 none 0.0 0.0 0.0
47 QED Process: 'ke->kke' reduced 0.000686625 0.032879545 0.006435109 61 79 16863.0 8 100000 16308 0.058067196 1.722142739594314e6 28.084703797304073 none 0.0 0.0 0.0
48 QED Process: 'ke->kkke' no optimization 0.006248709 0.0 0.036577372 356 493 85898.0 8 100000 120271 0.256489446 389879.5898214073 46.891208147410474 none 0.0 0.0 0.0
49 QED Process: 'ke->kkke' reduced 0.006248709 0.002962728 0.014268867 148 221 43962.0 8 100000 43511 0.108013889 925806.8654485721 40.28278252253282 none 0.0 0.0 0.0
50 QED Process: 'ke->ke' no optimization 0.000211057 0.0 0.07707584 26 29 6532.0 9 100000 5683 0.036530301 2.7374534910073695e6 15.55694818939488 none 0.0 0.0 0.0
51 QED Process: 'ke->ke' reduced 0.000211057 0.000162875 0.003619465 26 29 6948.0 9 100000 5683 0.036196266 2.762715911083204e6 15.70051452268585 none 0.0 0.0 0.0
52 QED Process: 'ke->kke' no optimization 0.000669602 0.0 0.110030151 77 101 19175.0 9 100000 23078 0.059213688 1.6887987115411558e6 38.97409666494679 none 0.0 0.0 0.0
53 QED Process: 'ke->kke' reduced 0.000669602 0.000557426 0.006374502 61 79 16863.0 9 100000 16308 0.047210683 2.118164653538268e6 34.54302916990208 none 0.0 0.0 0.0
54 QED Process: 'ke->kkke' no optimization 0.005898744 0.0 0.036433113 356 493 85898.0 9 100000 120271 0.171036178 584671.6242688724 70.31904092244156 none 0.0 0.0 0.0
55 QED Process: 'ke->kkke' reduced 0.005898744 0.002558788 0.01376731 148 221 43962.0 9 100000 43511 0.082465833 1.2126234145964426e6 52.762457392505816 none 0.0 0.0 0.0
56 QED Process: 'ke->ke' no optimization 0.000224924 0.0 0.077441757 26 29 6532.0 10 100000 5683 0.036579646 2.73376073677695e6 15.535962267103406 none 0.0 0.0 0.0
57 QED Process: 'ke->ke' reduced 0.000224924 0.000139469 0.003645977 26 29 6948.0 10 100000 5683 0.106126326 942273.2678035042 5.354938980927315 none 0.0 0.0 0.0
58 QED Process: 'ke->kke' no optimization 0.000679782 0.0 0.102684554 77 101 19175.0 10 100000 23078 0.063933026 1.5641368203031716e6 36.09714953895659 none 0.0 0.0 0.0
59 QED Process: 'ke->kke' reduced 0.000679782 0.02384501 0.006682196 61 79 16863.0 10 100000 16308 0.051995087 1.923258634032096e6 31.36450180379542 none 0.0 0.0 0.0
60 QED Process: 'ke->kkke' no optimization 0.018473242 0.0 0.035702944 356 493 85898.0 10 100000 120271 0.209342666 477685.70980174677 57.451738003565886 none 0.0 0.0 0.0
61 QED Process: 'ke->kkke' reduced 0.018473242 0.002601901 0.013714899 148 221 43962.0 10 100000 43511 0.091342875 1.0947761388066665e6 47.63480457561687 none 0.0 0.0 0.0
62 QED Process: 'ke->ke' no optimization 0.000226858 0.0 0.093169541 26 29 6532.0 11 100000 5683 0.033549598 2.9806616460799323e6 16.939100134672255 none 0.0 0.0 0.0
63 QED Process: 'ke->ke' reduced 0.000226858 0.000146002 0.003515996 26 29 6948.0 11 100000 5683 0.03517745 2.8427302149530454e6 16.15523581157816 none 0.0 0.0 0.0
64 QED Process: 'ke->kke' no optimization 0.000706774 0.0 0.084382302 77 101 19175.0 11 100000 23078 0.060738871 1.6463921431796122e6 37.995437880299086 none 0.0 0.0 0.0
65 QED Process: 'ke->kke' reduced 0.000706774 0.023541413 0.006270281 61 79 16863.0 11 100000 16308 0.050268545 1.9893155849249267e6 32.441758558955705 none 0.0 0.0 0.0
66 QED Process: 'ke->kkke' no optimization 0.005735379 0.0 0.039783449 356 493 85898.0 11 100000 120271 0.195017069 512775.62786055414 61.67203753841671 none 0.0 0.0 0.0
67 QED Process: 'ke->kkke' reduced 0.005735379 0.00275681 0.013533609 148 221 43962.0 11 100000 43511 0.085348426 1.171667770416762e6 50.98043635860373 none 0.0 0.0 0.0
68 QED Process: 'ke->ke' no optimization 0.000227249 0.0 0.077273713 26 29 6532.0 12 100000 5683 0.034006851 2.9405839429237363e6 16.711338547635595 none 0.0 0.0 0.0
69 QED Process: 'ke->ke' reduced 0.000227249 0.000162082 0.003808429 26 29 6948.0 12 100000 5683 0.034761157 2.876774210938951e6 16.348707840766057 none 0.0 0.0 0.0
70 QED Process: 'ke->kke' no optimization 0.000671376 0.0 0.086502994 77 101 19175.0 12 100000 23078 0.060896495 1.6421306349404838e6 37.89709079315649 none 0.0 0.0 0.0
71 QED Process: 'ke->kke' reduced 0.000671376 0.0005545 0.00638866 61 79 16863.0 12 100000 16308 0.053085842 1.8837414314724442e6 30.720055264452622 none 0.0 0.0 0.0
72 QED Process: 'ke->kkke' no optimization 0.00589155 0.0 0.036595917 356 493 85898.0 12 100000 120271 0.183184673 545897.1995981345 65.65560209286723 none 0.0 0.0 0.0
73 QED Process: 'ke->kkke' reduced 0.00589155 0.002941467 0.0140628 148 221 43962.0 12 100000 43511 0.082717269 1.2089373985497516e6 52.60207514829824 none 0.0 0.0 0.0
74 QED Process: 'ke->ke' no optimization 0.00020243 0.0 0.07351055 26 29 6532.0 13 100000 5683 0.03426045 2.9188174702900867e6 16.587639683658562 none 0.0 0.0 0.0
75 QED Process: 'ke->ke' reduced 0.00020243 0.00018666 0.003180458 26 29 6948.0 13 100000 5683 0.036528437 2.73759317980126e6 15.557742040810561 none 0.0 0.0 0.0
76 QED Process: 'ke->kke' no optimization 0.000649463 0.0 0.085458859 77 101 19175.0 13 100000 23078 0.056745574 1.7622519775727354e6 40.66925113842358 none 0.0 0.0 0.0
77 QED Process: 'ke->kke' reduced 0.000649463 0.023744455 0.006343052 61 79 16863.0 13 100000 16308 0.048610134 2.0571842077209663e6 33.54856005951352 none 0.0 0.0 0.0
78 QED Process: 'ke->kkke' no optimization 0.016563426 0.0 0.035803778 356 493 85898.0 13 100000 120271 0.171455937 583240.2292374396 70.1468856106161 none 0.0 0.0 0.0
79 QED Process: 'ke->kkke' reduced 0.016563426 0.002889105 0.013906849 148 221 43962.0 13 100000 43511 0.079703702 1.2546468669673586e6 54.59093982861675 none 0.0 0.0 0.0
80 QED Process: 'ke->ke' no optimization 0.000215617 0.0 0.075539676 26 29 6532.0 14 100000 5683 0.034475014 2.9006514689160096e6 16.484402297849684 none 0.0 0.0 0.0
81 QED Process: 'ke->ke' reduced 0.000215617 0.000175659 0.003260683 26 29 6948.0 14 100000 5683 0.035111653 2.8480573102041082e6 16.185509693889948 none 0.0 0.0 0.0
82 QED Process: 'ke->kke' no optimization 0.000659363 0.0 0.084012087 77 101 19175.0 14 100000 23078 0.053867069 1.8564217778398155e6 42.842501788987256 none 0.0 0.0 0.0
83 QED Process: 'ke->kke' reduced 0.000659363 0.024033773 0.006425781 61 79 16863.0 14 100000 16308 0.046639671 2.1440974572912403e6 34.96594133350555 none 0.0 0.0 0.0
84 QED Process: 'ke->kkke' no optimization 0.006857645 0.0 0.036011529 356 493 85898.0 14 100000 120271 0.158956374 629103.4293472245 75.66289855102004 none 0.0 0.0 0.0
85 QED Process: 'ke->kkke' reduced 0.006857645 0.010993406 0.013796697 148 221 43962.0 14 100000 43511 0.076985656 1.2989432732767777e6 56.51832076354587 none 0.0 0.0 0.0
86 QED Process: 'ke->ke' no optimization 0.000236356 0.0 0.075072915 26 29 6532.0 15 100000 5683 0.036551101 2.735895698463365e6 15.548095254367304 none 0.0 0.0 0.0
87 QED Process: 'ke->ke' reduced 0.000236356 0.000311852 0.003495446 26 29 6948.0 15 100000 5683 0.036288895 2.755663957251936e6 15.660438269062753 none 0.0 0.0 0.0
88 QED Process: 'ke->kke' no optimization 0.000688499 0.0 0.083327476 77 101 19175.0 15 100000 23078 0.05833869 1.714128308331915e6 39.55865309968393 none 0.0 0.0 0.0
89 QED Process: 'ke->kke' reduced 0.000688499 0.031409148 0.00639916 61 79 16863.0 15 100000 16308 0.048503318 2.0617146233171101e6 33.62244207705543 none 0.0 0.0 0.0
90 QED Process: 'ke->kkke' no optimization 0.044098897 0.0 0.035616548 356 493 85898.0 15 100000 120271 0.174533677 572955.3271257788 68.90991014874454 none 0.0 0.0 0.0
91 QED Process: 'ke->kkke' reduced 0.044098897 0.002820964 0.013448835 148 221 43962.0 15 100000 43511 0.082111068 1.217862615061833e6 52.99042024395542 none 0.0 0.0 0.0
92 QED Process: 'ke->ke' no optimization 0.000206569 0.0 0.081030603 26 29 6532.0 16 100000 5683 0.035064231 2.8519091150180935e6 16.207399500647824 none 0.0 0.0 0.0
93 QED Process: 'ke->ke' reduced 0.000206569 0.000323635 0.0034872 26 29 6948.0 16 100000 5683 0.03428592 2.916649166771666e6 16.575317214763377 none 0.0 0.0 0.0
94 QED Process: 'ke->kke' no optimization 0.000683149 0.0 0.083208997 77 101 19175.0 16 100000 23078 0.054342847 1.8401685874131697e6 42.46741066032113 none 0.0 0.0 0.0
95 QED Process: 'ke->kke' reduced 0.000683149 0.028225894 0.00650278 61 79 16863.0 16 100000 16308 0.046233237 2.1629461073642755e6 35.27332511889661 none 0.0 0.0 0.0
96 QED Process: 'ke->kkke' no optimization 0.020724006 0.0 0.036160978 356 493 85898.0 16 100000 120271 0.237323357 421366.0267750215 50.67811340625861 none 0.0 0.0 0.0
97 QED Process: 'ke->kkke' reduced 0.020724006 0.00273602 0.013774424 148 221 43962.0 16 100000 43511 0.077327366 1.293203236742863e6 56.26856603391871 none 0.0 0.0 0.0

View File

@ -34,9 +34,10 @@ function import_bench()
bench_txt("AB->ABBB.txt")
bench_txt("AB->ABBBBB.txt")
bench_txt("AB->ABBBBBBB.txt")
#bench_txt("AB->ABBBBBBBBB.txt")
bench_txt("AB->ABBBBBBBBB.txt")
bench_txt("ABAB->ABAB.txt")
return bench_txt("ABAB->ABC.txt")
bench_txt("ABAB->ABC.txt")
return nothing
end
import_bench()

View File

@ -2,6 +2,40 @@ using MetagraphOptimization
using LIKWID
using CUDA
using UUIDs
using DataFrames
using CSV
using Random
DISABLE_GPU = false
results_filename = "results.csv"
df = DataFrame(
process_name = String[],
graph_gen_time = Float64[],
optimization_time = Float64[],
function_generation_time = Float64[],
graph_nodes = Int[],
graph_edges = Int[],
graph_mem = Float64[],
cpu_threads = Int[],
n_inputs = Int[],
nflops_likwid = Int[],
cpu_time = Float64[],
cpu_rate = Float64[],
cpu_gflops = Float64[],
gpu_name = String[],
gpu_time = Float64[],
gpu_rate = Float64[],
gpu_gflops = Float64[],
)
# if they exist, read existing results and append new ones
if isfile(results_filename)
df = CSV.read(results_filename, DataFrame)
end
nInputs = 100_000
function cpu_bench(compute_function, inputs)
compute_function.(inputs[begin:10]) # make sure it's compiled
@ -24,21 +58,38 @@ end
function bench_process(
process::MetagraphOptimization.AbstractProcessDescription,
process_name::String,
graph::DAG,
func,
gen_time::Float64,
opt_time::Float64,
func_time::Float64,
io::IO = stdout;
use_likwid = true,
use_gpu = true,
)
println(io, "\n--- Benchmarking $(process) ---")
println(io, "\n--- Benchmarking $(process_name) ---")
if DISABLE_GPU
use_gpu = false
end
NFLOPs = GraphProperties(graph).computeEffort
graph_props = GraphProperties(graph)
NFLOPs = graph_props.computeEffort
nflops_likwid = 0
if use_likwid
input = gen_process_input(process)
func(input) # compile first
# get rid of annoying output to console
oldstd = stdout
redirect_stdout(devnull)
_, events = @perfmon "FLOPS_DP" func(input)
redirect_stdout(oldstd) # recover original stdout
NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
nflops_likwid = NFLOPs
end
nInputs = 10000000 # ten million
println(io, "Generating $nInputs inputs with $(Threads.nthreads()) threads...")
inputs = Vector{typeof(gen_process_input(process))}()
@ -54,12 +105,21 @@ function bench_process(
println(io, "Benchmarking CPU with $(Threads.nthreads()) threads...")
(time_cpu, rate_cpu) = cpu_bench(func, inputs)
flops_cpu = (rate_cpu * NFLOPs) / 1024^3
flops_cpu = (rate_cpu * NFLOPs) / 10^9
println(io, "Benchmarking GPU...")
cuInputs = CuArray(inputs)
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
flops_gpu = (rate_gpu * NFLOPs) / 1024^3
time_gpu = 0.0
rate_gpu = 0.0
flops_gpu = 0.0
gpu_name = "none"
if use_gpu
println(io, "Benchmarking GPU...")
gpu_name = "$(name(first(CUDA.devices())))"
cuInputs = CuArray(inputs)
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
flops_gpu = (rate_gpu * NFLOPs) / 10^9
else
println(io, "Skipping GPU...")
end
println(io, "\nBenchmark Summary for $(process):")
@ -73,10 +133,39 @@ function bench_process(
println(io, " Time: $time_cpu")
println(io, " Rate: $rate_cpu")
println(io, " GFLOPS: $flops_cpu")
println(io, "GPU, $(name(first(CUDA.devices())))")
println(io, " Time: $time_gpu")
println(io, " Rate: $rate_gpu")
return println(io, " GFLOPS: $flops_gpu")
if use_gpu
println(io, "GPU, $gpu_name")
println(io, " Time: $time_gpu")
println(io, " Rate: $rate_gpu")
println(io, " GFLOPS: $flops_gpu")
end
if (process_name != "warmup")
push!(
df,
Dict(
:process_name => process_name,
:graph_gen_time => gen_time,
:optimization_time => opt_time,
:function_generation_time => func_time,
:graph_nodes => graph_props.noNodes,
:graph_edges => graph_props.noEdges,
:graph_mem => MetagraphOptimization.mem(graph),
:cpu_threads => Threads.nthreads(),
:n_inputs => nInputs,
:nflops_likwid => nflops_likwid,
:cpu_time => time_cpu,
:cpu_rate => rate_cpu,
:cpu_gflops => flops_cpu,
:gpu_name => gpu_name,
:gpu_time => time_gpu,
:gpu_rate => rate_gpu,
:gpu_gflops => flops_gpu,
),
)
end
return nothing
end
# use "mock" machine that only uses cpu
@ -92,57 +181,151 @@ machine = Machine(
],
[-1.0;;],
)
optimizer = ReductionOptimizer()
# sadly cannot put these in functions because the world age must increase after the function is created which happens only in the global scope
# compton
process = parse_process("ke->ke", QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
## -- WARMUP TO COMPILE FUNCTIONS first
optimizer = RandomWalkOptimizer(MersenneTwister(0))
# 2-photon compton
process = parse_process("ke->kke", QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
gen_time = @elapsed graph = gen_graph(process)
opt_time = @elapsed optimize!(optimizer, graph, 200)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "warmup", graph, compute_func, gen_time, opt_time, func_gen_time)
# 3-photon compton
process = parse_process("ke->kkke", QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
# AB->AB
process = parse_process("AB->AB", ABCModel())
graph = parse_dag("input/AB->AB.txt", ABCModel())
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
optimizer = ReductionOptimizer()
# AB->AB^3
process = parse_process("AB->ABBB", ABCModel())
graph = parse_dag("input/AB->ABBB.txt", ABCModel())
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "warmup", graph, compute_func, gen_time, opt_time, func_gen_time)
exit(0)
## -- WARMUP END
optimizer = ReductionOptimizer()
# compton
process = parse_process("ke->ke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
# 2-photon compton
process = parse_process("ke->kke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
# 3-photon compton
process = parse_process("ke->kkke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
# 4-photon compton
process = parse_process("ke->kkkke", QEDModel())
graph = gen_graph(process)
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
# 5-photon compton
process = parse_process("ke->kkkkke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
# 6-photon compton
process = parse_process("ke->kkkkkke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
# 7-photon compton
process = parse_process("ke->kkkkkkke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
# 8-photon compton
process = parse_process("ke->kkkkkkkke", QEDModel())
gen_time = @elapsed graph = gen_graph(process)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
# AB->AB
process = parse_process("AB->AB", ABCModel())
gen_time = @elapsed graph = parse_dag("input/AB->AB.txt", ABCModel())
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
# AB->AB^3
process = parse_process("AB->ABBB", ABCModel())
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
# AB->AB^5
process = parse_process("AB->ABBBBB", ABCModel())
graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
optimize_to_fixpoint!(optimizer, graph)
compute_func = get_compute_function(graph, process, machine)
bench_process(process, compute_func)
gen_time = @elapsed graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
# AB->AB^7
process = parse_process("AB->ABBBBBBB", ABCModel())
gen_time = @elapsed graph = parse_dag("input/AB->ABBBBBBB.txt", ABCModel())
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
CSV.write(results_filename, df)

View File

@ -0,0 +1,63 @@
Bootstrap: docker
From: nvidia/cuda:12.3.1-devel-ubuntu20.04
%labels
Requires CUDA driver 470.57+.
%environment
export LANG=C
%runscript
# Print the GPU/driver status first, then start the benchmark driver script.
# NOTE: ./run.sh is resolved relative to the working directory at run time.
nvidia-smi
./run.sh
%post
# Source the 10-docker*.sh environment script(s) before installing anything
# (presumably the environment imported from the Docker base image - confirm).
. /.singularity.d/env/10-docker*.sh
apt-get update
apt-get install -y pciutils
# Non-interactive install of the build toolchain and runtime dependencies; the
# --force-confdef/--force-confold dpkg options avoid config-file prompts.
DEBIAN_FRONTEND='noninteractive' apt-get -y -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' install build-essential cuda-compat-12-3 libibverbs-dev ibverbs-utils gcc wget git libcap2-bin
apt-get -y autoremove; apt-get -y clean
cd /tmp
# install slurm
# SLURM_VERSION may be supplied by the build environment; otherwise it
# defaults to 17-02-11-1. Only contribs/pmi2 is built and installed, not the
# whole of Slurm; the sources are removed afterwards.
: ${SLURM_VERSION:=17-02-11-1}
wget https://github.com/SchedMD/slurm/archive/slurm-${SLURM_VERSION}.tar.gz
tar -xf slurm-${SLURM_VERSION}.tar.gz
cd slurm-slurm-${SLURM_VERSION}
./configure --prefix=/usr/ --sysconfdir=/etc/slurm --localstatedir=/var --disable-debug
make -C contribs/pmi2 -j$(nproc) install
cd ..
rm -rf slurm-*
# install julia
# Download the Julia 1.9.4 tarball into the home directory, move the unpacked
# tree to /opt and expose the binary as /usr/local/bin/julia via a symlink.
cd ~
wget https://julialang-s3.julialang.org/bin/linux/x64/1.9/julia-1.9.4-linux-x86_64.tar.gz
tar zxvf julia-1.9.4-linux-x86_64.tar.gz
mv julia-1.9.4/ /opt/julia-1.9.4
#mkdir /usr/local/bin
ln -s /opt/julia-1.9.4/bin/julia /usr/local/bin/julia
#Add nvidia driver paths to the environment variables
# The exports below are appended to /environment (presumably sourced when the
# container starts - confirm for the Singularity version in use).
# NOTE(review): whether "\n" is expanded into a newline depends on the echo
# implementation of the shell that runs %post.
echo "\n #Nvidia driver paths \n" >> /environment
echo 'export PATH="/nvbin:$PATH"' >> /environment
echo 'export LD_LIBRARY_PATH="/nvlib:$LD_LIBRARY_PATH"' >> /environment
#Add CUDA paths
echo "\n #Cuda paths \n" >> /environment
echo 'export CPATH="/usr/local/cuda/include:$CPATH"' >> /environment
echo 'export PATH="/usr/local/cuda/bin:$PATH"' >> /environment
echo 'export LD_LIBRARY_PATH="/usr/local/cuda/lib64:$LD_LIBRARY_PATH"' >> /environment
echo 'export CUDA_HOME="/usr/local/cuda"' >> /environment
# install likwid
# The working directory is still the home directory from the `cd ~` above, so
# the LIKWID sources are downloaded and built there.
VERSION=5.3.0
wget http://ftp.fau.de/pub/likwid/likwid-$VERSION.tar.gz
tar -xaf likwid-$VERSION.tar.gz
cd likwid-$VERSION
# accessdaemon doesn't work because of permissions
# so config.mk is patched to use ACCESSMODE = perf_event before building
sed -i 's/ACCESSMODE = accessdaemon/ACCESSMODE = perf_event/g' config.mk
make -j4
make -j4 install
# Add /usr/local/lib to the library search path (presumably where the LIKWID
# libraries were installed - confirm against config.mk's PREFIX).
echo 'export LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH"' >> /environment

18
experiments/run.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash
#
# Benchmark driver: instantiates the Julia project located one directory above
# this script, installs the extra packages needed by the benchmark script and
# then runs examples/qed_bench.jl once per thread count from 1 to 16.
#
# All julia output (stdout and stderr) is collected in julia.log next to the
# project root; only progress messages are printed to the terminal.
# Exits with status 1 if the project directory cannot be resolved/entered or
# if the package setup fails. A failing benchmark run is reported but does not
# stop the remaining runs.

# Resolve the directory containing this script, independent of the caller's
# working directory. Abort if that fails, otherwise every path below would be
# resolved relative to "/".
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) || exit 1
LOG_FILE="$SCRIPT_DIR/../julia.log"

# All relative paths below (--project=./, examples/qed_bench.jl) assume the
# project root as working directory, so do not continue if the cd fails.
cd -- "$SCRIPT_DIR/.." || exit 1

echo "Initiating julia..."
# The first julia call truncates the log file, the later ones append to it.
julia --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' > "$LOG_FILE" 2>&1 || exit 1 # need current dev version of QEDprocesses
julia -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random")' >> "$LOG_FILE" 2>&1 || exit 1 # add requirements for the bench script

echo "Benchmarking..."
for i in {1..16}; do
  echo "$i Threads"
  # Keep going with the next thread count even if one run fails.
  julia --project --threads="$i" examples/qed_bench.jl >> "$LOG_FILE" 2>&1 || echo "-- Something went wrong, check logs --"
done