Add qed bench run script and singularity container
This commit is contained in:
parent
82ed774b7e
commit
7098d1801a
2
.gitignore
vendored
2
.gitignore
vendored
@ -30,3 +30,5 @@ Manifest.toml
|
||||
.julia
|
||||
**/.ipynb_checkpoints/
|
||||
*.bkp
|
||||
|
||||
*.sif
|
||||
|
97
data/singularity_homepc_results.csv
Normal file
97
data/singularity_homepc_results.csv
Normal file
@ -0,0 +1,97 @@
|
||||
process_name,graph_gen_time,optimization_time,function_generation_time,graph_nodes,graph_edges,graph_mem,cpu_threads,n_inputs,nflops_likwid,cpu_time,cpu_rate,cpu_gflops,gpu_name,gpu_time,gpu_rate,gpu_gflops
|
||||
QED Process: 'ke->ke' no optimization,0.000221518,0.0,0.076346293,26,29,6532.0,1,100000,5683,0.086186253,1.1602778461664878e6,6.59385899976415,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000221518,0.000117106,0.003614977,26,29,6948.0,1,100000,5683,0.086626002,1.1543878014825156e6,6.560385875825136,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.00066327,0.0,0.084021406,77,101,19175.0,1,100000,23078,0.270498122,369688.333732683,8.531667365882857,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.00066327,0.029167501,0.006483402,61,79,16863.0,1,100000,16308,0.181935913,549644.0936320252,8.963595878951066,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.006063462,0.0,0.035553646,356,493,85898.0,1,100000,120271,1.199812544,83346.35314497928,10.024149239099803,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.006063462,0.002954241,0.013902962,148,221,43962.0,1,100000,43511,0.435115663,229823.9491323483,9.999869850697607,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000235685,0.0,0.075679887,26,29,6532.0,2,100000,5683,0.055355402,1.8065084235139329e6,10.26638737082968,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000235685,9.8981e-5,0.003594457,26,29,6948.0,2,100000,5683,0.055994886,1.7858773745873864e6,10.149141119780117,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.00065845,0.0,0.082042796,77,101,19175.0,2,100000,23078,0.160231685,624096.2890704169,14.402894159167081,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.00065845,0.028895496,0.006503071,61,79,16863.0,2,100000,16308,0.106436305,939529.0450941528,15.321839667395443,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.005661736,0.0,0.034808678,356,493,85898.0,2,100000,120271,0.620175294,161244.73349304366,19.393065341941853,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.005661736,0.002755388,0.013644252,148,221,43962.0,2,100000,43511,0.228774759,437111.15875331336,19.01914362851542,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000218101,0.0,0.076264355,26,29,6532.0,3,100000,5683,0.050665491,1.9737300088535608e6,11.216707640314786,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000218101,0.000128448,0.003242828,26,29,6948.0,3,100000,5683,0.072984976,1.3701450008012608e6,7.786534039553565,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000695342,0.0,0.097129092,77,101,19175.0,3,100000,23078,0.119935587,833780.8860684527,19.24199528868775,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000695342,0.029709036,0.006407135,61,79,16863.0,3,100000,16308,0.087404054,1.1441116907460608e6,18.65817345268676,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.022064122,0.0,0.03663888,356,493,85898.0,3,100000,120271,0.476353198,209928.2641952579,25.248282263027868,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.022064122,0.010551003,0.013701032,148,221,43962.0,3,100000,43511,0.170405831,586834.3789245099,25.53375066138435,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.00021267,0.0,0.086773717,26,29,6532.0,4,100000,5683,0.041951463,2.3837070950302733e6,13.546607421057043,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.00021267,0.000121484,0.003464937,26,29,6948.0,4,100000,5683,0.043205825,2.314502731981162e6,13.153319025848944,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000684641,0.0,0.085586085,77,101,19175.0,4,100000,23078,0.091246099,1.0959372630275406e6,25.292040156149582,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000684641,0.05730277,0.006948781,61,79,16863.0,4,100000,16308,0.068751105,1.4545220764088081e6,23.720346022074846,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.005607071,0.0,0.036123215,356,493,85898.0,4,100000,120271,0.335336275,298208.1195957699,35.86578875190284,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.005607071,0.002691243,0.013817787,148,221,43962.0,4,100000,43511,0.134833158,741657.3303133639,32.27025209926478,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.00023815,0.0,0.080547541,26,29,6532.0,5,100000,5683,0.041168092,2.429065694859018e6,13.8043803438838,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.00023815,0.000153696,0.003356137,26,29,6948.0,5,100000,5683,0.045756827,2.185466225619185e6,12.420004560193826,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000897943,0.0,0.098053657,77,101,19175.0,5,100000,23078,0.080404746,1.2437076786487205e6,28.702285807855173,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000897943,0.000581051,0.006567074,61,79,16863.0,5,100000,16308,0.08312713,1.202976693649835e6,19.618143920041508,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.00574661,0.0,0.037455126,356,493,85898.0,5,100000,120271,0.283356036,352912.89859800274,42.445187227280385,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.00574661,0.003115262,0.014315688,148,221,43962.0,5,100000,43511,0.115244153,867722.9811390084,37.75549463233939,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000224744,0.0,0.076885463,26,29,6532.0,6,100000,5683,0.03953493,2.529408803809694e6,14.37463023205049,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000224744,0.000138508,0.003362109,26,29,6948.0,6,100000,5683,0.084441256,1.184255241300532e6,6.7301225363109225,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000718106,0.0,0.094876895,77,101,19175.0,6,100000,23078,0.076512764,1.306971474720218e6,30.16228769359319,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000718106,0.029791235,0.006595418,61,79,16863.0,6,100000,16308,0.056293132,1.7764156380568768e6,28.969786225431548,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.005668259,0.0,0.036226264,356,493,85898.0,6,100000,120271,0.24165661,413810.32366546895,49.76938143756962,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.005668259,0.002705742,0.01385029,148,221,43962.0,6,100000,43511,0.102296354,977551.9467683081,42.534262755835854,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000224073,0.0,0.076151638,26,29,6532.0,7,100000,5683,0.039683036,2.5199684822501987e6,14.32098088462788,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000224073,0.000137366,0.003257647,26,29,6948.0,7,100000,5683,0.044023983,2.2714891562628485e6,12.90887287504177,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000693579,0.0,0.117181072,77,101,19175.0,7,100000,23078,0.08102353,1.2342093710308599e6,28.483083864650183,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000693579,0.052699947,0.00657028,61,79,16863.0,7,100000,16308,0.062383365,1.6029914385028766e6,26.141584379104913,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.005981163,0.0,0.03714658,356,493,85898.0,7,100000,120271,0.284440029,351567.9574058826,42.2834298051629,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.005981163,0.002827226,0.013832877,148,221,43962.0,7,100000,43511,0.12095232,826772.0701843504,35.97367954579127,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000227358,0.0,0.087982831,26,29,6532.0,8,100000,5683,0.038577702,2.592170990381957e6,14.731307738340663,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000227358,0.000131283,0.00358018,26,29,6948.0,8,100000,5683,0.037625264,2.6577886603001645e6,15.104212956485835,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000686625,0.0,0.086321324,77,101,19175.0,8,100000,23078,0.071813645,1.3924930283095923e6,32.13595410732877,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000686625,0.032879545,0.006435109,61,79,16863.0,8,100000,16308,0.058067196,1.722142739594314e6,28.084703797304073,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.006248709,0.0,0.036577372,356,493,85898.0,8,100000,120271,0.256489446,389879.5898214073,46.891208147410474,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.006248709,0.002962728,0.014268867,148,221,43962.0,8,100000,43511,0.108013889,925806.8654485721,40.28278252253282,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000211057,0.0,0.07707584,26,29,6532.0,9,100000,5683,0.036530301,2.7374534910073695e6,15.55694818939488,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000211057,0.000162875,0.003619465,26,29,6948.0,9,100000,5683,0.036196266,2.762715911083204e6,15.70051452268585,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000669602,0.0,0.110030151,77,101,19175.0,9,100000,23078,0.059213688,1.6887987115411558e6,38.97409666494679,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000669602,0.000557426,0.006374502,61,79,16863.0,9,100000,16308,0.047210683,2.118164653538268e6,34.54302916990208,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.005898744,0.0,0.036433113,356,493,85898.0,9,100000,120271,0.171036178,584671.6242688724,70.31904092244156,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.005898744,0.002558788,0.01376731,148,221,43962.0,9,100000,43511,0.082465833,1.2126234145964426e6,52.762457392505816,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000224924,0.0,0.077441757,26,29,6532.0,10,100000,5683,0.036579646,2.73376073677695e6,15.535962267103406,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000224924,0.000139469,0.003645977,26,29,6948.0,10,100000,5683,0.106126326,942273.2678035042,5.354938980927315,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000679782,0.0,0.102684554,77,101,19175.0,10,100000,23078,0.063933026,1.5641368203031716e6,36.09714953895659,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000679782,0.02384501,0.006682196,61,79,16863.0,10,100000,16308,0.051995087,1.923258634032096e6,31.36450180379542,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.018473242,0.0,0.035702944,356,493,85898.0,10,100000,120271,0.209342666,477685.70980174677,57.451738003565886,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.018473242,0.002601901,0.013714899,148,221,43962.0,10,100000,43511,0.091342875,1.0947761388066665e6,47.63480457561687,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000226858,0.0,0.093169541,26,29,6532.0,11,100000,5683,0.033549598,2.9806616460799323e6,16.939100134672255,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000226858,0.000146002,0.003515996,26,29,6948.0,11,100000,5683,0.03517745,2.8427302149530454e6,16.15523581157816,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000706774,0.0,0.084382302,77,101,19175.0,11,100000,23078,0.060738871,1.6463921431796122e6,37.995437880299086,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000706774,0.023541413,0.006270281,61,79,16863.0,11,100000,16308,0.050268545,1.9893155849249267e6,32.441758558955705,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.005735379,0.0,0.039783449,356,493,85898.0,11,100000,120271,0.195017069,512775.62786055414,61.67203753841671,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.005735379,0.00275681,0.013533609,148,221,43962.0,11,100000,43511,0.085348426,1.171667770416762e6,50.98043635860373,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000227249,0.0,0.077273713,26,29,6532.0,12,100000,5683,0.034006851,2.9405839429237363e6,16.711338547635595,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000227249,0.000162082,0.003808429,26,29,6948.0,12,100000,5683,0.034761157,2.876774210938951e6,16.348707840766057,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000671376,0.0,0.086502994,77,101,19175.0,12,100000,23078,0.060896495,1.6421306349404838e6,37.89709079315649,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000671376,0.0005545,0.00638866,61,79,16863.0,12,100000,16308,0.053085842,1.8837414314724442e6,30.720055264452622,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.00589155,0.0,0.036595917,356,493,85898.0,12,100000,120271,0.183184673,545897.1995981345,65.65560209286723,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.00589155,0.002941467,0.0140628,148,221,43962.0,12,100000,43511,0.082717269,1.2089373985497516e6,52.60207514829824,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.00020243,0.0,0.07351055,26,29,6532.0,13,100000,5683,0.03426045,2.9188174702900867e6,16.587639683658562,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.00020243,0.00018666,0.003180458,26,29,6948.0,13,100000,5683,0.036528437,2.73759317980126e6,15.557742040810561,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000649463,0.0,0.085458859,77,101,19175.0,13,100000,23078,0.056745574,1.7622519775727354e6,40.66925113842358,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000649463,0.023744455,0.006343052,61,79,16863.0,13,100000,16308,0.048610134,2.0571842077209663e6,33.54856005951352,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.016563426,0.0,0.035803778,356,493,85898.0,13,100000,120271,0.171455937,583240.2292374396,70.1468856106161,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.016563426,0.002889105,0.013906849,148,221,43962.0,13,100000,43511,0.079703702,1.2546468669673586e6,54.59093982861675,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000215617,0.0,0.075539676,26,29,6532.0,14,100000,5683,0.034475014,2.9006514689160096e6,16.484402297849684,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000215617,0.000175659,0.003260683,26,29,6948.0,14,100000,5683,0.035111653,2.8480573102041082e6,16.185509693889948,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000659363,0.0,0.084012087,77,101,19175.0,14,100000,23078,0.053867069,1.8564217778398155e6,42.842501788987256,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000659363,0.024033773,0.006425781,61,79,16863.0,14,100000,16308,0.046639671,2.1440974572912403e6,34.96594133350555,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.006857645,0.0,0.036011529,356,493,85898.0,14,100000,120271,0.158956374,629103.4293472245,75.66289855102004,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.006857645,0.010993406,0.013796697,148,221,43962.0,14,100000,43511,0.076985656,1.2989432732767777e6,56.51832076354587,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000236356,0.0,0.075072915,26,29,6532.0,15,100000,5683,0.036551101,2.735895698463365e6,15.548095254367304,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000236356,0.000311852,0.003495446,26,29,6948.0,15,100000,5683,0.036288895,2.755663957251936e6,15.660438269062753,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000688499,0.0,0.083327476,77,101,19175.0,15,100000,23078,0.05833869,1.714128308331915e6,39.55865309968393,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000688499,0.031409148,0.00639916,61,79,16863.0,15,100000,16308,0.048503318,2.0617146233171101e6,33.62244207705543,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.044098897,0.0,0.035616548,356,493,85898.0,15,100000,120271,0.174533677,572955.3271257788,68.90991014874454,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.044098897,0.002820964,0.013448835,148,221,43962.0,15,100000,43511,0.082111068,1.217862615061833e6,52.99042024395542,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' no optimization,0.000206569,0.0,0.081030603,26,29,6532.0,16,100000,5683,0.035064231,2.8519091150180935e6,16.207399500647824,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->ke' reduced,0.000206569,0.000323635,0.0034872,26,29,6948.0,16,100000,5683,0.03428592,2.916649166771666e6,16.575317214763377,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' no optimization,0.000683149,0.0,0.083208997,77,101,19175.0,16,100000,23078,0.054342847,1.8401685874131697e6,42.46741066032113,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kke' reduced,0.000683149,0.028225894,0.00650278,61,79,16863.0,16,100000,16308,0.046233237,2.1629461073642755e6,35.27332511889661,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' no optimization,0.020724006,0.0,0.036160978,356,493,85898.0,16,100000,120271,0.237323357,421366.0267750215,50.67811340625861,none,0.0,0.0,0.0
|
||||
QED Process: 'ke->kkke' reduced,0.020724006,0.00273602,0.013774424,148,221,43962.0,16,100000,43511,0.077327366,1.293203236742863e6,56.26856603391871,none,0.0,0.0,0.0
|
|
@ -34,9 +34,10 @@ function import_bench()
|
||||
bench_txt("AB->ABBB.txt")
|
||||
bench_txt("AB->ABBBBB.txt")
|
||||
bench_txt("AB->ABBBBBBB.txt")
|
||||
#bench_txt("AB->ABBBBBBBBB.txt")
|
||||
bench_txt("AB->ABBBBBBBBB.txt")
|
||||
bench_txt("ABAB->ABAB.txt")
|
||||
return bench_txt("ABAB->ABC.txt")
|
||||
bench_txt("ABAB->ABC.txt")
|
||||
return nothing
|
||||
end
|
||||
|
||||
import_bench()
|
||||
|
@ -2,6 +2,40 @@ using MetagraphOptimization
|
||||
using LIKWID
|
||||
using CUDA
|
||||
using UUIDs
|
||||
using DataFrames
|
||||
using CSV
|
||||
using Random
|
||||
|
||||
DISABLE_GPU = false
|
||||
|
||||
results_filename = "results.csv"
|
||||
|
||||
df = DataFrame(
|
||||
process_name = String[],
|
||||
graph_gen_time = Float64[],
|
||||
optimization_time = Float64[],
|
||||
function_generation_time = Float64[],
|
||||
graph_nodes = Int[],
|
||||
graph_edges = Int[],
|
||||
graph_mem = Float64[],
|
||||
cpu_threads = Int[],
|
||||
n_inputs = Int[],
|
||||
nflops_likwid = Int[],
|
||||
cpu_time = Float64[],
|
||||
cpu_rate = Float64[],
|
||||
cpu_gflops = Float64[],
|
||||
gpu_name = String[],
|
||||
gpu_time = Float64[],
|
||||
gpu_rate = Float64[],
|
||||
gpu_gflops = Float64[],
|
||||
)
|
||||
|
||||
# if they exist, read existing results and append new ones
|
||||
if isfile(results_filename)
|
||||
df = CSV.read(results_filename, DataFrame)
|
||||
end
|
||||
|
||||
nInputs = 100_000
|
||||
|
||||
function cpu_bench(compute_function, inputs)
|
||||
compute_function.(inputs[begin:10]) # make sure it's compiled
|
||||
@ -24,21 +58,38 @@ end
|
||||
|
||||
function bench_process(
|
||||
process::MetagraphOptimization.AbstractProcessDescription,
|
||||
process_name::String,
|
||||
graph::DAG,
|
||||
func,
|
||||
gen_time::Float64,
|
||||
opt_time::Float64,
|
||||
func_time::Float64,
|
||||
io::IO = stdout;
|
||||
use_likwid = true,
|
||||
use_gpu = true,
|
||||
)
|
||||
println(io, "\n--- Benchmarking $(process) ---")
|
||||
println(io, "\n--- Benchmarking $(process_name) ---")
|
||||
if DISABLE_GPU
|
||||
use_gpu = false
|
||||
end
|
||||
|
||||
NFLOPs = GraphProperties(graph).computeEffort
|
||||
graph_props = GraphProperties(graph)
|
||||
NFLOPs = graph_props.computeEffort
|
||||
nflops_likwid = 0
|
||||
if use_likwid
|
||||
input = gen_process_input(process)
|
||||
func(input) # compile first
|
||||
|
||||
# get rid of annoying output to console
|
||||
oldstd = stdout
|
||||
redirect_stdout(devnull)
|
||||
_, events = @perfmon "FLOPS_DP" func(input)
|
||||
redirect_stdout(oldstd) # recover original stdout
|
||||
|
||||
NFLOPs = first(events["FLOPS_DP"])["RETIRED_SSE_AVX_FLOPS_ALL"]
|
||||
nflops_likwid = NFLOPs
|
||||
end
|
||||
|
||||
nInputs = 10000000 # ten million
|
||||
println(io, "Generating $nInputs inputs with $(Threads.nthreads()) threads...")
|
||||
|
||||
inputs = Vector{typeof(gen_process_input(process))}()
|
||||
@ -54,12 +105,21 @@ function bench_process(
|
||||
|
||||
println(io, "Benchmarking CPU with $(Threads.nthreads()) threads...")
|
||||
(time_cpu, rate_cpu) = cpu_bench(func, inputs)
|
||||
flops_cpu = (rate_cpu * NFLOPs) / 1024^3
|
||||
flops_cpu = (rate_cpu * NFLOPs) / 10^9
|
||||
|
||||
println(io, "Benchmarking GPU...")
|
||||
cuInputs = CuArray(inputs)
|
||||
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
|
||||
flops_gpu = (rate_gpu * NFLOPs) / 1024^3
|
||||
time_gpu = 0.0
|
||||
rate_gpu = 0.0
|
||||
flops_gpu = 0.0
|
||||
gpu_name = "none"
|
||||
if use_gpu
|
||||
println(io, "Benchmarking GPU...")
|
||||
gpu_name = "$(name(first(CUDA.devices())))"
|
||||
cuInputs = CuArray(inputs)
|
||||
(time_gpu, rate_gpu) = gpu_bench(func, cuInputs)
|
||||
flops_gpu = (rate_gpu * NFLOPs) / 10^9
|
||||
else
|
||||
println(io, "Skipping GPU...")
|
||||
end
|
||||
|
||||
println(io, "\nBenchmark Summary for $(process):")
|
||||
|
||||
@ -73,10 +133,39 @@ function bench_process(
|
||||
println(io, " Time: $time_cpu")
|
||||
println(io, " Rate: $rate_cpu")
|
||||
println(io, " GFLOPS: $flops_cpu")
|
||||
println(io, "GPU, $(name(first(CUDA.devices())))")
|
||||
println(io, " Time: $time_gpu")
|
||||
println(io, " Rate: $rate_gpu")
|
||||
return println(io, " GFLOPS: $flops_gpu")
|
||||
if use_gpu
|
||||
println(io, "GPU, $gpu_name")
|
||||
println(io, " Time: $time_gpu")
|
||||
println(io, " Rate: $rate_gpu")
|
||||
println(io, " GFLOPS: $flops_gpu")
|
||||
end
|
||||
|
||||
if (process_name != "warmup")
|
||||
push!(
|
||||
df,
|
||||
Dict(
|
||||
:process_name => process_name,
|
||||
:graph_gen_time => gen_time,
|
||||
:optimization_time => opt_time,
|
||||
:function_generation_time => func_time,
|
||||
:graph_nodes => graph_props.noNodes,
|
||||
:graph_edges => graph_props.noEdges,
|
||||
:graph_mem => MetagraphOptimization.mem(graph),
|
||||
:cpu_threads => Threads.nthreads(),
|
||||
:n_inputs => nInputs,
|
||||
:nflops_likwid => nflops_likwid,
|
||||
:cpu_time => time_cpu,
|
||||
:cpu_rate => rate_cpu,
|
||||
:cpu_gflops => flops_cpu,
|
||||
:gpu_name => gpu_name,
|
||||
:gpu_time => time_gpu,
|
||||
:gpu_rate => rate_gpu,
|
||||
:gpu_gflops => flops_gpu,
|
||||
),
|
||||
)
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
# use "mock" machine that only uses cpu
|
||||
@ -92,57 +181,151 @@ machine = Machine(
|
||||
],
|
||||
[-1.0;;],
|
||||
)
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# sadly cannot put these in functions because the world age must increase after the function is created which happens only in the global scope
|
||||
|
||||
# compton
|
||||
process = parse_process("ke->ke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
## -- WARMUP TO COMPILE FUNCTIONS first
|
||||
|
||||
optimizer = RandomWalkOptimizer(MersenneTwister(0))
|
||||
|
||||
# 2-photon compton
|
||||
process = parse_process("ke->kke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
opt_time = @elapsed optimize!(optimizer, graph, 200)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "warmup", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 3-photon compton
|
||||
process = parse_process("ke->kkke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
|
||||
# AB->AB
|
||||
process = parse_process("AB->AB", ABCModel())
|
||||
graph = parse_dag("input/AB->AB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# AB->AB^3
|
||||
process = parse_process("AB->ABBB", ABCModel())
|
||||
graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "warmup", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
exit(0)
|
||||
## -- WARMUP END
|
||||
|
||||
optimizer = ReductionOptimizer()
|
||||
|
||||
# compton
|
||||
process = parse_process("ke->ke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 2-photon compton
|
||||
process = parse_process("ke->kke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 3-photon compton
|
||||
process = parse_process("ke->kkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 4-photon compton
|
||||
process = parse_process("ke->kkkke", QEDModel())
|
||||
graph = gen_graph(process)
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# 5-photon compton
|
||||
process = parse_process("ke->kkkkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# 6-photon compton
|
||||
process = parse_process("ke->kkkkkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# 7-photon compton
|
||||
process = parse_process("ke->kkkkkkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# 8-photon compton
|
||||
process = parse_process("ke->kkkkkkkke", QEDModel())
|
||||
gen_time = @elapsed graph = gen_graph(process)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# AB->AB
|
||||
process = parse_process("AB->AB", ABCModel())
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->AB.txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# AB->AB^3
|
||||
process = parse_process("AB->ABBB", ABCModel())
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBB.txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time)
|
||||
|
||||
# AB->AB^5
|
||||
process = parse_process("AB->ABBBBB", ABCModel())
|
||||
graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
|
||||
optimize_to_fixpoint!(optimizer, graph)
|
||||
compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, compute_func)
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBBBB.txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
# AB->AB^7
|
||||
process = parse_process("AB->ABBBBBBB", ABCModel())
|
||||
gen_time = @elapsed graph = parse_dag("input/AB->ABBBBBBB.txt", ABCModel())
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process no optimization", graph, compute_func, gen_time, 0.0, func_gen_time, use_gpu = false)
|
||||
|
||||
opt_time = @elapsed optimize_to_fixpoint!(optimizer, graph)
|
||||
func_gen_time = @elapsed compute_func = get_compute_function(graph, process, machine)
|
||||
bench_process(process, "$process reduced", graph, compute_func, gen_time, opt_time, func_gen_time, use_gpu = false)
|
||||
|
||||
CSV.write(results_filename, df)
|
||||
|
63
experiments/CUDA_container.def
Normal file
63
experiments/CUDA_container.def
Normal file
@ -0,0 +1,63 @@
|
||||
Bootstrap: docker
From: nvidia/cuda:12.3.1-devel-ubuntu20.04

%labels
    Requires CUDA driver 470.57+.

%environment
    export LANG=C

%runscript
    # Print the visible GPUs, then start the benchmark driver script.
    # run.sh is resolved relative to the directory the container is started from.
    nvidia-smi
    ./run.sh

%post
    # Build-time setup: system packages, the Slurm PMI2 library, Julia 1.9.4,
    # NVIDIA/CUDA environment variables and LIKWID.

    # Load the environment exported by the Docker base image.
    . /.singularity.d/env/10-docker*.sh

    apt-get update
    apt-get install -y pciutils
    DEBIAN_FRONTEND='noninteractive' apt-get -y -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' install build-essential cuda-compat-12-3 libibverbs-dev ibverbs-utils gcc wget git libcap2-bin
    apt-get -y autoremove; apt-get -y clean

    cd /tmp

    # install slurm (only contribs/pmi2 is built and installed)
    # SLURM_VERSION can be overridden from the build environment.
    : "${SLURM_VERSION:=17-02-11-1}"
    wget "https://github.com/SchedMD/slurm/archive/slurm-${SLURM_VERSION}.tar.gz"
    tar -xf "slurm-${SLURM_VERSION}.tar.gz"
    cd "slurm-slurm-${SLURM_VERSION}"
    ./configure --prefix=/usr/ --sysconfdir=/etc/slurm --localstatedir=/var --disable-debug
    make -C contribs/pmi2 -j"$(nproc)" install
    cd ..
    rm -rf slurm-*

    # install julia
    cd ~
    wget https://julialang-s3.julialang.org/bin/linux/x64/1.9/julia-1.9.4-linux-x86_64.tar.gz
    tar zxvf julia-1.9.4-linux-x86_64.tar.gz
    mv julia-1.9.4/ /opt/julia-1.9.4
    # The tarball is no longer needed once unpacked; drop it to keep the image
    # small, mirroring the cleanup done after the slurm build.
    rm -f julia-1.9.4-linux-x86_64.tar.gz
    #mkdir /usr/local/bin
    ln -s /opt/julia-1.9.4/bin/julia /usr/local/bin/julia

    #Add nvidia driver paths to the environment variables
    # printf is used for the banner lines because `echo "\n"` only emits a
    # newline in shells whose echo interprets escapes; elsewhere it would write
    # a literal `\n` line into the sourced environment file.
    printf '\n #Nvidia driver paths \n\n' >> /environment
    echo 'export PATH="/nvbin:$PATH"' >> /environment
    echo 'export LD_LIBRARY_PATH="/nvlib:$LD_LIBRARY_PATH"' >> /environment

    #Add CUDA paths
    printf '\n #Cuda paths \n\n' >> /environment
    echo 'export CPATH="/usr/local/cuda/include:$CPATH"' >> /environment
    echo 'export PATH="/usr/local/cuda/bin:$PATH"' >> /environment
    echo 'export LD_LIBRARY_PATH="/usr/local/cuda/lib64:$LD_LIBRARY_PATH"' >> /environment
    echo 'export CUDA_HOME="/usr/local/cuda"' >> /environment

    # install likwid
    VERSION=5.3.0
    wget "http://ftp.fau.de/pub/likwid/likwid-$VERSION.tar.gz"
    tar -xaf "likwid-$VERSION.tar.gz"
    cd "likwid-$VERSION"
    # accessdaemon doesn't work because of permissions
    sed -i 's/ACCESSMODE = accessdaemon/ACCESSMODE = perf_event/g' config.mk
    make -j4
    make -j4 install
    # Remove the sources and tarball after installation, as done for slurm.
    cd ..
    rm -rf "likwid-$VERSION" "likwid-$VERSION.tar.gz"
    echo 'export LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH"' >> /environment
|
18
experiments/run.sh
Executable file
18
experiments/run.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/bash
#
# Prepare the Julia environment of this repository and run the QED benchmark
# (examples/qed_bench.jl) once for every thread count from 1 to 16.
#
# All Julia output (stdout and stderr) is collected in julia.log in the
# repository root, i.e. the parent directory of this script. The script exits
# with status 1 if the repository root cannot be entered or if package setup
# fails; a failing benchmark run only prints a notice and the loop continues.

# Absolute directory containing this script, independent of the caller's cwd.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) || exit 1
LOG_FILE="$SCRIPT_DIR/../julia.log"

# Everything below uses paths relative to the repository root, so a failed cd
# must abort instead of running julia in the wrong directory.
cd -- "$SCRIPT_DIR/.." || exit 1

echo "Initiating julia..."
julia --project=./ -e 'using Pkg; Pkg.instantiate(); Pkg.add(url="https://github.com/QEDjl-project/QEDprocesses.jl/")' > "$LOG_FILE" 2>&1 || exit 1 # need current dev version of QEDprocesses
julia -e 'using Pkg; Pkg.add("CSV"); Pkg.add("DataFrames"); Pkg.add("LIKWID"); Pkg.add("CUDA"); Pkg.add("Random")' >> "$LOG_FILE" 2>&1 || exit 1 # add requirements for the bench script

echo "Benchmarking..."

for i in {1..16}
do
    echo "$i Threads"
    # Deliberately best-effort: one failing thread count must not stop the sweep.
    julia --project --threads="$i" examples/qed_bench.jl >> "$LOG_FILE" 2>&1 || echo "-- Something went wrong, check logs --"
done
|
Loading…
x
Reference in New Issue
Block a user