ex-post: .
?
Benchmark! ... , ,
chapel, . Chapel HPC .
[PARALLEL]
, , -, "".
norm_reduce()
concurrency -enabled reduce
x**2
+
- . 2 CLK-, ?
Amdahl .
- :
+++++++++++++++++++++++++++++++++++++++++++++++ <TiO.IDE>.RUN
3.74166
[SEQ] norm_loop(): 0.0 [us] -- 3.74166
[SEQ] norm_loop_param(): 0.0 [us] -- 3.74166
[PAR]: norm_reduce(): 5677.0 [us] -- 3.74166
3.74166
[SEQ] norm_loop(): 0.0 [us] -- 3.74166
[SEQ] norm_loop_param(): 1.0 [us] -- 3.74166
[PAR]: norm_reduce(): 5818.0 [us] -- 3.74166
3.74166
[SEQ] norm_loop(): 1.0 [us] -- 3.74166
[SEQ] norm_loop_param(): 2.0 [us] -- 3.74166
[PAR]: norm_reduce(): 4886.0 [us] -- 3.74166
, , --fast
:
+++++++++++++++++++++++++++++++++++++++++++++++ <TiO.IDE>.+CompilerFLAG( "--fast" ).RUN
3.74166
[SEQ] norm_loop(): 1.0 [us] -- 3.74166
[SEQ] norm_loop_param(): 2.0 [us] -- 3.74166
[PAR]: norm_reduce(): 7769.0 [us] -- 3.74166
3.74166
[SEQ] norm_loop(): 0.0 [us] -- 3.74166
[SEQ] norm_loop_param(): 0.0 [us] -- 3.74166
[PAR]: norm_reduce(): 9109.0 [us] -- 3.74166
3.74166
[SEQ] norm_loop(): 1.0 [us] -- 3.74166
[SEQ] norm_loop_param(): 1.0 [us] -- 3.74166
[PAR]: norm_reduce(): 8807.0 [us] -- 3.74166
, SuperComputing2017 HPC [ ] , .
- Try-it-Online chapel, / Chapel-, ( , TiO.IDE).
use Time;
// Module-scope stopwatches: going by their names, one for the sequential
// ([SEQ]) measurements and one for the parallel ([PAR]) measurement.
// NOTE(review): a Chapel Timer keeps accumulating elapsed time across
// start()/stop() pairs until clear() is called -- a Timer shared between
// several measurements therefore reports a running total, not a per-call time.
var aStopWATCH_SEQ: Timer;
var aStopWATCH_PAR: Timer;
// Euclidean (L2) norm of a homogeneous 3-tuple of reals.
//
// x       : the three components of the vector
// returns : sqrt of the sum of the squared components
proc norm_3tuple( x: 3*real ): real
{
  // Unpack the components once instead of indexing the tuple three times.
  const ( c1, c2, c3 ) = x;
  const sumOfSquares   = c1**2 + c2**2 + c3**2;
  return sqrt( sumOfSquares );
}
// Euclidean (L2) norm of `x`, accumulated by a sequential loop over
// run-time indices.
//
// x       : value indexable over 1 .. x.size (the script passes a 3*real tuple)
// returns : sqrt of the sum of the squared elements
//
// Side effect: writes the duration of the summation loop in microseconds
// to stdout, without a trailing newline (the caller prints the result next).
proc norm_loop( x ): real
{
  // Per-call stopwatch: a Chapel Timer accumulates over every start()/stop()
  // pair until clear() is called, so a shared module-level Timer would report
  // the running total of all earlier measurements instead of this call's time.
  // A local Timer also keeps concurrent callers from racing on shared state.
  var aStopWATCH: Timer;

  aStopWATCH.start();
  var tmp = 0.0;
  for i in 1 .. x.size do
    tmp += x[i]**2;
  aStopWATCH.stop();

  write( "[SEQ] norm_loop(): ",
         aStopWATCH.elapsed( Time.TimeUnits.microseconds ), " [us] -- " );
  return sqrt( tmp );
}
// Euclidean (L2) norm of `x`, accumulated by a `for param` loop, i.e. a loop
// whose index is a compile-time constant.
//
// x       : value indexable over 1 .. x.size, with x.size known at compile
//           time (the script passes a 3*real tuple)
// returns : sqrt of the sum of the squared elements
//
// Side effect: writes the duration of the summation loop in microseconds
// to stdout, without a trailing newline (the caller prints the result next).
proc norm_loop_param( x ): real
{
  // Per-call stopwatch: a Chapel Timer accumulates over every start()/stop()
  // pair until clear() is called, so a Timer shared with other [SEQ]
  // measurements would add their durations to the figure printed here.
  // A local Timer also keeps concurrent callers from racing on shared state.
  var aStopWATCH: Timer;

  aStopWATCH.start();
  var tmp = 0.0;
  for param i in 1 .. x.size do
    tmp += x[i]**2;
  aStopWATCH.stop();

  write( "[SEQ] norm_loop_param(): ",
         aStopWATCH.elapsed( Time.TimeUnits.microseconds ), " [us] -- " );
  return sqrt( tmp );
}
// Euclidean (L2) norm of `x`, computed with a `+ reduce` over the
// element-wise squares of `x`.
//
// x       : value accepted by `x**2` and `+ reduce` (the script passes a
//           3*real tuple)
// returns : sqrt of the reduced sum of squares
//
// Side effect: writes the duration of the reduction in microseconds to
// stdout, without a trailing newline (the caller prints the result next).
proc norm_reduce( x ): real
{
  // Per-call stopwatch: a Chapel Timer accumulates over every start()/stop()
  // pair until clear() is called, so a shared module-level Timer would report
  // a running total over repeated calls. A local Timer also makes this proc
  // safe to call from many tasks at once (e.g. from inside a forall), where a
  // shared Timer would be started while already running.
  var aStopWATCH: Timer;

  aStopWATCH.start();
  var tmp = ( + reduce x**2 );
  aStopWATCH.stop();

  write( "[PAR]: norm_reduce(): ",
         aStopWATCH.elapsed( Time.TimeUnits.microseconds ), " [us] -- " );
  return sqrt( tmp );
}
// Sample vector shared by every norm variant below; |a| = sqrt( 14 ).
var a: 3*real = ( 1.0, 2.0, 3.0 );

// One line of output per variant. The timed variants first write their own
// "[..] name(): <t> [us] -- " prefix, then writeln() appends the norm.
writeln( norm_3tuple(     a ) );
writeln( norm_loop(       a ) );
writeln( norm_loop_param( a ) );
writeln( norm_reduce(     a ) );
[LOOP] norm_3tuple(): 45829.0 [us] -- result = 4.30918e+06 @ 1000000 loops.
[LOOP] norm_3tuple(): 241680 [us] -- result = 4.30918e+07 @ 10000000 loops.
[LOOP] norm_3tuple(): 2387080 [us] -- result = 4.30918e+08 @ 100000000 loops.
[LOOP] norm_loop(): 72160.0 [us] -- result = 4.30918e+06 @ 1000000 loops.
[LOOP] norm_loop(): 755959 [us] -- result = 4.30918e+07 @ 10000000 loops.
[LOOP] norm_loop(): 7783740 [us] -- result = 4.30918e+08 @ 100000000 loops.
[LOOP] norm_loop_param(): 34102.0 [us] -- result = 4.30918e+06 @ 1000000 loops.
[LOOP] norm_loop_param(): 365510 [us] -- result = 4.30918e+07 @ 10000000 loops.
[LOOP] norm_loop_param(): 3480310 [us] -- result = 4.30918e+08 @ 100000000 loops.
[LOOP] norm_reduce(): 5851380 [us]
[LOOP] norm_reduce(): 5884600 [us]
[LOOP] norm_reduce(): 6163690 [us]
[LOOP] norm_reduce(): 6029860 [us]
[LOOP] norm_reduce(): 6083730 [us]
[LOOP] norm_reduce(): 6132720 [us]
[LOOP] norm_reduce(): 6012620 [us]
[LOOP] norm_reduce(): 6379020 [us]
[LOOP] norm_reduce(): 5923550 [us]
[LOOP] norm_reduce(): 6144660 [us]
[LOOP] norm_reduce(): 8098380 [us]
[LOOP] norm_reduce(): 6215470 [us]
[LOOP] norm_reduce(): 5831670 [us]
[LOOP] norm_reduce(): 6124580 [us]
[LOOP] norm_reduce(): 6092740 [us]
[LOOP] norm_reduce(): 5811260 [us]
[LOOP] norm_reduce(): 5880400 [us]
[LOOP] norm_reduce(): 5898520 [us]
[LOOP] norm_reduce(): 6591110 [us]
[LOOP] norm_reduce(): 5876570 [us]
[LOOP] norm_reduce(): 6034180 [us]
[LOOP] norm_reduce(): 12434700 [us]
[LOOP] norm_reduce(): 17807600 [us]
[LOOP] norm_reduce(): 23844300 [us]
[LOOP] norm_reduce(): 30557700 [us]
[LOOP] norm_reduce(): 30523700 [us]
[LOOP] norm_reduce(): 29404200 [us]
[LOOP] norm_reduce(): 29268600 [us]
[LOOP] norm_reduce(): 29009500 [us]
[LOOP] norm_reduce(): 30388800 [us]
[LOOP] norm_reduce(): 37070600 [us]
[LOOP] norm_reduce(): 42789200 [us]
[LOOP] norm_reduce(): 50572700 [us]
[LOOP] norm_reduce(): 49944300 [us]
[LOOP] norm_reduce(): 49365600 [us]
[LOOP] norm_reduce(): ~60+ // exceeded the 60 seconds limit and was terminated [Exit code: 124]
[LOOP] norm_reduce(): 50099900 [us]
[LOOP] norm_reduce(): 49445500 [us]
[LOOP] norm_reduce(): 49783800 [us]
[LOOP] norm_reduce(): 48533400 [us]
[LOOP] norm_reduce(): 48966600 [us]
[LOOP] norm_reduce(): 47564700 [us]
[LOOP] norm_reduce(): 47087400 [us]
[LOOP] norm_reduce(): 47624300 [us]
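[LOOP] norm_reduce(): ~60+ // exceeded the 60 seconds limit and was terminated [Exit code: 124]
[LOOP] norm_reduce(): ~60+ // exceeded the 60 seconds limit and was terminated [Exit code: 124]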
[LOOP] norm_reduce(): 46887700 [us]
[LOOP] norm_reduce(): 46571800 [us]
[LOOP] norm_reduce(): 46794700 [us]
[LOOP] norm_reduce(): 46862600 [us]
[LOOP] norm_reduce(): 47348700 [us]
[LOOP] norm_reduce(): 46669500 [us]
[SEQ]
- nloops
, , ( ) , --fast
:
use Time;
// Stopwatch covering the whole timed forall-loop below.
var aStopWATCH_LOOP: Timer;

// Number of loop iterations; a `config const`, so it can be overridden at
// program launch.
config const nloops = 100000000;

// Accumulator updated from all forall tasks, hence atomic.
var res: atomic real;
res.write( 0.0 );

// Input data, filled in before the stopwatch is started so that the
// initialisation is not part of the measured section.
var A1: [1 .. nloops] real;
forall idx in 1 .. nloops do
  A1[idx] = (idx % 5): real;

// Timed section: one norm_reduce() call per iteration, summed atomically.
aStopWATCH_LOOP.start();
forall j in 1 .. nloops
{
  res.add( norm_reduce( ( A1[j], a[1], a[2] ) ) );
}
aStopWATCH_LOOP.stop();

write( "forall .. do { res.add( norm_reduce( aPreComputedTUPLE ) ) }: ",
       aStopWATCH_LOOP.elapsed( Time.TimeUnits.microseconds ),
       " [us] -- " );