Python C ( C++ - ). x86-64 clang++. 82 , CPython3.6.2 , , Skylake x86, Python , . (, asm , , ).
JIT - . , Python C, - (, NumPy), C , Cython - , CPython - , - .
: 1,5 ( + add , , 4- L1D). ( ), 6c = 5c + 1c + add ).
, Python , : P ( , 32- 64- , , 4585 18 , 32- L1D-. ABI Linux x32 AArch64 ILP32 ABI.)
, gcc , clang. ( perf stat , , .)
/// Walks a table of relative jump offsets and counts the steps taken until the
/// position leaves the table.
///
/// Starting at index 0, each step reads the offset at the current position,
/// rewrites that slot (decremented when the offset is 3 or more, incremented
/// otherwise), and then jumps by the ORIGINAL offset value.
///
/// @param offset  Jump table; modified in place. May be null only if size is 0.
/// @param size    Number of entries in the table.
/// @return        Number of jumps executed before the position is outside
///                [0, size); 0 for an empty table.
unsigned jumps(int offset[], unsigned size) {
    // An empty table has no first instruction. Without this guard the
    // do-while below would read and write offset[0] out of bounds.
    if (size == 0) {
        return 0;
    }

    unsigned location = 0;
    unsigned counter = 0;
    do {
        const int off = offset[location];
        offset[location] += (off >= 3) ? -1 : 1;
        // Unsigned arithmetic: a jump to before index 0 wraps around to a huge
        // value, so the single `location < size` test also catches it.
        location += off;
        counter++;
    } while (location < size);
    return counter;
}
#include <iostream>
#include <iterator>
#include <vector>
int main()
{
    // Decouple iostreams from C stdio; this makes reading from cin faster.
    std::ios::sync_with_stdio(false);

    // Read whitespace-separated integers from stdin until extraction fails
    // (end of input or a token that is not an integer).
    std::vector<int> offsets;
    for (int next; std::cin >> next; ) {
        offsets.push_back(next);
    }

    // jumps() takes a 32-bit element count, so the size is narrowed explicitly.
    const unsigned table_size = static_cast<unsigned>(offsets.size());
    const unsigned steps = jumps(offsets.data(), table_size);

    std::cout << steps << '\n';
}
With clang4.0.1 -O3 -march=skylake the inner loop is branchless: the `>=3` condition, written with `? :` in the source, is compiled to a conditional move instead of a branch. Source + asm on Godbolt (x86-64 clang 4.0.1 and x86-64 gcc 7.2, both with `-O3 -fverbose-asm -march=skylake`; the original link is not recoverable here).
.LBB1_4: # =>This Inner Loop Header: Depth=1
mov ebx, edi ; copy location into ebx for use as the 64-bit index rbx (silly compiler: extra work inside the loop to save code outside)
mov esi, dword ptr [rax + 4*rbx] ; off = offset[location]
cmp esi, 2 ; signed compare: off > 2 is the same test as off >= 3
mov ecx, 1 ; default adjustment: +1 (mov does not touch the flags set by cmp)
cmovg ecx, r8d ; ecx = (off>=3) ? -1 : 1; // r8d = -1 (set outside the loop)
add ecx, esi ; ecx = off + (-1 or 1): the new slot value; esi still holds the original off
mov dword ptr [rax + 4*rbx], ecx ; offset[location] = updated value
add edi, esi ; location += off (original value)
add edx, 1 ; counter++
cmp edi, r9d ; location vs. r9d (presumably size, per the loop condition)
jb .LBB1_4 ; unsigned compare against array size: loop while location < size
`perf stat ./a.out < input.txt` (built with clang) on an i7-6700k Skylake:
21841249
Performance counter stats for './a.out':
36.843436 task-clock (msec)
0 context-switches
0 cpu-migrations
119 page-faults
143,680,934 cycles
245,059,492 instructions
22,654,670 branches
20,171 branch-misses
0.036953258 seconds time elapsed
4 - . + , . .
int short ( ; movsx , mov Skylake), movsx , L1D, .
( int offsets[] = { file contents with commas added }; . . ~ 36,2 + / - 0,1 , ~ 36,8, , , - , ( Python, C++ Skylake P- Skylake.)
, , [rdi] [rdi + rdx*4] 1 add (index += offset current = target). Intel, IvyBridge mov , . ( ) + asm . ( std::vector): 23.26 +- 0.05 ms, 90,725 (3,900 ), 288.724 M instructions (3,18 ). , , - , .
gcc 2 . (14% perf stat . , , , , , .)
Writing the update as `offset[location] = (off>=3) ? off-1 : off+1;` gets gcc to produce branchless asm as well.
gcc7.1.1 -O3 -march=skylake (for the original source, which uses `(off>=3) ? -1 : +1`):
Performance counter stats for './ec-gcc':
70.032162 task-clock (msec)
0 context-switches
0 cpu-migrations
118 page-faults
273,115,485 cycles
255,088,412 instructions
44,382,466 branches
6,230,137 branch-misses
0.070181924 seconds time elapsed
CPython (Python3.6.2 Arch Linux):
perf stat python ./orig-v2.e.py
21841249
Performance counter stats for 'python ./orig-v2.e.py':
3046.703831 task-clock (msec)
10 context-switches
0 cpu-migrations
923 page-faults
11,880,130,860 cycles
38,731,286,195 instructions
8,489,399,768 branches
18,666,459 branch-misses
3.046819579 seconds time elapsed
, PyPy Python.