MSVC コンパイラが次のループをどのように展開しているのか理解できません (アセンブリ言語の知識が不十分で申し訳ありません)。
#define NUM_ITERATIONS (1000 * 1000 * 1000)
double dummySum = 0;
for (int x = 0; x < NUM_ITERATIONS; x++) {
if (x & 1)
dummySum += x;
}
生成されたアセンブリは次のとおりです。
00007FF7B4511070 xorps xmm1,xmm1
double dummySum = 0;
00007FF7B4511073 mov ecx,2
00007FF7B4511078 nop dword ptr [rax+rax]
if (x & 1)
00007FF7B4511080 lea eax,[rcx-2]
00007FF7B4511083 mov r8d,eax
00007FF7B4511086 and r8d,1
00007FF7B451108A je someTest+28h (07FF7B4511098h)
dummySum += x;
00007FF7B451108C movd xmm0,eax
00007FF7B4511090 cvtdq2pd xmm0,xmm0
00007FF7B4511094 addsd xmm1,xmm0
if (x & 1)
00007FF7B4511098 lea edx,[rcx-1]
00007FF7B451109B and edx,1
00007FF7B451109E je someTest+3Fh (07FF7B45110AFh)
dummySum += x;
00007FF7B45110A0 lea eax,[rcx-1]
00007FF7B45110A3 movd xmm0,eax
00007FF7B45110A7 cvtdq2pd xmm0,xmm0
00007FF7B45110AB addsd xmm1,xmm0
00007FF7B45110AF test r8d,r8d
if (x & 1)
00007FF7B45110B2 je someTest+50h (07FF7B45110C0h)
dummySum += x;
00007FF7B45110B4 movd xmm0,ecx
00007FF7B45110B8 cvtdq2pd xmm0,xmm0
00007FF7B45110BC addsd xmm1,xmm0
00007FF7B45110C0 test edx,edx
if (x & 1)
00007FF7B45110C2 je someTest+63h (07FF7B45110D3h)
dummySum += x;
00007FF7B45110C4 lea eax,[rcx+1]
00007FF7B45110C7 movd xmm0,eax
00007FF7B45110CB cvtdq2pd xmm0,xmm0
00007FF7B45110CF addsd xmm1,xmm0
00007FF7B45110D3 test r8d,r8d
if (x & 1)
00007FF7B45110D6 je someTest+77h (07FF7B45110E7h)
dummySum += x;
00007FF7B45110D8 lea eax,[rcx+2]
00007FF7B45110DB movd xmm0,eax
00007FF7B45110DF cvtdq2pd xmm0,xmm0
00007FF7B45110E3 addsd xmm1,xmm0
00007FF7B45110E7 test edx,edx
if (x & 1)
00007FF7B45110E9 je someTest+8Ah (07FF7B45110FAh)
dummySum += x;
00007FF7B45110EB lea eax,[rcx+3]
00007FF7B45110EE movd xmm0,eax
00007FF7B45110F2 cvtdq2pd xmm0,xmm0
00007FF7B45110F6 addsd xmm1,xmm0
00007FF7B45110FA test r8d,r8d
if (x & 1)
00007FF7B45110FD je someTest+9Eh (07FF7B451110Eh)
dummySum += x;
00007FF7B45110FF lea eax,[rcx+4]
00007FF7B4511102 movd xmm0,eax
00007FF7B4511106 cvtdq2pd xmm0,xmm0
00007FF7B451110A addsd xmm1,xmm0
00007FF7B451110E test edx,edx
if (x & 1)
00007FF7B4511110 je someTest+0B1h (07FF7B4511121h)
dummySum += x;
00007FF7B4511112 lea eax,[rcx+5]
00007FF7B4511115 movd xmm0,eax
00007FF7B4511119 cvtdq2pd xmm0,xmm0
00007FF7B451111D addsd xmm1,xmm0
00007FF7B4511121 test r8d,r8d
if (x & 1)
00007FF7B4511124 je someTest+0C5h (07FF7B4511135h)
dummySum += x;
00007FF7B4511126 lea eax,[rcx+6]
00007FF7B4511129 movd xmm0,eax
00007FF7B451112D cvtdq2pd xmm0,xmm0
00007FF7B4511131 addsd xmm1,xmm0
00007FF7B4511135 test edx,edx
if (x & 1)
00007FF7B4511137 je someTest+0D8h (07FF7B4511148h)
dummySum += x;
00007FF7B4511139 lea eax,[rcx+7]
00007FF7B451113C movd xmm0,eax
00007FF7B4511140 cvtdq2pd xmm0,xmm0
00007FF7B4511144 addsd xmm1,xmm0
for (int x = 0; x < NUM_ITERATIONS; x++) {
00007FF7B4511148 add ecx,0Ah
00007FF7B451114B lea eax,[rcx-2]
00007FF7B451114E cmp eax,3B9ACA00h
00007FF7B4511153 jl someTest+10h (07FF7B4511080h)
}
この部分 (ループの先頭) は理解できています:
// if (x % 2 == 0) jump over the summation
00007FF7B4511073 mov ecx,2 // ecx/rcx = 2
00007FF7B4511080 lea eax,[rcx-2] // eax = rcx - 2
00007FF7B4511083 mov r8d,eax // r8d = eax
00007FF7B4511086 and r8d,1 // r8d & 1
00007FF7B451108A je someTest+28h (07FF7B4511098h) // jump if zero
// add double
00007FF7B451108C movd xmm0,eax
00007FF7B4511090 cvtdq2pd xmm0,xmm0
00007FF7B4511094 addsd xmm1,xmm0
しかし、アドレスを見ると、後続のジャンプ命令は (ジャンプが成立すると仮定した場合) 次の lea 命令をスキップしているように見え、その理由がわかりません。
なお、以下では上記のリストからジャンプ間の命令を省略しています:
00007FF7B45110C0 test edx,edx
00007FF7B45110C2 je someTest+63h (07FF7B45110D3h)
... addresses in between omitted ...
00007FF7B45110D3 test r8d,r8d
00007FF7B45110D6 je someTest+77h (07FF7B45110E7h)
... addresses in between omitted ...
00007FF7B45110E7 test edx,edx
00007FF7B45110E9 je someTest+8Ah (07FF7B45110FAh)
... addresses in between omitted ...
00007FF7B45110FA test r8d,r8d
00007FF7B45110FD je someTest+9Eh (07FF7B451110Eh)
... addresses in between omitted ...
00007FF7B451110E test edx,edx
00007FF7B4511110 je someTest+0B1h (07FF7B4511121h)
各ジャンプが成立する場合、次の値をロードすることなく、
test r8d,r8d 命令と test edx,edx 命令を交互に実行しているだけのように見えます。
私はここで何を誤解しているのでしょうか?