私のシステムは Mac OS X 10.8.5 です。このマシンのデフォルトの gcc は 4.2 (i686-apple-darwin11-llvm-gcc-4.2 (GCC) 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)) です。これとは別に、gcc 4.9 を /usr/local にインストールしました。
AVX2 組み込み関数を使用するコードがあります。コードと、コンパイル時に表示されるエラーメッセージを以下に示します。
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include <sys/time.h>
#define SIZE 4
#define TIMES 1
/*
 * Work-in-progress kernel for a SIZE x SIZE float matrix multiply.
 *
 * For every output position i in [0, SIZE*SIZE) it builds two 4-lane index
 * vectors:
 *     a_vindex1[j] = (i / SIZE) * 4 + j
 *     b_vindex1[j] = i % SIZE + SIZE * j
 * gathers four floats of `a` through the first vector and prints both index
 * sets. With SIZE == 4 and row-major storage these presumably select row
 * i / SIZE of `a` and column i % SIZE of `b` -- confirm against the intended
 * layout before changing SIZE (the row offset uses a literal 4, not SIZE).
 *
 * a: source matrix, read through the gather; must hold at least
 *    SIZE*SIZE floats.
 * b: second source matrix; only its indices are computed, it is not read yet.
 * c: destination matrix; not written yet.
 */
void mmul(const float *a, const float *b, float *c)
{
    int a_vindex1[4] = {0, 0, 0, 0};
    int b_vindex1[4] = {0, 0, 0, 0};
    int m, i, j;
    __m128i a_index, b_index;
    __m128 a1;

    /* Not consumed yet; silence unused-parameter warnings. */
    (void)b;
    (void)c;

    for (i = 0; i < SIZE * SIZE; i += 1) {
        m = (i / SIZE) * 4;
        for (j = 0; j < 4; j++) {
            b_vindex1[j] = i % SIZE + SIZE * j;
            a_vindex1[j] = m + j;
        }

        /*
         * Unaligned loads: a plain int[4] is not guaranteed to be 16-byte
         * aligned, so dereferencing it as __m128i could fault.
         */
        a_index = _mm_loadu_si128((const __m128i *)&a_vindex1[0]);
        b_index = _mm_loadu_si128((const __m128i *)&b_vindex1[0]);

        /*
         * The gather scale is a byte multiplier applied to each index.
         * The indices are float element indices, so the scale must be
         * sizeof(float) == 4; a scale of 1 would read overlapping,
         * misaligned bytes instead of elements.
         */
        a1 = _mm_i32gather_ps(a, a_index, 4);

        /* The gathered row and the b indices are not used further yet. */
        (void)a1;
        (void)b_index;

        printf("\nBINDEX %d,%d,%d,%d", b_vindex1[0],b_vindex1[1], b_vindex1[2], b_vindex1[3]);
        printf("\nAINDEX %d,%d,%d,%d", a_vindex1[0],a_vindex1[1], a_vindex1[2], a_vindex1[3]);
    }
}
/*
 * Test driver: allocates three SIZE x SIZE float matrices, fills a[i] = i and
 * b[i] = 0.5 * i, then calls mmul() TIMES times while accumulating the
 * wall-clock time (in seconds) measured with gettimeofday().
 *
 * Returns 0 on success, EXIT_FAILURE if any allocation fails.
 */
int main(void)
{
    float *a, *b, *c;
    int i, j;
    int rc = EXIT_FAILURE;
    double timetotal = 0.0;
    struct timeval start, stop;

    a = calloc(SIZE * SIZE, sizeof *a);
    b = calloc(SIZE * SIZE, sizeof *b);
    c = calloc(SIZE * SIZE, sizeof *c);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "failed to allocate matrices\n");
        goto out;
    }

    for (i = 0; i < SIZE * SIZE; i++) {
        a[i] = (float)i;
        b[i] = (float)(0.5 * i);
    }

    for (j = 0; j < TIMES; j++) {
        gettimeofday(&start, NULL);
        mmul(a, b, c);
        gettimeofday(&stop, NULL);
        /* Elapsed microseconds converted to seconds. */
        timetotal += ((double)((stop.tv_sec - start.tv_sec) * 1000000
                               + (stop.tv_usec - start.tv_usec))) / 1000000;
    }

    /* The average-time report is currently disabled: */
    //printf("\n time avegrare = %.8lf",timetotal/TIMES);
    (void)timetotal;

    rc = 0;
out:
    /* free(NULL) is a no-op, so this is safe after a partial allocation. */
    free(c);
    free(b);
    free(a);
    return rc;
}
このコードを gcc 4.9 で次のコマンドによりコンパイルすると、
`gcc-4.9 -O3 -march=core-avx2 a7.c`、次のエラーメッセージが表示されます。
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:141:no such instruction: `vmovd %r8d, %xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:143:no such instruction: `vmovapd LC15(%rip), %ymm3'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:145:no such instruction: `vbroadcastss %xmm7, %ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:147:no such instruction: `vpaddd LC13(%rip), %ymm0,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:148:no such instruction: `vpaddd LC14(%rip), %ymm0,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:149:no such instruction: `vcvtdq2ps %ymm1, %ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:150:no such instruction: `vmovups %ymm2, (%rcx)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:151:no such instruction: `vcvtdq2pd %xmm1, %ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:152:no such instruction: `vmulpd %ymm3, %ymm2,%ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:153:no such instruction: `vextracti128 $0x1, %ymm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:154:no such instruction: `vcvtpd2psy %ymm2, %xmm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:155:no such instruction: `vcvtdq2pd %xmm1, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:156:no such instruction: `vmulpd %ymm3, %ymm1,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:157:no such instruction: `vcvtpd2psy %ymm1, %xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:158:no such instruction: `vinsertf128 $0x1, %xmm1,%ymm2,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:159:no such instruction: `vmovups %ymm1, (%rax)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:162:no such instruction: `vcvtdq2ps %ymm0, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:163:no such instruction: `vmovups %ymm1, 32(%rcx)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:164:no such instruction: `vcvtdq2pd %xmm0, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:165:no such instruction: `vextracti128 $0x1, %ymm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:166:no such instruction: `vmulpd %ymm3, %ymm1,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:167:no such instruction: `vcvtdq2pd %xmm0, %ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:168:no such instruction: `vcvtpd2psy %ymm1, %xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:169:no such instruction: `vmulpd %ymm3, %ymm0,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:170:no such instruction: `vcvtpd2psy %ymm0, %xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:171:no such instruction: `vinsertf128 $0x1, %xmm0,%ymm1,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:172:no such instruction: `vmovups %ymm0, 32(%rax)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:178:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:179:no such instruction: `vcvtsi2ss %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:181:no such instruction: `vxorps %xmm5, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:182:no such instruction: `vmovsd LC16(%rip), %xmm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:183:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:184:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:185:no such instruction: `vcvtsi2sd %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:186:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:187:no such instruction: `vcvtsd2ss %xmm0, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:188:no such instruction: `vmovss %xmm5, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:192:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:193:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:195:no such instruction: `vxorps %xmm6, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:196:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:197:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:198:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:200:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:201:no such instruction: `vcvtsd2ss %xmm0, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:202:no such instruction: `vmovss %xmm6, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:205:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:206:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:208:no such instruction: `vxorps %xmm7, %xmm7,%xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:209:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:210:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:211:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:213:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:214:no such instruction: `vcvtsd2ss %xmm0, %xmm7,%xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:215:no such instruction: `vmovss %xmm7, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:218:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:219:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:221:no such instruction: `vxorps %xmm4, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:222:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:223:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:224:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:226:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:227:no such instruction: `vcvtsd2ss %xmm0, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:228:no such instruction: `vmovss %xmm4, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:231:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:232:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:234:no such instruction: `vxorps %xmm5, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:236:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:237:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:238:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:239:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:240:no such instruction: `vcvtsd2ss %xmm0, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:241:no such instruction: `vmovss %xmm5, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:244:no such instruction: `vxorpd %xmm1, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:245:no such instruction: `vcvtsi2sd %edi, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:246:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:248:no such instruction: `vcvtsi2ss %edi, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:249:no such instruction: `vxorps %xmm6, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:251:no such instruction: `vmulsd %xmm2, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:252:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:253:no such instruction: `vcvtsd2ss %xmm1, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:254:no such instruction: `vmovss %xmm6, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:257:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:258:no such instruction: `vcvtsi2ss %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:260:no such instruction: `vxorps %xmm4, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:261:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:262:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:263:no such instruction: `vcvtsi2sd %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:264:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:265:no such instruction: `vcvtsd2ss %xmm0, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:266:no such instruction: `vmovss %xmm4, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:270:no such instruction: `vzeroupper'
gcc-4.9 を使用する場合、アセンブラも更新する必要があるのでしょうか? それとも、何か別の問題でしょうか。何かヒントをいただけると助かります。