求解释一下这段 SIMD 代码在 Windows 下的性能测试结果:应该如何设置编译参数,才能得到正确的测试结果?

#include <stdio.h>
#include <windows.h>
#include <emmintrin.h>
#include <dvec.h>

int simd_add(int* int_array, int nCount, int execCount)
{
int t0 = GetTickCount();
int result = 0;
for (int k = 0; k < execCount; k++)
{
Is32vec4* s_vec4 = (Is32vec4*)int_array;
Is32vec4 sum(0, 0, 0, 0);
for (int i = 0; i < nCount / 4; i++)
{
sum += s_vec4[i];
}
result += (int)(sum[0] + sum[1] + sum[2] + sum[3]);
}

int t1 = GetTickCount();
printf("simd_add, sum = %d, time = %d\n", result, t1 - t0);
return result;

}

/*
 * Scalar reference: sums the first nCount ints of int_array, repeats the whole
 * pass execCount times, prints the accumulated sum and the elapsed
 * GetTickCount() delta, and returns the accumulated sum.
 *
 * int_array  - input values.
 * nCount     - number of ints to sum; values <= 0 sum nothing.
 * execCount  - number of times the full pass is repeated.
 *
 * The sum is accumulated in unsigned int so that it wraps modulo 2^32 instead
 * of overflowing a signed int (undefined behaviour).
 *
 * NOTE(review): an optimizing build may auto-vectorize this loop, in which
 * case it is not a purely scalar baseline - check the generated assembly
 * before comparing timings.
 */
int normal_add(int* int_array, int nCount, int execCount)
{
    unsigned long t0 = GetTickCount();
    unsigned int result = 0;

    for (int k = 0; k < execCount; k++)
    {
        unsigned int sum = 0;
        for (int i = 0; i < nCount; i++)
        {
            sum += (unsigned int)int_array[i];
        }
        result += sum;
    }

    // Unsigned subtraction stays correct if the tick counter wraps once.
    unsigned long t1 = GetTickCount();
    printf("normal_add, sum = %d, time = %lu\n", (int)result, t1 - t0);
    return (int)result;
}

/*
 * Benchmark driver: fills a large int array with 0..nCount-1, runs the SIMD
 * and the scalar summation over it execCount times each, then waits for a key
 * press so the console output stays visible.
 */
int main()
{
    // 300,000,000 elements (about 1.2 GB assuming 4-byte int).
    // NOTE(review): an array this large cannot stay in cache, so both loops
    // are presumably limited by memory bandwidth rather than by the adds.
    int nCount = 10000 * 10000 * 3;
    int* int_array = new int[nCount];
    for (int i = 0; i < nCount; i++)
    {
        int_array[i] = i;
    }

    int execCount = 5;
    simd_add(int_array, nCount, execCount);
    normal_add(int_array, nCount, execCount);

    delete[] int_array;

    getchar();
    return 0;
}

// 测试结果显示,SIMD 版本并没有表现出性能优势,请问原因是什么?

用的是什么处理器?编译器生成 SIMD 指令了吗?可以反汇编看一下。