为什么多线程比单线程慢?

#include <iostream>
#include <sys/time.h>
#include <thread>
using namespace std;

// Counters incremented by the benchmark; main() uses a[0] and a[1].
// The trailing () value-initializes every element to zero. A bare
// `new long[3]` leaves them indeterminate, and cal() reads each element
// (a[index]++) before it ever writes one.
long *a = new long[3]();
// Number of increments performed by each cal() call.
const int count = 1000000000;

// Increments a[index] exactly `total` times (zero times when total <= 0),
// then prints the wall-clock time the loop took, in microseconds, on its
// own line. The pointee is volatile-qualified, so every increment is
// performed as an individual access rather than being folded together.
void cal(volatile long *a, int index, int total) {
    struct timeval began;
    struct timeval finished;

    gettimeofday(&began, NULL);
    int done = 0;
    while (done < total) {
        a[index]++;
        ++done;
    }
    gettimeofday(&finished, NULL);

    // Keep the subtraction results in their native timeval field types so
    // the arithmetic is carried out exactly as a single expression would be.
    const auto whole_seconds = finished.tv_sec - began.tv_sec;
    const auto leftover_micros = finished.tv_usec - began.tv_usec;
    const long dura = whole_seconds * 1000000 + leftover_micros;
    std::cout << dura << std::endl;
}


// Single-threaded baseline: runs the cal() workload on a[0] and then on
// a[1], one after the other, and prints the total elapsed wall-clock time
// in microseconds. Each cal() call also prints its own duration.
int main(){
    struct timeval began;
    struct timeval finished;

    gettimeofday(&began, NULL);
    for (int slot = 0; slot < 2; ++slot) {
        cal(a, slot, count);
    }
    gettimeofday(&finished, NULL);

    const auto whole_seconds = finished.tv_sec - began.tv_sec;
    const auto leftover_micros = finished.tv_usec - began.tv_usec;
    const long dura = whole_seconds * 1000000 + leftover_micros;
    std::cout << dura << std::endl;
}
#include <iostream>
#include <sys/time.h>
#include <thread>
using namespace std;

// Counters incremented by the benchmark; main() hands a[0] to one thread
// and a[1] to the other.
// The trailing () value-initializes every element to zero. A bare
// `new long[3]` leaves them indeterminate, and cal() reads each element
// (a[index]++) before it ever writes one.
long *a = new long[3]();
// Number of increments performed by each cal() call.
const int count = 1000000000;

// Increments a[index] exactly `total` times (zero times when total <= 0),
// then prints the wall-clock time the loop took, in microseconds, on its
// own line.
//
// The increments go through a volatile pointer so that an optimizing build
// cannot collapse the loop into a single addition; this keeps the timing
// comparable with the single-threaded variant, whose parameter is declared
// `volatile long *`. The signature itself is unchanged.
//
// NOTE(review): when called from several threads the number and the newline
// are separate stream insertions, so output from different threads may
// interleave.
void cal(long *a, int index, int total) {
    struct timeval start, end;
    gettimeofday(&start, nullptr);
    volatile long *slot = a + index;
    for (int i = 0; i < total; ++i) {
        (*slot)++;
    }
    gettimeofday(&end, nullptr);
    const long dura = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
    std::cout << dura << std::endl;
}


// Two-thread variant: runs cal() on a[0] and a[1] concurrently, waits for
// both threads, and prints the total elapsed wall-clock time in
// microseconds. Each cal() call also prints its own duration.
int main(){
    struct timeval start, end;
    gettimeofday(&start, nullptr);
    // std::thread forwards the extra constructor arguments to the callable,
    // so no std::bind wrapper (and no <functional> include) is required.
    std::thread t1(cal, a, 0, count);
    std::thread t2(cal, a, 1, count);
    t1.join();
    t2.join();
    gettimeofday(&end, nullptr);
    const long dura = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
    std::cout << dura << std::endl;
}

其中单线程程序花费 4040813 μs(约 4.04 秒;程序输出的单位是微秒),
而多线程总是要大于等于 4040813 μs。

多线程确实是在两个核上跑的,所以应该不是线程切换导致的,我的想法是可能是缓存导致的问题。

for (int i = 0; i < total; ++i) {
    a[index]++;
}

这是内存敏感型操作,所以无论多少线程,都是在做内存带宽的测试罢了。

因为缓存是以缓存行(cache line)为单位的,a[0] 和 a[1] 在同一缓存行上,两个核各自写入时会不断使对方核上的这一行失效,所以会不断出现缓存失效问题(即伪共享,false sharing)。