My understanding is that lock-free data structures perform better under high contention, while lock-based data structures perform better under low contention.
To test this, I wrote the following code:
#include<thread>
#include<chrono>
#include<iostream>
#include<vector>
#include<stack>
#include<mutex>
#include<fstream>
#include <boost/lockfree/stack.hpp>
using namespace std;
// Process-wide mutex; the lock-based benchmark takes it around every push
// onto its std::stack.
std::mutex mut;

// Benchmark configuration.
//   totalNumberOfWorkItems - total number of pushes that main() divides among
//                            the worker threads of one run.
//   maxNumberOfThreads     - exclusive upper bound on the thread counts main()
//                            sweeps over.
//   threadIncrement        - step between successive thread counts in the sweep.
static constexpr int totalNumberOfWorkItems = 100000;
static constexpr int maxNumberOfThreads = 2000;
static constexpr int threadIncrement = 5;
/// Measures the wall-clock cost of spawning and joining `n` trivial threads.
///
/// Intended as a baseline for the thread-creation overhead that is included
/// in the timings of the stack benchmarks.
///
/// @param n  Number of threads to launch; values <= 0 launch no threads.
/// @return   Time from just before the first spawn to just after the last
///           join, truncated to whole milliseconds.
std::chrono::milliseconds calcRawSpawnTime(int n) {
    const int threadCount = n > 0 ? n : 0;

    // Each worker writes only to its own slot. The previous version had every
    // worker do `j += i` on a shared int while capturing the loop counter by
    // reference, which is a data race on both variables.
    std::vector<int> sink(static_cast<std::size_t>(threadCount));
    std::vector<std::thread> workers;
    // Reserve up front so vector reallocation is not part of the measurement.
    workers.reserve(static_cast<std::size_t>(threadCount));

    // steady_clock is monotonic, which is what interval measurement needs;
    // high_resolution_clock is allowed to be an alias of system_clock.
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < threadCount; ++i) {
        // `i` is captured by value so the worker never reads the live counter.
        workers.emplace_back([&sink, i]() { sink[static_cast<std::size_t>(i)] = i; });
    }
    for (auto& worker : workers) {
        worker.join();
    }
    const auto end = std::chrono::steady_clock::now();

    return std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
}
/// Times `n` threads that each push `worksize` integers onto one shared
/// std::stack, serialising every push with the global mutex `mut`.
///
/// The measured interval starts before the first thread is spawned and ends
/// after the last thread has been joined, so it includes thread creation.
///
/// @param n         Number of worker threads; values <= 0 launch none.
/// @param worksize  Number of pushes performed by each worker.
/// @return          Elapsed time truncated to whole milliseconds.
std::chrono::milliseconds timeNThreadsLock(int n, int worksize) {
    std::stack<int> data;
    std::vector<std::thread> workers;
    if (n > 0) {
        // Keep vector growth out of the timed region.
        workers.reserve(static_cast<std::size_t>(n));
    }

    // steady_clock is monotonic; high_resolution_clock may alias system_clock.
    const auto startSpawn = std::chrono::steady_clock::now();
    for (int i = 0; i < n; ++i) {
        workers.emplace_back([&data, worksize]() {
            for (int j = 0; j < worksize; ++j) {
                // Scoped guard instead of manual lock()/unlock(): the mutex
                // is released on every path out of this iteration.
                std::lock_guard<std::mutex> guard(mut);
                data.push(7);
            }
        });
    }
    for (auto& worker : workers) {
        worker.join();
    }
    const auto endWait = std::chrono::steady_clock::now();

    return std::chrono::duration_cast<std::chrono::milliseconds>(endWait - startSpawn);
}
chrono::milliseconds timeNThreadsLockFree(int n, int worksize)
{
boost::lockfree::stack<int> data;
vector<thread> ts;
auto startSpawn = chrono::high_resolution_clock::now();
for (int i = 0; i < n; i++)
ts.push_back(thread([&](){
for (int j = 0; j < worksize; j++)
data.push(7);
}));
auto startWait = chrono::high_resolution_clock::now();
for (auto&& t : ts)
t.join();
auto endWait = chrono::high_resolution_clock::now();
return chrono::duration_cast<chrono::milliseconds>(endWait - startSpawn);
}
/// Runs the benchmark sweep and writes one "<threads>,\t<milliseconds>" line
/// per thread count to each of three log files in the working directory:
/// spawnTimes.log, lockfreelog.log and locklog.log.
///
/// @return 0 on success, 1 if any log file could not be opened.
int main(int argc, char* argv [])
{
    std::ofstream lockFile("locklog.log");
    std::ofstream lockFreeFile("lockfreelog.log");
    std::ofstream spawnTimes("spawnTimes.log");

    // Fail before the sweep starts rather than silently discarding results.
    if (!lockFile || !lockFreeFile || !spawnTimes) {
        std::cerr << "error: could not open one or more log files for writing" << std::endl;
        return 1;
    }

    for (int i = 1; i < maxNumberOfThreads; i += threadIncrement) {
        // Progress indicator; std::endl flushes so it shows up immediately.
        std::cout << i << std::endl;

        // Integer division: each thread performs totalNumberOfWorkItems / i
        // pushes, so the total per run is rounded down to a multiple of i.
        const int workPerThread = totalNumberOfWorkItems / i;

        // Each line is flushed so partial results survive an interrupted run.
        spawnTimes << i << ",\t" << calcRawSpawnTime(i).count() << std::endl;
        lockFreeFile << i << ",\t" << timeNThreadsLockFree(i, workPerThread).count() << std::endl;
        lockFile << i << ",\t" << timeNThreadsLock(i, workPerThread).count() << std::endl;
    }
    return 0;
}
The problem is that the timings for my lock-free data structure came out as follows:
.
I suspected that the problem was related to the thread creation time (which is clearly not constant when the number of threads is large), but subtracting the thread creation time gave this graph:
This is clearly wrong.
Any ideas on how to benchmark this properly?