:
TBB . , concurrency . TBB, , .
initialize(), :
__ , task_scheduler_inits. task_scheduler_inits. , .
( , )
, MATLAB Intel TBB , , MEX- . , , MATLAB, , .
MATLAB ( ), , :
>> maxNumCompThreads
Warning: maxNumCompThreads will be removed in a future release [...]
ans =
4
OpenMP , , :
#include <omp.h>
..
omp_set_dynamic(1);
omp_set_num_threads(omp_get_num_procs());
:
>> setenv('OMP_NUM_THREADS', '8')
, , :
test_tbb.cpp
#ifdef MATLAB_MEX_FILE
#include "mex.h"
#endif
#include <cstdlib>
#include <cstdio>
#include <vector>
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_for_each.h"
#include "tbb/spin_mutex.h"
#include "tbb_helpers.hxx"
#define NTASKS 100
#define NLOOPS 400000L
tbb::spin_mutex print_mutex;
struct mytask {
mytask(size_t n) :_n(n) {}
void operator()()
{
ConcurrencyProfiler prof;
double x = 1.0 / _n;
for (long i=0; i<NLOOPS; ++i) {
x = sin(x) * 10.0;
while((double) rand() / RAND_MAX < 0.9);
}
{
tbb::spin_mutex::scoped_lock s(print_mutex);
fprintf(stderr, "%f\n", x);
}
}
size_t _n;
};
template <typename T> struct invoker {
void operator()(T& it) const { it(); }
};
void run()
{
SetProcessAffinityMask(GetCurrentProcess(), 0xFF);
printf("numTasks = %d\n", NTASKS);
for (int t = tbb::task_scheduler_init::automatic;
t <= 512; t = (t>0) ? t*2 : 1)
{
tbb::task_scheduler_init init(t);
std::vector<mytask> tasks;
for (int i=0; i<NTASKS; ++i) {
tasks.push_back(mytask(i));
}
ConcurrencyProfiler::Reset();
tbb::parallel_for_each(tasks.begin(), tasks.end(), invoker<mytask>());
printf("pool_init(%d) -> %d worker threads\n", t,
ConcurrencyProfiler::GetMaxNumThreads());
}
}
#ifdef MATLAB_MEX_FILE
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[])
{
run();
}
#else
int main()
{
run();
return 0;
}
#endif
, concurrency, , . Intel VTune :
tbb_helpers.hxx
#ifndef HELPERS_H
#define HELPERS_H
#include "tbb/atomic.h"
class ConcurrencyProfiler
{
public:
ConcurrencyProfiler();
~ConcurrencyProfiler();
static void Reset();
static size_t GetMaxNumThreads();
private:
static void RecordMax();
static tbb::atomic<size_t> cur_count;
static tbb::atomic<size_t> max_count;
};
#endif
tbb_helpers.cxx
#include "tbb_helpers.hxx"
tbb::atomic<size_t> ConcurrencyProfiler::cur_count;
tbb::atomic<size_t> ConcurrencyProfiler::max_count;
ConcurrencyProfiler::ConcurrencyProfiler()
{
++cur_count;
RecordMax();
}
ConcurrencyProfiler::~ConcurrencyProfiler()
{
--cur_count;
}
void ConcurrencyProfiler::Reset()
{
cur_count = max_count = 0;
}
size_t ConcurrencyProfiler::GetMaxNumThreads()
{
return static_cast<size_t>(max_count);
}
void ConcurrencyProfiler::RecordMax()
{
size_t o;
do {
o = max_count;
if (o >= cur_count) break;
} while(max_count.compare_and_swap(cur_count,o) != o);
}
( Intel ++ Composer XE 2013 SP1, VS2012 4):
C:\> vcvarsall.bat amd64
C:\> iclvars.bat intel64 vs2012
C:\> icl /MD test_tbb.cpp tbb_helpers.cxx tbb.lib
(Windows 8.1). 100% , :
C:\> test_tbb.exe 2> nul
numTasks = 100
pool_init(-1) -> 8 worker threads // task_scheduler_init::automatic
pool_init(1) -> 1 worker threads
pool_init(2) -> 2 worker threads
pool_init(4) -> 4 worker threads
pool_init(8) -> 8 worker threads
pool_init(16) -> 16 worker threads
pool_init(32) -> 32 worker threads
pool_init(64) -> 64 worker threads
pool_init(128) -> 98 worker threads
pool_init(256) -> 100 worker threads
pool_init(512) -> 98 worker threads
, , , ( 512 100 !).
MEX :
>> mex -I"C:\Program Files (x86)\Intel\Composer XE\tbb\include" ...
-largeArrayDims test_tbb.cpp tbb_helpers.cxx ...
-L"C:\Program Files (x86)\Intel\Composer XE\tbb\lib\intel64\vc11" tbb.lib
, , MEX- MATLAB:
>> test_tbb()
numTasks = 100
pool_init(-1) -> 4 worker threads
pool_init(1) -> 4 worker threads
pool_init(2) -> 4 worker threads
pool_init(4) -> 4 worker threads
pool_init(8) -> 4 worker threads
pool_init(16) -> 4 worker threads
pool_init(32) -> 4 worker threads
pool_init(64) -> 4 worker threads
pool_init(128) -> 4 worker threads
pool_init(256) -> 4 worker threads
pool_init(512) -> 4 worker threads
, , , 4 (4 - ). , .
, 8 , 4 , 50%.
, , :)