boost::mutex: a program takes less time with a mutex than without one

I ran the program below, in which I create 100 threads to execute concurrently. Please note that this is only a sample program. I understand that multiple threads are not needed for the program below, but my intention was to test the mutex.

// Small thread pool built on boost::asio::io_service: worker threads run
// ioService.run() and execute the jobs posted by AssignPool().
class ThreadPool{

    public:
    // Starts `num` worker threads (default 10) and remembers `num` as the
    // number of jobs AssignPool() will post.
    ThreadPool(int num = 10);
    // Deletes the work object, stops the io_service and joins the workers.
    ~ThreadPool();
    // Prints pool_sz and posts pool_sz doSometask() jobs to the io_service.
    void AssignPool();
    // Job body: calls inc() 10000 times.
    void doSometask();
    // Increments the shared counter p_size by one.
    void inc();
    private:
    boost::asio::io_service ioService;
    boost::thread_group threadpool;
    // Allocated with new in the constructor, deleted in the destructor.
    boost::asio::io_service::work * work;
    // Counter incremented by every job. `volatile` does not make the
    // increment atomic, so concurrent jobs race on it unless a lock is held.
   volatile int p_size;
    int pool_sz;
    // Only referenced by the (commented-out) scoped_lock in doSometask().
    boost::mutex io_mutex;// with boost lock

};

// Prints the configured pool size, then queues one doSometask() job per slot.
void ThreadPool::AssignPool()
{
        std::cout << std::endl << "pool_sz=" << pool_sz << std::endl;

        int queued = 0;
        while (queued < pool_sz)
        {
                ioService.post(boost::bind(&ThreadPool::doSometask, this));
                ++queued;
        }
}

// Adds one to the shared counter: one volatile read followed by one volatile
// write, with no synchronisation of its own.
void ThreadPool::inc()
{
        p_size = p_size + 1;
}

// Job executed on a worker thread: bumps the shared counter 10000 times.
// Un-comment the scoped_lock line to serialise whole jobs on io_mutex.
void ThreadPool::doSometask()
{
//      boost::mutex::scoped_lock lock(io_mutex);
        for (int remaining = 10000; remaining > 0; --remaining)
        {
                inc();
        }
}

// Builds the pool: creates the io_service::work object, then starts `num`
// threads that each run ioService.run(). Initialisers follow the members'
// declaration order.
ThreadPool::ThreadPool(int num)
        : work(new boost::asio::io_service::work(ioService)),
          p_size(0),
          pool_sz(num)
{
        int started = 0;
        while (started < num)
        {
                threadpool.create_thread(boost::bind(&boost::asio::io_service::run, &ioService));
                ++started;
        }
}

// Shuts the pool down: releases the work object, stops the io_service and
// waits for every worker thread to exit.
ThreadPool::~ThreadPool()
{
        // Release the work object created in the constructor.
        delete work;
        // NOTE(review): per the Boost.Asio documentation, stop() makes run()
        // return as soon as possible, so jobs that were posted but not yet
        // started may never execute -- confirm this is intended here.
        ioService.stop();
        // Block until all worker threads have returned from run().
        threadpool.join_all();
}

int main()
{
        ThreadPool p1(100);
        p1.AssignPool();
}

Case 1: In the program above, the line "boost::mutex::scoped_lock lock(io_mutex);" is commented out, i.e. no mutex is used. The time taken by the program was:

real    0m1.386s
user    0m0.483s
sys 0m9.937s

Case 2: With the mutex. When I run the same program with the line "boost::mutex::scoped_lock lock(io_mutex);" enabled, the program takes less time:

real    0m0.289s
user    0m0.067s
sys 0m0.230s

In my understanding, with a mutex, a program should take much longer than without a mutex. What is wrong here?

+6
3

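In your example you lock the mutex in doSometask(), so only one thread at a time runs, and it finishes its whole for loop before another thread can start. The program therefore effectively runs serially.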

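Without the lock, all threads run whenever they get processor time and, assuming the number of processors is much lower than 100, they constantly contend for the same data (as Bo Persson noted in the comments), which increases the run time.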

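A better way to measure would be either (a) to run only as many threads as there are processors, or (b) to take the lock inside ThreadPool::inc() rather than around the whole for loop.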

Also, p_size should be declared as std::atomic<int> (C++11) rather than as a volatile int.

+5

, " " , , , . , , , doSometask(), . doSometask, , , :

#include <algorithm>
#include <atomic>
#include <chrono>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <thread>

#include <boost/asio/io_service.hpp>
#include <boost/thread.hpp>


// Benchmark thread pool on top of boost::asio::io_service. Each posted job
// increments its own element of `xsize`; jobs can be posted either
// unsynchronised (AssignPool) or serialised on `io_mutex` (AssignPoolSync).
class ThreadPool
{
public:
    // num:    number of worker threads, and of jobs posted per Assign* call.
    // cycles: number of loop iterations each job performs.
    ThreadPool(int num = 10, int cycles = 10000);
    ~ThreadPool();

    // Adds one to *x (volatile read followed by volatile write).
    void inc(volatile int* x);

    // Posts pool_sz doSometask jobs; job i receives &xsize[i].
    void AssignPool();
    // Job body: `cycles` iterations of inc(x).
    void doSometask(volatile int* x);
    // Posts pool_sz doSometaskSync jobs; job i receives &xsize[i].
    void AssignPoolSync();
    // Same as doSometask, but holds io_mutex for the whole job.
    void doSometaskSync(volatile int* x);

private:
    boost::asio::io_service ioService;
    boost::thread_group threadpool;
    // Allocated with new in the constructor, deleted in the destructor.
    boost::asio::io_service::work * work;
    // Counter shared by all jobs; modified from doSometask.
    std::atomic<int> p_size;

    // Array of `num` zero-initialised ints, one per job; owned by this object.
    int *xsize;
    int pool_sz, cycles;
    boost::mutex io_mutex; // with boost lock
};

// Queues pool_sz unsynchronised jobs; job number `slot` works on xsize[slot].
void ThreadPool::AssignPool()
{
    int slot = 0;
    while (slot < pool_sz)
    {
        ioService.post(boost::bind(&ThreadPool::doSometask, this, xsize + slot));
        ++slot;
    }
}

// Queues pool_sz mutex-guarded jobs; job number `slot` works on xsize[slot].
void ThreadPool::AssignPoolSync()
{
    int slot = 0;
    while (slot < pool_sz)
    {
        ioService.post(boost::bind(&ThreadPool::doSometaskSync, this, xsize + slot));
        ++slot;
    }
}

// Increments the int behind `x`: exactly one volatile load and one volatile
// store, so the compiler cannot fold repeated calls together.
void ThreadPool::inc(volatile int* x)
{
    const int current = *x;
    *x = current + 1;
}

// Job body: performs `cycles` increments of the caller's private slot `*x`
// and, once every 256 iterations, touches the shared atomic counter p_size.
void ThreadPool::doSometask(volatile int* x)
{
    for (int i = 0; i < cycles; ++i)
    {
        inc(x);
        // The parentheses are required: `==` binds tighter than `&`, so the
        // unparenthesised `i & 255 == 0` is `i & (255 == 0)`, i.e. always 0,
        // and the shared counter would never be touched.
        if ((i & 255) == 0)
            p_size++; // access shared data every 256 cycles
    }
}

void ThreadPool::doSometaskSync(volatile int* x)
{
    boost::mutex::scoped_lock lock(io_mutex);
    doSometask(x);
}

// Builds the pool.
//   num:    number of worker threads and of per-job counter slots.
//   cycles: iterations each job performs (stored in the member of the same name).
// Members are set in the initializer list (in declaration order) so the
// atomic counter is never observed uninitialised.
ThreadPool::ThreadPool(int num, int cycles)
    : work(nullptr),
      p_size(0),
      xsize(nullptr),
      pool_sz(num),
      cycles(cycles)
{
    // Value-initialisation zeroes every slot; no memset (and no <cstring>) needed.
    xsize = new int[num]();
    // Keeps io_service::run() from returning while the queue is empty.
    work = new boost::asio::io_service::work(ioService);
    for (int i = 0; i < pool_sz; ++i)
        threadpool.create_thread(boost::bind(&boost::asio::io_service::run, &ioService));
}

// Drains the queue and tears the pool down.
// io_service::stop() is deliberately NOT called: it makes run() return as soon
// as possible and can abandon jobs that were posted but not yet started, which
// would make the benchmark time only part of the work. Destroying the work
// object is enough: run() returns on its own once all queued jobs have
// finished.
ThreadPool::~ThreadPool()
{
    delete work;
    // Wait for every worker; by then all posted jobs have completed.
    threadpool.join_all();
    // Safe only after the join: jobs write into this array.
    delete[] xsize;
}

// Benchmark driver.
// Usage: prog [cycles] [threads] [repetitions]
// Alternates unsynchronised (even i) and mutex-guarded (odd i) runs, keeps the
// fastest time of each kind, and prints the ratio sync / unsync.
int main(int argc, const char** argv)
{
    const int C = argc>1 ? std::stoi(argv[1]) : 10000; // number of cycles
    const int T = argc>2 ? std::stoi(argv[2]) : 100; // number of threads
    const int N = argc>3 ? std::stoi(argv[3]) : 50; // number of times to time execution

    long long t_min[2] = {0, 0}; // [0] = unsynchronised, [1] = synchronised
    for (int i = 0; i < N*2; ++i)
    {
        // Short pause between runs. Portable replacement for the Windows-only
        // Sleep(1), and kept outside the timed region so it does not inflate
        // both measurements and pull the ratio towards 1.
        std::this_thread::sleep_for(std::chrono::milliseconds(1));

        const auto t0 = std::chrono::high_resolution_clock::now();
        {
            ThreadPool pool(T, C);
            if (i & 1)
                pool.AssignPoolSync();
            else
                pool.AssignPool();
        } // pool destroyed here: its worker threads are joined inside the timed region
        const auto t1 = std::chrono::high_resolution_clock::now();

        // Convert once to a fixed integer type; duration::rep differs between
        // platforms (long vs long long).
        const long long elapsed = static_cast<long long>(
            std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count());

        // The first run of each kind (i == 0, i == 1) seeds the minimum.
        long long& best = t_min[i & 1];
        if (i < 2 || elapsed < best)
            best = elapsed;
    }
    std::printf("timeSync / time: %f\n", (t_min[1] + 0.0) / (t_min[0] + 0.0));
}

, : , , , . , , , .

Results on a 4-core machine:

test> test.exe 10000 100
timeSync / time: 1.027782

test> test.exe 500000 100
timeSync / time: 3.531433

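So with 10000 cycles the synchronized and unsynchronized versions take about the same time, while with 500000 cycles the synchronized version is about 3.5 times slower.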

+2

. , , , , ( , , . / ). , , , ( ) . .

1000 .

nomutex: 11.97 user | 5.76 system | 0:20.55 elapsed | 86% CPU

withmutex: 30.78 user | 8.78 system | 0:43.67 elapsed | 90% CPU

Nowadays most machines have multi-core processors, so I used the approach described at the link below to force the OS to use only one core: https://unix.stackexchange.com/a/23109

Hope this helps you.

+1
source

Source: https://habr.com/ru/post/1016270/


All Articles