C: pthread performance issues. How can I make this code as expected?

I created this small program to calculate pi using probabilities and coefficients. To make it work faster, I decided to give multithreading using pthreads. Unfortunately, even after multiple searches, I could not solve the problem that I have when I run the threadFunc function with a single thread, whether with pthread or just normally called from calculate_pi_mt, the performance is very great (at least twice or if not 3 times better) than when I try to run it with two threads on my dual core machine. I tried disabling optimization to no avail. As far as I see, when the thread is working, it uses local variables separately from the end, when I used the mutex lock to create the sum of hits ...

First off, are there any tips for creating code that will work better here? (i.e. style) because I was just learning by tasting this stuff.

And secondly, will there be any reason for these obvious performance issues? On startup with the number of threads set to 1, one of my maximum cpus values ​​is 100%. When set to two, the second processor rises to about 80% -90%, but all this extra work seems to do it to no avail! Could there be a use of the rand () function?

struct arguments {
    int n_threads;
    int rays;
    int hits_in;
    pthread_mutex_t *mutex;
};


void *threadFunc(void *arg)
{
    struct arguments* args=(struct arguments*)arg;

    int n = 0;
    int local_hits_in = 0;
    double x;
    double y;
    double r;
    while (n < args->rays)
    {
        n++;
        x = ((double)rand())/((double)RAND_MAX);
        y = ((double)rand())/((double)RAND_MAX);
        r = (double)sqrt(pow(x, 2) + pow(y, 2)); 
        if (r < 1.0){
            local_hits_in++;
        }
    }

    pthread_mutex_lock(args->mutex);
    args->hits_in += local_hits_in;
    pthread_mutex_unlock(args->mutex);

    return NULL;
}


double calculate_pi_mt(int rays, int threads){
    double answer;
    int c;
    unsigned int iseed = (unsigned int)time(NULL);
    srand(iseed);

    if ( (float)(rays/threads) != ((float)rays)/((float)threads) ){
        printf("Error: number of rays is not evenly divisible by threads\n");
    }

    /* argument initialization */
    struct arguments* args = malloc(sizeof(struct arguments));
    args->hits_in = 0;
    args->rays = rays/threads;
    args->n_threads = 0;
    args->mutex = malloc(sizeof(pthread_mutex_t));
    if (pthread_mutex_init(args->mutex, NULL)){
        printf("Error creating mutex!\n");
    }


    pthread_t thread_ary[MAXTHREADS];

    c=0;
    while (c < threads){
        args->n_threads += 1;
        if (pthread_create(&(thread_ary[c]),NULL,threadFunc, args)){
            printf("Error when creating thread\n");
        }
        printf("Created Thread: %d\n", args->n_threads);
        c+=1;
    }


    c=0;
    while (c < threads){
        printf("main waiting for thread %d to terminate...\n", c+1);
        if (pthread_join(thread_ary[c],NULL)){
            printf("Error while waiting for thread to join\n");
        }
        printf("Destroyed Thread: %d\n", c+1);

        c+=1;
    }

    printf("Hits in %d\n", args->hits_in);
    printf("Rays: %d\n", rays);
    answer = 4.0 * (double)(args->hits_in)/(double)(rays);

    //freeing everything!
    pthread_mutex_destroy(args->mutex);
    free(args->mutex);
    free(args);

    return answer;
}
+3
source share
3 answers

There are several problems that I see:

  • rand() . drand48_r() ( double [0.0, 1.0) , )
  • struct arguments, . ( ).

. , - - , :

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <pthread.h>

struct thread_info {
    int thread_n;
    pthread_t thread_id;
    int rays;
    int hits_in;
};

void seed_rand(int thread_n, struct drand48_data *buffer)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    srand48_r(tv.tv_sec * thread_n + tv.tv_usec, buffer);
}

void *threadFunc(void *arg)
{
    struct thread_info *thread_info = arg;
    struct drand48_data drand_buffer;

    int n = 0;
    const int rays = thread_info->rays;
    int hits_in = 0;
    double x;
    double y;
    double r;

    seed_rand(thread_info->thread_n, &drand_buffer);

    for (n = 0; n < rays; n++)
    {
        drand48_r(&drand_buffer, &x);
        drand48_r(&drand_buffer, &y);
        r = x * x + y * y;
        if (r < 1.0){
            hits_in++;
        }
    }

    thread_info->hits_in = hits_in;
    return NULL;
}


double calculate_pi_mt(int rays, int threads)
{
    int c;
    int hits_in = 0;

    if (rays % threads) {
        printf("Error: number of rays is not evenly divisible by threads\n");
        rays = (rays / threads) * threads;
    }

    /* argument initialization */
    struct thread_info *thr = malloc(threads * sizeof thr[0]);

    for (c = 0; c < threads; c++) {
        thr[c].thread_n = c;
        thr[c].rays = rays / threads;
        thr[c].hits_in = 0;
        if (pthread_create(&thr[c].thread_id, NULL, threadFunc, &thr[c])) {
            printf("Error when creating thread\n");
        }
        printf("Created Thread: %d\n", thr[c].thread_n);
    }

    for (c = 0; c < threads; c++) {
        printf("main waiting for thread %d to terminate...\n", c);
        if (pthread_join(thr[c].thread_id, NULL)) {
            printf("Error while waiting for thread to join\n");
        }
        hits_in += thr[c].hits_in;
        printf("Destroyed Thread: %d\n", c+1);
    }

    printf("Hits in %d\n", hits_in);
    printf("Rays: %d\n", rays);
    double answer = (4.0 * hits_in) / rays;

    free(thr);

    return answer;
}
+11

. local_hits , , . , , ( int), , int.

+8

Streaming has a cost. It’s possible that since your useful computing code looks very simple, the cost of managing the threads (the cost paid when changing the cost of thread and synchronization) is much higher than the benefit.

+1
source

Source: https://habr.com/ru/post/1764486/


All Articles