Why does this pthreads code segfault on OS X, but not on Linux?

I have some task-scheduling code that I want to compare against a baseline which basically creates a new pthread for each task (I know this is not a great idea, which is why it is just a baseline for comparison). However, for some reason, the pthreads version keeps giving me segfaults on OS X¹, but when I run the same code on Linux² everything works fine.

On OS X, it occasionally completes successfully, but it usually segfaults in pthread_create, and sometimes segfaults in pthread_join instead. I also found that if I call pthread_create with the PTHREAD_CREATE_DETACHED attribute and skip the pthread_join calls, the segfault problems go away.

Included at the bottom of this question is a stripped-down version of the code, which I tried to minimize as much as possible while still reproducing the problematic segfaults.


My question is this:

Why is this crashing on OS X but not Linux?


Maybe there is a bug that I am not noticing which happens to be benign on Linux. I am fairly sure that the mutex and CAS operations provide sufficient synchronization, so I do not think this is a data race problem.

, , PTHREAD_CREATE_DETACHED, segfaults. , , , , , ​​ pthreads, ; , pthread, / .

, :

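  • Suspended pthreads are kept on a global list (wait_list_head): a thread pushes a wait record onto it before sleeping, and a signaling thread pops a record to wake its owner.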

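  • The main thread starts the root task and then polls until no tasks remain outstanding (tracked by the counter active) before suspending itself to collect the final result.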

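  • The root task computes Fibonacci(N=10) by spawning threads for Fibonacci(N-1) and Fibonacci(N-2), suspending until two results have arrived, and then passing their sum on. In the base case, N<2, a task simply passes on N.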

  • , ---, . I.e., , ; - . "" segfaults.

  • A plain recursive Fibonacci (pure_fib) computes the expected result for comparison.

:

Fibonacci(N):
    If N < 2:
        signal_parent(N)
    Else:
        sum = 0
        pthread_create(A, Fibonacci, N-1)
        pthread_create(B, Fibonacci, N-2)
        sum += suspend_and_join_child(); // not necessarily thread A
        sum += suspend_and_join_child(); // not necessarily thread B
        signal_parent(sum)

C.

1 Apple LLVM 7.0.0 (clang-700.1.76), target: x86_64-apple-darwin14.5.0
2 gcc (Ubuntu 5.4.0-6ubuntu1 ~ 16.04.2) 5.4.0 20160609


#include <assert.h>
#include <pthread.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>

/* Fibonacci index used for both the threaded computation and the pure_fib() reference value. */
#define N 10

/*
 * RCHECK(expr): evaluate `expr` exactly once as an int; if the result is
 * nonzero, print the failing expression text to stderr and abort().
 *
 * Intended for pthread-style calls that return 0 on success.
 *
 * The expansion is a single statement WITHOUT a trailing semicolon, so the
 * caller's own `;` completes it and `if (c) RCHECK(x); else ...` parses
 * correctly. The argument is parenthesized so that any expression can be
 * passed safely.
 */
#define RCHECK(expr)                                     \
    do {                                                 \
        int _rcheck_expr_return_value = (expr);          \
        if (_rcheck_expr_return_value != 0) {            \
            fprintf(stderr, "FAILED CALL: " #expr "\n"); \
            abort();                                     \
        }                                                \
    } while (0)

/*
 * Record describing one suspended thread. thread_suspend() creates one on its
 * own stack and publishes it via push_thread(); thread_signal() obtains one
 * via pop_thread() and fills in the result.
 */
typedef struct wait_state_st {
    volatile intptr_t val;      /* -1 while waiting; set by thread_signal() to the delivered value */
    pthread_t other;            /* initially the waiter itself; overwritten with the signaling thread, which the waiter then joins */
    pthread_mutex_t lock;       /* held around the updates of val/other and the condition wait */
    pthread_cond_t cond;        /* signaled by thread_signal() after val has been set */
    struct wait_state_st *next; /* link to the next record in the list headed by wait_list_head */
} wait_state;

/* Head of the global list of suspended threads' records; updated with compare-and-swap in push_thread()/pop_thread(). */
wait_state *volatile wait_list_head = NULL;
/*
 * Outstanding-signal counter: main() sets it to 1, fib() adds 2 before
 * spawning its two children, and every thread_signal() call subtracts 1.
 * main() polls it until it is no longer positive.
 */
volatile int active = 0;

/*
 * Publish `ws` as the new head of the global wait list.
 *
 * Links `ws` in front of the currently observed head and installs it with a
 * compare-and-swap, retrying whenever another thread changed the head in the
 * meantime.
 */
static inline void push_thread(wait_state *ws) {
    for (;;) {
        wait_state *observed = wait_list_head;
        ws->next = observed;
        if (__sync_bool_compare_and_swap(&wait_list_head, observed, ws)) {
            return;
        }
    }
}

/*
 * Remove and return the record at the head of the global wait list.
 *
 * If the list is empty, sleeps in 1 ms steps until a record appears. The
 * returned record has its `next` link cleared.
 *
 * NOTE(review): `top->next` is read before the compare-and-swap, so if `top`
 * is popped by another thread in between, this reads a record that is no
 * longer on the list. Records live on their owners' stacks and the same
 * address can be pushed again, so the CAS can also succeed with a stale
 * `next` (ABA). The assert below only detects some of these cases - confirm
 * whether this matters for the intended use.
 */
static inline wait_state *pop_thread(void) {
    for (;;) {
        wait_state *top = wait_list_head;
        if (top == NULL) {
            /* Nothing is waiting yet: back off briefly and look again. */
            usleep(1000);
            continue;
        }

        wait_state *successor = top->next;
        if (__sync_bool_compare_and_swap(&wait_list_head, top, successor)) {
            assert(top->next == successor); // check for ABA problem
            top->next = NULL;
            return top;
        }
    }
}

/*
 * Suspend the calling thread until it has been woken `count` times and
 * return the sum of the values delivered by the waking threads.
 *
 * For each wake-up the function:
 *   1. builds a wait record on its own stack and pushes it onto the global
 *      wait list while holding the record's mutex,
 *   2. waits on the record's condition variable until thread_signal() has
 *      stored a non-negative value in `val`,
 *   3. joins the thread that delivered the value (`other`), which is
 *      whichever thread popped this record and not necessarily a thread
 *      created by the caller,
 *   4. releases and destroys the record's mutex and condition variable.
 *
 * Any failing pthread call aborts the process via RCHECK.
 *
 * @param count number of wake-ups to wait for
 * @return sum of the delivered values
 */
intptr_t thread_suspend(int count) {
    intptr_t sum = 0;
    // WAIT TO BE WOKEN UP "count" TIMES
    for (int i = 0; i < count; i++) {
        wait_state ws;
        ws.val = -1;
        ws.other = pthread_self();
        RCHECK(pthread_mutex_init(&ws.lock, NULL));
        RCHECK(pthread_cond_init(&ws.cond, NULL));

        /*
         * Take the lock BEFORE publishing the record, so a signaler that
         * pops it blocks until this thread is inside pthread_cond_wait().
         */
        RCHECK(pthread_mutex_lock(&ws.lock));

        push_thread(&ws);

        /* Loop on the predicate to tolerate spurious wake-ups. */
        while (ws.val < 0) {
            RCHECK(pthread_cond_wait(&ws.cond, &ws.lock));
        }

        /* pthread_t is opaque: compare with pthread_equal(), not `!=`. */
        assert(!pthread_equal(ws.other, pthread_self()));
        /*
         * Joining guarantees the signaler has finished with this record
         * before the record's storage is reused or goes out of scope.
         */
        RCHECK(pthread_join(ws.other, NULL));

        sum += ws.val;

        RCHECK(pthread_mutex_unlock(&ws.lock));

        /*
         * The record is re-created on every iteration, so release the
         * synchronization objects instead of leaking them.
         */
        RCHECK(pthread_cond_destroy(&ws.cond));
        RCHECK(pthread_mutex_destroy(&ws.lock));
    }
    return sum;
}

/*
 * Deliver the value `x` to one suspended thread and wake it.
 *
 * Decrements the global `active` counter, takes a wait record from the
 * global list (waiting until one is available), stores `x` and the calling
 * thread's id in it under the record's mutex, and signals the record's
 * condition variable.
 *
 * @param x value handed to the woken thread
 */
void thread_signal(intptr_t x) {
    /* This thread is about to report its result: one fewer outstanding. */
    __sync_fetch_and_add(&active, -1);

    wait_state *sleeper = pop_thread();

    RCHECK(pthread_mutex_lock(&sleeper->lock));
    sleeper->val = x;
    /* Tell the sleeper which thread it has to join. */
    sleeper->other = pthread_self();
    RCHECK(pthread_cond_signal(&sleeper->cond));
    RCHECK(pthread_mutex_unlock(&sleeper->lock));
}

/*
 * Thread entry point computing one node of the Fibonacci task tree.
 *
 * `arg` carries the index n as an integer stored in a pointer. For n <= 1
 * the thread signals n directly. Otherwise it registers two more outstanding
 * tasks in `active`, spawns threads for n-1 and n-2, suspends until two
 * values have been delivered, and signals their sum.
 *
 * @param arg index n, cast to void *
 * @return always NULL; the result travels through thread_signal()
 */
void *fib(void *arg) {
    const intptr_t n = (intptr_t)arg;

    /* Base case: nothing to spawn, report n as-is. */
    if (n <= 1) {
        thread_signal(n);
        return NULL;
    }

    pthread_t first, second;
    __sync_fetch_and_add(&active, 2);
    RCHECK(pthread_create(&first, NULL, fib, (void *)(n - 1)));
    RCHECK(pthread_create(&second, NULL, fib, (void *)(n - 2)));

    intptr_t total = thread_suspend(2);
    thread_signal(total);
    return NULL;
}

/*
 * Single-threaded reference Fibonacci.
 *
 * @param n index; values below 2 (including negatives) are returned unchanged
 * @return the n-th Fibonacci number, with pure_fib(0) == 0 and pure_fib(1) == 1
 */
intptr_t pure_fib(intptr_t n) {
    if (n < 2) {
        return n;
    }

    intptr_t prev = 0;
    intptr_t curr = 1;
    for (intptr_t k = 2; k <= n; k++) {
        intptr_t following = prev + curr;
        prev = curr;
        curr = following;
    }
    return curr;
}

/*
 * Driver: prints the expected Fibonacci(N) from pure_fib(), runs the
 * thread-per-task version, and prints its result.
 *
 * The main thread starts the root fib() thread, polls `active` until no
 * signals remain outstanding, then suspends once to receive the final sum.
 * The wait list is asserted to be empty both before and after the run.
 *
 * Command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    printf("EXPECTED = %" PRIdPTR "\n", pure_fib(N));
    assert("START" && wait_list_head == NULL);

    /* Account for the root task before it is created. */
    active = 1;

    pthread_t t;
    /* Go through intptr_t so the int constant converts to a pointer-sized value. */
    RCHECK(pthread_create(&t, NULL, fib, (void *)(intptr_t)N));

    while (active > 0) { usleep(100000); }
    intptr_t sum = thread_suspend(1);

    printf("SUM      = %" PRIdPTR "\n", sum);
    /* %p requires a void * argument. */
    printf("DONE %p\n", (void *)wait_list_head);

    assert("END" && wait_list_head == NULL);

    return 0;
}

: Gist , push/pop, , , ABA CAS . - , 30-50% , 99% , .

, , , pthreads , / , , .

+4
1

, .

, , - , , . . - .

20 , , , , . , , , , .

, linux, OS X, , , usleep().

.

:

https://computing.llnl.gov/tutorials/pthreads/#Overview

https://en.wikipedia.org/wiki/ABA_problem

W. R. Stevens, "UNIX Network Programming, Volume 1", Chapter 23.

, .

+2

Source: https://habr.com/ru/post/1659437/


All Articles