std::async performance on Windows and Solaris 10

I am running a simple test program on both a Windows machine (compiled with MSVS2015) and a server running Solaris 10 (compiled with GCC 4.9.3). On Windows, I get a significant performance boost from increasing the number of threads from 1 to the number of available cores; however, the same code sees no performance gain at all on Solaris 10.

The Windows machine has 4 cores (8 logical), and the Solaris machine has 8 cores (16 logical).

What could be the reason for this? I compile with -pthread, and the program does create threads, since it prints every "S" before the first "F". I do not have root access on the Solaris machine, and as far as I can see there is no installed tool that I can use to inspect the process's threads.

Code example:

#include <iostream>
#include <vector>
#include <future>
#include <random>
#include <chrono>

// Process-wide generator (seeded from the system clock at start-up) and
// distribution used by generate_randn().
// NOTE: both objects live at namespace scope and carry mutable state, and
// nothing in this file synchronizes access to them, so every concurrent
// caller of generate_randn() reads and writes the same engine.
std::default_random_engine gen(std::chrono::system_clock::now().time_since_epoch().count());
std::normal_distribution<double> randn(0.0, 1.0);

/// @brief Averages `iterations` draws from the shared N(0, 1) distribution.
///
/// Writes "S" to std::cout on entry and "F" on exit, flushing each time, so
/// the interleaving of concurrently running calls is visible on the console.
///
/// @param iterations Number of samples to draw.
/// @return The arithmetic mean of the samples; NaN when `iterations` is 0,
///         because the empty sum 0.0 is then divided by zero.
double generate_randn(uint64_t iterations)
{
    // Print "S" when a thread starts
    std::cout << "S";
    std::cout.flush();

    double rvalue = 0;
    // The counter has the same unsigned 64-bit type as the bound: an `int`
    // counter would overflow before reaching any bound above INT_MAX.
    for (uint64_t i = 0; i < iterations; ++i)
    {
        rvalue += randn(gen);
    }

    // Print "F" when a thread finishes
    std::cout << "F";
    std::cout.flush();

    return rvalue / iterations;
}

/// @brief Times `threads` concurrent generate_randn() tasks and prints their mean.
///
/// Usage: <program> <threads>. The fixed budget of 100000000 samples is split
/// across the tasks with integer division, so any remainder is not sampled.
///
/// @return 0 on success or when no argument was given; 1 when the thread
///         count is not a positive integer.
int main(int argc, char *argv[])
{
    if (argc < 2)
        return 0;

    // Reject zero, negative and non-numeric input up front: std::atoi yields 0
    // for text it cannot parse, and a negative value stored in an unsigned
    // type would turn into a huge thread count.
    const int requested = std::atoi(argv[1]);
    if (requested <= 0)
    {
        std::cerr << "thread count must be a positive integer\n";
        return 1;
    }

    const uint64_t count = 100000000;
    const uint32_t threads = static_cast<uint32_t>(requested);

    std::vector<std::future<double>> futures;
    futures.reserve(threads);

    // Start timing
    const auto t1 = std::chrono::high_resolution_clock::now();
    for (uint32_t i = 0; i < threads; ++i)
    {
        // Start async tasks
        futures.push_back(std::async(std::launch::async, generate_randn, count / threads));
    }

    double total = 0;
    for (auto &future : futures)
    {
        // get() blocks until the task has finished, so no separate wait() is needed
        total += future.get();
    }
    // End timing
    const auto t2 = std::chrono::high_resolution_clock::now();

    // Take the average of the threads' results
    total /= threads;

    std::cout << '\n'
              << total << '\n'
              << "Finished in "
              << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count()
              << " ms" << std::endl;
}
+4
source share
2 answers

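As a general rule, classes defined by the C++ standard library have no internal locking. Modifying an instance of a standard-library class from more than one thread, or reading it in one thread while another thread writes to it, is undefined behavior unless "objects of that type are explicitly specified as being sharable without data races". (N3337, sections 17.6.4.10 and 17.6.5.9.) The RNG classes are not "explicitly specified as being sharable without data races". (cout is an example of a stdlib object that is "sharable without data races" - as long as you have not called ios::sync_with_stdio(false).)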

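Therefore your program is incorrect, because it accesses a global RNG object from more than one thread at the same time; every time you request another random number, the generator's internal state is modified. On Solaris this appears to result in the accesses being serialized, whereas on Windows it is probably instead causing you not to get properly "random" numbers.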

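The cure is to create a separate RNG for each thread. Then each thread works independently, and they neither slow each other down nor step on each other's toes. This is a special case of a very general principle: multithreading always works better the less shared data there is.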

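There is one more wrinkle to worry about: each thread would call system_clock::now at almost the same moment, so some of the per-thread RNGs could end up seeded with the same value. It is better to seed them all from a random_device object. random_device requests random numbers from the operating system and does not need to be seeded; but it can be very slow. The random_device should be created and used inside main, with the seeds passed to each worker function, because a global random_device accessed from multiple threads (as in the previous version of this answer) is just as undefined as a global default_random_engine.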

All told, your program should look like this:

#include <iostream>
#include <vector>
#include <future>
#include <random>
#include <chrono>

/// @brief Averages `iterations` draws from N(0, 1) using a generator private to the call.
///
/// Writes "S" to std::cout on entry and "F" on exit, flushing each time, so
/// the interleaving of concurrently running calls is visible on the console.
///
/// @param iterations Number of samples to draw.
/// @param seed       Seed for this call's engine; equal seeds give equal results.
/// @return The arithmetic mean of the samples; NaN when `iterations` is 0,
///         because the empty sum 0.0 is then divided by zero.
static double generate_randn(uint64_t iterations, unsigned int seed)
{
    // Print "S" when a thread starts
    std::cout << "S";
    std::cout.flush();

    // Engine and distribution are locals, so each call owns its own state
    // and no RNG object is shared between concurrent calls.
    std::default_random_engine gen(seed);
    std::normal_distribution<double> randn(0.0, 1.0);

    double rvalue = 0;
    // The counter has the same unsigned 64-bit type as the bound: an `int`
    // counter would overflow before reaching any bound above INT_MAX.
    for (uint64_t i = 0; i < iterations; ++i)
    {
        rvalue += randn(gen);
    }

    // Print "F" when a thread finishes
    std::cout << "F";
    std::cout.flush();

    return rvalue / iterations;
}

/// @brief Times `threads` concurrent generate_randn() tasks and prints their mean.
///
/// Usage: <program> <threads>. The fixed budget of 100000000 samples is split
/// across the tasks with integer division, so any remainder is not sampled.
/// Each task receives its own seed drawn from a std::random_device.
///
/// @return 0 on success or when no argument was given; 1 when the thread
///         count is not a positive integer.
int main(int argc, char *argv[])
{
    if (argc < 2)
        return 0;

    // Reject zero, negative and non-numeric input up front: std::atoi yields 0
    // for text it cannot parse, and a negative value stored in an unsigned
    // type would turn into a huge thread count.
    const int requested = std::atoi(argv[1]);
    if (requested <= 0)
    {
        std::cerr << "thread count must be a positive integer\n";
        return 1;
    }

    const uint64_t count = 100000000;
    const uint32_t threads = static_cast<uint32_t>(requested);

    std::vector<std::future<double>> futures;
    futures.reserve(threads);

    std::random_device make_seed;

    // Start timing
    const auto t1 = std::chrono::high_resolution_clock::now();
    for (uint32_t i = 0; i < threads; ++i)
    {
        // Start async tasks. make_seed() is evaluated here, in the calling
        // thread, before the task is launched; the tasks only see the value.
        futures.push_back(std::async(std::launch::async,
                                     generate_randn,
                                     count / threads,
                                     make_seed()));
    }

    double total = 0;
    for (auto &future : futures)
    {
        // get() blocks until the task has finished, so no separate wait() is needed
        total += future.get();
    }
    // End timing
    const auto t2 = std::chrono::high_resolution_clock::now();

    // Take the average of the threads' results
    total /= threads;

    std::cout << '\n' << total
              << "\nFinished in "
              << std::chrono::duration_cast<
                   std::chrono::milliseconds>(t2 - t1).count()
              << " ms\n";
}
+3

(This does not really fit in a comment, so I am posting it as an answer.)

On Solaris you can use the Solaris Studio collect utility. On Solaris it will be able to show you where your threads are contending.

collect -d /tmp -p high -s all app [app args]

Then view the results with the analyzer utility:

analyzer /tmp/test.1.er &

Replace /tmp/test.1.er with the path to the output produced by the collect profiling run.

If your threads are contending for some resource(s), as @zwol posted in his answer, you will see it.

An Oracle marketing brief for the toolset can be found here: http://www.oracle.com/technetwork/server-storage/solarisstudio/documentation/o11-151-perf-analyzer-brief-1405338.pdf

You can also try compiling your code with Solaris Studio to get more data.

+2

Source: https://habr.com/ru/post/1652599/


All Articles