We are running some code in a project that uses OpenMP, and I have come across something strange. I have included some sample code that demonstrates what I see.
The tests compare calling a function that takes a const char* argument with calling one that takes a std::string argument, each inside a multi-threaded loop. The functions do essentially nothing, so they add no overhead of their own.
What I see is a significant difference in the time required to complete the loops. For the const char* version doing 100,000,000 iterations, the code takes 0.075 seconds to complete, compared to 5.08 seconds for the std::string version. These tests were run on Ubuntu 10.04 x64 with gcc 4.4.
My question is mainly whether this is due solely to the dynamic allocation performed by std::string, and why the compiler cannot optimize it away in this case, since the argument is const and cannot change.
The code is below; thank you very much for your answers.
Compiled with: g++ -Wall -Wextra -O3 -fopenmp string_args.cpp -o string_args
#include <iostream>
#include <map>
#include <string>
#include <stdint.h>
#ifdef _WIN32
#include <time.h>
#else
#include <sys/time.h>
#endif
namespace
{
const int64_t g_max_iter = 100000000;
std::map<const char*, int> g_charIndex = std::map<const char*,int>();
std::map<std::string, int> g_strIndex = std::map<std::string,int>();
class Timer
{
public:
Timer()
{
#ifdef _WIN32
m_start = clock();
#else
gettimeofday(&m_start,0);
#endif
}
float elapsed()
{
#ifdef _WIN32
clock_t now = clock();
const float retval = float(now - m_start)/CLOCKS_PER_SEC;
m_start = now;
#else
timeval now;
gettimeofday(&now,0);
const float retval = float(now.tv_sec - m_start.tv_sec) + float((now.tv_usec - m_start.tv_usec)/1E6);
m_start = now;
#endif
return retval;
}
private:
#ifdef _WIN32
clock_t
#else
timeval
#endif
m_start;
};
}
bool contains_char(const char * id)
{
if( g_charIndex.empty() ) return false;
return (g_charIndex.find(id) != g_charIndex.end());
}
bool contains_str(const std::string & name)
{
if( g_strIndex.empty() ) return false;
return (g_strIndex.find(name) != g_strIndex.end());
}
void do_serial_char()
{
int found(0);
Timer clock;
for( int64_t i = 0; i < g_max_iter; ++i )
{
if( contains_char("pos") )
{
++found;
}
}
std::cout << "Loop time: " << clock.elapsed() << "\n";
++found;
}
void do_parallel_char()
{
int found(0);
Timer clock;
#pragma omp parallel for
for( int64_t i = 0; i < g_max_iter; ++i )
{
if( contains_char("pos") )
{
++found;
}
}
std::cout << "Loop time: " << clock.elapsed() << "\n";
++found;
}
void do_serial_str()
{
int found(0);
Timer clock;
for( int64_t i = 0; i < g_max_iter; ++i )
{
if( contains_str("pos") )
{
++found;
}
}
std::cout << "Loop time: " << clock.elapsed() << "\n";
++found;
}
void do_parallel_str()
{
int found(0);
Timer clock;
#pragma omp parallel for
for( int64_t i = 0; i < g_max_iter ; ++i )
{
if( contains_str("pos") )
{
++found;
}
}
std::cout << "Loop time: " << clock.elapsed() << "\n";
++found;
}
int main()
{
std::cout << "Starting single-threaded loop using std::string\n";
do_serial_str();
std::cout << "\nStarting multi-threaded loop using std::string\n";
do_parallel_str();
std::cout << "\nStarting single-threaded loop using char *\n";
do_serial_char();
std::cout << "\nStarting multi-threaded loop using const char*\n";
do_parallel_char();
}
source
share