GCC -O3 question: calling the same function from another file gives different performance

I am using the following test:

/*
 * Benchmark driver: times func_a and func_b, each called with id 999 and
 * the same 13-byte output buffer, 99999999 times via TIME_THIS.
 */
int main(int argc, char **argv)
{
 char *d = malloc(sizeof(char) * 13);

 /* Never hand a NULL buffer to the functions under test. */
 if (d == NULL)
  return 1;

 TIME_THIS(func_a(999, d), 99999999);
 TIME_THIS(func_b(999, d), 99999999);

 free(d);

 return 0;
}

With normal compilation, the results are the same for both functions:

% gcc func_overhead.c func_overhead_plus.c -o func_overhead && ./func_overhead                                                                               
[func_a(999, d)                     ]      9276227.73
[func_b(999, d)                     ]      9265085.90

But with -O3 they are very different:

% gcc -O3 func_overhead.c func_overhead_plus.c -o func_overhead && ./func_overhead                                                                
[func_a(999, d)                     ]    178580674.69
[func_b(999, d)                     ]     48450175.29

func_a and func_b are defined as follows:

/*
 * Format the low 40 bits of id as ten uppercase hex digits, most significant
 * first, grouped 4/3/3 with '/' separators (e.g. 999 -> "0000/000/3E7").
 *
 * d must have room for 13 bytes: 12 characters plus the terminating NUL.
 * Returns d.
 */
char *func_a(uint64_t id, char *d)
{
 size_t pos;
 size_t shift = 36;   /* bit offset of the next nibble to emit */

 for (pos = 0; pos < 12; pos++) {
  unsigned nibble;

  /* Separator slots do not consume a nibble. */
  if (pos == 4 || pos == 8) {
   d[pos] = '/';
   continue;
  }

  nibble = (unsigned)((id >> shift) & 0xf);
  d[pos] = (char)(nibble < 10 ? '0' + nibble : 'A' + (nibble - 10));
  shift -= 4;
 }

 d[12] = '\0';

 return d;
}

The only difference is that func_a is in the same file as main(), and func_b is in func_overhead_plus.c.

I am wondering if anyone can figure out what is going on.

Thanks.

Edit:

Sorry for all the confusion regarding the results. They are actually calls per second, so func_a() is faster than func_b() with -O3.

TIME_THIS is defined as follows:

/*
 * Current time of day from gettimeofday(), in seconds with the microsecond
 * part as the fraction.
 */
double get_time(void)
{
    struct timeval t;
    gettimeofday(&t, NULL);
    return t.tv_sec + t.tv_usec*1e-6;
}

/*
 * TIME_THIS(func, runs): evaluate the expression `func` `runs` times and
 * print "[<source text of func>] <calls per second>".
 *
 * `runs` is evaluated exactly once. All locals carry a time_this_ prefix so
 * that `func` can freely mention caller variables named i, t0 or td without
 * them being captured by the macro's own variables.
 *
 * If the whole loop finishes within the clock's resolution the elapsed time
 * is 0 and the printed rate is not meaningful.
 */
#define TIME_THIS(func, runs) do {                                  \
        const long time_this_runs_ = (runs);                        \
        long time_this_i_;                                          \
        double time_this_t0_, time_this_td_;                        \
        time_this_t0_ = get_time();                                 \
        for (time_this_i_ = 0; time_this_i_ < time_this_runs_;      \
             time_this_i_++)                                        \
            func;                                                   \
        time_this_td_ = get_time() - time_this_t0_;                 \
        printf("[%-35s] %15.2f\n", #func,                           \
               time_this_runs_ / time_this_td_);                    \
} while(0)

Linux architecture

Linux komiko 2.6.30-gentoo-r2 #1 SMP PREEMPT Wed Jul 15 17:27:51 IDT 2009 i686 Intel(R) Core(TM)2 Quad CPU Q8200 @ 2.33GHz GenuineIntel GNU/Linux

gcc is 4.3.3

As suggested, here are the results of mixing the calls a bit:

-O3

[func_b(999, d)                     ]     48926120.09
[func_a(999, d)                     ]    135299870.52
[func_b(999, d)                     ]     49075900.30
[func_a(999, d)                     ]    135748939.12
[func_b(999, d)                     ]     49039535.67
[func_a(999, d)                     ]    134055084.58

-O2

[func_b(999, d)                     ]     27243732.97
[func_a(999, d)                     ]     27341371.38
[func_b(999, d)                     ]     27303284.93
[func_a(999, d)                     ]     27349177.65
[func_b(999, d)                     ]     27325398.25
[func_a(999, d)                     ]     27343935.88

(-O1 and -Os were the same as -O2 in this test.)

no optimizations

[func_b(999, d)                     ]      8852314.88
[func_a(999, d)                     ]      9646166.81
[func_b(999, d)                     ]      8909973.33
[func_a(999, d)                     ]      9734883.99
[func_b(999, d)                     ]      8726127.49
[func_a(999, d)                     ]      9566052.21

, -O3 , func_a , func_b

, gcc 4.4.4

[func_b(999, d)                     ]     16982343.03
[func_a(999, d)                     ]     19693688.36
[func_b(999, d)                     ]     17260359.40
[func_a(999, d)                     ]     18137352.08
[func_b(999, d)                     ]     16790465.45
[func_a(999, d)                     ]     19828836.94

-O3

[func_b(999, d)                     ]     52184739.72
[func_a(999, d)                     ] 99999237556468.61
[func_b(999, d)                     ]     52430823.56
[func_a(999, d)                     ]    101030101.92
[func_b(999, d)                     ]     52404446.52
[func_a(999, d)                     ]    100842538.40

, ?

Edit:

gcc4.3/4.4 , ?

e.g

#include "performance_critical.c"

, , ?

+3
6

, , , -S . , , .

(read: ), , .., ( ). , . , , .

, , , , , . , , .

Edit

, . , ...

+6

. - TIME_THIS, . TIME_THIS_A TIME_THIS_B, c . , , , . func_b , TIME_THIS main.c. , , . c - ( main_b). L1/L2, , . () :

// main.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "foo_b.h"

// Called by main() immediately before each timed foo_a() run.
// The body only declares a function-local static volatile int that is
// never read or written afterwards.
// NOTE(review): the intent is not evident from the code - presumably a
// placeholder for pre-run setup; confirm before relying on it.
void prefoo_a()
{
    static volatile int i = 1;
}

// Benchmark body for func_a: calls func_a(999, d) 10000000 times on one
// 13-byte scratch buffer.
void foo_a()
{
    char *d = malloc(sizeof(char) * 13);

    // Without a buffer there is nothing to measure; report the failure
    // instead of passing NULL to func_a.
    if ( d == NULL )
    {
        perror ( "malloc" );
        return;
    }

    for ( int i = 0; i < 10000000; i++ )
        func_a(999, d);

    // foo_a() runs more than once per process, so release the buffer.
    free(d);
}

// Function under test. Its body is elided in this listing; the only
// behaviour visible here is that it returns the d pointer it was given.
char *func_a(uint64_t id, char *d)
{
    // snipped

    return d;
}

// Runs foo_a() and foo_b() alternately, two rounds each. Every run is
// preceded by its prefoo_*() call and followed by a line reporting the
// elapsed clock() time in seconds.
int main(int argc, char **argv)
{
    clock_t began;
    double seconds;

    // Round 1
    prefoo_a();
    began = clock();
    foo_a();
    seconds = ( (double)clock() - began ) / CLOCKS_PER_SEC;
    printf ( "func_a %f\n", seconds );

    prefoo_b();
    began = clock();
    foo_b();
    seconds = ( (double)clock() - began ) / CLOCKS_PER_SEC;
    printf ( "func_b %f\n", seconds );

    // Round 2
    prefoo_a();
    began = clock();
    foo_a();
    seconds = ( (double)clock() - began ) / CLOCKS_PER_SEC;
    printf ( "func_a %f\n", seconds );

    prefoo_b();
    began = clock();
    foo_b();
    seconds = ( (double)clock() - began ) / CLOCKS_PER_SEC;
    printf ( "func_b %f\n", seconds );

    return 0;
}

foo_b.c:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "foo_b.h"

// Counterpart of prefoo_a() for the func_b side of the benchmark.
// The body only declares a function-local static volatile int that is
// never read or written afterwards.
// NOTE(review): the intent is not evident from the code - presumably a
// placeholder for pre-run setup; confirm before relying on it.
void prefoo_b()
{
    static volatile int i = 1;
}

// Benchmark body for func_b: calls func_b(999, d) 10000000 times on one
// 13-byte scratch buffer.
void foo_b()
{
    char *d = malloc(sizeof(char) * 13);

    // Without a buffer there is nothing to measure; report the failure
    // instead of passing NULL to func_b.
    if ( d == NULL )
    {
        perror ( "malloc" );
        return;
    }

    for ( int i = 0; i < 10000000; i++ )
        func_b(999, d);

    // Release the scratch buffer so repeated calls do not leak it.
    free(d);
}

// Function under test. Its body is elided in this listing; the only
// behaviour visible here is that it returns the d pointer it was given.
char *func_b(uint64_t id, char *d)
{
            // ... snipped

    return d;
}

foo_b.h:

// Declarations shared by the two benchmark translation units.
#ifndef FOO_B_H
#define FOO_B_H

// Take uint64_t from the standard header: a hand-written typedef clashes
// with <stdint.h> on platforms where uint64_t is not unsigned long long.
#include <stdint.h>

// Functions under test.
char *func_a(uint64_t id, char *d);
char *func_b(uint64_t id, char *d);

// Hooks called before each timed run.
void prefoo_a(void);
void prefoo_b(void);

// Benchmark loops around func_a / func_b.
void foo_a(void);
void foo_b(void);

#endif // FOO_B_H

(x64 Snow Leopard) -O3:

  • func_a 0.043674
  • func_b 0.043825
  • func_a 0.044268
  • func_b 0.043997
  • func_a 0.043879
  • func_b 0.043950

-O0 (optimizations disabled):

  • func_a 0.853132
  • func_b 0.852719
  • func_a 0.872263
  • func_b 0.851980
  • func_a 0.852977
  • func_b 0.853294
+1

- .

-O3 , -O2 . , gcc 4.5.

+1

-O3 . . GCC.

0

, , . , , TIME_THIS.

, , , , .

, , , .

, , , , - .

0

,

static inline

. .

0

Source: https://habr.com/ru/post/1753376/


All Articles