I am stuck in writing some parallel code using OpenMP for the concurrency course.
Here is a snippet
#include <stdio.h> #include <time.h> #include <math.h> #define FALSE 0 #define TRUE 1 int count_primes_0(int); int count_primes_1(int); int count_primes_2(int); int main(int argc, char *argv[]){ int n; if (argc != 2){ printf("Incorrect Invocation, use: \nq1 N"); return 0; } else { n = atoi(argv[1]); } if (n < 0){ printf("N cannot be negative"); return 0; } printf("N = %d\n", n); //omp_set_num_threads(1); time_it(count_primes_0, n, "Method 0"); time_it(count_primes_1, n, "Method 1"); time_it(count_primes_2, n, "Method 2"); return 0; } int is_prime(int n){ for(int i = 2; i <= (int)(sqrt((double) n)); i++){ if ((n % i) == 0){ return FALSE; } } return n > 1; } void time_it( int (*f)(int), int n, char *string){ clock_t start_clock; clock_t end_clock; double calc_time; int nprimes; struct timeval start_val; struct timeval end_val; start_clock = clock(); nprimes = (*f)(n); end_clock = clock(); calc_time = ((double)end_clock - (double)start_clock) / CLOCKS_PER_SEC; printf("\tNumber of primes: %d \t Time taken: %fs\n\n", nprimes, calc_time); } // METHOD 0 // Base Case no parallelization int count_primes_0(int n){ int nprimes = 0; for(int i = 1; i <= n; i++){ if (is_prime(i)) { nprimes++; } } return nprimes; } //METHOD 1 // Use only For and Critical Constructs int count_primes_1(int n){ int nprimes = 0; #pragma omp parallel for for(int i = 1; i <= n; i++){ if (is_prime(i)) { #pragma omp critical nprimes++; } } return nprimes; } //METHOD 2 // Use Reduction int count_primes_2(int n){ int nprimes = 0; #pragma omp parallel for reduction(+:nprimes) for(int i = 1; i <= n; i++){ if (is_prime(i)) { nprimes++; } } return nprimes; }
The problem I am facing is that when I use omp_set_num_threads (), the fewer threads I use, the faster my functions execute or the time it takes for the base non-parallel case to execute
Time Results: They run on an 8-core computer.
8 Topics: Method 0: 0.07 s; Method 1: 1.63 s; Method 2: 1.4s
4 Topics: Method 0: 0.07 s; Method 1: 0.16 s; Method 2: 0.16 s
2 Topics: Method 0: 0.07 s; Method 1: 0.10; Method 2: 0.09
1 Topic: Method 0: 0.07 s; Method 1: 0.08 s; Method 2: 0.07 s
I tried disabling optimization and using a different version of gcc no difference
Any help is appreciated.
EDIT: using the clock in Linux returns the βwrongβ time, the wall clock time is what I need, so using omp_get_wtime () or the Linux function time will produce the correct results.
source share