View on GitHub

Notes

reference notes

What is OpenMP?

OpenMP Targets Ease of Use

OpenMP: Fork-Join Parallelism

img

OpenMP Directive Format

Sample OpenMP Program

#include <stdio.h>
#include <omp.h>

/*
 * Minimal OpenMP program: forks a team of four threads and has every
 * thread print its own thread number.
 */
int main() {
    /*
     * num_threads(4) requests a team of four threads. If the clause is
     * omitted, the team size is chosen by the runtime: OMP_NUM_THREADS when
     * it is set, otherwise an implementation-defined default (commonly the
     * number of available cores).
     */
    #pragma omp parallel num_threads(4)
    {
        /* Each thread gets its own copy of i because it is declared inside the region. */
        int i = omp_get_thread_num();
        printf("Hello from thread %d\n", i);
    }
    return 0;
}

Output (the order of the lines may differ from run to run, because the threads execute concurrently):

Hello from thread 0
Hello from thread 1
Hello from thread 2
Hello from thread 3

Work-sharing Construct

The work-sharing construct in OpenMP is used to specify how to assign independent work to one or all of the threads.

All of these constructs must appear inside a #pragma omp parallel region (or be combined with it, as in #pragma omp parallel for).

omp for or omp do

Example:

#include <omp.h>

/* Skeleton of the combined parallel + loop work-sharing directive. */
int main()
{
    int idx;

    /* Fork a team of threads and divide the ten iterations among them. */
    #pragma omp parallel for
    for (idx = 0; idx < 10; ++idx) {
        /* Work to be done in parallel */
    }

    return 0;
}

sections

Example:

#include <omp.h>

/* Skeleton of the combined parallel + sections directive. */
int main()
{
    /* Each section below is an independent unit of work executed once. */
    #pragma omp parallel sections
    {
        #pragma omp section
        {
            /* First section: code block for one thread */
        }

        #pragma omp section
        {
            /* Second section: code block for another thread */
        }

        /* Further "#pragma omp section" blocks can be added as needed */
    }

    return 0;
}

single

Example:

#include <omp.h>

/* Skeleton of the single construct inside a parallel region. */
int main()
{
    #pragma omp parallel
    {
        /* The enclosed block runs on exactly one thread of the team. */
        #pragma omp single
        {
            /* Code block executed by only one thread */
        }

        /* Remaining parallel work, outside the single construct */
    }

    return 0;
}

master

Example:

#include <omp.h>

/* Skeleton of the master construct inside a parallel region. */
int main()
{
    #pragma omp parallel
    {
        /* The enclosed block is skipped by every thread except the master. */
        #pragma omp master
        {
            /* Code block executed by the master thread only */
        }

        /* Remaining parallel work, outside the master construct */
    }

    return 0;
}

Clauses

Since OpenMP is a shared memory programming model, most variables in OpenMP code are visible to all threads by default. Sometimes, however, private variables are needed to avoid race conditions, and values must be passed between the sequential part and the parallel region (the code block executed in parallel). OpenMP therefore provides data environment management through data sharing attribute clauses, which are appended to the OpenMP directive. The different types of clauses are:

Data Sharing Attribute Clauses

Synchronization Clauses

Schedule Clause

IF Control

Reduction

OMP general construct

#include <omp.h>

/*
 * Template for the general shape of an OpenMP program: serial code, a
 * parallel region with explicit variable scoping, then serial code again.
 */
int main()
{
    /* Variables whose scoping is declared on the parallel directive below */
    int var1;
    int var2;
    int var3;

    /* Serial code ... */

    /*
     * Start of the parallel region: a team of threads is forked here.
     * var1 and var2 are private to each thread; var3 is shared by the team.
     */
    #pragma omp parallel shared(var3) private(var1, var2)
    {
        /* Parallel region executed by all threads ... */

        /* Other OpenMP directives ... */

        /* Run-time library calls ... */

        /* At the closing brace all threads join the master thread and disband */
    }

    /* Serial code resumes ... */

    return 0;
}

Simple “Hello World” program.

#include <stdio.h>
#include <omp.h>

/*
 * "Hello World": every thread of the team prints its thread number, and
 * the master thread additionally prints the size of the team.
 */
int main()
{
    int thread_id;
    int team_size;

    /* Fork a team of threads; each thread works on its own copies of both variables. */
    #pragma omp parallel private(thread_id, team_size)
    {
        /* Look up this thread's number and report it. */
        thread_id = omp_get_thread_num();
        printf("Hello World from thread = %d\n", thread_id);

        /* Thread 0 is the master; only it reports the team size. */
        if (thread_id == 0) {
            team_size = omp_get_num_threads();
            printf("Number of threads = %d\n", team_size);
        }
    } /* Threads join the master thread here and the team disbands */
}

Possible output, assuming OMP_NUM_THREADS=8 (the order of the lines may vary between runs):

Hello World from thread = 0

Hello World from thread = 3

Hello World from thread = 2

Number of threads = 8

Hello World from thread = 6

Hello World from thread = 1

Hello World from thread = 4

Hello World from thread = 7

Hello World from thread = 5

Number of Threads

Restrictions

Interpreting an OpenMP Parallel Directive

Meaning:

/*
 * Parallel directive with several clauses (the backslash continues the
 * directive on the next line and must be the last character of its line):
 *   if (is_parallel == 1) - run in parallel only when the condition holds;
 *                           otherwise the block is executed by one thread
 *   num_threads(8)        - request a team of eight threads
 *   shared(b)             - b is shared by all threads of the team
 *   private(a)            - each thread gets its own uninitialized a
 *   firstprivate(c)       - each thread gets its own c, initialized from
 *                           the value c had before the region
 *   default(none)         - every variable used in the block must be
 *                           scoped explicitly
 */
#pragma omp parallel if (is_parallel == 1) num_threads(8) \
                     shared(b) private(a) firstprivate(c) default(none)
{
    /* structured block */
}

Specifying Worksharing

#include <stdio.h>
#include <omp.h>

/*
 * Opens a parallel region guarded by if(val) and prints whether the
 * region was actually executed in parallel or was serialized.
 *
 * val: condition of the if clause; also echoed in the printed message.
 */
void test(int val)
{
    #pragma omp parallel if(val)
    {
        if (!omp_in_parallel()) {
            printf("val = %d, serialized\n", val);
        } else {
            /* Let a single thread report, so the line is printed once per call. */
            #pragma omp single
            {
                int team_size = omp_get_num_threads();
                printf("val = %d, parallelized with %d threads\n", val, team_size);
            }
        }
    }
}

/*
 * Driver: requests two threads, then calls test() once with a zero and
 * once with a non-zero if-clause condition.
 */
int main()
{
    const int conditions[] = {0, 2};
    unsigned k;

    omp_set_num_threads(2);

    for (k = 0; k < sizeof conditions / sizeof conditions[0]; ++k) {
        test(conditions[k]);
    }

    return 0;
}

The SINGLE directive specifies that the enclosed code is to be executed by only one thread in the team.

Vector Addition Program C/C++ Code Example

#include <stdio.h>
#include <omp.h>

#define CHUNKSIZE 100
#define N 1000

/*
 * Vector addition: fills a and b with the values 0..N-1, then computes
 * c = a + b with the loop iterations handed out to the threads in
 * dynamically scheduled chunks of CHUNKSIZE iterations.
 */
int main()
{
    float a[N], b[N], c[N];
    int chunk = CHUNKSIZE;
    int i;

    /* Initialize both input vectors with the same values */
    for (i = 0; i < N; ++i) {
        b[i] = i * 1.0;
        a[i] = b[i];
    }

    /* The arrays and the chunk size are shared; the loop index is per-thread */
    #pragma omp parallel shared(a, b, c, chunk) private(i)
    {
        /* nowait: threads do not wait for each other at the end of the loop */
        #pragma omp for schedule(dynamic, chunk) nowait
        for (i = 0; i < N; ++i) {
            c[i] = a[i] + b[i];
        }
    } /* end of parallel region */

    return 0;
}

Dynamic Scheduling

Example of Sections Directive

#include <omp.h>
#define N 1000

/*
 * Sections example: after filling a and b, one section computes the
 * element-wise sum into c and another computes the element-wise product
 * into d.
 */
int main(int argc, char *argv[])
{
    float a[N], b[N], c[N], d[N];
    int i;

    /* Fill the two input arrays */
    for (i = 0; i < N; ++i) {
        a[i] = i * 1.5;
        b[i] = i + 22.35;
    }

    /* All four arrays are shared; each thread has its own loop index */
    #pragma omp parallel shared(a, b, c, d) private(i)
    {
        /* nowait: no barrier at the end of the sections construct */
        #pragma omp sections nowait
        {
            /* Section 1: c = a + b */
            #pragma omp section
            {
                for (i = 0; i < N; ++i) {
                    c[i] = a[i] + b[i];
                }
            }

            /* Section 2: d = a * b */
            #pragma omp section
            {
                for (i = 0; i < N; ++i) {
                    d[i] = a[i] * b[i];
                }
            }
        } /* end of sections */
    } /* end of parallel region */

    return 0;
}

Example of For in Sections Directive

#include <omp.h>
#define N 1000

/*
 * "For in sections" example: after filling a and b, one section computes
 * the element-wise sum into c and another the element-wise product into d,
 * with the loop inside each section itself work-shared.
 */
int main(int argc, char *argv[]) {
    int i;
    float a[N], b[N], c[N], d[N];

    /* Some initializations */
    for (i = 0; i < N; i++) {
        a[i] = i * 1.5;
        b[i] = i + 22.35;
    }

    #pragma omp parallel shared(a, b, c, d) private(i)
    {
        #pragma omp sections nowait
        {
            /*
             * A bare "#pragma omp for" may not be closely nested inside a
             * section: both are work-sharing regions, and OpenMP forbids
             * nesting one directly inside another. "parallel for" opens a
             * nested parallel region instead, inside which the loop
             * work-sharing construct is legal.
             * NOTE: the nested region may run with a single thread unless
             * nested parallelism is enabled in the runtime.
             */
            #pragma omp section
            {
                #pragma omp parallel for
                for (i = 0; i < N; i++)
                    c[i] = a[i] + b[i];
            }

            #pragma omp section
            {
                #pragma omp parallel for
                for (i = 0; i < N; i++)
                    d[i] = a[i] * b[i];
            }
        } /* end of sections */
    } /* end of parallel region */

    return 0;
}

Synchronization Construct

Master Directive

#include <omp.h>

/* Master directive example: only the master thread updates the shared x. */
int main()
{
    int x = 0;

    #pragma omp parallel shared(x)
    {
        /* ... */

        /* The increment is executed by the master thread only */
        #pragma omp master
        {
            x += 1;
        }

        /* ... */
    } /* end of parallel region */

    return 0;
}

Critical Directive

#include <omp.h>

/*
 * Critical directive example: every thread increments the shared x, with
 * the critical construct giving each thread exclusive access while it
 * does so.
 */
int main()
{
    int x = 0;

    #pragma omp parallel shared(x)
    {
        /* ... */

        /* Only one thread at a time may execute the increment */
        #pragma omp critical
        {
            x += 1;
        }

        /* ... */
    } /* end of parallel region */

    return 0;
}

This code demonstrates the use of the CRITICAL directive to ensure exclusive access to a shared variable x.

Purpose of ORDERED Directive

/*
 * Ordered directive example (fragment; a, k and nmax are declared
 * elsewhere). The statement inside the ordered block depends on the result
 * of the previous iteration, so it must run in sequential iteration order
 * while the rest of the loop body may run in parallel.
 */
#pragma omp parallel shared(a) /* shared is a clause of parallel, not of for */
{
    /* The ordered clause is required on the loop directive for the ordered block below. */
    #pragma omp for ordered nowait
    for (k = 1; k < nmax; k++) { /* start at 1: the body reads a[k - 1] */
        // ...

        #pragma omp ordered
        {
            a[k] = a[k - 1] + ...;
        }

        // ...
    }
}

Directives summary

img