- A.1 単純ループ
-
/* b[i] becomes the mean of a[i] and a[i-1]. Each iteration writes only its
 * own b[i], so the loop is handed to a parallel for as-is. */
#pragma omp parallel for
for (i = 1; i < n; i++) {
    b[i] = (a[i] + a[i-1]) / 2.0;
}
- A.2 条件コンパイル
-
/* Emit the notice whenever the compiler defines _OPENMP. */
#if defined(_OPENMP)
    printf("Compiled by an OpenMP-compliant implementation.\n");
#endif

/* Same notice, but only when the build additionally defines VERBOSE. */
#if defined(VERBOSE) && defined(_OPENMP)
    printf("Compiled by an OpenMP-compliant implementation.\n");
#endif
- A.3 parallel regionの使い方
-
/* Every thread works out its own slice size from the team size and then
 * processes its part of x through subdomain(). */
#pragma omp parallel shared(x, npoints) private(iam, np, ipoints)
{
    np = omp_get_num_threads();
    ipoints = npoints / np;
    iam = omp_get_thread_num();
    subdomain(x, iam, ipoints);
}
- A.4 nowait clauseの使い方
-
/* Two work-shared loops inside one parallel region. Both carry nowait, and
 * the second loop touches only y/z while the first touches only a/b. */
#pragma omp parallel
{
    #pragma omp for nowait
    for (i = 1; i < n; i++) {
        b[i] = (a[i] + a[i-1]) / 2.0;
    }

    #pragma omp for nowait
    for (i = 0; i < m; i++) {
        y[i] = sqrt(z[i]);
    }
}
- A.5 critical directiveの使い方
-
/* Each dequeue is wrapped in its own named critical section (xaxis / yaxis);
 * the calls to work() are outside the critical sections. */
#pragma omp parallel shared(x, y) private(x_next, y_next)
{
    #pragma omp critical (xaxis)
    {
        x_next = dequeue(x);
    }
    work(x_next);

    #pragma omp critical (yaxis)
    {
        y_next = dequeue(y);
    }
    work(y_next);
}
- A.6 lastprivate clauseの使い方
-
/* lastprivate(i) makes the value of i from the sequentially last iteration
 * available after the loop, so the statement after the region can use i to
 * fill in the final element. */
#pragma omp parallel
{
    #pragma omp for lastprivate(i)
    for (i = 0; i < n-1; i++)
        a[i] = b[i] + b[i+1];   /* terminating ';' was missing */
}
a[i] = b[i];   /* uses the i left behind by the loop */
- A.7 reduction clauseの使い方
-
/* Sums x[] into a and y[] into b; both accumulators are listed in the
 * reduction(+) clause. The directive name was misspelled "prallel", which
 * makes the pragma unrecognised. */
#pragma omp parallel for private(i) shared(x, y, n) reduction(+: a, b)
for (i = 0; i < n; i++) {
    a = a + x[i];
    b = b + y[i];
}
- A.8 parallel sectionsの使い方
-
/* Three independent calls, one per section. */
#pragma omp parallel sections
{
    #pragma omp section
    {
        xaxis();
    }
    #pragma omp section
    {
        yaxis();
    }
    #pragma omp section
    {
        zaxis();
    }
}
- A.9 single directiveの使い方
-
/* Progress messages are wrapped in single so each is printed once, while
 * work1() and work2() are called by every thread. The last single carries
 * nowait. Fixes the misspelling "beginneng" in the third message. */
#pragma omp parallel
{
    #pragma omp single
        printf("Beginning work1.\n");
    work1();
    #pragma omp single
        printf("Finishing work1.\n");
    #pragma omp single nowait
        printf("Finished work1 and beginning work2.\n");
    work2();
}
- A.10 ordered directiveの使い方
-
/* Work-shared loop carrying the ordered clause; the ordered region itself
 * lives inside work(). */
#pragma omp for ordered schedule(dynamic)
for (i = lb; i < ub; i += st) {
    work(i);
}
/* Prints k (preceded by a space) inside an ordered region. */
void work(int k)
{
    #pragma omp ordered
    {
        printf(" %d", k);
    }
}
- A.11 スレッド数を指定する例
-
/* Turn off dynamic adjustment and ask for exactly 16 threads; the region
 * aborts if it is not running with a team of 16, because the slice size
 * below is hard-wired to that count. */
omp_set_dynamic(0);
omp_set_num_threads(16);
#pragma omp parallel shared(x, npoints) private(iam, ipoints)
{
    if (omp_get_num_threads() != 16) {
        abort();
    }
    ipoints = npoints / 16;
    iam = omp_get_thread_num();
    do_by_16(x, iam, ipoints);
}
- A.12 atomic directiveの使い方
-
/* The atomic directive covers only the statement that immediately follows
 * it, i.e. the update of x[index[i]]. x is addressed indirectly through
 * index[], whereas y is addressed by the loop variable itself. */
#pragma omp parallel for shared(x, y, index, n)
for(i = 0; i < n; i++){
#pragma omp atomic
x[index[i]] += work1(i);
/* Not covered by the atomic directive above. */
y[i] += work2(i);
}
注: y[i]+= work2(i) は、atomicではない。
- A.13 flush directiveの使い方
-
/* Point-to-point hand-off between neighbouring threads using a per-thread
 * flag in sync[]. The "..." tokens below are placeholders for application
 * code, not C syntax. */
#pragma omp parallel private(iam, neighbor) shared(work, sync)
{
iam = omp_get_thread_num();
/* Clear my flag; the barrier keeps any thread from polling before all
 * flags have been cleared. */
sync[iam] = 0;
#pragma omp barrier
/* Do computation into my portion of work array */
work[iam] = ...;
/* Announce that I am done with my work
* The first flush ensures that my work is made visible before sync.
* The second flush ensures that sync is made visible.
*/
#pragma omp flush (work)
sync[iam] = 1;
#pragma omp flush(sync)
/* Wait for neighbor */
/* neighbor is iam-1, except that thread 0 wraps to the last thread. */
neighbor = ( iam > 0? iam: omp_get_num_threads() ) - 1;
/* Poll the neighbor's flag, flushing sync on every pass. */
while(sync[neighbor] == 0){
#pragma omp flush (sync)
}
/* Read neighbor's values of work array */
... = work[neighbor];
}
- A.14 listを用いないflush directiveの使い方
-
/* File-scope integer and a file-scope pointer that initially refers to it. */
int x;
int *p = &x;
/* Stores 1 through q, then issues a flush without a list. */
void f1(int *q)
{
    q[0] = 1;
    #pragma omp flush
    /* x, p, and *q are flushed
     * because they are shared and accessible */
}
/* Stores 2 through q, then waits at a barrier. */
void f2(int *q)
{
    q[0] = 2;
    #pragma omp barrier
    /* a barrier implies a flush
     * x, p, and *q are flushed
     * because they are shared and accessible */
}
/* Calls f1() and f2() from inside a parallel region and accumulates the
 * results into a reduction variable, which is returned. */
int g(int n)
{
    int i = 1;
    int j;
    int sum = 0;

    *p = 1;
    #pragma omp parallel reduction (+: sum)
    {
        f1(&j);
        /* i and n were not flushed
         * because they were not accessible in f1 */
        sum = sum + j;

        f2(&j);
        /* i and n were not flushed
         * because they were not accessible in f2
         * j was flushed because it was accessible */
        sum = sum + (i + j + *p + n);
    }
    return sum;
}
- A.15 実行スレッド数の使い方
-
/* Each thread passes its own thread number to work(), so no assumption
 * about the team size is needed outside the region. */
#pragma omp parallel private(i)
{
    i = omp_get_thread_num();
    work(i);
}
- A.16 lockの使い方
-
#include <stdio.h>
#include <omp.h>

/*
 * Simple-lock example. The header name had been lost from the #include
 * line; <omp.h> declares omp_lock_t and the omp_*_lock routines, and
 * <stdio.h> is needed for printf.
 */
int main()
{
    omp_lock_t lck;
    int id;

    omp_init_lock(&lck);
    #pragma omp parallel shared(lck) private(id)
    {
        id = omp_get_thread_num();

        /* Blocking acquire around the printf. */
        omp_set_lock(&lck);
        printf("My thread id is %d.\n", id);
        omp_unset_lock(&lck);

        /* Non-blocking acquire: keep calling skip() until the lock is won. */
        while( !omp_test_lock(&lck) ){
            skip(id);   /* we do not yet have the lock,
                           so we must do something else */
        }
        work(id);       /* we now have the lock
                           and can do the work */
        omp_unset_lock(&lck);
    }
    omp_destroy_lock(&lck);
}
- A.17 ネストしたlock
-
#include <omp.h>   /* omp_nest_lock_t; the header name was missing */

/* Two counters together with the nestable lock used by the incr_* helpers. */
typedef struct {
    int a, b;
    omp_nest_lock_t lck;
} pair;
/* Adds a to p->a without taking the lock. */
void incr_a(pair *p, int a)
{
    /* Called only from incr_pair, no need to lock. */
    p->a = p->a + a;
}
/* Adds b to p->b while holding p->lck. */
void incr_b(pair *p, int b)
{
    /* Called both from incr_pair and elsewhere,
     * so need a nestable lock. */
    omp_set_nest_lock(&p->lck);
    p->b = p->b + b;
    omp_unset_nest_lock(&p->lck);
}
/* Updates both counters under p->lck; incr_b() re-acquires the same lock
 * while it is already held here. */
void incr_pair(pair *p, int a, int b)
{
    omp_set_nest_lock(&p->lck);

    incr_a(p, a);
    incr_b(p, b);

    omp_unset_nest_lock(&p->lck);
}
/*
 * Updates *p from two concurrent sections. The first section goes through
 * incr_pair(); the second calls incr_b() directly, which is the "elsewhere"
 * caller that incr_b's own comment refers to. The original second call was
 * incr_pair(p, work3()), which passes two arguments to a three-parameter
 * function.
 */
void f(pair *p)
{
    extern int work1(), work2(), work3();

    #pragma omp parallel sections
    {
        #pragma omp section
            incr_pair(p, work1(), work2());
        #pragma omp section
            incr_b(p, work3());
    }
}
- A.18 多重ループの並列化
-
/* Nested parallelism written inline: the inner work-shared loop gets its own
 * parallel region inside the outer loop body. The inner bound was written
 * "N", a name that appears nowhere else; the shared(i, n) clause and the
 * function-based variant of this example both use n. */
#pragma omp parallel default(shared)
{
    #pragma omp for
    for( i = 0 ; i < n ; i++ ){
        #pragma omp parallel shared(i, n)
        {
            #pragma omp for
            for( j = 0 ; j < n ; j++ )
                work(i, j);
        }
    }
}
/* Outer work-shared loop; the inner parallel region lives in work1(). */
#pragma omp parallel default(shared)
{
    #pragma omp for
    for (i = 0; i < n; i++) {
        work1(i, n);
    }
}
/*
 * Runs work2(i, j) for j in [0, n) inside its own parallel region.
 * The return type was missing; the function returns no value, so it is
 * declared void (implicit int is not valid since C99).
 */
void work1(int i, int n)
{
    int j;

    #pragma omp parallel default(shared)
    {
        #pragma omp for
        for( j = 0 ; j < n ; j++ )
            work2(i, j);
    }
    return;
}
- A.19 誤った使い方
-
/* Incorrect usage (kept as written): the inner "omp for" is nested directly
 * inside the outer "omp for" of the same parallel region. */
void wrong1(int n)
{
#pragma omp parallel default(shared)
{
int i, j;
#pragma omp for
for( i = 0 ; i < n ; i++ ){
#pragma omp for
for( j = 0 ; j < n ; j++ )
work(i, j);
}
}
}
/* Incorrect usage (kept as written): work1() is called from inside an
 * "omp for" loop, and the work1() defined right after this function contains
 * an "omp for" of its own without opening a new parallel region. */
void wrong2(int n)
{
#pragma omp parallel default(shared)
{
int i;
#pragma omp for
for( i = 0 ; i < n ; i++ )
work1(i, n);
}
}
/* Helper for the wrong2 example: contains an "omp for" with no enclosing
 * parallel directive in this function, so it takes effect in whatever
 * region the caller is executing. */
void work1(int i, int n)
{
int j;
#pragma omp for
for( j = 0 ; j < n ; j++ )
work2(i, j);
}
/* Incorrect usage (kept as written): a "single" directive is placed inside
 * the body of an "omp for" loop of the same parallel region. */
void wrong3(int n)
{
#pragma omp parallel default(shared)
{
int i;
#pragma omp for
for( i = 0 ; i < n ; i++ ){
#pragma omp single
work(i);
}
}
}
/* Incorrect usage (kept as written): a "barrier" directive is placed inside
 * the body of an "omp for" loop. */
void wrong4(int n)
{
#pragma omp parallel default(shared)
{
int i;
#pragma omp for
for( i = 0 ; i < n ; i++ ){
work1(i);
#pragma omp barrier
work2(i);
}
}
}
/* Incorrect usage (kept as written): a "barrier" directive is placed inside
 * a "critical" section. */
void wrong5()
{
#pragma omp parallel default(shared)
{
#pragma omp critical
{
work1();
#pragma omp barrier
work2();
}
}
}
/* Incorrect usage (kept as written): a "barrier" directive is placed inside
 * a "single" block. */
void wrong6()
{
#pragma omp parallel
{
setup();
#pragma omp single
{
work1();
#pragma omp barrier
work2();
}
finish();
}
}
- A.20 barrier同期
-
/* Driver for the barrier-binding example: reaches sub3()'s barrier once via
 * sub1() -> sub2() and once via sub2() directly. */
int main()
{
    sub1(2);
    sub2(2);
    return 0;
}
/* Calls sub2(i) for i in [0, n) from a work-shared loop in its own
 * parallel region. */
void sub1(int n)
{
    int i;

    #pragma omp parallel private(i) shared(n)
    {
        #pragma omp for
        for (i = 0; i < n; i++) {
            sub2(i);
        }
    }
}
/* Opens a parallel region whose whole body is the call to sub3(k). */
void sub2(int k)
{
    #pragma omp parallel shared(k)
    {
        sub3(k);
    }
}
/* Runs work1(n), waits at a barrier, then runs work2(n). The barrier has no
 * enclosing parallel directive inside this function. */
void sub3(int n)
{
    work1(n);

    #pragma omp barrier

    work2(n);
}
- A.21 private clauseのスコープ
-
/* i is private and j is firstprivate inside the region, so the assignments
 * in the region act on per-thread copies. */
int i = 1;
int j = 2;

#pragma omp parallel private(i) firstprivate(j)
{
    i = 3;
    j = j + 2;
}

/* these values are undefined! */
printf("i=%d, j=%d\n", i, j);
- A.22 default(none) clause
-
/* File-scope data for the default(none) example; only x is threadprivate. */
int x;
int y;
int z[1000];
#pragma omp threadprivate(x)
/*
 * default(none) example. The two lines marked "Error" are deliberate: they
 * show references that default(none) rejects, and are kept as written.
 * The only change is the runtime call, which was misspelled
 * omp_get_num_thread(); the routine is omp_get_num_threads().
 */
void fun(int a)
{
    const int c = 1;
    int i = 0;

    #pragma omp parallel default(none) private(a) shared(z)
    {
        int j = omp_get_num_threads();
                        // O.K. - j is declared within parallel region
        a = z[j];       // O.K. - a is listed in private clause
                        //      - z is listed in shared clause
        x = c;          // O.K. - x is threadprivate
                        //      - c has const-qualified type
        z[i] = y;       // Error - cannot reference i or y here

        #pragma omp for firstprivate(y)
        for( i = 0 ; i < 10 ; i++ ){
            z[i] = y;   // O.K. - i is the loop control variable
                        //      - y is listed in firstprivate clause
        }

        z[i] = y;       // Error - cannot reference i or y here
    }
}