reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
// RUN: %libomp-compile-and-run
// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run

// These compilers don't support the taskloop construct
// UNSUPPORTED: gcc-4, gcc-5, icc-16
// GCC 6 has support for taskloops, but at least 6.3.0 is crashing on this test
// UNSUPPORTED: gcc-6

/*
 * Test for taskloop
 * Method: calculate how many times the iteration space is dispatched
 *     and judge whether each dispatch has the requested grainsize
 * It is possible for two adjacent chunks to be executed by the same thread
 */
#include <stdio.h>
#include <omp.h>
#include <stdlib.h>
#include "omp_testsuite.h"

// Number of iterations in the taskloop under test; also the length of the
// array that records which thread executed each iteration.
#define CFDMAX_SIZE 1120

/*
 * Exercises `#pragma omp taskloop grainsize(g)` for every g in [1, 47].
 *
 * For each grainsize the loop body records the executing thread's id for
 * every iteration. The recorded ids are then split into runs of consecutive
 * iterations that share a thread id, and each run length (except the last
 * run) is compared against the two accepted chunk sizes.
 *
 * A grainsize is counted as failed when an iteration was never executed or
 * when more than one run has an unexpected length.
 *
 * Returns 1 if no grainsize failed, 0 otherwise. Also returns 0 if a scratch
 * buffer cannot be allocated.
 */
int test_omp_taskloop_grainsize()
{
  int result = 0;
  int i, grainsize, count, tmp_count, num_off;
  int *tmp, *tids;

  tids = (int *)malloc(sizeof(int) * CFDMAX_SIZE);
  if (tids == NULL) {
    fprintf(stderr, "Could not allocate the thread id array\n");
    return 0;
  }

  for (grainsize = 1; grainsize < 48; ++grainsize) {
    fprintf(stderr, "Grainsize %d\n", grainsize);
    count = tmp_count = num_off = 0;

    // -1 marks an iteration that no task has executed yet.
    for (i = 0; i < CFDMAX_SIZE; ++i) {
      tids[i] = -1;
    }

    #pragma omp parallel shared(tids)
    {
      #pragma omp master
      #pragma omp taskloop grainsize(grainsize)
      for (i = 0; i < CFDMAX_SIZE; i++) {
        tids[i] = omp_get_thread_num();
      }
    }

    // Every iteration must have been executed by some thread.
    for (i = 0; i < CFDMAX_SIZE; ++i) {
      if (tids[i] == -1) {
        fprintf(stderr, "  Iteration %d not touched!\n", i);
        result++;
      }
    }

    // Count the thread id changes between neighbouring iterations; the
    // number of runs is count + 1.
    for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
      if (tids[i] != tids[i + 1]) {
        count++;
      }
    }

    tmp = (int *)malloc(sizeof(int) * (count + 1));
    if (tmp == NULL) {
      fprintf(stderr, "  Could not allocate the run length array\n");
      free(tids);
      return 0;
    }
    tmp[0] = 1;

    // Fill tmp[k] with the length of the k-th run of equal thread ids.
    for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
      // Guard against writing past tmp[count].
      if (tmp_count > count) {
        printf("--------------------\nTestinternal Error: List too "
               "small!!!\n--------------------\n");
        break;
      }
      if (tids[i] != tids[i + 1]) {
        tmp_count++;
        tmp[tmp_count] = 1;
      } else {
        tmp[tmp_count]++;
      }
    }

    // Is the grainsize clause working?
    // NOTE(review): presumably the runtime creates num_tasks chunks of
    // multiple1 or multiple2 iterations each -- confirm against libomp.
    int num_tasks = CFDMAX_SIZE / grainsize;
    int multiple1 = CFDMAX_SIZE / num_tasks;
    int multiple2 = CFDMAX_SIZE / num_tasks + 1;
    // Only the first `count` runs are checked; the last run, tmp[count], is
    // not examined.
    for (i = 0; i < count; i++) {
      // Two adjacent chunks may be executed by the same thread and merge
      // into one run, so multiples of either chunk size are accepted.
      if (tmp[i] % multiple1 != 0 && tmp[i] % multiple2 != 0) {
        num_off++;
      }
    }

    // A single off-size run is tolerated.
    if (num_off > 1) {
      fprintf(stderr, "  The number of bad chunks is %d\n", num_off);
      result++;
    } else {
      fprintf(stderr, "  Everything ok\n");
    }

    free(tmp);
  }
  free(tids);
  return (result==0);
}

/*
 * Runs the grainsize test REPETITIONS times. The process exit status is the
 * number of runs that reported failure, so 0 means every run passed.
 */
int main()
{
  int failures = 0;
  int run = 0;

  while (run < REPETITIONS) {
    if (test_omp_taskloop_grainsize() == 0) {
      ++failures;
    }
    ++run;
  }
  return failures;
}