Home | History | Annotate | Line # | Download | only in libgomp
taskloop.c revision 1.7
      1  1.7  mrg /* Copyright (C) 2015-2022 Free Software Foundation, Inc.
      2  1.1  mrg    Contributed by Jakub Jelinek <jakub (at) redhat.com>.
      3  1.1  mrg 
      4  1.1  mrg    This file is part of the GNU Offloading and Multi Processing Library
      5  1.1  mrg    (libgomp).
      6  1.1  mrg 
      7  1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
      8  1.1  mrg    under the terms of the GNU General Public License as published by
      9  1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     10  1.1  mrg    any later version.
     11  1.1  mrg 
     12  1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     13  1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     14  1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     15  1.1  mrg    more details.
     16  1.1  mrg 
     17  1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     18  1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     19  1.1  mrg    3.1, as published by the Free Software Foundation.
     20  1.1  mrg 
     21  1.1  mrg    You should have received a copy of the GNU General Public License and
     22  1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     23  1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     24  1.1  mrg    <http://www.gnu.org/licenses/>.  */
     25  1.1  mrg 
     26  1.1  mrg /* This file handles the taskloop construct.  It is included twice, once
     27  1.1  mrg    for the long and once for unsigned long long variant.  */
     28  1.1  mrg 
/* Called when encountering an explicit taskloop directive.  Splits the
   iteration range [START, END) stepped by STEP into chunks and invokes FN
   on each chunk, either inline (undeferred) or as deferred tasks.  If the
   GOMP_TASK_FLAG_IF bit of FLAGS is clear, the tasks must not be deferred;
   see FLAGS for the remaining clause bits (UNTIED, GRAINSIZE, NOGROUP,
   REDUCTION, ...).  */

void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    {
      /* This label is also reached by the goto's below when the iteration
	 range is empty: in both cases nothing is executed, but reduction
	 callers still need to be told no registration happened.  */
    early_return:
      if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
	  == GOMP_TASK_FLAG_REDUCTION)
	{
	  /* Layout of the compiler-generated argument block: the two
	     iteration bounds followed by the reduction descriptor pointer.  */
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  /* Tell callers GOMP_taskgroup_reduction_register has not been
	     called.  */
	  ptr[2] = 0;
	}
      return;
    }

  /* Compute N, the number of iterations in [START, END) with stride STEP,
     bailing out early if the range is empty.  */
#ifdef TYPE_is_long
  /* Signed (long) variant: bias by STEP-1 towards zero so the division
     rounds up, for either sign of STEP.  */
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	goto early_return;
      s--;
    }
  else
    {
      if (start <= end)
	goto early_return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  /* Unsigned (unsigned long long) variant: the direction comes from the
     GOMP_TASK_FLAG_UP bit rather than the sign of STEP.  */
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	goto early_return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	goto early_return;
      n = (start - end - step - 1) / -step;
    }
#endif

  /* Split the N iterations into NUM_TASKS chunks.  TASK_STEP is the stride
     covered by one chunk; once chunk index NFIRST has been handed out,
     the stride switches to NFIRST_TASK_STEP for the remaining chunks
     (this is how a non-divisible N is distributed).  */
  TYPE task_step = step;
  TYPE nfirst_task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      /* grainsize clause: NUM_TASKS actually carries the grain size.  */
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      /* N may be wider than unsigned long; saturate on truncation.  */
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if ((flags & GOMP_TASK_FLAG_STRICT)
	  && num_tasks != ~0ULL)
	{
	  /* grainsize (strict): every chunk gets exactly GRAINSIZE
	     iterations except possibly the last, which gets N % GRAINSIZE.  */
	  UTYPE mod = n % grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mod)
	    {
	      num_tasks++;
	      nfirst_task_step = (TYPE) mod * step;
	      if (num_tasks == 1)
		task_step = nfirst_task_step;
	      else
		nfirst = num_tasks - 2;
	    }
	}
      else if (num_tasks <= 1)
	{
	  /* Fewer than 2*GRAINSIZE iterations: run everything as one task.  */
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  /* Plenty of iterations: GRAINSIZE-sized chunks, with the first
	     N - NUM_TASKS*GRAINSIZE chunks one iteration larger to absorb
	     the remainder.  */
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  /* Between GRAINSIZE and 2*GRAINSIZE iterations per task: divide
	     as evenly as possible, first MOD chunks one iteration larger.  */
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      /* num_tasks clause (or neither clause, in which case default to one
	 task per thread in the team).  */
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  /* Even split; first MOD chunks get one extra iteration.  */
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      /* nogroup: no implicit taskgroup, but still honor cancellation of
	 the enclosing taskgroup (or of the workshare's parent group).  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && thr->task
	  && thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }
  else
    {
      /* Implicit taskgroup around the generated tasks, with reduction
	 registration if the taskloop carries in_reduction/reduction.  */
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
	{
	  /* See gomp_data_head above for the argument block layout.  */
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
	}
    }

  /* Clamp to the max-task-priority ICV.  */
  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Run undeferred (inline) when IF(0), outside a parallel, inside a final
     task, or when the team already has a large task backlog.  */
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  /* With a copy constructor, first copy-construct all argument
	     blocks (CPYFN may fire tasks' firstprivate constructors), then
	     run them; task descriptors live on the stack.  */
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      /* CPYFN runs with the new task current.  */
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      /* Patch this chunk's [begin, end) bounds into its argument
		 block before invoking the task body.  */
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      /* After chunk NFIRST, switch to the smaller stride.  */
	      if (i == nfirst)
		task_step = nfirst_task_step;
	      fn (arg);
	      arg += arg_size;
	      /* Orphan any children the task spawned before its descriptor
		 goes out of scope.  */
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
	/* No copy constructor: reuse DATA in place for every chunk.  */
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    /* Patch this chunk's [begin, end) bounds into DATA.  */
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step = nfirst_task_step;
	    fn (data);
	    /* Orphan any children before the stack descriptor dies.  */
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      /* Deferred path: heap-allocate all task descriptors (argument block
	 appended, suitably aligned), then enqueue them under the team lock
	 and wake idle threads.  */
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  /* UNDEFERRED only while CPYFN runs; flipped to WAITING below.  */
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  /* Patch this chunk's [begin, end) bounds into its copy.  */
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step = nfirst_task_step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  Only possible without CPYFN: once copy constructors have
	 run, the tasks must run so the matching destructors fire.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && cpyfn == NULL)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      /* Cancelled: free everything allocated above and unwind,
		 closing the implicit taskgroup if we opened one.  */
	      gomp_mutex_unlock (&team->task_lock);
	      for (i = 0; i < num_tasks; i++)
		{
		  gomp_finish_task (tasks[i]);
		  free (tasks[i]);
		}
	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
		ialias_call (GOMP_taskgroup_end) ();
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
      /* Enqueue every task on the parent's, taskgroup's and team's
	 priority queues.  */
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake up to NUM_TASKS idle team members to pick up the work.  */
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  /* Wait for all generated tasks unless nogroup was specified.  */
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}
    401