dist/libgomp/loop.c

1.1.1.12  mrg /* Copyright (C) 2005-2024 Free Software Foundation, Inc.
     1.1  mrg    Contributed by Richard Henderson <rth (at) redhat.com>.
     1.1  mrg
 1.1.1.3  mrg    This file is part of the GNU Offloading and Multi Processing Library
 1.1.1.3  mrg    (libgomp).
     1.1  mrg
     1.1  mrg    Libgomp is free software; you can redistribute it and/or modify it
     1.1  mrg    under the terms of the GNU General Public License as published by
     1.1  mrg    the Free Software Foundation; either version 3, or (at your option)
     1.1  mrg    any later version.
     1.1  mrg
     1.1  mrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     1.1  mrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     1.1  mrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     1.1  mrg    more details.
     1.1  mrg
     1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     1.1  mrg    3.1, as published by the Free Software Foundation.
     1.1  mrg
     1.1  mrg    You should have received a copy of the GNU General Public License and
     1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     1.1  mrg    <http://www.gnu.org/licenses/>.  */
     1.1  mrg
     1.1  mrg /* This file handles the LOOP (FOR/DO) construct.  */
     1.1  mrg
     1.1  mrg #include <limits.h>
     1.1  mrg #include <stdlib.h>
 1.1.1.9  mrg #include <string.h>
     1.1  mrg #include "libgomp.h"
     1.1  mrg
     1.1  mrg
              /* NOTE(review): ialias and ialias_redirect are not defined in this
                 file (presumably macros from libgomp.h -- confirm).  Within this
                 file GOMP_loop_runtime_next is invoked through ialias_call, and
                 GOMP_taskgroup_reduction_register is called by the GOMP_loop*_start
                 entry points that take a REDUCTIONS argument.  */
 1.1.1.9  mrg ialias (GOMP_loop_runtime_next)
 1.1.1.9  mrg ialias_redirect (GOMP_taskgroup_reduction_register)
 1.1.1.9  mrg
     1.1  mrg /* Initialize the given work share construct from the given arguments.
              
                 WS is the work share to fill in.  START, END and INCR are the loop
                 bounds and step, SCHED is the schedule kind and CHUNK_SIZE the
                 scheduling parameter.  WS->next is set to START.  WS->end is set to
                 END, except that a loop with zero iterations gets WS->end == START.
              
                 For GFS_DYNAMIC the stored chunk size is multiplied by INCR.  When
                 HAVE_SYNC_BUILTINS is defined, WS->mode is additionally set: to 0 if
                 the cheap overflow guard trips, otherwise to the result of a
                 comparison of WS->end against LONG_MAX (or -LONG_MAX for a negative
                 INCR) less (nthreads + 1) chunks.  NOTE(review): the consumer of
                 WS->mode is not in this file; presumably the dynamic iterator uses
                 it to select a faster path -- confirm.  */
     1.1  mrg
     1.1  mrg static inline void
     1.1  mrg gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
     1.1  mrg 		enum gomp_schedule_type sched, long chunk_size)
     1.1  mrg {
     1.1  mrg   ws->sched = sched;
     1.1  mrg   ws->chunk_size = chunk_size;
     1.1  mrg   /* Canonicalize loops that have zero iterations to ->next == ->end.  */
     1.1  mrg   ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
     1.1  mrg 	    ? start : end;
     1.1  mrg   ws->incr = incr;
     1.1  mrg   ws->next = start;
     1.1  mrg   if (sched == GFS_DYNAMIC)
     1.1  mrg     {
                    /* From here on the chunk size is expressed as CHUNK_SIZE * INCR,
                       so it is negative when INCR is negative.  */
     1.1  mrg       ws->chunk_size *= incr;
     1.1  mrg
     1.1  mrg #ifdef HAVE_SYNC_BUILTINS
     1.1  mrg       {
     1.1  mrg 	/* For dynamic scheduling prepare things to make each iteration
     1.1  mrg 	   faster.  */
     1.1  mrg 	struct gomp_thread *thr = gomp_thread ();
     1.1  mrg 	struct gomp_team *team = thr->ts.team;
              	/* Without a team the loop is treated as run by one thread.  */
     1.1  mrg 	long nthreads = team ? team->nthreads : 1;
     1.1  mrg
     1.1  mrg 	if (__builtin_expect (incr > 0, 1))
     1.1  mrg 	  {
     1.1  mrg 	    /* Cheap overflow protection.  */
              	    /* If either NTHREADS or the scaled chunk size reaches half the
              	       bits of a long (minus one), (nthreads + 1) * chunk_size below
              	       might overflow, so give up and use mode 0.  */
     1.1  mrg 	    if (__builtin_expect ((nthreads | ws->chunk_size)
     1.1  mrg 				  >= 1UL << (sizeof (long)
     1.1  mrg 					     * __CHAR_BIT__ / 2 - 1), 0))
     1.1  mrg 	      ws->mode = 0;
     1.1  mrg 	    else
     1.1  mrg 	      ws->mode = ws->end < (LONG_MAX
     1.1  mrg 				    - (nthreads + 1) * ws->chunk_size);
     1.1  mrg 	  }
     1.1  mrg 	/* Cheap overflow protection.  */
              	/* Same guard for a negative INCR; the scaled chunk size is negated
              	   so the test is done on its magnitude.  */
     1.1  mrg 	else if (__builtin_expect ((nthreads | -ws->chunk_size)
     1.1  mrg 				   >= 1UL << (sizeof (long)
     1.1  mrg 					      * __CHAR_BIT__ / 2 - 1), 0))
     1.1  mrg 	  ws->mode = 0;
     1.1  mrg 	else
     1.1  mrg 	  ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
     1.1  mrg       }
     1.1  mrg #endif
     1.1  mrg     }
     1.1  mrg }
     1.1  mrg
     1.1  mrg /* The *_start routines are called when first encountering a loop construct
 1.1.1.9  mrg    that is not bound directly to a parallel construct.  The first thread
     1.1  mrg    that arrives will create the work-share construct; subsequent threads
     1.1  mrg    will see the construct exists and allocate work from it.
     1.1  mrg
     1.1  mrg    START, END, INCR are the bounds of the loop; due to the restrictions of
 1.1.1.9  mrg    OpenMP, these values must be the same in every thread.  This is not
     1.1  mrg    verified (nor is it entirely verifiable, since START is not necessarily
     1.1  mrg    retained intact in the work-share data structure).  CHUNK_SIZE is the
     1.1  mrg    scheduling parameter; again this must be identical in all threads.
     1.1  mrg
     1.1  mrg    Returns true if there's any work for this thread to perform.  If so,
     1.1  mrg    *ISTART and *IEND are filled with the bounds of the iteration block
     1.1  mrg    allocated to this thread.  Returns false if all work was assigned to
     1.1  mrg    other threads prior to this thread's arrival.  */
     1.1  mrg
              /* Static-schedule variant of the *_start routines.  Sets
                 thr->ts.static_trip to 0.  If gomp_work_share_start (0) returns true,
                 this thread initializes the work share with GFS_STATIC and CHUNK_SIZE
                 and then calls gomp_work_share_init_done.  Returns true exactly when
                 gomp_iter_static_next (ISTART, IEND) returns zero.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_static_start (long start, long end, long incr, long chunk_size,
     1.1  mrg 			long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg
     1.1  mrg   thr->ts.static_trip = 0;
 1.1.1.9  mrg   if (gomp_work_share_start (0))
     1.1  mrg     {
     1.1  mrg       gomp_loop_init (thr->ts.work_share, start, end, incr,
     1.1  mrg 		      GFS_STATIC, chunk_size);
     1.1  mrg       gomp_work_share_init_done ();
     1.1  mrg     }
     1.1  mrg
                /* The iterator's zero result is the "work assigned" case, hence the
                   negation.  */
     1.1  mrg   return !gomp_iter_static_next (istart, iend);
     1.1  mrg }
     1.1  mrg
 1.1.1.4  mrg /* The current dynamic implementation is always monotonic.  The
 1.1.1.4  mrg    entrypoints without nonmonotonic in them have to be always monotonic,
 1.1.1.4  mrg    but the nonmonotonic ones could be changed to use work-stealing for
 1.1.1.4  mrg    improved scalability.
              
                 Dynamic-schedule variant of the *_start routines.  If
                 gomp_work_share_start (0) returns true, this thread initializes the
                 work share with GFS_DYNAMIC and CHUNK_SIZE.  The first iteration
                 block is then requested with gomp_iter_dynamic_next when
                 HAVE_SYNC_BUILTINS is defined, and otherwise with
                 gomp_iter_dynamic_next_locked while holding the work share lock.
                 The result of that request is returned.  */
 1.1.1.4  mrg
     1.1  mrg static bool
     1.1  mrg gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
     1.1  mrg 			 long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   bool ret;
     1.1  mrg
 1.1.1.9  mrg   if (gomp_work_share_start (0))
     1.1  mrg     {
     1.1  mrg       gomp_loop_init (thr->ts.work_share, start, end, incr,
     1.1  mrg 		      GFS_DYNAMIC, chunk_size);
     1.1  mrg       gomp_work_share_init_done ();
     1.1  mrg     }
     1.1  mrg
     1.1  mrg #ifdef HAVE_SYNC_BUILTINS
     1.1  mrg   ret = gomp_iter_dynamic_next (istart, iend);
     1.1  mrg #else
     1.1  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg   ret = gomp_iter_dynamic_next_locked (istart, iend);
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg #endif
     1.1  mrg
     1.1  mrg   return ret;
     1.1  mrg }
     1.1  mrg
 1.1.1.4  mrg /* As with dynamic, though the open question is how the chunk sizes
 1.1.1.4  mrg    could be decreased without central locking or atomics.
              
                 Guided-schedule variant of the *_start routines.  If
                 gomp_work_share_start (0) returns true, this thread initializes the
                 work share with GFS_GUIDED and CHUNK_SIZE.  The first iteration block
                 is then requested with gomp_iter_guided_next when HAVE_SYNC_BUILTINS
                 is defined, and otherwise with gomp_iter_guided_next_locked while
                 holding the work share lock.  The result of that request is
                 returned.  */
 1.1.1.4  mrg
     1.1  mrg static bool
     1.1  mrg gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
     1.1  mrg 			long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   bool ret;
     1.1  mrg
 1.1.1.9  mrg   if (gomp_work_share_start (0))
     1.1  mrg     {
     1.1  mrg       gomp_loop_init (thr->ts.work_share, start, end, incr,
     1.1  mrg 		      GFS_GUIDED, chunk_size);
     1.1  mrg       gomp_work_share_init_done ();
     1.1  mrg     }
     1.1  mrg
     1.1  mrg #ifdef HAVE_SYNC_BUILTINS
     1.1  mrg   ret = gomp_iter_guided_next (istart, iend);
     1.1  mrg #else
     1.1  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg   ret = gomp_iter_guided_next_locked (istart, iend);
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg #endif
     1.1  mrg
     1.1  mrg   return ret;
     1.1  mrg }
     1.1  mrg
              /* Start a loop whose schedule is chosen at run time.  The schedule is
                 read from icv->run_sched_var with GFS_MONOTONIC masked off, and the
                 call is forwarded to the matching gomp_loop_*_start routine with
                 icv->run_sched_chunk_size as the chunk size.  GFS_AUTO is forwarded
                 to the static routine with a chunk size of 0.  Any other schedule
                 value aborts.  Returns whatever the selected routine returns.  */

     1.1  mrg bool
     1.1  mrg GOMP_loop_runtime_start (long start, long end, long incr,
     1.1  mrg 			 long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_task_icv *icv = gomp_icv (false);
 1.1.1.9  mrg   switch (icv->run_sched_var & ~GFS_MONOTONIC)
     1.1  mrg     {
     1.1  mrg     case GFS_STATIC:
 1.1.1.4  mrg       return gomp_loop_static_start (start, end, incr,
 1.1.1.4  mrg 				     icv->run_sched_chunk_size,
     1.1  mrg 				     istart, iend);
     1.1  mrg     case GFS_DYNAMIC:
 1.1.1.4  mrg       return gomp_loop_dynamic_start (start, end, incr,
 1.1.1.4  mrg 				      icv->run_sched_chunk_size,
     1.1  mrg 				      istart, iend);
     1.1  mrg     case GFS_GUIDED:
 1.1.1.4  mrg       return gomp_loop_guided_start (start, end, incr,
 1.1.1.4  mrg 				     icv->run_sched_chunk_size,
     1.1  mrg 				     istart, iend);
     1.1  mrg     case GFS_AUTO:
     1.1  mrg       /* For now map to schedule(static), later on we could play with feedback
     1.1  mrg 	 driven choice.  */
     1.1  mrg       return gomp_loop_static_start (start, end, incr, 0, istart, iend);
     1.1  mrg     default:
     1.1  mrg       abort ();
     1.1  mrg     }
     1.1  mrg }
     1.1  mrg
              /* Resolve SCHED to the schedule kind that will be stored in the work
                 share.  GFS_MONOTONIC is masked off first.
              
                 GFS_STATIC, GFS_DYNAMIC and GFS_GUIDED are returned as is and
                 *CHUNK_SIZE is not touched.  For GFS_RUNTIME and GFS_AUTO the schedule
                 is read from icv->run_sched_var (again with GFS_MONOTONIC masked off):
                 static, dynamic and guided store icv->run_sched_chunk_size into
                 *CHUNK_SIZE, while GFS_AUTO in the ICV yields GFS_STATIC with
                 *CHUNK_SIZE set to 0.  Any other value, either in SCHED or in the
                 ICV, aborts.  The function therefore only ever returns GFS_STATIC,
                 GFS_DYNAMIC or GFS_GUIDED.  */

 1.1.1.9  mrg static long
 1.1.1.9  mrg gomp_adjust_sched (long sched, long *chunk_size)
 1.1.1.9  mrg {
 1.1.1.9  mrg   sched &= ~GFS_MONOTONIC;
 1.1.1.9  mrg   switch (sched)
 1.1.1.9  mrg     {
 1.1.1.9  mrg     case GFS_STATIC:
 1.1.1.9  mrg     case GFS_DYNAMIC:
 1.1.1.9  mrg     case GFS_GUIDED:
 1.1.1.9  mrg       return sched;
 1.1.1.9  mrg     /* GFS_RUNTIME is used for runtime schedule without monotonic
 1.1.1.9  mrg        or nonmonotonic modifiers on the clause.
 1.1.1.9  mrg        GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
 1.1.1.9  mrg        modifier.  */
 1.1.1.9  mrg     case GFS_RUNTIME:
 1.1.1.9  mrg     /* GFS_AUTO is used for runtime schedule with nonmonotonic
 1.1.1.9  mrg        modifier.  */
 1.1.1.9  mrg     case GFS_AUTO:
 1.1.1.9  mrg       {
 1.1.1.9  mrg 	struct gomp_task_icv *icv = gomp_icv (false);
 1.1.1.9  mrg 	sched = icv->run_sched_var & ~GFS_MONOTONIC;
 1.1.1.9  mrg 	switch (sched)
 1.1.1.9  mrg 	  {
 1.1.1.9  mrg 	  case GFS_STATIC:
 1.1.1.9  mrg 	  case GFS_DYNAMIC:
 1.1.1.9  mrg 	  case GFS_GUIDED:
 1.1.1.9  mrg 	    *chunk_size = icv->run_sched_chunk_size;
 1.1.1.9  mrg 	    break;
 1.1.1.9  mrg 	  case GFS_AUTO:
              	    /* Same mapping as the *_runtime_start routines: auto becomes
              	       static with a chunk size of 0.  */
 1.1.1.9  mrg 	    sched = GFS_STATIC;
 1.1.1.9  mrg 	    *chunk_size = 0;
 1.1.1.9  mrg 	    break;
 1.1.1.9  mrg 	  default:
 1.1.1.9  mrg 	    abort ();
 1.1.1.9  mrg 	  }
 1.1.1.9  mrg 	return sched;
 1.1.1.9  mrg       }
 1.1.1.9  mrg     default:
 1.1.1.9  mrg       abort ();
 1.1.1.9  mrg     }
 1.1.1.9  mrg }
 1.1.1.9  mrg
              /* Start a loop work share, optionally with task reductions and a
                 caller-requested scratch area.
              
                 START, END and INCR are the loop bounds.  SCHED and CHUNK_SIZE are
                 resolved with gomp_adjust_sched by the thread for which
                 gomp_work_share_start (0) returns true; that thread also initializes
                 the work share.
              
                 If REDUCTIONS is non-NULL, gomp_workshare_taskgroup_start is called
                 first.  The initializing thread then registers REDUCTIONS with
                 GOMP_taskgroup_reduction_register and records the array in the work
                 share; every other thread calls
                 gomp_workshare_task_reduction_register with its own REDUCTIONS and
                 the recorded array.
              
                 If MEM is non-NULL, the initializing thread reads a byte count from
                 *MEM and replaces it with a pointer to that many bytes, either inside
                 the work share structure itself (cleared with memset) or obtained
                 from gomp_malloc_cleared.  The other threads set *MEM from the work
                 share's ordered_team_ids pointer, rounded up to long long alignment
                 when the inline area is not already aligned.
              
                 If ISTART is NULL the function returns true without requesting any
                 iterations; otherwise it returns the result of
                 GOMP_loop_runtime_next (ISTART, IEND).  */

 1.1.1.9  mrg bool
 1.1.1.9  mrg GOMP_loop_start (long start, long end, long incr, long sched,
 1.1.1.9  mrg 		 long chunk_size, long *istart, long *iend,
 1.1.1.9  mrg 		 uintptr_t *reductions, void **mem)
 1.1.1.9  mrg {
 1.1.1.9  mrg   struct gomp_thread *thr = gomp_thread ();
 1.1.1.9  mrg
 1.1.1.9  mrg   thr->ts.static_trip = 0;
 1.1.1.9  mrg   if (reductions)
 1.1.1.9  mrg     gomp_workshare_taskgroup_start ();
 1.1.1.9  mrg   if (gomp_work_share_start (0))
 1.1.1.9  mrg     {
 1.1.1.9  mrg       sched = gomp_adjust_sched (sched, &chunk_size);
 1.1.1.9  mrg       gomp_loop_init (thr->ts.work_share, start, end, incr,
 1.1.1.9  mrg 		      sched, chunk_size);
 1.1.1.9  mrg       if (reductions)
 1.1.1.9  mrg 	{
 1.1.1.9  mrg 	  GOMP_taskgroup_reduction_register (reductions);
 1.1.1.9  mrg 	  thr->task->taskgroup->workshare = true;
 1.1.1.9  mrg 	  thr->ts.work_share->task_reductions = reductions;
 1.1.1.9  mrg 	}
 1.1.1.9  mrg       if (mem)
 1.1.1.9  mrg 	{
              	  /* On entry *MEM carries the requested size, not a pointer.  */
 1.1.1.9  mrg 	  uintptr_t size = (uintptr_t) *mem;
              	  /* Offset of inline_ordered_team_ids rounded up to the alignment
              	     of long long.  */
 1.1.1.9  mrg #define INLINE_ORDERED_TEAM_IDS_OFF \
 1.1.1.9  mrg   ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
 1.1.1.9  mrg     + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
              	  /* Allocate separately when nothing is left in the structure past
              	     the aligned offset, when the structure is less aligned than
              	     long long, or when SIZE does not fit in the remaining space.  */
1.1.1.11  mrg 	  if (sizeof (struct gomp_work_share)
1.1.1.11  mrg 	      <= INLINE_ORDERED_TEAM_IDS_OFF
1.1.1.11  mrg 	      || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
1.1.1.11  mrg 	      || size > (sizeof (struct gomp_work_share)
1.1.1.11  mrg 			- INLINE_ORDERED_TEAM_IDS_OFF))
 1.1.1.9  mrg 	    *mem
 1.1.1.9  mrg 	      = (void *) (thr->ts.work_share->ordered_team_ids
 1.1.1.9  mrg 			  = gomp_malloc_cleared (size));
 1.1.1.9  mrg 	  else
              	    /* NOTE(review): ordered_team_ids is not assigned on this path;
              	       presumably it already points at inline_ordered_team_ids, which
              	       the other threads rely on below -- confirm against
              	       gomp_work_share_start.  */
 1.1.1.9  mrg 	    *mem = memset (((char *) thr->ts.work_share)
 1.1.1.9  mrg 			   + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
 1.1.1.9  mrg 	}
 1.1.1.9  mrg       gomp_work_share_init_done ();
 1.1.1.9  mrg     }
 1.1.1.9  mrg   else
 1.1.1.9  mrg     {
 1.1.1.9  mrg       if (reductions)
 1.1.1.9  mrg 	{
 1.1.1.9  mrg 	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
 1.1.1.9  mrg 	  gomp_workshare_task_reduction_register (reductions,
 1.1.1.9  mrg 						  first_reductions);
 1.1.1.9  mrg 	}
 1.1.1.9  mrg       if (mem)
 1.1.1.9  mrg 	{
              	  /* If the inline area is already long long aligned the stored
              	     pointer is used directly; otherwise apply the same round-up as
              	     INLINE_ORDERED_TEAM_IDS_OFF does in the initializing thread.  */
 1.1.1.9  mrg 	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
 1.1.1.9  mrg 	       & (__alignof__ (long long) - 1)) == 0)
 1.1.1.9  mrg 	    *mem = (void *) thr->ts.work_share->ordered_team_ids;
 1.1.1.9  mrg 	  else
 1.1.1.9  mrg 	    {
 1.1.1.9  mrg 	      uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
 1.1.1.9  mrg 	      p += __alignof__ (long long) - 1;
 1.1.1.9  mrg 	      p &= ~(__alignof__ (long long) - 1);
 1.1.1.9  mrg 	      *mem = (void *) p;
 1.1.1.9  mrg 	    }
 1.1.1.9  mrg 	}
 1.1.1.9  mrg     }
 1.1.1.9  mrg
                /* A NULL ISTART means the caller does not want an iteration block
                   from this call.  */
 1.1.1.9  mrg   if (!istart)
 1.1.1.9  mrg     return true;
 1.1.1.9  mrg   return ialias_call (GOMP_loop_runtime_next) (istart, iend);
 1.1.1.9  mrg }
 1.1.1.9  mrg
     1.1  mrg /* The *_ordered_*_start routines are similar.  The only difference is that
     1.1  mrg    this work-share construct is initialized to expect an ORDERED section.  */
     1.1  mrg
              /* Ordered static variant.  Sets thr->ts.static_trip to 0 and passes 1
                 to gomp_work_share_start.  The initializing thread sets up the work
                 share with GFS_STATIC and calls gomp_ordered_static_init before
                 gomp_work_share_init_done.  Returns true exactly when
                 gomp_iter_static_next (ISTART, IEND) returns zero.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_ordered_static_start (long start, long end, long incr,
     1.1  mrg 				long chunk_size, long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg
     1.1  mrg   thr->ts.static_trip = 0;
 1.1.1.9  mrg   if (gomp_work_share_start (1))
     1.1  mrg     {
     1.1  mrg       gomp_loop_init (thr->ts.work_share, start, end, incr,
     1.1  mrg 		      GFS_STATIC, chunk_size);
     1.1  mrg       gomp_ordered_static_init ();
     1.1  mrg       gomp_work_share_init_done ();
     1.1  mrg     }
     1.1  mrg
     1.1  mrg   return !gomp_iter_static_next (istart, iend);
     1.1  mrg }
     1.1  mrg
              /* Ordered dynamic variant.  Passes 1 to gomp_work_share_start.  The
                 work share lock is acquired on both paths: the initializing thread
                 takes it before calling gomp_work_share_init_done, every other thread
                 takes it directly.  With the lock held, an iteration block is
                 requested with gomp_iter_dynamic_next_locked and, if one was
                 obtained, gomp_ordered_first is called.  The lock is then released
                 and the result of the request returned.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_ordered_dynamic_start (long start, long end, long incr,
     1.1  mrg 				 long chunk_size, long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   bool ret;
     1.1  mrg
 1.1.1.9  mrg   if (gomp_work_share_start (1))
     1.1  mrg     {
     1.1  mrg       gomp_loop_init (thr->ts.work_share, start, end, incr,
     1.1  mrg 		      GFS_DYNAMIC, chunk_size);
                    /* Lock before init_done; do not reorder these two calls.  */
     1.1  mrg       gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg       gomp_work_share_init_done ();
     1.1  mrg     }
     1.1  mrg   else
     1.1  mrg     gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg
     1.1  mrg   ret = gomp_iter_dynamic_next_locked (istart, iend);
     1.1  mrg   if (ret)
     1.1  mrg     gomp_ordered_first ();
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg
     1.1  mrg   return ret;
     1.1  mrg }
     1.1  mrg
              /* Ordered guided variant.  Passes 1 to gomp_work_share_start.  The
                 work share lock is acquired on both paths: the initializing thread
                 takes it before calling gomp_work_share_init_done, every other thread
                 takes it directly.  With the lock held, an iteration block is
                 requested with gomp_iter_guided_next_locked and, if one was obtained,
                 gomp_ordered_first is called.  The lock is then released and the
                 result of the request returned.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_ordered_guided_start (long start, long end, long incr,
     1.1  mrg 				long chunk_size, long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   bool ret;
     1.1  mrg
 1.1.1.9  mrg   if (gomp_work_share_start (1))
     1.1  mrg     {
     1.1  mrg       gomp_loop_init (thr->ts.work_share, start, end, incr,
     1.1  mrg 		      GFS_GUIDED, chunk_size);
                    /* Lock before init_done; do not reorder these two calls.  */
     1.1  mrg       gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg       gomp_work_share_init_done ();
     1.1  mrg     }
     1.1  mrg   else
     1.1  mrg     gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg
     1.1  mrg   ret = gomp_iter_guided_next_locked (istart, iend);
     1.1  mrg   if (ret)
     1.1  mrg     gomp_ordered_first ();
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg
     1.1  mrg   return ret;
     1.1  mrg }
     1.1  mrg
              /* Start an ordered loop whose schedule is chosen at run time.  The
                 schedule is read from icv->run_sched_var with GFS_MONOTONIC masked
                 off, and the call is forwarded to the matching
                 gomp_loop_ordered_*_start routine with icv->run_sched_chunk_size as
                 the chunk size.  GFS_AUTO is forwarded to the ordered static routine
                 with a chunk size of 0.  Any other schedule value aborts.  */

     1.1  mrg bool
     1.1  mrg GOMP_loop_ordered_runtime_start (long start, long end, long incr,
     1.1  mrg 				 long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_task_icv *icv = gomp_icv (false);
 1.1.1.9  mrg   switch (icv->run_sched_var & ~GFS_MONOTONIC)
     1.1  mrg     {
     1.1  mrg     case GFS_STATIC:
     1.1  mrg       return gomp_loop_ordered_static_start (start, end, incr,
 1.1.1.4  mrg 					     icv->run_sched_chunk_size,
     1.1  mrg 					     istart, iend);
     1.1  mrg     case GFS_DYNAMIC:
     1.1  mrg       return gomp_loop_ordered_dynamic_start (start, end, incr,
 1.1.1.4  mrg 					      icv->run_sched_chunk_size,
     1.1  mrg 					      istart, iend);
     1.1  mrg     case GFS_GUIDED:
     1.1  mrg       return gomp_loop_ordered_guided_start (start, end, incr,
 1.1.1.4  mrg 					     icv->run_sched_chunk_size,
     1.1  mrg 					     istart, iend);
     1.1  mrg     case GFS_AUTO:
     1.1  mrg       /* For now map to schedule(static), later on we could play with feedback
     1.1  mrg 	 driven choice.  */
     1.1  mrg       return gomp_loop_ordered_static_start (start, end, incr,
     1.1  mrg 					     0, istart, iend);
     1.1  mrg     default:
     1.1  mrg       abort ();
     1.1  mrg     }
     1.1  mrg }
     1.1  mrg
              /* Start an ordered loop work share, optionally with task reductions
                 and a caller-requested scratch area.
              
                 The value passed to gomp_work_share_start is 1 plus, when MEM is
                 non-NULL, the byte count read from *MEM.  NOTE(review): presumably
                 this makes the work share reserve that much room after the
                 per-thread ordered ids -- confirm against gomp_work_share_start.
              
                 The initializing thread resolves SCHED and CHUNK_SIZE with
                 gomp_adjust_sched, initializes the loop, registers REDUCTIONS if
                 given, and then either calls gomp_ordered_static_init (static
                 schedule) or takes the work share lock (any other schedule) before
                 gomp_work_share_init_done.  Every other thread registers its
                 REDUCTIONS against the recorded array, reads the schedule back from
                 the work share and takes the lock unless the schedule is static.
              
                 If MEM is non-NULL, *MEM is set to the address just past
                 ordered_team_ids[nthreads] (nthreads being 1 without a team), rounded
                 up to long long alignment.
              
                 For a static schedule the function returns true exactly when
                 gomp_iter_static_next returns zero.  Otherwise it requests a block
                 with the locked dynamic or guided iterator, calls gomp_ordered_first
                 if one was obtained, releases the lock and returns the result.  */

 1.1.1.9  mrg bool
 1.1.1.9  mrg GOMP_loop_ordered_start (long start, long end, long incr, long sched,
 1.1.1.9  mrg 			 long chunk_size, long *istart, long *iend,
 1.1.1.9  mrg 			 uintptr_t *reductions, void **mem)
 1.1.1.9  mrg {
 1.1.1.9  mrg   struct gomp_thread *thr = gomp_thread ();
 1.1.1.9  mrg   size_t ordered = 1;
 1.1.1.9  mrg   bool ret;
 1.1.1.9  mrg
 1.1.1.9  mrg   thr->ts.static_trip = 0;
 1.1.1.9  mrg   if (reductions)
 1.1.1.9  mrg     gomp_workshare_taskgroup_start ();
                /* On entry *MEM carries the requested size, not a pointer.  */
 1.1.1.9  mrg   if (mem)
 1.1.1.9  mrg     ordered += (uintptr_t) *mem;
 1.1.1.9  mrg   if (gomp_work_share_start (ordered))
 1.1.1.9  mrg     {
 1.1.1.9  mrg       sched = gomp_adjust_sched (sched, &chunk_size);
 1.1.1.9  mrg       gomp_loop_init (thr->ts.work_share, start, end, incr,
 1.1.1.9  mrg 		      sched, chunk_size);
 1.1.1.9  mrg       if (reductions)
 1.1.1.9  mrg 	{
 1.1.1.9  mrg 	  GOMP_taskgroup_reduction_register (reductions);
 1.1.1.9  mrg 	  thr->task->taskgroup->workshare = true;
 1.1.1.9  mrg 	  thr->ts.work_share->task_reductions = reductions;
 1.1.1.9  mrg 	}
 1.1.1.9  mrg       if (sched == GFS_STATIC)
 1.1.1.9  mrg 	gomp_ordered_static_init ();
 1.1.1.9  mrg       else
              	/* Held until the unlock at the end of the function.  */
 1.1.1.9  mrg 	gomp_mutex_lock (&thr->ts.work_share->lock);
 1.1.1.9  mrg       gomp_work_share_init_done ();
 1.1.1.9  mrg     }
 1.1.1.9  mrg   else
 1.1.1.9  mrg     {
 1.1.1.9  mrg       if (reductions)
 1.1.1.9  mrg 	{
 1.1.1.9  mrg 	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
 1.1.1.9  mrg 	  gomp_workshare_task_reduction_register (reductions,
 1.1.1.9  mrg 						  first_reductions);
 1.1.1.9  mrg 	}
                    /* Use the schedule resolved by the initializing thread rather
                       than the SCHED argument.  */
 1.1.1.9  mrg       sched = thr->ts.work_share->sched;
 1.1.1.9  mrg       if (sched != GFS_STATIC)
 1.1.1.9  mrg 	gomp_mutex_lock (&thr->ts.work_share->lock);
 1.1.1.9  mrg     }
 1.1.1.9  mrg
 1.1.1.9  mrg   if (mem)
 1.1.1.9  mrg     {
 1.1.1.9  mrg       uintptr_t p
 1.1.1.9  mrg 	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
 1.1.1.9  mrg 		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
 1.1.1.9  mrg       p += __alignof__ (long long) - 1;
 1.1.1.9  mrg       p &= ~(__alignof__ (long long) - 1);
 1.1.1.9  mrg       *mem = (void *) p;
 1.1.1.9  mrg     }
 1.1.1.9  mrg
                /* NOTE(review): GFS_AUTO shares the static path, which returns
                   without unlocking, whereas the lock above is taken for every
                   schedule other than GFS_STATIC.  This is consistent only because
                   gomp_adjust_sched never returns GFS_AUTO, so that case label is
                   not expected to be reached here.  */
 1.1.1.9  mrg   switch (sched)
 1.1.1.9  mrg     {
 1.1.1.9  mrg     case GFS_STATIC:
 1.1.1.9  mrg     case GFS_AUTO:
 1.1.1.9  mrg       return !gomp_iter_static_next (istart, iend);
 1.1.1.9  mrg     case GFS_DYNAMIC:
 1.1.1.9  mrg       ret = gomp_iter_dynamic_next_locked (istart, iend);
 1.1.1.9  mrg       break;
 1.1.1.9  mrg     case GFS_GUIDED:
 1.1.1.9  mrg       ret = gomp_iter_guided_next_locked (istart, iend);
 1.1.1.9  mrg       break;
 1.1.1.9  mrg     default:
 1.1.1.9  mrg       abort ();
 1.1.1.9  mrg     }
 1.1.1.9  mrg
 1.1.1.9  mrg   if (ret)
 1.1.1.9  mrg     gomp_ordered_first ();
 1.1.1.9  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
 1.1.1.9  mrg   return ret;
 1.1.1.9  mrg }
 1.1.1.9  mrg
 1.1.1.4  mrg /* The *_doacross_*_start routines are similar.  The only difference is that
 1.1.1.4  mrg    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
 1.1.1.4  mrg    section, and the worksharing loop always iterates from 0 to COUNTS[0] - 1,
 1.1.1.4  mrg    while the other COUNTS array elements tell the library the number of
 1.1.1.4  mrg    iterations in the ordered inner loops.
              
                 In this static variant the initializing thread sets up the loop as
                 0 .. COUNTS[0] with step 1 and GFS_STATIC, then calls
                 gomp_doacross_init (NCOUNTS, COUNTS, CHUNK_SIZE, 0).  Returns true
                 exactly when gomp_iter_static_next (ISTART, IEND) returns zero.  */
 1.1.1.4  mrg
 1.1.1.4  mrg static bool
 1.1.1.4  mrg gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
 1.1.1.4  mrg 				 long chunk_size, long *istart, long *iend)
 1.1.1.4  mrg {
 1.1.1.4  mrg   struct gomp_thread *thr = gomp_thread ();
 1.1.1.4  mrg
 1.1.1.4  mrg   thr->ts.static_trip = 0;
 1.1.1.9  mrg   if (gomp_work_share_start (0))
 1.1.1.4  mrg     {
 1.1.1.4  mrg       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 1.1.1.4  mrg 		      GFS_STATIC, chunk_size);
 1.1.1.9  mrg       gomp_doacross_init (ncounts, counts, chunk_size, 0);
 1.1.1.4  mrg       gomp_work_share_init_done ();
 1.1.1.4  mrg     }
 1.1.1.4  mrg
 1.1.1.4  mrg   return !gomp_iter_static_next (istart, iend);
 1.1.1.4  mrg }
 1.1.1.4  mrg
              /* Doacross dynamic variant.  The initializing thread sets up the loop
                 as 0 .. COUNTS[0] with step 1 and GFS_DYNAMIC, then calls
                 gomp_doacross_init (NCOUNTS, COUNTS, CHUNK_SIZE, 0).  The first
                 iteration block is requested with gomp_iter_dynamic_next when
                 HAVE_SYNC_BUILTINS is defined, and otherwise with
                 gomp_iter_dynamic_next_locked under the work share lock.  Returns
                 the result of that request.  */

 1.1.1.4  mrg static bool
 1.1.1.4  mrg gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
 1.1.1.4  mrg 				  long chunk_size, long *istart, long *iend)
 1.1.1.4  mrg {
 1.1.1.4  mrg   struct gomp_thread *thr = gomp_thread ();
 1.1.1.4  mrg   bool ret;
 1.1.1.4  mrg
 1.1.1.9  mrg   if (gomp_work_share_start (0))
 1.1.1.4  mrg     {
 1.1.1.4  mrg       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 1.1.1.4  mrg 		      GFS_DYNAMIC, chunk_size);
 1.1.1.9  mrg       gomp_doacross_init (ncounts, counts, chunk_size, 0);
 1.1.1.4  mrg       gomp_work_share_init_done ();
 1.1.1.4  mrg     }
 1.1.1.4  mrg
 1.1.1.4  mrg #ifdef HAVE_SYNC_BUILTINS
 1.1.1.4  mrg   ret = gomp_iter_dynamic_next (istart, iend);
 1.1.1.4  mrg #else
 1.1.1.4  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
 1.1.1.4  mrg   ret = gomp_iter_dynamic_next_locked (istart, iend);
 1.1.1.4  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
 1.1.1.4  mrg #endif
 1.1.1.4  mrg
 1.1.1.4  mrg   return ret;
 1.1.1.4  mrg }
 1.1.1.4  mrg
              /* Doacross guided variant.  The initializing thread sets up the loop
                 as 0 .. COUNTS[0] with step 1 and GFS_GUIDED, then calls
                 gomp_doacross_init (NCOUNTS, COUNTS, CHUNK_SIZE, 0).  The first
                 iteration block is requested with gomp_iter_guided_next when
                 HAVE_SYNC_BUILTINS is defined, and otherwise with
                 gomp_iter_guided_next_locked under the work share lock.  Returns the
                 result of that request.  */

 1.1.1.4  mrg static bool
 1.1.1.4  mrg gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
 1.1.1.4  mrg 				 long chunk_size, long *istart, long *iend)
 1.1.1.4  mrg {
 1.1.1.4  mrg   struct gomp_thread *thr = gomp_thread ();
 1.1.1.4  mrg   bool ret;
 1.1.1.4  mrg
 1.1.1.9  mrg   if (gomp_work_share_start (0))
 1.1.1.4  mrg     {
 1.1.1.4  mrg       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 1.1.1.4  mrg 		      GFS_GUIDED, chunk_size);
 1.1.1.9  mrg       gomp_doacross_init (ncounts, counts, chunk_size, 0);
 1.1.1.4  mrg       gomp_work_share_init_done ();
 1.1.1.4  mrg     }
 1.1.1.4  mrg
 1.1.1.4  mrg #ifdef HAVE_SYNC_BUILTINS
 1.1.1.4  mrg   ret = gomp_iter_guided_next (istart, iend);
 1.1.1.4  mrg #else
 1.1.1.4  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
 1.1.1.4  mrg   ret = gomp_iter_guided_next_locked (istart, iend);
 1.1.1.4  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
 1.1.1.4  mrg #endif
 1.1.1.4  mrg
 1.1.1.4  mrg   return ret;
 1.1.1.4  mrg }
 1.1.1.4  mrg
              /* Start a doacross loop whose schedule is chosen at run time.  The
                 schedule is read from icv->run_sched_var with GFS_MONOTONIC masked
                 off, and the call is forwarded to the matching
                 gomp_loop_doacross_*_start routine with icv->run_sched_chunk_size as
                 the chunk size.  GFS_AUTO is forwarded to the doacross static routine
                 with a chunk size of 0.  Any other schedule value aborts.  */

 1.1.1.4  mrg bool
 1.1.1.4  mrg GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
 1.1.1.4  mrg 				  long *istart, long *iend)
 1.1.1.4  mrg {
 1.1.1.4  mrg   struct gomp_task_icv *icv = gomp_icv (false);
 1.1.1.9  mrg   switch (icv->run_sched_var & ~GFS_MONOTONIC)
 1.1.1.4  mrg     {
 1.1.1.4  mrg     case GFS_STATIC:
 1.1.1.4  mrg       return gomp_loop_doacross_static_start (ncounts, counts,
 1.1.1.4  mrg 					      icv->run_sched_chunk_size,
 1.1.1.4  mrg 					      istart, iend);
 1.1.1.4  mrg     case GFS_DYNAMIC:
 1.1.1.4  mrg       return gomp_loop_doacross_dynamic_start (ncounts, counts,
 1.1.1.4  mrg 					       icv->run_sched_chunk_size,
 1.1.1.4  mrg 					       istart, iend);
 1.1.1.4  mrg     case GFS_GUIDED:
 1.1.1.4  mrg       return gomp_loop_doacross_guided_start (ncounts, counts,
 1.1.1.4  mrg 					      icv->run_sched_chunk_size,
 1.1.1.4  mrg 					      istart, iend);
 1.1.1.4  mrg     case GFS_AUTO:
 1.1.1.4  mrg       /* For now map to schedule(static), later on we could play with feedback
 1.1.1.4  mrg 	 driven choice.  */
 1.1.1.4  mrg       return gomp_loop_doacross_static_start (ncounts, counts,
 1.1.1.4  mrg 					      0, istart, iend);
 1.1.1.4  mrg     default:
 1.1.1.4  mrg       abort ();
 1.1.1.4  mrg     }
 1.1.1.4  mrg }
 1.1.1.4  mrg
              /* Start a doacross loop work share, optionally with task reductions
                 and a caller-requested scratch area.
              
                 The initializing thread (the one for which gomp_work_share_start (0)
                 returns true) resolves SCHED and CHUNK_SIZE with gomp_adjust_sched,
                 sets up the loop as 0 .. COUNTS[0] with step 1, and calls
                 gomp_doacross_init with the resolved chunk size and, as its last
                 argument, the byte count read from *MEM (0 when MEM is NULL).
                 REDUCTIONS, if given, are registered as in the other combined start
                 routines.
              
                 If MEM is non-NULL, every thread sets *MEM to the work share's
                 doacross->extra.  Returns the result of
                 GOMP_loop_runtime_next (ISTART, IEND).  */

 1.1.1.9  mrg bool
 1.1.1.9  mrg GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
 1.1.1.9  mrg 			  long chunk_size, long *istart, long *iend,
 1.1.1.9  mrg 			  uintptr_t *reductions, void **mem)
 1.1.1.9  mrg {
 1.1.1.9  mrg   struct gomp_thread *thr = gomp_thread ();
 1.1.1.9  mrg
 1.1.1.9  mrg   thr->ts.static_trip = 0;
 1.1.1.9  mrg   if (reductions)
 1.1.1.9  mrg     gomp_workshare_taskgroup_start ();
 1.1.1.9  mrg   if (gomp_work_share_start (0))
 1.1.1.9  mrg     {
 1.1.1.9  mrg       size_t extra = 0;
                    /* On entry *MEM carries the requested size, not a pointer.  */
 1.1.1.9  mrg       if (mem)
 1.1.1.9  mrg 	extra = (uintptr_t) *mem;
 1.1.1.9  mrg       sched = gomp_adjust_sched (sched, &chunk_size);
 1.1.1.9  mrg       gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
 1.1.1.9  mrg 		      sched, chunk_size);
 1.1.1.9  mrg       gomp_doacross_init (ncounts, counts, chunk_size, extra);
 1.1.1.9  mrg       if (reductions)
 1.1.1.9  mrg 	{
 1.1.1.9  mrg 	  GOMP_taskgroup_reduction_register (reductions);
 1.1.1.9  mrg 	  thr->task->taskgroup->workshare = true;
 1.1.1.9  mrg 	  thr->ts.work_share->task_reductions = reductions;
 1.1.1.9  mrg 	}
 1.1.1.9  mrg       gomp_work_share_init_done ();
 1.1.1.9  mrg     }
 1.1.1.9  mrg   else
 1.1.1.9  mrg     {
 1.1.1.9  mrg       if (reductions)
 1.1.1.9  mrg 	{
 1.1.1.9  mrg 	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
 1.1.1.9  mrg 	  gomp_workshare_task_reduction_register (reductions,
 1.1.1.9  mrg 						  first_reductions);
 1.1.1.9  mrg 	}
                    /* NOTE(review): SCHED is not read again in this function after
                       this assignment; GOMP_loop_runtime_next reads the schedule
                       from the work share itself.  */
 1.1.1.9  mrg       sched = thr->ts.work_share->sched;
 1.1.1.9  mrg     }
 1.1.1.9  mrg
 1.1.1.9  mrg   if (mem)
 1.1.1.9  mrg     *mem = thr->ts.work_share->doacross->extra;
 1.1.1.9  mrg
 1.1.1.9  mrg   return ialias_call (GOMP_loop_runtime_next) (istart, iend);
 1.1.1.9  mrg }
 1.1.1.9  mrg
 1.1.1.9  mrg /* The *_next routines are called when the thread completes processing of
 1.1.1.9  mrg    the iteration block currently assigned to it.  If the work-share
     1.1  mrg    construct is bound directly to a parallel construct, then the iteration
     1.1  mrg    bounds may have been set up before the parallel.  In which case, this
     1.1  mrg    may be the first iteration for the thread.
     1.1  mrg
     1.1  mrg    Returns true if there is work remaining to be performed; *ISTART and
     1.1  mrg    *IEND are filled with a new iteration block.  Returns false if all work
     1.1  mrg    has been assigned.  */
     1.1  mrg
              /* Static-schedule *_next routine.  Returns true exactly when
                 gomp_iter_static_next (ISTART, IEND) returns zero.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_static_next (long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   return !gomp_iter_static_next (istart, iend);
     1.1  mrg }
     1.1  mrg
              /* Dynamic-schedule *_next routine.  Requests the next iteration block
                 with gomp_iter_dynamic_next when HAVE_SYNC_BUILTINS is defined, and
                 otherwise with gomp_iter_dynamic_next_locked while holding the
                 current work share's lock.  Returns the result of that request.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_dynamic_next (long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   bool ret;
     1.1  mrg
     1.1  mrg #ifdef HAVE_SYNC_BUILTINS
     1.1  mrg   ret = gomp_iter_dynamic_next (istart, iend);
     1.1  mrg #else
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg   ret = gomp_iter_dynamic_next_locked (istart, iend);
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg #endif
     1.1  mrg
     1.1  mrg   return ret;
     1.1  mrg }
     1.1  mrg
              /* Guided-schedule *_next routine.  Requests the next iteration block
                 with gomp_iter_guided_next when HAVE_SYNC_BUILTINS is defined, and
                 otherwise with gomp_iter_guided_next_locked while holding the current
                 work share's lock.  Returns the result of that request.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_guided_next (long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   bool ret;
     1.1  mrg
     1.1  mrg #ifdef HAVE_SYNC_BUILTINS
     1.1  mrg   ret = gomp_iter_guided_next (istart, iend);
     1.1  mrg #else
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg   ret = gomp_iter_guided_next_locked (istart, iend);
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg #endif
     1.1  mrg
     1.1  mrg   return ret;
     1.1  mrg }
     1.1  mrg
              /* Request the next iteration block using the schedule stored in the
                 calling thread's current work share.  GFS_STATIC and GFS_AUTO use
                 gomp_loop_static_next, GFS_DYNAMIC uses gomp_loop_dynamic_next and
                 GFS_GUIDED uses gomp_loop_guided_next; any other value aborts.  This
                 function is also what GOMP_loop_start and GOMP_loop_doacross_start
                 call to obtain the first block.  */

     1.1  mrg bool
     1.1  mrg GOMP_loop_runtime_next (long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
 1.1.1.9  mrg
     1.1  mrg   switch (thr->ts.work_share->sched)
     1.1  mrg     {
     1.1  mrg     case GFS_STATIC:
     1.1  mrg     case GFS_AUTO:
     1.1  mrg       return gomp_loop_static_next (istart, iend);
     1.1  mrg     case GFS_DYNAMIC:
     1.1  mrg       return gomp_loop_dynamic_next (istart, iend);
     1.1  mrg     case GFS_GUIDED:
     1.1  mrg       return gomp_loop_guided_next (istart, iend);
     1.1  mrg     default:
     1.1  mrg       abort ();
     1.1  mrg     }
     1.1  mrg }
     1.1  mrg
     1.1  mrg /* The *_ordered_*_next routines are called when the thread completes
     1.1  mrg    processing of the iteration block currently assigned to it.
     1.1  mrg
     1.1  mrg    Returns true if there is work remaining to be performed; *ISTART and
     1.1  mrg    *IEND are filled with a new iteration block.  Returns false if all work
     1.1  mrg    has been assigned.  */
     1.1  mrg
              /* Ordered static *_next routine.  Calls gomp_ordered_sync, then, with
                 the work share lock held, asks gomp_iter_static_next for the next
                 block.  The iterator's result is treated as three-way: a negative
                 value skips gomp_ordered_static_next, and only zero makes this
                 function return true.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_ordered_static_next (long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   int test;
     1.1  mrg
     1.1  mrg   gomp_ordered_sync ();
     1.1  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg   test = gomp_iter_static_next (istart, iend);
     1.1  mrg   if (test >= 0)
     1.1  mrg     gomp_ordered_static_next ();
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg
     1.1  mrg   return test == 0;
     1.1  mrg }
     1.1  mrg
              /* Ordered dynamic *_next routine.  Calls gomp_ordered_sync, then, with
                 the work share lock held, requests a block with
                 gomp_iter_dynamic_next_locked.  gomp_ordered_next is called if a
                 block was obtained and gomp_ordered_last if not.  Returns the result
                 of the request.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_ordered_dynamic_next (long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   bool ret;
     1.1  mrg
     1.1  mrg   gomp_ordered_sync ();
     1.1  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg   ret = gomp_iter_dynamic_next_locked (istart, iend);
     1.1  mrg   if (ret)
     1.1  mrg     gomp_ordered_next ();
     1.1  mrg   else
     1.1  mrg     gomp_ordered_last ();
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg
     1.1  mrg   return ret;
     1.1  mrg }
     1.1  mrg
              /* Ordered guided *_next routine.  Calls gomp_ordered_sync, then, with
                 the work share lock held, requests a block with
                 gomp_iter_guided_next_locked.  gomp_ordered_next is called if a block
                 was obtained and gomp_ordered_last if not.  Returns the result of the
                 request.  */

     1.1  mrg static bool
     1.1  mrg gomp_loop_ordered_guided_next (long *istart, long *iend)
     1.1  mrg {
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
     1.1  mrg   bool ret;
     1.1  mrg
     1.1  mrg   gomp_ordered_sync ();
     1.1  mrg   gomp_mutex_lock (&thr->ts.work_share->lock);
     1.1  mrg   ret = gomp_iter_guided_next_locked (istart, iend);
     1.1  mrg   if (ret)
     1.1  mrg     gomp_ordered_next ();
     1.1  mrg   else
     1.1  mrg     gomp_ordered_last ();
     1.1  mrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
     1.1  mrg
     1.1  mrg   return ret;
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_ordered_runtime_next (long *istart, long *iend)
     1.1  mrg {
                /* Picks the ordered next routine matching the schedule kind stored
                   in the work share of the thread returned by gomp_thread, and
                   returns that routine's result.  */
     1.1  mrg   struct gomp_thread *thr = gomp_thread ();
 1.1.1.9  mrg
     1.1  mrg   switch (thr->ts.work_share->sched)
     1.1  mrg     {
                    /* GFS_STATIC and GFS_AUTO share the static implementation.  */
     1.1  mrg     case GFS_STATIC:
     1.1  mrg     case GFS_AUTO:
     1.1  mrg       return gomp_loop_ordered_static_next (istart, iend);
     1.1  mrg     case GFS_DYNAMIC:
     1.1  mrg       return gomp_loop_ordered_dynamic_next (istart, iend);
     1.1  mrg     case GFS_GUIDED:
     1.1  mrg       return gomp_loop_ordered_guided_next (istart, iend);
                    /* Any other schedule value aborts the process.  */
     1.1  mrg     default:
     1.1  mrg       abort ();
     1.1  mrg     }
     1.1  mrg }
     1.1  mrg
     1.1  mrg /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
     1.1  mrg    to avoid one synchronization once we get into the loop.  */
     1.1  mrg
     1.1  mrg static void
     1.1  mrg gomp_parallel_loop_start (void (*fn) (void *), void *data,
     1.1  mrg 			  unsigned num_threads, long start, long end,
     1.1  mrg 			  long incr, enum gomp_schedule_type sched,
 1.1.1.3  mrg 			  long chunk_size, unsigned int flags)
     1.1  mrg {
                /* Shared helper behind the GOMP_parallel_loop_* entry points in
                   this file.  It initializes the loop on the first work share of
                   a new team and then starts the team.  FN and DATA are only
                   passed on to gomp_team_start; FN is not called directly here.  */
     1.1  mrg   struct gomp_team *team;
     1.1  mrg
                /* The caller-supplied thread count is replaced by whatever
                   gomp_resolve_num_threads returns.  */
     1.1  mrg   num_threads = gomp_resolve_num_threads (num_threads, 0);
     1.1  mrg   team = gomp_new_team (num_threads);
                /* work_shares[0] is set up before gomp_team_start runs.  */
     1.1  mrg   gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
 1.1.1.9  mrg   gomp_team_start (fn, data, num_threads, flags, team, NULL);
     1.1  mrg }
     1.1  mrg
     1.1  mrg void
     1.1  mrg GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
     1.1  mrg 				 unsigned num_threads, long start, long end,
     1.1  mrg 				 long incr, long chunk_size)
     1.1  mrg {
                /* Sets up and starts a team for a GFS_STATIC loop with the flags
                   argument fixed to 0.  Neither FN nor GOMP_parallel_end is
                   called directly here.
                   NOTE(review): presumably the caller does both afterwards --
                   confirm against the compiler-generated call sequence.  */
     1.1  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.3  mrg 			    GFS_STATIC, chunk_size, 0);
     1.1  mrg }
     1.1  mrg
     1.1  mrg void
     1.1  mrg GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
     1.1  mrg 				  unsigned num_threads, long start, long end,
     1.1  mrg 				  long incr, long chunk_size)
     1.1  mrg {
                /* Sets up and starts a team for a GFS_DYNAMIC loop with the flags
                   argument fixed to 0.  Neither FN nor GOMP_parallel_end is
                   called directly here.
                   NOTE(review): presumably the caller does both afterwards --
                   confirm against the compiler-generated call sequence.  */
     1.1  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.3  mrg 			    GFS_DYNAMIC, chunk_size, 0);
     1.1  mrg }
     1.1  mrg
     1.1  mrg void
     1.1  mrg GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
     1.1  mrg 				 unsigned num_threads, long start, long end,
     1.1  mrg 				 long incr, long chunk_size)
     1.1  mrg {
                /* Sets up and starts a team for a GFS_GUIDED loop with the flags
                   argument fixed to 0.  Neither FN nor GOMP_parallel_end is
                   called directly here.
                   NOTE(review): presumably the caller does both afterwards --
                   confirm against the compiler-generated call sequence.  */
     1.1  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.3  mrg 			    GFS_GUIDED, chunk_size, 0);
     1.1  mrg }
     1.1  mrg
     1.1  mrg void
     1.1  mrg GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
     1.1  mrg 				  unsigned num_threads, long start, long end,
     1.1  mrg 				  long incr)
     1.1  mrg {
                /* Sets up and starts a team whose schedule kind and chunk size are
                   read from the ICV returned by gomp_icv (false).  The flags
                   argument is fixed to 0, and neither FN nor GOMP_parallel_end is
                   called directly here.  */
     1.1  mrg   struct gomp_task_icv *icv = gomp_icv (false);
                /* The GFS_MONOTONIC bit is cleared from run_sched_var before the
                   value is used as the schedule kind.  */
     1.1  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.9  mrg 			    icv->run_sched_var & ~GFS_MONOTONIC,
 1.1.1.9  mrg 			    icv->run_sched_chunk_size, 0);
 1.1.1.3  mrg }
 1.1.1.3  mrg
 1.1.1.3  mrg ialias_redirect (GOMP_parallel_end)
              /* NOTE(review): ialias_redirect is defined outside this chunk.
                 Presumably it makes the GOMP_parallel_end calls below bind to an
                 internal alias of that function -- confirm against the macro
                 definition in libgomp.h.  */
 1.1.1.3  mrg
 1.1.1.3  mrg void
 1.1.1.3  mrg GOMP_parallel_loop_static (void (*fn) (void *), void *data,
 1.1.1.3  mrg 			   unsigned num_threads, long start, long end,
 1.1.1.3  mrg 			   long incr, long chunk_size, unsigned flags)
 1.1.1.3  mrg {
                /* Complete parallel loop with the GFS_STATIC schedule: start the
                   team (passing FLAGS through), call FN (DATA) on the calling
                   thread, then call GOMP_parallel_end.  */
 1.1.1.3  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.3  mrg 			    GFS_STATIC, chunk_size, flags);
 1.1.1.3  mrg   fn (data);
 1.1.1.3  mrg   GOMP_parallel_end ();
 1.1.1.3  mrg }
 1.1.1.3  mrg
 1.1.1.3  mrg void
 1.1.1.3  mrg GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
 1.1.1.3  mrg 			    unsigned num_threads, long start, long end,
 1.1.1.3  mrg 			    long incr, long chunk_size, unsigned flags)
 1.1.1.3  mrg {
                /* Complete parallel loop with the GFS_DYNAMIC schedule: start the
                   team (passing FLAGS through), call FN (DATA) on the calling
                   thread, then call GOMP_parallel_end.  */
 1.1.1.3  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.3  mrg 			    GFS_DYNAMIC, chunk_size, flags);
 1.1.1.3  mrg   fn (data);
 1.1.1.3  mrg   GOMP_parallel_end ();
 1.1.1.3  mrg }
 1.1.1.3  mrg
 1.1.1.3  mrg void
 1.1.1.3  mrg GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
 1.1.1.3  mrg 			  unsigned num_threads, long start, long end,
 1.1.1.3  mrg 			  long incr, long chunk_size, unsigned flags)
 1.1.1.3  mrg {
                /* Complete parallel loop with the GFS_GUIDED schedule: start the
                   team (passing FLAGS through), call FN (DATA) on the calling
                   thread, then call GOMP_parallel_end.  */
 1.1.1.3  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.3  mrg 			    GFS_GUIDED, chunk_size, flags);
 1.1.1.3  mrg   fn (data);
 1.1.1.3  mrg   GOMP_parallel_end ();
 1.1.1.3  mrg }
 1.1.1.3  mrg
 1.1.1.9  mrg void
 1.1.1.9  mrg GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
 1.1.1.9  mrg 			    unsigned num_threads, long start, long end,
 1.1.1.9  mrg 			    long incr, unsigned flags)
 1.1.1.9  mrg {
                /* Complete parallel loop whose schedule kind and chunk size come
                   from the ICV returned by gomp_icv (false).  After the team is
                   started, FN (DATA) is called on the calling thread and then
                   GOMP_parallel_end is called.  */
 1.1.1.9  mrg   struct gomp_task_icv *icv = gomp_icv (false);
                /* The GFS_MONOTONIC bit is cleared from run_sched_var before the
                   value is used as the schedule kind.  */
 1.1.1.9  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.9  mrg 			    icv->run_sched_var & ~GFS_MONOTONIC,
 1.1.1.9  mrg 			    icv->run_sched_chunk_size, flags);
 1.1.1.9  mrg   fn (data);
 1.1.1.9  mrg   GOMP_parallel_end ();
 1.1.1.9  mrg }
 1.1.1.9  mrg
 1.1.1.4  mrg #ifdef HAVE_ATTRIBUTE_ALIAS
              /* When HAVE_ATTRIBUTE_ALIAS is defined, each nonmonotonic and
                 maybe_nonmonotonic parallel-loop entry point is declared as an
                 alias of the matching GOMP_parallel_loop_* function instead of
                 getting a body of its own.  Both runtime variants alias
                 GOMP_parallel_loop_runtime.  */
 1.1.1.4  mrg extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
 1.1.1.4  mrg 	__attribute__((alias ("GOMP_parallel_loop_dynamic")));
 1.1.1.4  mrg extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
 1.1.1.4  mrg 	__attribute__((alias ("GOMP_parallel_loop_guided")));
 1.1.1.9  mrg extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
 1.1.1.9  mrg 	__attribute__((alias ("GOMP_parallel_loop_runtime")));
 1.1.1.9  mrg extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
 1.1.1.9  mrg 	__attribute__((alias ("GOMP_parallel_loop_runtime")));
 1.1.1.4  mrg #else
 1.1.1.4  mrg void
 1.1.1.4  mrg GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
 1.1.1.4  mrg 					 unsigned num_threads, long start,
 1.1.1.4  mrg 					 long end, long incr, long chunk_size,
 1.1.1.4  mrg 					 unsigned flags)
 1.1.1.4  mrg {
                /* Built only when HAVE_ATTRIBUTE_ALIAS is not defined.  The body
                   repeats GOMP_parallel_loop_dynamic: start the team with
                   GFS_DYNAMIC, call FN (DATA), then call GOMP_parallel_end.  */
 1.1.1.4  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.4  mrg 			    GFS_DYNAMIC, chunk_size, flags);
 1.1.1.4  mrg   fn (data);
 1.1.1.4  mrg   GOMP_parallel_end ();
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.4  mrg void
 1.1.1.4  mrg GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
 1.1.1.4  mrg 					unsigned num_threads, long start,
 1.1.1.4  mrg 					long end, long incr, long chunk_size,
 1.1.1.4  mrg 					unsigned flags)
 1.1.1.4  mrg {
                /* Built only when HAVE_ATTRIBUTE_ALIAS is not defined.  The body
                   repeats GOMP_parallel_loop_guided: start the team with
                   GFS_GUIDED, call FN (DATA), then call GOMP_parallel_end.  */
 1.1.1.4  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.4  mrg 			    GFS_GUIDED, chunk_size, flags);
 1.1.1.4  mrg   fn (data);
 1.1.1.4  mrg   GOMP_parallel_end ();
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.3  mrg void
 1.1.1.9  mrg GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
 1.1.1.9  mrg 					 unsigned num_threads, long start,
 1.1.1.9  mrg 					 long end, long incr, unsigned flags)
 1.1.1.3  mrg {
                /* Built only when HAVE_ATTRIBUTE_ALIAS is not defined.  The body
                   repeats GOMP_parallel_loop_runtime: schedule kind (with the
                   GFS_MONOTONIC bit cleared) and chunk size come from the ICV,
                   then FN (DATA) and GOMP_parallel_end are called.  */
 1.1.1.3  mrg   struct gomp_task_icv *icv = gomp_icv (false);
 1.1.1.3  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.9  mrg 			    icv->run_sched_var & ~GFS_MONOTONIC,
 1.1.1.9  mrg 			    icv->run_sched_chunk_size, flags);
 1.1.1.3  mrg   fn (data);
 1.1.1.3  mrg   GOMP_parallel_end ();
     1.1  mrg }
     1.1  mrg
 1.1.1.9  mrg void
 1.1.1.9  mrg GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
 1.1.1.9  mrg 					       unsigned num_threads, long start,
 1.1.1.9  mrg 					       long end, long incr,
 1.1.1.9  mrg 					       unsigned flags)
 1.1.1.9  mrg {
                /* Built only when HAVE_ATTRIBUTE_ALIAS is not defined.  The body
                   repeats GOMP_parallel_loop_runtime: schedule kind (with the
                   GFS_MONOTONIC bit cleared) and chunk size come from the ICV,
                   then FN (DATA) and GOMP_parallel_end are called.  */
 1.1.1.9  mrg   struct gomp_task_icv *icv = gomp_icv (false);
 1.1.1.9  mrg   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
 1.1.1.9  mrg 			    icv->run_sched_var & ~GFS_MONOTONIC,
 1.1.1.9  mrg 			    icv->run_sched_chunk_size, flags);
 1.1.1.9  mrg   fn (data);
 1.1.1.9  mrg   GOMP_parallel_end ();
 1.1.1.9  mrg }
 1.1.1.9  mrg #endif
 1.1.1.9  mrg
     1.1  mrg /* The GOMP_loop_end* routines are called after the thread is told that
 1.1.1.3  mrg    all loop iterations are complete.  The first two versions synchronize
     1.1  mrg    all threads; the nowait version does not.  */
     1.1  mrg
     1.1  mrg void
     1.1  mrg GOMP_loop_end (void)
     1.1  mrg {
                /* All of the work is done by gomp_work_share_end.  */
     1.1  mrg   gomp_work_share_end ();
     1.1  mrg }
     1.1  mrg
 1.1.1.3  mrg bool
 1.1.1.3  mrg GOMP_loop_end_cancel (void)
 1.1.1.3  mrg {
                /* Returns whatever gomp_work_share_end_cancel returns.  */
 1.1.1.3  mrg   return gomp_work_share_end_cancel ();
 1.1.1.3  mrg }
 1.1.1.3  mrg
     1.1  mrg void
     1.1  mrg GOMP_loop_end_nowait (void)
     1.1  mrg {
                /* All of the work is done by gomp_work_share_end_nowait.  */
     1.1  mrg   gomp_work_share_end_nowait ();
     1.1  mrg }
     1.1  mrg
     1.1  mrg
     1.1  mrg /* We use static functions above so that we're sure that the "runtime"
     1.1  mrg    function can defer to the proper routine without interposition.  We
     1.1  mrg    export the static function with a strong alias when possible, or with
     1.1  mrg    a wrapper function otherwise.  */
     1.1  mrg
     1.1  mrg #ifdef HAVE_ATTRIBUTE_ALIAS
              /* Loop start entry points.  The nonmonotonic dynamic and guided
                 names alias the same gomp_loop_*_start functions as the plain
                 names; both runtime variants alias GOMP_loop_runtime_start.  */
     1.1  mrg extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
     1.1  mrg 	__attribute__((alias ("gomp_loop_static_start")));
     1.1  mrg extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
     1.1  mrg 	__attribute__((alias ("gomp_loop_dynamic_start")));
     1.1  mrg extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
     1.1  mrg 	__attribute__((alias ("gomp_loop_guided_start")));
 1.1.1.4  mrg extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
 1.1.1.4  mrg 	__attribute__((alias ("gomp_loop_dynamic_start")));
 1.1.1.4  mrg extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
 1.1.1.4  mrg 	__attribute__((alias ("gomp_loop_guided_start")));
 1.1.1.9  mrg extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
 1.1.1.9  mrg 	__attribute__((alias ("GOMP_loop_runtime_start")));
 1.1.1.9  mrg extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
 1.1.1.9  mrg 	__attribute__((alias ("GOMP_loop_runtime_start")));
     1.1  mrg
              /* Ordered loop start entry points.  */
     1.1  mrg extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
     1.1  mrg 	__attribute__((alias ("gomp_loop_ordered_static_start")));
     1.1  mrg extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
     1.1  mrg 	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
     1.1  mrg extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
     1.1  mrg 	__attribute__((alias ("gomp_loop_ordered_guided_start")));
     1.1  mrg
              /* Doacross loop start entry points.  */
 1.1.1.4  mrg extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
 1.1.1.4  mrg 	__attribute__((alias ("gomp_loop_doacross_static_start")));
 1.1.1.4  mrg extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
 1.1.1.4  mrg 	__attribute__((alias ("gomp_loop_doacross_dynamic_start")));
 1.1.1.4  mrg extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
 1.1.1.4  mrg 	__attribute__((alias ("gomp_loop_doacross_guided_start")));
 1.1.1.4  mrg
              /* Loop next entry points, aliased in the same pattern as the loop
                 start entry points: the runtime variants alias
                 GOMP_loop_runtime_next.  */
     1.1  mrg extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
     1.1  mrg 	__attribute__((alias ("gomp_loop_static_next")));
     1.1  mrg extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
     1.1  mrg 	__attribute__((alias ("gomp_loop_dynamic_next")));
     1.1  mrg extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
     1.1  mrg 	__attribute__((alias ("gomp_loop_guided_next")));
 1.1.1.4  mrg extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
 1.1.1.4  mrg 	__attribute__((alias ("gomp_loop_dynamic_next")));
 1.1.1.4  mrg extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
 1.1.1.4  mrg 	__attribute__((alias ("gomp_loop_guided_next")));
 1.1.1.9  mrg extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
 1.1.1.9  mrg 	__attribute__((alias ("GOMP_loop_runtime_next")));
 1.1.1.9  mrg extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
 1.1.1.9  mrg 	__attribute__((alias ("GOMP_loop_runtime_next")));
     1.1  mrg
              /* Ordered loop next entry points.  */
     1.1  mrg extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
     1.1  mrg 	__attribute__((alias ("gomp_loop_ordered_static_next")));
     1.1  mrg extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
     1.1  mrg 	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
     1.1  mrg extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
     1.1  mrg 	__attribute__((alias ("gomp_loop_ordered_guided_next")));
     1.1  mrg #else
     1.1  mrg bool
     1.1  mrg GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
     1.1  mrg 			long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to gomp_loop_static_start and
                   returns its result.  */
     1.1  mrg   return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
     1.1  mrg 			 long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to gomp_loop_dynamic_start and
                   returns its result.  */
     1.1  mrg   return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
     1.1  mrg 			long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to gomp_loop_guided_start and
                   returns its result.  */
     1.1  mrg   return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
 1.1.1.4  mrg GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
 1.1.1.4  mrg 				      long chunk_size, long *istart,
 1.1.1.4  mrg 				      long *iend)
 1.1.1.4  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined.
                   It has the same forwarding target as GOMP_loop_dynamic_start,
                   namely gomp_loop_dynamic_start.  */
 1.1.1.4  mrg   return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.4  mrg bool
 1.1.1.4  mrg GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
 1.1.1.4  mrg 				     long chunk_size, long *istart, long *iend)
 1.1.1.4  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined.
                   It has the same forwarding target as GOMP_loop_guided_start,
                   namely gomp_loop_guided_start.  */
 1.1.1.4  mrg   return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.4  mrg bool
 1.1.1.9  mrg GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
 1.1.1.9  mrg 				      long *istart, long *iend)
 1.1.1.9  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to GOMP_loop_runtime_start and
                   returns its result.  */
 1.1.1.9  mrg   return GOMP_loop_runtime_start (start, end, incr, istart, iend);
 1.1.1.9  mrg }
 1.1.1.9  mrg
 1.1.1.9  mrg bool
 1.1.1.9  mrg GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
 1.1.1.9  mrg 					    long *istart, long *iend)
 1.1.1.9  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to GOMP_loop_runtime_start and
                   returns its result.  */
 1.1.1.9  mrg   return GOMP_loop_runtime_start (start, end, incr, istart, iend);
 1.1.1.9  mrg }
 1.1.1.9  mrg
 1.1.1.9  mrg bool
     1.1  mrg GOMP_loop_ordered_static_start (long start, long end, long incr,
     1.1  mrg 				long chunk_size, long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to
                   gomp_loop_ordered_static_start and returns its result.  */
     1.1  mrg   return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
     1.1  mrg 					 istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
     1.1  mrg 				 long chunk_size, long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to
                   gomp_loop_ordered_dynamic_start and returns its result.  */
     1.1  mrg   return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
     1.1  mrg 					  istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_ordered_guided_start (long start, long end, long incr,
     1.1  mrg 				long chunk_size, long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to
                   gomp_loop_ordered_guided_start and returns its result.  */
     1.1  mrg   return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
     1.1  mrg 					 istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
 1.1.1.4  mrg GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
 1.1.1.4  mrg 				 long chunk_size, long *istart, long *iend)
 1.1.1.4  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to
                   gomp_loop_doacross_static_start and returns its result.  */
 1.1.1.4  mrg   return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
 1.1.1.4  mrg 					  istart, iend);
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.4  mrg bool
 1.1.1.4  mrg GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
 1.1.1.4  mrg 				  long chunk_size, long *istart, long *iend)
 1.1.1.4  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to
                   gomp_loop_doacross_dynamic_start and returns its result.  */
 1.1.1.4  mrg   return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
 1.1.1.4  mrg 					   istart, iend);
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.4  mrg bool
 1.1.1.4  mrg GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
 1.1.1.4  mrg 				 long chunk_size, long *istart, long *iend)
 1.1.1.4  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   passes every argument through to
                   gomp_loop_doacross_guided_start and returns its result.  */
 1.1.1.4  mrg   return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
 1.1.1.4  mrg 					  istart, iend);
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.4  mrg bool
     1.1  mrg GOMP_loop_static_next (long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   forwards to gomp_loop_static_next and returns its result.  */
     1.1  mrg   return gomp_loop_static_next (istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_dynamic_next (long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   forwards to gomp_loop_dynamic_next and returns its result.  */
     1.1  mrg   return gomp_loop_dynamic_next (istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_guided_next (long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   forwards to gomp_loop_guided_next and returns its result.  */
     1.1  mrg   return gomp_loop_guided_next (istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
 1.1.1.4  mrg GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
 1.1.1.4  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined.
                   It has the same forwarding target as GOMP_loop_dynamic_next,
                   namely gomp_loop_dynamic_next.  */
 1.1.1.4  mrg   return gomp_loop_dynamic_next (istart, iend);
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.4  mrg bool
 1.1.1.4  mrg GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
 1.1.1.4  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined.
                   It has the same forwarding target as GOMP_loop_guided_next,
                   namely gomp_loop_guided_next.  */
 1.1.1.4  mrg   return gomp_loop_guided_next (istart, iend);
 1.1.1.4  mrg }
 1.1.1.4  mrg
 1.1.1.4  mrg bool
 1.1.1.9  mrg GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
 1.1.1.9  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   forwards to GOMP_loop_runtime_next and returns its result.  */
 1.1.1.9  mrg   return GOMP_loop_runtime_next (istart, iend);
 1.1.1.9  mrg }
 1.1.1.9  mrg
 1.1.1.9  mrg bool
 1.1.1.9  mrg GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
 1.1.1.9  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   forwards to GOMP_loop_runtime_next and returns its result.  */
 1.1.1.9  mrg   return GOMP_loop_runtime_next (istart, iend);
 1.1.1.9  mrg }
 1.1.1.9  mrg
 1.1.1.9  mrg bool
     1.1  mrg GOMP_loop_ordered_static_next (long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   forwards to gomp_loop_ordered_static_next and returns its
                   result.  */
     1.1  mrg   return gomp_loop_ordered_static_next (istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   forwards to gomp_loop_ordered_dynamic_next and returns its
                   result.  */
     1.1  mrg   return gomp_loop_ordered_dynamic_next (istart, iend);
     1.1  mrg }
     1.1  mrg
     1.1  mrg bool
     1.1  mrg GOMP_loop_ordered_guided_next (long *istart, long *iend)
     1.1  mrg {
                /* Wrapper built only when HAVE_ATTRIBUTE_ALIAS is not defined;
                   forwards to gomp_loop_ordered_guided_next and returns its
                   result.  */
     1.1  mrg   return gomp_loop_ordered_guided_next (istart, iend);
     1.1  mrg }
     1.1  mrg #endif