Home | History | Annotate | Line # | Download | only in libgomp
iter_ull.c revision 1.1.1.1.8.2
      1 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
      2    Contributed by Richard Henderson <rth (at) redhat.com>.
      3 
      4    This file is part of the GNU OpenMP Library (libgomp).
      5 
      6    Libgomp is free software; you can redistribute it and/or modify it
      7    under the terms of the GNU General Public License as published by
      8    the Free Software Foundation; either version 3, or (at your option)
      9    any later version.
     10 
     11    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     12    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     13    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     14    more details.
     15 
     16    Under Section 7 of GPL version 3, you are granted additional
     17    permissions described in the GCC Runtime Library Exception, version
     18    3.1, as published by the Free Software Foundation.
     19 
     20    You should have received a copy of the GNU General Public License and
     21    a copy of the GCC Runtime Library Exception along with this program;
     22    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23    <http://www.gnu.org/licenses/>.  */
     24 
     25 /* This file contains routines for managing work-share iteration, both
     26    for loops and sections.  */
     27 
     28 #include "libgomp.h"
     29 #include <stdlib.h>
     30 
     31 typedef unsigned long long gomp_ull;
     32 
     33 /* This function implements the STATIC scheduling method.  The caller should
     34    iterate *pstart <= x < *pend.  Return zero if there are more iterations
     35    to perform; nonzero if not.  Return less than 0 if this thread had
     36    received the absolutely last iteration.  */
     37 
int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 records that an earlier call already handed this
     thread the chunk containing the very last iteration; nothing is
     left for it to do.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      /* Nonzero (no work) exactly when the range is already empty.  */
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, s0, e0, s, e;

      /* With unspecified chunk size each thread gets exactly one block,
	 so a second trip means this thread is already done.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  Note that
	 __builtin_expect returns its first argument, so this tests
	 ws->mode == 0 while hinting that mode is usually zero.
	 Nonzero mode presumably marks a downward loop stored in
	 negated form (hence the -incr_ull arithmetic) -- confirm
	 against the ull loop-init code.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      /* Round the per-thread share up when nthreads does not divide n.  */
      q += (q * nthreads != n);
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
	e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      /* -1 marks that this block contained the final iteration.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      /* Total iteration count; see the note above on the
	 __builtin_expect parenthesization and on nonzero mode.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      /* Remember whether this chunk reached the end of the range;
	 otherwise count the completed trip for the next call.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
    139 
    140 
    141 /* This function implements the DYNAMIC scheduling method.  Arguments are
    142    as for gomp_iter_ull_static_next.  This function must be called with
    143    ws->lock held.  */
    144 
    145 bool
    146 gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
    147 {
    148   struct gomp_thread *thr = gomp_thread ();
    149   struct gomp_work_share *ws = thr->ts.work_share;
    150   gomp_ull start, end, chunk, left;
    151 
    152   start = ws->next_ull;
    153   if (start == ws->end_ull)
    154     return false;
    155 
    156   chunk = ws->chunk_size_ull;
    157   left = ws->end_ull - start;
    158   if (__builtin_expect (ws->mode & 2, 0))
    159     {
    160       if (chunk < left)
    161 	chunk = left;
    162     }
    163   else
    164     {
    165       if (chunk > left)
    166 	chunk = left;
    167     }
    168   end = start + chunk;
    169 
    170   ws->next_ull = end;
    171   *pstart = start;
    172   *pend = end;
    173   return true;
    174 }
    175 
    176 
    177 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
    178 /* Similar, but doesn't require the lock held, and uses compare-and-swap
    179    instead.  Note that the only memory value that changes is ws->next_ull.  */
    180 
bool
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

  /* Fast path: when mode bit 0 is set, a single atomic fetch-and-add
     claims a chunk.  This advances next_ull unconditionally, even once
     the range is exhausted, so the loop-init code presumably only sets
     this bit when next_ull can never wrap past end -- confirm against
     the ull loop setup.  */
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      /* __builtin_expect returns its first argument, so this tests
	 (ws->mode & 2) == 0 while hinting bit 2 is rarely set; bit 2
	 selects the inverted (downward-loop) comparisons below.  */
      if (__builtin_expect (ws->mode & 2, 0) == 0)
	{
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Downward (negated) layout: all comparisons are mirrored
	     relative to the branch above.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: claim [start, start + chunk) with compare-and-swap,
     retrying from the updated next_ull whenever another thread wins
     the race.  */
  start = ws->next_ull;
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Clamp the chunk to what is left; mode bit 2 inverts the
	 comparison for the negated downward layout.  */
      if (__builtin_expect (ws->mode & 2, 0))
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race; tmp holds the value another thread installed,
	 so retry from there.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
    250 #endif /* HAVE_SYNC_BUILTINS */
    251 
    252 
    253 /* This function implements the GUIDED scheduling method.  Arguments are
    254    as for gomp_iter_ull_static_next.  This function must be called with the
    255    work share lock held.  */
    256 
    257 bool
    258 gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
    259 {
    260   struct gomp_thread *thr = gomp_thread ();
    261   struct gomp_work_share *ws = thr->ts.work_share;
    262   struct gomp_team *team = thr->ts.team;
    263   gomp_ull nthreads = team ? team->nthreads : 1;
    264   gomp_ull n, q;
    265   gomp_ull start, end;
    266 
    267   if (ws->next_ull == ws->end_ull)
    268     return false;
    269 
    270   start = ws->next_ull;
    271   if (__builtin_expect (ws->mode, 0) == 0)
    272     n = (ws->end_ull - start) / ws->incr_ull;
    273   else
    274     n = (start - ws->end_ull) / -ws->incr_ull;
    275   q = (n + nthreads - 1) / nthreads;
    276 
    277   if (q < ws->chunk_size_ull)
    278     q = ws->chunk_size_ull;
    279   if (q <= n)
    280     end = start + q * ws->incr_ull;
    281   else
    282     end = ws->end_ull;
    283 
    284   ws->next_ull = end;
    285   *pstart = start;
    286   *pend = end;
    287   return true;
    288 }
    289 
    290 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
    291 /* Similar, but doesn't require the lock held, and uses compare-and-swap
    292    instead.  Note that the only memory value that changes is ws->next_ull.  */
    293 
bool
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  /* Per the comment above the #if, only next_ull changes after the
     work share is set up, so end, incr and chunk_size can be read once
     outside the retry loop.  */
  start = ws->next_ull;
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Remaining iteration count.  __builtin_expect returns its first
	 argument, so this tests ws->mode == 0 (normal layout) while
	 hinting mode is usually zero; nonzero mode is the negated
	 (downward) layout, hence the -incr division.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (end - start) / incr;
      else
	n = (start - end) / -incr;
      /* Guided schedule: claim 1/nthreads of what is left, rounded up,
	 but never less than chunk_size.  */
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Another thread advanced next_ull first; retry from the value
	 it installed.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
    341 #endif /* HAVE_SYNC_BUILTINS */
    342