iter_ull.c revision 1.1.1.1.8.2 1 1.1.1.1.8.2 tls /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2 1.1.1.1.8.2 tls Contributed by Richard Henderson <rth (at) redhat.com>.
3 1.1.1.1.8.2 tls
4 1.1.1.1.8.2 tls This file is part of the GNU OpenMP Library (libgomp).
5 1.1.1.1.8.2 tls
6 1.1.1.1.8.2 tls Libgomp is free software; you can redistribute it and/or modify it
7 1.1.1.1.8.2 tls under the terms of the GNU General Public License as published by
8 1.1.1.1.8.2 tls the Free Software Foundation; either version 3, or (at your option)
9 1.1.1.1.8.2 tls any later version.
10 1.1.1.1.8.2 tls
11 1.1.1.1.8.2 tls Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 1.1.1.1.8.2 tls WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 1.1.1.1.8.2 tls FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 1.1.1.1.8.2 tls more details.
15 1.1.1.1.8.2 tls
16 1.1.1.1.8.2 tls Under Section 7 of GPL version 3, you are granted additional
17 1.1.1.1.8.2 tls permissions described in the GCC Runtime Library Exception, version
18 1.1.1.1.8.2 tls 3.1, as published by the Free Software Foundation.
19 1.1.1.1.8.2 tls
20 1.1.1.1.8.2 tls You should have received a copy of the GNU General Public License and
21 1.1.1.1.8.2 tls a copy of the GCC Runtime Library Exception along with this program;
22 1.1.1.1.8.2 tls see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 1.1.1.1.8.2 tls <http://www.gnu.org/licenses/>. */
24 1.1.1.1.8.2 tls
25 1.1.1.1.8.2 tls /* This file contains routines for managing work-share iteration, both
26 1.1.1.1.8.2 tls for loops and sections. */
27 1.1.1.1.8.2 tls
28 1.1.1.1.8.2 tls #include "libgomp.h"
29 1.1.1.1.8.2 tls #include <stdlib.h>
30 1.1.1.1.8.2 tls
31 1.1.1.1.8.2 tls typedef unsigned long long gomp_ull;
32 1.1.1.1.8.2 tls
33 1.1.1.1.8.2 tls /* This function implements the STATIC scheduling method. The caller should
34 1.1.1.1.8.2 tls iterate *pstart <= x < *pend. Return zero if there are more iterations
35 1.1.1.1.8.2 tls to perform; nonzero if not. Return less than 0 if this thread had
36 1.1.1.1.8.2 tls received the absolutely last iteration. */
37 1.1.1.1.8.2 tls
38 1.1.1.1.8.2 tls int
39 1.1.1.1.8.2 tls gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
40 1.1.1.1.8.2 tls {
41 1.1.1.1.8.2 tls struct gomp_thread *thr = gomp_thread ();
42 1.1.1.1.8.2 tls struct gomp_team *team = thr->ts.team;
43 1.1.1.1.8.2 tls struct gomp_work_share *ws = thr->ts.work_share;
44 1.1.1.1.8.2 tls unsigned long nthreads = team ? team->nthreads : 1;
45 1.1.1.1.8.2 tls
46 1.1.1.1.8.2 tls if (thr->ts.static_trip == -1)
47 1.1.1.1.8.2 tls return -1;
48 1.1.1.1.8.2 tls
49 1.1.1.1.8.2 tls /* Quick test for degenerate teams and orphaned constructs. */
50 1.1.1.1.8.2 tls if (nthreads == 1)
51 1.1.1.1.8.2 tls {
52 1.1.1.1.8.2 tls *pstart = ws->next_ull;
53 1.1.1.1.8.2 tls *pend = ws->end_ull;
54 1.1.1.1.8.2 tls thr->ts.static_trip = -1;
55 1.1.1.1.8.2 tls return ws->next_ull == ws->end_ull;
56 1.1.1.1.8.2 tls }
57 1.1.1.1.8.2 tls
58 1.1.1.1.8.2 tls /* We interpret chunk_size zero as "unspecified", which means that we
59 1.1.1.1.8.2 tls should break up the iterations such that each thread makes only one
60 1.1.1.1.8.2 tls trip through the outer loop. */
61 1.1.1.1.8.2 tls if (ws->chunk_size_ull == 0)
62 1.1.1.1.8.2 tls {
63 1.1.1.1.8.2 tls gomp_ull n, q, i, s0, e0, s, e;
64 1.1.1.1.8.2 tls
65 1.1.1.1.8.2 tls if (thr->ts.static_trip > 0)
66 1.1.1.1.8.2 tls return 1;
67 1.1.1.1.8.2 tls
68 1.1.1.1.8.2 tls /* Compute the total number of iterations. */
69 1.1.1.1.8.2 tls if (__builtin_expect (ws->mode, 0) == 0)
70 1.1.1.1.8.2 tls n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
71 1.1.1.1.8.2 tls else
72 1.1.1.1.8.2 tls n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
73 1.1.1.1.8.2 tls i = thr->ts.team_id;
74 1.1.1.1.8.2 tls
75 1.1.1.1.8.2 tls /* Compute the "zero-based" start and end points. That is, as
76 1.1.1.1.8.2 tls if the loop began at zero and incremented by one. */
77 1.1.1.1.8.2 tls q = n / nthreads;
78 1.1.1.1.8.2 tls q += (q * nthreads != n);
79 1.1.1.1.8.2 tls s0 = q * i;
80 1.1.1.1.8.2 tls e0 = s0 + q;
81 1.1.1.1.8.2 tls if (e0 > n)
82 1.1.1.1.8.2 tls e0 = n;
83 1.1.1.1.8.2 tls
84 1.1.1.1.8.2 tls /* Notice when no iterations allocated for this thread. */
85 1.1.1.1.8.2 tls if (s0 >= e0)
86 1.1.1.1.8.2 tls {
87 1.1.1.1.8.2 tls thr->ts.static_trip = 1;
88 1.1.1.1.8.2 tls return 1;
89 1.1.1.1.8.2 tls }
90 1.1.1.1.8.2 tls
91 1.1.1.1.8.2 tls /* Transform these to the actual start and end numbers. */
92 1.1.1.1.8.2 tls s = s0 * ws->incr_ull + ws->next_ull;
93 1.1.1.1.8.2 tls e = e0 * ws->incr_ull + ws->next_ull;
94 1.1.1.1.8.2 tls
95 1.1.1.1.8.2 tls *pstart = s;
96 1.1.1.1.8.2 tls *pend = e;
97 1.1.1.1.8.2 tls thr->ts.static_trip = (e0 == n ? -1 : 1);
98 1.1.1.1.8.2 tls return 0;
99 1.1.1.1.8.2 tls }
100 1.1.1.1.8.2 tls else
101 1.1.1.1.8.2 tls {
102 1.1.1.1.8.2 tls gomp_ull n, s0, e0, i, c, s, e;
103 1.1.1.1.8.2 tls
104 1.1.1.1.8.2 tls /* Otherwise, each thread gets exactly chunk_size iterations
105 1.1.1.1.8.2 tls (if available) each time through the loop. */
106 1.1.1.1.8.2 tls
107 1.1.1.1.8.2 tls if (__builtin_expect (ws->mode, 0) == 0)
108 1.1.1.1.8.2 tls n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
109 1.1.1.1.8.2 tls else
110 1.1.1.1.8.2 tls n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
111 1.1.1.1.8.2 tls i = thr->ts.team_id;
112 1.1.1.1.8.2 tls c = ws->chunk_size_ull;
113 1.1.1.1.8.2 tls
114 1.1.1.1.8.2 tls /* Initial guess is a C sized chunk positioned nthreads iterations
115 1.1.1.1.8.2 tls in, offset by our thread number. */
116 1.1.1.1.8.2 tls s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
117 1.1.1.1.8.2 tls e0 = s0 + c;
118 1.1.1.1.8.2 tls
119 1.1.1.1.8.2 tls /* Detect overflow. */
120 1.1.1.1.8.2 tls if (s0 >= n)
121 1.1.1.1.8.2 tls return 1;
122 1.1.1.1.8.2 tls if (e0 > n)
123 1.1.1.1.8.2 tls e0 = n;
124 1.1.1.1.8.2 tls
125 1.1.1.1.8.2 tls /* Transform these to the actual start and end numbers. */
126 1.1.1.1.8.2 tls s = s0 * ws->incr_ull + ws->next_ull;
127 1.1.1.1.8.2 tls e = e0 * ws->incr_ull + ws->next_ull;
128 1.1.1.1.8.2 tls
129 1.1.1.1.8.2 tls *pstart = s;
130 1.1.1.1.8.2 tls *pend = e;
131 1.1.1.1.8.2 tls
132 1.1.1.1.8.2 tls if (e0 == n)
133 1.1.1.1.8.2 tls thr->ts.static_trip = -1;
134 1.1.1.1.8.2 tls else
135 1.1.1.1.8.2 tls thr->ts.static_trip++;
136 1.1.1.1.8.2 tls return 0;
137 1.1.1.1.8.2 tls }
138 1.1.1.1.8.2 tls }
139 1.1.1.1.8.2 tls
140 1.1.1.1.8.2 tls
141 1.1.1.1.8.2 tls /* This function implements the DYNAMIC scheduling method. Arguments are
142 1.1.1.1.8.2 tls as for gomp_iter_ull_static_next. This function must be called with
143 1.1.1.1.8.2 tls ws->lock held. */
144 1.1.1.1.8.2 tls
145 1.1.1.1.8.2 tls bool
146 1.1.1.1.8.2 tls gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
147 1.1.1.1.8.2 tls {
148 1.1.1.1.8.2 tls struct gomp_thread *thr = gomp_thread ();
149 1.1.1.1.8.2 tls struct gomp_work_share *ws = thr->ts.work_share;
150 1.1.1.1.8.2 tls gomp_ull start, end, chunk, left;
151 1.1.1.1.8.2 tls
152 1.1.1.1.8.2 tls start = ws->next_ull;
153 1.1.1.1.8.2 tls if (start == ws->end_ull)
154 1.1.1.1.8.2 tls return false;
155 1.1.1.1.8.2 tls
156 1.1.1.1.8.2 tls chunk = ws->chunk_size_ull;
157 1.1.1.1.8.2 tls left = ws->end_ull - start;
158 1.1.1.1.8.2 tls if (__builtin_expect (ws->mode & 2, 0))
159 1.1.1.1.8.2 tls {
160 1.1.1.1.8.2 tls if (chunk < left)
161 1.1.1.1.8.2 tls chunk = left;
162 1.1.1.1.8.2 tls }
163 1.1.1.1.8.2 tls else
164 1.1.1.1.8.2 tls {
165 1.1.1.1.8.2 tls if (chunk > left)
166 1.1.1.1.8.2 tls chunk = left;
167 1.1.1.1.8.2 tls }
168 1.1.1.1.8.2 tls end = start + chunk;
169 1.1.1.1.8.2 tls
170 1.1.1.1.8.2 tls ws->next_ull = end;
171 1.1.1.1.8.2 tls *pstart = start;
172 1.1.1.1.8.2 tls *pend = end;
173 1.1.1.1.8.2 tls return true;
174 1.1.1.1.8.2 tls }
175 1.1.1.1.8.2 tls
176 1.1.1.1.8.2 tls
177 1.1.1.1.8.2 tls #if defined HAVE_SYNC_BUILTINS && defined __LP64__
178 1.1.1.1.8.2 tls /* Similar, but doesn't require the lock held, and uses compare-and-swap
179 1.1.1.1.8.2 tls instead. Note that the only memory value that changes is ws->next_ull. */
180 1.1.1.1.8.2 tls
181 1.1.1.1.8.2 tls bool
182 1.1.1.1.8.2 tls gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
183 1.1.1.1.8.2 tls {
184 1.1.1.1.8.2 tls struct gomp_thread *thr = gomp_thread ();
185 1.1.1.1.8.2 tls struct gomp_work_share *ws = thr->ts.work_share;
186 1.1.1.1.8.2 tls gomp_ull start, end, nend, chunk;
187 1.1.1.1.8.2 tls
188 1.1.1.1.8.2 tls end = ws->end_ull;
189 1.1.1.1.8.2 tls chunk = ws->chunk_size_ull;
190 1.1.1.1.8.2 tls
191 1.1.1.1.8.2 tls if (__builtin_expect (ws->mode & 1, 1))
192 1.1.1.1.8.2 tls {
193 1.1.1.1.8.2 tls gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
194 1.1.1.1.8.2 tls if (__builtin_expect (ws->mode & 2, 0) == 0)
195 1.1.1.1.8.2 tls {
196 1.1.1.1.8.2 tls if (tmp >= end)
197 1.1.1.1.8.2 tls return false;
198 1.1.1.1.8.2 tls nend = tmp + chunk;
199 1.1.1.1.8.2 tls if (nend > end)
200 1.1.1.1.8.2 tls nend = end;
201 1.1.1.1.8.2 tls *pstart = tmp;
202 1.1.1.1.8.2 tls *pend = nend;
203 1.1.1.1.8.2 tls return true;
204 1.1.1.1.8.2 tls }
205 1.1.1.1.8.2 tls else
206 1.1.1.1.8.2 tls {
207 1.1.1.1.8.2 tls if (tmp <= end)
208 1.1.1.1.8.2 tls return false;
209 1.1.1.1.8.2 tls nend = tmp + chunk;
210 1.1.1.1.8.2 tls if (nend < end)
211 1.1.1.1.8.2 tls nend = end;
212 1.1.1.1.8.2 tls *pstart = tmp;
213 1.1.1.1.8.2 tls *pend = nend;
214 1.1.1.1.8.2 tls return true;
215 1.1.1.1.8.2 tls }
216 1.1.1.1.8.2 tls }
217 1.1.1.1.8.2 tls
218 1.1.1.1.8.2 tls start = ws->next_ull;
219 1.1.1.1.8.2 tls while (1)
220 1.1.1.1.8.2 tls {
221 1.1.1.1.8.2 tls gomp_ull left = end - start;
222 1.1.1.1.8.2 tls gomp_ull tmp;
223 1.1.1.1.8.2 tls
224 1.1.1.1.8.2 tls if (start == end)
225 1.1.1.1.8.2 tls return false;
226 1.1.1.1.8.2 tls
227 1.1.1.1.8.2 tls if (__builtin_expect (ws->mode & 2, 0))
228 1.1.1.1.8.2 tls {
229 1.1.1.1.8.2 tls if (chunk < left)
230 1.1.1.1.8.2 tls chunk = left;
231 1.1.1.1.8.2 tls }
232 1.1.1.1.8.2 tls else
233 1.1.1.1.8.2 tls {
234 1.1.1.1.8.2 tls if (chunk > left)
235 1.1.1.1.8.2 tls chunk = left;
236 1.1.1.1.8.2 tls }
237 1.1.1.1.8.2 tls nend = start + chunk;
238 1.1.1.1.8.2 tls
239 1.1.1.1.8.2 tls tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
240 1.1.1.1.8.2 tls if (__builtin_expect (tmp == start, 1))
241 1.1.1.1.8.2 tls break;
242 1.1.1.1.8.2 tls
243 1.1.1.1.8.2 tls start = tmp;
244 1.1.1.1.8.2 tls }
245 1.1.1.1.8.2 tls
246 1.1.1.1.8.2 tls *pstart = start;
247 1.1.1.1.8.2 tls *pend = nend;
248 1.1.1.1.8.2 tls return true;
249 1.1.1.1.8.2 tls }
250 1.1.1.1.8.2 tls #endif /* HAVE_SYNC_BUILTINS */
251 1.1.1.1.8.2 tls
252 1.1.1.1.8.2 tls
253 1.1.1.1.8.2 tls /* This function implements the GUIDED scheduling method. Arguments are
254 1.1.1.1.8.2 tls as for gomp_iter_ull_static_next. This function must be called with the
255 1.1.1.1.8.2 tls work share lock held. */
256 1.1.1.1.8.2 tls
257 1.1.1.1.8.2 tls bool
258 1.1.1.1.8.2 tls gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
259 1.1.1.1.8.2 tls {
260 1.1.1.1.8.2 tls struct gomp_thread *thr = gomp_thread ();
261 1.1.1.1.8.2 tls struct gomp_work_share *ws = thr->ts.work_share;
262 1.1.1.1.8.2 tls struct gomp_team *team = thr->ts.team;
263 1.1.1.1.8.2 tls gomp_ull nthreads = team ? team->nthreads : 1;
264 1.1.1.1.8.2 tls gomp_ull n, q;
265 1.1.1.1.8.2 tls gomp_ull start, end;
266 1.1.1.1.8.2 tls
267 1.1.1.1.8.2 tls if (ws->next_ull == ws->end_ull)
268 1.1.1.1.8.2 tls return false;
269 1.1.1.1.8.2 tls
270 1.1.1.1.8.2 tls start = ws->next_ull;
271 1.1.1.1.8.2 tls if (__builtin_expect (ws->mode, 0) == 0)
272 1.1.1.1.8.2 tls n = (ws->end_ull - start) / ws->incr_ull;
273 1.1.1.1.8.2 tls else
274 1.1.1.1.8.2 tls n = (start - ws->end_ull) / -ws->incr_ull;
275 1.1.1.1.8.2 tls q = (n + nthreads - 1) / nthreads;
276 1.1.1.1.8.2 tls
277 1.1.1.1.8.2 tls if (q < ws->chunk_size_ull)
278 1.1.1.1.8.2 tls q = ws->chunk_size_ull;
279 1.1.1.1.8.2 tls if (q <= n)
280 1.1.1.1.8.2 tls end = start + q * ws->incr_ull;
281 1.1.1.1.8.2 tls else
282 1.1.1.1.8.2 tls end = ws->end_ull;
283 1.1.1.1.8.2 tls
284 1.1.1.1.8.2 tls ws->next_ull = end;
285 1.1.1.1.8.2 tls *pstart = start;
286 1.1.1.1.8.2 tls *pend = end;
287 1.1.1.1.8.2 tls return true;
288 1.1.1.1.8.2 tls }
289 1.1.1.1.8.2 tls
290 1.1.1.1.8.2 tls #if defined HAVE_SYNC_BUILTINS && defined __LP64__
291 1.1.1.1.8.2 tls /* Similar, but doesn't require the lock held, and uses compare-and-swap
292 1.1.1.1.8.2 tls instead. Note that the only memory value that changes is ws->next_ull. */
293 1.1.1.1.8.2 tls
294 1.1.1.1.8.2 tls bool
295 1.1.1.1.8.2 tls gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
296 1.1.1.1.8.2 tls {
297 1.1.1.1.8.2 tls struct gomp_thread *thr = gomp_thread ();
298 1.1.1.1.8.2 tls struct gomp_work_share *ws = thr->ts.work_share;
299 1.1.1.1.8.2 tls struct gomp_team *team = thr->ts.team;
300 1.1.1.1.8.2 tls gomp_ull nthreads = team ? team->nthreads : 1;
301 1.1.1.1.8.2 tls gomp_ull start, end, nend, incr;
302 1.1.1.1.8.2 tls gomp_ull chunk_size;
303 1.1.1.1.8.2 tls
304 1.1.1.1.8.2 tls start = ws->next_ull;
305 1.1.1.1.8.2 tls end = ws->end_ull;
306 1.1.1.1.8.2 tls incr = ws->incr_ull;
307 1.1.1.1.8.2 tls chunk_size = ws->chunk_size_ull;
308 1.1.1.1.8.2 tls
309 1.1.1.1.8.2 tls while (1)
310 1.1.1.1.8.2 tls {
311 1.1.1.1.8.2 tls gomp_ull n, q;
312 1.1.1.1.8.2 tls gomp_ull tmp;
313 1.1.1.1.8.2 tls
314 1.1.1.1.8.2 tls if (start == end)
315 1.1.1.1.8.2 tls return false;
316 1.1.1.1.8.2 tls
317 1.1.1.1.8.2 tls if (__builtin_expect (ws->mode, 0) == 0)
318 1.1.1.1.8.2 tls n = (end - start) / incr;
319 1.1.1.1.8.2 tls else
320 1.1.1.1.8.2 tls n = (start - end) / -incr;
321 1.1.1.1.8.2 tls q = (n + nthreads - 1) / nthreads;
322 1.1.1.1.8.2 tls
323 1.1.1.1.8.2 tls if (q < chunk_size)
324 1.1.1.1.8.2 tls q = chunk_size;
325 1.1.1.1.8.2 tls if (__builtin_expect (q <= n, 1))
326 1.1.1.1.8.2 tls nend = start + q * incr;
327 1.1.1.1.8.2 tls else
328 1.1.1.1.8.2 tls nend = end;
329 1.1.1.1.8.2 tls
330 1.1.1.1.8.2 tls tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
331 1.1.1.1.8.2 tls if (__builtin_expect (tmp == start, 1))
332 1.1.1.1.8.2 tls break;
333 1.1.1.1.8.2 tls
334 1.1.1.1.8.2 tls start = tmp;
335 1.1.1.1.8.2 tls }
336 1.1.1.1.8.2 tls
337 1.1.1.1.8.2 tls *pstart = start;
338 1.1.1.1.8.2 tls *pend = nend;
339 1.1.1.1.8.2 tls return true;
340 1.1.1.1.8.2 tls }
341 1.1.1.1.8.2 tls #endif /* HAVE_SYNC_BUILTINS */
342