/* Copyright (C) 2015-2022 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub (at) redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for unsigned long long variant.  */

/* Entry point for the taskloop construct.  Splits the loop iteration
   space [START, END) with STEP into chunks and creates one task per
   chunk, honoring the IF, FINAL, UNTIED, GRAINSIZE/NUM_TASKS and
   NOGROUP/REDUCTION clauses encoded in FLAGS.  */
32 1.1 mrg
/* FN is the task body, invoked once per chunk with an argument block whose
   first two TYPE slots hold that chunk's [begin, end) bounds.  DATA is the
   ARG_SIZE-byte, ARG_ALIGN-aligned argument template; CPYFN, when non-NULL,
   copy-constructs each task's copy of it.  FLAGS carries the GOMP_TASK_FLAG_*
   bits; NUM_TASKS is either the requested task count or, when
   GOMP_TASK_FLAG_GRAINSIZE is set, the requested iterations-per-task.  */
void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    {
      /* Shared bail-out path, also reached via goto when the loop has zero
	 iterations.  */
    early_return:
      if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
	  == GOMP_TASK_FLAG_REDUCTION)
	{
	  /* Layout of the reduction-carrying argument block: two TYPE
	     bounds slots followed by the reduction data pointer.  */
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  /* Tell callers GOMP_taskgroup_reduction_register has not been
	     called.  */
	  ptr[2] = 0;
	}
      return;
    }

  /* Compute N, the number of loop iterations; a zero-trip loop returns
     through early_return above.  */
#ifdef TYPE_is_long
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	goto early_return;
      s--;
    }
  else
    {
      if (start <= end)
	goto early_return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  /* Unsigned variant: the sign of STEP is carried separately in the
     GOMP_TASK_FLAG_UP bit.  */
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	goto early_return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	goto early_return;
      n = (start - end - step - 1) / -step;
    }
#endif

  /* TASK_STEP is how far START advances per created task.  Tasks after
     index NFIRST switch to NFIRST_TASK_STEP, which lets the remainder
     iterations (when N doesn't divide evenly) be absorbed by giving the
     leading tasks one extra iteration each.  Defaults cover the
     single-iteration-per-task case.  */
  TYPE task_step = step;
  TYPE nfirst_task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      /* grainsize clause: NUM_TASKS actually holds the requested
	 iterations per task; derive the task count from it.  */
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      /* UTYPE may be wider than unsigned long; saturate on overflow.  */
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if ((flags & GOMP_TASK_FLAG_STRICT)
	  && num_tasks != ~0ULL)
	{
	  /* grainsize (strict): every task gets exactly GRAINSIZE
	     iterations except a single final task with the remainder.  */
	  UTYPE mod = n % grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mod)
	    {
	      num_tasks++;
	      nfirst_task_step = (TYPE) mod * step;
	      if (num_tasks == 1)
		task_step = nfirst_task_step;
	      else
		nfirst = num_tasks - 2;
	    }
	}
      else if (num_tasks <= 1)
	{
	  /* Fewer iterations than one grainsize: one task does it all.  */
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  /* Enough iterations: GRAINSIZE per task, spreading any
	     remainder one extra iteration over the first tasks.  */
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  /* Otherwise divide N as evenly as possible over NUM_TASKS.  */
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      /* num_tasks clause (or none): default to one task per team thread,
	 never more tasks than iterations.  */
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  /* Divide evenly; first MOD tasks get one extra iteration.  */
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      /* nogroup: no implicit taskgroup, but still honor cancellation of
	 an enclosing taskgroup (directly or via a workshare's parent).  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && thr->task
	  && thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }
  else
    {
      /* Implicit taskgroup around the generated tasks; register
	 reductions with it when requested (ptr layout as above).  */
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
	{
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
	}
    }

  /* Clamp the priority to the max-task-priority ICV.  */
  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Undeferred path: IF(0), no team, inside a final task, or the team's
     task queue is already saturated — run every chunk sequentially on
     this thread.  */
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  /* With a copy constructor, all argument blocks must be
	     copy-constructed first (each under its own task context),
	     then executed in a second pass.  */
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      /* Run CPYFN with the new task current, like a real task.  */
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      /* Patch this chunk's bounds into the argument block.  */
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      if (i == nfirst)
		task_step = nfirst_task_step;
	      fn (arg);
	      arg += arg_size;
	      /* Orphan any children the chunk spawned before the stack
		 task object goes away.  */
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
	/* No copy constructor: reuse DATA in place, one chunk at a time.  */
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step = nfirst_task_step;
	    fn (data);
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      /* Deferred path: heap-allocate and fully initialize all tasks
	 first, then publish them to the queues under the team lock.  */
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
	{
	  /* Task descriptor and its argument block share one
	     allocation; ARG is aligned within the tail.  */
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step = nfirst_task_step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  Only possible when CPYFN is NULL: with copy ctors run,
	 the tasks must execute so their destructors are invoked.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && cpyfn == NULL)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      /* Discard all not-yet-queued tasks and bail out.  */
	      gomp_mutex_unlock (&team->task_lock);
	      for (i = 0; i < num_tasks; i++)
		{
		  gomp_finish_task (tasks[i]);
		  free (tasks[i]);
		}
	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
		ialias_call (GOMP_taskgroup_end) ();
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
      /* Enqueue each task on the parent, taskgroup and team queues.  */
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake up to NUM_TASKS idle team threads to pick up the work.  */
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  /* Close the implicit taskgroup: waits for all generated tasks.  */
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}
401