1 1.7 mrg /* Copyright (C) 2015-2022 Free Software Foundation, Inc. 2 1.1 mrg Contributed by Jakub Jelinek <jakub (at) redhat.com>. 3 1.1 mrg 4 1.1 mrg This file is part of the GNU Offloading and Multi Processing Library 5 1.1 mrg (libgomp). 6 1.1 mrg 7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published by 9 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 10 1.1 mrg any later version. 11 1.1 mrg 12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 1.1 mrg more details. 16 1.1 mrg 17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 18 1.1 mrg permissions described in the GCC Runtime Library Exception, version 19 1.1 mrg 3.1, as published by the Free Software Foundation. 20 1.1 mrg 21 1.1 mrg You should have received a copy of the GNU General Public License and 22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 1.1 mrg <http://www.gnu.org/licenses/>. */ 25 1.1 mrg 26 1.1 mrg /* This file handles the taskloop construct. It is included twice, once 27 1.1 mrg for the long and once for unsigned long long variant. */ 28 1.1 mrg 29 1.1 mrg /* Called when encountering an explicit task directive. If IF_CLAUSE is 30 1.1 mrg false, then we must not delay in executing the task. If UNTIED is true, 31 1.1 mrg then the task may be executed by any member of the team. */ 32 1.1 mrg 33 1.1 mrg void 34 1.1 mrg GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), 35 1.1 mrg long arg_size, long arg_align, unsigned flags, 36 1.1 mrg unsigned long num_tasks, int priority, 37 1.1 mrg TYPE start, TYPE end, TYPE step) 38 1.1 mrg { 39 1.1 mrg struct gomp_thread *thr = gomp_thread (); 40 1.1 mrg struct gomp_team *team = thr->ts.team; 41 1.1 mrg 42 1.1 mrg #ifdef HAVE_BROKEN_POSIX_SEMAPHORES 43 1.1 mrg /* If pthread_mutex_* is used for omp_*lock*, then each task must be 44 1.1 mrg tied to one thread all the time. This means UNTIED tasks must be 45 1.1 mrg tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN 46 1.1 mrg might be running on different thread than FN. */ 47 1.1 mrg if (cpyfn) 48 1.1 mrg flags &= ~GOMP_TASK_FLAG_IF; 49 1.1 mrg flags &= ~GOMP_TASK_FLAG_UNTIED; 50 1.1 mrg #endif 51 1.1 mrg 52 1.1 mrg /* If parallel or taskgroup has been cancelled, don't start new tasks. */ 53 1.1 mrg if (team && gomp_team_barrier_cancelled (&team->barrier)) 54 1.6 mrg { 55 1.6 mrg early_return: 56 1.6 mrg if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION)) 57 1.6 mrg == GOMP_TASK_FLAG_REDUCTION) 58 1.6 mrg { 59 1.6 mrg struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; 60 1.6 mrg uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; 61 1.6 mrg /* Tell callers GOMP_taskgroup_reduction_register has not been 62 1.6 mrg called. */ 63 1.6 mrg ptr[2] = 0; 64 1.6 mrg } 65 1.6 mrg return; 66 1.6 mrg } 67 1.1 mrg 68 1.1 mrg #ifdef TYPE_is_long 69 1.1 mrg TYPE s = step; 70 1.1 mrg if (step > 0) 71 1.1 mrg { 72 1.1 mrg if (start >= end) 73 1.6 mrg goto early_return; 74 1.1 mrg s--; 75 1.1 mrg } 76 1.1 mrg else 77 1.1 mrg { 78 1.1 mrg if (start <= end) 79 1.6 mrg goto early_return; 80 1.1 mrg s++; 81 1.1 mrg } 82 1.1 mrg UTYPE n = (end - start + s) / step; 83 1.1 mrg #else 84 1.1 mrg UTYPE n; 85 1.1 mrg if (flags & GOMP_TASK_FLAG_UP) 86 1.1 mrg { 87 1.1 mrg if (start >= end) 88 1.6 mrg goto early_return; 89 1.1 mrg n = (end - start + step - 1) / step; 90 1.1 mrg } 91 1.1 mrg else 92 1.1 mrg { 93 1.1 mrg if (start <= end) 94 1.6 mrg goto early_return; 95 1.1 mrg n = (start - end - step - 1) / -step; 96 1.1 mrg } 97 1.1 mrg #endif 98 1.1 mrg 99 1.1 mrg TYPE task_step = step; 100 1.7 mrg TYPE nfirst_task_step = step; 101 1.1 mrg unsigned long nfirst = n; 102 1.1 mrg if (flags & GOMP_TASK_FLAG_GRAINSIZE) 103 1.1 mrg { 104 1.1 mrg unsigned long grainsize = num_tasks; 105 1.1 mrg #ifdef TYPE_is_long 106 1.1 mrg num_tasks = n / grainsize; 107 1.1 mrg #else 108 1.1 mrg UTYPE ndiv = n / grainsize; 109 1.1 mrg num_tasks = ndiv; 110 1.1 mrg if (num_tasks != ndiv) 111 1.1 mrg num_tasks = ~0UL; 112 1.1 mrg #endif 113 1.7 mrg if ((flags & GOMP_TASK_FLAG_STRICT) 114 1.7 mrg && num_tasks != ~0ULL) 115 1.7 mrg { 116 1.7 mrg UTYPE mod = n % grainsize; 117 1.7 mrg task_step = (TYPE) grainsize * step; 118 1.7 mrg if (mod) 119 1.7 mrg { 120 1.7 mrg num_tasks++; 121 1.7 mrg nfirst_task_step = (TYPE) mod * step; 122 1.7 mrg if (num_tasks == 1) 123 1.7 mrg task_step = nfirst_task_step; 124 1.7 mrg else 125 1.7 mrg nfirst = num_tasks - 2; 126 1.7 mrg } 127 1.7 mrg } 128 1.7 mrg else if (num_tasks <= 1) 129 1.1 mrg { 130 1.1 mrg num_tasks = 1; 131 1.1 mrg task_step = end - start; 132 1.1 mrg } 133 1.1 mrg else if (num_tasks >= grainsize 134 1.1 mrg #ifndef TYPE_is_long 135 1.1 mrg && num_tasks != ~0UL 136 1.1 mrg #endif 137 1.1 mrg ) 138 1.1 mrg { 139 1.1 mrg UTYPE mul = num_tasks * grainsize; 140 1.1 mrg task_step = (TYPE) grainsize * step; 141 1.1 mrg if (mul != n) 142 1.1 mrg { 143 1.7 mrg nfirst_task_step = task_step; 144 1.1 mrg task_step += step; 145 1.1 mrg nfirst = n - mul - 1; 146 1.1 mrg } 147 1.1 mrg } 148 1.1 mrg else 149 1.1 mrg { 150 1.1 mrg UTYPE div = n / num_tasks; 151 1.1 mrg UTYPE mod = n % num_tasks; 152 1.1 mrg task_step = (TYPE) div * step; 153 1.1 mrg if (mod) 154 1.1 mrg { 155 1.7 mrg nfirst_task_step = task_step; 156 1.1 mrg task_step += step; 157 1.1 mrg nfirst = mod - 1; 158 1.1 mrg } 159 1.1 mrg } 160 1.1 mrg } 161 1.1 mrg else 162 1.1 mrg { 163 1.1 mrg if (num_tasks == 0) 164 1.1 mrg num_tasks = team ? team->nthreads : 1; 165 1.1 mrg if (num_tasks >= n) 166 1.1 mrg num_tasks = n; 167 1.1 mrg else 168 1.1 mrg { 169 1.1 mrg UTYPE div = n / num_tasks; 170 1.1 mrg UTYPE mod = n % num_tasks; 171 1.1 mrg task_step = (TYPE) div * step; 172 1.1 mrg if (mod) 173 1.1 mrg { 174 1.7 mrg nfirst_task_step = task_step; 175 1.1 mrg task_step += step; 176 1.1 mrg nfirst = mod - 1; 177 1.1 mrg } 178 1.1 mrg } 179 1.1 mrg } 180 1.1 mrg 181 1.1 mrg if (flags & GOMP_TASK_FLAG_NOGROUP) 182 1.1 mrg { 183 1.5 mrg if (__builtin_expect (gomp_cancel_var, 0) 184 1.5 mrg && thr->task 185 1.5 mrg && thr->task->taskgroup) 186 1.5 mrg { 187 1.5 mrg if (thr->task->taskgroup->cancelled) 188 1.5 mrg return; 189 1.5 mrg if (thr->task->taskgroup->workshare 190 1.5 mrg && thr->task->taskgroup->prev 191 1.5 mrg && thr->task->taskgroup->prev->cancelled) 192 1.5 mrg return; 193 1.5 mrg } 194 1.1 mrg } 195 1.1 mrg else 196 1.5 mrg { 197 1.5 mrg ialias_call (GOMP_taskgroup_start) (); 198 1.5 mrg if (flags & GOMP_TASK_FLAG_REDUCTION) 199 1.5 mrg { 200 1.5 mrg struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; }; 201 1.5 mrg uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr; 202 1.5 mrg ialias_call (GOMP_taskgroup_reduction_register) (ptr); 203 1.5 mrg } 204 1.5 mrg } 205 1.1 mrg 206 1.1 mrg if (priority > gomp_max_task_priority_var) 207 1.1 mrg priority = gomp_max_task_priority_var; 208 1.1 mrg 209 1.1 mrg if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL 210 1.1 mrg || (thr->task && thr->task->final_task) 211 1.1 mrg || team->task_count + num_tasks > 64 * team->nthreads) 212 1.1 mrg { 213 1.1 mrg unsigned long i; 214 1.1 mrg if (__builtin_expect (cpyfn != NULL, 0)) 215 1.1 mrg { 216 1.1 mrg struct gomp_task task[num_tasks]; 217 1.1 mrg struct gomp_task *parent = thr->task; 218 1.1 mrg arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); 219 1.1 mrg char buf[num_tasks * arg_size + arg_align - 1]; 220 1.1 mrg char *arg = (char *) (((uintptr_t) buf + arg_align - 1) 221 1.1 mrg & ~(uintptr_t) (arg_align - 1)); 222 1.1 mrg char *orig_arg = arg; 223 1.1 mrg for (i = 0; i < num_tasks; i++) 224 1.1 mrg { 225 1.1 mrg gomp_init_task (&task[i], parent, gomp_icv (false)); 226 1.1 mrg task[i].priority = priority; 227 1.1 mrg task[i].kind = GOMP_TASK_UNDEFERRED; 228 1.1 mrg task[i].final_task = (thr->task && thr->task->final_task) 229 1.1 mrg || (flags & GOMP_TASK_FLAG_FINAL); 230 1.1 mrg if (thr->task) 231 1.1 mrg { 232 1.1 mrg task[i].in_tied_task = thr->task->in_tied_task; 233 1.1 mrg task[i].taskgroup = thr->task->taskgroup; 234 1.1 mrg } 235 1.1 mrg thr->task = &task[i]; 236 1.1 mrg cpyfn (arg, data); 237 1.1 mrg arg += arg_size; 238 1.1 mrg } 239 1.1 mrg arg = orig_arg; 240 1.1 mrg for (i = 0; i < num_tasks; i++) 241 1.1 mrg { 242 1.1 mrg thr->task = &task[i]; 243 1.1 mrg ((TYPE *)arg)[0] = start; 244 1.1 mrg start += task_step; 245 1.1 mrg ((TYPE *)arg)[1] = start; 246 1.1 mrg if (i == nfirst) 247 1.7 mrg task_step = nfirst_task_step; 248 1.1 mrg fn (arg); 249 1.1 mrg arg += arg_size; 250 1.1 mrg if (!priority_queue_empty_p (&task[i].children_queue, 251 1.1 mrg MEMMODEL_RELAXED)) 252 1.1 mrg { 253 1.1 mrg gomp_mutex_lock (&team->task_lock); 254 1.1 mrg gomp_clear_parent (&task[i].children_queue); 255 1.1 mrg gomp_mutex_unlock (&team->task_lock); 256 1.1 mrg } 257 1.1 mrg gomp_end_task (); 258 1.1 mrg } 259 1.1 mrg } 260 1.1 mrg else 261 1.1 mrg for (i = 0; i < num_tasks; i++) 262 1.1 mrg { 263 1.1 mrg struct gomp_task task; 264 1.1 mrg 265 1.1 mrg gomp_init_task (&task, thr->task, gomp_icv (false)); 266 1.1 mrg task.priority = priority; 267 1.1 mrg task.kind = GOMP_TASK_UNDEFERRED; 268 1.1 mrg task.final_task = (thr->task && thr->task->final_task) 269 1.1 mrg || (flags & GOMP_TASK_FLAG_FINAL); 270 1.1 mrg if (thr->task) 271 1.1 mrg { 272 1.1 mrg task.in_tied_task = thr->task->in_tied_task; 273 1.1 mrg task.taskgroup = thr->task->taskgroup; 274 1.1 mrg } 275 1.1 mrg thr->task = &task; 276 1.1 mrg ((TYPE *)data)[0] = start; 277 1.1 mrg start += task_step; 278 1.1 mrg ((TYPE *)data)[1] = start; 279 1.1 mrg if (i == nfirst) 280 1.7 mrg task_step = nfirst_task_step; 281 1.1 mrg fn (data); 282 1.1 mrg if (!priority_queue_empty_p (&task.children_queue, 283 1.1 mrg MEMMODEL_RELAXED)) 284 1.1 mrg { 285 1.1 mrg gomp_mutex_lock (&team->task_lock); 286 1.1 mrg gomp_clear_parent (&task.children_queue); 287 1.1 mrg gomp_mutex_unlock (&team->task_lock); 288 1.1 mrg } 289 1.1 mrg gomp_end_task (); 290 1.1 mrg } 291 1.1 mrg } 292 1.1 mrg else 293 1.1 mrg { 294 1.1 mrg struct gomp_task *tasks[num_tasks]; 295 1.1 mrg struct gomp_task *parent = thr->task; 296 1.1 mrg struct gomp_taskgroup *taskgroup = parent->taskgroup; 297 1.1 mrg char *arg; 298 1.1 mrg int do_wake; 299 1.1 mrg unsigned long i; 300 1.1 mrg 301 1.1 mrg for (i = 0; i < num_tasks; i++) 302 1.1 mrg { 303 1.1 mrg struct gomp_task *task 304 1.1 mrg = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); 305 1.1 mrg tasks[i] = task; 306 1.1 mrg arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) 307 1.1 mrg & ~(uintptr_t) (arg_align - 1)); 308 1.1 mrg gomp_init_task (task, parent, gomp_icv (false)); 309 1.1 mrg task->priority = priority; 310 1.1 mrg task->kind = GOMP_TASK_UNDEFERRED; 311 1.1 mrg task->in_tied_task = parent->in_tied_task; 312 1.1 mrg task->taskgroup = taskgroup; 313 1.1 mrg thr->task = task; 314 1.1 mrg if (cpyfn) 315 1.1 mrg { 316 1.1 mrg cpyfn (arg, data); 317 1.1 mrg task->copy_ctors_done = true; 318 1.1 mrg } 319 1.1 mrg else 320 1.1 mrg memcpy (arg, data, arg_size); 321 1.1 mrg ((TYPE *)arg)[0] = start; 322 1.1 mrg start += task_step; 323 1.1 mrg ((TYPE *)arg)[1] = start; 324 1.1 mrg if (i == nfirst) 325 1.7 mrg task_step = nfirst_task_step; 326 1.1 mrg thr->task = parent; 327 1.1 mrg task->kind = GOMP_TASK_WAITING; 328 1.1 mrg task->fn = fn; 329 1.1 mrg task->fn_data = arg; 330 1.1 mrg task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; 331 1.1 mrg } 332 1.1 mrg gomp_mutex_lock (&team->task_lock); 333 1.1 mrg /* If parallel or taskgroup has been cancelled, don't start new 334 1.1 mrg tasks. */ 335 1.5 mrg if (__builtin_expect (gomp_cancel_var, 0) 336 1.5 mrg && cpyfn == NULL) 337 1.1 mrg { 338 1.5 mrg if (gomp_team_barrier_cancelled (&team->barrier)) 339 1.5 mrg { 340 1.5 mrg do_cancel: 341 1.5 mrg gomp_mutex_unlock (&team->task_lock); 342 1.5 mrg for (i = 0; i < num_tasks; i++) 343 1.5 mrg { 344 1.5 mrg gomp_finish_task (tasks[i]); 345 1.5 mrg free (tasks[i]); 346 1.5 mrg } 347 1.5 mrg if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) 348 1.5 mrg ialias_call (GOMP_taskgroup_end) (); 349 1.5 mrg return; 350 1.5 mrg } 351 1.5 mrg if (taskgroup) 352 1.1 mrg { 353 1.5 mrg if (taskgroup->cancelled) 354 1.5 mrg goto do_cancel; 355 1.5 mrg if (taskgroup->workshare 356 1.5 mrg && taskgroup->prev 357 1.5 mrg && taskgroup->prev->cancelled) 358 1.5 mrg goto do_cancel; 359 1.1 mrg } 360 1.1 mrg } 361 1.1 mrg if (taskgroup) 362 1.1 mrg taskgroup->num_children += num_tasks; 363 1.1 mrg for (i = 0; i < num_tasks; i++) 364 1.1 mrg { 365 1.1 mrg struct gomp_task *task = tasks[i]; 366 1.1 mrg priority_queue_insert (PQ_CHILDREN, &parent->children_queue, 367 1.1 mrg task, priority, 368 1.1 mrg PRIORITY_INSERT_BEGIN, 369 1.1 mrg /*last_parent_depends_on=*/false, 370 1.1 mrg task->parent_depends_on); 371 1.1 mrg if (taskgroup) 372 1.1 mrg priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, 373 1.1 mrg task, priority, PRIORITY_INSERT_BEGIN, 374 1.1 mrg /*last_parent_depends_on=*/false, 375 1.1 mrg task->parent_depends_on); 376 1.1 mrg priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority, 377 1.1 mrg PRIORITY_INSERT_END, 378 1.1 mrg /*last_parent_depends_on=*/false, 379 1.1 mrg task->parent_depends_on); 380 1.1 mrg ++team->task_count; 381 1.1 mrg ++team->task_queued_count; 382 1.1 mrg } 383 1.1 mrg gomp_team_barrier_set_task_pending (&team->barrier); 384 1.1 mrg if (team->task_running_count + !parent->in_tied_task 385 1.1 mrg < team->nthreads) 386 1.1 mrg { 387 1.1 mrg do_wake = team->nthreads - team->task_running_count 388 1.1 mrg - !parent->in_tied_task; 389 1.1 mrg if ((unsigned long) do_wake > num_tasks) 390 1.1 mrg do_wake = num_tasks; 391 1.1 mrg } 392 1.1 mrg else 393 1.1 mrg do_wake = 0; 394 1.1 mrg gomp_mutex_unlock (&team->task_lock); 395 1.1 mrg if (do_wake) 396 1.1 mrg gomp_team_barrier_wake (&team->barrier, do_wake); 397 1.1 mrg } 398 1.1 mrg if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) 399 1.1 mrg ialias_call (GOMP_taskgroup_end) (); 400 1.1 mrg } 401