1 1.1.1.12 mrg /* Copyright (C) 2005-2024 Free Software Foundation, Inc. 2 1.1 mrg Contributed by Richard Henderson <rth (at) redhat.com>. 3 1.1 mrg 4 1.1.1.3 mrg This file is part of the GNU Offloading and Multi Processing Library 5 1.1.1.3 mrg (libgomp). 6 1.1 mrg 7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published by 9 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 10 1.1 mrg any later version. 11 1.1 mrg 12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 1.1 mrg more details. 16 1.1 mrg 17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 18 1.1 mrg permissions described in the GCC Runtime Library Exception, version 19 1.1 mrg 3.1, as published by the Free Software Foundation. 20 1.1 mrg 21 1.1 mrg You should have received a copy of the GNU General Public License and 22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 1.1 mrg <http://www.gnu.org/licenses/>. */ 25 1.1 mrg 26 1.1 mrg /* This file handles the LOOP (FOR/DO) construct. */ 27 1.1 mrg 28 1.1 mrg #include <limits.h> 29 1.1 mrg #include <stdlib.h> 30 1.1.1.9 mrg #include <string.h> 31 1.1 mrg #include "libgomp.h" 32 1.1 mrg 33 1.1 mrg 34 1.1.1.9 mrg ialias (GOMP_loop_runtime_next) 35 1.1.1.9 mrg ialias_redirect (GOMP_taskgroup_reduction_register) 36 1.1.1.9 mrg 37 1.1 mrg /* Initialize the given work share construct from the given arguments. */ 38 1.1 mrg 39 1.1 mrg static inline void 40 1.1 mrg gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr, 41 1.1 mrg enum gomp_schedule_type sched, long chunk_size) 42 1.1 mrg { 43 1.1 mrg ws->sched = sched; 44 1.1 mrg ws->chunk_size = chunk_size; 45 1.1 mrg /* Canonicalize loops that have zero iterations to ->next == ->end. */ 46 1.1 mrg ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end)) 47 1.1 mrg ? start : end; 48 1.1 mrg ws->incr = incr; 49 1.1 mrg ws->next = start; 50 1.1 mrg if (sched == GFS_DYNAMIC) 51 1.1 mrg { 52 1.1 mrg ws->chunk_size *= incr; 53 1.1 mrg 54 1.1 mrg #ifdef HAVE_SYNC_BUILTINS 55 1.1 mrg { 56 1.1 mrg /* For dynamic scheduling prepare things to make each iteration 57 1.1 mrg faster. */ 58 1.1 mrg struct gomp_thread *thr = gomp_thread (); 59 1.1 mrg struct gomp_team *team = thr->ts.team; 60 1.1 mrg long nthreads = team ? team->nthreads : 1; 61 1.1 mrg 62 1.1 mrg if (__builtin_expect (incr > 0, 1)) 63 1.1 mrg { 64 1.1 mrg /* Cheap overflow protection. */ 65 1.1 mrg if (__builtin_expect ((nthreads | ws->chunk_size) 66 1.1 mrg >= 1UL << (sizeof (long) 67 1.1 mrg * __CHAR_BIT__ / 2 - 1), 0)) 68 1.1 mrg ws->mode = 0; 69 1.1 mrg else 70 1.1 mrg ws->mode = ws->end < (LONG_MAX 71 1.1 mrg - (nthreads + 1) * ws->chunk_size); 72 1.1 mrg } 73 1.1 mrg /* Cheap overflow protection. */ 74 1.1 mrg else if (__builtin_expect ((nthreads | -ws->chunk_size) 75 1.1 mrg >= 1UL << (sizeof (long) 76 1.1 mrg * __CHAR_BIT__ / 2 - 1), 0)) 77 1.1 mrg ws->mode = 0; 78 1.1 mrg else 79 1.1 mrg ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX; 80 1.1 mrg } 81 1.1 mrg #endif 82 1.1 mrg } 83 1.1 mrg } 84 1.1 mrg 85 1.1 mrg /* The *_start routines are called when first encountering a loop construct 86 1.1.1.9 mrg that is not bound directly to a parallel construct. The first thread 87 1.1 mrg that arrives will create the work-share construct; subsequent threads 88 1.1 mrg will see the construct exists and allocate work from it. 89 1.1 mrg 90 1.1 mrg START, END, INCR are the bounds of the loop; due to the restrictions of 91 1.1.1.9 mrg OpenMP, these values must be the same in every thread. This is not 92 1.1 mrg verified (nor is it entirely verifiable, since START is not necessarily 93 1.1 mrg retained intact in the work-share data structure). CHUNK_SIZE is the 94 1.1 mrg scheduling parameter; again this must be identical in all threads. 95 1.1 mrg 96 1.1 mrg Returns true if there's any work for this thread to perform. If so, 97 1.1 mrg *ISTART and *IEND are filled with the bounds of the iteration block 98 1.1 mrg allocated to this thread. Returns false if all work was assigned to 99 1.1 mrg other threads prior to this thread's arrival. */ 100 1.1 mrg 101 1.1 mrg static bool 102 1.1 mrg gomp_loop_static_start (long start, long end, long incr, long chunk_size, 103 1.1 mrg long *istart, long *iend) 104 1.1 mrg { 105 1.1 mrg struct gomp_thread *thr = gomp_thread (); 106 1.1 mrg 107 1.1 mrg thr->ts.static_trip = 0; 108 1.1.1.9 mrg if (gomp_work_share_start (0)) 109 1.1 mrg { 110 1.1 mrg gomp_loop_init (thr->ts.work_share, start, end, incr, 111 1.1 mrg GFS_STATIC, chunk_size); 112 1.1 mrg gomp_work_share_init_done (); 113 1.1 mrg } 114 1.1 mrg 115 1.1 mrg return !gomp_iter_static_next (istart, iend); 116 1.1 mrg } 117 1.1 mrg 118 1.1.1.4 mrg /* The current dynamic implementation is always monotonic. The 119 1.1.1.4 mrg entrypoints without nonmonotonic in them have to be always monotonic, 120 1.1.1.4 mrg but the nonmonotonic ones could be changed to use work-stealing for 121 1.1.1.4 mrg improved scalability. */ 122 1.1.1.4 mrg 123 1.1 mrg static bool 124 1.1 mrg gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size, 125 1.1 mrg long *istart, long *iend) 126 1.1 mrg { 127 1.1 mrg struct gomp_thread *thr = gomp_thread (); 128 1.1 mrg bool ret; 129 1.1 mrg 130 1.1.1.9 mrg if (gomp_work_share_start (0)) 131 1.1 mrg { 132 1.1 mrg gomp_loop_init (thr->ts.work_share, start, end, incr, 133 1.1 mrg GFS_DYNAMIC, chunk_size); 134 1.1 mrg gomp_work_share_init_done (); 135 1.1 mrg } 136 1.1 mrg 137 1.1 mrg #ifdef HAVE_SYNC_BUILTINS 138 1.1 mrg ret = gomp_iter_dynamic_next (istart, iend); 139 1.1 mrg #else 140 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 141 1.1 mrg ret = gomp_iter_dynamic_next_locked (istart, iend); 142 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 143 1.1 mrg #endif 144 1.1 mrg 145 1.1 mrg return ret; 146 1.1 mrg } 147 1.1 mrg 148 1.1.1.4 mrg /* Similarly as for dynamic, though the question is how can the chunk sizes 149 1.1.1.4 mrg be decreased without a central locking or atomics. */ 150 1.1.1.4 mrg 151 1.1 mrg static bool 152 1.1 mrg gomp_loop_guided_start (long start, long end, long incr, long chunk_size, 153 1.1 mrg long *istart, long *iend) 154 1.1 mrg { 155 1.1 mrg struct gomp_thread *thr = gomp_thread (); 156 1.1 mrg bool ret; 157 1.1 mrg 158 1.1.1.9 mrg if (gomp_work_share_start (0)) 159 1.1 mrg { 160 1.1 mrg gomp_loop_init (thr->ts.work_share, start, end, incr, 161 1.1 mrg GFS_GUIDED, chunk_size); 162 1.1 mrg gomp_work_share_init_done (); 163 1.1 mrg } 164 1.1 mrg 165 1.1 mrg #ifdef HAVE_SYNC_BUILTINS 166 1.1 mrg ret = gomp_iter_guided_next (istart, iend); 167 1.1 mrg #else 168 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 169 1.1 mrg ret = gomp_iter_guided_next_locked (istart, iend); 170 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 171 1.1 mrg #endif 172 1.1 mrg 173 1.1 mrg return ret; 174 1.1 mrg } 175 1.1 mrg 176 1.1 mrg bool 177 1.1 mrg GOMP_loop_runtime_start (long start, long end, long incr, 178 1.1 mrg long *istart, long *iend) 179 1.1 mrg { 180 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false); 181 1.1.1.9 mrg switch (icv->run_sched_var & ~GFS_MONOTONIC) 182 1.1 mrg { 183 1.1 mrg case GFS_STATIC: 184 1.1.1.4 mrg return gomp_loop_static_start (start, end, incr, 185 1.1.1.4 mrg icv->run_sched_chunk_size, 186 1.1 mrg istart, iend); 187 1.1 mrg case GFS_DYNAMIC: 188 1.1.1.4 mrg return gomp_loop_dynamic_start (start, end, incr, 189 1.1.1.4 mrg icv->run_sched_chunk_size, 190 1.1 mrg istart, iend); 191 1.1 mrg case GFS_GUIDED: 192 1.1.1.4 mrg return gomp_loop_guided_start (start, end, incr, 193 1.1.1.4 mrg icv->run_sched_chunk_size, 194 1.1 mrg istart, iend); 195 1.1 mrg case GFS_AUTO: 196 1.1 mrg /* For now map to schedule(static), later on we could play with feedback 197 1.1 mrg driven choice. */ 198 1.1 mrg return gomp_loop_static_start (start, end, incr, 0, istart, iend); 199 1.1 mrg default: 200 1.1 mrg abort (); 201 1.1 mrg } 202 1.1 mrg } 203 1.1 mrg 204 1.1.1.9 mrg static long 205 1.1.1.9 mrg gomp_adjust_sched (long sched, long *chunk_size) 206 1.1.1.9 mrg { 207 1.1.1.9 mrg sched &= ~GFS_MONOTONIC; 208 1.1.1.9 mrg switch (sched) 209 1.1.1.9 mrg { 210 1.1.1.9 mrg case GFS_STATIC: 211 1.1.1.9 mrg case GFS_DYNAMIC: 212 1.1.1.9 mrg case GFS_GUIDED: 213 1.1.1.9 mrg return sched; 214 1.1.1.9 mrg /* GFS_RUNTIME is used for runtime schedule without monotonic 215 1.1.1.9 mrg or nonmonotonic modifiers on the clause. 216 1.1.1.9 mrg GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic 217 1.1.1.9 mrg modifier. */ 218 1.1.1.9 mrg case GFS_RUNTIME: 219 1.1.1.9 mrg /* GFS_AUTO is used for runtime schedule with nonmonotonic 220 1.1.1.9 mrg modifier. */ 221 1.1.1.9 mrg case GFS_AUTO: 222 1.1.1.9 mrg { 223 1.1.1.9 mrg struct gomp_task_icv *icv = gomp_icv (false); 224 1.1.1.9 mrg sched = icv->run_sched_var & ~GFS_MONOTONIC; 225 1.1.1.9 mrg switch (sched) 226 1.1.1.9 mrg { 227 1.1.1.9 mrg case GFS_STATIC: 228 1.1.1.9 mrg case GFS_DYNAMIC: 229 1.1.1.9 mrg case GFS_GUIDED: 230 1.1.1.9 mrg *chunk_size = icv->run_sched_chunk_size; 231 1.1.1.9 mrg break; 232 1.1.1.9 mrg case GFS_AUTO: 233 1.1.1.9 mrg sched = GFS_STATIC; 234 1.1.1.9 mrg *chunk_size = 0; 235 1.1.1.9 mrg break; 236 1.1.1.9 mrg default: 237 1.1.1.9 mrg abort (); 238 1.1.1.9 mrg } 239 1.1.1.9 mrg return sched; 240 1.1.1.9 mrg } 241 1.1.1.9 mrg default: 242 1.1.1.9 mrg abort (); 243 1.1.1.9 mrg } 244 1.1.1.9 mrg } 245 1.1.1.9 mrg 246 1.1.1.9 mrg bool 247 1.1.1.9 mrg GOMP_loop_start (long start, long end, long incr, long sched, 248 1.1.1.9 mrg long chunk_size, long *istart, long *iend, 249 1.1.1.9 mrg uintptr_t *reductions, void **mem) 250 1.1.1.9 mrg { 251 1.1.1.9 mrg struct gomp_thread *thr = gomp_thread (); 252 1.1.1.9 mrg 253 1.1.1.9 mrg thr->ts.static_trip = 0; 254 1.1.1.9 mrg if (reductions) 255 1.1.1.9 mrg gomp_workshare_taskgroup_start (); 256 1.1.1.9 mrg if (gomp_work_share_start (0)) 257 1.1.1.9 mrg { 258 1.1.1.9 mrg sched = gomp_adjust_sched (sched, &chunk_size); 259 1.1.1.9 mrg gomp_loop_init (thr->ts.work_share, start, end, incr, 260 1.1.1.9 mrg sched, chunk_size); 261 1.1.1.9 mrg if (reductions) 262 1.1.1.9 mrg { 263 1.1.1.9 mrg GOMP_taskgroup_reduction_register (reductions); 264 1.1.1.9 mrg thr->task->taskgroup->workshare = true; 265 1.1.1.9 mrg thr->ts.work_share->task_reductions = reductions; 266 1.1.1.9 mrg } 267 1.1.1.9 mrg if (mem) 268 1.1.1.9 mrg { 269 1.1.1.9 mrg uintptr_t size = (uintptr_t) *mem; 270 1.1.1.9 mrg #define INLINE_ORDERED_TEAM_IDS_OFF \ 271 1.1.1.9 mrg ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \ 272 1.1.1.9 mrg + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1)) 273 1.1.1.11 mrg if (sizeof (struct gomp_work_share) 274 1.1.1.11 mrg <= INLINE_ORDERED_TEAM_IDS_OFF 275 1.1.1.11 mrg || __alignof__ (struct gomp_work_share) < __alignof__ (long long) 276 1.1.1.11 mrg || size > (sizeof (struct gomp_work_share) 277 1.1.1.11 mrg - INLINE_ORDERED_TEAM_IDS_OFF)) 278 1.1.1.9 mrg *mem 279 1.1.1.9 mrg = (void *) (thr->ts.work_share->ordered_team_ids 280 1.1.1.9 mrg = gomp_malloc_cleared (size)); 281 1.1.1.9 mrg else 282 1.1.1.9 mrg *mem = memset (((char *) thr->ts.work_share) 283 1.1.1.9 mrg + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size); 284 1.1.1.9 mrg } 285 1.1.1.9 mrg gomp_work_share_init_done (); 286 1.1.1.9 mrg } 287 1.1.1.9 mrg else 288 1.1.1.9 mrg { 289 1.1.1.9 mrg if (reductions) 290 1.1.1.9 mrg { 291 1.1.1.9 mrg uintptr_t *first_reductions = thr->ts.work_share->task_reductions; 292 1.1.1.9 mrg gomp_workshare_task_reduction_register (reductions, 293 1.1.1.9 mrg first_reductions); 294 1.1.1.9 mrg } 295 1.1.1.9 mrg if (mem) 296 1.1.1.9 mrg { 297 1.1.1.9 mrg if ((offsetof (struct gomp_work_share, inline_ordered_team_ids) 298 1.1.1.9 mrg & (__alignof__ (long long) - 1)) == 0) 299 1.1.1.9 mrg *mem = (void *) thr->ts.work_share->ordered_team_ids; 300 1.1.1.9 mrg else 301 1.1.1.9 mrg { 302 1.1.1.9 mrg uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids; 303 1.1.1.9 mrg p += __alignof__ (long long) - 1; 304 1.1.1.9 mrg p &= ~(__alignof__ (long long) - 1); 305 1.1.1.9 mrg *mem = (void *) p; 306 1.1.1.9 mrg } 307 1.1.1.9 mrg } 308 1.1.1.9 mrg } 309 1.1.1.9 mrg 310 1.1.1.9 mrg if (!istart) 311 1.1.1.9 mrg return true; 312 1.1.1.9 mrg return ialias_call (GOMP_loop_runtime_next) (istart, iend); 313 1.1.1.9 mrg } 314 1.1.1.9 mrg 315 1.1 mrg /* The *_ordered_*_start routines are similar. The only difference is that 316 1.1 mrg this work-share construct is initialized to expect an ORDERED section. */ 317 1.1 mrg 318 1.1 mrg static bool 319 1.1 mrg gomp_loop_ordered_static_start (long start, long end, long incr, 320 1.1 mrg long chunk_size, long *istart, long *iend) 321 1.1 mrg { 322 1.1 mrg struct gomp_thread *thr = gomp_thread (); 323 1.1 mrg 324 1.1 mrg thr->ts.static_trip = 0; 325 1.1.1.9 mrg if (gomp_work_share_start (1)) 326 1.1 mrg { 327 1.1 mrg gomp_loop_init (thr->ts.work_share, start, end, incr, 328 1.1 mrg GFS_STATIC, chunk_size); 329 1.1 mrg gomp_ordered_static_init (); 330 1.1 mrg gomp_work_share_init_done (); 331 1.1 mrg } 332 1.1 mrg 333 1.1 mrg return !gomp_iter_static_next (istart, iend); 334 1.1 mrg } 335 1.1 mrg 336 1.1 mrg static bool 337 1.1 mrg gomp_loop_ordered_dynamic_start (long start, long end, long incr, 338 1.1 mrg long chunk_size, long *istart, long *iend) 339 1.1 mrg { 340 1.1 mrg struct gomp_thread *thr = gomp_thread (); 341 1.1 mrg bool ret; 342 1.1 mrg 343 1.1.1.9 mrg if (gomp_work_share_start (1)) 344 1.1 mrg { 345 1.1 mrg gomp_loop_init (thr->ts.work_share, start, end, incr, 346 1.1 mrg GFS_DYNAMIC, chunk_size); 347 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 348 1.1 mrg gomp_work_share_init_done (); 349 1.1 mrg } 350 1.1 mrg else 351 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 352 1.1 mrg 353 1.1 mrg ret = gomp_iter_dynamic_next_locked (istart, iend); 354 1.1 mrg if (ret) 355 1.1 mrg gomp_ordered_first (); 356 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 357 1.1 mrg 358 1.1 mrg return ret; 359 1.1 mrg } 360 1.1 mrg 361 1.1 mrg static bool 362 1.1 mrg gomp_loop_ordered_guided_start (long start, long end, long incr, 363 1.1 mrg long chunk_size, long *istart, long *iend) 364 1.1 mrg { 365 1.1 mrg struct gomp_thread *thr = gomp_thread (); 366 1.1 mrg bool ret; 367 1.1 mrg 368 1.1.1.9 mrg if (gomp_work_share_start (1)) 369 1.1 mrg { 370 1.1 mrg gomp_loop_init (thr->ts.work_share, start, end, incr, 371 1.1 mrg GFS_GUIDED, chunk_size); 372 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 373 1.1 mrg gomp_work_share_init_done (); 374 1.1 mrg } 375 1.1 mrg else 376 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 377 1.1 mrg 378 1.1 mrg ret = gomp_iter_guided_next_locked (istart, iend); 379 1.1 mrg if (ret) 380 1.1 mrg gomp_ordered_first (); 381 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 382 1.1 mrg 383 1.1 mrg return ret; 384 1.1 mrg } 385 1.1 mrg 386 1.1 mrg bool 387 1.1 mrg GOMP_loop_ordered_runtime_start (long start, long end, long incr, 388 1.1 mrg long *istart, long *iend) 389 1.1 mrg { 390 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false); 391 1.1.1.9 mrg switch (icv->run_sched_var & ~GFS_MONOTONIC) 392 1.1 mrg { 393 1.1 mrg case GFS_STATIC: 394 1.1 mrg return gomp_loop_ordered_static_start (start, end, incr, 395 1.1.1.4 mrg icv->run_sched_chunk_size, 396 1.1 mrg istart, iend); 397 1.1 mrg case GFS_DYNAMIC: 398 1.1 mrg return gomp_loop_ordered_dynamic_start (start, end, incr, 399 1.1.1.4 mrg icv->run_sched_chunk_size, 400 1.1 mrg istart, iend); 401 1.1 mrg case GFS_GUIDED: 402 1.1 mrg return gomp_loop_ordered_guided_start (start, end, incr, 403 1.1.1.4 mrg icv->run_sched_chunk_size, 404 1.1 mrg istart, iend); 405 1.1 mrg case GFS_AUTO: 406 1.1 mrg /* For now map to schedule(static), later on we could play with feedback 407 1.1 mrg driven choice. */ 408 1.1 mrg return gomp_loop_ordered_static_start (start, end, incr, 409 1.1 mrg 0, istart, iend); 410 1.1 mrg default: 411 1.1 mrg abort (); 412 1.1 mrg } 413 1.1 mrg } 414 1.1 mrg 415 1.1.1.9 mrg bool 416 1.1.1.9 mrg GOMP_loop_ordered_start (long start, long end, long incr, long sched, 417 1.1.1.9 mrg long chunk_size, long *istart, long *iend, 418 1.1.1.9 mrg uintptr_t *reductions, void **mem) 419 1.1.1.9 mrg { 420 1.1.1.9 mrg struct gomp_thread *thr = gomp_thread (); 421 1.1.1.9 mrg size_t ordered = 1; 422 1.1.1.9 mrg bool ret; 423 1.1.1.9 mrg 424 1.1.1.9 mrg thr->ts.static_trip = 0; 425 1.1.1.9 mrg if (reductions) 426 1.1.1.9 mrg gomp_workshare_taskgroup_start (); 427 1.1.1.9 mrg if (mem) 428 1.1.1.9 mrg ordered += (uintptr_t) *mem; 429 1.1.1.9 mrg if (gomp_work_share_start (ordered)) 430 1.1.1.9 mrg { 431 1.1.1.9 mrg sched = gomp_adjust_sched (sched, &chunk_size); 432 1.1.1.9 mrg gomp_loop_init (thr->ts.work_share, start, end, incr, 433 1.1.1.9 mrg sched, chunk_size); 434 1.1.1.9 mrg if (reductions) 435 1.1.1.9 mrg { 436 1.1.1.9 mrg GOMP_taskgroup_reduction_register (reductions); 437 1.1.1.9 mrg thr->task->taskgroup->workshare = true; 438 1.1.1.9 mrg thr->ts.work_share->task_reductions = reductions; 439 1.1.1.9 mrg } 440 1.1.1.9 mrg if (sched == GFS_STATIC) 441 1.1.1.9 mrg gomp_ordered_static_init (); 442 1.1.1.9 mrg else 443 1.1.1.9 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 444 1.1.1.9 mrg gomp_work_share_init_done (); 445 1.1.1.9 mrg } 446 1.1.1.9 mrg else 447 1.1.1.9 mrg { 448 1.1.1.9 mrg if (reductions) 449 1.1.1.9 mrg { 450 1.1.1.9 mrg uintptr_t *first_reductions = thr->ts.work_share->task_reductions; 451 1.1.1.9 mrg gomp_workshare_task_reduction_register (reductions, 452 1.1.1.9 mrg first_reductions); 453 1.1.1.9 mrg } 454 1.1.1.9 mrg sched = thr->ts.work_share->sched; 455 1.1.1.9 mrg if (sched != GFS_STATIC) 456 1.1.1.9 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 457 1.1.1.9 mrg } 458 1.1.1.9 mrg 459 1.1.1.9 mrg if (mem) 460 1.1.1.9 mrg { 461 1.1.1.9 mrg uintptr_t p 462 1.1.1.9 mrg = (uintptr_t) (thr->ts.work_share->ordered_team_ids 463 1.1.1.9 mrg + (thr->ts.team ? thr->ts.team->nthreads : 1)); 464 1.1.1.9 mrg p += __alignof__ (long long) - 1; 465 1.1.1.9 mrg p &= ~(__alignof__ (long long) - 1); 466 1.1.1.9 mrg *mem = (void *) p; 467 1.1.1.9 mrg } 468 1.1.1.9 mrg 469 1.1.1.9 mrg switch (sched) 470 1.1.1.9 mrg { 471 1.1.1.9 mrg case GFS_STATIC: 472 1.1.1.9 mrg case GFS_AUTO: 473 1.1.1.9 mrg return !gomp_iter_static_next (istart, iend); 474 1.1.1.9 mrg case GFS_DYNAMIC: 475 1.1.1.9 mrg ret = gomp_iter_dynamic_next_locked (istart, iend); 476 1.1.1.9 mrg break; 477 1.1.1.9 mrg case GFS_GUIDED: 478 1.1.1.9 mrg ret = gomp_iter_guided_next_locked (istart, iend); 479 1.1.1.9 mrg break; 480 1.1.1.9 mrg default: 481 1.1.1.9 mrg abort (); 482 1.1.1.9 mrg } 483 1.1.1.9 mrg 484 1.1.1.9 mrg if (ret) 485 1.1.1.9 mrg gomp_ordered_first (); 486 1.1.1.9 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 487 1.1.1.9 mrg return ret; 488 1.1.1.9 mrg } 489 1.1.1.9 mrg 490 1.1.1.4 mrg /* The *_doacross_*_start routines are similar. The only difference is that 491 1.1.1.4 mrg this work-share construct is initialized to expect an ORDERED(N) - DOACROSS 492 1.1.1.4 mrg section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 493 1.1.1.4 mrg and other COUNTS array elements tell the library number of iterations 494 1.1.1.4 mrg in the ordered inner loops. */ 495 1.1.1.4 mrg 496 1.1.1.4 mrg static bool 497 1.1.1.4 mrg gomp_loop_doacross_static_start (unsigned ncounts, long *counts, 498 1.1.1.4 mrg long chunk_size, long *istart, long *iend) 499 1.1.1.4 mrg { 500 1.1.1.4 mrg struct gomp_thread *thr = gomp_thread (); 501 1.1.1.4 mrg 502 1.1.1.4 mrg thr->ts.static_trip = 0; 503 1.1.1.9 mrg if (gomp_work_share_start (0)) 504 1.1.1.4 mrg { 505 1.1.1.4 mrg gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, 506 1.1.1.4 mrg GFS_STATIC, chunk_size); 507 1.1.1.9 mrg gomp_doacross_init (ncounts, counts, chunk_size, 0); 508 1.1.1.4 mrg gomp_work_share_init_done (); 509 1.1.1.4 mrg } 510 1.1.1.4 mrg 511 1.1.1.4 mrg return !gomp_iter_static_next (istart, iend); 512 1.1.1.4 mrg } 513 1.1.1.4 mrg 514 1.1.1.4 mrg static bool 515 1.1.1.4 mrg gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts, 516 1.1.1.4 mrg long chunk_size, long *istart, long *iend) 517 1.1.1.4 mrg { 518 1.1.1.4 mrg struct gomp_thread *thr = gomp_thread (); 519 1.1.1.4 mrg bool ret; 520 1.1.1.4 mrg 521 1.1.1.9 mrg if (gomp_work_share_start (0)) 522 1.1.1.4 mrg { 523 1.1.1.4 mrg gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, 524 1.1.1.4 mrg GFS_DYNAMIC, chunk_size); 525 1.1.1.9 mrg gomp_doacross_init (ncounts, counts, chunk_size, 0); 526 1.1.1.4 mrg gomp_work_share_init_done (); 527 1.1.1.4 mrg } 528 1.1.1.4 mrg 529 1.1.1.4 mrg #ifdef HAVE_SYNC_BUILTINS 530 1.1.1.4 mrg ret = gomp_iter_dynamic_next (istart, iend); 531 1.1.1.4 mrg #else 532 1.1.1.4 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 533 1.1.1.4 mrg ret = gomp_iter_dynamic_next_locked (istart, iend); 534 1.1.1.4 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 535 1.1.1.4 mrg #endif 536 1.1.1.4 mrg 537 1.1.1.4 mrg return ret; 538 1.1.1.4 mrg } 539 1.1.1.4 mrg 540 1.1.1.4 mrg static bool 541 1.1.1.4 mrg gomp_loop_doacross_guided_start (unsigned ncounts, long *counts, 542 1.1.1.4 mrg long chunk_size, long *istart, long *iend) 543 1.1.1.4 mrg { 544 1.1.1.4 mrg struct gomp_thread *thr = gomp_thread (); 545 1.1.1.4 mrg bool ret; 546 1.1.1.4 mrg 547 1.1.1.9 mrg if (gomp_work_share_start (0)) 548 1.1.1.4 mrg { 549 1.1.1.4 mrg gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, 550 1.1.1.4 mrg GFS_GUIDED, chunk_size); 551 1.1.1.9 mrg gomp_doacross_init (ncounts, counts, chunk_size, 0); 552 1.1.1.4 mrg gomp_work_share_init_done (); 553 1.1.1.4 mrg } 554 1.1.1.4 mrg 555 1.1.1.4 mrg #ifdef HAVE_SYNC_BUILTINS 556 1.1.1.4 mrg ret = gomp_iter_guided_next (istart, iend); 557 1.1.1.4 mrg #else 558 1.1.1.4 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 559 1.1.1.4 mrg ret = gomp_iter_guided_next_locked (istart, iend); 560 1.1.1.4 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 561 1.1.1.4 mrg #endif 562 1.1.1.4 mrg 563 1.1.1.4 mrg return ret; 564 1.1.1.4 mrg } 565 1.1.1.4 mrg 566 1.1.1.4 mrg bool 567 1.1.1.4 mrg GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts, 568 1.1.1.4 mrg long *istart, long *iend) 569 1.1.1.4 mrg { 570 1.1.1.4 mrg struct gomp_task_icv *icv = gomp_icv (false); 571 1.1.1.9 mrg switch (icv->run_sched_var & ~GFS_MONOTONIC) 572 1.1.1.4 mrg { 573 1.1.1.4 mrg case GFS_STATIC: 574 1.1.1.4 mrg return gomp_loop_doacross_static_start (ncounts, counts, 575 1.1.1.4 mrg icv->run_sched_chunk_size, 576 1.1.1.4 mrg istart, iend); 577 1.1.1.4 mrg case GFS_DYNAMIC: 578 1.1.1.4 mrg return gomp_loop_doacross_dynamic_start (ncounts, counts, 579 1.1.1.4 mrg icv->run_sched_chunk_size, 580 1.1.1.4 mrg istart, iend); 581 1.1.1.4 mrg case GFS_GUIDED: 582 1.1.1.4 mrg return gomp_loop_doacross_guided_start (ncounts, counts, 583 1.1.1.4 mrg icv->run_sched_chunk_size, 584 1.1.1.4 mrg istart, iend); 585 1.1.1.4 mrg case GFS_AUTO: 586 1.1.1.4 mrg /* For now map to schedule(static), later on we could play with feedback 587 1.1.1.4 mrg driven choice. */ 588 1.1.1.4 mrg return gomp_loop_doacross_static_start (ncounts, counts, 589 1.1.1.4 mrg 0, istart, iend); 590 1.1.1.4 mrg default: 591 1.1.1.4 mrg abort (); 592 1.1.1.4 mrg } 593 1.1.1.4 mrg } 594 1.1.1.4 mrg 595 1.1.1.9 mrg bool 596 1.1.1.9 mrg GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched, 597 1.1.1.9 mrg long chunk_size, long *istart, long *iend, 598 1.1.1.9 mrg uintptr_t *reductions, void **mem) 599 1.1.1.9 mrg { 600 1.1.1.9 mrg struct gomp_thread *thr = gomp_thread (); 601 1.1.1.9 mrg 602 1.1.1.9 mrg thr->ts.static_trip = 0; 603 1.1.1.9 mrg if (reductions) 604 1.1.1.9 mrg gomp_workshare_taskgroup_start (); 605 1.1.1.9 mrg if (gomp_work_share_start (0)) 606 1.1.1.9 mrg { 607 1.1.1.9 mrg size_t extra = 0; 608 1.1.1.9 mrg if (mem) 609 1.1.1.9 mrg extra = (uintptr_t) *mem; 610 1.1.1.9 mrg sched = gomp_adjust_sched (sched, &chunk_size); 611 1.1.1.9 mrg gomp_loop_init (thr->ts.work_share, 0, counts[0], 1, 612 1.1.1.9 mrg sched, chunk_size); 613 1.1.1.9 mrg gomp_doacross_init (ncounts, counts, chunk_size, extra); 614 1.1.1.9 mrg if (reductions) 615 1.1.1.9 mrg { 616 1.1.1.9 mrg GOMP_taskgroup_reduction_register (reductions); 617 1.1.1.9 mrg thr->task->taskgroup->workshare = true; 618 1.1.1.9 mrg thr->ts.work_share->task_reductions = reductions; 619 1.1.1.9 mrg } 620 1.1.1.9 mrg gomp_work_share_init_done (); 621 1.1.1.9 mrg } 622 1.1.1.9 mrg else 623 1.1.1.9 mrg { 624 1.1.1.9 mrg if (reductions) 625 1.1.1.9 mrg { 626 1.1.1.9 mrg uintptr_t *first_reductions = thr->ts.work_share->task_reductions; 627 1.1.1.9 mrg gomp_workshare_task_reduction_register (reductions, 628 1.1.1.9 mrg first_reductions); 629 1.1.1.9 mrg } 630 1.1.1.9 mrg sched = thr->ts.work_share->sched; 631 1.1.1.9 mrg } 632 1.1.1.9 mrg 633 1.1.1.9 mrg if (mem) 634 1.1.1.9 mrg *mem = thr->ts.work_share->doacross->extra; 635 1.1.1.9 mrg 636 1.1.1.9 mrg return ialias_call (GOMP_loop_runtime_next) (istart, iend); 637 1.1.1.9 mrg } 638 1.1.1.9 mrg 639 1.1.1.9 mrg /* The *_next routines are called when the thread completes processing of 640 1.1.1.9 mrg the iteration block currently assigned to it. If the work-share 641 1.1 mrg construct is bound directly to a parallel construct, then the iteration 642 1.1 mrg bounds may have been set up before the parallel. In which case, this 643 1.1 mrg may be the first iteration for the thread. 644 1.1 mrg 645 1.1 mrg Returns true if there is work remaining to be performed; *ISTART and 646 1.1 mrg *IEND are filled with a new iteration block. Returns false if all work 647 1.1 mrg has been assigned. */ 648 1.1 mrg 649 1.1 mrg static bool 650 1.1 mrg gomp_loop_static_next (long *istart, long *iend) 651 1.1 mrg { 652 1.1 mrg return !gomp_iter_static_next (istart, iend); 653 1.1 mrg } 654 1.1 mrg 655 1.1 mrg static bool 656 1.1 mrg gomp_loop_dynamic_next (long *istart, long *iend) 657 1.1 mrg { 658 1.1 mrg bool ret; 659 1.1 mrg 660 1.1 mrg #ifdef HAVE_SYNC_BUILTINS 661 1.1 mrg ret = gomp_iter_dynamic_next (istart, iend); 662 1.1 mrg #else 663 1.1 mrg struct gomp_thread *thr = gomp_thread (); 664 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 665 1.1 mrg ret = gomp_iter_dynamic_next_locked (istart, iend); 666 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 667 1.1 mrg #endif 668 1.1 mrg 669 1.1 mrg return ret; 670 1.1 mrg } 671 1.1 mrg 672 1.1 mrg static bool 673 1.1 mrg gomp_loop_guided_next (long *istart, long *iend) 674 1.1 mrg { 675 1.1 mrg bool ret; 676 1.1 mrg 677 1.1 mrg #ifdef HAVE_SYNC_BUILTINS 678 1.1 mrg ret = gomp_iter_guided_next (istart, iend); 679 1.1 mrg #else 680 1.1 mrg struct gomp_thread *thr = gomp_thread (); 681 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 682 1.1 mrg ret = gomp_iter_guided_next_locked (istart, iend); 683 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 684 1.1 mrg #endif 685 1.1 mrg 686 1.1 mrg return ret; 687 1.1 mrg } 688 1.1 mrg 689 1.1 mrg bool 690 1.1 mrg GOMP_loop_runtime_next (long *istart, long *iend) 691 1.1 mrg { 692 1.1 mrg struct gomp_thread *thr = gomp_thread (); 693 1.1.1.9 mrg 694 1.1 mrg switch (thr->ts.work_share->sched) 695 1.1 mrg { 696 1.1 mrg case GFS_STATIC: 697 1.1 mrg case GFS_AUTO: 698 1.1 mrg return gomp_loop_static_next (istart, iend); 699 1.1 mrg case GFS_DYNAMIC: 700 1.1 mrg return gomp_loop_dynamic_next (istart, iend); 701 1.1 mrg case GFS_GUIDED: 702 1.1 mrg return gomp_loop_guided_next (istart, iend); 703 1.1 mrg default: 704 1.1 mrg abort (); 705 1.1 mrg } 706 1.1 mrg } 707 1.1 mrg 708 1.1 mrg /* The *_ordered_*_next routines are called when the thread completes 709 1.1 mrg processing of the iteration block currently assigned to it. 710 1.1 mrg 711 1.1 mrg Returns true if there is work remaining to be performed; *ISTART and 712 1.1 mrg *IEND are filled with a new iteration block. Returns false if all work 713 1.1 mrg has been assigned. */ 714 1.1 mrg 715 1.1 mrg static bool 716 1.1 mrg gomp_loop_ordered_static_next (long *istart, long *iend) 717 1.1 mrg { 718 1.1 mrg struct gomp_thread *thr = gomp_thread (); 719 1.1 mrg int test; 720 1.1 mrg 721 1.1 mrg gomp_ordered_sync (); 722 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 723 1.1 mrg test = gomp_iter_static_next (istart, iend); 724 1.1 mrg if (test >= 0) 725 1.1 mrg gomp_ordered_static_next (); 726 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 727 1.1 mrg 728 1.1 mrg return test == 0; 729 1.1 mrg } 730 1.1 mrg 731 1.1 mrg static bool 732 1.1 mrg gomp_loop_ordered_dynamic_next (long *istart, long *iend) 733 1.1 mrg { 734 1.1 mrg struct gomp_thread *thr = gomp_thread (); 735 1.1 mrg bool ret; 736 1.1 mrg 737 1.1 mrg gomp_ordered_sync (); 738 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 739 1.1 mrg ret = gomp_iter_dynamic_next_locked (istart, iend); 740 1.1 mrg if (ret) 741 1.1 mrg gomp_ordered_next (); 742 1.1 mrg else 743 1.1 mrg gomp_ordered_last (); 744 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 745 1.1 mrg 746 1.1 mrg return ret; 747 1.1 mrg } 748 1.1 mrg 749 1.1 mrg static bool 750 1.1 mrg gomp_loop_ordered_guided_next (long *istart, long *iend) 751 1.1 mrg { 752 1.1 mrg struct gomp_thread *thr = gomp_thread (); 753 1.1 mrg bool ret; 754 1.1 mrg 755 1.1 mrg gomp_ordered_sync (); 756 1.1 mrg gomp_mutex_lock (&thr->ts.work_share->lock); 757 1.1 mrg ret = gomp_iter_guided_next_locked (istart, iend); 758 1.1 mrg if (ret) 759 1.1 mrg gomp_ordered_next (); 760 1.1 mrg else 761 1.1 mrg gomp_ordered_last (); 762 1.1 mrg gomp_mutex_unlock (&thr->ts.work_share->lock); 763 1.1 mrg 764 1.1 mrg return ret; 765 1.1 mrg } 766 1.1 mrg 767 1.1 mrg bool 768 1.1 mrg GOMP_loop_ordered_runtime_next (long *istart, long *iend) 769 1.1 mrg { 770 1.1 mrg struct gomp_thread *thr = gomp_thread (); 771 1.1.1.9 mrg 772 1.1 mrg switch (thr->ts.work_share->sched) 773 1.1 mrg { 774 1.1 mrg case GFS_STATIC: 775 1.1 mrg case GFS_AUTO: 776 1.1 mrg return gomp_loop_ordered_static_next (istart, iend); 777 1.1 mrg case GFS_DYNAMIC: 778 1.1 mrg return gomp_loop_ordered_dynamic_next (istart, iend); 779 1.1 mrg case GFS_GUIDED: 780 1.1 mrg return gomp_loop_ordered_guided_next (istart, iend); 781 1.1 mrg default: 782 1.1 mrg abort (); 783 1.1 mrg } 784 1.1 mrg } 785 1.1 mrg 786 1.1 mrg /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct 787 1.1 mrg to avoid one synchronization once we get into the loop. */ 788 1.1 mrg 789 1.1 mrg static void 790 1.1 mrg gomp_parallel_loop_start (void (*fn) (void *), void *data, 791 1.1 mrg unsigned num_threads, long start, long end, 792 1.1 mrg long incr, enum gomp_schedule_type sched, 793 1.1.1.3 mrg long chunk_size, unsigned int flags) 794 1.1 mrg { 795 1.1 mrg struct gomp_team *team; 796 1.1 mrg 797 1.1 mrg num_threads = gomp_resolve_num_threads (num_threads, 0); 798 1.1 mrg team = gomp_new_team (num_threads); 799 1.1 mrg gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size); 800 1.1.1.9 mrg gomp_team_start (fn, data, num_threads, flags, team, NULL); 801 1.1 mrg } 802 1.1 mrg 803 1.1 mrg void 804 1.1 mrg GOMP_parallel_loop_static_start (void (*fn) (void *), void *data, 805 1.1 mrg unsigned num_threads, long start, long end, 806 1.1 mrg long incr, long chunk_size) 807 1.1 mrg { 808 1.1 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 809 1.1.1.3 mrg GFS_STATIC, chunk_size, 0); 810 1.1 mrg } 811 1.1 mrg 812 1.1 mrg void 813 1.1 mrg GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data, 814 1.1 mrg unsigned num_threads, long start, long end, 815 1.1 mrg long incr, long chunk_size) 816 1.1 mrg { 817 1.1 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 818 1.1.1.3 mrg GFS_DYNAMIC, chunk_size, 0); 819 1.1 mrg } 820 1.1 mrg 821 1.1 mrg void 822 1.1 mrg GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data, 823 1.1 mrg unsigned num_threads, long start, long end, 824 1.1 mrg long incr, long chunk_size) 825 1.1 mrg { 826 1.1 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 827 1.1.1.3 mrg GFS_GUIDED, chunk_size, 0); 828 1.1 mrg } 829 1.1 mrg 830 1.1 mrg void 831 1.1 mrg GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data, 832 1.1 mrg unsigned num_threads, long start, long end, 833 1.1 mrg long incr) 834 1.1 mrg { 835 1.1 mrg struct gomp_task_icv *icv = gomp_icv (false); 836 1.1 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 837 1.1.1.9 mrg icv->run_sched_var & ~GFS_MONOTONIC, 838 1.1.1.9 mrg icv->run_sched_chunk_size, 0); 839 1.1.1.3 mrg } 840 1.1.1.3 mrg 841 1.1.1.3 mrg ialias_redirect (GOMP_parallel_end) 842 1.1.1.3 mrg 843 1.1.1.3 mrg void 844 1.1.1.3 mrg GOMP_parallel_loop_static (void (*fn) (void *), void *data, 845 1.1.1.3 mrg unsigned num_threads, long start, long end, 846 1.1.1.3 mrg long incr, long chunk_size, unsigned flags) 847 1.1.1.3 mrg { 848 1.1.1.3 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 849 1.1.1.3 mrg GFS_STATIC, chunk_size, flags); 850 1.1.1.3 mrg fn (data); 851 1.1.1.3 mrg GOMP_parallel_end (); 852 1.1.1.3 mrg } 853 1.1.1.3 mrg 854 1.1.1.3 mrg void 855 1.1.1.3 mrg GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data, 856 1.1.1.3 mrg unsigned num_threads, long start, long end, 857 1.1.1.3 mrg long incr, long chunk_size, unsigned flags) 858 1.1.1.3 mrg { 859 1.1.1.3 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 860 1.1.1.3 mrg GFS_DYNAMIC, chunk_size, flags); 861 1.1.1.3 mrg fn (data); 862 1.1.1.3 mrg GOMP_parallel_end (); 863 1.1.1.3 mrg } 864 1.1.1.3 mrg 865 1.1.1.3 mrg void 866 1.1.1.3 mrg GOMP_parallel_loop_guided (void (*fn) (void *), void *data, 867 1.1.1.3 mrg unsigned num_threads, long start, long end, 868 1.1.1.3 mrg long incr, long chunk_size, unsigned flags) 869 1.1.1.3 mrg { 870 1.1.1.3 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 871 1.1.1.3 mrg GFS_GUIDED, chunk_size, flags); 872 1.1.1.3 mrg fn (data); 873 1.1.1.3 mrg GOMP_parallel_end (); 874 1.1.1.3 mrg } 875 1.1.1.3 mrg 876 1.1.1.9 mrg void 877 1.1.1.9 mrg GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, 878 1.1.1.9 mrg unsigned num_threads, long start, long end, 879 1.1.1.9 mrg long incr, unsigned flags) 880 1.1.1.9 mrg { 881 1.1.1.9 mrg struct gomp_task_icv *icv = gomp_icv (false); 882 1.1.1.9 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 883 1.1.1.9 mrg icv->run_sched_var & ~GFS_MONOTONIC, 884 1.1.1.9 mrg icv->run_sched_chunk_size, flags); 885 1.1.1.9 mrg fn (data); 886 1.1.1.9 mrg GOMP_parallel_end (); 887 1.1.1.9 mrg } 888 1.1.1.9 mrg 889 1.1.1.4 mrg #ifdef HAVE_ATTRIBUTE_ALIAS 890 1.1.1.4 mrg extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic 891 1.1.1.4 mrg __attribute__((alias ("GOMP_parallel_loop_dynamic"))); 892 1.1.1.4 mrg extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided 893 1.1.1.4 mrg __attribute__((alias ("GOMP_parallel_loop_guided"))); 894 1.1.1.9 mrg extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime 895 1.1.1.9 mrg __attribute__((alias ("GOMP_parallel_loop_runtime"))); 896 1.1.1.9 mrg extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime 897 1.1.1.9 mrg __attribute__((alias ("GOMP_parallel_loop_runtime"))); 898 1.1.1.4 mrg #else 899 1.1.1.4 mrg void 900 1.1.1.4 mrg GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data, 901 1.1.1.4 mrg unsigned num_threads, long start, 902 1.1.1.4 mrg long end, long incr, long chunk_size, 903 1.1.1.4 mrg unsigned flags) 904 1.1.1.4 mrg { 905 1.1.1.4 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 906 1.1.1.4 mrg GFS_DYNAMIC, chunk_size, flags); 907 1.1.1.4 mrg fn (data); 908 1.1.1.4 mrg GOMP_parallel_end (); 909 1.1.1.4 mrg } 910 1.1.1.4 mrg 911 1.1.1.4 mrg void 912 1.1.1.4 mrg GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data, 913 1.1.1.4 mrg unsigned num_threads, long start, 914 1.1.1.4 mrg long end, long incr, long chunk_size, 915 1.1.1.4 mrg unsigned flags) 916 1.1.1.4 mrg { 917 1.1.1.4 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 918 1.1.1.4 mrg GFS_GUIDED, chunk_size, flags); 919 1.1.1.4 mrg fn (data); 920 1.1.1.4 mrg GOMP_parallel_end (); 921 1.1.1.4 mrg } 922 1.1.1.4 mrg 923 1.1.1.3 mrg void 924 1.1.1.9 mrg GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data, 925 1.1.1.9 mrg unsigned num_threads, long start, 926 1.1.1.9 mrg long end, long incr, unsigned flags) 927 1.1.1.3 mrg { 928 1.1.1.3 mrg struct gomp_task_icv *icv = gomp_icv (false); 929 1.1.1.3 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 930 1.1.1.9 mrg icv->run_sched_var & ~GFS_MONOTONIC, 931 1.1.1.9 mrg icv->run_sched_chunk_size, flags); 932 1.1.1.3 mrg fn (data); 933 1.1.1.3 mrg GOMP_parallel_end (); 934 1.1 mrg } 935 1.1 mrg 936 1.1.1.9 mrg void 937 1.1.1.9 mrg GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data, 938 1.1.1.9 mrg unsigned num_threads, long start, 939 1.1.1.9 mrg long end, long incr, 940 1.1.1.9 mrg unsigned flags) 941 1.1.1.9 mrg { 942 1.1.1.9 mrg struct gomp_task_icv *icv = gomp_icv (false); 943 1.1.1.9 mrg gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, 944 1.1.1.9 mrg icv->run_sched_var & ~GFS_MONOTONIC, 945 1.1.1.9 mrg icv->run_sched_chunk_size, flags); 946 1.1.1.9 mrg fn (data); 947 1.1.1.9 mrg GOMP_parallel_end (); 948 1.1.1.9 mrg } 949 1.1.1.9 mrg #endif 950 1.1.1.9 mrg 951 1.1 mrg /* The GOMP_loop_end* routines are called after the thread is told that 952 1.1.1.3 mrg all loop iterations are complete. The first two versions synchronize 953 1.1 mrg all threads; the nowait version does not. */ 954 1.1 mrg 955 1.1 mrg void 956 1.1 mrg GOMP_loop_end (void) 957 1.1 mrg { 958 1.1 mrg gomp_work_share_end (); 959 1.1 mrg } 960 1.1 mrg 961 1.1.1.3 mrg bool 962 1.1.1.3 mrg GOMP_loop_end_cancel (void) 963 1.1.1.3 mrg { 964 1.1.1.3 mrg return gomp_work_share_end_cancel (); 965 1.1.1.3 mrg } 966 1.1.1.3 mrg 967 1.1 mrg void 968 1.1 mrg GOMP_loop_end_nowait (void) 969 1.1 mrg { 970 1.1 mrg gomp_work_share_end_nowait (); 971 1.1 mrg } 972 1.1 mrg 973 1.1 mrg 974 1.1 mrg /* We use static functions above so that we're sure that the "runtime" 975 1.1 mrg function can defer to the proper routine without interposition. We 976 1.1 mrg export the static function with a strong alias when possible, or with 977 1.1 mrg a wrapper function otherwise. */ 978 1.1 mrg 979 1.1 mrg #ifdef HAVE_ATTRIBUTE_ALIAS 980 1.1 mrg extern __typeof(gomp_loop_static_start) GOMP_loop_static_start 981 1.1 mrg __attribute__((alias ("gomp_loop_static_start"))); 982 1.1 mrg extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start 983 1.1 mrg __attribute__((alias ("gomp_loop_dynamic_start"))); 984 1.1 mrg extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start 985 1.1 mrg __attribute__((alias ("gomp_loop_guided_start"))); 986 1.1.1.4 mrg extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start 987 1.1.1.4 mrg __attribute__((alias ("gomp_loop_dynamic_start"))); 988 1.1.1.4 mrg extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start 989 1.1.1.4 mrg __attribute__((alias ("gomp_loop_guided_start"))); 990 1.1.1.9 mrg extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start 991 1.1.1.9 mrg __attribute__((alias ("GOMP_loop_runtime_start"))); 992 1.1.1.9 mrg extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start 993 1.1.1.9 mrg __attribute__((alias ("GOMP_loop_runtime_start"))); 994 1.1 mrg 995 1.1 mrg extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start 996 1.1 mrg __attribute__((alias ("gomp_loop_ordered_static_start"))); 997 1.1 mrg extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start 998 1.1 mrg __attribute__((alias ("gomp_loop_ordered_dynamic_start"))); 999 1.1 mrg extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start 1000 1.1 mrg __attribute__((alias ("gomp_loop_ordered_guided_start"))); 1001 1.1 mrg 1002 1.1.1.4 mrg extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start 1003 1.1.1.4 mrg __attribute__((alias ("gomp_loop_doacross_static_start"))); 1004 1.1.1.4 mrg extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start 1005 1.1.1.4 mrg __attribute__((alias ("gomp_loop_doacross_dynamic_start"))); 1006 1.1.1.4 mrg extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start 1007 1.1.1.4 mrg __attribute__((alias ("gomp_loop_doacross_guided_start"))); 1008 1.1.1.4 mrg 1009 1.1 mrg extern __typeof(gomp_loop_static_next) GOMP_loop_static_next 1010 1.1 mrg __attribute__((alias ("gomp_loop_static_next"))); 1011 1.1 mrg extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next 1012 1.1 mrg __attribute__((alias ("gomp_loop_dynamic_next"))); 1013 1.1 mrg extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next 1014 1.1 mrg __attribute__((alias ("gomp_loop_guided_next"))); 1015 1.1.1.4 mrg extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next 1016 1.1.1.4 mrg __attribute__((alias ("gomp_loop_dynamic_next"))); 1017 1.1.1.4 mrg extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next 1018 1.1.1.4 mrg __attribute__((alias ("gomp_loop_guided_next"))); 1019 1.1.1.9 mrg extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next 1020 1.1.1.9 mrg __attribute__((alias ("GOMP_loop_runtime_next"))); 1021 1.1.1.9 mrg extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next 1022 1.1.1.9 mrg __attribute__((alias ("GOMP_loop_runtime_next"))); 1023 1.1 mrg 1024 1.1 mrg extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next 1025 1.1 mrg __attribute__((alias ("gomp_loop_ordered_static_next"))); 1026 1.1 mrg extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next 1027 1.1 mrg __attribute__((alias ("gomp_loop_ordered_dynamic_next"))); 1028 1.1 mrg extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next 1029 1.1 mrg __attribute__((alias ("gomp_loop_ordered_guided_next"))); 1030 1.1 mrg #else 1031 1.1 mrg bool 1032 1.1 mrg GOMP_loop_static_start (long start, long end, long incr, long chunk_size, 1033 1.1 mrg long *istart, long *iend) 1034 1.1 mrg { 1035 1.1 mrg return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend); 1036 1.1 mrg } 1037 1.1 mrg 1038 1.1 mrg bool 1039 1.1 mrg GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size, 1040 1.1 mrg long *istart, long *iend) 1041 1.1 mrg { 1042 1.1 mrg return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend); 1043 1.1 mrg } 1044 1.1 mrg 1045 1.1 mrg bool 1046 1.1 mrg GOMP_loop_guided_start (long start, long end, long incr, long chunk_size, 1047 1.1 mrg long *istart, long *iend) 1048 1.1 mrg { 1049 1.1 mrg return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend); 1050 1.1 mrg } 1051 1.1 mrg 1052 1.1 mrg bool 1053 1.1.1.4 mrg GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr, 1054 1.1.1.4 mrg long chunk_size, long *istart, 1055 1.1.1.4 mrg long *iend) 1056 1.1.1.4 mrg { 1057 1.1.1.4 mrg return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend); 1058 1.1.1.4 mrg } 1059 1.1.1.4 mrg 1060 1.1.1.4 mrg bool 1061 1.1.1.4 mrg GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr, 1062 1.1.1.4 mrg long chunk_size, long *istart, long *iend) 1063 1.1.1.4 mrg { 1064 1.1.1.4 mrg return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend); 1065 1.1.1.4 mrg } 1066 1.1.1.4 mrg 1067 1.1.1.4 mrg bool 1068 1.1.1.9 mrg GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr, 1069 1.1.1.9 mrg long *istart, long *iend) 1070 1.1.1.9 mrg { 1071 1.1.1.9 mrg return GOMP_loop_runtime_start (start, end, incr, istart, iend); 1072 1.1.1.9 mrg } 1073 1.1.1.9 mrg 1074 1.1.1.9 mrg bool 1075 1.1.1.9 mrg GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr, 1076 1.1.1.9 mrg long *istart, long *iend) 1077 1.1.1.9 mrg { 1078 1.1.1.9 mrg return GOMP_loop_runtime_start (start, end, incr, istart, iend); 1079 1.1.1.9 mrg } 1080 1.1.1.9 mrg 1081 1.1.1.9 mrg bool 1082 1.1 mrg GOMP_loop_ordered_static_start (long start, long end, long incr, 1083 1.1 mrg long chunk_size, long *istart, long *iend) 1084 1.1 mrg { 1085 1.1 mrg return gomp_loop_ordered_static_start (start, end, incr, chunk_size, 1086 1.1 mrg istart, iend); 1087 1.1 mrg } 1088 1.1 mrg 1089 1.1 mrg bool 1090 1.1 mrg GOMP_loop_ordered_dynamic_start (long start, long end, long incr, 1091 1.1 mrg long chunk_size, long *istart, long *iend) 1092 1.1 mrg { 1093 1.1 mrg return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size, 1094 1.1 mrg istart, iend); 1095 1.1 mrg } 1096 1.1 mrg 1097 1.1 mrg bool 1098 1.1 mrg GOMP_loop_ordered_guided_start (long start, long end, long incr, 1099 1.1 mrg long chunk_size, long *istart, long *iend) 1100 1.1 mrg { 1101 1.1 mrg return gomp_loop_ordered_guided_start (start, end, incr, chunk_size, 1102 1.1 mrg istart, iend); 1103 1.1 mrg } 1104 1.1 mrg 1105 1.1 mrg bool 1106 1.1.1.4 mrg GOMP_loop_doacross_static_start (unsigned ncounts, long *counts, 1107 1.1.1.4 mrg long chunk_size, long *istart, long *iend) 1108 1.1.1.4 mrg { 1109 1.1.1.4 mrg return gomp_loop_doacross_static_start (ncounts, counts, chunk_size, 1110 1.1.1.4 mrg istart, iend); 1111 1.1.1.4 mrg } 1112 1.1.1.4 mrg 1113 1.1.1.4 mrg bool 1114 1.1.1.4 mrg GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts, 1115 1.1.1.4 mrg long chunk_size, long *istart, long *iend) 1116 1.1.1.4 mrg { 1117 1.1.1.4 mrg return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size, 1118 1.1.1.4 mrg istart, iend); 1119 1.1.1.4 mrg } 1120 1.1.1.4 mrg 1121 1.1.1.4 mrg bool 1122 1.1.1.4 mrg GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts, 1123 1.1.1.4 mrg long chunk_size, long *istart, long *iend) 1124 1.1.1.4 mrg { 1125 1.1.1.4 mrg return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size, 1126 1.1.1.4 mrg istart, iend); 1127 1.1.1.4 mrg } 1128 1.1.1.4 mrg 1129 1.1.1.4 mrg bool 1130 1.1 mrg GOMP_loop_static_next (long *istart, long *iend) 1131 1.1 mrg { 1132 1.1 mrg return gomp_loop_static_next (istart, iend); 1133 1.1 mrg } 1134 1.1 mrg 1135 1.1 mrg bool 1136 1.1 mrg GOMP_loop_dynamic_next (long *istart, long *iend) 1137 1.1 mrg { 1138 1.1 mrg return gomp_loop_dynamic_next (istart, iend); 1139 1.1 mrg } 1140 1.1 mrg 1141 1.1 mrg bool 1142 1.1 mrg GOMP_loop_guided_next (long *istart, long *iend) 1143 1.1 mrg { 1144 1.1 mrg return gomp_loop_guided_next (istart, iend); 1145 1.1 mrg } 1146 1.1 mrg 1147 1.1 mrg bool 1148 1.1.1.4 mrg GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend) 1149 1.1.1.4 mrg { 1150 1.1.1.4 mrg return gomp_loop_dynamic_next (istart, iend); 1151 1.1.1.4 mrg } 1152 1.1.1.4 mrg 1153 1.1.1.4 mrg bool 1154 1.1.1.4 mrg GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend) 1155 1.1.1.4 mrg { 1156 1.1.1.4 mrg return gomp_loop_guided_next (istart, iend); 1157 1.1.1.4 mrg } 1158 1.1.1.4 mrg 1159 1.1.1.4 mrg bool 1160 1.1.1.9 mrg GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend) 1161 1.1.1.9 mrg { 1162 1.1.1.9 mrg return GOMP_loop_runtime_next (istart, iend); 1163 1.1.1.9 mrg } 1164 1.1.1.9 mrg 1165 1.1.1.9 mrg bool 1166 1.1.1.9 mrg GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend) 1167 1.1.1.9 mrg { 1168 1.1.1.9 mrg return GOMP_loop_runtime_next (istart, iend); 1169 1.1.1.9 mrg } 1170 1.1.1.9 mrg 1171 1.1.1.9 mrg bool 1172 1.1 mrg GOMP_loop_ordered_static_next (long *istart, long *iend) 1173 1.1 mrg { 1174 1.1 mrg return gomp_loop_ordered_static_next (istart, iend); 1175 1.1 mrg } 1176 1.1 mrg 1177 1.1 mrg bool 1178 1.1 mrg GOMP_loop_ordered_dynamic_next (long *istart, long *iend) 1179 1.1 mrg { 1180 1.1 mrg return gomp_loop_ordered_dynamic_next (istart, iend); 1181 1.1 mrg } 1182 1.1 mrg 1183 1.1 mrg bool 1184 1.1 mrg GOMP_loop_ordered_guided_next (long *istart, long *iend) 1185 1.1 mrg { 1186 1.1 mrg return gomp_loop_ordered_guided_next (istart, iend); 1187 1.1 mrg } 1188 1.1 mrg #endif 1189