1 1.1.1.12 mrg /* Copyright (C) 2005-2024 Free Software Foundation, Inc. 2 1.1 mrg Contributed by Richard Henderson <rth (at) redhat.com>. 3 1.1 mrg 4 1.1.1.3 mrg This file is part of the GNU Offloading and Multi Processing Library 5 1.1.1.3 mrg (libgomp). 6 1.1 mrg 7 1.1 mrg Libgomp is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published by 9 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 10 1.1 mrg any later version. 11 1.1 mrg 12 1.1 mrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 1.1 mrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 1.1 mrg more details. 16 1.1 mrg 17 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 18 1.1 mrg permissions described in the GCC Runtime Library Exception, version 19 1.1 mrg 3.1, as published by the Free Software Foundation. 20 1.1 mrg 21 1.1 mrg You should have received a copy of the GNU General Public License and 22 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 23 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 1.1 mrg <http://www.gnu.org/licenses/>. */ 25 1.1 mrg 26 1.1 mrg /* This file contains routines to manage the work-share queue for a team 27 1.1 mrg of threads. */ 28 1.1 mrg 29 1.1 mrg #include "libgomp.h" 30 1.1 mrg #include <stddef.h> 31 1.1 mrg #include <stdlib.h> 32 1.1 mrg #include <string.h> 33 1.1 mrg 34 1.1 mrg 35 1.1 mrg /* Allocate a new work share structure, preferably from current team's 36 1.1 mrg free gomp_work_share cache. */ 37 1.1 mrg 38 1.1 mrg static struct gomp_work_share * 39 1.1 mrg alloc_work_share (struct gomp_team *team) 40 1.1 mrg { 41 1.1 mrg struct gomp_work_share *ws; 42 1.1 mrg unsigned int i; 43 1.1 mrg 44 1.1 mrg /* This is called in a critical section. */ 45 1.1 mrg if (team->work_share_list_alloc != NULL) 46 1.1 mrg { 47 1.1 mrg ws = team->work_share_list_alloc; 48 1.1 mrg team->work_share_list_alloc = ws->next_free; 49 1.1 mrg return ws; 50 1.1 mrg } 51 1.1 mrg 52 1.1 mrg #ifdef HAVE_SYNC_BUILTINS 53 1.1 mrg ws = team->work_share_list_free; 54 1.1 mrg /* We need atomic read from work_share_list_free, 55 1.1 mrg as free_work_share can be called concurrently. */ 56 1.1 mrg __asm ("" : "+r" (ws)); 57 1.1 mrg 58 1.1 mrg if (ws && ws->next_free) 59 1.1 mrg { 60 1.1 mrg struct gomp_work_share *next = ws->next_free; 61 1.1 mrg ws->next_free = NULL; 62 1.1 mrg team->work_share_list_alloc = next->next_free; 63 1.1 mrg return next; 64 1.1 mrg } 65 1.1 mrg #else 66 1.1 mrg gomp_mutex_lock (&team->work_share_list_free_lock); 67 1.1 mrg ws = team->work_share_list_free; 68 1.1 mrg if (ws) 69 1.1 mrg { 70 1.1 mrg team->work_share_list_alloc = ws->next_free; 71 1.1 mrg team->work_share_list_free = NULL; 72 1.1 mrg gomp_mutex_unlock (&team->work_share_list_free_lock); 73 1.1 mrg return ws; 74 1.1 mrg } 75 1.1 mrg gomp_mutex_unlock (&team->work_share_list_free_lock); 76 1.1 mrg #endif 77 1.1 mrg 78 1.1 mrg team->work_share_chunk *= 2; 79 1.1.1.9 mrg /* Allocating gomp_work_share structures aligned is just an 80 1.1.1.9 mrg optimization, don't do it when using the fallback method. */ 81 1.1.1.11 mrg #ifdef GOMP_USE_ALIGNED_WORK_SHARES 82 1.1.1.9 mrg ws = gomp_aligned_alloc (__alignof (struct gomp_work_share), 83 1.1.1.9 mrg team->work_share_chunk 84 1.1.1.9 mrg * sizeof (struct gomp_work_share)); 85 1.1.1.9 mrg #else 86 1.1 mrg ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share)); 87 1.1.1.9 mrg #endif 88 1.1 mrg ws->next_alloc = team->work_shares[0].next_alloc; 89 1.1 mrg team->work_shares[0].next_alloc = ws; 90 1.1 mrg team->work_share_list_alloc = &ws[1]; 91 1.1 mrg for (i = 1; i < team->work_share_chunk - 1; i++) 92 1.1 mrg ws[i].next_free = &ws[i + 1]; 93 1.1 mrg ws[i].next_free = NULL; 94 1.1 mrg return ws; 95 1.1 mrg } 96 1.1 mrg 97 1.1 mrg /* Initialize an already allocated struct gomp_work_share. 98 1.1 mrg This shouldn't touch the next_alloc field. */ 99 1.1 mrg 100 1.1 mrg void 101 1.1.1.9 mrg gomp_init_work_share (struct gomp_work_share *ws, size_t ordered, 102 1.1 mrg unsigned nthreads) 103 1.1 mrg { 104 1.1 mrg gomp_mutex_init (&ws->lock); 105 1.1 mrg if (__builtin_expect (ordered, 0)) 106 1.1 mrg { 107 1.1.1.9 mrg #define INLINE_ORDERED_TEAM_IDS_SIZE \ 108 1.1.1.9 mrg (sizeof (struct gomp_work_share) \ 109 1.1.1.9 mrg - offsetof (struct gomp_work_share, inline_ordered_team_ids)) 110 1.1.1.9 mrg 111 1.1.1.9 mrg if (__builtin_expect (ordered != 1, 0)) 112 1.1.1.9 mrg { 113 1.1.1.9 mrg size_t o = nthreads * sizeof (*ws->ordered_team_ids); 114 1.1.1.9 mrg o += __alignof__ (long long) - 1; 115 1.1.1.9 mrg if ((offsetof (struct gomp_work_share, inline_ordered_team_ids) 116 1.1.1.11 mrg & (__alignof__ (long long) - 1)) == 0 117 1.1.1.11 mrg && __alignof__ (struct gomp_work_share) 118 1.1.1.11 mrg >= __alignof__ (long long)) 119 1.1.1.9 mrg o &= ~(__alignof__ (long long) - 1); 120 1.1.1.9 mrg ordered += o - 1; 121 1.1.1.9 mrg } 122 1.1.1.9 mrg else 123 1.1.1.9 mrg ordered = nthreads * sizeof (*ws->ordered_team_ids); 124 1.1.1.9 mrg if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE) 125 1.1.1.10 mrg ws->ordered_team_ids = team_malloc (ordered); 126 1.1 mrg else 127 1.1 mrg ws->ordered_team_ids = ws->inline_ordered_team_ids; 128 1.1.1.9 mrg memset (ws->ordered_team_ids, '\0', ordered); 129 1.1 mrg ws->ordered_num_used = 0; 130 1.1 mrg ws->ordered_owner = -1; 131 1.1 mrg ws->ordered_cur = 0; 132 1.1 mrg } 133 1.1 mrg else 134 1.1.1.9 mrg ws->ordered_team_ids = ws->inline_ordered_team_ids; 135 1.1 mrg gomp_ptrlock_init (&ws->next_ws, NULL); 136 1.1 mrg ws->threads_completed = 0; 137 1.1 mrg } 138 1.1 mrg 139 1.1 mrg /* Do any needed destruction of gomp_work_share fields before it 140 1.1 mrg is put back into free gomp_work_share cache or freed. */ 141 1.1 mrg 142 1.1 mrg void 143 1.1 mrg gomp_fini_work_share (struct gomp_work_share *ws) 144 1.1 mrg { 145 1.1 mrg gomp_mutex_destroy (&ws->lock); 146 1.1 mrg if (ws->ordered_team_ids != ws->inline_ordered_team_ids) 147 1.1.1.10 mrg team_free (ws->ordered_team_ids); 148 1.1 mrg gomp_ptrlock_destroy (&ws->next_ws); 149 1.1 mrg } 150 1.1 mrg 151 1.1 mrg /* Free a work share struct, if not orphaned, put it into current 152 1.1 mrg team's free gomp_work_share cache. */ 153 1.1 mrg 154 1.1 mrg static inline void 155 1.1 mrg free_work_share (struct gomp_team *team, struct gomp_work_share *ws) 156 1.1 mrg { 157 1.1 mrg gomp_fini_work_share (ws); 158 1.1 mrg if (__builtin_expect (team == NULL, 0)) 159 1.1 mrg free (ws); 160 1.1 mrg else 161 1.1 mrg { 162 1.1 mrg struct gomp_work_share *next_ws; 163 1.1 mrg #ifdef HAVE_SYNC_BUILTINS 164 1.1 mrg do 165 1.1 mrg { 166 1.1 mrg next_ws = team->work_share_list_free; 167 1.1 mrg ws->next_free = next_ws; 168 1.1 mrg } 169 1.1 mrg while (!__sync_bool_compare_and_swap (&team->work_share_list_free, 170 1.1 mrg next_ws, ws)); 171 1.1 mrg #else 172 1.1 mrg gomp_mutex_lock (&team->work_share_list_free_lock); 173 1.1 mrg next_ws = team->work_share_list_free; 174 1.1 mrg ws->next_free = next_ws; 175 1.1 mrg team->work_share_list_free = ws; 176 1.1 mrg gomp_mutex_unlock (&team->work_share_list_free_lock); 177 1.1 mrg #endif 178 1.1 mrg } 179 1.1 mrg } 180 1.1 mrg 181 1.1 mrg /* The current thread is ready to begin the next work sharing construct. 182 1.1 mrg In all cases, thr->ts.work_share is updated to point to the new 183 1.1 mrg structure. In all cases the work_share lock is locked. Return true 184 1.1 mrg if this was the first thread to reach this point. */ 185 1.1 mrg 186 1.1 mrg bool 187 1.1.1.9 mrg gomp_work_share_start (size_t ordered) 188 1.1 mrg { 189 1.1 mrg struct gomp_thread *thr = gomp_thread (); 190 1.1 mrg struct gomp_team *team = thr->ts.team; 191 1.1 mrg struct gomp_work_share *ws; 192 1.1 mrg 193 1.1 mrg /* Work sharing constructs can be orphaned. */ 194 1.1 mrg if (team == NULL) 195 1.1 mrg { 196 1.1.1.11 mrg #ifdef GOMP_USE_ALIGNED_WORK_SHARES 197 1.1.1.11 mrg ws = gomp_aligned_alloc (__alignof (struct gomp_work_share), 198 1.1.1.11 mrg sizeof (*ws)); 199 1.1.1.11 mrg #else 200 1.1 mrg ws = gomp_malloc (sizeof (*ws)); 201 1.1.1.11 mrg #endif 202 1.1 mrg gomp_init_work_share (ws, ordered, 1); 203 1.1 mrg thr->ts.work_share = ws; 204 1.1.1.9 mrg return true; 205 1.1 mrg } 206 1.1 mrg 207 1.1 mrg ws = thr->ts.work_share; 208 1.1 mrg thr->ts.last_work_share = ws; 209 1.1 mrg ws = gomp_ptrlock_get (&ws->next_ws); 210 1.1 mrg if (ws == NULL) 211 1.1 mrg { 212 1.1 mrg /* This thread encountered a new ws first. */ 213 1.1 mrg struct gomp_work_share *ws = alloc_work_share (team); 214 1.1 mrg gomp_init_work_share (ws, ordered, team->nthreads); 215 1.1 mrg thr->ts.work_share = ws; 216 1.1 mrg return true; 217 1.1 mrg } 218 1.1 mrg else 219 1.1 mrg { 220 1.1 mrg thr->ts.work_share = ws; 221 1.1 mrg return false; 222 1.1 mrg } 223 1.1 mrg } 224 1.1 mrg 225 1.1 mrg /* The current thread is done with its current work sharing construct. 226 1.1 mrg This version does imply a barrier at the end of the work-share. */ 227 1.1 mrg 228 1.1 mrg void 229 1.1 mrg gomp_work_share_end (void) 230 1.1 mrg { 231 1.1 mrg struct gomp_thread *thr = gomp_thread (); 232 1.1 mrg struct gomp_team *team = thr->ts.team; 233 1.1 mrg gomp_barrier_state_t bstate; 234 1.1 mrg 235 1.1 mrg /* Work sharing constructs can be orphaned. */ 236 1.1 mrg if (team == NULL) 237 1.1 mrg { 238 1.1 mrg free_work_share (NULL, thr->ts.work_share); 239 1.1 mrg thr->ts.work_share = NULL; 240 1.1 mrg return; 241 1.1 mrg } 242 1.1 mrg 243 1.1 mrg bstate = gomp_barrier_wait_start (&team->barrier); 244 1.1 mrg 245 1.1 mrg if (gomp_barrier_last_thread (bstate)) 246 1.1 mrg { 247 1.1 mrg if (__builtin_expect (thr->ts.last_work_share != NULL, 1)) 248 1.1.1.3 mrg { 249 1.1.1.3 mrg team->work_shares_to_free = thr->ts.work_share; 250 1.1.1.3 mrg free_work_share (team, thr->ts.last_work_share); 251 1.1.1.3 mrg } 252 1.1 mrg } 253 1.1 mrg 254 1.1 mrg gomp_team_barrier_wait_end (&team->barrier, bstate); 255 1.1 mrg thr->ts.last_work_share = NULL; 256 1.1 mrg } 257 1.1 mrg 258 1.1 mrg /* The current thread is done with its current work sharing construct. 259 1.1.1.3 mrg This version implies a cancellable barrier at the end of the work-share. */ 260 1.1.1.3 mrg 261 1.1.1.3 mrg bool 262 1.1.1.3 mrg gomp_work_share_end_cancel (void) 263 1.1.1.3 mrg { 264 1.1.1.3 mrg struct gomp_thread *thr = gomp_thread (); 265 1.1.1.3 mrg struct gomp_team *team = thr->ts.team; 266 1.1.1.3 mrg gomp_barrier_state_t bstate; 267 1.1.1.3 mrg 268 1.1.1.3 mrg /* Cancellable work sharing constructs cannot be orphaned. */ 269 1.1.1.3 mrg bstate = gomp_barrier_wait_cancel_start (&team->barrier); 270 1.1.1.3 mrg 271 1.1.1.3 mrg if (gomp_barrier_last_thread (bstate)) 272 1.1.1.3 mrg { 273 1.1.1.3 mrg if (__builtin_expect (thr->ts.last_work_share != NULL, 1)) 274 1.1.1.3 mrg { 275 1.1.1.3 mrg team->work_shares_to_free = thr->ts.work_share; 276 1.1.1.3 mrg free_work_share (team, thr->ts.last_work_share); 277 1.1.1.3 mrg } 278 1.1.1.3 mrg } 279 1.1.1.3 mrg thr->ts.last_work_share = NULL; 280 1.1.1.3 mrg 281 1.1.1.3 mrg return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate); 282 1.1.1.3 mrg } 283 1.1.1.3 mrg 284 1.1.1.3 mrg /* The current thread is done with its current work sharing construct. 285 1.1 mrg This version does NOT imply a barrier at the end of the work-share. */ 286 1.1 mrg 287 1.1 mrg void 288 1.1 mrg gomp_work_share_end_nowait (void) 289 1.1 mrg { 290 1.1 mrg struct gomp_thread *thr = gomp_thread (); 291 1.1 mrg struct gomp_team *team = thr->ts.team; 292 1.1 mrg struct gomp_work_share *ws = thr->ts.work_share; 293 1.1 mrg unsigned completed; 294 1.1 mrg 295 1.1 mrg /* Work sharing constructs can be orphaned. */ 296 1.1 mrg if (team == NULL) 297 1.1 mrg { 298 1.1 mrg free_work_share (NULL, ws); 299 1.1 mrg thr->ts.work_share = NULL; 300 1.1 mrg return; 301 1.1 mrg } 302 1.1 mrg 303 1.1 mrg if (__builtin_expect (thr->ts.last_work_share == NULL, 0)) 304 1.1 mrg return; 305 1.1 mrg 306 1.1 mrg #ifdef HAVE_SYNC_BUILTINS 307 1.1 mrg completed = __sync_add_and_fetch (&ws->threads_completed, 1); 308 1.1 mrg #else 309 1.1 mrg gomp_mutex_lock (&ws->lock); 310 1.1 mrg completed = ++ws->threads_completed; 311 1.1 mrg gomp_mutex_unlock (&ws->lock); 312 1.1 mrg #endif 313 1.1 mrg 314 1.1 mrg if (completed == team->nthreads) 315 1.1.1.3 mrg { 316 1.1.1.3 mrg team->work_shares_to_free = thr->ts.work_share; 317 1.1.1.3 mrg free_work_share (team, thr->ts.last_work_share); 318 1.1.1.3 mrg } 319 1.1 mrg thr->ts.last_work_share = NULL; 320 1.1 mrg } 321