1 1.1 mrg // -*- C++ -*- 2 1.1 mrg 3 1.12 mrg // Copyright (C) 2007-2022 Free Software Foundation, Inc. 4 1.1 mrg // 5 1.1 mrg // This file is part of the GNU ISO C++ Library. This library is free 6 1.1 mrg // software; you can redistribute it and/or modify it under the terms 7 1.1 mrg // of the GNU General Public License as published by the Free Software 8 1.1 mrg // Foundation; either version 3, or (at your option) any later 9 1.1 mrg // version. 10 1.1 mrg 11 1.1 mrg // This library is distributed in the hope that it will be useful, but 12 1.1 mrg // WITHOUT ANY WARRANTY; without even the implied warranty of 13 1.1 mrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 1.1 mrg // General Public License for more details. 15 1.1 mrg 16 1.1 mrg // Under Section 7 of GPL version 3, you are granted additional 17 1.1 mrg // permissions described in the GCC Runtime Library Exception, version 18 1.1 mrg // 3.1, as published by the Free Software Foundation. 19 1.1 mrg 20 1.1 mrg // You should have received a copy of the GNU General Public License and 21 1.1 mrg // a copy of the GCC Runtime Library Exception along with this program; 22 1.1 mrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 1.1 mrg // <http://www.gnu.org/licenses/>. 24 1.1 mrg 25 1.1 mrg /** @file parallel/partition.h 26 1.1 mrg * @brief Parallel implementation of std::partition(), 27 1.1 mrg * std::nth_element(), and std::partial_sort(). 28 1.1 mrg * This file is a GNU parallel extension to the Standard C++ Library. 29 1.1 mrg */ 30 1.1 mrg 31 1.1 mrg // Written by Johannes Singler and Felix Putze. 32 1.1 mrg 33 1.1 mrg #ifndef _GLIBCXX_PARALLEL_PARTITION_H 34 1.1 mrg #define _GLIBCXX_PARALLEL_PARTITION_H 1 35 1.1 mrg 36 1.1 mrg #include <parallel/basic_iterator.h> 37 1.1 mrg #include <parallel/sort.h> 38 1.1 mrg #include <parallel/random_number.h> 39 1.1 mrg #include <bits/stl_algo.h> 40 1.1 mrg #include <parallel/parallel.h> 41 1.1 mrg 42 1.1 mrg /** @brief Decide whether to declare certain variables volatile. */ 43 1.1 mrg #define _GLIBCXX_VOLATILE volatile 44 1.1 mrg 45 1.1 mrg namespace __gnu_parallel 46 1.1 mrg { 47 1.1 mrg /** @brief Parallel implementation of std::partition. 48 1.1 mrg * @param __begin Begin iterator of input sequence to split. 49 1.1 mrg * @param __end End iterator of input sequence to split. 50 1.1 mrg * @param __pred Partition predicate, possibly including some kind 51 1.1 mrg * of pivot. 52 1.1 mrg * @param __num_threads Maximum number of threads to use for this task. 53 1.1 mrg * @return Number of elements not fulfilling the predicate. */ 54 1.1 mrg template<typename _RAIter, typename _Predicate> 55 1.1 mrg typename std::iterator_traits<_RAIter>::difference_type 56 1.1 mrg __parallel_partition(_RAIter __begin, _RAIter __end, 57 1.1 mrg _Predicate __pred, _ThreadIndex __num_threads) 58 1.1 mrg { 59 1.1 mrg typedef std::iterator_traits<_RAIter> _TraitsType; 60 1.1 mrg typedef typename _TraitsType::value_type _ValueType; 61 1.1 mrg typedef typename _TraitsType::difference_type _DifferenceType; 62 1.1 mrg 63 1.1 mrg _DifferenceType __n = __end - __begin; 64 1.1 mrg 65 1.1 mrg _GLIBCXX_CALL(__n) 66 1.1 mrg 67 1.1 mrg const _Settings& __s = _Settings::get(); 68 1.1 mrg 69 1.3 mrg // shared 70 1.3 mrg _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1, 71 1.3 mrg __dist = __n, 72 1.3 mrg __leftover_left, __leftover_right, 73 1.3 mrg __leftnew, __rightnew; 74 1.1 mrg 75 1.3 mrg // just 0 or 1, but int to allow atomic operations 76 1.3 mrg int* __reserved_left = 0, * __reserved_right = 0; 77 1.1 mrg 78 1.1 mrg _DifferenceType __chunk_size = __s.partition_chunk_size; 79 1.1 mrg 80 1.1 mrg //at least two chunks per thread 81 1.3 mrg if (__dist >= 2 * __num_threads * __chunk_size) 82 1.1 mrg # pragma omp parallel num_threads(__num_threads) 83 1.1 mrg { 84 1.1 mrg # pragma omp single 85 1.1 mrg { 86 1.1 mrg __num_threads = omp_get_num_threads(); 87 1.3 mrg __reserved_left = new int[__num_threads]; 88 1.3 mrg __reserved_right = new int[__num_threads]; 89 1.1 mrg 90 1.1 mrg if (__s.partition_chunk_share > 0.0) 91 1.1 mrg __chunk_size = std::max<_DifferenceType> 92 1.1 mrg (__s.partition_chunk_size, (double)__n 93 1.1 mrg * __s.partition_chunk_share / (double)__num_threads); 94 1.1 mrg else 95 1.1 mrg __chunk_size = __s.partition_chunk_size; 96 1.1 mrg } 97 1.1 mrg 98 1.3 mrg while (__dist >= 2 * __num_threads * __chunk_size) 99 1.1 mrg { 100 1.1 mrg # pragma omp single 101 1.1 mrg { 102 1.3 mrg _DifferenceType __num_chunks = __dist / __chunk_size; 103 1.1 mrg 104 1.1 mrg for (_ThreadIndex __r = 0; __r < __num_threads; ++__r) 105 1.1 mrg { 106 1.3 mrg __reserved_left [__r] = 0; // false 107 1.3 mrg __reserved_right[__r] = 0; // false 108 1.1 mrg } 109 1.1 mrg __leftover_left = 0; 110 1.1 mrg __leftover_right = 0; 111 1.1 mrg } //implicit barrier 112 1.1 mrg 113 1.1 mrg // Private. 114 1.1 mrg _DifferenceType __thread_left, __thread_left_border, 115 1.1 mrg __thread_right, __thread_right_border; 116 1.3 mrg 117 1.1 mrg __thread_left = __left + 1; 118 1.1 mrg // Just to satisfy the condition below. 119 1.1 mrg __thread_left_border = __thread_left - 1; 120 1.3 mrg 121 1.1 mrg __thread_right = __n - 1; 122 1.3 mrg // Just to satisfy the condition below. 123 1.1 mrg __thread_right_border = __thread_right + 1; 124 1.1 mrg 125 1.1 mrg bool __iam_finished = false; 126 1.1 mrg while (!__iam_finished) 127 1.1 mrg { 128 1.1 mrg if (__thread_left > __thread_left_border) 129 1.1 mrg { 130 1.3 mrg _DifferenceType __former_dist = 131 1.3 mrg __fetch_and_add(&__dist, -__chunk_size); 132 1.3 mrg if (__former_dist < __chunk_size) 133 1.3 mrg { 134 1.3 mrg __fetch_and_add(&__dist, __chunk_size); 135 1.3 mrg __iam_finished = true; 136 1.3 mrg break; 137 1.3 mrg } 138 1.3 mrg else 139 1.3 mrg { 140 1.3 mrg __thread_left = 141 1.3 mrg __fetch_and_add(&__left, __chunk_size); 142 1.3 mrg __thread_left_border = 143 1.3 mrg __thread_left + (__chunk_size - 1); 144 1.3 mrg } 145 1.1 mrg } 146 1.1 mrg 147 1.1 mrg if (__thread_right < __thread_right_border) 148 1.1 mrg { 149 1.3 mrg _DifferenceType __former_dist = 150 1.3 mrg __fetch_and_add(&__dist, -__chunk_size); 151 1.3 mrg if (__former_dist < __chunk_size) 152 1.3 mrg { 153 1.3 mrg __fetch_and_add(&__dist, __chunk_size); 154 1.3 mrg __iam_finished = true; 155 1.3 mrg break; 156 1.3 mrg } 157 1.3 mrg else 158 1.3 mrg { 159 1.3 mrg __thread_right = 160 1.3 mrg __fetch_and_add(&__right, -__chunk_size); 161 1.3 mrg __thread_right_border = 162 1.3 mrg __thread_right - (__chunk_size - 1); 163 1.3 mrg } 164 1.1 mrg } 165 1.1 mrg 166 1.1 mrg // Swap as usual. 167 1.1 mrg while (__thread_left < __thread_right) 168 1.1 mrg { 169 1.1 mrg while (__pred(__begin[__thread_left]) 170 1.1 mrg && __thread_left <= __thread_left_border) 171 1.1 mrg ++__thread_left; 172 1.1 mrg while (!__pred(__begin[__thread_right]) 173 1.1 mrg && __thread_right >= __thread_right_border) 174 1.1 mrg --__thread_right; 175 1.1 mrg 176 1.1 mrg if (__thread_left > __thread_left_border 177 1.1 mrg || __thread_right < __thread_right_border) 178 1.1 mrg // Fetch new chunk(__s). 179 1.1 mrg break; 180 1.1 mrg 181 1.1 mrg std::iter_swap(__begin + __thread_left, 182 1.1 mrg __begin + __thread_right); 183 1.1 mrg ++__thread_left; 184 1.1 mrg --__thread_right; 185 1.1 mrg } 186 1.1 mrg } 187 1.1 mrg 188 1.1 mrg // Now swap the leftover chunks to the right places. 189 1.1 mrg if (__thread_left <= __thread_left_border) 190 1.1 mrg # pragma omp atomic 191 1.1 mrg ++__leftover_left; 192 1.1 mrg if (__thread_right >= __thread_right_border) 193 1.1 mrg # pragma omp atomic 194 1.1 mrg ++__leftover_right; 195 1.1 mrg 196 1.1 mrg # pragma omp barrier 197 1.1 mrg 198 1.3 mrg _DifferenceType 199 1.3 mrg __leftold = __left, 200 1.3 mrg __leftnew = __left - __leftover_left * __chunk_size, 201 1.3 mrg __rightold = __right, 202 1.3 mrg __rightnew = __right + __leftover_right * __chunk_size; 203 1.1 mrg 204 1.1 mrg // <=> __thread_left_border + (__chunk_size - 1) >= __leftnew 205 1.1 mrg if (__thread_left <= __thread_left_border 206 1.1 mrg && __thread_left_border >= __leftnew) 207 1.1 mrg { 208 1.1 mrg // Chunk already in place, reserve spot. 209 1.1 mrg __reserved_left[(__left - (__thread_left_border + 1)) 210 1.3 mrg / __chunk_size] = 1; 211 1.1 mrg } 212 1.1 mrg 213 1.1 mrg // <=> __thread_right_border - (__chunk_size - 1) <= __rightnew 214 1.1 mrg if (__thread_right >= __thread_right_border 215 1.1 mrg && __thread_right_border <= __rightnew) 216 1.1 mrg { 217 1.1 mrg // Chunk already in place, reserve spot. 218 1.1 mrg __reserved_right[((__thread_right_border - 1) - __right) 219 1.3 mrg / __chunk_size] = 1; 220 1.1 mrg } 221 1.1 mrg 222 1.1 mrg # pragma omp barrier 223 1.1 mrg 224 1.1 mrg if (__thread_left <= __thread_left_border 225 1.1 mrg && __thread_left_border < __leftnew) 226 1.1 mrg { 227 1.1 mrg // Find spot and swap. 228 1.1 mrg _DifferenceType __swapstart = -1; 229 1.3 mrg for (int __r = 0; __r < __leftover_left; ++__r) 230 1.3 mrg if (__reserved_left[__r] == 0 231 1.3 mrg && __compare_and_swap(&(__reserved_left[__r]), 0, 1)) 232 1.3 mrg { 233 1.3 mrg __swapstart = __leftold - (__r + 1) * __chunk_size; 234 1.3 mrg break; 235 1.3 mrg } 236 1.1 mrg 237 1.8 mrg #if _GLIBCXX_PARALLEL_ASSERTIONS 238 1.1 mrg _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); 239 1.1 mrg #endif 240 1.1 mrg 241 1.1 mrg std::swap_ranges(__begin + __thread_left_border 242 1.1 mrg - (__chunk_size - 1), 243 1.1 mrg __begin + __thread_left_border + 1, 244 1.1 mrg __begin + __swapstart); 245 1.1 mrg } 246 1.1 mrg 247 1.1 mrg if (__thread_right >= __thread_right_border 248 1.1 mrg && __thread_right_border > __rightnew) 249 1.1 mrg { 250 1.1 mrg // Find spot and swap 251 1.1 mrg _DifferenceType __swapstart = -1; 252 1.3 mrg for (int __r = 0; __r < __leftover_right; ++__r) 253 1.3 mrg if (__reserved_right[__r] == 0 254 1.3 mrg && __compare_and_swap(&(__reserved_right[__r]), 0, 1)) 255 1.3 mrg { 256 1.3 mrg __swapstart = __rightold + __r * __chunk_size + 1; 257 1.3 mrg break; 258 1.3 mrg } 259 1.1 mrg 260 1.8 mrg #if _GLIBCXX_PARALLEL_ASSERTIONS 261 1.1 mrg _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); 262 1.1 mrg #endif 263 1.1 mrg 264 1.1 mrg std::swap_ranges(__begin + __thread_right_border, 265 1.1 mrg __begin + __thread_right_border 266 1.1 mrg + __chunk_size, __begin + __swapstart); 267 1.1 mrg } 268 1.8 mrg #if _GLIBCXX_PARALLEL_ASSERTIONS 269 1.1 mrg # pragma omp barrier 270 1.1 mrg 271 1.1 mrg # pragma omp single 272 1.1 mrg { 273 1.1 mrg for (_DifferenceType __r = 0; __r < __leftover_left; ++__r) 274 1.3 mrg _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r] == 1); 275 1.1 mrg for (_DifferenceType __r = 0; __r < __leftover_right; ++__r) 276 1.3 mrg _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r] == 1); 277 1.1 mrg } 278 1.1 mrg #endif 279 1.1 mrg 280 1.1 mrg __left = __leftnew; 281 1.1 mrg __right = __rightnew; 282 1.3 mrg __dist = __right - __left + 1; 283 1.1 mrg } 284 1.1 mrg 285 1.1 mrg # pragma omp flush(__left, __right) 286 1.1 mrg } // end "recursion" //parallel 287 1.1 mrg 288 1.1 mrg _DifferenceType __final_left = __left, __final_right = __right; 289 1.1 mrg 290 1.1 mrg while (__final_left < __final_right) 291 1.1 mrg { 292 1.1 mrg // Go right until key is geq than pivot. 293 1.1 mrg while (__pred(__begin[__final_left]) 294 1.1 mrg && __final_left < __final_right) 295 1.1 mrg ++__final_left; 296 1.1 mrg 297 1.1 mrg // Go left until key is less than pivot. 298 1.1 mrg while (!__pred(__begin[__final_right]) 299 1.1 mrg && __final_left < __final_right) 300 1.1 mrg --__final_right; 301 1.1 mrg 302 1.1 mrg if (__final_left == __final_right) 303 1.1 mrg break; 304 1.1 mrg std::iter_swap(__begin + __final_left, __begin + __final_right); 305 1.1 mrg ++__final_left; 306 1.1 mrg --__final_right; 307 1.1 mrg } 308 1.1 mrg 309 1.1 mrg // All elements on the left side are < piv, all elements on the 310 1.1 mrg // right are >= piv 311 1.1 mrg delete[] __reserved_left; 312 1.1 mrg delete[] __reserved_right; 313 1.1 mrg 314 1.1 mrg // Element "between" __final_left and __final_right might not have 315 1.1 mrg // been regarded yet 316 1.1 mrg if (__final_left < __n && !__pred(__begin[__final_left])) 317 1.1 mrg // Really swapped. 318 1.1 mrg return __final_left; 319 1.1 mrg else 320 1.1 mrg return __final_left + 1; 321 1.1 mrg } 322 1.1 mrg 323 1.1 mrg /** 324 1.1 mrg * @brief Parallel implementation of std::nth_element(). 325 1.1 mrg * @param __begin Begin iterator of input sequence. 326 1.1 mrg * @param __nth _Iterator of element that must be in position afterwards. 327 1.1 mrg * @param __end End iterator of input sequence. 328 1.1 mrg * @param __comp Comparator. 329 1.1 mrg */ 330 1.1 mrg template<typename _RAIter, typename _Compare> 331 1.1 mrg void 332 1.1 mrg __parallel_nth_element(_RAIter __begin, _RAIter __nth, 333 1.1 mrg _RAIter __end, _Compare __comp) 334 1.1 mrg { 335 1.1 mrg typedef std::iterator_traits<_RAIter> _TraitsType; 336 1.1 mrg typedef typename _TraitsType::value_type _ValueType; 337 1.1 mrg typedef typename _TraitsType::difference_type _DifferenceType; 338 1.1 mrg 339 1.1 mrg _GLIBCXX_CALL(__end - __begin) 340 1.1 mrg 341 1.1 mrg _RAIter __split; 342 1.1 mrg _RandomNumber __rng; 343 1.1 mrg 344 1.1 mrg const _Settings& __s = _Settings::get(); 345 1.1 mrg _DifferenceType __minimum_length = std::max<_DifferenceType>(2, 346 1.1 mrg std::max(__s.nth_element_minimal_n, __s.partition_minimal_n)); 347 1.1 mrg 348 1.1 mrg // Break if input range to small. 349 1.1 mrg while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length) 350 1.1 mrg { 351 1.1 mrg _DifferenceType __n = __end - __begin; 352 1.1 mrg 353 1.1 mrg _RAIter __pivot_pos = __begin + __rng(__n); 354 1.1 mrg 355 1.1 mrg // Swap __pivot_pos value to end. 356 1.1 mrg if (__pivot_pos != (__end - 1)) 357 1.1 mrg std::iter_swap(__pivot_pos, __end - 1); 358 1.1 mrg __pivot_pos = __end - 1; 359 1.1 mrg 360 1.1 mrg // _Compare must have first_value_type, second_value_type, 361 1.1 mrg // result_type 362 1.1 mrg // _Compare == 363 1.1 mrg // __gnu_parallel::_Lexicographic<S, int, 364 1.1 mrg // __gnu_parallel::_Less<S, S> > 365 1.1 mrg // __pivot_pos == std::pair<S, int>* 366 1.1 mrg __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool> 367 1.1 mrg __pred(__comp, *__pivot_pos); 368 1.1 mrg 369 1.1 mrg // Divide, leave pivot unchanged in last place. 370 1.1 mrg _RAIter __split_pos1, __split_pos2; 371 1.1 mrg __split_pos1 = __begin + __parallel_partition(__begin, __end - 1, 372 1.1 mrg __pred, 373 1.1 mrg __get_max_threads()); 374 1.1 mrg 375 1.1 mrg // Left side: < __pivot_pos; __right side: >= __pivot_pos 376 1.1 mrg 377 1.1 mrg // Swap pivot back to middle. 378 1.1 mrg if (__split_pos1 != __pivot_pos) 379 1.1 mrg std::iter_swap(__split_pos1, __pivot_pos); 380 1.1 mrg __pivot_pos = __split_pos1; 381 1.1 mrg 382 1.1 mrg // In case all elements are equal, __split_pos1 == 0 383 1.1 mrg if ((__split_pos1 + 1 - __begin) < (__n >> 7) 384 1.1 mrg || (__end - __split_pos1) < (__n >> 7)) 385 1.1 mrg { 386 1.1 mrg // Very unequal split, one part smaller than one 128th 387 1.1 mrg // elements not strictly larger than the pivot. 388 1.1 mrg __gnu_parallel::__unary_negate<__gnu_parallel:: 389 1.1 mrg __binder1st<_Compare, _ValueType, 390 1.1 mrg _ValueType, bool>, _ValueType> 391 1.1 mrg __pred(__gnu_parallel::__binder1st<_Compare, _ValueType, 392 1.1 mrg _ValueType, bool>(__comp, *__pivot_pos)); 393 1.1 mrg 394 1.1 mrg // Find other end of pivot-equal range. 395 1.1 mrg __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, 396 1.1 mrg __end, __pred); 397 1.1 mrg } 398 1.1 mrg else 399 1.1 mrg // Only skip the pivot. 400 1.1 mrg __split_pos2 = __split_pos1 + 1; 401 1.1 mrg 402 1.1 mrg // Compare iterators. 403 1.1 mrg if (__split_pos2 <= __nth) 404 1.1 mrg __begin = __split_pos2; 405 1.1 mrg else if (__nth < __split_pos1) 406 1.1 mrg __end = __split_pos1; 407 1.1 mrg else 408 1.1 mrg break; 409 1.1 mrg } 410 1.1 mrg 411 1.1 mrg // Only at most _Settings::partition_minimal_n __elements __left. 412 1.1 mrg __gnu_sequential::nth_element(__begin, __nth, __end, __comp); 413 1.1 mrg } 414 1.1 mrg 415 1.1 mrg /** @brief Parallel implementation of std::partial_sort(). 416 1.1 mrg * @param __begin Begin iterator of input sequence. 417 1.1 mrg * @param __middle Sort until this position. 418 1.1 mrg * @param __end End iterator of input sequence. 419 1.1 mrg * @param __comp Comparator. */ 420 1.1 mrg template<typename _RAIter, typename _Compare> 421 1.1 mrg void 422 1.1 mrg __parallel_partial_sort(_RAIter __begin, 423 1.1 mrg _RAIter __middle, 424 1.1 mrg _RAIter __end, _Compare __comp) 425 1.1 mrg { 426 1.1 mrg __parallel_nth_element(__begin, __middle, __end, __comp); 427 1.1 mrg std::sort(__begin, __middle, __comp); 428 1.1 mrg } 429 1.1 mrg 430 1.1 mrg } //namespace __gnu_parallel 431 1.1 mrg 432 1.1 mrg #undef _GLIBCXX_VOLATILE 433 1.1 mrg 434 1.1 mrg #endif /* _GLIBCXX_PARALLEL_PARTITION_H */ 435