Home | History | Annotate | Line # | Download | only in Support
      1 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file declares helper functions for running LLVM in a multi-threaded
     10 // environment.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_SUPPORT_THREADING_H
     15 #define LLVM_SUPPORT_THREADING_H
     16 
     17 #include "llvm/ADT/BitVector.h"
     18 #include "llvm/ADT/FunctionExtras.h"
     19 #include "llvm/ADT/SmallVector.h"
     20 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
     21 #include "llvm/Support/Compiler.h"
     22 #include <ciso646> // So we can check the C++ standard lib macros.
     23 #include <functional>
     24 
// Decide whether llvm::call_once may forward to std::call_once
// (LLVM_THREADING_USE_STD_CALL_ONCE == 1) or must use the generic fallback
// (LLVM_THREADING_USE_STD_CALL_ONCE == 0). The first matching rung wins.
#if defined(_MSC_VER)
// MSVC's call_once implementation has worked since VS 2015, which is the
// minimum supported version as of this writing.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif !defined(LLVM_ON_UNIX)
// Neither MSVC nor a Unix host: std::call_once is not used.
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#elif defined(_LIBCPP_VERSION)
// std::call_once from libc++ is used on all Unix platforms.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif (defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__)
// Little-endian PowerPC is accepted even without libc++.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__ppc__) ||       \
    defined(__PPC__)
// Other implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#else
// Any remaining Unix platform.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#endif
     43 
     44 #if LLVM_THREADING_USE_STD_CALL_ONCE
     45 #include <mutex>
     46 #else
     47 #include "llvm/Support/Atomic.h"
     48 #endif
     49 
     50 namespace llvm {
     51 class Twine;
     52 
     53 /// Returns true if LLVM is compiled with support for multi-threading, and
     54 /// false otherwise.
     55 bool llvm_is_multithreaded();
     56 
/// Execute the given \p UserFn on a separate thread, passing it the provided \p
/// UserData, and wait for thread completion.
///
/// This function does not guarantee that the code will actually be executed
/// on a separate thread or that the requested stack size will be honored, but
/// tries to do so where system support is available.
     63 ///
     64 /// \param UserFn - The callback to execute.
     65 /// \param UserData - An argument to pass to the callback function.
     66 /// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
     67 /// (or None for default)
     68 void llvm_execute_on_thread(
     69     void (*UserFn)(void *), void *UserData,
     70     llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
     71 
     72 /// Schedule the given \p Func for execution on a separate thread, then return
     73 /// to the caller immediately. Roughly equivalent to
     74 /// `std::thread(Func).detach()`, except it allows requesting a specific stack
     75 /// size, if supported for the platform.
     76 ///
/// This function reports a fatal error if it can't execute the code
/// on a separate thread.
     79 ///
     80 /// \param Func - The callback to execute.
     81 /// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
     82 /// (or None for default)
     83 void llvm_execute_on_thread_async(
     84     llvm::unique_function<void()> Func,
     85     llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
     86 
     87 #if LLVM_THREADING_USE_STD_CALL_ONCE
     88 
     89   typedef std::once_flag once_flag;
     90 
     91 #else
     92 
     93   enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };
     94 
     95   /// The llvm::once_flag structure
     96   ///
     97   /// This type is modeled after std::once_flag to use with llvm::call_once.
     98   /// This structure must be used as an opaque object. It is a struct to force
     99   /// autoinitialization and behave like std::once_flag.
    100   struct once_flag {
    101     volatile sys::cas_flag status = Uninitialized;
    102   };
    103 
    104 #endif
    105 
    106   /// Execute the function specified as a parameter once.
    107   ///
    108   /// Typical usage:
    109   /// \code
    110   ///   void foo() {...};
    111   ///   ...
    112   ///   static once_flag flag;
    113   ///   call_once(flag, foo);
    114   /// \endcode
    115   ///
    116   /// \param flag Flag used for tracking whether or not this has run.
    117   /// \param F Function to call once.
    118   template <typename Function, typename... Args>
    119   void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
    120 #if LLVM_THREADING_USE_STD_CALL_ONCE
    121     std::call_once(flag, std::forward<Function>(F),
    122                    std::forward<Args>(ArgList)...);
    123 #else
    124     // For other platforms we use a generic (if brittle) version based on our
    125     // atomics.
    126     sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
    127     if (old_val == Uninitialized) {
    128       std::forward<Function>(F)(std::forward<Args>(ArgList)...);
    129       sys::MemoryFence();
    130       TsanIgnoreWritesBegin();
    131       TsanHappensBefore(&flag.status);
    132       flag.status = Done;
    133       TsanIgnoreWritesEnd();
    134     } else {
    135       // Wait until any thread doing the call has finished.
    136       sys::cas_flag tmp = flag.status;
    137       sys::MemoryFence();
    138       while (tmp != Done) {
    139         tmp = flag.status;
    140         sys::MemoryFence();
    141       }
    142     }
    143     TsanHappensAfter(&flag.status);
    144 #endif
    145   }
    146 
    147   /// This tells how a thread pool will be used
    148   class ThreadPoolStrategy {
    149   public:
    150     // The default value (0) means all available threads should be used,
    151     // taking the affinity mask into account. If set, this value only represents
    152     // a suggested high bound, the runtime might choose a lower value (not
    153     // higher).
    154     unsigned ThreadsRequested = 0;
    155 
    156     // If SMT is active, use hyper threads. If false, there will be only one
    157     // std::thread per core.
    158     bool UseHyperThreads = true;
    159 
    160     // If set, will constrain 'ThreadsRequested' to the number of hardware
    161     // threads, or hardware cores.
    162     bool Limit = false;
    163 
    164     /// Retrieves the max available threads for the current strategy. This
    165     /// accounts for affinity masks and takes advantage of all CPU sockets.
    166     unsigned compute_thread_count() const;
    167 
    168     /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
    169     /// multi-socket system, this ensures threads are assigned to all CPU
    170     /// sockets. \p ThreadPoolNum represents a number bounded by [0,
    171     /// compute_thread_count()).
    172     void apply_thread_strategy(unsigned ThreadPoolNum) const;
    173 
    174     /// Finds the CPU socket where a thread should go. Returns 'None' if the
    175     /// thread shall remain on the actual CPU socket.
    176     Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
    177   };
    178 
    179   /// Build a strategy from a number of threads as a string provided in \p Num.
    180   /// When Num is above the max number of threads specified by the \p Default
    181   /// strategy, we attempt to equally allocate the threads on all CPU sockets.
    182   /// "0" or an empty string will return the \p Default strategy.
  /// "all" requests the use of all hardware threads.
    184   Optional<ThreadPoolStrategy>
    185   get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});
    186 
    187   /// Returns a thread strategy for tasks requiring significant memory or other
    188   /// resources. To be used for workloads where hardware_concurrency() proves to
    189   /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
    190   /// based on physical cores, if available for the host system, otherwise falls
    191   /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
    192   /// LLVM_ENABLE_THREADS = OFF.
    193   inline ThreadPoolStrategy
    194   heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
    195     ThreadPoolStrategy S;
    196     S.UseHyperThreads = false;
    197     S.ThreadsRequested = ThreadCount;
    198     return S;
    199   }
    200 
    201   /// Like heavyweight_hardware_concurrency() above, but builds a strategy
    202   /// based on the rules described for get_threadpool_strategy().
    203   /// If \p Num is invalid, returns a default strategy where one thread per
    204   /// hardware core is used.
    205   inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
    206     Optional<ThreadPoolStrategy> S =
    207         get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
    208     if (S)
    209       return *S;
    210     return heavyweight_hardware_concurrency();
    211   }
    212 
    213   /// Returns a default thread strategy where all available hardware resources
    214   /// are to be used, except for those initially excluded by an affinity mask.
    215   /// This function takes affinity into consideration. Returns 1 when LLVM is
    216   /// configured with LLVM_ENABLE_THREADS=OFF.
    217   inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
    218     ThreadPoolStrategy S;
    219     S.ThreadsRequested = ThreadCount;
    220     return S;
    221   }
    222 
    223   /// Returns an optimal thread strategy to execute specified amount of tasks.
    224   /// This strategy should prevent us from creating too many threads if we
    225   /// occasionaly have an unexpectedly small amount of tasks.
    226   inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
    227     ThreadPoolStrategy S;
    228     S.Limit = true;
    229     S.ThreadsRequested = TaskCount;
    230     return S;
    231   }
    232 
    233   /// Return the current thread id, as used in various OS system calls.
    234   /// Note that not all platforms guarantee that the value returned will be
    235   /// unique across the entire system, so portable code should not assume
    236   /// this.
    237   uint64_t get_threadid();
    238 
    239   /// Get the maximum length of a thread name on this platform.
    240   /// A value of 0 means there is no limit.
    241   uint32_t get_max_thread_name_length();
    242 
    243   /// Set the name of the current thread.  Setting a thread's name can
    244   /// be helpful for enabling useful diagnostics under a debugger or when
    245   /// logging.  The level of support for setting a thread's name varies
    246   /// wildly across operating systems, and we only make a best effort to
    247   /// perform the operation on supported platforms.  No indication of success
    248   /// or failure is returned.
    249   void set_thread_name(const Twine &Name);
    250 
    251   /// Get the name of the current thread.  The level of support for
    252   /// getting a thread's name varies wildly across operating systems, and it
    253   /// is not even guaranteed that if you can successfully set a thread's name
    254   /// that you can later get it back.  This function is intended for diagnostic
    255   /// purposes, and as with setting a thread's name no indication of whether
    256   /// the operation succeeded or failed is returned.
    257   void get_thread_name(SmallVectorImpl<char> &Name);
    258 
  /// Returns a mask that represents on which hardware thread, core, CPU or
  /// NUMA group the calling thread can be executed. On Windows, threads cannot
  /// cross CPU socket boundaries.
    262   llvm::BitVector get_thread_affinity_mask();
    263 
    264   /// Returns how many physical CPUs or NUMA groups the system has.
    265   unsigned get_cpus();
    266 
  /// Scheduling priority levels accepted by set_thread_priority().
  enum class ThreadPriority {
    Background = 0,
    Default = 1,
  };
  /// Outcome reported by set_thread_priority().
  enum class SetThreadPriorityResult { FAILURE, SUCCESS };
  /// If priority is Background, tries to lower the current thread's priority
  /// such that it does not affect foreground tasks significantly. Can be used
  /// for long-running, latency-insensitive tasks to make sure cpu is not
  /// hogged by this task.
  /// If the priority is Default, tries to restore the current thread's
  /// priority to the default scheduling priority.
  SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
    279 }
    280 
    281 #endif
    282