u_thread.h revision b8e80941
1/**************************************************************************
2 *
3 * Copyright 1999-2006 Brian Paul
4 * Copyright 2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27#ifndef U_THREAD_H_
28#define U_THREAD_H_
29
30#include <stdint.h>
31#include <stdbool.h>
32
33#include "c11/threads.h"
34
35#ifdef HAVE_PTHREAD
36#include <signal.h>
37#endif
38
/**
 * Start a new thread that runs routine(param).
 *
 * When pthreads are available, every signal except SIGSYS is blocked in the
 * calling thread for the duration of thrd_create(), so the new thread (which
 * inherits its creator's signal mask) starts with those signals blocked. The
 * caller's original mask is restored before returning.
 *
 * \param routine  entry point of the new thread
 * \param param    opaque pointer handed to \p routine
 * \return handle of the new thread, or 0 if thrd_create() failed
 */
static inline thrd_t u_thread_create(int (*routine)(void *), void *param)
{
   thrd_t thread;
   int ret;
#ifdef HAVE_PTHREAD
   sigset_t saved_set, new_set;

   sigfillset(&new_set);
   sigdelset(&new_set, SIGSYS);
   pthread_sigmask(SIG_BLOCK, &new_set, &saved_set);
   ret = thrd_create(&thread, routine, param);
   pthread_sigmask(SIG_SETMASK, &saved_set, NULL);
#else
   ret = thrd_create(&thread, routine, param);
#endif

   /* C11 reports success as thrd_success; the standard does not promise that
    * this constant is zero, so compare against it explicitly. */
   if (ret != thrd_success)
      return 0;

   return thread;
}
60
/**
 * Set the name of the calling thread (best effort).
 *
 * The name is only applied when building with pthreads on Linux against
 * glibc 2.12 or newer; in every other configuration this is a no-op. The
 * result of pthread_setname_np() is ignored.
 *
 * \param name  name to give to the calling thread
 */
static inline void u_thread_setname( const char *name )
{
#if defined(HAVE_PTHREAD) && defined(__linux__) && \
    defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
    (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12))
   pthread_t self = pthread_self();

   pthread_setname_np(self, name);
#else
   /* Nothing to do here; just keep the compiler quiet. */
   (void)name;
#endif
}
72
/**
 * Pin a thread to the group of CPU cores that share one L3 cache.
 *
 * An AMD Zen CPU is made of several modules, each with its own L3 cache, and
 * inter-thread communication (locks, atomics) across modules is very
 * expensive. Closely cooperating threads therefore benefit from being pinned
 * to cores behind the same L3.
 *
 * The affinity mask covers cores_per_L3 consecutive cores starting at
 * L3_index * cores_per_L3. Without HAVE_PTHREAD_SETAFFINITY this function
 * does nothing. The result of pthread_setaffinity_np() is ignored.
 *
 * \param thread        thread to pin
 * \param L3_index      index of the L3 cache
 * \param cores_per_L3  number of CPU cores shared by one L3
 */
static inline void
util_pin_thread_to_L3(thrd_t thread, unsigned L3_index, unsigned cores_per_L3)
{
#if defined(HAVE_PTHREAD_SETAFFINITY)
   const unsigned first_core = L3_index * cores_per_L3;
   cpu_set_t mask;
   unsigned n = 0;

   CPU_ZERO(&mask);
   while (n < cores_per_L3) {
      CPU_SET(first_core + n, &mask);
      n++;
   }

   pthread_setaffinity_np(thread, sizeof(mask), &mask);
#endif
}
95
/**
 * Return the index of the L3 cache that the thread is pinned to.
 *
 * Every CPU in the thread's affinity mask is mapped to an L3 index by
 * dividing the CPU number by cores_per_L3.
 *
 * \param thread        thread to query
 * \param cores_per_L3  number of CPU cores shared by one L3
 * \return the L3 index shared by all CPUs in the affinity mask, or -1 if
 *         the mask spans several L3 caches, the mask is empty, the affinity
 *         cannot be queried, cores_per_L3 is 0, or the build lacks
 *         HAVE_PTHREAD_SETAFFINITY
 */
static inline int
util_get_L3_for_pinned_thread(thrd_t thread, unsigned cores_per_L3)
{
#if defined(HAVE_PTHREAD_SETAFFINITY)
   cpu_set_t cpuset;

   /* A group size of zero would make the division below undefined. */
   if (cores_per_L3 == 0)
      return -1;

   if (pthread_getaffinity_np(thread, sizeof(cpuset), &cpuset) == 0) {
      int L3_index = -1;

      for (unsigned i = 0; i < CPU_SETSIZE; i++) {
         if (CPU_ISSET(i, &cpuset)) {
            int x = i / cores_per_L3;

            if (L3_index != x) {
               if (L3_index == -1)
                  L3_index = x; /* first CPU found in the mask */
               else
                  return -1; /* multiple L3s are set */
            }
         }
      }
      return L3_index;
   }
#endif
   return -1;
}
129
130/*
131 * Thread statistics.
132 */
133
/**
 * Return the current value of a thread's CPU-time clock in nanoseconds.
 *
 * \param thread  thread whose CPU-time clock is read
 * \return the clock value in nanoseconds, or 0 if the clock cannot be
 *         obtained or read, or if the platform is not Linux with pthreads
 */
static inline int64_t
u_thread_get_time_nano(thrd_t thread)
{
#if defined(__linux__) && defined(HAVE_PTHREAD)
   struct timespec ts;
   clockid_t cid;

   /* Both calls report failure through their return value. Without these
    * checks a failure would leave cid/ts uninitialized and the function
    * would return garbage. */
   if (pthread_getcpuclockid(thread, &cid) != 0)
      return 0;
   if (clock_gettime(cid, &ts) != 0)
      return 0;

   return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
#else
   return 0;
#endif
}
149
/**
 * Tell whether \p thread refers to the calling thread.
 *
 * pthread_self() and pthread_equal() are plain POSIX, so the comparison is
 * done whenever pthreads are available rather than only on glibc >= 2.12.
 *
 * \param thread  thread handle to compare against the caller
 * \return true if \p thread is the calling thread; always false when built
 *         without HAVE_PTHREAD
 */
static inline bool u_thread_is_self(thrd_t thread)
{
#if defined(HAVE_PTHREAD)
   return pthread_equal(pthread_self(), thread) != 0;
#else
   (void)thread;
   return false;
#endif
}
160
161/*
162 * util_barrier
163 */
164
165#if defined(HAVE_PTHREAD) && !defined(__APPLE__)
166
167typedef pthread_barrier_t util_barrier;
168
169static inline void util_barrier_init(util_barrier *barrier, unsigned count)
170{
171   pthread_barrier_init(barrier, NULL, count);
172}
173
174static inline void util_barrier_destroy(util_barrier *barrier)
175{
176   pthread_barrier_destroy(barrier);
177}
178
179static inline void util_barrier_wait(util_barrier *barrier)
180{
181   pthread_barrier_wait(barrier);
182}
183
184
185#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */
186
/**
 * Fallback barrier built from a C11 mutex and condition variable.
 */
typedef struct {
   unsigned count;     /* number of arrivals needed to release the barrier */
   unsigned waiters;   /* arrivals so far in the current round */
   uint64_t sequence;  /* bumped every time the barrier is released */
   mtx_t mutex;        /* held while waiters/sequence are read or written */
   cnd_t condvar;      /* broadcast when the barrier is released */
} util_barrier;

/**
 * Prepare \p barrier so that it releases once \p count threads have called
 * util_barrier_wait(). Results of mtx_init()/cnd_init() are ignored.
 */
static inline void util_barrier_init(util_barrier *barrier, unsigned count)
{
   (void) mtx_init(&barrier->mutex, mtx_plain);
   cnd_init(&barrier->condvar);

   barrier->sequence = 0;
   barrier->waiters = 0;
   barrier->count = count;
}

/**
 * Release the resources of \p barrier. No thread may still be waiting on it
 * (checked with an assertion).
 */
static inline void util_barrier_destroy(util_barrier *barrier)
{
   assert(barrier->waiters == 0);
   cnd_destroy(&barrier->condvar);
   mtx_destroy(&barrier->mutex);
}

/**
 * Block until \p count threads (as given to util_barrier_init()) have
 * reached the barrier; the last arrival releases all the others.
 */
static inline void util_barrier_wait(util_barrier *barrier)
{
   mtx_lock(&barrier->mutex);

   assert(barrier->waiters < barrier->count);
   barrier->waiters += 1;

   if (barrier->waiters >= barrier->count) {
      /* Last arrival: reset for the next round and wake everybody up. */
      barrier->waiters = 0;
      barrier->sequence += 1;
      cnd_broadcast(&barrier->condvar);
   } else {
      /* Remember the round we arrived in. The sequence number only changes
       * when the barrier is released, which lets us tell a real release
       * apart from a wakeup that happened for any other reason. */
      const uint64_t arrival_round = barrier->sequence;

      while (barrier->sequence == arrival_round)
         cnd_wait(&barrier->condvar, &barrier->mutex);
   }

   mtx_unlock(&barrier->mutex);
}
232
233#endif
234
235#endif /* U_THREAD_H_ */
236