//===-- tsan_clock.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads. SyncClock contains a growable vector clock for the currently
// necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::releaseStoreAcquire(SyncClock *sc) const {
//     for (int i = 0; i < kMaxThreads; i++) {
//       tmp = clock[i];
//       clock[i] = max(clock[i], sc->clock[i]);
//       sc->clock[i] = tmp;
//     }
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cpp.
// However, the implementation is significantly more complex. The complexity
// allows implementing important classes of use cases in O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is in
// a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by switching
// to O(N).
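//
// For example, with this machinery the common acquire path reduces,
// schematically, to the following (a sketch for exposition, not the actual
// code below):
//
//   void ThreadClock::acquire(SyncClock *src) {
//     merge src->dirty_[0..kDirtyTids-1];       // at most 2 elements, O(1)
//     if (src->elem(tid_).reused == reused_)
//       return;                                 // already acquired: O(1)
//     // otherwise fall back to the O(N) element-wise merge
//   }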

// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.

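// Schematically, each vector clock element packs the two fields described
// above into a single word (a sketch of the encoding, not a literal
// definition):
//
//   u64 pack(u64 epoch, u64 reused) { return (reused << kClkBits) | epoch; }
//   u64 epoch(u64 e)  { return e & ((1ull << kClkBits) - 1); }
//   u64 reused(u64 e) { return e >> kClkBits; }
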
namespace __tsan {

static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
  return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
}

// Drop reference to the first level block idx.
static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  atomic_uint32_t *ref = ref_ptr(cb);
  u32 v = atomic_load(ref, memory_order_acquire);
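  // If the count is 1 we hold the last reference and can free the blocks
  // outright; otherwise try to just decrement and let the last holder
  // do the freeing.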
  for (;;) {
    CHECK_GT(v, 0);
    if (v == 1)
      break;
    if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
      return;
  }
  // The first level block owns the second level blocks, so free them as well.
  for (uptr i = 0; i < blocks; i++)
    ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
  ctx->clock_alloc.Free(c, idx);
}

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1)  // 0 has special meaning
    , last_acquire_()
    , global_acquire_()
    , cached_idx_()
    , cached_size_()
    , cached_blocks_() {
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  internal_memset(clk_, 0, sizeof(clk_));
}

void ThreadClock::ResetCached(ClockCache *c) {
  if (cached_idx_) {
    UnrefClockBlock(c, cached_idx_, cached_blocks_);
    cached_idx_ = 0;
    cached_size_ = 0;
    cached_blocks_ = 0;
  }
}

void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0)
    return;

  bool acquired = false;
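  // First merge in the dirty elements: they must be acquired even if this
  // thread has already acquired the rest of the clock.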
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    unsigned tid = dirty.tid();
    if (tid != kInvalidTid) {
      if (clk_[tid] < dirty.epoch) {
        clk_[tid] = dirty.epoch;
        acquired = true;
      }
    }
  }

  // Check if we've already acquired src after the last release operation
  // on src.
  if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
    // O(N) acquire.
    nclk_ = max(nclk_, nclk);
    u64 *dst_pos = &clk_[0];
    for (ClockElem &src_elem : *src) {
      u64 epoch = src_elem.epoch;
      if (*dst_pos < epoch) {
        *dst_pos = epoch;
        acquired = true;
      }
      dst_pos++;
    }

    // Remember that this thread has acquired this clock.
    if (nclk > tid_)
      src->elem(tid_).reused = reused_;
  }

  if (acquired) {
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(sc->size_, kMaxTid);

  if (sc->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, sc);
    return;
  }

  nclk_ = max(nclk_, (uptr) sc->size_);

  // Check if we need to resize sc.
  if (sc->size_ < nclk_)
    sc->Resize(c, nclk_);

  bool acquired = false;

  sc->Unshare(c);
  // Update sc->clk_.
  sc->FlushDirty();
  uptr i = 0;
  for (ClockElem &ce : *sc) {
    u64 tmp = clk_[i];
    if (clk_[i] < ce.epoch) {
      clk_[i] = ce.epoch;
      acquired = true;
    }
    ce.epoch = tmp;
    ce.reused = 0;
    i++;
  }
  sc->release_store_tid_ = kInvalidTid;
  sc->release_store_reused_ = 0;

  if (acquired) {
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::release(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // If we have not acquired anything from other threads
  // since the last release on dst, we only need to update
  // dst->elem(tid_).
  if (!HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  dst->Unshare(c);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  // Update dst->clk_.
  dst->FlushDirty();
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = max(ce.epoch, clk_[i]);
    ce.reused = 0;
    i++;
  }
  // Clear 'acquired' flag in the remaining elements.
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0 && cached_idx_ != 0) {
    // Reuse the cached clock.
    // Note: we could reuse/cache the cached clock in more cases:
    // we could update the existing clock and cache it, or replace it with the
    // currently cached clock and release the old one. And for a shared
    // existing clock, we could replace it with the currently cached;
    // or unshare, update and cache. But, for simplicity, we currently reuse
    // cached clock only when the target clock is empty.
    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
    dst->tab_idx_ = cached_idx_;
    dst->size_ = cached_size_;
    dst->blocks_ = cached_blocks_;
    CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid);
    // The cached clock is shared (immutable),
    // so this is where we store the current clock.
    dst->dirty_[0].set_tid(tid_);
    dst->dirty_[0].epoch = clk_[tid_];
    dst->release_store_tid_ = tid_;
    dst->release_store_reused_ = reused_;
    // Remember that we don't need to acquire it in future.
    dst->elem(tid_).reused = reused_;
    // Grab a reference.
    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      !HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    return;
  }

  // O(N) release-store.
  dst->Unshare(c);
  // Note: dst can be larger than this ThreadClock.
  // This is fine since clk_ beyond size is all zeros.
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = clk_[i];
    ce.reused = 0;
    i++;
  }
  for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid);
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->elem(tid_).reused = reused_;

  // If the resulting clock is cachable, cache it for future release operations.
  // The clock is always cachable if we released to an empty sync object.
  if (cached_idx_ == 0 && dst->Cachable()) {
    // Grab a reference to the ClockBlock.
    atomic_uint32_t *ref = ref_ptr(dst->tab_);
    if (atomic_load(ref, memory_order_acquire) == 1)
      atomic_store_relaxed(ref, 2);
    else
      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    cached_idx_ = dst->tab_idx_;
    cached_size_ = dst->size_;
    cached_blocks_ = dst->blocks_;
  }
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
  // Update the thread's time, but preserve 'acquired' flag.
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty *dirty = &dst->dirty_[i];
    const unsigned tid = dirty->tid();
    if (tid == tid_ || tid == kInvalidTid) {
      dirty->set_tid(tid_);
      dirty->epoch = clk_[tid_];
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  // We are going to touch dst elements, so we need to unshare it.
  dst->Unshare(c);
  dst->elem(tid_).epoch = clk_[tid_];
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  dst->FlushDirty();
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
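  // Even when the 'acquired' flag matches, the dirty elements may hold
  // updates this thread has not seen yet (see the header comment).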
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    if (dirty.tid() != kInvalidTid) {
      if (clk_[dirty.tid()] < dirty.epoch)
        return false;
    }
  }
  return true;
}

// Checks whether the current thread has acquired anything
// from other clocks after releasing to dst (directly or indirectly).
bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
  const u64 my_epoch = dst->elem(tid_).epoch;
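  // my_epoch is this thread's epoch as stored in dst by its last release;
  // if it does not exceed the epoch of the last (local or global) acquire,
  // the thread has acquired something since that release.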
  return my_epoch <= last_acquire_ ||
         my_epoch <= atomic_load_relaxed(&global_acquire_);
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid]);
  clk_[tid] = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}

SyncClock::SyncClock() {
  ResetImpl();
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(blocks_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_)
    UnrefClockBlock(c, tab_idx_, blocks_);
  ResetImpl();
}

void SyncClock::ResetImpl() {
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  blocks_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid);
}

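// Layout sketch of the two-level structure (see capacity(), get_block() and
// append_block() below): the first-level ClockBlock stores clock elements
// upwards from clock[0], while table[kBlockIdx], table[kBlockIdx - 1], ...
// hold indices of second-level blocks and table[kRefIdx] holds the
// reference counter.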
void SyncClock::Resize(ClockCache *c, uptr nclk) {
  Unshare(c);
  if (nclk <= capacity()) {
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (size_ == 0) {
    // Grow from 0 to one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(blocks_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    atomic_store_relaxed(ref_ptr(tab_), 1);
    size_ = 1;
  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
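    // The first-level block still holds clock elements; evacuate them into
    // a fresh second-level block so the first-level table slots can be used
    // for block indices.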
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
    CHECK_LT(top, ClockBlock::kClockCount);
    const uptr move = top * sizeof(tab_->clock[0]);
    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
    internal_memset(tab_->clock, 0, move);
    append_block(idx);
  }
  // At this point we have the first-level table allocated and all clock
  // elements are evacuated from it to a second-level block.
  // Add second-level tables as necessary.
  while (nclk > capacity()) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    append_block(idx);
  }
  size_ = nclk;
}

// Flushes all dirty elements into the main clock array.
void SyncClock::FlushDirty() {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty *dirty = &dirty_[i];
    if (dirty->tid() != kInvalidTid) {
      CHECK_LT(dirty->tid(), size_);
      elem(dirty->tid()).epoch = dirty->epoch;
      dirty->set_tid(kInvalidTid);
    }
  }
}

bool SyncClock::IsShared() const {
  if (size_ == 0)
    return false;
  atomic_uint32_t *ref = ref_ptr(tab_);
  u32 v = atomic_load(ref, memory_order_acquire);
  CHECK_GT(v, 0);
  return v > 1;
}

// Unshares the current clock if it's shared.
// Shared clocks are immutable, so they need to be unshared before any updates.
// Note: this does not apply to dirty entries as they are not shared.
void SyncClock::Unshare(ClockCache *c) {
  if (!IsShared())
    return;
  // First, copy current state into old.
  SyncClock old;
  old.tab_ = tab_;
  old.tab_idx_ = tab_idx_;
  old.size_ = size_;
  old.blocks_ = blocks_;
  old.release_store_tid_ = release_store_tid_;
  old.release_store_reused_ = release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    old.dirty_[i] = dirty_[i];
  // Then, clear current object.
  ResetImpl();
  // Allocate brand new clock in the current object.
  Resize(c, old.size_);
  // Now copy state back into this object.
  Iter old_iter(&old);
  for (ClockElem &ce : *this) {
    ce = *old_iter;
    ++old_iter;
  }
  release_store_tid_ = old.release_store_tid_;
  release_store_reused_ = old.release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dirty_[i] = old.dirty_[i];
  // Drop reference to old and delete if necessary.
  old.Reset(c);
}

// Can we cache this clock for future release operations?
ALWAYS_INLINE bool SyncClock::Cachable() const {
  if (size_ == 0)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dirty_[i].tid() != kInvalidTid)
      return false;
  }
  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
}

// elem linearizes the two-level structure into a linear array.
// Note: this is used only for one-time accesses, vector operations use
// the iterator as it is much faster.
ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  const uptr block = tid / ClockBlock::kClockCount;
  DCHECK_LE(block, blocks_);
  tid %= ClockBlock::kClockCount;
  if (block == blocks_)
    return tab_->clock[tid];
  u32 idx = get_block(block);
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid];
}

ALWAYS_INLINE uptr SyncClock::capacity() const {
  if (size_ == 0)
    return 0;
  uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
  // How many clock elements we can fit into the first level block.
  // +1 for ref counter.
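  // Each clock element occupies 'ratio' table slots; blocks_ block indices
  // plus the ref counter consume RoundUpTo(blocks_ + 1, ratio) table slots,
  // i.e. RoundUpTo(blocks_ + 1, ratio) / ratio clock element positions.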
  uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
  return blocks_ * ClockBlock::kClockCount + top;
}

ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
  DCHECK(size_);
  DCHECK_LT(bi, blocks_);
  return tab_->table[ClockBlock::kBlockIdx - bi];
}

ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
  uptr bi = blocks_++;
  CHECK_EQ(get_block(bi), 0);
  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
}

// Used only by tests.
u64 SyncClock::get(unsigned tid) const {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty dirty = dirty_[i];
    if (dirty.tid() == tid)
      return dirty.epoch;
  }
  return elem(tid).epoch;
}

// Used only by Iter test.
u64 SyncClock::get_clean(unsigned tid) const {
  return elem(tid).epoch;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
         release_store_tid_, release_store_reused_, dirty_[0].tid(),
         dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch);
}

void SyncClock::Iter::Next() {
  // Finished with the current block, move on to the next one.
  block_++;
  if (block_ < parent_->blocks_) {
    // Iterate over the next second level block.
    u32 idx = parent_->get_block(block_);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    pos_ = &cb->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  if (block_ == parent_->blocks_ &&
      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
    // Iterate over elements in the first level block.
    pos_ = &parent_->tab_->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  parent_ = nullptr;  // denotes end
}
}  // namespace __tsan