//===-- tsan_clock.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads,
// respectively. ThreadClock contains a fixed-size vector clock for the
// maximum number of threads. SyncClock contains a growable vector clock for
// the currently necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::releaseStoreAcquire(SyncClock *sc) const {
//     for (int i = 0; i < kMaxThreads; i++) {
//       tmp = clock[i];
//       clock[i] = max(clock[i], sc->clock[i]);
//       sc->clock[i] = tmp;
//     }
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cpp.
// However, the implementation is significantly more complex. The complexity
// makes it possible to implement important classes of use cases in O(1)
// instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
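//
// As an illustration of use case 1 (a sketch, not code from this file; the
// variable name "ready" is invented for the example): the writer performs one
// release-store and each reader performs acquire-loads, so only the single
// store pays the O(N) cost:
//
//   // writer, once:
//   atomic_store(&ready, 1, memory_order_release);   // O(N) release-store
//   // readers, many times:
//   while (atomic_load(&ready, memory_order_acquire) == 0) {}  // O(1) acquire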
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
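//
// For example (a sketch; kClkBits is defined elsewhere in the runtime), a
// 64-bit clock element decomposes as:
//
//   u64 epoch  = elem & ((1ull << kClkBits) - 1);  // low bits: timestamp
//   u64 reused = elem >> kClkBits;                 // high bits: "acquired"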

namespace __tsan {

static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
  return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
}

// Drop reference to the first level block idx.
static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  atomic_uint32_t *ref = ref_ptr(cb);
  u32 v = atomic_load(ref, memory_order_acquire);
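  // If v == 1 we hold the last reference and can free the blocks below;
  // otherwise try to decrement the counter and leave the memory to the
  // remaining owners (the CAS retries if another thread races with us).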
  for (;;) {
    CHECK_GT(v, 0);
    if (v == 1)
      break;
    if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
      return;
  }
  // The first level block owns the second level blocks, so free them as well.
  for (uptr i = 0; i < blocks; i++)
    ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
  ctx->clock_alloc.Free(c, idx);
}

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1)  // 0 has special meaning
    , last_acquire_()
    , global_acquire_()
    , cached_idx_()
    , cached_size_()
    , cached_blocks_() {
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  internal_memset(clk_, 0, sizeof(clk_));
}

void ThreadClock::ResetCached(ClockCache *c) {
  if (cached_idx_) {
    UnrefClockBlock(c, cached_idx_, cached_blocks_);
    cached_idx_ = 0;
    cached_size_ = 0;
    cached_blocks_ = 0;
  }
}

void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0)
    return;

  bool acquired = false;
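  // First apply the dirty entries: these must be acquired regardless of the
  // "acquired" flag (see the SyncClock state description above).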
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    unsigned tid = dirty.tid();
    if (tid != kInvalidTid) {
      if (clk_[tid] < dirty.epoch) {
        clk_[tid] = dirty.epoch;
        acquired = true;
      }
    }
  }

  // Check if we've already acquired src after the last release operation on src
  if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
    // O(N) acquire.
    nclk_ = max(nclk_, nclk);
    u64 *dst_pos = &clk_[0];
    for (ClockElem &src_elem : *src) {
      u64 epoch = src_elem.epoch;
      if (*dst_pos < epoch) {
        *dst_pos = epoch;
        acquired = true;
      }
      dst_pos++;
    }

    // Remember that this thread has acquired this clock.
    if (nclk > tid_)
      src->elem(tid_).reused = reused_;
  }

  if (acquired) {
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(sc->size_, kMaxTid);

  if (sc->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, sc);
    return;
  }

  nclk_ = max(nclk_, (uptr) sc->size_);

  // Check if we need to resize sc.
  if (sc->size_ < nclk_)
    sc->Resize(c, nclk_);

  bool acquired = false;

  sc->Unshare(c);
  // Update sc->clk_.
  sc->FlushDirty();
  uptr i = 0;
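  // Element-wise swap-and-max, per the releaseStoreAcquire model above:
  // acquire sc into clk_ while storing our previous value into sc.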
  for (ClockElem &ce : *sc) {
    u64 tmp = clk_[i];
    if (clk_[i] < ce.epoch) {
      clk_[i] = ce.epoch;
      acquired = true;
    }
    ce.epoch = tmp;
    ce.reused = 0;
    i++;
  }
  sc->release_store_tid_ = kInvalidTid;
  sc->release_store_reused_ = 0;

  if (acquired) {
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::release(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check whether we have acquired anything from other threads
  // since the last release on dst. If not, we only need to update
  // dst->elem(tid_).
  if (!HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  dst->Unshare(c);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  // Update dst->clk_.
  dst->FlushDirty();
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = max(ce.epoch, clk_[i]);
    ce.reused = 0;
    i++;
  }
  // Clear 'acquired' flag in the remaining elements.
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0 && cached_idx_ != 0) {
    // Reuse the cached clock.
    // Note: we could reuse/cache the cached clock in more cases:
    // we could update the existing clock and cache it, or replace it with the
    // currently cached clock and release the old one. And for a shared
    // existing clock, we could replace it with the currently cached;
    // or unshare, update and cache. But, for simplicity, we currently reuse
    // the cached clock only when the target clock is empty.
    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
    dst->tab_idx_ = cached_idx_;
    dst->size_ = cached_size_;
    dst->blocks_ = cached_blocks_;
    CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid);
    // The cached clock is shared (immutable),
    // so this is where we store the current clock.
    dst->dirty_[0].set_tid(tid_);
    dst->dirty_[0].epoch = clk_[tid_];
    dst->release_store_tid_ = tid_;
    dst->release_store_reused_ = reused_;
    // Remember that we don't need to acquire it in future.
    dst->elem(tid_).reused = reused_;
    // Grab a reference.
    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      !HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    return;
  }

  // O(N) release-store.
  dst->Unshare(c);
  // Note: dst can be larger than this ThreadClock.
  // This is fine since clk_ beyond size is all zeros.
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = clk_[i];
    ce.reused = 0;
    i++;
  }
  for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid);
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->elem(tid_).reused = reused_;

  // If the resulting clock is cachable, cache it for future release operations.
  // The clock is always cachable if we released to an empty sync object.
  if (cached_idx_ == 0 && dst->Cachable()) {
    // Grab a reference to the ClockBlock.
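    // If we are the sole owner, a plain store avoids an atomic RMW;
    // otherwise fall back to an atomic increment.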
    atomic_uint32_t *ref = ref_ptr(dst->tab_);
    if (atomic_load(ref, memory_order_acquire) == 1)
      atomic_store_relaxed(ref, 2);
    else
      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    cached_idx_ = dst->tab_idx_;
    cached_size_ = dst->size_;
    cached_blocks_ = dst->blocks_;
  }
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty *dirty = &dst->dirty_[i];
    const unsigned tid = dirty->tid();
    if (tid == tid_ || tid == kInvalidTid) {
      dirty->set_tid(tid_);
      dirty->epoch = clk_[tid_];
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  // We are going to touch dst elements, so we need to unshare it.
  dst->Unshare(c);
  dst->elem(tid_).epoch = clk_[tid_];
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  dst->FlushDirty();
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    if (dirty.tid() != kInvalidTid) {
      if (clk_[dirty.tid()] < dirty.epoch)
        return false;
    }
  }
  return true;
}

// Checks whether the current thread has acquired anything
// from other clocks after releasing to dst (directly or indirectly).
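// Note (an informal reading of the check below): dst->elem(tid_).epoch records
// our own timestamp at the moment of the last release to dst; if our last
// acquire, local or global, is at or past that point, we may hold state that
// dst has not seen yet.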
bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
  const u64 my_epoch = dst->elem(tid_).epoch;
  return my_epoch <= last_acquire_ ||
      my_epoch <= atomic_load_relaxed(&global_acquire_);
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid]);
  clk_[tid] = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}

SyncClock::SyncClock() {
  ResetImpl();
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(blocks_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_)
    UnrefClockBlock(c, tab_idx_, blocks_);
  ResetImpl();
}

void SyncClock::ResetImpl() {
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  blocks_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid);
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  Unshare(c);
  if (nclk <= capacity()) {
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (size_ == 0) {
    // Grow from 0 to one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(blocks_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    atomic_store_relaxed(ref_ptr(tab_), 1);
    size_ = 1;
  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
    CHECK_LT(top, ClockBlock::kClockCount);
    const uptr move = top * sizeof(tab_->clock[0]);
    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
    internal_memset(tab_->clock, 0, move);
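    // Zeroing the vacated slots matters because table[] and clock[] overlay
    // the same storage in the ClockBlock union: a slot freed here may later
    // be reused to hold a second level block index.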
    append_block(idx);
  }
  // At this point we have the first level block allocated and all clock
  // elements evacuated from it to a second level block.
  // Add more second level blocks as necessary.
  while (nclk > capacity()) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    append_block(idx);
  }
  size_ = nclk;
}

// Flushes all dirty elements into the main clock array.
void SyncClock::FlushDirty() {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty *dirty = &dirty_[i];
    if (dirty->tid() != kInvalidTid) {
      CHECK_LT(dirty->tid(), size_);
      elem(dirty->tid()).epoch = dirty->epoch;
      dirty->set_tid(kInvalidTid);
    }
  }
}

bool SyncClock::IsShared() const {
  if (size_ == 0)
    return false;
  atomic_uint32_t *ref = ref_ptr(tab_);
  u32 v = atomic_load(ref, memory_order_acquire);
  CHECK_GT(v, 0);
  return v > 1;
}

// Unshares the current clock if it's shared.
// Shared clocks are immutable, so they need to be unshared before any updates.
// Note: this does not apply to dirty entries as they are not shared.
void SyncClock::Unshare(ClockCache *c) {
  if (!IsShared())
    return;
  // First, copy current state into old.
  SyncClock old;
  old.tab_ = tab_;
  old.tab_idx_ = tab_idx_;
  old.size_ = size_;
  old.blocks_ = blocks_;
  old.release_store_tid_ = release_store_tid_;
  old.release_store_reused_ = release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    old.dirty_[i] = dirty_[i];
  // Then, clear current object.
  ResetImpl();
  // Allocate brand new clock in the current object.
  Resize(c, old.size_);
  // Now copy state back into this object.
  Iter old_iter(&old);
  for (ClockElem &ce : *this) {
    ce = *old_iter;
    ++old_iter;
  }
  release_store_tid_ = old.release_store_tid_;
  release_store_reused_ = old.release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dirty_[i] = old.dirty_[i];
  // Drop reference to old and delete if necessary.
  old.Reset(c);
}

// Can we cache this clock for future release operations?
ALWAYS_INLINE bool SyncClock::Cachable() const {
  if (size_ == 0)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dirty_[i].tid() != kInvalidTid)
      return false;
  }
  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
}

// elem linearizes the two-level structure into a linear array.
// Note: this is used only for one-time accesses; vector operations use
// the iterator as it is much faster.
ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  const uptr block = tid / ClockBlock::kClockCount;
  DCHECK_LE(block, blocks_);
  tid %= ClockBlock::kClockCount;
  if (block == blocks_)
    return tab_->clock[tid];
  u32 idx = get_block(block);
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid];
}

ALWAYS_INLINE uptr SyncClock::capacity() const {
  if (size_ == 0)
    return 0;
  uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
  // How many clock elements we can fit into the first level block.
  // +1 for ref counter.
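  // For example (a sketch, assuming 8-byte clock elements and 4-byte table
  // entries, i.e. ratio == 2): with one second level block, the block index
  // plus the ref counter pack into RoundUpTo(2, 2) / 2 == 1 clock slot, so
  // top == kClockCount - 1.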
  uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
  return blocks_ * ClockBlock::kClockCount + top;
}

ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
  DCHECK(size_);
  DCHECK_LT(bi, blocks_);
  return tab_->table[ClockBlock::kBlockIdx - bi];
}

ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
  uptr bi = blocks_++;
  CHECK_EQ(get_block(bi), 0);
  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
}

// Used only by tests.
u64 SyncClock::get(unsigned tid) const {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty dirty = dirty_[i];
    if (dirty.tid() == tid)
      return dirty.epoch;
  }
  return elem(tid).epoch;
}

// Used only by Iter test.
u64 SyncClock::get_clean(unsigned tid) const {
  return elem(tid).epoch;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
         release_store_tid_, release_store_reused_, dirty_[0].tid(),
         dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch);
}

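// Advances the iterator to the next contiguous run of elements. Iteration
// order: all second level blocks first, then the tail elements that still
// live in the first level block.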
void SyncClock::Iter::Next() {
  // Finished with the current block, move on to the next one.
  block_++;
  if (block_ < parent_->blocks_) {
    // Iterate over the next second level block.
    u32 idx = parent_->get_block(block_);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    pos_ = &cb->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  if (block_ == parent_->blocks_ &&
      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
    // Iterate over elements in the first level block.
    pos_ = &parent_->tab_->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  parent_ = nullptr;  // denotes end
}
}  // namespace __tsan