//===-- tsan_clock.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads,
// respectively. ThreadClock contains a fixed-size vector clock for the
// maximum number of threads. SyncClock contains a growable vector clock for
// the currently necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::releaseStoreAcquire(SyncClock *sc) const {
//     for (int i = 0; i < kMaxThreads; i++) {
//       tmp = clock[i];
//       clock[i] = max(clock[i], sc->clock[i]);
//       sc->clock[i] = tmp;
//     }
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cpp.
// However, the implementation is significantly more complex. The complexity
// makes it possible to implement important classes of use cases in O(1)
// instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N).
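//
// As an illustration of use case 1 (a sketch, not code from this file; the
// variable name "ready" is invented for the example): the writer performs one
// release-store and each reader performs acquire-loads, so only the single
// store pays the O(N) cost:
//
//   // writer, once:
//   atomic_store(&ready, 1, memory_order_release);   // O(N) release-store
//   // readers, many times:
//   while (atomic_load(&ready, memory_order_acquire) == 0) {}  // O(1) acquire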
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
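//
// For example (a sketch; kClkBits is defined elsewhere in the runtime), a
// 64-bit clock element decomposes as:
//
//   u64 epoch  = elem & ((1ull << kClkBits) - 1);  // low bits: timestamp
//   u64 reused = elem >> kClkBits;                 // high bits: "acquired"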

namespace __tsan {

static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
  return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
}

// Drop reference to the first level block idx.
static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  atomic_uint32_t *ref = ref_ptr(cb);
  u32 v = atomic_load(ref, memory_order_acquire);
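  // If v == 1 we hold the last reference and can free the blocks below;
  // otherwise try to decrement the counter and leave the memory to the
  // remaining owners (the CAS retries if another thread races with us).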
  for (;;) {
    CHECK_GT(v, 0);
    if (v == 1)
      break;
    if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
      return;
  }
  // The first level block owns the second level blocks, so free them as well.
  for (uptr i = 0; i < blocks; i++)
    ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
  ctx->clock_alloc.Free(c, idx);
}

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1)  // 0 has special meaning
    , last_acquire_()
    , global_acquire_()
    , cached_idx_()
    , cached_size_()
    , cached_blocks_() {
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  internal_memset(clk_, 0, sizeof(clk_));
}

void ThreadClock::ResetCached(ClockCache *c) {
  if (cached_idx_) {
    UnrefClockBlock(c, cached_idx_, cached_blocks_);
    cached_idx_ = 0;
    cached_size_ = 0;
    cached_blocks_ = 0;
  }
}

void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0)
    return;

  bool acquired = false;
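  // First apply the dirty entries: these must be acquired regardless of the
  // "acquired" flag (see the SyncClock state description above).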
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    unsigned tid = dirty.tid();
    if (tid != kInvalidTid) {
      if (clk_[tid] < dirty.epoch) {
        clk_[tid] = dirty.epoch;
        acquired = true;
      }
    }
  }

  // Check if we've already acquired src after the last release operation on src
  if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
    // O(N) acquire.
    nclk_ = max(nclk_, nclk);
    u64 *dst_pos = &clk_[0];
    for (ClockElem &src_elem : *src) {
      u64 epoch = src_elem.epoch;
      if (*dst_pos < epoch) {
        *dst_pos = epoch;
        acquired = true;
      }
      dst_pos++;
    }

    // Remember that this thread has acquired this clock.
    if (nclk > tid_)
      src->elem(tid_).reused = reused_;
  }

  if (acquired) {
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(sc->size_, kMaxTid);

  if (sc->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, sc);
    return;
  }

  nclk_ = max(nclk_, (uptr) sc->size_);

  // Check if we need to resize sc.
  if (sc->size_ < nclk_)
    sc->Resize(c, nclk_);

  bool acquired = false;

  sc->Unshare(c);
  // Update sc->clk_.
  sc->FlushDirty();
  uptr i = 0;
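  // Element-wise swap-and-max, per the releaseStoreAcquire model above:
  // acquire sc into clk_ while storing our previous value into sc.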
  for (ClockElem &ce : *sc) {
    u64 tmp = clk_[i];
    if (clk_[i] < ce.epoch) {
      clk_[i] = ce.epoch;
      acquired = true;
    }
    ce.epoch = tmp;
    ce.reused = 0;
    i++;
  }
  sc->release_store_tid_ = kInvalidTid;
  sc->release_store_reused_ = 0;

  if (acquired) {
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::release(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check whether we have acquired anything from other threads
  // since the last release on dst. If not, we only need to update
  // dst->elem(tid_).
  if (!HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  dst->Unshare(c);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  // Update dst->clk_.
  dst->FlushDirty();
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = max(ce.epoch, clk_[i]);
    ce.reused = 0;
    i++;
  }
  // Clear 'acquired' flag in the remaining elements.
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0 && cached_idx_ != 0) {
    // Reuse the cached clock.
    // Note: we could reuse/cache the cached clock in more cases:
    // we could update the existing clock and cache it, or replace it with the
    // currently cached clock and release the old one. And for a shared
    // existing clock, we could replace it with the currently cached;
    // or unshare, update and cache. But, for simplicity, we currently reuse
    // the cached clock only when the target clock is empty.
    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
    dst->tab_idx_ = cached_idx_;
    dst->size_ = cached_size_;
    dst->blocks_ = cached_blocks_;
    CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid);
    // The cached clock is shared (immutable),
    // so this is where we store the current clock.
    dst->dirty_[0].set_tid(tid_);
    dst->dirty_[0].epoch = clk_[tid_];
    dst->release_store_tid_ = tid_;
    dst->release_store_reused_ = reused_;
    // Remember that we don't need to acquire it in future.
    dst->elem(tid_).reused = reused_;
    // Grab a reference.
    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      !HasAcquiredAfterRelease(dst)) {
    UpdateCurrentThread(c, dst);
    return;
  }

  // O(N) release-store.
  dst->Unshare(c);
  // Note: dst can be larger than this ThreadClock.
  // This is fine since clk_ beyond size is all zeros.
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = clk_[i];
    ce.reused = 0;
    i++;
  }
  for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid);
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->elem(tid_).reused = reused_;

  // If the resulting clock is cachable, cache it for future release operations.
  // The clock is always cachable if we released to an empty sync object.
  if (cached_idx_ == 0 && dst->Cachable()) {
    // Grab a reference to the ClockBlock.
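    // If we are the sole owner, a plain store avoids an atomic RMW;
    // otherwise fall back to an atomic increment.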
    atomic_uint32_t *ref = ref_ptr(dst->tab_);
    if (atomic_load(ref, memory_order_acquire) == 1)
      atomic_store_relaxed(ref, 2);
    else
      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    cached_idx_ = dst->tab_idx_;
    cached_size_ = dst->size_;
    cached_blocks_ = dst->blocks_;
  }
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only the single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty *dirty = &dst->dirty_[i];
    const unsigned tid = dirty->tid();
    if (tid == tid_ || tid == kInvalidTid) {
      dirty->set_tid(tid_);
      dirty->epoch = clk_[tid_];
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  // We are going to touch dst elements, so we need to unshare it.
  dst->Unshare(c);
  dst->elem(tid_).epoch = clk_[tid_];
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  dst->FlushDirty();
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    if (dirty.tid() != kInvalidTid) {
      if (clk_[dirty.tid()] < dirty.epoch)
        return false;
    }
  }
  return true;
}

// Checks whether the current thread has acquired anything
// from other clocks after releasing to dst (directly or indirectly).
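// Note (an informal reading of the check below): dst->elem(tid_).epoch records
// our own timestamp at the moment of the last release to dst; if our last
// acquire, local or global, is at or past that point, we may hold state that
// dst has not seen yet.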
bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
  const u64 my_epoch = dst->elem(tid_).epoch;
  return my_epoch <= last_acquire_ ||
      my_epoch <= atomic_load_relaxed(&global_acquire_);
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid]);
  clk_[tid] = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}

SyncClock::SyncClock() {
  ResetImpl();
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(blocks_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_)
    UnrefClockBlock(c, tab_idx_, blocks_);
  ResetImpl();
}

void SyncClock::ResetImpl() {
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  blocks_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid);
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  Unshare(c);
  if (nclk <= capacity()) {
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (size_ == 0) {
    // Grow from 0 to one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(blocks_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    atomic_store_relaxed(ref_ptr(tab_), 1);
    size_ = 1;
  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
    CHECK_LT(top, ClockBlock::kClockCount);
    const uptr move = top * sizeof(tab_->clock[0]);
    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
    internal_memset(tab_->clock, 0, move);
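    // Zeroing the vacated slots matters because table[] and clock[] overlay
    // the same storage in the ClockBlock union: a slot freed here may later
    // be reused to hold a second level block index.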
    append_block(idx);
  }
  // At this point we have the first level block allocated and all clock
  // elements evacuated from it to a second level block.
  // Add more second level blocks as necessary.
  while (nclk > capacity()) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    append_block(idx);
  }
  size_ = nclk;
}

// Flushes all dirty elements into the main clock array.
void SyncClock::FlushDirty() {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty *dirty = &dirty_[i];
    if (dirty->tid() != kInvalidTid) {
      CHECK_LT(dirty->tid(), size_);
      elem(dirty->tid()).epoch = dirty->epoch;
      dirty->set_tid(kInvalidTid);
    }
  }
}

bool SyncClock::IsShared() const {
  if (size_ == 0)
    return false;
  atomic_uint32_t *ref = ref_ptr(tab_);
  u32 v = atomic_load(ref, memory_order_acquire);
  CHECK_GT(v, 0);
  return v > 1;
}

// Unshares the current clock if it's shared.
// Shared clocks are immutable, so they need to be unshared before any updates.
// Note: this does not apply to dirty entries as they are not shared.
void SyncClock::Unshare(ClockCache *c) {
  if (!IsShared())
    return;
  // First, copy current state into old.
  SyncClock old;
  old.tab_ = tab_;
  old.tab_idx_ = tab_idx_;
  old.size_ = size_;
  old.blocks_ = blocks_;
  old.release_store_tid_ = release_store_tid_;
  old.release_store_reused_ = release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    old.dirty_[i] = dirty_[i];
  // Then, clear current object.
  ResetImpl();
  // Allocate brand new clock in the current object.
  Resize(c, old.size_);
  // Now copy state back into this object.
  Iter old_iter(&old);
  for (ClockElem &ce : *this) {
    ce = *old_iter;
    ++old_iter;
  }
  release_store_tid_ = old.release_store_tid_;
  release_store_reused_ = old.release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dirty_[i] = old.dirty_[i];
  // Drop reference to old and delete if necessary.
  old.Reset(c);
}

// Can we cache this clock for future release operations?
ALWAYS_INLINE bool SyncClock::Cachable() const {
  if (size_ == 0)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dirty_[i].tid() != kInvalidTid)
      return false;
  }
  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
}

// elem linearizes the two-level structure into a linear array.
// Note: this is used only for one-time accesses; vector operations use
// the iterator as it is much faster.
ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  const uptr block = tid / ClockBlock::kClockCount;
  DCHECK_LE(block, blocks_);
  tid %= ClockBlock::kClockCount;
  if (block == blocks_)
    return tab_->clock[tid];
  u32 idx = get_block(block);
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid];
}

ALWAYS_INLINE uptr SyncClock::capacity() const {
  if (size_ == 0)
    return 0;
  uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
  // How many clock elements we can fit into the first level block.
  // +1 for ref counter.
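  // For example (a sketch, assuming 8-byte clock elements and 4-byte table
  // entries, i.e. ratio == 2): with one second level block, the block index
  // plus the ref counter pack into RoundUpTo(2, 2) / 2 == 1 clock slot, so
  // top == kClockCount - 1.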
  uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
  return blocks_ * ClockBlock::kClockCount + top;
}

ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
  DCHECK(size_);
  DCHECK_LT(bi, blocks_);
  return tab_->table[ClockBlock::kBlockIdx - bi];
}

ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
  uptr bi = blocks_++;
  CHECK_EQ(get_block(bi), 0);
  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
}

// Used only by tests.
u64 SyncClock::get(unsigned tid) const {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty dirty = dirty_[i];
    if (dirty.tid() == tid)
      return dirty.epoch;
  }
  return elem(tid).epoch;
}

// Used only by Iter test.
u64 SyncClock::get_clean(unsigned tid) const {
  return elem(tid).epoch;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
         release_store_tid_, release_store_reused_, dirty_[0].tid(),
         dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch);
}

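// Advances the iterator to the next contiguous run of elements. Iteration
// order: all second level blocks first, then the tail elements that still
// live in the first level block.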
void SyncClock::Iter::Next() {
  // Finished with the current block, move on to the next one.
  block_++;
  if (block_ < parent_->blocks_) {
    // Iterate over the next second level block.
    u32 idx = parent_->get_block(block_);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    pos_ = &cb->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  if (block_ == parent_->blocks_ &&
      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
    // Iterate over elements in the first level block.
    pos_ = &parent_->tab_->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  parent_ = nullptr;  // denotes end
}
}  // namespace __tsan