/* jemalloc internal header: decay.h (viewer navigation line removed) */
      1 #ifndef JEMALLOC_INTERNAL_DECAY_H
      2 #define JEMALLOC_INTERNAL_DECAY_H
      3 
      4 #include "jemalloc/internal/smoothstep.h"
      5 
      6 #define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1)
      7 
      8 /*
      9  * The decay_t computes the number of pages we should purge at any given time.
     10  * Page allocators inform a decay object when pages enter a decay-able state
     11  * (i.e. dirty or muzzy), and query it to determine how many pages should be
     12  * purged at any given time.
     13  *
     14  * This is mostly a single-threaded data structure and doesn't care about
     15  * synchronization at all; it's the caller's responsibility to manage their
     16  * synchronization on their own.  There are two exceptions:
     17  * 1) It's OK to racily call decay_ms_read (i.e. just the simplest state query).
     18  * 2) The mtx and purging fields live (and are initialized) here, but are
     19  *    logically owned by the page allocator.  This is just a convenience (since
     20  *    those fields would be duplicated for both the dirty and muzzy states
     21  *    otherwise).
     22  */
typedef struct decay_s decay_t;
struct decay_s {
	/* Synchronizes all non-atomic fields. */
	malloc_mutex_t mtx;
	/*
	 * True if a thread is currently purging the extents associated with
	 * this decay structure.  Guarded by mtx; logically owned by the page
	 * allocator (see the struct-level comment above).
	 */
	bool purging;
	/*
	 * Approximate time in milliseconds from the creation of a set of unused
	 * dirty pages until an equivalent set of unused dirty pages is purged
	 * and/or reused.  Atomic so that decay_ms_read may be called without
	 * holding mtx.
	 */
	atomic_zd_t time_ms;
	/* time / SMOOTHSTEP_NSTEPS. */
	nstime_t interval;
	/*
	 * Time at which the current decay interval logically started.  We do
	 * not actually advance to a new epoch until sometime after it starts
	 * because of scheduling and computation delays, and it is even possible
	 * to completely skip epochs.  In all cases, during epoch advancement we
	 * merge all relevant activity into the most recently recorded epoch.
	 */
	nstime_t epoch;
	/* Deadline randomness generator state. */
	uint64_t jitter_state;
	/*
	 * Deadline for current epoch.  This is the sum of interval and per
	 * epoch jitter which is a uniform random variable in [0..interval).
	 * Epochs always advance by precise multiples of interval, but we
	 * randomize the deadline to reduce the likelihood of arenas purging in
	 * lockstep.
	 */
	nstime_t deadline;
	/*
	 * The number of pages we cap ourselves at in the current epoch, per
	 * decay policies.  Updated on an epoch change.  After an epoch change,
	 * the caller should take steps to try to purge down to this amount.
	 */
	size_t npages_limit;
	/*
	 * Number of unpurged pages at beginning of current epoch.  During epoch
	 * advancement we use the delta between arena->decay_*.nunpurged and
	 * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages,
	 * if any, were generated.
	 */
	size_t nunpurged;
	/*
	 * Trailing log of how many unused dirty pages were generated during
	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
	 * element is the most recent epoch.  Corresponding epoch times are
	 * relative to epoch.
	 *
	 * Updated only on epoch advance, triggered by
	 * decay_maybe_advance_epoch, below.
	 */
	size_t backlog[SMOOTHSTEP_NSTEPS];

	/* Peak number of pages in associated extents.  Used for debug only. */
	uint64_t ceil_npages;
};
     85 
/*
 * The current decay time setting, in milliseconds (negative means decay is
 * disabled).  This is the only public access to a decay_t that's allowed
 * without holding mtx; the load is relaxed, so callers may observe a slightly
 * stale value.
 */
static inline ssize_t
decay_ms_read(const decay_t *decay) {
	return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
}
     94 
/*
 * See the comment on the struct field -- the limit on pages we should allow in
 * this decay state this epoch.  Caller must hold mtx (this reads a non-atomic
 * field).
 */
static inline size_t
decay_npages_limit_get(const decay_t *decay) {
	return decay->npages_limit;
}
    103 
    104 /* How many unused dirty pages were generated during the last epoch. */
    105 static inline size_t
    106 decay_epoch_npages_delta(const decay_t *decay) {
    107 	return decay->backlog[SMOOTHSTEP_NSTEPS - 1];
    108 }
    109 
/*
 * Current epoch duration, in nanoseconds.  Given that new epochs are started
 * somewhat haphazardly, this is not necessarily exactly the time between any
 * two calls to decay_maybe_advance_epoch; see the comments on fields in the
 * decay_t.
 */
static inline uint64_t
decay_epoch_duration_ns(const decay_t *decay) {
	return nstime_ns(&decay->interval);
}
    120 
    121 static inline bool
    122 decay_immediately(const decay_t *decay) {
    123 	ssize_t decay_ms = decay_ms_read(decay);
    124 	return decay_ms == 0;
    125 }
    126 
    127 static inline bool
    128 decay_disabled(const decay_t *decay) {
    129 	ssize_t decay_ms = decay_ms_read(decay);
    130 	return decay_ms < 0;
    131 }
    132 
    133 /* Returns true if decay is enabled and done gradually. */
    134 static inline bool
    135 decay_gradually(const decay_t *decay) {
    136 	ssize_t decay_ms = decay_ms_read(decay);
    137 	return decay_ms > 0;
    138 }
    139 
/*
 * Returns true if the passed in decay time setting is valid.
 * < -1 : invalid
 * -1   : never decay
 *  0   : decay immediately
 *  > 0 : some positive decay time, up to a maximum allowed value of
 *  NSTIME_SEC_MAX * 1000, which corresponds to decaying somewhere in the early
 *  27th century.  By that time, we expect to have implemented alternate purging
 *  strategies.
 */
bool decay_ms_valid(ssize_t decay_ms);

/*
 * As a precondition, the decay_t must be zeroed out (as if with memset).
 *
 * Returns true on error.
 */
bool decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);

/*
 * Given an already-initialized decay_t, reinitialize it with the given decay
 * time.  The decay_t must have previously been initialized (and should not then
 * be zeroed).
 */
void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);

/*
 * Compute how many of 'npages_new' pages we would need to purge in 'time'.
 */
uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time,
    size_t npages_new);

/* Returns true if the epoch advanced and there are pages to purge. */
bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
    size_t current_npages);

/*
 * Calculates wait time until a number of pages in the interval
 * [0.5 * npages_threshold .. 1.5 * npages_threshold] should be purged.
 *
 * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of
 * indefinite wait.
 */
uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current,
    uint64_t npages_threshold);

#endif /* JEMALLOC_INTERNAL_DECAY_H */
    185 
    186 #endif /* JEMALLOC_INTERNAL_DECAY_H */
    187