Home | History | Annotate | Line # | Download | only in internal
      1  1.1  christos #ifndef JEMALLOC_INTERNAL_PA_H
      2  1.1  christos #define JEMALLOC_INTERNAL_PA_H
      3  1.1  christos 
      4  1.1  christos #include "jemalloc/internal/base.h"
      5  1.1  christos #include "jemalloc/internal/decay.h"
      6  1.1  christos #include "jemalloc/internal/ecache.h"
      7  1.1  christos #include "jemalloc/internal/edata_cache.h"
      8  1.1  christos #include "jemalloc/internal/emap.h"
      9  1.1  christos #include "jemalloc/internal/hpa.h"
     10  1.1  christos #include "jemalloc/internal/lockedint.h"
     11  1.1  christos #include "jemalloc/internal/pac.h"
     12  1.1  christos #include "jemalloc/internal/pai.h"
     13  1.1  christos #include "jemalloc/internal/sec.h"
     14  1.1  christos 
     15  1.1  christos /*
     16  1.1  christos  * The page allocator; responsible for acquiring pages of memory for
     17  1.1  christos  * allocations.  It picks the implementation of the page allocator interface
     18  1.1  christos  * (i.e. a pai_t) to handle a given page-level allocation request.  For now, the
     19  1.1  christos  * only such implementation is the PAC code ("page allocator classic"), but
     20  1.1  christos  * others will be coming soon.
     21  1.1  christos  */
     22  1.1  christos 
     23  1.1  christos typedef struct pa_central_s pa_central_t;
     24  1.1  christos struct pa_central_s {
     25  1.1  christos 	hpa_central_t hpa;
     26  1.1  christos };
     27  1.1  christos 
     28  1.1  christos /*
     29  1.1  christos  * The stats for a particular pa_shard.  Because of the way the ctl module
     30  1.1  christos  * handles stats epoch data collection (it has its own arena_stats, and merges
     31  1.1  christos  * the stats from each arena into it), this needs to live in the arena_stats_t;
     32  1.1  christos  * hence we define it here and let the pa_shard have a pointer (rather than the
     33  1.1  christos  * more natural approach of just embedding it in the pa_shard itself).
     34  1.1  christos  *
     35  1.1  christos  * We follow the arena_stats_t approach of marking the derived fields.  These
     36  1.1  christos  * are the ones that are not maintained on their own; instead, their values are
     37  1.1  christos  * derived during those stats merges.
     38  1.1  christos  */
     39  1.1  christos typedef struct pa_shard_stats_s pa_shard_stats_t;
     40  1.1  christos struct pa_shard_stats_s {
     41  1.1  christos 	/* Number of edata_t structs allocated by base, but not being used. */
     42  1.1  christos 	size_t edata_avail; /* Derived. */
     43  1.1  christos 	/*
     44  1.1  christos 	 * Stats specific to the PAC.  For now, these are the only stats that
     45  1.1  christos 	 * exist, but there will eventually be other page allocators.  Things
     46  1.1  christos 	 * like edata_avail make sense in a cross-PA sense, but things like
     47  1.1  christos 	 * npurges don't.
     48  1.1  christos 	 */
     49  1.1  christos 	pac_stats_t pac_stats;
     50  1.1  christos };
     51  1.1  christos 
     52  1.1  christos /*
     53  1.1  christos  * The local allocator handle.  Keeps the state necessary to satisfy page-sized
     54  1.1  christos  * allocations.
     55  1.1  christos  *
     56  1.1  christos  * The contents are mostly internal to the PA module.  The key exception is that
     57  1.1  christos  * arena decay code is allowed to grab pointers to the dirty and muzzy ecaches'
     58  1.1  christos  * decay_ts, for a couple of queries, passing them back to a PA function, or
     59  1.1  christos  * acquiring decay.mtx and looking at decay.purging.  The reasoning is that,
     60  1.1  christos  * while PA decides what and how to purge, the arena code decides when and where
     61  1.1  christos  * (e.g. on what thread).  It's allowed to use the presence of another purger to
     62  1.1  christos  * decide.
     63  1.1  christos  * (The background thread code also touches some other decay internals, but
     64  1.1  christos  * that's not fundamental; it's just an artifact of a partial refactoring, and
     65  1.1  christos  * its accesses could be straightforwardly moved inside the decay module).
     66  1.1  christos  */
     67  1.1  christos typedef struct pa_shard_s pa_shard_t;
     68  1.1  christos struct pa_shard_s {
     69  1.1  christos 	/* The central PA this shard is associated with. */
     70  1.1  christos 	pa_central_t *central;
     71  1.1  christos 
     72  1.1  christos 	/*
     73  1.1  christos 	 * Number of pages in active extents.
     74  1.1  christos 	 *
     75  1.1  christos 	 * Synchronization: atomic.
     76  1.1  christos 	 */
     77  1.1  christos 	atomic_zu_t nactive;
     78  1.1  christos 
     79  1.1  christos 	/*
     80  1.1  christos 	 * Whether or not we should prefer the hugepage allocator.  Atomic since
     81  1.1  christos 	 * it may be concurrently modified by a thread setting extent hooks.
     82  1.1  christos 	 * Note that we still may do HPA operations in this arena; if use_hpa is
     83  1.1  christos 	 * changed from true to false, we'll free back to the hugepage allocator
     84  1.1  christos 	 * for those allocations.
     85  1.1  christos 	 */
     86  1.1  christos 	atomic_b_t use_hpa;
     87  1.1  christos 
     88  1.1  christos 	/*
     89  1.1  christos 	 * If we never used the HPA to begin with, it wasn't initialized, and so
     90  1.1  christos 	 * we shouldn't try to e.g. acquire its mutexes during fork.  This
     91  1.1  christos 	 * tracks that knowledge.
     92  1.1  christos 	 */
     93  1.1  christos 	bool ever_used_hpa;
     94  1.1  christos 
     95  1.1  christos 	/* Allocates from a PAC. */
     96  1.1  christos 	pac_t pac;
     97  1.1  christos 
     98  1.1  christos 	/*
     99  1.1  christos 	 * We place a small extent cache in front of the HPA, since we intend
    100  1.1  christos 	 * these configurations to use many fewer arenas, and therefore have a
    101  1.1  christos 	 * higher risk of hot locks.
    102  1.1  christos 	 */
    103  1.1  christos 	sec_t hpa_sec;
    104  1.1  christos 	hpa_shard_t hpa_shard;
    105  1.1  christos 
    106  1.1  christos 	/* The source of edata_t objects. */
    107  1.1  christos 	edata_cache_t edata_cache;
    108  1.1  christos 
    109  1.1  christos 	unsigned ind;
    110  1.1  christos 
    111  1.1  christos 	malloc_mutex_t *stats_mtx;
    112  1.1  christos 	pa_shard_stats_t *stats;
    113  1.1  christos 
    114  1.1  christos 	/* The emap this shard is tied to. */
    115  1.1  christos 	emap_t *emap;
    116  1.1  christos 
    117  1.1  christos 	/* The base from which we get the ehooks and allocate metadat. */
    118  1.1  christos 	base_t *base;
    119  1.1  christos };
    120  1.1  christos 
    121  1.1  christos static inline bool
    122  1.1  christos pa_shard_dont_decay_muzzy(pa_shard_t *shard) {
    123  1.1  christos 	return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 &&
    124  1.1  christos 	    pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0;
    125  1.1  christos }
    126  1.1  christos 
    127  1.1  christos static inline ehooks_t *
    128  1.1  christos pa_shard_ehooks_get(pa_shard_t *shard) {
    129  1.1  christos 	return base_ehooks_get(shard->base);
    130  1.1  christos }
    131  1.1  christos 
    132  1.1  christos /* Returns true on error. */
    133  1.1  christos bool pa_central_init(pa_central_t *central, base_t *base, bool hpa,
    134  1.1  christos     hpa_hooks_t *hpa_hooks);
    135  1.1  christos 
    136  1.1  christos /* Returns true on error. */
    137  1.1  christos bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
    138  1.1  christos     emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats,
    139  1.1  christos     malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold,
    140  1.1  christos     ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms);
    141  1.1  christos 
    142  1.1  christos /*
    143  1.1  christos  * This isn't exposed to users; we allow late enablement of the HPA shard so
    144  1.1  christos  * that we can boot without worrying about the HPA, then turn it on in a0.
    145  1.1  christos  */
    146  1.1  christos bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard,
    147  1.1  christos     const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts);
    148  1.1  christos 
    149  1.1  christos /*
    150  1.1  christos  * We stop using the HPA when custom extent hooks are installed, but still
    151  1.1  christos  * redirect deallocations to it.
    152  1.1  christos  */
    153  1.1  christos void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard);
    154  1.1  christos 
    155  1.1  christos /*
    156  1.1  christos  * This does the PA-specific parts of arena reset (i.e. freeing all active
    157  1.1  christos  * allocations).
    158  1.1  christos  */
    159  1.1  christos void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard);
    160  1.1  christos 
    161  1.1  christos /*
    162  1.1  christos  * Destroy all the remaining retained extents.  Should only be called after
    163  1.1  christos  * decaying all active, dirty, and muzzy extents to the retained state, as the
    164  1.1  christos  * last step in destroying the shard.
    165  1.1  christos  */
    166  1.1  christos void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard);
    167  1.1  christos 
    168  1.1  christos /* Gets an edata for the given allocation. */
    169  1.1  christos edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size,
    170  1.1  christos     size_t alignment, bool slab, szind_t szind, bool zero, bool guarded,
    171  1.1  christos     bool *deferred_work_generated);
    172  1.1  christos /* Returns true on error, in which case nothing changed. */
    173  1.1  christos bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
    174  1.1  christos     size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated);
    175  1.1  christos /*
    176  1.1  christos  * The same.  Sets *generated_dirty to true if we produced new dirty pages, and
    177  1.1  christos  * false otherwise.
    178  1.1  christos  */
    179  1.1  christos bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
    180  1.1  christos     size_t new_size, szind_t szind, bool *deferred_work_generated);
    181  1.1  christos /*
    182  1.1  christos  * Frees the given edata back to the pa.  Sets *generated_dirty if we produced
    183  1.1  christos  * new dirty pages (well, we always set it for now; but this need not be the
    184  1.1  christos  * case).
    185  1.1  christos  * (We could make generated_dirty the return value of course, but this is more
    186  1.1  christos  * consistent with the shrink pathway and our error codes here).
    187  1.1  christos  */
    188  1.1  christos void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
    189  1.1  christos     bool *deferred_work_generated);
    190  1.1  christos bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
    191  1.1  christos     ssize_t decay_ms, pac_purge_eagerness_t eagerness);
    192  1.1  christos ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);
    193  1.1  christos 
    194  1.1  christos /*
    195  1.1  christos  * Do deferred work on this PA shard.
    196  1.1  christos  *
    197  1.1  christos  * Morally, this should do both PAC decay and the HPA deferred work.  For now,
    198  1.1  christos  * though, the arena, background thread, and PAC modules are tightly interwoven
    199  1.1  christos  * in a way that's tricky to extricate, so we only do the HPA-specific parts.
    200  1.1  christos  */
    201  1.1  christos void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
    202  1.1  christos     bool deferral_allowed);
    203  1.1  christos void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
    204  1.1  christos void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
    205  1.1  christos uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
    206  1.1  christos 
    207  1.1  christos /******************************************************************************/
    208  1.1  christos /*
    209  1.1  christos  * Various bits of "boring" functionality that are still part of this module,
    210  1.1  christos  * but that we relegate to pa_extra.c, to keep the core logic in pa.c as
    211  1.1  christos  * readable as possible.
    212  1.1  christos  */
    213  1.1  christos 
    214  1.1  christos /*
    215  1.1  christos  * These fork phases are synchronized with the arena fork phase numbering to
    216  1.1  christos  * make it easy to keep straight. That's why there's no prefork1.
    217  1.1  christos  */
    218  1.1  christos void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard);
    219  1.1  christos void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard);
    220  1.1  christos void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard);
    221  1.1  christos void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard);
    222  1.1  christos void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard);
    223  1.1  christos void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard);
    224  1.1  christos void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard);
    225  1.1  christos 
    226  1.1  christos void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive,
    227  1.1  christos     size_t *ndirty, size_t *nmuzzy);
    228  1.1  christos 
    229  1.1  christos void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
    230  1.1  christos     pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
    231  1.1  christos     hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
    232  1.1  christos     size_t *resident);
    233  1.1  christos 
    234  1.1  christos /*
    235  1.1  christos  * Reads the PA-owned mutex stats into the output stats array, at the
    236  1.1  christos  * appropriate positions.  Morally, these stats should really live in
    237  1.1  christos  * pa_shard_stats_t, but the indices are sort of baked into the various mutex
    238  1.1  christos  * prof macros.  This would be a good thing to do at some point.
    239  1.1  christos  */
    240  1.1  christos void pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
    241  1.1  christos     mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]);
    242  1.1  christos 
    243  1.1  christos #endif /* JEMALLOC_INTERNAL_PA_H */
    244