#ifndef JEMALLOC_INTERNAL_PA_H
#define JEMALLOC_INTERNAL_PA_H

#include "jemalloc/internal/base.h"
#include "jemalloc/internal/decay.h"
#include "jemalloc/internal/ecache.h"
#include "jemalloc/internal/edata_cache.h"
#include "jemalloc/internal/emap.h"
#include "jemalloc/internal/hpa.h"
#include "jemalloc/internal/lockedint.h"
#include "jemalloc/internal/pac.h"
#include "jemalloc/internal/pai.h"
#include "jemalloc/internal/sec.h"

/*
 * The page allocator; responsible for acquiring pages of memory for
 * allocations.  It picks the implementation of the page allocator interface
 * (i.e. a pai_t) to handle a given page-level allocation request.  The
 * implementations used here are the PAC ("page allocator classic") and, when
 * enabled, the HPA (hugepage allocator), the latter fronted by a small extent
 * cache (SEC).
 */

typedef struct pa_central_s pa_central_t;
struct pa_central_s {
        hpa_central_t hpa;
};

/*
 * The stats for a particular pa_shard.  Because of the way the ctl module
 * handles stats epoch data collection (it has its own arena_stats, and merges
 * the stats from each arena into it), this needs to live in the arena_stats_t;
 * hence we define it here and let the pa_shard have a pointer (rather than the
 * more natural approach of just embedding it in the pa_shard itself).
 *
 * We follow the arena_stats_t approach of marking the derived fields.  These
 * are the ones that are not maintained on their own; instead, their values are
 * derived during those stats merges.
 */
typedef struct pa_shard_stats_s pa_shard_stats_t;
struct pa_shard_stats_s {
        /* Number of edata_t structs allocated by base, but not being used. */
        size_t edata_avail; /* Derived. */
        /*
         * Stats specific to the PAC.  For now, these are the only stats that
         * exist, but there will eventually be other page allocators.  Things
         * like edata_avail make sense in a cross-PA sense, but things like
         * npurges don't.
         */
        pac_stats_t pac_stats;
};

/*
 * The local allocator handle.  Keeps the state necessary to satisfy page-sized
 * allocations.
 *
 * The contents are mostly internal to the PA module.  The key exception is
 * that arena decay code is allowed to grab pointers to the dirty and muzzy
 * ecaches and decay_ts for a couple of queries, passing them back to a PA
 * function, or acquiring decay.mtx and looking at decay.purging.  The
 * reasoning is that, while the PA decides what and how to purge, the arena
 * code decides when and where (e.g. on what thread); it's allowed to use the
 * presence of another purger to inform that decision.  (A sketch of this sort
 * of access follows the struct definition below.)
 * (The background thread code also touches some other decay internals, but
 * that's not fundamental; it's just an artifact of a partial refactoring, and
 * its accesses could be straightforwardly moved inside the decay module.)
 */
typedef struct pa_shard_s pa_shard_t;
struct pa_shard_s {
        /* The central PA this shard is associated with. */
        pa_central_t *central;

        /*
         * Number of pages in active extents.
         *
         * Synchronization: atomic.
         */
        atomic_zu_t nactive;

        /*
         * Whether or not we should prefer the hugepage allocator.  Atomic
         * since it may be concurrently modified by a thread setting extent
         * hooks.  Note that we still may do HPA operations in this arena; if
         * use_hpa is changed from true to false, we'll free back to the
         * hugepage allocator for those allocations.
         */
        atomic_b_t use_hpa;

        /*
         * If we never used the HPA to begin with, it wasn't initialized, and
         * so we shouldn't try to e.g. acquire its mutexes during fork.  This
         * tracks that knowledge.
         */
        bool ever_used_hpa;

        /* Allocates from a PAC. */
        pac_t pac;

        /*
         * We place a small extent cache in front of the HPA, since we intend
         * these configurations to use many fewer arenas, and therefore have a
         * higher risk of hot locks.
         */
        sec_t hpa_sec;
        hpa_shard_t hpa_shard;

        /* The source of edata_t objects. */
        edata_cache_t edata_cache;

        unsigned ind;

        malloc_mutex_t *stats_mtx;
        pa_shard_stats_t *stats;

        /* The emap this shard is tied to. */
        emap_t *emap;

        /* The base from which we get the ehooks and allocate metadata. */
        base_t *base;
};
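
/*
 * Illustrative only (not part of the PA API): a minimal sketch of the kind of
 * decay access described above -- arena-side code checking, under decay.mtx,
 * whether another purger is already active.  It assumes the PAC exposes its
 * dirty decay state as pac.decay_dirty (per pac.h); the helper name is made
 * up for this example.
 */
static inline bool
pa_shard_example_dirty_purge_in_progress(tsdn_t *tsdn, pa_shard_t *shard) {
        decay_t *decay = &shard->pac.decay_dirty;
        bool purging;

        /* The comment above describes reading decay.purging under decay.mtx. */
        malloc_mutex_lock(tsdn, &decay->mtx);
        purging = decay->purging;
        malloc_mutex_unlock(tsdn, &decay->mtx);
        return purging;
}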

static inline bool
pa_shard_dont_decay_muzzy(pa_shard_t *shard) {
        return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 &&
            pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0;
}

static inline ehooks_t *
pa_shard_ehooks_get(pa_shard_t *shard) {
        return base_ehooks_get(shard->base);
}

/* Returns true on error. */
bool pa_central_init(pa_central_t *central, base_t *base, bool hpa,
    hpa_hooks_t *hpa_hooks);

/* Returns true on error. */
bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
    emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats,
    malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold,
    ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms);

/*
 * This isn't exposed to users; we allow late enablement of the HPA shard so
 * that we can boot without worrying about the HPA, then turn it on in a0.
 */
bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard,
    const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts);

/*
 * We stop using the HPA when custom extent hooks are installed, but still
 * redirect deallocations to it.
 */
void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard);

/*
 * This does the PA-specific parts of arena reset (i.e. freeing all active
 * allocations).
 */
void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard);

/*
 * Destroy all the remaining retained extents.  Should only be called after
 * decaying all active, dirty, and muzzy extents to the retained state, as the
 * last step in destroying the shard.
 */
void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard);

/* Gets an edata for the given allocation. */
edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size,
    size_t alignment, bool slab, szind_t szind, bool zero, bool guarded,
    bool *deferred_work_generated);
/* Returns true on error, in which case nothing changed. */
bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
    size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated);
/*
 * The same.  Sets *deferred_work_generated to true if the shrink produced new
 * dirty pages (and hence deferred purging work), and false otherwise.
 */
bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
    size_t new_size, szind_t szind, bool *deferred_work_generated);
/*
 * Frees the given edata back to the PA.  Sets *deferred_work_generated if we
 * produced new dirty pages (well, we always set it for now; but this need not
 * be the case).
 * (We could make that flag the return value, of course, but this is more
 * consistent with the shrink pathway and our error codes here.)
 */
void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
    bool *deferred_work_generated);
bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
    ssize_t decay_ms, pac_purge_eagerness_t eagerness);
ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);

/*
 * Do deferred work on this PA shard.
 *
 * Morally, this should do both PAC decay and the HPA deferred work.  For now,
 * though, the arena, background thread, and PAC modules are tightly interwoven
 * in a way that's tricky to extricate, so we only do the HPA-specific parts.
 */
void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
    bool deferral_allowed);
void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
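
/*
 * Illustrative only (not part of the PA API): a minimal sketch of the
 * deferred-work contract shared by pa_alloc/pa_expand/pa_shrink/pa_dalloc.
 * The caller passes a bool out-parameter; if the PA sets it, the caller is
 * expected to react (real callers, the arena code, decide how and where, e.g.
 * by waking a background thread).  The helper name is made up for this
 * example.
 */
static inline void
pa_example_dalloc_and_defer(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) {
        bool deferred_work_generated = false;

        pa_dalloc(tsdn, shard, edata, &deferred_work_generated);
        if (deferred_work_generated) {
                /*
                 * One possible reaction: opportunistically attempt the
                 * deferred work right away on this thread.
                 */
                pa_shard_try_deferred_work(tsdn, shard);
        }
}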

/******************************************************************************/
/*
 * Various bits of "boring" functionality that are still part of this module,
 * but that we relegate to pa_extra.c, to keep the core logic in pa.c as
 * readable as possible.
 */

/*
 * These fork phases are synchronized with the arena fork phase numbering to
 * make it easy to keep straight.  That's why there's no prefork1.  (An
 * illustrative ordering sketch appears at the end of this header.)
 */
void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard);
void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard);

void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive,
    size_t *ndirty, size_t *nmuzzy);

void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
    pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
    hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
    size_t *resident);

/*
 * Reads the PA-owned mutex stats into the output stats array, at the
 * appropriate positions.  Morally, these stats should really live in
 * pa_shard_stats_t, but the indices are sort of baked into the various mutex
 * prof macros.  Fixing that would be a good cleanup at some point.
 */
void pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
    mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]);

#endif /* JEMALLOC_INTERNAL_PA_H */
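
/*
 * Illustrative only: the intended ordering of the fork hooks declared above,
 * mirroring the arena fork phase numbering (the PA has no prefork1 because
 * that phase belongs to non-PA arena state).  A sketch, not real jemalloc
 * code:
 *
 *     pa_shard_prefork0(tsdn, shard);
 *     ... arena-level prefork1 work happens here, outside the PA ...
 *     pa_shard_prefork2(tsdn, shard);
 *     pa_shard_prefork3(tsdn, shard);
 *     pa_shard_prefork4(tsdn, shard);
 *     pa_shard_prefork5(tsdn, shard);
 *     fork();
 *     pa_shard_postfork_parent(tsdn, shard);   (in the parent)
 *     pa_shard_postfork_child(tsdn, shard);    (in the child)
 */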