Home | History | Annotate | Line # | Download | only in src
      1 #include "jemalloc/internal/jemalloc_preamble.h"
      2 #include "jemalloc/internal/jemalloc_internal_includes.h"
      3 
      4 #include "jemalloc/internal/assert.h"
      5 #include "jemalloc/internal/ckh.h"
      6 #include "jemalloc/internal/hash.h"
      7 #include "jemalloc/internal/malloc_io.h"
      8 #include "jemalloc/internal/prof_data.h"
      9 
     10 /*
     11  * This file defines and manages the core profiling data structures.
     12  *
     13  * Conceptually, profiling data can be imagined as a table with three columns:
     14  * thread, stack trace, and current allocation size.  (When prof_accum is on,
     15  * there's one additional column which is the cumulative allocation size.)
     16  *
     17  * Implementation wise, each thread maintains a hash recording the stack trace
     18  * to allocation size correspondences, which are basically the individual rows
     19  * in the table.  In addition, two global "indices" are built to make data
     20  * aggregation efficient (for dumping): bt2gctx and tdatas, which are basically
     21  * the "grouped by stack trace" and "grouped by thread" views of the same table,
     22  * respectively.  Note that the allocation size is only aggregated to the two
     23  * indices at dumping time, so as to optimize for performance.
     24  */
     25 
     26 /******************************************************************************/
     27 
     28 malloc_mutex_t bt2gctx_mtx;
     29 malloc_mutex_t tdatas_mtx;
     30 malloc_mutex_t prof_dump_mtx;
     31 
     32 /*
     33  * Table of mutexes that are shared among gctx's.  These are leaf locks, so
     34  * there is no problem with using them for more than one gctx at the same time.
     35  * The primary motivation for this sharing though is that gctx's are ephemeral,
     36  * and destroying mutexes causes complications for systems that allocate when
     37  * creating/destroying mutexes.
     38  */
     39 malloc_mutex_t *gctx_locks;
     40 static atomic_u_t cum_gctxs; /* Atomic counter. */
     41 
     42 /*
     43  * Table of mutexes that are shared among tdata's.  No operations require
     44  * holding multiple tdata locks, so there is no problem with using them for more
     45  * than one tdata at the same time, even though a gctx lock may be acquired
     46  * while holding a tdata lock.
     47  */
     48 malloc_mutex_t *tdata_locks;
     49 
     50 /*
     51  * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
     52  * structure that knows about all backtraces currently captured.
     53  */
     54 static ckh_t bt2gctx;
     55 
     56 /*
     57  * Tree of all extant prof_tdata_t structures, regardless of state,
     58  * {attached,detached,expired}.
     59  */
     60 static prof_tdata_tree_t tdatas;
     61 
     62 size_t prof_unbiased_sz[PROF_SC_NSIZES];
     63 size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES];
     64 
     65 /******************************************************************************/
     66 /* Red-black trees. */
     67 
     68 static int
     69 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
     70 	uint64_t a_thr_uid = a->thr_uid;
     71 	uint64_t b_thr_uid = b->thr_uid;
     72 	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
     73 	if (ret == 0) {
     74 		uint64_t a_thr_discrim = a->thr_discrim;
     75 		uint64_t b_thr_discrim = b->thr_discrim;
     76 		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
     77 		    b_thr_discrim);
     78 		if (ret == 0) {
     79 			uint64_t a_tctx_uid = a->tctx_uid;
     80 			uint64_t b_tctx_uid = b->tctx_uid;
     81 			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
     82 			    b_tctx_uid);
     83 		}
     84 	}
     85 	return ret;
     86 }
     87 
     88 rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
     89     tctx_link, prof_tctx_comp)
     90 
     91 static int
     92 prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
     93 	unsigned a_len = a->bt.len;
     94 	unsigned b_len = b->bt.len;
     95 	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
     96 	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
     97 	if (ret == 0) {
     98 		ret = (a_len > b_len) - (a_len < b_len);
     99 	}
    100 	return ret;
    101 }
    102 
    103 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
    104     prof_gctx_comp)
    105 
    106 static int
    107 prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
    108 	int ret;
    109 	uint64_t a_uid = a->thr_uid;
    110 	uint64_t b_uid = b->thr_uid;
    111 
    112 	ret = ((a_uid > b_uid) - (a_uid < b_uid));
    113 	if (ret == 0) {
    114 		uint64_t a_discrim = a->thr_discrim;
    115 		uint64_t b_discrim = b->thr_discrim;
    116 
    117 		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
    118 	}
    119 	return ret;
    120 }
    121 
    122 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
    123     prof_tdata_comp)
    124 
    125 /******************************************************************************/
    126 
    127 static malloc_mutex_t *
    128 prof_gctx_mutex_choose(void) {
    129 	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
    130 
    131 	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
    132 }
    133 
    134 static malloc_mutex_t *
    135 prof_tdata_mutex_choose(uint64_t thr_uid) {
    136 	return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
    137 }
    138 
    139 bool
    140 prof_data_init(tsd_t *tsd) {
    141 	tdata_tree_new(&tdatas);
    142 	return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS,
    143 	    prof_bt_hash, prof_bt_keycomp);
    144 }
    145 
    146 static void
    147 prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
    148 	cassert(config_prof);
    149 	assert(tdata == prof_tdata_get(tsd, false));
    150 
    151 	if (tdata != NULL) {
    152 		assert(!tdata->enq);
    153 		tdata->enq = true;
    154 	}
    155 
    156 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
    157 }
    158 
    159 static void
    160 prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
    161 	cassert(config_prof);
    162 	assert(tdata == prof_tdata_get(tsd, false));
    163 
    164 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
    165 
    166 	if (tdata != NULL) {
    167 		bool idump, gdump;
    168 
    169 		assert(tdata->enq);
    170 		tdata->enq = false;
    171 		idump = tdata->enq_idump;
    172 		tdata->enq_idump = false;
    173 		gdump = tdata->enq_gdump;
    174 		tdata->enq_gdump = false;
    175 
    176 		if (idump) {
    177 			prof_idump(tsd_tsdn(tsd));
    178 		}
    179 		if (gdump) {
    180 			prof_gdump(tsd_tsdn(tsd));
    181 		}
    182 	}
    183 }
    184 
    185 static prof_gctx_t *
    186 prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
    187 	/*
    188 	 * Create a single allocation that has space for vec of length bt->len.
    189 	 */
    190 	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
    191 	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
    192 	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
    193 	    true);
    194 	if (gctx == NULL) {
    195 		return NULL;
    196 	}
    197 	gctx->lock = prof_gctx_mutex_choose();
    198 	/*
    199 	 * Set nlimbo to 1, in order to avoid a race condition with
    200 	 * prof_tctx_destroy()/prof_gctx_try_destroy().
    201 	 */
    202 	gctx->nlimbo = 1;
    203 	tctx_tree_new(&gctx->tctxs);
    204 	/* Duplicate bt. */
    205 	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
    206 	gctx->bt.vec = gctx->vec;
    207 	gctx->bt.len = bt->len;
    208 	return gctx;
    209 }
    210 
    211 static void
    212 prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self,
    213     prof_gctx_t *gctx) {
    214 	cassert(config_prof);
    215 
    216 	/*
    217 	 * Check that gctx is still unused by any thread cache before destroying
    218 	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
    219 	 * condition with this function, as does prof_tctx_destroy() in order to
    220 	 * avoid a race between the main body of prof_tctx_destroy() and entry
    221 	 * into this function.
    222 	 */
    223 	prof_enter(tsd, tdata_self);
    224 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
    225 	assert(gctx->nlimbo != 0);
    226 	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
    227 		/* Remove gctx from bt2gctx. */
    228 		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
    229 			not_reached();
    230 		}
    231 		prof_leave(tsd, tdata_self);
    232 		/* Destroy gctx. */
    233 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    234 		idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
    235 	} else {
    236 		/*
    237 		 * Compensate for increment in prof_tctx_destroy() or
    238 		 * prof_lookup().
    239 		 */
    240 		gctx->nlimbo--;
    241 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    242 		prof_leave(tsd, tdata_self);
    243 	}
    244 }
    245 
    246 static bool
    247 prof_gctx_should_destroy(prof_gctx_t *gctx) {
    248 	if (opt_prof_accum) {
    249 		return false;
    250 	}
    251 	if (!tctx_tree_empty(&gctx->tctxs)) {
    252 		return false;
    253 	}
    254 	if (gctx->nlimbo != 0) {
    255 		return false;
    256 	}
    257 	return true;
    258 }
    259 
    260 static bool
    261 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
    262     void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
    263 	union {
    264 		prof_gctx_t	*p;
    265 		void		*v;
    266 	} gctx, tgctx;
    267 	union {
    268 		prof_bt_t	*p;
    269 		void		*v;
    270 	} btkey;
    271 	bool new_gctx;
    272 
    273 	prof_enter(tsd, tdata);
    274 	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
    275 		/* bt has never been seen before.  Insert it. */
    276 		prof_leave(tsd, tdata);
    277 		tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
    278 		if (tgctx.v == NULL) {
    279 			return true;
    280 		}
    281 		prof_enter(tsd, tdata);
    282 		if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
    283 			gctx.p = tgctx.p;
    284 			btkey.p = &gctx.p->bt;
    285 			if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
    286 				/* OOM. */
    287 				prof_leave(tsd, tdata);
    288 				idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
    289 				    true, true);
    290 				return true;
    291 			}
    292 			new_gctx = true;
    293 		} else {
    294 			new_gctx = false;
    295 		}
    296 	} else {
    297 		tgctx.v = NULL;
    298 		new_gctx = false;
    299 	}
    300 
    301 	if (!new_gctx) {
    302 		/*
    303 		 * Increment nlimbo, in order to avoid a race condition with
    304 		 * prof_tctx_destroy()/prof_gctx_try_destroy().
    305 		 */
    306 		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
    307 		gctx.p->nlimbo++;
    308 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
    309 		new_gctx = false;
    310 
    311 		if (tgctx.v != NULL) {
    312 			/* Lost race to insert. */
    313 			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
    314 			    true);
    315 		}
    316 	}
    317 	prof_leave(tsd, tdata);
    318 
    319 	*p_btkey = btkey.v;
    320 	*p_gctx = gctx.p;
    321 	*p_new_gctx = new_gctx;
    322 	return false;
    323 }
    324 
    325 prof_tctx_t *
    326 prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
    327 	union {
    328 		prof_tctx_t	*p;
    329 		void		*v;
    330 	} ret;
    331 	prof_tdata_t *tdata;
    332 	bool not_found;
    333 
    334 	cassert(config_prof);
    335 
    336 	tdata = prof_tdata_get(tsd, false);
    337 	assert(tdata != NULL);
    338 
    339 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
    340 	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
    341 	if (!not_found) { /* Note double negative! */
    342 		ret.p->prepared = true;
    343 	}
    344 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
    345 	if (not_found) {
    346 		void *btkey;
    347 		prof_gctx_t *gctx;
    348 		bool new_gctx, error;
    349 
    350 		/*
    351 		 * This thread's cache lacks bt.  Look for it in the global
    352 		 * cache.
    353 		 */
    354 		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
    355 		    &new_gctx)) {
    356 			return NULL;
    357 		}
    358 
    359 		/* Link a prof_tctx_t into gctx for this thread. */
    360 		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
    361 		    sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
    362 		    arena_ichoose(tsd, NULL), true);
    363 		if (ret.p == NULL) {
    364 			if (new_gctx) {
    365 				prof_gctx_try_destroy(tsd, tdata, gctx);
    366 			}
    367 			return NULL;
    368 		}
    369 		ret.p->tdata = tdata;
    370 		ret.p->thr_uid = tdata->thr_uid;
    371 		ret.p->thr_discrim = tdata->thr_discrim;
    372 		ret.p->recent_count = 0;
    373 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
    374 		ret.p->gctx = gctx;
    375 		ret.p->tctx_uid = tdata->tctx_uid_next++;
    376 		ret.p->prepared = true;
    377 		ret.p->state = prof_tctx_state_initializing;
    378 		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
    379 		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
    380 		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
    381 		if (error) {
    382 			if (new_gctx) {
    383 				prof_gctx_try_destroy(tsd, tdata, gctx);
    384 			}
    385 			idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
    386 			return NULL;
    387 		}
    388 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
    389 		ret.p->state = prof_tctx_state_nominal;
    390 		tctx_tree_insert(&gctx->tctxs, ret.p);
    391 		gctx->nlimbo--;
    392 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    393 	}
    394 
    395 	return ret.p;
    396 }
    397 
    398 /* Used in unit tests. */
    399 static prof_tdata_t *
    400 prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
    401     void *arg) {
    402 	size_t *tdata_count = (size_t *)arg;
    403 
    404 	(*tdata_count)++;
    405 
    406 	return NULL;
    407 }
    408 
    409 /* Used in unit tests. */
    410 size_t
    411 prof_tdata_count(void) {
    412 	size_t tdata_count = 0;
    413 	tsdn_t *tsdn;
    414 
    415 	tsdn = tsdn_fetch();
    416 	malloc_mutex_lock(tsdn, &tdatas_mtx);
    417 	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
    418 	    (void *)&tdata_count);
    419 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
    420 
    421 	return tdata_count;
    422 }
    423 
    424 /* Used in unit tests. */
    425 size_t
    426 prof_bt_count(void) {
    427 	size_t bt_count;
    428 	tsd_t *tsd;
    429 	prof_tdata_t *tdata;
    430 
    431 	tsd = tsd_fetch();
    432 	tdata = prof_tdata_get(tsd, false);
    433 	if (tdata == NULL) {
    434 		return 0;
    435 	}
    436 
    437 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
    438 	bt_count = ckh_count(&bt2gctx);
    439 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
    440 
    441 	return bt_count;
    442 }
    443 
    444 char *
    445 prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) {
    446 	char *ret;
    447 	size_t size;
    448 
    449 	if (thread_name == NULL) {
    450 		return NULL;
    451 	}
    452 
    453 	size = strlen(thread_name) + 1;
    454 	if (size == 1) {
    455 		return __UNCONST("");
    456 	}
    457 
    458 	ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL,
    459 	    true, arena_get(TSDN_NULL, 0, true), true);
    460 	if (ret == NULL) {
    461 		return NULL;
    462 	}
    463 	memcpy(ret, thread_name, size);
    464 	return ret;
    465 }
    466 
    467 int
    468 prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) {
    469 	assert(tsd_reentrancy_level_get(tsd) == 0);
    470 
    471 	prof_tdata_t *tdata;
    472 	unsigned i;
    473 	char *s;
    474 
    475 	tdata = prof_tdata_get(tsd, true);
    476 	if (tdata == NULL) {
    477 		return EAGAIN;
    478 	}
    479 
    480 	/* Validate input. */
    481 	if (thread_name == NULL) {
    482 		return EFAULT;
    483 	}
    484 	for (i = 0; thread_name[i] != '\0'; i++) {
    485 		unsigned char c = thread_name[i];
    486 		if (!isgraph(c) && !isblank(c)) {
    487 			return EFAULT;
    488 		}
    489 	}
    490 
    491 	s = prof_thread_name_alloc(tsd, thread_name);
    492 	if (s == NULL) {
    493 		return EAGAIN;
    494 	}
    495 
    496 	if (tdata->thread_name != NULL) {
    497 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
    498 		    true);
    499 		tdata->thread_name = NULL;
    500 	}
    501 	if (strlen(s) > 0) {
    502 		tdata->thread_name = s;
    503 	}
    504 	return 0;
    505 }
    506 
    507 JEMALLOC_FORMAT_PRINTF(3, 4)
    508 static void
    509 prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque,
    510     const char *format, ...) {
    511 	va_list ap;
    512 	char buf[PROF_PRINTF_BUFSIZE];
    513 
    514 	va_start(ap, format);
    515 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
    516 	va_end(ap);
    517 	prof_dump_write(cbopaque, buf);
    518 }
    519 
    520 /*
    521  * Casting a double to a uint64_t may not necessarily be in range; this can be
    522  * UB.  I don't think this is practically possible with the cur counters, but
    523  * plausibly could be with the accum counters.
    524  */
    525 #ifdef JEMALLOC_PROF
    526 static uint64_t
    527 prof_double_uint64_cast(double d) {
    528 	/*
    529 	 * Note: UINT64_MAX + 1 is exactly representable as a double on all
    530 	 * reasonable platforms (certainly those we'll support).  Writing this
    531 	 * as !(a < b) instead of (a >= b) means that we're NaN-safe.
    532 	 */
    533 	double rounded = round(d);
    534 	if (!(rounded < (double)UINT64_MAX)) {
    535 		return UINT64_MAX;
    536 	}
    537 	return (uint64_t)rounded;
    538 }
    539 #endif
    540 
    541 void prof_unbias_map_init(void) {
    542 	/* See the comment in prof_sample_new_event_wait */
    543 #ifdef JEMALLOC_PROF
    544 	for (szind_t i = 0; i < SC_NSIZES; i++) {
    545 		double sz = (double)sz_index2size(i);
    546 		double rate = (double)(ZU(1) << lg_prof_sample);
    547 		double div_val = 1.0 - exp(-sz / rate);
    548 		double unbiased_sz = sz / div_val;
    549 		/*
    550 		 * The "true" right value for the unbiased count is
    551 		 * 1.0/(1 - exp(-sz/rate)).  The problem is, we keep the counts
    552 		 * as integers (for a variety of reasons -- rounding errors
    553 		 * could trigger asserts, and not all libcs can properly handle
    554 		 * floating point arithmetic during malloc calls inside libc).
    555 		 * Rounding to an integer, though, can lead to rounding errors
    556 		 * of over 30% for sizes close to the sampling rate.  So
    557 		 * instead, we multiply by a constant, dividing the maximum
    558 		 * possible roundoff error by that constant.  To avoid overflow
    559 		 * in summing up size_t values, the largest safe constant we can
    560 		 * pick is the size of the smallest allocation.
    561 		 */
    562 		double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN);
    563 		double shifted_unbiased_cnt = cnt_shift / div_val;
    564 		prof_unbiased_sz[i] = (size_t)round(unbiased_sz);
    565 		prof_shifted_unbiased_cnt[i] = (size_t)round(
    566 		    shifted_unbiased_cnt);
    567 	}
    568 #else
    569 	unreachable();
    570 #endif
    571 }
    572 
    573 /*
    574  * The unbiasing story is long.  The jeprof unbiasing logic was copied from
    575  * pprof.  Both shared an issue: they unbiased using the average size of the
    576  * allocations at a particular stack trace.  This can work out OK if allocations
    577  * are mostly of the same size given some stack, but not otherwise.  We now
    578  * internally track what the unbiased results ought to be.  We can't just report
    579  * them as they are though; they'll still go through the jeprof unbiasing
    580  * process.  Instead, we figure out what values we can feed *into* jeprof's
    581  * unbiasing mechanism that will lead to getting the right values out.
    582  *
    583  * It'll unbias count and aggregate size as:
    584  *
    585  *   c_out = c_in * 1/(1-exp(-s_in/c_in/R)
    586  *   s_out = s_in * 1/(1-exp(-s_in/c_in/R)
    587  *
    588  * We want to solve for the values of c_in and s_in that will
    589  * give the c_out and s_out that we've computed internally.
    590  *
    591  * Let's do a change of variables (both to make the math easier and to make it
    592  * easier to write):
    593  *   x = s_in / c_in
    594  *   y = s_in
    595  *   k = 1/R.
    596  *
    597  * Then
    598  *   c_out = y/x * 1/(1-exp(-k*x))
    599  *   s_out = y * 1/(1-exp(-k*x))
    600  *
    601  * The first equation gives:
    602  *   y = x * c_out * (1-exp(-k*x))
    603  * The second gives:
    604  *   y = s_out * (1-exp(-k*x))
    605  * So we have
    606  *   x = s_out / c_out.
    607  * And all the other values fall out from that.
    608  *
    609  * This is all a fair bit of work.  The thing we get out of it is that we don't
    610  * break backwards compatibility with jeprof (and the various tools that have
    611  * copied its unbiasing logic).  Eventually, we anticipate a v3 heap profile
    612  * dump format based on JSON, at which point I think much of this logic can get
    613  * cleaned up (since we'll be taking a compatibility break there anyways).
    614  */
    615 static void
    616 prof_do_unbias(uint64_t c_out_shifted_i, uint64_t s_out_i, uint64_t *r_c_in,
    617     uint64_t *r_s_in) {
    618 #ifdef JEMALLOC_PROF
    619 	if (c_out_shifted_i == 0 || s_out_i == 0) {
    620 		*r_c_in = 0;
    621 		*r_s_in = 0;
    622 		return;
    623 	}
    624 	/*
    625 	 * See the note in prof_unbias_map_init() to see why we take c_out in a
    626 	 * shifted form.
    627 	 */
    628 	double c_out = (double)c_out_shifted_i
    629 	    / (double)(ZU(1) << SC_LG_TINY_MIN);
    630 	double s_out = (double)s_out_i;
    631 	double R = (double)(ZU(1) << lg_prof_sample);
    632 
    633 	double x = s_out / c_out;
    634 	double y = s_out * (1.0 - exp(-x / R));
    635 
    636 	double c_in = y / x;
    637 	double s_in = y;
    638 
    639 	*r_c_in = prof_double_uint64_cast(c_in);
    640 	*r_s_in = prof_double_uint64_cast(s_in);
    641 #else
    642 	unreachable();
    643 #endif
    644 }
    645 
    646 static void
    647 prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque,
    648     const prof_cnt_t *cnts) {
    649 	uint64_t curobjs;
    650 	uint64_t curbytes;
    651 	uint64_t accumobjs;
    652 	uint64_t accumbytes;
    653 	if (opt_prof_unbias) {
    654 		prof_do_unbias(cnts->curobjs_shifted_unbiased,
    655 		    cnts->curbytes_unbiased, &curobjs, &curbytes);
    656 		prof_do_unbias(cnts->accumobjs_shifted_unbiased,
    657 		    cnts->accumbytes_unbiased, &accumobjs, &accumbytes);
    658 	} else {
    659 		curobjs = cnts->curobjs;
    660 		curbytes = cnts->curbytes;
    661 		accumobjs = cnts->accumobjs;
    662 		accumbytes = cnts->accumbytes;
    663 	}
    664 	prof_dump_printf(prof_dump_write, cbopaque,
    665 	    "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]",
    666 	    curobjs, curbytes, accumobjs, accumbytes);
    667 }
    668 
    669 static void
    670 prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
    671 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
    672 
    673 	malloc_mutex_lock(tsdn, tctx->gctx->lock);
    674 
    675 	switch (tctx->state) {
    676 	case prof_tctx_state_initializing:
    677 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
    678 		return;
    679 	case prof_tctx_state_nominal:
    680 		tctx->state = prof_tctx_state_dumping;
    681 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
    682 
    683 		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
    684 
    685 		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
    686 		tdata->cnt_summed.curobjs_shifted_unbiased
    687 		    += tctx->dump_cnts.curobjs_shifted_unbiased;
    688 		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
    689 		tdata->cnt_summed.curbytes_unbiased
    690 		    += tctx->dump_cnts.curbytes_unbiased;
    691 		if (opt_prof_accum) {
    692 			tdata->cnt_summed.accumobjs +=
    693 			    tctx->dump_cnts.accumobjs;
    694 			tdata->cnt_summed.accumobjs_shifted_unbiased +=
    695 			    tctx->dump_cnts.accumobjs_shifted_unbiased;
    696 			tdata->cnt_summed.accumbytes +=
    697 			    tctx->dump_cnts.accumbytes;
    698 			tdata->cnt_summed.accumbytes_unbiased +=
    699 			    tctx->dump_cnts.accumbytes_unbiased;
    700 		}
    701 		break;
    702 	case prof_tctx_state_dumping:
    703 	case prof_tctx_state_purgatory:
    704 		not_reached();
    705 	}
    706 }
    707 
    708 static void
    709 prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
    710 	malloc_mutex_assert_owner(tsdn, gctx->lock);
    711 
    712 	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
    713 	gctx->cnt_summed.curobjs_shifted_unbiased
    714 	    += tctx->dump_cnts.curobjs_shifted_unbiased;
    715 	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
    716 	gctx->cnt_summed.curbytes_unbiased += tctx->dump_cnts.curbytes_unbiased;
    717 	if (opt_prof_accum) {
    718 		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
    719 		gctx->cnt_summed.accumobjs_shifted_unbiased
    720 		    += tctx->dump_cnts.accumobjs_shifted_unbiased;
    721 		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
    722 		gctx->cnt_summed.accumbytes_unbiased
    723 		    += tctx->dump_cnts.accumbytes_unbiased;
    724 	}
    725 }
    726 
    727 static prof_tctx_t *
    728 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
    729 	tsdn_t *tsdn = (tsdn_t *)arg;
    730 
    731 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
    732 
    733 	switch (tctx->state) {
    734 	case prof_tctx_state_nominal:
    735 		/* New since dumping started; ignore. */
    736 		break;
    737 	case prof_tctx_state_dumping:
    738 	case prof_tctx_state_purgatory:
    739 		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
    740 		break;
    741 	default:
    742 		not_reached();
    743 	}
    744 
    745 	return NULL;
    746 }
    747 
    748 typedef struct prof_dump_iter_arg_s prof_dump_iter_arg_t;
    749 struct prof_dump_iter_arg_s {
    750 	tsdn_t *tsdn;
    751 	write_cb_t *prof_dump_write;
    752 	void *cbopaque;
    753 };
    754 
    755 static prof_tctx_t *
    756 prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
    757 	prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque;
    758 	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
    759 
    760 	switch (tctx->state) {
    761 	case prof_tctx_state_initializing:
    762 	case prof_tctx_state_nominal:
    763 		/* Not captured by this dump. */
    764 		break;
    765 	case prof_tctx_state_dumping:
    766 	case prof_tctx_state_purgatory:
    767 		prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
    768 		    "  t%"FMTu64": ", tctx->thr_uid);
    769 		prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
    770 		    &tctx->dump_cnts);
    771 		arg->prof_dump_write(arg->cbopaque, "\n");
    772 		break;
    773 	default:
    774 		not_reached();
    775 	}
    776 	return NULL;
    777 }
    778 
    779 static prof_tctx_t *
    780 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
    781 	tsdn_t *tsdn = (tsdn_t *)arg;
    782 	prof_tctx_t *ret;
    783 
    784 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
    785 
    786 	switch (tctx->state) {
    787 	case prof_tctx_state_nominal:
    788 		/* New since dumping started; ignore. */
    789 		break;
    790 	case prof_tctx_state_dumping:
    791 		tctx->state = prof_tctx_state_nominal;
    792 		break;
    793 	case prof_tctx_state_purgatory:
    794 		ret = tctx;
    795 		goto label_return;
    796 	default:
    797 		not_reached();
    798 	}
    799 
    800 	ret = NULL;
    801 label_return:
    802 	return ret;
    803 }
    804 
    805 static void
    806 prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
    807 	cassert(config_prof);
    808 
    809 	malloc_mutex_lock(tsdn, gctx->lock);
    810 
    811 	/*
    812 	 * Increment nlimbo so that gctx won't go away before dump.
    813 	 * Additionally, link gctx into the dump list so that it is included in
    814 	 * prof_dump()'s second pass.
    815 	 */
    816 	gctx->nlimbo++;
    817 	gctx_tree_insert(gctxs, gctx);
    818 
    819 	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
    820 
    821 	malloc_mutex_unlock(tsdn, gctx->lock);
    822 }
    823 
    824 typedef struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg_t;
    825 struct prof_gctx_merge_iter_arg_s {
    826 	tsdn_t *tsdn;
    827 	size_t *leak_ngctx;
    828 };
    829 
    830 static prof_gctx_t *
    831 prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
    832 	prof_gctx_merge_iter_arg_t *arg = (prof_gctx_merge_iter_arg_t *)opaque;
    833 
    834 	malloc_mutex_lock(arg->tsdn, gctx->lock);
    835 	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
    836 	    (void *)arg->tsdn);
    837 	if (gctx->cnt_summed.curobjs != 0) {
    838 		(*arg->leak_ngctx)++;
    839 	}
    840 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
    841 
    842 	return NULL;
    843 }
    844 
    845 static void
    846 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
    847 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
    848 	prof_gctx_t *gctx;
    849 
    850 	/*
    851 	 * Standard tree iteration won't work here, because as soon as we
    852 	 * decrement gctx->nlimbo and unlock gctx, another thread can
    853 	 * concurrently destroy it, which will corrupt the tree.  Therefore,
    854 	 * tear down the tree one node at a time during iteration.
    855 	 */
    856 	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
    857 		gctx_tree_remove(gctxs, gctx);
    858 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
    859 		{
    860 			prof_tctx_t *next;
    861 
    862 			next = NULL;
    863 			do {
    864 				prof_tctx_t *to_destroy =
    865 				    tctx_tree_iter(&gctx->tctxs, next,
    866 				    prof_tctx_finish_iter,
    867 				    (void *)tsd_tsdn(tsd));
    868 				if (to_destroy != NULL) {
    869 					next = tctx_tree_next(&gctx->tctxs,
    870 					    to_destroy);
    871 					tctx_tree_remove(&gctx->tctxs,
    872 					    to_destroy);
    873 					idalloctm(tsd_tsdn(tsd), to_destroy,
    874 					    NULL, NULL, true, true);
    875 				} else {
    876 					next = NULL;
    877 				}
    878 			} while (next != NULL);
    879 		}
    880 		gctx->nlimbo--;
    881 		if (prof_gctx_should_destroy(gctx)) {
    882 			gctx->nlimbo++;
    883 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    884 			prof_gctx_try_destroy(tsd, tdata, gctx);
    885 		} else {
    886 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
    887 		}
    888 	}
    889 }
    890 
    891 typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t;
    892 struct prof_tdata_merge_iter_arg_s {
    893 	tsdn_t *tsdn;
    894 	prof_cnt_t *cnt_all;
    895 };
    896 
    897 static prof_tdata_t *
    898 prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
    899     void *opaque) {
    900 	prof_tdata_merge_iter_arg_t *arg =
    901 	    (prof_tdata_merge_iter_arg_t *)opaque;
    902 
    903 	malloc_mutex_lock(arg->tsdn, tdata->lock);
    904 	if (!tdata->expired) {
    905 		size_t tabind;
    906 		union {
    907 			prof_tctx_t	*p;
    908 			void		*v;
    909 		} tctx;
    910 
    911 		tdata->dumping = true;
    912 		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
    913 		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
    914 		    &tctx.v);) {
    915 			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
    916 		}
    917 
    918 		arg->cnt_all->curobjs += tdata->cnt_summed.curobjs;
    919 		arg->cnt_all->curobjs_shifted_unbiased
    920 		    += tdata->cnt_summed.curobjs_shifted_unbiased;
    921 		arg->cnt_all->curbytes += tdata->cnt_summed.curbytes;
    922 		arg->cnt_all->curbytes_unbiased
    923 		    += tdata->cnt_summed.curbytes_unbiased;
    924 		if (opt_prof_accum) {
    925 			arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs;
    926 			arg->cnt_all->accumobjs_shifted_unbiased
    927 			    += tdata->cnt_summed.accumobjs_shifted_unbiased;
    928 			arg->cnt_all->accumbytes +=
    929 			    tdata->cnt_summed.accumbytes;
    930 			arg->cnt_all->accumbytes_unbiased +=
    931 			    tdata->cnt_summed.accumbytes_unbiased;
    932 		}
    933 	} else {
    934 		tdata->dumping = false;
    935 	}
    936 	malloc_mutex_unlock(arg->tsdn, tdata->lock);
    937 
    938 	return NULL;
    939 }
    940 
    941 static prof_tdata_t *
    942 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
    943     void *opaque) {
    944 	if (!tdata->dumping) {
    945 		return NULL;
    946 	}
    947 
    948 	prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque;
    949 	prof_dump_printf(arg->prof_dump_write, arg->cbopaque, "  t%"FMTu64": ",
    950 	    tdata->thr_uid);
    951 	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
    952 	    &tdata->cnt_summed);
    953 	if (tdata->thread_name != NULL) {
    954 		arg->prof_dump_write(arg->cbopaque, " ");
    955 		arg->prof_dump_write(arg->cbopaque, tdata->thread_name);
    956 	}
    957 	arg->prof_dump_write(arg->cbopaque, "\n");
    958 	return NULL;
    959 }
    960 
    961 static void
    962 prof_dump_header(prof_dump_iter_arg_t *arg, const prof_cnt_t *cnt_all) {
    963 	prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
    964 	    "heap_v2/%"FMTu64"\n  t*: ", ((uint64_t)1U << lg_prof_sample));
    965 	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all);
    966 	arg->prof_dump_write(arg->cbopaque, "\n");
    967 
    968 	malloc_mutex_lock(arg->tsdn, &tdatas_mtx);
    969 	tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, arg);
    970 	malloc_mutex_unlock(arg->tsdn, &tdatas_mtx);
    971 }
    972 
    973 static void
    974 prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx,
    975     const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
    976 	cassert(config_prof);
    977 	malloc_mutex_assert_owner(arg->tsdn, gctx->lock);
    978 
    979 	/* Avoid dumping such gctx's that have no useful data. */
    980 	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
    981 	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
    982 		assert(gctx->cnt_summed.curobjs == 0);
    983 		assert(gctx->cnt_summed.curbytes == 0);
    984 		/*
    985 		 * These asserts would not be correct -- see the comment on races
    986 		 * in prof.c
    987 		 * assert(gctx->cnt_summed.curobjs_unbiased == 0);
    988 		 * assert(gctx->cnt_summed.curbytes_unbiased == 0);
    989 		*/
    990 		assert(gctx->cnt_summed.accumobjs == 0);
    991 		assert(gctx->cnt_summed.accumobjs_shifted_unbiased == 0);
    992 		assert(gctx->cnt_summed.accumbytes == 0);
    993 		assert(gctx->cnt_summed.accumbytes_unbiased == 0);
    994 		return;
    995 	}
    996 
    997 	arg->prof_dump_write(arg->cbopaque, "@");
    998 	for (unsigned i = 0; i < bt->len; i++) {
    999 		prof_dump_printf(arg->prof_dump_write, arg->cbopaque,
   1000 		    " %#"FMTxPTR, (uintptr_t)bt->vec[i]);
   1001 	}
   1002 
   1003 	arg->prof_dump_write(arg->cbopaque, "\n  t*: ");
   1004 	prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque,
   1005 	    &gctx->cnt_summed);
   1006 	arg->prof_dump_write(arg->cbopaque, "\n");
   1007 
   1008 	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, arg);
   1009 }
   1010 
   1011 /*
   1012  * See prof_sample_new_event_wait() comment for why the body of this function
   1013  * is conditionally compiled.
   1014  */
   1015 static void
   1016 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) {
   1017 #ifdef JEMALLOC_PROF
   1018 	/*
   1019 	 * Scaling is equivalent AdjustSamples() in jeprof, but the result may
   1020 	 * differ slightly from what jeprof reports, because here we scale the
   1021 	 * summary values, whereas jeprof scales each context individually and
   1022 	 * reports the sums of the scaled values.
   1023 	 */
   1024 	if (cnt_all->curbytes != 0) {
   1025 		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
   1026 		double ratio = (((double)cnt_all->curbytes) /
   1027 		    (double)cnt_all->curobjs) / sample_period;
   1028 		double scale_factor = 1.0 / (1.0 - exp(-ratio));
   1029 		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
   1030 		    * scale_factor);
   1031 		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
   1032 		    scale_factor);
   1033 
   1034 		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
   1035 		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
   1036 		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
   1037 		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
   1038 		malloc_printf(
   1039 		    "<jemalloc>: Run jeprof on dump output for leak detail\n");
   1040 		if (opt_prof_leak_error) {
   1041 			malloc_printf(
   1042 			    "<jemalloc>: Exiting with error code because memory"
   1043 			    " leaks were detected\n");
   1044 			/*
   1045 			 * Use _exit() with underscore to avoid calling atexit()
   1046 			 * and entering endless cycle.
   1047 			 */
   1048 			_exit(1);
   1049 		}
   1050 	}
   1051 #endif
   1052 }
   1053 
   1054 static prof_gctx_t *
   1055 prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
   1056 	prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque;
   1057 	malloc_mutex_lock(arg->tsdn, gctx->lock);
   1058 	prof_dump_gctx(arg, gctx, &gctx->bt, gctxs);
   1059 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
   1060 	return NULL;
   1061 }
   1062 
   1063 static void
   1064 prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all,
   1065     size_t *leak_ngctx, prof_gctx_tree_t *gctxs) {
   1066 	size_t tabind;
   1067 	union {
   1068 		prof_gctx_t	*p;
   1069 		void		*v;
   1070 	} gctx;
   1071 
   1072 	prof_enter(tsd, tdata);
   1073 
   1074 	/*
   1075 	 * Put gctx's in limbo and clear their counters in preparation for
   1076 	 * summing.
   1077 	 */
   1078 	gctx_tree_new(gctxs);
   1079 	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
   1080 		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
   1081 	}
   1082 
   1083 	/*
   1084 	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
   1085 	 * stats and merge them into the associated gctx's.
   1086 	 */
   1087 	memset(cnt_all, 0, sizeof(prof_cnt_t));
   1088 	prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = {tsd_tsdn(tsd),
   1089 	    cnt_all};
   1090 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
   1091 	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
   1092 	    &prof_tdata_merge_iter_arg);
   1093 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
   1094 
   1095 	/* Merge tctx stats into gctx's. */
   1096 	*leak_ngctx = 0;
   1097 	prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = {tsd_tsdn(tsd),
   1098 	    leak_ngctx};
   1099 	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
   1100 	    &prof_gctx_merge_iter_arg);
   1101 
   1102 	prof_leave(tsd, tdata);
   1103 }
   1104 
   1105 void
   1106 prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque,
   1107     prof_tdata_t *tdata, bool leakcheck) {
   1108 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx);
   1109 	prof_cnt_t cnt_all;
   1110 	size_t leak_ngctx;
   1111 	prof_gctx_tree_t gctxs;
   1112 	prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs);
   1113 	prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd),
   1114 	    prof_dump_write, cbopaque};
   1115 	prof_dump_header(&prof_dump_iter_arg, &cnt_all);
   1116 	gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg);
   1117 	prof_gctx_finish(tsd, &gctxs);
   1118 	if (leakcheck) {
   1119 		prof_leakcheck(&cnt_all, leak_ngctx);
   1120 	}
   1121 }
   1122 
   1123 /* Used in unit tests. */
   1124 void
   1125 prof_cnt_all(prof_cnt_t *cnt_all) {
   1126 	tsd_t *tsd = tsd_fetch();
   1127 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
   1128 	if (tdata == NULL) {
   1129 		memset(cnt_all, 0, sizeof(prof_cnt_t));
   1130 	} else {
   1131 		size_t leak_ngctx;
   1132 		prof_gctx_tree_t gctxs;
   1133 		prof_dump_prep(tsd, tdata, cnt_all, &leak_ngctx, &gctxs);
   1134 		prof_gctx_finish(tsd, &gctxs);
   1135 	}
   1136 }
   1137 
   1138 void
   1139 prof_bt_hash(const void *key, size_t r_hash[2]) {
   1140 	prof_bt_t *bt = (prof_bt_t *)__UNCONST(key);
   1141 
   1142 	cassert(config_prof);
   1143 
   1144 	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
   1145 }
   1146 
   1147 bool
   1148 prof_bt_keycomp(const void *k1, const void *k2) {
   1149 	const prof_bt_t *bt1 = (const prof_bt_t *)k1;
   1150 	const prof_bt_t *bt2 = (const prof_bt_t *)k2;
   1151 
   1152 	cassert(config_prof);
   1153 
   1154 	if (bt1->len != bt2->len) {
   1155 		return false;
   1156 	}
   1157 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
   1158 }
   1159 
   1160 prof_tdata_t *
   1161 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
   1162     char *thread_name, bool active) {
   1163 	assert(tsd_reentrancy_level_get(tsd) == 0);
   1164 
   1165 	prof_tdata_t *tdata;
   1166 
   1167 	cassert(config_prof);
   1168 
   1169 	/* Initialize an empty cache for this thread. */
   1170 	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
   1171 	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
   1172 	    arena_get(TSDN_NULL, 0, true), true);
   1173 	if (tdata == NULL) {
   1174 		return NULL;
   1175 	}
   1176 
   1177 	tdata->lock = prof_tdata_mutex_choose(thr_uid);
   1178 	tdata->thr_uid = thr_uid;
   1179 	tdata->thr_discrim = thr_discrim;
   1180 	tdata->thread_name = thread_name;
   1181 	tdata->attached = true;
   1182 	tdata->expired = false;
   1183 	tdata->tctx_uid_next = 0;
   1184 
   1185 	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
   1186 	    prof_bt_keycomp)) {
   1187 		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
   1188 		return NULL;
   1189 	}
   1190 
   1191 	tdata->enq = false;
   1192 	tdata->enq_idump = false;
   1193 	tdata->enq_gdump = false;
   1194 
   1195 	tdata->dumping = false;
   1196 	tdata->active = active;
   1197 
   1198 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
   1199 	tdata_tree_insert(&tdatas, tdata);
   1200 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
   1201 
   1202 	return tdata;
   1203 }
   1204 
   1205 static bool
   1206 prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
   1207 	if (tdata->attached && !even_if_attached) {
   1208 		return false;
   1209 	}
   1210 	if (ckh_count(&tdata->bt2tctx) != 0) {
   1211 		return false;
   1212 	}
   1213 	return true;
   1214 }
   1215 
   1216 static bool
   1217 prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
   1218     bool even_if_attached) {
   1219 	malloc_mutex_assert_owner(tsdn, tdata->lock);
   1220 
   1221 	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
   1222 }
   1223 
   1224 static void
   1225 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
   1226     bool even_if_attached) {
   1227 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
   1228 	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock);
   1229 
   1230 	tdata_tree_remove(&tdatas, tdata);
   1231 
   1232 	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
   1233 
   1234 	if (tdata->thread_name != NULL) {
   1235 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
   1236 		    true);
   1237 	}
   1238 	ckh_delete(tsd, &tdata->bt2tctx);
   1239 	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
   1240 }
   1241 
   1242 static void
   1243 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
   1244 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
   1245 	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
   1246 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
   1247 }
   1248 
   1249 void
   1250 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
   1251 	bool destroy_tdata;
   1252 
   1253 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
   1254 	if (tdata->attached) {
   1255 		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
   1256 		    true);
   1257 		/*
   1258 		 * Only detach if !destroy_tdata, because detaching would allow
   1259 		 * another thread to win the race to destroy tdata.
   1260 		 */
   1261 		if (!destroy_tdata) {
   1262 			tdata->attached = false;
   1263 		}
   1264 		tsd_prof_tdata_set(tsd, NULL);
   1265 	} else {
   1266 		destroy_tdata = false;
   1267 	}
   1268 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
   1269 	if (destroy_tdata) {
   1270 		prof_tdata_destroy(tsd, tdata, true);
   1271 	}
   1272 }
   1273 
   1274 static bool
   1275 prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
   1276 	bool destroy_tdata;
   1277 
   1278 	malloc_mutex_lock(tsdn, tdata->lock);
   1279 	if (!tdata->expired) {
   1280 		tdata->expired = true;
   1281 		destroy_tdata = prof_tdata_should_destroy(tsdn, tdata, false);
   1282 	} else {
   1283 		destroy_tdata = false;
   1284 	}
   1285 	malloc_mutex_unlock(tsdn, tdata->lock);
   1286 
   1287 	return destroy_tdata;
   1288 }
   1289 
   1290 static prof_tdata_t *
   1291 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata,
   1292     void *arg) {
   1293 	tsdn_t *tsdn = (tsdn_t *)arg;
   1294 
   1295 	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
   1296 }
   1297 
   1298 void
   1299 prof_reset(tsd_t *tsd, size_t lg_sample) {
   1300 	prof_tdata_t *next;
   1301 
   1302 	assert(lg_sample < (sizeof(uint64_t) << 3));
   1303 
   1304 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
   1305 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
   1306 
   1307 	lg_prof_sample = lg_sample;
   1308 	prof_unbias_map_init();
   1309 
   1310 	next = NULL;
   1311 	do {
   1312 		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
   1313 		    prof_tdata_reset_iter, (void *)tsd);
   1314 		if (to_destroy != NULL) {
   1315 			next = tdata_tree_next(&tdatas, to_destroy);
   1316 			prof_tdata_destroy_locked(tsd, to_destroy, false);
   1317 		} else {
   1318 			next = NULL;
   1319 		}
   1320 	} while (next != NULL);
   1321 
   1322 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
   1323 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
   1324 }
   1325 
   1326 static bool
   1327 prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
   1328 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
   1329 
   1330 	if (opt_prof_accum) {
   1331 		return false;
   1332 	}
   1333 	if (tctx->cnts.curobjs != 0) {
   1334 		return false;
   1335 	}
   1336 	if (tctx->prepared) {
   1337 		return false;
   1338 	}
   1339 	if (tctx->recent_count != 0) {
   1340 		return false;
   1341 	}
   1342 	return true;
   1343 }
   1344 
   1345 static void
   1346 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
   1347 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
   1348 
   1349 	assert(tctx->cnts.curobjs == 0);
   1350 	assert(tctx->cnts.curbytes == 0);
   1351 	/*
   1352 	 * These asserts are not correct -- see the comment about races in
   1353 	 * prof.c
   1354 	 *
   1355 	 * assert(tctx->cnts.curobjs_shifted_unbiased == 0);
   1356 	 * assert(tctx->cnts.curbytes_unbiased == 0);
   1357 	 */
   1358 	assert(!opt_prof_accum);
   1359 	assert(tctx->cnts.accumobjs == 0);
   1360 	assert(tctx->cnts.accumbytes == 0);
   1361 	/*
   1362 	 * These ones are, since accumbyte counts never go down.  Either
   1363 	 * prof_accum is off (in which case these should never have changed from
   1364 	 * their initial value of zero), or it's on (in which case we shouldn't
   1365 	 * be destroying this tctx).
   1366 	 */
   1367 	assert(tctx->cnts.accumobjs_shifted_unbiased == 0);
   1368 	assert(tctx->cnts.accumbytes_unbiased == 0);
   1369 
   1370 	prof_gctx_t *gctx = tctx->gctx;
   1371 
   1372 	{
   1373 		prof_tdata_t *tdata = tctx->tdata;
   1374 		tctx->tdata = NULL;
   1375 		ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
   1376 		bool destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd),
   1377 		    tdata, false);
   1378 		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
   1379 		if (destroy_tdata) {
   1380 			prof_tdata_destroy(tsd, tdata, false);
   1381 		}
   1382 	}
   1383 
   1384 	bool destroy_tctx, destroy_gctx;
   1385 
   1386 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
   1387 	switch (tctx->state) {
   1388 	case prof_tctx_state_nominal:
   1389 		tctx_tree_remove(&gctx->tctxs, tctx);
   1390 		destroy_tctx = true;
   1391 		if (prof_gctx_should_destroy(gctx)) {
   1392 			/*
   1393 			 * Increment gctx->nlimbo in order to keep another
   1394 			 * thread from winning the race to destroy gctx while
   1395 			 * this one has gctx->lock dropped.  Without this, it
   1396 			 * would be possible for another thread to:
   1397 			 *
   1398 			 * 1) Sample an allocation associated with gctx.
   1399 			 * 2) Deallocate the sampled object.
   1400 			 * 3) Successfully prof_gctx_try_destroy(gctx).
   1401 			 *
   1402 			 * The result would be that gctx no longer exists by the
   1403 			 * time this thread accesses it in
   1404 			 * prof_gctx_try_destroy().
   1405 			 */
   1406 			gctx->nlimbo++;
   1407 			destroy_gctx = true;
   1408 		} else {
   1409 			destroy_gctx = false;
   1410 		}
   1411 		break;
   1412 	case prof_tctx_state_dumping:
   1413 		/*
   1414 		 * A dumping thread needs tctx to remain valid until dumping
   1415 		 * has finished.  Change state such that the dumping thread will
   1416 		 * complete destruction during a late dump iteration phase.
   1417 		 */
   1418 		tctx->state = prof_tctx_state_purgatory;
   1419 		destroy_tctx = false;
   1420 		destroy_gctx = false;
   1421 		break;
   1422 	default:
   1423 		not_reached();
   1424 		destroy_tctx = false;
   1425 		destroy_gctx = false;
   1426 	}
   1427 	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
   1428 	if (destroy_gctx) {
   1429 		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx);
   1430 	}
   1431 	if (destroy_tctx) {
   1432 		idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
   1433 	}
   1434 }
   1435 
   1436 void
   1437 prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
   1438 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
   1439 	if (prof_tctx_should_destroy(tsd, tctx)) {
   1440 		/* tctx->tdata->lock will be released in prof_tctx_destroy(). */
   1441 		prof_tctx_destroy(tsd, tctx);
   1442 	} else {
   1443 		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
   1444 	}
   1445 }
   1446 
   1447 /******************************************************************************/
   1448