      1 /*	$NetBSD: jemalloc.c,v 1.4 2007/10/09 00:59:52 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (C) 2006,2007 Jason Evans <jasone (at) FreeBSD.org>.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice(s), this list of conditions and the following disclaimer as
     12  *    the first lines of this file unmodified other than the possible
     13  *    addition of one or more copyright notices.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice(s), this list of conditions and the following disclaimer in
     16  *    the documentation and/or other materials provided with the
     17  *    distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
     20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
     23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
     28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
     29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30  *
     31  *******************************************************************************
     32  *
     33  * This allocator implementation is designed to provide scalable performance
     34  * for multi-threaded programs on multi-processor systems.  The following
     35  * features are included for this purpose:
     36  *
     37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
     38  *     contention and cache sloshing.
     39  *
     40  *   + Cache line sharing between arenas is avoided for internal data
     41  *     structures.
     42  *
     43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
     44  *     rather than as individual pages.  This provides a constant-time
     45  *     mechanism for associating allocations with particular arenas.
     46  *
     47  * Allocation requests are rounded up to the nearest size class, and no record
     48  * of the original request size is maintained.  Allocations are broken into
     49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
     50  * and a 16 byte quantum, the size classes in each category are as follows:
     51  *
     52  *   |=====================================|
     53  *   | Category | Subcategory    |    Size |
     54  *   |=====================================|
     55  *   | Small    | Tiny           |       2 |
     56  *   |          |                |       4 |
     57  *   |          |                |       8 |
     58  *   |          |----------------+---------|
     59  *   |          | Quantum-spaced |      16 |
     60  *   |          |                |      32 |
     61  *   |          |                |      48 |
     62  *   |          |                |     ... |
     63  *   |          |                |     480 |
     64  *   |          |                |     496 |
     65  *   |          |                |     512 |
     66  *   |          |----------------+---------|
     67  *   |          | Sub-page       |    1 kB |
     68  *   |          |                |    2 kB |
     69  *   |=====================================|
     70  *   | Large                     |    4 kB |
     71  *   |                           |    8 kB |
     72  *   |                           |   12 kB |
     73  *   |                           |     ... |
     74  *   |                           | 1012 kB |
     75  *   |                           | 1016 kB |
     76  *   |                           | 1020 kB |
     77  *   |=====================================|
     78  *   | Huge                      |    1 MB |
     79  *   |                           |    2 MB |
     80  *   |                           |    3 MB |
     81  *   |                           |     ... |
     82  *   |=====================================|
     83  *
     84  * A different mechanism is used for each category:
     85  *
     86  *   Small : Each size class is segregated into its own set of runs.  Each run
     87  *           maintains a bitmap of which regions are free/allocated.
     88  *
     89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
     90  *           in the associated arena chunk header maps.
     91  *
     92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
     93  *          Metadata are stored in a separate red-black tree.
     94  *
     95  *******************************************************************************
     96  */
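
/*
 * Worked example of the classification above (illustrative, assuming the same
 * runtime defaults: 16 byte quantum, 4 kB pages, 1 MB chunks):
 *
 *   request of       7 bytes -> tiny,           rounded up to    8 bytes
 *   request of     100 bytes -> quantum-spaced, rounded up to  112 bytes
 *   request of     700 bytes -> sub-page,       rounded up to    1 kB
 *   request of    5000 bytes -> large,          rounded up to    8 kB
 *   request of 1500000 bytes -> huge,           rounded up to    2 MB
 */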
     97 
     98 /* LINTLIBRARY */
     99 
    100 #ifdef __NetBSD__
    101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
    102 #  define __DECONST(x, y)	((x)__UNCONST(y))
    103 #  define NO_TLS
    104 #else
    105 #  define xutrace(a, b)		utrace((a), (b))
    106 #endif	/* __NetBSD__ */
    107 
    108 /*
    109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
    110  * defaults the A and J runtime options to off.  These settings are appropriate
    111  * for production systems.
    112  */
    113 #define MALLOC_PRODUCTION
    114 
    115 #ifndef MALLOC_PRODUCTION
    116 #  define MALLOC_DEBUG
    117 #endif
    118 
    119 #include <sys/cdefs.h>
    120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
    121 __RCSID("$NetBSD: jemalloc.c,v 1.4 2007/10/09 00:59:52 ad Exp $");
    122 
    123 #ifdef __FreeBSD__
    124 #include "libc_private.h"
    125 #ifdef MALLOC_DEBUG
    126 #  define _LOCK_DEBUG
    127 #endif
    128 #include "spinlock.h"
    129 #endif
    130 #include "namespace.h"
    131 #include <sys/mman.h>
    132 #include <sys/param.h>
    133 #ifdef __FreeBSD__
    134 #include <sys/stddef.h>
    135 #endif
    136 #include <sys/time.h>
    137 #include <sys/types.h>
    138 #include <sys/sysctl.h>
    139 #include <sys/tree.h>
    140 #include <sys/uio.h>
    141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
    142 
    143 #ifdef __FreeBSD__
    144 #include <machine/atomic.h>
    145 #include <machine/cpufunc.h>
    146 #endif
    147 #include <machine/vmparam.h>
    148 
    149 #include <errno.h>
    150 #include <limits.h>
    151 #include <pthread.h>
    152 #include <sched.h>
    153 #include <stdarg.h>
    154 #include <stdbool.h>
    155 #include <stdio.h>
    156 #include <stdint.h>
    157 #include <stdlib.h>
    158 #include <string.h>
    159 #include <strings.h>
    160 #include <unistd.h>
    161 
    162 #ifdef __NetBSD__
    163 #  include <reentrant.h>
    164 void	_malloc_prefork(void);
    165 void	_malloc_postfork(void);
    166 ssize_t	_write(int, const void *, size_t);
    167 const char	*_getprogname(void);
    168 #endif
    169 
    170 #ifdef __FreeBSD__
    171 #include "un-namespace.h"
    172 #endif
    173 
    174 /* MALLOC_STATS enables statistics calculation. */
    175 #ifndef MALLOC_PRODUCTION
    176 #  define MALLOC_STATS
    177 #endif
    178 
    179 #ifdef MALLOC_DEBUG
    180 #  ifdef NDEBUG
    181 #    undef NDEBUG
    182 #  endif
    183 #else
    184 #  ifndef NDEBUG
    185 #    define NDEBUG
    186 #  endif
    187 #endif
    188 #include <assert.h>
    189 
    190 #ifdef MALLOC_DEBUG
    191    /* Disable inlining to make debugging easier. */
    192 #  define inline
    193 #endif
    194 
    195 /* Size of stack-allocated buffer passed to strerror_r(). */
    196 #define	STRERROR_BUF		64
    197 
    198 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
    199 #ifdef __i386__
    200 #  define QUANTUM_2POW_MIN	4
    201 #  define SIZEOF_PTR_2POW	2
    202 #  define USE_BRK
    203 #endif
    204 #ifdef __ia64__
    205 #  define QUANTUM_2POW_MIN	4
    206 #  define SIZEOF_PTR_2POW	3
    207 #endif
    208 #ifdef __alpha__
    209 #  define QUANTUM_2POW_MIN	4
    210 #  define SIZEOF_PTR_2POW	3
    211 #  define NO_TLS
    212 #endif
    213 #ifdef __sparc64__
    214 #  define QUANTUM_2POW_MIN	4
    215 #  define SIZEOF_PTR_2POW	3
    216 #  define NO_TLS
    217 #endif
    218 #ifdef __amd64__
    219 #  define QUANTUM_2POW_MIN	4
    220 #  define SIZEOF_PTR_2POW	3
    221 #endif
    222 #ifdef __arm__
    223 #  define QUANTUM_2POW_MIN	3
    224 #  define SIZEOF_PTR_2POW	2
    225 #  define USE_BRK
    226 #  define NO_TLS
    227 #endif
    228 #ifdef __powerpc__
    229 #  define QUANTUM_2POW_MIN	4
    230 #  define SIZEOF_PTR_2POW	2
    231 #  define USE_BRK
    232 #endif
    233 #if defined(__sparc__) && !defined(__sparc64__)
    234 #  define QUANTUM_2POW_MIN	4
    235 #  define SIZEOF_PTR_2POW	2
    236 #  define USE_BRK
    237 #endif
    238 #ifdef __vax__
    239 #  define QUANTUM_2POW_MIN	4
    240 #  define SIZEOF_PTR_2POW	2
    241 #  define USE_BRK
    242 #endif
    243 #ifdef __sh__
    244 #  define QUANTUM_2POW_MIN	4
    245 #  define SIZEOF_PTR_2POW	2
    246 #  define USE_BRK
    247 #endif
    248 #ifdef __m68k__
    249 #  define QUANTUM_2POW_MIN	4
    250 #  define SIZEOF_PTR_2POW	2
    251 #  define USE_BRK
    252 #endif
    253 #ifdef __mips__
    254 #  define QUANTUM_2POW_MIN	4
    255 #  define SIZEOF_PTR_2POW	2
    256 #  define USE_BRK
    257 #endif
    258 #ifdef __hppa__
    259 #  define QUANTUM_2POW_MIN     4
    260 #  define SIZEOF_PTR_2POW      2
    261 #  define USE_BRK
    262 #endif
    263 
    264 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
    265 
    266 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
    267 #ifndef SIZEOF_INT_2POW
    268 #  define SIZEOF_INT_2POW	2
    269 #endif
    270 
    271 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
    272 #if (!defined(PIC) && !defined(NO_TLS))
    273 #  define NO_TLS
    274 #endif
    275 
    276 /*
    277  * Size and alignment of memory chunks that are allocated by the OS's virtual
    278  * memory system.
    279  */
    280 #define	CHUNK_2POW_DEFAULT	20
    281 
    282 /*
    283  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
    284  * so over-estimates are okay (up to a point), but under-estimates will
    285  * negatively affect performance.
    286  */
    287 #define	CACHELINE_2POW		6
    288 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
    289 
    290 /* Smallest size class to support. */
    291 #define	TINY_MIN_2POW		1
    292 
    293 /*
    294  * Maximum size class that is a multiple of the quantum, but not (necessarily)
    295  * a power of 2.  Above this size, allocations are rounded up to the nearest
    296  * power of 2.
    297  */
    298 #define	SMALL_MAX_2POW_DEFAULT	9
    299 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
    300 
    301 /*
     302  * Maximum desired run header overhead.  Runs are sized as small as possible
     303  * while still honoring this setting and the other constraints; the goal is
     304  * to keep runs small without exceeding a per-run external fragmentation
     305  * threshold.
    306  *
    307  * Note that it is possible to set this low enough that it cannot be honored
    308  * for some/all object sizes, since there is one bit of header overhead per
    309  * object (plus a constant).  In such cases, this constraint is relaxed.
    310  *
    311  * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
    312  * overhead for which RUN_MAX_OVRHD is relaxed.
    313  */
    314 #define RUN_MAX_OVRHD		0.015
    315 #define RUN_MAX_OVRHD_RELAX	1.5
    316 
    317 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
    318 #define RUN_MAX_SMALL_2POW	15
    319 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
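
/*
 * Illustrative reading of the constants above: RUN_MAX_OVRHD of 0.015 asks
 * that a run's header consume no more than 1.5% of the run, so a header that
 * needs roughly 60 bytes implies a run of at least about 4000 bytes
 * (60 / 0.015).  RUN_MAX_SMALL then caps small-object runs at 32 kB
 * (1 << 15) no matter what the overhead target would prefer.
 */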
    320 
    321 /******************************************************************************/
    322 
    323 #ifdef __FreeBSD__
    324 /*
    325  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
    326  * they require malloc()ed memory.
    327  */
    328 typedef struct {
    329 	spinlock_t	lock;
    330 } malloc_mutex_t;
    331 
    332 /* Set to true once the allocator has been initialized. */
    333 static bool malloc_initialized = false;
    334 
    335 /* Used to avoid initialization races. */
    336 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
    337 #else
    338 #define	malloc_mutex_t	mutex_t
    339 
    340 /* Set to true once the allocator has been initialized. */
    341 static bool malloc_initialized = false;
    342 
    343 /* Used to avoid initialization races. */
    344 static mutex_t init_lock = MUTEX_INITIALIZER;
    345 #endif
    346 
    347 /******************************************************************************/
    348 /*
    349  * Statistics data structures.
    350  */
    351 
    352 #ifdef MALLOC_STATS
    353 
    354 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
    355 struct malloc_bin_stats_s {
    356 	/*
    357 	 * Number of allocation requests that corresponded to the size of this
    358 	 * bin.
    359 	 */
    360 	uint64_t	nrequests;
    361 
    362 	/* Total number of runs created for this bin's size class. */
    363 	uint64_t	nruns;
    364 
    365 	/*
    366 	 * Total number of runs reused by extracting them from the runs tree for
    367 	 * this bin's size class.
    368 	 */
    369 	uint64_t	reruns;
    370 
    371 	/* High-water mark for this bin. */
    372 	unsigned long	highruns;
    373 
    374 	/* Current number of runs in this bin. */
    375 	unsigned long	curruns;
    376 };
    377 
    378 typedef struct arena_stats_s arena_stats_t;
    379 struct arena_stats_s {
    380 	/* Number of bytes currently mapped. */
    381 	size_t		mapped;
    382 
    383 	/* Per-size-category statistics. */
    384 	size_t		allocated_small;
    385 	uint64_t	nmalloc_small;
    386 	uint64_t	ndalloc_small;
    387 
    388 	size_t		allocated_large;
    389 	uint64_t	nmalloc_large;
    390 	uint64_t	ndalloc_large;
    391 };
    392 
    393 typedef struct chunk_stats_s chunk_stats_t;
    394 struct chunk_stats_s {
    395 	/* Number of chunks that were allocated. */
    396 	uint64_t	nchunks;
    397 
    398 	/* High-water mark for number of chunks allocated. */
    399 	unsigned long	highchunks;
    400 
    401 	/*
     402 	 * Current number of chunks allocated.  This value is not needed for
     403 	 * anything else; it is tracked only so that highchunks can be kept
     404 	 * up to date.
    405 	 */
    406 	unsigned long	curchunks;
    407 };
    408 
    409 #endif /* #ifdef MALLOC_STATS */
    410 
    411 /******************************************************************************/
    412 /*
    413  * Chunk data structures.
    414  */
    415 
    416 /* Tree of chunks. */
    417 typedef struct chunk_node_s chunk_node_t;
    418 struct chunk_node_s {
    419 	/* Linkage for the chunk tree. */
    420 	RB_ENTRY(chunk_node_s) link;
    421 
    422 	/*
    423 	 * Pointer to the chunk that this tree node is responsible for.  In some
    424 	 * (but certainly not all) cases, this data structure is placed at the
    425 	 * beginning of the corresponding chunk, so this field may point to this
    426 	 * node.
    427 	 */
    428 	void	*chunk;
    429 
    430 	/* Total chunk size. */
    431 	size_t	size;
    432 };
    433 typedef struct chunk_tree_s chunk_tree_t;
    434 RB_HEAD(chunk_tree_s, chunk_node_s);
    435 
    436 /******************************************************************************/
    437 /*
    438  * Arena data structures.
    439  */
    440 
    441 typedef struct arena_s arena_t;
    442 typedef struct arena_bin_s arena_bin_t;
    443 
    444 typedef struct arena_chunk_map_s arena_chunk_map_t;
    445 struct arena_chunk_map_s {
    446 	/* Number of pages in run. */
    447 	uint32_t	npages;
    448 	/*
    449 	 * Position within run.  For a free run, this is POS_FREE for the first
    450 	 * and last pages.  The POS_FREE special value makes it possible to
    451 	 * quickly coalesce free runs.
    452 	 *
    453 	 * This is the limiting factor for chunksize; there can be at most 2^31
    454 	 * pages in a run.
    455 	 */
    456 #define POS_FREE ((uint32_t)0xffffffffU)
    457 	uint32_t	pos;
    458 };
    459 
    460 /* Arena chunk header. */
    461 typedef struct arena_chunk_s arena_chunk_t;
    462 struct arena_chunk_s {
    463 	/* Arena that owns the chunk. */
    464 	arena_t *arena;
    465 
    466 	/* Linkage for the arena's chunk tree. */
    467 	RB_ENTRY(arena_chunk_s) link;
    468 
    469 	/*
    470 	 * Number of pages in use.  This is maintained in order to make
    471 	 * detection of empty chunks fast.
    472 	 */
    473 	uint32_t pages_used;
    474 
    475 	/*
    476 	 * Every time a free run larger than this value is created/coalesced,
    477 	 * this value is increased.  The only way that the value decreases is if
    478 	 * arena_run_alloc() fails to find a free run as large as advertised by
    479 	 * this value.
    480 	 */
    481 	uint32_t max_frun_npages;
    482 
    483 	/*
    484 	 * Every time a free run that starts at an earlier page than this value
    485 	 * is created/coalesced, this value is decreased.  It is reset in a
    486 	 * similar fashion to max_frun_npages.
    487 	 */
    488 	uint32_t min_frun_ind;
    489 
    490 	/*
    491 	 * Map of pages within chunk that keeps track of free/large/small.  For
    492 	 * free runs, only the map entries for the first and last pages are
    493 	 * kept up to date, so that free runs can be quickly coalesced.
    494 	 */
    495 	arena_chunk_map_t map[1]; /* Dynamically sized. */
    496 };
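
/*
 * Sketch of the map[] bookkeeping described above (illustrative): a free run
 * covering pages lo..hi of a chunk is recorded roughly as
 *
 *	map[lo].npages = map[hi].npages = hi - lo + 1;
 *	map[lo].pos    = map[hi].pos    = POS_FREE;
 *
 * so when a neighboring run is freed, only the boundary entries need to be
 * examined to decide whether the two free runs can be coalesced.
 */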
    497 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
    498 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
    499 
    500 typedef struct arena_run_s arena_run_t;
    501 struct arena_run_s {
    502 	/* Linkage for run trees. */
    503 	RB_ENTRY(arena_run_s) link;
    504 
    505 #ifdef MALLOC_DEBUG
    506 	uint32_t	magic;
    507 #  define ARENA_RUN_MAGIC 0x384adf93
    508 #endif
    509 
    510 	/* Bin this run is associated with. */
    511 	arena_bin_t	*bin;
    512 
    513 	/* Index of first element that might have a free region. */
    514 	unsigned	regs_minelm;
    515 
    516 	/* Number of free regions in run. */
    517 	unsigned	nfree;
    518 
    519 	/* Bitmask of in-use regions (0: in use, 1: free). */
    520 	unsigned	regs_mask[1]; /* Dynamically sized. */
    521 };
    522 typedef struct arena_run_tree_s arena_run_tree_t;
    523 RB_HEAD(arena_run_tree_s, arena_run_s);
    524 
    525 struct arena_bin_s {
    526 	/*
    527 	 * Current run being used to service allocations of this bin's size
    528 	 * class.
    529 	 */
    530 	arena_run_t	*runcur;
    531 
    532 	/*
    533 	 * Tree of non-full runs.  This tree is used when looking for an
    534 	 * existing run when runcur is no longer usable.  We choose the
    535 	 * non-full run that is lowest in memory; this policy tends to keep
    536 	 * objects packed well, and it can also help reduce the number of
    537 	 * almost-empty chunks.
    538 	 */
    539 	arena_run_tree_t runs;
    540 
    541 	/* Size of regions in a run for this bin's size class. */
    542 	size_t		reg_size;
    543 
    544 	/* Total size of a run for this bin's size class. */
    545 	size_t		run_size;
    546 
    547 	/* Total number of regions in a run for this bin's size class. */
    548 	uint32_t	nregs;
    549 
    550 	/* Number of elements in a run's regs_mask for this bin's size class. */
    551 	uint32_t	regs_mask_nelms;
    552 
    553 	/* Offset of first region in a run for this bin's size class. */
    554 	uint32_t	reg0_offset;
    555 
    556 #ifdef MALLOC_STATS
    557 	/* Bin statistics. */
    558 	malloc_bin_stats_t stats;
    559 #endif
    560 };
    561 
    562 struct arena_s {
    563 #ifdef MALLOC_DEBUG
    564 	uint32_t		magic;
    565 #  define ARENA_MAGIC 0x947d3d24
    566 #endif
    567 
    568 	/* All operations on this arena require that mtx be locked. */
    569 	malloc_mutex_t		mtx;
    570 
    571 #ifdef MALLOC_STATS
    572 	arena_stats_t		stats;
    573 #endif
    574 
    575 	/*
    576 	 * Tree of chunks this arena manages.
    577 	 */
    578 	arena_chunk_tree_t	chunks;
    579 
    580 	/*
    581 	 * In order to avoid rapid chunk allocation/deallocation when an arena
    582 	 * oscillates right on the cusp of needing a new chunk, cache the most
    583 	 * recently freed chunk.  This caching is disabled by opt_hint.
    584 	 *
    585 	 * There is one spare chunk per arena, rather than one spare total, in
    586 	 * order to avoid interactions between multiple threads that could make
    587 	 * a single spare inadequate.
    588 	 */
    589 	arena_chunk_t *spare;
    590 
    591 	/*
     592 	 * bins is used to store runs of free regions of the following sizes,
    593 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
    594 	 *
    595 	 *   bins[i] | size |
    596 	 *   --------+------+
    597 	 *        0  |    2 |
    598 	 *        1  |    4 |
    599 	 *        2  |    8 |
    600 	 *   --------+------+
    601 	 *        3  |   16 |
    602 	 *        4  |   32 |
    603 	 *        5  |   48 |
    604 	 *        6  |   64 |
    605 	 *           :      :
    606 	 *           :      :
    607 	 *       33  |  496 |
    608 	 *       34  |  512 |
    609 	 *   --------+------+
    610 	 *       35  | 1024 |
    611 	 *       36  | 2048 |
    612 	 *   --------+------+
    613 	 */
    614 	arena_bin_t		bins[1]; /* Dynamically sized. */
    615 };
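
/*
 * With the defaults assumed in the table above (16 byte quantum, 4 kB pages),
 * bins[] holds 3 tiny bins (2, 4, 8 bytes), 32 quantum-spaced bins
 * (16 .. 512 bytes) and 2 sub-page bins (1024, 2048 bytes): 37 bins in
 * total, indices 0 through 36.
 */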
    616 
    617 /******************************************************************************/
    618 /*
    619  * Data.
    620  */
    621 
    622 /* Number of CPUs. */
    623 static unsigned		ncpus;
    624 
    625 /* VM page size. */
    626 static size_t		pagesize;
    627 static size_t		pagesize_mask;
    628 static size_t		pagesize_2pow;
    629 
    630 /* Various bin-related settings. */
    631 static size_t		bin_maxclass; /* Max size class for bins. */
    632 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
    633 static unsigned		nqbins; /* Number of quantum-spaced bins. */
    634 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
    635 static size_t		small_min;
    636 static size_t		small_max;
    637 
    638 /* Various quantum-related settings. */
    639 static size_t		quantum;
    640 static size_t		quantum_mask; /* (quantum - 1). */
    641 
    642 /* Various chunk-related settings. */
    643 static size_t		chunksize;
    644 static size_t		chunksize_mask; /* (chunksize - 1). */
    645 static unsigned		chunk_npages;
    646 static unsigned		arena_chunk_header_npages;
    647 static size_t		arena_maxclass; /* Max size class for arenas. */
    648 
    649 /********/
    650 /*
    651  * Chunks.
    652  */
    653 
    654 /* Protects chunk-related data structures. */
    655 static malloc_mutex_t	chunks_mtx;
    656 
    657 /* Tree of chunks that are stand-alone huge allocations. */
    658 static chunk_tree_t	huge;
    659 
    660 #ifdef USE_BRK
    661 /*
    662  * Try to use brk for chunk-size allocations, due to address space constraints.
    663  */
    664 /*
    665  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
    666  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
    667  * could cause recursive lock acquisition).
    668  */
    669 static malloc_mutex_t	brk_mtx;
    670 /* Result of first sbrk(0) call. */
    671 static void		*brk_base;
    672 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
    673 static void		*brk_prev;
    674 /* Current upper limit on brk addresses. */
    675 static void		*brk_max;
    676 #endif
    677 
    678 #ifdef MALLOC_STATS
    679 /* Huge allocation statistics. */
    680 static uint64_t		huge_nmalloc;
    681 static uint64_t		huge_ndalloc;
    682 static size_t		huge_allocated;
    683 #endif
    684 
    685 /*
    686  * Tree of chunks that were previously allocated.  This is used when allocating
    687  * chunks, in an attempt to re-use address space.
    688  */
    689 static chunk_tree_t	old_chunks;
    690 
    691 /****************************/
    692 /*
    693  * base (internal allocation).
    694  */
    695 
    696 /*
    697  * Current pages that are being used for internal memory allocations.  These
    698  * pages are carved up in cacheline-size quanta, so that there is no chance of
    699  * false cache line sharing.
    700  */
    701 static void		*base_pages;
    702 static void		*base_next_addr;
    703 static void		*base_past_addr; /* Addr immediately past base_pages. */
    704 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
    705 static malloc_mutex_t	base_mtx;
    706 #ifdef MALLOC_STATS
    707 static size_t		base_mapped;
    708 #endif
    709 
    710 /********/
    711 /*
    712  * Arenas.
    713  */
    714 
    715 /*
    716  * Arenas that are used to service external requests.  Not all elements of the
    717  * arenas array are necessarily used; arenas are created lazily as needed.
    718  */
    719 static arena_t		**arenas;
    720 static unsigned		narenas;
    721 static unsigned		next_arena;
    722 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
    723 
    724 #ifndef NO_TLS
    725 /*
    726  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
    727  * for allocations.
    728  */
    729 static __thread arena_t	*arenas_map;
    730 #define	get_arenas_map()	(arenas_map)
    731 #define	set_arenas_map(x)	(arenas_map = x)
    732 #else
    733 static thread_key_t arenas_map_key;
    734 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
    735 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
    736 #endif
    737 
    738 #ifdef MALLOC_STATS
    739 /* Chunk statistics. */
    740 static chunk_stats_t	stats_chunks;
    741 #endif
    742 
    743 /*******************************/
    744 /*
    745  * Runtime configuration options.
    746  */
    747 const char	*_malloc_options;
    748 
    749 #ifndef MALLOC_PRODUCTION
    750 static bool	opt_abort = true;
    751 static bool	opt_junk = true;
    752 #else
    753 static bool	opt_abort = false;
    754 static bool	opt_junk = false;
    755 #endif
    756 static bool	opt_hint = false;
    757 static bool	opt_print_stats = false;
    758 static size_t	opt_quantum_2pow = QUANTUM_2POW_MIN;
    759 static size_t	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
    760 static size_t	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
    761 static bool	opt_utrace = false;
    762 static bool	opt_sysv = false;
    763 static bool	opt_xmalloc = false;
    764 static bool	opt_zero = false;
    765 static int32_t	opt_narenas_lshift = 0;
    766 
    767 typedef struct {
    768 	void	*p;
    769 	size_t	s;
    770 	void	*r;
    771 } malloc_utrace_t;
    772 
    773 #define	UTRACE(a, b, c)							\
    774 	if (opt_utrace) {						\
    775 		malloc_utrace_t ut;					\
    776 		ut.p = a;						\
    777 		ut.s = b;						\
    778 		ut.r = c;						\
    779 		xutrace(&ut, sizeof(ut));				\
    780 	}
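
/*
 * Illustrative sketch of how the tracing hook above is meant to be used
 * (the call site shown here is hypothetical): with opt_utrace enabled,
 *
 *	UTRACE(NULL, 16, ret);
 *
 * records a malloc_utrace_t of { NULL, 16, ret } through xutrace(), i.e.
 * utrace(2) tagged "malloc" on NetBSD, so the request size and resulting
 * pointer can be recovered from ktrace output.
 */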
    781 
    782 /******************************************************************************/
    783 /*
    784  * Begin function prototypes for non-inline static functions.
    785  */
    786 
    787 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
    788 		const char *p4);
    789 #ifdef MALLOC_STATS
    790 static void	malloc_printf(const char *format, ...);
    791 #endif
    792 static char	*umax2s(uintmax_t x, char *s);
    793 static bool	base_pages_alloc(size_t minsize);
    794 static void	*base_alloc(size_t size);
    795 static chunk_node_t *base_chunk_node_alloc(void);
    796 static void	base_chunk_node_dealloc(chunk_node_t *node);
    797 #ifdef MALLOC_STATS
    798 static void	stats_print(arena_t *arena);
    799 #endif
    800 static void	*pages_map(void *addr, size_t size);
    801 static void	pages_unmap(void *addr, size_t size);
    802 static void	*chunk_alloc(size_t size);
    803 static void	chunk_dealloc(void *chunk, size_t size);
    804 static arena_t	*choose_arena_hard(void);
    805 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
    806 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
    807 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
    808 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
    809 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
    810 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
    811 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
    812 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
    813 static void	*arena_malloc(arena_t *arena, size_t size);
    814 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
    815     size_t alloc_size);
    816 static size_t	arena_salloc(const void *ptr);
    817 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
    818 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
    819 static bool	arena_new(arena_t *arena);
    820 static arena_t	*arenas_extend(unsigned ind);
    821 static void	*huge_malloc(size_t size);
    822 static void	*huge_palloc(size_t alignment, size_t size);
    823 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
    824 static void	huge_dalloc(void *ptr);
    825 static void	*imalloc(size_t size);
    826 static void	*ipalloc(size_t alignment, size_t size);
    827 static void	*icalloc(size_t size);
    828 static size_t	isalloc(const void *ptr);
    829 static void	*iralloc(void *ptr, size_t size);
    830 static void	idalloc(void *ptr);
    831 static void	malloc_print_stats(void);
    832 static bool	malloc_init_hard(void);
    833 
    834 /*
    835  * End function prototypes.
    836  */
    837 /******************************************************************************/
    838 /*
    839  * Begin mutex.
    840  */
    841 
    842 #ifdef __NetBSD__
    843 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
    844 #define	malloc_mutex_lock(m)	mutex_lock(m)
    845 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
    846 #else	/* __NetBSD__ */
    847 static inline void
    848 malloc_mutex_init(malloc_mutex_t *a_mutex)
    849 {
    850 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
    851 
    852 	a_mutex->lock = lock;
    853 }
    854 
    855 static inline void
    856 malloc_mutex_lock(malloc_mutex_t *a_mutex)
    857 {
    858 
    859 	if (__isthreaded)
    860 		_SPINLOCK(&a_mutex->lock);
    861 }
    862 
    863 static inline void
    864 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
    865 {
    866 
    867 	if (__isthreaded)
    868 		_SPINUNLOCK(&a_mutex->lock);
    869 }
    870 #endif	/* __NetBSD__ */
    871 
    872 /*
    873  * End mutex.
    874  */
    875 /******************************************************************************/
    876 /*
    877  * Begin Utility functions/macros.
    878  */
    879 
    880 /* Return the chunk address for allocation address a. */
    881 #define	CHUNK_ADDR2BASE(a)						\
    882 	((void *)((uintptr_t)(a) & ~chunksize_mask))
    883 
    884 /* Return the chunk offset of address a. */
    885 #define	CHUNK_ADDR2OFFSET(a)						\
    886 	((size_t)((uintptr_t)(a) & chunksize_mask))
    887 
    888 /* Return the smallest chunk multiple that is >= s. */
    889 #define	CHUNK_CEILING(s)						\
    890 	(((s) + chunksize_mask) & ~chunksize_mask)
    891 
    892 /* Return the smallest cacheline multiple that is >= s. */
    893 #define	CACHELINE_CEILING(s)						\
    894 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
    895 
    896 /* Return the smallest quantum multiple that is >= a. */
    897 #define	QUANTUM_CEILING(a)						\
    898 	(((a) + quantum_mask) & ~quantum_mask)
    899 
    900 /* Return the smallest pagesize multiple that is >= s. */
    901 #define	PAGE_CEILING(s)							\
    902 	(((s) + pagesize_mask) & ~pagesize_mask)
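
/*
 * Worked example for the mask arithmetic above, assuming the default 1 MB
 * chunk size (chunksize_mask == 0xfffff):
 *
 *	CHUNK_ADDR2BASE(0x2093f0a0)   -> 0x20900000
 *	CHUNK_ADDR2OFFSET(0x2093f0a0) -> 0x3f0a0
 *	CHUNK_CEILING(0x180000)       -> 0x200000 (1.5 MB rounds up to 2 MB)
 */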
    903 
    904 /* Compute the smallest power of 2 that is >= x. */
    905 static inline size_t
    906 pow2_ceil(size_t x)
    907 {
    908 
    909 	x--;
    910 	x |= x >> 1;
    911 	x |= x >> 2;
    912 	x |= x >> 4;
    913 	x |= x >> 8;
    914 	x |= x >> 16;
    915 #if (SIZEOF_PTR == 8)
    916 	x |= x >> 32;
    917 #endif
    918 	x++;
    919 	return (x);
    920 }
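
/*
 * Illustrative trace of pow2_ceil(): for x == 37 (100101 binary), the
 * decrement and the cascade of shift/or steps smear the leading one bit into
 * every lower position (36 becomes 63), and the final increment yields 64.
 * Exact powers of two are returned unchanged, e.g. pow2_ceil(64) == 64.
 */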
    921 
    922 static void
    923 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
    924 {
    925 
    926 	_write(STDERR_FILENO, p1, strlen(p1));
    927 	_write(STDERR_FILENO, p2, strlen(p2));
    928 	_write(STDERR_FILENO, p3, strlen(p3));
    929 	_write(STDERR_FILENO, p4, strlen(p4));
    930 }
    931 
    932 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
    933 	    const char *p4) = wrtmessage;
    934 
    935 #ifdef MALLOC_STATS
    936 /*
    937  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
    938  */
    939 static void
    940 malloc_printf(const char *format, ...)
    941 {
    942 	char buf[4096];
    943 	va_list ap;
    944 
    945 	va_start(ap, format);
    946 	vsnprintf(buf, sizeof(buf), format, ap);
    947 	va_end(ap);
    948 	_malloc_message(buf, "", "", "");
    949 }
    950 #endif
    951 
    952 /*
    953  * We don't want to depend on vsnprintf() for production builds, since that can
    954  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
    955  * integer printing functionality, so that malloc_printf() use can be limited to
    956  * MALLOC_STATS code.
    957  */
    958 #define UMAX2S_BUFSIZE	21
    959 static char *
    960 umax2s(uintmax_t x, char *s)
    961 {
    962 	unsigned i;
    963 
    964 	/* Make sure UMAX2S_BUFSIZE is large enough. */
    965 	assert(sizeof(uintmax_t) <= 8);
    966 
    967 	i = UMAX2S_BUFSIZE - 1;
    968 	s[i] = '\0';
    969 	do {
    970 		i--;
    971 		s[i] = "0123456789"[(int)x % 10];
    972 		x /= (uintmax_t)10LL;
    973 	} while (x > 0);
    974 
    975 	return (&s[i]);
    976 }
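
/*
 * Usage sketch for umax2s() (illustrative): the digits are written backwards
 * from the end of the caller-supplied buffer, so the returned pointer, not
 * the buffer address, is what should be printed:
 *
 *	char buf[UMAX2S_BUFSIZE];
 *	_malloc_message(umax2s(1234, buf), "\n", "", "");
 *
 * Here umax2s() returns a pointer to the "1234" stored near the end of buf.
 */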
    977 
    978 /******************************************************************************/
    979 
    980 static bool
    981 base_pages_alloc(size_t minsize)
    982 {
    983 	size_t csize = 0;
    984 
    985 #ifdef USE_BRK
    986 	/*
    987 	 * Do special brk allocation here, since base allocations don't need to
    988 	 * be chunk-aligned.
    989 	 */
    990 	if (brk_prev != (void *)-1) {
    991 		void *brk_cur;
    992 		intptr_t incr;
    993 
    994 		if (minsize != 0)
    995 			csize = CHUNK_CEILING(minsize);
    996 
    997 		malloc_mutex_lock(&brk_mtx);
    998 		do {
    999 			/* Get the current end of brk. */
   1000 			brk_cur = sbrk(0);
   1001 
   1002 			/*
   1003 			 * Calculate how much padding is necessary to
   1004 			 * chunk-align the end of brk.  Don't worry about
   1005 			 * brk_cur not being chunk-aligned though.
   1006 			 */
   1007 			incr = (intptr_t)chunksize
   1008 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
   1009 			if (incr < minsize)
   1010 				incr += csize;
   1011 
   1012 			brk_prev = sbrk(incr);
   1013 			if (brk_prev == brk_cur) {
   1014 				/* Success. */
   1015 				malloc_mutex_unlock(&brk_mtx);
   1016 				base_pages = brk_cur;
   1017 				base_next_addr = base_pages;
   1018 				base_past_addr = (void *)((uintptr_t)base_pages
   1019 				    + incr);
   1020 #ifdef MALLOC_STATS
   1021 				base_mapped += incr;
   1022 #endif
   1023 				return (false);
   1024 			}
   1025 		} while (brk_prev != (void *)-1);
   1026 		malloc_mutex_unlock(&brk_mtx);
   1027 	}
   1028 	if (minsize == 0) {
   1029 		/*
   1030 		 * Failure during initialization doesn't matter, so avoid
   1031 		 * falling through to the mmap-based page mapping code.
   1032 		 */
   1033 		return (true);
   1034 	}
   1035 #endif
   1036 	assert(minsize != 0);
   1037 	csize = PAGE_CEILING(minsize);
   1038 	base_pages = pages_map(NULL, csize);
   1039 	if (base_pages == NULL)
   1040 		return (true);
   1041 	base_next_addr = base_pages;
   1042 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
   1043 #ifdef MALLOC_STATS
   1044 	base_mapped += csize;
   1045 #endif
   1046 	return (false);
   1047 }
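
/*
 * Worked example of the brk padding computation above (assuming 1 MB chunks):
 * if sbrk(0) returns 0x0804f000, CHUNK_ADDR2OFFSET() is 0x4f000 and incr
 * starts out as 0x100000 - 0x4f000 = 0xb1000, just enough to carry the break
 * forward to the next chunk boundary; csize is added on top only when that
 * padding alone would not satisfy minsize.
 */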
   1048 
   1049 static void *
   1050 base_alloc(size_t size)
   1051 {
   1052 	void *ret;
   1053 	size_t csize;
   1054 
   1055 	/* Round size up to nearest multiple of the cacheline size. */
   1056 	csize = CACHELINE_CEILING(size);
   1057 
   1058 	malloc_mutex_lock(&base_mtx);
   1059 
   1060 	/* Make sure there's enough space for the allocation. */
   1061 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
   1062 		if (base_pages_alloc(csize)) {
   1063 			ret = NULL;
   1064 			goto RETURN;
   1065 		}
   1066 	}
   1067 
   1068 	/* Allocate. */
   1069 	ret = base_next_addr;
   1070 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
   1071 
   1072 RETURN:
   1073 	malloc_mutex_unlock(&base_mtx);
   1074 	return (ret);
   1075 }
   1076 
   1077 static chunk_node_t *
   1078 base_chunk_node_alloc(void)
   1079 {
   1080 	chunk_node_t *ret;
   1081 
   1082 	malloc_mutex_lock(&base_mtx);
   1083 	if (base_chunk_nodes != NULL) {
   1084 		ret = base_chunk_nodes;
   1085 		/* LINTED */
   1086 		base_chunk_nodes = *(chunk_node_t **)ret;
   1087 		malloc_mutex_unlock(&base_mtx);
   1088 	} else {
   1089 		malloc_mutex_unlock(&base_mtx);
   1090 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
   1091 	}
   1092 
   1093 	return (ret);
   1094 }
   1095 
   1096 static void
   1097 base_chunk_node_dealloc(chunk_node_t *node)
   1098 {
   1099 
   1100 	malloc_mutex_lock(&base_mtx);
   1101 	/* LINTED */
   1102 	*(chunk_node_t **)node = base_chunk_nodes;
   1103 	base_chunk_nodes = node;
   1104 	malloc_mutex_unlock(&base_mtx);
   1105 }
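
/*
 * Note on the two functions above: freed chunk_node_t structures are never
 * returned to base_pages.  base_chunk_node_dealloc() threads them onto a
 * LIFO free list by storing the next pointer in the node's own first word,
 * and base_chunk_node_alloc() pops from that list before falling back to
 * base_alloc().
 */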
   1106 
   1107 /******************************************************************************/
   1108 
   1109 #ifdef MALLOC_STATS
   1110 static void
   1111 stats_print(arena_t *arena)
   1112 {
   1113 	unsigned i;
   1114 	int gap_start;
   1115 
   1116 	malloc_printf(
   1117 	    "          allocated/mapped            nmalloc      ndalloc\n");
   1118 
   1119 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
   1120 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
   1121 	    arena->stats.ndalloc_small);
   1122 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
   1123 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
   1124 	    arena->stats.ndalloc_large);
   1125 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
   1126 	    arena->stats.allocated_small + arena->stats.allocated_large,
   1127 	    arena->stats.mapped,
   1128 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
   1129 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
   1130 
   1131 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
   1132 	    "    reruns maxruns curruns\n");
   1133 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
   1134 		if (arena->bins[i].stats.nrequests == 0) {
   1135 			if (gap_start == -1)
   1136 				gap_start = i;
   1137 		} else {
   1138 			if (gap_start != -1) {
   1139 				if (i > gap_start + 1) {
   1140 					/* Gap of more than one size class. */
   1141 					malloc_printf("[%u..%u]\n",
   1142 					    gap_start, i - 1);
   1143 				} else {
   1144 					/* Gap of one size class. */
   1145 					malloc_printf("[%u]\n", gap_start);
   1146 				}
   1147 				gap_start = -1;
   1148 			}
   1149 			malloc_printf(
   1150 			    "%13u %1s %4u %4u %3u %9llu %9llu"
   1151 			    " %9llu %7lu %7lu\n",
   1152 			    i,
   1153 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
   1154 			    arena->bins[i].reg_size,
   1155 			    arena->bins[i].nregs,
   1156 			    arena->bins[i].run_size >> pagesize_2pow,
   1157 			    arena->bins[i].stats.nrequests,
   1158 			    arena->bins[i].stats.nruns,
   1159 			    arena->bins[i].stats.reruns,
   1160 			    arena->bins[i].stats.highruns,
   1161 			    arena->bins[i].stats.curruns);
   1162 		}
   1163 	}
   1164 	if (gap_start != -1) {
   1165 		if (i > gap_start + 1) {
   1166 			/* Gap of more than one size class. */
   1167 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
   1168 		} else {
   1169 			/* Gap of one size class. */
   1170 			malloc_printf("[%u]\n", gap_start);
   1171 		}
   1172 	}
   1173 }
   1174 #endif
   1175 
   1176 /*
   1177  * End Utility functions/macros.
   1178  */
   1179 /******************************************************************************/
   1180 /*
   1181  * Begin chunk management functions.
   1182  */
   1183 
   1184 static inline int
   1185 chunk_comp(chunk_node_t *a, chunk_node_t *b)
   1186 {
   1187 
   1188 	assert(a != NULL);
   1189 	assert(b != NULL);
   1190 
   1191 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
   1192 		return (-1);
   1193 	else if (a->chunk == b->chunk)
   1194 		return (0);
   1195 	else
   1196 		return (1);
   1197 }
   1198 
   1199 /* Generate red-black tree code for chunks. */
   1200 #ifndef lint
   1201 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
   1202 #endif
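
/*
 * The comparator above orders chunk_node_t entries by chunk base address, so
 * RB_MIN() on old_chunks (see chunk_alloc() below) visits previously used
 * address ranges lowest-address first, and RB_NFIND() can locate every cached
 * entry that falls inside a newly mapped region.
 */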
   1203 
   1204 static void *
   1205 pages_map(void *addr, size_t size)
   1206 {
   1207 	void *ret;
   1208 
   1209 	/*
   1210 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
   1211 	 * of existing mappings, and we only want to create new mappings.
   1212 	 */
   1213 	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
   1214 	    -1, 0);
   1215 	assert(ret != NULL);
   1216 
   1217 	if (ret == MAP_FAILED)
   1218 		ret = NULL;
   1219 	else if (addr != NULL && ret != addr) {
   1220 		/*
   1221 		 * We succeeded in mapping memory, but not in the right place.
   1222 		 */
   1223 		if (munmap(ret, size) == -1) {
   1224 			char buf[STRERROR_BUF];
   1225 
   1226 			strerror_r(errno, buf, sizeof(buf));
   1227 			_malloc_message(_getprogname(),
   1228 			    ": (malloc) Error in munmap(): ", buf, "\n");
   1229 			if (opt_abort)
   1230 				abort();
   1231 		}
   1232 		ret = NULL;
   1233 	}
   1234 
   1235 	assert(ret == NULL || (addr == NULL && ret != addr)
   1236 	    || (addr != NULL && ret == addr));
   1237 	return (ret);
   1238 }
   1239 
   1240 static void
   1241 pages_unmap(void *addr, size_t size)
   1242 {
   1243 
   1244 	if (munmap(addr, size) == -1) {
   1245 		char buf[STRERROR_BUF];
   1246 
   1247 		strerror_r(errno, buf, sizeof(buf));
   1248 		_malloc_message(_getprogname(),
   1249 		    ": (malloc) Error in munmap(): ", buf, "\n");
   1250 		if (opt_abort)
   1251 			abort();
   1252 	}
   1253 }
   1254 
   1255 static void *
   1256 chunk_alloc(size_t size)
   1257 {
   1258 	void *ret, *chunk;
   1259 	chunk_node_t *tchunk, *delchunk;
   1260 
   1261 	assert(size != 0);
   1262 	assert((size & chunksize_mask) == 0);
   1263 
   1264 	malloc_mutex_lock(&chunks_mtx);
   1265 
   1266 	if (size == chunksize) {
   1267 		/*
   1268 		 * Check for address ranges that were previously chunks and try
   1269 		 * to use them.
   1270 		 */
   1271 
   1272 		/* LINTED */
   1273 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
   1274 		while (tchunk != NULL) {
   1275 			/* Found an address range.  Try to recycle it. */
   1276 
   1277 			chunk = tchunk->chunk;
   1278 			delchunk = tchunk;
   1279 			/* LINTED */
   1280 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
   1281 
   1282 			/* Remove delchunk from the tree. */
   1283 			/* LINTED */
   1284 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
   1285 			base_chunk_node_dealloc(delchunk);
   1286 
   1287 #ifdef USE_BRK
   1288 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
   1289 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
   1290 				/* Re-use a previously freed brk chunk. */
   1291 				ret = chunk;
   1292 				goto RETURN;
   1293 			}
   1294 #endif
   1295 			if ((ret = pages_map(chunk, size)) != NULL) {
   1296 				/* Success. */
   1297 				goto RETURN;
   1298 			}
   1299 		}
   1300 	}
   1301 
   1302 	/*
   1303 	 * Try to over-allocate, but allow the OS to place the allocation
   1304 	 * anywhere.  Beware of size_t wrap-around.
   1305 	 */
   1306 	if (size + chunksize > size) {
   1307 		if ((ret = pages_map(NULL, size + chunksize)) != NULL) {
   1308 			size_t offset = CHUNK_ADDR2OFFSET(ret);
   1309 
   1310 			/*
   1311 			 * Success.  Clean up unneeded leading/trailing space.
   1312 			 */
   1313 			if (offset != 0) {
   1314 				/* Leading space. */
   1315 				pages_unmap(ret, chunksize - offset);
   1316 
   1317 				ret = (void *)((uintptr_t)ret + (chunksize -
   1318 				    offset));
   1319 
   1320 				/* Trailing space. */
   1321 				pages_unmap((void *)((uintptr_t)ret + size),
   1322 				    offset);
   1323 			} else {
   1324 				/* Trailing space only. */
   1325 				pages_unmap((void *)((uintptr_t)ret + size),
   1326 				    chunksize);
   1327 			}
   1328 			goto RETURN;
   1329 		}
   1330 	}
   1331 
   1332 #ifdef USE_BRK
   1333 	/*
   1334 	 * Try to create allocations in brk, in order to make full use of
   1335 	 * limited address space.
   1336 	 */
   1337 	if (brk_prev != (void *)-1) {
   1338 		void *brk_cur;
   1339 		intptr_t incr;
   1340 
   1341 		/*
   1342 		 * The loop is necessary to recover from races with other
   1343 		 * threads that are using brk for something other than malloc.
   1344 		 */
   1345 		malloc_mutex_lock(&brk_mtx);
   1346 		do {
   1347 			/* Get the current end of brk. */
   1348 			brk_cur = sbrk(0);
   1349 
   1350 			/*
   1351 			 * Calculate how much padding is necessary to
   1352 			 * chunk-align the end of brk.
   1353 			 */
   1354 			incr = (intptr_t)size
   1355 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
   1356 			if (incr == size) {
   1357 				ret = brk_cur;
   1358 			} else {
   1359 				ret = (void *)((intptr_t)brk_cur + incr);
   1360 				incr += size;
   1361 			}
   1362 
   1363 			brk_prev = sbrk(incr);
   1364 			if (brk_prev == brk_cur) {
   1365 				/* Success. */
   1366 				malloc_mutex_unlock(&brk_mtx);
   1367 				brk_max = (void *)((intptr_t)ret + size);
   1368 				goto RETURN;
   1369 			}
   1370 		} while (brk_prev != (void *)-1);
   1371 		malloc_mutex_unlock(&brk_mtx);
   1372 	}
   1373 #endif
   1374 
   1375 	/* All strategies for allocation failed. */
   1376 	ret = NULL;
   1377 RETURN:
   1378 	if (ret != NULL) {
   1379 		chunk_node_t key;
   1380 		/*
   1381 		 * Clean out any entries in old_chunks that overlap with the
   1382 		 * memory we just allocated.
   1383 		 */
   1384 		key.chunk = ret;
   1385 		/* LINTED */
   1386 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
   1387 		while (tchunk != NULL
   1388 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
   1389 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
   1390 			delchunk = tchunk;
   1391 			/* LINTED */
   1392 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
   1393 			/* LINTED */
   1394 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
   1395 			base_chunk_node_dealloc(delchunk);
   1396 		}
   1397 
   1398 	}
   1399 #ifdef MALLOC_STATS
   1400 	if (ret != NULL) {
   1401 		stats_chunks.nchunks += (size / chunksize);
   1402 		stats_chunks.curchunks += (size / chunksize);
   1403 	}
   1404 	if (stats_chunks.curchunks > stats_chunks.highchunks)
   1405 		stats_chunks.highchunks = stats_chunks.curchunks;
   1406 #endif
   1407 	malloc_mutex_unlock(&chunks_mtx);
   1408 
   1409 	assert(CHUNK_ADDR2BASE(ret) == ret);
   1410 	return (ret);
   1411 }
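
/*
 * Worked example of the over-allocation path in chunk_alloc() above (assuming
 * 1 MB chunks): a request for one chunk maps 2 MB anywhere, say at
 * 0x2093f000.  The leading 0xc1000 bytes are unmapped to advance ret to the
 * chunk boundary 0x20a00000, and the trailing 0x3f000 bytes starting at
 * 0x20b00000 are unmapped as well, leaving exactly one aligned chunk.
 */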
   1412 
   1413 static void
   1414 chunk_dealloc(void *chunk, size_t size)
   1415 {
   1416 	chunk_node_t *node;
   1417 
   1418 	assert(chunk != NULL);
   1419 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
   1420 	assert(size != 0);
   1421 	assert((size & chunksize_mask) == 0);
   1422 
   1423 	malloc_mutex_lock(&chunks_mtx);
   1424 
   1425 #ifdef USE_BRK
   1426 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
   1427 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
   1428 		void *brk_cur;
   1429 
   1430 		malloc_mutex_lock(&brk_mtx);
   1431 		/* Get the current end of brk. */
   1432 		brk_cur = sbrk(0);
   1433 
   1434 		/*
   1435 		 * Try to shrink the data segment if this chunk is at the end
   1436 		 * of the data segment.  The sbrk() call here is subject to a
   1437 		 * race condition with threads that use brk(2) or sbrk(2)
   1438 		 * directly, but the alternative would be to leak memory for
   1439 		 * the sake of poorly designed multi-threaded programs.
   1440 		 */
   1441 		if (brk_cur == brk_max
   1442 		    && (void *)((uintptr_t)chunk + size) == brk_max
   1443 		    && sbrk(-(intptr_t)size) == brk_max) {
   1444 			malloc_mutex_unlock(&brk_mtx);
   1445 			if (brk_prev == brk_max) {
   1446 				/* Success. */
   1447 				brk_prev = (void *)((intptr_t)brk_max
   1448 				    - (intptr_t)size);
   1449 				brk_max = brk_prev;
   1450 			}
   1451 		} else {
   1452 			size_t offset;
   1453 
   1454 			malloc_mutex_unlock(&brk_mtx);
   1455 			madvise(chunk, size, MADV_FREE);
   1456 
   1457 			/*
   1458 			 * Iteratively create records of each chunk-sized
    1459 			 * memory region that 'chunk' is composed of, so that
   1460 			 * the address range can be recycled if memory usage
   1461 			 * increases later on.
   1462 			 */
   1463 			for (offset = 0; offset < size; offset += chunksize) {
   1464 				node = base_chunk_node_alloc();
   1465 				if (node == NULL)
   1466 					break;
   1467 
   1468 				node->chunk = (void *)((uintptr_t)chunk
   1469 				    + (uintptr_t)offset);
   1470 				node->size = chunksize;
   1471 				/* LINTED */
   1472 				RB_INSERT(chunk_tree_s, &old_chunks, node);
   1473 			}
   1474 		}
   1475 	} else {
   1476 #endif
   1477 		pages_unmap(chunk, size);
   1478 
   1479 		/*
   1480 		 * Make a record of the chunk's address, so that the address
   1481 		 * range can be recycled if memory usage increases later on.
   1482 		 * Don't bother to create entries if (size > chunksize), since
   1483 		 * doing so could cause scalability issues for truly gargantuan
   1484 		 * objects (many gigabytes or larger).
   1485 		 */
   1486 		if (size == chunksize) {
   1487 			node = base_chunk_node_alloc();
   1488 			if (node != NULL) {
   1489 				node->chunk = (void *)(uintptr_t)chunk;
   1490 				node->size = chunksize;
   1491 				/* LINTED */
   1492 				RB_INSERT(chunk_tree_s, &old_chunks, node);
   1493 			}
   1494 		}
   1495 #ifdef USE_BRK
   1496 	}
   1497 #endif
   1498 
   1499 #ifdef MALLOC_STATS
   1500 	stats_chunks.curchunks -= (size / chunksize);
   1501 #endif
   1502 	malloc_mutex_unlock(&chunks_mtx);
   1503 }
   1504 
   1505 /*
   1506  * End chunk management functions.
   1507  */
   1508 /******************************************************************************/
   1509 /*
   1510  * Begin arena.
   1511  */
   1512 
   1513 /*
   1514  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
   1515  * code if necessary).
   1516  */
   1517 static inline arena_t *
   1518 choose_arena(void)
   1519 {
   1520 	arena_t *ret;
   1521 
   1522 	/*
   1523 	 * We can only use TLS if this is a PIC library, since for the static
   1524 	 * library version, libc's malloc is used by TLS allocation, which
   1525 	 * introduces a bootstrapping issue.
   1526 	 */
   1527 	if (__isthreaded == false) {
   1528 	    /*
   1529 	     * Avoid the overhead of TLS for single-threaded operation.  If the
   1530 	     * app switches to threaded mode, the initial thread may end up
   1531 	     * being assigned to some other arena, but this one-time switch
   1532 	     * shouldn't cause significant issues.
   1533 	     */
   1534 	    return (arenas[0]);
   1535 	}
   1536 
   1537 	ret = get_arenas_map();
   1538 	if (ret == NULL)
   1539 		ret = choose_arena_hard();
   1540 
   1541 	assert(ret != NULL);
   1542 	return (ret);
   1543 }
   1544 
   1545 /*
   1546  * Choose an arena based on a per-thread value (slow-path code only, called
   1547  * only by choose_arena()).
   1548  */
   1549 static arena_t *
   1550 choose_arena_hard(void)
   1551 {
   1552 	arena_t *ret;
   1553 
   1554 	assert(__isthreaded);
   1555 
   1556 	/* Assign one of the arenas to this thread, in a round-robin fashion. */
   1557 	malloc_mutex_lock(&arenas_mtx);
   1558 	ret = arenas[next_arena];
   1559 	if (ret == NULL)
   1560 		ret = arenas_extend(next_arena);
   1561 	if (ret == NULL) {
   1562 		/*
   1563 		 * Make sure that this function never returns NULL, so that
   1564 		 * choose_arena() doesn't have to check for a NULL return
   1565 		 * value.
   1566 		 */
   1567 		ret = arenas[0];
   1568 	}
   1569 	next_arena = (next_arena + 1) % narenas;
   1570 	malloc_mutex_unlock(&arenas_mtx);
   1571 	set_arenas_map(ret);
   1572 
   1573 	return (ret);
   1574 }
   1575 
   1576 static inline int
   1577 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
   1578 {
   1579 
   1580 	assert(a != NULL);
   1581 	assert(b != NULL);
   1582 
   1583 	if ((uintptr_t)a < (uintptr_t)b)
   1584 		return (-1);
   1585 	else if (a == b)
   1586 		return (0);
   1587 	else
   1588 		return (1);
   1589 }
   1590 
   1591 /* Generate red-black tree code for arena chunks. */
   1592 #ifndef lint
   1593 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
   1594 #endif
   1595 
   1596 static inline int
   1597 arena_run_comp(arena_run_t *a, arena_run_t *b)
   1598 {
   1599 
   1600 	assert(a != NULL);
   1601 	assert(b != NULL);
   1602 
   1603 	if ((uintptr_t)a < (uintptr_t)b)
   1604 		return (-1);
   1605 	else if (a == b)
   1606 		return (0);
   1607 	else
   1608 		return (1);
   1609 }
   1610 
   1611 /* Generate red-black tree code for arena runs. */
   1612 #ifndef lint
   1613 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
   1614 #endif
   1615 
   1616 static inline void *
   1617 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
   1618 {
   1619 	void *ret;
   1620 	unsigned i, mask, bit, regind;
   1621 
   1622 	assert(run->magic == ARENA_RUN_MAGIC);
   1623 	assert(run->regs_minelm < bin->regs_mask_nelms);
   1624 
   1625 	/*
   1626 	 * Move the first check outside the loop, so that run->regs_minelm can
   1627 	 * be updated unconditionally, without the possibility of updating it
   1628 	 * multiple times.
   1629 	 */
   1630 	i = run->regs_minelm;
   1631 	mask = run->regs_mask[i];
   1632 	if (mask != 0) {
   1633 		/* Usable allocation found. */
   1634 		bit = ffs((int)mask) - 1;
   1635 
   1636 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
   1637 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
   1638 		    + (bin->reg_size * regind));
   1639 
   1640 		/* Clear bit. */
   1641 		mask ^= (1 << bit);
   1642 		run->regs_mask[i] = mask;
   1643 
   1644 		return (ret);
   1645 	}
   1646 
   1647 	for (i++; i < bin->regs_mask_nelms; i++) {
   1648 		mask = run->regs_mask[i];
   1649 		if (mask != 0) {
   1650 			/* Usable allocation found. */
   1651 			bit = ffs((int)mask) - 1;
   1652 
   1653 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
   1654 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
   1655 			    + (bin->reg_size * regind));
   1656 
   1657 			/* Clear bit. */
   1658 			mask ^= (1 << bit);
   1659 			run->regs_mask[i] = mask;
   1660 
   1661 			/*
   1662 			 * Make a note that nothing before this element
   1663 			 * contains a free region.
   1664 			 */
   1665 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
   1666 
   1667 			return (ret);
   1668 		}
   1669 	}
   1670 	/* Not reached. */
   1671 	assert(0);
   1672 	return (NULL);
   1673 }
   1674 
   1675 static inline void
   1676 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
   1677 {
   1678 	/*
   1679 	 * To divide by a number D that is not a power of two we multiply
   1680 	 * by (2^21 / D) and then right shift by 21 positions.
   1681 	 *
   1682 	 *   X / D
   1683 	 *
   1684 	 * becomes
   1685 	 *
   1686 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
   1687 	 */
   1688 #define SIZE_INV_SHIFT 21
   1689 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
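         	/*
         	 * Hedged worked example, assuming QUANTUM_2POW_MIN == 4 (a 16
         	 * byte quantum): for D == 48, SIZE_INV(3) == (2^21 / 48) + 1 ==
         	 * 43691, so for X == 480 (the 10th region),
         	 * (480 * 43691) >> SIZE_INV_SHIFT == 10, matching 480 / 48.
         	 */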
   1690 	static const unsigned size_invs[] = {
   1691 	    SIZE_INV(3),
   1692 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
   1693 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
   1694 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
   1695 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
   1696 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
   1697 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
   1698 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
   1699 #if (QUANTUM_2POW_MIN < 4)
   1700 	    ,
   1701 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
   1702 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
   1703 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
   1704 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
   1705 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
   1706 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
   1707 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
   1708 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
   1709 #endif
   1710 	};
   1711 	unsigned diff, regind, elm, bit;
   1712 
   1713 	assert(run->magic == ARENA_RUN_MAGIC);
   1714 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
   1715 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
   1716 
   1717 	/*
   1718 	 * Avoid doing division with a variable divisor if possible.  Using
   1719 	 * actual division here can reduce allocator throughput by over 20%!
   1720 	 */
   1721 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
   1722 	if ((size & (size - 1)) == 0) {
   1723 		/*
   1724 		 * log2_table allows fast division of a power of two in the
   1725 		 * [1..128] range.
   1726 		 *
   1727 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
   1728 		 */
   1729 		static const unsigned char log2_table[] = {
   1730 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
   1731 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
   1732 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1733 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
   1734 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1735 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1736 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1737 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
   1738 		};
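         		/*
         		 * Illustrative example (added commentary): for
         		 * size == 32, log2_table[31] == 5, so diff == 96
         		 * gives regind == 96 >> 5 == 3.
         		 */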
   1739 
   1740 		if (size <= 128)
   1741 			regind = (diff >> log2_table[size - 1]);
   1742 		else if (size <= 32768)
   1743 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
   1744 		else {
   1745 			/*
   1746 			 * The page size is too large for us to use the lookup
   1747 			 * table.  Use real division.
   1748 			 */
   1749 			regind = diff / size;
   1750 		}
   1751 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
   1752 	    << QUANTUM_2POW_MIN) + 2) {
   1753 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
   1754 		regind >>= SIZE_INV_SHIFT;
   1755 	} else {
   1756 		/*
   1757 		 * size_invs isn't large enough to handle this size class, so
   1758 		 * calculate regind using actual division.  This only happens
   1759 		 * if the user increases small_max via the 'S' runtime
   1760 		 * configuration option.
   1761 		 */
   1762 		regind = diff / size;
    1763 	}
   1764 	assert(diff == regind * size);
   1765 	assert(regind < bin->nregs);
   1766 
   1767 	elm = regind >> (SIZEOF_INT_2POW + 3);
   1768 	if (elm < run->regs_minelm)
   1769 		run->regs_minelm = elm;
   1770 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
   1771 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
   1772 	run->regs_mask[elm] |= (1 << bit);
   1773 #undef SIZE_INV
   1774 #undef SIZE_INV_SHIFT
   1775 }
   1776 
   1777 static void
   1778 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
   1779 {
   1780 	arena_chunk_t *chunk;
   1781 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
   1782 	unsigned i;
   1783 
   1784 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
   1785 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
   1786 	    >> pagesize_2pow);
   1787 	total_pages = chunk->map[run_ind].npages;
   1788 	need_pages = (size >> pagesize_2pow);
   1789 	assert(need_pages <= total_pages);
   1790 	rem_pages = total_pages - need_pages;
   1791 
    1792 	/* Split off enough pages from the run's front to fit the allocation. */
   1793 	map_offset = run_ind;
   1794 	for (i = 0; i < need_pages; i++) {
   1795 		chunk->map[map_offset + i].npages = need_pages;
   1796 		chunk->map[map_offset + i].pos = i;
   1797 	}
   1798 
   1799 	/* Keep track of trailing unused pages for later use. */
   1800 	if (rem_pages > 0) {
   1801 		/* Update map for trailing pages. */
   1802 		map_offset += need_pages;
   1803 		chunk->map[map_offset].npages = rem_pages;
   1804 		chunk->map[map_offset].pos = POS_FREE;
   1805 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
   1806 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
   1807 	}
   1808 
   1809 	chunk->pages_used += need_pages;
   1810 }
   1811 
   1812 static arena_chunk_t *
   1813 arena_chunk_alloc(arena_t *arena)
   1814 {
   1815 	arena_chunk_t *chunk;
   1816 
   1817 	if (arena->spare != NULL) {
   1818 		chunk = arena->spare;
   1819 		arena->spare = NULL;
   1820 
   1821 		/* LINTED */
   1822 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
   1823 	} else {
   1824 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
   1825 		if (chunk == NULL)
   1826 			return (NULL);
   1827 #ifdef MALLOC_STATS
   1828 		arena->stats.mapped += chunksize;
   1829 #endif
   1830 
   1831 		chunk->arena = arena;
   1832 
   1833 		/* LINTED */
   1834 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
   1835 
   1836 		/*
   1837 		 * Claim that no pages are in use, since the header is merely
   1838 		 * overhead.
   1839 		 */
   1840 		chunk->pages_used = 0;
   1841 
   1842 		chunk->max_frun_npages = chunk_npages -
   1843 		    arena_chunk_header_npages;
   1844 		chunk->min_frun_ind = arena_chunk_header_npages;
   1845 
   1846 		/*
   1847 		 * Initialize enough of the map to support one maximal free run.
   1848 		 */
   1849 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
   1850 		    arena_chunk_header_npages;
   1851 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
   1852 		chunk->map[chunk_npages - 1].npages = chunk_npages -
   1853 		    arena_chunk_header_npages;
   1854 		chunk->map[chunk_npages - 1].pos = POS_FREE;
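         		/*
         		 * Added note: only the map entries at the free run's
         		 * two boundaries are initialized here; the run
         		 * allocation and coalescing code only ever consults
         		 * the entries at a free run's first and last pages.
         		 */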
   1855 	}
   1856 
   1857 	return (chunk);
   1858 }
   1859 
   1860 static void
   1861 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
   1862 {
   1863 
   1864 	/*
   1865 	 * Remove chunk from the chunk tree, regardless of whether this chunk
   1866 	 * will be cached, so that the arena does not use it.
   1867 	 */
   1868 	/* LINTED */
   1869 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
   1870 
   1871 	if (opt_hint == false) {
   1872 		if (arena->spare != NULL) {
   1873 			chunk_dealloc((void *)arena->spare, chunksize);
   1874 #ifdef MALLOC_STATS
   1875 			arena->stats.mapped -= chunksize;
   1876 #endif
   1877 		}
   1878 		arena->spare = chunk;
   1879 	} else {
   1880 		assert(arena->spare == NULL);
   1881 		chunk_dealloc((void *)chunk, chunksize);
   1882 #ifdef MALLOC_STATS
   1883 		arena->stats.mapped -= chunksize;
   1884 #endif
   1885 	}
   1886 }
   1887 
   1888 static arena_run_t *
   1889 arena_run_alloc(arena_t *arena, size_t size)
   1890 {
   1891 	arena_chunk_t *chunk;
   1892 	arena_run_t *run;
   1893 	unsigned need_npages, limit_pages, compl_need_npages;
   1894 
   1895 	assert(size <= (chunksize - (arena_chunk_header_npages <<
   1896 	    pagesize_2pow)));
   1897 	assert((size & pagesize_mask) == 0);
   1898 
   1899 	/*
    1900 	 * Search through the arena's chunks in address order for a free run
    1901 	 * that is large enough.  Look for the first fit.
   1902 	 */
   1903 	need_npages = (size >> pagesize_2pow);
   1904 	limit_pages = chunk_npages - arena_chunk_header_npages;
   1905 	compl_need_npages = limit_pages - need_npages;
   1906 	/* LINTED */
   1907 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
   1908 		/*
   1909 		 * Avoid searching this chunk if there are not enough
   1910 		 * contiguous free pages for there to possibly be a large
   1911 		 * enough free run.
   1912 		 */
   1913 		if (chunk->pages_used <= compl_need_npages &&
   1914 		    need_npages <= chunk->max_frun_npages) {
   1915 			arena_chunk_map_t *mapelm;
   1916 			unsigned i;
   1917 			unsigned max_frun_npages = 0;
   1918 			unsigned min_frun_ind = chunk_npages;
   1919 
   1920 			assert(chunk->min_frun_ind >=
   1921 			    arena_chunk_header_npages);
   1922 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
   1923 				mapelm = &chunk->map[i];
   1924 				if (mapelm->pos == POS_FREE) {
   1925 					if (mapelm->npages >= need_npages) {
   1926 						run = (arena_run_t *)
   1927 						    ((uintptr_t)chunk + (i <<
   1928 						    pagesize_2pow));
   1929 						/* Update page map. */
   1930 						arena_run_split(arena, run,
   1931 						    size);
   1932 						return (run);
   1933 					}
   1934 					if (mapelm->npages >
   1935 					    max_frun_npages) {
   1936 						max_frun_npages =
   1937 						    mapelm->npages;
   1938 					}
   1939 					if (i < min_frun_ind) {
   1940 						min_frun_ind = i;
   1941 						if (i < chunk->min_frun_ind)
   1942 							chunk->min_frun_ind = i;
   1943 					}
   1944 				}
   1945 				i += mapelm->npages;
   1946 			}
   1947 			/*
   1948 			 * Search failure.  Reset cached chunk->max_frun_npages.
   1949 			 * chunk->min_frun_ind was already reset above (if
   1950 			 * necessary).
   1951 			 */
   1952 			chunk->max_frun_npages = max_frun_npages;
   1953 		}
   1954 	}
   1955 
   1956 	/*
   1957 	 * No usable runs.  Create a new chunk from which to allocate the run.
   1958 	 */
   1959 	chunk = arena_chunk_alloc(arena);
   1960 	if (chunk == NULL)
   1961 		return (NULL);
   1962 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
   1963 	    pagesize_2pow));
   1964 	/* Update page map. */
   1965 	arena_run_split(arena, run, size);
   1966 	return (run);
   1967 }
   1968 
   1969 static void
   1970 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
   1971 {
   1972 	arena_chunk_t *chunk;
   1973 	unsigned run_ind, run_pages;
   1974 
   1975 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
   1976 
   1977 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
   1978 	    >> pagesize_2pow);
   1979 	assert(run_ind >= arena_chunk_header_npages);
   1980 	assert(run_ind < (chunksize >> pagesize_2pow));
   1981 	run_pages = (size >> pagesize_2pow);
   1982 	assert(run_pages == chunk->map[run_ind].npages);
   1983 
   1984 	/* Subtract pages from count of pages used in chunk. */
   1985 	chunk->pages_used -= run_pages;
   1986 
   1987 	/* Mark run as deallocated. */
   1988 	assert(chunk->map[run_ind].npages == run_pages);
   1989 	chunk->map[run_ind].pos = POS_FREE;
   1990 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
   1991 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
   1992 
   1993 	/*
   1994 	 * Tell the kernel that we don't need the data in this run, but only if
   1995 	 * requested via runtime configuration.
   1996 	 */
   1997 	if (opt_hint)
   1998 		madvise(run, size, MADV_FREE);
   1999 
   2000 	/* Try to coalesce with neighboring runs. */
   2001 	if (run_ind > arena_chunk_header_npages &&
   2002 	    chunk->map[run_ind - 1].pos == POS_FREE) {
   2003 		unsigned prev_npages;
   2004 
   2005 		/* Coalesce with previous run. */
   2006 		prev_npages = chunk->map[run_ind - 1].npages;
   2007 		run_ind -= prev_npages;
   2008 		assert(chunk->map[run_ind].npages == prev_npages);
   2009 		assert(chunk->map[run_ind].pos == POS_FREE);
   2010 		run_pages += prev_npages;
   2011 
   2012 		chunk->map[run_ind].npages = run_pages;
   2013 		assert(chunk->map[run_ind].pos == POS_FREE);
   2014 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
   2015 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
   2016 	}
   2017 
   2018 	if (run_ind + run_pages < chunk_npages &&
   2019 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
   2020 		unsigned next_npages;
   2021 
   2022 		/* Coalesce with next run. */
   2023 		next_npages = chunk->map[run_ind + run_pages].npages;
   2024 		run_pages += next_npages;
   2025 		assert(chunk->map[run_ind + run_pages - 1].npages ==
   2026 		    next_npages);
   2027 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
   2028 
   2029 		chunk->map[run_ind].npages = run_pages;
   2030 		chunk->map[run_ind].pos = POS_FREE;
   2031 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
   2032 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
   2033 	}
   2034 
   2035 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
   2036 		chunk->max_frun_npages = chunk->map[run_ind].npages;
   2037 	if (run_ind < chunk->min_frun_ind)
   2038 		chunk->min_frun_ind = run_ind;
   2039 
   2040 	/* Deallocate chunk if it is now completely unused. */
   2041 	if (chunk->pages_used == 0)
   2042 		arena_chunk_dealloc(arena, chunk);
   2043 }
   2044 
   2045 static arena_run_t *
   2046 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
   2047 {
   2048 	arena_run_t *run;
   2049 	unsigned i, remainder;
   2050 
   2051 	/* Look for a usable run. */
   2052 	/* LINTED */
   2053 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
   2054 		/* run is guaranteed to have available space. */
   2055 		/* LINTED */
   2056 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
   2057 #ifdef MALLOC_STATS
   2058 		bin->stats.reruns++;
   2059 #endif
   2060 		return (run);
   2061 	}
   2062 	/* No existing runs have any space available. */
   2063 
   2064 	/* Allocate a new run. */
   2065 	run = arena_run_alloc(arena, bin->run_size);
   2066 	if (run == NULL)
   2067 		return (NULL);
   2068 
   2069 	/* Initialize run internals. */
   2070 	run->bin = bin;
   2071 
   2072 	for (i = 0; i < bin->regs_mask_nelms; i++)
   2073 		run->regs_mask[i] = UINT_MAX;
   2074 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
    2075 	if (remainder != 0) {
    2076 		/* Unset the spare bits in the last mask element (i - 1). */
    2077 		run->regs_mask[i - 1] = (UINT_MAX >>
    2078 		    ((1 << (SIZEOF_INT_2POW + 3)) - remainder));
    2079 	}
   2080 
   2081 	run->regs_minelm = 0;
   2082 
   2083 	run->nfree = bin->nregs;
   2084 #ifdef MALLOC_DEBUG
   2085 	run->magic = ARENA_RUN_MAGIC;
   2086 #endif
   2087 
   2088 #ifdef MALLOC_STATS
   2089 	bin->stats.nruns++;
   2090 	bin->stats.curruns++;
   2091 	if (bin->stats.curruns > bin->stats.highruns)
   2092 		bin->stats.highruns = bin->stats.curruns;
   2093 #endif
   2094 	return (run);
   2095 }
   2096 
   2097 /* bin->runcur must have space available before this function is called. */
   2098 static inline void *
   2099 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
   2100 {
   2101 	void *ret;
   2102 
   2103 	assert(run->magic == ARENA_RUN_MAGIC);
   2104 	assert(run->nfree > 0);
   2105 
   2106 	ret = arena_run_reg_alloc(run, bin);
   2107 	assert(ret != NULL);
   2108 	run->nfree--;
   2109 
   2110 	return (ret);
   2111 }
   2112 
   2113 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
   2114 static void *
   2115 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
   2116 {
   2117 
   2118 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
   2119 	if (bin->runcur == NULL)
   2120 		return (NULL);
   2121 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
   2122 	assert(bin->runcur->nfree > 0);
   2123 
   2124 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
   2125 }
   2126 
   2127 /*
   2128  * Calculate bin->run_size such that it meets the following constraints:
   2129  *
   2130  *   *) bin->run_size >= min_run_size
   2131  *   *) bin->run_size <= arena_maxclass
   2132  *   *) bin->run_size <= RUN_MAX_SMALL
   2133  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
   2134  *
   2135  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
   2136  * also calculated here, since these settings are all interdependent.
   2137  */
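         /*
          * Added sketch of the constraint the loops below solve (assuming 4-byte
          * ints): for a candidate run_size, the header occupies
          * sizeof(arena_run_t) + sizeof(unsigned) * (ceil(nregs / 32) - 1) bytes,
          * while the regions occupy the trailing nregs * reg_size bytes starting
          * at reg0_offset = run_size - nregs * reg_size; nregs is decremented
          * until the header no longer overlaps region 0.
          */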
   2138 static size_t
   2139 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
   2140 {
   2141 	size_t try_run_size, good_run_size;
   2142 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
   2143 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
   2144 	float max_ovrhd = RUN_MAX_OVRHD;
   2145 
   2146 	assert(min_run_size >= pagesize);
   2147 	assert(min_run_size <= arena_maxclass);
   2148 	assert(min_run_size <= RUN_MAX_SMALL);
   2149 
   2150 	/*
   2151 	 * Calculate known-valid settings before entering the run_size
   2152 	 * expansion loop, so that the first part of the loop always copies
   2153 	 * valid settings.
   2154 	 *
   2155 	 * The do..while loop iteratively reduces the number of regions until
   2156 	 * the run header and the regions no longer overlap.  A closed formula
   2157 	 * would be quite messy, since there is an interdependency between the
   2158 	 * header's mask length and the number of regions.
   2159 	 */
   2160 	try_run_size = min_run_size;
   2161 	try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
   2162 	    + 1; /* Counter-act the first line of the loop. */
   2163 	do {
   2164 		try_nregs--;
   2165 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
   2166 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
   2167 		try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
   2168 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
   2169 	    > try_reg0_offset);
   2170 
   2171 	/* run_size expansion loop. */
   2172 	do {
   2173 		/*
   2174 		 * Copy valid settings before trying more aggressive settings.
   2175 		 */
   2176 		good_run_size = try_run_size;
   2177 		good_nregs = try_nregs;
   2178 		good_mask_nelms = try_mask_nelms;
   2179 		good_reg0_offset = try_reg0_offset;
   2180 
   2181 		/* Try more aggressive settings. */
   2182 		try_run_size += pagesize;
   2183 		try_nregs = ((try_run_size - sizeof(arena_run_t)) /
   2184 		    bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
   2185 		do {
   2186 			try_nregs--;
   2187 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
   2188 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
   2189 			    1 : 0);
   2190 			try_reg0_offset = try_run_size - (try_nregs *
   2191 			    bin->reg_size);
   2192 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
   2193 		    (try_mask_nelms - 1)) > try_reg0_offset);
   2194 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
   2195 	    && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
   2196 	    && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
   2197 	    max_ovrhd);
   2198 
   2199 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
   2200 	    <= good_reg0_offset);
   2201 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
   2202 
   2203 	/* Copy final settings. */
   2204 	bin->run_size = good_run_size;
   2205 	bin->nregs = good_nregs;
   2206 	bin->regs_mask_nelms = good_mask_nelms;
   2207 	bin->reg0_offset = good_reg0_offset;
   2208 
   2209 	return (good_run_size);
   2210 }
   2211 
   2212 static void *
   2213 arena_malloc(arena_t *arena, size_t size)
   2214 {
   2215 	void *ret;
   2216 
   2217 	assert(arena != NULL);
   2218 	assert(arena->magic == ARENA_MAGIC);
   2219 	assert(size != 0);
   2220 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
   2221 
   2222 	if (size <= bin_maxclass) {
   2223 		arena_bin_t *bin;
   2224 		arena_run_t *run;
   2225 
   2226 		/* Small allocation. */
   2227 
   2228 		if (size < small_min) {
   2229 			/* Tiny. */
   2230 			size = pow2_ceil(size);
   2231 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
   2232 			    1)))];
   2233 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
   2234 			/*
   2235 			 * Bin calculation is always correct, but we may need
   2236 			 * to fix size for the purposes of assertions and/or
   2237 			 * stats accuracy.
   2238 			 */
   2239 			if (size < (1 << TINY_MIN_2POW))
   2240 				size = (1 << TINY_MIN_2POW);
   2241 #endif
   2242 		} else if (size <= small_max) {
   2243 			/* Quantum-spaced. */
   2244 			size = QUANTUM_CEILING(size);
   2245 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
   2246 			    - 1];
   2247 		} else {
   2248 			/* Sub-page. */
   2249 			size = pow2_ceil(size);
   2250 			bin = &arena->bins[ntbins + nqbins
   2251 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
   2252 		}
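         		/*
         		 * Added example, assuming default option settings (16
         		 * byte quantum, 2 byte minimum tiny size): a 7 byte
         		 * request rounds up to 8 and selects the third tiny
         		 * bin, while a 100 byte request rounds up to 112 and
         		 * selects the seventh quantum-spaced bin, so size
         		 * equals bin->reg_size below.
         		 */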
   2253 		assert(size == bin->reg_size);
   2254 
   2255 		malloc_mutex_lock(&arena->mtx);
   2256 		if ((run = bin->runcur) != NULL && run->nfree > 0)
   2257 			ret = arena_bin_malloc_easy(arena, bin, run);
   2258 		else
   2259 			ret = arena_bin_malloc_hard(arena, bin);
   2260 
   2261 		if (ret == NULL) {
   2262 			malloc_mutex_unlock(&arena->mtx);
   2263 			return (NULL);
   2264 		}
   2265 
   2266 #ifdef MALLOC_STATS
   2267 		bin->stats.nrequests++;
   2268 		arena->stats.nmalloc_small++;
   2269 		arena->stats.allocated_small += size;
   2270 #endif
   2271 	} else {
   2272 		/* Large allocation. */
   2273 		size = PAGE_CEILING(size);
   2274 		malloc_mutex_lock(&arena->mtx);
   2275 		ret = (void *)arena_run_alloc(arena, size);
   2276 		if (ret == NULL) {
   2277 			malloc_mutex_unlock(&arena->mtx);
   2278 			return (NULL);
   2279 		}
   2280 #ifdef MALLOC_STATS
   2281 		arena->stats.nmalloc_large++;
   2282 		arena->stats.allocated_large += size;
   2283 #endif
   2284 	}
   2285 
   2286 	malloc_mutex_unlock(&arena->mtx);
   2287 
   2288 	if (opt_junk)
   2289 		memset(ret, 0xa5, size);
   2290 	else if (opt_zero)
   2291 		memset(ret, 0, size);
   2292 	return (ret);
   2293 }
   2294 
   2295 static inline void
   2296 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
   2297     unsigned npages)
   2298 {
   2299 	unsigned i;
   2300 
   2301 	assert(npages > 0);
   2302 
   2303 	/*
    2304 	 * Modify the map such that arena_run_dalloc() sees the run as
   2305 	 * separately allocated.
   2306 	 */
   2307 	for (i = 0; i < npages; i++) {
   2308 		chunk->map[pageind + i].npages = npages;
   2309 		chunk->map[pageind + i].pos = i;
   2310 	}
   2311 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
   2312 	    pagesize_2pow)), npages << pagesize_2pow);
   2313 }
   2314 
   2315 /* Only handles large allocations that require more than page alignment. */
   2316 static void *
   2317 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
   2318 {
   2319 	void *ret;
   2320 	size_t offset;
   2321 	arena_chunk_t *chunk;
   2322 	unsigned pageind, i, npages;
   2323 
   2324 	assert((size & pagesize_mask) == 0);
   2325 	assert((alignment & pagesize_mask) == 0);
   2326 
   2327 	npages = size >> pagesize_2pow;
   2328 
   2329 	malloc_mutex_lock(&arena->mtx);
   2330 	ret = (void *)arena_run_alloc(arena, alloc_size);
   2331 	if (ret == NULL) {
   2332 		malloc_mutex_unlock(&arena->mtx);
   2333 		return (NULL);
   2334 	}
   2335 
   2336 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
   2337 
   2338 	offset = (uintptr_t)ret & (alignment - 1);
   2339 	assert((offset & pagesize_mask) == 0);
   2340 	assert(offset < alloc_size);
   2341 	if (offset == 0) {
   2342 		pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
   2343 		    pagesize_2pow);
   2344 
   2345 		/* Update the map for the run to be kept. */
   2346 		for (i = 0; i < npages; i++) {
   2347 			chunk->map[pageind + i].npages = npages;
   2348 			assert(chunk->map[pageind + i].pos == i);
   2349 		}
   2350 
   2351 		/* Trim trailing space. */
   2352 		arena_palloc_trim(arena, chunk, pageind + npages,
   2353 		    (alloc_size - size) >> pagesize_2pow);
   2354 	} else {
   2355 		size_t leadsize, trailsize;
   2356 
   2357 		leadsize = alignment - offset;
   2358 		ret = (void *)((uintptr_t)ret + leadsize);
   2359 		pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
   2360 		    pagesize_2pow);
   2361 
   2362 		/* Update the map for the run to be kept. */
   2363 		for (i = 0; i < npages; i++) {
   2364 			chunk->map[pageind + i].npages = npages;
   2365 			chunk->map[pageind + i].pos = i;
   2366 		}
   2367 
   2368 		/* Trim leading space. */
   2369 		arena_palloc_trim(arena, chunk, pageind - (leadsize >>
   2370 		    pagesize_2pow), leadsize >> pagesize_2pow);
   2371 
   2372 		trailsize = alloc_size - leadsize - size;
   2373 		if (trailsize != 0) {
   2374 			/* Trim trailing space. */
   2375 			assert(trailsize < alloc_size);
   2376 			arena_palloc_trim(arena, chunk, pageind + npages,
   2377 			    trailsize >> pagesize_2pow);
   2378 		}
   2379 	}
   2380 
   2381 #ifdef MALLOC_STATS
   2382 	arena->stats.nmalloc_large++;
   2383 	arena->stats.allocated_large += size;
   2384 #endif
   2385 	malloc_mutex_unlock(&arena->mtx);
   2386 
   2387 	if (opt_junk)
   2388 		memset(ret, 0xa5, size);
   2389 	else if (opt_zero)
   2390 		memset(ret, 0, size);
   2391 	return (ret);
   2392 }
   2393 
   2394 /* Return the size of the allocation pointed to by ptr. */
   2395 static size_t
   2396 arena_salloc(const void *ptr)
   2397 {
   2398 	size_t ret;
   2399 	arena_chunk_t *chunk;
   2400 	arena_chunk_map_t *mapelm;
   2401 	unsigned pageind;
   2402 
   2403 	assert(ptr != NULL);
   2404 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
   2405 
   2406 	/*
   2407 	 * No arena data structures that we query here can change in a way that
   2408 	 * affects this function, so we don't need to lock.
   2409 	 */
   2410 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
   2411 	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
   2412 	mapelm = &chunk->map[pageind];
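         	/*
         	 * Added note: arena_run_split() records each page's position
         	 * within its run in pos, so a pointer into a small run either
         	 * lands on a page with nonzero pos or does not sit on a page
         	 * boundary; subtracting pos from pageind recovers the run
         	 * header.  Large runs start page-aligned with pos == 0 and
         	 * take the else branch.
         	 */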
   2413 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
   2414 	    pagesize_2pow)) {
   2415 		arena_run_t *run;
   2416 
   2417 		pageind -= mapelm->pos;
   2418 
   2419 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
   2420 		    pagesize_2pow));
   2421 		assert(run->magic == ARENA_RUN_MAGIC);
   2422 		ret = run->bin->reg_size;
   2423 	} else
   2424 		ret = mapelm->npages << pagesize_2pow;
   2425 
   2426 	return (ret);
   2427 }
   2428 
   2429 static void *
   2430 arena_ralloc(void *ptr, size_t size, size_t oldsize)
   2431 {
   2432 	void *ret;
   2433 
   2434 	/* Avoid moving the allocation if the size class would not change. */
   2435 	if (size < small_min) {
   2436 		if (oldsize < small_min &&
   2437 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
   2438 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
   2439 			goto IN_PLACE;
   2440 	} else if (size <= small_max) {
   2441 		if (oldsize >= small_min && oldsize <= small_max &&
   2442 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
   2443 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
   2444 			goto IN_PLACE;
   2445 	} else {
   2446 		/*
   2447 		 * We make no attempt to resize runs here, though it would be
   2448 		 * possible to do so.
   2449 		 */
   2450 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
   2451 			goto IN_PLACE;
   2452 	}
   2453 
   2454 	/*
   2455 	 * If we get here, then size and oldsize are different enough that we
   2456 	 * need to use a different size class.  In that case, fall back to
   2457 	 * allocating new space and copying.
   2458 	 */
   2459 	ret = arena_malloc(choose_arena(), size);
   2460 	if (ret == NULL)
   2461 		return (NULL);
   2462 
   2463 	/* Junk/zero-filling were already done by arena_malloc(). */
   2464 	if (size < oldsize)
   2465 		memcpy(ret, ptr, size);
   2466 	else
   2467 		memcpy(ret, ptr, oldsize);
   2468 	idalloc(ptr);
   2469 	return (ret);
   2470 IN_PLACE:
   2471 	if (opt_junk && size < oldsize)
   2472 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
   2473 	else if (opt_zero && size > oldsize)
   2474 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
   2475 	return (ptr);
   2476 }
   2477 
   2478 static void
   2479 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
   2480 {
   2481 	unsigned pageind;
   2482 	arena_chunk_map_t *mapelm;
   2483 	size_t size;
   2484 
   2485 	assert(arena != NULL);
   2486 	assert(arena->magic == ARENA_MAGIC);
   2487 	assert(chunk->arena == arena);
   2488 	assert(ptr != NULL);
   2489 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
   2490 
   2491 	pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
   2492 	mapelm = &chunk->map[pageind];
   2493 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
   2494 	    pagesize_2pow)) {
   2495 		arena_run_t *run;
   2496 		arena_bin_t *bin;
   2497 
   2498 		/* Small allocation. */
   2499 
   2500 		pageind -= mapelm->pos;
   2501 
   2502 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
   2503 		    pagesize_2pow));
   2504 		assert(run->magic == ARENA_RUN_MAGIC);
   2505 		bin = run->bin;
   2506 		size = bin->reg_size;
   2507 
   2508 		if (opt_junk)
   2509 			memset(ptr, 0x5a, size);
   2510 
   2511 		malloc_mutex_lock(&arena->mtx);
   2512 		arena_run_reg_dalloc(run, bin, ptr, size);
   2513 		run->nfree++;
   2514 
   2515 		if (run->nfree == bin->nregs) {
   2516 			/* Deallocate run. */
   2517 			if (run == bin->runcur)
   2518 				bin->runcur = NULL;
   2519 			else if (bin->nregs != 1) {
   2520 				/*
   2521 				 * This block's conditional is necessary because
   2522 				 * if the run only contains one region, then it
   2523 				 * never gets inserted into the non-full runs
   2524 				 * tree.
   2525 				 */
   2526 				/* LINTED */
   2527 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
   2528 			}
   2529 #ifdef MALLOC_DEBUG
   2530 			run->magic = 0;
   2531 #endif
   2532 			arena_run_dalloc(arena, run, bin->run_size);
   2533 #ifdef MALLOC_STATS
   2534 			bin->stats.curruns--;
   2535 #endif
   2536 		} else if (run->nfree == 1 && run != bin->runcur) {
   2537 			/*
   2538 			 * Make sure that bin->runcur always refers to the
   2539 			 * lowest non-full run, if one exists.
   2540 			 */
   2541 			if (bin->runcur == NULL)
   2542 				bin->runcur = run;
   2543 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
   2544 				/* Switch runcur. */
   2545 				if (bin->runcur->nfree > 0) {
   2546 					/* Insert runcur. */
   2547 					/* LINTED */
   2548 					RB_INSERT(arena_run_tree_s, &bin->runs,
   2549 					    bin->runcur);
   2550 				}
   2551 				bin->runcur = run;
   2552 			} else {
   2553 				/* LINTED */
   2554 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
   2555 			}
   2556 		}
   2557 #ifdef MALLOC_STATS
   2558 		arena->stats.allocated_small -= size;
   2559 		arena->stats.ndalloc_small++;
   2560 #endif
   2561 	} else {
   2562 		/* Large allocation. */
   2563 
   2564 		size = mapelm->npages << pagesize_2pow;
   2565 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
   2566 
   2567 		if (opt_junk)
   2568 			memset(ptr, 0x5a, size);
   2569 
   2570 		malloc_mutex_lock(&arena->mtx);
   2571 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
   2572 #ifdef MALLOC_STATS
   2573 		arena->stats.allocated_large -= size;
   2574 		arena->stats.ndalloc_large++;
   2575 #endif
   2576 	}
   2577 
   2578 	malloc_mutex_unlock(&arena->mtx);
   2579 }
   2580 
   2581 static bool
   2582 arena_new(arena_t *arena)
   2583 {
   2584 	unsigned i;
   2585 	arena_bin_t *bin;
   2586 	size_t prev_run_size;
   2587 
   2588 	malloc_mutex_init(&arena->mtx);
   2589 
   2590 #ifdef MALLOC_STATS
   2591 	memset(&arena->stats, 0, sizeof(arena_stats_t));
   2592 #endif
   2593 
   2594 	/* Initialize chunks. */
   2595 	RB_INIT(&arena->chunks);
   2596 	arena->spare = NULL;
   2597 
   2598 	/* Initialize bins. */
   2599 	prev_run_size = pagesize;
   2600 
   2601 	/* (2^n)-spaced tiny bins. */
   2602 	for (i = 0; i < ntbins; i++) {
   2603 		bin = &arena->bins[i];
   2604 		bin->runcur = NULL;
   2605 		RB_INIT(&bin->runs);
   2606 
   2607 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
   2608 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
   2609 
   2610 #ifdef MALLOC_STATS
   2611 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
   2612 #endif
   2613 	}
   2614 
   2615 	/* Quantum-spaced bins. */
   2616 	for (; i < ntbins + nqbins; i++) {
   2617 		bin = &arena->bins[i];
   2618 		bin->runcur = NULL;
   2619 		RB_INIT(&bin->runs);
   2620 
   2621 		bin->reg_size = quantum * (i - ntbins + 1);
   2622 /*
   2623 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
   2624 */
   2625 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
   2626 
   2627 #ifdef MALLOC_STATS
   2628 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
   2629 #endif
   2630 	}
   2631 
   2632 	/* (2^n)-spaced sub-page bins. */
   2633 	for (; i < ntbins + nqbins + nsbins; i++) {
   2634 		bin = &arena->bins[i];
   2635 		bin->runcur = NULL;
   2636 		RB_INIT(&bin->runs);
   2637 
   2638 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
   2639 
   2640 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
   2641 
   2642 #ifdef MALLOC_STATS
   2643 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
   2644 #endif
   2645 	}
   2646 
   2647 #ifdef MALLOC_DEBUG
   2648 	arena->magic = ARENA_MAGIC;
   2649 #endif
   2650 
   2651 	return (false);
   2652 }
   2653 
   2654 /* Create a new arena and insert it into the arenas array at index ind. */
   2655 static arena_t *
   2656 arenas_extend(unsigned ind)
   2657 {
   2658 	arena_t *ret;
   2659 
   2660 	/* Allocate enough space for trailing bins. */
   2661 	ret = (arena_t *)base_alloc(sizeof(arena_t)
   2662 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
   2663 	if (ret != NULL && arena_new(ret) == false) {
   2664 		arenas[ind] = ret;
   2665 		return (ret);
   2666 	}
   2667 	/* Only reached if there is an OOM error. */
   2668 
   2669 	/*
   2670 	 * OOM here is quite inconvenient to propagate, since dealing with it
   2671 	 * would require a check for failure in the fast path.  Instead, punt
   2672 	 * by using arenas[0].  In practice, this is an extremely unlikely
   2673 	 * failure.
   2674 	 */
   2675 	_malloc_message(_getprogname(),
   2676 	    ": (malloc) Error initializing arena\n", "", "");
   2677 	if (opt_abort)
   2678 		abort();
   2679 
   2680 	return (arenas[0]);
   2681 }
   2682 
   2683 /*
   2684  * End arena.
   2685  */
   2686 /******************************************************************************/
   2687 /*
   2688  * Begin general internal functions.
   2689  */
   2690 
   2691 static void *
   2692 huge_malloc(size_t size)
   2693 {
   2694 	void *ret;
   2695 	size_t csize;
   2696 	chunk_node_t *node;
   2697 
   2698 	/* Allocate one or more contiguous chunks for this request. */
   2699 
   2700 	csize = CHUNK_CEILING(size);
   2701 	if (csize == 0) {
   2702 		/* size is large enough to cause size_t wrap-around. */
   2703 		return (NULL);
   2704 	}
   2705 
   2706 	/* Allocate a chunk node with which to track the chunk. */
   2707 	node = base_chunk_node_alloc();
   2708 	if (node == NULL)
   2709 		return (NULL);
   2710 
   2711 	ret = chunk_alloc(csize);
   2712 	if (ret == NULL) {
   2713 		base_chunk_node_dealloc(node);
   2714 		return (NULL);
   2715 	}
   2716 
   2717 	/* Insert node into huge. */
   2718 	node->chunk = ret;
   2719 	node->size = csize;
   2720 
   2721 	malloc_mutex_lock(&chunks_mtx);
   2722 	RB_INSERT(chunk_tree_s, &huge, node);
   2723 #ifdef MALLOC_STATS
   2724 	huge_nmalloc++;
   2725 	huge_allocated += csize;
   2726 #endif
   2727 	malloc_mutex_unlock(&chunks_mtx);
   2728 
   2729 	if (opt_junk)
   2730 		memset(ret, 0xa5, csize);
   2731 	else if (opt_zero)
   2732 		memset(ret, 0, csize);
   2733 
   2734 	return (ret);
   2735 }
   2736 
   2737 /* Only handles large allocations that require more than chunk alignment. */
   2738 static void *
   2739 huge_palloc(size_t alignment, size_t size)
   2740 {
   2741 	void *ret;
   2742 	size_t alloc_size, chunk_size, offset;
   2743 	chunk_node_t *node;
   2744 
   2745 	/*
   2746 	 * This allocation requires alignment that is even larger than chunk
   2747 	 * alignment.  This means that huge_malloc() isn't good enough.
   2748 	 *
    2749 	 * Allocate almost twice as many chunks as are demanded by the size or
    2750 	 * alignment, in order to ensure that the alignment can be achieved,
    2751 	 * then unmap the leading and trailing chunks.
   2752 	 */
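         	/*
         	 * Hedged example (assuming, for illustration only, a 1 MB
         	 * chunksize): a 2 MB request with 2 MB alignment gives
         	 * chunk_size == 2 MB and alloc_size == 2 MB + 2 MB - 1 MB ==
         	 * 3 MB; any chunk-aligned 3 MB region necessarily contains a
         	 * 2 MB-aligned 2 MB span, so the alignment can always be met.
         	 */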
   2753 	assert(alignment >= chunksize);
   2754 
   2755 	chunk_size = CHUNK_CEILING(size);
   2756 
   2757 	if (size >= alignment)
   2758 		alloc_size = chunk_size + alignment - chunksize;
   2759 	else
   2760 		alloc_size = (alignment << 1) - chunksize;
   2761 
   2762 	/* Allocate a chunk node with which to track the chunk. */
   2763 	node = base_chunk_node_alloc();
   2764 	if (node == NULL)
   2765 		return (NULL);
   2766 
   2767 	ret = chunk_alloc(alloc_size);
   2768 	if (ret == NULL) {
   2769 		base_chunk_node_dealloc(node);
   2770 		return (NULL);
   2771 	}
   2772 
   2773 	offset = (uintptr_t)ret & (alignment - 1);
   2774 	assert((offset & chunksize_mask) == 0);
   2775 	assert(offset < alloc_size);
   2776 	if (offset == 0) {
   2777 		/* Trim trailing space. */
   2778 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
   2779 		    - chunk_size);
   2780 	} else {
   2781 		size_t trailsize;
   2782 
   2783 		/* Trim leading space. */
   2784 		chunk_dealloc(ret, alignment - offset);
   2785 
   2786 		ret = (void *)((uintptr_t)ret + (alignment - offset));
   2787 
   2788 		trailsize = alloc_size - (alignment - offset) - chunk_size;
   2789 		if (trailsize != 0) {
   2790 		    /* Trim trailing space. */
   2791 		    assert(trailsize < alloc_size);
   2792 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
   2793 			trailsize);
   2794 		}
   2795 	}
   2796 
   2797 	/* Insert node into huge. */
   2798 	node->chunk = ret;
   2799 	node->size = chunk_size;
   2800 
   2801 	malloc_mutex_lock(&chunks_mtx);
   2802 	RB_INSERT(chunk_tree_s, &huge, node);
   2803 #ifdef MALLOC_STATS
   2804 	huge_nmalloc++;
   2805 	huge_allocated += chunk_size;
   2806 #endif
   2807 	malloc_mutex_unlock(&chunks_mtx);
   2808 
   2809 	if (opt_junk)
   2810 		memset(ret, 0xa5, chunk_size);
   2811 	else if (opt_zero)
   2812 		memset(ret, 0, chunk_size);
   2813 
   2814 	return (ret);
   2815 }
   2816 
   2817 static void *
   2818 huge_ralloc(void *ptr, size_t size, size_t oldsize)
   2819 {
   2820 	void *ret;
   2821 
   2822 	/* Avoid moving the allocation if the size class would not change. */
   2823 	if (oldsize > arena_maxclass &&
   2824 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
   2825 		if (opt_junk && size < oldsize) {
   2826 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
   2827 			    - size);
   2828 		} else if (opt_zero && size > oldsize) {
   2829 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
   2830 			    - oldsize);
   2831 		}
   2832 		return (ptr);
   2833 	}
   2834 
   2835 	/*
   2836 	 * If we get here, then size and oldsize are different enough that we
   2837 	 * need to use a different size class.  In that case, fall back to
   2838 	 * allocating new space and copying.
   2839 	 */
   2840 	ret = huge_malloc(size);
   2841 	if (ret == NULL)
   2842 		return (NULL);
   2843 
   2844 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
   2845 		/* The old allocation is a chunk. */
   2846 		if (size < oldsize)
   2847 			memcpy(ret, ptr, size);
   2848 		else
   2849 			memcpy(ret, ptr, oldsize);
   2850 	} else {
   2851 		/* The old allocation is a region. */
   2852 		assert(oldsize < size);
   2853 		memcpy(ret, ptr, oldsize);
   2854 	}
   2855 	idalloc(ptr);
   2856 	return (ret);
   2857 }
   2858 
   2859 static void
   2860 huge_dalloc(void *ptr)
   2861 {
   2862 	chunk_node_t key;
   2863 	chunk_node_t *node;
   2864 
   2865 	malloc_mutex_lock(&chunks_mtx);
   2866 
   2867 	/* Extract from tree of huge allocations. */
   2868 	key.chunk = ptr;
   2869 	/* LINTED */
   2870 	node = RB_FIND(chunk_tree_s, &huge, &key);
   2871 	assert(node != NULL);
   2872 	assert(node->chunk == ptr);
   2873 	/* LINTED */
   2874 	RB_REMOVE(chunk_tree_s, &huge, node);
   2875 
   2876 #ifdef MALLOC_STATS
   2877 	huge_ndalloc++;
   2878 	huge_allocated -= node->size;
   2879 #endif
   2880 
   2881 	malloc_mutex_unlock(&chunks_mtx);
   2882 
   2883 	/* Unmap chunk. */
   2884 #ifdef USE_BRK
   2885 	if (opt_junk)
   2886 		memset(node->chunk, 0x5a, node->size);
   2887 #endif
   2888 	chunk_dealloc(node->chunk, node->size);
   2889 
   2890 	base_chunk_node_dealloc(node);
   2891 }
   2892 
   2893 static void *
   2894 imalloc(size_t size)
   2895 {
   2896 	void *ret;
   2897 
   2898 	assert(size != 0);
   2899 
   2900 	if (size <= arena_maxclass)
   2901 		ret = arena_malloc(choose_arena(), size);
   2902 	else
   2903 		ret = huge_malloc(size);
   2904 
   2905 	return (ret);
   2906 }
   2907 
   2908 static void *
   2909 ipalloc(size_t alignment, size_t size)
   2910 {
   2911 	void *ret;
   2912 	size_t ceil_size;
   2913 
   2914 	/*
   2915 	 * Round size up to the nearest multiple of alignment.
   2916 	 *
   2917 	 * This done, we can take advantage of the fact that for each small
   2918 	 * size class, every object is aligned at the smallest power of two
   2919 	 * that is non-zero in the base two representation of the size.  For
   2920 	 * example:
   2921 	 *
   2922 	 *   Size |   Base 2 | Minimum alignment
   2923 	 *   -----+----------+------------------
   2924 	 *     96 |  1100000 |  32
   2925 	 *    144 | 10100000 |  32
   2926 	 *    192 | 11000000 |  64
   2927 	 *
   2928 	 * Depending on runtime settings, it is possible that arena_malloc()
   2929 	 * will further round up to a power of two, but that never causes
   2930 	 * correctness issues.
   2931 	 */
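         	/*
         	 * Added example: for size == 100 and alignment == 32,
         	 * ceil_size == (100 + 31) & -32 == 128, and the size class
         	 * that serves a 128 byte request provides at least 32 byte
         	 * alignment by the argument above.
         	 */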
   2932 	ceil_size = (size + (alignment - 1)) & (-alignment);
   2933 	/*
   2934 	 * (ceil_size < size) protects against the combination of maximal
   2935 	 * alignment and size greater than maximal alignment.
   2936 	 */
   2937 	if (ceil_size < size) {
   2938 		/* size_t overflow. */
   2939 		return (NULL);
   2940 	}
   2941 
   2942 	if (ceil_size <= pagesize || (alignment <= pagesize
   2943 	    && ceil_size <= arena_maxclass))
   2944 		ret = arena_malloc(choose_arena(), ceil_size);
   2945 	else {
   2946 		size_t run_size;
   2947 
   2948 		/*
   2949 		 * We can't achieve sub-page alignment, so round up alignment
   2950 		 * permanently; it makes later calculations simpler.
   2951 		 */
   2952 		alignment = PAGE_CEILING(alignment);
   2953 		ceil_size = PAGE_CEILING(size);
   2954 		/*
   2955 		 * (ceil_size < size) protects against very large sizes within
   2956 		 * pagesize of SIZE_T_MAX.
   2957 		 *
   2958 		 * (ceil_size + alignment < ceil_size) protects against the
   2959 		 * combination of maximal alignment and ceil_size large enough
   2960 		 * to cause overflow.  This is similar to the first overflow
   2961 		 * check above, but it needs to be repeated due to the new
   2962 		 * ceil_size value, which may now be *equal* to maximal
   2963 		 * alignment, whereas before we only detected overflow if the
   2964 		 * original size was *greater* than maximal alignment.
   2965 		 */
   2966 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
   2967 			/* size_t overflow. */
   2968 			return (NULL);
   2969 		}
   2970 
   2971 		/*
   2972 		 * Calculate the size of the over-size run that arena_palloc()
   2973 		 * would need to allocate in order to guarantee the alignment.
   2974 		 */
   2975 		if (ceil_size >= alignment)
   2976 			run_size = ceil_size + alignment - pagesize;
   2977 		else {
   2978 			/*
   2979 			 * It is possible that (alignment << 1) will cause
   2980 			 * overflow, but it doesn't matter because we also
   2981 			 * subtract pagesize, which in the case of overflow
   2982 			 * leaves us with a very large run_size.  That causes
   2983 			 * the first conditional below to fail, which means
   2984 			 * that the bogus run_size value never gets used for
   2985 			 * anything important.
   2986 			 */
   2987 			run_size = (alignment << 1) - pagesize;
   2988 		}
   2989 
   2990 		if (run_size <= arena_maxclass) {
   2991 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
   2992 			    run_size);
   2993 		} else if (alignment <= chunksize)
   2994 			ret = huge_malloc(ceil_size);
   2995 		else
   2996 			ret = huge_palloc(alignment, ceil_size);
   2997 	}
   2998 
   2999 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
   3000 	return (ret);
   3001 }
   3002 
   3003 static void *
   3004 icalloc(size_t size)
   3005 {
   3006 	void *ret;
   3007 
   3008 	if (size <= arena_maxclass) {
   3009 		ret = arena_malloc(choose_arena(), size);
   3010 		if (ret == NULL)
   3011 			return (NULL);
   3012 		memset(ret, 0, size);
   3013 	} else {
   3014 		/*
   3015 		 * The virtual memory system provides zero-filled pages, so
   3016 		 * there is no need to do so manually, unless opt_junk is
   3017 		 * enabled, in which case huge_malloc() fills huge allocations
   3018 		 * with junk.
   3019 		 */
   3020 		ret = huge_malloc(size);
   3021 		if (ret == NULL)
   3022 			return (NULL);
   3023 
   3024 		if (opt_junk)
   3025 			memset(ret, 0, size);
   3026 #ifdef USE_BRK
   3027 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
   3028 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
   3029 			/*
   3030 			 * This may be a re-used brk chunk.  Therefore, zero
   3031 			 * the memory.
   3032 			 */
   3033 			memset(ret, 0, size);
   3034 		}
   3035 #endif
   3036 	}
   3037 
   3038 	return (ret);
   3039 }
   3040 
   3041 static size_t
   3042 isalloc(const void *ptr)
   3043 {
   3044 	size_t ret;
   3045 	arena_chunk_t *chunk;
   3046 
   3047 	assert(ptr != NULL);
   3048 
   3049 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
   3050 	if (chunk != ptr) {
   3051 		/* Region. */
   3052 		assert(chunk->arena->magic == ARENA_MAGIC);
   3053 
   3054 		ret = arena_salloc(ptr);
   3055 	} else {
   3056 		chunk_node_t *node, key;
   3057 
   3058 		/* Chunk (huge allocation). */
   3059 
   3060 		malloc_mutex_lock(&chunks_mtx);
   3061 
   3062 		/* Extract from tree of huge allocations. */
   3063 		key.chunk = __DECONST(void *, ptr);
   3064 		/* LINTED */
   3065 		node = RB_FIND(chunk_tree_s, &huge, &key);
   3066 		assert(node != NULL);
   3067 
   3068 		ret = node->size;
   3069 
   3070 		malloc_mutex_unlock(&chunks_mtx);
   3071 	}
   3072 
   3073 	return (ret);
   3074 }
   3075 
   3076 static void *
   3077 iralloc(void *ptr, size_t size)
   3078 {
   3079 	void *ret;
   3080 	size_t oldsize;
   3081 
   3082 	assert(ptr != NULL);
   3083 	assert(size != 0);
   3084 
   3085 	oldsize = isalloc(ptr);
   3086 
   3087 	if (size <= arena_maxclass)
   3088 		ret = arena_ralloc(ptr, size, oldsize);
   3089 	else
   3090 		ret = huge_ralloc(ptr, size, oldsize);
   3091 
   3092 	return (ret);
   3093 }
   3094 
   3095 static void
   3096 idalloc(void *ptr)
   3097 {
   3098 	arena_chunk_t *chunk;
   3099 
   3100 	assert(ptr != NULL);
   3101 
   3102 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
   3103 	if (chunk != ptr) {
   3104 		/* Region. */
   3105 		arena_dalloc(chunk->arena, chunk, ptr);
   3106 	} else
   3107 		huge_dalloc(ptr);
   3108 }
   3109 
   3110 static void
   3111 malloc_print_stats(void)
   3112 {
   3113 
   3114 	if (opt_print_stats) {
   3115 		char s[UMAX2S_BUFSIZE];
   3116 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
   3117 		    "");
   3118 		_malloc_message("Assertions ",
   3119 #ifdef NDEBUG
   3120 		    "disabled",
   3121 #else
   3122 		    "enabled",
   3123 #endif
   3124 		    "\n", "");
   3125 		_malloc_message("Boolean MALLOC_OPTIONS: ",
   3126 		    opt_abort ? "A" : "a",
   3127 		    opt_junk ? "J" : "j",
   3128 		    opt_hint ? "H" : "h");
   3129 		_malloc_message(opt_utrace ? "PU" : "Pu",
   3130 		    opt_sysv ? "V" : "v",
   3131 		    opt_xmalloc ? "X" : "x",
   3132 		    opt_zero ? "Z\n" : "z\n");
   3133 
   3134 		_malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
   3135 		_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
   3136 		_malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
   3137 		    "\n", "");
   3138 		_malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
   3139 		_malloc_message("Max small size: ", umax2s(small_max, s), "\n",
   3140 		    "");
   3141 
   3142 		_malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
   3143 		_malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
   3144 
   3145 #ifdef MALLOC_STATS
   3146 		{
   3147 			size_t allocated, mapped;
   3148 			unsigned i;
   3149 			arena_t *arena;
   3150 
   3151 			/* Calculate and print allocated/mapped stats. */
   3152 
   3153 			/* arenas. */
   3154 			for (i = 0, allocated = 0; i < narenas; i++) {
   3155 				if (arenas[i] != NULL) {
   3156 					malloc_mutex_lock(&arenas[i]->mtx);
   3157 					allocated +=
   3158 					    arenas[i]->stats.allocated_small;
   3159 					allocated +=
   3160 					    arenas[i]->stats.allocated_large;
   3161 					malloc_mutex_unlock(&arenas[i]->mtx);
   3162 				}
   3163 			}
   3164 
   3165 			/* huge/base. */
   3166 			malloc_mutex_lock(&chunks_mtx);
   3167 			allocated += huge_allocated;
   3168 			mapped = stats_chunks.curchunks * chunksize;
   3169 			malloc_mutex_unlock(&chunks_mtx);
   3170 
   3171 			malloc_mutex_lock(&base_mtx);
   3172 			mapped += base_mapped;
   3173 			malloc_mutex_unlock(&base_mtx);
   3174 
   3175 			malloc_printf("Allocated: %zu, mapped: %zu\n",
   3176 			    allocated, mapped);
   3177 
   3178 			/* Print chunk stats. */
   3179 			{
   3180 				chunk_stats_t chunks_stats;
   3181 
   3182 				malloc_mutex_lock(&chunks_mtx);
   3183 				chunks_stats = stats_chunks;
   3184 				malloc_mutex_unlock(&chunks_mtx);
   3185 
   3186 				malloc_printf("chunks: nchunks   "
   3187 				    "highchunks    curchunks\n");
   3188 				malloc_printf("  %13llu%13lu%13lu\n",
   3189 				    chunks_stats.nchunks,
   3190 				    chunks_stats.highchunks,
   3191 				    chunks_stats.curchunks);
   3192 			}
   3193 
    3194 			/* Print huge allocation stats. */
   3195 			malloc_printf(
   3196 			    "huge: nmalloc      ndalloc    allocated\n");
    3197 			/* huge_allocated is already tracked in bytes. */
    3198 			malloc_printf(" %12llu %12llu %12zu\n",
    3199 			    huge_nmalloc, huge_ndalloc, huge_allocated);
   3200 
   3201 			/* Print stats for each arena. */
   3202 			for (i = 0; i < narenas; i++) {
   3203 				arena = arenas[i];
   3204 				if (arena != NULL) {
   3205 					malloc_printf(
   3206 					    "\narenas[%u] @ %p\n", i, arena);
   3207 					malloc_mutex_lock(&arena->mtx);
   3208 					stats_print(arena);
   3209 					malloc_mutex_unlock(&arena->mtx);
   3210 				}
   3211 			}
   3212 		}
   3213 #endif /* #ifdef MALLOC_STATS */
   3214 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
   3215 	}
   3216 }
   3217 
   3218 /*
   3219  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
   3220  * implementation has to take pains to avoid infinite recursion during
   3221  * initialization.
   3222  */
   3223 static inline bool
   3224 malloc_init(void)
   3225 {
   3226 
   3227 	if (malloc_initialized == false)
   3228 		return (malloc_init_hard());
   3229 
   3230 	return (false);
   3231 }
   3232 
   3233 static bool
   3234 malloc_init_hard(void)
   3235 {
   3236 	unsigned i, j;
   3237 	int linklen;
   3238 	char buf[PATH_MAX + 1];
   3239 	const char *opts = "";
   3240 
   3241 	malloc_mutex_lock(&init_lock);
   3242 	if (malloc_initialized) {
   3243 		/*
   3244 		 * Another thread initialized the allocator before this one
   3245 		 * acquired init_lock.
   3246 		 */
   3247 		malloc_mutex_unlock(&init_lock);
   3248 		return (false);
   3249 	}
   3250 
   3251 	/* Get number of CPUs. */
   3252 	{
   3253 		int mib[2];
   3254 		size_t len;
   3255 
   3256 		mib[0] = CTL_HW;
   3257 		mib[1] = HW_NCPU;
   3258 		len = sizeof(ncpus);
   3259 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
   3260 			/* Error. */
   3261 			ncpus = 1;
   3262 		}
   3263 	}
   3264 
   3265 	/* Get page size. */
   3266 	{
   3267 		long result;
   3268 
   3269 		result = sysconf(_SC_PAGESIZE);
   3270 		assert(result != -1);
   3271 		pagesize = (unsigned) result;
   3272 
   3273 		/*
   3274 		 * We assume that pagesize is a power of 2 when calculating
   3275 		 * pagesize_mask and pagesize_2pow.
   3276 		 */
   3277 		assert(((result - 1) & result) == 0);
   3278 		pagesize_mask = result - 1;
   3279 		pagesize_2pow = ffs((int)result) - 1;
   3280 	}
   3281 
   3282 	for (i = 0; i < 3; i++) {
   3283 		/* Get runtime configuration. */
   3284 		switch (i) {
   3285 		case 0:
   3286 			if ((linklen = readlink("/etc/malloc.conf", buf,
   3287 						sizeof(buf) - 1)) != -1) {
   3288 				/*
   3289 				 * Use the contents of the "/etc/malloc.conf"
   3290 				 * symbolic link's name.
   3291 				 */
   3292 				buf[linklen] = '\0';
   3293 				opts = buf;
   3294 			} else {
   3295 				/* No configuration specified. */
   3296 				buf[0] = '\0';
   3297 				opts = buf;
   3298 			}
   3299 			break;
   3300 		case 1:
   3301 			if (issetugid() == 0 && (opts =
   3302 			    getenv("MALLOC_OPTIONS")) != NULL) {
   3303 				/*
   3304 				 * Do nothing; opts is already initialized to
   3305 				 * the value of the MALLOC_OPTIONS environment
   3306 				 * variable.
   3307 				 */
   3308 			} else {
   3309 				/* No configuration specified. */
   3310 				buf[0] = '\0';
   3311 				opts = buf;
   3312 			}
   3313 			break;
   3314 		case 2:
   3315 			if (_malloc_options != NULL) {
   3316 			    /*
   3317 			     * Use options that were compiled into the program.
   3318 			     */
   3319 			    opts = _malloc_options;
   3320 			} else {
   3321 				/* No configuration specified. */
   3322 				buf[0] = '\0';
   3323 				opts = buf;
   3324 			}
   3325 			break;
   3326 		default:
   3327 			/* NOTREACHED */
   3328 			assert(false);
   3329 		}
   3330 
   3331 		for (j = 0; opts[j] != '\0'; j++) {
   3332 			switch (opts[j]) {
   3333 			case 'a':
   3334 				opt_abort = false;
   3335 				break;
   3336 			case 'A':
   3337 				opt_abort = true;
   3338 				break;
   3339 			case 'h':
   3340 				opt_hint = false;
   3341 				break;
   3342 			case 'H':
   3343 				opt_hint = true;
   3344 				break;
   3345 			case 'j':
   3346 				opt_junk = false;
   3347 				break;
   3348 			case 'J':
   3349 				opt_junk = true;
   3350 				break;
   3351 			case 'k':
   3352 				/*
   3353 				 * Chunks always require at least one header
   3354 				 * page, so chunks can never be smaller than
   3355 				 * two pages.
   3356 				 */
   3357 				if (opt_chunk_2pow > pagesize_2pow + 1)
   3358 					opt_chunk_2pow--;
   3359 				break;
   3360 			case 'K':
   3361 				/*
   3362 				 * There must be fewer pages in a chunk than
   3363 				 * can be recorded by the pos field of
   3364 				 * arena_chunk_map_t, in order to make POS_FREE
   3365 				 * special.
   3366 				 */
   3367 				if (opt_chunk_2pow - pagesize_2pow
   3368 				    < (sizeof(uint32_t) << 3) - 1)
   3369 					opt_chunk_2pow++;
   3370 				break;
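         			/*
         			 * For illustration, with 4 kB pages
         			 * (pagesize_2pow == 12): 'k' stops shrinking once
         			 * opt_chunk_2pow reaches 13, i.e. an 8 kB chunk of
         			 * two pages, and 'K' stops growing once a chunk
         			 * spans 2^31 pages, so page indices always fit in
         			 * the 32-bit pos field with POS_FREE left over.
         			 */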
   3371 			case 'n':
   3372 				opt_narenas_lshift--;
   3373 				break;
   3374 			case 'N':
   3375 				opt_narenas_lshift++;
   3376 				break;
   3377 			case 'p':
   3378 				opt_print_stats = false;
   3379 				break;
   3380 			case 'P':
   3381 				opt_print_stats = true;
   3382 				break;
   3383 			case 'q':
   3384 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
   3385 					opt_quantum_2pow--;
   3386 				break;
   3387 			case 'Q':
   3388 				if (opt_quantum_2pow < pagesize_2pow - 1)
   3389 					opt_quantum_2pow++;
   3390 				break;
   3391 			case 's':
   3392 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
   3393 					opt_small_max_2pow--;
   3394 				break;
   3395 			case 'S':
   3396 				if (opt_small_max_2pow < pagesize_2pow - 1)
   3397 					opt_small_max_2pow++;
   3398 				break;
   3399 			case 'u':
   3400 				opt_utrace = false;
   3401 				break;
   3402 			case 'U':
   3403 				opt_utrace = true;
   3404 				break;
   3405 			case 'v':
   3406 				opt_sysv = false;
   3407 				break;
   3408 			case 'V':
   3409 				opt_sysv = true;
   3410 				break;
   3411 			case 'x':
   3412 				opt_xmalloc = false;
   3413 				break;
   3414 			case 'X':
   3415 				opt_xmalloc = true;
   3416 				break;
   3417 			case 'z':
   3418 				opt_zero = false;
   3419 				break;
   3420 			case 'Z':
   3421 				opt_zero = true;
   3422 				break;
   3423 			default: {
   3424 				char cbuf[2];
   3425 
   3426 				cbuf[0] = opts[j];
   3427 				cbuf[1] = '\0';
   3428 				_malloc_message(_getprogname(),
   3429 				    ": (malloc) Unsupported character in "
   3430 				    "malloc options: '", cbuf, "'\n");
   3431 			}
   3432 			}
   3433 		}
   3434 	}
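         	/*
         	 * The three passes above are parsed in order, so later sources
         	 * override earlier toggles, with compiled-in _malloc_options
         	 * getting the last word.  As a usage sketch (flag letters as
         	 * handled in the switch above):
         	 *
         	 *	ln -s 'AJ' /etc/malloc.conf	# abort on error, junk fill
         	 *	MALLOC_OPTIONS=AJ ./prog	# same, via the environment
         	 *
         	 * The environment variable is ignored for set-user-ID and
         	 * set-group-ID programs, per the issetugid() check.
         	 */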
   3435 
   3436 	/* Take care to call atexit() only once. */
   3437 	if (opt_print_stats) {
   3438 		/* Print statistics at exit. */
   3439 		atexit(malloc_print_stats);
   3440 	}
   3441 
   3442 	/* Set variables according to the value of opt_small_max_2pow. */
   3443 	if (opt_small_max_2pow < opt_quantum_2pow)
   3444 		opt_small_max_2pow = opt_quantum_2pow;
   3445 	small_max = (1 << opt_small_max_2pow);
   3446 
   3447 	/* Set bin-related variables. */
   3448 	bin_maxclass = (pagesize >> 1);
   3449 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
   3450 	ntbins = opt_quantum_2pow - TINY_MIN_2POW;
   3451 	assert(ntbins <= opt_quantum_2pow);
   3452 	nqbins = (small_max >> opt_quantum_2pow);
   3453 	nsbins = pagesize_2pow - opt_small_max_2pow - 1;
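         	/*
         	 * For illustration, assuming the defaults of a 16-byte quantum,
         	 * a 512-byte small_max and 4 kB pages: bin_maxclass == 2048,
         	 * nqbins == 512 / 16 == 32 quantum-spaced bins, and
         	 * nsbins == 12 - 9 - 1 == 2 sub-page bins (1024 and 2048);
         	 * ntbins covers the tiny power-of-two classes below the
         	 * quantum.
         	 */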
   3454 
   3455 	/* Set variables according to the value of opt_quantum_2pow. */
   3456 	quantum = (1 << opt_quantum_2pow);
   3457 	quantum_mask = quantum - 1;
   3458 	if (ntbins > 0)
   3459 		small_min = (quantum >> 1) + 1;
   3460 	else
   3461 		small_min = 1;
   3462 	assert(small_min <= quantum);
   3463 
   3464 	/* Set variables according to the value of opt_chunk_2pow. */
   3465 	chunksize = (1LU << opt_chunk_2pow);
   3466 	chunksize_mask = chunksize - 1;
   3467 	chunk_npages = (chunksize >> pagesize_2pow);
   3468 	{
   3469 		unsigned header_size;
   3470 
   3471 		header_size = sizeof(arena_chunk_t) + (sizeof(arena_chunk_map_t)
   3472 		    * (chunk_npages - 1));
   3473 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
   3474 		if ((header_size & pagesize_mask) != 0)
   3475 			arena_chunk_header_npages++;
   3476 	}
   3477 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
   3478 	    pagesize_2pow);
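         	/*
         	 * For illustration, with 1 MB chunks (opt_chunk_2pow == 20) and
         	 * 4 kB pages, chunk_npages == 256.  If the chunk header plus the
         	 * per-page map came to roughly 2 kB (the exact figure depends on
         	 * the structure layouts), the header would occupy a single page
         	 * and arena_maxclass would be 1 MB - 4 kB, the largest run an
         	 * arena will carve out of a chunk.
         	 */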
   3479 
   3480 	UTRACE(0, 0, 0);
   3481 
   3482 #ifdef MALLOC_STATS
   3483 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
   3484 #endif
   3485 
    3486 	/* Various sanity checks on the resulting configuration. */
   3487 	assert(quantum >= sizeof(void *));
   3488 	assert(quantum <= pagesize);
   3489 	assert(chunksize >= pagesize);
   3490 	assert(quantum * 4 <= chunksize);
   3491 
   3492 	/* Initialize chunks data. */
   3493 	malloc_mutex_init(&chunks_mtx);
   3494 	RB_INIT(&huge);
   3495 #ifdef USE_BRK
   3496 	malloc_mutex_init(&brk_mtx);
   3497 	brk_base = sbrk(0);
   3498 	brk_prev = brk_base;
   3499 	brk_max = brk_base;
   3500 #endif
   3501 #ifdef MALLOC_STATS
   3502 	huge_nmalloc = 0;
   3503 	huge_ndalloc = 0;
   3504 	huge_allocated = 0;
   3505 #endif
   3506 	RB_INIT(&old_chunks);
   3507 
   3508 	/* Initialize base allocation data structures. */
   3509 #ifdef MALLOC_STATS
   3510 	base_mapped = 0;
   3511 #endif
   3512 #ifdef USE_BRK
   3513 	/*
   3514 	 * Allocate a base chunk here, since it doesn't actually have to be
   3515 	 * chunk-aligned.  Doing this before allocating any other chunks allows
   3516 	 * the use of space that would otherwise be wasted.
   3517 	 */
   3518 	base_pages_alloc(0);
   3519 #endif
   3520 	base_chunk_nodes = NULL;
   3521 	malloc_mutex_init(&base_mtx);
   3522 
   3523 	if (ncpus > 1) {
   3524 		/*
   3525 		 * For SMP systems, create four times as many arenas as there
   3526 		 * are CPUs by default.
   3527 		 */
   3528 		opt_narenas_lshift += 2;
   3529 	}
   3530 
   3531 #ifdef NO_TLS
   3532 	/* Initialize arena key. */
   3533 	(void)thr_keycreate(&arenas_map_key, NULL);
   3534 #endif
   3535 
   3536 	/* Determine how many arenas to use. */
   3537 	narenas = ncpus;
   3538 	if (opt_narenas_lshift > 0) {
   3539 		if ((narenas << opt_narenas_lshift) > narenas)
   3540 			narenas <<= opt_narenas_lshift;
   3541 		/*
    3542 		 * Make sure not to exceed the limits of what base_alloc()
   3543 		 * can handle.
   3544 		 */
   3545 		if (narenas * sizeof(arena_t *) > chunksize)
   3546 			narenas = chunksize / sizeof(arena_t *);
   3547 	} else if (opt_narenas_lshift < 0) {
    3548 		if ((narenas >> -opt_narenas_lshift) < narenas)
    3549 			narenas >>= -opt_narenas_lshift;
   3550 		/* Make sure there is at least one arena. */
   3551 		if (narenas == 0)
   3552 			narenas = 1;
   3553 	}
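         	/*
         	 * For illustration: on an 8-CPU machine with default options,
         	 * opt_narenas_lshift ends up as 2, so narenas == 8 << 2 == 32;
         	 * the resulting array of 32 arena pointers is far smaller than
         	 * chunksize, so the base_alloc() clamp above never triggers in
         	 * practice.
         	 */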
   3554 
   3555 	next_arena = 0;
   3556 
   3557 	/* Allocate and initialize arenas. */
   3558 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
   3559 	if (arenas == NULL) {
   3560 		malloc_mutex_unlock(&init_lock);
   3561 		return (true);
   3562 	}
   3563 	/*
   3564 	 * Zero the array.  In practice, this should always be pre-zeroed,
   3565 	 * since it was just mmap()ed, but let's be sure.
   3566 	 */
   3567 	memset(arenas, 0, sizeof(arena_t *) * narenas);
   3568 
   3569 	/*
   3570 	 * Initialize one arena here.  The rest are lazily created in
   3571 	 * arena_choose_hard().
   3572 	 */
   3573 	arenas_extend(0);
   3574 	if (arenas[0] == NULL) {
   3575 		malloc_mutex_unlock(&init_lock);
   3576 		return (true);
   3577 	}
   3578 
   3579 	malloc_mutex_init(&arenas_mtx);
   3580 
   3581 	malloc_initialized = true;
   3582 	malloc_mutex_unlock(&init_lock);
   3583 	return (false);
   3584 }
   3585 
   3586 /*
   3587  * End general internal functions.
   3588  */
   3589 /******************************************************************************/
   3590 /*
   3591  * Begin malloc(3)-compatible functions.
   3592  */
   3593 
   3594 void *
   3595 malloc(size_t size)
   3596 {
   3597 	void *ret;
   3598 
   3599 	if (malloc_init()) {
   3600 		ret = NULL;
   3601 		goto RETURN;
   3602 	}
   3603 
   3604 	if (size == 0) {
   3605 		if (opt_sysv == false)
   3606 			size = 1;
   3607 		else {
   3608 			ret = NULL;
   3609 			goto RETURN;
   3610 		}
   3611 	}
   3612 
   3613 	ret = imalloc(size);
   3614 
   3615 RETURN:
   3616 	if (ret == NULL) {
   3617 		if (opt_xmalloc) {
   3618 			_malloc_message(_getprogname(),
   3619 			    ": (malloc) Error in malloc(): out of memory\n", "",
   3620 			    "");
   3621 			abort();
   3622 		}
   3623 		errno = ENOMEM;
   3624 	}
   3625 
   3626 	UTRACE(0, size, ret);
   3627 	return (ret);
   3628 }
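         /*
          * For example: malloc(0) normally returns a distinct minimal
          * allocation, but with the 'V' (SysV) option it returns NULL instead;
          * with 'X' set, an allocation failure aborts the process rather than
          * returning NULL with errno set to ENOMEM.
          */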
   3629 
    3630 /* XXXAD: local prototype for posix_memalign(). */
   3631 int	posix_memalign(void **memptr, size_t alignment, size_t size);
   3632 
   3633 int
   3634 posix_memalign(void **memptr, size_t alignment, size_t size)
   3635 {
   3636 	int ret;
   3637 	void *result;
   3638 
   3639 	if (malloc_init())
   3640 		result = NULL;
   3641 	else {
   3642 		/* Make sure that alignment is a large enough power of 2. */
   3643 		if (((alignment - 1) & alignment) != 0
   3644 		    || alignment < sizeof(void *)) {
   3645 			if (opt_xmalloc) {
   3646 				_malloc_message(_getprogname(),
   3647 				    ": (malloc) Error in posix_memalign(): "
   3648 				    "invalid alignment\n", "", "");
   3649 				abort();
   3650 			}
   3651 			result = NULL;
   3652 			ret = EINVAL;
   3653 			goto RETURN;
   3654 		}
   3655 
   3656 		result = ipalloc(alignment, size);
   3657 	}
   3658 
   3659 	if (result == NULL) {
   3660 		if (opt_xmalloc) {
   3661 			_malloc_message(_getprogname(),
   3662 			": (malloc) Error in posix_memalign(): out of memory\n",
   3663 			"", "");
   3664 			abort();
   3665 		}
   3666 		ret = ENOMEM;
   3667 		goto RETURN;
   3668 	}
   3669 
   3670 	*memptr = result;
   3671 	ret = 0;
   3672 
   3673 RETURN:
   3674 	UTRACE(0, size, result);
   3675 	return (ret);
   3676 }
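         /*
          * Usage sketch: posix_memalign(&p, 64, 1000) stores a 64-byte-aligned
          * pointer in p and returns 0 (memory permitting); an alignment of 48
          * fails with EINVAL because it is not a power of two, as does any
          * alignment smaller than sizeof(void *).  Errors are reported through
          * the return value only; errno is not set here.
          */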
   3677 
   3678 void *
   3679 calloc(size_t num, size_t size)
   3680 {
   3681 	void *ret;
   3682 	size_t num_size;
   3683 
   3684 	if (malloc_init()) {
   3685 		num_size = 0;
   3686 		ret = NULL;
   3687 		goto RETURN;
   3688 	}
   3689 
   3690 	num_size = num * size;
   3691 	if (num_size == 0) {
   3692 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
   3693 			num_size = 1;
   3694 		else {
   3695 			ret = NULL;
   3696 			goto RETURN;
   3697 		}
   3698 	/*
   3699 	 * Try to avoid division here.  We know that it isn't possible to
   3700 	 * overflow during multiplication if neither operand uses any of the
   3701 	 * most significant half of the bits in a size_t.
   3702 	 */
   3703 	} else if ((unsigned long long)((num | size) &
   3704 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
   3705 	   (num_size / size != num)) {
   3706 		/* size_t overflow. */
   3707 		ret = NULL;
   3708 		goto RETURN;
   3709 	}
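         	/*
         	 * Worked example with a 32-bit size_t: the mask above is
         	 * SIZE_T_MAX << 16 == 0xffff0000.  For num == 0x10001 and
         	 * size == 0x10000 the product wraps to 0x10000, the mask test
         	 * fires, and 0x10000 / 0x10000 != 0x10001 exposes the overflow.
         	 * For small operands such as 1000 * 1000, neither value touches
         	 * the upper half, so the division is skipped entirely.
         	 */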
   3710 
   3711 	ret = icalloc(num_size);
   3712 
   3713 RETURN:
   3714 	if (ret == NULL) {
   3715 		if (opt_xmalloc) {
   3716 			_malloc_message(_getprogname(),
   3717 			    ": (malloc) Error in calloc(): out of memory\n", "",
   3718 			    "");
   3719 			abort();
   3720 		}
   3721 		errno = ENOMEM;
   3722 	}
   3723 
   3724 	UTRACE(0, num_size, ret);
   3725 	return (ret);
   3726 }
   3727 
   3728 void *
   3729 realloc(void *ptr, size_t size)
   3730 {
   3731 	void *ret;
   3732 
   3733 	if (size == 0) {
   3734 		if (opt_sysv == false)
   3735 			size = 1;
   3736 		else {
   3737 			if (ptr != NULL)
   3738 				idalloc(ptr);
   3739 			ret = NULL;
   3740 			goto RETURN;
   3741 		}
   3742 	}
   3743 
   3744 	if (ptr != NULL) {
   3745 		assert(malloc_initialized);
   3746 
   3747 		ret = iralloc(ptr, size);
   3748 
   3749 		if (ret == NULL) {
   3750 			if (opt_xmalloc) {
   3751 				_malloc_message(_getprogname(),
   3752 				    ": (malloc) Error in realloc(): out of "
   3753 				    "memory\n", "", "");
   3754 				abort();
   3755 			}
   3756 			errno = ENOMEM;
   3757 		}
   3758 	} else {
   3759 		if (malloc_init())
   3760 			ret = NULL;
   3761 		else
   3762 			ret = imalloc(size);
   3763 
   3764 		if (ret == NULL) {
   3765 			if (opt_xmalloc) {
   3766 				_malloc_message(_getprogname(),
   3767 				    ": (malloc) Error in realloc(): out of "
   3768 				    "memory\n", "", "");
   3769 				abort();
   3770 			}
   3771 			errno = ENOMEM;
   3772 		}
   3773 	}
   3774 
   3775 RETURN:
   3776 	UTRACE(ptr, size, ret);
   3777 	return (ret);
   3778 }
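         /*
          * For example: realloc(NULL, n) behaves like malloc(n), and
          * realloc(p, 0) normally shrinks p to a minimal allocation and
          * returns it, whereas with the 'V' (SysV) option it frees p and
          * returns NULL.
          */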
   3779 
   3780 void
   3781 free(void *ptr)
   3782 {
   3783 
   3784 	UTRACE(ptr, 0, 0);
   3785 	if (ptr != NULL) {
   3786 		assert(malloc_initialized);
   3787 
   3788 		idalloc(ptr);
   3789 	}
   3790 }
   3791 
   3792 /*
   3793  * End malloc(3)-compatible functions.
   3794  */
   3795 /******************************************************************************/
   3796 /*
   3797  * Begin non-standard functions.
   3798  */
   3799 #ifndef __NetBSD__
   3800 size_t
   3801 malloc_usable_size(const void *ptr)
   3802 {
   3803 
   3804 	assert(ptr != NULL);
   3805 
   3806 	return (isalloc(ptr));
   3807 }
   3808 #endif
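         /*
          * For example, with the default 16-byte quantum a 100-byte request is
          * served from the 112-byte size class, so malloc_usable_size() on the
          * resulting pointer reports 112 rather than 100.  (The #ifndef above
          * omits this definition entirely on NetBSD builds.)
          */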
   3809 
   3810 /*
   3811  * End non-standard functions.
   3812  */
   3813 /******************************************************************************/
   3814 /*
   3815  * Begin library-private functions, used by threading libraries for protection
   3816  * of malloc during fork().  These functions are only called if the program is
   3817  * running in threaded mode, so there is no need to check whether the program
   3818  * is threaded here.
   3819  */
   3820 
   3821 void
   3822 _malloc_prefork(void)
   3823 {
   3824 	unsigned i;
   3825 
   3826 	/* Acquire all mutexes in a safe order. */
   3827 
   3828 	malloc_mutex_lock(&arenas_mtx);
   3829 	for (i = 0; i < narenas; i++) {
   3830 		if (arenas[i] != NULL)
   3831 			malloc_mutex_lock(&arenas[i]->mtx);
   3832 	}
   3833 	malloc_mutex_unlock(&arenas_mtx);
   3834 
   3835 	malloc_mutex_lock(&base_mtx);
   3836 
   3837 	malloc_mutex_lock(&chunks_mtx);
   3838 }
   3839 
   3840 void
   3841 _malloc_postfork(void)
   3842 {
   3843 	unsigned i;
   3844 
   3845 	/* Release all mutexes, now that fork() has completed. */
   3846 
   3847 	malloc_mutex_unlock(&chunks_mtx);
   3848 
   3849 	malloc_mutex_unlock(&base_mtx);
   3850 
   3851 	malloc_mutex_lock(&arenas_mtx);
   3852 	for (i = 0; i < narenas; i++) {
   3853 		if (arenas[i] != NULL)
   3854 			malloc_mutex_unlock(&arenas[i]->mtx);
   3855 	}
   3856 	malloc_mutex_unlock(&arenas_mtx);
   3857 }
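         /*
          * Illustrative sketch (the exact hook mechanism is up to the threading
          * library): these functions could be registered through the standard
          * pthread_atfork(3) interface, e.g.
          *
          *	pthread_atfork(_malloc_prefork, _malloc_postfork, _malloc_postfork);
          *
          * so that every malloc mutex is held across fork() and released again
          * in both the parent and the child, leaving the child's allocator in a
          * consistent state.
          */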
   3858 
   3859 /*
   3860  * End library-private functions.
   3861  */
   3862 /******************************************************************************/
   3863