Home | History | Annotate | Line # | Download | only in ld.elf_so
      1 /*	$NetBSD: tls.c,v 1.29 2026/03/15 13:56:17 skrll Exp $	*/
      2 /*-
      3  * Copyright (c) 2011 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This code is derived from software contributed to The NetBSD Foundation
      7  * by Joerg Sonnenberger.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include <sys/cdefs.h>
     32 __RCSID("$NetBSD: tls.c,v 1.29 2026/03/15 13:56:17 skrll Exp $");
     33 
     34 /*
     35  * Thread-local storage
     36  *
     37  * Reference:
     38  *
     39  *	[ELFTLS] Ulrich Drepper, `ELF Handling For Thread-Local
     40  *	Storage', Version 0.21, 2023-08-22.
     41  *	https://akkadia.org/drepper/tls.pdf
     42  *	https://web.archive.org/web/20240718081934/https://akkadia.org/drepper/tls.pdf
     43  */
     44 
     45 #include <sys/param.h>
     46 #include <sys/ucontext.h>
     47 #include <lwp.h>
     48 #include <stdalign.h>
     49 #include <stddef.h>
     50 #include <string.h>
     51 #include "debug.h"
     52 #include "rtld.h"
     53 
     54 #include <machine/lwp_private.h>
     55 
     56 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
     57 
     58 static struct tls_tcb *_rtld_tls_allocate_locked(void);
     59 static void *_rtld_tls_module_allocate(struct tls_tcb *, size_t);
     60 
/*
 * ALIGNED_P(ptr, algnmt)
 *
 *	Nonzero if none of the bits of ptr selected by (algnmt - 1) are
 *	set, which is an alignment test when algnmt is a power of two.
 *	An algnmt of zero is accepted and always passes.
 */
#define ALIGNED_P(ptr, algnmt)						\
	(!((algnmt) != 0 && ((uintptr_t)(ptr) & ((algnmt) - 1)) != 0))
     63 
     64 /*
     65  * DTV offset
     66  *
     67  *	On some architectures (m68k, mips, or1k, powerpc, and riscv),
     68  *	the DTV offsets passed to __tls_get_addr have a bias relative
     69  *	to the start of the DTV, in order to maximize the range of TLS
     70  *	offsets that can be used by instruction encodings with signed
     71  *	displacements.
     72  */
     73 #ifndef TLS_DTV_OFFSET
     74 #define	TLS_DTV_OFFSET	0
     75 #endif
     76 
     77 /*
     78  * Alignment of the static data
     79  *
     80  * In Variant I, the thread pointer (TP) can be anchored in three ways
     81  * depending on the architecture. It either points
     82  *
     83  * - directly to the TCB (e.g. Arm, and AArch64); or
     84  * - at a specific "biased" offset within the data (e.g. PowerPC), or
     85  *   directly at the data (e.g. RISC-V). These architectures define
     86  *   __HAVE___LWP_SETTCB to handle the offset
     87  *
     88  * An area of memory with the correct alignment is allocated and the
     89  * struct tcb placed as follows for each case above
     90  *
     91  * - at the start of the aligned memory with data starting at the first
     92  *   object's required alignment.
     93  * - just below the second max alignment boundary so that data starts
     94  *   on the second max alignment boundary.
     95  *
     96  * The code is written such that obj->tlsoffset is always relative to
     97  * the end of the struct tcb. Maybe this is suboptimal?
     98  *
     99  */
    100 
    101 #if defined(__HAVE_TLS_VARIANT_I) && !defined(__HAVE___LWP_SETTCB)
    102 #define _RTLD_TLS_INITIAL_OFFSET	sizeof(struct tls_tcb)
    103 #endif
    104 
    105 #ifndef _RTLD_TLS_INITIAL_OFFSET
    106 #define _RTLD_TLS_INITIAL_OFFSET		0
    107 #endif
    108 
/*
 * Bookkeeping for the static TLS area.  _rtld_tls_static_space stays
 * zero until _rtld_tls_initial_allocation fixes the size of the area;
 * _rtld_tls_offset_allocate uses a nonzero value to tell that the
 * initial allocation has already been done.
 */
static size_t _rtld_tls_static_space;	/* Static TLS space allocated */
static size_t _rtld_tls_static_offset =
	_RTLD_TLS_INITIAL_OFFSET;	/* Next offset for static TLS to use */
/*
 * Largest obj->tlsalign seen by _rtld_tls_offset_allocate; passed as
 * the alignment of the TCB allocation in _rtld_tls_allocate_locked.
 */
static size_t _rtld_tls_static_max_align;

size_t _rtld_tls_dtv_generation = 1;	/* Bumped on each load of obj w/ TLS */
size_t _rtld_tls_max_index = 1;		/* Max index into up-to-date DTV */
    116 
    117 /*
    118  * DTV -- Dynamic Thread Vector
    119  *
    120  *	The DTV is a per-thread array that maps each module with
    121  *	thread-local storage to a pointer into part of the thread's TCB
    122  *	(thread control block), or dynamically loaded TLS blocks,
    123  *	reserved for that module's storage.
    124  *
    125  *	The TCB itself, struct tls_tcb, has a pointer to the DTV at
    126  *	tcb->tcb_dtv.
    127  *
    128  *	The layout is:
    129  *
    130  *		+---------------+
    131  *		| max index     | -1    max index i for which dtv[i] is alloced
    132  *		+---------------+
    133  *		| generation    |  0    void **dtv points here
    134  *		+---------------+
    135  *		| obj 1 tls ptr |  1    TLS pointer for obj w/ obj->tlsindex 1
    136  *		+---------------+
    137  *		| obj 2 tls ptr |  2    TLS pointer for obj w/ obj->tlsindex 2
    138  *		+---------------+
    139  *		  .
    140  *		  .
    141  *		  .
    142  *
    143  *	The values of obj->tlsindex start at 1; this way,
    144  *	dtv[obj->tlsindex] works, when dtv[0] is the generation.  The
    145  *	TLS pointers go either into the static thread-local storage,
    146  *	for the initial objects (i.e., those loaded at startup), or
 *	into TLS blocks dynamically allocated for objects that are
 *	dynamically loaded by dlopen.
    149  *
    150  *	The generation field is a cache of the global generation number
    151  *	_rtld_tls_dtv_generation, which is bumped every time an object
    152  *	with TLS is loaded in _rtld_map_object, and cached by
    153  *	__tls_get_addr (via _rtld_tls_get_addr) when a newly loaded
    154  *	module lies outside the bounds of the current DTV.
    155  *
    156  *	XXX Why do we keep max index and generation separately?  They
    157  *	appear to be initialized the same, always incremented together,
    158  *	and always stored together.
    159  *
    160  *	XXX Why is this not a struct?
    161  *
    162  *		struct dtv {
    163  *			size_t	dtv_gen;
    164  *			void	*dtv_module[];
    165  *		};
    166  */
/*
 * Accessors for the two bookkeeping slots of a DTV: the generation at
 * dtv[0] and the maximum index at dtv[-1].  Both are stored as size_t
 * values converted to void *.
 *
 * The setters are fully parenthesized so that they expand to a single
 * expression and can be used safely inside a larger expression as
 * well as in statement position.
 */
#define	DTV_GENERATION(dtv)		((size_t)((dtv)[0]))
#define	DTV_MAX_INDEX(dtv)		((size_t)((dtv)[-1]))
#define	SET_DTV_GENERATION(dtv, val)	((dtv)[0] = (void *)(size_t)(val))
#define	SET_DTV_MAX_INDEX(dtv, val)	((dtv)[-1] = (void *)(size_t)(val))
    171 
    172 /*
    173  * _rtld_tls_get_addr(tcb, idx, offset)
    174  *
    175  *	Slow path for __tls_get_addr (see below), called to allocate
    176  *	TLS space if needed for the object obj with obj->tlsindex idx,
    177  *	at offset, which must be below obj->tlssize.
    178  *
    179  *	This may allocate a DTV if the current one is too old, and it
    180  *	may allocate a dynamically loaded TLS block if there isn't one
    181  *	already allocated for it.
    182  *
    183  *	XXX Why is the first argument passed as `void *tls' instead of
    184  *	just `struct tls_tcb *tcb'?
    185  */
    186 void *
    187 _rtld_tls_get_addr(void *tls, size_t idx, size_t offset)
    188 {
    189 	struct tls_tcb *tcb = tls;
    190 	void **dtv, **new_dtv;
    191 	sigset_t mask;
    192 
    193 	_rtld_exclusive_enter(&mask);
    194 
    195 	dtv = tcb->tcb_dtv;
    196 
    197 	/*
    198 	 * If the generation number has changed, we have to allocate a
    199 	 * new DTV.
    200 	 *
    201 	 * XXX Do we really?  Isn't it enough to check whether idx <=
    202 	 * DTV_MAX_INDEX(dtv)?
    203 	 */
    204 	if (__predict_false(DTV_GENERATION(dtv) != _rtld_tls_dtv_generation)) {
    205 		size_t to_copy = DTV_MAX_INDEX(dtv);
    206 
    207 		/*
    208 		 * "2 +" because the first element is the generation and
    209 		 * the second one is the maximum index.
    210 		 */
    211 		new_dtv = xcalloc((2 + _rtld_tls_max_index) * sizeof(*dtv));
    212 		++new_dtv;		/* advance past DTV_MAX_INDEX */
    213 		if (to_copy > _rtld_tls_max_index)	/* XXX How? */
    214 			to_copy = _rtld_tls_max_index;
    215 		memcpy(new_dtv + 1, dtv + 1, to_copy * sizeof(*dtv));
    216 		xfree(dtv - 1);		/* retreat back to DTV_MAX_INDEX */
    217 		dtv = tcb->tcb_dtv = new_dtv;
    218 		SET_DTV_MAX_INDEX(dtv, _rtld_tls_max_index);
    219 		SET_DTV_GENERATION(dtv, _rtld_tls_dtv_generation);
    220 	}
    221 
    222 	if (__predict_false(dtv[idx] == NULL))
    223 		dtv[idx] = _rtld_tls_module_allocate(tcb, idx);
    224 
    225 	_rtld_exclusive_exit(&mask);
    226 
    227 	return (uint8_t *)dtv[idx] + offset;
    228 }
    229 
    230 /*
    231  * _rtld_tls_initial_allocation()
    232  *
    233  *	Allocate the TCB (thread control block) for the initial thread,
    234  *	once the static TLS space usage has been determined (plus some
    235  *	slop to allow certain special cases like Mesa to be dlopened).
    236  *
    237  *	This must be done _after_ all initial objects (i.e., those
    238  *	loaded at startup, as opposed to objects dynamically loaded by
    239  *	dlopen) have had TLS offsets allocated if need be by
    240  *	_rtld_tls_offset_allocate, and have had relocations processed.
    241  */
void
_rtld_tls_initial_allocation(void)
{
	struct tls_tcb *tcb;

	/*
	 * Fix the size of the static TLS area: everything handed out by
	 * _rtld_tls_offset_allocate so far plus the reservation.
	 */
	_rtld_tls_static_space = _rtld_tls_static_offset +
	    RTLD_STATIC_TLS_RESERVATION;

#ifndef __HAVE_TLS_VARIANT_I
	/*
	 * Outside Variant I, _rtld_tls_allocate_locked places the TCB
	 * directly after the static area, so the size of the area
	 * determines where the TCB lands -- presumably the reason for
	 * rounding it up here.
	 */
	_rtld_tls_static_space = roundup2(_rtld_tls_static_space,
	    alignof(max_align_t));

#ifdef __HAVE___LWP_SETTCB
	/*
	 * NOTE(review): this adjustment is nested inside the
	 * non-Variant-I branch, whereas the matching pointer shift in
	 * _rtld_tls_allocate_locked is compiled only for Variant I --
	 * confirm the intended nesting.
	 */
	if (_rtld_tls_static_max_align > sizeof(struct tls_tcb))
		_rtld_tls_static_space +=
		     _rtld_tls_static_max_align - sizeof(struct tls_tcb);
#endif

#endif
	dbg(("_rtld_tls_static_space %zu", _rtld_tls_static_space));

	/* Allocate the initial thread's TCB and install it. */
	tcb = _rtld_tls_allocate_locked();
#ifdef __HAVE___LWP_SETTCB
	__lwp_settcb(tcb);
#else
	_lwp_setprivate(tcb);
#endif
}
    270 
    271 /*
    272  * _rtld_tls_allocate_locked()
    273  *
    274  *	Internal subroutine to allocate a TCB (thread control block)
    275  *	for the current thread.
    276  *
    277  *	This allocates a DTV and a TCB that points to it, including
    278  *	static space in the TCB for the TLS of the initial objects.
    279  *	TLS blocks for dynamically loaded objects are allocated lazily.
    280  *
    281  *	Caller must either be single-threaded (at startup via
    282  *	_rtld_tls_initial_allocation) or hold the rtld exclusive lock
    283  *	(via _rtld_tls_allocate).
    284  */
    285 static struct tls_tcb *
    286 _rtld_tls_allocate_locked(void)
    287 {
    288 	Obj_Entry *obj;
    289 	struct tls_tcb *tcb;
    290 	uint8_t *p, *q;
    291 
    292 	p = xmalloc_aligned(_rtld_tls_static_space + sizeof(struct tls_tcb),
    293 	    _rtld_tls_static_max_align, 0);
    294 
    295 	memset(p, 0, _rtld_tls_static_space + sizeof(struct tls_tcb));
    296 #ifdef __HAVE_TLS_VARIANT_I
    297 #ifdef __HAVE___LWP_SETTCB
    298 	if (_rtld_tls_static_max_align > sizeof(struct tls_tcb))
    299 		p += _rtld_tls_static_max_align - sizeof(struct tls_tcb);
    300 #endif
    301 	tcb = (struct tls_tcb *)p;
    302 	p += sizeof(struct tls_tcb);
    303 #else
    304 	p += _rtld_tls_static_space;
    305 	tcb = (struct tls_tcb *)p;
    306 	tcb->tcb_self = tcb;
    307 #endif
    308 	dbg(("lwp %d tls tcb %p", _lwp_self(), tcb));
    309 	/*
    310 	 * "2 +" because the first element is the generation and the second
    311 	 * one is the maximum index.
    312 	 */
    313 	tcb->tcb_dtv = xcalloc(sizeof(*tcb->tcb_dtv) * (2 + _rtld_tls_max_index));
    314 	++tcb->tcb_dtv;		/* advance past DTV_MAX_INDEX */
    315 	SET_DTV_MAX_INDEX(tcb->tcb_dtv, _rtld_tls_max_index);
    316 	SET_DTV_GENERATION(tcb->tcb_dtv, _rtld_tls_dtv_generation);
    317 
    318 	for (obj = _rtld_objlist; obj != NULL; obj = obj->next) {
    319 		if (obj->tls_static) {
    320 #ifdef __HAVE_TLS_VARIANT_I
    321 			q = p + obj->tlsoffset;
    322 #else
    323 			q = p - obj->tlsoffset;
    324 #endif
    325 			dbg(("%s: [lwp %d] tls dtv %p-%p index %zu "
    326 			    "offset %zx alignment %zx tlsinit %p%s",
    327 			    obj->path, _lwp_self(),
    328 			    q, q + obj->tlsinitsize, obj->tlsindex,
    329 			    obj->tlsoffset, obj->tlsalign, obj->tlsinit,
    330 			    ALIGNED_P(q, obj->tlsalign) ? "" :
    331 				 " BAD ALIGNMENT"));
    332 
    333 			assert(ALIGNED_P(q, obj->tlsalign));
    334 
    335 			if (obj->tlsinitsize)
    336 				memcpy(q, obj->tlsinit, obj->tlsinitsize);
    337 			tcb->tcb_dtv[obj->tlsindex] = q;
    338 		}
    339 	}
    340 
    341 	return tcb;
    342 }
    343 
    344 /*
    345  * _rtld_tls_allocate()
    346  *
    347  *	Allocate a TCB (thread control block) for the current thread.
    348  *
    349  *	Called by pthread_create for non-initial threads.  (The initial
    350  *	thread's TCB is allocated by _rtld_tls_initial_allocation.)
    351  */
    352 struct tls_tcb *
    353 _rtld_tls_allocate(void)
    354 {
    355 	struct tls_tcb *tcb;
    356 	sigset_t mask;
    357 
    358 	_rtld_exclusive_enter(&mask);
    359 	tcb = _rtld_tls_allocate_locked();
    360 	_rtld_exclusive_exit(&mask);
    361 
    362 	return tcb;
    363 }
    364 
    365 /*
    366  * _rtld_tls_free(tcb)
    367  *
    368  *	Free a TCB allocated with _rtld_tls_allocate.
    369  *
    370  *	Frees any TLS blocks for dynamically loaded objects that tcb's
    371  *	DTV points to, and frees tcb's DTV, and frees tcb.
    372  */
    373 void
    374 _rtld_tls_free(struct tls_tcb *tcb)
    375 {
    376 	size_t i, max_index;
    377 	uint8_t *p, *p_end;
    378 	sigset_t mask;
    379 
    380 	_rtld_exclusive_enter(&mask);
    381 
    382 #ifdef __HAVE_TLS_VARIANT_I
    383 	p = (uint8_t *)tcb;
    384 #else
    385 	p = (uint8_t *)tcb - _rtld_tls_static_space;
    386 #endif
    387 	p_end = p + _rtld_tls_static_space;
    388 
    389 	max_index = DTV_MAX_INDEX(tcb->tcb_dtv);
    390 	for (i = 1; i <= max_index; ++i) {
    391 		if ((uint8_t *)tcb->tcb_dtv[i] < p ||
    392 		    (uint8_t *)tcb->tcb_dtv[i] >= p_end)
    393 			xfree(tcb->tcb_dtv[i]);
    394 	}
    395 	xfree(tcb->tcb_dtv - 1);	/* retreat back to DTV_MAX_INDEX */
    396 	xfree(p);
    397 
    398 	_rtld_exclusive_exit(&mask);
    399 }
    400 
    401 /*
    402  * _rtld_tls_module_allocate(tcb, idx)
    403  *
    404  *	Allocate thread-local storage in the thread with the given TCB
    405  *	(thread control block) for the object obj whose obj->tlsindex
    406  *	is idx.
    407  *
    408  *	If obj has had space in static TLS reserved (obj->tls_static),
    409  *	return a pointer into that.  Otherwise, allocate a TLS block,
    410  *	mark obj as having a TLS block allocated (obj->tls_dynamic),
    411  *	and return it.
    412  *
    413  *	Called by _rtld_tls_get_addr to get the thread-local storage
    414  *	for an object the first time around.
    415  */
    416 static void *
    417 _rtld_tls_module_allocate(struct tls_tcb *tcb, size_t idx)
    418 {
    419 	Obj_Entry *obj;
    420 	uint8_t *p;
    421 
    422 	for (obj = _rtld_objlist; obj != NULL; obj = obj->next) {
    423 		if (obj->tlsindex == idx)
    424 			break;
    425 	}
    426 	if (obj == NULL) {
    427 		_rtld_error("Module for TLS index %zu missing", idx);
    428 		_rtld_die();
    429 	}
    430 	if (obj->tls_static) {
    431 #ifdef __HAVE_TLS_VARIANT_I
    432 		p = (uint8_t *)tcb + obj->tlsoffset + sizeof(struct tls_tcb);
    433 #else
    434 		p = (uint8_t *)tcb - obj->tlsoffset;
    435 #endif
    436 		return p;
    437 	}
    438 
    439 	p = xmalloc_aligned(obj->tlssize, obj->tlsalign, 0);
    440 	memcpy(p, obj->tlsinit, obj->tlsinitsize);
    441 	memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize);
    442 
    443 	obj->tls_dynamic = 1;
    444 
    445 	return p;
    446 }
    447 
    448 /*
    449  * _rtld_tls_offset_allocate(obj)
    450  *
    451  *	Allocate a static thread-local storage offset for obj.
    452  *
    453  *	Called by _rtld at startup for all initial objects.  Called
    454  *	also by MD relocation logic, which is allowed (for Mesa) to
    455  *	allocate an additional 64 bytes (RTLD_STATIC_TLS_RESERVATION)
    456  *	of static thread-local storage in dlopened objects.
    457  */
    458 int
    459 _rtld_tls_offset_allocate(Obj_Entry *obj)
    460 {
    461 	size_t offset, next_offset;
    462 
    463 	if (obj->tls_dynamic)
    464 		return -1;
    465 
    466 	if (obj->tls_static)
    467 		return 0;
    468 
    469 	if (obj->tlssize == 0) {
    470 		obj->tlsoffset = 0;
    471 		obj->tls_static = 1;
    472 		return 0;
    473 	}
    474 
    475 #ifdef __HAVE_TLS_VARIANT_I
    476 	offset = roundup2(_rtld_tls_static_offset, obj->tlsalign);
    477 	next_offset = offset + obj->tlssize;
    478 #ifndef __HAVE___LWP_GETTCB_FAST
    479 	offset -= sizeof(struct tls_tcb);
    480 #endif
    481 #else
    482 	offset = roundup2(_rtld_tls_static_offset + obj->tlssize,
    483 	    obj->tlsalign);
    484 	next_offset = offset;
    485 #endif
    486 
    487 	/*
    488 	 * Check if the static allocation was already done.
    489 	 * This happens if dynamically loaded modules want to use
    490 	 * static TLS space.
    491 	 *
    492 	 * XXX Keep an actual free list and callbacks for initialisation.
    493 	 */
    494 	if (_rtld_tls_static_space) {
    495 		if (obj->tlsinitsize) {
    496 			_rtld_error("%s: Use of initialized "
    497 			    "Thread Local Storage with model initial-exec "
    498 			    "and dlopen is not supported",
    499 			    obj->path);
    500 			return -1;
    501 		}
    502 		if (next_offset > _rtld_tls_static_space) {
    503 			_rtld_error("%s: No space available "
    504 			    "for static Thread Local Storage",
    505 			    obj->path);
    506 			return -1;
    507 		}
    508 	}
    509 	if (obj->tlsalign > _rtld_tls_static_max_align) {
    510 		_rtld_tls_static_max_align = obj->tlsalign;
    511 	}
    512 	obj->tlsoffset = offset;
    513 	dbg(("%s: static tls offset 0x%zx size %zu align %zu (%zx/%zx)",
    514 	    obj->path, obj->tlsoffset, obj->tlssize, obj->tlsalign,
    515 	    _rtld_tls_static_offset, next_offset));
    516 	_rtld_tls_static_offset = next_offset;
    517 	obj->tls_static = 1;
    518 
    519 	return 0;
    520 }
    521 
    522 /*
    523  * _rtld_tls_offset_free(obj)
    524  *
    525  *	Free a static thread-local storage offset for obj.
    526  *
    527  *	Called by dlclose (via _rtld_unload_object -> _rtld_obj_free).
    528  *
    529  *	Since static thread-local storage is normally not used by
    530  *	dlopened objects (with the exception of Mesa), this doesn't do
    531  *	anything to recycle the space right now.
    532  */
    533 void
    534 _rtld_tls_offset_free(Obj_Entry *obj)
    535 {
    536 
    537 	/*
    538 	 * XXX See above.
    539 	 */
    540 	obj->tls_static = 0;
    541 	return;
    542 }
    543 
    544 #if defined(__HAVE_COMMON___TLS_GET_ADDR) && defined(RTLD_LOADER)
    545 /*
    546  * __tls_get_addr(tlsindex)
    547  *
 *	Symbol directly called by code generated by the compiler for
 *	references to thread-local storage in the general-dynamic or
    550  *	local-dynamic TLS models (but not initial-exec or local-exec).
    551  *
    552  *	The argument is a pointer to
    553  *
    554  *		struct {
    555  *			unsigned long int ti_module;
    556  *			unsigned long int ti_offset;
    557  *		};
    558  *
    559  *	 as in, e.g., [ELFTLS] Sec. 3.4.3.  This coincides with the
    560  *	 type size_t[2] on all architectures that use this common
    561  *	 __tls_get_addr definition (XXX but why do we write it as
    562  *	 size_t[2]?).
    563  *
    564  *	 ti_module, i.e., arg[0], is the obj->tlsindex assigned at
    565  *	 load-time by _rtld_map_object, and ti_offset, i.e., arg[1], is
    566  *	 assigned at link-time by ld(1), possibly adjusted by
    567  *	 TLS_DTV_OFFSET.
    568  *
    569  *	 Some architectures -- specifically IA-64 -- use a different
    570  *	 calling convention.  Some architectures -- specifically i386
    571  *	 -- also use another entry point ___tls_get_addr (that's three
    572  *	 leading underscores) with a different calling convention.
    573  */
    574 void *
    575 __tls_get_addr(void *arg_)
    576 {
    577 	size_t *arg = (size_t *)arg_;
    578 	void **dtv;
    579 #ifdef __HAVE___LWP_GETTCB_FAST
    580 	struct tls_tcb * const tcb = __lwp_gettcb_fast();
    581 #else
    582 	struct tls_tcb * const tcb = __lwp_getprivate_fast();
    583 #endif
    584 	size_t idx = arg[0], offset = arg[1] + TLS_DTV_OFFSET;
    585 
    586 	dtv = tcb->tcb_dtv;
    587 
    588 	/*
    589 	 * Fast path: access to an already allocated DTV entry.  This
    590 	 * checks the current limit and the entry without needing any
    591 	 * locking.  Entries are only freed on dlclose() and it is an
    592 	 * application bug if code of the module is still running at
    593 	 * that point.
    594 	 */
    595 	if (__predict_true(idx <= DTV_MAX_INDEX(dtv) && dtv[idx] != NULL))
    596 		return (uint8_t *)dtv[idx] + offset;
    597 
    598 	return _rtld_tls_get_addr(tcb, idx, offset);
    599 }
    600 #endif
    601 
    602 #endif /* __HAVE_TLS_VARIANT_I || __HAVE_TLS_VARIANT_II */
    603