1 /* $NetBSD: libelf_ar.c,v 1.8 2025/12/25 18:58:13 jkoshy Exp $ */ 2 3 /*- 4 * Copyright (c) 2006,2008,2010 Joseph Koshy 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS `AS IS' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #if HAVE_NBTOOL_CONFIG_H 30 # include "nbtool_config.h" 31 #endif 32 33 #include <sys/cdefs.h> 34 35 #include <assert.h> 36 #include <ctype.h> 37 #include <libelf.h> 38 #include <stdlib.h> 39 #include <string.h> 40 41 #include "_libelf.h" 42 #include "_libelf_ar.h" 43 44 ELFTC_VCSID("Id: libelf_ar.c 4252 2025-10-18 19:34:13Z jkoshy"); 45 46 __RCSID("$NetBSD: libelf_ar.c,v 1.8 2025/12/25 18:58:13 jkoshy Exp $"); 47 48 #define LIBELF_NALLOC_SIZE 16 49 50 /* 51 * `ar' archive handling. 
 *
 * `ar' archives start with signature `ARMAG'.  Each archive member is
 * preceded by a header containing meta-data for the member.  This
 * header is described in <ar.h> (struct ar_hdr).  The header always
 * starts on an even address.  File data is padded with "\n"
 * characters to keep this invariant.
 *
 * Special considerations for `ar' archives:
 *
 * There are two variants of the `ar' archive format: traditional BSD
 * and SVR4.  These differ in the way long file names are treated, and
 * in the layout of the archive symbol table.
 *
 * The `ar' header only has space for a 16 character file name.
 *
 * In the SVR4 format, file names are terminated with a '/', so this
 * effectively leaves 15 characters for the actual file name.  Longer
 * file names are stored in a separate 'string table' and referenced
 * indirectly from the name field.  The string table itself appears as
 * an archive member with name "// ".  An `indirect' file name in an
 * `ar' header matches the pattern "/[0-9]*".  The digits form a
 * decimal number that corresponds to a byte offset into the string
 * table where the actual file name of the object starts.  Strings in
 * the string table are padded to start on even addresses.
 *
 * In the BSD format, file names can be up to 16 characters.  File
 * names shorter than 16 characters are padded to 16 characters using
 * (ASCII) space characters.  File names with embedded spaces and file
 * names longer than 16 characters are stored immediately after the
 * archive header and the name field set to a special indirect name
 * matching the pattern "#1/[0-9]+".  The digits form a decimal number
 * that corresponds to the actual length of the file name following
 * the archive header.  The content of the archive member immediately
 * follows the file name, and the size field of the archive member
 * holds the sum of the sizes of the member and of the appended file
 * name.
88 * 89 * Archives may also have a symbol table (see ranlib(1)), mapping 90 * program symbols to object files inside the archive. 91 * 92 * In the SVR4 format, a symbol table uses a file name of "/ " in its 93 * archive header. The symbol table is structured as: 94 * - a 4-byte count of entries stored as a binary value, MSB first 95 * - 'n' 4-byte offsets, stored as binary values, MSB first 96 * - 'n' NUL-terminated strings, for ELF symbol names, stored unpadded. 97 * 98 * In the BSD format, the symbol table uses a file name of "__.SYMDEF". 99 * It is structured as two parts: 100 * - The first part is an array of "ranlib" structures preceded by 101 * the size of the array in bytes. Each "ranlib" structure 102 * describes one symbol. Each structure contains an offset into 103 * the string table for the symbol name, and a file offset into the 104 * archive for the member defining the symbol. 105 * - The second part is a string table containing NUL-terminated 106 * strings, preceded by the size of the string table in bytes. 107 * 108 * If the symbol table and string table are is present in an archive 109 * they must be the very first objects and in that order. 110 */ 111 112 113 /* 114 * Retrieve an archive header descriptor. 115 */ 116 117 Elf_Arhdr * 118 _libelf_ar_gethdr(Elf *e) 119 { 120 Elf *parent; 121 Elf_Arhdr *eh; 122 char *namelen; 123 size_t n, nlen; 124 struct ar_hdr *arh; 125 126 if ((parent = e->e_parent) == NULL) { 127 LIBELF_SET_ERROR(ARGUMENT, 0); 128 return (NULL); 129 } 130 131 assert((e->e_flags & LIBELF_F_AR_HEADER) == 0); 132 133 arh = (struct ar_hdr *) (uintptr_t) e->e_hdr.e_rawhdr; 134 135 assert((uintptr_t) arh >= (uintptr_t) parent->e_rawfile + SARMAG); 136 137 /* 138 * There needs to be enough space remaining in the file for the 139 * archive header. 
140 */ 141 if ((uintptr_t) arh > (uintptr_t) parent->e_rawfile + 142 (uintptr_t) parent->e_rawsize - sizeof(struct ar_hdr)) { 143 LIBELF_SET_ERROR(ARCHIVE, 0); 144 return (NULL); 145 } 146 147 if ((eh = malloc(sizeof(Elf_Arhdr))) == NULL) { 148 LIBELF_SET_ERROR(RESOURCE, 0); 149 return (NULL); 150 } 151 152 e->e_hdr.e_arhdr = eh; 153 e->e_flags |= LIBELF_F_AR_HEADER; 154 155 eh->ar_name = eh->ar_rawname = NULL; 156 157 if ((eh->ar_name = _libelf_ar_get_translated_name(arh, parent)) == 158 NULL) 159 goto error; 160 161 if (_libelf_ar_get_number(arh->ar_uid, sizeof(arh->ar_uid), 10, 162 &n) == 0) 163 goto error; 164 eh->ar_uid = (uid_t) n; 165 166 if (_libelf_ar_get_number(arh->ar_gid, sizeof(arh->ar_gid), 10, 167 &n) == 0) 168 goto error; 169 eh->ar_gid = (gid_t) n; 170 171 if (_libelf_ar_get_number(arh->ar_mode, sizeof(arh->ar_mode), 8, 172 &n) == 0) 173 goto error; 174 eh->ar_mode = (mode_t) n; 175 176 if (_libelf_ar_get_number(arh->ar_date, sizeof(arh->ar_date), 10, 177 &n) == 0) 178 goto error; 179 eh->ar_date = (time_t) n; 180 181 if (_libelf_ar_get_number(arh->ar_size, sizeof(arh->ar_size), 10, 182 &n) == 0) 183 goto error; 184 185 /* 186 * Get the true size of the member if extended naming is being used. 
187 */ 188 if (IS_EXTENDED_BSD_NAME(arh->ar_name)) { 189 namelen = arh->ar_name + 190 LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE; 191 if (_libelf_ar_get_number(namelen, sizeof(arh->ar_name) - 192 LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE, 10, &nlen) == 0) 193 goto error; 194 n -= nlen; 195 } 196 197 eh->ar_size = n; 198 199 if ((eh->ar_rawname = _libelf_ar_get_raw_name(arh)) == NULL) 200 goto error; 201 202 eh->ar_flags = 0; 203 204 return (eh); 205 206 error: 207 if (eh) { 208 if (eh->ar_name) 209 free(eh->ar_name); 210 if (eh->ar_rawname) 211 free(eh->ar_rawname); 212 free(eh); 213 } 214 215 e->e_flags &= ~LIBELF_F_AR_HEADER; 216 e->e_hdr.e_rawhdr = (unsigned char *) arh; 217 218 LIBELF_SET_ERROR(ARCHIVE, 0); 219 220 return (NULL); 221 } 222 223 Elf * 224 _libelf_ar_open_member(int fd, Elf_Cmd c, Elf *elf) 225 { 226 Elf *e; 227 size_t nsz, sz; 228 off_t next, end; 229 struct ar_hdr *arh; 230 char *member, *namelen; 231 232 assert(elf->e_kind == ELF_K_AR); 233 234 next = elf->e_u.e_ar.e_next; 235 236 /* 237 * `next' is only set to zero by elf_next() when the last 238 * member of an archive is processed. 239 */ 240 if (next == (off_t) 0) 241 return (NULL); 242 243 assert((next & 1) == 0); 244 245 /* 246 * There needs to be enough space in the file to contain an 247 * ar(1) header. 248 */ 249 end = next + (off_t) sizeof(struct ar_hdr); 250 if ((uintmax_t) end < (uintmax_t) next || /* Overflow. */ 251 end > (off_t) elf->e_rawsize) { 252 LIBELF_SET_ERROR(ARCHIVE, 0); 253 return (NULL); 254 } 255 256 arh = (struct ar_hdr *) (elf->e_rawfile + next); 257 258 /* 259 * Retrieve the size of the member. 260 */ 261 if (_libelf_ar_get_number(arh->ar_size, sizeof(arh->ar_size), 10, 262 &sz) == 0) { 263 LIBELF_SET_ERROR(ARCHIVE, 0); 264 return (NULL); 265 } 266 267 /* 268 * Check if the archive member that follows will fit in the 269 * containing archive. 270 */ 271 end += (off_t) sz; 272 if (end < next || /* Overflow. 
*/ 273 end > (off_t) elf->e_rawsize) { 274 LIBELF_SET_ERROR(ARCHIVE, 0); 275 return (NULL); 276 } 277 278 /* 279 * Adjust the size field for members in BSD archives using 280 * extended naming. 281 */ 282 if (IS_EXTENDED_BSD_NAME(arh->ar_name)) { 283 namelen = arh->ar_name + 284 LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE; 285 if (_libelf_ar_get_number(namelen, sizeof(arh->ar_name) - 286 LIBELF_AR_BSD_EXTENDED_NAME_PREFIX_SIZE, 10, &nsz) == 0) { 287 LIBELF_SET_ERROR(ARCHIVE, 0); 288 return (NULL); 289 } 290 291 member = (char *) (arh + 1) + nsz; 292 sz -= nsz; 293 } else 294 member = (char *) (arh + 1); 295 296 297 if ((e = elf_memory(member, sz)) == NULL) 298 return (NULL); 299 300 e->e_fd = fd; 301 e->e_cmd = c; 302 e->e_hdr.e_rawhdr = (unsigned char *) arh; 303 304 elf->e_u.e_ar.e_nchildren++; 305 e->e_parent = elf; 306 307 return (e); 308 } 309 310 /* 311 * A BSD-style ar(1) symbol table has the following layout: 312 * 313 * - A count of bytes used by the following array of 'ranlib' 314 * structures, stored as a 'long'. 315 * - An array of 'ranlib' structures. Each array element is 316 * two 'long's in size. 317 * - A count of bytes used for the following symbol table. 318 * - The symbol table itself. 319 */ 320 321 /* 322 * A helper macro to read in a 'long' value from the archive. 323 * 324 * We use memcpy() since the source pointer may be misaligned with 325 * respect to the natural alignment for a C 'long'. 
326 */ 327 #define GET_LONG(P, V)do { \ 328 memcpy(&(V), (P), sizeof(long)); \ 329 (P) += sizeof(long); \ 330 } while (/* CONSTCOND */ 0) 331 332 Elf_Arsym * 333 _libelf_ar_process_bsd_symtab(Elf *e, size_t *count) 334 { 335 Elf_Arsym *symtab, *sym; 336 unsigned int n; 337 size_t nentries; 338 unsigned char *end, *p, *p0, *s, *s0; 339 const size_t entrysize = 2 * sizeof(long); 340 long arraysize, fileoffset, stroffset, strtabsize; 341 342 assert(e != NULL); 343 assert(count != NULL); 344 assert(e->e_u.e_ar.e_symtab == NULL); 345 346 symtab = NULL; 347 348 /* 349 * The BSD symbol table always contains the count fields even 350 * if there are no entries in it. 351 */ 352 if (e->e_u.e_ar.e_rawsymtabsz < 2 * sizeof(long)) 353 goto symtaberror; 354 355 p = p0 = (unsigned char *) e->e_u.e_ar.e_rawsymtab; 356 end = p0 + e->e_u.e_ar.e_rawsymtabsz; 357 358 /* 359 * Retrieve the size of the array of ranlib descriptors and 360 * check it for validity. 361 */ 362 GET_LONG(p, arraysize); 363 364 if (arraysize < 0 || p0 + arraysize >= end || 365 ((size_t) arraysize % entrysize != 0)) 366 goto symtaberror; 367 368 /* 369 * Check the value of the string table size. 370 */ 371 s = p + arraysize; 372 GET_LONG(s, strtabsize); 373 374 s0 = s; /* Start of string table. */ 375 if (strtabsize < 0 || s0 + strtabsize > end) 376 goto symtaberror; 377 378 nentries = (size_t) arraysize / entrysize; 379 380 /* 381 * Allocate space for the returned Elf_Arsym array. 382 */ 383 if ((symtab = malloc(sizeof(Elf_Arsym) * (nentries + 1))) == NULL) { 384 LIBELF_SET_ERROR(RESOURCE, 0); 385 return (NULL); 386 } 387 388 /* Read in symbol table entries. 
*/ 389 for (n = 0, sym = symtab; n < nentries; n++, sym++) { 390 GET_LONG(p, stroffset); 391 GET_LONG(p, fileoffset); 392 393 if (stroffset < 0 || fileoffset < 0 || 394 (off_t) fileoffset >= e->e_rawsize) 395 goto symtaberror; 396 397 s = s0 + stroffset; 398 399 if (s >= end) 400 goto symtaberror; 401 402 sym->as_off = (off_t) fileoffset; 403 sym->as_hash = elf_hash((char *) s); 404 sym->as_name = (char *) s; 405 } 406 407 /* Fill up the sentinel entry. */ 408 sym->as_name = NULL; 409 sym->as_hash = ~0UL; 410 sym->as_off = (off_t) 0; 411 412 /* Remember the processed symbol table. */ 413 e->e_u.e_ar.e_symtab = symtab; 414 415 *count = e->e_u.e_ar.e_symtabsz = nentries + 1; 416 417 return (symtab); 418 419 symtaberror: 420 if (symtab) 421 free(symtab); 422 LIBELF_SET_ERROR(ARCHIVE, 0); 423 return (NULL); 424 } 425 426 /* 427 * An SVR4-style ar(1) symbol table has the following layout: 428 * 429 * - The first 4 bytes are a binary count of the number of entries in the 430 * symbol table, stored MSB-first. 431 * - Then there are 'n' 4-byte binary offsets, also stored MSB first. 432 * - Following this, there are 'n' null-terminated strings. 
433 */ 434 435 #define GET_WORD(P, V) do { \ 436 (V) = 0; \ 437 (V) = (P)[0]; (V) <<= 8; \ 438 (V) += (P)[1]; (V) <<= 8; \ 439 (V) += (P)[2]; (V) <<= 8; \ 440 (V) += (P)[3]; \ 441 } while (/* CONSTCOND */ 0) 442 443 #define INTSZ 4 444 445 446 Elf_Arsym * 447 _libelf_ar_process_svr4_symtab(Elf *e, size_t *count) 448 { 449 uint32_t off; 450 size_t n, nentries; 451 Elf_Arsym *symtab, *sym; 452 unsigned char *p, *s, *end; 453 454 assert(e != NULL); 455 assert(count != NULL); 456 assert(e->e_u.e_ar.e_symtab == NULL); 457 458 symtab = NULL; 459 460 if (e->e_u.e_ar.e_rawsymtabsz < INTSZ) 461 goto symtaberror; 462 463 p = (unsigned char *) e->e_u.e_ar.e_rawsymtab; 464 end = p + e->e_u.e_ar.e_rawsymtabsz; 465 466 GET_WORD(p, nentries); 467 p += INTSZ; 468 469 if (nentries == 0 || p + nentries * INTSZ >= end) 470 goto symtaberror; 471 472 /* Allocate space for a nentries + a sentinel. */ 473 if ((symtab = malloc(sizeof(Elf_Arsym) * (nentries+1))) == NULL) { 474 LIBELF_SET_ERROR(RESOURCE, 0); 475 return (NULL); 476 } 477 478 s = p + (nentries * INTSZ); /* start of the string table. */ 479 480 for (n = nentries, sym = symtab; n > 0; n--) { 481 if (s >= end) 482 goto symtaberror; 483 484 GET_WORD(p, off); 485 if ((off_t) off >= e->e_rawsize) 486 goto symtaberror; 487 488 sym->as_off = (off_t) off; 489 sym->as_hash = elf_hash((char *) s); 490 sym->as_name = (char *) s; 491 492 p += INTSZ; 493 sym++; 494 495 for (; s < end && *s++ != '\0';) /* skip to next string */ 496 ; 497 } 498 499 /* Fill up the sentinel entry. */ 500 sym->as_name = NULL; 501 sym->as_hash = ~0UL; 502 sym->as_off = (off_t) 0; 503 504 *count = e->e_u.e_ar.e_symtabsz = nentries + 1; 505 e->e_u.e_ar.e_symtab = symtab; 506 507 return (symtab); 508 509 symtaberror: 510 if (symtab) 511 free(symtab); 512 LIBELF_SET_ERROR(ARCHIVE, 0); 513 return (NULL); 514 } 515