1 /* CTF type deduplication. 2 Copyright (C) 2019-2026 Free Software Foundation, Inc. 3 4 This file is part of libctf. 5 6 libctf is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 This program is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 14 See the GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; see the file COPYING. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include <ctf-impl.h> 21 #include <string.h> 22 #include <errno.h> 23 #include <assert.h> 24 #include "hashtab.h" 25 26 /* (In the below, relevant functions are named in square brackets.) */ 27 28 /* Type deduplication is a three-phase process: 29 30 [ctf_dedup, ctf_dedup_hash_type, ctf_dedup_rhash_type] 31 1) come up with unambiguous hash values for all types: no two types may have 32 the same hash value, and any given type should have only one hash value 33 (for optimal deduplication). 34 35 [ctf_dedup, ctf_dedup_detect_name_ambiguity, 36 ctf_dedup_conflictify_unshared, ctf_dedup_mark_conflicting_hash] 37 2) mark those distinct types with names that collide (and thus cannot be 38 declared simultaneously in the same translation unit) as conflicting, and 39 recursively mark all types that cite one of those types as conflicting as 40 well. Possibly mark all types cited in only one TU as conflicting, if 41 the CTF_LINK_SHARE_DUPLICATED link mode is active. 42 43 [ctf_dedup_emit, ctf_dedup_emit_struct_members, ctf_dedup_id_to_target] 44 3) emit all the types, one hash value at a time. Types not marked 45 conflicting are emitted once, into the shared dictionary: types marked 46 conflicting are emitted once per TU into a dictionary corresponding to 47 each TU in which they appear. Structs marked conflicting get at the very 48 least a forward emitted into the shared dict so that other dicts can cite 49 it if needed. 50 51 [id_to_packed_id] 52 This all works over an array of inputs (usually in the same order as the 53 inputs on the link line). We don't use the ctf_link_inputs hash directly 54 because it is convenient to be able to address specific input types as a 55 *global type ID* or 'GID', a pair of an array offset and a ctf_id_t. Since 56 both are already 32 bits or less or can easily be constrained to that range, 57 we can pack them both into a single 64-bit hash word for easy lookups, which 58 would be much more annoying to do with a ctf_dict_t * and a ctf_id_t. (On 59 32-bit platforms, we must do that anyway, since pointers, and thus hash keys 60 and values, are only 32 bits wide). We track which inputs are parents of 61 which other inputs so that we can correctly recognize that types we have 62 traversed in children may cite types in parents, and so that we can process 63 the parents first.) 64 65 Note that thanks to ld -r, the deduplicator can be fed its own output, so the 66 inputs may themselves have child dicts. Since we need to support this usage 67 anyway, we can use it in one other place. If the caller finds translation 68 units to be too small a unit ambiguous types, links can be 'cu-mapped', where 69 the caller provides a mapping of input TU names to output child dict names. 70 This mapping can fuse many child TUs into one potential child dict, so that 71 ambiguous types in any of those input TUs go into the same child dict. 72 When a many:1 cu-mapping is detected, the ctf_dedup machinery is called 73 repeatedly, once for every output name that has more than one input, to fuse 74 all the input TUs associated with a given output dict into one, and once again 75 as normal to deduplicate all those intermediate outputs (and any 1:1 inputs) 76 together. This has much higher memory usage than otherwise, because in the 77 intermediate state, all the output TUs are in memory at once and cannot be 78 lazily opened. It also has implications for the emission code: if types 79 appear ambiguously in multiple input TUs that are all mapped to the same 80 child dict, we cannot put them in children in the cu-mapping link phase 81 because this output is meant to *become* a child in the next link stage and 82 parent/child relationships are only one level deep: so instead, we just hide 83 all but one of the ambiguous types. 84 85 There are a few other subtleties here that make this more complex than it 86 seems. Let's go over the steps above in more detail. 87 88 1) HASHING. 89 90 [ctf_dedup_hash_type, ctf_dedup_rhash_type] 91 Hashing proceeds recursively, mixing in the properties of each input type 92 (including its name, if any), and then adding the hash values of every type 93 cited by that type. The result is stashed in the cd_type_hashes so other 94 phases can find the hash values of input types given their IDs, and so that 95 if we encounter this type again while hashing we can just return its hash 96 value: it is also stashed in the *output mapping*, a mapping from hash value 97 to the set of GIDs corresponding to that type in all inputs. We also keep 98 track of the GID of the first appearance of the type in any input (in 99 cd_output_first_gid), the GID of structs, unions, and forwards that only 100 appear in one TU (in cd_struct_origin), and an indication of whether this 101 type is root-visible or not. See below for where these things are used. 102 103 Everything in this phase is time-critical, because it is operating over 104 non-deduplicated types and so may have hundreds or thousands of times the 105 data volume to deal with than later phases. Trace output is hidden behind 106 ENABLE_LIBCTF_HASH_DEBUGGING to prevent the sheer number of calls to 107 ctf_dprintf from slowing things down (tenfold slowdowns are observed purely 108 from the calls to ctf_dprintf(), even with debugging switched off), and keep 109 down the volume of output (hundreds of gigabytes of debug output are not 110 uncommon on larger links). 111 112 We have to do *something* about potential cycles in the type graph. We'd 113 like to avoid emitting forwards in the final output if possible, because 114 forwards aren't much use: they have no members. We are mostly saved from 115 needing to worry about this at emission time by ctf_add_struct*() 116 automatically replacing newly-created forwards when the real struct/union 117 comes along. So we only have to avoid getting stuck in cycles during the 118 hashing phase, while also not confusing types that cite members that are 119 structs with each other. It is easiest to solve this problem by noting two 120 things: 121 122 - all cycles in C depend on the presence of tagged structs/unions 123 - all tagged structs/unions have a unique name they can be disambiguated by 124 125 [ctf_dedup_is_stub] 126 This means that we can break all cycles by ceasing to hash in cited types at 127 every tagged struct/union and instead hashing in a stub consisting of the 128 struct/union's *decorated name*, which is the name preceded by "s " or "u " 129 depending on the namespace (cached in cd_decorated_names). Forwards are 130 decorated identically (so a forward to "struct foo" would be represented as 131 "s foo"): this means that a citation of a forward to a type and a citation of 132 a concrete definition of a type with the same name ends up getting the same 133 hash value. 134 135 Of course, it is quite possible to have two TUs with structs with the same 136 name and different definitions, but that's OK because when we scan for types 137 with ambiguous names we will identify these and mark them conflicting. 138 139 We populate one thing to help conflictedness marking. No unconflicted type 140 may cite a conflicted one, but this means that conflictedness marking must 141 walk from types to the types that cite them, which is the opposite of the 142 usual order. We can make this easier to do by constructing a *citers* graph 143 in cd_citers, which points from types to the types that cite them: because we 144 emit forwards corresponding to every conflicted struct/union, we don't need 145 to do this for citations of structs/unions by other types. This is very 146 convenient for us, because that's the only type we don't traverse 147 recursively: so we can construct the citers graph at the same time as we 148 hash, rather than needing to add an extra pass. (This graph is a dynhash of 149 *type hash values*, so it's small: in effect it is automatically 150 deduplicated.) 151 152 2) COLLISIONAL MARKING. 153 154 [ctf_dedup_detect_name_ambiguity, ctf_dedup_mark_conflicting_hash] 155 We identify types whose names collide during the hashing process, and count 156 the rough number of uses of each name (caching may throw it off a bit: this 157 doesn't need to be accurate). We then mark the less-frequently-cited types 158 with each names conflicting: the most-frequently-cited one goes into the 159 shared type dictionary, while all others are duplicated into per-TU 160 dictionaries, named after the input TU, that have the shared dictionary as a 161 parent. For structures and unions this is not quite good enough: we'd like 162 to have citations of forwards to ambiguously named structures and unions 163 *stay* as citations of forwards, so that the user can tell that the caller 164 didn't actually know which structure definition was meant: but if we put one 165 of those structures into the shared dictionary, it would supplant and replace 166 the forward, leaving no sign. So structures and unions do not take part in 167 this popularity contest: if their names are ambiguous, they are just 168 duplicated, and only a forward appears in the shared dict. 169 170 [ctf_dedup_propagate_conflictedness] 171 The process of marking types conflicted is itself recursive: we recursively 172 traverse the cd_citers graph populated in the hashing pass above and mark 173 everything that we encounter conflicted (without wasting time re-marking 174 anything that is already marked). This naturally terminates just where we 175 want it to (at types that are cited by no other types, and at structures and 176 unions) and suffices to ensure that types that cite conflicted types are 177 always marked conflicted. 178 179 [ctf_dedup_conflictify_unshared, ctf_dedup_multiple_input_dicts] 180 When linking in CTF_LINK_SHARE_DUPLICATED mode, we would like all types that 181 are used in only one TU to end up in a per-CU dict. The easiest way to do 182 that is to mark them conflicted. ctf_dedup_conflictify_unshared does this, 183 traversing the output mapping and using ctf_dedup_multiple_input_dicts to 184 check the number of input dicts each distinct type hash value came from: 185 types that only came from one get marked conflicted. One caveat here is that 186 we need to consider both structs and forwards to them: a struct that appears 187 in one TU and has a dozen citations to an opaque forward in other TUs should 188 *not* be considered to be used in only one TU, because users would find it 189 useful to be able to traverse into opaque structures of that sort: so we use 190 cd_struct_origin to check both structs/unions and the forwards corresponding 191 to them. 192 193 3) EMISSION. 194 195 [ctf_dedup_walk_output_mapping, ctf_dedup_rwalk_output_mapping, 196 ctf_dedup_rwalk_one_output_mapping] 197 Emission involves another walk of the entire output mapping, this time 198 traversing everything other than struct members, recursively. Types are 199 emitted from leaves to trunk, emitting all types a type cites before emitting 200 the type itself. We sort the output mapping before traversing it, for 201 reproducibility and also correctness: the input dicts may have parent/child 202 relationships, so we simply sort all types that first appear in parents 203 before all children, then sort types that first appear in dicts appearing 204 earlier on the linker command line before those that appear later, then sort 205 by input ctf_id_t. (This is where we use cd_output_first_gid, collected 206 above.) 207 208 The walking is done using a recursive traverser which arranges to not revisit 209 any type already visited and to call its callback once per input GID for 210 input GIDs corresponding to conflicted output types. The traverser only 211 finds input types and calls a callback for them as many times as the output 212 needs to appear: it doesn't try to figure out anything about where the output 213 might go. That's done by the callback based on whether the type is 214 marked conflicted or not. 215 216 [ctf_dedup_emit_type, ctf_dedup_id_to_target, ctf_dedup_synthesize_forward] 217 ctf_dedup_emit_type is the (sole) callback for ctf_dedup_walk_output_mapping. 218 Conflicted types have all necessary dictionaries created, and then we emit 219 the type into each dictionary in turn, working over each input CTF type 220 corresponding to each hash value and using ctf_dedup_id_to_target to map each 221 input ctf_id_t into the corresponding type in the output (dealing with input 222 ctf_id_t's with parents in the process by simply chasing to the parent dict 223 if the type we're looking up is in there). Emitting structures involves 224 simply noting that the members of this structure need emission later on: 225 because you cannot cite a single structure member from another type, we avoid 226 emitting the members at this stage to keep recursion depths down a bit. 227 228 At this point, if we have by some mischance decided that two different types 229 with child types that hash to different values have in fact got the same hash 230 value themselves and *not* marked it conflicting, the type walk will walk 231 only *one* of them and in all likelihood we'll find that we are trying to 232 emit a type into some child dictionary that references a type that was never 233 emitted into that dictionary and assertion-fail. This always indicates a bug 234 in the conflictedness marking machinery or the hashing code, or both. 235 236 ctf_dedup_id_to_target calls ctf_dedup_synthesize_forward to do one extra 237 thing, alluded to above: if this is a conflicted tagged structure or union, 238 and the target is the shared dict (i.e., the type we're being asked to emit 239 is not itself conflicted so can't just point straight at the conflicted 240 type), we instead synthesise a forward with the same name, emit it into the 241 shared dict, record it in cd_output_emission_conflicted_forwards so that we 242 don't re-emit it, and return it. This means that cycles that contain 243 conflicts do not cause the entire cycle to be replicated in every child: only 244 that piece of the cycle which takes you back as far as the closest tagged 245 struct/union needs to be replicated. This trick means that no part of the 246 deduplicator needs a cycle detector: every recursive walk can stop at tagged 247 structures. 248 249 [ctf_dedup_emit_struct_members] 250 The final stage of emission is to walk over all structures with members 251 that need emission and emit all of them. Every type has been emitted at 252 this stage, so emission cannot fail. 253 254 [ctf_dedup_populate_type_mappings, ctf_dedup_populate_type_mapping] 255 Finally, we update the input -> output type ID mappings used by the ctf-link 256 machinery to update all the other sections. This is surprisingly expensive 257 and may be replaced with a scheme which lets the ctf-link machinery extract 258 the needed info directly from the deduplicator. */ 259 260 /* Possible future optimizations are flagged with 'optimization opportunity' 261 below. */ 262 263 /* Global optimization opportunity: a GC pass, eliminating types with no direct 264 or indirect citations from the other sections in the dictionary. */ 265 266 /* Internal flag values for ctf_dedup_hash_type. */ 267 268 /* Child call: consider forwardable types equivalent to forwards or stubs below 269 this point. */ 270 #define CTF_DEDUP_HASH_INTERNAL_CHILD 0x01 271 272 /* Transform references to single ctf_id_ts in passed-in inputs into a number 273 that will fit in a uint64_t. Needs rethinking if CTF_MAX_TYPE is boosted. 274 275 On 32-bit platforms, we pack things together differently: see the note 276 above. */ 277 278 #if UINTPTR_MAX < UINT64_MAX 279 # define IDS_NEED_ALLOCATION 1 280 # define CTF_DEDUP_GID(fp, input, type) id_to_packed_id (fp, input, type) 281 # define CTF_DEDUP_GID_TO_INPUT(id) packed_id_to_input (id) 282 # define CTF_DEDUP_GID_TO_TYPE(id) packed_id_to_type (id) 283 #else 284 # define CTF_DEDUP_GID(fp, input, type) \ 285 (void *) (((uint64_t) input) << 32 | (type)) 286 # define CTF_DEDUP_GID_TO_INPUT(id) ((int) (((uint64_t) id) >> 32)) 287 # define CTF_DEDUP_GID_TO_TYPE(id) (ctf_id_t) (((uint64_t) id) & ~(0xffffffff00000000ULL)) 288 #endif 289 290 #ifdef IDS_NEED_ALLOCATION 291 292 /* This is the 32-bit path, which stores GIDs in a pool and returns a pointer 293 into the pool. It is notably less efficient than the 64-bit direct storage 294 approach, but with a smaller key, this is all we can do. */ 295 296 static void * 297 id_to_packed_id (ctf_dict_t *fp, int input_num, ctf_id_t type) 298 { 299 const void *lookup; 300 ctf_type_id_key_t *dynkey = NULL; 301 ctf_type_id_key_t key = { input_num, type }; 302 303 if (!ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_dict_t, 304 &key, &lookup, NULL)) 305 { 306 if ((dynkey = malloc (sizeof (ctf_type_id_key_t))) == NULL) 307 goto oom; 308 memcpy (dynkey, &key, sizeof (ctf_type_id_key_t)); 309 310 if (ctf_dynhash_insert (fp->ctf_dedup.cd_id_to_dict_t, dynkey, NULL) < 0) 311 goto oom; 312 313 ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_dict_t, 314 dynkey, &lookup, NULL); 315 } 316 /* We use a raw assert() here because there isn't really a way to get any sort 317 of error back from this routine without vastly complicating things for the 318 much more common case of !IDS_NEED_ALLOCATION. */ 319 assert (lookup); 320 return (void *) lookup; 321 322 oom: 323 free (dynkey); 324 ctf_set_errno (fp, ENOMEM); 325 return NULL; 326 } 327 328 static int 329 packed_id_to_input (const void *id) 330 { 331 const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id; 332 333 return key->ctii_input_num; 334 } 335 336 static ctf_id_t 337 packed_id_to_type (const void *id) 338 { 339 const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id; 340 341 return key->ctii_type; 342 } 343 #endif 344 345 /* Make an element in a dynhash-of-dynsets, or return it if already present. */ 346 347 static ctf_dynset_t * 348 make_set_element (ctf_dynhash_t *set, const void *key) 349 { 350 ctf_dynset_t *element; 351 352 if ((element = ctf_dynhash_lookup (set, key)) == NULL) 353 { 354 if ((element = ctf_dynset_create (htab_hash_string, 355 htab_eq_string, 356 NULL)) == NULL) 357 return NULL; 358 359 if (ctf_dynhash_insert (set, (void *) key, element) < 0) 360 { 361 ctf_dynset_destroy (element); 362 return NULL; 363 } 364 } 365 366 return element; 367 } 368 369 /* Initialize the dedup atoms table. */ 370 int 371 ctf_dedup_atoms_init (ctf_dict_t *fp) 372 { 373 if (fp->ctf_dedup_atoms) 374 return 0; 375 376 if (!fp->ctf_dedup_atoms_alloc) 377 { 378 if ((fp->ctf_dedup_atoms_alloc 379 = ctf_dynset_create (htab_hash_string, htab_eq_string, 380 free)) == NULL) 381 return ctf_set_errno (fp, ENOMEM); 382 } 383 fp->ctf_dedup_atoms = fp->ctf_dedup_atoms_alloc; 384 return 0; 385 } 386 387 /* Intern things in the dedup atoms table. */ 388 389 static const char * 390 intern (ctf_dict_t *fp, char *atom) 391 { 392 const void *foo; 393 394 if (atom == NULL) 395 return NULL; 396 397 if (!ctf_dynset_exists (fp->ctf_dedup_atoms, atom, &foo)) 398 { 399 if (ctf_dynset_insert (fp->ctf_dedup_atoms, atom) < 0) 400 { 401 ctf_set_errno (fp, ENOMEM); 402 return NULL; 403 } 404 foo = atom; 405 } 406 else 407 free (atom); 408 409 return (const char *) foo; 410 } 411 412 /* Add an indication of the namespace to a type name in a way that is not valid 413 for C identifiers. Used to maintain hashes of type names to other things 414 while allowing for the four C namespaces (normal, struct, union, enum). 415 Return a pointer into the cd_decorated_names atoms table. */ 416 static const char * 417 ctf_decorate_type_name (ctf_dict_t *fp, const char *name, int kind) 418 { 419 ctf_dedup_t *d = &fp->ctf_dedup; 420 const char *ret; 421 const char *k; 422 char *p; 423 size_t i; 424 425 switch (kind) 426 { 427 case CTF_K_STRUCT: 428 k = "s "; 429 i = 0; 430 break; 431 case CTF_K_UNION: 432 k = "u "; 433 i = 1; 434 break; 435 case CTF_K_ENUM: 436 k = "e "; 437 i = 2; 438 break; 439 default: 440 k = ""; 441 i = 3; 442 } 443 444 if ((ret = ctf_dynhash_lookup (d->cd_decorated_names[i], name)) == NULL) 445 { 446 char *str; 447 448 if ((str = malloc (strlen (name) + strlen (k) + 1)) == NULL) 449 goto oom; 450 451 p = stpcpy (str, k); 452 strcpy (p, name); 453 ret = intern (fp, str); 454 if (!ret) 455 goto oom; 456 457 if (ctf_dynhash_cinsert (d->cd_decorated_names[i], name, ret) < 0) 458 goto oom; 459 } 460 461 return ret; 462 463 oom: 464 ctf_set_errno (fp, ENOMEM); 465 return NULL; 466 } 467 468 /* Hash a type, possibly debugging-dumping something about it as well. */ 469 static inline void 470 ctf_dedup_sha1_add (ctf_sha1_t *sha1, const void *buf, size_t len, 471 const char *description _libctf_unused_, 472 unsigned long depth _libctf_unused_) 473 { 474 ctf_sha1_add (sha1, buf, len); 475 476 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 477 ctf_sha1_t tmp; 478 char tmp_hval[CTF_SHA1_SIZE]; 479 tmp = *sha1; 480 ctf_sha1_fini (&tmp, tmp_hval); 481 ctf_dprintf ("%lu: after hash addition of %s: %s\n", depth, description, 482 tmp_hval); 483 #endif 484 } 485 486 static const char * 487 ctf_dedup_hash_type (ctf_dict_t *fp, ctf_dict_t *input, 488 ctf_dict_t **inputs, int input_num, 489 ctf_id_t type, int flags, unsigned long depth, 490 int (*populate_fun) (ctf_dict_t *fp, 491 ctf_dict_t *input, 492 ctf_dict_t **inputs, 493 int input_num, 494 ctf_id_t type, 495 int isroot, 496 void *id, 497 const char *decorated_name, 498 const char *hash)); 499 500 /* Determine whether this type is being hashed as a stub (in which case it is 501 unsafe to cache it). */ 502 static int 503 ctf_dedup_is_stub (const char *name, int kind, int fwdkind, int flags) 504 { 505 /* We can cache all types unless we are recursing to children and are hashing 506 in a tagged struct, union or forward, all of which are replaced with their 507 decorated name as a stub and will have different hash values when hashed at 508 the top level. */ 509 510 return ((flags & CTF_DEDUP_HASH_INTERNAL_CHILD) && name 511 && (kind == CTF_K_STRUCT || kind == CTF_K_UNION 512 || (kind == CTF_K_FORWARD && (fwdkind == CTF_K_STRUCT 513 || fwdkind == CTF_K_UNION)))); 514 } 515 516 /* Populate struct_origin if need be (not already populated, or populated with 517 a different origin), in which case it must go to -1, "shared".) 518 519 Only called for forwards or forwardable types with names, when the link mode 520 is CTF_LINK_SHARE_DUPLICATED. */ 521 static int 522 ctf_dedup_record_origin (ctf_dict_t *fp, int input_num, const char *decorated, 523 void *id) 524 { 525 ctf_dedup_t *d = &fp->ctf_dedup; 526 void *origin; 527 int populate_origin = 0; 528 529 if (ctf_dynhash_lookup_kv (d->cd_struct_origin, decorated, NULL, &origin)) 530 { 531 if (CTF_DEDUP_GID_TO_INPUT (origin) != input_num 532 && CTF_DEDUP_GID_TO_INPUT (origin) != -1) 533 { 534 populate_origin = 1; 535 origin = CTF_DEDUP_GID (fp, -1, -1); 536 } 537 } 538 else 539 { 540 populate_origin = 1; 541 origin = id; 542 } 543 544 if (populate_origin) 545 if (ctf_dynhash_cinsert (d->cd_struct_origin, decorated, origin) < 0) 546 return ctf_set_errno (fp, errno); 547 return 0; 548 } 549 550 /* Do the underlying hashing and recursion for ctf_dedup_hash_type (which it 551 calls, recursively). */ 552 553 static const char * 554 ctf_dedup_rhash_type (ctf_dict_t *fp, ctf_dict_t *input, ctf_dict_t **inputs, 555 int input_num, ctf_id_t type, void *type_id, 556 const ctf_type_t *tp, const char *name, 557 const char *decorated, int kind, int flags, 558 unsigned long depth, 559 int (*populate_fun) (ctf_dict_t *fp, 560 ctf_dict_t *input, 561 ctf_dict_t **inputs, 562 int input_num, 563 ctf_id_t type, 564 int isroot, 565 void *id, 566 const char *decorated_name, 567 const char *hash)) 568 { 569 ctf_dedup_t *d = &fp->ctf_dedup; 570 ctf_next_t *i = NULL; 571 ctf_sha1_t hash; 572 ctf_id_t child_type; 573 char hashbuf[CTF_SHA1_SIZE]; 574 const char *hval = NULL; 575 const char *whaterr; 576 int err = 0; 577 578 const char *citer = NULL; 579 ctf_dynset_t *citers = NULL; 580 581 /* Add a citer to the citers set. */ 582 #define ADD_CITER(citers, hval) \ 583 do \ 584 { \ 585 whaterr = N_("error updating citers"); \ 586 if (!citers) \ 587 if ((citers = ctf_dynset_create (htab_hash_string, \ 588 htab_eq_string, \ 589 NULL)) == NULL) \ 590 goto oom; \ 591 if (ctf_dynset_cinsert (citers, hval) < 0) \ 592 goto oom; \ 593 } \ 594 while (0) 595 596 /* If this is a named struct or union or a forward to one, and this is a child 597 traversal, treat this type as if it were a forward -- do not recurse to 598 children, ignore all content not already hashed in, and hash in the 599 decorated name of the type instead. */ 600 601 if (ctf_dedup_is_stub (name, kind, tp->ctt_type, flags)) 602 { 603 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 604 ctf_dprintf ("Struct/union/forward citation: substituting forwarding " 605 "stub with decorated name %s\n", decorated); 606 607 #endif 608 ctf_sha1_init (&hash); 609 ctf_dedup_sha1_add (&hash, decorated, strlen (decorated) + 1, 610 "decorated struct/union/forward name", depth); 611 ctf_sha1_fini (&hash, hashbuf); 612 613 if ((hval = intern (fp, strdup (hashbuf))) == NULL) 614 { 615 ctf_err_warn (fp, 0, 0, _("%s (%i): out of memory during forwarding-" 616 "stub hashing for type with GID %p"), 617 ctf_link_input_name (input), input_num, type_id); 618 return NULL; /* errno is set for us. */ 619 } 620 621 /* In share-duplicated link mode, make sure the origin of this type is 622 recorded, even if this is a type in a parent dict which will not be 623 directly traversed. */ 624 if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED 625 && ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0) 626 return NULL; /* errno is set for us. */ 627 628 return hval; 629 } 630 631 /* Now ensure that subsequent recursive calls (but *not* the top-level call) 632 get this treatment. */ 633 flags |= CTF_DEDUP_HASH_INTERNAL_CHILD; 634 635 /* If this is a struct, union, or forward with a name, record the unique 636 originating input TU, if there is one. */ 637 638 if (decorated && (ctf_forwardable_kind (kind) || kind != CTF_K_FORWARD)) 639 if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED 640 && ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0) 641 return NULL; /* errno is set for us. */ 642 643 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 644 ctf_dprintf ("%lu: hashing thing with ID %i/%lx (kind %i): %s.\n", 645 depth, input_num, type, kind, name ? name : ""); 646 #endif 647 648 /* Some type kinds don't have names: the API provides no way to set the name, 649 so the type the deduplicator outputs will be nameless even if the input 650 somehow has a name, and the name should not be mixed into the hash. */ 651 652 switch (kind) 653 { 654 case CTF_K_POINTER: 655 case CTF_K_ARRAY: 656 case CTF_K_FUNCTION: 657 case CTF_K_VOLATILE: 658 case CTF_K_CONST: 659 case CTF_K_RESTRICT: 660 case CTF_K_SLICE: 661 name = NULL; 662 } 663 664 /* Mix in invariant stuff, transforming the type kind if needed. Note that 665 the vlen is *not* hashed in: the actual variable-length info is hashed in 666 instead, piecewise. The vlen is not part of the type, only the 667 variable-length data is: identical types with distinct vlens are quite 668 possible. Equally, we do not want to hash in the isroot flag: both the 669 compiler and the deduplicator set the nonroot flag to indicate clashes with 670 *other types in the same TU* with the same name: so two types can easily 671 have distinct nonroot flags, yet be exactly the same type. This means we 672 can never use the non-root-visible flag from the input for anything, 673 because if there are several distinct values the one chosen is basically 674 random. We unify non-root-visible flags separately: see the uses of 675 cd_nonroot_consistency. */ 676 677 ctf_sha1_init (&hash); 678 if (name) 679 ctf_dedup_sha1_add (&hash, name, strlen (name) + 1, "name", depth); 680 ctf_dedup_sha1_add (&hash, &kind, sizeof (uint32_t), "kind", depth); 681 682 /* Hash content of this type. */ 683 switch (kind) 684 { 685 case CTF_K_UNKNOWN: 686 /* No extra state. */ 687 break; 688 case CTF_K_FORWARD: 689 690 /* Add the forwarded kind, stored in the ctt_type. */ 691 ctf_dedup_sha1_add (&hash, &tp->ctt_type, sizeof (tp->ctt_type), 692 "forwarded kind", depth); 693 break; 694 case CTF_K_INTEGER: 695 case CTF_K_FLOAT: 696 { 697 ctf_encoding_t ep; 698 memset (&ep, 0, sizeof (ctf_encoding_t)); 699 700 ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t), "size", 701 depth); 702 if (ctf_type_encoding (input, type, &ep) < 0) 703 { 704 whaterr = N_("error getting encoding"); 705 goto input_err; 706 } 707 ctf_dedup_sha1_add (&hash, &ep, sizeof (ctf_encoding_t), "encoding", 708 depth); 709 break; 710 } 711 /* Types that reference other types. */ 712 case CTF_K_TYPEDEF: 713 case CTF_K_VOLATILE: 714 case CTF_K_CONST: 715 case CTF_K_RESTRICT: 716 case CTF_K_POINTER: 717 /* Hash the referenced type, if not already hashed, and mix it in. */ 718 child_type = ctf_type_reference (input, type); 719 if ((hval = ctf_dedup_hash_type (fp, input, inputs, input_num, child_type, 720 flags, depth, populate_fun)) == NULL) 721 { 722 whaterr = N_("error doing referenced type hashing"); 723 goto err; 724 } 725 ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "referenced type", 726 depth); 727 citer = hval; 728 729 break; 730 731 /* The slices of two types hash identically only if the type they overlay 732 also has the same encoding. This is not ideal, but in practice will work 733 well enough. We work directly rather than using the CTF API because 734 we do not want the slice's normal automatically-shine-through 735 semantics to kick in here. */ 736 case CTF_K_SLICE: 737 { 738 const ctf_slice_t *slice; 739 const ctf_dtdef_t *dtd; 740 ssize_t size; 741 ssize_t increment; 742 743 child_type = ctf_type_reference (input, type); 744 ctf_get_ctt_size (input, tp, &size, &increment); 745 ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "size", depth); 746 747 if ((hval = ctf_dedup_hash_type (fp, input, inputs, input_num, 748 child_type, flags, depth, 749 populate_fun)) == NULL) 750 { 751 whaterr = N_("error doing slice-referenced type hashing"); 752 goto err; 753 } 754 ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "sliced type", 755 depth); 756 citer = hval; 757 758 if ((dtd = ctf_dynamic_type (input, type)) != NULL) 759 slice = (ctf_slice_t *) dtd->dtd_vlen; 760 else 761 slice = (ctf_slice_t *) ((uintptr_t) tp + increment); 762 763 ctf_dedup_sha1_add (&hash, &slice->cts_offset, 764 sizeof (slice->cts_offset), "slice offset", depth); 765 ctf_dedup_sha1_add (&hash, &slice->cts_bits, 766 sizeof (slice->cts_bits), "slice bits", depth); 767 break; 768 } 769 770 case CTF_K_ARRAY: 771 { 772 ctf_arinfo_t ar; 773 774 if (ctf_array_info (input, type, &ar) < 0) 775 { 776 whaterr = N_("error getting array info"); 777 goto input_err; 778 } 779 780 if ((hval = ctf_dedup_hash_type (fp, input, inputs, input_num, 781 ar.ctr_contents, flags, depth, 782 populate_fun)) == NULL) 783 { 784 whaterr = N_("error doing array contents type hashing"); 785 goto err; 786 } 787 ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array contents", 788 depth); 789 ADD_CITER (citers, hval); 790 791 if ((hval = ctf_dedup_hash_type (fp, input, inputs, input_num, 792 ar.ctr_index, flags, depth, 793 populate_fun)) == NULL) 794 { 795 whaterr = N_("error doing array index type hashing"); 796 goto err; 797 } 798 ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array index", 799 depth); 800 ctf_dedup_sha1_add (&hash, &ar.ctr_nelems, sizeof (ar.ctr_nelems), 801 "element count", depth); 802 ADD_CITER (citers, hval); 803 804 break; 805 } 806 case CTF_K_FUNCTION: 807 { 808 ctf_funcinfo_t fi; 809 ctf_id_t *args; 810 uint32_t j; 811 812 if (ctf_func_type_info (input, type, &fi) < 0) 813 { 814 whaterr = N_("error getting func type info"); 815 goto input_err; 816 } 817 818 if ((hval = ctf_dedup_hash_type (fp, input, inputs, input_num, 819 fi.ctc_return, flags, depth, 820 populate_fun)) == NULL) 821 { 822 whaterr = N_("error getting func return type"); 823 goto err; 824 } 825 ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func return", 826 depth); 827 ctf_dedup_sha1_add (&hash, &fi.ctc_argc, sizeof (fi.ctc_argc), 828 "func argc", depth); 829 ctf_dedup_sha1_add (&hash, &fi.ctc_flags, sizeof (fi.ctc_flags), 830 "func flags", depth); 831 ADD_CITER (citers, hval); 832 833 if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) 834 { 835 err = ENOMEM; 836 whaterr = N_("error doing memory allocation"); 837 goto err; 838 } 839 840 if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0) 841 { 842 free (args); 843 whaterr = N_("error getting func arg type"); 844 goto input_err; 845 } 846 for (j = 0; j < fi.ctc_argc; j++) 847 { 848 if ((hval = ctf_dedup_hash_type (fp, input, inputs, input_num, 849 args[j], flags, depth, 850 populate_fun)) == NULL) 851 { 852 free (args); 853 whaterr = N_("error doing func arg type hashing"); 854 goto err; 855 } 856 ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func arg type", 857 depth); 858 ADD_CITER (citers, hval); 859 } 860 free (args); 861 break; 862 } 863 case CTF_K_ENUM: 864 { 865 int val; 866 const char *ename; 867 868 ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t), 869 "enum size", depth); 870 while ((ename = ctf_enum_next (input, type, &i, &val)) != NULL) 871 { 872 ctf_dedup_sha1_add (&hash, ename, strlen (ename) + 1, "enumerator", 873 depth); 874 ctf_dedup_sha1_add (&hash, &val, sizeof (val), "enumerand", depth); 875 } 876 if (ctf_errno (input) != ECTF_NEXT_END) 877 { 878 whaterr = N_("error doing enum member iteration"); 879 goto input_err; 880 } 881 break; 882 } 883 /* Top-level only. */ 884 case CTF_K_STRUCT: 885 case CTF_K_UNION: 886 { 887 ssize_t offset; 888 const char *mname; 889 ctf_id_t membtype; 890 ssize_t size; 891 892 ctf_get_ctt_size (input, tp, &size, NULL); 893 ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "struct size", 894 depth); 895 896 while ((offset = ctf_member_next (input, type, &i, &mname, &membtype, 897 0)) >= 0) 898 { 899 if (mname == NULL) 900 mname = ""; 901 ctf_dedup_sha1_add (&hash, mname, strlen (mname) + 1, 902 "member name", depth); 903 904 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 905 ctf_dprintf ("%lu: Traversing to member %s\n", depth, mname); 906 #endif 907 if ((hval = ctf_dedup_hash_type (fp, input, inputs, input_num, 908 membtype, flags, depth, 909 populate_fun)) == NULL) 910 { 911 whaterr = N_("error doing struct/union member type hashing"); 912 goto iterr; 913 } 914 915 ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "member hash", 916 depth); 917 ctf_dedup_sha1_add (&hash, &offset, sizeof (offset), "member offset", 918 depth); 919 ADD_CITER (citers, hval); 920 } 921 if (ctf_errno (input) != ECTF_NEXT_END) 922 { 923 whaterr = N_("error doing struct/union member iteration"); 924 goto input_err; 925 } 926 break; 927 } 928 default: 929 whaterr = N_("error: unknown type kind"); 930 goto err; 931 } 932 ctf_sha1_fini (&hash, hashbuf); 933 934 if ((hval = intern (fp, strdup (hashbuf))) == NULL) 935 { 936 whaterr = N_("cannot intern hash"); 937 goto oom; 938 } 939 940 /* Populate the citers for this type's subtypes, now the hash for the type 941 itself is known. */ 942 whaterr = N_("error tracking citers"); 943 944 if (citer) 945 { 946 ctf_dynset_t *citer_hashes; 947 948 if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL) 949 goto oom; 950 if (ctf_dynset_cinsert (citer_hashes, hval) < 0) 951 goto oom; 952 } 953 else if (citers) 954 { 955 const void *k; 956 957 while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0) 958 { 959 ctf_dynset_t *citer_hashes; 960 citer = (const char *) k; 961 962 if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL) 963 goto oom; 964 965 if (ctf_dynset_exists (citer_hashes, hval, NULL)) 966 continue; 967 if (ctf_dynset_cinsert (citer_hashes, hval) < 0) 968 goto oom; 969 } 970 if (err != ECTF_NEXT_END) 971 goto err; 972 ctf_dynset_destroy (citers); 973 } 974 975 return hval; 976 977 iterr: 978 ctf_next_destroy (i); 979 input_err: 980 err = ctf_errno (input); 981 err: 982 ctf_sha1_fini (&hash, NULL); 983 ctf_err_warn (fp, 0, err, _("%s (%i): %s: during type hashing for type %lx, " 984 "kind %i"), ctf_link_input_name (input), 985 input_num, gettext (whaterr), type, kind); 986 return NULL; 987 oom: 988 ctf_set_errno (fp, errno); 989 ctf_err_warn (fp, 0, 0, _("%s (%i): %s: during type hashing for type %lx, " 990 "kind %i"), ctf_link_input_name (input), 991 input_num, gettext (whaterr), type, kind); 992 return NULL; 993 } 994 995 /* Hash a TYPE in the INPUT: FP is the eventual output, where the ctf_dedup 996 state is stored. INPUT_NUM is the number of this input in the set of inputs. 997 Record its hash in FP's cd_type_hashes once it is known. 998 999 (The flags argument currently accepts only the flag 1000 CTF_DEDUP_HASH_INTERNAL_CHILD, an implementation detail used to prevent 1001 struct/union hashing in recursive traversals below the TYPE.) 1002 1003 We use the CTF API rather than direct access wherever possible, because types 1004 that appear identical through the API should be considered identical, with 1005 one exception: slices should only be considered identical to other slices, 1006 not to the corresponding unsliced type. 1007 1008 The POPULATE_FUN is a mandatory hook that populates other mappings with each 1009 type we see (excepting types that are recursively hashed as stubs). The 1010 caller should not rely on the order of calls to this hook, though it will be 1011 called at least once for every non-stub reference to every type. 1012 1013 Returns a hash value (an atom), or NULL on error. */ 1014 1015 static const char * 1016 ctf_dedup_hash_type (ctf_dict_t *fp, ctf_dict_t *input, 1017 ctf_dict_t **inputs, int input_num, ctf_id_t type, 1018 int flags, unsigned long depth, 1019 int (*populate_fun) (ctf_dict_t *fp, 1020 ctf_dict_t *input, 1021 ctf_dict_t **inputs, 1022 int input_num, 1023 ctf_id_t type, 1024 int isroot, 1025 void *id, 1026 const char *decorated_name, 1027 const char *hash)) 1028 { 1029 ctf_dedup_t *d = &fp->ctf_dedup; 1030 const ctf_type_t *tp; 1031 void *type_id; 1032 const char *hval = NULL; 1033 const char *name; 1034 const char *whaterr; 1035 const char *decorated = NULL; 1036 uint32_t kind, fwdkind; 1037 int isroot; 1038 1039 depth++; 1040 1041 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 1042 ctf_dprintf ("%lu: ctf_dedup_hash_type (%i, %lx, flags %x)\n", depth, input_num, type, flags); 1043 #endif 1044 1045 /* The unimplemented type doesn't really exist, but must be noted in parent 1046 hashes: so it gets a fixed, arbitrary hash. */ 1047 if (type == 0) 1048 return "00000000000000000000"; 1049 1050 /* Possible optimization: if the input type is in the parent type space, just 1051 copy recursively-cited hashes from the parent's types into the output 1052 mapping rather than rehashing them. */ 1053 1054 type_id = CTF_DEDUP_GID (fp, input_num, type); 1055 1056 if ((tp = ctf_lookup_by_id (&input, type)) == NULL) 1057 { 1058 ctf_set_errno (fp, ctf_errno (input)); 1059 ctf_err_warn (fp, 0, 0, _("%s (%i): lookup failure for type %lx: " 1060 "flags %x"), ctf_link_input_name (input), 1061 input_num, type, flags); 1062 return NULL; /* errno is set for us. */ 1063 } 1064 1065 kind = LCTF_INFO_KIND (input, tp->ctt_info); 1066 name = ctf_strraw (input, tp->ctt_name); 1067 isroot = LCTF_INFO_ISROOT (input, tp->ctt_info); 1068 1069 if (tp->ctt_name == 0 || !name || name[0] == '\0') 1070 name = NULL; 1071 1072 /* Decorate the name appropriately for the namespace it appears in: forwards 1073 appear in the namespace of their referent. */ 1074 1075 fwdkind = kind; 1076 if (name) 1077 { 1078 if (kind == CTF_K_FORWARD) 1079 fwdkind = tp->ctt_type; 1080 1081 if ((decorated = ctf_decorate_type_name (fp, name, fwdkind)) == NULL) 1082 return NULL; /* errno is set for us. */ 1083 } 1084 1085 /* If not hashing a stub, we can rely on various sorts of caches. 1086 1087 Optimization opportunity: we may be able to avoid calling the populate_fun 1088 sometimes here. */ 1089 1090 if (!ctf_dedup_is_stub (name, kind, fwdkind, flags)) 1091 { 1092 if ((hval = ctf_dynhash_lookup (d->cd_type_hashes, type_id)) != NULL) 1093 { 1094 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 1095 ctf_dprintf ("%lu: Known hash for ID %i/%lx: %s\n", depth, input_num, 1096 type, hval); 1097 #endif 1098 populate_fun (fp, input, inputs, input_num, type, isroot, type_id, 1099 decorated, hval); 1100 1101 return hval; 1102 } 1103 } 1104 1105 /* We have never seen this type before, and must figure out its hash and the 1106 hashes of the types it cites. 1107 1108 Hash this type, and call ourselves recursively. (The hashing part is 1109 optional, and is disabled if overidden_hval is set.) */ 1110 1111 if ((hval = ctf_dedup_rhash_type (fp, input, inputs, input_num, 1112 type, type_id, tp, name, decorated, 1113 kind, flags, depth, populate_fun)) == NULL) 1114 return NULL; /* errno is set for us. */ 1115 1116 /* The hash of this type is now known: record it unless caching is unsafe 1117 because the hash value will change later. This will be the final storage 1118 of this type's hash, so we call the population function on it. */ 1119 1120 if (!ctf_dedup_is_stub (name, kind, fwdkind, flags)) 1121 { 1122 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 1123 ctf_dprintf ("Caching %lx, ID %p (%s), %s in final location\n", type, 1124 type_id, name ? name : "", hval); 1125 #endif 1126 1127 if (ctf_dynhash_cinsert (d->cd_type_hashes, type_id, hval) < 0) 1128 { 1129 whaterr = N_("error hash caching"); 1130 goto oom; 1131 } 1132 1133 if (populate_fun (fp, input, inputs, input_num, type, isroot, type_id, 1134 decorated, hval) < 0) 1135 { 1136 whaterr = N_("error calling population function"); 1137 goto err; /* errno is set for us. */ 1138 } 1139 } 1140 1141 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 1142 ctf_dprintf ("%lu: Returning final hash for ID %i/%lx: %s\n", depth, 1143 input_num, type, hval); 1144 #endif 1145 return hval; 1146 1147 oom: 1148 ctf_set_errno (fp, errno); 1149 err: 1150 ctf_err_warn (fp, 0, 0, _("%s (%i): %s: during type hashing, " 1151 "type %lx, kind %i"), 1152 ctf_link_input_name (input), input_num, 1153 gettext (whaterr), type, kind); 1154 return NULL; 1155 } 1156 1157 static int 1158 ctf_dedup_count_name (ctf_dict_t *fp, const char *name, void *id); 1159 1160 /* Populate a number of useful mappings not directly used by the hashing 1161 machinery: the output mapping, the cd_name_counts mapping from name -> hash 1162 -> count of hashval deduplication state for a given hashed type; the 1163 cd_output_first_gid mapping; and the cd_nonroot_consistency mapping. */ 1164 1165 static int 1166 ctf_dedup_populate_mappings (ctf_dict_t *fp, ctf_dict_t *input _libctf_unused_, 1167 ctf_dict_t **inputs _libctf_unused_, 1168 int input_num _libctf_unused_, 1169 ctf_id_t type _libctf_unused_, int isroot, 1170 void *id, const char *decorated_name, 1171 const char *hval) 1172 { 1173 ctf_dedup_t *d = &fp->ctf_dedup; 1174 ctf_dynset_t *type_ids; 1175 void *root_visible; 1176 1177 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 1178 ctf_dprintf ("Hash %s, %s, into output mapping for %i/%lx @ %s\n", 1179 hval, decorated_name ? decorated_name : "(unnamed)", 1180 input_num, type, ctf_link_input_name (input)); 1181 1182 const char *orig_hval; 1183 1184 /* Make sure we never map a single GID to multiple hash values. */ 1185 1186 if ((orig_hval = ctf_dynhash_lookup (d->cd_output_mapping_guard, id)) != NULL) 1187 { 1188 /* We can rely on pointer identity here, since all hashes are 1189 interned. */ 1190 if (!ctf_assert (fp, orig_hval == hval)) 1191 return -1; 1192 } 1193 else 1194 if (ctf_dynhash_cinsert (d->cd_output_mapping_guard, id, hval) < 0) 1195 return ctf_set_errno (fp, errno); 1196 #endif 1197 1198 /* Record the type in the output mapping: if this is the first time this type 1199 has been seen, also record it in the cd_output_first_gid. Because we 1200 traverse types in TU order and we do not merge types after the hashing 1201 phase, this will be the lowest TU this type ever appears in. */ 1202 1203 if ((type_ids = ctf_dynhash_lookup (d->cd_output_mapping, 1204 hval)) == NULL) 1205 { 1206 if (ctf_dynhash_cinsert (d->cd_output_first_gid, hval, id) < 0) 1207 return ctf_set_errno (fp, errno); 1208 1209 if ((type_ids = ctf_dynset_create (htab_hash_pointer, 1210 htab_eq_pointer, 1211 NULL)) == NULL) 1212 return ctf_set_errno (fp, errno); 1213 if (ctf_dynhash_insert (d->cd_output_mapping, (void *) hval, 1214 type_ids) < 0) 1215 { 1216 ctf_dynset_destroy (type_ids); 1217 return ctf_set_errno (fp, errno); 1218 } 1219 } 1220 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 1221 { 1222 /* Verify that all types with this hash are of the same kind, and that the 1223 first TU a type was seen in never falls. */ 1224 1225 int err; 1226 const void *one_id; 1227 ctf_next_t *i = NULL; 1228 int orig_kind = ctf_type_kind_unsliced (input, type); 1229 int orig_first_tu; 1230 1231 orig_first_tu = CTF_DEDUP_GID_TO_INPUT 1232 (ctf_dynhash_lookup (d->cd_output_first_gid, hval)); 1233 if (!ctf_assert (fp, orig_first_tu <= CTF_DEDUP_GID_TO_INPUT (id))) 1234 return -1; 1235 1236 while ((err = ctf_dynset_cnext (type_ids, &i, &one_id)) == 0) 1237 { 1238 ctf_dict_t *foo = inputs[CTF_DEDUP_GID_TO_INPUT (one_id)]; 1239 ctf_id_t bar = CTF_DEDUP_GID_TO_TYPE (one_id); 1240 if (ctf_type_kind_unsliced (foo, bar) != orig_kind) 1241 { 1242 ctf_err_warn (fp, 1, 0, "added wrong kind to output mapping " 1243 "for hash %s named %s: %p/%lx from %s is " 1244 "kind %i, but newly-added %p/%lx from %s is " 1245 "kind %i", hval, 1246 decorated_name ? decorated_name : "(unnamed)", 1247 (void *) foo, bar, 1248 ctf_link_input_name (foo), 1249 ctf_type_kind_unsliced (foo, bar), 1250 (void *) input, type, 1251 ctf_link_input_name (input), orig_kind); 1252 if (!ctf_assert (fp, ctf_type_kind_unsliced (foo, bar) 1253 == orig_kind)) 1254 return -1; 1255 } 1256 } 1257 if (err != ECTF_NEXT_END) 1258 return ctf_set_errno (fp, err); 1259 } 1260 #endif 1261 1262 /* Track the consistency of the non-root flag for this type. 1263 0: all root-visible; 1: all non-root-visible; 2: inconsistent. */ 1264 1265 if (!ctf_dynhash_lookup_kv (d->cd_nonroot_consistency, hval, NULL, 1266 &root_visible)) 1267 { 1268 if (isroot) 1269 root_visible = (void *) 0; 1270 else 1271 root_visible = (void *) 1; 1272 1273 if (ctf_dynhash_cinsert (d->cd_nonroot_consistency, hval, root_visible) < 0) 1274 return ctf_set_errno (fp, errno); 1275 } 1276 else 1277 { 1278 if (((uintptr_t) root_visible == 0 && !isroot) 1279 || ((uintptr_t) root_visible == 1 && isroot)) 1280 { 1281 root_visible = (void *) 2; 1282 1283 if (ctf_dynhash_cinsert (d->cd_nonroot_consistency, hval, root_visible) < 0) 1284 return ctf_set_errno (fp, errno); 1285 } 1286 } 1287 1288 /* This function will be repeatedly called for the same types many times: 1289 don't waste time reinserting the same keys in that case. */ 1290 if (!ctf_dynset_exists (type_ids, id, NULL) 1291 && ctf_dynset_insert (type_ids, id) < 0) 1292 return ctf_set_errno (fp, errno); 1293 1294 if (ctf_type_kind_unsliced (input, type) == CTF_K_ENUM) 1295 { 1296 ctf_next_t *i = NULL; 1297 const char *enumerator; 1298 1299 while ((enumerator = ctf_enum_next (input, type, &i, NULL)) != NULL) 1300 { 1301 if (ctf_dedup_count_name (fp, enumerator, id) < 0) 1302 { 1303 ctf_next_destroy (i); 1304 return -1; 1305 } 1306 } 1307 if (ctf_errno (input) != ECTF_NEXT_END) 1308 return ctf_set_errno (fp, ctf_errno (input)); 1309 } 1310 1311 /* The rest only needs to happen for types with names. */ 1312 if (!decorated_name) 1313 return 0; 1314 1315 if (ctf_dedup_count_name (fp, decorated_name, id) < 0) 1316 return -1; /* errno is set for us. */ 1317 1318 return 0; 1319 } 1320 1321 /* Clean up things no longer needed after hashing is over. */ 1322 static int 1323 ctf_dedup_hash_type_fini (ctf_dict_t *fp) 1324 { 1325 ctf_next_t *i = NULL; 1326 int err; 1327 void *hval, *root_visible; 1328 1329 /* Clean up cd_nonroot_consistency. We only care now about types we are sure 1330 are non-root-visible everywhere: root-visible types and types that are 1331 sometimes root-visible and sometimes not are treated as root-visible. */ 1332 1333 while ((err = ctf_dynhash_next (fp->ctf_dedup.cd_nonroot_consistency, &i, 1334 &hval, &root_visible)) == 0) 1335 { 1336 if ((uintptr_t) root_visible != 1) 1337 ctf_dynhash_next_remove (&i); 1338 } 1339 if (err != ECTF_NEXT_END) 1340 { 1341 ctf_err_warn (fp, 0, err, _("iteration failure cleaning up type hashes")); 1342 return ctf_set_errno (fp, err); 1343 } 1344 1345 return 0; 1346 } 1347 1348 static int 1349 ctf_dedup_count_name (ctf_dict_t *fp, const char *name, void *id) 1350 { 1351 ctf_dedup_t *d = &fp->ctf_dedup; 1352 ctf_dynhash_t *name_counts; 1353 long int count; 1354 const char *hval; 1355 1356 /* Count the number of occurrences of the hash value for this GID. */ 1357 1358 hval = ctf_dynhash_lookup (d->cd_type_hashes, id); 1359 1360 /* Mapping from name -> hash(hashval, count) not already present? */ 1361 if ((name_counts = ctf_dynhash_lookup (d->cd_name_counts, name)) == NULL) 1362 { 1363 if ((name_counts = ctf_dynhash_create (ctf_hash_string, 1364 ctf_hash_eq_string, 1365 NULL, NULL)) == NULL) 1366 return ctf_set_errno (fp, errno); 1367 if (ctf_dynhash_cinsert (d->cd_name_counts, name, name_counts) < 0) 1368 { 1369 ctf_dynhash_destroy (name_counts); 1370 return ctf_set_errno (fp, errno); 1371 } 1372 } 1373 1374 /* This will, conveniently, return NULL (i.e. 0) for a new entry. */ 1375 count = (long int) (uintptr_t) ctf_dynhash_lookup (name_counts, hval); 1376 1377 if (ctf_dynhash_cinsert (name_counts, hval, 1378 (const void *) (uintptr_t) (count + 1)) < 0) 1379 return ctf_set_errno (fp, errno); 1380 1381 return 0; 1382 } 1383 1384 /* Mark a single hash as corresponding to a conflicting type. Mark all types 1385 that cite it as conflicting as well, terminating the recursive walk only when 1386 types that are already conflicted or types do not cite other types are seen. 1387 (Tagged structures and unions do not appear in the cd_citers graph, so the 1388 walk also terminates there, since any reference to a conflicting structure is 1389 just going to reference an unconflicting forward instead: see 1390 ctf_dedup_maybe_synthesize_forward.) */ 1391 1392 static int 1393 ctf_dedup_mark_conflicting_hash (ctf_dict_t *fp, const char *hval) 1394 { 1395 ctf_dedup_t *d = &fp->ctf_dedup; 1396 ctf_next_t *i = NULL; 1397 int err; 1398 const void *k; 1399 ctf_dynset_t *citers; 1400 1401 /* Mark conflicted if not already so marked. */ 1402 if (ctf_dynset_exists (d->cd_conflicting_types, hval, NULL)) 1403 return 0; 1404 1405 ctf_dprintf ("Marking %s as conflicted\n", hval); 1406 1407 if (ctf_dynset_cinsert (d->cd_conflicting_types, hval) < 0) 1408 { 1409 ctf_dprintf ("Out of memory marking %s as conflicted\n", hval); 1410 return ctf_set_errno (fp, errno); 1411 } 1412 1413 /* If any types cite this type, mark them conflicted too. */ 1414 if ((citers = ctf_dynhash_lookup (d->cd_citers, hval)) == NULL) 1415 return 0; 1416 1417 while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0) 1418 { 1419 const char *hv = (const char *) k; 1420 1421 if (ctf_dynset_exists (d->cd_conflicting_types, hv, NULL)) 1422 continue; 1423 1424 if (ctf_dedup_mark_conflicting_hash (fp, hv) < 0) 1425 { 1426 ctf_next_destroy (i); 1427 return -1; /* errno is set for us. */ 1428 } 1429 } 1430 if (err != ECTF_NEXT_END) 1431 return ctf_set_errno (fp, err); 1432 1433 return 0; 1434 } 1435 1436 /* Look up a type kind from the output mapping, given a type hash value. */ 1437 static int 1438 ctf_dedup_hash_kind (ctf_dict_t *fp, ctf_dict_t **inputs, const char *hash) 1439 { 1440 ctf_dedup_t *d = &fp->ctf_dedup; 1441 void *id; 1442 ctf_dynset_t *type_ids; 1443 1444 /* Precondition: the output mapping is populated. */ 1445 if (!ctf_assert (fp, ctf_dynhash_elements (d->cd_output_mapping) > 0)) 1446 return -1; 1447 1448 /* Look up some GID from the output hash for this type. (They are all 1449 identical, so we can pick any). Don't assert if someone calls this 1450 function wrongly, but do assert if the output mapping knows about the hash, 1451 but has nothing associated with it. */ 1452 1453 type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hash); 1454 if (!type_ids) 1455 { 1456 ctf_dprintf ("Looked up type kind by nonexistent hash %s.\n", hash); 1457 return ctf_set_errno (fp, ECTF_INTERNAL); 1458 } 1459 id = ctf_dynset_lookup_any (type_ids); 1460 if (!ctf_assert (fp, id)) 1461 return -1; 1462 1463 return ctf_type_kind_unsliced (inputs[CTF_DEDUP_GID_TO_INPUT (id)], 1464 CTF_DEDUP_GID_TO_TYPE (id)); 1465 } 1466 1467 /* Used to keep a count of types: i.e. distinct type hash values. */ 1468 typedef struct ctf_dedup_type_counter 1469 { 1470 ctf_dict_t *fp; 1471 ctf_dict_t **inputs; 1472 int num_non_forwards; 1473 } ctf_dedup_type_counter_t; 1474 1475 /* Add to the type counter for one name entry from the cd_name_counts. */ 1476 static int 1477 ctf_dedup_count_types (void *key_, void *value _libctf_unused_, void *arg_) 1478 { 1479 const char *hval = (const char *) key_; 1480 int kind; 1481 ctf_dedup_type_counter_t *arg = (ctf_dedup_type_counter_t *) arg_; 1482 1483 kind = ctf_dedup_hash_kind (arg->fp, arg->inputs, hval); 1484 1485 /* We rely on ctf_dedup_hash_kind setting the fp to -ECTF_INTERNAL on error to 1486 smuggle errors out of here. */ 1487 1488 if (kind != CTF_K_FORWARD) 1489 { 1490 arg->num_non_forwards++; 1491 ctf_dprintf ("Counting hash %s: kind %i: num_non_forwards is %i\n", 1492 hval, kind, arg->num_non_forwards); 1493 } 1494 1495 /* We only need to know if there is more than one non-forward (an ambiguous 1496 type): don't waste time iterating any more than needed to figure that 1497 out. */ 1498 1499 if (arg->num_non_forwards > 1) 1500 return 1; 1501 1502 return 0; 1503 } 1504 1505 /* Detect name ambiguity and mark ambiguous names as conflicting, other than the 1506 most common. */ 1507 static int 1508 ctf_dedup_detect_name_ambiguity (ctf_dict_t *fp, ctf_dict_t **inputs) 1509 { 1510 ctf_dedup_t *d = &fp->ctf_dedup; 1511 ctf_next_t *i = NULL; 1512 void *k; 1513 void *v; 1514 int err; 1515 const char *whaterr; 1516 1517 /* Go through cd_name_counts for all CTF namespaces in turn. */ 1518 1519 while ((err = ctf_dynhash_next (d->cd_name_counts, &i, &k, &v)) == 0) 1520 { 1521 const char *decorated = (const char *) k; 1522 ctf_dynhash_t *name_counts = (ctf_dynhash_t *) v; 1523 ctf_next_t *j = NULL; 1524 1525 /* If this is a forwardable kind or a forward (which we can tell without 1526 consulting the type because its decorated name has a space as its 1527 second character: see ctf_decorate_type_name), we are only interested 1528 in whether this name has many hashes associated with it: any such name 1529 is necessarily ambiguous, and types with that name are conflicting. 1530 Once we know whether this is true, we can skip to the next name: so use 1531 ctf_dynhash_iter_find for efficiency. */ 1532 1533 if (decorated[0] != '\0' && decorated[1] == ' ') 1534 { 1535 ctf_dedup_type_counter_t counters = { fp, inputs, 0 }; 1536 1537 ctf_dynhash_iter_find (name_counts, ctf_dedup_count_types, &counters); 1538 1539 /* Check for assertion failure and pass it up. */ 1540 if (ctf_errno (fp) == ECTF_INTERNAL) 1541 goto assert_err; 1542 1543 if (counters.num_non_forwards > 1) 1544 { 1545 const void *hval_; 1546 1547 while ((err = ctf_dynhash_cnext (name_counts, &j, &hval_, NULL)) == 0) 1548 { 1549 const char *hval = (const char *) hval_; 1550 ctf_dynset_t *type_ids; 1551 void *id; 1552 int kind; 1553 1554 /* Dig through the types in this hash to find the non-forwards 1555 and mark them ambiguous. */ 1556 1557 type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); 1558 1559 /* Nonexistent? Must be a forward with no referent. */ 1560 if (!type_ids) 1561 continue; 1562 1563 id = ctf_dynset_lookup_any (type_ids); 1564 1565 kind = ctf_type_kind (inputs[CTF_DEDUP_GID_TO_INPUT (id)], 1566 CTF_DEDUP_GID_TO_TYPE (id)); 1567 1568 if (kind != CTF_K_FORWARD) 1569 { 1570 ctf_dprintf ("Marking %p, with hash %s, conflicting: one " 1571 "of many non-forward GIDs for %s\n", id, 1572 hval, (char *) k); 1573 ctf_dedup_mark_conflicting_hash (fp, hval); 1574 } 1575 } 1576 if (err != ECTF_NEXT_END) 1577 { 1578 whaterr = N_("error marking conflicting structs/unions"); 1579 goto iterr; 1580 } 1581 } 1582 } 1583 else 1584 { 1585 /* This is an ordinary type. Find the most common type with this 1586 name, and mark it unconflicting: all others are conflicting. (We 1587 cannot do this sort of popularity contest with forwardable types 1588 because any forwards to that type would be immediately unified with 1589 the most-popular type on insertion, and we want conflicting structs 1590 et al to have all forwards left intact, so the user is notified 1591 that this type is conflicting. TODO: improve this in future by 1592 setting such forwards non-root-visible.) 1593 1594 If multiple distinct types are "most common", pick the one that 1595 appears first on the link line, and within that, the one with the 1596 lowest type ID. (See sort_output_mapping.) */ 1597 1598 const void *key; 1599 const void *count; 1600 const char *hval; 1601 long max_hcount = -1; 1602 void *max_gid = NULL; 1603 const char *max_hval = NULL; 1604 1605 if (ctf_dynhash_elements (name_counts) <= 1) 1606 continue; 1607 1608 /* First find the most common. */ 1609 while ((err = ctf_dynhash_cnext (name_counts, &j, &key, &count)) == 0) 1610 { 1611 hval = (const char *) key; 1612 1613 if ((long int) (uintptr_t) count > max_hcount) 1614 { 1615 max_hcount = (long int) (uintptr_t) count; 1616 max_hval = hval; 1617 max_gid = ctf_dynhash_lookup (d->cd_output_first_gid, hval); 1618 } 1619 else if ((long int) (uintptr_t) count == max_hcount) 1620 { 1621 void *gid = ctf_dynhash_lookup (d->cd_output_first_gid, hval); 1622 1623 if (CTF_DEDUP_GID_TO_INPUT(gid) < CTF_DEDUP_GID_TO_INPUT(max_gid) 1624 || (CTF_DEDUP_GID_TO_INPUT(gid) == CTF_DEDUP_GID_TO_INPUT(max_gid) 1625 && CTF_DEDUP_GID_TO_TYPE(gid) < CTF_DEDUP_GID_TO_TYPE(max_gid))) 1626 { 1627 max_hval = hval; 1628 max_gid = ctf_dynhash_lookup (d->cd_output_first_gid, hval); 1629 } 1630 } 1631 } 1632 if (err != ECTF_NEXT_END) 1633 { 1634 whaterr = N_("error finding commonest conflicting type"); 1635 goto iterr; 1636 } 1637 1638 /* Mark all the others as conflicting. */ 1639 while ((err = ctf_dynhash_cnext (name_counts, &j, &key, NULL)) == 0) 1640 { 1641 hval = (const char *) key; 1642 if (strcmp (max_hval, hval) == 0) 1643 continue; 1644 1645 ctf_dprintf ("Marking %s, an uncommon hash for %s, conflicting\n", 1646 hval, (const char *) k); 1647 if (ctf_dedup_mark_conflicting_hash (fp, hval) < 0) 1648 { 1649 whaterr = N_("error marking hashes as conflicting"); 1650 goto err; 1651 } 1652 } 1653 if (err != ECTF_NEXT_END) 1654 { 1655 whaterr = N_("marking uncommon conflicting types"); 1656 goto iterr; 1657 } 1658 } 1659 } 1660 if (err != ECTF_NEXT_END) 1661 { 1662 whaterr = N_("scanning for ambiguous names"); 1663 goto iterr; 1664 } 1665 1666 return 0; 1667 1668 err: 1669 ctf_next_destroy (i); 1670 ctf_err_warn (fp, 0, 0, "%s", gettext (whaterr)); 1671 return -1; /* errno is set for us. */ 1672 1673 iterr: 1674 ctf_err_warn (fp, 0, err, _("iteration failed: %s"), gettext (whaterr)); 1675 return ctf_set_errno (fp, err); 1676 1677 assert_err: 1678 ctf_next_destroy (i); 1679 return -1; /* errno is set for us. */ 1680 } 1681 1682 /* Initialize the deduplication machinery. */ 1683 1684 static int 1685 ctf_dedup_init (ctf_dict_t *fp) 1686 { 1687 ctf_dedup_t *d = &fp->ctf_dedup; 1688 size_t i; 1689 1690 if (ctf_dedup_atoms_init (fp) < 0) 1691 goto oom; 1692 1693 #if IDS_NEED_ALLOCATION 1694 if ((d->cd_id_to_dict_t = ctf_dynhash_create (ctf_hash_type_id_key, 1695 ctf_hash_eq_type_id_key, 1696 free, NULL)) == NULL) 1697 goto oom; 1698 #endif 1699 1700 for (i = 0; i < 4; i++) 1701 { 1702 if ((d->cd_decorated_names[i] = ctf_dynhash_create (ctf_hash_string, 1703 ctf_hash_eq_string, 1704 NULL, NULL)) == NULL) 1705 goto oom; 1706 } 1707 1708 if ((d->cd_name_counts 1709 = ctf_dynhash_create (ctf_hash_string, 1710 ctf_hash_eq_string, NULL, 1711 (ctf_hash_free_fun) ctf_dynhash_destroy)) == NULL) 1712 goto oom; 1713 1714 if ((d->cd_type_hashes 1715 = ctf_dynhash_create (ctf_hash_integer, 1716 ctf_hash_eq_integer, 1717 NULL, NULL)) == NULL) 1718 goto oom; 1719 1720 if ((d->cd_struct_origin 1721 = ctf_dynhash_create (ctf_hash_string, 1722 ctf_hash_eq_string, 1723 NULL, NULL)) == NULL) 1724 goto oom; 1725 1726 if ((d->cd_citers 1727 = ctf_dynhash_create (ctf_hash_string, 1728 ctf_hash_eq_string, NULL, 1729 (ctf_hash_free_fun) ctf_dynset_destroy)) == NULL) 1730 goto oom; 1731 1732 if ((d->cd_output_mapping 1733 = ctf_dynhash_create (ctf_hash_string, 1734 ctf_hash_eq_string, NULL, 1735 (ctf_hash_free_fun) ctf_dynset_destroy)) == NULL) 1736 goto oom; 1737 1738 if ((d->cd_output_first_gid 1739 = ctf_dynhash_create (ctf_hash_string, 1740 ctf_hash_eq_string, 1741 NULL, NULL)) == NULL) 1742 goto oom; 1743 1744 if ((d->cd_nonroot_consistency 1745 = ctf_dynhash_create (ctf_hash_string, 1746 ctf_hash_eq_string, 1747 NULL, NULL)) == NULL) 1748 goto oom; 1749 1750 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 1751 if ((d->cd_output_mapping_guard 1752 = ctf_dynhash_create (ctf_hash_integer, 1753 ctf_hash_eq_integer, NULL, NULL)) == NULL) 1754 goto oom; 1755 #endif 1756 1757 if ((d->cd_input_nums 1758 = ctf_dynhash_create (ctf_hash_integer, 1759 ctf_hash_eq_integer, 1760 NULL, NULL)) == NULL) 1761 goto oom; 1762 1763 if ((d->cd_emission_struct_members 1764 = ctf_dynhash_create (ctf_hash_integer, 1765 ctf_hash_eq_integer, 1766 NULL, NULL)) == NULL) 1767 goto oom; 1768 1769 if ((d->cd_conflicting_types 1770 = ctf_dynset_create (htab_hash_string, 1771 htab_eq_string, NULL)) == NULL) 1772 goto oom; 1773 1774 return 0; 1775 1776 oom: 1777 ctf_err_warn (fp, 0, ENOMEM, _("ctf_dedup_init: cannot initialize: " 1778 "out of memory")); 1779 return ctf_set_errno (fp, ENOMEM); 1780 } 1781 1782 /* No ctf_dedup calls are allowed after this call other than starting a new 1783 deduplication via ctf_dedup (not even ctf_dedup_type_mapping lookups). */ 1784 void 1785 ctf_dedup_fini (ctf_dict_t *fp, ctf_dict_t **outputs, uint32_t noutputs) 1786 { 1787 ctf_dedup_t *d = &fp->ctf_dedup; 1788 size_t i; 1789 1790 /* ctf_dedup_atoms is kept across links. */ 1791 #if IDS_NEED_ALLOCATION 1792 ctf_dynhash_destroy (d->cd_id_to_dict_t); 1793 #endif 1794 for (i = 0; i < 4; i++) 1795 ctf_dynhash_destroy (d->cd_decorated_names[i]); 1796 ctf_dynhash_destroy (d->cd_name_counts); 1797 ctf_dynhash_destroy (d->cd_type_hashes); 1798 ctf_dynhash_destroy (d->cd_struct_origin); 1799 ctf_dynhash_destroy (d->cd_citers); 1800 ctf_dynhash_destroy (d->cd_output_mapping); 1801 ctf_dynhash_destroy (d->cd_output_first_gid); 1802 ctf_dynhash_destroy (d->cd_nonroot_consistency); 1803 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 1804 ctf_dynhash_destroy (d->cd_output_mapping_guard); 1805 #endif 1806 ctf_dynhash_destroy (d->cd_input_nums); 1807 ctf_dynhash_destroy (d->cd_emission_struct_members); 1808 ctf_dynset_destroy (d->cd_conflicting_types); 1809 1810 /* Free the per-output state. */ 1811 if (outputs) 1812 { 1813 for (i = 0; i < noutputs; i++) 1814 { 1815 ctf_dedup_t *od = &outputs[i]->ctf_dedup; 1816 ctf_dynhash_destroy (od->cd_output_emission_hashes); 1817 ctf_dynhash_destroy (od->cd_output_emission_conflicted_forwards); 1818 ctf_dict_close (od->cd_output); 1819 } 1820 } 1821 memset (d, 0, sizeof (ctf_dedup_t)); 1822 } 1823 1824 /* Return 1 if this type is cited by multiple input dictionaries. */ 1825 1826 static int 1827 ctf_dedup_multiple_input_dicts (ctf_dict_t *output, ctf_dict_t **inputs, 1828 const char *hval) 1829 { 1830 ctf_dedup_t *d = &output->ctf_dedup; 1831 ctf_dynset_t *type_ids; 1832 ctf_next_t *i = NULL; 1833 void *id; 1834 ctf_dict_t *found = NULL, *relative_found = NULL; 1835 const char *type_id; 1836 ctf_dict_t *input_fp; 1837 ctf_id_t input_id; 1838 const char *name; 1839 const char *decorated; 1840 int fwdkind; 1841 int multiple = 0; 1842 int err; 1843 1844 type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); 1845 if (!ctf_assert (output, type_ids)) 1846 return -1; 1847 1848 /* Scan across the IDs until we find proof that two disjoint dictionaries 1849 are referenced. Exit as soon as possible. Optimization opportunity, but 1850 possibly not worth it, given that this is only executed in 1851 CTF_LINK_SHARE_DUPLICATED mode. */ 1852 1853 while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0) 1854 { 1855 ctf_dict_t *fp = inputs[CTF_DEDUP_GID_TO_INPUT (id)]; 1856 1857 if (fp == found || fp == relative_found) 1858 continue; 1859 1860 if (!found) 1861 { 1862 found = fp; 1863 continue; 1864 } 1865 1866 if (!relative_found 1867 && (fp->ctf_parent == found || found->ctf_parent == fp)) 1868 { 1869 relative_found = fp; 1870 continue; 1871 } 1872 1873 multiple = 1; 1874 ctf_next_destroy (i); 1875 break; 1876 } 1877 if ((err != ECTF_NEXT_END) && (err != 0)) 1878 { 1879 ctf_err_warn (output, 0, err, _("iteration error " 1880 "propagating conflictedness")); 1881 return ctf_set_errno (output, err); 1882 } 1883 1884 if (multiple) 1885 return multiple; 1886 1887 /* This type itself does not appear in multiple input dicts: how about another 1888 related type with the same name (e.g. a forward if this is a struct, 1889 etc). */ 1890 1891 type_id = ctf_dynset_lookup_any (type_ids); 1892 if (!ctf_assert (output, type_id)) 1893 return -1; 1894 1895 input_fp = inputs[CTF_DEDUP_GID_TO_INPUT (type_id)]; 1896 input_id = CTF_DEDUP_GID_TO_TYPE (type_id); 1897 fwdkind = ctf_type_kind_forwarded (input_fp, input_id); 1898 name = ctf_type_name_raw (input_fp, input_id); 1899 1900 if ((fwdkind == CTF_K_STRUCT || fwdkind == CTF_K_UNION) 1901 && name[0] != '\0') 1902 { 1903 const void *origin; 1904 1905 if ((decorated = ctf_decorate_type_name (output, name, 1906 fwdkind)) == NULL) 1907 return -1; /* errno is set for us. */ 1908 1909 origin = ctf_dynhash_lookup (d->cd_struct_origin, decorated); 1910 if ((origin != NULL) && (CTF_DEDUP_GID_TO_INPUT (origin) < 0)) 1911 multiple = 1; 1912 } 1913 1914 return multiple; 1915 } 1916 1917 /* Demote unconflicting types which reference only one input, or which reference 1918 two inputs where one input is the parent of the other, into conflicting 1919 types. Only used if the link mode is CTF_LINK_SHARE_DUPLICATED. */ 1920 1921 static int 1922 ctf_dedup_conflictify_unshared (ctf_dict_t *output, ctf_dict_t **inputs) 1923 { 1924 ctf_dedup_t *d = &output->ctf_dedup; 1925 ctf_next_t *i = NULL; 1926 int err; 1927 const void *k; 1928 ctf_dynset_t *to_mark = NULL; 1929 1930 if ((to_mark = ctf_dynset_create (htab_hash_string, htab_eq_string, 1931 NULL)) == NULL) 1932 goto err_no; 1933 1934 while ((err = ctf_dynhash_cnext (d->cd_output_mapping, &i, &k, NULL)) == 0) 1935 { 1936 const char *hval = (const char *) k; 1937 int conflicting; 1938 1939 /* Types referenced by only one dict, with no type appearing under that 1940 name elsewhere, are marked conflicting. */ 1941 1942 conflicting = !ctf_dedup_multiple_input_dicts (output, inputs, hval); 1943 1944 if (conflicting < 0) 1945 goto err; /* errno is set for us. */ 1946 1947 if (conflicting) 1948 if (ctf_dynset_cinsert (to_mark, hval) < 0) 1949 goto err; 1950 } 1951 if (err != ECTF_NEXT_END) 1952 goto iterr; 1953 1954 while ((err = ctf_dynset_cnext (to_mark, &i, &k)) == 0) 1955 { 1956 const char *hval = (const char *) k; 1957 1958 if (ctf_dedup_mark_conflicting_hash (output, hval) < 0) 1959 goto err; 1960 } 1961 if (err != ECTF_NEXT_END) 1962 goto iterr; 1963 1964 ctf_dynset_destroy (to_mark); 1965 1966 return 0; 1967 1968 err_no: 1969 ctf_set_errno (output, errno); 1970 err: 1971 err = ctf_errno (output); 1972 ctf_next_destroy (i); 1973 iterr: 1974 ctf_dynset_destroy (to_mark); 1975 ctf_err_warn (output, 0, err, _("conflictifying unshared types")); 1976 return ctf_set_errno (output, err); 1977 } 1978 1979 /* The core deduplicator. Populate cd_output_mapping in the output ctf_dedup with a 1980 mapping of all types that belong in this dictionary and where they come from, and 1981 cd_conflicting_types with an indication of whether each type is conflicted or not. 1982 OUTPUT is the top-level output: INPUTS is the array of input dicts; NINPUTS is the 1983 size of that array. 1984 1985 If CU_MAPPING_PHASE is nonzero, this is a link with a non-empty CU mapping: 1986 in phase 1, only one output will result. 1987 1988 Only deduplicates: does not emit the types into the output. Call 1989 ctf_dedup_emit afterwards to do that. */ 1990 1991 int 1992 ctf_dedup (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs, 1993 int cu_mapping_phase) 1994 { 1995 ctf_dedup_t *d = &output->ctf_dedup; 1996 size_t i; 1997 ctf_next_t *it = NULL; 1998 1999 if (ctf_dedup_init (output) < 0) 2000 return -1; /* errno is set for us. */ 2001 2002 for (i = 0; i < ninputs; i++) 2003 { 2004 ctf_dprintf ("Input %i: %s\n", (int) i, ctf_link_input_name (inputs[i])); 2005 if (ctf_dynhash_insert (d->cd_input_nums, inputs[i], 2006 (void *) (uintptr_t) i) < 0) 2007 { 2008 ctf_set_errno (output, errno); 2009 ctf_err_warn (output, 0, errno, _("ctf_dedup: cannot initialize: %s\n"), 2010 ctf_errmsg (errno)); 2011 goto err; 2012 } 2013 } 2014 2015 /* Some flags do not apply in the first phase of CU-mapped links: this is not 2016 a share-duplicated link, because there is only one output and we really 2017 don't want to end up marking all nonconflicting but appears-only-once types 2018 as conflicting. */ 2019 2020 d->cd_link_flags = output->ctf_link_flags; 2021 if (cu_mapping_phase == 1) 2022 d->cd_link_flags &= ~(CTF_LINK_SHARE_DUPLICATED); 2023 2024 /* Compute hash values for all types, recursively, treating child structures 2025 and unions equivalent to forwards, and hashing in the name of the referent 2026 of each such type into structures, unions, and non-opaque forwards. 2027 Populate a mapping from decorated name (including an indication of 2028 struct/union/enum namespace) to count of type hash values in 2029 cd_name_counts, a mapping from and a mapping from hash values to input type 2030 IDs in cd_output_mapping. */ 2031 2032 ctf_dprintf ("Computing type hashes\n"); 2033 for (i = 0; i < ninputs; i++) 2034 { 2035 ctf_id_t id; 2036 2037 while ((id = ctf_type_next (inputs[i], &it, NULL, 1)) != CTF_ERR) 2038 { 2039 if (ctf_dedup_hash_type (output, inputs[i], inputs, 2040 i, id, 0, 0, 2041 ctf_dedup_populate_mappings) == NULL) 2042 goto err; /* errno is set for us. */ 2043 } 2044 if (ctf_errno (inputs[i]) != ECTF_NEXT_END) 2045 { 2046 ctf_set_errno (output, ctf_errno (inputs[i])); 2047 ctf_err_warn (output, 0, 0, _("iteration failure " 2048 "computing type hashes")); 2049 goto err; 2050 } 2051 } 2052 2053 /* Drop state no longer needed after hashing is over. */ 2054 2055 ctf_dedup_hash_type_fini (output); 2056 2057 /* Go through the cd_name_counts name->hash->count mapping for all CTF 2058 namespaces: any name with many hashes associated with it at this stage is 2059 necessarily ambiguous. Mark all the hashes except the most common as 2060 conflicting in the output. */ 2061 2062 ctf_dprintf ("Detecting type name ambiguity\n"); 2063 if (ctf_dedup_detect_name_ambiguity (output, inputs) < 0) 2064 goto err; /* errno is set for us. */ 2065 2066 /* If the link mode is CTF_LINK_SHARE_DUPLICATED, we change any unconflicting 2067 types whose output mapping references only one input dict into a 2068 conflicting type, so that they end up in the per-CU dictionaries. */ 2069 2070 if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED) 2071 { 2072 ctf_dprintf ("Conflictifying unshared types\n"); 2073 if (ctf_dedup_conflictify_unshared (output, inputs) < 0) 2074 goto err; /* errno is set for us. */ 2075 } 2076 return 0; 2077 2078 err: 2079 ctf_dedup_fini (output, NULL, 0); 2080 return -1; 2081 } 2082 2083 static int 2084 ctf_dedup_rwalk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs, 2085 uint32_t ninputs, uint32_t *parents, 2086 ctf_dynset_t *already_visited, 2087 const char *hval, 2088 int (*visit_fun) (const char *hval, 2089 ctf_dict_t *output, 2090 ctf_dict_t **inputs, 2091 uint32_t ninputs, 2092 uint32_t *parents, 2093 int already_visited, 2094 ctf_dict_t *input, 2095 ctf_id_t type, 2096 void *id, 2097 int depth, 2098 void *arg), 2099 void *arg, unsigned long depth); 2100 2101 /* Like ctf_dedup_rwalk_output_mapping (which see), only takes a single target 2102 type and visits it. */ 2103 static int 2104 ctf_dedup_rwalk_one_output_mapping (ctf_dict_t *output, 2105 ctf_dict_t **inputs, uint32_t ninputs, 2106 uint32_t *parents, 2107 ctf_dynset_t *already_visited, 2108 int visited, void *type_id, 2109 const char *hval, 2110 int (*visit_fun) (const char *hval, 2111 ctf_dict_t *output, 2112 ctf_dict_t **inputs, 2113 uint32_t ninputs, 2114 uint32_t *parents, 2115 int already_visited, 2116 ctf_dict_t *input, 2117 ctf_id_t type, 2118 void *id, 2119 int depth, 2120 void *arg), 2121 void *arg, unsigned long depth) 2122 { 2123 ctf_dedup_t *d = &output->ctf_dedup; 2124 ctf_dict_t *fp; 2125 int input_num; 2126 ctf_id_t type; 2127 int ret; 2128 const char *whaterr; 2129 2130 input_num = CTF_DEDUP_GID_TO_INPUT (type_id); 2131 fp = inputs[input_num]; 2132 type = CTF_DEDUP_GID_TO_TYPE (type_id); 2133 2134 ctf_dprintf ("%lu: Starting walk over type %s, %i/%lx (%p), from %s, " 2135 "kind %i\n", depth, hval, input_num, type, (void *) fp, 2136 ctf_link_input_name (fp), ctf_type_kind_unsliced (fp, type)); 2137 2138 /* Get the single call we do if this type has already been visited out of the 2139 way. */ 2140 if (visited) 2141 return visit_fun (hval, output, inputs, ninputs, parents, visited, fp, 2142 type, type_id, depth, arg); 2143 2144 /* This macro is really ugly, but the alternative is repeating this code many 2145 times, which is worse. */ 2146 2147 #define CTF_TYPE_WALK(type, errlabel, errmsg) \ 2148 do \ 2149 { \ 2150 void *type_id; \ 2151 const char *hashval; \ 2152 int cited_type_input_num = input_num; \ 2153 \ 2154 if ((fp->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (fp, type))) \ 2155 cited_type_input_num = parents[input_num]; \ 2156 \ 2157 type_id = CTF_DEDUP_GID (output, cited_type_input_num, type); \ 2158 \ 2159 if (type == 0) \ 2160 { \ 2161 ctf_dprintf ("Walking: unimplemented type\n"); \ 2162 break; \ 2163 } \ 2164 \ 2165 ctf_dprintf ("Looking up ID %i/%lx in type hashes\n", \ 2166 cited_type_input_num, type); \ 2167 hashval = ctf_dynhash_lookup (d->cd_type_hashes, type_id); \ 2168 if (!ctf_assert (output, hashval)) \ 2169 { \ 2170 whaterr = N_("error looking up ID in type hashes"); \ 2171 goto errlabel; \ 2172 } \ 2173 ctf_dprintf ("ID %i/%lx has hash %s\n", cited_type_input_num, type, \ 2174 hashval); \ 2175 \ 2176 ret = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, \ 2177 already_visited, hashval, \ 2178 visit_fun, arg, depth); \ 2179 if (ret < 0) \ 2180 { \ 2181 whaterr = errmsg; \ 2182 goto errlabel; \ 2183 } \ 2184 } \ 2185 while (0) 2186 2187 switch (ctf_type_kind_unsliced (fp, type)) 2188 { 2189 case CTF_K_UNKNOWN: 2190 case CTF_K_FORWARD: 2191 case CTF_K_INTEGER: 2192 case CTF_K_FLOAT: 2193 case CTF_K_ENUM: 2194 /* No types referenced. */ 2195 break; 2196 2197 case CTF_K_TYPEDEF: 2198 case CTF_K_VOLATILE: 2199 case CTF_K_CONST: 2200 case CTF_K_RESTRICT: 2201 case CTF_K_POINTER: 2202 case CTF_K_SLICE: 2203 CTF_TYPE_WALK (ctf_type_reference (fp, type), err, 2204 N_("error during referenced type walk")); 2205 break; 2206 2207 case CTF_K_ARRAY: 2208 { 2209 ctf_arinfo_t ar; 2210 2211 if (ctf_array_info (fp, type, &ar) < 0) 2212 { 2213 whaterr = N_("error during array info lookup"); 2214 goto err_msg; 2215 } 2216 2217 CTF_TYPE_WALK (ar.ctr_contents, err, 2218 N_("error during array contents type walk")); 2219 CTF_TYPE_WALK (ar.ctr_index, err, 2220 N_("error during array index type walk")); 2221 break; 2222 } 2223 2224 case CTF_K_FUNCTION: 2225 { 2226 ctf_funcinfo_t fi; 2227 ctf_id_t *args; 2228 uint32_t j; 2229 2230 if (ctf_func_type_info (fp, type, &fi) < 0) 2231 { 2232 whaterr = N_("error during func type info lookup"); 2233 goto err_msg; 2234 } 2235 2236 CTF_TYPE_WALK (fi.ctc_return, err, 2237 N_("error during func return type walk")); 2238 2239 if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) 2240 { 2241 whaterr = N_("error doing memory allocation"); 2242 goto err_msg; 2243 } 2244 2245 if (ctf_func_type_args (fp, type, fi.ctc_argc, args) < 0) 2246 { 2247 whaterr = N_("error doing func arg type lookup"); 2248 free (args); 2249 goto err_msg; 2250 } 2251 2252 for (j = 0; j < fi.ctc_argc; j++) 2253 CTF_TYPE_WALK (args[j], err_free_args, 2254 N_("error during Func arg type walk")); 2255 free (args); 2256 break; 2257 2258 err_free_args: 2259 free (args); 2260 goto err; 2261 } 2262 case CTF_K_STRUCT: 2263 case CTF_K_UNION: 2264 /* We do not recursively traverse the members of structures: they are 2265 emitted later, in a separate pass. */ 2266 break; 2267 default: 2268 whaterr = N_("CTF dict corruption: unknown type kind"); 2269 goto err_msg; 2270 } 2271 2272 return visit_fun (hval, output, inputs, ninputs, parents, visited, fp, type, 2273 type_id, depth, arg); 2274 2275 err_msg: 2276 ctf_set_errno (output, ctf_errno (fp)); 2277 ctf_err_warn (output, 0, 0, _("%s in input file %s at type ID %lx"), 2278 gettext (whaterr), ctf_link_input_name (fp), type); 2279 err: 2280 return -1; 2281 } 2282 /* Recursively traverse the output mapping, and do something with each type 2283 visited, from leaves to root. VISIT_FUN, called as recursion unwinds, 2284 returns a negative error code or zero. Type hashes may be visited more than 2285 once, but are not recursed through repeatedly: ALREADY_VISITED tracks whether 2286 types have already been visited. */ 2287 static int 2288 ctf_dedup_rwalk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs, 2289 uint32_t ninputs, uint32_t *parents, 2290 ctf_dynset_t *already_visited, 2291 const char *hval, 2292 int (*visit_fun) (const char *hval, 2293 ctf_dict_t *output, 2294 ctf_dict_t **inputs, 2295 uint32_t ninputs, 2296 uint32_t *parents, 2297 int already_visited, 2298 ctf_dict_t *input, 2299 ctf_id_t type, 2300 void *id, 2301 int depth, 2302 void *arg), 2303 void *arg, unsigned long depth) 2304 { 2305 ctf_dedup_t *d = &output->ctf_dedup; 2306 ctf_next_t *i = NULL; 2307 int err; 2308 int visited = 1; 2309 ctf_dynset_t *type_ids; 2310 void *id; 2311 2312 depth++; 2313 2314 type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); 2315 if (!type_ids) 2316 { 2317 ctf_err_warn (output, 0, ECTF_INTERNAL, 2318 _("looked up type kind by nonexistent hash %s"), hval); 2319 return ctf_set_errno (output, ECTF_INTERNAL); 2320 } 2321 2322 /* Have we seen this type before? */ 2323 2324 if (!ctf_dynset_exists (already_visited, hval, NULL)) 2325 { 2326 /* Mark as already-visited immediately, to eliminate the possibility of 2327 cycles: but remember we have not actually visited it yet for the 2328 upcoming call to the visit_fun. (All our callers handle cycles 2329 properly themselves, so we can just abort them aggressively as soon as 2330 we find ourselves in one.) */ 2331 2332 visited = 0; 2333 if (ctf_dynset_cinsert (already_visited, hval) < 0) 2334 { 2335 ctf_err_warn (output, 0, ENOMEM, 2336 _("out of memory tracking already-visited types")); 2337 return ctf_set_errno (output, ENOMEM); 2338 } 2339 } 2340 2341 /* If this type is marked conflicted, traverse members and call 2342 ctf_dedup_rwalk_one_output_mapping on all the unique ones: otherwise, just 2343 pick a random one and use it. */ 2344 2345 if (!ctf_dynset_exists (d->cd_conflicting_types, hval, NULL)) 2346 { 2347 id = ctf_dynset_lookup_any (type_ids); 2348 if (!ctf_assert (output, id)) 2349 return -1; 2350 2351 return ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs, 2352 parents, already_visited, 2353 visited, id, hval, visit_fun, 2354 arg, depth); 2355 } 2356 2357 while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0) 2358 { 2359 int ret; 2360 2361 ret = ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs, 2362 parents, already_visited, 2363 visited, id, hval, 2364 visit_fun, arg, depth); 2365 if (ret < 0) 2366 { 2367 ctf_next_destroy (i); 2368 return ret; /* errno is set for us. */ 2369 } 2370 } 2371 if (err != ECTF_NEXT_END) 2372 { 2373 ctf_err_warn (output, 0, err, _("cannot walk conflicted type")); 2374 return ctf_set_errno (output, err); 2375 } 2376 2377 return 0; 2378 } 2379 2380 typedef struct ctf_sort_om_cb_arg 2381 { 2382 ctf_dict_t **inputs; 2383 uint32_t ninputs; 2384 ctf_dedup_t *d; 2385 } ctf_sort_om_cb_arg_t; 2386 2387 /* Sort the output mapping into order: types first appearing in earlier inputs 2388 first, parents preceding children: if types first appear in the same input, 2389 sort those with earlier ctf_id_t's first. */ 2390 static int 2391 sort_output_mapping (const ctf_next_hkv_t *one, const ctf_next_hkv_t *two, 2392 void *arg_) 2393 { 2394 ctf_sort_om_cb_arg_t *arg = (ctf_sort_om_cb_arg_t *) arg_; 2395 ctf_dedup_t *d = arg->d; 2396 const char *one_hval = (const char *) one->hkv_key; 2397 const char *two_hval = (const char *) two->hkv_key; 2398 void *one_gid, *two_gid; 2399 uint32_t one_ninput; 2400 uint32_t two_ninput; 2401 ctf_dict_t *one_fp; 2402 ctf_dict_t *two_fp; 2403 ctf_id_t one_type; 2404 ctf_id_t two_type; 2405 2406 /* Inputs are always equal to themselves. */ 2407 if (one == two) 2408 return 0; 2409 2410 one_gid = ctf_dynhash_lookup (d->cd_output_first_gid, one_hval); 2411 two_gid = ctf_dynhash_lookup (d->cd_output_first_gid, two_hval); 2412 2413 one_ninput = CTF_DEDUP_GID_TO_INPUT (one_gid); 2414 two_ninput = CTF_DEDUP_GID_TO_INPUT (two_gid); 2415 2416 one_type = CTF_DEDUP_GID_TO_TYPE (one_gid); 2417 two_type = CTF_DEDUP_GID_TO_TYPE (two_gid); 2418 2419 /* It's kind of hard to smuggle an assertion failure out of here. */ 2420 assert (one_ninput < arg->ninputs && two_ninput < arg->ninputs); 2421 2422 one_fp = arg->inputs[one_ninput]; 2423 two_fp = arg->inputs[two_ninput]; 2424 2425 /* Parents before children. */ 2426 2427 if (!(one_fp->ctf_flags & LCTF_CHILD) 2428 && (two_fp->ctf_flags & LCTF_CHILD)) 2429 return -1; 2430 else if ((one_fp->ctf_flags & LCTF_CHILD) 2431 && !(two_fp->ctf_flags & LCTF_CHILD)) 2432 return 1; 2433 2434 /* ninput order, types appearing in earlier TUs first. */ 2435 2436 if (one_ninput < two_ninput) 2437 return -1; 2438 else if (two_ninput < one_ninput) 2439 return 1; 2440 2441 /* Same TU. Earliest ctf_id_t first. They cannot be the same. */ 2442 2443 assert (one_type != two_type); 2444 if (one_type < two_type) 2445 return -1; 2446 else 2447 return 1; 2448 } 2449 2450 /* The public entry point to ctf_dedup_rwalk_output_mapping, above. */ 2451 static int 2452 ctf_dedup_walk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs, 2453 uint32_t ninputs, uint32_t *parents, 2454 int (*visit_fun) (const char *hval, 2455 ctf_dict_t *output, 2456 ctf_dict_t **inputs, 2457 uint32_t ninputs, 2458 uint32_t *parents, 2459 int already_visited, 2460 ctf_dict_t *input, 2461 ctf_id_t type, 2462 void *id, 2463 int depth, 2464 void *arg), 2465 void *arg) 2466 { 2467 ctf_dynset_t *already_visited; 2468 ctf_next_t *i = NULL; 2469 ctf_sort_om_cb_arg_t sort_arg; 2470 int err; 2471 void *k; 2472 2473 if ((already_visited = ctf_dynset_create (htab_hash_string, 2474 htab_eq_string, 2475 NULL)) == NULL) 2476 return ctf_set_errno (output, ENOMEM); 2477 2478 sort_arg.inputs = inputs; 2479 sort_arg.ninputs = ninputs; 2480 sort_arg.d = &output->ctf_dedup; 2481 2482 while ((err = ctf_dynhash_next_sorted (output->ctf_dedup.cd_output_mapping, 2483 &i, &k, NULL, sort_output_mapping, 2484 &sort_arg)) == 0) 2485 { 2486 const char *hval = (const char *) k; 2487 2488 err = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, 2489 already_visited, hval, visit_fun, 2490 arg, 0); 2491 if (err < 0) 2492 { 2493 ctf_next_destroy (i); 2494 goto err; /* errno is set for us. */ 2495 } 2496 } 2497 if (err != ECTF_NEXT_END) 2498 { 2499 ctf_set_errno (output, err); 2500 ctf_err_warn (output, 0, 0, _("cannot recurse over output mapping")); 2501 goto err; 2502 } 2503 ctf_dynset_destroy (already_visited); 2504 2505 return 0; 2506 err: 2507 ctf_dynset_destroy (already_visited); 2508 return -1; 2509 } 2510 2511 /* Possibly synthesise a synthetic forward in TARGET to subsitute for a 2512 conflicted per-TU type ID in INPUT with hash HVAL. Return its CTF ID, or 0 2513 if none was needed. */ 2514 static ctf_id_t 2515 ctf_dedup_maybe_synthesize_forward (ctf_dict_t *output, ctf_dict_t *target, 2516 ctf_dict_t *input, ctf_id_t id, 2517 const char *hval) 2518 { 2519 ctf_dedup_t *od = &output->ctf_dedup; 2520 ctf_dedup_t *td = &target->ctf_dedup; 2521 int kind; 2522 int fwdkind; 2523 const char *name = ctf_type_name_raw (input, id); 2524 const char *decorated; 2525 void *v; 2526 ctf_id_t emitted_forward; 2527 2528 if (!ctf_dynset_exists (od->cd_conflicting_types, hval, NULL) 2529 || target->ctf_flags & LCTF_CHILD 2530 || name[0] == '\0' 2531 || (((kind = ctf_type_kind_unsliced (input, id)) != CTF_K_STRUCT 2532 && kind != CTF_K_UNION && kind != CTF_K_FORWARD))) 2533 return 0; 2534 2535 fwdkind = ctf_type_kind_forwarded (input, id); 2536 2537 ctf_dprintf ("Using synthetic forward for conflicted struct/union with " 2538 "hval %s\n", hval); 2539 2540 if (!ctf_assert (output, name)) 2541 return CTF_ERR; 2542 2543 if ((decorated = ctf_decorate_type_name (output, name, fwdkind)) == NULL) 2544 return CTF_ERR; 2545 2546 if (!ctf_dynhash_lookup_kv (td->cd_output_emission_conflicted_forwards, 2547 decorated, NULL, &v)) 2548 { 2549 if ((emitted_forward = ctf_add_forward (target, CTF_ADD_ROOT, name, 2550 fwdkind)) == CTF_ERR) 2551 return ctf_set_typed_errno (output, ctf_errno (target)); 2552 2553 if (ctf_dynhash_cinsert (td->cd_output_emission_conflicted_forwards, 2554 decorated, (void *) (uintptr_t) 2555 emitted_forward) < 0) 2556 return ctf_set_typed_errno (output, ENOMEM); 2557 } 2558 else 2559 emitted_forward = (ctf_id_t) (uintptr_t) v; 2560 2561 ctf_dprintf ("Cross-TU conflicted struct: passing back forward, %lx\n", 2562 emitted_forward); 2563 2564 return emitted_forward; 2565 } 2566 2567 /* Map a GID in some INPUT dict, in the form of an input number and a ctf_id_t, 2568 into a GID in a target output dict. If it returns 0, this is the 2569 unimplemented type, and the input type must have been 0. The OUTPUT dict is 2570 assumed to be the parent of the TARGET, if it is not the TARGET itself. 2571 2572 Returns CTF_ERR on failure. Responds to an incoming CTF_ERR as an 'id' by 2573 returning CTF_ERR, to simplify callers. Errors are always propagated to the 2574 input, even if they relate to the target, for the same reason. (Target 2575 errors are expected to be very rare.) 2576 2577 If the type in question is a citation of a conflicted type in a different TU, 2578 emit a forward of the right type in its place (if not already emitted), and 2579 record that forward in cd_output_emission_conflicted_forwards. This avoids 2580 the need to replicate the entire type graph below this point in the current 2581 TU (an appalling waste of space). 2582 2583 TODO: maybe replace forwards in the same TU with their referents? Might 2584 make usability a bit better. */ 2585 2586 static ctf_id_t 2587 ctf_dedup_id_to_target (ctf_dict_t *output, ctf_dict_t *target, 2588 ctf_dict_t **inputs, uint32_t ninputs, 2589 uint32_t *parents, ctf_dict_t *input, int input_num, 2590 ctf_id_t id) 2591 { 2592 ctf_dedup_t *od = &output->ctf_dedup; 2593 ctf_dedup_t *td = &target->ctf_dedup; 2594 ctf_dict_t *err_fp = input; 2595 const char *hval; 2596 void *target_id; 2597 ctf_id_t emitted_forward; 2598 2599 /* The target type of an error is an error. */ 2600 if (id == CTF_ERR) 2601 return CTF_ERR; 2602 2603 /* The unimplemented type's ID never changes. */ 2604 if (!id) 2605 { 2606 ctf_dprintf ("%i/%lx: unimplemented type\n", input_num, id); 2607 return 0; 2608 } 2609 2610 ctf_dprintf ("Mapping %i/%lx to target %p (%s)\n", input_num, 2611 id, (void *) target, ctf_link_input_name (target)); 2612 2613 /* If the input type is in the parent type space, and this is a child, reset 2614 the input to the parent (which must already have been emitted, since 2615 emission of parent dicts happens before children). */ 2616 if ((input->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (input, id))) 2617 { 2618 if (!ctf_assert (output, parents[input_num] <= ninputs)) 2619 return CTF_ERR; 2620 input = inputs[parents[input_num]]; 2621 input_num = parents[input_num]; 2622 } 2623 2624 hval = ctf_dynhash_lookup (od->cd_type_hashes, 2625 CTF_DEDUP_GID (output, input_num, id)); 2626 2627 if (!ctf_assert (output, hval && td->cd_output_emission_hashes)) 2628 return CTF_ERR; 2629 2630 /* If this type is a conflicted tagged structure, union, or forward, 2631 substitute a synthetic forward instead, emitting it if need be. Only do 2632 this if the target is in the parent dict: if it's in the child dict, we can 2633 just point straight at the thing itself. Of course, we might be looking in 2634 the child dict right now and not find it and have to look in the parent, so 2635 we have to do this check twice. */ 2636 2637 emitted_forward = ctf_dedup_maybe_synthesize_forward (output, target, 2638 input, id, hval); 2639 switch (emitted_forward) 2640 { 2641 case 0: /* No forward needed. */ 2642 break; 2643 case -1: 2644 ctf_set_errno (err_fp, ctf_errno (output)); 2645 ctf_err_warn (err_fp, 0, 0, _("cannot add synthetic forward for type " 2646 "%i/%lx"), input_num, id); 2647 return CTF_ERR; 2648 default: 2649 return emitted_forward; 2650 } 2651 2652 ctf_dprintf ("Looking up %i/%lx, hash %s, in target\n", input_num, id, hval); 2653 2654 target_id = ctf_dynhash_lookup (td->cd_output_emission_hashes, hval); 2655 if (!target_id) 2656 { 2657 /* Must be in the parent, so this must be a child, and they must not be 2658 the same dict. */ 2659 ctf_dprintf ("Checking shared parent for target\n"); 2660 if (!ctf_assert (output, (target != output) 2661 && (target->ctf_flags & LCTF_CHILD))) 2662 return CTF_ERR; 2663 2664 target_id = ctf_dynhash_lookup (od->cd_output_emission_hashes, hval); 2665 2666 emitted_forward = ctf_dedup_maybe_synthesize_forward (output, output, 2667 input, id, hval); 2668 switch (emitted_forward) 2669 { 2670 case 0: /* No forward needed. */ 2671 break; 2672 case -1: 2673 ctf_err_warn (err_fp, 0, ctf_errno (output), 2674 _("cannot add synthetic forward for type %i/%lx"), 2675 input_num, id); 2676 return ctf_set_typed_errno (err_fp, ctf_errno (output)); 2677 default: 2678 return emitted_forward; 2679 } 2680 } 2681 if (!ctf_assert (output, target_id)) 2682 return CTF_ERR; 2683 return (ctf_id_t) (uintptr_t) target_id; 2684 } 2685 2686 /* Emit a single deduplicated TYPE with the given HVAL, located in a given 2687 INPUT, with the given (G)ID, into the shared OUTPUT or a 2688 possibly-newly-created per-CU dict. All the types this type depends upon 2689 have already been emitted. (This type itself may also have been emitted.) 2690 2691 If the ARG is 1, this is a CU-mapped deduplication round mapping many 2692 ctf_dict_t's into precisely one: conflicting types should be marked 2693 non-root-visible. If the ARG is 0, conflicting types go into per-CU 2694 dictionaries stored in the input's ctf_dedup.cd_output: otherwise, everything 2695 is emitted directly into the output. No struct/union members are emitted. 2696 2697 Optimization opportunity: trace the ancestry of non-root-visible types and 2698 elide all that neither have a root-visible type somewhere towards their root, 2699 nor have the type visible via any other route (the function info section, 2700 data object section, backtrace section etc). */ 2701 2702 static int 2703 ctf_dedup_emit_type (const char *hval, ctf_dict_t *output, ctf_dict_t **inputs, 2704 uint32_t ninputs, uint32_t *parents, int already_visited, 2705 ctf_dict_t *input, ctf_id_t type, void *id, int depth, 2706 void *arg) 2707 { 2708 ctf_dedup_t *d = &output->ctf_dedup; 2709 int kind = ctf_type_kind_unsliced (input, type); 2710 const char *name; 2711 ctf_dict_t *target = output; 2712 ctf_dict_t *real_input; 2713 const ctf_type_t *tp; 2714 int input_num = CTF_DEDUP_GID_TO_INPUT (id); 2715 int output_num = (uint32_t) -1; /* 'shared' */ 2716 int cu_mapping_phase = *(int *)arg; 2717 int isroot = 1; 2718 int is_conflicting; 2719 2720 ctf_next_t *i = NULL; 2721 ctf_id_t new_type; 2722 ctf_id_t ref; 2723 ctf_id_t maybe_dup = 0; 2724 ctf_encoding_t ep; 2725 const char *errtype; 2726 int emission_hashed = 0; 2727 2728 /* We don't want to re-emit something we've already emitted. */ 2729 2730 if (already_visited) 2731 return 0; 2732 2733 ctf_dprintf ("%i: Emitting type with hash %s from %s: determining target\n", 2734 depth, hval, ctf_link_input_name (input)); 2735 2736 /* Conflicting types go into a per-CU output dictionary, unless this is a 2737 CU-mapped run. The import is not refcounted, since it goes into the 2738 ctf_link_outputs dict of the output that is its parent. */ 2739 is_conflicting = ctf_dynset_exists (d->cd_conflicting_types, hval, NULL); 2740 2741 if (is_conflicting && cu_mapping_phase != 1) 2742 { 2743 ctf_dprintf ("%i: Type %s in %i/%lx is conflicted: " 2744 "inserting into per-CU target.\n", 2745 depth, hval, input_num, type); 2746 2747 if (input->ctf_dedup.cd_output) 2748 target = input->ctf_dedup.cd_output; 2749 else 2750 { 2751 int err; 2752 2753 if ((target = ctf_create (&err)) == NULL) 2754 { 2755 ctf_err_warn (output, 0, err, 2756 _("cannot create per-CU CTF archive for CU %s"), 2757 ctf_link_input_name (input)); 2758 return ctf_set_errno (output, err); 2759 } 2760 2761 target->ctf_flags |= LCTF_STRICT_NO_DUP_ENUMERATORS; 2762 ctf_import_unref (target, output); 2763 if (ctf_cuname (input) != NULL) 2764 ctf_cuname_set (target, ctf_cuname (input)); 2765 else 2766 ctf_cuname_set (target, "unnamed-CU"); 2767 ctf_parent_name_set (target, _CTF_SECTION); 2768 2769 input->ctf_dedup.cd_output = target; 2770 input->ctf_link_in_out = target; 2771 target->ctf_link_in_out = input; 2772 } 2773 output_num = input_num; 2774 } 2775 2776 if (!target->ctf_dedup.cd_output_emission_hashes) 2777 if ((target->ctf_dedup.cd_output_emission_hashes 2778 = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, 2779 NULL, NULL)) == NULL) 2780 goto oom_hash; 2781 2782 if (!target->ctf_dedup.cd_output_emission_conflicted_forwards) 2783 if ((target->ctf_dedup.cd_output_emission_conflicted_forwards 2784 = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, 2785 NULL, NULL)) == NULL) 2786 goto oom_hash; 2787 2788 /* When cu-mapping mode is turned on, we merge types derived from multiple CUs 2789 into one target dict: in phase 1, by merging them according to the mapping; 2790 in phase 2, as a consequence of taking the merged results from phase 1. 2791 Any given type appears only once in the type mapping, but in 2792 ctf_dedup_rwalk_output_mapping we loop inserting conflicting types into a 2793 child dict corresponding to every input dict they came from. This means 2794 that if those dicts are mapped together, in phase 1 we can attempt to 2795 insert them *multiple times* into the same dict, which then causes them to 2796 be duplicated in phase 2 as well. Avoid this by making sure this hval 2797 isn't already present in the emission hash in phase 1: if it is, we in 2798 effect already visited this type, and can return as we did above. */ 2799 2800 if (cu_mapping_phase == 1 2801 && ctf_dynhash_lookup (target->ctf_dedup.cd_output_emission_hashes, hval)) 2802 return 0; 2803 2804 real_input = input; 2805 if ((tp = ctf_lookup_by_id (&real_input, type)) == NULL) 2806 { 2807 ctf_err_warn (output, 0, ctf_errno (input), 2808 _("%s: lookup failure for type %lx"), 2809 ctf_link_input_name (real_input), type); 2810 return ctf_set_errno (output, ctf_errno (input)); 2811 } 2812 2813 name = ctf_strraw (real_input, tp->ctt_name); 2814 2815 /* cu_mapped links at phase 1 get absolutely *everything* marked non-root, 2816 named or not. Such links, when we are merging multiple child CUs into one, 2817 are the only point at which we can ever put conflicting and nonconflicting 2818 instances of the same type into the same dict, and which one comes first is 2819 arbitrary. Rather than having to figure out when we insert a type whether 2820 another one is coming that might conflict with it without being so marked, 2821 just mark everything as non-root: we'll disregard it in the next phase of 2822 cu-mapped linking anyway. 2823 2824 In phase 2 (the final dedup phase) of cu-mapped links, we have to deal with 2825 the fallout of this, in that single inputs have 100% non-root types (so the 2826 non-root bit isn't really meaningful) but some subset of them may be 2827 genuinely clashing, conflicting, but already in child dicts (a thing that 2828 is impossible in non-CU-mapped links, when child dicts correspond to single 2829 CUs). 2830 2831 So in phase 2, we hide conflicting types, if this type is conflicting and a 2832 type with this name already exists in the target and is not a forward. 2833 2834 Note that enums also get their enumerands checked, below. 2835 2836 Otherwise, in "phase 0" (i.e. normal links), we can respect the non-root 2837 flag the user passed in and simply propagate it directly to the output. 2838 If the user provided a mix of root-visible and non-root-visible flags, 2839 we treat it as non-root-visible: see ctf_dedup_hash_type_fini. */ 2840 2841 switch (cu_mapping_phase) 2842 { 2843 case 0: /* Normal link. Root-visibility explicitly tracked. */ 2844 if (ctf_dynhash_lookup (d->cd_nonroot_consistency, hval)) 2845 isroot = 0; 2846 break; 2847 case 1: /* cu-mapped link. Never root-visible. */ 2848 isroot = 0; 2849 break; 2850 case 2: /* Final phase of cu-mapped link. Non-root if already present. */ 2851 if (is_conflicting && name 2852 && ((maybe_dup = ctf_lookup_by_rawname (target, kind, name)) != 0)) 2853 { 2854 if (ctf_type_kind (target, maybe_dup) != CTF_K_FORWARD) 2855 { 2856 ctf_dprintf ("%s, kind %i, hval %s: conflicting type marked as " 2857 "non-root because of pre-existing type %s/%lx, " 2858 "kind %i.\n", name, kind, hval, ctf_cuname (target), 2859 maybe_dup, ctf_type_kind (target, maybe_dup)); 2860 isroot = 0; 2861 } 2862 } 2863 break; 2864 default: 2865 if (!ctf_assert (output, cu_mapping_phase >= 0 && cu_mapping_phase <= 2)) 2866 return -1; /* errno is set for us. */ 2867 } 2868 2869 ctf_dprintf ("%i: Emitting type with hash %s (%s), into target %i/%p\n", 2870 depth, hval, name ? name : "", input_num, (void *) target); 2871 2872 switch (kind) 2873 { 2874 case CTF_K_UNKNOWN: 2875 /* These are types that CTF cannot encode, marked as such by the 2876 compiler. */ 2877 errtype = _("unknown type"); 2878 if ((new_type = ctf_add_unknown (target, isroot, name)) == CTF_ERR) 2879 goto err_target; 2880 break; 2881 case CTF_K_FORWARD: 2882 /* This will do nothing if the type to which this forwards already exists, 2883 and will be replaced with such a type if it appears later. */ 2884 2885 errtype = _("forward"); 2886 if ((new_type = ctf_add_forward (target, isroot, name, 2887 ctf_type_kind_forwarded (input, type))) 2888 == CTF_ERR) 2889 goto err_target; 2890 break; 2891 2892 case CTF_K_FLOAT: 2893 case CTF_K_INTEGER: 2894 errtype = _("float/int"); 2895 if (ctf_type_encoding (input, type, &ep) < 0) 2896 goto err_input; /* errno is set for us. */ 2897 if ((new_type = ctf_add_encoded (target, isroot, name, &ep, kind)) 2898 == CTF_ERR) 2899 goto err_target; 2900 break; 2901 2902 case CTF_K_ENUM: 2903 { 2904 int val; 2905 errtype = _("enum"); 2906 2907 /* Check enumerands for duplication and nonrootify if clashing: this is 2908 an extension of the isroot check above. */ 2909 2910 if (isroot && cu_mapping_phase == 2) 2911 { 2912 const char *enumerand; 2913 while ((enumerand = ctf_enum_next (input, type, &i, &val)) != NULL) 2914 { 2915 if (is_conflicting && name 2916 && ctf_dynhash_lookup (target->ctf_names, enumerand) != NULL) 2917 { 2918 ctf_dprintf ("%s, kind %i, hval %s: conflicting type marked " 2919 "as non-root because of pre-existing enumerand " 2920 "%s.\n", name, kind, hval, enumerand); 2921 isroot = 0; 2922 } 2923 } 2924 if (ctf_errno (input) != ECTF_NEXT_END) 2925 goto err_input; 2926 } 2927 2928 if ((new_type = ctf_add_enum (target, isroot, name)) == CTF_ERR) 2929 goto err_input; /* errno is set for us. */ 2930 2931 while ((name = ctf_enum_next (input, type, &i, &val)) != NULL) 2932 { 2933 if (ctf_add_enumerator (target, new_type, name, val) < 0) 2934 { 2935 ctf_err_warn (target, 0, ctf_errno (target), 2936 _("%s (%i): cannot add enumeration value %s " 2937 "from input type %lx"), 2938 ctf_link_input_name (input), input_num, name, 2939 type); 2940 ctf_next_destroy (i); 2941 return ctf_set_errno (output, ctf_errno (target)); 2942 } 2943 } 2944 if (ctf_errno (input) != ECTF_NEXT_END) 2945 goto err_input; 2946 break; 2947 } 2948 2949 case CTF_K_TYPEDEF: 2950 errtype = _("typedef"); 2951 2952 ref = ctf_type_reference (input, type); 2953 if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, 2954 parents, input, input_num, 2955 ref)) == CTF_ERR) 2956 goto err_input; /* errno is set for us. */ 2957 2958 if ((new_type = ctf_add_typedef (target, isroot, name, ref)) == CTF_ERR) 2959 goto err_target; /* errno is set for us. */ 2960 break; 2961 2962 case CTF_K_VOLATILE: 2963 case CTF_K_CONST: 2964 case CTF_K_RESTRICT: 2965 case CTF_K_POINTER: 2966 errtype = _("pointer or cvr-qual"); 2967 2968 ref = ctf_type_reference (input, type); 2969 if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, 2970 parents, input, input_num, 2971 ref)) == CTF_ERR) 2972 goto err_input; /* errno is set for us. */ 2973 2974 if ((new_type = ctf_add_reftype (target, isroot, ref, kind)) == CTF_ERR) 2975 goto err_target; /* errno is set for us. */ 2976 break; 2977 2978 case CTF_K_SLICE: 2979 errtype = _("slice"); 2980 2981 if (ctf_type_encoding (input, type, &ep) < 0) 2982 goto err_input; /* errno is set for us. */ 2983 2984 ref = ctf_type_reference (input, type); 2985 if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, 2986 parents, input, input_num, 2987 ref)) == CTF_ERR) 2988 goto err_input; 2989 2990 if ((new_type = ctf_add_slice (target, isroot, ref, &ep)) == CTF_ERR) 2991 goto err_target; 2992 break; 2993 2994 case CTF_K_ARRAY: 2995 { 2996 ctf_arinfo_t ar; 2997 2998 errtype = _("array info"); 2999 if (ctf_array_info (input, type, &ar) < 0) 3000 goto err_input; 3001 3002 ar.ctr_contents = ctf_dedup_id_to_target (output, target, inputs, 3003 ninputs, parents, input, 3004 input_num, ar.ctr_contents); 3005 ar.ctr_index = ctf_dedup_id_to_target (output, target, inputs, ninputs, 3006 parents, input, input_num, 3007 ar.ctr_index); 3008 3009 if (ar.ctr_contents == CTF_ERR || ar.ctr_index == CTF_ERR) 3010 goto err_input; 3011 3012 if ((new_type = ctf_add_array (target, isroot, &ar)) == CTF_ERR) 3013 goto err_target; 3014 3015 break; 3016 } 3017 3018 case CTF_K_FUNCTION: 3019 { 3020 ctf_funcinfo_t fi; 3021 ctf_id_t *args; 3022 uint32_t j; 3023 3024 errtype = _("function"); 3025 if (ctf_func_type_info (input, type, &fi) < 0) 3026 goto err_input; 3027 3028 fi.ctc_return = ctf_dedup_id_to_target (output, target, inputs, ninputs, 3029 parents, input, input_num, 3030 fi.ctc_return); 3031 if (fi.ctc_return == CTF_ERR) 3032 goto err_input; 3033 3034 if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) 3035 { 3036 ctf_set_errno (input, ENOMEM); 3037 goto err_input; 3038 } 3039 3040 errtype = _("function args"); 3041 if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0) 3042 { 3043 free (args); 3044 goto err_input; 3045 } 3046 3047 for (j = 0; j < fi.ctc_argc; j++) 3048 { 3049 args[j] = ctf_dedup_id_to_target (output, target, inputs, ninputs, 3050 parents, input, input_num, 3051 args[j]); 3052 if (args[j] == CTF_ERR) 3053 goto err_input; 3054 } 3055 3056 if ((new_type = ctf_add_function (target, isroot, 3057 &fi, args)) == CTF_ERR) 3058 { 3059 free (args); 3060 goto err_target; 3061 } 3062 free (args); 3063 break; 3064 } 3065 3066 case CTF_K_STRUCT: 3067 case CTF_K_UNION: 3068 { 3069 size_t size = ctf_type_size (input, type); 3070 void *out_id; 3071 /* Insert the structure itself, so other types can refer to it. */ 3072 3073 errtype = _("structure/union"); 3074 if (kind == CTF_K_STRUCT) 3075 new_type = ctf_add_struct_sized (target, isroot, name, size); 3076 else 3077 new_type = ctf_add_union_sized (target, isroot, name, size); 3078 3079 if (new_type == CTF_ERR) 3080 goto err_target; 3081 3082 out_id = CTF_DEDUP_GID (output, output_num, new_type); 3083 ctf_dprintf ("%i: Noting need to emit members of %p -> %p\n", depth, 3084 id, out_id); 3085 /* Record the need to emit the members of this structure later. */ 3086 if (ctf_dynhash_insert (d->cd_emission_struct_members, id, out_id) < 0) 3087 { 3088 ctf_set_errno (target, errno); 3089 goto err_target; 3090 } 3091 break; 3092 } 3093 default: 3094 ctf_err_warn (output, 0, ECTF_CORRUPT, _("%s: unknown type kind for " 3095 "input type %lx"), 3096 ctf_link_input_name (input), type); 3097 return ctf_set_errno (output, ECTF_CORRUPT); 3098 } 3099 3100 if (!emission_hashed 3101 && new_type != 0 3102 && ctf_dynhash_cinsert (target->ctf_dedup.cd_output_emission_hashes, 3103 hval, (void *) (uintptr_t) new_type) < 0) 3104 { 3105 ctf_err_warn (output, 0, ENOMEM, _("out of memory tracking deduplicated " 3106 "global type IDs")); 3107 return ctf_set_errno (output, ENOMEM); 3108 } 3109 3110 if (!emission_hashed && new_type != 0) 3111 ctf_dprintf ("%i: Inserted %s, %i/%lx -> %lx into emission hash for " 3112 "target %p (%s)\n", depth, hval, input_num, type, new_type, 3113 (void *) target, ctf_link_input_name (target)); 3114 3115 return 0; 3116 3117 oom_hash: 3118 ctf_err_warn (output, 0, ENOMEM, _("out of memory creating emission-tracking " 3119 "hashes")); 3120 return ctf_set_errno (output, ENOMEM); 3121 3122 err_input: 3123 ctf_err_warn (output, 0, ctf_errno (input), 3124 _("%s (%i): while emitting deduplicated %s, error getting " 3125 "input type %lx"), ctf_link_input_name (input), 3126 input_num, errtype, type); 3127 return ctf_set_errno (output, ctf_errno (input)); 3128 err_target: 3129 ctf_err_warn (output, 0, ctf_errno (target), 3130 _("%s (%i): while emitting deduplicated %s, error emitting " 3131 "target type from input type %lx"), 3132 ctf_link_input_name (input), input_num, 3133 errtype, type); 3134 return ctf_set_errno (output, ctf_errno (target)); 3135 } 3136 3137 /* Traverse the cd_emission_struct_members and emit the members of all 3138 structures and unions. All other types are emitted and complete by this 3139 point. */ 3140 3141 static int 3142 ctf_dedup_emit_struct_members (ctf_dict_t *output, ctf_dict_t **inputs, 3143 uint32_t ninputs, uint32_t *parents) 3144 { 3145 ctf_dedup_t *d = &output->ctf_dedup; 3146 ctf_next_t *i = NULL; 3147 void *input_id, *target_id; 3148 int err; 3149 ctf_dict_t *err_fp, *input_fp; 3150 int input_num; 3151 ctf_id_t err_type; 3152 3153 while ((err = ctf_dynhash_next (d->cd_emission_struct_members, &i, 3154 &input_id, &target_id)) == 0) 3155 { 3156 ctf_next_t *j = NULL; 3157 ctf_dict_t *target; 3158 uint32_t target_num; 3159 ctf_id_t input_type, target_type; 3160 ssize_t offset; 3161 ctf_id_t membtype; 3162 const char *name; 3163 3164 input_num = CTF_DEDUP_GID_TO_INPUT (input_id); 3165 input_fp = inputs[input_num]; 3166 input_type = CTF_DEDUP_GID_TO_TYPE (input_id); 3167 3168 /* The output is either -1 (for the shared, parent output dict) or the 3169 number of the corresponding input. */ 3170 target_num = CTF_DEDUP_GID_TO_INPUT (target_id); 3171 if (target_num == (uint32_t) -1) 3172 target = output; 3173 else 3174 { 3175 target = inputs[target_num]->ctf_dedup.cd_output; 3176 if (!ctf_assert (output, target)) 3177 { 3178 err_fp = output; 3179 err_type = input_type; 3180 goto err_target; 3181 } 3182 } 3183 target_type = CTF_DEDUP_GID_TO_TYPE (target_id); 3184 3185 while ((offset = ctf_member_next (input_fp, input_type, &j, &name, 3186 &membtype, 0)) >= 0) 3187 { 3188 err_fp = target; 3189 err_type = target_type; 3190 if ((membtype = ctf_dedup_id_to_target (output, target, inputs, 3191 ninputs, parents, input_fp, 3192 input_num, 3193 membtype)) == CTF_ERR) 3194 { 3195 ctf_next_destroy (j); 3196 goto err_target; 3197 } 3198 3199 if (name == NULL) 3200 name = ""; 3201 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING 3202 ctf_dprintf ("Emitting %s, offset %zi\n", name, offset); 3203 #endif 3204 if (ctf_add_member_offset (target, target_type, name, 3205 membtype, offset) < 0) 3206 { 3207 ctf_next_destroy (j); 3208 goto err_target; 3209 } 3210 } 3211 if (ctf_errno (input_fp) != ECTF_NEXT_END) 3212 { 3213 err = ctf_errno (input_fp); 3214 ctf_next_destroy (i); 3215 goto iterr; 3216 } 3217 } 3218 if (err != ECTF_NEXT_END) 3219 goto iterr; 3220 3221 return 0; 3222 err_target: 3223 ctf_next_destroy (i); 3224 ctf_err_warn (output, 0, ctf_errno (err_fp), 3225 _("%s (%i): error emitting members for structure type %lx"), 3226 ctf_link_input_name (input_fp), input_num, err_type); 3227 return ctf_set_errno (output, ctf_errno (err_fp)); 3228 iterr: 3229 ctf_err_warn (output, 0, err, _("iteration failure emitting " 3230 "structure members")); 3231 return ctf_set_errno (output, err); 3232 } 3233 3234 /* Emit deduplicated types into the outputs. The shared type repository is 3235 OUTPUT, on which the ctf_dedup function must have already been called. The 3236 PARENTS array contains the INPUTS index of the parent dict for every child 3237 dict at the corresponding index in the INPUTS (for non-child dicts, the value 3238 is undefined and can just be left at zero). 3239 3240 Return an array of fps with content emitted into them (starting with OUTPUT, 3241 which is the parent of all others, then all the newly-generated outputs). 3242 3243 If CU_MAPPING_PHASE is set to 1, this is a first pass for a link with a 3244 non-empty CU mapping: only one output will result. */ 3245 3246 ctf_dict_t ** 3247 ctf_dedup_emit (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs, 3248 uint32_t *parents, uint32_t *noutputs, int cu_mapping_phase) 3249 { 3250 size_t num_outputs = 1; /* Always at least one output: us. */ 3251 ctf_dict_t **outputs; 3252 ctf_dict_t **walk; 3253 size_t i; 3254 3255 ctf_dprintf ("Triggering emission.\n"); 3256 if (ctf_dedup_walk_output_mapping (output, inputs, ninputs, parents, 3257 ctf_dedup_emit_type, &cu_mapping_phase) < 0) 3258 return NULL; /* errno is set for us. */ 3259 3260 ctf_dprintf ("Populating struct members.\n"); 3261 if (ctf_dedup_emit_struct_members (output, inputs, ninputs, parents) < 0) 3262 return NULL; /* errno is set for us. */ 3263 3264 for (i = 0; i < ninputs; i++) 3265 { 3266 if (inputs[i]->ctf_dedup.cd_output) 3267 num_outputs++; 3268 } 3269 3270 if (!ctf_assert (output, (cu_mapping_phase != 1 3271 || (cu_mapping_phase == 1 && num_outputs == 1)))) 3272 return NULL; 3273 3274 if ((outputs = calloc (num_outputs, sizeof (ctf_dict_t *))) == NULL) 3275 { 3276 ctf_set_errno (output, ENOMEM); 3277 ctf_err_warn (output, 0, 0, 3278 _("out of memory allocating link outputs array")); 3279 return NULL; 3280 } 3281 *noutputs = num_outputs; 3282 3283 walk = outputs; 3284 *walk = output; 3285 output->ctf_refcnt++; 3286 walk++; 3287 3288 for (i = 0; i < ninputs; i++) 3289 { 3290 if (inputs[i]->ctf_dedup.cd_output) 3291 { 3292 *walk = inputs[i]->ctf_dedup.cd_output; 3293 inputs[i]->ctf_dedup.cd_output = NULL; 3294 walk++; 3295 } 3296 } 3297 3298 return outputs; 3299 } 3300 3301 /* Determine what type SRC_FP / SRC_TYPE was emitted as in the FP, which 3302 must be the shared dict or have it as a parent: return 0 if none. The SRC_FP 3303 must be a past input to ctf_dedup. */ 3304 3305 ctf_id_t 3306 ctf_dedup_type_mapping (ctf_dict_t *fp, ctf_dict_t *src_fp, ctf_id_t src_type) 3307 { 3308 ctf_dict_t *output = NULL; 3309 ctf_dedup_t *d; 3310 int input_num; 3311 void *num_ptr; 3312 void *type_ptr; 3313 int found; 3314 const char *hval; 3315 3316 /* It is an error (an internal error in the caller, in ctf-link.c) to call 3317 this with an FP that is not a per-CU output or shared output dict, or with 3318 a SRC_FP that was not passed to ctf_dedup as an input; it is an internal 3319 error in ctf-dedup for the type passed not to have been hashed, though if 3320 the src_fp is a child dict and the type is not a child type, it will have 3321 been hashed under the GID corresponding to the parent. */ 3322 3323 if (fp->ctf_dedup.cd_type_hashes != NULL) 3324 output = fp; 3325 else if (fp->ctf_parent && fp->ctf_parent->ctf_dedup.cd_type_hashes != NULL) 3326 output = fp->ctf_parent; 3327 else 3328 { 3329 ctf_set_errno (fp, ECTF_INTERNAL); 3330 ctf_err_warn (fp, 0, 0, 3331 _("dict %p passed to ctf_dedup_type_mapping is not a " 3332 "deduplicated output"), (void *) fp); 3333 return CTF_ERR; 3334 } 3335 3336 if (src_fp->ctf_parent && ctf_type_isparent (src_fp, src_type)) 3337 src_fp = src_fp->ctf_parent; 3338 3339 d = &output->ctf_dedup; 3340 3341 found = ctf_dynhash_lookup_kv (d->cd_input_nums, src_fp, NULL, &num_ptr); 3342 if (!ctf_assert (output, found != 0)) 3343 return CTF_ERR; /* errno is set for us. */ 3344 input_num = (uintptr_t) num_ptr; 3345 3346 hval = ctf_dynhash_lookup (d->cd_type_hashes, 3347 CTF_DEDUP_GID (output, input_num, src_type)); 3348 3349 if (!ctf_assert (output, hval != NULL)) 3350 return CTF_ERR; /* errno is set for us. */ 3351 3352 /* The emission hashes may be unset if this dict was created after 3353 deduplication to house variables or other things that would conflict if 3354 stored in the shared dict. */ 3355 if (fp->ctf_dedup.cd_output_emission_hashes) 3356 if (ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_output_emission_hashes, hval, 3357 NULL, &type_ptr)) 3358 return (ctf_id_t) (uintptr_t) type_ptr; 3359 3360 if (fp->ctf_parent) 3361 { 3362 ctf_dict_t *pfp = fp->ctf_parent; 3363 if (pfp->ctf_dedup.cd_output_emission_hashes) 3364 if (ctf_dynhash_lookup_kv (pfp->ctf_dedup.cd_output_emission_hashes, 3365 hval, NULL, &type_ptr)) 3366 return (ctf_id_t) (uintptr_t) type_ptr; 3367 } 3368 3369 return 0; 3370 } 3371