nir_serialize.c revision 7ec681f3
1/* 2 * Copyright © 2017 Connor Abbott 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir_serialize.h" 25#include "nir_control_flow.h" 26#include "util/u_dynarray.h" 27#include "util/u_math.h" 28 29#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1) 30#define MAX_OBJECT_IDS (1 << 20) 31 32typedef struct { 33 size_t blob_offset; 34 nir_ssa_def *src; 35 nir_block *block; 36} write_phi_fixup; 37 38typedef struct { 39 const nir_shader *nir; 40 41 struct blob *blob; 42 43 /* maps pointer to index */ 44 struct hash_table *remap_table; 45 46 /* the next index to assign to a NIR in-memory object */ 47 uint32_t next_idx; 48 49 /* Array of write_phi_fixup structs representing phi sources that need to 50 * be resolved in the second pass. 51 */ 52 struct util_dynarray phi_fixups; 53 54 /* The last serialized type. */ 55 const struct glsl_type *last_type; 56 const struct glsl_type *last_interface_type; 57 struct nir_variable_data last_var_data; 58 59 /* For skipping equal ALU headers (typical after scalarization). */ 60 nir_instr_type last_instr_type; 61 uintptr_t last_alu_header_offset; 62 63 /* Don't write optional data such as variable names. */ 64 bool strip; 65} write_ctx; 66 67typedef struct { 68 nir_shader *nir; 69 70 struct blob_reader *blob; 71 72 /* the next index to assign to a NIR in-memory object */ 73 uint32_t next_idx; 74 75 /* The length of the index -> object table */ 76 uint32_t idx_table_len; 77 78 /* map from index to deserialized pointer */ 79 void **idx_table; 80 81 /* List of phi sources. */ 82 struct list_head phi_srcs; 83 84 /* The last deserialized type. 
*/ 85 const struct glsl_type *last_type; 86 const struct glsl_type *last_interface_type; 87 struct nir_variable_data last_var_data; 88} read_ctx; 89 90static void 91write_add_object(write_ctx *ctx, const void *obj) 92{ 93 uint32_t index = ctx->next_idx++; 94 assert(index != MAX_OBJECT_IDS); 95 _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index); 96} 97 98static uint32_t 99write_lookup_object(write_ctx *ctx, const void *obj) 100{ 101 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj); 102 assert(entry); 103 return (uint32_t)(uintptr_t) entry->data; 104} 105 106static void 107read_add_object(read_ctx *ctx, void *obj) 108{ 109 assert(ctx->next_idx < ctx->idx_table_len); 110 ctx->idx_table[ctx->next_idx++] = obj; 111} 112 113static void * 114read_lookup_object(read_ctx *ctx, uint32_t idx) 115{ 116 assert(idx < ctx->idx_table_len); 117 return ctx->idx_table[idx]; 118} 119 120static void * 121read_object(read_ctx *ctx) 122{ 123 return read_lookup_object(ctx, blob_read_uint32(ctx->blob)); 124} 125 126static uint32_t 127encode_bit_size_3bits(uint8_t bit_size) 128{ 129 /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */ 130 assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size)); 131 if (bit_size) 132 return util_logbase2(bit_size) + 1; 133 return 0; 134} 135 136static uint8_t 137decode_bit_size_3bits(uint8_t bit_size) 138{ 139 if (bit_size) 140 return 1 << (bit_size - 1); 141 return 0; 142} 143 144#define NUM_COMPONENTS_IS_SEPARATE_7 7 145 146static uint8_t 147encode_num_components_in_3bits(uint8_t num_components) 148{ 149 if (num_components <= 4) 150 return num_components; 151 if (num_components == 8) 152 return 5; 153 if (num_components == 16) 154 return 6; 155 156 /* special value indicating that num_components is in the next uint32 */ 157 return NUM_COMPONENTS_IS_SEPARATE_7; 158} 159 160static uint8_t 161decode_num_components_in_3bits(uint8_t value) 162{ 163 if (value <= 4) 164 return value; 165 if (value == 5) 166 return 8; 167 if (value == 6) 168 return 16; 169 170 unreachable("invalid num_components encoding"); 171 return 0; 172} 173 174static void 175write_constant(write_ctx *ctx, const nir_constant *c) 176{ 177 blob_write_bytes(ctx->blob, c->values, sizeof(c->values)); 178 blob_write_uint32(ctx->blob, c->num_elements); 179 for (unsigned i = 0; i < c->num_elements; i++) 180 write_constant(ctx, c->elements[i]); 181} 182 183static nir_constant * 184read_constant(read_ctx *ctx, nir_variable *nvar) 185{ 186 nir_constant *c = ralloc(nvar, nir_constant); 187 188 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values)); 189 c->num_elements = blob_read_uint32(ctx->blob); 190 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements); 191 for (unsigned i = 0; i < c->num_elements; i++) 192 c->elements[i] = read_constant(ctx, nvar); 193 194 return c; 195} 196 197enum var_data_encoding { 198 var_encode_full, 199 var_encode_shader_temp, 200 var_encode_function_temp, 201 var_encode_location_diff, 202}; 203 204union packed_var { 205 uint32_t u32; 206 struct { 207 unsigned has_name:1; 208 unsigned has_constant_initializer:1; 209 unsigned has_pointer_initializer:1; 210 unsigned has_interface_type:1; 211 unsigned num_state_slots:7; 212 unsigned data_encoding:2; 213 unsigned type_same_as_last:1; 214 unsigned interface_type_same_as_last:1; 215 unsigned _pad:1; 216 unsigned num_members:16; 217 } u; 218}; 219 220union packed_var_data_diff { 221 uint32_t u32; 222 struct { 223 int location:13; 224 int location_frac:3; 225 int 
driver_location:16; 226 } u; 227}; 228 229static void 230write_variable(write_ctx *ctx, const nir_variable *var) 231{ 232 write_add_object(ctx, var); 233 234 assert(var->num_state_slots < (1 << 7)); 235 236 STATIC_ASSERT(sizeof(union packed_var) == 4); 237 union packed_var flags; 238 flags.u32 = 0; 239 240 flags.u.has_name = !ctx->strip && var->name; 241 flags.u.has_constant_initializer = !!(var->constant_initializer); 242 flags.u.has_pointer_initializer = !!(var->pointer_initializer); 243 flags.u.has_interface_type = !!(var->interface_type); 244 flags.u.type_same_as_last = var->type == ctx->last_type; 245 flags.u.interface_type_same_as_last = 246 var->interface_type && var->interface_type == ctx->last_interface_type; 247 flags.u.num_state_slots = var->num_state_slots; 248 flags.u.num_members = var->num_members; 249 250 struct nir_variable_data data = var->data; 251 252 /* When stripping, we expect that the location is no longer needed, 253 * which is typically after shaders are linked. 254 */ 255 if (ctx->strip && 256 data.mode != nir_var_system_value && 257 data.mode != nir_var_shader_in && 258 data.mode != nir_var_shader_out) 259 data.location = 0; 260 261 /* Temporary variables don't serialize var->data. */ 262 if (data.mode == nir_var_shader_temp) 263 flags.u.data_encoding = var_encode_shader_temp; 264 else if (data.mode == nir_var_function_temp) 265 flags.u.data_encoding = var_encode_function_temp; 266 else { 267 struct nir_variable_data tmp = data; 268 269 tmp.location = ctx->last_var_data.location; 270 tmp.location_frac = ctx->last_var_data.location_frac; 271 tmp.driver_location = ctx->last_var_data.driver_location; 272 273 /* See if we can encode only the difference in locations from the last 274 * variable. 275 */ 276 if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 && 277 abs((int)data.location - 278 (int)ctx->last_var_data.location) < (1 << 12) && 279 abs((int)data.driver_location - 280 (int)ctx->last_var_data.driver_location) < (1 << 15)) 281 flags.u.data_encoding = var_encode_location_diff; 282 else 283 flags.u.data_encoding = var_encode_full; 284 } 285 286 blob_write_uint32(ctx->blob, flags.u32); 287 288 if (!flags.u.type_same_as_last) { 289 encode_type_to_blob(ctx->blob, var->type); 290 ctx->last_type = var->type; 291 } 292 293 if (var->interface_type && !flags.u.interface_type_same_as_last) { 294 encode_type_to_blob(ctx->blob, var->interface_type); 295 ctx->last_interface_type = var->interface_type; 296 } 297 298 if (flags.u.has_name) 299 blob_write_string(ctx->blob, var->name); 300 301 if (flags.u.data_encoding == var_encode_full || 302 flags.u.data_encoding == var_encode_location_diff) { 303 if (flags.u.data_encoding == var_encode_full) { 304 blob_write_bytes(ctx->blob, &data, sizeof(data)); 305 } else { 306 /* Serialize only the difference in locations from the last variable. 
307 */ 308 union packed_var_data_diff diff; 309 310 diff.u.location = data.location - ctx->last_var_data.location; 311 diff.u.location_frac = data.location_frac - 312 ctx->last_var_data.location_frac; 313 diff.u.driver_location = data.driver_location - 314 ctx->last_var_data.driver_location; 315 316 blob_write_uint32(ctx->blob, diff.u32); 317 } 318 319 ctx->last_var_data = data; 320 } 321 322 for (unsigned i = 0; i < var->num_state_slots; i++) { 323 blob_write_bytes(ctx->blob, &var->state_slots[i], 324 sizeof(var->state_slots[i])); 325 } 326 if (var->constant_initializer) 327 write_constant(ctx, var->constant_initializer); 328 if (var->pointer_initializer) 329 write_lookup_object(ctx, var->pointer_initializer); 330 if (var->num_members > 0) { 331 blob_write_bytes(ctx->blob, (uint8_t *) var->members, 332 var->num_members * sizeof(*var->members)); 333 } 334} 335 336static nir_variable * 337read_variable(read_ctx *ctx) 338{ 339 nir_variable *var = rzalloc(ctx->nir, nir_variable); 340 read_add_object(ctx, var); 341 342 union packed_var flags; 343 flags.u32 = blob_read_uint32(ctx->blob); 344 345 if (flags.u.type_same_as_last) { 346 var->type = ctx->last_type; 347 } else { 348 var->type = decode_type_from_blob(ctx->blob); 349 ctx->last_type = var->type; 350 } 351 352 if (flags.u.has_interface_type) { 353 if (flags.u.interface_type_same_as_last) { 354 var->interface_type = ctx->last_interface_type; 355 } else { 356 var->interface_type = decode_type_from_blob(ctx->blob); 357 ctx->last_interface_type = var->interface_type; 358 } 359 } 360 361 if (flags.u.has_name) { 362 const char *name = blob_read_string(ctx->blob); 363 var->name = ralloc_strdup(var, name); 364 } else { 365 var->name = NULL; 366 } 367 368 if (flags.u.data_encoding == var_encode_shader_temp) 369 var->data.mode = nir_var_shader_temp; 370 else if (flags.u.data_encoding == var_encode_function_temp) 371 var->data.mode = nir_var_function_temp; 372 else if (flags.u.data_encoding == var_encode_full) { 373 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data)); 374 ctx->last_var_data = var->data; 375 } else { /* var_encode_location_diff */ 376 union packed_var_data_diff diff; 377 diff.u32 = blob_read_uint32(ctx->blob); 378 379 var->data = ctx->last_var_data; 380 var->data.location += diff.u.location; 381 var->data.location_frac += diff.u.location_frac; 382 var->data.driver_location += diff.u.driver_location; 383 384 ctx->last_var_data = var->data; 385 } 386 387 var->num_state_slots = flags.u.num_state_slots; 388 if (var->num_state_slots != 0) { 389 var->state_slots = ralloc_array(var, nir_state_slot, 390 var->num_state_slots); 391 for (unsigned i = 0; i < var->num_state_slots; i++) { 392 blob_copy_bytes(ctx->blob, &var->state_slots[i], 393 sizeof(var->state_slots[i])); 394 } 395 } 396 if (flags.u.has_constant_initializer) 397 var->constant_initializer = read_constant(ctx, var); 398 else 399 var->constant_initializer = NULL; 400 401 if (flags.u.has_pointer_initializer) 402 var->pointer_initializer = read_object(ctx); 403 else 404 var->pointer_initializer = NULL; 405 406 var->num_members = flags.u.num_members; 407 if (var->num_members > 0) { 408 var->members = ralloc_array(var, struct nir_variable_data, 409 var->num_members); 410 blob_copy_bytes(ctx->blob, (uint8_t *) var->members, 411 var->num_members * sizeof(*var->members)); 412 } 413 414 return var; 415} 416 417static void 418write_var_list(write_ctx *ctx, const struct exec_list *src) 419{ 420 blob_write_uint32(ctx->blob, exec_list_length(src)); 421 
foreach_list_typed(nir_variable, var, node, src) { 422 write_variable(ctx, var); 423 } 424} 425 426static void 427read_var_list(read_ctx *ctx, struct exec_list *dst) 428{ 429 exec_list_make_empty(dst); 430 unsigned num_vars = blob_read_uint32(ctx->blob); 431 for (unsigned i = 0; i < num_vars; i++) { 432 nir_variable *var = read_variable(ctx); 433 exec_list_push_tail(dst, &var->node); 434 } 435} 436 437static void 438write_register(write_ctx *ctx, const nir_register *reg) 439{ 440 write_add_object(ctx, reg); 441 blob_write_uint32(ctx->blob, reg->num_components); 442 blob_write_uint32(ctx->blob, reg->bit_size); 443 blob_write_uint32(ctx->blob, reg->num_array_elems); 444 blob_write_uint32(ctx->blob, reg->index); 445} 446 447static nir_register * 448read_register(read_ctx *ctx) 449{ 450 nir_register *reg = ralloc(ctx->nir, nir_register); 451 read_add_object(ctx, reg); 452 reg->num_components = blob_read_uint32(ctx->blob); 453 reg->bit_size = blob_read_uint32(ctx->blob); 454 reg->num_array_elems = blob_read_uint32(ctx->blob); 455 reg->index = blob_read_uint32(ctx->blob); 456 457 list_inithead(®->uses); 458 list_inithead(®->defs); 459 list_inithead(®->if_uses); 460 461 return reg; 462} 463 464static void 465write_reg_list(write_ctx *ctx, const struct exec_list *src) 466{ 467 blob_write_uint32(ctx->blob, exec_list_length(src)); 468 foreach_list_typed(nir_register, reg, node, src) 469 write_register(ctx, reg); 470} 471 472static void 473read_reg_list(read_ctx *ctx, struct exec_list *dst) 474{ 475 exec_list_make_empty(dst); 476 unsigned num_regs = blob_read_uint32(ctx->blob); 477 for (unsigned i = 0; i < num_regs; i++) { 478 nir_register *reg = read_register(ctx); 479 exec_list_push_tail(dst, ®->node); 480 } 481} 482 483union packed_src { 484 uint32_t u32; 485 struct { 486 unsigned is_ssa:1; /* <-- Header */ 487 unsigned is_indirect:1; 488 unsigned object_idx:20; 489 unsigned _footer:10; /* <-- Footer */ 490 } any; 491 struct { 492 unsigned _header:22; /* <-- Header */ 493 unsigned negate:1; /* <-- Footer */ 494 unsigned abs:1; 495 unsigned swizzle_x:2; 496 unsigned swizzle_y:2; 497 unsigned swizzle_z:2; 498 unsigned swizzle_w:2; 499 } alu; 500 struct { 501 unsigned _header:22; /* <-- Header */ 502 unsigned src_type:5; /* <-- Footer */ 503 unsigned _pad:5; 504 } tex; 505}; 506 507static void 508write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header) 509{ 510 /* Since sources are very frequent, we try to save some space when storing 511 * them. In particular, we store whether the source is a register and 512 * whether the register has an indirect index in the low two bits. We can 513 * assume that the high two bits of the index are zero, since otherwise our 514 * address space would've been exhausted allocating the remap table! 
515 */ 516 header.any.is_ssa = src->is_ssa; 517 if (src->is_ssa) { 518 header.any.object_idx = write_lookup_object(ctx, src->ssa); 519 blob_write_uint32(ctx->blob, header.u32); 520 } else { 521 header.any.object_idx = write_lookup_object(ctx, src->reg.reg); 522 header.any.is_indirect = !!src->reg.indirect; 523 blob_write_uint32(ctx->blob, header.u32); 524 blob_write_uint32(ctx->blob, src->reg.base_offset); 525 if (src->reg.indirect) { 526 union packed_src header = {0}; 527 write_src_full(ctx, src->reg.indirect, header); 528 } 529 } 530} 531 532static void 533write_src(write_ctx *ctx, const nir_src *src) 534{ 535 union packed_src header = {0}; 536 write_src_full(ctx, src, header); 537} 538 539static union packed_src 540read_src(read_ctx *ctx, nir_src *src, void *mem_ctx) 541{ 542 STATIC_ASSERT(sizeof(union packed_src) == 4); 543 union packed_src header; 544 header.u32 = blob_read_uint32(ctx->blob); 545 546 src->is_ssa = header.any.is_ssa; 547 if (src->is_ssa) { 548 src->ssa = read_lookup_object(ctx, header.any.object_idx); 549 } else { 550 src->reg.reg = read_lookup_object(ctx, header.any.object_idx); 551 src->reg.base_offset = blob_read_uint32(ctx->blob); 552 if (header.any.is_indirect) { 553 src->reg.indirect = malloc(sizeof(nir_src)); 554 read_src(ctx, src->reg.indirect, mem_ctx); 555 } else { 556 src->reg.indirect = NULL; 557 } 558 } 559 return header; 560} 561 562union packed_dest { 563 uint8_t u8; 564 struct { 565 uint8_t is_ssa:1; 566 uint8_t num_components:3; 567 uint8_t bit_size:3; 568 uint8_t _pad:1; 569 } ssa; 570 struct { 571 uint8_t is_ssa:1; 572 uint8_t is_indirect:1; 573 uint8_t _pad:6; 574 } reg; 575}; 576 577enum intrinsic_const_indices_encoding { 578 /* Use the 9 bits of packed_const_indices to store 1-9 indices. 579 * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or 580 * 4 2-bit indices, or 5-9 1-bit indices. 581 * 582 * The common case for load_ubo is 0, 0, 0, which is trivially represented. 583 * The common cases for load_interpolated_input also fit here, e.g.: 7, 3 584 */ 585 const_indices_9bit_all_combined, 586 587 const_indices_8bit, /* 8 bits per element */ 588 const_indices_16bit, /* 16 bits per element */ 589 const_indices_32bit, /* 32 bits per element */ 590}; 591 592enum load_const_packing { 593 /* Constants are not packed and are stored in following dwords. */ 594 load_const_full, 595 596 /* packed_value contains high 19 bits, low bits are 0, 597 * good for floating-point decimals 598 */ 599 load_const_scalar_hi_19bits, 600 601 /* packed_value contains low 19 bits, high bits are sign-extended */ 602 load_const_scalar_lo_19bits_sext, 603}; 604 605union packed_instr { 606 uint32_t u32; 607 struct { 608 unsigned instr_type:4; /* always present */ 609 unsigned _pad:20; 610 unsigned dest:8; /* always last */ 611 } any; 612 struct { 613 unsigned instr_type:4; 614 unsigned exact:1; 615 unsigned no_signed_wrap:1; 616 unsigned no_unsigned_wrap:1; 617 unsigned saturate:1; 618 /* Reg: writemask; SSA: swizzles for 2 srcs */ 619 unsigned writemask_or_two_swizzles:4; 620 unsigned op:9; 621 unsigned packed_src_ssa_16bit:1; 622 /* Scalarized ALUs always have the same header. 
*/ 623 unsigned num_followup_alu_sharing_header:2; 624 unsigned dest:8; 625 } alu; 626 struct { 627 unsigned instr_type:4; 628 unsigned deref_type:3; 629 unsigned cast_type_same_as_last:1; 630 unsigned modes:14; /* deref_var redefines this */ 631 unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */ 632 unsigned _pad:1; /* deref_var redefines this */ 633 unsigned dest:8; 634 } deref; 635 struct { 636 unsigned instr_type:4; 637 unsigned deref_type:3; 638 unsigned _pad:1; 639 unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */ 640 unsigned dest:8; 641 } deref_var; 642 struct { 643 unsigned instr_type:4; 644 unsigned intrinsic:9; 645 unsigned const_indices_encoding:2; 646 unsigned packed_const_indices:9; 647 unsigned dest:8; 648 } intrinsic; 649 struct { 650 unsigned instr_type:4; 651 unsigned last_component:4; 652 unsigned bit_size:3; 653 unsigned packing:2; /* enum load_const_packing */ 654 unsigned packed_value:19; /* meaning determined by packing */ 655 } load_const; 656 struct { 657 unsigned instr_type:4; 658 unsigned last_component:4; 659 unsigned bit_size:3; 660 unsigned _pad:21; 661 } undef; 662 struct { 663 unsigned instr_type:4; 664 unsigned num_srcs:4; 665 unsigned op:4; 666 unsigned dest:8; 667 unsigned _pad:12; 668 } tex; 669 struct { 670 unsigned instr_type:4; 671 unsigned num_srcs:20; 672 unsigned dest:8; 673 } phi; 674 struct { 675 unsigned instr_type:4; 676 unsigned type:2; 677 unsigned _pad:26; 678 } jump; 679}; 680 681/* Write "lo24" as low 24 bits in the first uint32. */ 682static void 683write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header, 684 nir_instr_type instr_type) 685{ 686 STATIC_ASSERT(sizeof(union packed_dest) == 1); 687 union packed_dest dest; 688 dest.u8 = 0; 689 690 dest.ssa.is_ssa = dst->is_ssa; 691 if (dst->is_ssa) { 692 dest.ssa.num_components = 693 encode_num_components_in_3bits(dst->ssa.num_components); 694 dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size); 695 } else { 696 dest.reg.is_indirect = !!(dst->reg.indirect); 697 } 698 header.any.dest = dest.u8; 699 700 /* Check if the current ALU instruction has the same header as the previous 701 * instruction that is also ALU. If it is, we don't have to write 702 * the current header. This is a typical occurence after scalarization. 703 */ 704 if (instr_type == nir_instr_type_alu) { 705 bool equal_header = false; 706 707 if (ctx->last_instr_type == nir_instr_type_alu) { 708 assert(ctx->last_alu_header_offset); 709 union packed_instr last_header; 710 memcpy(&last_header, ctx->blob->data + ctx->last_alu_header_offset, 711 sizeof(last_header)); 712 713 /* Clear the field that counts ALUs with equal headers. */ 714 union packed_instr clean_header; 715 clean_header.u32 = last_header.u32; 716 clean_header.alu.num_followup_alu_sharing_header = 0; 717 718 /* There can be at most 4 consecutive ALU instructions 719 * sharing the same header. 
720 */ 721 if (last_header.alu.num_followup_alu_sharing_header < 3 && 722 header.u32 == clean_header.u32) { 723 last_header.alu.num_followup_alu_sharing_header++; 724 memcpy(ctx->blob->data + ctx->last_alu_header_offset, 725 &last_header, sizeof(last_header)); 726 727 equal_header = true; 728 } 729 } 730 731 if (!equal_header) { 732 ctx->last_alu_header_offset = ctx->blob->size; 733 blob_write_uint32(ctx->blob, header.u32); 734 } 735 } else { 736 blob_write_uint32(ctx->blob, header.u32); 737 } 738 739 if (dest.ssa.is_ssa && 740 dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 741 blob_write_uint32(ctx->blob, dst->ssa.num_components); 742 743 if (dst->is_ssa) { 744 write_add_object(ctx, &dst->ssa); 745 } else { 746 blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg)); 747 blob_write_uint32(ctx->blob, dst->reg.base_offset); 748 if (dst->reg.indirect) 749 write_src(ctx, dst->reg.indirect); 750 } 751} 752 753static void 754read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr, 755 union packed_instr header) 756{ 757 union packed_dest dest; 758 dest.u8 = header.any.dest; 759 760 if (dest.ssa.is_ssa) { 761 unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size); 762 unsigned num_components; 763 if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7) 764 num_components = blob_read_uint32(ctx->blob); 765 else 766 num_components = decode_num_components_in_3bits(dest.ssa.num_components); 767 nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL); 768 read_add_object(ctx, &dst->ssa); 769 } else { 770 dst->reg.reg = read_object(ctx); 771 dst->reg.base_offset = blob_read_uint32(ctx->blob); 772 if (dest.reg.is_indirect) { 773 dst->reg.indirect = malloc(sizeof(nir_src)); 774 read_src(ctx, dst->reg.indirect, instr); 775 } 776 } 777} 778 779static bool 780are_object_ids_16bit(write_ctx *ctx) 781{ 782 /* Check the highest object ID, because they are monotonic. */ 783 return ctx->next_idx < (1 << 16); 784} 785 786static bool 787is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu) 788{ 789 unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 790 791 for (unsigned i = 0; i < num_srcs; i++) { 792 if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate) 793 return false; 794 795 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 796 797 for (unsigned chan = 0; chan < src_components; chan++) { 798 /* The swizzles for src0.x and src1.x are stored 799 * in writemask_or_two_swizzles for SSA ALUs. 800 */ 801 if (alu->dest.dest.is_ssa && i < 2 && chan == 0 && 802 alu->src[i].swizzle[chan] < 4) 803 continue; 804 805 if (alu->src[i].swizzle[chan] != chan) 806 return false; 807 } 808 } 809 810 return are_object_ids_16bit(ctx); 811} 812 813static void 814write_alu(write_ctx *ctx, const nir_alu_instr *alu) 815{ 816 unsigned num_srcs = nir_op_infos[alu->op].num_inputs; 817 unsigned dst_components = nir_dest_num_components(alu->dest.dest); 818 819 /* 9 bits for nir_op */ 820 STATIC_ASSERT(nir_num_opcodes <= 512); 821 union packed_instr header; 822 header.u32 = 0; 823 824 header.alu.instr_type = alu->instr.type; 825 header.alu.exact = alu->exact; 826 header.alu.no_signed_wrap = alu->no_signed_wrap; 827 header.alu.no_unsigned_wrap = alu->no_unsigned_wrap; 828 header.alu.saturate = alu->dest.saturate; 829 header.alu.op = alu->op; 830 header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu); 831 832 if (header.alu.packed_src_ssa_16bit && 833 alu->dest.dest.is_ssa) { 834 /* For packed srcs of SSA ALUs, this field stores the swizzles. 
*/ 835 header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0]; 836 if (num_srcs > 1) 837 header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2; 838 } else if (!alu->dest.dest.is_ssa && dst_components <= 4) { 839 /* For vec4 registers, this field is a writemask. */ 840 header.alu.writemask_or_two_swizzles = alu->dest.write_mask; 841 } 842 843 write_dest(ctx, &alu->dest.dest, header, alu->instr.type); 844 845 if (!alu->dest.dest.is_ssa && dst_components > 4) 846 blob_write_uint32(ctx->blob, alu->dest.write_mask); 847 848 if (header.alu.packed_src_ssa_16bit) { 849 for (unsigned i = 0; i < num_srcs; i++) { 850 assert(alu->src[i].src.is_ssa); 851 unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa); 852 assert(idx < (1 << 16)); 853 blob_write_uint16(ctx->blob, idx); 854 } 855 } else { 856 for (unsigned i = 0; i < num_srcs; i++) { 857 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 858 unsigned src_components = nir_src_num_components(alu->src[i].src); 859 union packed_src src; 860 bool packed = src_components <= 4 && src_channels <= 4; 861 src.u32 = 0; 862 863 src.alu.negate = alu->src[i].negate; 864 src.alu.abs = alu->src[i].abs; 865 866 if (packed) { 867 src.alu.swizzle_x = alu->src[i].swizzle[0]; 868 src.alu.swizzle_y = alu->src[i].swizzle[1]; 869 src.alu.swizzle_z = alu->src[i].swizzle[2]; 870 src.alu.swizzle_w = alu->src[i].swizzle[3]; 871 } 872 873 write_src_full(ctx, &alu->src[i].src, src); 874 875 /* Store swizzles for vec8 and vec16. */ 876 if (!packed) { 877 for (unsigned o = 0; o < src_channels; o += 8) { 878 unsigned value = 0; 879 880 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 881 value |= (uint32_t)alu->src[i].swizzle[o + j] << 882 (4 * j); /* 4 bits per swizzle */ 883 } 884 885 blob_write_uint32(ctx->blob, value); 886 } 887 } 888 } 889 } 890} 891 892static nir_alu_instr * 893read_alu(read_ctx *ctx, union packed_instr header) 894{ 895 unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs; 896 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op); 897 898 alu->exact = header.alu.exact; 899 alu->no_signed_wrap = header.alu.no_signed_wrap; 900 alu->no_unsigned_wrap = header.alu.no_unsigned_wrap; 901 alu->dest.saturate = header.alu.saturate; 902 903 read_dest(ctx, &alu->dest.dest, &alu->instr, header); 904 905 unsigned dst_components = nir_dest_num_components(alu->dest.dest); 906 907 if (alu->dest.dest.is_ssa) { 908 alu->dest.write_mask = u_bit_consecutive(0, dst_components); 909 } else if (dst_components <= 4) { 910 alu->dest.write_mask = header.alu.writemask_or_two_swizzles; 911 } else { 912 alu->dest.write_mask = blob_read_uint32(ctx->blob); 913 } 914 915 if (header.alu.packed_src_ssa_16bit) { 916 for (unsigned i = 0; i < num_srcs; i++) { 917 nir_alu_src *src = &alu->src[i]; 918 src->src.is_ssa = true; 919 src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 920 921 memset(&src->swizzle, 0, sizeof(src->swizzle)); 922 923 unsigned src_components = nir_ssa_alu_instr_src_components(alu, i); 924 925 for (unsigned chan = 0; chan < src_components; chan++) 926 src->swizzle[chan] = chan; 927 } 928 } else { 929 for (unsigned i = 0; i < num_srcs; i++) { 930 union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr); 931 unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i); 932 unsigned src_components = nir_src_num_components(alu->src[i].src); 933 bool packed = src_components <= 4 && src_channels <= 4; 934 935 alu->src[i].negate = src.alu.negate; 936 alu->src[i].abs = 
src.alu.abs; 937 938 memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle)); 939 940 if (packed) { 941 alu->src[i].swizzle[0] = src.alu.swizzle_x; 942 alu->src[i].swizzle[1] = src.alu.swizzle_y; 943 alu->src[i].swizzle[2] = src.alu.swizzle_z; 944 alu->src[i].swizzle[3] = src.alu.swizzle_w; 945 } else { 946 /* Load swizzles for vec8 and vec16. */ 947 for (unsigned o = 0; o < src_channels; o += 8) { 948 unsigned value = blob_read_uint32(ctx->blob); 949 950 for (unsigned j = 0; j < 8 && o + j < src_channels; j++) { 951 alu->src[i].swizzle[o + j] = 952 (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */ 953 } 954 } 955 } 956 } 957 } 958 959 if (header.alu.packed_src_ssa_16bit && 960 alu->dest.dest.is_ssa) { 961 alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3; 962 if (num_srcs > 1) 963 alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2; 964 } 965 966 return alu; 967} 968 969static void 970write_deref(write_ctx *ctx, const nir_deref_instr *deref) 971{ 972 assert(deref->deref_type < 8); 973 assert(deref->modes < (1 << 14)); 974 975 union packed_instr header; 976 header.u32 = 0; 977 978 header.deref.instr_type = deref->instr.type; 979 header.deref.deref_type = deref->deref_type; 980 981 if (deref->deref_type == nir_deref_type_cast) { 982 header.deref.modes = deref->modes; 983 header.deref.cast_type_same_as_last = deref->type == ctx->last_type; 984 } 985 986 unsigned var_idx = 0; 987 if (deref->deref_type == nir_deref_type_var) { 988 var_idx = write_lookup_object(ctx, deref->var); 989 if (var_idx && var_idx < (1 << 16)) 990 header.deref_var.object_idx = var_idx; 991 } 992 993 if (deref->deref_type == nir_deref_type_array || 994 deref->deref_type == nir_deref_type_ptr_as_array) { 995 header.deref.packed_src_ssa_16bit = 996 deref->parent.is_ssa && deref->arr.index.is_ssa && 997 are_object_ids_16bit(ctx); 998 } 999 1000 write_dest(ctx, &deref->dest, header, deref->instr.type); 1001 1002 switch (deref->deref_type) { 1003 case nir_deref_type_var: 1004 if (!header.deref_var.object_idx) 1005 blob_write_uint32(ctx->blob, var_idx); 1006 break; 1007 1008 case nir_deref_type_struct: 1009 write_src(ctx, &deref->parent); 1010 blob_write_uint32(ctx->blob, deref->strct.index); 1011 break; 1012 1013 case nir_deref_type_array: 1014 case nir_deref_type_ptr_as_array: 1015 if (header.deref.packed_src_ssa_16bit) { 1016 blob_write_uint16(ctx->blob, 1017 write_lookup_object(ctx, deref->parent.ssa)); 1018 blob_write_uint16(ctx->blob, 1019 write_lookup_object(ctx, deref->arr.index.ssa)); 1020 } else { 1021 write_src(ctx, &deref->parent); 1022 write_src(ctx, &deref->arr.index); 1023 } 1024 break; 1025 1026 case nir_deref_type_cast: 1027 write_src(ctx, &deref->parent); 1028 blob_write_uint32(ctx->blob, deref->cast.ptr_stride); 1029 blob_write_uint32(ctx->blob, deref->cast.align_mul); 1030 blob_write_uint32(ctx->blob, deref->cast.align_offset); 1031 if (!header.deref.cast_type_same_as_last) { 1032 encode_type_to_blob(ctx->blob, deref->type); 1033 ctx->last_type = deref->type; 1034 } 1035 break; 1036 1037 case nir_deref_type_array_wildcard: 1038 write_src(ctx, &deref->parent); 1039 break; 1040 1041 default: 1042 unreachable("Invalid deref type"); 1043 } 1044} 1045 1046static nir_deref_instr * 1047read_deref(read_ctx *ctx, union packed_instr header) 1048{ 1049 nir_deref_type deref_type = header.deref.deref_type; 1050 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type); 1051 1052 read_dest(ctx, &deref->dest, &deref->instr, header); 1053 1054 nir_deref_instr *parent; 1055 
1056 switch (deref->deref_type) { 1057 case nir_deref_type_var: 1058 if (header.deref_var.object_idx) 1059 deref->var = read_lookup_object(ctx, header.deref_var.object_idx); 1060 else 1061 deref->var = read_object(ctx); 1062 1063 deref->type = deref->var->type; 1064 break; 1065 1066 case nir_deref_type_struct: 1067 read_src(ctx, &deref->parent, &deref->instr); 1068 parent = nir_src_as_deref(deref->parent); 1069 deref->strct.index = blob_read_uint32(ctx->blob); 1070 deref->type = glsl_get_struct_field(parent->type, deref->strct.index); 1071 break; 1072 1073 case nir_deref_type_array: 1074 case nir_deref_type_ptr_as_array: 1075 if (header.deref.packed_src_ssa_16bit) { 1076 deref->parent.is_ssa = true; 1077 deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 1078 deref->arr.index.is_ssa = true; 1079 deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob)); 1080 } else { 1081 read_src(ctx, &deref->parent, &deref->instr); 1082 read_src(ctx, &deref->arr.index, &deref->instr); 1083 } 1084 1085 parent = nir_src_as_deref(deref->parent); 1086 if (deref->deref_type == nir_deref_type_array) 1087 deref->type = glsl_get_array_element(parent->type); 1088 else 1089 deref->type = parent->type; 1090 break; 1091 1092 case nir_deref_type_cast: 1093 read_src(ctx, &deref->parent, &deref->instr); 1094 deref->cast.ptr_stride = blob_read_uint32(ctx->blob); 1095 deref->cast.align_mul = blob_read_uint32(ctx->blob); 1096 deref->cast.align_offset = blob_read_uint32(ctx->blob); 1097 if (header.deref.cast_type_same_as_last) { 1098 deref->type = ctx->last_type; 1099 } else { 1100 deref->type = decode_type_from_blob(ctx->blob); 1101 ctx->last_type = deref->type; 1102 } 1103 break; 1104 1105 case nir_deref_type_array_wildcard: 1106 read_src(ctx, &deref->parent, &deref->instr); 1107 parent = nir_src_as_deref(deref->parent); 1108 deref->type = glsl_get_array_element(parent->type); 1109 break; 1110 1111 default: 1112 unreachable("Invalid deref type"); 1113 } 1114 1115 if (deref_type == nir_deref_type_var) { 1116 deref->modes = deref->var->data.mode; 1117 } else if (deref->deref_type == nir_deref_type_cast) { 1118 deref->modes = header.deref.modes; 1119 } else { 1120 assert(deref->parent.is_ssa); 1121 deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes; 1122 } 1123 1124 return deref; 1125} 1126 1127static void 1128write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) 1129{ 1130 /* 9 bits for nir_intrinsic_op */ 1131 STATIC_ASSERT(nir_num_intrinsics <= 512); 1132 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs; 1133 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices; 1134 assert(intrin->intrinsic < 512); 1135 1136 union packed_instr header; 1137 header.u32 = 0; 1138 1139 header.intrinsic.instr_type = intrin->instr.type; 1140 header.intrinsic.intrinsic = intrin->intrinsic; 1141 1142 /* Analyze constant indices to decide how to encode them. */ 1143 if (num_indices) { 1144 unsigned max_bits = 0; 1145 for (unsigned i = 0; i < num_indices; i++) { 1146 unsigned max = util_last_bit(intrin->const_index[i]); 1147 max_bits = MAX2(max_bits, max); 1148 } 1149 1150 if (max_bits * num_indices <= 9) { 1151 header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined; 1152 1153 /* Pack all const indices into 6 bits. 
*/ 1154 unsigned bit_size = 9 / num_indices; 1155 for (unsigned i = 0; i < num_indices; i++) { 1156 header.intrinsic.packed_const_indices |= 1157 intrin->const_index[i] << (i * bit_size); 1158 } 1159 } else if (max_bits <= 8) 1160 header.intrinsic.const_indices_encoding = const_indices_8bit; 1161 else if (max_bits <= 16) 1162 header.intrinsic.const_indices_encoding = const_indices_16bit; 1163 else 1164 header.intrinsic.const_indices_encoding = const_indices_32bit; 1165 } 1166 1167 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) 1168 write_dest(ctx, &intrin->dest, header, intrin->instr.type); 1169 else 1170 blob_write_uint32(ctx->blob, header.u32); 1171 1172 for (unsigned i = 0; i < num_srcs; i++) 1173 write_src(ctx, &intrin->src[i]); 1174 1175 if (num_indices) { 1176 switch (header.intrinsic.const_indices_encoding) { 1177 case const_indices_8bit: 1178 for (unsigned i = 0; i < num_indices; i++) 1179 blob_write_uint8(ctx->blob, intrin->const_index[i]); 1180 break; 1181 case const_indices_16bit: 1182 for (unsigned i = 0; i < num_indices; i++) 1183 blob_write_uint16(ctx->blob, intrin->const_index[i]); 1184 break; 1185 case const_indices_32bit: 1186 for (unsigned i = 0; i < num_indices; i++) 1187 blob_write_uint32(ctx->blob, intrin->const_index[i]); 1188 break; 1189 } 1190 } 1191} 1192 1193static nir_intrinsic_instr * 1194read_intrinsic(read_ctx *ctx, union packed_instr header) 1195{ 1196 nir_intrinsic_op op = header.intrinsic.intrinsic; 1197 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op); 1198 1199 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; 1200 unsigned num_indices = nir_intrinsic_infos[op].num_indices; 1201 1202 if (nir_intrinsic_infos[op].has_dest) 1203 read_dest(ctx, &intrin->dest, &intrin->instr, header); 1204 1205 for (unsigned i = 0; i < num_srcs; i++) 1206 read_src(ctx, &intrin->src[i], &intrin->instr); 1207 1208 /* Vectorized instrinsics have num_components same as dst or src that has 1209 * 0 components in the info. Find it. 
1210 */ 1211 if (nir_intrinsic_infos[op].has_dest && 1212 nir_intrinsic_infos[op].dest_components == 0) { 1213 intrin->num_components = nir_dest_num_components(intrin->dest); 1214 } else { 1215 for (unsigned i = 0; i < num_srcs; i++) { 1216 if (nir_intrinsic_infos[op].src_components[i] == 0) { 1217 intrin->num_components = nir_src_num_components(intrin->src[i]); 1218 break; 1219 } 1220 } 1221 } 1222 1223 if (num_indices) { 1224 switch (header.intrinsic.const_indices_encoding) { 1225 case const_indices_9bit_all_combined: { 1226 unsigned bit_size = 9 / num_indices; 1227 unsigned bit_mask = u_bit_consecutive(0, bit_size); 1228 for (unsigned i = 0; i < num_indices; i++) { 1229 intrin->const_index[i] = 1230 (header.intrinsic.packed_const_indices >> (i * bit_size)) & 1231 bit_mask; 1232 } 1233 break; 1234 } 1235 case const_indices_8bit: 1236 for (unsigned i = 0; i < num_indices; i++) 1237 intrin->const_index[i] = blob_read_uint8(ctx->blob); 1238 break; 1239 case const_indices_16bit: 1240 for (unsigned i = 0; i < num_indices; i++) 1241 intrin->const_index[i] = blob_read_uint16(ctx->blob); 1242 break; 1243 case const_indices_32bit: 1244 for (unsigned i = 0; i < num_indices; i++) 1245 intrin->const_index[i] = blob_read_uint32(ctx->blob); 1246 break; 1247 } 1248 } 1249 1250 return intrin; 1251} 1252 1253static void 1254write_load_const(write_ctx *ctx, const nir_load_const_instr *lc) 1255{ 1256 assert(lc->def.num_components >= 1 && lc->def.num_components <= 16); 1257 union packed_instr header; 1258 header.u32 = 0; 1259 1260 header.load_const.instr_type = lc->instr.type; 1261 header.load_const.last_component = lc->def.num_components - 1; 1262 header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size); 1263 header.load_const.packing = load_const_full; 1264 1265 /* Try to pack 1-component constants into the 19 free bits in the header. 
*/ 1266 if (lc->def.num_components == 1) { 1267 switch (lc->def.bit_size) { 1268 case 64: 1269 if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) { 1270 /* packed_value contains high 19 bits, low bits are 0 */ 1271 header.load_const.packing = load_const_scalar_hi_19bits; 1272 header.load_const.packed_value = lc->value[0].u64 >> 45; 1273 } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) { 1274 /* packed_value contains low 19 bits, high bits are sign-extended */ 1275 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1276 header.load_const.packed_value = lc->value[0].u64; 1277 } 1278 break; 1279 1280 case 32: 1281 if ((lc->value[0].u32 & 0x1fff) == 0) { 1282 header.load_const.packing = load_const_scalar_hi_19bits; 1283 header.load_const.packed_value = lc->value[0].u32 >> 13; 1284 } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) { 1285 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1286 header.load_const.packed_value = lc->value[0].u32; 1287 } 1288 break; 1289 1290 case 16: 1291 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1292 header.load_const.packed_value = lc->value[0].u16; 1293 break; 1294 case 8: 1295 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1296 header.load_const.packed_value = lc->value[0].u8; 1297 break; 1298 case 1: 1299 header.load_const.packing = load_const_scalar_lo_19bits_sext; 1300 header.load_const.packed_value = lc->value[0].b; 1301 break; 1302 default: 1303 unreachable("invalid bit_size"); 1304 } 1305 } 1306 1307 blob_write_uint32(ctx->blob, header.u32); 1308 1309 if (header.load_const.packing == load_const_full) { 1310 switch (lc->def.bit_size) { 1311 case 64: 1312 blob_write_bytes(ctx->blob, lc->value, 1313 sizeof(*lc->value) * lc->def.num_components); 1314 break; 1315 1316 case 32: 1317 for (unsigned i = 0; i < lc->def.num_components; i++) 1318 blob_write_uint32(ctx->blob, lc->value[i].u32); 1319 break; 1320 1321 case 16: 1322 for (unsigned i = 0; i < lc->def.num_components; i++) 1323 blob_write_uint16(ctx->blob, lc->value[i].u16); 1324 break; 1325 1326 default: 1327 assert(lc->def.bit_size <= 8); 1328 for (unsigned i = 0; i < lc->def.num_components; i++) 1329 blob_write_uint8(ctx->blob, lc->value[i].u8); 1330 break; 1331 } 1332 } 1333 1334 write_add_object(ctx, &lc->def); 1335} 1336 1337static nir_load_const_instr * 1338read_load_const(read_ctx *ctx, union packed_instr header) 1339{ 1340 nir_load_const_instr *lc = 1341 nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1, 1342 decode_bit_size_3bits(header.load_const.bit_size)); 1343 1344 switch (header.load_const.packing) { 1345 case load_const_scalar_hi_19bits: 1346 switch (lc->def.bit_size) { 1347 case 64: 1348 lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45; 1349 break; 1350 case 32: 1351 lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13; 1352 break; 1353 default: 1354 unreachable("invalid bit_size"); 1355 } 1356 break; 1357 1358 case load_const_scalar_lo_19bits_sext: 1359 switch (lc->def.bit_size) { 1360 case 64: 1361 lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45; 1362 break; 1363 case 32: 1364 lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13; 1365 break; 1366 case 16: 1367 lc->value[0].u16 = header.load_const.packed_value; 1368 break; 1369 case 8: 1370 lc->value[0].u8 = header.load_const.packed_value; 1371 break; 1372 case 1: 1373 lc->value[0].b = header.load_const.packed_value; 1374 break; 1375 default: 1376 
unreachable("invalid bit_size"); 1377 } 1378 break; 1379 1380 case load_const_full: 1381 switch (lc->def.bit_size) { 1382 case 64: 1383 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components); 1384 break; 1385 1386 case 32: 1387 for (unsigned i = 0; i < lc->def.num_components; i++) 1388 lc->value[i].u32 = blob_read_uint32(ctx->blob); 1389 break; 1390 1391 case 16: 1392 for (unsigned i = 0; i < lc->def.num_components; i++) 1393 lc->value[i].u16 = blob_read_uint16(ctx->blob); 1394 break; 1395 1396 default: 1397 assert(lc->def.bit_size <= 8); 1398 for (unsigned i = 0; i < lc->def.num_components; i++) 1399 lc->value[i].u8 = blob_read_uint8(ctx->blob); 1400 break; 1401 } 1402 break; 1403 } 1404 1405 read_add_object(ctx, &lc->def); 1406 return lc; 1407} 1408 1409static void 1410write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef) 1411{ 1412 assert(undef->def.num_components >= 1 && undef->def.num_components <= 16); 1413 1414 union packed_instr header; 1415 header.u32 = 0; 1416 1417 header.undef.instr_type = undef->instr.type; 1418 header.undef.last_component = undef->def.num_components - 1; 1419 header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size); 1420 1421 blob_write_uint32(ctx->blob, header.u32); 1422 write_add_object(ctx, &undef->def); 1423} 1424 1425static nir_ssa_undef_instr * 1426read_ssa_undef(read_ctx *ctx, union packed_instr header) 1427{ 1428 nir_ssa_undef_instr *undef = 1429 nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1, 1430 decode_bit_size_3bits(header.undef.bit_size)); 1431 1432 read_add_object(ctx, &undef->def); 1433 return undef; 1434} 1435 1436union packed_tex_data { 1437 uint32_t u32; 1438 struct { 1439 unsigned sampler_dim:4; 1440 unsigned dest_type:8; 1441 unsigned coord_components:3; 1442 unsigned is_array:1; 1443 unsigned is_shadow:1; 1444 unsigned is_new_style_shadow:1; 1445 unsigned is_sparse:1; 1446 unsigned component:2; 1447 unsigned texture_non_uniform:1; 1448 unsigned sampler_non_uniform:1; 1449 unsigned array_is_lowered_cube:1; 1450 unsigned unused:6; /* Mark unused for valgrind. 
*/ 1451 } u; 1452}; 1453 1454static void 1455write_tex(write_ctx *ctx, const nir_tex_instr *tex) 1456{ 1457 assert(tex->num_srcs < 16); 1458 assert(tex->op < 16); 1459 1460 union packed_instr header; 1461 header.u32 = 0; 1462 1463 header.tex.instr_type = tex->instr.type; 1464 header.tex.num_srcs = tex->num_srcs; 1465 header.tex.op = tex->op; 1466 1467 write_dest(ctx, &tex->dest, header, tex->instr.type); 1468 1469 blob_write_uint32(ctx->blob, tex->texture_index); 1470 blob_write_uint32(ctx->blob, tex->sampler_index); 1471 if (tex->op == nir_texop_tg4) 1472 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 1473 1474 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t)); 1475 union packed_tex_data packed = { 1476 .u.sampler_dim = tex->sampler_dim, 1477 .u.dest_type = tex->dest_type, 1478 .u.coord_components = tex->coord_components, 1479 .u.is_array = tex->is_array, 1480 .u.is_shadow = tex->is_shadow, 1481 .u.is_new_style_shadow = tex->is_new_style_shadow, 1482 .u.is_sparse = tex->is_sparse, 1483 .u.component = tex->component, 1484 .u.texture_non_uniform = tex->texture_non_uniform, 1485 .u.sampler_non_uniform = tex->sampler_non_uniform, 1486 .u.array_is_lowered_cube = tex->array_is_lowered_cube, 1487 }; 1488 blob_write_uint32(ctx->blob, packed.u32); 1489 1490 for (unsigned i = 0; i < tex->num_srcs; i++) { 1491 union packed_src src; 1492 src.u32 = 0; 1493 src.tex.src_type = tex->src[i].src_type; 1494 write_src_full(ctx, &tex->src[i].src, src); 1495 } 1496} 1497 1498static nir_tex_instr * 1499read_tex(read_ctx *ctx, union packed_instr header) 1500{ 1501 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs); 1502 1503 read_dest(ctx, &tex->dest, &tex->instr, header); 1504 1505 tex->op = header.tex.op; 1506 tex->texture_index = blob_read_uint32(ctx->blob); 1507 tex->sampler_index = blob_read_uint32(ctx->blob); 1508 if (tex->op == nir_texop_tg4) 1509 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 1510 1511 union packed_tex_data packed; 1512 packed.u32 = blob_read_uint32(ctx->blob); 1513 tex->sampler_dim = packed.u.sampler_dim; 1514 tex->dest_type = packed.u.dest_type; 1515 tex->coord_components = packed.u.coord_components; 1516 tex->is_array = packed.u.is_array; 1517 tex->is_shadow = packed.u.is_shadow; 1518 tex->is_new_style_shadow = packed.u.is_new_style_shadow; 1519 tex->is_sparse = packed.u.is_sparse; 1520 tex->component = packed.u.component; 1521 tex->texture_non_uniform = packed.u.texture_non_uniform; 1522 tex->sampler_non_uniform = packed.u.sampler_non_uniform; 1523 tex->array_is_lowered_cube = packed.u.array_is_lowered_cube; 1524 1525 for (unsigned i = 0; i < tex->num_srcs; i++) { 1526 union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr); 1527 tex->src[i].src_type = src.tex.src_type; 1528 } 1529 1530 return tex; 1531} 1532 1533static void 1534write_phi(write_ctx *ctx, const nir_phi_instr *phi) 1535{ 1536 union packed_instr header; 1537 header.u32 = 0; 1538 1539 header.phi.instr_type = phi->instr.type; 1540 header.phi.num_srcs = exec_list_length(&phi->srcs); 1541 1542 /* Phi nodes are special, since they may reference SSA definitions and 1543 * basic blocks that don't exist yet. We leave two empty uint32_t's here, 1544 * and then store enough information so that a later fixup pass can fill 1545 * them in correctly. 
1546 */ 1547 write_dest(ctx, &phi->dest, header, phi->instr.type); 1548 1549 nir_foreach_phi_src(src, phi) { 1550 assert(src->src.is_ssa); 1551 size_t blob_offset = blob_reserve_uint32(ctx->blob); 1552 ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob); 1553 assert(blob_offset + sizeof(uint32_t) == blob_offset2); 1554 write_phi_fixup fixup = { 1555 .blob_offset = blob_offset, 1556 .src = src->src.ssa, 1557 .block = src->pred, 1558 }; 1559 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup); 1560 } 1561} 1562 1563static void 1564write_fixup_phis(write_ctx *ctx) 1565{ 1566 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) { 1567 uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset); 1568 blob_ptr[0] = write_lookup_object(ctx, fixup->src); 1569 blob_ptr[1] = write_lookup_object(ctx, fixup->block); 1570 } 1571 1572 util_dynarray_clear(&ctx->phi_fixups); 1573} 1574 1575static nir_phi_instr * 1576read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header) 1577{ 1578 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir); 1579 1580 read_dest(ctx, &phi->dest, &phi->instr, header); 1581 1582 /* For similar reasons as before, we just store the index directly into the 1583 * pointer, and let a later pass resolve the phi sources. 1584 * 1585 * In order to ensure that the copied sources (which are just the indices 1586 * from the blob for now) don't get inserted into the old shader's use-def 1587 * lists, we have to add the phi instruction *before* we set up its 1588 * sources. 1589 */ 1590 nir_instr_insert_after_block(blk, &phi->instr); 1591 1592 for (unsigned i = 0; i < header.phi.num_srcs; i++) { 1593 nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob); 1594 nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob); 1595 nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def)); 1596 1597 /* Since we're not letting nir_insert_instr handle use/def stuff for us, 1598 * we have to set the parent_instr manually. It doesn't really matter 1599 * when we do it, so we might as well do it here. 1600 */ 1601 src->src.parent_instr = &phi->instr; 1602 1603 /* Stash it in the list of phi sources. We'll walk this list and fix up 1604 * sources at the very end of read_function_impl. 
1605 */ 1606 list_add(&src->src.use_link, &ctx->phi_srcs); 1607 } 1608 1609 return phi; 1610} 1611 1612static void 1613read_fixup_phis(read_ctx *ctx) 1614{ 1615 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) { 1616 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred); 1617 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa); 1618 1619 /* Remove from this list */ 1620 list_del(&src->src.use_link); 1621 1622 list_addtail(&src->src.use_link, &src->src.ssa->uses); 1623 } 1624 assert(list_is_empty(&ctx->phi_srcs)); 1625} 1626 1627static void 1628write_jump(write_ctx *ctx, const nir_jump_instr *jmp) 1629{ 1630 /* These aren't handled because they require special block linking */ 1631 assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if); 1632 1633 assert(jmp->type < 4); 1634 1635 union packed_instr header; 1636 header.u32 = 0; 1637 1638 header.jump.instr_type = jmp->instr.type; 1639 header.jump.type = jmp->type; 1640 1641 blob_write_uint32(ctx->blob, header.u32); 1642} 1643 1644static nir_jump_instr * 1645read_jump(read_ctx *ctx, union packed_instr header) 1646{ 1647 /* These aren't handled because they require special block linking */ 1648 assert(header.jump.type != nir_jump_goto && 1649 header.jump.type != nir_jump_goto_if); 1650 1651 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type); 1652 return jmp; 1653} 1654 1655static void 1656write_call(write_ctx *ctx, const nir_call_instr *call) 1657{ 1658 blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee)); 1659 1660 for (unsigned i = 0; i < call->num_params; i++) 1661 write_src(ctx, &call->params[i]); 1662} 1663 1664static nir_call_instr * 1665read_call(read_ctx *ctx) 1666{ 1667 nir_function *callee = read_object(ctx); 1668 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee); 1669 1670 for (unsigned i = 0; i < call->num_params; i++) 1671 read_src(ctx, &call->params[i], call); 1672 1673 return call; 1674} 1675 1676static void 1677write_instr(write_ctx *ctx, const nir_instr *instr) 1678{ 1679 /* We have only 4 bits for the instruction type. */ 1680 assert(instr->type < 16); 1681 1682 switch (instr->type) { 1683 case nir_instr_type_alu: 1684 write_alu(ctx, nir_instr_as_alu(instr)); 1685 break; 1686 case nir_instr_type_deref: 1687 write_deref(ctx, nir_instr_as_deref(instr)); 1688 break; 1689 case nir_instr_type_intrinsic: 1690 write_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 1691 break; 1692 case nir_instr_type_load_const: 1693 write_load_const(ctx, nir_instr_as_load_const(instr)); 1694 break; 1695 case nir_instr_type_ssa_undef: 1696 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); 1697 break; 1698 case nir_instr_type_tex: 1699 write_tex(ctx, nir_instr_as_tex(instr)); 1700 break; 1701 case nir_instr_type_phi: 1702 write_phi(ctx, nir_instr_as_phi(instr)); 1703 break; 1704 case nir_instr_type_jump: 1705 write_jump(ctx, nir_instr_as_jump(instr)); 1706 break; 1707 case nir_instr_type_call: 1708 blob_write_uint32(ctx->blob, instr->type); 1709 write_call(ctx, nir_instr_as_call(instr)); 1710 break; 1711 case nir_instr_type_parallel_copy: 1712 unreachable("Cannot write parallel copies"); 1713 default: 1714 unreachable("bad instr type"); 1715 } 1716} 1717 1718/* Return the number of instructions read. 
*/ 1719static unsigned 1720read_instr(read_ctx *ctx, nir_block *block) 1721{ 1722 STATIC_ASSERT(sizeof(union packed_instr) == 4); 1723 union packed_instr header; 1724 header.u32 = blob_read_uint32(ctx->blob); 1725 nir_instr *instr; 1726 1727 switch (header.any.instr_type) { 1728 case nir_instr_type_alu: 1729 for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++) 1730 nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr); 1731 return header.alu.num_followup_alu_sharing_header + 1; 1732 case nir_instr_type_deref: 1733 instr = &read_deref(ctx, header)->instr; 1734 break; 1735 case nir_instr_type_intrinsic: 1736 instr = &read_intrinsic(ctx, header)->instr; 1737 break; 1738 case nir_instr_type_load_const: 1739 instr = &read_load_const(ctx, header)->instr; 1740 break; 1741 case nir_instr_type_ssa_undef: 1742 instr = &read_ssa_undef(ctx, header)->instr; 1743 break; 1744 case nir_instr_type_tex: 1745 instr = &read_tex(ctx, header)->instr; 1746 break; 1747 case nir_instr_type_phi: 1748 /* Phi instructions are a bit of a special case when reading because we 1749 * don't want inserting the instruction to automatically handle use/defs 1750 * for us. Instead, we need to wait until all the blocks/instructions 1751 * are read so that we can set their sources up. 1752 */ 1753 read_phi(ctx, block, header); 1754 return 1; 1755 case nir_instr_type_jump: 1756 instr = &read_jump(ctx, header)->instr; 1757 break; 1758 case nir_instr_type_call: 1759 instr = &read_call(ctx)->instr; 1760 break; 1761 case nir_instr_type_parallel_copy: 1762 unreachable("Cannot read parallel copies"); 1763 default: 1764 unreachable("bad instr type"); 1765 } 1766 1767 nir_instr_insert_after_block(block, instr); 1768 return 1; 1769} 1770 1771static void 1772write_block(write_ctx *ctx, const nir_block *block) 1773{ 1774 write_add_object(ctx, block); 1775 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list)); 1776 1777 ctx->last_instr_type = ~0; 1778 ctx->last_alu_header_offset = 0; 1779 1780 nir_foreach_instr(instr, block) { 1781 write_instr(ctx, instr); 1782 ctx->last_instr_type = instr->type; 1783 } 1784} 1785 1786static void 1787read_block(read_ctx *ctx, struct exec_list *cf_list) 1788{ 1789 /* Don't actually create a new block. Just use the one from the tail of 1790 * the list. NIR guarantees that the tail of the list is a block and that 1791 * no two blocks are side-by-side in the IR; It should be empty. 
1792 */ 1793 nir_block *block = 1794 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); 1795 1796 read_add_object(ctx, block); 1797 unsigned num_instrs = blob_read_uint32(ctx->blob); 1798 for (unsigned i = 0; i < num_instrs;) { 1799 i += read_instr(ctx, block); 1800 } 1801} 1802 1803static void 1804write_cf_list(write_ctx *ctx, const struct exec_list *cf_list); 1805 1806static void 1807read_cf_list(read_ctx *ctx, struct exec_list *cf_list); 1808 1809static void 1810write_if(write_ctx *ctx, nir_if *nif) 1811{ 1812 write_src(ctx, &nif->condition); 1813 blob_write_uint8(ctx->blob, nif->control); 1814 1815 write_cf_list(ctx, &nif->then_list); 1816 write_cf_list(ctx, &nif->else_list); 1817} 1818 1819static void 1820read_if(read_ctx *ctx, struct exec_list *cf_list) 1821{ 1822 nir_if *nif = nir_if_create(ctx->nir); 1823 1824 read_src(ctx, &nif->condition, nif); 1825 nif->control = blob_read_uint8(ctx->blob); 1826 1827 nir_cf_node_insert_end(cf_list, &nif->cf_node); 1828 1829 read_cf_list(ctx, &nif->then_list); 1830 read_cf_list(ctx, &nif->else_list); 1831} 1832 1833static void 1834write_loop(write_ctx *ctx, nir_loop *loop) 1835{ 1836 blob_write_uint8(ctx->blob, loop->control); 1837 write_cf_list(ctx, &loop->body); 1838} 1839 1840static void 1841read_loop(read_ctx *ctx, struct exec_list *cf_list) 1842{ 1843 nir_loop *loop = nir_loop_create(ctx->nir); 1844 1845 nir_cf_node_insert_end(cf_list, &loop->cf_node); 1846 1847 loop->control = blob_read_uint8(ctx->blob); 1848 read_cf_list(ctx, &loop->body); 1849} 1850 1851static void 1852write_cf_node(write_ctx *ctx, nir_cf_node *cf) 1853{ 1854 blob_write_uint32(ctx->blob, cf->type); 1855 1856 switch (cf->type) { 1857 case nir_cf_node_block: 1858 write_block(ctx, nir_cf_node_as_block(cf)); 1859 break; 1860 case nir_cf_node_if: 1861 write_if(ctx, nir_cf_node_as_if(cf)); 1862 break; 1863 case nir_cf_node_loop: 1864 write_loop(ctx, nir_cf_node_as_loop(cf)); 1865 break; 1866 default: 1867 unreachable("bad cf type"); 1868 } 1869} 1870 1871static void 1872read_cf_node(read_ctx *ctx, struct exec_list *list) 1873{ 1874 nir_cf_node_type type = blob_read_uint32(ctx->blob); 1875 1876 switch (type) { 1877 case nir_cf_node_block: 1878 read_block(ctx, list); 1879 break; 1880 case nir_cf_node_if: 1881 read_if(ctx, list); 1882 break; 1883 case nir_cf_node_loop: 1884 read_loop(ctx, list); 1885 break; 1886 default: 1887 unreachable("bad cf type"); 1888 } 1889} 1890 1891static void 1892write_cf_list(write_ctx *ctx, const struct exec_list *cf_list) 1893{ 1894 blob_write_uint32(ctx->blob, exec_list_length(cf_list)); 1895 foreach_list_typed(nir_cf_node, cf, node, cf_list) { 1896 write_cf_node(ctx, cf); 1897 } 1898} 1899 1900static void 1901read_cf_list(read_ctx *ctx, struct exec_list *cf_list) 1902{ 1903 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob); 1904 for (unsigned i = 0; i < num_cf_nodes; i++) 1905 read_cf_node(ctx, cf_list); 1906} 1907 1908static void 1909write_function_impl(write_ctx *ctx, const nir_function_impl *fi) 1910{ 1911 blob_write_uint8(ctx->blob, fi->structured); 1912 1913 write_var_list(ctx, &fi->locals); 1914 write_reg_list(ctx, &fi->registers); 1915 blob_write_uint32(ctx->blob, fi->reg_alloc); 1916 1917 write_cf_list(ctx, &fi->body); 1918 write_fixup_phis(ctx); 1919} 1920 1921static nir_function_impl * 1922read_function_impl(read_ctx *ctx, nir_function *fxn) 1923{ 1924 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir); 1925 fi->function = fxn; 1926 1927 fi->structured = blob_read_uint8(ctx->blob); 1928 1929 
   read_var_list(ctx, &fi->locals);
   read_reg_list(ctx, &fi->registers);
   fi->reg_alloc = blob_read_uint32(ctx->blob);

   read_cf_list(ctx, &fi->body);
   read_fixup_phis(ctx);

   fi->valid_metadata = 0;

   return fi;
}

static void
write_function(write_ctx *ctx, const nir_function *fxn)
{
   uint32_t flags = fxn->is_entrypoint;
   if (fxn->name)
      flags |= 0x2;
   if (fxn->impl)
      flags |= 0x4;
   blob_write_uint32(ctx->blob, flags);
   if (fxn->name)
      blob_write_string(ctx->blob, fxn->name);

   write_add_object(ctx, fxn);

   blob_write_uint32(ctx->blob, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val =
         ((uint32_t)fxn->params[i].num_components) |
         ((uint32_t)fxn->params[i].bit_size) << 8;
      blob_write_uint32(ctx->blob, val);
   }

   /* At first glance, it looks like we should write the function_impl here.
    * However, call instructions need to be able to reference at least the
    * function, and calls are processed as we write the function_impls.  So
    * we stop here and write the function_impls in a second pass.
    */
}

static void
read_function(read_ctx *ctx)
{
   uint32_t flags = blob_read_uint32(ctx->blob);
   bool has_name = flags & 0x2;
   char *name = has_name ? blob_read_string(ctx->blob) : NULL;

   nir_function *fxn = nir_function_create(ctx->nir, name);

   read_add_object(ctx, fxn);

   fxn->num_params = blob_read_uint32(ctx->blob);
   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
   for (unsigned i = 0; i < fxn->num_params; i++) {
      uint32_t val = blob_read_uint32(ctx->blob);
      fxn->params[i].num_components = val & 0xff;
      fxn->params[i].bit_size = (val >> 8) & 0xff;
   }

   fxn->is_entrypoint = flags & 0x1;
   if (flags & 0x4)
      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
}
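
/* For orientation, the top-level blob layout produced by nir_serialize()
 * below is roughly the following (summarized from the code; not an
 * authoritative or stable format description):
 *
 *    uint32        object count / index-table length (reserved, patched last)
 *    uint32        string-presence flags
 *    [string]      info.name, if present and not stripped
 *    [string]      info.label, if present and not stripped
 *    shader_info   raw struct bytes
 *    ...           top-level variable list
 *    uint32 x 4    num_inputs, num_uniforms, num_outputs, scratch_size
 *    uint32        function count, followed by one header per function
 *    ...           one function_impl per function that has one (second pass)
 *    uint32        constant_data_size, followed by that many raw bytes
 */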

/**
 * Serialize NIR into a binary blob.
 *
 * \param strip  Don't serialize information that is only useful for
 *               debugging, such as variable names; this makes cache hits
 *               from similar shaders more likely.
 */
void
nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
{
   write_ctx ctx = {0};
   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
   ctx.blob = blob;
   ctx.nir = nir;
   ctx.strip = strip;
   util_dynarray_init(&ctx.phi_fixups, NULL);

   size_t idx_size_offset = blob_reserve_uint32(blob);

   struct shader_info info = nir->info;
   uint32_t strings = 0;
   if (!strip && info.name)
      strings |= 0x1;
   if (!strip && info.label)
      strings |= 0x2;
   blob_write_uint32(blob, strings);
   if (!strip && info.name)
      blob_write_string(blob, info.name);
   if (!strip && info.label)
      blob_write_string(blob, info.label);
   info.name = info.label = NULL;
   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));

   write_var_list(&ctx, &nir->variables);

   blob_write_uint32(blob, nir->num_inputs);
   blob_write_uint32(blob, nir->num_uniforms);
   blob_write_uint32(blob, nir->num_outputs);
   blob_write_uint32(blob, nir->scratch_size);

   blob_write_uint32(blob, exec_list_length(&nir->functions));
   nir_foreach_function(fxn, nir) {
      write_function(&ctx, fxn);
   }

   nir_foreach_function(fxn, nir) {
      if (fxn->impl)
         write_function_impl(&ctx, fxn->impl);
   }

   blob_write_uint32(blob, nir->constant_data_size);
   if (nir->constant_data_size > 0)
      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);

   /* Patch the reserved uint32 at the start of the blob with the final
    * object count, which becomes the reader's index-table length.
    */
   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;

   _mesa_hash_table_destroy(ctx.remap_table, NULL);
   util_dynarray_fini(&ctx.phi_fixups);
}
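
/* For illustration, a typical round trip through the two entry points
 * (mirroring nir_shader_serialize_deserialize() at the end of this file;
 * `shader` and `mem_ctx` are assumed to come from the caller):
 *
 *    struct blob writer;
 *    blob_init(&writer);
 *    nir_serialize(&writer, shader, true);
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, writer.data, writer.size);
 *    nir_shader *clone = nir_deserialize(mem_ctx, shader->options, &reader);
 *    blob_finish(&writer);
 */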

nir_shader *
nir_deserialize(void *mem_ctx,
                const struct nir_shader_compiler_options *options,
                struct blob_reader *blob)
{
   read_ctx ctx = {0};
   ctx.blob = blob;
   list_inithead(&ctx.phi_srcs);
   ctx.idx_table_len = blob_read_uint32(blob);
   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));

   uint32_t strings = blob_read_uint32(blob);
   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;

   struct shader_info info;
   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));

   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);

   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;

   ctx.nir->info = info;

   read_var_list(&ctx, &ctx.nir->variables);

   ctx.nir->num_inputs = blob_read_uint32(blob);
   ctx.nir->num_uniforms = blob_read_uint32(blob);
   ctx.nir->num_outputs = blob_read_uint32(blob);
   ctx.nir->scratch_size = blob_read_uint32(blob);

   unsigned num_functions = blob_read_uint32(blob);
   for (unsigned i = 0; i < num_functions; i++)
      read_function(&ctx);

   nir_foreach_function(fxn, ctx.nir) {
      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
         fxn->impl = read_function_impl(&ctx, fxn);
   }

   ctx.nir->constant_data_size = blob_read_uint32(blob);
   if (ctx.nir->constant_data_size > 0) {
      ctx.nir->constant_data =
         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
      blob_copy_bytes(blob, ctx.nir->constant_data,
                      ctx.nir->constant_data_size);
   }

   free(ctx.idx_table);

   nir_validate_shader(ctx.nir, "after deserialize");

   return ctx.nir;
}

void
nir_shader_serialize_deserialize(nir_shader *shader)
{
   const struct nir_shader_compiler_options *options = shader->options;

   struct blob writer;
   blob_init(&writer);
   nir_serialize(&writer, shader, false);

   /* Delete all of the shader's ralloc children but leave the shader itself
    * alone.
    */
   void *dead_ctx = ralloc_context(NULL);
   ralloc_adopt(dead_ctx, shader);
   ralloc_free(dead_ctx);

   dead_ctx = ralloc_context(NULL);

   struct blob_reader reader;
   blob_reader_init(&reader, writer.data, writer.size);
   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);

   blob_finish(&writer);

   nir_shader_replace(shader, copy);
   ralloc_free(dead_ctx);
}