1/* 2 * Copyright © 2017 Connor Abbott 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir_serialize.h" 25#include "nir_control_flow.h" 26#include "util/u_dynarray.h" 27 28typedef struct { 29 size_t blob_offset; 30 nir_ssa_def *src; 31 nir_block *block; 32} write_phi_fixup; 33 34typedef struct { 35 const nir_shader *nir; 36 37 struct blob *blob; 38 39 /* maps pointer to index */ 40 struct hash_table *remap_table; 41 42 /* the next index to assign to a NIR in-memory object */ 43 uintptr_t next_idx; 44 45 /* Array of write_phi_fixup structs representing phi sources that need to 46 * be resolved in the second pass. 47 */ 48 struct util_dynarray phi_fixups; 49} write_ctx; 50 51typedef struct { 52 nir_shader *nir; 53 54 struct blob_reader *blob; 55 56 /* the next index to assign to a NIR in-memory object */ 57 uintptr_t next_idx; 58 59 /* The length of the index -> object table */ 60 uintptr_t idx_table_len; 61 62 /* map from index to deserialized pointer */ 63 void **idx_table; 64 65 /* List of phi sources. */ 66 struct list_head phi_srcs; 67 68} read_ctx; 69 70static void 71write_add_object(write_ctx *ctx, const void *obj) 72{ 73 uintptr_t index = ctx->next_idx++; 74 _mesa_hash_table_insert(ctx->remap_table, obj, (void *) index); 75} 76 77static uintptr_t 78write_lookup_object(write_ctx *ctx, const void *obj) 79{ 80 struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj); 81 assert(entry); 82 return (uintptr_t) entry->data; 83} 84 85static void 86write_object(write_ctx *ctx, const void *obj) 87{ 88 blob_write_intptr(ctx->blob, write_lookup_object(ctx, obj)); 89} 90 91static void 92read_add_object(read_ctx *ctx, void *obj) 93{ 94 assert(ctx->next_idx < ctx->idx_table_len); 95 ctx->idx_table[ctx->next_idx++] = obj; 96} 97 98static void * 99read_lookup_object(read_ctx *ctx, uintptr_t idx) 100{ 101 assert(idx < ctx->idx_table_len); 102 return ctx->idx_table[idx]; 103} 104 105static void * 106read_object(read_ctx *ctx) 107{ 108 return read_lookup_object(ctx, blob_read_intptr(ctx->blob)); 109} 110 111static void 112write_constant(write_ctx *ctx, const nir_constant *c) 113{ 114 blob_write_bytes(ctx->blob, c->values, sizeof(c->values)); 115 blob_write_uint32(ctx->blob, c->num_elements); 116 for (unsigned i = 0; i < c->num_elements; i++) 117 write_constant(ctx, c->elements[i]); 118} 119 120static nir_constant * 121read_constant(read_ctx *ctx, nir_variable *nvar) 122{ 123 nir_constant *c = ralloc(nvar, nir_constant); 124 125 blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values)); 126 c->num_elements = blob_read_uint32(ctx->blob); 127 c->elements = ralloc_array(nvar, nir_constant *, c->num_elements); 128 for (unsigned i = 0; i < c->num_elements; i++) 129 c->elements[i] = read_constant(ctx, nvar); 130 131 return c; 132} 133 134static void 135write_variable(write_ctx *ctx, const nir_variable *var) 136{ 137 write_add_object(ctx, var); 138 encode_type_to_blob(ctx->blob, var->type); 139 blob_write_uint32(ctx->blob, !!(var->name)); 140 if (var->name) 141 blob_write_string(ctx->blob, var->name); 142 blob_write_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data)); 143 blob_write_uint32(ctx->blob, var->num_state_slots); 144 for (unsigned i = 0; i < var->num_state_slots; i++) { 145 for (unsigned j = 0; j < STATE_LENGTH; j++) 146 blob_write_uint32(ctx->blob, var->state_slots[i].tokens[j]); 147 blob_write_uint32(ctx->blob, var->state_slots[i].swizzle); 148 } 149 blob_write_uint32(ctx->blob, !!(var->constant_initializer)); 150 if (var->constant_initializer) 151 write_constant(ctx, var->constant_initializer); 152 blob_write_uint32(ctx->blob, !!(var->interface_type)); 153 if (var->interface_type) 154 encode_type_to_blob(ctx->blob, var->interface_type); 155 blob_write_uint32(ctx->blob, var->num_members); 156 if (var->num_members > 0) { 157 blob_write_bytes(ctx->blob, (uint8_t *) var->members, 158 var->num_members * sizeof(*var->members)); 159 } 160} 161 162static nir_variable * 163read_variable(read_ctx *ctx) 164{ 165 nir_variable *var = rzalloc(ctx->nir, nir_variable); 166 read_add_object(ctx, var); 167 168 var->type = decode_type_from_blob(ctx->blob); 169 bool has_name = blob_read_uint32(ctx->blob); 170 if (has_name) { 171 const char *name = blob_read_string(ctx->blob); 172 var->name = ralloc_strdup(var, name); 173 } else { 174 var->name = NULL; 175 } 176 blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data)); 177 var->num_state_slots = blob_read_uint32(ctx->blob); 178 if (var->num_state_slots != 0) { 179 var->state_slots = ralloc_array(var, nir_state_slot, 180 var->num_state_slots); 181 for (unsigned i = 0; i < var->num_state_slots; i++) { 182 for (unsigned j = 0; j < STATE_LENGTH; j++) 183 var->state_slots[i].tokens[j] = blob_read_uint32(ctx->blob); 184 var->state_slots[i].swizzle = blob_read_uint32(ctx->blob); 185 } 186 } 187 bool has_const_initializer = blob_read_uint32(ctx->blob); 188 if (has_const_initializer) 189 var->constant_initializer = read_constant(ctx, var); 190 else 191 var->constant_initializer = NULL; 192 bool has_interface_type = blob_read_uint32(ctx->blob); 193 if (has_interface_type) 194 var->interface_type = decode_type_from_blob(ctx->blob); 195 else 196 var->interface_type = NULL; 197 var->num_members = blob_read_uint32(ctx->blob); 198 if (var->num_members > 0) { 199 var->members = ralloc_array(var, struct nir_variable_data, 200 var->num_members); 201 blob_copy_bytes(ctx->blob, (uint8_t *) var->members, 202 var->num_members * sizeof(*var->members)); 203 } 204 205 return var; 206} 207 208static void 209write_var_list(write_ctx *ctx, const struct exec_list *src) 210{ 211 blob_write_uint32(ctx->blob, exec_list_length(src)); 212 foreach_list_typed(nir_variable, var, node, src) { 213 write_variable(ctx, var); 214 } 215} 216 217static void 218read_var_list(read_ctx *ctx, struct exec_list *dst) 219{ 220 exec_list_make_empty(dst); 221 unsigned num_vars = blob_read_uint32(ctx->blob); 222 for (unsigned i = 0; i < num_vars; i++) { 223 nir_variable *var = read_variable(ctx); 224 exec_list_push_tail(dst, &var->node); 225 } 226} 227 228static void 229write_register(write_ctx *ctx, const nir_register *reg) 230{ 231 write_add_object(ctx, reg); 232 blob_write_uint32(ctx->blob, reg->num_components); 233 blob_write_uint32(ctx->blob, reg->bit_size); 234 blob_write_uint32(ctx->blob, reg->num_array_elems); 235 blob_write_uint32(ctx->blob, reg->index); 236 blob_write_uint32(ctx->blob, !!(reg->name)); 237 if (reg->name) 238 blob_write_string(ctx->blob, reg->name); 239} 240 241static nir_register * 242read_register(read_ctx *ctx) 243{ 244 nir_register *reg = ralloc(ctx->nir, nir_register); 245 read_add_object(ctx, reg); 246 reg->num_components = blob_read_uint32(ctx->blob); 247 reg->bit_size = blob_read_uint32(ctx->blob); 248 reg->num_array_elems = blob_read_uint32(ctx->blob); 249 reg->index = blob_read_uint32(ctx->blob); 250 bool has_name = blob_read_uint32(ctx->blob); 251 if (has_name) { 252 const char *name = blob_read_string(ctx->blob); 253 reg->name = ralloc_strdup(reg, name); 254 } else { 255 reg->name = NULL; 256 } 257 258 list_inithead(®->uses); 259 list_inithead(®->defs); 260 list_inithead(®->if_uses); 261 262 return reg; 263} 264 265static void 266write_reg_list(write_ctx *ctx, const struct exec_list *src) 267{ 268 blob_write_uint32(ctx->blob, exec_list_length(src)); 269 foreach_list_typed(nir_register, reg, node, src) 270 write_register(ctx, reg); 271} 272 273static void 274read_reg_list(read_ctx *ctx, struct exec_list *dst) 275{ 276 exec_list_make_empty(dst); 277 unsigned num_regs = blob_read_uint32(ctx->blob); 278 for (unsigned i = 0; i < num_regs; i++) { 279 nir_register *reg = read_register(ctx); 280 exec_list_push_tail(dst, ®->node); 281 } 282} 283 284static void 285write_src(write_ctx *ctx, const nir_src *src) 286{ 287 /* Since sources are very frequent, we try to save some space when storing 288 * them. In particular, we store whether the source is a register and 289 * whether the register has an indirect index in the low two bits. We can 290 * assume that the high two bits of the index are zero, since otherwise our 291 * address space would've been exhausted allocating the remap table! 292 */ 293 if (src->is_ssa) { 294 uintptr_t idx = write_lookup_object(ctx, src->ssa) << 2; 295 idx |= 1; 296 blob_write_intptr(ctx->blob, idx); 297 } else { 298 uintptr_t idx = write_lookup_object(ctx, src->reg.reg) << 2; 299 if (src->reg.indirect) 300 idx |= 2; 301 blob_write_intptr(ctx->blob, idx); 302 blob_write_uint32(ctx->blob, src->reg.base_offset); 303 if (src->reg.indirect) { 304 write_src(ctx, src->reg.indirect); 305 } 306 } 307} 308 309static void 310read_src(read_ctx *ctx, nir_src *src, void *mem_ctx) 311{ 312 uintptr_t val = blob_read_intptr(ctx->blob); 313 uintptr_t idx = val >> 2; 314 src->is_ssa = val & 0x1; 315 if (src->is_ssa) { 316 src->ssa = read_lookup_object(ctx, idx); 317 } else { 318 bool is_indirect = val & 0x2; 319 src->reg.reg = read_lookup_object(ctx, idx); 320 src->reg.base_offset = blob_read_uint32(ctx->blob); 321 if (is_indirect) { 322 src->reg.indirect = ralloc(mem_ctx, nir_src); 323 read_src(ctx, src->reg.indirect, mem_ctx); 324 } else { 325 src->reg.indirect = NULL; 326 } 327 } 328} 329 330static void 331write_dest(write_ctx *ctx, const nir_dest *dst) 332{ 333 uint32_t val = dst->is_ssa; 334 if (dst->is_ssa) { 335 val |= !!(dst->ssa.name) << 1; 336 val |= dst->ssa.num_components << 2; 337 val |= dst->ssa.bit_size << 5; 338 } else { 339 val |= !!(dst->reg.indirect) << 1; 340 } 341 blob_write_uint32(ctx->blob, val); 342 if (dst->is_ssa) { 343 write_add_object(ctx, &dst->ssa); 344 if (dst->ssa.name) 345 blob_write_string(ctx->blob, dst->ssa.name); 346 } else { 347 blob_write_intptr(ctx->blob, write_lookup_object(ctx, dst->reg.reg)); 348 blob_write_uint32(ctx->blob, dst->reg.base_offset); 349 if (dst->reg.indirect) 350 write_src(ctx, dst->reg.indirect); 351 } 352} 353 354static void 355read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr) 356{ 357 uint32_t val = blob_read_uint32(ctx->blob); 358 bool is_ssa = val & 0x1; 359 if (is_ssa) { 360 bool has_name = val & 0x2; 361 unsigned num_components = (val >> 2) & 0x7; 362 unsigned bit_size = val >> 5; 363 char *name = has_name ? blob_read_string(ctx->blob) : NULL; 364 nir_ssa_dest_init(instr, dst, num_components, bit_size, name); 365 read_add_object(ctx, &dst->ssa); 366 } else { 367 bool is_indirect = val & 0x2; 368 dst->reg.reg = read_object(ctx); 369 dst->reg.base_offset = blob_read_uint32(ctx->blob); 370 if (is_indirect) { 371 dst->reg.indirect = ralloc(instr, nir_src); 372 read_src(ctx, dst->reg.indirect, instr); 373 } 374 } 375} 376 377static void 378write_alu(write_ctx *ctx, const nir_alu_instr *alu) 379{ 380 blob_write_uint32(ctx->blob, alu->op); 381 uint32_t flags = alu->exact; 382 flags |= alu->dest.saturate << 1; 383 flags |= alu->dest.write_mask << 2; 384 blob_write_uint32(ctx->blob, flags); 385 386 write_dest(ctx, &alu->dest.dest); 387 388 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 389 write_src(ctx, &alu->src[i].src); 390 flags = alu->src[i].negate; 391 flags |= alu->src[i].abs << 1; 392 for (unsigned j = 0; j < 4; j++) 393 flags |= alu->src[i].swizzle[j] << (2 + 2 * j); 394 blob_write_uint32(ctx->blob, flags); 395 } 396} 397 398static nir_alu_instr * 399read_alu(read_ctx *ctx) 400{ 401 nir_op op = blob_read_uint32(ctx->blob); 402 nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, op); 403 404 uint32_t flags = blob_read_uint32(ctx->blob); 405 alu->exact = flags & 1; 406 alu->dest.saturate = flags & 2; 407 alu->dest.write_mask = flags >> 2; 408 409 read_dest(ctx, &alu->dest.dest, &alu->instr); 410 411 for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) { 412 read_src(ctx, &alu->src[i].src, &alu->instr); 413 flags = blob_read_uint32(ctx->blob); 414 alu->src[i].negate = flags & 1; 415 alu->src[i].abs = flags & 2; 416 for (unsigned j = 0; j < 4; j++) 417 alu->src[i].swizzle[j] = (flags >> (2 * j + 2)) & 3; 418 } 419 420 return alu; 421} 422 423static void 424write_deref(write_ctx *ctx, const nir_deref_instr *deref) 425{ 426 blob_write_uint32(ctx->blob, deref->deref_type); 427 428 blob_write_uint32(ctx->blob, deref->mode); 429 encode_type_to_blob(ctx->blob, deref->type); 430 431 write_dest(ctx, &deref->dest); 432 433 if (deref->deref_type == nir_deref_type_var) { 434 write_object(ctx, deref->var); 435 return; 436 } 437 438 write_src(ctx, &deref->parent); 439 440 switch (deref->deref_type) { 441 case nir_deref_type_struct: 442 blob_write_uint32(ctx->blob, deref->strct.index); 443 break; 444 445 case nir_deref_type_array: 446 case nir_deref_type_ptr_as_array: 447 write_src(ctx, &deref->arr.index); 448 break; 449 450 case nir_deref_type_cast: 451 blob_write_uint32(ctx->blob, deref->cast.ptr_stride); 452 break; 453 454 case nir_deref_type_array_wildcard: 455 /* Nothing to do */ 456 break; 457 458 default: 459 unreachable("Invalid deref type"); 460 } 461} 462 463static nir_deref_instr * 464read_deref(read_ctx *ctx) 465{ 466 nir_deref_type deref_type = blob_read_uint32(ctx->blob); 467 nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type); 468 469 deref->mode = blob_read_uint32(ctx->blob); 470 deref->type = decode_type_from_blob(ctx->blob); 471 472 read_dest(ctx, &deref->dest, &deref->instr); 473 474 if (deref_type == nir_deref_type_var) { 475 deref->var = read_object(ctx); 476 return deref; 477 } 478 479 read_src(ctx, &deref->parent, &deref->instr); 480 481 switch (deref->deref_type) { 482 case nir_deref_type_struct: 483 deref->strct.index = blob_read_uint32(ctx->blob); 484 break; 485 486 case nir_deref_type_array: 487 case nir_deref_type_ptr_as_array: 488 read_src(ctx, &deref->arr.index, &deref->instr); 489 break; 490 491 case nir_deref_type_cast: 492 deref->cast.ptr_stride = blob_read_uint32(ctx->blob); 493 break; 494 495 case nir_deref_type_array_wildcard: 496 /* Nothing to do */ 497 break; 498 499 default: 500 unreachable("Invalid deref type"); 501 } 502 503 return deref; 504} 505 506static void 507write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin) 508{ 509 blob_write_uint32(ctx->blob, intrin->intrinsic); 510 511 unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs; 512 unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices; 513 514 blob_write_uint32(ctx->blob, intrin->num_components); 515 516 if (nir_intrinsic_infos[intrin->intrinsic].has_dest) 517 write_dest(ctx, &intrin->dest); 518 519 for (unsigned i = 0; i < num_srcs; i++) 520 write_src(ctx, &intrin->src[i]); 521 522 for (unsigned i = 0; i < num_indices; i++) 523 blob_write_uint32(ctx->blob, intrin->const_index[i]); 524} 525 526static nir_intrinsic_instr * 527read_intrinsic(read_ctx *ctx) 528{ 529 nir_intrinsic_op op = blob_read_uint32(ctx->blob); 530 531 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op); 532 533 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; 534 unsigned num_indices = nir_intrinsic_infos[op].num_indices; 535 536 intrin->num_components = blob_read_uint32(ctx->blob); 537 538 if (nir_intrinsic_infos[op].has_dest) 539 read_dest(ctx, &intrin->dest, &intrin->instr); 540 541 for (unsigned i = 0; i < num_srcs; i++) 542 read_src(ctx, &intrin->src[i], &intrin->instr); 543 544 for (unsigned i = 0; i < num_indices; i++) 545 intrin->const_index[i] = blob_read_uint32(ctx->blob); 546 547 return intrin; 548} 549 550static void 551write_load_const(write_ctx *ctx, const nir_load_const_instr *lc) 552{ 553 uint32_t val = lc->def.num_components; 554 val |= lc->def.bit_size << 3; 555 blob_write_uint32(ctx->blob, val); 556 blob_write_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components); 557 write_add_object(ctx, &lc->def); 558} 559 560static nir_load_const_instr * 561read_load_const(read_ctx *ctx) 562{ 563 uint32_t val = blob_read_uint32(ctx->blob); 564 565 nir_load_const_instr *lc = 566 nir_load_const_instr_create(ctx->nir, val & 0x7, val >> 3); 567 568 blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components); 569 read_add_object(ctx, &lc->def); 570 return lc; 571} 572 573static void 574write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef) 575{ 576 uint32_t val = undef->def.num_components; 577 val |= undef->def.bit_size << 3; 578 blob_write_uint32(ctx->blob, val); 579 write_add_object(ctx, &undef->def); 580} 581 582static nir_ssa_undef_instr * 583read_ssa_undef(read_ctx *ctx) 584{ 585 uint32_t val = blob_read_uint32(ctx->blob); 586 587 nir_ssa_undef_instr *undef = 588 nir_ssa_undef_instr_create(ctx->nir, val & 0x7, val >> 3); 589 590 read_add_object(ctx, &undef->def); 591 return undef; 592} 593 594union packed_tex_data { 595 uint32_t u32; 596 struct { 597 enum glsl_sampler_dim sampler_dim:4; 598 nir_alu_type dest_type:8; 599 unsigned coord_components:3; 600 unsigned is_array:1; 601 unsigned is_shadow:1; 602 unsigned is_new_style_shadow:1; 603 unsigned component:2; 604 unsigned unused:10; /* Mark unused for valgrind. */ 605 } u; 606}; 607 608static void 609write_tex(write_ctx *ctx, const nir_tex_instr *tex) 610{ 611 blob_write_uint32(ctx->blob, tex->num_srcs); 612 blob_write_uint32(ctx->blob, tex->op); 613 blob_write_uint32(ctx->blob, tex->texture_index); 614 blob_write_uint32(ctx->blob, tex->texture_array_size); 615 blob_write_uint32(ctx->blob, tex->sampler_index); 616 blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 617 618 STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t)); 619 union packed_tex_data packed = { 620 .u.sampler_dim = tex->sampler_dim, 621 .u.dest_type = tex->dest_type, 622 .u.coord_components = tex->coord_components, 623 .u.is_array = tex->is_array, 624 .u.is_shadow = tex->is_shadow, 625 .u.is_new_style_shadow = tex->is_new_style_shadow, 626 .u.component = tex->component, 627 }; 628 blob_write_uint32(ctx->blob, packed.u32); 629 630 write_dest(ctx, &tex->dest); 631 for (unsigned i = 0; i < tex->num_srcs; i++) { 632 blob_write_uint32(ctx->blob, tex->src[i].src_type); 633 write_src(ctx, &tex->src[i].src); 634 } 635} 636 637static nir_tex_instr * 638read_tex(read_ctx *ctx) 639{ 640 unsigned num_srcs = blob_read_uint32(ctx->blob); 641 nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, num_srcs); 642 643 tex->op = blob_read_uint32(ctx->blob); 644 tex->texture_index = blob_read_uint32(ctx->blob); 645 tex->texture_array_size = blob_read_uint32(ctx->blob); 646 tex->sampler_index = blob_read_uint32(ctx->blob); 647 blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets)); 648 649 union packed_tex_data packed; 650 packed.u32 = blob_read_uint32(ctx->blob); 651 tex->sampler_dim = packed.u.sampler_dim; 652 tex->dest_type = packed.u.dest_type; 653 tex->coord_components = packed.u.coord_components; 654 tex->is_array = packed.u.is_array; 655 tex->is_shadow = packed.u.is_shadow; 656 tex->is_new_style_shadow = packed.u.is_new_style_shadow; 657 tex->component = packed.u.component; 658 659 read_dest(ctx, &tex->dest, &tex->instr); 660 for (unsigned i = 0; i < tex->num_srcs; i++) { 661 tex->src[i].src_type = blob_read_uint32(ctx->blob); 662 read_src(ctx, &tex->src[i].src, &tex->instr); 663 } 664 665 return tex; 666} 667 668static void 669write_phi(write_ctx *ctx, const nir_phi_instr *phi) 670{ 671 /* Phi nodes are special, since they may reference SSA definitions and 672 * basic blocks that don't exist yet. We leave two empty uintptr_t's here, 673 * and then store enough information so that a later fixup pass can fill 674 * them in correctly. 675 */ 676 write_dest(ctx, &phi->dest); 677 678 blob_write_uint32(ctx->blob, exec_list_length(&phi->srcs)); 679 680 nir_foreach_phi_src(src, phi) { 681 assert(src->src.is_ssa); 682 size_t blob_offset = blob_reserve_intptr(ctx->blob); 683 MAYBE_UNUSED size_t blob_offset2 = blob_reserve_intptr(ctx->blob); 684 assert(blob_offset + sizeof(uintptr_t) == blob_offset2); 685 write_phi_fixup fixup = { 686 .blob_offset = blob_offset, 687 .src = src->src.ssa, 688 .block = src->pred, 689 }; 690 util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup); 691 } 692} 693 694static void 695write_fixup_phis(write_ctx *ctx) 696{ 697 util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) { 698 uintptr_t *blob_ptr = (uintptr_t *)(ctx->blob->data + fixup->blob_offset); 699 blob_ptr[0] = write_lookup_object(ctx, fixup->src); 700 blob_ptr[1] = write_lookup_object(ctx, fixup->block); 701 } 702 703 util_dynarray_clear(&ctx->phi_fixups); 704} 705 706static nir_phi_instr * 707read_phi(read_ctx *ctx, nir_block *blk) 708{ 709 nir_phi_instr *phi = nir_phi_instr_create(ctx->nir); 710 711 read_dest(ctx, &phi->dest, &phi->instr); 712 713 unsigned num_srcs = blob_read_uint32(ctx->blob); 714 715 /* For similar reasons as before, we just store the index directly into the 716 * pointer, and let a later pass resolve the phi sources. 717 * 718 * In order to ensure that the copied sources (which are just the indices 719 * from the blob for now) don't get inserted into the old shader's use-def 720 * lists, we have to add the phi instruction *before* we set up its 721 * sources. 722 */ 723 nir_instr_insert_after_block(blk, &phi->instr); 724 725 for (unsigned i = 0; i < num_srcs; i++) { 726 nir_phi_src *src = ralloc(phi, nir_phi_src); 727 728 src->src.is_ssa = true; 729 src->src.ssa = (nir_ssa_def *) blob_read_intptr(ctx->blob); 730 src->pred = (nir_block *) blob_read_intptr(ctx->blob); 731 732 /* Since we're not letting nir_insert_instr handle use/def stuff for us, 733 * we have to set the parent_instr manually. It doesn't really matter 734 * when we do it, so we might as well do it here. 735 */ 736 src->src.parent_instr = &phi->instr; 737 738 /* Stash it in the list of phi sources. We'll walk this list and fix up 739 * sources at the very end of read_function_impl. 740 */ 741 list_add(&src->src.use_link, &ctx->phi_srcs); 742 743 exec_list_push_tail(&phi->srcs, &src->node); 744 } 745 746 return phi; 747} 748 749static void 750read_fixup_phis(read_ctx *ctx) 751{ 752 list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) { 753 src->pred = read_lookup_object(ctx, (uintptr_t)src->pred); 754 src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa); 755 756 /* Remove from this list */ 757 list_del(&src->src.use_link); 758 759 list_addtail(&src->src.use_link, &src->src.ssa->uses); 760 } 761 assert(list_empty(&ctx->phi_srcs)); 762} 763 764static void 765write_jump(write_ctx *ctx, const nir_jump_instr *jmp) 766{ 767 blob_write_uint32(ctx->blob, jmp->type); 768} 769 770static nir_jump_instr * 771read_jump(read_ctx *ctx) 772{ 773 nir_jump_type type = blob_read_uint32(ctx->blob); 774 nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, type); 775 return jmp; 776} 777 778static void 779write_call(write_ctx *ctx, const nir_call_instr *call) 780{ 781 blob_write_intptr(ctx->blob, write_lookup_object(ctx, call->callee)); 782 783 for (unsigned i = 0; i < call->num_params; i++) 784 write_src(ctx, &call->params[i]); 785} 786 787static nir_call_instr * 788read_call(read_ctx *ctx) 789{ 790 nir_function *callee = read_object(ctx); 791 nir_call_instr *call = nir_call_instr_create(ctx->nir, callee); 792 793 for (unsigned i = 0; i < call->num_params; i++) 794 read_src(ctx, &call->params[i], call); 795 796 return call; 797} 798 799static void 800write_instr(write_ctx *ctx, const nir_instr *instr) 801{ 802 blob_write_uint32(ctx->blob, instr->type); 803 switch (instr->type) { 804 case nir_instr_type_alu: 805 write_alu(ctx, nir_instr_as_alu(instr)); 806 break; 807 case nir_instr_type_deref: 808 write_deref(ctx, nir_instr_as_deref(instr)); 809 break; 810 case nir_instr_type_intrinsic: 811 write_intrinsic(ctx, nir_instr_as_intrinsic(instr)); 812 break; 813 case nir_instr_type_load_const: 814 write_load_const(ctx, nir_instr_as_load_const(instr)); 815 break; 816 case nir_instr_type_ssa_undef: 817 write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); 818 break; 819 case nir_instr_type_tex: 820 write_tex(ctx, nir_instr_as_tex(instr)); 821 break; 822 case nir_instr_type_phi: 823 write_phi(ctx, nir_instr_as_phi(instr)); 824 break; 825 case nir_instr_type_jump: 826 write_jump(ctx, nir_instr_as_jump(instr)); 827 break; 828 case nir_instr_type_call: 829 write_call(ctx, nir_instr_as_call(instr)); 830 break; 831 case nir_instr_type_parallel_copy: 832 unreachable("Cannot write parallel copies"); 833 default: 834 unreachable("bad instr type"); 835 } 836} 837 838static void 839read_instr(read_ctx *ctx, nir_block *block) 840{ 841 nir_instr_type type = blob_read_uint32(ctx->blob); 842 nir_instr *instr; 843 switch (type) { 844 case nir_instr_type_alu: 845 instr = &read_alu(ctx)->instr; 846 break; 847 case nir_instr_type_deref: 848 instr = &read_deref(ctx)->instr; 849 break; 850 case nir_instr_type_intrinsic: 851 instr = &read_intrinsic(ctx)->instr; 852 break; 853 case nir_instr_type_load_const: 854 instr = &read_load_const(ctx)->instr; 855 break; 856 case nir_instr_type_ssa_undef: 857 instr = &read_ssa_undef(ctx)->instr; 858 break; 859 case nir_instr_type_tex: 860 instr = &read_tex(ctx)->instr; 861 break; 862 case nir_instr_type_phi: 863 /* Phi instructions are a bit of a special case when reading because we 864 * don't want inserting the instruction to automatically handle use/defs 865 * for us. Instead, we need to wait until all the blocks/instructions 866 * are read so that we can set their sources up. 867 */ 868 read_phi(ctx, block); 869 return; 870 case nir_instr_type_jump: 871 instr = &read_jump(ctx)->instr; 872 break; 873 case nir_instr_type_call: 874 instr = &read_call(ctx)->instr; 875 break; 876 case nir_instr_type_parallel_copy: 877 unreachable("Cannot read parallel copies"); 878 default: 879 unreachable("bad instr type"); 880 } 881 882 nir_instr_insert_after_block(block, instr); 883} 884 885static void 886write_block(write_ctx *ctx, const nir_block *block) 887{ 888 write_add_object(ctx, block); 889 blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list)); 890 nir_foreach_instr(instr, block) 891 write_instr(ctx, instr); 892} 893 894static void 895read_block(read_ctx *ctx, struct exec_list *cf_list) 896{ 897 /* Don't actually create a new block. Just use the one from the tail of 898 * the list. NIR guarantees that the tail of the list is a block and that 899 * no two blocks are side-by-side in the IR; It should be empty. 900 */ 901 nir_block *block = 902 exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); 903 904 read_add_object(ctx, block); 905 unsigned num_instrs = blob_read_uint32(ctx->blob); 906 for (unsigned i = 0; i < num_instrs; i++) { 907 read_instr(ctx, block); 908 } 909} 910 911static void 912write_cf_list(write_ctx *ctx, const struct exec_list *cf_list); 913 914static void 915read_cf_list(read_ctx *ctx, struct exec_list *cf_list); 916 917static void 918write_if(write_ctx *ctx, nir_if *nif) 919{ 920 write_src(ctx, &nif->condition); 921 922 write_cf_list(ctx, &nif->then_list); 923 write_cf_list(ctx, &nif->else_list); 924} 925 926static void 927read_if(read_ctx *ctx, struct exec_list *cf_list) 928{ 929 nir_if *nif = nir_if_create(ctx->nir); 930 931 read_src(ctx, &nif->condition, nif); 932 933 nir_cf_node_insert_end(cf_list, &nif->cf_node); 934 935 read_cf_list(ctx, &nif->then_list); 936 read_cf_list(ctx, &nif->else_list); 937} 938 939static void 940write_loop(write_ctx *ctx, nir_loop *loop) 941{ 942 write_cf_list(ctx, &loop->body); 943} 944 945static void 946read_loop(read_ctx *ctx, struct exec_list *cf_list) 947{ 948 nir_loop *loop = nir_loop_create(ctx->nir); 949 950 nir_cf_node_insert_end(cf_list, &loop->cf_node); 951 952 read_cf_list(ctx, &loop->body); 953} 954 955static void 956write_cf_node(write_ctx *ctx, nir_cf_node *cf) 957{ 958 blob_write_uint32(ctx->blob, cf->type); 959 960 switch (cf->type) { 961 case nir_cf_node_block: 962 write_block(ctx, nir_cf_node_as_block(cf)); 963 break; 964 case nir_cf_node_if: 965 write_if(ctx, nir_cf_node_as_if(cf)); 966 break; 967 case nir_cf_node_loop: 968 write_loop(ctx, nir_cf_node_as_loop(cf)); 969 break; 970 default: 971 unreachable("bad cf type"); 972 } 973} 974 975static void 976read_cf_node(read_ctx *ctx, struct exec_list *list) 977{ 978 nir_cf_node_type type = blob_read_uint32(ctx->blob); 979 980 switch (type) { 981 case nir_cf_node_block: 982 read_block(ctx, list); 983 break; 984 case nir_cf_node_if: 985 read_if(ctx, list); 986 break; 987 case nir_cf_node_loop: 988 read_loop(ctx, list); 989 break; 990 default: 991 unreachable("bad cf type"); 992 } 993} 994 995static void 996write_cf_list(write_ctx *ctx, const struct exec_list *cf_list) 997{ 998 blob_write_uint32(ctx->blob, exec_list_length(cf_list)); 999 foreach_list_typed(nir_cf_node, cf, node, cf_list) { 1000 write_cf_node(ctx, cf); 1001 } 1002} 1003 1004static void 1005read_cf_list(read_ctx *ctx, struct exec_list *cf_list) 1006{ 1007 uint32_t num_cf_nodes = blob_read_uint32(ctx->blob); 1008 for (unsigned i = 0; i < num_cf_nodes; i++) 1009 read_cf_node(ctx, cf_list); 1010} 1011 1012static void 1013write_function_impl(write_ctx *ctx, const nir_function_impl *fi) 1014{ 1015 write_var_list(ctx, &fi->locals); 1016 write_reg_list(ctx, &fi->registers); 1017 blob_write_uint32(ctx->blob, fi->reg_alloc); 1018 1019 write_cf_list(ctx, &fi->body); 1020 write_fixup_phis(ctx); 1021} 1022 1023static nir_function_impl * 1024read_function_impl(read_ctx *ctx, nir_function *fxn) 1025{ 1026 nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir); 1027 fi->function = fxn; 1028 1029 read_var_list(ctx, &fi->locals); 1030 read_reg_list(ctx, &fi->registers); 1031 fi->reg_alloc = blob_read_uint32(ctx->blob); 1032 1033 read_cf_list(ctx, &fi->body); 1034 read_fixup_phis(ctx); 1035 1036 fi->valid_metadata = 0; 1037 1038 return fi; 1039} 1040 1041static void 1042write_function(write_ctx *ctx, const nir_function *fxn) 1043{ 1044 blob_write_uint32(ctx->blob, !!(fxn->name)); 1045 if (fxn->name) 1046 blob_write_string(ctx->blob, fxn->name); 1047 1048 write_add_object(ctx, fxn); 1049 1050 blob_write_uint32(ctx->blob, fxn->num_params); 1051 for (unsigned i = 0; i < fxn->num_params; i++) { 1052 uint32_t val = 1053 ((uint32_t)fxn->params[i].num_components) | 1054 ((uint32_t)fxn->params[i].bit_size) << 8; 1055 blob_write_uint32(ctx->blob, val); 1056 } 1057 1058 blob_write_uint32(ctx->blob, fxn->is_entrypoint); 1059 1060 /* At first glance, it looks like we should write the function_impl here. 1061 * However, call instructions need to be able to reference at least the 1062 * function and those will get processed as we write the function_impls. 1063 * We stop here and write function_impls as a second pass. 1064 */ 1065} 1066 1067static void 1068read_function(read_ctx *ctx) 1069{ 1070 bool has_name = blob_read_uint32(ctx->blob); 1071 char *name = has_name ? blob_read_string(ctx->blob) : NULL; 1072 1073 nir_function *fxn = nir_function_create(ctx->nir, name); 1074 1075 read_add_object(ctx, fxn); 1076 1077 fxn->num_params = blob_read_uint32(ctx->blob); 1078 fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params); 1079 for (unsigned i = 0; i < fxn->num_params; i++) { 1080 uint32_t val = blob_read_uint32(ctx->blob); 1081 fxn->params[i].num_components = val & 0xff; 1082 fxn->params[i].bit_size = (val >> 8) & 0xff; 1083 } 1084 1085 fxn->is_entrypoint = blob_read_uint32(ctx->blob); 1086} 1087 1088void 1089nir_serialize(struct blob *blob, const nir_shader *nir) 1090{ 1091 write_ctx ctx; 1092 ctx.remap_table = _mesa_pointer_hash_table_create(NULL); 1093 ctx.next_idx = 0; 1094 ctx.blob = blob; 1095 ctx.nir = nir; 1096 util_dynarray_init(&ctx.phi_fixups, NULL); 1097 1098 size_t idx_size_offset = blob_reserve_intptr(blob); 1099 1100 struct shader_info info = nir->info; 1101 uint32_t strings = 0; 1102 if (info.name) 1103 strings |= 0x1; 1104 if (info.label) 1105 strings |= 0x2; 1106 blob_write_uint32(blob, strings); 1107 if (info.name) 1108 blob_write_string(blob, info.name); 1109 if (info.label) 1110 blob_write_string(blob, info.label); 1111 info.name = info.label = NULL; 1112 blob_write_bytes(blob, (uint8_t *) &info, sizeof(info)); 1113 1114 write_var_list(&ctx, &nir->uniforms); 1115 write_var_list(&ctx, &nir->inputs); 1116 write_var_list(&ctx, &nir->outputs); 1117 write_var_list(&ctx, &nir->shared); 1118 write_var_list(&ctx, &nir->globals); 1119 write_var_list(&ctx, &nir->system_values); 1120 1121 blob_write_uint32(blob, nir->num_inputs); 1122 blob_write_uint32(blob, nir->num_uniforms); 1123 blob_write_uint32(blob, nir->num_outputs); 1124 blob_write_uint32(blob, nir->num_shared); 1125 blob_write_uint32(blob, nir->scratch_size); 1126 1127 blob_write_uint32(blob, exec_list_length(&nir->functions)); 1128 nir_foreach_function(fxn, nir) { 1129 write_function(&ctx, fxn); 1130 } 1131 1132 nir_foreach_function(fxn, nir) { 1133 write_function_impl(&ctx, fxn->impl); 1134 } 1135 1136 blob_write_uint32(blob, nir->constant_data_size); 1137 if (nir->constant_data_size > 0) 1138 blob_write_bytes(blob, nir->constant_data, nir->constant_data_size); 1139 1140 *(uintptr_t *)(blob->data + idx_size_offset) = ctx.next_idx; 1141 1142 _mesa_hash_table_destroy(ctx.remap_table, NULL); 1143 util_dynarray_fini(&ctx.phi_fixups); 1144} 1145 1146nir_shader * 1147nir_deserialize(void *mem_ctx, 1148 const struct nir_shader_compiler_options *options, 1149 struct blob_reader *blob) 1150{ 1151 read_ctx ctx; 1152 ctx.blob = blob; 1153 list_inithead(&ctx.phi_srcs); 1154 ctx.idx_table_len = blob_read_intptr(blob); 1155 ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t)); 1156 ctx.next_idx = 0; 1157 1158 uint32_t strings = blob_read_uint32(blob); 1159 char *name = (strings & 0x1) ? blob_read_string(blob) : NULL; 1160 char *label = (strings & 0x2) ? blob_read_string(blob) : NULL; 1161 1162 struct shader_info info; 1163 blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info)); 1164 1165 ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL); 1166 1167 info.name = name ? ralloc_strdup(ctx.nir, name) : NULL; 1168 info.label = label ? ralloc_strdup(ctx.nir, label) : NULL; 1169 1170 ctx.nir->info = info; 1171 1172 read_var_list(&ctx, &ctx.nir->uniforms); 1173 read_var_list(&ctx, &ctx.nir->inputs); 1174 read_var_list(&ctx, &ctx.nir->outputs); 1175 read_var_list(&ctx, &ctx.nir->shared); 1176 read_var_list(&ctx, &ctx.nir->globals); 1177 read_var_list(&ctx, &ctx.nir->system_values); 1178 1179 ctx.nir->num_inputs = blob_read_uint32(blob); 1180 ctx.nir->num_uniforms = blob_read_uint32(blob); 1181 ctx.nir->num_outputs = blob_read_uint32(blob); 1182 ctx.nir->num_shared = blob_read_uint32(blob); 1183 ctx.nir->scratch_size = blob_read_uint32(blob); 1184 1185 unsigned num_functions = blob_read_uint32(blob); 1186 for (unsigned i = 0; i < num_functions; i++) 1187 read_function(&ctx); 1188 1189 nir_foreach_function(fxn, ctx.nir) 1190 fxn->impl = read_function_impl(&ctx, fxn); 1191 1192 ctx.nir->constant_data_size = blob_read_uint32(blob); 1193 if (ctx.nir->constant_data_size > 0) { 1194 ctx.nir->constant_data = 1195 ralloc_size(ctx.nir, ctx.nir->constant_data_size); 1196 blob_copy_bytes(blob, ctx.nir->constant_data, 1197 ctx.nir->constant_data_size); 1198 } 1199 1200 free(ctx.idx_table); 1201 1202 return ctx.nir; 1203} 1204 1205nir_shader * 1206nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s) 1207{ 1208 const struct nir_shader_compiler_options *options = s->options; 1209 1210 struct blob writer; 1211 blob_init(&writer); 1212 nir_serialize(&writer, s); 1213 ralloc_free(s); 1214 1215 struct blob_reader reader; 1216 blob_reader_init(&reader, writer.data, writer.size); 1217 nir_shader *ns = nir_deserialize(mem_ctx, options, &reader); 1218 1219 blob_finish(&writer); 1220 1221 return ns; 1222} 1223