101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2017 Connor Abbott
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#include "nir_serialize.h"
2501e04c3fSmrg#include "nir_control_flow.h"
2601e04c3fSmrg#include "util/u_dynarray.h"
277ec681f3Smrg#include "util/u_math.h"
287ec681f3Smrg
/* Sentinel non-NULL pointer value (the integer 1 cast to a pointer).
 * NOTE(review): its users are outside this part of the file; presumably it
 * marks a function that has an implementation -- confirm at the use site.
 */
#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)

/* Limit on object indices: write_add_object() asserts that the index it
 * hands out is not equal to this value.  1 << 20 corresponds to the 20-bit
 * object_idx field of union packed_src.
 */
#define MAX_OBJECT_IDS (1 << 20)
3101e04c3fSmrg
/* One deferred phi source recorded while writing.  write_ctx::phi_fixups is
 * an array of these, for phi sources that are resolved in a second pass.
 */
typedef struct {
   /* NOTE(review): presumably the offset in the blob that gets patched when
    * the fixup is resolved -- confirm at the use site.
    */
   size_t blob_offset;
   /* SSA definition used as the phi source. */
   nir_ssa_def *src;
   /* Block associated with this phi source. */
   nir_block *block;
} write_phi_fixup;
3701e04c3fSmrg
/* State carried through all write_*() helpers while serializing a shader. */
typedef struct {
   /* The shader being serialized. */
   const nir_shader *nir;

   /* Destination blob that all write_*() helpers append to. */
   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type, interface type and variable data.
    * write_variable() compares against these so a repeated type can be
    * skipped and variable data can be written as a location delta.
    */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization).
    * last_alu_header_offset is the blob offset of the most recently written
    * ALU header; write_dest() patches that header in place.
    */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;
6601e04c3fSmrg
/* State carried through all read_*() helpers while deserializing a shader. */
typedef struct {
   /* The shader being built; also used as the ralloc parent for variables
    * and registers created by read_variable() and read_register().
    */
   nir_shader *nir;

   /* Reader positioned in the serialized data. */
   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type, interface type and variable data; the
    * reader-side counterparts of the same fields in write_ctx.
    */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;
8901e04c3fSmrg
9001e04c3fSmrgstatic void
9101e04c3fSmrgwrite_add_object(write_ctx *ctx, const void *obj)
9201e04c3fSmrg{
937ec681f3Smrg   uint32_t index = ctx->next_idx++;
947ec681f3Smrg   assert(index != MAX_OBJECT_IDS);
957ec681f3Smrg   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
9601e04c3fSmrg}
9701e04c3fSmrg
987ec681f3Smrgstatic uint32_t
9901e04c3fSmrgwrite_lookup_object(write_ctx *ctx, const void *obj)
10001e04c3fSmrg{
10101e04c3fSmrg   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
10201e04c3fSmrg   assert(entry);
1037ec681f3Smrg   return (uint32_t)(uintptr_t) entry->data;
10401e04c3fSmrg}
10501e04c3fSmrg
10601e04c3fSmrgstatic void
10701e04c3fSmrgread_add_object(read_ctx *ctx, void *obj)
10801e04c3fSmrg{
10901e04c3fSmrg   assert(ctx->next_idx < ctx->idx_table_len);
11001e04c3fSmrg   ctx->idx_table[ctx->next_idx++] = obj;
11101e04c3fSmrg}
11201e04c3fSmrg
11301e04c3fSmrgstatic void *
1147ec681f3Smrgread_lookup_object(read_ctx *ctx, uint32_t idx)
11501e04c3fSmrg{
11601e04c3fSmrg   assert(idx < ctx->idx_table_len);
11701e04c3fSmrg   return ctx->idx_table[idx];
11801e04c3fSmrg}
11901e04c3fSmrg
12001e04c3fSmrgstatic void *
12101e04c3fSmrgread_object(read_ctx *ctx)
12201e04c3fSmrg{
1237ec681f3Smrg   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
1247ec681f3Smrg}
1257ec681f3Smrg
/* Pack a bit size of 0, 1, 2, 4, 8, 16, 32 or 64 into 3 bits:
 * 0 stays 0, any other value becomes log2(bit_size) + 1.
 */
static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));

   return bit_size ? util_logbase2(bit_size) + 1 : 0;
}
1357ec681f3Smrg
/* Inverse of encode_bit_size_3bits(): 0 stays 0, a non-zero code c yields
 * 1 << (c - 1).
 */
static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size == 0)
      return 0;

   return 1 << (bit_size - 1);
}
1437ec681f3Smrg
/* 3-bit code meaning "the component count does not fit and is stored
 * separately".
 */
#define NUM_COMPONENTS_IS_SEPARATE_7   7

/* Pack a component count into 3 bits.  Counts 0-4 map to themselves, 8 maps
 * to 5 and 16 maps to 6.  Every other count yields
 * NUM_COMPONENTS_IS_SEPARATE_7, the special value indicating that
 * num_components is in the next uint32.
 */
static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   switch (num_components) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return num_components;
   case 8:
      return 5;
   case 16:
      return 6;
   default:
      return NUM_COMPONENTS_IS_SEPARATE_7;
   }
}
1597ec681f3Smrg
/* Inverse of encode_num_components_in_3bits() for the codes that carry the
 * count directly: 0-4 map to themselves, 5 maps to 8 and 6 maps to 16.
 * Any other code is treated as unreachable here.
 */
static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   switch (value) {
   case 0:
   case 1:
   case 2:
   case 3:
   case 4:
      return value;
   case 5:
      return 8;
   case 6:
      return 16;
   default:
      break;
   }

   unreachable("invalid num_components encoding");
   return 0;
}
17301e04c3fSmrg
17401e04c3fSmrgstatic void
17501e04c3fSmrgwrite_constant(write_ctx *ctx, const nir_constant *c)
17601e04c3fSmrg{
17701e04c3fSmrg   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
17801e04c3fSmrg   blob_write_uint32(ctx->blob, c->num_elements);
17901e04c3fSmrg   for (unsigned i = 0; i < c->num_elements; i++)
18001e04c3fSmrg      write_constant(ctx, c->elements[i]);
18101e04c3fSmrg}
18201e04c3fSmrg
18301e04c3fSmrgstatic nir_constant *
18401e04c3fSmrgread_constant(read_ctx *ctx, nir_variable *nvar)
18501e04c3fSmrg{
18601e04c3fSmrg   nir_constant *c = ralloc(nvar, nir_constant);
18701e04c3fSmrg
18801e04c3fSmrg   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
18901e04c3fSmrg   c->num_elements = blob_read_uint32(ctx->blob);
19001e04c3fSmrg   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
19101e04c3fSmrg   for (unsigned i = 0; i < c->num_elements; i++)
19201e04c3fSmrg      c->elements[i] = read_constant(ctx, nvar);
19301e04c3fSmrg
19401e04c3fSmrg   return c;
19501e04c3fSmrg}
19601e04c3fSmrg
/* How write_variable() stored a variable's nir_variable_data; kept in the
 * 2-bit data_encoding field of union packed_var.
 */
enum var_data_encoding {
   /* The whole nir_variable_data struct follows as raw bytes. */
   var_encode_full,
   /* No data is stored; the reader only sets mode = nir_var_shader_temp. */
   var_encode_shader_temp,
   /* No data is stored; the reader only sets mode = nir_var_function_temp. */
   var_encode_function_temp,
   /* One packed_var_data_diff dword follows, holding the location deltas
    * relative to the previously serialized variable data.
    */
   var_encode_location_diff,
};
2037ec681f3Smrg
/* 32-bit header written first for every serialized variable.  The flags
 * tell read_variable() which optional pieces follow in the blob.
 */
union packed_var {
   uint32_t u32;
   struct {
      /* A name string follows (never set when stripping). */
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      /* Number of nir_state_slot entries that follow (asserted < 128). */
      unsigned num_state_slots:7;
      /* enum var_data_encoding */
      unsigned data_encoding:2;
      /* The type equals the previously serialized one and is not repeated. */
      unsigned type_same_as_last:1;
      /* Same, for the interface type. */
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      /* Number of per-member nir_variable_data entries that follow. */
      unsigned num_members:16;
   } u;
};
2197ec681f3Smrg
/* Payload of var_encode_location_diff: signed differences between this
 * variable's locations and those of the previously serialized variable data.
 * write_variable() only picks this encoding when the location delta is below
 * 1 << 12 and the driver_location delta is below 1 << 15 in magnitude.
 */
union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
2287ec681f3Smrg
22901e04c3fSmrgstatic void
23001e04c3fSmrgwrite_variable(write_ctx *ctx, const nir_variable *var)
23101e04c3fSmrg{
23201e04c3fSmrg   write_add_object(ctx, var);
2337ec681f3Smrg
2347ec681f3Smrg   assert(var->num_state_slots < (1 << 7));
2357ec681f3Smrg
2367ec681f3Smrg   STATIC_ASSERT(sizeof(union packed_var) == 4);
2377ec681f3Smrg   union packed_var flags;
2387ec681f3Smrg   flags.u32 = 0;
2397ec681f3Smrg
2407ec681f3Smrg   flags.u.has_name = !ctx->strip && var->name;
2417ec681f3Smrg   flags.u.has_constant_initializer = !!(var->constant_initializer);
2427ec681f3Smrg   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
2437ec681f3Smrg   flags.u.has_interface_type = !!(var->interface_type);
2447ec681f3Smrg   flags.u.type_same_as_last = var->type == ctx->last_type;
2457ec681f3Smrg   flags.u.interface_type_same_as_last =
2467ec681f3Smrg      var->interface_type && var->interface_type == ctx->last_interface_type;
2477ec681f3Smrg   flags.u.num_state_slots = var->num_state_slots;
2487ec681f3Smrg   flags.u.num_members = var->num_members;
2497ec681f3Smrg
2507ec681f3Smrg   struct nir_variable_data data = var->data;
2517ec681f3Smrg
2527ec681f3Smrg   /* When stripping, we expect that the location is no longer needed,
2537ec681f3Smrg    * which is typically after shaders are linked.
2547ec681f3Smrg    */
2557ec681f3Smrg   if (ctx->strip &&
2567ec681f3Smrg       data.mode != nir_var_system_value &&
2577ec681f3Smrg       data.mode != nir_var_shader_in &&
2587ec681f3Smrg       data.mode != nir_var_shader_out)
2597ec681f3Smrg      data.location = 0;
2607ec681f3Smrg
2617ec681f3Smrg   /* Temporary variables don't serialize var->data. */
2627ec681f3Smrg   if (data.mode == nir_var_shader_temp)
2637ec681f3Smrg      flags.u.data_encoding = var_encode_shader_temp;
2647ec681f3Smrg   else if (data.mode == nir_var_function_temp)
2657ec681f3Smrg      flags.u.data_encoding = var_encode_function_temp;
2667ec681f3Smrg   else {
2677ec681f3Smrg      struct nir_variable_data tmp = data;
2687ec681f3Smrg
2697ec681f3Smrg      tmp.location = ctx->last_var_data.location;
2707ec681f3Smrg      tmp.location_frac = ctx->last_var_data.location_frac;
2717ec681f3Smrg      tmp.driver_location = ctx->last_var_data.driver_location;
2727ec681f3Smrg
2737ec681f3Smrg      /* See if we can encode only the difference in locations from the last
2747ec681f3Smrg       * variable.
2757ec681f3Smrg       */
2767ec681f3Smrg      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
2777ec681f3Smrg          abs((int)data.location -
2787ec681f3Smrg              (int)ctx->last_var_data.location) < (1 << 12) &&
2797ec681f3Smrg          abs((int)data.driver_location -
2807ec681f3Smrg              (int)ctx->last_var_data.driver_location) < (1 << 15))
2817ec681f3Smrg         flags.u.data_encoding = var_encode_location_diff;
2827ec681f3Smrg      else
2837ec681f3Smrg         flags.u.data_encoding = var_encode_full;
2847ec681f3Smrg   }
2857ec681f3Smrg
2867ec681f3Smrg   blob_write_uint32(ctx->blob, flags.u32);
2877ec681f3Smrg
2887ec681f3Smrg   if (!flags.u.type_same_as_last) {
2897ec681f3Smrg      encode_type_to_blob(ctx->blob, var->type);
2907ec681f3Smrg      ctx->last_type = var->type;
2917ec681f3Smrg   }
2927ec681f3Smrg
2937ec681f3Smrg   if (var->interface_type && !flags.u.interface_type_same_as_last) {
2947ec681f3Smrg      encode_type_to_blob(ctx->blob, var->interface_type);
2957ec681f3Smrg      ctx->last_interface_type = var->interface_type;
2967ec681f3Smrg   }
2977ec681f3Smrg
2987ec681f3Smrg   if (flags.u.has_name)
29901e04c3fSmrg      blob_write_string(ctx->blob, var->name);
3007ec681f3Smrg
3017ec681f3Smrg   if (flags.u.data_encoding == var_encode_full ||
3027ec681f3Smrg       flags.u.data_encoding == var_encode_location_diff) {
3037ec681f3Smrg      if (flags.u.data_encoding == var_encode_full) {
3047ec681f3Smrg         blob_write_bytes(ctx->blob, &data, sizeof(data));
3057ec681f3Smrg      } else {
3067ec681f3Smrg         /* Serialize only the difference in locations from the last variable.
3077ec681f3Smrg          */
3087ec681f3Smrg         union packed_var_data_diff diff;
3097ec681f3Smrg
3107ec681f3Smrg         diff.u.location = data.location - ctx->last_var_data.location;
3117ec681f3Smrg         diff.u.location_frac = data.location_frac -
3127ec681f3Smrg                                ctx->last_var_data.location_frac;
3137ec681f3Smrg         diff.u.driver_location = data.driver_location -
3147ec681f3Smrg                                  ctx->last_var_data.driver_location;
3157ec681f3Smrg
3167ec681f3Smrg         blob_write_uint32(ctx->blob, diff.u32);
3177ec681f3Smrg      }
3187ec681f3Smrg
3197ec681f3Smrg      ctx->last_var_data = data;
3207ec681f3Smrg   }
3217ec681f3Smrg
3227e102996Smaya   for (unsigned i = 0; i < var->num_state_slots; i++) {
3237ec681f3Smrg      blob_write_bytes(ctx->blob, &var->state_slots[i],
3247ec681f3Smrg                       sizeof(var->state_slots[i]));
3257e102996Smaya   }
32601e04c3fSmrg   if (var->constant_initializer)
32701e04c3fSmrg      write_constant(ctx, var->constant_initializer);
3287ec681f3Smrg   if (var->pointer_initializer)
3297ec681f3Smrg      write_lookup_object(ctx, var->pointer_initializer);
33001e04c3fSmrg   if (var->num_members > 0) {
33101e04c3fSmrg      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
33201e04c3fSmrg                       var->num_members * sizeof(*var->members));
33301e04c3fSmrg   }
33401e04c3fSmrg}
33501e04c3fSmrg
33601e04c3fSmrgstatic nir_variable *
33701e04c3fSmrgread_variable(read_ctx *ctx)
33801e04c3fSmrg{
33901e04c3fSmrg   nir_variable *var = rzalloc(ctx->nir, nir_variable);
34001e04c3fSmrg   read_add_object(ctx, var);
34101e04c3fSmrg
3427ec681f3Smrg   union packed_var flags;
3437ec681f3Smrg   flags.u32 = blob_read_uint32(ctx->blob);
3447ec681f3Smrg
3457ec681f3Smrg   if (flags.u.type_same_as_last) {
3467ec681f3Smrg      var->type = ctx->last_type;
3477ec681f3Smrg   } else {
3487ec681f3Smrg      var->type = decode_type_from_blob(ctx->blob);
3497ec681f3Smrg      ctx->last_type = var->type;
3507ec681f3Smrg   }
3517ec681f3Smrg
3527ec681f3Smrg   if (flags.u.has_interface_type) {
3537ec681f3Smrg      if (flags.u.interface_type_same_as_last) {
3547ec681f3Smrg         var->interface_type = ctx->last_interface_type;
3557ec681f3Smrg      } else {
3567ec681f3Smrg         var->interface_type = decode_type_from_blob(ctx->blob);
3577ec681f3Smrg         ctx->last_interface_type = var->interface_type;
3587ec681f3Smrg      }
3597ec681f3Smrg   }
3607ec681f3Smrg
3617ec681f3Smrg   if (flags.u.has_name) {
36201e04c3fSmrg      const char *name = blob_read_string(ctx->blob);
36301e04c3fSmrg      var->name = ralloc_strdup(var, name);
36401e04c3fSmrg   } else {
36501e04c3fSmrg      var->name = NULL;
36601e04c3fSmrg   }
3677ec681f3Smrg
3687ec681f3Smrg   if (flags.u.data_encoding == var_encode_shader_temp)
3697ec681f3Smrg      var->data.mode = nir_var_shader_temp;
3707ec681f3Smrg   else if (flags.u.data_encoding == var_encode_function_temp)
3717ec681f3Smrg      var->data.mode = nir_var_function_temp;
3727ec681f3Smrg   else if (flags.u.data_encoding == var_encode_full) {
3737ec681f3Smrg      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
3747ec681f3Smrg      ctx->last_var_data = var->data;
3757ec681f3Smrg   } else { /* var_encode_location_diff */
3767ec681f3Smrg      union packed_var_data_diff diff;
3777ec681f3Smrg      diff.u32 = blob_read_uint32(ctx->blob);
3787ec681f3Smrg
3797ec681f3Smrg      var->data = ctx->last_var_data;
3807ec681f3Smrg      var->data.location += diff.u.location;
3817ec681f3Smrg      var->data.location_frac += diff.u.location_frac;
3827ec681f3Smrg      var->data.driver_location += diff.u.driver_location;
3837ec681f3Smrg
3847ec681f3Smrg      ctx->last_var_data = var->data;
3857ec681f3Smrg   }
3867ec681f3Smrg
3877ec681f3Smrg   var->num_state_slots = flags.u.num_state_slots;
3887e102996Smaya   if (var->num_state_slots != 0) {
3897e102996Smaya      var->state_slots = ralloc_array(var, nir_state_slot,
3907e102996Smaya                                      var->num_state_slots);
3917e102996Smaya      for (unsigned i = 0; i < var->num_state_slots; i++) {
3927ec681f3Smrg         blob_copy_bytes(ctx->blob, &var->state_slots[i],
3937ec681f3Smrg                         sizeof(var->state_slots[i]));
3947e102996Smaya      }
3957e102996Smaya   }
3967ec681f3Smrg   if (flags.u.has_constant_initializer)
39701e04c3fSmrg      var->constant_initializer = read_constant(ctx, var);
39801e04c3fSmrg   else
39901e04c3fSmrg      var->constant_initializer = NULL;
4007ec681f3Smrg
4017ec681f3Smrg   if (flags.u.has_pointer_initializer)
4027ec681f3Smrg      var->pointer_initializer = read_object(ctx);
40301e04c3fSmrg   else
4047ec681f3Smrg      var->pointer_initializer = NULL;
4057ec681f3Smrg
4067ec681f3Smrg   var->num_members = flags.u.num_members;
40701e04c3fSmrg   if (var->num_members > 0) {
40801e04c3fSmrg      var->members = ralloc_array(var, struct nir_variable_data,
40901e04c3fSmrg                                  var->num_members);
41001e04c3fSmrg      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
41101e04c3fSmrg                      var->num_members * sizeof(*var->members));
41201e04c3fSmrg   }
41301e04c3fSmrg
41401e04c3fSmrg   return var;
41501e04c3fSmrg}
41601e04c3fSmrg
41701e04c3fSmrgstatic void
41801e04c3fSmrgwrite_var_list(write_ctx *ctx, const struct exec_list *src)
41901e04c3fSmrg{
42001e04c3fSmrg   blob_write_uint32(ctx->blob, exec_list_length(src));
42101e04c3fSmrg   foreach_list_typed(nir_variable, var, node, src) {
42201e04c3fSmrg      write_variable(ctx, var);
42301e04c3fSmrg   }
42401e04c3fSmrg}
42501e04c3fSmrg
42601e04c3fSmrgstatic void
42701e04c3fSmrgread_var_list(read_ctx *ctx, struct exec_list *dst)
42801e04c3fSmrg{
42901e04c3fSmrg   exec_list_make_empty(dst);
43001e04c3fSmrg   unsigned num_vars = blob_read_uint32(ctx->blob);
43101e04c3fSmrg   for (unsigned i = 0; i < num_vars; i++) {
43201e04c3fSmrg      nir_variable *var = read_variable(ctx);
43301e04c3fSmrg      exec_list_push_tail(dst, &var->node);
43401e04c3fSmrg   }
43501e04c3fSmrg}
43601e04c3fSmrg
43701e04c3fSmrgstatic void
43801e04c3fSmrgwrite_register(write_ctx *ctx, const nir_register *reg)
43901e04c3fSmrg{
44001e04c3fSmrg   write_add_object(ctx, reg);
44101e04c3fSmrg   blob_write_uint32(ctx->blob, reg->num_components);
44201e04c3fSmrg   blob_write_uint32(ctx->blob, reg->bit_size);
44301e04c3fSmrg   blob_write_uint32(ctx->blob, reg->num_array_elems);
44401e04c3fSmrg   blob_write_uint32(ctx->blob, reg->index);
44501e04c3fSmrg}
44601e04c3fSmrg
44701e04c3fSmrgstatic nir_register *
44801e04c3fSmrgread_register(read_ctx *ctx)
44901e04c3fSmrg{
45001e04c3fSmrg   nir_register *reg = ralloc(ctx->nir, nir_register);
45101e04c3fSmrg   read_add_object(ctx, reg);
45201e04c3fSmrg   reg->num_components = blob_read_uint32(ctx->blob);
45301e04c3fSmrg   reg->bit_size = blob_read_uint32(ctx->blob);
45401e04c3fSmrg   reg->num_array_elems = blob_read_uint32(ctx->blob);
45501e04c3fSmrg   reg->index = blob_read_uint32(ctx->blob);
45601e04c3fSmrg
45701e04c3fSmrg   list_inithead(&reg->uses);
45801e04c3fSmrg   list_inithead(&reg->defs);
45901e04c3fSmrg   list_inithead(&reg->if_uses);
46001e04c3fSmrg
46101e04c3fSmrg   return reg;
46201e04c3fSmrg}
46301e04c3fSmrg
46401e04c3fSmrgstatic void
46501e04c3fSmrgwrite_reg_list(write_ctx *ctx, const struct exec_list *src)
46601e04c3fSmrg{
46701e04c3fSmrg   blob_write_uint32(ctx->blob, exec_list_length(src));
46801e04c3fSmrg   foreach_list_typed(nir_register, reg, node, src)
46901e04c3fSmrg      write_register(ctx, reg);
47001e04c3fSmrg}
47101e04c3fSmrg
47201e04c3fSmrgstatic void
47301e04c3fSmrgread_reg_list(read_ctx *ctx, struct exec_list *dst)
47401e04c3fSmrg{
47501e04c3fSmrg   exec_list_make_empty(dst);
47601e04c3fSmrg   unsigned num_regs = blob_read_uint32(ctx->blob);
47701e04c3fSmrg   for (unsigned i = 0; i < num_regs; i++) {
47801e04c3fSmrg      nir_register *reg = read_register(ctx);
47901e04c3fSmrg      exec_list_push_tail(dst, &reg->node);
48001e04c3fSmrg   }
48101e04c3fSmrg}
48201e04c3fSmrg
/* 32-bit encoding of a source.  The low 22 bits ("header") are common to all
 * sources and are filled in by write_src_full(); the high 10 bits ("footer")
 * are supplied by the caller and interpreted through the alu or tex view.
 */
union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      /* Only set for register sources that have an indirect source. */
      unsigned is_indirect:1;
      /* Remap-table index of the SSA def or register. */
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
5067ec681f3Smrg
50701e04c3fSmrgstatic void
5087ec681f3Smrgwrite_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
50901e04c3fSmrg{
51001e04c3fSmrg   /* Since sources are very frequent, we try to save some space when storing
51101e04c3fSmrg    * them. In particular, we store whether the source is a register and
51201e04c3fSmrg    * whether the register has an indirect index in the low two bits. We can
51301e04c3fSmrg    * assume that the high two bits of the index are zero, since otherwise our
51401e04c3fSmrg    * address space would've been exhausted allocating the remap table!
51501e04c3fSmrg    */
5167ec681f3Smrg   header.any.is_ssa = src->is_ssa;
51701e04c3fSmrg   if (src->is_ssa) {
5187ec681f3Smrg      header.any.object_idx = write_lookup_object(ctx, src->ssa);
5197ec681f3Smrg      blob_write_uint32(ctx->blob, header.u32);
52001e04c3fSmrg   } else {
5217ec681f3Smrg      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
5227ec681f3Smrg      header.any.is_indirect = !!src->reg.indirect;
5237ec681f3Smrg      blob_write_uint32(ctx->blob, header.u32);
52401e04c3fSmrg      blob_write_uint32(ctx->blob, src->reg.base_offset);
52501e04c3fSmrg      if (src->reg.indirect) {
5267ec681f3Smrg         union packed_src header = {0};
5277ec681f3Smrg         write_src_full(ctx, src->reg.indirect, header);
52801e04c3fSmrg      }
52901e04c3fSmrg   }
53001e04c3fSmrg}
53101e04c3fSmrg
53201e04c3fSmrgstatic void
5337ec681f3Smrgwrite_src(write_ctx *ctx, const nir_src *src)
5347ec681f3Smrg{
5357ec681f3Smrg   union packed_src header = {0};
5367ec681f3Smrg   write_src_full(ctx, src, header);
5377ec681f3Smrg}
5387ec681f3Smrg
5397ec681f3Smrgstatic union packed_src
54001e04c3fSmrgread_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
54101e04c3fSmrg{
5427ec681f3Smrg   STATIC_ASSERT(sizeof(union packed_src) == 4);
5437ec681f3Smrg   union packed_src header;
5447ec681f3Smrg   header.u32 = blob_read_uint32(ctx->blob);
5457ec681f3Smrg
5467ec681f3Smrg   src->is_ssa = header.any.is_ssa;
54701e04c3fSmrg   if (src->is_ssa) {
5487ec681f3Smrg      src->ssa = read_lookup_object(ctx, header.any.object_idx);
54901e04c3fSmrg   } else {
5507ec681f3Smrg      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
55101e04c3fSmrg      src->reg.base_offset = blob_read_uint32(ctx->blob);
5527ec681f3Smrg      if (header.any.is_indirect) {
5537ec681f3Smrg         src->reg.indirect = malloc(sizeof(nir_src));
55401e04c3fSmrg         read_src(ctx, src->reg.indirect, mem_ctx);
55501e04c3fSmrg      } else {
55601e04c3fSmrg         src->reg.indirect = NULL;
55701e04c3fSmrg      }
55801e04c3fSmrg   }
5597ec681f3Smrg   return header;
56001e04c3fSmrg}
56101e04c3fSmrg
/* 8-bit description of a destination, stored in the dest field of
 * union packed_instr.  Bit 0 (is_ssa) selects which view applies.
 */
union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      /* Result of encode_num_components_in_3bits(). */
      uint8_t num_components:3;
      /* Result of encode_bit_size_3bits(). */
      uint8_t bit_size:3;
      uint8_t _pad:1;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      /* The register destination has an indirect source. */
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};
5767ec681f3Smrg
/* Values of the 2-bit const_indices_encoding field of
 * packed_instr::intrinsic, selecting how an intrinsic's constant indices
 * are stored.
 */
enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
5917ec681f3Smrg
/* Values of the 2-bit packing field of packed_instr::load_const, selecting
 * how the 19-bit packed_value field is interpreted.
 */
enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
6047ec681f3Smrg
/* 32-bit header written at the start of every serialized instruction.
 *
 * All views share instr_type in the low 4 bits.  Instruction kinds that have
 * a destination keep its 8-bit packed_dest in the top 8 bits, because
 * write_dest() and read_dest() access it through the "any" view; every view
 * that declares a dest field must therefore declare it last.
 */
union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned modes:14; /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:1;  /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2; /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      /* Padding comes before dest so that dest overlays any.dest. */
      unsigned _pad:12;
      unsigned dest:8;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
6807ec681f3Smrg
/* Write an instruction header together with its destination.
 *
 * The destination is packed into 8 bits (union packed_dest) and stored in
 * the dest field of "header", which the caller has already filled with the
 * instruction-specific bits.  The header is then written as one uint32,
 * except that an ALU instruction whose header equals the previous ALU
 * header is folded into that previous header (see below).
 *
 * After the header: an SSA destination whose component count did not fit in
 * 3 bits writes the count as a separate uint32 and is then added to the
 * remap table; a register destination writes the register index, the base
 * offset and, if present, the indirect source.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction that is also ALU. If it is, we don't have to write
    * the current header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         /* Re-read the previous ALU header from the blob; memcpy is used on
          * the raw byte buffer at the recorded offset.
          */
         union packed_instr last_header;
         memcpy(&last_header, ctx->blob->data + ctx->last_alu_header_offset,
                sizeof(last_header));

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            /* Bump the counter in the already-written header instead of
             * emitting a new one.
             */
            last_header.alu.num_followup_alu_sharing_header++;
            memcpy(ctx->blob->data + ctx->last_alu_header_offset,
                   &last_header, sizeof(last_header));

            equal_header = true;
         }
      }

      if (!equal_header) {
         /* Remember where this header lands so a following ALU instruction
          * can patch it.
          */
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   /* The component count did not fit in 3 bits: store it separately. */
   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      /* The new SSA def gets an index so later sources can refer to it. */
      write_add_object(ctx, &dst->ssa);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}
75201e04c3fSmrg
75301e04c3fSmrgstatic void
7547ec681f3Smrgread_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
7557ec681f3Smrg          union packed_instr header)
7567ec681f3Smrg{
7577ec681f3Smrg   union packed_dest dest;
7587ec681f3Smrg   dest.u8 = header.any.dest;
7597ec681f3Smrg
7607ec681f3Smrg   if (dest.ssa.is_ssa) {
7617ec681f3Smrg      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
7627ec681f3Smrg      unsigned num_components;
7637ec681f3Smrg      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
7647ec681f3Smrg         num_components = blob_read_uint32(ctx->blob);
7657ec681f3Smrg      else
7667ec681f3Smrg         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
7677ec681f3Smrg      nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL);
76801e04c3fSmrg      read_add_object(ctx, &dst->ssa);
76901e04c3fSmrg   } else {
77001e04c3fSmrg      dst->reg.reg = read_object(ctx);
77101e04c3fSmrg      dst->reg.base_offset = blob_read_uint32(ctx->blob);
7727ec681f3Smrg      if (dest.reg.is_indirect) {
7737ec681f3Smrg         dst->reg.indirect = malloc(sizeof(nir_src));
77401e04c3fSmrg         read_src(ctx, dst->reg.indirect, instr);
77501e04c3fSmrg      }
77601e04c3fSmrg   }
77701e04c3fSmrg}
77801e04c3fSmrg
7797ec681f3Smrgstatic bool
7807ec681f3Smrgare_object_ids_16bit(write_ctx *ctx)
7817ec681f3Smrg{
7827ec681f3Smrg   /* Check the highest object ID, because they are monotonic. */
7837ec681f3Smrg   return ctx->next_idx < (1 << 16);
7847ec681f3Smrg}
7857ec681f3Smrg
7867ec681f3Smrgstatic bool
7877ec681f3Smrgis_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
7887ec681f3Smrg{
7897ec681f3Smrg   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
7907ec681f3Smrg
7917ec681f3Smrg   for (unsigned i = 0; i < num_srcs; i++) {
7927ec681f3Smrg      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
7937ec681f3Smrg         return false;
7947ec681f3Smrg
7957ec681f3Smrg      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
7967ec681f3Smrg
7977ec681f3Smrg      for (unsigned chan = 0; chan < src_components; chan++) {
7987ec681f3Smrg         /* The swizzles for src0.x and src1.x are stored
7997ec681f3Smrg          * in writemask_or_two_swizzles for SSA ALUs.
8007ec681f3Smrg          */
8017ec681f3Smrg         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
8027ec681f3Smrg             alu->src[i].swizzle[chan] < 4)
8037ec681f3Smrg            continue;
8047ec681f3Smrg
8057ec681f3Smrg         if (alu->src[i].swizzle[chan] != chan)
8067ec681f3Smrg            return false;
8077ec681f3Smrg      }
8087ec681f3Smrg   }
8097ec681f3Smrg
8107ec681f3Smrg   return are_object_ids_16bit(ctx);
8117ec681f3Smrg}
8127ec681f3Smrg
81301e04c3fSmrgstatic void
81401e04c3fSmrgwrite_alu(write_ctx *ctx, const nir_alu_instr *alu)
81501e04c3fSmrg{
8167ec681f3Smrg   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
8177ec681f3Smrg   unsigned dst_components = nir_dest_num_components(alu->dest.dest);
8187ec681f3Smrg
8197ec681f3Smrg   /* 9 bits for nir_op */
8207ec681f3Smrg   STATIC_ASSERT(nir_num_opcodes <= 512);
8217ec681f3Smrg   union packed_instr header;
8227ec681f3Smrg   header.u32 = 0;
8237ec681f3Smrg
8247ec681f3Smrg   header.alu.instr_type = alu->instr.type;
8257ec681f3Smrg   header.alu.exact = alu->exact;
8267ec681f3Smrg   header.alu.no_signed_wrap = alu->no_signed_wrap;
8277ec681f3Smrg   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
8287ec681f3Smrg   header.alu.saturate = alu->dest.saturate;
8297ec681f3Smrg   header.alu.op = alu->op;
8307ec681f3Smrg   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
8317ec681f3Smrg
8327ec681f3Smrg   if (header.alu.packed_src_ssa_16bit &&
8337ec681f3Smrg       alu->dest.dest.is_ssa) {
8347ec681f3Smrg      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
8357ec681f3Smrg      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
8367ec681f3Smrg      if (num_srcs > 1)
8377ec681f3Smrg         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
8387ec681f3Smrg   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
8397ec681f3Smrg      /* For vec4 registers, this field is a writemask. */
8407ec681f3Smrg      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
8417ec681f3Smrg   }
84201e04c3fSmrg
8437ec681f3Smrg   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);
84401e04c3fSmrg
8457ec681f3Smrg   if (!alu->dest.dest.is_ssa && dst_components > 4)
8467ec681f3Smrg      blob_write_uint32(ctx->blob, alu->dest.write_mask);
8477ec681f3Smrg
8487ec681f3Smrg   if (header.alu.packed_src_ssa_16bit) {
8497ec681f3Smrg      for (unsigned i = 0; i < num_srcs; i++) {
8507ec681f3Smrg         assert(alu->src[i].src.is_ssa);
8517ec681f3Smrg         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
8527ec681f3Smrg         assert(idx < (1 << 16));
8537ec681f3Smrg         blob_write_uint16(ctx->blob, idx);
8547ec681f3Smrg      }
8557ec681f3Smrg   } else {
8567ec681f3Smrg      for (unsigned i = 0; i < num_srcs; i++) {
8577ec681f3Smrg         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
8587ec681f3Smrg         unsigned src_components = nir_src_num_components(alu->src[i].src);
8597ec681f3Smrg         union packed_src src;
8607ec681f3Smrg         bool packed = src_components <= 4 && src_channels <= 4;
8617ec681f3Smrg         src.u32 = 0;
8627ec681f3Smrg
8637ec681f3Smrg         src.alu.negate = alu->src[i].negate;
8647ec681f3Smrg         src.alu.abs = alu->src[i].abs;
8657ec681f3Smrg
8667ec681f3Smrg         if (packed) {
8677ec681f3Smrg            src.alu.swizzle_x = alu->src[i].swizzle[0];
8687ec681f3Smrg            src.alu.swizzle_y = alu->src[i].swizzle[1];
8697ec681f3Smrg            src.alu.swizzle_z = alu->src[i].swizzle[2];
8707ec681f3Smrg            src.alu.swizzle_w = alu->src[i].swizzle[3];
8717ec681f3Smrg         }
8727ec681f3Smrg
8737ec681f3Smrg         write_src_full(ctx, &alu->src[i].src, src);
8747ec681f3Smrg
8757ec681f3Smrg         /* Store swizzles for vec8 and vec16. */
8767ec681f3Smrg         if (!packed) {
8777ec681f3Smrg            for (unsigned o = 0; o < src_channels; o += 8) {
8787ec681f3Smrg               unsigned value = 0;
8797ec681f3Smrg
8807ec681f3Smrg               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
8817ec681f3Smrg                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
8827ec681f3Smrg                           (4 * j); /* 4 bits per swizzle */
8837ec681f3Smrg               }
8847ec681f3Smrg
8857ec681f3Smrg               blob_write_uint32(ctx->blob, value);
8867ec681f3Smrg            }
8877ec681f3Smrg         }
8887ec681f3Smrg      }
88901e04c3fSmrg   }
89001e04c3fSmrg}
89101e04c3fSmrg
89201e04c3fSmrgstatic nir_alu_instr *
8937ec681f3Smrgread_alu(read_ctx *ctx, union packed_instr header)
89401e04c3fSmrg{
8957ec681f3Smrg   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
8967ec681f3Smrg   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);
89701e04c3fSmrg
8987ec681f3Smrg   alu->exact = header.alu.exact;
8997ec681f3Smrg   alu->no_signed_wrap = header.alu.no_signed_wrap;
9007ec681f3Smrg   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
9017ec681f3Smrg   alu->dest.saturate = header.alu.saturate;
9027ec681f3Smrg
9037ec681f3Smrg   read_dest(ctx, &alu->dest.dest, &alu->instr, header);
9047ec681f3Smrg
9057ec681f3Smrg   unsigned dst_components = nir_dest_num_components(alu->dest.dest);
9067ec681f3Smrg
9077ec681f3Smrg   if (alu->dest.dest.is_ssa) {
9087ec681f3Smrg      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
9097ec681f3Smrg   } else if (dst_components <= 4) {
9107ec681f3Smrg      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
9117ec681f3Smrg   } else {
9127ec681f3Smrg      alu->dest.write_mask = blob_read_uint32(ctx->blob);
9137ec681f3Smrg   }
9147ec681f3Smrg
9157ec681f3Smrg   if (header.alu.packed_src_ssa_16bit) {
9167ec681f3Smrg      for (unsigned i = 0; i < num_srcs; i++) {
9177ec681f3Smrg         nir_alu_src *src = &alu->src[i];
9187ec681f3Smrg         src->src.is_ssa = true;
9197ec681f3Smrg         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
9207ec681f3Smrg
9217ec681f3Smrg         memset(&src->swizzle, 0, sizeof(src->swizzle));
9227ec681f3Smrg
9237ec681f3Smrg         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);
9247ec681f3Smrg
9257ec681f3Smrg         for (unsigned chan = 0; chan < src_components; chan++)
9267ec681f3Smrg            src->swizzle[chan] = chan;
9277ec681f3Smrg      }
9287ec681f3Smrg   } else {
9297ec681f3Smrg      for (unsigned i = 0; i < num_srcs; i++) {
9307ec681f3Smrg         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
9317ec681f3Smrg         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
9327ec681f3Smrg         unsigned src_components = nir_src_num_components(alu->src[i].src);
9337ec681f3Smrg         bool packed = src_components <= 4 && src_channels <= 4;
9347ec681f3Smrg
9357ec681f3Smrg         alu->src[i].negate = src.alu.negate;
9367ec681f3Smrg         alu->src[i].abs = src.alu.abs;
9377ec681f3Smrg
9387ec681f3Smrg         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));
9397ec681f3Smrg
9407ec681f3Smrg         if (packed) {
9417ec681f3Smrg            alu->src[i].swizzle[0] = src.alu.swizzle_x;
9427ec681f3Smrg            alu->src[i].swizzle[1] = src.alu.swizzle_y;
9437ec681f3Smrg            alu->src[i].swizzle[2] = src.alu.swizzle_z;
9447ec681f3Smrg            alu->src[i].swizzle[3] = src.alu.swizzle_w;
9457ec681f3Smrg         } else {
9467ec681f3Smrg            /* Load swizzles for vec8 and vec16. */
9477ec681f3Smrg            for (unsigned o = 0; o < src_channels; o += 8) {
9487ec681f3Smrg               unsigned value = blob_read_uint32(ctx->blob);
9497ec681f3Smrg
9507ec681f3Smrg               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
9517ec681f3Smrg                  alu->src[i].swizzle[o + j] =
9527ec681f3Smrg                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
9537ec681f3Smrg               }
9547ec681f3Smrg            }
9557ec681f3Smrg         }
9567ec681f3Smrg      }
9577ec681f3Smrg   }
9587ec681f3Smrg
9597ec681f3Smrg   if (header.alu.packed_src_ssa_16bit &&
9607ec681f3Smrg       alu->dest.dest.is_ssa) {
9617ec681f3Smrg      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
9627ec681f3Smrg      if (num_srcs > 1)
9637ec681f3Smrg         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
96401e04c3fSmrg   }
96501e04c3fSmrg
96601e04c3fSmrg   return alu;
96701e04c3fSmrg}
96801e04c3fSmrg
96901e04c3fSmrgstatic void
97001e04c3fSmrgwrite_deref(write_ctx *ctx, const nir_deref_instr *deref)
97101e04c3fSmrg{
9727ec681f3Smrg   assert(deref->deref_type < 8);
9737ec681f3Smrg   assert(deref->modes < (1 << 14));
9747ec681f3Smrg
9757ec681f3Smrg   union packed_instr header;
9767ec681f3Smrg   header.u32 = 0;
97701e04c3fSmrg
9787ec681f3Smrg   header.deref.instr_type = deref->instr.type;
9797ec681f3Smrg   header.deref.deref_type = deref->deref_type;
98001e04c3fSmrg
9817ec681f3Smrg   if (deref->deref_type == nir_deref_type_cast) {
9827ec681f3Smrg      header.deref.modes = deref->modes;
9837ec681f3Smrg      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
9847ec681f3Smrg   }
98501e04c3fSmrg
9867ec681f3Smrg   unsigned var_idx = 0;
98701e04c3fSmrg   if (deref->deref_type == nir_deref_type_var) {
9887ec681f3Smrg      var_idx = write_lookup_object(ctx, deref->var);
9897ec681f3Smrg      if (var_idx && var_idx < (1 << 16))
9907ec681f3Smrg         header.deref_var.object_idx = var_idx;
9917ec681f3Smrg   }
9927ec681f3Smrg
9937ec681f3Smrg   if (deref->deref_type == nir_deref_type_array ||
9947ec681f3Smrg       deref->deref_type == nir_deref_type_ptr_as_array) {
9957ec681f3Smrg      header.deref.packed_src_ssa_16bit =
9967ec681f3Smrg         deref->parent.is_ssa && deref->arr.index.is_ssa &&
9977ec681f3Smrg         are_object_ids_16bit(ctx);
99801e04c3fSmrg   }
99901e04c3fSmrg
10007ec681f3Smrg   write_dest(ctx, &deref->dest, header, deref->instr.type);
100101e04c3fSmrg
100201e04c3fSmrg   switch (deref->deref_type) {
10037ec681f3Smrg   case nir_deref_type_var:
10047ec681f3Smrg      if (!header.deref_var.object_idx)
10057ec681f3Smrg         blob_write_uint32(ctx->blob, var_idx);
10067ec681f3Smrg      break;
10077ec681f3Smrg
100801e04c3fSmrg   case nir_deref_type_struct:
10097ec681f3Smrg      write_src(ctx, &deref->parent);
101001e04c3fSmrg      blob_write_uint32(ctx->blob, deref->strct.index);
101101e04c3fSmrg      break;
101201e04c3fSmrg
101301e04c3fSmrg   case nir_deref_type_array:
10147e102996Smaya   case nir_deref_type_ptr_as_array:
10157ec681f3Smrg      if (header.deref.packed_src_ssa_16bit) {
10167ec681f3Smrg         blob_write_uint16(ctx->blob,
10177ec681f3Smrg                           write_lookup_object(ctx, deref->parent.ssa));
10187ec681f3Smrg         blob_write_uint16(ctx->blob,
10197ec681f3Smrg                           write_lookup_object(ctx, deref->arr.index.ssa));
10207ec681f3Smrg      } else {
10217ec681f3Smrg         write_src(ctx, &deref->parent);
10227ec681f3Smrg         write_src(ctx, &deref->arr.index);
10237ec681f3Smrg      }
102401e04c3fSmrg      break;
102501e04c3fSmrg
102601e04c3fSmrg   case nir_deref_type_cast:
10277ec681f3Smrg      write_src(ctx, &deref->parent);
10287e102996Smaya      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
10297ec681f3Smrg      blob_write_uint32(ctx->blob, deref->cast.align_mul);
10307ec681f3Smrg      blob_write_uint32(ctx->blob, deref->cast.align_offset);
10317ec681f3Smrg      if (!header.deref.cast_type_same_as_last) {
10327ec681f3Smrg         encode_type_to_blob(ctx->blob, deref->type);
10337ec681f3Smrg         ctx->last_type = deref->type;
10347ec681f3Smrg      }
10357e102996Smaya      break;
10367e102996Smaya
10377e102996Smaya   case nir_deref_type_array_wildcard:
10387ec681f3Smrg      write_src(ctx, &deref->parent);
103901e04c3fSmrg      break;
104001e04c3fSmrg
104101e04c3fSmrg   default:
104201e04c3fSmrg      unreachable("Invalid deref type");
104301e04c3fSmrg   }
104401e04c3fSmrg}
104501e04c3fSmrg
104601e04c3fSmrgstatic nir_deref_instr *
10477ec681f3Smrgread_deref(read_ctx *ctx, union packed_instr header)
104801e04c3fSmrg{
10497ec681f3Smrg   nir_deref_type deref_type = header.deref.deref_type;
105001e04c3fSmrg   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);
105101e04c3fSmrg
10527ec681f3Smrg   read_dest(ctx, &deref->dest, &deref->instr, header);
105301e04c3fSmrg
10547ec681f3Smrg   nir_deref_instr *parent;
105501e04c3fSmrg
10567ec681f3Smrg   switch (deref->deref_type) {
10577ec681f3Smrg   case nir_deref_type_var:
10587ec681f3Smrg      if (header.deref_var.object_idx)
10597ec681f3Smrg         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
10607ec681f3Smrg      else
10617ec681f3Smrg         deref->var = read_object(ctx);
106201e04c3fSmrg
10637ec681f3Smrg      deref->type = deref->var->type;
10647ec681f3Smrg      break;
106501e04c3fSmrg
106601e04c3fSmrg   case nir_deref_type_struct:
10677ec681f3Smrg      read_src(ctx, &deref->parent, &deref->instr);
10687ec681f3Smrg      parent = nir_src_as_deref(deref->parent);
106901e04c3fSmrg      deref->strct.index = blob_read_uint32(ctx->blob);
10707ec681f3Smrg      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
107101e04c3fSmrg      break;
107201e04c3fSmrg
107301e04c3fSmrg   case nir_deref_type_array:
10747e102996Smaya   case nir_deref_type_ptr_as_array:
10757ec681f3Smrg      if (header.deref.packed_src_ssa_16bit) {
10767ec681f3Smrg         deref->parent.is_ssa = true;
10777ec681f3Smrg         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
10787ec681f3Smrg         deref->arr.index.is_ssa = true;
10797ec681f3Smrg         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
10807ec681f3Smrg      } else {
10817ec681f3Smrg         read_src(ctx, &deref->parent, &deref->instr);
10827ec681f3Smrg         read_src(ctx, &deref->arr.index, &deref->instr);
10837ec681f3Smrg      }
10847ec681f3Smrg
10857ec681f3Smrg      parent = nir_src_as_deref(deref->parent);
10867ec681f3Smrg      if (deref->deref_type == nir_deref_type_array)
10877ec681f3Smrg         deref->type = glsl_get_array_element(parent->type);
10887ec681f3Smrg      else
10897ec681f3Smrg         deref->type = parent->type;
109001e04c3fSmrg      break;
109101e04c3fSmrg
109201e04c3fSmrg   case nir_deref_type_cast:
10937ec681f3Smrg      read_src(ctx, &deref->parent, &deref->instr);
10947e102996Smaya      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
10957ec681f3Smrg      deref->cast.align_mul = blob_read_uint32(ctx->blob);
10967ec681f3Smrg      deref->cast.align_offset = blob_read_uint32(ctx->blob);
10977ec681f3Smrg      if (header.deref.cast_type_same_as_last) {
10987ec681f3Smrg         deref->type = ctx->last_type;
10997ec681f3Smrg      } else {
11007ec681f3Smrg         deref->type = decode_type_from_blob(ctx->blob);
11017ec681f3Smrg         ctx->last_type = deref->type;
11027ec681f3Smrg      }
11037e102996Smaya      break;
11047e102996Smaya
11057e102996Smaya   case nir_deref_type_array_wildcard:
11067ec681f3Smrg      read_src(ctx, &deref->parent, &deref->instr);
11077ec681f3Smrg      parent = nir_src_as_deref(deref->parent);
11087ec681f3Smrg      deref->type = glsl_get_array_element(parent->type);
110901e04c3fSmrg      break;
111001e04c3fSmrg
111101e04c3fSmrg   default:
111201e04c3fSmrg      unreachable("Invalid deref type");
111301e04c3fSmrg   }
111401e04c3fSmrg
11157ec681f3Smrg   if (deref_type == nir_deref_type_var) {
11167ec681f3Smrg      deref->modes = deref->var->data.mode;
11177ec681f3Smrg   } else if (deref->deref_type == nir_deref_type_cast) {
11187ec681f3Smrg      deref->modes = header.deref.modes;
11197ec681f3Smrg   } else {
11207ec681f3Smrg      assert(deref->parent.is_ssa);
11217ec681f3Smrg      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
11227ec681f3Smrg   }
11237ec681f3Smrg
112401e04c3fSmrg   return deref;
112501e04c3fSmrg}
112601e04c3fSmrg
112701e04c3fSmrgstatic void
112801e04c3fSmrgwrite_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
112901e04c3fSmrg{
11307ec681f3Smrg   /* 9 bits for nir_intrinsic_op */
11317ec681f3Smrg   STATIC_ASSERT(nir_num_intrinsics <= 512);
113201e04c3fSmrg   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
113301e04c3fSmrg   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
11347ec681f3Smrg   assert(intrin->intrinsic < 512);
11357ec681f3Smrg
11367ec681f3Smrg   union packed_instr header;
11377ec681f3Smrg   header.u32 = 0;
113801e04c3fSmrg
11397ec681f3Smrg   header.intrinsic.instr_type = intrin->instr.type;
11407ec681f3Smrg   header.intrinsic.intrinsic = intrin->intrinsic;
11417ec681f3Smrg
11427ec681f3Smrg   /* Analyze constant indices to decide how to encode them. */
11437ec681f3Smrg   if (num_indices) {
11447ec681f3Smrg      unsigned max_bits = 0;
11457ec681f3Smrg      for (unsigned i = 0; i < num_indices; i++) {
11467ec681f3Smrg         unsigned max = util_last_bit(intrin->const_index[i]);
11477ec681f3Smrg         max_bits = MAX2(max_bits, max);
11487ec681f3Smrg      }
11497ec681f3Smrg
11507ec681f3Smrg      if (max_bits * num_indices <= 9) {
11517ec681f3Smrg         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;
11527ec681f3Smrg
11537ec681f3Smrg         /* Pack all const indices into 6 bits. */
11547ec681f3Smrg         unsigned bit_size = 9 / num_indices;
11557ec681f3Smrg         for (unsigned i = 0; i < num_indices; i++) {
11567ec681f3Smrg            header.intrinsic.packed_const_indices |=
11577ec681f3Smrg               intrin->const_index[i] << (i * bit_size);
11587ec681f3Smrg         }
11597ec681f3Smrg      } else if (max_bits <= 8)
11607ec681f3Smrg         header.intrinsic.const_indices_encoding = const_indices_8bit;
11617ec681f3Smrg      else if (max_bits <= 16)
11627ec681f3Smrg         header.intrinsic.const_indices_encoding = const_indices_16bit;
11637ec681f3Smrg      else
11647ec681f3Smrg         header.intrinsic.const_indices_encoding = const_indices_32bit;
11657ec681f3Smrg   }
116601e04c3fSmrg
116701e04c3fSmrg   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
11687ec681f3Smrg      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
11697ec681f3Smrg   else
11707ec681f3Smrg      blob_write_uint32(ctx->blob, header.u32);
117101e04c3fSmrg
117201e04c3fSmrg   for (unsigned i = 0; i < num_srcs; i++)
117301e04c3fSmrg      write_src(ctx, &intrin->src[i]);
117401e04c3fSmrg
11757ec681f3Smrg   if (num_indices) {
11767ec681f3Smrg      switch (header.intrinsic.const_indices_encoding) {
11777ec681f3Smrg      case const_indices_8bit:
11787ec681f3Smrg         for (unsigned i = 0; i < num_indices; i++)
11797ec681f3Smrg            blob_write_uint8(ctx->blob, intrin->const_index[i]);
11807ec681f3Smrg         break;
11817ec681f3Smrg      case const_indices_16bit:
11827ec681f3Smrg         for (unsigned i = 0; i < num_indices; i++)
11837ec681f3Smrg            blob_write_uint16(ctx->blob, intrin->const_index[i]);
11847ec681f3Smrg         break;
11857ec681f3Smrg      case const_indices_32bit:
11867ec681f3Smrg         for (unsigned i = 0; i < num_indices; i++)
11877ec681f3Smrg            blob_write_uint32(ctx->blob, intrin->const_index[i]);
11887ec681f3Smrg         break;
11897ec681f3Smrg      }
11907ec681f3Smrg   }
119101e04c3fSmrg}
119201e04c3fSmrg
119301e04c3fSmrgstatic nir_intrinsic_instr *
11947ec681f3Smrgread_intrinsic(read_ctx *ctx, union packed_instr header)
119501e04c3fSmrg{
11967ec681f3Smrg   nir_intrinsic_op op = header.intrinsic.intrinsic;
119701e04c3fSmrg   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);
119801e04c3fSmrg
119901e04c3fSmrg   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
120001e04c3fSmrg   unsigned num_indices = nir_intrinsic_infos[op].num_indices;
120101e04c3fSmrg
120201e04c3fSmrg   if (nir_intrinsic_infos[op].has_dest)
12037ec681f3Smrg      read_dest(ctx, &intrin->dest, &intrin->instr, header);
120401e04c3fSmrg
120501e04c3fSmrg   for (unsigned i = 0; i < num_srcs; i++)
120601e04c3fSmrg      read_src(ctx, &intrin->src[i], &intrin->instr);
120701e04c3fSmrg
12087ec681f3Smrg   /* Vectorized instrinsics have num_components same as dst or src that has
12097ec681f3Smrg    * 0 components in the info. Find it.
12107ec681f3Smrg    */
12117ec681f3Smrg   if (nir_intrinsic_infos[op].has_dest &&
12127ec681f3Smrg       nir_intrinsic_infos[op].dest_components == 0) {
12137ec681f3Smrg      intrin->num_components = nir_dest_num_components(intrin->dest);
12147ec681f3Smrg   } else {
12157ec681f3Smrg      for (unsigned i = 0; i < num_srcs; i++) {
12167ec681f3Smrg         if (nir_intrinsic_infos[op].src_components[i] == 0) {
12177ec681f3Smrg            intrin->num_components = nir_src_num_components(intrin->src[i]);
12187ec681f3Smrg            break;
12197ec681f3Smrg         }
12207ec681f3Smrg      }
12217ec681f3Smrg   }
12227ec681f3Smrg
12237ec681f3Smrg   if (num_indices) {
12247ec681f3Smrg      switch (header.intrinsic.const_indices_encoding) {
12257ec681f3Smrg      case const_indices_9bit_all_combined: {
12267ec681f3Smrg         unsigned bit_size = 9 / num_indices;
12277ec681f3Smrg         unsigned bit_mask = u_bit_consecutive(0, bit_size);
12287ec681f3Smrg         for (unsigned i = 0; i < num_indices; i++) {
12297ec681f3Smrg            intrin->const_index[i] =
12307ec681f3Smrg               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
12317ec681f3Smrg               bit_mask;
12327ec681f3Smrg         }
12337ec681f3Smrg         break;
12347ec681f3Smrg      }
12357ec681f3Smrg      case const_indices_8bit:
12367ec681f3Smrg         for (unsigned i = 0; i < num_indices; i++)
12377ec681f3Smrg            intrin->const_index[i] = blob_read_uint8(ctx->blob);
12387ec681f3Smrg         break;
12397ec681f3Smrg      case const_indices_16bit:
12407ec681f3Smrg         for (unsigned i = 0; i < num_indices; i++)
12417ec681f3Smrg            intrin->const_index[i] = blob_read_uint16(ctx->blob);
12427ec681f3Smrg         break;
12437ec681f3Smrg      case const_indices_32bit:
12447ec681f3Smrg         for (unsigned i = 0; i < num_indices; i++)
12457ec681f3Smrg            intrin->const_index[i] = blob_read_uint32(ctx->blob);
12467ec681f3Smrg         break;
12477ec681f3Smrg      }
12487ec681f3Smrg   }
124901e04c3fSmrg
125001e04c3fSmrg   return intrin;
125101e04c3fSmrg}
125201e04c3fSmrg
125301e04c3fSmrgstatic void
125401e04c3fSmrgwrite_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
125501e04c3fSmrg{
12567ec681f3Smrg   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
12577ec681f3Smrg   union packed_instr header;
12587ec681f3Smrg   header.u32 = 0;
12597ec681f3Smrg
12607ec681f3Smrg   header.load_const.instr_type = lc->instr.type;
12617ec681f3Smrg   header.load_const.last_component = lc->def.num_components - 1;
12627ec681f3Smrg   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
12637ec681f3Smrg   header.load_const.packing = load_const_full;
12647ec681f3Smrg
12657ec681f3Smrg   /* Try to pack 1-component constants into the 19 free bits in the header. */
12667ec681f3Smrg   if (lc->def.num_components == 1) {
12677ec681f3Smrg      switch (lc->def.bit_size) {
12687ec681f3Smrg      case 64:
12697ec681f3Smrg         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
12707ec681f3Smrg            /* packed_value contains high 19 bits, low bits are 0 */
12717ec681f3Smrg            header.load_const.packing = load_const_scalar_hi_19bits;
12727ec681f3Smrg            header.load_const.packed_value = lc->value[0].u64 >> 45;
12737ec681f3Smrg         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
12747ec681f3Smrg            /* packed_value contains low 19 bits, high bits are sign-extended */
12757ec681f3Smrg            header.load_const.packing = load_const_scalar_lo_19bits_sext;
12767ec681f3Smrg            header.load_const.packed_value = lc->value[0].u64;
12777ec681f3Smrg         }
12787ec681f3Smrg         break;
12797ec681f3Smrg
12807ec681f3Smrg      case 32:
12817ec681f3Smrg         if ((lc->value[0].u32 & 0x1fff) == 0) {
12827ec681f3Smrg            header.load_const.packing = load_const_scalar_hi_19bits;
12837ec681f3Smrg            header.load_const.packed_value = lc->value[0].u32 >> 13;
12847ec681f3Smrg         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
12857ec681f3Smrg            header.load_const.packing = load_const_scalar_lo_19bits_sext;
12867ec681f3Smrg            header.load_const.packed_value = lc->value[0].u32;
12877ec681f3Smrg         }
12887ec681f3Smrg         break;
12897ec681f3Smrg
12907ec681f3Smrg      case 16:
12917ec681f3Smrg         header.load_const.packing = load_const_scalar_lo_19bits_sext;
12927ec681f3Smrg         header.load_const.packed_value = lc->value[0].u16;
12937ec681f3Smrg         break;
12947ec681f3Smrg      case 8:
12957ec681f3Smrg         header.load_const.packing = load_const_scalar_lo_19bits_sext;
12967ec681f3Smrg         header.load_const.packed_value = lc->value[0].u8;
12977ec681f3Smrg         break;
12987ec681f3Smrg      case 1:
12997ec681f3Smrg         header.load_const.packing = load_const_scalar_lo_19bits_sext;
13007ec681f3Smrg         header.load_const.packed_value = lc->value[0].b;
13017ec681f3Smrg         break;
13027ec681f3Smrg      default:
13037ec681f3Smrg         unreachable("invalid bit_size");
13047ec681f3Smrg      }
13057ec681f3Smrg   }
13067ec681f3Smrg
13077ec681f3Smrg   blob_write_uint32(ctx->blob, header.u32);
13087ec681f3Smrg
13097ec681f3Smrg   if (header.load_const.packing == load_const_full) {
13107ec681f3Smrg      switch (lc->def.bit_size) {
13117ec681f3Smrg      case 64:
13127ec681f3Smrg         blob_write_bytes(ctx->blob, lc->value,
13137ec681f3Smrg                          sizeof(*lc->value) * lc->def.num_components);
13147ec681f3Smrg         break;
13157ec681f3Smrg
13167ec681f3Smrg      case 32:
13177ec681f3Smrg         for (unsigned i = 0; i < lc->def.num_components; i++)
13187ec681f3Smrg            blob_write_uint32(ctx->blob, lc->value[i].u32);
13197ec681f3Smrg         break;
13207ec681f3Smrg
13217ec681f3Smrg      case 16:
13227ec681f3Smrg         for (unsigned i = 0; i < lc->def.num_components; i++)
13237ec681f3Smrg            blob_write_uint16(ctx->blob, lc->value[i].u16);
13247ec681f3Smrg         break;
13257ec681f3Smrg
13267ec681f3Smrg      default:
13277ec681f3Smrg         assert(lc->def.bit_size <= 8);
13287ec681f3Smrg         for (unsigned i = 0; i < lc->def.num_components; i++)
13297ec681f3Smrg            blob_write_uint8(ctx->blob, lc->value[i].u8);
13307ec681f3Smrg         break;
13317ec681f3Smrg      }
13327ec681f3Smrg   }
13337ec681f3Smrg
133401e04c3fSmrg   write_add_object(ctx, &lc->def);
133501e04c3fSmrg}
133601e04c3fSmrg
133701e04c3fSmrgstatic nir_load_const_instr *
13387ec681f3Smrgread_load_const(read_ctx *ctx, union packed_instr header)
133901e04c3fSmrg{
134001e04c3fSmrg   nir_load_const_instr *lc =
13417ec681f3Smrg      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
13427ec681f3Smrg                                  decode_bit_size_3bits(header.load_const.bit_size));
13437ec681f3Smrg
13447ec681f3Smrg   switch (header.load_const.packing) {
13457ec681f3Smrg   case load_const_scalar_hi_19bits:
13467ec681f3Smrg      switch (lc->def.bit_size) {
13477ec681f3Smrg      case 64:
13487ec681f3Smrg         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
13497ec681f3Smrg         break;
13507ec681f3Smrg      case 32:
13517ec681f3Smrg         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
13527ec681f3Smrg         break;
13537ec681f3Smrg      default:
13547ec681f3Smrg         unreachable("invalid bit_size");
13557ec681f3Smrg      }
13567ec681f3Smrg      break;
13577ec681f3Smrg
13587ec681f3Smrg   case load_const_scalar_lo_19bits_sext:
13597ec681f3Smrg      switch (lc->def.bit_size) {
13607ec681f3Smrg      case 64:
13617ec681f3Smrg         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
13627ec681f3Smrg         break;
13637ec681f3Smrg      case 32:
13647ec681f3Smrg         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
13657ec681f3Smrg         break;
13667ec681f3Smrg      case 16:
13677ec681f3Smrg         lc->value[0].u16 = header.load_const.packed_value;
13687ec681f3Smrg         break;
13697ec681f3Smrg      case 8:
13707ec681f3Smrg         lc->value[0].u8 = header.load_const.packed_value;
13717ec681f3Smrg         break;
13727ec681f3Smrg      case 1:
13737ec681f3Smrg         lc->value[0].b = header.load_const.packed_value;
13747ec681f3Smrg         break;
13757ec681f3Smrg      default:
13767ec681f3Smrg         unreachable("invalid bit_size");
13777ec681f3Smrg      }
13787ec681f3Smrg      break;
13797ec681f3Smrg
13807ec681f3Smrg   case load_const_full:
13817ec681f3Smrg      switch (lc->def.bit_size) {
13827ec681f3Smrg      case 64:
13837ec681f3Smrg         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
13847ec681f3Smrg         break;
13857ec681f3Smrg
13867ec681f3Smrg      case 32:
13877ec681f3Smrg         for (unsigned i = 0; i < lc->def.num_components; i++)
13887ec681f3Smrg            lc->value[i].u32 = blob_read_uint32(ctx->blob);
13897ec681f3Smrg         break;
13907ec681f3Smrg
13917ec681f3Smrg      case 16:
13927ec681f3Smrg         for (unsigned i = 0; i < lc->def.num_components; i++)
13937ec681f3Smrg            lc->value[i].u16 = blob_read_uint16(ctx->blob);
13947ec681f3Smrg         break;
13957ec681f3Smrg
13967ec681f3Smrg      default:
13977ec681f3Smrg         assert(lc->def.bit_size <= 8);
13987ec681f3Smrg         for (unsigned i = 0; i < lc->def.num_components; i++)
13997ec681f3Smrg            lc->value[i].u8 = blob_read_uint8(ctx->blob);
14007ec681f3Smrg         break;
14017ec681f3Smrg      }
14027ec681f3Smrg      break;
14037ec681f3Smrg   }
140401e04c3fSmrg
140501e04c3fSmrg   read_add_object(ctx, &lc->def);
140601e04c3fSmrg   return lc;
140701e04c3fSmrg}
140801e04c3fSmrg
140901e04c3fSmrgstatic void
141001e04c3fSmrgwrite_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
141101e04c3fSmrg{
14127ec681f3Smrg   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);
14137ec681f3Smrg
14147ec681f3Smrg   union packed_instr header;
14157ec681f3Smrg   header.u32 = 0;
14167ec681f3Smrg
14177ec681f3Smrg   header.undef.instr_type = undef->instr.type;
14187ec681f3Smrg   header.undef.last_component = undef->def.num_components - 1;
14197ec681f3Smrg   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);
14207ec681f3Smrg
14217ec681f3Smrg   blob_write_uint32(ctx->blob, header.u32);
142201e04c3fSmrg   write_add_object(ctx, &undef->def);
142301e04c3fSmrg}
142401e04c3fSmrg
142501e04c3fSmrgstatic nir_ssa_undef_instr *
14267ec681f3Smrgread_ssa_undef(read_ctx *ctx, union packed_instr header)
142701e04c3fSmrg{
142801e04c3fSmrg   nir_ssa_undef_instr *undef =
14297ec681f3Smrg      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
14307ec681f3Smrg                                 decode_bit_size_3bits(header.undef.bit_size));
143101e04c3fSmrg
143201e04c3fSmrg   read_add_object(ctx, &undef->def);
143301e04c3fSmrg   return undef;
143401e04c3fSmrg}
143501e04c3fSmrg
/**
 * Bit-packed copy of the small scalar fields of a nir_tex_instr, written to
 * and read from the blob as one uint32_t (see write_tex() / read_tex()).
 *
 * The named bit-fields add up to exactly 32 bits so that no bit of u32 is
 * left as unnamed padding when the union is initialized through `u`.
 */
union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned is_sparse:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned array_is_lowered_cube:1;
      /* The fields above use 24 bits; name the remaining 8 so the whole
       * word is covered.  Mark unused for valgrind.
       */
      unsigned unused:8;
   } u;
};
145301e04c3fSmrg
145401e04c3fSmrgstatic void
145501e04c3fSmrgwrite_tex(write_ctx *ctx, const nir_tex_instr *tex)
145601e04c3fSmrg{
14577ec681f3Smrg   assert(tex->num_srcs < 16);
14587ec681f3Smrg   assert(tex->op < 16);
14597ec681f3Smrg
14607ec681f3Smrg   union packed_instr header;
14617ec681f3Smrg   header.u32 = 0;
14627ec681f3Smrg
14637ec681f3Smrg   header.tex.instr_type = tex->instr.type;
14647ec681f3Smrg   header.tex.num_srcs = tex->num_srcs;
14657ec681f3Smrg   header.tex.op = tex->op;
14667ec681f3Smrg
14677ec681f3Smrg   write_dest(ctx, &tex->dest, header, tex->instr.type);
14687ec681f3Smrg
146901e04c3fSmrg   blob_write_uint32(ctx->blob, tex->texture_index);
147001e04c3fSmrg   blob_write_uint32(ctx->blob, tex->sampler_index);
14717ec681f3Smrg   if (tex->op == nir_texop_tg4)
14727ec681f3Smrg      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
147301e04c3fSmrg
147401e04c3fSmrg   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
147501e04c3fSmrg   union packed_tex_data packed = {
147601e04c3fSmrg      .u.sampler_dim = tex->sampler_dim,
147701e04c3fSmrg      .u.dest_type = tex->dest_type,
147801e04c3fSmrg      .u.coord_components = tex->coord_components,
147901e04c3fSmrg      .u.is_array = tex->is_array,
148001e04c3fSmrg      .u.is_shadow = tex->is_shadow,
148101e04c3fSmrg      .u.is_new_style_shadow = tex->is_new_style_shadow,
14827ec681f3Smrg      .u.is_sparse = tex->is_sparse,
148301e04c3fSmrg      .u.component = tex->component,
14847ec681f3Smrg      .u.texture_non_uniform = tex->texture_non_uniform,
14857ec681f3Smrg      .u.sampler_non_uniform = tex->sampler_non_uniform,
14867ec681f3Smrg      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
148701e04c3fSmrg   };
148801e04c3fSmrg   blob_write_uint32(ctx->blob, packed.u32);
148901e04c3fSmrg
149001e04c3fSmrg   for (unsigned i = 0; i < tex->num_srcs; i++) {
14917ec681f3Smrg      union packed_src src;
14927ec681f3Smrg      src.u32 = 0;
14937ec681f3Smrg      src.tex.src_type = tex->src[i].src_type;
14947ec681f3Smrg      write_src_full(ctx, &tex->src[i].src, src);
149501e04c3fSmrg   }
149601e04c3fSmrg}
149701e04c3fSmrg
149801e04c3fSmrgstatic nir_tex_instr *
14997ec681f3Smrgread_tex(read_ctx *ctx, union packed_instr header)
150001e04c3fSmrg{
15017ec681f3Smrg   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
15027ec681f3Smrg
15037ec681f3Smrg   read_dest(ctx, &tex->dest, &tex->instr, header);
150401e04c3fSmrg
15057ec681f3Smrg   tex->op = header.tex.op;
150601e04c3fSmrg   tex->texture_index = blob_read_uint32(ctx->blob);
150701e04c3fSmrg   tex->sampler_index = blob_read_uint32(ctx->blob);
15087ec681f3Smrg   if (tex->op == nir_texop_tg4)
15097ec681f3Smrg      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
151001e04c3fSmrg
151101e04c3fSmrg   union packed_tex_data packed;
151201e04c3fSmrg   packed.u32 = blob_read_uint32(ctx->blob);
151301e04c3fSmrg   tex->sampler_dim = packed.u.sampler_dim;
151401e04c3fSmrg   tex->dest_type = packed.u.dest_type;
151501e04c3fSmrg   tex->coord_components = packed.u.coord_components;
151601e04c3fSmrg   tex->is_array = packed.u.is_array;
151701e04c3fSmrg   tex->is_shadow = packed.u.is_shadow;
151801e04c3fSmrg   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
15197ec681f3Smrg   tex->is_sparse = packed.u.is_sparse;
152001e04c3fSmrg   tex->component = packed.u.component;
15217ec681f3Smrg   tex->texture_non_uniform = packed.u.texture_non_uniform;
15227ec681f3Smrg   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
15237ec681f3Smrg   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
152401e04c3fSmrg
152501e04c3fSmrg   for (unsigned i = 0; i < tex->num_srcs; i++) {
15267ec681f3Smrg      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
15277ec681f3Smrg      tex->src[i].src_type = src.tex.src_type;
152801e04c3fSmrg   }
152901e04c3fSmrg
153001e04c3fSmrg   return tex;
153101e04c3fSmrg}
153201e04c3fSmrg
153301e04c3fSmrgstatic void
153401e04c3fSmrgwrite_phi(write_ctx *ctx, const nir_phi_instr *phi)
153501e04c3fSmrg{
15367ec681f3Smrg   union packed_instr header;
15377ec681f3Smrg   header.u32 = 0;
15387ec681f3Smrg
15397ec681f3Smrg   header.phi.instr_type = phi->instr.type;
15407ec681f3Smrg   header.phi.num_srcs = exec_list_length(&phi->srcs);
15417ec681f3Smrg
154201e04c3fSmrg   /* Phi nodes are special, since they may reference SSA definitions and
15437ec681f3Smrg    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
154401e04c3fSmrg    * and then store enough information so that a later fixup pass can fill
154501e04c3fSmrg    * them in correctly.
154601e04c3fSmrg    */
15477ec681f3Smrg   write_dest(ctx, &phi->dest, header, phi->instr.type);
154801e04c3fSmrg
154901e04c3fSmrg   nir_foreach_phi_src(src, phi) {
155001e04c3fSmrg      assert(src->src.is_ssa);
15517ec681f3Smrg      size_t blob_offset = blob_reserve_uint32(ctx->blob);
15527ec681f3Smrg      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
15537ec681f3Smrg      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
155401e04c3fSmrg      write_phi_fixup fixup = {
155501e04c3fSmrg         .blob_offset = blob_offset,
155601e04c3fSmrg         .src = src->src.ssa,
155701e04c3fSmrg         .block = src->pred,
155801e04c3fSmrg      };
155901e04c3fSmrg      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
156001e04c3fSmrg   }
156101e04c3fSmrg}
156201e04c3fSmrg
156301e04c3fSmrgstatic void
156401e04c3fSmrgwrite_fixup_phis(write_ctx *ctx)
156501e04c3fSmrg{
156601e04c3fSmrg   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
15677ec681f3Smrg      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
156801e04c3fSmrg      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
156901e04c3fSmrg      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
157001e04c3fSmrg   }
157101e04c3fSmrg
157201e04c3fSmrg   util_dynarray_clear(&ctx->phi_fixups);
157301e04c3fSmrg}
157401e04c3fSmrg
157501e04c3fSmrgstatic nir_phi_instr *
15767ec681f3Smrgread_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
157701e04c3fSmrg{
157801e04c3fSmrg   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
157901e04c3fSmrg
15807ec681f3Smrg   read_dest(ctx, &phi->dest, &phi->instr, header);
158101e04c3fSmrg
158201e04c3fSmrg   /* For similar reasons as before, we just store the index directly into the
158301e04c3fSmrg    * pointer, and let a later pass resolve the phi sources.
158401e04c3fSmrg    *
158501e04c3fSmrg    * In order to ensure that the copied sources (which are just the indices
158601e04c3fSmrg    * from the blob for now) don't get inserted into the old shader's use-def
158701e04c3fSmrg    * lists, we have to add the phi instruction *before* we set up its
158801e04c3fSmrg    * sources.
158901e04c3fSmrg    */
159001e04c3fSmrg   nir_instr_insert_after_block(blk, &phi->instr);
159101e04c3fSmrg
15927ec681f3Smrg   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
15937ec681f3Smrg      nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
15947ec681f3Smrg      nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
15957ec681f3Smrg      nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));
159601e04c3fSmrg
159701e04c3fSmrg      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
159801e04c3fSmrg       * we have to set the parent_instr manually.  It doesn't really matter
159901e04c3fSmrg       * when we do it, so we might as well do it here.
160001e04c3fSmrg       */
160101e04c3fSmrg      src->src.parent_instr = &phi->instr;
160201e04c3fSmrg
160301e04c3fSmrg      /* Stash it in the list of phi sources.  We'll walk this list and fix up
160401e04c3fSmrg       * sources at the very end of read_function_impl.
160501e04c3fSmrg       */
160601e04c3fSmrg      list_add(&src->src.use_link, &ctx->phi_srcs);
160701e04c3fSmrg   }
160801e04c3fSmrg
160901e04c3fSmrg   return phi;
161001e04c3fSmrg}
161101e04c3fSmrg
161201e04c3fSmrgstatic void
161301e04c3fSmrgread_fixup_phis(read_ctx *ctx)
161401e04c3fSmrg{
161501e04c3fSmrg   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
161601e04c3fSmrg      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
161701e04c3fSmrg      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
161801e04c3fSmrg
161901e04c3fSmrg      /* Remove from this list */
162001e04c3fSmrg      list_del(&src->src.use_link);
162101e04c3fSmrg
162201e04c3fSmrg      list_addtail(&src->src.use_link, &src->src.ssa->uses);
162301e04c3fSmrg   }
16247ec681f3Smrg   assert(list_is_empty(&ctx->phi_srcs));
162501e04c3fSmrg}
162601e04c3fSmrg
162701e04c3fSmrgstatic void
162801e04c3fSmrgwrite_jump(write_ctx *ctx, const nir_jump_instr *jmp)
162901e04c3fSmrg{
16307ec681f3Smrg   /* These aren't handled because they require special block linking */
16317ec681f3Smrg   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
16327ec681f3Smrg
16337ec681f3Smrg   assert(jmp->type < 4);
16347ec681f3Smrg
16357ec681f3Smrg   union packed_instr header;
16367ec681f3Smrg   header.u32 = 0;
16377ec681f3Smrg
16387ec681f3Smrg   header.jump.instr_type = jmp->instr.type;
16397ec681f3Smrg   header.jump.type = jmp->type;
16407ec681f3Smrg
16417ec681f3Smrg   blob_write_uint32(ctx->blob, header.u32);
164201e04c3fSmrg}
164301e04c3fSmrg
164401e04c3fSmrgstatic nir_jump_instr *
16457ec681f3Smrgread_jump(read_ctx *ctx, union packed_instr header)
164601e04c3fSmrg{
16477ec681f3Smrg   /* These aren't handled because they require special block linking */
16487ec681f3Smrg   assert(header.jump.type != nir_jump_goto &&
16497ec681f3Smrg          header.jump.type != nir_jump_goto_if);
16507ec681f3Smrg
16517ec681f3Smrg   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
165201e04c3fSmrg   return jmp;
165301e04c3fSmrg}
165401e04c3fSmrg
165501e04c3fSmrgstatic void
165601e04c3fSmrgwrite_call(write_ctx *ctx, const nir_call_instr *call)
165701e04c3fSmrg{
16587ec681f3Smrg   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
165901e04c3fSmrg
166001e04c3fSmrg   for (unsigned i = 0; i < call->num_params; i++)
166101e04c3fSmrg      write_src(ctx, &call->params[i]);
166201e04c3fSmrg}
166301e04c3fSmrg
166401e04c3fSmrgstatic nir_call_instr *
166501e04c3fSmrgread_call(read_ctx *ctx)
166601e04c3fSmrg{
166701e04c3fSmrg   nir_function *callee = read_object(ctx);
166801e04c3fSmrg   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
166901e04c3fSmrg
167001e04c3fSmrg   for (unsigned i = 0; i < call->num_params; i++)
167101e04c3fSmrg      read_src(ctx, &call->params[i], call);
167201e04c3fSmrg
167301e04c3fSmrg   return call;
167401e04c3fSmrg}
167501e04c3fSmrg
167601e04c3fSmrgstatic void
167701e04c3fSmrgwrite_instr(write_ctx *ctx, const nir_instr *instr)
167801e04c3fSmrg{
16797ec681f3Smrg   /* We have only 4 bits for the instruction type. */
16807ec681f3Smrg   assert(instr->type < 16);
16817ec681f3Smrg
168201e04c3fSmrg   switch (instr->type) {
168301e04c3fSmrg   case nir_instr_type_alu:
168401e04c3fSmrg      write_alu(ctx, nir_instr_as_alu(instr));
168501e04c3fSmrg      break;
168601e04c3fSmrg   case nir_instr_type_deref:
168701e04c3fSmrg      write_deref(ctx, nir_instr_as_deref(instr));
168801e04c3fSmrg      break;
168901e04c3fSmrg   case nir_instr_type_intrinsic:
169001e04c3fSmrg      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
169101e04c3fSmrg      break;
169201e04c3fSmrg   case nir_instr_type_load_const:
169301e04c3fSmrg      write_load_const(ctx, nir_instr_as_load_const(instr));
169401e04c3fSmrg      break;
169501e04c3fSmrg   case nir_instr_type_ssa_undef:
169601e04c3fSmrg      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
169701e04c3fSmrg      break;
169801e04c3fSmrg   case nir_instr_type_tex:
169901e04c3fSmrg      write_tex(ctx, nir_instr_as_tex(instr));
170001e04c3fSmrg      break;
170101e04c3fSmrg   case nir_instr_type_phi:
170201e04c3fSmrg      write_phi(ctx, nir_instr_as_phi(instr));
170301e04c3fSmrg      break;
170401e04c3fSmrg   case nir_instr_type_jump:
170501e04c3fSmrg      write_jump(ctx, nir_instr_as_jump(instr));
170601e04c3fSmrg      break;
170701e04c3fSmrg   case nir_instr_type_call:
17087ec681f3Smrg      blob_write_uint32(ctx->blob, instr->type);
170901e04c3fSmrg      write_call(ctx, nir_instr_as_call(instr));
171001e04c3fSmrg      break;
171101e04c3fSmrg   case nir_instr_type_parallel_copy:
171201e04c3fSmrg      unreachable("Cannot write parallel copies");
171301e04c3fSmrg   default:
171401e04c3fSmrg      unreachable("bad instr type");
171501e04c3fSmrg   }
171601e04c3fSmrg}
171701e04c3fSmrg
17187ec681f3Smrg/* Return the number of instructions read. */
17197ec681f3Smrgstatic unsigned
172001e04c3fSmrgread_instr(read_ctx *ctx, nir_block *block)
172101e04c3fSmrg{
17227ec681f3Smrg   STATIC_ASSERT(sizeof(union packed_instr) == 4);
17237ec681f3Smrg   union packed_instr header;
17247ec681f3Smrg   header.u32 = blob_read_uint32(ctx->blob);
172501e04c3fSmrg   nir_instr *instr;
17267ec681f3Smrg
17277ec681f3Smrg   switch (header.any.instr_type) {
172801e04c3fSmrg   case nir_instr_type_alu:
17297ec681f3Smrg      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
17307ec681f3Smrg         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
17317ec681f3Smrg      return header.alu.num_followup_alu_sharing_header + 1;
173201e04c3fSmrg   case nir_instr_type_deref:
17337ec681f3Smrg      instr = &read_deref(ctx, header)->instr;
173401e04c3fSmrg      break;
173501e04c3fSmrg   case nir_instr_type_intrinsic:
17367ec681f3Smrg      instr = &read_intrinsic(ctx, header)->instr;
173701e04c3fSmrg      break;
173801e04c3fSmrg   case nir_instr_type_load_const:
17397ec681f3Smrg      instr = &read_load_const(ctx, header)->instr;
174001e04c3fSmrg      break;
174101e04c3fSmrg   case nir_instr_type_ssa_undef:
17427ec681f3Smrg      instr = &read_ssa_undef(ctx, header)->instr;
174301e04c3fSmrg      break;
174401e04c3fSmrg   case nir_instr_type_tex:
17457ec681f3Smrg      instr = &read_tex(ctx, header)->instr;
174601e04c3fSmrg      break;
174701e04c3fSmrg   case nir_instr_type_phi:
174801e04c3fSmrg      /* Phi instructions are a bit of a special case when reading because we
174901e04c3fSmrg       * don't want inserting the instruction to automatically handle use/defs
175001e04c3fSmrg       * for us.  Instead, we need to wait until all the blocks/instructions
175101e04c3fSmrg       * are read so that we can set their sources up.
175201e04c3fSmrg       */
17537ec681f3Smrg      read_phi(ctx, block, header);
17547ec681f3Smrg      return 1;
175501e04c3fSmrg   case nir_instr_type_jump:
17567ec681f3Smrg      instr = &read_jump(ctx, header)->instr;
175701e04c3fSmrg      break;
175801e04c3fSmrg   case nir_instr_type_call:
175901e04c3fSmrg      instr = &read_call(ctx)->instr;
176001e04c3fSmrg      break;
176101e04c3fSmrg   case nir_instr_type_parallel_copy:
176201e04c3fSmrg      unreachable("Cannot read parallel copies");
176301e04c3fSmrg   default:
176401e04c3fSmrg      unreachable("bad instr type");
176501e04c3fSmrg   }
176601e04c3fSmrg
176701e04c3fSmrg   nir_instr_insert_after_block(block, instr);
17687ec681f3Smrg   return 1;
176901e04c3fSmrg}
177001e04c3fSmrg
177101e04c3fSmrgstatic void
177201e04c3fSmrgwrite_block(write_ctx *ctx, const nir_block *block)
177301e04c3fSmrg{
177401e04c3fSmrg   write_add_object(ctx, block);
177501e04c3fSmrg   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
17767ec681f3Smrg
17777ec681f3Smrg   ctx->last_instr_type = ~0;
17787ec681f3Smrg   ctx->last_alu_header_offset = 0;
17797ec681f3Smrg
17807ec681f3Smrg   nir_foreach_instr(instr, block) {
178101e04c3fSmrg      write_instr(ctx, instr);
17827ec681f3Smrg      ctx->last_instr_type = instr->type;
17837ec681f3Smrg   }
178401e04c3fSmrg}
178501e04c3fSmrg
178601e04c3fSmrgstatic void
178701e04c3fSmrgread_block(read_ctx *ctx, struct exec_list *cf_list)
178801e04c3fSmrg{
178901e04c3fSmrg   /* Don't actually create a new block.  Just use the one from the tail of
179001e04c3fSmrg    * the list.  NIR guarantees that the tail of the list is a block and that
179101e04c3fSmrg    * no two blocks are side-by-side in the IR;  It should be empty.
179201e04c3fSmrg    */
179301e04c3fSmrg   nir_block *block =
179401e04c3fSmrg      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
179501e04c3fSmrg
179601e04c3fSmrg   read_add_object(ctx, block);
179701e04c3fSmrg   unsigned num_instrs = blob_read_uint32(ctx->blob);
17987ec681f3Smrg   for (unsigned i = 0; i < num_instrs;) {
17997ec681f3Smrg      i += read_instr(ctx, block);
180001e04c3fSmrg   }
180101e04c3fSmrg}
180201e04c3fSmrg
180301e04c3fSmrgstatic void
180401e04c3fSmrgwrite_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
180501e04c3fSmrg
180601e04c3fSmrgstatic void
180701e04c3fSmrgread_cf_list(read_ctx *ctx, struct exec_list *cf_list);
180801e04c3fSmrg
180901e04c3fSmrgstatic void
181001e04c3fSmrgwrite_if(write_ctx *ctx, nir_if *nif)
181101e04c3fSmrg{
181201e04c3fSmrg   write_src(ctx, &nif->condition);
18137ec681f3Smrg   blob_write_uint8(ctx->blob, nif->control);
181401e04c3fSmrg
181501e04c3fSmrg   write_cf_list(ctx, &nif->then_list);
181601e04c3fSmrg   write_cf_list(ctx, &nif->else_list);
181701e04c3fSmrg}
181801e04c3fSmrg
181901e04c3fSmrgstatic void
182001e04c3fSmrgread_if(read_ctx *ctx, struct exec_list *cf_list)
182101e04c3fSmrg{
182201e04c3fSmrg   nir_if *nif = nir_if_create(ctx->nir);
182301e04c3fSmrg
182401e04c3fSmrg   read_src(ctx, &nif->condition, nif);
18257ec681f3Smrg   nif->control = blob_read_uint8(ctx->blob);
182601e04c3fSmrg
182701e04c3fSmrg   nir_cf_node_insert_end(cf_list, &nif->cf_node);
182801e04c3fSmrg
182901e04c3fSmrg   read_cf_list(ctx, &nif->then_list);
183001e04c3fSmrg   read_cf_list(ctx, &nif->else_list);
183101e04c3fSmrg}
183201e04c3fSmrg
183301e04c3fSmrgstatic void
183401e04c3fSmrgwrite_loop(write_ctx *ctx, nir_loop *loop)
183501e04c3fSmrg{
18367ec681f3Smrg   blob_write_uint8(ctx->blob, loop->control);
183701e04c3fSmrg   write_cf_list(ctx, &loop->body);
183801e04c3fSmrg}
183901e04c3fSmrg
184001e04c3fSmrgstatic void
184101e04c3fSmrgread_loop(read_ctx *ctx, struct exec_list *cf_list)
184201e04c3fSmrg{
184301e04c3fSmrg   nir_loop *loop = nir_loop_create(ctx->nir);
184401e04c3fSmrg
184501e04c3fSmrg   nir_cf_node_insert_end(cf_list, &loop->cf_node);
184601e04c3fSmrg
18477ec681f3Smrg   loop->control = blob_read_uint8(ctx->blob);
184801e04c3fSmrg   read_cf_list(ctx, &loop->body);
184901e04c3fSmrg}
185001e04c3fSmrg
185101e04c3fSmrgstatic void
185201e04c3fSmrgwrite_cf_node(write_ctx *ctx, nir_cf_node *cf)
185301e04c3fSmrg{
185401e04c3fSmrg   blob_write_uint32(ctx->blob, cf->type);
185501e04c3fSmrg
185601e04c3fSmrg   switch (cf->type) {
185701e04c3fSmrg   case nir_cf_node_block:
185801e04c3fSmrg      write_block(ctx, nir_cf_node_as_block(cf));
185901e04c3fSmrg      break;
186001e04c3fSmrg   case nir_cf_node_if:
186101e04c3fSmrg      write_if(ctx, nir_cf_node_as_if(cf));
186201e04c3fSmrg      break;
186301e04c3fSmrg   case nir_cf_node_loop:
186401e04c3fSmrg      write_loop(ctx, nir_cf_node_as_loop(cf));
186501e04c3fSmrg      break;
186601e04c3fSmrg   default:
186701e04c3fSmrg      unreachable("bad cf type");
186801e04c3fSmrg   }
186901e04c3fSmrg}
187001e04c3fSmrg
187101e04c3fSmrgstatic void
187201e04c3fSmrgread_cf_node(read_ctx *ctx, struct exec_list *list)
187301e04c3fSmrg{
187401e04c3fSmrg   nir_cf_node_type type = blob_read_uint32(ctx->blob);
187501e04c3fSmrg
187601e04c3fSmrg   switch (type) {
187701e04c3fSmrg   case nir_cf_node_block:
187801e04c3fSmrg      read_block(ctx, list);
187901e04c3fSmrg      break;
188001e04c3fSmrg   case nir_cf_node_if:
188101e04c3fSmrg      read_if(ctx, list);
188201e04c3fSmrg      break;
188301e04c3fSmrg   case nir_cf_node_loop:
188401e04c3fSmrg      read_loop(ctx, list);
188501e04c3fSmrg      break;
188601e04c3fSmrg   default:
188701e04c3fSmrg      unreachable("bad cf type");
188801e04c3fSmrg   }
188901e04c3fSmrg}
189001e04c3fSmrg
189101e04c3fSmrgstatic void
189201e04c3fSmrgwrite_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
189301e04c3fSmrg{
189401e04c3fSmrg   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
189501e04c3fSmrg   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
189601e04c3fSmrg      write_cf_node(ctx, cf);
189701e04c3fSmrg   }
189801e04c3fSmrg}
189901e04c3fSmrg
190001e04c3fSmrgstatic void
190101e04c3fSmrgread_cf_list(read_ctx *ctx, struct exec_list *cf_list)
190201e04c3fSmrg{
190301e04c3fSmrg   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
190401e04c3fSmrg   for (unsigned i = 0; i < num_cf_nodes; i++)
190501e04c3fSmrg      read_cf_node(ctx, cf_list);
190601e04c3fSmrg}
190701e04c3fSmrg
190801e04c3fSmrgstatic void
190901e04c3fSmrgwrite_function_impl(write_ctx *ctx, const nir_function_impl *fi)
191001e04c3fSmrg{
19117ec681f3Smrg   blob_write_uint8(ctx->blob, fi->structured);
19127ec681f3Smrg
191301e04c3fSmrg   write_var_list(ctx, &fi->locals);
191401e04c3fSmrg   write_reg_list(ctx, &fi->registers);
191501e04c3fSmrg   blob_write_uint32(ctx->blob, fi->reg_alloc);
191601e04c3fSmrg
191701e04c3fSmrg   write_cf_list(ctx, &fi->body);
191801e04c3fSmrg   write_fixup_phis(ctx);
191901e04c3fSmrg}
192001e04c3fSmrg
192101e04c3fSmrgstatic nir_function_impl *
192201e04c3fSmrgread_function_impl(read_ctx *ctx, nir_function *fxn)
192301e04c3fSmrg{
192401e04c3fSmrg   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
192501e04c3fSmrg   fi->function = fxn;
192601e04c3fSmrg
19277ec681f3Smrg   fi->structured = blob_read_uint8(ctx->blob);
19287ec681f3Smrg
192901e04c3fSmrg   read_var_list(ctx, &fi->locals);
193001e04c3fSmrg   read_reg_list(ctx, &fi->registers);
193101e04c3fSmrg   fi->reg_alloc = blob_read_uint32(ctx->blob);
193201e04c3fSmrg
193301e04c3fSmrg   read_cf_list(ctx, &fi->body);
193401e04c3fSmrg   read_fixup_phis(ctx);
193501e04c3fSmrg
193601e04c3fSmrg   fi->valid_metadata = 0;
193701e04c3fSmrg
193801e04c3fSmrg   return fi;
193901e04c3fSmrg}
194001e04c3fSmrg
194101e04c3fSmrgstatic void
194201e04c3fSmrgwrite_function(write_ctx *ctx, const nir_function *fxn)
194301e04c3fSmrg{
19447ec681f3Smrg   uint32_t flags = fxn->is_entrypoint;
19457ec681f3Smrg   if (fxn->name)
19467ec681f3Smrg      flags |= 0x2;
19477ec681f3Smrg   if (fxn->impl)
19487ec681f3Smrg      flags |= 0x4;
19497ec681f3Smrg   blob_write_uint32(ctx->blob, flags);
195001e04c3fSmrg   if (fxn->name)
195101e04c3fSmrg      blob_write_string(ctx->blob, fxn->name);
195201e04c3fSmrg
195301e04c3fSmrg   write_add_object(ctx, fxn);
195401e04c3fSmrg
195501e04c3fSmrg   blob_write_uint32(ctx->blob, fxn->num_params);
195601e04c3fSmrg   for (unsigned i = 0; i < fxn->num_params; i++) {
195701e04c3fSmrg      uint32_t val =
195801e04c3fSmrg         ((uint32_t)fxn->params[i].num_components) |
195901e04c3fSmrg         ((uint32_t)fxn->params[i].bit_size) << 8;
196001e04c3fSmrg      blob_write_uint32(ctx->blob, val);
196101e04c3fSmrg   }
196201e04c3fSmrg
196301e04c3fSmrg   /* At first glance, it looks like we should write the function_impl here.
196401e04c3fSmrg    * However, call instructions need to be able to reference at least the
196501e04c3fSmrg    * function and those will get processed as we write the function_impls.
196601e04c3fSmrg    * We stop here and write function_impls as a second pass.
196701e04c3fSmrg    */
196801e04c3fSmrg}
196901e04c3fSmrg
197001e04c3fSmrgstatic void
197101e04c3fSmrgread_function(read_ctx *ctx)
197201e04c3fSmrg{
19737ec681f3Smrg   uint32_t flags = blob_read_uint32(ctx->blob);
19747ec681f3Smrg   bool has_name = flags & 0x2;
197501e04c3fSmrg   char *name = has_name ? blob_read_string(ctx->blob) : NULL;
197601e04c3fSmrg
197701e04c3fSmrg   nir_function *fxn = nir_function_create(ctx->nir, name);
197801e04c3fSmrg
197901e04c3fSmrg   read_add_object(ctx, fxn);
198001e04c3fSmrg
198101e04c3fSmrg   fxn->num_params = blob_read_uint32(ctx->blob);
198201e04c3fSmrg   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
198301e04c3fSmrg   for (unsigned i = 0; i < fxn->num_params; i++) {
198401e04c3fSmrg      uint32_t val = blob_read_uint32(ctx->blob);
198501e04c3fSmrg      fxn->params[i].num_components = val & 0xff;
198601e04c3fSmrg      fxn->params[i].bit_size = (val >> 8) & 0xff;
198701e04c3fSmrg   }
19887e102996Smaya
19897ec681f3Smrg   fxn->is_entrypoint = flags & 0x1;
19907ec681f3Smrg   if (flags & 0x4)
19917ec681f3Smrg      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
199201e04c3fSmrg}
199301e04c3fSmrg
19947ec681f3Smrg/**
19957ec681f3Smrg * Serialize NIR into a binary blob.
19967ec681f3Smrg *
19977ec681f3Smrg * \param strip  Don't serialize information only useful for debugging,
19987ec681f3Smrg *               such as variable names, making cache hits from similar
19997ec681f3Smrg *               shaders more likely.
20007ec681f3Smrg */
200101e04c3fSmrgvoid
20027ec681f3Smrgnir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
200301e04c3fSmrg{
20047ec681f3Smrg   write_ctx ctx = {0};
20057e102996Smaya   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
200601e04c3fSmrg   ctx.blob = blob;
200701e04c3fSmrg   ctx.nir = nir;
20087ec681f3Smrg   ctx.strip = strip;
200901e04c3fSmrg   util_dynarray_init(&ctx.phi_fixups, NULL);
201001e04c3fSmrg
20117ec681f3Smrg   size_t idx_size_offset = blob_reserve_uint32(blob);
201201e04c3fSmrg
201301e04c3fSmrg   struct shader_info info = nir->info;
201401e04c3fSmrg   uint32_t strings = 0;
20157ec681f3Smrg   if (!strip && info.name)
201601e04c3fSmrg      strings |= 0x1;
20177ec681f3Smrg   if (!strip && info.label)
201801e04c3fSmrg      strings |= 0x2;
201901e04c3fSmrg   blob_write_uint32(blob, strings);
20207ec681f3Smrg   if (!strip && info.name)
202101e04c3fSmrg      blob_write_string(blob, info.name);
20227ec681f3Smrg   if (!strip && info.label)
202301e04c3fSmrg      blob_write_string(blob, info.label);
202401e04c3fSmrg   info.name = info.label = NULL;
202501e04c3fSmrg   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
202601e04c3fSmrg
20277ec681f3Smrg   write_var_list(&ctx, &nir->variables);
202801e04c3fSmrg
202901e04c3fSmrg   blob_write_uint32(blob, nir->num_inputs);
203001e04c3fSmrg   blob_write_uint32(blob, nir->num_uniforms);
203101e04c3fSmrg   blob_write_uint32(blob, nir->num_outputs);
20327e102996Smaya   blob_write_uint32(blob, nir->scratch_size);
203301e04c3fSmrg
203401e04c3fSmrg   blob_write_uint32(blob, exec_list_length(&nir->functions));
203501e04c3fSmrg   nir_foreach_function(fxn, nir) {
203601e04c3fSmrg      write_function(&ctx, fxn);
203701e04c3fSmrg   }
203801e04c3fSmrg
203901e04c3fSmrg   nir_foreach_function(fxn, nir) {
20407ec681f3Smrg      if (fxn->impl)
20417ec681f3Smrg         write_function_impl(&ctx, fxn->impl);
204201e04c3fSmrg   }
204301e04c3fSmrg
204401e04c3fSmrg   blob_write_uint32(blob, nir->constant_data_size);
204501e04c3fSmrg   if (nir->constant_data_size > 0)
204601e04c3fSmrg      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
204701e04c3fSmrg
20487ec681f3Smrg   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
204901e04c3fSmrg
205001e04c3fSmrg   _mesa_hash_table_destroy(ctx.remap_table, NULL);
205101e04c3fSmrg   util_dynarray_fini(&ctx.phi_fixups);
205201e04c3fSmrg}
205301e04c3fSmrg
205401e04c3fSmrgnir_shader *
205501e04c3fSmrgnir_deserialize(void *mem_ctx,
205601e04c3fSmrg                const struct nir_shader_compiler_options *options,
205701e04c3fSmrg                struct blob_reader *blob)
205801e04c3fSmrg{
20597ec681f3Smrg   read_ctx ctx = {0};
206001e04c3fSmrg   ctx.blob = blob;
206101e04c3fSmrg   list_inithead(&ctx.phi_srcs);
20627ec681f3Smrg   ctx.idx_table_len = blob_read_uint32(blob);
206301e04c3fSmrg   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
206401e04c3fSmrg
206501e04c3fSmrg   uint32_t strings = blob_read_uint32(blob);
206601e04c3fSmrg   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
206701e04c3fSmrg   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
206801e04c3fSmrg
206901e04c3fSmrg   struct shader_info info;
207001e04c3fSmrg   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
207101e04c3fSmrg
207201e04c3fSmrg   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
207301e04c3fSmrg
207401e04c3fSmrg   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
207501e04c3fSmrg   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
207601e04c3fSmrg
207701e04c3fSmrg   ctx.nir->info = info;
207801e04c3fSmrg
20797ec681f3Smrg   read_var_list(&ctx, &ctx.nir->variables);
208001e04c3fSmrg
208101e04c3fSmrg   ctx.nir->num_inputs = blob_read_uint32(blob);
208201e04c3fSmrg   ctx.nir->num_uniforms = blob_read_uint32(blob);
208301e04c3fSmrg   ctx.nir->num_outputs = blob_read_uint32(blob);
20847e102996Smaya   ctx.nir->scratch_size = blob_read_uint32(blob);
208501e04c3fSmrg
208601e04c3fSmrg   unsigned num_functions = blob_read_uint32(blob);
208701e04c3fSmrg   for (unsigned i = 0; i < num_functions; i++)
208801e04c3fSmrg      read_function(&ctx);
208901e04c3fSmrg
20907ec681f3Smrg   nir_foreach_function(fxn, ctx.nir) {
20917ec681f3Smrg      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
20927ec681f3Smrg         fxn->impl = read_function_impl(&ctx, fxn);
20937ec681f3Smrg   }
209401e04c3fSmrg
209501e04c3fSmrg   ctx.nir->constant_data_size = blob_read_uint32(blob);
209601e04c3fSmrg   if (ctx.nir->constant_data_size > 0) {
209701e04c3fSmrg      ctx.nir->constant_data =
209801e04c3fSmrg         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
209901e04c3fSmrg      blob_copy_bytes(blob, ctx.nir->constant_data,
210001e04c3fSmrg                      ctx.nir->constant_data_size);
210101e04c3fSmrg   }
210201e04c3fSmrg
210301e04c3fSmrg   free(ctx.idx_table);
210401e04c3fSmrg
21057ec681f3Smrg   nir_validate_shader(ctx.nir, "after deserialize");
21067ec681f3Smrg
210701e04c3fSmrg   return ctx.nir;
210801e04c3fSmrg}
210901e04c3fSmrg
21107ec681f3Smrgvoid
21117ec681f3Smrgnir_shader_serialize_deserialize(nir_shader *shader)
211201e04c3fSmrg{
21137ec681f3Smrg   const struct nir_shader_compiler_options *options = shader->options;
211401e04c3fSmrg
211501e04c3fSmrg   struct blob writer;
211601e04c3fSmrg   blob_init(&writer);
21177ec681f3Smrg   nir_serialize(&writer, shader, false);
21187ec681f3Smrg
21197ec681f3Smrg   /* Delete all of dest's ralloc children but leave dest alone */
21207ec681f3Smrg   void *dead_ctx = ralloc_context(NULL);
21217ec681f3Smrg   ralloc_adopt(dead_ctx, shader);
21227ec681f3Smrg   ralloc_free(dead_ctx);
21237ec681f3Smrg
21247ec681f3Smrg   dead_ctx = ralloc_context(NULL);
212501e04c3fSmrg
212601e04c3fSmrg   struct blob_reader reader;
212701e04c3fSmrg   blob_reader_init(&reader, writer.data, writer.size);
21287ec681f3Smrg   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
212901e04c3fSmrg
213001e04c3fSmrg   blob_finish(&writer);
213101e04c3fSmrg
21327ec681f3Smrg   nir_shader_replace(shader, copy);
21337ec681f3Smrg   ralloc_free(dead_ctx);
213401e04c3fSmrg}
2135