nir_serialize.c revision 7ec681f3
/*
 * Copyright © 2017 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_serialize.h"
#include "nir_control_flow.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"

#define NIR_SERIALIZE_FUNC_HAS_IMPL ((void *)(intptr_t)1)
#define MAX_OBJECT_IDS (1 << 20)

typedef struct {
   size_t blob_offset;
   nir_ssa_def *src;
   nir_block *block;
} write_phi_fixup;

typedef struct {
   const nir_shader *nir;

   struct blob *blob;

   /* maps pointer to index */
   struct hash_table *remap_table;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* Array of write_phi_fixup structs representing phi sources that need to
    * be resolved in the second pass.
    */
   struct util_dynarray phi_fixups;

   /* The last serialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;

   /* For skipping equal ALU headers (typical after scalarization). */
   nir_instr_type last_instr_type;
   uintptr_t last_alu_header_offset;

   /* Don't write optional data such as variable names. */
   bool strip;
} write_ctx;

typedef struct {
   nir_shader *nir;

   struct blob_reader *blob;

   /* the next index to assign to a NIR in-memory object */
   uint32_t next_idx;

   /* The length of the index -> object table */
   uint32_t idx_table_len;

   /* map from index to deserialized pointer */
   void **idx_table;

   /* List of phi sources. */
   struct list_head phi_srcs;

   /* The last deserialized type. */
   const struct glsl_type *last_type;
   const struct glsl_type *last_interface_type;
   struct nir_variable_data last_var_data;
} read_ctx;

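/* Object references are serialized as dense uint32 indices rather than
 * pointers.  The writer assigns the next free index to each object as it is
 * written (write_add_object) and the reader appends each deserialized object
 * to idx_table in the same order (read_add_object), so an index stored in
 * the blob resolves to the matching object on both sides.
 */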
static void
write_add_object(write_ctx *ctx, const void *obj)
{
   uint32_t index = ctx->next_idx++;
   assert(index != MAX_OBJECT_IDS);
   _mesa_hash_table_insert(ctx->remap_table, obj, (void *)(uintptr_t) index);
}

static uint32_t
write_lookup_object(write_ctx *ctx, const void *obj)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->remap_table, obj);
   assert(entry);
   return (uint32_t)(uintptr_t) entry->data;
}

static void
read_add_object(read_ctx *ctx, void *obj)
{
   assert(ctx->next_idx < ctx->idx_table_len);
   ctx->idx_table[ctx->next_idx++] = obj;
}

static void *
read_lookup_object(read_ctx *ctx, uint32_t idx)
{
   assert(idx < ctx->idx_table_len);
   return ctx->idx_table[idx];
}

static void *
read_object(read_ctx *ctx)
{
   return read_lookup_object(ctx, blob_read_uint32(ctx->blob));
}

static uint32_t
encode_bit_size_3bits(uint8_t bit_size)
{
   /* Encode values of 0, 1, 2, 4, 8, 16, 32, 64 in 3 bits. */
   assert(bit_size <= 64 && util_is_power_of_two_or_zero(bit_size));
   if (bit_size)
      return util_logbase2(bit_size) + 1;
   return 0;
}

static uint8_t
decode_bit_size_3bits(uint8_t bit_size)
{
   if (bit_size)
      return 1 << (bit_size - 1);
   return 0;
}
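
/* The resulting mapping is 0 -> 0 and 2^n -> n + 1, i.e. 1 -> 1, 2 -> 2,
 * 4 -> 3, 8 -> 4, 16 -> 5, 32 -> 6, 64 -> 7, and decode_bit_size_3bits()
 * is its exact inverse (e.g. 32 encodes to 6 and 1 << (6 - 1) == 32).
 */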

#define NUM_COMPONENTS_IS_SEPARATE_7   7

static uint8_t
encode_num_components_in_3bits(uint8_t num_components)
{
   if (num_components <= 4)
      return num_components;
   if (num_components == 8)
      return 5;
   if (num_components == 16)
      return 6;

   /* special value indicating that num_components is in the next uint32 */
   return NUM_COMPONENTS_IS_SEPARATE_7;
}

static uint8_t
decode_num_components_in_3bits(uint8_t value)
{
   if (value <= 4)
      return value;
   if (value == 5)
      return 8;
   if (value == 6)
      return 16;

   unreachable("invalid num_components encoding");
   return 0;
}
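
/* Counts of 0-4 are stored verbatim, the common wide vectors 8 and 16 map to
 * the codes 5 and 6, and any other count is flagged with
 * NUM_COMPONENTS_IS_SEPARATE_7 so that write_dest()/read_dest() transfer it
 * as a full uint32 following the instruction header.
 */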

static void
write_constant(write_ctx *ctx, const nir_constant *c)
{
   blob_write_bytes(ctx->blob, c->values, sizeof(c->values));
   blob_write_uint32(ctx->blob, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      write_constant(ctx, c->elements[i]);
}

static nir_constant *
read_constant(read_ctx *ctx, nir_variable *nvar)
{
   nir_constant *c = ralloc(nvar, nir_constant);

   blob_copy_bytes(ctx->blob, (uint8_t *)c->values, sizeof(c->values));
   c->num_elements = blob_read_uint32(ctx->blob);
   c->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
   for (unsigned i = 0; i < c->num_elements; i++)
      c->elements[i] = read_constant(ctx, nvar);

   return c;
}

enum var_data_encoding {
   var_encode_full,
   var_encode_shader_temp,
   var_encode_function_temp,
   var_encode_location_diff,
};

union packed_var {
   uint32_t u32;
   struct {
      unsigned has_name:1;
      unsigned has_constant_initializer:1;
      unsigned has_pointer_initializer:1;
      unsigned has_interface_type:1;
      unsigned num_state_slots:7;
      unsigned data_encoding:2;
      unsigned type_same_as_last:1;
      unsigned interface_type_same_as_last:1;
      unsigned _pad:1;
      unsigned num_members:16;
   } u;
};

union packed_var_data_diff {
   uint32_t u32;
   struct {
      int location:13;
      int location_frac:3;
      int driver_location:16;
   } u;
};
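
/* The diff encoding packs all three deltas into one uint32: a signed 13-bit
 * location delta, a signed 3-bit location_frac delta and a signed 16-bit
 * driver_location delta.  write_variable() below only selects
 * var_encode_location_diff when the location delta fits in +-(1 << 12) and
 * the driver_location delta fits in +-(1 << 15), so those two deltas cannot
 * overflow their fields.
 */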

static void
write_variable(write_ctx *ctx, const nir_variable *var)
{
   write_add_object(ctx, var);

   assert(var->num_state_slots < (1 << 7));

   STATIC_ASSERT(sizeof(union packed_var) == 4);
   union packed_var flags;
   flags.u32 = 0;

   flags.u.has_name = !ctx->strip && var->name;
   flags.u.has_constant_initializer = !!(var->constant_initializer);
   flags.u.has_pointer_initializer = !!(var->pointer_initializer);
   flags.u.has_interface_type = !!(var->interface_type);
   flags.u.type_same_as_last = var->type == ctx->last_type;
   flags.u.interface_type_same_as_last =
      var->interface_type && var->interface_type == ctx->last_interface_type;
   flags.u.num_state_slots = var->num_state_slots;
   flags.u.num_members = var->num_members;

   struct nir_variable_data data = var->data;

   /* When stripping, we expect that the location is no longer needed,
    * which is typically after shaders are linked.
    */
   if (ctx->strip &&
       data.mode != nir_var_system_value &&
       data.mode != nir_var_shader_in &&
       data.mode != nir_var_shader_out)
      data.location = 0;

   /* Temporary variables don't serialize var->data. */
   if (data.mode == nir_var_shader_temp)
      flags.u.data_encoding = var_encode_shader_temp;
   else if (data.mode == nir_var_function_temp)
      flags.u.data_encoding = var_encode_function_temp;
   else {
      struct nir_variable_data tmp = data;

      tmp.location = ctx->last_var_data.location;
      tmp.location_frac = ctx->last_var_data.location_frac;
      tmp.driver_location = ctx->last_var_data.driver_location;

      /* See if we can encode only the difference in locations from the last
       * variable.
       */
      if (memcmp(&ctx->last_var_data, &tmp, sizeof(tmp)) == 0 &&
          abs((int)data.location -
              (int)ctx->last_var_data.location) < (1 << 12) &&
          abs((int)data.driver_location -
              (int)ctx->last_var_data.driver_location) < (1 << 15))
         flags.u.data_encoding = var_encode_location_diff;
      else
         flags.u.data_encoding = var_encode_full;
   }

   blob_write_uint32(ctx->blob, flags.u32);

   if (!flags.u.type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->type);
      ctx->last_type = var->type;
   }

   if (var->interface_type && !flags.u.interface_type_same_as_last) {
      encode_type_to_blob(ctx->blob, var->interface_type);
      ctx->last_interface_type = var->interface_type;
   }

   if (flags.u.has_name)
      blob_write_string(ctx->blob, var->name);

   if (flags.u.data_encoding == var_encode_full ||
       flags.u.data_encoding == var_encode_location_diff) {
      if (flags.u.data_encoding == var_encode_full) {
         blob_write_bytes(ctx->blob, &data, sizeof(data));
      } else {
         /* Serialize only the difference in locations from the last variable.
          */
         union packed_var_data_diff diff;

         diff.u.location = data.location - ctx->last_var_data.location;
         diff.u.location_frac = data.location_frac -
                                ctx->last_var_data.location_frac;
         diff.u.driver_location = data.driver_location -
                                  ctx->last_var_data.driver_location;

         blob_write_uint32(ctx->blob, diff.u32);
      }

      ctx->last_var_data = data;
   }

   for (unsigned i = 0; i < var->num_state_slots; i++) {
      blob_write_bytes(ctx->blob, &var->state_slots[i],
                       sizeof(var->state_slots[i]));
   }
   if (var->constant_initializer)
      write_constant(ctx, var->constant_initializer);
   if (var->pointer_initializer)
      blob_write_uint32(ctx->blob,
                        write_lookup_object(ctx, var->pointer_initializer));
   if (var->num_members > 0) {
      blob_write_bytes(ctx->blob, (uint8_t *) var->members,
                       var->num_members * sizeof(*var->members));
   }
}

static nir_variable *
read_variable(read_ctx *ctx)
{
   nir_variable *var = rzalloc(ctx->nir, nir_variable);
   read_add_object(ctx, var);

   union packed_var flags;
   flags.u32 = blob_read_uint32(ctx->blob);

   if (flags.u.type_same_as_last) {
      var->type = ctx->last_type;
   } else {
      var->type = decode_type_from_blob(ctx->blob);
      ctx->last_type = var->type;
   }

   if (flags.u.has_interface_type) {
      if (flags.u.interface_type_same_as_last) {
         var->interface_type = ctx->last_interface_type;
      } else {
         var->interface_type = decode_type_from_blob(ctx->blob);
         ctx->last_interface_type = var->interface_type;
      }
   }

   if (flags.u.has_name) {
      const char *name = blob_read_string(ctx->blob);
      var->name = ralloc_strdup(var, name);
   } else {
      var->name = NULL;
   }

   if (flags.u.data_encoding == var_encode_shader_temp)
      var->data.mode = nir_var_shader_temp;
   else if (flags.u.data_encoding == var_encode_function_temp)
      var->data.mode = nir_var_function_temp;
   else if (flags.u.data_encoding == var_encode_full) {
      blob_copy_bytes(ctx->blob, (uint8_t *) &var->data, sizeof(var->data));
      ctx->last_var_data = var->data;
   } else { /* var_encode_location_diff */
      union packed_var_data_diff diff;
      diff.u32 = blob_read_uint32(ctx->blob);

      var->data = ctx->last_var_data;
      var->data.location += diff.u.location;
      var->data.location_frac += diff.u.location_frac;
      var->data.driver_location += diff.u.driver_location;

      ctx->last_var_data = var->data;
   }

   var->num_state_slots = flags.u.num_state_slots;
   if (var->num_state_slots != 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         blob_copy_bytes(ctx->blob, &var->state_slots[i],
                         sizeof(var->state_slots[i]));
      }
   }
   if (flags.u.has_constant_initializer)
      var->constant_initializer = read_constant(ctx, var);
   else
      var->constant_initializer = NULL;

   if (flags.u.has_pointer_initializer)
      var->pointer_initializer = read_object(ctx);
   else
      var->pointer_initializer = NULL;

   var->num_members = flags.u.num_members;
   if (var->num_members > 0) {
      var->members = ralloc_array(var, struct nir_variable_data,
                                  var->num_members);
      blob_copy_bytes(ctx->blob, (uint8_t *) var->members,
                      var->num_members * sizeof(*var->members));
   }

   return var;
}

static void
write_var_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_variable, var, node, src) {
      write_variable(ctx, var);
   }
}

static void
read_var_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_vars = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_vars; i++) {
      nir_variable *var = read_variable(ctx);
      exec_list_push_tail(dst, &var->node);
   }
}

static void
write_register(write_ctx *ctx, const nir_register *reg)
{
   write_add_object(ctx, reg);
   blob_write_uint32(ctx->blob, reg->num_components);
   blob_write_uint32(ctx->blob, reg->bit_size);
   blob_write_uint32(ctx->blob, reg->num_array_elems);
   blob_write_uint32(ctx->blob, reg->index);
}

static nir_register *
read_register(read_ctx *ctx)
{
   nir_register *reg = ralloc(ctx->nir, nir_register);
   read_add_object(ctx, reg);
   reg->num_components = blob_read_uint32(ctx->blob);
   reg->bit_size = blob_read_uint32(ctx->blob);
   reg->num_array_elems = blob_read_uint32(ctx->blob);
   reg->index = blob_read_uint32(ctx->blob);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   return reg;
}

static void
write_reg_list(write_ctx *ctx, const struct exec_list *src)
{
   blob_write_uint32(ctx->blob, exec_list_length(src));
   foreach_list_typed(nir_register, reg, node, src)
      write_register(ctx, reg);
}

static void
read_reg_list(read_ctx *ctx, struct exec_list *dst)
{
   exec_list_make_empty(dst);
   unsigned num_regs = blob_read_uint32(ctx->blob);
   for (unsigned i = 0; i < num_regs; i++) {
      nir_register *reg = read_register(ctx);
      exec_list_push_tail(dst, &reg->node);
   }
}

union packed_src {
   uint32_t u32;
   struct {
      unsigned is_ssa:1;   /* <-- Header */
      unsigned is_indirect:1;
      unsigned object_idx:20;
      unsigned _footer:10; /* <-- Footer */
   } any;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned negate:1;   /* <-- Footer */
      unsigned abs:1;
      unsigned swizzle_x:2;
      unsigned swizzle_y:2;
      unsigned swizzle_z:2;
      unsigned swizzle_w:2;
   } alu;
   struct {
      unsigned _header:22; /* <-- Header */
      unsigned src_type:5; /* <-- Footer */
      unsigned _pad:5;
   } tex;
};
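
/* The low 22 bits (is_ssa, is_indirect, object_idx) form a header common to
 * every source; the remaining 10 bits are a footer whose meaning depends on
 * the instruction: ALU sources keep negate/abs plus a 2-bit swizzle per
 * channel, and texture sources keep the nir_tex_src_type.  object_idx is
 * 20 bits wide, matching the MAX_OBJECT_IDS limit of 1 << 20.
 */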

static void
write_src_full(write_ctx *ctx, const nir_src *src, union packed_src header)
{
   /* Since sources are very frequent, we try to save some space when storing
    * them. In particular, we store whether the source is a register and
    * whether the register has an indirect index in the low two bits. We can
    * assume that the high two bits of the index are zero, since otherwise our
    * address space would've been exhausted allocating the remap table!
    */
   header.any.is_ssa = src->is_ssa;
   if (src->is_ssa) {
      header.any.object_idx = write_lookup_object(ctx, src->ssa);
      blob_write_uint32(ctx->blob, header.u32);
   } else {
      header.any.object_idx = write_lookup_object(ctx, src->reg.reg);
      header.any.is_indirect = !!src->reg.indirect;
      blob_write_uint32(ctx->blob, header.u32);
      blob_write_uint32(ctx->blob, src->reg.base_offset);
      if (src->reg.indirect) {
         union packed_src header = {0};
         write_src_full(ctx, src->reg.indirect, header);
      }
   }
}

static void
write_src(write_ctx *ctx, const nir_src *src)
{
   union packed_src header = {0};
   write_src_full(ctx, src, header);
}

static union packed_src
read_src(read_ctx *ctx, nir_src *src, void *mem_ctx)
{
   STATIC_ASSERT(sizeof(union packed_src) == 4);
   union packed_src header;
   header.u32 = blob_read_uint32(ctx->blob);

   src->is_ssa = header.any.is_ssa;
   if (src->is_ssa) {
      src->ssa = read_lookup_object(ctx, header.any.object_idx);
   } else {
      src->reg.reg = read_lookup_object(ctx, header.any.object_idx);
      src->reg.base_offset = blob_read_uint32(ctx->blob);
      if (header.any.is_indirect) {
         src->reg.indirect = malloc(sizeof(nir_src));
         read_src(ctx, src->reg.indirect, mem_ctx);
      } else {
         src->reg.indirect = NULL;
      }
   }
   return header;
}

union packed_dest {
   uint8_t u8;
   struct {
      uint8_t is_ssa:1;
      uint8_t num_components:3;
      uint8_t bit_size:3;
      uint8_t _pad:1;
   } ssa;
   struct {
      uint8_t is_ssa:1;
      uint8_t is_indirect:1;
      uint8_t _pad:6;
   } reg;
};

enum intrinsic_const_indices_encoding {
   /* Use the 9 bits of packed_const_indices to store 1-9 indices.
    * 1 9-bit index, or 2 4-bit indices, or 3 3-bit indices, or
    * 4 2-bit indices, or 5-9 1-bit indices.
    *
    * The common case for load_ubo is 0, 0, 0, which is trivially represented.
    * The common cases for load_interpolated_input also fit here, e.g.: 7, 3
    */
   const_indices_9bit_all_combined,

   const_indices_8bit,  /* 8 bits per element */
   const_indices_16bit, /* 16 bits per element */
   const_indices_32bit, /* 32 bits per element */
};
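
/* Example: load_interpolated_input has two const indices, so each gets
 * 9 / 2 = 4 bits; the common case of (7, 3) passes the max_bits check in
 * write_intrinsic() (util_last_bit(7) * 2 == 6 <= 9) and packs as
 * 7 | (3 << 4).
 */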

enum load_const_packing {
   /* Constants are not packed and are stored in following dwords. */
   load_const_full,

   /* packed_value contains high 19 bits, low bits are 0,
    * good for floating-point decimals
    */
   load_const_scalar_hi_19bits,

   /* packed_value contains low 19 bits, high bits are sign-extended */
   load_const_scalar_lo_19bits_sext,
};
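
/* Example: the 32-bit float 1.0f (0x3f800000) has its low 13 bits clear, so
 * it packs as load_const_scalar_hi_19bits with packed_value
 * 0x3f800000 >> 13 == 0x1fc00, while a small integer such as -1 (0xffffffff)
 * survives the shift-and-restore sign test and packs as
 * load_const_scalar_lo_19bits_sext.
 */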

union packed_instr {
   uint32_t u32;
   struct {
      unsigned instr_type:4; /* always present */
      unsigned _pad:20;
      unsigned dest:8;       /* always last */
   } any;
   struct {
      unsigned instr_type:4;
      unsigned exact:1;
      unsigned no_signed_wrap:1;
      unsigned no_unsigned_wrap:1;
      unsigned saturate:1;
      /* Reg: writemask; SSA: swizzles for 2 srcs */
      unsigned writemask_or_two_swizzles:4;
      unsigned op:9;
      unsigned packed_src_ssa_16bit:1;
      /* Scalarized ALUs always have the same header. */
      unsigned num_followup_alu_sharing_header:2;
      unsigned dest:8;
   } alu;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned cast_type_same_as_last:1;
      unsigned modes:14; /* deref_var redefines this */
      unsigned packed_src_ssa_16bit:1; /* deref_var redefines this */
      unsigned _pad:1;  /* deref_var redefines this */
      unsigned dest:8;
   } deref;
   struct {
      unsigned instr_type:4;
      unsigned deref_type:3;
      unsigned _pad:1;
      unsigned object_idx:16; /* if 0, the object ID is a separate uint32 */
      unsigned dest:8;
   } deref_var;
   struct {
      unsigned instr_type:4;
      unsigned intrinsic:9;
      unsigned const_indices_encoding:2;
      unsigned packed_const_indices:9;
      unsigned dest:8;
   } intrinsic;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned packing:2; /* enum load_const_packing */
      unsigned packed_value:19; /* meaning determined by packing */
   } load_const;
   struct {
      unsigned instr_type:4;
      unsigned last_component:4;
      unsigned bit_size:3;
      unsigned _pad:21;
   } undef;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:4;
      unsigned op:4;
      unsigned _pad:12;
      unsigned dest:8;
   } tex;
   struct {
      unsigned instr_type:4;
      unsigned num_srcs:20;
      unsigned dest:8;
   } phi;
   struct {
      unsigned instr_type:4;
      unsigned type:2;
      unsigned _pad:26;
   } jump;
};
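
/* All variants are 32 bits. instr_type always occupies the low 4 bits, and
 * every variant that carries a destination keeps it in the top 8 bits,
 * aliasing any.dest; that is what lets write_dest() patch the packed dest
 * byte into a header assembled by any of the per-instruction writers.
 */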

/* The caller fills in the low 24 bits of the header; write_dest() packs the
 * destination into the top 8 bits and writes the combined uint32.
 */
static void
write_dest(write_ctx *ctx, const nir_dest *dst, union packed_instr header,
           nir_instr_type instr_type)
{
   STATIC_ASSERT(sizeof(union packed_dest) == 1);
   union packed_dest dest;
   dest.u8 = 0;

   dest.ssa.is_ssa = dst->is_ssa;
   if (dst->is_ssa) {
      dest.ssa.num_components =
         encode_num_components_in_3bits(dst->ssa.num_components);
      dest.ssa.bit_size = encode_bit_size_3bits(dst->ssa.bit_size);
   } else {
      dest.reg.is_indirect = !!(dst->reg.indirect);
   }
   header.any.dest = dest.u8;

   /* Check if the current ALU instruction has the same header as the previous
    * instruction (also an ALU). If so, we don't have to write the current
    * header. This is a typical occurrence after scalarization.
    */
   if (instr_type == nir_instr_type_alu) {
      bool equal_header = false;

      if (ctx->last_instr_type == nir_instr_type_alu) {
         assert(ctx->last_alu_header_offset);
         union packed_instr last_header;
         memcpy(&last_header, ctx->blob->data + ctx->last_alu_header_offset,
                sizeof(last_header));

         /* Clear the field that counts ALUs with equal headers. */
         union packed_instr clean_header;
         clean_header.u32 = last_header.u32;
         clean_header.alu.num_followup_alu_sharing_header = 0;

         /* There can be at most 4 consecutive ALU instructions
          * sharing the same header.
          */
         if (last_header.alu.num_followup_alu_sharing_header < 3 &&
             header.u32 == clean_header.u32) {
            last_header.alu.num_followup_alu_sharing_header++;
            memcpy(ctx->blob->data + ctx->last_alu_header_offset,
                   &last_header, sizeof(last_header));

            equal_header = true;
         }
      }

      if (!equal_header) {
         ctx->last_alu_header_offset = ctx->blob->size;
         blob_write_uint32(ctx->blob, header.u32);
      }
   } else {
      blob_write_uint32(ctx->blob, header.u32);
   }

   if (dest.ssa.is_ssa &&
       dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
      blob_write_uint32(ctx->blob, dst->ssa.num_components);

   if (dst->is_ssa) {
      write_add_object(ctx, &dst->ssa);
   } else {
      blob_write_uint32(ctx->blob, write_lookup_object(ctx, dst->reg.reg));
      blob_write_uint32(ctx->blob, dst->reg.base_offset);
      if (dst->reg.indirect)
         write_src(ctx, dst->reg.indirect);
   }
}
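
/* Example: a vec4 operation scalarized into four identical-header ALUs is
 * written as one header whose num_followup_alu_sharing_header is bumped from
 * 0 to 3, followed by each instruction's payload; read_instr() then creates
 * header.alu.num_followup_alu_sharing_header + 1 instructions from the
 * single header.
 */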

static void
read_dest(read_ctx *ctx, nir_dest *dst, nir_instr *instr,
          union packed_instr header)
{
   union packed_dest dest;
   dest.u8 = header.any.dest;

   if (dest.ssa.is_ssa) {
      unsigned bit_size = decode_bit_size_3bits(dest.ssa.bit_size);
      unsigned num_components;
      if (dest.ssa.num_components == NUM_COMPONENTS_IS_SEPARATE_7)
         num_components = blob_read_uint32(ctx->blob);
      else
         num_components = decode_num_components_in_3bits(dest.ssa.num_components);
      nir_ssa_dest_init(instr, dst, num_components, bit_size, NULL);
      read_add_object(ctx, &dst->ssa);
   } else {
      dst->reg.reg = read_object(ctx);
      dst->reg.base_offset = blob_read_uint32(ctx->blob);
      if (dest.reg.is_indirect) {
         dst->reg.indirect = malloc(sizeof(nir_src));
         read_src(ctx, dst->reg.indirect, instr);
      }
   }
}

static bool
are_object_ids_16bit(write_ctx *ctx)
{
   /* Check the highest object ID, because they are monotonic. */
   return ctx->next_idx < (1 << 16);
}

static bool
is_alu_src_ssa_16bit(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;

   for (unsigned i = 0; i < num_srcs; i++) {
      if (!alu->src[i].src.is_ssa || alu->src[i].abs || alu->src[i].negate)
         return false;

      unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

      for (unsigned chan = 0; chan < src_components; chan++) {
         /* The swizzles for src0.x and src1.x are stored
          * in writemask_or_two_swizzles for SSA ALUs.
          */
         if (alu->dest.dest.is_ssa && i < 2 && chan == 0 &&
             alu->src[i].swizzle[chan] < 4)
            continue;

         if (alu->src[i].swizzle[chan] != chan)
            return false;
      }
   }

   return are_object_ids_16bit(ctx);
}

static void
write_alu(write_ctx *ctx, const nir_alu_instr *alu)
{
   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   /* 9 bits for nir_op */
   STATIC_ASSERT(nir_num_opcodes <= 512);
   union packed_instr header;
   header.u32 = 0;

   header.alu.instr_type = alu->instr.type;
   header.alu.exact = alu->exact;
   header.alu.no_signed_wrap = alu->no_signed_wrap;
   header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
   header.alu.saturate = alu->dest.saturate;
   header.alu.op = alu->op;
   header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      /* For packed srcs of SSA ALUs, this field stores the swizzles. */
      header.alu.writemask_or_two_swizzles = alu->src[0].swizzle[0];
      if (num_srcs > 1)
         header.alu.writemask_or_two_swizzles |= alu->src[1].swizzle[0] << 2;
   } else if (!alu->dest.dest.is_ssa && dst_components <= 4) {
      /* For vec4 registers, this field is a writemask. */
      header.alu.writemask_or_two_swizzles = alu->dest.write_mask;
   }

   write_dest(ctx, &alu->dest.dest, header, alu->instr.type);

   if (!alu->dest.dest.is_ssa && dst_components > 4)
      blob_write_uint32(ctx->blob, alu->dest.write_mask);

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         assert(alu->src[i].src.is_ssa);
         unsigned idx = write_lookup_object(ctx, alu->src[i].src.ssa);
         assert(idx < (1 << 16));
         blob_write_uint16(ctx->blob, idx);
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         union packed_src src;
         bool packed = src_components <= 4 && src_channels <= 4;
         src.u32 = 0;

         src.alu.negate = alu->src[i].negate;
         src.alu.abs = alu->src[i].abs;

         if (packed) {
            src.alu.swizzle_x = alu->src[i].swizzle[0];
            src.alu.swizzle_y = alu->src[i].swizzle[1];
            src.alu.swizzle_z = alu->src[i].swizzle[2];
            src.alu.swizzle_w = alu->src[i].swizzle[3];
         }

         write_src_full(ctx, &alu->src[i].src, src);

         /* Store swizzles for vec8 and vec16. */
         if (!packed) {
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = 0;

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  value |= (uint32_t)alu->src[i].swizzle[o + j] <<
                           (4 * j); /* 4 bits per swizzle */
               }

               blob_write_uint32(ctx->blob, value);
            }
         }
      }
   }
}

static nir_alu_instr *
read_alu(read_ctx *ctx, union packed_instr header)
{
   unsigned num_srcs = nir_op_infos[header.alu.op].num_inputs;
   nir_alu_instr *alu = nir_alu_instr_create(ctx->nir, header.alu.op);

   alu->exact = header.alu.exact;
   alu->no_signed_wrap = header.alu.no_signed_wrap;
   alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
   alu->dest.saturate = header.alu.saturate;

   read_dest(ctx, &alu->dest.dest, &alu->instr, header);

   unsigned dst_components = nir_dest_num_components(alu->dest.dest);

   if (alu->dest.dest.is_ssa) {
      alu->dest.write_mask = u_bit_consecutive(0, dst_components);
   } else if (dst_components <= 4) {
      alu->dest.write_mask = header.alu.writemask_or_two_swizzles;
   } else {
      alu->dest.write_mask = blob_read_uint32(ctx->blob);
   }

   if (header.alu.packed_src_ssa_16bit) {
      for (unsigned i = 0; i < num_srcs; i++) {
         nir_alu_src *src = &alu->src[i];
         src->src.is_ssa = true;
         src->src.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));

         memset(&src->swizzle, 0, sizeof(src->swizzle));

         unsigned src_components = nir_ssa_alu_instr_src_components(alu, i);

         for (unsigned chan = 0; chan < src_components; chan++)
            src->swizzle[chan] = chan;
      }
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         union packed_src src = read_src(ctx, &alu->src[i].src, &alu->instr);
         unsigned src_channels = nir_ssa_alu_instr_src_components(alu, i);
         unsigned src_components = nir_src_num_components(alu->src[i].src);
         bool packed = src_components <= 4 && src_channels <= 4;

         alu->src[i].negate = src.alu.negate;
         alu->src[i].abs = src.alu.abs;

         memset(&alu->src[i].swizzle, 0, sizeof(alu->src[i].swizzle));

         if (packed) {
            alu->src[i].swizzle[0] = src.alu.swizzle_x;
            alu->src[i].swizzle[1] = src.alu.swizzle_y;
            alu->src[i].swizzle[2] = src.alu.swizzle_z;
            alu->src[i].swizzle[3] = src.alu.swizzle_w;
         } else {
            /* Load swizzles for vec8 and vec16. */
            for (unsigned o = 0; o < src_channels; o += 8) {
               unsigned value = blob_read_uint32(ctx->blob);

               for (unsigned j = 0; j < 8 && o + j < src_channels; j++) {
                  alu->src[i].swizzle[o + j] =
                     (value >> (4 * j)) & 0xf; /* 4 bits per swizzle */
               }
            }
         }
      }
   }

   if (header.alu.packed_src_ssa_16bit &&
       alu->dest.dest.is_ssa) {
      alu->src[0].swizzle[0] = header.alu.writemask_or_two_swizzles & 0x3;
      if (num_srcs > 1)
         alu->src[1].swizzle[0] = header.alu.writemask_or_two_swizzles >> 2;
   }

   return alu;
}

static void
write_deref(write_ctx *ctx, const nir_deref_instr *deref)
{
   assert(deref->deref_type < 8);
   assert(deref->modes < (1 << 14));

   union packed_instr header;
   header.u32 = 0;

   header.deref.instr_type = deref->instr.type;
   header.deref.deref_type = deref->deref_type;

   if (deref->deref_type == nir_deref_type_cast) {
      header.deref.modes = deref->modes;
      header.deref.cast_type_same_as_last = deref->type == ctx->last_type;
   }

   unsigned var_idx = 0;
   if (deref->deref_type == nir_deref_type_var) {
      var_idx = write_lookup_object(ctx, deref->var);
      if (var_idx && var_idx < (1 << 16))
         header.deref_var.object_idx = var_idx;
   }

   if (deref->deref_type == nir_deref_type_array ||
       deref->deref_type == nir_deref_type_ptr_as_array) {
      header.deref.packed_src_ssa_16bit =
         deref->parent.is_ssa && deref->arr.index.is_ssa &&
         are_object_ids_16bit(ctx);
   }

   write_dest(ctx, &deref->dest, header, deref->instr.type);

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (!header.deref_var.object_idx)
         blob_write_uint32(ctx->blob, var_idx);
      break;

   case nir_deref_type_struct:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->parent.ssa));
         blob_write_uint16(ctx->blob,
                           write_lookup_object(ctx, deref->arr.index.ssa));
      } else {
         write_src(ctx, &deref->parent);
         write_src(ctx, &deref->arr.index);
      }
      break;

   case nir_deref_type_cast:
      write_src(ctx, &deref->parent);
      blob_write_uint32(ctx->blob, deref->cast.ptr_stride);
      blob_write_uint32(ctx->blob, deref->cast.align_mul);
      blob_write_uint32(ctx->blob, deref->cast.align_offset);
      if (!header.deref.cast_type_same_as_last) {
         encode_type_to_blob(ctx->blob, deref->type);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      write_src(ctx, &deref->parent);
      break;

   default:
      unreachable("Invalid deref type");
   }
}

static nir_deref_instr *
read_deref(read_ctx *ctx, union packed_instr header)
{
   nir_deref_type deref_type = header.deref.deref_type;
   nir_deref_instr *deref = nir_deref_instr_create(ctx->nir, deref_type);

   read_dest(ctx, &deref->dest, &deref->instr, header);

   nir_deref_instr *parent;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      if (header.deref_var.object_idx)
         deref->var = read_lookup_object(ctx, header.deref_var.object_idx);
      else
         deref->var = read_object(ctx);

      deref->type = deref->var->type;
      break;

   case nir_deref_type_struct:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->strct.index = blob_read_uint32(ctx->blob);
      deref->type = glsl_get_struct_field(parent->type, deref->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      if (header.deref.packed_src_ssa_16bit) {
         deref->parent.is_ssa = true;
         deref->parent.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
         deref->arr.index.is_ssa = true;
         deref->arr.index.ssa = read_lookup_object(ctx, blob_read_uint16(ctx->blob));
      } else {
         read_src(ctx, &deref->parent, &deref->instr);
         read_src(ctx, &deref->arr.index, &deref->instr);
      }

      parent = nir_src_as_deref(deref->parent);
      if (deref->deref_type == nir_deref_type_array)
         deref->type = glsl_get_array_element(parent->type);
      else
         deref->type = parent->type;
      break;

   case nir_deref_type_cast:
      read_src(ctx, &deref->parent, &deref->instr);
      deref->cast.ptr_stride = blob_read_uint32(ctx->blob);
      deref->cast.align_mul = blob_read_uint32(ctx->blob);
      deref->cast.align_offset = blob_read_uint32(ctx->blob);
      if (header.deref.cast_type_same_as_last) {
         deref->type = ctx->last_type;
      } else {
         deref->type = decode_type_from_blob(ctx->blob);
         ctx->last_type = deref->type;
      }
      break;

   case nir_deref_type_array_wildcard:
      read_src(ctx, &deref->parent, &deref->instr);
      parent = nir_src_as_deref(deref->parent);
      deref->type = glsl_get_array_element(parent->type);
      break;

   default:
      unreachable("Invalid deref type");
   }

   if (deref_type == nir_deref_type_var) {
      deref->modes = deref->var->data.mode;
   } else if (deref->deref_type == nir_deref_type_cast) {
      deref->modes = header.deref.modes;
   } else {
      assert(deref->parent.is_ssa);
      deref->modes = nir_instr_as_deref(deref->parent.ssa->parent_instr)->modes;
   }

   return deref;
}

static void
write_intrinsic(write_ctx *ctx, const nir_intrinsic_instr *intrin)
{
   /* 9 bits for nir_intrinsic_op */
   STATIC_ASSERT(nir_num_intrinsics <= 512);
   unsigned num_srcs = nir_intrinsic_infos[intrin->intrinsic].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[intrin->intrinsic].num_indices;
   assert(intrin->intrinsic < 512);

   union packed_instr header;
   header.u32 = 0;

   header.intrinsic.instr_type = intrin->instr.type;
   header.intrinsic.intrinsic = intrin->intrinsic;

   /* Analyze constant indices to decide how to encode them. */
   if (num_indices) {
      unsigned max_bits = 0;
      for (unsigned i = 0; i < num_indices; i++) {
         unsigned max = util_last_bit(intrin->const_index[i]);
         max_bits = MAX2(max_bits, max);
      }

      if (max_bits * num_indices <= 9) {
         header.intrinsic.const_indices_encoding = const_indices_9bit_all_combined;

         /* Pack all const indices into 9 bits. */
         unsigned bit_size = 9 / num_indices;
         for (unsigned i = 0; i < num_indices; i++) {
            header.intrinsic.packed_const_indices |=
               intrin->const_index[i] << (i * bit_size);
         }
      } else if (max_bits <= 8)
         header.intrinsic.const_indices_encoding = const_indices_8bit;
      else if (max_bits <= 16)
         header.intrinsic.const_indices_encoding = const_indices_16bit;
      else
         header.intrinsic.const_indices_encoding = const_indices_32bit;
   }

   if (nir_intrinsic_infos[intrin->intrinsic].has_dest)
      write_dest(ctx, &intrin->dest, header, intrin->instr.type);
   else
      blob_write_uint32(ctx->blob, header.u32);

   for (unsigned i = 0; i < num_srcs; i++)
      write_src(ctx, &intrin->src[i]);

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint8(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint16(ctx->blob, intrin->const_index[i]);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            blob_write_uint32(ctx->blob, intrin->const_index[i]);
         break;
      }
   }
}

static nir_intrinsic_instr *
read_intrinsic(read_ctx *ctx, union packed_instr header)
{
   nir_intrinsic_op op = header.intrinsic.intrinsic;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(ctx->nir, op);

   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   unsigned num_indices = nir_intrinsic_infos[op].num_indices;

   if (nir_intrinsic_infos[op].has_dest)
      read_dest(ctx, &intrin->dest, &intrin->instr, header);

   for (unsigned i = 0; i < num_srcs; i++)
      read_src(ctx, &intrin->src[i], &intrin->instr);

   /* Vectorized intrinsics have num_components matching the dest or a src
    * that has 0 components in the info. Find it.
    */
   if (nir_intrinsic_infos[op].has_dest &&
       nir_intrinsic_infos[op].dest_components == 0) {
      intrin->num_components = nir_dest_num_components(intrin->dest);
   } else {
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_intrinsic_infos[op].src_components[i] == 0) {
            intrin->num_components = nir_src_num_components(intrin->src[i]);
            break;
         }
      }
   }

   if (num_indices) {
      switch (header.intrinsic.const_indices_encoding) {
      case const_indices_9bit_all_combined: {
         unsigned bit_size = 9 / num_indices;
         unsigned bit_mask = u_bit_consecutive(0, bit_size);
         for (unsigned i = 0; i < num_indices; i++) {
            intrin->const_index[i] =
               (header.intrinsic.packed_const_indices >> (i * bit_size)) &
               bit_mask;
         }
         break;
      }
      case const_indices_8bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint8(ctx->blob);
         break;
      case const_indices_16bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint16(ctx->blob);
         break;
      case const_indices_32bit:
         for (unsigned i = 0; i < num_indices; i++)
            intrin->const_index[i] = blob_read_uint32(ctx->blob);
         break;
      }
   }

   return intrin;
}

static void
write_load_const(write_ctx *ctx, const nir_load_const_instr *lc)
{
   assert(lc->def.num_components >= 1 && lc->def.num_components <= 16);
   union packed_instr header;
   header.u32 = 0;

   header.load_const.instr_type = lc->instr.type;
   header.load_const.last_component = lc->def.num_components - 1;
   header.load_const.bit_size = encode_bit_size_3bits(lc->def.bit_size);
   header.load_const.packing = load_const_full;

   /* Try to pack 1-component constants into the 19 free bits in the header. */
   if (lc->def.num_components == 1) {
      switch (lc->def.bit_size) {
      case 64:
         if ((lc->value[0].u64 & 0x1fffffffffffull) == 0) {
            /* packed_value contains high 19 bits, low bits are 0 */
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u64 >> 45;
         } else if (((lc->value[0].i64 << 45) >> 45) == lc->value[0].i64) {
            /* packed_value contains low 19 bits, high bits are sign-extended */
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u64;
         }
         break;

      case 32:
         if ((lc->value[0].u32 & 0x1fff) == 0) {
            header.load_const.packing = load_const_scalar_hi_19bits;
            header.load_const.packed_value = lc->value[0].u32 >> 13;
         } else if (((lc->value[0].i32 << 13) >> 13) == lc->value[0].i32) {
            header.load_const.packing = load_const_scalar_lo_19bits_sext;
            header.load_const.packed_value = lc->value[0].u32;
         }
         break;

      case 16:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u16;
         break;
      case 8:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].u8;
         break;
      case 1:
         header.load_const.packing = load_const_scalar_lo_19bits_sext;
         header.load_const.packed_value = lc->value[0].b;
         break;
      default:
         unreachable("invalid bit_size");
      }
   }

   blob_write_uint32(ctx->blob, header.u32);

   if (header.load_const.packing == load_const_full) {
      switch (lc->def.bit_size) {
      case 64:
         blob_write_bytes(ctx->blob, lc->value,
                          sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint32(ctx->blob, lc->value[i].u32);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint16(ctx->blob, lc->value[i].u16);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            blob_write_uint8(ctx->blob, lc->value[i].u8);
         break;
      }
   }

   write_add_object(ctx, &lc->def);
}

static nir_load_const_instr *
read_load_const(read_ctx *ctx, union packed_instr header)
{
   nir_load_const_instr *lc =
      nir_load_const_instr_create(ctx->nir, header.load_const.last_component + 1,
                                  decode_bit_size_3bits(header.load_const.bit_size));

   switch (header.load_const.packing) {
   case load_const_scalar_hi_19bits:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].u64 = (uint64_t)header.load_const.packed_value << 45;
         break;
      case 32:
         lc->value[0].u32 = (uint64_t)header.load_const.packed_value << 13;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_scalar_lo_19bits_sext:
      switch (lc->def.bit_size) {
      case 64:
         lc->value[0].i64 = ((int64_t)header.load_const.packed_value << 45) >> 45;
         break;
      case 32:
         lc->value[0].i32 = ((int32_t)header.load_const.packed_value << 13) >> 13;
         break;
      case 16:
         lc->value[0].u16 = header.load_const.packed_value;
         break;
      case 8:
         lc->value[0].u8 = header.load_const.packed_value;
         break;
      case 1:
         lc->value[0].b = header.load_const.packed_value;
         break;
      default:
         unreachable("invalid bit_size");
      }
      break;

   case load_const_full:
      switch (lc->def.bit_size) {
      case 64:
         blob_copy_bytes(ctx->blob, lc->value, sizeof(*lc->value) * lc->def.num_components);
         break;

      case 32:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u32 = blob_read_uint32(ctx->blob);
         break;

      case 16:
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u16 = blob_read_uint16(ctx->blob);
         break;

      default:
         assert(lc->def.bit_size <= 8);
         for (unsigned i = 0; i < lc->def.num_components; i++)
            lc->value[i].u8 = blob_read_uint8(ctx->blob);
         break;
      }
      break;
   }

   read_add_object(ctx, &lc->def);
   return lc;
}

static void
write_ssa_undef(write_ctx *ctx, const nir_ssa_undef_instr *undef)
{
   assert(undef->def.num_components >= 1 && undef->def.num_components <= 16);

   union packed_instr header;
   header.u32 = 0;

   header.undef.instr_type = undef->instr.type;
   header.undef.last_component = undef->def.num_components - 1;
   header.undef.bit_size = encode_bit_size_3bits(undef->def.bit_size);

   blob_write_uint32(ctx->blob, header.u32);
   write_add_object(ctx, &undef->def);
}

static nir_ssa_undef_instr *
read_ssa_undef(read_ctx *ctx, union packed_instr header)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(ctx->nir, header.undef.last_component + 1,
                                 decode_bit_size_3bits(header.undef.bit_size));

   read_add_object(ctx, &undef->def);
   return undef;
}

union packed_tex_data {
   uint32_t u32;
   struct {
      unsigned sampler_dim:4;
      unsigned dest_type:8;
      unsigned coord_components:3;
      unsigned is_array:1;
      unsigned is_shadow:1;
      unsigned is_new_style_shadow:1;
      unsigned is_sparse:1;
      unsigned component:2;
      unsigned texture_non_uniform:1;
      unsigned sampler_non_uniform:1;
      unsigned array_is_lowered_cube:1;
      unsigned unused:6; /* Mark unused for valgrind. */
   } u;
};
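
/* The named bitfields above add up to 30 bits; the u32 member is what fixes
 * the union's size at 4 bytes, which the STATIC_ASSERT in write_tex()
 * verifies.
 */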
1453
1454static void
1455write_tex(write_ctx *ctx, const nir_tex_instr *tex)
1456{
1457   assert(tex->num_srcs < 16);
1458   assert(tex->op < 16);
1459
1460   union packed_instr header;
1461   header.u32 = 0;
1462
1463   header.tex.instr_type = tex->instr.type;
1464   header.tex.num_srcs = tex->num_srcs;
1465   header.tex.op = tex->op;
1466
1467   write_dest(ctx, &tex->dest, header, tex->instr.type);
1468
1469   blob_write_uint32(ctx->blob, tex->texture_index);
1470   blob_write_uint32(ctx->blob, tex->sampler_index);
1471   if (tex->op == nir_texop_tg4)
1472      blob_write_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1473
1474   STATIC_ASSERT(sizeof(union packed_tex_data) == sizeof(uint32_t));
1475   union packed_tex_data packed = {
1476      .u.sampler_dim = tex->sampler_dim,
1477      .u.dest_type = tex->dest_type,
1478      .u.coord_components = tex->coord_components,
1479      .u.is_array = tex->is_array,
1480      .u.is_shadow = tex->is_shadow,
1481      .u.is_new_style_shadow = tex->is_new_style_shadow,
1482      .u.is_sparse = tex->is_sparse,
1483      .u.component = tex->component,
1484      .u.texture_non_uniform = tex->texture_non_uniform,
1485      .u.sampler_non_uniform = tex->sampler_non_uniform,
1486      .u.array_is_lowered_cube = tex->array_is_lowered_cube,
1487   };
1488   blob_write_uint32(ctx->blob, packed.u32);
1489
1490   for (unsigned i = 0; i < tex->num_srcs; i++) {
1491      union packed_src src;
1492      src.u32 = 0;
1493      src.tex.src_type = tex->src[i].src_type;
1494      write_src_full(ctx, &tex->src[i].src, src);
1495   }
1496}
1497
1498static nir_tex_instr *
1499read_tex(read_ctx *ctx, union packed_instr header)
1500{
1501   nir_tex_instr *tex = nir_tex_instr_create(ctx->nir, header.tex.num_srcs);
1502
1503   read_dest(ctx, &tex->dest, &tex->instr, header);
1504
1505   tex->op = header.tex.op;
1506   tex->texture_index = blob_read_uint32(ctx->blob);
1507   tex->sampler_index = blob_read_uint32(ctx->blob);
1508   if (tex->op == nir_texop_tg4)
1509      blob_copy_bytes(ctx->blob, tex->tg4_offsets, sizeof(tex->tg4_offsets));
1510
1511   union packed_tex_data packed;
1512   packed.u32 = blob_read_uint32(ctx->blob);
1513   tex->sampler_dim = packed.u.sampler_dim;
1514   tex->dest_type = packed.u.dest_type;
1515   tex->coord_components = packed.u.coord_components;
1516   tex->is_array = packed.u.is_array;
1517   tex->is_shadow = packed.u.is_shadow;
1518   tex->is_new_style_shadow = packed.u.is_new_style_shadow;
1519   tex->is_sparse = packed.u.is_sparse;
1520   tex->component = packed.u.component;
1521   tex->texture_non_uniform = packed.u.texture_non_uniform;
1522   tex->sampler_non_uniform = packed.u.sampler_non_uniform;
1523   tex->array_is_lowered_cube = packed.u.array_is_lowered_cube;
1524
1525   for (unsigned i = 0; i < tex->num_srcs; i++) {
1526      union packed_src src = read_src(ctx, &tex->src[i].src, &tex->instr);
1527      tex->src[i].src_type = src.tex.src_type;
1528   }
1529
1530   return tex;
1531}
1532
1533static void
1534write_phi(write_ctx *ctx, const nir_phi_instr *phi)
1535{
1536   union packed_instr header;
1537   header.u32 = 0;
1538
1539   header.phi.instr_type = phi->instr.type;
1540   header.phi.num_srcs = exec_list_length(&phi->srcs);
1541
1542   /* Phi nodes are special, since they may reference SSA definitions and
1543    * basic blocks that don't exist yet. We leave two empty uint32_t's here,
1544    * and then store enough information so that a later fixup pass can fill
1545    * them in correctly.
1546    */
1547   write_dest(ctx, &phi->dest, header, phi->instr.type);
1548
1549   nir_foreach_phi_src(src, phi) {
1550      assert(src->src.is_ssa);
1551      size_t blob_offset = blob_reserve_uint32(ctx->blob);
1552      ASSERTED size_t blob_offset2 = blob_reserve_uint32(ctx->blob);
1553      assert(blob_offset + sizeof(uint32_t) == blob_offset2);
1554      write_phi_fixup fixup = {
1555         .blob_offset = blob_offset,
1556         .src = src->src.ssa,
1557         .block = src->pred,
1558      };
1559      util_dynarray_append(&ctx->phi_fixups, write_phi_fixup, fixup);
1560   }
1561}
1562
1563static void
1564write_fixup_phis(write_ctx *ctx)
1565{
1566   util_dynarray_foreach(&ctx->phi_fixups, write_phi_fixup, fixup) {
1567      uint32_t *blob_ptr = (uint32_t *)(ctx->blob->data + fixup->blob_offset);
1568      blob_ptr[0] = write_lookup_object(ctx, fixup->src);
1569      blob_ptr[1] = write_lookup_object(ctx, fixup->block);
1570   }
1571
1572   util_dynarray_clear(&ctx->phi_fixups);
1573}
1574
1575static nir_phi_instr *
1576read_phi(read_ctx *ctx, nir_block *blk, union packed_instr header)
1577{
1578   nir_phi_instr *phi = nir_phi_instr_create(ctx->nir);
1579
1580   read_dest(ctx, &phi->dest, &phi->instr, header);
1581
1582   /* For similar reasons as before, we just store the index directly into the
1583    * pointer, and let a later pass resolve the phi sources.
1584    *
1585    * In order to ensure that the copied sources (which are just the indices
1586    * from the blob for now) don't get inserted into the old shader's use-def
1587    * lists, we have to add the phi instruction *before* we set up its
1588    * sources.
1589    */
1590   nir_instr_insert_after_block(blk, &phi->instr);
1591
1592   for (unsigned i = 0; i < header.phi.num_srcs; i++) {
1593      nir_ssa_def *def = (nir_ssa_def *)(uintptr_t) blob_read_uint32(ctx->blob);
1594      nir_block *pred = (nir_block *)(uintptr_t) blob_read_uint32(ctx->blob);
1595      nir_phi_src *src = nir_phi_instr_add_src(phi, pred, nir_src_for_ssa(def));
1596
1597      /* Since we're not letting nir_insert_instr handle use/def stuff for us,
1598       * we have to set the parent_instr manually.  It doesn't really matter
1599       * when we do it, so we might as well do it here.
1600       */
1601      src->src.parent_instr = &phi->instr;
1602
1603      /* Stash it in the list of phi sources.  We'll walk this list and fix up
1604       * sources at the very end of read_function_impl.
1605       */
1606      list_add(&src->src.use_link, &ctx->phi_srcs);
1607   }
1608
1609   return phi;
1610}
1611
1612static void
1613read_fixup_phis(read_ctx *ctx)
1614{
1615   list_for_each_entry_safe(nir_phi_src, src, &ctx->phi_srcs, src.use_link) {
1616      src->pred = read_lookup_object(ctx, (uintptr_t)src->pred);
1617      src->src.ssa = read_lookup_object(ctx, (uintptr_t)src->src.ssa);
1618
1619      /* Remove from this list */
1620      list_del(&src->src.use_link);
1621
1622      list_addtail(&src->src.use_link, &src->src.ssa->uses);
1623   }
1624   assert(list_is_empty(&ctx->phi_srcs));
1625}
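/* Illustrative example (editorial): before the loop above runs, the
 * pointer fields still carry blob indices smuggled in by read_phi, e.g.
 *
 *    src->pred    == (nir_block *)(uintptr_t)7
 *    src->src.ssa == (nir_ssa_def *)(uintptr_t)42
 *
 * read_lookup_object translates each index back into the deserialized
 * object, and only then does the source join the real use list.
 */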
1626
1627static void
1628write_jump(write_ctx *ctx, const nir_jump_instr *jmp)
1629{
1630   /* These aren't handled because they require special block linking */
1631   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
1632
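   /* The packed header presumably stores the jump type in a 2-bit field,
    * hence only values 0..3 are representable.
    */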
1633   assert(jmp->type < 4);
1634
1635   union packed_instr header;
1636   header.u32 = 0;
1637
1638   header.jump.instr_type = jmp->instr.type;
1639   header.jump.type = jmp->type;
1640
1641   blob_write_uint32(ctx->blob, header.u32);
1642}
1643
1644static nir_jump_instr *
1645read_jump(read_ctx *ctx, union packed_instr header)
1646{
1647   /* These aren't handled because they require special block linking */
1648   assert(header.jump.type != nir_jump_goto &&
1649          header.jump.type != nir_jump_goto_if);
1650
1651   nir_jump_instr *jmp = nir_jump_instr_create(ctx->nir, header.jump.type);
1652   return jmp;
1653}
1654
1655static void
1656write_call(write_ctx *ctx, const nir_call_instr *call)
1657{
1658   blob_write_uint32(ctx->blob, write_lookup_object(ctx, call->callee));
1659
1660   for (unsigned i = 0; i < call->num_params; i++)
1661      write_src(ctx, &call->params[i]);
1662}
1663
1664static nir_call_instr *
1665read_call(read_ctx *ctx)
1666{
1667   nir_function *callee = read_object(ctx);
1668   nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
1669
1670   for (unsigned i = 0; i < call->num_params; i++)
1671      read_src(ctx, &call->params[i], call);
1672
1673   return call;
1674}
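/* Illustrative sketch (editorial): a call serializes as
 *
 *    u32 instr_type             (written by write_instr, see below)
 *    u32 callee function index
 *    packed_src params[num_params]
 *
 * num_params is recovered from the callee, which is one reason functions
 * must be deserialized before any function_impl.
 */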
1675
1676static void
1677write_instr(write_ctx *ctx, const nir_instr *instr)
1678{
1679   /* We have only 4 bits for the instruction type. */
1680   assert(instr->type < 16);
1681
1682   switch (instr->type) {
1683   case nir_instr_type_alu:
1684      write_alu(ctx, nir_instr_as_alu(instr));
1685      break;
1686   case nir_instr_type_deref:
1687      write_deref(ctx, nir_instr_as_deref(instr));
1688      break;
1689   case nir_instr_type_intrinsic:
1690      write_intrinsic(ctx, nir_instr_as_intrinsic(instr));
1691      break;
1692   case nir_instr_type_load_const:
1693      write_load_const(ctx, nir_instr_as_load_const(instr));
1694      break;
1695   case nir_instr_type_ssa_undef:
1696      write_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
1697      break;
1698   case nir_instr_type_tex:
1699      write_tex(ctx, nir_instr_as_tex(instr));
1700      break;
1701   case nir_instr_type_phi:
1702      write_phi(ctx, nir_instr_as_phi(instr));
1703      break;
1704   case nir_instr_type_jump:
1705      write_jump(ctx, nir_instr_as_jump(instr));
1706      break;
1707   case nir_instr_type_call:
1708      blob_write_uint32(ctx->blob, instr->type);
1709      write_call(ctx, nir_instr_as_call(instr));
1710      break;
1711   case nir_instr_type_parallel_copy:
1712      unreachable("Cannot write parallel copies");
1713   default:
1714      unreachable("bad instr type");
1715   }
1716}
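/* Editorial note: every case above except nir_instr_type_call packs the
 * instruction type into a union packed_instr header inside its write_*
 * helper; calls have no packed header, so the bare type dword is written
 * here instead.
 */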
1717
1718/* Return the number of instructions read. */
1719static unsigned
1720read_instr(read_ctx *ctx, nir_block *block)
1721{
1722   STATIC_ASSERT(sizeof(union packed_instr) == 4);
1723   union packed_instr header;
1724   header.u32 = blob_read_uint32(ctx->blob);
1725   nir_instr *instr;
1726
1727   switch (header.any.instr_type) {
1728   case nir_instr_type_alu:
1729      for (unsigned i = 0; i <= header.alu.num_followup_alu_sharing_header; i++)
1730         nir_instr_insert_after_block(block, &read_alu(ctx, header)->instr);
1731      return header.alu.num_followup_alu_sharing_header + 1;
1732   case nir_instr_type_deref:
1733      instr = &read_deref(ctx, header)->instr;
1734      break;
1735   case nir_instr_type_intrinsic:
1736      instr = &read_intrinsic(ctx, header)->instr;
1737      break;
1738   case nir_instr_type_load_const:
1739      instr = &read_load_const(ctx, header)->instr;
1740      break;
1741   case nir_instr_type_ssa_undef:
1742      instr = &read_ssa_undef(ctx, header)->instr;
1743      break;
1744   case nir_instr_type_tex:
1745      instr = &read_tex(ctx, header)->instr;
1746      break;
1747   case nir_instr_type_phi:
1748      /* Phi instructions are a bit of a special case when reading because we
1749       * don't want inserting the instruction to automatically handle use/defs
1750       * for us.  Instead, we need to wait until all the blocks/instructions
1751       * are read so that we can set their sources up.
1752       */
1753      read_phi(ctx, block, header);
1754      return 1;
1755   case nir_instr_type_jump:
1756      instr = &read_jump(ctx, header)->instr;
1757      break;
1758   case nir_instr_type_call:
1759      instr = &read_call(ctx)->instr;
1760      break;
1761   case nir_instr_type_parallel_copy:
1762      unreachable("Cannot read parallel copies");
1763   default:
1764      unreachable("bad instr type");
1765   }
1766
1767   nir_instr_insert_after_block(block, instr);
1768   return 1;
1769}
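/* Editorial note: one ALU header can be shared by the instructions that
 * follow it.  header.alu.num_followup_alu_sharing_header tells the reader
 * how many extra ALU instructions to materialize from the same header,
 * which is why this function returns a count and read_block advances by
 * the return value rather than by one.
 */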
1770
1771static void
1772write_block(write_ctx *ctx, const nir_block *block)
1773{
1774   write_add_object(ctx, block);
1775   blob_write_uint32(ctx->blob, exec_list_length(&block->instr_list));
1776
1777   ctx->last_instr_type = ~0;
1778   ctx->last_alu_header_offset = 0;
1779
1780   nir_foreach_instr(instr, block) {
1781      write_instr(ctx, instr);
1782      ctx->last_instr_type = instr->type;
1783   }
1784}
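/* Editorial note: the last-ALU-header state is reset at the top of every
 * block, so a shared ALU header never spans a block boundary.
 */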
1785
1786static void
1787read_block(read_ctx *ctx, struct exec_list *cf_list)
1788{
1789   /* Don't actually create a new block.  Just use the one from the tail of
1790    * the list.  NIR guarantees that the tail of the list is a block and that
1791    * no two blocks are side-by-side in the IR; it should be empty.
1792    */
1793   nir_block *block =
1794      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
1795
1796   read_add_object(ctx, block);
1797   unsigned num_instrs = blob_read_uint32(ctx->blob);
1798   for (unsigned i = 0; i < num_instrs;) {
1799      i += read_instr(ctx, block);
1800   }
1801}
1802
1803static void
1804write_cf_list(write_ctx *ctx, const struct exec_list *cf_list);
1805
1806static void
1807read_cf_list(read_ctx *ctx, struct exec_list *cf_list);
1808
1809static void
1810write_if(write_ctx *ctx, nir_if *nif)
1811{
1812   write_src(ctx, &nif->condition);
1813   blob_write_uint8(ctx->blob, nif->control);
1814
1815   write_cf_list(ctx, &nif->then_list);
1816   write_cf_list(ctx, &nif->else_list);
1817}
1818
1819static void
1820read_if(read_ctx *ctx, struct exec_list *cf_list)
1821{
1822   nir_if *nif = nir_if_create(ctx->nir);
1823
1824   read_src(ctx, &nif->condition, nif);
1825   nif->control = blob_read_uint8(ctx->blob);
1826
1827   nir_cf_node_insert_end(cf_list, &nif->cf_node);
1828
1829   read_cf_list(ctx, &nif->then_list);
1830   read_cf_list(ctx, &nif->else_list);
1831}
1832
1833static void
1834write_loop(write_ctx *ctx, nir_loop *loop)
1835{
1836   blob_write_uint8(ctx->blob, loop->control);
1837   write_cf_list(ctx, &loop->body);
1838}
1839
1840static void
1841read_loop(read_ctx *ctx, struct exec_list *cf_list)
1842{
1843   nir_loop *loop = nir_loop_create(ctx->nir);
1844
1845   nir_cf_node_insert_end(cf_list, &loop->cf_node);
1846
1847   loop->control = blob_read_uint8(ctx->blob);
1848   read_cf_list(ctx, &loop->body);
1849}
1850
1851static void
1852write_cf_node(write_ctx *ctx, nir_cf_node *cf)
1853{
1854   blob_write_uint32(ctx->blob, cf->type);
1855
1856   switch (cf->type) {
1857   case nir_cf_node_block:
1858      write_block(ctx, nir_cf_node_as_block(cf));
1859      break;
1860   case nir_cf_node_if:
1861      write_if(ctx, nir_cf_node_as_if(cf));
1862      break;
1863   case nir_cf_node_loop:
1864      write_loop(ctx, nir_cf_node_as_loop(cf));
1865      break;
1866   default:
1867      unreachable("bad cf type");
1868   }
1869}
1870
1871static void
1872read_cf_node(read_ctx *ctx, struct exec_list *list)
1873{
1874   nir_cf_node_type type = blob_read_uint32(ctx->blob);
1875
1876   switch (type) {
1877   case nir_cf_node_block:
1878      read_block(ctx, list);
1879      break;
1880   case nir_cf_node_if:
1881      read_if(ctx, list);
1882      break;
1883   case nir_cf_node_loop:
1884      read_loop(ctx, list);
1885      break;
1886   default:
1887      unreachable("bad cf type");
1888   }
1889}
1890
1891static void
1892write_cf_list(write_ctx *ctx, const struct exec_list *cf_list)
1893{
1894   blob_write_uint32(ctx->blob, exec_list_length(cf_list));
1895   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
1896      write_cf_node(ctx, cf);
1897   }
1898}
1899
1900static void
1901read_cf_list(read_ctx *ctx, struct exec_list *cf_list)
1902{
1903   uint32_t num_cf_nodes = blob_read_uint32(ctx->blob);
1904   for (unsigned i = 0; i < num_cf_nodes; i++)
1905      read_cf_node(ctx, cf_list);
1906}
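/* Illustrative sketch (editorial): control flow serializes recursively as
 *
 *    u32 num_cf_nodes
 *    repeated: u32 cf_type, then the node payload
 *
 * e.g. an if becomes: condition src, u8 control, then the then_list and
 * else_list each as a nested cf_list of this same form.
 */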
1907
1908static void
1909write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
1910{
1911   blob_write_uint8(ctx->blob, fi->structured);
1912
1913   write_var_list(ctx, &fi->locals);
1914   write_reg_list(ctx, &fi->registers);
1915   blob_write_uint32(ctx->blob, fi->reg_alloc);
1916
1917   write_cf_list(ctx, &fi->body);
1918   write_fixup_phis(ctx);
1919}
1920
1921static nir_function_impl *
1922read_function_impl(read_ctx *ctx, nir_function *fxn)
1923{
1924   nir_function_impl *fi = nir_function_impl_create_bare(ctx->nir);
1925   fi->function = fxn;
1926
1927   fi->structured = blob_read_uint8(ctx->blob);
1928
1929   read_var_list(ctx, &fi->locals);
1930   read_reg_list(ctx, &fi->registers);
1931   fi->reg_alloc = blob_read_uint32(ctx->blob);
1932
1933   read_cf_list(ctx, &fi->body);
1934   read_fixup_phis(ctx);
1935
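   /* NIR metadata is not serialized, so none of it is valid on the copy. */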
1936   fi->valid_metadata = 0;
1937
1938   return fi;
1939}
1940
1941static void
1942write_function(write_ctx *ctx, const nir_function *fxn)
1943{
1944   uint32_t flags = fxn->is_entrypoint;
1945   if (fxn->name)
1946      flags |= 0x2;
1947   if (fxn->impl)
1948      flags |= 0x4;
1949   blob_write_uint32(ctx->blob, flags);
1950   if (fxn->name)
1951      blob_write_string(ctx->blob, fxn->name);
1952
1953   write_add_object(ctx, fxn);
1954
1955   blob_write_uint32(ctx->blob, fxn->num_params);
1956   for (unsigned i = 0; i < fxn->num_params; i++) {
1957      uint32_t val =
1958         ((uint32_t)fxn->params[i].num_components) |
1959         ((uint32_t)fxn->params[i].bit_size) << 8;
1960      blob_write_uint32(ctx->blob, val);
1961   }
1962
1963   /* At first glance, it looks like we should write the function_impl here.
1964    * However, call instructions need to be able to reference at least the
1965    * function, and calls get processed as we write the function_impls.
1966    * So we stop here and write the function_impls in a second pass.
1967    */
1968}
1969
1970static void
1971read_function(read_ctx *ctx)
1972{
1973   uint32_t flags = blob_read_uint32(ctx->blob);
1974   bool has_name = flags & 0x2;
1975   char *name = has_name ? blob_read_string(ctx->blob) : NULL;
1976
1977   nir_function *fxn = nir_function_create(ctx->nir, name);
1978
1979   read_add_object(ctx, fxn);
1980
1981   fxn->num_params = blob_read_uint32(ctx->blob);
1982   fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
1983   for (unsigned i = 0; i < fxn->num_params; i++) {
1984      uint32_t val = blob_read_uint32(ctx->blob);
1985      fxn->params[i].num_components = val & 0xff;
1986      fxn->params[i].bit_size = (val >> 8) & 0xff;
1987   }
1988
1989   fxn->is_entrypoint = flags & 0x1;
1990   if (flags & 0x4)
1991      fxn->impl = NIR_SERIALIZE_FUNC_HAS_IMPL;
1992}
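/* Illustrative decode (editorial): the function record packs three flag
 * bits (bit 0: is_entrypoint, bit 1: has a name, bit 2: has an impl)
 * followed by one dword per parameter:
 *
 *    bits 0..7:  num_components
 *    bits 8..15: bit_size
 */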
1993
1994/**
1995 * Serialize NIR into a binary blob.
1996 *
1997 * \param strip  Don't serialize information only useful for debugging,
1998 *               such as variable names, making cache hits from similar
1999 *               shaders more likely.
2000 */
2001void
2002nir_serialize(struct blob *blob, const nir_shader *nir, bool strip)
2003{
2004   write_ctx ctx = {0};
2005   ctx.remap_table = _mesa_pointer_hash_table_create(NULL);
2006   ctx.blob = blob;
2007   ctx.nir = nir;
2008   ctx.strip = strip;
2009   util_dynarray_init(&ctx.phi_fixups, NULL);
2010
2011   size_t idx_size_offset = blob_reserve_uint32(blob);
2012
2013   struct shader_info info = nir->info;
2014   uint32_t strings = 0;
2015   if (!strip && info.name)
2016      strings |= 0x1;
2017   if (!strip && info.label)
2018      strings |= 0x2;
2019   blob_write_uint32(blob, strings);
2020   if (!strip && info.name)
2021      blob_write_string(blob, info.name);
2022   if (!strip && info.label)
2023      blob_write_string(blob, info.label);
2024   info.name = info.label = NULL;
2025   blob_write_bytes(blob, (uint8_t *) &info, sizeof(info));
2026
2027   write_var_list(&ctx, &nir->variables);
2028
2029   blob_write_uint32(blob, nir->num_inputs);
2030   blob_write_uint32(blob, nir->num_uniforms);
2031   blob_write_uint32(blob, nir->num_outputs);
2032   blob_write_uint32(blob, nir->scratch_size);
2033
2034   blob_write_uint32(blob, exec_list_length(&nir->functions));
2035   nir_foreach_function(fxn, nir) {
2036      write_function(&ctx, fxn);
2037   }
2038
2039   nir_foreach_function(fxn, nir) {
2040      if (fxn->impl)
2041         write_function_impl(&ctx, fxn->impl);
2042   }
2043
2044   blob_write_uint32(blob, nir->constant_data_size);
2045   if (nir->constant_data_size > 0)
2046      blob_write_bytes(blob, nir->constant_data, nir->constant_data_size);
2047
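   /* Now that every object has been assigned an index, patch the total
    * count into the dword reserved at the start of the blob; the reader
    * uses it to size its index -> pointer table.
    */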
2048   *(uint32_t *)(blob->data + idx_size_offset) = ctx.next_idx;
2049
2050   _mesa_hash_table_destroy(ctx.remap_table, NULL);
2051   util_dynarray_fini(&ctx.phi_fixups);
2052}
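/* Usage sketch (editorial; cache_store is a hypothetical helper, not part
 * of this file):
 *
 *    struct blob blob;
 *    blob_init(&blob);
 *    nir_serialize(&blob, nir, true);          // strip names/labels
 *    cache_store(key, blob.data, blob.size);   // hypothetical
 *    blob_finish(&blob);
 */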
2053
2054nir_shader *
2055nir_deserialize(void *mem_ctx,
2056                const struct nir_shader_compiler_options *options,
2057                struct blob_reader *blob)
2058{
2059   read_ctx ctx = {0};
2060   ctx.blob = blob;
2061   list_inithead(&ctx.phi_srcs);
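   /* The leading dword is the object count that nir_serialize patched in
    * at the end of writing; it sizes the index -> pointer table up front.
    */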
2062   ctx.idx_table_len = blob_read_uint32(blob);
2063   ctx.idx_table = calloc(ctx.idx_table_len, sizeof(uintptr_t));
2064
2065   uint32_t strings = blob_read_uint32(blob);
2066   char *name = (strings & 0x1) ? blob_read_string(blob) : NULL;
2067   char *label = (strings & 0x2) ? blob_read_string(blob) : NULL;
2068
2069   struct shader_info info;
2070   blob_copy_bytes(blob, (uint8_t *) &info, sizeof(info));
2071
2072   ctx.nir = nir_shader_create(mem_ctx, info.stage, options, NULL);
2073
2074   info.name = name ? ralloc_strdup(ctx.nir, name) : NULL;
2075   info.label = label ? ralloc_strdup(ctx.nir, label) : NULL;
2076
2077   ctx.nir->info = info;
2078
2079   read_var_list(&ctx, &ctx.nir->variables);
2080
2081   ctx.nir->num_inputs = blob_read_uint32(blob);
2082   ctx.nir->num_uniforms = blob_read_uint32(blob);
2083   ctx.nir->num_outputs = blob_read_uint32(blob);
2084   ctx.nir->scratch_size = blob_read_uint32(blob);
2085
2086   unsigned num_functions = blob_read_uint32(blob);
2087   for (unsigned i = 0; i < num_functions; i++)
2088      read_function(&ctx);
2089
2090   nir_foreach_function(fxn, ctx.nir) {
2091      if (fxn->impl == NIR_SERIALIZE_FUNC_HAS_IMPL)
2092         fxn->impl = read_function_impl(&ctx, fxn);
2093   }
2094
2095   ctx.nir->constant_data_size = blob_read_uint32(blob);
2096   if (ctx.nir->constant_data_size > 0) {
2097      ctx.nir->constant_data =
2098         ralloc_size(ctx.nir, ctx.nir->constant_data_size);
2099      blob_copy_bytes(blob, ctx.nir->constant_data,
2100                      ctx.nir->constant_data_size);
2101   }
2102
2103   free(ctx.idx_table);
2104
2105   nir_validate_shader(ctx.nir, "after deserialize");
2106
2107   return ctx.nir;
2108}
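/* Usage sketch (editorial): reading a shader back from bytes produced by
 * nir_serialize, e.g. out of a cache:
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, data, size);    // data/size from the cache
 *    nir_shader *s = nir_deserialize(NULL, options, &reader);
 */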
2109
2110void
2111nir_shader_serialize_deserialize(nir_shader *shader)
2112{
2113   const struct nir_shader_compiler_options *options = shader->options;
2114
2115   struct blob writer;
2116   blob_init(&writer);
2117   nir_serialize(&writer, shader, false);
2118
2119   /* Delete all of the shader's ralloc children, but leave the shader alone. */
2120   void *dead_ctx = ralloc_context(NULL);
2121   ralloc_adopt(dead_ctx, shader);
2122   ralloc_free(dead_ctx);
2123
2124   dead_ctx = ralloc_context(NULL);
2125
2126   struct blob_reader reader;
2127   blob_reader_init(&reader, writer.data, writer.size);
2128   nir_shader *copy = nir_deserialize(dead_ctx, options, &reader);
2129
2130   blob_finish(&writer);
2131
2132   nir_shader_replace(shader, copy);
2133   ralloc_free(dead_ctx);
2134}
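/* Editorial note: this helper round-trips a shader through the serializer
 * in place, which makes it a handy stress test; anything the serializer
 * drops or corrupts shows up in the replaced shader (or trips
 * nir_validate_shader during deserialization).
 */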
2135