1/*
2 * Copyright © 2015 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir.h"
25#include "nir_control_flow.h"
26
/* Secret Decoder Ring:
 *   clone_foo():
 *        Allocate a new foo and clone the original into it.
 *   __clone_foo():
 *        Clone only the body of a foo (i.e. a parent class, an embedded
 *        struct, etc.) into storage that already exists.
 */
33
34typedef struct {
35   /* True if we are cloning an entire shader. */
36   bool global_clone;
37
38   /* If true allows the clone operation to fall back to the original pointer
39    * if no clone pointer is found in the remap table.  This allows us to
40    * clone a loop body without having to add srcs from outside the loop to
41    * the remap table. This is useful for loop unrolling.
42    */
43   bool allow_remap_fallback;
44
45   /* maps orig ptr -> cloned ptr: */
46   struct hash_table *remap_table;
47
48   /* List of phi sources. */
49   struct list_head phi_srcs;
50
51   /* new shader object, used as memctx for just about everything else: */
52   nir_shader *ns;
53} clone_state;
54
55static void
56init_clone_state(clone_state *state, struct hash_table *remap_table,
57                 bool global, bool allow_remap_fallback)
58{
59   state->global_clone = global;
60   state->allow_remap_fallback = allow_remap_fallback;
61
62   if (remap_table) {
63      state->remap_table = remap_table;
64   } else {
65      state->remap_table = _mesa_pointer_hash_table_create(NULL);
66   }
67
68   list_inithead(&state->phi_srcs);
69}
70
71static void
72free_clone_state(clone_state *state)
73{
74   _mesa_hash_table_destroy(state->remap_table, NULL);
75}
76
77static inline void *
78_lookup_ptr(clone_state *state, const void *ptr, bool global)
79{
80   struct hash_entry *entry;
81
82   if (!ptr)
83      return NULL;
84
85   if (!state->global_clone && global)
86      return (void *)ptr;
87
88   if (unlikely(!state->remap_table)) {
89      assert(state->allow_remap_fallback);
90      return (void *)ptr;
91   }
92
93   entry = _mesa_hash_table_search(state->remap_table, ptr);
94   if (!entry) {
95      assert(state->allow_remap_fallback);
96      return (void *)ptr;
97   }
98
99   return entry->data;
100}
101
102static void
103add_remap(clone_state *state, void *nptr, const void *ptr)
104{
105   _mesa_hash_table_insert(state->remap_table, ptr, nptr);
106}
107
108static void *
109remap_local(clone_state *state, const void *ptr)
110{
111   return _lookup_ptr(state, ptr, false);
112}
113
114static void *
115remap_global(clone_state *state, const void *ptr)
116{
117   return _lookup_ptr(state, ptr, true);
118}
119
120static nir_register *
121remap_reg(clone_state *state, const nir_register *reg)
122{
123   return _lookup_ptr(state, reg, false);
124}
125
126static nir_variable *
127remap_var(clone_state *state, const nir_variable *var)
128{
129   return _lookup_ptr(state, var, nir_variable_is_global(var));
130}
131
132nir_constant *
133nir_constant_clone(const nir_constant *c, nir_variable *nvar)
134{
135   nir_constant *nc = ralloc(nvar, nir_constant);
136
137   memcpy(nc->values, c->values, sizeof(nc->values));
138   nc->num_elements = c->num_elements;
139   nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
140   for (unsigned i = 0; i < c->num_elements; i++) {
141      nc->elements[i] = nir_constant_clone(c->elements[i], nvar);
142   }
143
144   return nc;
145}
146
147/* NOTE: for cloning nir_variables, bypass nir_variable_create to avoid
148 * having to deal with locals and globals separately:
149 */
150nir_variable *
151nir_variable_clone(const nir_variable *var, nir_shader *shader)
152{
153   nir_variable *nvar = rzalloc(shader, nir_variable);
154
155   nvar->type = var->type;
156   nvar->name = ralloc_strdup(nvar, var->name);
157   nvar->data = var->data;
158   nvar->num_state_slots = var->num_state_slots;
159   if (var->num_state_slots) {
160      nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
161      memcpy(nvar->state_slots, var->state_slots,
162             var->num_state_slots * sizeof(nir_state_slot));
163   }
164   if (var->constant_initializer) {
165      nvar->constant_initializer =
166         nir_constant_clone(var->constant_initializer, nvar);
167   }
168   nvar->interface_type = var->interface_type;
169
170   nvar->num_members = var->num_members;
171   if (var->num_members) {
172      nvar->members = ralloc_array(nvar, struct nir_variable_data,
173                                   var->num_members);
174      memcpy(nvar->members, var->members,
175             var->num_members * sizeof(*var->members));
176   }
177
178   return nvar;
179}
180
181static nir_variable *
182clone_variable(clone_state *state, const nir_variable *var)
183{
184   nir_variable *nvar = nir_variable_clone(var, state->ns);
185   add_remap(state, nvar, var);
186
187   return nvar;
188}
189
190/* clone list of nir_variable: */
191static void
192clone_var_list(clone_state *state, struct exec_list *dst,
193               const struct exec_list *list)
194{
195   exec_list_make_empty(dst);
196   foreach_list_typed(nir_variable, var, node, list) {
197      nir_variable *nvar = clone_variable(state, var);
198      exec_list_push_tail(dst, &nvar->node);
199   }
200}
201
202/* NOTE: for cloning nir_registers, bypass nir_global/local_reg_create()
203 * to avoid having to deal with locals and globals separately:
204 */
205static nir_register *
206clone_register(clone_state *state, const nir_register *reg)
207{
208   nir_register *nreg = rzalloc(state->ns, nir_register);
209   add_remap(state, nreg, reg);
210
211   nreg->num_components = reg->num_components;
212   nreg->bit_size = reg->bit_size;
213   nreg->num_array_elems = reg->num_array_elems;
214   nreg->index = reg->index;
215
216   /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
217   list_inithead(&nreg->uses);
218   list_inithead(&nreg->defs);
219   list_inithead(&nreg->if_uses);
220
221   return nreg;
222}
223
224/* clone list of nir_register: */
225static void
226clone_reg_list(clone_state *state, struct exec_list *dst,
227               const struct exec_list *list)
228{
229   exec_list_make_empty(dst);
230   foreach_list_typed(nir_register, reg, node, list) {
231      nir_register *nreg = clone_register(state, reg);
232      exec_list_push_tail(dst, &nreg->node);
233   }
234}
235
236static void
237__clone_src(clone_state *state, void *ninstr_or_if,
238            nir_src *nsrc, const nir_src *src)
239{
240   nsrc->is_ssa = src->is_ssa;
241   if (src->is_ssa) {
242      nsrc->ssa = remap_local(state, src->ssa);
243   } else {
244      nsrc->reg.reg = remap_reg(state, src->reg.reg);
245      if (src->reg.indirect) {
246         nsrc->reg.indirect = malloc(sizeof(nir_src));
247         __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
248      }
249      nsrc->reg.base_offset = src->reg.base_offset;
250   }
251}
252
253static void
254__clone_dst(clone_state *state, nir_instr *ninstr,
255            nir_dest *ndst, const nir_dest *dst)
256{
257   ndst->is_ssa = dst->is_ssa;
258   if (dst->is_ssa) {
259      nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components,
260                        dst->ssa.bit_size, NULL);
261      if (likely(state->remap_table))
262         add_remap(state, &ndst->ssa, &dst->ssa);
263   } else {
264      ndst->reg.reg = remap_reg(state, dst->reg.reg);
265      if (dst->reg.indirect) {
266         ndst->reg.indirect = malloc(sizeof(nir_src));
267         __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
268      }
269      ndst->reg.base_offset = dst->reg.base_offset;
270   }
271}
272
273static nir_alu_instr *
274clone_alu(clone_state *state, const nir_alu_instr *alu)
275{
276   nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
277   nalu->exact = alu->exact;
278   nalu->no_signed_wrap = alu->no_signed_wrap;
279   nalu->no_unsigned_wrap = alu->no_unsigned_wrap;
280
281   __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
282   nalu->dest.saturate = alu->dest.saturate;
283   nalu->dest.write_mask = alu->dest.write_mask;
284
285   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
286      __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src);
287      nalu->src[i].negate = alu->src[i].negate;
288      nalu->src[i].abs = alu->src[i].abs;
289      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
290             sizeof(nalu->src[i].swizzle));
291   }
292
293   return nalu;
294}
295
296nir_alu_instr *
297nir_alu_instr_clone(nir_shader *shader, const nir_alu_instr *orig)
298{
299   clone_state state = {
300      .allow_remap_fallback = true,
301      .ns = shader,
302   };
303   return clone_alu(&state, orig);
304}
305
306static nir_deref_instr *
307clone_deref_instr(clone_state *state, const nir_deref_instr *deref)
308{
309   nir_deref_instr *nderef =
310      nir_deref_instr_create(state->ns, deref->deref_type);
311
312   __clone_dst(state, &nderef->instr, &nderef->dest, &deref->dest);
313
314   nderef->modes = deref->modes;
315   nderef->type = deref->type;
316
317   if (deref->deref_type == nir_deref_type_var) {
318      nderef->var = remap_var(state, deref->var);
319      return nderef;
320   }
321
322   __clone_src(state, &nderef->instr, &nderef->parent, &deref->parent);
323
324   switch (deref->deref_type) {
325   case nir_deref_type_struct:
326      nderef->strct.index = deref->strct.index;
327      break;
328
329   case nir_deref_type_array:
330   case nir_deref_type_ptr_as_array:
331      __clone_src(state, &nderef->instr,
332                  &nderef->arr.index, &deref->arr.index);
333      break;
334
335   case nir_deref_type_array_wildcard:
336      /* Nothing to do */
337      break;
338
339   case nir_deref_type_cast:
340      nderef->cast.ptr_stride = deref->cast.ptr_stride;
341      nderef->cast.align_mul = deref->cast.align_mul;
342      nderef->cast.align_offset = deref->cast.align_offset;
343      break;
344
345   default:
346      unreachable("Invalid instruction deref type");
347   }
348
349   return nderef;
350}
351
352static nir_intrinsic_instr *
353clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr)
354{
355   nir_intrinsic_instr *nitr =
356      nir_intrinsic_instr_create(state->ns, itr->intrinsic);
357
358   unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
359
360   if (nir_intrinsic_infos[itr->intrinsic].has_dest)
361      __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest);
362
363   nitr->num_components = itr->num_components;
364   memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
365
366   for (unsigned i = 0; i < num_srcs; i++)
367      __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]);
368
369   return nitr;
370}
371
372static nir_load_const_instr *
373clone_load_const(clone_state *state, const nir_load_const_instr *lc)
374{
375   nir_load_const_instr *nlc =
376      nir_load_const_instr_create(state->ns, lc->def.num_components,
377                                  lc->def.bit_size);
378
379   memcpy(&nlc->value, &lc->value, sizeof(*nlc->value) * lc->def.num_components);
380
381   add_remap(state, &nlc->def, &lc->def);
382
383   return nlc;
384}
385
386static nir_ssa_undef_instr *
387clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
388{
389   nir_ssa_undef_instr *nsa =
390      nir_ssa_undef_instr_create(state->ns, sa->def.num_components,
391                                 sa->def.bit_size);
392
393   add_remap(state, &nsa->def, &sa->def);
394
395   return nsa;
396}
397
398static nir_tex_instr *
399clone_tex(clone_state *state, const nir_tex_instr *tex)
400{
401   nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs);
402
403   ntex->sampler_dim = tex->sampler_dim;
404   ntex->dest_type = tex->dest_type;
405   ntex->op = tex->op;
406   __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest);
407   for (unsigned i = 0; i < ntex->num_srcs; i++) {
408      ntex->src[i].src_type = tex->src[i].src_type;
409      __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src);
410   }
411   ntex->coord_components = tex->coord_components;
412   ntex->is_array = tex->is_array;
413   ntex->array_is_lowered_cube = tex->array_is_lowered_cube;
414   ntex->is_shadow = tex->is_shadow;
415   ntex->is_new_style_shadow = tex->is_new_style_shadow;
416   ntex->is_sparse = tex->is_sparse;
417   ntex->component = tex->component;
418   memcpy(ntex->tg4_offsets, tex->tg4_offsets, sizeof(tex->tg4_offsets));
419
420   ntex->texture_index = tex->texture_index;
421   ntex->sampler_index = tex->sampler_index;
422
423   ntex->texture_non_uniform = tex->texture_non_uniform;
424   ntex->sampler_non_uniform = tex->sampler_non_uniform;
425
426   return ntex;
427}
428
429static nir_phi_instr *
430clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk)
431{
432   nir_phi_instr *nphi = nir_phi_instr_create(state->ns);
433
434   __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest);
435
436   /* Cloning a phi node is a bit different from other instructions.  The
437    * sources of phi instructions are the only time where we can use an SSA
438    * def before it is defined.  In order to handle this, we just copy over
439    * the sources from the old phi instruction directly and then fix them up
440    * in a second pass once all the instrutions in the function have been
441    * properly cloned.
442    *
443    * In order to ensure that the copied sources (which are the same as the
444    * old phi instruction's sources for now) don't get inserted into the old
445    * shader's use-def lists, we have to add the phi instruction *before* we
446    * set up its sources.
447    */
448   nir_instr_insert_after_block(nblk, &nphi->instr);
449
450   foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
451      nir_phi_src *nsrc = nir_phi_instr_add_src(nphi, src->pred, src->src);
452
453      /* Stash it in the list of phi sources.  We'll walk this list and fix up
454       * sources at the very end of clone_function_impl.
455       */
456      list_add(&nsrc->src.use_link, &state->phi_srcs);
457   }
458
459   return nphi;
460}
461
462static nir_jump_instr *
463clone_jump(clone_state *state, const nir_jump_instr *jmp)
464{
465   /* These aren't handled because they require special block linking */
466   assert(jmp->type != nir_jump_goto && jmp->type != nir_jump_goto_if);
467
468   nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type);
469
470   return njmp;
471}
472
473static nir_call_instr *
474clone_call(clone_state *state, const nir_call_instr *call)
475{
476   nir_function *ncallee = remap_global(state, call->callee);
477   nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
478
479   for (unsigned i = 0; i < ncall->num_params; i++)
480      __clone_src(state, ncall, &ncall->params[i], &call->params[i]);
481
482   return ncall;
483}
484
485static nir_instr *
486clone_instr(clone_state *state, const nir_instr *instr)
487{
488   switch (instr->type) {
489   case nir_instr_type_alu:
490      return &clone_alu(state, nir_instr_as_alu(instr))->instr;
491   case nir_instr_type_deref:
492      return &clone_deref_instr(state, nir_instr_as_deref(instr))->instr;
493   case nir_instr_type_intrinsic:
494      return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr;
495   case nir_instr_type_load_const:
496      return &clone_load_const(state, nir_instr_as_load_const(instr))->instr;
497   case nir_instr_type_ssa_undef:
498      return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr;
499   case nir_instr_type_tex:
500      return &clone_tex(state, nir_instr_as_tex(instr))->instr;
501   case nir_instr_type_phi:
502      unreachable("Cannot clone phis with clone_instr");
503   case nir_instr_type_jump:
504      return &clone_jump(state, nir_instr_as_jump(instr))->instr;
505   case nir_instr_type_call:
506      return &clone_call(state, nir_instr_as_call(instr))->instr;
507   case nir_instr_type_parallel_copy:
508      unreachable("Cannot clone parallel copies");
509   default:
510      unreachable("bad instr type");
511      return NULL;
512   }
513}
514
515nir_instr *
516nir_instr_clone(nir_shader *shader, const nir_instr *orig)
517{
518   clone_state state = {
519      .allow_remap_fallback = true,
520      .ns = shader,
521   };
522   return clone_instr(&state, orig);
523}
524
525static nir_block *
526clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk)
527{
528   /* Don't actually create a new block.  Just use the one from the tail of
529    * the list.  NIR guarantees that the tail of the list is a block and that
530    * no two blocks are side-by-side in the IR;  It should be empty.
531    */
532   nir_block *nblk =
533      exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
534   assert(nblk->cf_node.type == nir_cf_node_block);
535   assert(exec_list_is_empty(&nblk->instr_list));
536
537   /* We need this for phi sources */
538   add_remap(state, nblk, blk);
539
540   nir_foreach_instr(instr, blk) {
541      if (instr->type == nir_instr_type_phi) {
542         /* Phi instructions are a bit of a special case when cloning because
543          * we don't want inserting the instruction to automatically handle
544          * use/defs for us.  Instead, we need to wait until all the
545          * blocks/instructions are in so that we can set their sources up.
546          */
547         clone_phi(state, nir_instr_as_phi(instr), nblk);
548      } else {
549         nir_instr *ninstr = clone_instr(state, instr);
550         nir_instr_insert_after_block(nblk, ninstr);
551      }
552   }
553
554   return nblk;
555}
556
557static void
558clone_cf_list(clone_state *state, struct exec_list *dst,
559              const struct exec_list *list);
560
561static nir_if *
562clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i)
563{
564   nir_if *ni = nir_if_create(state->ns);
565   ni->control = i->control;
566
567   __clone_src(state, ni, &ni->condition, &i->condition);
568
569   nir_cf_node_insert_end(cf_list, &ni->cf_node);
570
571   clone_cf_list(state, &ni->then_list, &i->then_list);
572   clone_cf_list(state, &ni->else_list, &i->else_list);
573
574   return ni;
575}
576
577static nir_loop *
578clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
579{
580   nir_loop *nloop = nir_loop_create(state->ns);
581   nloop->control = loop->control;
582   nloop->partially_unrolled = loop->partially_unrolled;
583
584   nir_cf_node_insert_end(cf_list, &nloop->cf_node);
585
586   clone_cf_list(state, &nloop->body, &loop->body);
587
588   return nloop;
589}
590
591/* clone list of nir_cf_node: */
592static void
593clone_cf_list(clone_state *state, struct exec_list *dst,
594              const struct exec_list *list)
595{
596   foreach_list_typed(nir_cf_node, cf, node, list) {
597      switch (cf->type) {
598      case nir_cf_node_block:
599         clone_block(state, dst, nir_cf_node_as_block(cf));
600         break;
601      case nir_cf_node_if:
602         clone_if(state, dst, nir_cf_node_as_if(cf));
603         break;
604      case nir_cf_node_loop:
605         clone_loop(state, dst, nir_cf_node_as_loop(cf));
606         break;
607      default:
608         unreachable("bad cf type");
609      }
610   }
611}
612
613/* After we've cloned almost everything, we have to walk the list of phi
614 * sources and fix them up.  Thanks to loops, the block and SSA value for a
615 * phi source may not be defined when we first encounter it.  Instead, we
616 * add it to the phi_srcs list and we fix it up here.
617 */
618static void
619fixup_phi_srcs(clone_state *state)
620{
621   list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) {
622      src->pred = remap_local(state, src->pred);
623
624      /* Remove from this list */
625      list_del(&src->src.use_link);
626
627      if (src->src.is_ssa) {
628         src->src.ssa = remap_local(state, src->src.ssa);
629         list_addtail(&src->src.use_link, &src->src.ssa->uses);
630      } else {
631         src->src.reg.reg = remap_reg(state, src->src.reg.reg);
632         list_addtail(&src->src.use_link, &src->src.reg.reg->uses);
633      }
634   }
635   assert(list_is_empty(&state->phi_srcs));
636}
637
638void
639nir_cf_list_clone(nir_cf_list *dst, nir_cf_list *src, nir_cf_node *parent,
640                  struct hash_table *remap_table)
641{
642   exec_list_make_empty(&dst->list);
643   dst->impl = src->impl;
644
645   if (exec_list_is_empty(&src->list))
646      return;
647
648   clone_state state;
649   init_clone_state(&state, remap_table, false, true);
650
651   /* We use the same shader */
652   state.ns = src->impl->function->shader;
653
654   /* The control-flow code assumes that the list of cf_nodes always starts
655    * and ends with a block.  We start by adding an empty block.
656    */
657   nir_block *nblk = nir_block_create(state.ns);
658   nblk->cf_node.parent = parent;
659   exec_list_push_tail(&dst->list, &nblk->cf_node.node);
660
661   clone_cf_list(&state, &dst->list, &src->list);
662
663   fixup_phi_srcs(&state);
664
665   if (!remap_table)
666      free_clone_state(&state);
667}
668
669static nir_function_impl *
670clone_function_impl(clone_state *state, const nir_function_impl *fi)
671{
672   nir_function_impl *nfi = nir_function_impl_create_bare(state->ns);
673
674   clone_var_list(state, &nfi->locals, &fi->locals);
675   clone_reg_list(state, &nfi->registers, &fi->registers);
676   nfi->reg_alloc = fi->reg_alloc;
677
678   assert(list_is_empty(&state->phi_srcs));
679
680   clone_cf_list(state, &nfi->body, &fi->body);
681
682   fixup_phi_srcs(state);
683
684   /* All metadata is invalidated in the cloning process */
685   nfi->valid_metadata = 0;
686
687   return nfi;
688}
689
690nir_function_impl *
691nir_function_impl_clone(nir_shader *shader, const nir_function_impl *fi)
692{
693   clone_state state;
694   init_clone_state(&state, NULL, false, false);
695
696   state.ns = shader;
697
698   nir_function_impl *nfi = clone_function_impl(&state, fi);
699
700   free_clone_state(&state);
701
702   return nfi;
703}
704
705static nir_function *
706clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns)
707{
708   assert(ns == state->ns);
709   nir_function *nfxn = nir_function_create(ns, fxn->name);
710
711   /* Needed for call instructions */
712   add_remap(state, nfxn, fxn);
713
714   nfxn->num_params = fxn->num_params;
715   if (fxn->num_params) {
716           nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params);
717           memcpy(nfxn->params, fxn->params, sizeof(nir_parameter) * fxn->num_params);
718   }
719   nfxn->is_entrypoint = fxn->is_entrypoint;
720
721   /* At first glance, it looks like we should clone the function_impl here.
722    * However, call instructions need to be able to reference at least the
723    * function and those will get processed as we clone the function_impls.
724    * We stop here and do function_impls as a second pass.
725    */
726
727   return nfxn;
728}
729
730nir_shader *
731nir_shader_clone(void *mem_ctx, const nir_shader *s)
732{
733   clone_state state;
734   init_clone_state(&state, NULL, true, false);
735
736   nir_shader *ns = nir_shader_create(mem_ctx, s->info.stage, s->options, NULL);
737   state.ns = ns;
738
739   clone_var_list(&state, &ns->variables, &s->variables);
740
741   /* Go through and clone functions */
742   foreach_list_typed(nir_function, fxn, node, &s->functions)
743      clone_function(&state, fxn, ns);
744
745   /* Only after all functions are cloned can we clone the actual function
746    * implementations.  This is because nir_call_instrs need to reference the
747    * functions of other functions and we don't know what order the functions
748    * will have in the list.
749    */
750   nir_foreach_function(fxn, s) {
751      nir_function *nfxn = remap_global(&state, fxn);
752      nfxn->impl = clone_function_impl(&state, fxn->impl);
753      nfxn->impl->function = nfxn;
754   }
755
756   ns->info = s->info;
757   ns->info.name = ralloc_strdup(ns, ns->info.name);
758   if (ns->info.label)
759      ns->info.label = ralloc_strdup(ns, ns->info.label);
760
761   ns->num_inputs = s->num_inputs;
762   ns->num_uniforms = s->num_uniforms;
763   ns->num_outputs = s->num_outputs;
764   ns->scratch_size = s->scratch_size;
765
766   ns->constant_data_size = s->constant_data_size;
767   if (s->constant_data_size > 0) {
768      ns->constant_data = ralloc_size(ns, s->constant_data_size);
769      memcpy(ns->constant_data, s->constant_data, s->constant_data_size);
770   }
771
772   free_clone_state(&state);
773
774   return ns;
775}
776
777/** Overwrites dst and replaces its contents with src
778 *
779 * Everything ralloc parented to dst and src itself (but not its children)
780 * will be freed.
781 *
782 * This should only be used by test code which needs to swap out shaders with
783 * a cloned or deserialized version.
784 */
785void
786nir_shader_replace(nir_shader *dst, nir_shader *src)
787{
788   /* Delete all of dest's ralloc children */
789   void *dead_ctx = ralloc_context(NULL);
790   ralloc_adopt(dead_ctx, dst);
791   ralloc_free(dead_ctx);
792
793   list_for_each_entry_safe(nir_instr, instr, &dst->gc_list, gc_node) {
794      nir_instr_free(instr);
795   }
796
797   /* Re-parent all of src's ralloc children to dst */
798   ralloc_adopt(dst, src);
799
800   memcpy(dst, src, sizeof(*dst));
801
802   /* We have to move all the linked lists over separately because we need the
803    * pointers in the list elements to point to the lists in dst and not src.
804    */
805   list_replace(&src->gc_list, &dst->gc_list);
806   list_inithead(&src->gc_list);
807   exec_list_move_nodes_to(&src->variables, &dst->variables);
808
809   /* Now move the functions over.  This takes a tiny bit more work */
810   exec_list_move_nodes_to(&src->functions, &dst->functions);
811   nir_foreach_function(function, dst)
812      function->shader = dst;
813
814   ralloc_free(src);
815}
816