1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2015 Broadcom
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include "nir.h"
25b8e80941Smrg#include "nir_builder.h"
26b8e80941Smrg
27b8e80941Smrg/** @file nir_opt_undef.c
28b8e80941Smrg *
29b8e80941Smrg * Handles optimization of operations involving ssa_undef.
30b8e80941Smrg */
31b8e80941Smrg
32b8e80941Smrg/**
33b8e80941Smrg * Turn conditional selects between an undef and some other value into a move
34b8e80941Smrg * of that other value (on the assumption that the condition's going to be
35b8e80941Smrg * choosing the defined value).  This reduces work after if flattening when
36b8e80941Smrg * each side of the if is defining a variable.
37b8e80941Smrg */
38b8e80941Smrgstatic bool
39b8e80941Smrgopt_undef_csel(nir_alu_instr *instr)
40b8e80941Smrg{
41b8e80941Smrg   if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel)
42b8e80941Smrg      return false;
43b8e80941Smrg
44b8e80941Smrg   assert(instr->dest.dest.is_ssa);
45b8e80941Smrg
46b8e80941Smrg   for (int i = 1; i <= 2; i++) {
47b8e80941Smrg      if (!instr->src[i].src.is_ssa)
48b8e80941Smrg         continue;
49b8e80941Smrg
50b8e80941Smrg      nir_instr *parent = instr->src[i].src.ssa->parent_instr;
51b8e80941Smrg      if (parent->type != nir_instr_type_ssa_undef)
52b8e80941Smrg         continue;
53b8e80941Smrg
54b8e80941Smrg      /* We can't just use nir_alu_src_copy, because we need the def/use
55b8e80941Smrg       * updated.
56b8e80941Smrg       */
57b8e80941Smrg      nir_instr_rewrite_src(&instr->instr, &instr->src[0].src,
58b8e80941Smrg                            instr->src[i == 1 ? 2 : 1].src);
59b8e80941Smrg      nir_alu_src_copy(&instr->src[0], &instr->src[i == 1 ? 2 : 1],
60b8e80941Smrg                       ralloc_parent(instr));
61b8e80941Smrg
62b8e80941Smrg      nir_src empty_src;
63b8e80941Smrg      memset(&empty_src, 0, sizeof(empty_src));
64b8e80941Smrg      nir_instr_rewrite_src(&instr->instr, &instr->src[1].src, empty_src);
65b8e80941Smrg      nir_instr_rewrite_src(&instr->instr, &instr->src[2].src, empty_src);
66b8e80941Smrg      instr->op = nir_op_imov;
67b8e80941Smrg
68b8e80941Smrg      return true;
69b8e80941Smrg   }
70b8e80941Smrg
71b8e80941Smrg   return false;
72b8e80941Smrg}
73b8e80941Smrg
74b8e80941Smrg/**
75b8e80941Smrg * Replace vecN(undef, undef, ...) with a single undef.
76b8e80941Smrg */
77b8e80941Smrgstatic bool
78b8e80941Smrgopt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
79b8e80941Smrg{
80b8e80941Smrg   if (alu->op != nir_op_vec2 &&
81b8e80941Smrg       alu->op != nir_op_vec3 &&
82b8e80941Smrg       alu->op != nir_op_vec4 &&
83b8e80941Smrg       alu->op != nir_op_fmov &&
84b8e80941Smrg       alu->op != nir_op_imov)
85b8e80941Smrg      return false;
86b8e80941Smrg
87b8e80941Smrg   assert(alu->dest.dest.is_ssa);
88b8e80941Smrg
89b8e80941Smrg   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
90b8e80941Smrg      if (!alu->src[i].src.is_ssa ||
91b8e80941Smrg          alu->src[i].src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
92b8e80941Smrg         return false;
93b8e80941Smrg   }
94b8e80941Smrg
95b8e80941Smrg   b->cursor = nir_before_instr(&alu->instr);
96b8e80941Smrg   nir_ssa_def *undef = nir_ssa_undef(b, alu->dest.dest.ssa.num_components,
97b8e80941Smrg                                      nir_dest_bit_size(alu->dest.dest));
98b8e80941Smrg   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(undef));
99b8e80941Smrg
100b8e80941Smrg   return true;
101b8e80941Smrg}
102b8e80941Smrg
103b8e80941Smrg/**
104b8e80941Smrg * Remove any store intrinsics whose value is undefined (the existing
105b8e80941Smrg * value is a fine representation of "undefined").
106b8e80941Smrg */
107b8e80941Smrgstatic bool
108b8e80941Smrgopt_undef_store(nir_intrinsic_instr *intrin)
109b8e80941Smrg{
110b8e80941Smrg   int arg_index;
111b8e80941Smrg   switch (intrin->intrinsic) {
112b8e80941Smrg   case nir_intrinsic_store_deref:
113b8e80941Smrg      arg_index = 1;
114b8e80941Smrg      break;
115b8e80941Smrg   case nir_intrinsic_store_output:
116b8e80941Smrg   case nir_intrinsic_store_per_vertex_output:
117b8e80941Smrg   case nir_intrinsic_store_ssbo:
118b8e80941Smrg   case nir_intrinsic_store_shared:
119b8e80941Smrg      arg_index =  0;
120b8e80941Smrg      break;
121b8e80941Smrg   default:
122b8e80941Smrg      return false;
123b8e80941Smrg   }
124b8e80941Smrg
125b8e80941Smrg   if (!intrin->src[arg_index].is_ssa ||
126b8e80941Smrg       intrin->src[arg_index].ssa->parent_instr->type != nir_instr_type_ssa_undef)
127b8e80941Smrg      return false;
128b8e80941Smrg
129b8e80941Smrg   nir_instr_remove(&intrin->instr);
130b8e80941Smrg
131b8e80941Smrg   return true;
132b8e80941Smrg}
133b8e80941Smrg
134b8e80941Smrgbool
135b8e80941Smrgnir_opt_undef(nir_shader *shader)
136b8e80941Smrg{
137b8e80941Smrg   nir_builder b;
138b8e80941Smrg   bool progress = false;
139b8e80941Smrg
140b8e80941Smrg   nir_foreach_function(function, shader) {
141b8e80941Smrg      if (function->impl) {
142b8e80941Smrg         nir_builder_init(&b, function->impl);
143b8e80941Smrg         nir_foreach_block(block, function->impl) {
144b8e80941Smrg            nir_foreach_instr_safe(instr, block) {
145b8e80941Smrg               if (instr->type == nir_instr_type_alu) {
146b8e80941Smrg                  nir_alu_instr *alu = nir_instr_as_alu(instr);
147b8e80941Smrg
148b8e80941Smrg                  progress = opt_undef_csel(alu) || progress;
149b8e80941Smrg                  progress = opt_undef_vecN(&b, alu) || progress;
150b8e80941Smrg               } else if (instr->type == nir_instr_type_intrinsic) {
151b8e80941Smrg                  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
152b8e80941Smrg                  progress = opt_undef_store(intrin) || progress;
153b8e80941Smrg               }
154b8e80941Smrg            }
155b8e80941Smrg         }
156b8e80941Smrg
157b8e80941Smrg         if (progress) {
158b8e80941Smrg            nir_metadata_preserve(function->impl,
159b8e80941Smrg                                  nir_metadata_block_index |
160b8e80941Smrg                                  nir_metadata_dominance);
161b8e80941Smrg         } else {
162b8e80941Smrg#ifndef NDEBUG
163b8e80941Smrg            function->impl->valid_metadata &= ~nir_metadata_not_properly_reset;
164b8e80941Smrg#endif
165b8e80941Smrg         }
166b8e80941Smrg      }
167b8e80941Smrg   }
168b8e80941Smrg
169b8e80941Smrg   return progress;
170b8e80941Smrg}
171