101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2014 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg *
2301e04c3fSmrg * Authors:
2401e04c3fSmrg *    Jason Ekstrand (jason@jlekstrand.net)
2501e04c3fSmrg *
2601e04c3fSmrg */
2701e04c3fSmrg
2801e04c3fSmrg#include <inttypes.h>
2901e04c3fSmrg#include "nir_search.h"
3001e04c3fSmrg#include "nir_builder.h"
317ec681f3Smrg#include "nir_worklist.h"
3201e04c3fSmrg#include "util/half_float.h"
3301e04c3fSmrg
347ec681f3Smrg/* This should be the same as nir_search_max_comm_ops in nir_algebraic.py. */
357ec681f3Smrg#define NIR_SEARCH_MAX_COMM_OPS 8
367e102996Smaya
3701e04c3fSmrgstruct match_state {
3801e04c3fSmrg   bool inexact_match;
3901e04c3fSmrg   bool has_exact_alu;
407e102996Smaya   uint8_t comm_op_direction;
4101e04c3fSmrg   unsigned variables_seen;
427ec681f3Smrg
437ec681f3Smrg   /* Used for running the automaton on newly-constructed instructions. */
447ec681f3Smrg   struct util_dynarray *states;
457ec681f3Smrg   const struct per_op_table *pass_op_table;
467ec681f3Smrg
4701e04c3fSmrg   nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES];
487ec681f3Smrg   struct hash_table *range_ht;
4901e04c3fSmrg};
5001e04c3fSmrg
5101e04c3fSmrgstatic bool
5201e04c3fSmrgmatch_expression(const nir_search_expression *expr, nir_alu_instr *instr,
5301e04c3fSmrg                 unsigned num_components, const uint8_t *swizzle,
5401e04c3fSmrg                 struct match_state *state);
557ec681f3Smrgstatic bool
567ec681f3Smrgnir_algebraic_automaton(nir_instr *instr, struct util_dynarray *states,
577ec681f3Smrg                        const struct per_op_table *pass_op_table);
5801e04c3fSmrg
597ec681f3Smrgstatic const uint8_t identity_swizzle[NIR_MAX_VEC_COMPONENTS] =
607ec681f3Smrg{
617ec681f3Smrg    0,  1,  2,  3,
627ec681f3Smrg    4,  5,  6,  7,
637ec681f3Smrg    8,  9, 10, 11,
647ec681f3Smrg   12, 13, 14, 15,
657ec681f3Smrg};
6601e04c3fSmrg
6701e04c3fSmrg/**
6801e04c3fSmrg * Check if a source produces a value of the given type.
6901e04c3fSmrg *
7001e04c3fSmrg * Used for satisfying 'a@type' constraints.
7101e04c3fSmrg */
7201e04c3fSmrgstatic bool
7301e04c3fSmrgsrc_is_type(nir_src src, nir_alu_type type)
7401e04c3fSmrg{
7501e04c3fSmrg   assert(type != nir_type_invalid);
7601e04c3fSmrg
7701e04c3fSmrg   if (!src.is_ssa)
7801e04c3fSmrg      return false;
7901e04c3fSmrg
8001e04c3fSmrg   if (src.ssa->parent_instr->type == nir_instr_type_alu) {
8101e04c3fSmrg      nir_alu_instr *src_alu = nir_instr_as_alu(src.ssa->parent_instr);
8201e04c3fSmrg      nir_alu_type output_type = nir_op_infos[src_alu->op].output_type;
8301e04c3fSmrg
8401e04c3fSmrg      if (type == nir_type_bool) {
8501e04c3fSmrg         switch (src_alu->op) {
8601e04c3fSmrg         case nir_op_iand:
8701e04c3fSmrg         case nir_op_ior:
8801e04c3fSmrg         case nir_op_ixor:
8901e04c3fSmrg            return src_is_type(src_alu->src[0].src, nir_type_bool) &&
9001e04c3fSmrg                   src_is_type(src_alu->src[1].src, nir_type_bool);
9101e04c3fSmrg         case nir_op_inot:
9201e04c3fSmrg            return src_is_type(src_alu->src[0].src, nir_type_bool);
9301e04c3fSmrg         default:
9401e04c3fSmrg            break;
9501e04c3fSmrg         }
9601e04c3fSmrg      }
9701e04c3fSmrg
9801e04c3fSmrg      return nir_alu_type_get_base_type(output_type) == type;
9901e04c3fSmrg   } else if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
10001e04c3fSmrg      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
10101e04c3fSmrg
10201e04c3fSmrg      if (type == nir_type_bool) {
10301e04c3fSmrg         return intr->intrinsic == nir_intrinsic_load_front_face ||
10401e04c3fSmrg                intr->intrinsic == nir_intrinsic_load_helper_invocation;
10501e04c3fSmrg      }
10601e04c3fSmrg   }
10701e04c3fSmrg
10801e04c3fSmrg   /* don't know */
10901e04c3fSmrg   return false;
11001e04c3fSmrg}
11101e04c3fSmrg
1127e102996Smayastatic bool
1137e102996Smayanir_op_matches_search_op(nir_op nop, uint16_t sop)
1147e102996Smaya{
1157e102996Smaya   if (sop <= nir_last_opcode)
1167e102996Smaya      return nop == sop;
1177e102996Smaya
1187e102996Smaya#define MATCH_FCONV_CASE(op) \
1197e102996Smaya   case nir_search_op_##op: \
1207e102996Smaya      return nop == nir_op_##op##16 || \
1217e102996Smaya             nop == nir_op_##op##32 || \
1227e102996Smaya             nop == nir_op_##op##64;
1237e102996Smaya
1247e102996Smaya#define MATCH_ICONV_CASE(op) \
1257e102996Smaya   case nir_search_op_##op: \
1267e102996Smaya      return nop == nir_op_##op##8 || \
1277e102996Smaya             nop == nir_op_##op##16 || \
1287e102996Smaya             nop == nir_op_##op##32 || \
1297e102996Smaya             nop == nir_op_##op##64;
1307e102996Smaya
1317e102996Smaya#define MATCH_BCONV_CASE(op) \
1327e102996Smaya   case nir_search_op_##op: \
1337e102996Smaya      return nop == nir_op_##op##1 || \
1347e102996Smaya             nop == nir_op_##op##32;
1357e102996Smaya
1367e102996Smaya   switch (sop) {
1377e102996Smaya   MATCH_FCONV_CASE(i2f)
1387e102996Smaya   MATCH_FCONV_CASE(u2f)
1397e102996Smaya   MATCH_FCONV_CASE(f2f)
1407e102996Smaya   MATCH_ICONV_CASE(f2u)
1417e102996Smaya   MATCH_ICONV_CASE(f2i)
1427e102996Smaya   MATCH_ICONV_CASE(u2u)
1437e102996Smaya   MATCH_ICONV_CASE(i2i)
1447e102996Smaya   MATCH_FCONV_CASE(b2f)
1457e102996Smaya   MATCH_ICONV_CASE(b2i)
1467e102996Smaya   MATCH_BCONV_CASE(i2b)
1477e102996Smaya   MATCH_BCONV_CASE(f2b)
1487e102996Smaya   default:
1497e102996Smaya      unreachable("Invalid nir_search_op");
1507e102996Smaya   }
1517e102996Smaya
1527e102996Smaya#undef MATCH_FCONV_CASE
1537e102996Smaya#undef MATCH_ICONV_CASE
1547e102996Smaya#undef MATCH_BCONV_CASE
1557e102996Smaya}
1567e102996Smaya
1577e102996Smayauint16_t
1587e102996Smayanir_search_op_for_nir_op(nir_op nop)
1597e102996Smaya{
1607e102996Smaya#define MATCH_FCONV_CASE(op) \
1617e102996Smaya   case nir_op_##op##16: \
1627e102996Smaya   case nir_op_##op##32: \
1637e102996Smaya   case nir_op_##op##64: \
1647e102996Smaya      return nir_search_op_##op;
1657e102996Smaya
1667e102996Smaya#define MATCH_ICONV_CASE(op) \
1677e102996Smaya   case nir_op_##op##8: \
1687e102996Smaya   case nir_op_##op##16: \
1697e102996Smaya   case nir_op_##op##32: \
1707e102996Smaya   case nir_op_##op##64: \
1717e102996Smaya      return nir_search_op_##op;
1727e102996Smaya
1737e102996Smaya#define MATCH_BCONV_CASE(op) \
1747e102996Smaya   case nir_op_##op##1: \
1757e102996Smaya   case nir_op_##op##32: \
1767e102996Smaya      return nir_search_op_##op;
1777e102996Smaya
1787e102996Smaya
1797e102996Smaya   switch (nop) {
1807e102996Smaya   MATCH_FCONV_CASE(i2f)
1817e102996Smaya   MATCH_FCONV_CASE(u2f)
1827e102996Smaya   MATCH_FCONV_CASE(f2f)
1837e102996Smaya   MATCH_ICONV_CASE(f2u)
1847e102996Smaya   MATCH_ICONV_CASE(f2i)
1857e102996Smaya   MATCH_ICONV_CASE(u2u)
1867e102996Smaya   MATCH_ICONV_CASE(i2i)
1877e102996Smaya   MATCH_FCONV_CASE(b2f)
1887e102996Smaya   MATCH_ICONV_CASE(b2i)
1897e102996Smaya   MATCH_BCONV_CASE(i2b)
1907e102996Smaya   MATCH_BCONV_CASE(f2b)
1917e102996Smaya   default:
1927e102996Smaya      return nop;
1937e102996Smaya   }
1947e102996Smaya
1957e102996Smaya#undef MATCH_FCONV_CASE
1967e102996Smaya#undef MATCH_ICONV_CASE
1977e102996Smaya#undef MATCH_BCONV_CASE
1987e102996Smaya}
1997e102996Smaya
2007e102996Smayastatic nir_op
2017e102996Smayanir_op_for_search_op(uint16_t sop, unsigned bit_size)
2027e102996Smaya{
2037e102996Smaya   if (sop <= nir_last_opcode)
2047e102996Smaya      return sop;
2057e102996Smaya
2067e102996Smaya#define RET_FCONV_CASE(op) \
2077e102996Smaya   case nir_search_op_##op: \
2087e102996Smaya      switch (bit_size) { \
2097e102996Smaya      case 16: return nir_op_##op##16; \
2107e102996Smaya      case 32: return nir_op_##op##32; \
2117e102996Smaya      case 64: return nir_op_##op##64; \
2127e102996Smaya      default: unreachable("Invalid bit size"); \
2137e102996Smaya      }
2147e102996Smaya
2157e102996Smaya#define RET_ICONV_CASE(op) \
2167e102996Smaya   case nir_search_op_##op: \
2177e102996Smaya      switch (bit_size) { \
2187e102996Smaya      case 8:  return nir_op_##op##8; \
2197e102996Smaya      case 16: return nir_op_##op##16; \
2207e102996Smaya      case 32: return nir_op_##op##32; \
2217e102996Smaya      case 64: return nir_op_##op##64; \
2227e102996Smaya      default: unreachable("Invalid bit size"); \
2237e102996Smaya      }
2247e102996Smaya
2257e102996Smaya#define RET_BCONV_CASE(op) \
2267e102996Smaya   case nir_search_op_##op: \
2277e102996Smaya      switch (bit_size) { \
2287e102996Smaya      case 1: return nir_op_##op##1; \
2297e102996Smaya      case 32: return nir_op_##op##32; \
2307e102996Smaya      default: unreachable("Invalid bit size"); \
2317e102996Smaya      }
2327e102996Smaya
2337e102996Smaya   switch (sop) {
2347e102996Smaya   RET_FCONV_CASE(i2f)
2357e102996Smaya   RET_FCONV_CASE(u2f)
2367e102996Smaya   RET_FCONV_CASE(f2f)
2377e102996Smaya   RET_ICONV_CASE(f2u)
2387e102996Smaya   RET_ICONV_CASE(f2i)
2397e102996Smaya   RET_ICONV_CASE(u2u)
2407e102996Smaya   RET_ICONV_CASE(i2i)
2417e102996Smaya   RET_FCONV_CASE(b2f)
2427e102996Smaya   RET_ICONV_CASE(b2i)
2437e102996Smaya   RET_BCONV_CASE(i2b)
2447e102996Smaya   RET_BCONV_CASE(f2b)
2457e102996Smaya   default:
2467e102996Smaya      unreachable("Invalid nir_search_op");
2477e102996Smaya   }
2487e102996Smaya
2497e102996Smaya#undef RET_FCONV_CASE
2507e102996Smaya#undef RET_ICONV_CASE
2517e102996Smaya#undef RET_BCONV_CASE
2527e102996Smaya}
2537e102996Smaya
25401e04c3fSmrgstatic bool
25501e04c3fSmrgmatch_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
25601e04c3fSmrg            unsigned num_components, const uint8_t *swizzle,
25701e04c3fSmrg            struct match_state *state)
25801e04c3fSmrg{
25901e04c3fSmrg   uint8_t new_swizzle[NIR_MAX_VEC_COMPONENTS];
26001e04c3fSmrg
26101e04c3fSmrg   /* Searching only works on SSA values because, if it's not SSA, we can't
26201e04c3fSmrg    * know if the value changed between one instance of that value in the
26301e04c3fSmrg    * expression and another.  Also, the replace operation will place reads of
26401e04c3fSmrg    * that value right before the last instruction in the expression we're
26501e04c3fSmrg    * replacing so those reads will happen after the original reads and may
26601e04c3fSmrg    * not be valid if they're register reads.
26701e04c3fSmrg    */
2687e102996Smaya   assert(instr->src[src].src.is_ssa);
26901e04c3fSmrg
27001e04c3fSmrg   /* If the source is an explicitly sized source, then we need to reset
27101e04c3fSmrg    * both the number of components and the swizzle.
27201e04c3fSmrg    */
27301e04c3fSmrg   if (nir_op_infos[instr->op].input_sizes[src] != 0) {
27401e04c3fSmrg      num_components = nir_op_infos[instr->op].input_sizes[src];
27501e04c3fSmrg      swizzle = identity_swizzle;
27601e04c3fSmrg   }
27701e04c3fSmrg
27801e04c3fSmrg   for (unsigned i = 0; i < num_components; ++i)
27901e04c3fSmrg      new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];
28001e04c3fSmrg
28101e04c3fSmrg   /* If the value has a specific bit size and it doesn't match, bail */
2827e102996Smaya   if (value->bit_size > 0 &&
28301e04c3fSmrg       nir_src_bit_size(instr->src[src].src) != value->bit_size)
28401e04c3fSmrg      return false;
28501e04c3fSmrg
28601e04c3fSmrg   switch (value->type) {
28701e04c3fSmrg   case nir_search_value_expression:
28801e04c3fSmrg      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
28901e04c3fSmrg         return false;
29001e04c3fSmrg
29101e04c3fSmrg      return match_expression(nir_search_value_as_expression(value),
29201e04c3fSmrg                              nir_instr_as_alu(instr->src[src].src.ssa->parent_instr),
29301e04c3fSmrg                              num_components, new_swizzle, state);
29401e04c3fSmrg
29501e04c3fSmrg   case nir_search_value_variable: {
29601e04c3fSmrg      nir_search_variable *var = nir_search_value_as_variable(value);
29701e04c3fSmrg      assert(var->variable < NIR_SEARCH_MAX_VARIABLES);
29801e04c3fSmrg
29901e04c3fSmrg      if (state->variables_seen & (1 << var->variable)) {
30001e04c3fSmrg         if (state->variables[var->variable].src.ssa != instr->src[src].src.ssa)
30101e04c3fSmrg            return false;
30201e04c3fSmrg
30301e04c3fSmrg         assert(!instr->src[src].abs && !instr->src[src].negate);
30401e04c3fSmrg
30501e04c3fSmrg         for (unsigned i = 0; i < num_components; ++i) {
30601e04c3fSmrg            if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
30701e04c3fSmrg               return false;
30801e04c3fSmrg         }
30901e04c3fSmrg
31001e04c3fSmrg         return true;
31101e04c3fSmrg      } else {
31201e04c3fSmrg         if (var->is_constant &&
31301e04c3fSmrg             instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
31401e04c3fSmrg            return false;
31501e04c3fSmrg
3167ec681f3Smrg         if (var->cond && !var->cond(state->range_ht, instr,
3177ec681f3Smrg                                     src, num_components, new_swizzle))
31801e04c3fSmrg            return false;
31901e04c3fSmrg
32001e04c3fSmrg         if (var->type != nir_type_invalid &&
32101e04c3fSmrg             !src_is_type(instr->src[src].src, var->type))
32201e04c3fSmrg            return false;
32301e04c3fSmrg
32401e04c3fSmrg         state->variables_seen |= (1 << var->variable);
32501e04c3fSmrg         state->variables[var->variable].src = instr->src[src].src;
32601e04c3fSmrg         state->variables[var->variable].abs = false;
32701e04c3fSmrg         state->variables[var->variable].negate = false;
32801e04c3fSmrg
32901e04c3fSmrg         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) {
33001e04c3fSmrg            if (i < num_components)
33101e04c3fSmrg               state->variables[var->variable].swizzle[i] = new_swizzle[i];
33201e04c3fSmrg            else
33301e04c3fSmrg               state->variables[var->variable].swizzle[i] = 0;
33401e04c3fSmrg         }
33501e04c3fSmrg
33601e04c3fSmrg         return true;
33701e04c3fSmrg      }
33801e04c3fSmrg   }
33901e04c3fSmrg
34001e04c3fSmrg   case nir_search_value_constant: {
34101e04c3fSmrg      nir_search_constant *const_val = nir_search_value_as_constant(value);
34201e04c3fSmrg
34301e04c3fSmrg      if (!nir_src_is_const(instr->src[src].src))
34401e04c3fSmrg         return false;
34501e04c3fSmrg
34601e04c3fSmrg      switch (const_val->type) {
3477ec681f3Smrg      case nir_type_float: {
3487ec681f3Smrg         nir_load_const_instr *const load =
3497ec681f3Smrg            nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
3507ec681f3Smrg
3517ec681f3Smrg         /* There are 8-bit and 1-bit integer types, but there are no 8-bit or
3527ec681f3Smrg          * 1-bit float types.  This prevents potential assertion failures in
3537ec681f3Smrg          * nir_src_comp_as_float.
3547ec681f3Smrg          */
3557ec681f3Smrg         if (load->def.bit_size < 16)
3567ec681f3Smrg            return false;
3577ec681f3Smrg
35801e04c3fSmrg         for (unsigned i = 0; i < num_components; ++i) {
35901e04c3fSmrg            double val = nir_src_comp_as_float(instr->src[src].src,
36001e04c3fSmrg                                               new_swizzle[i]);
36101e04c3fSmrg            if (val != const_val->data.d)
36201e04c3fSmrg               return false;
36301e04c3fSmrg         }
36401e04c3fSmrg         return true;
3657ec681f3Smrg      }
36601e04c3fSmrg
36701e04c3fSmrg      case nir_type_int:
36801e04c3fSmrg      case nir_type_uint:
36901e04c3fSmrg      case nir_type_bool: {
37001e04c3fSmrg         unsigned bit_size = nir_src_bit_size(instr->src[src].src);
3717ec681f3Smrg         uint64_t mask = u_uintN_max(bit_size);
37201e04c3fSmrg         for (unsigned i = 0; i < num_components; ++i) {
37301e04c3fSmrg            uint64_t val = nir_src_comp_as_uint(instr->src[src].src,
37401e04c3fSmrg                                                new_swizzle[i]);
37501e04c3fSmrg            if ((val & mask) != (const_val->data.u & mask))
37601e04c3fSmrg               return false;
37701e04c3fSmrg         }
37801e04c3fSmrg         return true;
37901e04c3fSmrg      }
38001e04c3fSmrg
38101e04c3fSmrg      default:
38201e04c3fSmrg         unreachable("Invalid alu source type");
38301e04c3fSmrg      }
38401e04c3fSmrg   }
38501e04c3fSmrg
38601e04c3fSmrg   default:
38701e04c3fSmrg      unreachable("Invalid search value type");
38801e04c3fSmrg   }
38901e04c3fSmrg}
39001e04c3fSmrg
39101e04c3fSmrgstatic bool
39201e04c3fSmrgmatch_expression(const nir_search_expression *expr, nir_alu_instr *instr,
39301e04c3fSmrg                 unsigned num_components, const uint8_t *swizzle,
39401e04c3fSmrg                 struct match_state *state)
39501e04c3fSmrg{
39601e04c3fSmrg   if (expr->cond && !expr->cond(instr))
39701e04c3fSmrg      return false;
39801e04c3fSmrg
3997e102996Smaya   if (!nir_op_matches_search_op(instr->op, expr->opcode))
40001e04c3fSmrg      return false;
40101e04c3fSmrg
40201e04c3fSmrg   assert(instr->dest.dest.is_ssa);
40301e04c3fSmrg
4047e102996Smaya   if (expr->value.bit_size > 0 &&
40501e04c3fSmrg       instr->dest.dest.ssa.bit_size != expr->value.bit_size)
40601e04c3fSmrg      return false;
40701e04c3fSmrg
40801e04c3fSmrg   state->inexact_match = expr->inexact || state->inexact_match;
40901e04c3fSmrg   state->has_exact_alu = instr->exact || state->has_exact_alu;
41001e04c3fSmrg   if (state->inexact_match && state->has_exact_alu)
41101e04c3fSmrg      return false;
41201e04c3fSmrg
41301e04c3fSmrg   assert(!instr->dest.saturate);
41401e04c3fSmrg   assert(nir_op_infos[instr->op].num_inputs > 0);
41501e04c3fSmrg
41601e04c3fSmrg   /* If we have an explicitly sized destination, we can only handle the
41701e04c3fSmrg    * identity swizzle.  While dot(vec3(a, b, c).zxy) is a valid
41801e04c3fSmrg    * expression, we don't have the information right now to propagate that
41901e04c3fSmrg    * swizzle through.  We can only properly propagate swizzles if the
42001e04c3fSmrg    * instruction is vectorized.
42101e04c3fSmrg    */
42201e04c3fSmrg   if (nir_op_infos[instr->op].output_size != 0) {
42301e04c3fSmrg      for (unsigned i = 0; i < num_components; i++) {
42401e04c3fSmrg         if (swizzle[i] != i)
42501e04c3fSmrg            return false;
42601e04c3fSmrg      }
42701e04c3fSmrg   }
42801e04c3fSmrg
4297e102996Smaya   /* If this is a commutative expression and it's one of the first few, look
4307e102996Smaya    * up its direction for the current search operation.  We'll use that value
4317e102996Smaya    * to possibly flip the sources for the match.
43201e04c3fSmrg    */
4337e102996Smaya   unsigned comm_op_flip =
4347e102996Smaya      (expr->comm_expr_idx >= 0 &&
4357e102996Smaya       expr->comm_expr_idx < NIR_SEARCH_MAX_COMM_OPS) ?
4367e102996Smaya      ((state->comm_op_direction >> expr->comm_expr_idx) & 1) : 0;
43701e04c3fSmrg
43801e04c3fSmrg   bool matched = true;
43901e04c3fSmrg   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
4407ec681f3Smrg      /* 2src_commutative instructions that have 3 sources are only commutative
4417ec681f3Smrg       * in the first two sources.  Source 2 is always source 2.
4427ec681f3Smrg       */
4437ec681f3Smrg      if (!match_value(expr->srcs[i], instr,
4447ec681f3Smrg                       i < 2 ? i ^ comm_op_flip : i,
4457e102996Smaya                       num_components, swizzle, state)) {
44601e04c3fSmrg         matched = false;
44701e04c3fSmrg         break;
44801e04c3fSmrg      }
44901e04c3fSmrg   }
45001e04c3fSmrg
4517e102996Smaya   return matched;
45201e04c3fSmrg}
45301e04c3fSmrg
45401e04c3fSmrgstatic unsigned
4557e102996Smayareplace_bitsize(const nir_search_value *value, unsigned search_bitsize,
4567e102996Smaya                struct match_state *state)
45701e04c3fSmrg{
4587e102996Smaya   if (value->bit_size > 0)
4597e102996Smaya      return value->bit_size;
4607e102996Smaya   if (value->bit_size < 0)
4617e102996Smaya      return nir_src_bit_size(state->variables[-value->bit_size - 1].src);
4627e102996Smaya   return search_bitsize;
46301e04c3fSmrg}
46401e04c3fSmrg
46501e04c3fSmrgstatic nir_alu_src
46601e04c3fSmrgconstruct_value(nir_builder *build,
46701e04c3fSmrg                const nir_search_value *value,
4687e102996Smaya                unsigned num_components, unsigned search_bitsize,
46901e04c3fSmrg                struct match_state *state,
47001e04c3fSmrg                nir_instr *instr)
47101e04c3fSmrg{
47201e04c3fSmrg   switch (value->type) {
47301e04c3fSmrg   case nir_search_value_expression: {
47401e04c3fSmrg      const nir_search_expression *expr = nir_search_value_as_expression(value);
4757e102996Smaya      unsigned dst_bit_size = replace_bitsize(value, search_bitsize, state);
4767e102996Smaya      nir_op op = nir_op_for_search_op(expr->opcode, dst_bit_size);
47701e04c3fSmrg
4787e102996Smaya      if (nir_op_infos[op].output_size != 0)
4797e102996Smaya         num_components = nir_op_infos[op].output_size;
48001e04c3fSmrg
4817e102996Smaya      nir_alu_instr *alu = nir_alu_instr_create(build->shader, op);
48201e04c3fSmrg      nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components,
4837e102996Smaya                        dst_bit_size, NULL);
48401e04c3fSmrg      alu->dest.write_mask = (1 << num_components) - 1;
48501e04c3fSmrg      alu->dest.saturate = false;
48601e04c3fSmrg
48701e04c3fSmrg      /* We have no way of knowing what values in a given search expression
48801e04c3fSmrg       * map to a particular replacement value.  Therefore, if the
48901e04c3fSmrg       * expression we are replacing has any exact values, the entire
49001e04c3fSmrg       * replacement should be exact.
49101e04c3fSmrg       */
4927ec681f3Smrg      alu->exact = state->has_exact_alu || expr->exact;
49301e04c3fSmrg
4947e102996Smaya      for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
49501e04c3fSmrg         /* If the source is an explicitly sized source, then we need to reset
49601e04c3fSmrg          * the number of components to match.
49701e04c3fSmrg          */
49801e04c3fSmrg         if (nir_op_infos[alu->op].input_sizes[i] != 0)
49901e04c3fSmrg            num_components = nir_op_infos[alu->op].input_sizes[i];
50001e04c3fSmrg
50101e04c3fSmrg         alu->src[i] = construct_value(build, expr->srcs[i],
5027e102996Smaya                                       num_components, search_bitsize,
50301e04c3fSmrg                                       state, instr);
50401e04c3fSmrg      }
50501e04c3fSmrg
50601e04c3fSmrg      nir_builder_instr_insert(build, &alu->instr);
50701e04c3fSmrg
5087ec681f3Smrg      assert(alu->dest.dest.ssa.index ==
5097ec681f3Smrg             util_dynarray_num_elements(state->states, uint16_t));
5107ec681f3Smrg      util_dynarray_append(state->states, uint16_t, 0);
5117ec681f3Smrg      nir_algebraic_automaton(&alu->instr, state->states, state->pass_op_table);
5127ec681f3Smrg
51301e04c3fSmrg      nir_alu_src val;
51401e04c3fSmrg      val.src = nir_src_for_ssa(&alu->dest.dest.ssa);
51501e04c3fSmrg      val.negate = false;
51601e04c3fSmrg      val.abs = false,
51701e04c3fSmrg      memcpy(val.swizzle, identity_swizzle, sizeof val.swizzle);
51801e04c3fSmrg
51901e04c3fSmrg      return val;
52001e04c3fSmrg   }
52101e04c3fSmrg
52201e04c3fSmrg   case nir_search_value_variable: {
52301e04c3fSmrg      const nir_search_variable *var = nir_search_value_as_variable(value);
52401e04c3fSmrg      assert(state->variables_seen & (1 << var->variable));
52501e04c3fSmrg
52601e04c3fSmrg      nir_alu_src val = { NIR_SRC_INIT };
5277ec681f3Smrg      nir_alu_src_copy(&val, &state->variables[var->variable]);
52801e04c3fSmrg      assert(!var->is_constant);
52901e04c3fSmrg
5307ec681f3Smrg      for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
5317ec681f3Smrg         val.swizzle[i] = state->variables[var->variable].swizzle[var->swizzle[i]];
5327ec681f3Smrg
53301e04c3fSmrg      return val;
53401e04c3fSmrg   }
53501e04c3fSmrg
53601e04c3fSmrg   case nir_search_value_constant: {
53701e04c3fSmrg      const nir_search_constant *c = nir_search_value_as_constant(value);
5387e102996Smaya      unsigned bit_size = replace_bitsize(value, search_bitsize, state);
53901e04c3fSmrg
54001e04c3fSmrg      nir_ssa_def *cval;
54101e04c3fSmrg      switch (c->type) {
54201e04c3fSmrg      case nir_type_float:
5437e102996Smaya         cval = nir_imm_floatN_t(build, c->data.d, bit_size);
54401e04c3fSmrg         break;
54501e04c3fSmrg
54601e04c3fSmrg      case nir_type_int:
54701e04c3fSmrg      case nir_type_uint:
5487e102996Smaya         cval = nir_imm_intN_t(build, c->data.i, bit_size);
54901e04c3fSmrg         break;
55001e04c3fSmrg
55101e04c3fSmrg      case nir_type_bool:
5527e102996Smaya         cval = nir_imm_boolN_t(build, c->data.u, bit_size);
55301e04c3fSmrg         break;
5547e102996Smaya
55501e04c3fSmrg      default:
55601e04c3fSmrg         unreachable("Invalid alu source type");
55701e04c3fSmrg      }
55801e04c3fSmrg
5597ec681f3Smrg      assert(cval->index ==
5607ec681f3Smrg             util_dynarray_num_elements(state->states, uint16_t));
5617ec681f3Smrg      util_dynarray_append(state->states, uint16_t, 0);
5627ec681f3Smrg      nir_algebraic_automaton(cval->parent_instr, state->states,
5637ec681f3Smrg                              state->pass_op_table);
5647ec681f3Smrg
56501e04c3fSmrg      nir_alu_src val;
56601e04c3fSmrg      val.src = nir_src_for_ssa(cval);
56701e04c3fSmrg      val.negate = false;
56801e04c3fSmrg      val.abs = false,
56901e04c3fSmrg      memset(val.swizzle, 0, sizeof val.swizzle);
57001e04c3fSmrg
57101e04c3fSmrg      return val;
57201e04c3fSmrg   }
57301e04c3fSmrg
57401e04c3fSmrg   default:
57501e04c3fSmrg      unreachable("Invalid search value type");
57601e04c3fSmrg   }
57701e04c3fSmrg}
57801e04c3fSmrg
5797ec681f3SmrgUNUSED static void dump_value(const nir_search_value *val)
5807e102996Smaya{
5817e102996Smaya   switch (val->type) {
5827e102996Smaya   case nir_search_value_constant: {
5837e102996Smaya      const nir_search_constant *sconst = nir_search_value_as_constant(val);
5847e102996Smaya      switch (sconst->type) {
5857e102996Smaya      case nir_type_float:
5867ec681f3Smrg         fprintf(stderr, "%f", sconst->data.d);
5877e102996Smaya         break;
5887e102996Smaya      case nir_type_int:
5897ec681f3Smrg         fprintf(stderr, "%"PRId64, sconst->data.i);
5907e102996Smaya         break;
5917e102996Smaya      case nir_type_uint:
5927ec681f3Smrg         fprintf(stderr, "0x%"PRIx64, sconst->data.u);
5937ec681f3Smrg         break;
5947ec681f3Smrg      case nir_type_bool:
5957ec681f3Smrg         fprintf(stderr, "%s", sconst->data.u != 0 ? "True" : "False");
5967e102996Smaya         break;
5977e102996Smaya      default:
5987e102996Smaya         unreachable("bad const type");
5997e102996Smaya      }
6007e102996Smaya      break;
6017e102996Smaya   }
6027e102996Smaya
6037e102996Smaya   case nir_search_value_variable: {
6047e102996Smaya      const nir_search_variable *var = nir_search_value_as_variable(val);
6057e102996Smaya      if (var->is_constant)
6067ec681f3Smrg         fprintf(stderr, "#");
6077ec681f3Smrg      fprintf(stderr, "%c", var->variable + 'a');
6087e102996Smaya      break;
6097e102996Smaya   }
6107e102996Smaya
6117e102996Smaya   case nir_search_value_expression: {
6127e102996Smaya      const nir_search_expression *expr = nir_search_value_as_expression(val);
6137ec681f3Smrg      fprintf(stderr, "(");
6147e102996Smaya      if (expr->inexact)
6157ec681f3Smrg         fprintf(stderr, "~");
6167e102996Smaya      switch (expr->opcode) {
6177e102996Smaya#define CASE(n) \
6187ec681f3Smrg      case nir_search_op_##n: fprintf(stderr, #n); break;
6197e102996Smaya      CASE(f2b)
6207e102996Smaya      CASE(b2f)
6217e102996Smaya      CASE(b2i)
6227e102996Smaya      CASE(i2b)
6237e102996Smaya      CASE(i2i)
6247e102996Smaya      CASE(f2i)
6257e102996Smaya      CASE(i2f)
6267e102996Smaya#undef CASE
6277e102996Smaya      default:
6287ec681f3Smrg         fprintf(stderr, "%s", nir_op_infos[expr->opcode].name);
6297e102996Smaya      }
6307e102996Smaya
6317e102996Smaya      unsigned num_srcs = 1;
6327e102996Smaya      if (expr->opcode <= nir_last_opcode)
6337e102996Smaya         num_srcs = nir_op_infos[expr->opcode].num_inputs;
6347e102996Smaya
6357e102996Smaya      for (unsigned i = 0; i < num_srcs; i++) {
6367ec681f3Smrg         fprintf(stderr, " ");
6377e102996Smaya         dump_value(expr->srcs[i]);
6387e102996Smaya      }
6397e102996Smaya
6407ec681f3Smrg      fprintf(stderr, ")");
6417e102996Smaya      break;
6427e102996Smaya   }
6437e102996Smaya   }
6447e102996Smaya
6457e102996Smaya   if (val->bit_size > 0)
6467ec681f3Smrg      fprintf(stderr, "@%d", val->bit_size);
6477ec681f3Smrg}
6487ec681f3Smrg
6497ec681f3Smrgstatic void
6507ec681f3Smrgadd_uses_to_worklist(nir_instr *instr,
6517ec681f3Smrg                     nir_instr_worklist *worklist,
6527ec681f3Smrg                     struct util_dynarray *states,
6537ec681f3Smrg                     const struct per_op_table *pass_op_table)
6547ec681f3Smrg{
6557ec681f3Smrg   nir_ssa_def *def = nir_instr_ssa_def(instr);
6567ec681f3Smrg
6577ec681f3Smrg   nir_foreach_use_safe(use_src, def) {
6587ec681f3Smrg      if (nir_algebraic_automaton(use_src->parent_instr, states, pass_op_table))
6597ec681f3Smrg         nir_instr_worklist_push_tail(worklist, use_src->parent_instr);
6607ec681f3Smrg   }
6617ec681f3Smrg}
6627ec681f3Smrg
6637ec681f3Smrgstatic void
6647ec681f3Smrgnir_algebraic_update_automaton(nir_instr *new_instr,
6657ec681f3Smrg                               nir_instr_worklist *algebraic_worklist,
6667ec681f3Smrg                               struct util_dynarray *states,
6677ec681f3Smrg                               const struct per_op_table *pass_op_table)
6687ec681f3Smrg{
6697ec681f3Smrg
6707ec681f3Smrg   nir_instr_worklist *automaton_worklist = nir_instr_worklist_create();
6717ec681f3Smrg
6727ec681f3Smrg   /* Walk through the tree of uses of our new instruction's SSA value,
6737ec681f3Smrg    * recursively updating the automaton state until it stabilizes.
6747ec681f3Smrg    */
6757ec681f3Smrg   add_uses_to_worklist(new_instr, automaton_worklist, states, pass_op_table);
6767ec681f3Smrg
6777ec681f3Smrg   nir_instr *instr;
6787ec681f3Smrg   while ((instr = nir_instr_worklist_pop_head(automaton_worklist))) {
6797ec681f3Smrg      nir_instr_worklist_push_tail(algebraic_worklist, instr);
6807ec681f3Smrg      add_uses_to_worklist(instr, automaton_worklist, states, pass_op_table);
6817ec681f3Smrg   }
6827ec681f3Smrg
6837ec681f3Smrg   nir_instr_worklist_destroy(automaton_worklist);
6847e102996Smaya}
6857e102996Smaya
68601e04c3fSmrgnir_ssa_def *
68701e04c3fSmrgnir_replace_instr(nir_builder *build, nir_alu_instr *instr,
6887ec681f3Smrg                  struct hash_table *range_ht,
6897ec681f3Smrg                  struct util_dynarray *states,
6907ec681f3Smrg                  const struct per_op_table *pass_op_table,
69101e04c3fSmrg                  const nir_search_expression *search,
6927ec681f3Smrg                  const nir_search_value *replace,
6937ec681f3Smrg                  nir_instr_worklist *algebraic_worklist)
69401e04c3fSmrg{
69501e04c3fSmrg   uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
69601e04c3fSmrg
69701e04c3fSmrg   for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; ++i)
69801e04c3fSmrg      swizzle[i] = i;
69901e04c3fSmrg
70001e04c3fSmrg   assert(instr->dest.dest.is_ssa);
70101e04c3fSmrg
70201e04c3fSmrg   struct match_state state;
70301e04c3fSmrg   state.inexact_match = false;
70401e04c3fSmrg   state.has_exact_alu = false;
7057ec681f3Smrg   state.range_ht = range_ht;
7067ec681f3Smrg   state.pass_op_table = pass_op_table;
7077ec681f3Smrg
7087ec681f3Smrg   STATIC_ASSERT(sizeof(state.comm_op_direction) * 8 >= NIR_SEARCH_MAX_COMM_OPS);
70901e04c3fSmrg
7107e102996Smaya   unsigned comm_expr_combinations =
7117e102996Smaya      1 << MIN2(search->comm_exprs, NIR_SEARCH_MAX_COMM_OPS);
7127e102996Smaya
7137e102996Smaya   bool found = false;
7147e102996Smaya   for (unsigned comb = 0; comb < comm_expr_combinations; comb++) {
7157e102996Smaya      /* The bitfield of directions is just the current iteration.  Hooray for
7167e102996Smaya       * binary.
7177e102996Smaya       */
7187e102996Smaya      state.comm_op_direction = comb;
7197e102996Smaya      state.variables_seen = 0;
7207e102996Smaya
7217e102996Smaya      if (match_expression(search, instr,
7227e102996Smaya                           instr->dest.dest.ssa.num_components,
7237e102996Smaya                           swizzle, &state)) {
7247e102996Smaya         found = true;
7257e102996Smaya         break;
7267e102996Smaya      }
7277e102996Smaya   }
7287e102996Smaya   if (!found)
72901e04c3fSmrg      return NULL;
73001e04c3fSmrg
7317e102996Smaya#if 0
7327ec681f3Smrg   fprintf(stderr, "matched: ");
7337e102996Smaya   dump_value(&search->value);
7347ec681f3Smrg   fprintf(stderr, " -> ");
7357e102996Smaya   dump_value(replace);
7367ec681f3Smrg   fprintf(stderr, " ssa_%d\n", instr->dest.dest.ssa.index);
7377e102996Smaya#endif
73801e04c3fSmrg
7397ec681f3Smrg   /* If the instruction at the root of the expression tree being replaced is
7407ec681f3Smrg    * a unary operation, insert the replacement instructions at the location
7417ec681f3Smrg    * of the source of the unary operation.  Otherwise, insert the replacement
7427ec681f3Smrg    * instructions at the location of the expression tree root.
7437ec681f3Smrg    *
7447ec681f3Smrg    * For the unary operation case, this is done to prevent some spurious code
7457ec681f3Smrg    * motion that can dramatically extend live ranges.  Imagine an expression
7467ec681f3Smrg    * like -(A+B) where the addtion and the negation are separated by flow
7477ec681f3Smrg    * control and thousands of instructions.  If this expression is replaced
7487ec681f3Smrg    * with -A+-B, inserting the new instructions at the site of the negation
7497ec681f3Smrg    * could extend the live range of A and B dramtically.  This could increase
7507ec681f3Smrg    * register pressure and cause spilling.
7517ec681f3Smrg    *
7527ec681f3Smrg    * It may well be that moving instructions around is a good thing, but
7537ec681f3Smrg    * keeping algebraic optimizations and code motion optimizations separate
7547ec681f3Smrg    * seems safest.
7557ec681f3Smrg    */
7567ec681f3Smrg   nir_alu_instr *const src_instr = nir_src_as_alu_instr(instr->src[0].src);
7577ec681f3Smrg   if (src_instr != NULL &&
7587ec681f3Smrg       (instr->op == nir_op_fneg || instr->op == nir_op_fabs ||
7597ec681f3Smrg        instr->op == nir_op_ineg || instr->op == nir_op_iabs ||
7607ec681f3Smrg        instr->op == nir_op_inot)) {
7617ec681f3Smrg      /* Insert new instructions *after*.  Otherwise a hypothetical
7627ec681f3Smrg       * replacement fneg(X) -> fabs(X) would insert the fabs() instruction
7637ec681f3Smrg       * before X!  This can also occur for things like fneg(X.wzyx) -> X.wzyx
7647ec681f3Smrg       * in vector mode.  A move instruction to handle the swizzle will get
7657ec681f3Smrg       * inserted before X.
7667ec681f3Smrg       *
7677ec681f3Smrg       * This manifested in a single OpenGL ES 2.0 CTS vertex shader test on
7687ec681f3Smrg       * older Intel GPU that use vector-mode vertex processing.
7697ec681f3Smrg       */
7707ec681f3Smrg      build->cursor = nir_after_instr(&src_instr->instr);
7717ec681f3Smrg   } else {
7727ec681f3Smrg      build->cursor = nir_before_instr(&instr->instr);
7737ec681f3Smrg   }
7747ec681f3Smrg
7757ec681f3Smrg   state.states = states;
77601e04c3fSmrg
77701e04c3fSmrg   nir_alu_src val = construct_value(build, replace,
77801e04c3fSmrg                                     instr->dest.dest.ssa.num_components,
7797e102996Smaya                                     instr->dest.dest.ssa.bit_size,
7807e102996Smaya                                     &state, &instr->instr);
78101e04c3fSmrg
7827ec681f3Smrg   /* Note that NIR builder will elide the MOV if it's a no-op, which may
7837ec681f3Smrg    * allow more work to be done in a single pass through algebraic.
78401e04c3fSmrg    */
78501e04c3fSmrg   nir_ssa_def *ssa_val =
7867ec681f3Smrg      nir_mov_alu(build, val, instr->dest.dest.ssa.num_components);
7877ec681f3Smrg   if (ssa_val->index == util_dynarray_num_elements(states, uint16_t)) {
7887ec681f3Smrg      util_dynarray_append(states, uint16_t, 0);
7897ec681f3Smrg      nir_algebraic_automaton(ssa_val->parent_instr, states, pass_op_table);
7907ec681f3Smrg   }
7917ec681f3Smrg
7927ec681f3Smrg   /* Rewrite the uses of the old SSA value to the new one, and recurse
7937ec681f3Smrg    * through the uses updating the automaton's state.
7947ec681f3Smrg    */
7957ec681f3Smrg   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, ssa_val);
7967ec681f3Smrg   nir_algebraic_update_automaton(ssa_val->parent_instr, algebraic_worklist,
7977ec681f3Smrg                                  states, pass_op_table);
79801e04c3fSmrg
7997ec681f3Smrg   /* Nothing uses the instr any more, so drop it out of the program.  Note
8007ec681f3Smrg    * that the instr may be in the worklist still, so we can't free it
8017ec681f3Smrg    * directly.
80201e04c3fSmrg    */
80301e04c3fSmrg   nir_instr_remove(&instr->instr);
80401e04c3fSmrg
80501e04c3fSmrg   return ssa_val;
80601e04c3fSmrg}
8077ec681f3Smrg
8087ec681f3Smrgstatic bool
8097ec681f3Smrgnir_algebraic_automaton(nir_instr *instr, struct util_dynarray *states,
8107ec681f3Smrg                        const struct per_op_table *pass_op_table)
8117ec681f3Smrg{
8127ec681f3Smrg   switch (instr->type) {
8137ec681f3Smrg   case nir_instr_type_alu: {
8147ec681f3Smrg      nir_alu_instr *alu = nir_instr_as_alu(instr);
8157ec681f3Smrg      nir_op op = alu->op;
8167ec681f3Smrg      uint16_t search_op = nir_search_op_for_nir_op(op);
8177ec681f3Smrg      const struct per_op_table *tbl = &pass_op_table[search_op];
8187ec681f3Smrg      if (tbl->num_filtered_states == 0)
8197ec681f3Smrg         return false;
8207ec681f3Smrg
8217ec681f3Smrg      /* Calculate the index into the transition table. Note the index
8227ec681f3Smrg       * calculated must match the iteration order of Python's
8237ec681f3Smrg       * itertools.product(), which was used to emit the transition
8247ec681f3Smrg       * table.
8257ec681f3Smrg       */
8267ec681f3Smrg      unsigned index = 0;
8277ec681f3Smrg      for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
8287ec681f3Smrg         index *= tbl->num_filtered_states;
8297ec681f3Smrg         index += tbl->filter[*util_dynarray_element(states, uint16_t,
8307ec681f3Smrg                                                     alu->src[i].src.ssa->index)];
8317ec681f3Smrg      }
8327ec681f3Smrg
8337ec681f3Smrg      uint16_t *state = util_dynarray_element(states, uint16_t,
8347ec681f3Smrg                                              alu->dest.dest.ssa.index);
8357ec681f3Smrg      if (*state != tbl->table[index]) {
8367ec681f3Smrg         *state = tbl->table[index];
8377ec681f3Smrg         return true;
8387ec681f3Smrg      }
8397ec681f3Smrg      return false;
8407ec681f3Smrg   }
8417ec681f3Smrg
8427ec681f3Smrg   case nir_instr_type_load_const: {
8437ec681f3Smrg      nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
8447ec681f3Smrg      uint16_t *state = util_dynarray_element(states, uint16_t,
8457ec681f3Smrg                                              load_const->def.index);
8467ec681f3Smrg      if (*state != CONST_STATE) {
8477ec681f3Smrg         *state = CONST_STATE;
8487ec681f3Smrg         return true;
8497ec681f3Smrg      }
8507ec681f3Smrg      return false;
8517ec681f3Smrg   }
8527ec681f3Smrg
8537ec681f3Smrg   default:
8547ec681f3Smrg      return false;
8557ec681f3Smrg   }
8567ec681f3Smrg}
8577ec681f3Smrg
8587ec681f3Smrgstatic bool
8597ec681f3Smrgnir_algebraic_instr(nir_builder *build, nir_instr *instr,
8607ec681f3Smrg                    struct hash_table *range_ht,
8617ec681f3Smrg                    const bool *condition_flags,
8627ec681f3Smrg                    const struct transform **transforms,
8637ec681f3Smrg                    const uint16_t *transform_counts,
8647ec681f3Smrg                    struct util_dynarray *states,
8657ec681f3Smrg                    const struct per_op_table *pass_op_table,
8667ec681f3Smrg                    nir_instr_worklist *worklist)
8677ec681f3Smrg{
8687ec681f3Smrg
8697ec681f3Smrg   if (instr->type != nir_instr_type_alu)
8707ec681f3Smrg      return false;
8717ec681f3Smrg
8727ec681f3Smrg   nir_alu_instr *alu = nir_instr_as_alu(instr);
8737ec681f3Smrg   if (!alu->dest.dest.is_ssa)
8747ec681f3Smrg      return false;
8757ec681f3Smrg
8767ec681f3Smrg   unsigned bit_size = alu->dest.dest.ssa.bit_size;
8777ec681f3Smrg   const unsigned execution_mode =
8787ec681f3Smrg      build->shader->info.float_controls_execution_mode;
8797ec681f3Smrg   const bool ignore_inexact =
8807ec681f3Smrg      nir_is_float_control_signed_zero_inf_nan_preserve(execution_mode, bit_size) ||
8817ec681f3Smrg      nir_is_denorm_flush_to_zero(execution_mode, bit_size);
8827ec681f3Smrg
8837ec681f3Smrg   int xform_idx = *util_dynarray_element(states, uint16_t,
8847ec681f3Smrg                                          alu->dest.dest.ssa.index);
8857ec681f3Smrg   for (uint16_t i = 0; i < transform_counts[xform_idx]; i++) {
8867ec681f3Smrg      const struct transform *xform = &transforms[xform_idx][i];
8877ec681f3Smrg      if (condition_flags[xform->condition_offset] &&
8887ec681f3Smrg          !(xform->search->inexact && ignore_inexact) &&
8897ec681f3Smrg          nir_replace_instr(build, alu, range_ht, states, pass_op_table,
8907ec681f3Smrg                            xform->search, xform->replace, worklist)) {
8917ec681f3Smrg         _mesa_hash_table_clear(range_ht, NULL);
8927ec681f3Smrg         return true;
8937ec681f3Smrg      }
8947ec681f3Smrg   }
8957ec681f3Smrg
8967ec681f3Smrg   return false;
8977ec681f3Smrg}
8987ec681f3Smrg
8997ec681f3Smrgbool
9007ec681f3Smrgnir_algebraic_impl(nir_function_impl *impl,
9017ec681f3Smrg                   const bool *condition_flags,
9027ec681f3Smrg                   const struct transform **transforms,
9037ec681f3Smrg                   const uint16_t *transform_counts,
9047ec681f3Smrg                   const struct per_op_table *pass_op_table)
9057ec681f3Smrg{
9067ec681f3Smrg   bool progress = false;
9077ec681f3Smrg
9087ec681f3Smrg   nir_builder build;
9097ec681f3Smrg   nir_builder_init(&build, impl);
9107ec681f3Smrg
9117ec681f3Smrg   /* Note: it's important here that we're allocating a zeroed array, since
9127ec681f3Smrg    * state 0 is the default state, which means we don't have to visit
9137ec681f3Smrg    * anything other than constants and ALU instructions.
9147ec681f3Smrg    */
9157ec681f3Smrg   struct util_dynarray states = {0};
9167ec681f3Smrg   if (!util_dynarray_resize(&states, uint16_t, impl->ssa_alloc)) {
9177ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
9187ec681f3Smrg      return false;
9197ec681f3Smrg   }
9207ec681f3Smrg   memset(states.data, 0, states.size);
9217ec681f3Smrg
9227ec681f3Smrg   struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
9237ec681f3Smrg
9247ec681f3Smrg   nir_instr_worklist *worklist = nir_instr_worklist_create();
9257ec681f3Smrg
9267ec681f3Smrg   /* Walk top-to-bottom setting up the automaton state. */
9277ec681f3Smrg   nir_foreach_block(block, impl) {
9287ec681f3Smrg      nir_foreach_instr(instr, block) {
9297ec681f3Smrg         nir_algebraic_automaton(instr, &states, pass_op_table);
9307ec681f3Smrg      }
9317ec681f3Smrg   }
9327ec681f3Smrg
9337ec681f3Smrg   /* Put our instrs in the worklist such that we're popping the last instr
9347ec681f3Smrg    * first.  This will encourage us to match the biggest source patterns when
9357ec681f3Smrg    * possible.
9367ec681f3Smrg    */
9377ec681f3Smrg   nir_foreach_block_reverse(block, impl) {
9387ec681f3Smrg      nir_foreach_instr_reverse(instr, block) {
9397ec681f3Smrg         if (instr->type == nir_instr_type_alu)
9407ec681f3Smrg            nir_instr_worklist_push_tail(worklist, instr);
9417ec681f3Smrg      }
9427ec681f3Smrg   }
9437ec681f3Smrg
9447ec681f3Smrg   nir_instr *instr;
9457ec681f3Smrg   while ((instr = nir_instr_worklist_pop_head(worklist))) {
9467ec681f3Smrg      /* The worklist can have an instr pushed to it multiple times if it was
9477ec681f3Smrg       * the src of multiple instrs that also got optimized, so make sure that
9487ec681f3Smrg       * we don't try to re-optimize an instr we already handled.
9497ec681f3Smrg       */
9507ec681f3Smrg      if (exec_node_is_tail_sentinel(&instr->node))
9517ec681f3Smrg         continue;
9527ec681f3Smrg
9537ec681f3Smrg      progress |= nir_algebraic_instr(&build, instr,
9547ec681f3Smrg                                      range_ht, condition_flags,
9557ec681f3Smrg                                      transforms, transform_counts, &states,
9567ec681f3Smrg                                      pass_op_table, worklist);
9577ec681f3Smrg   }
9587ec681f3Smrg
9597ec681f3Smrg   nir_instr_worklist_destroy(worklist);
9607ec681f3Smrg   ralloc_free(range_ht);
9617ec681f3Smrg   util_dynarray_fini(&states);
9627ec681f3Smrg
9637ec681f3Smrg   if (progress) {
9647ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_block_index |
9657ec681f3Smrg                                  nir_metadata_dominance);
9667ec681f3Smrg   } else {
9677ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
9687ec681f3Smrg   }
9697ec681f3Smrg
9707ec681f3Smrg   return progress;
9717ec681f3Smrg}
972