/*
 * Copyright © 2014 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "nir_instr_set.h" 25b8e80941Smrg#include "nir_vla.h" 26b8e80941Smrg#include "util/half_float.h" 27b8e80941Smrg 28b8e80941Smrgstatic bool 29b8e80941Smrgsrc_is_ssa(nir_src *src, void *data) 30b8e80941Smrg{ 31b8e80941Smrg (void) data; 32b8e80941Smrg return src->is_ssa; 33b8e80941Smrg} 34b8e80941Smrg 35b8e80941Smrgstatic bool 36b8e80941Smrgdest_is_ssa(nir_dest *dest, void *data) 37b8e80941Smrg{ 38b8e80941Smrg (void) data; 39b8e80941Smrg return dest->is_ssa; 40b8e80941Smrg} 41b8e80941Smrg 42b8e80941Smrgstatic inline bool 43b8e80941Smrginstr_each_src_and_dest_is_ssa(const nir_instr *instr) 44b8e80941Smrg{ 45b8e80941Smrg if (!nir_foreach_dest((nir_instr *)instr, dest_is_ssa, NULL) || 46b8e80941Smrg !nir_foreach_src((nir_instr *)instr, src_is_ssa, NULL)) 47b8e80941Smrg return false; 48b8e80941Smrg 49b8e80941Smrg return true; 50b8e80941Smrg} 51b8e80941Smrg 52b8e80941Smrg/* This function determines if uses of an instruction can safely be rewritten 53b8e80941Smrg * to use another identical instruction instead. Note that this function must 54b8e80941Smrg * be kept in sync with hash_instr() and nir_instrs_equal() -- only 55b8e80941Smrg * instructions that pass this test will be handed on to those functions, and 56b8e80941Smrg * conversely they must handle everything that this function returns true for. 57b8e80941Smrg */ 58b8e80941Smrgstatic bool 59b8e80941Smrginstr_can_rewrite(const nir_instr *instr) 60b8e80941Smrg{ 61b8e80941Smrg /* We only handle SSA. 
*/ 62b8e80941Smrg assert(instr_each_src_and_dest_is_ssa(instr)); 63b8e80941Smrg 64b8e80941Smrg switch (instr->type) { 65b8e80941Smrg case nir_instr_type_alu: 66b8e80941Smrg case nir_instr_type_deref: 67b8e80941Smrg case nir_instr_type_tex: 68b8e80941Smrg case nir_instr_type_load_const: 69b8e80941Smrg case nir_instr_type_phi: 70b8e80941Smrg return true; 71b8e80941Smrg case nir_instr_type_intrinsic: 72b8e80941Smrg return nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)); 73b8e80941Smrg case nir_instr_type_call: 74b8e80941Smrg case nir_instr_type_jump: 75b8e80941Smrg case nir_instr_type_ssa_undef: 76b8e80941Smrg return false; 77b8e80941Smrg case nir_instr_type_parallel_copy: 78b8e80941Smrg default: 79b8e80941Smrg unreachable("Invalid instruction type"); 80b8e80941Smrg } 81b8e80941Smrg 82b8e80941Smrg return false; 83b8e80941Smrg} 84b8e80941Smrg 85b8e80941Smrg 86b8e80941Smrg#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data)) 87b8e80941Smrg 88b8e80941Smrgstatic uint32_t 89b8e80941Smrghash_src(uint32_t hash, const nir_src *src) 90b8e80941Smrg{ 91b8e80941Smrg assert(src->is_ssa); 92b8e80941Smrg hash = HASH(hash, src->ssa); 93b8e80941Smrg return hash; 94b8e80941Smrg} 95b8e80941Smrg 96b8e80941Smrgstatic uint32_t 97b8e80941Smrghash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components) 98b8e80941Smrg{ 99b8e80941Smrg hash = HASH(hash, src->abs); 100b8e80941Smrg hash = HASH(hash, src->negate); 101b8e80941Smrg 102b8e80941Smrg for (unsigned i = 0; i < num_components; i++) 103b8e80941Smrg hash = HASH(hash, src->swizzle[i]); 104b8e80941Smrg 105b8e80941Smrg hash = hash_src(hash, &src->src); 106b8e80941Smrg return hash; 107b8e80941Smrg} 108b8e80941Smrg 109b8e80941Smrgstatic uint32_t 110b8e80941Smrghash_alu(uint32_t hash, const nir_alu_instr *instr) 111b8e80941Smrg{ 112b8e80941Smrg hash = HASH(hash, instr->op); 113b8e80941Smrg hash = HASH(hash, instr->dest.dest.ssa.num_components); 114b8e80941Smrg hash = HASH(hash, instr->dest.dest.ssa.bit_size); 
115b8e80941Smrg /* We explicitly don't hash instr->dest.dest.exact */ 116b8e80941Smrg 117b8e80941Smrg if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { 118b8e80941Smrg assert(nir_op_infos[instr->op].num_inputs == 2); 119b8e80941Smrg uint32_t hash0 = hash_alu_src(hash, &instr->src[0], 120b8e80941Smrg nir_ssa_alu_instr_src_components(instr, 0)); 121b8e80941Smrg uint32_t hash1 = hash_alu_src(hash, &instr->src[1], 122b8e80941Smrg nir_ssa_alu_instr_src_components(instr, 1)); 123b8e80941Smrg /* For commutative operations, we need some commutative way of 124b8e80941Smrg * combining the hashes. One option would be to XOR them but that 125b8e80941Smrg * means that anything with two identical sources will hash to 0 and 126b8e80941Smrg * that's common enough we probably don't want the guaranteed 127b8e80941Smrg * collision. Either addition or multiplication will also work. 128b8e80941Smrg */ 129b8e80941Smrg hash = hash0 * hash1; 130b8e80941Smrg } else { 131b8e80941Smrg for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { 132b8e80941Smrg hash = hash_alu_src(hash, &instr->src[i], 133b8e80941Smrg nir_ssa_alu_instr_src_components(instr, i)); 134b8e80941Smrg } 135b8e80941Smrg } 136b8e80941Smrg 137b8e80941Smrg return hash; 138b8e80941Smrg} 139b8e80941Smrg 140b8e80941Smrgstatic uint32_t 141b8e80941Smrghash_deref(uint32_t hash, const nir_deref_instr *instr) 142b8e80941Smrg{ 143b8e80941Smrg hash = HASH(hash, instr->deref_type); 144b8e80941Smrg hash = HASH(hash, instr->mode); 145b8e80941Smrg hash = HASH(hash, instr->type); 146b8e80941Smrg 147b8e80941Smrg if (instr->deref_type == nir_deref_type_var) 148b8e80941Smrg return HASH(hash, instr->var); 149b8e80941Smrg 150b8e80941Smrg hash = hash_src(hash, &instr->parent); 151b8e80941Smrg 152b8e80941Smrg switch (instr->deref_type) { 153b8e80941Smrg case nir_deref_type_struct: 154b8e80941Smrg hash = HASH(hash, instr->strct.index); 155b8e80941Smrg break; 156b8e80941Smrg 157b8e80941Smrg case 
nir_deref_type_array: 158b8e80941Smrg case nir_deref_type_ptr_as_array: 159b8e80941Smrg hash = hash_src(hash, &instr->arr.index); 160b8e80941Smrg break; 161b8e80941Smrg 162b8e80941Smrg case nir_deref_type_cast: 163b8e80941Smrg hash = HASH(hash, instr->cast.ptr_stride); 164b8e80941Smrg break; 165b8e80941Smrg 166b8e80941Smrg case nir_deref_type_var: 167b8e80941Smrg case nir_deref_type_array_wildcard: 168b8e80941Smrg /* Nothing to do */ 169b8e80941Smrg break; 170b8e80941Smrg 171b8e80941Smrg default: 172b8e80941Smrg unreachable("Invalid instruction deref type"); 173b8e80941Smrg } 174b8e80941Smrg 175b8e80941Smrg return hash; 176b8e80941Smrg} 177b8e80941Smrg 178b8e80941Smrgstatic uint32_t 179b8e80941Smrghash_load_const(uint32_t hash, const nir_load_const_instr *instr) 180b8e80941Smrg{ 181b8e80941Smrg hash = HASH(hash, instr->def.num_components); 182b8e80941Smrg 183b8e80941Smrg if (instr->def.bit_size == 1) { 184b8e80941Smrg for (unsigned i = 0; i < instr->def.num_components; i++) { 185b8e80941Smrg uint8_t b = instr->value[i].b; 186b8e80941Smrg hash = HASH(hash, b); 187b8e80941Smrg } 188b8e80941Smrg } else { 189b8e80941Smrg unsigned size = instr->def.num_components * sizeof(*instr->value); 190b8e80941Smrg hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value, size); 191b8e80941Smrg } 192b8e80941Smrg 193b8e80941Smrg return hash; 194b8e80941Smrg} 195b8e80941Smrg 196b8e80941Smrgstatic int 197b8e80941Smrgcmp_phi_src(const void *data1, const void *data2) 198b8e80941Smrg{ 199b8e80941Smrg nir_phi_src *src1 = *(nir_phi_src **)data1; 200b8e80941Smrg nir_phi_src *src2 = *(nir_phi_src **)data2; 201b8e80941Smrg return src1->pred - src2->pred; 202b8e80941Smrg} 203b8e80941Smrg 204b8e80941Smrgstatic uint32_t 205b8e80941Smrghash_phi(uint32_t hash, const nir_phi_instr *instr) 206b8e80941Smrg{ 207b8e80941Smrg hash = HASH(hash, instr->instr.block); 208b8e80941Smrg 209b8e80941Smrg /* sort sources by predecessor, since the order shouldn't matter */ 210b8e80941Smrg unsigned num_preds = 
instr->instr.block->predecessors->entries; 211b8e80941Smrg NIR_VLA(nir_phi_src *, srcs, num_preds); 212b8e80941Smrg unsigned i = 0; 213b8e80941Smrg nir_foreach_phi_src(src, instr) { 214b8e80941Smrg srcs[i++] = src; 215b8e80941Smrg } 216b8e80941Smrg 217b8e80941Smrg qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src); 218b8e80941Smrg 219b8e80941Smrg for (i = 0; i < num_preds; i++) { 220b8e80941Smrg hash = hash_src(hash, &srcs[i]->src); 221b8e80941Smrg hash = HASH(hash, srcs[i]->pred); 222b8e80941Smrg } 223b8e80941Smrg 224b8e80941Smrg return hash; 225b8e80941Smrg} 226b8e80941Smrg 227b8e80941Smrgstatic uint32_t 228b8e80941Smrghash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr) 229b8e80941Smrg{ 230b8e80941Smrg const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; 231b8e80941Smrg hash = HASH(hash, instr->intrinsic); 232b8e80941Smrg 233b8e80941Smrg if (info->has_dest) { 234b8e80941Smrg hash = HASH(hash, instr->dest.ssa.num_components); 235b8e80941Smrg hash = HASH(hash, instr->dest.ssa.bit_size); 236b8e80941Smrg } 237b8e80941Smrg 238b8e80941Smrg hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index, 239b8e80941Smrg info->num_indices 240b8e80941Smrg * sizeof(instr->const_index[0])); 241b8e80941Smrg return hash; 242b8e80941Smrg} 243b8e80941Smrg 244b8e80941Smrgstatic uint32_t 245b8e80941Smrghash_tex(uint32_t hash, const nir_tex_instr *instr) 246b8e80941Smrg{ 247b8e80941Smrg hash = HASH(hash, instr->op); 248b8e80941Smrg hash = HASH(hash, instr->num_srcs); 249b8e80941Smrg 250b8e80941Smrg for (unsigned i = 0; i < instr->num_srcs; i++) { 251b8e80941Smrg hash = HASH(hash, instr->src[i].src_type); 252b8e80941Smrg hash = hash_src(hash, &instr->src[i].src); 253b8e80941Smrg } 254b8e80941Smrg 255b8e80941Smrg hash = HASH(hash, instr->coord_components); 256b8e80941Smrg hash = HASH(hash, instr->sampler_dim); 257b8e80941Smrg hash = HASH(hash, instr->is_array); 258b8e80941Smrg hash = HASH(hash, instr->is_shadow); 259b8e80941Smrg hash = 
HASH(hash, instr->is_new_style_shadow); 260b8e80941Smrg unsigned component = instr->component; 261b8e80941Smrg hash = HASH(hash, component); 262b8e80941Smrg for (unsigned i = 0; i < 4; ++i) 263b8e80941Smrg for (unsigned j = 0; j < 2; ++j) 264b8e80941Smrg hash = HASH(hash, instr->tg4_offsets[i][j]); 265b8e80941Smrg hash = HASH(hash, instr->texture_index); 266b8e80941Smrg hash = HASH(hash, instr->texture_array_size); 267b8e80941Smrg hash = HASH(hash, instr->sampler_index); 268b8e80941Smrg 269b8e80941Smrg return hash; 270b8e80941Smrg} 271b8e80941Smrg 272b8e80941Smrg/* Computes a hash of an instruction for use in a hash table. Note that this 273b8e80941Smrg * will only work for instructions where instr_can_rewrite() returns true, and 274b8e80941Smrg * it should return identical hashes for two instructions that are the same 275b8e80941Smrg * according nir_instrs_equal(). 276b8e80941Smrg */ 277b8e80941Smrg 278b8e80941Smrgstatic uint32_t 279b8e80941Smrghash_instr(const void *data) 280b8e80941Smrg{ 281b8e80941Smrg const nir_instr *instr = data; 282b8e80941Smrg uint32_t hash = _mesa_fnv32_1a_offset_bias; 283b8e80941Smrg 284b8e80941Smrg switch (instr->type) { 285b8e80941Smrg case nir_instr_type_alu: 286b8e80941Smrg hash = hash_alu(hash, nir_instr_as_alu(instr)); 287b8e80941Smrg break; 288b8e80941Smrg case nir_instr_type_deref: 289b8e80941Smrg hash = hash_deref(hash, nir_instr_as_deref(instr)); 290b8e80941Smrg break; 291b8e80941Smrg case nir_instr_type_load_const: 292b8e80941Smrg hash = hash_load_const(hash, nir_instr_as_load_const(instr)); 293b8e80941Smrg break; 294b8e80941Smrg case nir_instr_type_phi: 295b8e80941Smrg hash = hash_phi(hash, nir_instr_as_phi(instr)); 296b8e80941Smrg break; 297b8e80941Smrg case nir_instr_type_intrinsic: 298b8e80941Smrg hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr)); 299b8e80941Smrg break; 300b8e80941Smrg case nir_instr_type_tex: 301b8e80941Smrg hash = hash_tex(hash, nir_instr_as_tex(instr)); 302b8e80941Smrg break; 303b8e80941Smrg 
default: 304b8e80941Smrg unreachable("Invalid instruction type"); 305b8e80941Smrg } 306b8e80941Smrg 307b8e80941Smrg return hash; 308b8e80941Smrg} 309b8e80941Smrg 310b8e80941Smrgbool 311b8e80941Smrgnir_srcs_equal(nir_src src1, nir_src src2) 312b8e80941Smrg{ 313b8e80941Smrg if (src1.is_ssa) { 314b8e80941Smrg if (src2.is_ssa) { 315b8e80941Smrg return src1.ssa == src2.ssa; 316b8e80941Smrg } else { 317b8e80941Smrg return false; 318b8e80941Smrg } 319b8e80941Smrg } else { 320b8e80941Smrg if (src2.is_ssa) { 321b8e80941Smrg return false; 322b8e80941Smrg } else { 323b8e80941Smrg if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL)) 324b8e80941Smrg return false; 325b8e80941Smrg 326b8e80941Smrg if (src1.reg.indirect) { 327b8e80941Smrg if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect)) 328b8e80941Smrg return false; 329b8e80941Smrg } 330b8e80941Smrg 331b8e80941Smrg return src1.reg.reg == src2.reg.reg && 332b8e80941Smrg src1.reg.base_offset == src2.reg.base_offset; 333b8e80941Smrg } 334b8e80941Smrg } 335b8e80941Smrg} 336b8e80941Smrg 337b8e80941Smrg/** 338b8e80941Smrg * If the \p s is an SSA value that was generated by a negation instruction, 339b8e80941Smrg * that instruction is returned as a \c nir_alu_instr. Otherwise \c NULL is 340b8e80941Smrg * returned. 341b8e80941Smrg */ 342b8e80941Smrgstatic nir_alu_instr * 343b8e80941Smrgget_neg_instr(nir_src s) 344b8e80941Smrg{ 345b8e80941Smrg nir_alu_instr *alu = nir_src_as_alu_instr(s); 346b8e80941Smrg 347b8e80941Smrg return alu != NULL && (alu->op == nir_op_fneg || alu->op == nir_op_ineg) 348b8e80941Smrg ? 
alu : NULL; 349b8e80941Smrg} 350b8e80941Smrg 351b8e80941Smrgbool 352b8e80941Smrgnir_const_value_negative_equal(const nir_const_value *c1, 353b8e80941Smrg const nir_const_value *c2, 354b8e80941Smrg unsigned components, 355b8e80941Smrg nir_alu_type base_type, 356b8e80941Smrg unsigned bits) 357b8e80941Smrg{ 358b8e80941Smrg assert(base_type == nir_alu_type_get_base_type(base_type)); 359b8e80941Smrg assert(base_type != nir_type_invalid); 360b8e80941Smrg 361b8e80941Smrg /* This can occur for 1-bit Boolean values. */ 362b8e80941Smrg if (bits == 1) 363b8e80941Smrg return false; 364b8e80941Smrg 365b8e80941Smrg switch (base_type) { 366b8e80941Smrg case nir_type_float: 367b8e80941Smrg switch (bits) { 368b8e80941Smrg case 16: 369b8e80941Smrg for (unsigned i = 0; i < components; i++) { 370b8e80941Smrg if (_mesa_half_to_float(c1[i].u16) != 371b8e80941Smrg -_mesa_half_to_float(c2[i].u16)) { 372b8e80941Smrg return false; 373b8e80941Smrg } 374b8e80941Smrg } 375b8e80941Smrg 376b8e80941Smrg return true; 377b8e80941Smrg 378b8e80941Smrg case 32: 379b8e80941Smrg for (unsigned i = 0; i < components; i++) { 380b8e80941Smrg if (c1[i].f32 != -c2[i].f32) 381b8e80941Smrg return false; 382b8e80941Smrg } 383b8e80941Smrg 384b8e80941Smrg return true; 385b8e80941Smrg 386b8e80941Smrg case 64: 387b8e80941Smrg for (unsigned i = 0; i < components; i++) { 388b8e80941Smrg if (c1[i].f64 != -c2[i].f64) 389b8e80941Smrg return false; 390b8e80941Smrg } 391b8e80941Smrg 392b8e80941Smrg return true; 393b8e80941Smrg 394b8e80941Smrg default: 395b8e80941Smrg unreachable("unknown bit size"); 396b8e80941Smrg } 397b8e80941Smrg 398b8e80941Smrg break; 399b8e80941Smrg 400b8e80941Smrg case nir_type_int: 401b8e80941Smrg case nir_type_uint: 402b8e80941Smrg switch (bits) { 403b8e80941Smrg case 8: 404b8e80941Smrg for (unsigned i = 0; i < components; i++) { 405b8e80941Smrg if (c1[i].i8 != -c2[i].i8) 406b8e80941Smrg return false; 407b8e80941Smrg } 408b8e80941Smrg 409b8e80941Smrg return true; 410b8e80941Smrg 411b8e80941Smrg 
case 16: 412b8e80941Smrg for (unsigned i = 0; i < components; i++) { 413b8e80941Smrg if (c1[i].i16 != -c2[i].i16) 414b8e80941Smrg return false; 415b8e80941Smrg } 416b8e80941Smrg 417b8e80941Smrg return true; 418b8e80941Smrg break; 419b8e80941Smrg 420b8e80941Smrg case 32: 421b8e80941Smrg for (unsigned i = 0; i < components; i++) { 422b8e80941Smrg if (c1[i].i32 != -c2[i].i32) 423b8e80941Smrg return false; 424b8e80941Smrg } 425b8e80941Smrg 426b8e80941Smrg return true; 427b8e80941Smrg 428b8e80941Smrg case 64: 429b8e80941Smrg for (unsigned i = 0; i < components; i++) { 430b8e80941Smrg if (c1[i].i64 != -c2[i].i64) 431b8e80941Smrg return false; 432b8e80941Smrg } 433b8e80941Smrg 434b8e80941Smrg return true; 435b8e80941Smrg 436b8e80941Smrg default: 437b8e80941Smrg unreachable("unknown bit size"); 438b8e80941Smrg } 439b8e80941Smrg 440b8e80941Smrg break; 441b8e80941Smrg 442b8e80941Smrg case nir_type_bool: 443b8e80941Smrg return false; 444b8e80941Smrg 445b8e80941Smrg default: 446b8e80941Smrg break; 447b8e80941Smrg } 448b8e80941Smrg 449b8e80941Smrg return false; 450b8e80941Smrg} 451b8e80941Smrg 452b8e80941Smrg/** 453b8e80941Smrg * Shallow compare of ALU srcs to determine if one is the negation of the other 454b8e80941Smrg * 455b8e80941Smrg * This function detects cases where \p alu1 is a constant and \p alu2 is a 456b8e80941Smrg * constant that is its negation. It will also detect cases where \p alu2 is 457b8e80941Smrg * an SSA value that is a \c nir_op_fneg applied to \p alu1 (and vice versa). 458b8e80941Smrg * 459b8e80941Smrg * This function does not detect the general case when \p alu1 and \p alu2 are 460b8e80941Smrg * SSA values that are the negations of each other (e.g., \p alu1 represents 461b8e80941Smrg * (a * b) and \p alu2 represents (-a * b)). 
 */
bool
nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
                            const nir_alu_instr *alu2,
                            unsigned src1, unsigned src2)
{
   /* The two sources can only be negations of each other if their abs
    * modifiers match.
    */
   if (alu1->src[src1].abs != alu2->src[src2].abs)
      return false;

   /* Track whether an odd number of negations separates the two values.
    * Start from the negate source modifiers; each fneg/ineg instruction
    * discovered below flips it again.
    */
   bool parity = alu1->src[src1].negate != alu2->src[src2].negate;

   /* Handling load_const instructions is tricky. */

   const nir_const_value *const const1 =
      nir_src_as_const_value(alu1->src[src1].src);

   if (const1 != NULL) {
      /* Assume that constant folding will eliminate source mods and unary
       * ops.
       */
      if (parity)
         return false;

      const nir_const_value *const const2 =
         nir_src_as_const_value(alu2->src[src2].src);

      if (const2 == NULL)
         return false;

      if (nir_src_bit_size(alu1->src[src1].src) !=
          nir_src_bit_size(alu2->src[src2].src))
         return false;

      /* FINISHME: Apply the swizzle? */
      return nir_const_value_negative_equal(const1,
                                            const2,
                                            nir_ssa_alu_instr_src_components(alu1, src1),
                                            nir_op_infos[alu1->op].input_types[src1],
                                            nir_src_bit_size(alu1->src[src1].src));
   }

   /* For the non-constant case, peel off one fneg/ineg (if any) from each
    * side and remember that instruction's swizzle so the effective swizzles
    * can be compared below.  When there is no negation, the identity
    * swizzle stands in for it.
    */
   uint8_t alu1_swizzle[4] = {0};
   nir_src alu1_actual_src;
   nir_alu_instr *neg1 = get_neg_instr(alu1->src[src1].src);

   if (neg1) {
      parity = !parity;
      alu1_actual_src = neg1->src[0].src;

      for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(neg1, 0); i++)
         alu1_swizzle[i] = neg1->src[0].swizzle[i];
   } else {
      alu1_actual_src = alu1->src[src1].src;

      for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++)
         alu1_swizzle[i] = i;
   }

   uint8_t alu2_swizzle[4] = {0};
   nir_src alu2_actual_src;
   nir_alu_instr *neg2 = get_neg_instr(alu2->src[src2].src);

   if (neg2) {
      parity = !parity;
      alu2_actual_src = neg2->src[0].src;

      for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(neg2, 0); i++)
         alu2_swizzle[i] = neg2->src[0].swizzle[i];
   } else {
      alu2_actual_src = alu2->src[src2].src;

      for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu2, src2); i++)
         alu2_swizzle[i] = i;
   }

   /* Compare the composed swizzles: the ALU source swizzle applied on top
    * of the (possibly identity) negation swizzle.
    */
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
      if (alu1_swizzle[alu1->src[src1].swizzle[i]] !=
          alu2_swizzle[alu2->src[src2].swizzle[i]])
         return false;
   }

   /* Negations of each other iff an odd number of negations separates them
    * and both ultimately read the same value.
    */
   return parity && nir_srcs_equal(alu1_actual_src, alu2_actual_src);
}

/* Returns true iff source \p src1 of \p alu1 and source \p src2 of \p alu2
 * are identical: same modifiers, same swizzle on the used components, and
 * the same underlying value.
 */
bool
nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
                   unsigned src1, unsigned src2)
{
   if (alu1->src[src1].abs != alu2->src[src2].abs ||
       alu1->src[src1].negate != alu2->src[src2].negate)
      return false;

   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
      if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
         return false;
   }

   return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
}

/* Returns "true" if two instructions are equal. Note that this will only
 * work for the subset of instructions defined by instr_can_rewrite(). Also,
 * it should only return "true" for instructions that hash_instr() will return
 * the same hash for (ignoring collisions, of course).
 */

bool
nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
{
   assert(instr_can_rewrite(instr1) && instr_can_rewrite(instr2));

   if (instr1->type != instr2->type)
      return false;

   switch (instr1->type) {
   case nir_instr_type_alu: {
      nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
      nir_alu_instr *alu2 = nir_instr_as_alu(instr2);

      if (alu1->op != alu2->op)
         return false;

      /* TODO: We can probably actually do something more intelligent such
       * as allowing different numbers and taking a maximum or something
       * here */
      if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
         return false;

      if (alu1->dest.dest.ssa.bit_size != alu2->dest.dest.ssa.bit_size)
         return false;

      /* We explicitly don't hash instr->dest.dest.exact */

      if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
         /* Commutative ops match if the sources match in either order,
          * mirroring the order-independent combine in hash_alu().
          */
         assert(nir_op_infos[alu1->op].num_inputs == 2);
         return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
                 nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
                (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
                 nir_alu_srcs_equal(alu1, alu2, 1, 0));
      } else {
         for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
            if (!nir_alu_srcs_equal(alu1, alu2, i, i))
               return false;
         }
      }
      return true;
   }
   case nir_instr_type_deref: {
      nir_deref_instr *deref1 = nir_instr_as_deref(instr1);
      nir_deref_instr *deref2 = nir_instr_as_deref(instr2);

      if (deref1->deref_type != deref2->deref_type ||
          deref1->mode != deref2->mode ||
          deref1->type != deref2->type)
         return false;

      /* Variable derefs have no parent; compare the variables directly. */
      if (deref1->deref_type == nir_deref_type_var)
         return deref1->var == deref2->var;

      if (!nir_srcs_equal(deref1->parent, deref2->parent))
         return false;

      switch (deref1->deref_type) {
      case nir_deref_type_struct:
         if (deref1->strct.index != deref2->strct.index)
            return false;
         break;

      case nir_deref_type_array:
      case nir_deref_type_ptr_as_array:
         if (!nir_srcs_equal(deref1->arr.index, deref2->arr.index))
            return false;
         break;

      case nir_deref_type_cast:
         if (deref1->cast.ptr_stride != deref2->cast.ptr_stride)
            return false;
         break;

      case nir_deref_type_var:
      case nir_deref_type_array_wildcard:
         /* Nothing to do */
         break;

      default:
         unreachable("Invalid instruction deref type");
      }
      return true;
   }
   case nir_instr_type_tex: {
      nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
      nir_tex_instr *tex2 = nir_instr_as_tex(instr2);

      if (tex1->op != tex2->op)
         return false;

      if (tex1->num_srcs != tex2->num_srcs)
         return false;
      for (unsigned i = 0; i < tex1->num_srcs; i++) {
         if (tex1->src[i].src_type != tex2->src[i].src_type ||
             !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
            return false;
         }
      }

      if (tex1->coord_components != tex2->coord_components ||
          tex1->sampler_dim != tex2->sampler_dim ||
          tex1->is_array != tex2->is_array ||
          tex1->is_shadow != tex2->is_shadow ||
          tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
          tex1->component != tex2->component ||
          tex1->texture_index != tex2->texture_index ||
          tex1->texture_array_size != tex2->texture_array_size ||
          tex1->sampler_index != tex2->sampler_index) {
         return false;
      }

      if (memcmp(tex1->tg4_offsets, tex2->tg4_offsets,
                 sizeof(tex1->tg4_offsets)))
         return false;

      return true;
   }
   case nir_instr_type_load_const: {
      nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
      nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);

      if (load1->def.num_components != load2->def.num_components)
         return false;

      if (load1->def.bit_size != load2->def.bit_size)
         return false;

      if (load1->def.bit_size == 1) {
         /* 1-bit booleans: compare only the .b field per component, matching
          * hash_load_const().
          */
         for (unsigned i = 0; i < load1->def.num_components; ++i) {
            if (load1->value[i].b != load2->value[i].b)
               return false;
         }
      } else {
         unsigned size = load1->def.num_components * sizeof(*load1->value);
         if (memcmp(load1->value, load2->value, size) != 0)
            return false;
      }
      return true;
   }
   case nir_instr_type_phi: {
      nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
      nir_phi_instr *phi2 = nir_instr_as_phi(instr2);

      if (phi1->instr.block != phi2->instr.block)
         return false;

      /* Both phis live in the same block, so they have the same set of
       * predecessors; pair up sources by predecessor and compare each pair.
       */
      nir_foreach_phi_src(src1, phi1) {
         nir_foreach_phi_src(src2, phi2) {
            if (src1->pred == src2->pred) {
               if (!nir_srcs_equal(src1->src, src2->src))
                  return false;

               break;
            }
         }
      }

      return true;
   }
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
      nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
      const nir_intrinsic_info *info =
         &nir_intrinsic_infos[intrinsic1->intrinsic];

      if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
          intrinsic1->num_components != intrinsic2->num_components)
         return false;

      if (info->has_dest && intrinsic1->dest.ssa.num_components !=
                            intrinsic2->dest.ssa.num_components)
         return false;

      if (info->has_dest && intrinsic1->dest.ssa.bit_size !=
                            intrinsic2->dest.ssa.bit_size)
         return false;

      for (unsigned i = 0; i < info->num_srcs; i++) {
         if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
            return false;
      }

      for (unsigned i = 0; i < info->num_indices; i++) {
         if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
            return false;
      }

      return true;
   }
   case nir_instr_type_call:
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("Invalid instruction type");
   }

   unreachable("All cases in the above switch should return");
}

/* Returns the SSA destination of \p instr.  Only valid for the instruction
 * types accepted by instr_can_rewrite(); all of them are asserted to have
 * SSA destinations.
 */
static nir_ssa_def *
nir_instr_get_dest_ssa_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
      return &nir_instr_as_alu(instr)->dest.dest.ssa;
   case nir_instr_type_deref:
      assert(nir_instr_as_deref(instr)->dest.is_ssa);
      return &nir_instr_as_deref(instr)->dest.ssa;
   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;
   case nir_instr_type_phi:
      assert(nir_instr_as_phi(instr)->dest.is_ssa);
      return &nir_instr_as_phi(instr)->dest.ssa;
   case nir_instr_type_intrinsic:
      assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
      return &nir_instr_as_intrinsic(instr)->dest.ssa;
   case nir_instr_type_tex:
      assert(nir_instr_as_tex(instr)->dest.is_ssa);
      return &nir_instr_as_tex(instr)->dest.ssa;
   default:
      unreachable("We never ask for any of these");
   }
}

/* Set-equality callback adapting nir_instrs_equal() to _mesa_set's
 * signature.
 */
static bool
cmp_func(const void *data1, const void *data2)
{
   return nir_instrs_equal(data1, data2);
}

/* Creates an instruction set keyed by hash_instr()/nir_instrs_equal(),
 * suitable for CSE-style deduplication via nir_instr_set_add_or_rewrite().
 */
struct set *
nir_instr_set_create(void *mem_ctx)
{
   return _mesa_set_create(mem_ctx, hash_instr, cmp_func);
}
805b8e80941Smrg 806b8e80941Smrgvoid 807b8e80941Smrgnir_instr_set_destroy(struct set *instr_set) 808b8e80941Smrg{ 809b8e80941Smrg _mesa_set_destroy(instr_set, NULL); 810b8e80941Smrg} 811b8e80941Smrg 812b8e80941Smrgbool 813b8e80941Smrgnir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr) 814b8e80941Smrg{ 815b8e80941Smrg if (!instr_can_rewrite(instr)) 816b8e80941Smrg return false; 817b8e80941Smrg 818b8e80941Smrg uint32_t hash = hash_instr(instr); 819b8e80941Smrg struct set_entry *e = _mesa_set_search_pre_hashed(instr_set, hash, instr); 820b8e80941Smrg if (e) { 821b8e80941Smrg nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr); 822b8e80941Smrg nir_instr *match = (nir_instr *) e->key; 823b8e80941Smrg nir_ssa_def *new_def = nir_instr_get_dest_ssa_def(match); 824b8e80941Smrg 825b8e80941Smrg /* It's safe to replace an exact instruction with an inexact one as 826b8e80941Smrg * long as we make it exact. If we got here, the two instructions are 827b8e80941Smrg * exactly identical in every other way so, once we've set the exact 828b8e80941Smrg * bit, they are the same. 829b8e80941Smrg */ 830b8e80941Smrg if (instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->exact) 831b8e80941Smrg nir_instr_as_alu(match)->exact = true; 832b8e80941Smrg 833b8e80941Smrg nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def)); 834b8e80941Smrg return true; 835b8e80941Smrg } 836b8e80941Smrg 837b8e80941Smrg _mesa_set_add_pre_hashed(instr_set, hash, instr); 838b8e80941Smrg return false; 839b8e80941Smrg} 840b8e80941Smrg 841b8e80941Smrgvoid 842b8e80941Smrgnir_instr_set_remove(struct set *instr_set, nir_instr *instr) 843b8e80941Smrg{ 844b8e80941Smrg if (!instr_can_rewrite(instr)) 845b8e80941Smrg return; 846b8e80941Smrg 847b8e80941Smrg struct set_entry *entry = _mesa_set_search(instr_set, instr); 848b8e80941Smrg if (entry) 849b8e80941Smrg _mesa_set_remove(instr_set, entry); 850b8e80941Smrg} 851b8e80941Smrg 852