1/* 2 * Copyright (C) 2021 Collabora, Ltd. 3 * Copyright (C) 2014 Valve Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25#include "compiler.h" 26#include "bi_builder.h" 27 28#define XXH_INLINE_ALL 29#include "xxhash.h" 30 31/* This pass handles CSE'ing repeated expressions created in the process of 32 * translating from NIR. Also, currently this is intra-block only, to make it 33 * work over multiple block we'd need to bring forward dominance calculation. 34 */ 35 36static inline uint32_t 37HASH(uint32_t hash, unsigned data) 38{ 39 return XXH32(&data, sizeof(data), hash); 40} 41 42static uint32_t 43hash_index(uint32_t hash, bi_index index) 44{ 45 hash = HASH(hash, index.value); 46 hash = HASH(hash, index.abs); 47 hash = HASH(hash, index.neg); 48 hash = HASH(hash, index.swizzle); 49 hash = HASH(hash, index.offset); 50 hash = HASH(hash, index.reg); 51 hash = HASH(hash, index.type); 52 return hash; 53} 54 55/* Hash an ALU instruction. */ 56static uint32_t 57hash_instr(const void *data) 58{ 59 const bi_instr *I = data; 60 uint32_t hash = 0; 61 62 hash = HASH(hash, I->op); 63 64 /* Explcitly skip destinations, except for size details */ 65 bi_foreach_dest(I, d) { 66 hash = HASH(hash, I->dest[d].swizzle); 67 } 68 69 bi_foreach_src(I, s) { 70 hash = hash_index(hash, I->src[s]); 71 } 72 73 /* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */ 74 hash = HASH(hash, I->dest_mod); 75 76 /* Explicitly skip other immediates */ 77 hash = HASH(hash, I->shift); 78 79 for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i) 80 hash = HASH(hash, I->flags[i]); 81 82 return hash; 83} 84 85static bool 86instrs_equal(const void *_i1, const void *_i2) 87{ 88 const bi_instr *i1 = _i1, *i2 = _i2; 89 90 if (i1->op != i2->op) 91 return false; 92 93 /* Explicitly skip destinations */ 94 95 bi_foreach_src(i1, s) { 96 bi_index s1 = i1->src[s], s2 = i2->src[s]; 97 98 if (memcmp(&s1, &s2, sizeof(s1)) != 0) 99 return false; 100 } 101 102 if (i1->dest_mod != i2->dest_mod) 103 return false; 104 105 if (i1->shift != i2->shift) 106 return false; 107 108 for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) { 109 if (i1->flags[i] != i2->flags[i]) 110 return false; 111 } 112 113 return true; 114} 115 116/* Determines what instructions the above routines have to handle */ 117 118static bool 119instr_can_cse(const bi_instr *I) 120{ 121 switch (I->op) { 122 case BI_OPCODE_DTSEL_IMM: 123 case BI_OPCODE_DISCARD_F32: 124 return false; 125 default: 126 break; 127 } 128 129 if (bi_opcode_props[I->op].message) 130 return false; 131 132 if (I->branch_target) 133 return false; 134 135 /* Refuse to CSE non-SSA destinations since the data flow analysis 136 * required is nontrivial */ 137 bi_foreach_dest(I, d) { 138 if (!bi_is_null(I->dest[d]) && !bi_is_ssa(I->dest[d])) 139 return false; 140 } 141 142 /* Similar refuse to CSE non-SSA sources */ 143 bi_foreach_src(I, s) { 144 if (I->src[s].reg || I->src[s].type == BI_INDEX_REGISTER) 145 return false; 146 } 147 148 return true; 149} 150 151void 152bi_opt_cse(bi_context *ctx) 153{ 154 struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal); 155 156 bi_foreach_block(ctx, block) { 157 bi_index *replacement = calloc(sizeof(bi_index), ((ctx->ssa_alloc + 1) << 2)); 158 _mesa_set_clear(instr_set, NULL); 159 160 bi_foreach_instr_in_block(block, instr) { 161 /* Rewrite before trying to CSE anything so we converge 162 * locally in one iteration */ 163 bi_foreach_src(instr, s) { 164 if (s == 0 && bi_opcode_props[instr->op].sr_read) 165 continue; 166 167 if (!bi_is_ssa(instr->src[s])) 168 continue; 169 170 bi_index repl = replacement[bi_word_node(instr->src[s])]; 171 if (!bi_is_null(repl)) 172 instr->src[s] = bi_replace_index(instr->src[s], repl); 173 } 174 175 if (!instr_can_cse(instr)) 176 continue; 177 178 bool found; 179 struct set_entry *entry = 180 _mesa_set_search_or_add(instr_set, instr, &found); 181 if (found) { 182 const bi_instr *match = entry->key; 183 184 bi_foreach_dest(instr, d) { 185 if (!bi_is_null(instr->dest[d])) 186 replacement[bi_word_node(instr->dest[d])] = match->dest[d]; 187 } 188 } 189 } 190 191 free(replacement); 192 } 193 194 _mesa_set_destroy(instr_set, NULL); 195} 196