1/* 2 * Copyright (C) 2019 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors (Collabora): 24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25 */ 26 27#include "compiler.h" 28 29/* Midgard's generic load/store instructions, particularly to implement SSBOs 30 * and globals, have support for address arithmetic natively. In particularly, 31 * they take two indirect arguments A, B and two immediates #s, #c, calculating 32 * the address: 33 * 34 * A + (zext?(B) << #s) + #c 35 * 36 * This allows for fast indexing into arrays. This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe. 37 */ 38 39struct mir_address { 40 nir_ssa_scalar A; 41 nir_ssa_scalar B; 42 43 midgard_index_address_format type; 44 unsigned shift; 45 unsigned bias; 46}; 47 48static bool 49mir_args_ssa(nir_ssa_scalar s, unsigned count) 50{ 51 nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 52 53 if (count > nir_op_infos[alu->op].num_inputs) 54 return false; 55 56 for (unsigned i = 0; i < count; ++i) { 57 if (!alu->src[i].src.is_ssa) 58 return false; 59 } 60 61 return true; 62} 63 64/* Matches a constant in either slot and moves it to the bias */ 65 66static void 67mir_match_constant(struct mir_address *address) 68{ 69 if (address->A.def && nir_ssa_scalar_is_const(address->A)) { 70 address->bias += nir_ssa_scalar_as_uint(address->A); 71 address->A.def = NULL; 72 } 73 74 if (address->B.def && nir_ssa_scalar_is_const(address->B)) { 75 address->bias += nir_ssa_scalar_as_uint(address->B); 76 address->B.def = NULL; 77 } 78} 79 80/* Matches an iadd when there is a free slot or constant */ 81 82/* The offset field is a 18-bit signed integer */ 83#define MAX_POSITIVE_OFFSET ((1 << 17) - 1) 84 85static void 86mir_match_iadd(struct mir_address *address, bool first_free) 87{ 88 if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) 89 return; 90 91 if (!mir_args_ssa(address->B, 2)) 92 return; 93 94 nir_op op = nir_ssa_scalar_alu_op(address->B); 95 96 if (op != nir_op_iadd) return; 97 98 nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0); 99 nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1); 100 101 if (nir_ssa_scalar_is_const(op1) && 102 nir_ssa_scalar_as_uint(op1) <= MAX_POSITIVE_OFFSET) { 103 address->bias += nir_ssa_scalar_as_uint(op1); 104 address->B = op2; 105 } else if (nir_ssa_scalar_is_const(op2) && 106 nir_ssa_scalar_as_uint(op2) <= MAX_POSITIVE_OFFSET) { 107 address->bias += nir_ssa_scalar_as_uint(op2); 108 address->B = op1; 109 } else if (!nir_ssa_scalar_is_const(op1) && 110 !nir_ssa_scalar_is_const(op2) && 111 first_free && !address->A.def) { 112 address->A = op1; 113 address->B = op2; 114 } 115} 116 117/* Matches u2u64 and sets type */ 118 119static void 120mir_match_u2u64(struct mir_address *address) 121{ 122 if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) 123 return; 124 125 if (!mir_args_ssa(address->B, 1)) 126 return; 127 128 nir_op op = nir_ssa_scalar_alu_op(address->B); 129 if (op != nir_op_u2u64) return; 130 nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); 131 132 address->B = arg; 133 address->type = midgard_index_address_u32; 134} 135 136/* Matches i2i64 and sets type */ 137 138static void 139mir_match_i2i64(struct mir_address *address) 140{ 141 if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) 142 return; 143 144 if (!mir_args_ssa(address->B, 1)) 145 return; 146 147 nir_op op = nir_ssa_scalar_alu_op(address->B); 148 if (op != nir_op_i2i64) return; 149 nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0); 150 151 address->B = arg; 152 address->type = midgard_index_address_s32; 153} 154 155/* Matches ishl to shift */ 156 157static void 158mir_match_ishl(struct mir_address *address) 159{ 160 if (!address->B.def || !nir_ssa_scalar_is_alu(address->B)) 161 return; 162 163 if (!mir_args_ssa(address->B, 2)) 164 return; 165 166 nir_op op = nir_ssa_scalar_alu_op(address->B); 167 if (op != nir_op_ishl) return; 168 nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0); 169 nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1); 170 171 if (!nir_ssa_scalar_is_const(op2)) return; 172 173 unsigned shift = nir_ssa_scalar_as_uint(op2); 174 if (shift > 0x7) return; 175 176 address->B = op1; 177 address->shift = shift; 178} 179 180/* Strings through mov which can happen from NIR vectorization */ 181 182static void 183mir_match_mov(struct mir_address *address) 184{ 185 if (address->A.def && nir_ssa_scalar_is_alu(address->A)) { 186 nir_op op = nir_ssa_scalar_alu_op(address->A); 187 188 if (op == nir_op_mov && mir_args_ssa(address->A, 1)) 189 address->A = nir_ssa_scalar_chase_alu_src(address->A, 0); 190 } 191 192 if (address->B.def && nir_ssa_scalar_is_alu(address->B)) { 193 nir_op op = nir_ssa_scalar_alu_op(address->B); 194 195 if (op == nir_op_mov && mir_args_ssa(address->B, 1)) 196 address->B = nir_ssa_scalar_chase_alu_src(address->B, 0); 197 } 198} 199 200/* Tries to pattern match into mir_address */ 201 202static struct mir_address 203mir_match_offset(nir_ssa_def *offset, bool first_free, bool extend) 204{ 205 struct mir_address address = { 206 .B = { .def = offset }, 207 .type = extend ? midgard_index_address_u64 : midgard_index_address_u32, 208 }; 209 210 mir_match_mov(&address); 211 mir_match_constant(&address); 212 mir_match_mov(&address); 213 mir_match_iadd(&address, first_free); 214 mir_match_mov(&address); 215 216 if (extend) { 217 mir_match_u2u64(&address); 218 mir_match_i2i64(&address); 219 mir_match_mov(&address); 220 } 221 222 mir_match_ishl(&address); 223 224 return address; 225} 226 227void 228mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg) 229{ 230 for(unsigned i = 0; i < 16; ++i) { 231 ins->swizzle[1][i] = 0; 232 ins->swizzle[2][i] = 0; 233 } 234 235 /* Sign extend instead of zero extend in case the address is something 236 * like `base + offset + 20`, where offset could be negative. */ 237 bool force_sext = (nir_src_bit_size(*offset) < 64); 238 239 if (!offset->is_ssa) { 240 ins->load_store.bitsize_toggle = true; 241 ins->load_store.arg_comp = seg & 0x3; 242 ins->load_store.arg_reg = (seg >> 2) & 0x7; 243 ins->src[2] = nir_src_index(ctx, offset); 244 ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset); 245 246 if (force_sext) 247 ins->load_store.index_format = midgard_index_address_s32; 248 else 249 ins->load_store.index_format = midgard_index_address_u64; 250 251 return; 252 } 253 254 bool first_free = (seg == LDST_GLOBAL); 255 256 struct mir_address match = mir_match_offset(offset->ssa, first_free, true); 257 258 if (match.A.def) { 259 ins->src[1] = nir_ssa_index(match.A.def); 260 ins->swizzle[1][0] = match.A.comp; 261 ins->src_types[1] = nir_type_uint | match.A.def->bit_size; 262 } else { 263 ins->load_store.bitsize_toggle = true; 264 ins->load_store.arg_comp = seg & 0x3; 265 ins->load_store.arg_reg = (seg >> 2) & 0x7; 266 } 267 268 if (match.B.def) { 269 ins->src[2] = nir_ssa_index(match.B.def); 270 ins->swizzle[2][0] = match.B.comp; 271 ins->src_types[2] = nir_type_uint | match.B.def->bit_size; 272 } else 273 ins->load_store.index_reg = REGISTER_LDST_ZERO; 274 275 if (force_sext) 276 match.type = midgard_index_address_s32; 277 278 ins->load_store.index_format = match.type; 279 280 assert(match.shift <= 7); 281 ins->load_store.index_shift = match.shift; 282 283 ins->constants.u32[0] = match.bias; 284} 285 286 287void 288mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias) 289{ 290 assert(src->is_ssa); 291 struct mir_address match = mir_match_offset(src->ssa, false, false); 292 293 if (match.B.def) { 294 ins->src[2] = nir_ssa_index(match.B.def); 295 296 for (unsigned i = 0; i < ARRAY_SIZE(ins->swizzle[2]); ++i) 297 ins->swizzle[2][i] = match.B.comp; 298 } 299 300 ins->load_store.index_shift = match.shift; 301 ins->constants.u32[0] = match.bias + bias; 302} 303