1/* -*- mesa-c++ -*- 2 * 3 * Copyright (c) 2018 Collabora LTD 4 * 5 * Author: Gert Wollny <gert.wollny@collabora.com> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * on the rights to use, copy, modify, merge, publish, distribute, sub 11 * license, and/or sell copies of the Software, and to permit persons to whom 12 * the Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24 * USE OR OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27 28#include "sfn_emitaluinstruction.h" 29#include "sfn_debug.h" 30 31#include "gallium/drivers/r600/r600_shader.h" 32 33namespace r600 { 34 35using std::vector; 36 37EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor): 38 EmitInstruction (processor) 39{ 40 41} 42 43bool EmitAluInstruction::do_emit(nir_instr* ir) 44{ 45 const nir_alu_instr& instr = *nir_instr_as_alu(ir); 46 47 r600::sfn_log << SfnLog::instr << "emit '" 48 << *ir 49 << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size) 50 << "' (" << __func__ << ")\n"; 51 52 preload_src(instr); 53 54 if (get_chip_class() == CAYMAN) { 55 switch (instr.op) { 56 case nir_op_fcos_r600: return emit_alu_cm_trig(instr, op1_cos); 57 case nir_op_fexp2: return emit_alu_cm_trig(instr, op1_exp_ieee); 58 case nir_op_flog2: return emit_alu_cm_trig(instr, op1_log_clamped); 59 case nir_op_frcp: return emit_alu_cm_trig(instr, op1_recip_ieee); 60 case nir_op_frsq: return emit_alu_cm_trig(instr, op1_recipsqrt_ieee1); 61 case nir_op_fsin_r600: return emit_alu_cm_trig(instr, op1_sin); 62 case nir_op_fsqrt: return emit_alu_cm_trig(instr, op1_sqrt_ieee); 63 default: 64 ; 65 } 66 } 67 68 switch (instr.op) { 69 /* These are in the ALU instruction list, but they should be texture instructions */ 70 case nir_op_b2b1: return emit_mov(instr); 71 case nir_op_b2b32: return emit_mov(instr); 72 case nir_op_b2f32: return emit_alu_b2f(instr); 73 case nir_op_b2i32: return emit_b2i32(instr); 74 case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true); 75 case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true); 76 case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true); 77 case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true); 78 case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true); 79 case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true); 80 case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false); 81 case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false); 82 case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false); 83 case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false); 84 case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false); 85 case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false); 86 case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1}); 87 case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true); 88 case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true); 89 case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true); 90 case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true); 91 case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true); 92 case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true); 93 case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false); 94 case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false); 95 case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false); 96 case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false); 97 case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false); 98 case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false); 99 case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1}); 100 case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int); 101 case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int); 102 103 case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int); 104 case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int); 105 case nir_op_cube_r600: return emit_cube(instr); 106 case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10); 107 case nir_op_f2b32: return emit_alu_f2b32(instr); 108 case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int); 109 case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint); 110 case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs}); 111 case nir_op_fadd: return emit_alu_op2(instr, op2_add); 112 case nir_op_fceil: return emit_alu_op1(instr, op1_ceil); 113 case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos); 114 case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1}); 115 case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2}); 116 case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2}); 117 118 /* These are in the ALU instruction list, but they should be texture instructions */ 119 case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); 120 case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); 121 case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true); 122 case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false); 123 case nir_op_fddy_coarse: 124 case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true); 125 case nir_op_fdot2: return emit_dot(instr, 2); 126 case nir_op_fdot3: return emit_dot(instr, 3); 127 case nir_op_fdot4: return emit_dot(instr, 4); 128 case nir_op_fdph: return emit_fdph(instr); 129 case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10); 130 case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10); 131 case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee); 132 case nir_op_ffloor: return emit_alu_op1(instr, op1_floor); 133 case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee); 134 case nir_op_ffract: return emit_alu_op1(instr, op1_fract); 135 case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10); 136 case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10); 137 case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int); 138 case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped); 139 case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); 140 case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); 141 case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10); 142 case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10); 143 case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee); 144 case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg}); 145 case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10); 146 case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10); 147 case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee); 148 case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne); 149 case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1); 150 case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp}); 151 case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin); 152 case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee); 153 case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1); 154 case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc); 155 case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int); 156 case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int); 157 case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt); 158 case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int); 159 case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int); 160 case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int); 161 case nir_op_i32csel_ge: return emit_alu_op3(instr, op3_cndge_int, {0, 1, 2}); 162 case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int, {0, 1, 2}); 163 case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int); 164 case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int); 165 case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int); 166 case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int); 167 case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int); 168 case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); 169 case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); 170 case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int); 171 case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int); 172 case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int); 173 case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int); 174 case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int); 175 case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int); 176 case nir_op_ineg: return emit_alu_ineg(instr); 177 case nir_op_inot: return emit_alu_op1(instr, op1_not_int); 178 case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int); 179 case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int); 180 case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int); 181 case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int); 182 case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int); 183 case nir_op_mov:return emit_mov(instr); 184 case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr); 185 case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr); 186 case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse); 187 case nir_op_sge: return emit_alu_op2(instr, op2_setge); 188 case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt); 189 case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint); 190 case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint); 191 case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint); 192 case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint); 193 case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); 194 case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); 195 case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2}); 196 case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint); 197 case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint); 198 case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24); 199 case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint); 200 case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0); 201 case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1); 202 case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr); 203 case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr); 204 case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int); 205 case nir_op_vec2: return emit_create_vec(instr, 2); 206 case nir_op_vec3: return emit_create_vec(instr, 3); 207 case nir_op_vec4: return emit_create_vec(instr, 4); 208 default: 209 return false; 210 } 211} 212 213void EmitAluInstruction::preload_src(const nir_alu_instr& instr) 214{ 215 const nir_op_info *op_info = &nir_op_infos[instr.op]; 216 assert(op_info->num_inputs <= 4); 217 218 unsigned nsrc_comp = num_src_comp(instr); 219 sfn_log << SfnLog::reg << "Preload:\n"; 220 for (unsigned i = 0; i < op_info->num_inputs; ++i) { 221 for (unsigned c = 0; c < nsrc_comp; ++c) { 222 m_src[i][c] = from_nir(instr.src[i], c); 223 sfn_log << SfnLog::reg << " " << *m_src[i][c]; 224 225 } 226 sfn_log << SfnLog::reg << "\n"; 227 } 228 if (instr.op == nir_op_fdph) { 229 m_src[1][3] = from_nir(instr.src[1], 3); 230 sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n"; 231 } 232 233 split_constants(instr, nsrc_comp); 234} 235 236unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr) 237{ 238 switch (instr.op) { 239 case nir_op_fdot2: 240 case nir_op_bany_inequal2: 241 case nir_op_ball_iequal2: 242 case nir_op_bany_fnequal2: 243 case nir_op_ball_fequal2: 244 case nir_op_b32any_inequal2: 245 case nir_op_b32all_iequal2: 246 case nir_op_b32any_fnequal2: 247 case nir_op_b32all_fequal2: 248 case nir_op_unpack_64_2x32_split_y: 249 return 2; 250 251 case nir_op_fdot3: 252 case nir_op_bany_inequal3: 253 case nir_op_ball_iequal3: 254 case nir_op_bany_fnequal3: 255 case nir_op_ball_fequal3: 256 case nir_op_b32any_inequal3: 257 case nir_op_b32all_iequal3: 258 case nir_op_b32any_fnequal3: 259 case nir_op_b32all_fequal3: 260 case nir_op_cube_r600: 261 return 3; 262 263 case nir_op_fdot4: 264 case nir_op_fdph: 265 case nir_op_bany_inequal4: 266 case nir_op_ball_iequal4: 267 case nir_op_bany_fnequal4: 268 case nir_op_ball_fequal4: 269 case nir_op_b32any_inequal4: 270 case nir_op_b32all_iequal4: 271 case nir_op_b32any_fnequal4: 272 case nir_op_b32all_fequal4: 273 return 4; 274 275 case nir_op_vec2: 276 case nir_op_vec3: 277 case nir_op_vec4: 278 return 1; 279 280 default: 281 return nir_dest_num_components(instr.dest.dest); 282 283 } 284} 285 286bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr) 287{ 288 AluInstruction *ir = nullptr; 289 const uint16_t src0_chan[4] = {2, 2, 0, 1}; 290 const uint16_t src1_chan[4] = {1, 0, 2, 2}; 291 292 for (int i = 0; i < 4; ++i) { 293 ir = new AluInstruction(op2_cube, from_nir(instr.dest, i), 294 from_nir(instr.src[0], src0_chan[i]), 295 from_nir(instr.src[0], src1_chan[i]), {alu_write}); 296 emit_instruction(ir); 297 } 298 ir->set_flag(alu_last_instr); 299 return true; 300} 301 302void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp) 303{ 304 const nir_op_info *op_info = &nir_op_infos[instr.op]; 305 if (op_info->num_inputs < 2) 306 return; 307 308 int nconst = 0; 309 std::array<const UniformValue *,4> c; 310 std::array<int,4> idx; 311 for (unsigned i = 0; i < op_info->num_inputs; ++i) { 312 PValue& src = m_src[i][0]; 313 assert(src); 314 sfn_log << SfnLog::reg << "Split test " << *src; 315 316 if (src->type() == Value::kconst) { 317 c[nconst] = static_cast<const UniformValue *>(src.get()); 318 idx[nconst++] = i; 319 sfn_log << SfnLog::reg << " is constant " << i; 320 } 321 sfn_log << SfnLog::reg << "\n"; 322 } 323 324 if (nconst < 2) 325 return; 326 327 unsigned sel = c[0]->sel(); 328 unsigned kcache = c[0]->kcache_bank(); 329 sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ; 330 331 for (int i = 1; i < nconst; ++i) { 332 sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n"; 333 if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) { 334 AluInstruction *ir = nullptr; 335 auto v = get_temp_vec4(); 336 for (unsigned k = 0; k < nsrc_comp; ++k) { 337 ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write}); 338 emit_instruction(ir); 339 m_src[idx[i]][k] = v[k]; 340 } 341 make_last(ir); 342 } 343 } 344} 345 346bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr) 347{ 348 if (instr.src[0].negate || instr.src[0].abs) { 349 std::cerr << "source modifiers not supported with int ops\n"; 350 return false; 351 } 352 353 AluInstruction *ir = nullptr; 354 for (int i = 0; i < 4 ; ++i) { 355 if (instr.dest.write_mask & (1 << i)){ 356 ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i), 357 m_src[0][i], write); 358 emit_instruction(ir); 359 } 360 } 361 make_last(ir); 362 return true; 363} 364 365bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, 366 const AluOpFlags& flags) 367{ 368 AluInstruction *ir = nullptr; 369 for (int i = 0; i < 4 ; ++i) { 370 if (instr.dest.write_mask & (1 << i)){ 371 ir = new AluInstruction(opcode, from_nir(instr.dest, i), 372 m_src[0][i], write); 373 374 if (flags.test(alu_src0_abs) || instr.src[0].abs) 375 ir->set_flag(alu_src0_abs); 376 377 if (instr.src[0].negate ^ flags.test(alu_src0_neg)) 378 ir->set_flag(alu_src0_neg); 379 380 if (flags.test(alu_dst_clamp) || instr.dest.saturate) 381 ir->set_flag(alu_dst_clamp); 382 383 emit_instruction(ir); 384 } 385 } 386 make_last(ir); 387 388 return true; 389} 390 391bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr) 392{ 393 /* If the op is a plain move beween SSA values we can just forward 394 * the register reference to the original register */ 395 if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa && 396 !instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) { 397 bool result = true; 398 for (int i = 0; i < 4 ; ++i) { 399 if (instr.dest.write_mask & (1 << i)){ 400 result &= inject_register(instr.dest.dest.ssa.index, i, 401 m_src[0][i], true); 402 } 403 } 404 return result; 405 } else { 406 return emit_alu_op1(instr, op1_mov); 407 } 408} 409 410bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, 411 bool absolute) 412{ 413 AluInstruction *ir = nullptr; 414 std::set<int> src_idx; 415 416 if (get_chip_class() == CAYMAN) { 417 int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3; 418 for (int i = 0; i < last_slot; ++i) { 419 bool write_comp = instr.dest.write_mask & (1 << i); 420 ir = new AluInstruction(opcode, from_nir(instr.dest, i), 421 m_src[0][write_comp ? i : 0], write_comp ? write : empty); 422 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs); 423 if (instr.src[0].negate) ir->set_flag(alu_src0_neg); 424 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 425 426 if (i == (last_slot - 1)) ir->set_flag(alu_last_instr); 427 428 emit_instruction(ir); 429 } 430 } else { 431 for (int i = 0; i < 4 ; ++i) { 432 if (instr.dest.write_mask & (1 << i)){ 433 ir = new AluInstruction(opcode, from_nir(instr.dest, i), 434 m_src[0][i], last_write); 435 if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs); 436 if (instr.src[0].negate) ir->set_flag(alu_src0_neg); 437 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 438 emit_instruction(ir); 439 } 440 } 441 } 442 return true; 443} 444 445bool EmitAluInstruction::emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode) 446{ 447 AluInstruction *ir = nullptr; 448 std::set<int> src_idx; 449 450 unsigned last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3; 451 452 for (unsigned j = 0; j < nir_dest_num_components(instr.dest.dest); ++j) { 453 for (unsigned i = 0; i < last_slot; ++i) { 454 bool write_comp = instr.dest.write_mask & (1 << j) && (i == j); 455 ir = new AluInstruction(opcode, from_nir(instr.dest, i), 456 m_src[0][j], write_comp ? write : empty); 457 if (instr.src[0].abs) ir->set_flag(alu_src0_abs); 458 if (instr.src[0].negate) ir->set_flag(alu_src0_neg); 459 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 460 461 if (i == (last_slot - 1)) ir->set_flag(alu_last_instr); 462 463 emit_instruction(ir); 464 } 465 } 466 return true; 467} 468 469 470bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op) 471{ 472 AluInstruction *ir = nullptr; 473 474 if (get_chip_class() < CAYMAN) { 475 std::array<PValue, 4> v; 476 477 for (int i = 0; i < 4; ++i) { 478 if (!(instr.dest.write_mask & (1 << i))) 479 continue; 480 v[i] = from_nir(instr.dest, i); 481 ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write}); 482 if (instr.src[0].abs) ir->set_flag(alu_src0_abs); 483 if (instr.src[0].negate) ir->set_flag(alu_src0_neg); 484 emit_instruction(ir); 485 } 486 make_last(ir); 487 488 for (int i = 0; i < 4; ++i) { 489 if (!(instr.dest.write_mask & (1 << i))) 490 continue; 491 ir = new AluInstruction(op, v[i], v[i], {alu_write}); 492 emit_instruction(ir); 493 if (op == op1_flt_to_uint) 494 make_last(ir); 495 } 496 make_last(ir); 497 } else { 498 for (int i = 0; i < 4; ++i) { 499 if (!(instr.dest.write_mask & (1 << i))) 500 continue; 501 ir = new AluInstruction(op, from_nir(instr.dest, i), m_src[0][i], {alu_write}); 502 if (instr.src[0].abs) ir->set_flag(alu_src0_abs); 503 if (instr.src[0].negate) ir->set_flag(alu_src0_neg); 504 emit_instruction(ir); 505 if (op == op1_flt_to_uint) 506 make_last(ir); 507 } 508 make_last(ir); 509 } 510 511 return true; 512} 513 514bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr) 515{ 516 AluInstruction *ir = nullptr; 517 for (int i = 0; i < 4 ; ++i) { 518 if (instr.dest.write_mask & (1 << i)){ 519 ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i), 520 m_src[0][i], literal(0.0f), write); 521 emit_instruction(ir); 522 } 523 } 524 make_last(ir); 525 return true; 526} 527 528bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr) 529{ 530 AluInstruction *ir = nullptr; 531 for (int i = 0; i < 4 ; ++i) { 532 if (!(instr.dest.write_mask & (1 << i))) 533 continue; 534 535 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), 536 m_src[0][i], Value::one_i, write); 537 emit_instruction(ir); 538 } 539 make_last(ir); 540 541 return true; 542} 543 544bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr) 545{ 546 AluInstruction *ir = nullptr; 547 for (unsigned i = 0; i < 2; ++i) { 548 if (!(instr.dest.write_mask & (1 << i))) 549 continue; 550 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), 551 m_src[0][i], write); 552 emit_instruction(ir); 553 } 554 ir->set_flag(alu_last_instr); 555 return true; 556} 557 558bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp) 559{ 560 emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0), 561 m_src[0][comp], last_write)); 562 return true; 563} 564 565bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc) 566{ 567 AluInstruction *ir = nullptr; 568 std::set<int> src_slot; 569 for(unsigned i = 0; i < nc; ++i) { 570 if (instr.dest.write_mask & (1 << i)){ 571 auto src = m_src[i][0]; 572 ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write); 573 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 574 575 // FIXME: This is a rather crude approach to fix the problem that 576 // r600 can't read from four different slots of the same component 577 // here we check only for the register index 578 if (src->type() == Value::gpr) 579 src_slot.insert(src->sel()); 580 if (src_slot.size() >= 3) { 581 src_slot.clear(); 582 ir->set_flag(alu_last_instr); 583 } 584 emit_instruction(ir); 585 } 586 } 587 if (ir) 588 ir->set_flag(alu_last_instr); 589 return true; 590} 591 592bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n) 593{ 594 const nir_alu_src& src0 = instr.src[0]; 595 const nir_alu_src& src1 = instr.src[1]; 596 597 AluInstruction *ir = nullptr; 598 for (int i = 0; i < n ; ++i) { 599 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), 600 m_src[0][i], m_src[1][i], 601 instr.dest.write_mask & (1 << i) ? write : empty); 602 603 if (src0.negate) ir->set_flag(alu_src0_neg); 604 if (src0.abs) ir->set_flag(alu_src0_abs); 605 if (src1.negate) ir->set_flag(alu_src1_neg); 606 if (src1.abs) ir->set_flag(alu_src1_abs); 607 608 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 609 emit_instruction(ir); 610 } 611 for (int i = n; i < 4 ; ++i) { 612 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), 613 Value::zero, Value::zero, 614 instr.dest.write_mask & (1 << i) ? write : empty); 615 emit_instruction(ir); 616 } 617 618 if (ir) 619 ir->set_flag(alu_last_instr); 620 return true; 621} 622 623bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr) 624{ 625 const nir_alu_src& src0 = instr.src[0]; 626 const nir_alu_src& src1 = instr.src[1]; 627 628 AluInstruction *ir = nullptr; 629 for (int i = 0; i < 3 ; ++i) { 630 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), 631 m_src[0][i], m_src[1][i], 632 instr.dest.write_mask & (1 << i) ? write : empty); 633 if (src0.negate) ir->set_flag(alu_src0_neg); 634 if (src0.abs) ir->set_flag(alu_src0_abs); 635 if (src1.negate) ir->set_flag(alu_src1_neg); 636 if (src1.abs) ir->set_flag(alu_src1_abs); 637 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 638 emit_instruction(ir); 639 } 640 641 ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f, 642 m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty); 643 if (src1.negate) ir->set_flag(alu_src1_neg); 644 if (src1.abs) ir->set_flag(alu_src1_abs); 645 emit_instruction(ir); 646 647 ir->set_flag(alu_last_instr); 648 return true; 649 650} 651 652bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op) 653{ 654 AluInstruction *ir = nullptr; 655 for (int i = 0; i < 4 ; ++i) { 656 if (instr.dest.write_mask & (1 << i)) { 657 ir = new AluInstruction(op, from_nir(instr.dest, i), 658 m_src[0][i], Value::zero, 659 write); 660 emit_instruction(ir); 661 } 662 } 663 if (ir) 664 ir->set_flag(alu_last_instr); 665 return true; 666} 667 668bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr) 669{ 670 AluInstruction *ir = nullptr; 671 for (int i = 0; i < 4 ; ++i) { 672 if (instr.dest.write_mask & (1 << i)){ 673 ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), 674 m_src[0][i], Value::one_f, write); 675 if (instr.src[0].negate) ir->set_flag(alu_src0_neg); 676 if (instr.src[0].abs) ir->set_flag(alu_src0_abs); 677 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 678 emit_instruction(ir); 679 } 680 } 681 if (ir) 682 ir->set_flag(alu_last_instr); 683 return true; 684} 685 686bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) 687{ 688 689 AluInstruction *ir = nullptr; 690 PValue v[4]; // this might need some additional temp register creation 691 for (unsigned i = 0; i < 4 ; ++i) 692 v[i] = from_nir(instr.dest, i); 693 694 EAluOp combine = all ? op2_and_int : op2_or_int; 695 696 /* For integers we can not use the modifiers, so this needs some emulation */ 697 /* Should actually be lowered with NIR */ 698 if (instr.src[0].negate == instr.src[1].negate && 699 instr.src[0].abs == instr.src[1].abs) { 700 701 for (unsigned i = 0; i < nc ; ++i) { 702 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); 703 emit_instruction(ir); 704 } 705 if (ir) 706 ir->set_flag(alu_last_instr); 707 } else { 708 std::cerr << "Negate in iequal/inequal not (yet) supported\n"; 709 return false; 710 } 711 712 for (unsigned i = 0; i < nc/2 ; ++i) { 713 ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write); 714 emit_instruction(ir); 715 } 716 if (ir) 717 ir->set_flag(alu_last_instr); 718 719 if (nc > 2) { 720 ir = new AluInstruction(combine, v[0], v[0], v[2], last_write); 721 emit_instruction(ir); 722 } 723 724 return true; 725} 726 727bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) 728{ 729 AluInstruction *ir = nullptr; 730 PValue v[4]; // this might need some additional temp register creation 731 for (unsigned i = 0; i < 4 ; ++i) 732 v[i] = from_nir(instr.dest, i); 733 734 for (unsigned i = 0; i < nc ; ++i) { 735 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); 736 737 if (instr.src[0].abs) 738 ir->set_flag(alu_src0_abs); 739 if (instr.src[0].negate) 740 ir->set_flag(alu_src0_neg); 741 742 if (instr.src[1].abs) 743 ir->set_flag(alu_src1_abs); 744 if (instr.src[1].negate) 745 ir->set_flag(alu_src1_neg); 746 747 emit_instruction(ir); 748 } 749 if (ir) 750 ir->set_flag(alu_last_instr); 751 752 for (unsigned i = 0; i < nc ; ++i) { 753 ir = new AluInstruction(op1_max4, v[i], v[i], write); 754 if (all) ir->set_flag(alu_src0_neg); 755 emit_instruction(ir); 756 } 757 758 for (unsigned i = nc; i < 4 ; ++i) { 759 ir = new AluInstruction(op1_max4, v[i], 760 all ? Value::one_f : Value::zero, write); 761 if (all) 762 ir->set_flag(alu_src0_neg); 763 764 emit_instruction(ir); 765 } 766 767 ir->set_flag(alu_last_instr); 768 769 if (all) 770 op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10; 771 else 772 op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10; 773 774 ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write); 775 if (all) 776 ir->set_flag(alu_src1_neg); 777 emit_instruction(ir); 778 779 return true; 780} 781 782bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all) 783{ 784 AluInstruction *ir = nullptr; 785 PValue v[4]; // this might need some additional temp register creation 786 for (unsigned i = 0; i < 4 ; ++i) 787 v[i] = from_nir(instr.dest, i); 788 789 for (unsigned i = 0; i < 2 ; ++i) { 790 ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); 791 if (instr.src[0].abs) 792 ir->set_flag(alu_src0_abs); 793 if (instr.src[0].negate) 794 ir->set_flag(alu_src0_neg); 795 796 if (instr.src[1].abs) 797 ir->set_flag(alu_src1_abs); 798 if (instr.src[1].negate) 799 ir->set_flag(alu_src1_neg); 800 801 emit_instruction(ir); 802 } 803 if (ir) 804 ir->set_flag(alu_last_instr); 805 806 op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int; 807 ir = new AluInstruction(op, v[0], v[0], v[1], last_write); 808 emit_instruction(ir); 809 810 return true; 811} 812 813bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode) 814{ 815 const nir_alu_src& src0 = instr.src[0]; 816 const nir_alu_src& src1 = instr.src[1]; 817 818 AluInstruction *ir = nullptr; 819 820 if (get_chip_class() == CAYMAN) { 821 for (int k = 0; k < 4; ++k) { 822 if (instr.dest.write_mask & (1 << k)) { 823 824 for (int i = 0; i < 4; i++) { 825 ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[1][k], (i == k) ? write : empty); 826 if (src0.negate) ir->set_flag(alu_src0_neg); 827 if (src0.abs) ir->set_flag(alu_src0_abs); 828 if (src1.negate) ir->set_flag(alu_src1_neg); 829 if (src1.abs) ir->set_flag(alu_src1_abs); 830 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 831 if (i == 3) ir->set_flag(alu_last_instr); 832 emit_instruction(ir); 833 } 834 } 835 } 836 } else { 837 for (int i = 0; i < 4 ; ++i) { 838 if (instr.dest.write_mask & (1 << i)){ 839 ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write); 840 if (src0.negate) ir->set_flag(alu_src0_neg); 841 if (src0.abs) ir->set_flag(alu_src0_abs); 842 if (src1.negate) ir->set_flag(alu_src1_neg); 843 if (src1.abs) ir->set_flag(alu_src1_abs); 844 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 845 emit_instruction(ir); 846 } 847 } 848 } 849 return true; 850} 851 852bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts) 853{ 854 855 const nir_alu_src& src0 = instr.src[0]; 856 const nir_alu_src& src1 = instr.src[1]; 857 858 if (src0.negate || src1.negate || 859 src0.abs || src1.abs) { 860 std::cerr << "R600: don't support modifiers with integer operations"; 861 return false; 862 } 863 return emit_alu_op2(instr, opcode, opts); 864} 865 866bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops) 867{ 868 const nir_alu_src *src0 = &instr.src[0]; 869 const nir_alu_src *src1 = &instr.src[1]; 870 871 int idx0 = 0; 872 int idx1 = 1; 873 if (ops & op2_opt_reverse) { 874 std::swap(src0, src1); 875 std::swap(idx0, idx1); 876 } 877 878 bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate; 879 880 AluInstruction *ir = nullptr; 881 for (int i = 0; i < 4 ; ++i) { 882 if (instr.dest.write_mask & (1 << i)){ 883 ir = new AluInstruction(opcode, from_nir(instr.dest, i), 884 m_src[idx0][i], m_src[idx1][i], write); 885 886 if (src0->negate) ir->set_flag(alu_src0_neg); 887 if (src0->abs) ir->set_flag(alu_src0_abs); 888 if (src1_negate) ir->set_flag(alu_src1_neg); 889 if (src1->abs) ir->set_flag(alu_src1_abs); 890 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 891 emit_instruction(ir); 892 } 893 } 894 if (ir) 895 ir->set_flag(alu_last_instr); 896 return true; 897} 898 899bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, 900 std::array<uint8_t, 3> reorder) 901{ 902 const nir_alu_src *src[3]; 903 src[0] = &instr.src[reorder[0]]; 904 src[1] = &instr.src[reorder[1]]; 905 src[2] = &instr.src[reorder[2]]; 906 907 AluInstruction *ir = nullptr; 908 for (int i = 0; i < 4 ; ++i) { 909 if (instr.dest.write_mask & (1 << i)){ 910 ir = new AluInstruction(opcode, from_nir(instr.dest, i), 911 m_src[reorder[0]][i], 912 m_src[reorder[1]][i], 913 m_src[reorder[2]][i], 914 write); 915 916 if (src[0]->negate) ir->set_flag(alu_src0_neg); 917 if (src[1]->negate) ir->set_flag(alu_src1_neg); 918 if (src[2]->negate) ir->set_flag(alu_src2_neg); 919 920 if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); 921 ir->set_flag(alu_write); 922 emit_instruction(ir); 923 } 924 } 925 make_last(ir); 926 return true; 927} 928 929bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr) 930{ 931 AluInstruction *ir = nullptr; 932 for (int i = 0; i < 4 ; ++i) { 933 if (instr.dest.write_mask & (1 << i)){ 934 ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero, 935 m_src[0][i], write); 936 emit_instruction(ir); 937 } 938 } 939 if (ir) 940 ir->set_flag(alu_last_instr); 941 942 return true; 943} 944 945static const char swz[] = "xyzw01?_"; 946 947void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src, 948 const GPRVector::Values& v, GPRVector::Values& out, int ncomp) 949{ 950 951 AluInstruction *alu = nullptr; 952 for (int i = 0; i < ncomp; ++i) { 953 alu = new AluInstruction(op1_mov, out[i], v[i], {alu_write}); 954 if (src.abs) 955 alu->set_flag(alu_src0_abs); 956 if (src.negate) 957 alu->set_flag(alu_src0_neg); 958 emit_instruction(alu); 959 } 960 make_last(alu); 961} 962 963bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, 964 bool fine) 965{ 966 967 GPRVector::Values v; 968 std::array<int, 4> writemask = {0,1,2,3}; 969 970 int ncomp = nir_dest_num_components(instr.dest.dest); 971 GPRVector::Swizzle src_swz = {7,7,7,7}; 972 for (auto i = 0; i < ncomp; ++i) 973 src_swz[i] = instr.src[0].swizzle[i]; 974 975 auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz); 976 977 if (instr.src[0].abs || instr.src[0].negate) { 978 GPRVector tmp = get_temp_vec4(); 979 split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp); 980 src = tmp; 981 } 982 983 for (int i = 0; i < 4; ++i) { 984 writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7; 985 v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0); 986 } 987 988 /* This is querying the dreivatives of the output fb, so we would either need 989 * access to the neighboring pixels or to the framebuffer. Neither is currently 990 * implemented */ 991 GPRVector dst(v); 992 993 auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue()); 994 tex->set_dest_swizzle(writemask); 995 996 if (fine) 997 tex->set_flag(TexInstruction::grad_fine); 998 999 emit_instruction(tex); 1000 1001 return true; 1002} 1003 1004bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr) 1005{ 1006 auto tmp = get_temp_register(); 1007 emit_instruction(op2_lshr_int, tmp, 1008 {m_src[0][0], PValue(new LiteralValue(16))}, 1009 {alu_write, alu_last_instr}); 1010 1011 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), 1012 {tmp}, {alu_write, alu_last_instr}); 1013 1014 return true; 1015} 1016 1017bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr) 1018{ 1019 emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), 1020 {m_src[0][0]},{alu_write, alu_last_instr}); 1021 return true; 1022} 1023 1024bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr) 1025{ 1026 PValue x = get_temp_register(); 1027 PValue y = get_temp_register(); 1028 1029 emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write}); 1030 emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr}); 1031 1032 emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr}); 1033 1034 emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr}); 1035 1036 return true; 1037} 1038 1039} 1040