1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2015 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 
 */

#include "brw_nir.h"
#include "brw_vec4.h"
#include "brw_vec4_builder.h"
#include "brw_vec4_surface_builder.h"

using namespace brw;
using namespace brw::surface_access;

namespace brw {

/* Top-level entry point for NIR -> vec4 translation: set up uniforms (if
 * any) and walk the shader's entry-point function.
 */
void
vec4_visitor::emit_nir_code()
{
   if (nir->num_uniforms > 0)
      nir_setup_uniforms();

   nir_emit_impl(nir_shader_get_entrypoint((nir_shader *)nir));
}

/* Uniform storage is counted in vec4 slots; NIR reports bytes, and a vec4
 * slot is 16 bytes.
 */
void
vec4_visitor::nir_setup_uniforms()
{
   uniforms = nir->num_uniforms / 16;
}

/* Translate one nir_function_impl: allocate VGRF storage for every NIR
 * register and an SSA-value lookup table, then emit the body.
 */
void
vec4_visitor::nir_emit_impl(nir_function_impl *impl)
{
   nir_locals = ralloc_array(mem_ctx, dst_reg, impl->reg_alloc);
   for (unsigned i = 0; i < impl->reg_alloc; i++) {
      nir_locals[i] = dst_reg();
   }

   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      /* A scalar register reports zero array elements; treat it as one. */
      unsigned array_elems =
         reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
      /* 64-bit registers need two 32-bit VGRF slots per element. */
      const unsigned num_regs = array_elems * DIV_ROUND_UP(reg->bit_size, 32);
      nir_locals[reg->index] = dst_reg(VGRF, alloc.allocate(num_regs));

      if (reg->bit_size == 64)
         nir_locals[reg->index].type = BRW_REGISTER_TYPE_DF;
   }

   nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc);

   nir_emit_cf_list(&impl->body);
}

/* Emit a NIR control-flow list in order, dispatching on node type. */
void
vec4_visitor::nir_emit_cf_list(exec_list *list)
{
   exec_list_validate(list);
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_if:
         nir_emit_if(nir_cf_node_as_if(node));
         break;

      case nir_cf_node_loop:
         nir_emit_loop(nir_cf_node_as_loop(node));
         break;

      case nir_cf_node_block:
         nir_emit_block(nir_cf_node_as_block(node));
         break;

      default:
         unreachable("Invalid CFG node block");
      }
   }
}

/* Emit an if/else/endif region.  The condition is evaluated into the flag
 * register and the IF predicates on it.
 */
void
vec4_visitor::nir_emit_if(nir_if *if_stmt)
{
   /* First, put the condition in f0 */
   src_reg condition = get_nir_src(if_stmt->condition, BRW_REGISTER_TYPE_D, 1);
   vec4_instruction *inst = emit(MOV(dst_null_d(), condition));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   /* We can just predicate based on the X channel, as the condition only
    * goes on its own line */
   emit(IF(BRW_PREDICATE_ALIGN16_REPLICATE_X));

   nir_emit_cf_list(&if_stmt->then_list);

   /* note: if the else is empty, dead CF elimination will remove it */
   emit(BRW_OPCODE_ELSE);

   nir_emit_cf_list(&if_stmt->else_list);

   emit(BRW_OPCODE_ENDIF);
}

/* Emit a loop as a DO ... WHILE region; breaks/continues arrive as NIR jump
 * instructions inside the body.
 */
void
vec4_visitor::nir_emit_loop(nir_loop *loop)
{
   emit(BRW_OPCODE_DO);

   nir_emit_cf_list(&loop->body);

   emit(BRW_OPCODE_WHILE);
}

/* Emit every instruction of a basic block in order. */
void
vec4_visitor::nir_emit_block(nir_block *block)
{
   nir_foreach_instr(instr, block) {
      nir_emit_instr(instr);
   }
}

/* Dispatch a single NIR instruction to its type-specific emitter.  base_ir
 * is updated so emitted instructions get annotated with their origin.
 */
void
vec4_visitor::nir_emit_instr(nir_instr *instr)
{
   base_ir = instr;

   switch (instr->type) {
   case nir_instr_type_load_const:
      nir_emit_load_const(nir_instr_as_load_const(instr));
      break;

   case nir_instr_type_intrinsic:
      nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
      break;

   case nir_instr_type_alu:
      nir_emit_alu(nir_instr_as_alu(instr));
      break;

   case nir_instr_type_jump:
      nir_emit_jump(nir_instr_as_jump(instr));
      break;

   case nir_instr_type_tex:
      nir_emit_texture(nir_instr_as_tex(instr));
      break;

   case nir_instr_type_ssa_undef:
      nir_emit_undef(nir_instr_as_ssa_undef(instr));
      break;

   default:
      unreachable("VS instruction not yet implemented by NIR->vec4");
   }
}

171b8e80941Smrgstatic dst_reg 172b8e80941Smrgdst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg, 173b8e80941Smrg unsigned base_offset, nir_src *indirect) 174b8e80941Smrg{ 175b8e80941Smrg dst_reg reg; 176b8e80941Smrg 177b8e80941Smrg reg = v->nir_locals[nir_reg->index]; 178b8e80941Smrg if (nir_reg->bit_size == 64) 179b8e80941Smrg reg.type = BRW_REGISTER_TYPE_DF; 180b8e80941Smrg reg = offset(reg, 8, base_offset); 181b8e80941Smrg if (indirect) { 182b8e80941Smrg reg.reladdr = 183b8e80941Smrg new(v->mem_ctx) src_reg(v->get_nir_src(*indirect, 184b8e80941Smrg BRW_REGISTER_TYPE_D, 185b8e80941Smrg 1)); 186b8e80941Smrg } 187b8e80941Smrg return reg; 188b8e80941Smrg} 189b8e80941Smrg 190b8e80941Smrgdst_reg 191b8e80941Smrgvec4_visitor::get_nir_dest(const nir_dest &dest) 192b8e80941Smrg{ 193b8e80941Smrg if (dest.is_ssa) { 194b8e80941Smrg dst_reg dst = 195b8e80941Smrg dst_reg(VGRF, alloc.allocate(DIV_ROUND_UP(dest.ssa.bit_size, 32))); 196b8e80941Smrg if (dest.ssa.bit_size == 64) 197b8e80941Smrg dst.type = BRW_REGISTER_TYPE_DF; 198b8e80941Smrg nir_ssa_values[dest.ssa.index] = dst; 199b8e80941Smrg return dst; 200b8e80941Smrg } else { 201b8e80941Smrg return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset, 202b8e80941Smrg dest.reg.indirect); 203b8e80941Smrg } 204b8e80941Smrg} 205b8e80941Smrg 206b8e80941Smrgdst_reg 207b8e80941Smrgvec4_visitor::get_nir_dest(const nir_dest &dest, enum brw_reg_type type) 208b8e80941Smrg{ 209b8e80941Smrg return retype(get_nir_dest(dest), type); 210b8e80941Smrg} 211b8e80941Smrg 212b8e80941Smrgdst_reg 213b8e80941Smrgvec4_visitor::get_nir_dest(const nir_dest &dest, nir_alu_type type) 214b8e80941Smrg{ 215b8e80941Smrg return get_nir_dest(dest, brw_type_for_nir_type(devinfo, type)); 216b8e80941Smrg} 217b8e80941Smrg 218b8e80941Smrgsrc_reg 219b8e80941Smrgvec4_visitor::get_nir_src(const nir_src &src, enum brw_reg_type type, 220b8e80941Smrg unsigned num_components) 221b8e80941Smrg{ 222b8e80941Smrg dst_reg reg; 223b8e80941Smrg 224b8e80941Smrg if 
(src.is_ssa) { 225b8e80941Smrg assert(src.ssa != NULL); 226b8e80941Smrg reg = nir_ssa_values[src.ssa->index]; 227b8e80941Smrg } 228b8e80941Smrg else { 229b8e80941Smrg reg = dst_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset, 230b8e80941Smrg src.reg.indirect); 231b8e80941Smrg } 232b8e80941Smrg 233b8e80941Smrg reg = retype(reg, type); 234b8e80941Smrg 235b8e80941Smrg src_reg reg_as_src = src_reg(reg); 236b8e80941Smrg reg_as_src.swizzle = brw_swizzle_for_size(num_components); 237b8e80941Smrg return reg_as_src; 238b8e80941Smrg} 239b8e80941Smrg 240b8e80941Smrgsrc_reg 241b8e80941Smrgvec4_visitor::get_nir_src(const nir_src &src, nir_alu_type type, 242b8e80941Smrg unsigned num_components) 243b8e80941Smrg{ 244b8e80941Smrg return get_nir_src(src, brw_type_for_nir_type(devinfo, type), 245b8e80941Smrg num_components); 246b8e80941Smrg} 247b8e80941Smrg 248b8e80941Smrgsrc_reg 249b8e80941Smrgvec4_visitor::get_nir_src(const nir_src &src, unsigned num_components) 250b8e80941Smrg{ 251b8e80941Smrg /* if type is not specified, default to signed int */ 252b8e80941Smrg return get_nir_src(src, nir_type_int32, num_components); 253b8e80941Smrg} 254b8e80941Smrg 255b8e80941Smrgsrc_reg 256b8e80941Smrgvec4_visitor::get_nir_src_imm(const nir_src &src) 257b8e80941Smrg{ 258b8e80941Smrg assert(nir_src_num_components(src) == 1); 259b8e80941Smrg assert(nir_src_bit_size(src) == 32); 260b8e80941Smrg return nir_src_is_const(src) ? src_reg(brw_imm_d(nir_src_as_int(src))) : 261b8e80941Smrg get_nir_src(src, 1); 262b8e80941Smrg} 263b8e80941Smrg 264b8e80941Smrgsrc_reg 265b8e80941Smrgvec4_visitor::get_indirect_offset(nir_intrinsic_instr *instr) 266b8e80941Smrg{ 267b8e80941Smrg nir_src *offset_src = nir_get_io_offset_src(instr); 268b8e80941Smrg 269b8e80941Smrg if (nir_src_is_const(*offset_src)) { 270b8e80941Smrg /* The only constant offset we should find is 0. brw_nir.c's 271b8e80941Smrg * add_const_offset_to_base() will fold other constant offsets 272b8e80941Smrg * into instr->const_index[0]. 
273b8e80941Smrg */ 274b8e80941Smrg assert(nir_src_as_uint(*offset_src) == 0); 275b8e80941Smrg return src_reg(); 276b8e80941Smrg } 277b8e80941Smrg 278b8e80941Smrg return get_nir_src(*offset_src, BRW_REGISTER_TYPE_UD, 1); 279b8e80941Smrg} 280b8e80941Smrg 281b8e80941Smrgstatic src_reg 282b8e80941Smrgsetup_imm_df(const vec4_builder &bld, double v) 283b8e80941Smrg{ 284b8e80941Smrg const gen_device_info *devinfo = bld.shader->devinfo; 285b8e80941Smrg assert(devinfo->gen >= 7); 286b8e80941Smrg 287b8e80941Smrg if (devinfo->gen >= 8) 288b8e80941Smrg return brw_imm_df(v); 289b8e80941Smrg 290b8e80941Smrg /* gen7.5 does not support DF immediates straighforward but the DIM 291b8e80941Smrg * instruction allows to set the 64-bit immediate value. 292b8e80941Smrg */ 293b8e80941Smrg if (devinfo->is_haswell) { 294b8e80941Smrg const vec4_builder ubld = bld.exec_all(); 295b8e80941Smrg const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_DF); 296b8e80941Smrg ubld.DIM(dst, brw_imm_df(v)); 297b8e80941Smrg return swizzle(src_reg(dst), BRW_SWIZZLE_XXXX); 298b8e80941Smrg } 299b8e80941Smrg 300b8e80941Smrg /* gen7 does not support DF immediates */ 301b8e80941Smrg union { 302b8e80941Smrg double d; 303b8e80941Smrg struct { 304b8e80941Smrg uint32_t i1; 305b8e80941Smrg uint32_t i2; 306b8e80941Smrg }; 307b8e80941Smrg } di; 308b8e80941Smrg 309b8e80941Smrg di.d = v; 310b8e80941Smrg 311b8e80941Smrg /* Write the low 32-bit of the constant to the X:UD channel and the 312b8e80941Smrg * high 32-bit to the Y:UD channel to build the constant in a VGRF. 313b8e80941Smrg * We have to do this twice (offset 0 and offset 1), since a DF VGRF takes 314b8e80941Smrg * two SIMD8 registers in SIMD4x2 execution. Finally, return a swizzle 315b8e80941Smrg * XXXX so any access to the VGRF only reads the constant data in these 316b8e80941Smrg * channels. 
317b8e80941Smrg */ 318b8e80941Smrg const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); 319b8e80941Smrg for (unsigned n = 0; n < 2; n++) { 320b8e80941Smrg const vec4_builder ubld = bld.exec_all().group(4, n); 321b8e80941Smrg ubld.MOV(writemask(offset(tmp, 8, n), WRITEMASK_X), brw_imm_ud(di.i1)); 322b8e80941Smrg ubld.MOV(writemask(offset(tmp, 8, n), WRITEMASK_Y), brw_imm_ud(di.i2)); 323b8e80941Smrg } 324b8e80941Smrg 325b8e80941Smrg return swizzle(src_reg(retype(tmp, BRW_REGISTER_TYPE_DF)), BRW_SWIZZLE_XXXX); 326b8e80941Smrg} 327b8e80941Smrg 328b8e80941Smrgvoid 329b8e80941Smrgvec4_visitor::nir_emit_load_const(nir_load_const_instr *instr) 330b8e80941Smrg{ 331b8e80941Smrg dst_reg reg; 332b8e80941Smrg 333b8e80941Smrg if (instr->def.bit_size == 64) { 334b8e80941Smrg reg = dst_reg(VGRF, alloc.allocate(2)); 335b8e80941Smrg reg.type = BRW_REGISTER_TYPE_DF; 336b8e80941Smrg } else { 337b8e80941Smrg reg = dst_reg(VGRF, alloc.allocate(1)); 338b8e80941Smrg reg.type = BRW_REGISTER_TYPE_D; 339b8e80941Smrg } 340b8e80941Smrg 341b8e80941Smrg const vec4_builder ibld = vec4_builder(this).at_end(); 342b8e80941Smrg unsigned remaining = brw_writemask_for_size(instr->def.num_components); 343b8e80941Smrg 344b8e80941Smrg /* @FIXME: consider emitting vector operations to save some MOVs in 345b8e80941Smrg * cases where the components are representable in 8 bits. 346b8e80941Smrg * For now, we emit a MOV for each distinct value. 
347b8e80941Smrg */ 348b8e80941Smrg for (unsigned i = 0; i < instr->def.num_components; i++) { 349b8e80941Smrg unsigned writemask = 1 << i; 350b8e80941Smrg 351b8e80941Smrg if ((remaining & writemask) == 0) 352b8e80941Smrg continue; 353b8e80941Smrg 354b8e80941Smrg for (unsigned j = i; j < instr->def.num_components; j++) { 355b8e80941Smrg if ((instr->def.bit_size == 32 && 356b8e80941Smrg instr->value[i].u32 == instr->value[j].u32) || 357b8e80941Smrg (instr->def.bit_size == 64 && 358b8e80941Smrg instr->value[i].f64 == instr->value[j].f64)) { 359b8e80941Smrg writemask |= 1 << j; 360b8e80941Smrg } 361b8e80941Smrg } 362b8e80941Smrg 363b8e80941Smrg reg.writemask = writemask; 364b8e80941Smrg if (instr->def.bit_size == 64) { 365b8e80941Smrg emit(MOV(reg, setup_imm_df(ibld, instr->value[i].f64))); 366b8e80941Smrg } else { 367b8e80941Smrg emit(MOV(reg, brw_imm_d(instr->value[i].i32))); 368b8e80941Smrg } 369b8e80941Smrg 370b8e80941Smrg remaining &= ~writemask; 371b8e80941Smrg } 372b8e80941Smrg 373b8e80941Smrg /* Set final writemask */ 374b8e80941Smrg reg.writemask = brw_writemask_for_size(instr->def.num_components); 375b8e80941Smrg 376b8e80941Smrg nir_ssa_values[instr->def.index] = reg; 377b8e80941Smrg} 378b8e80941Smrg 379b8e80941Smrgsrc_reg 380b8e80941Smrgvec4_visitor::get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr) 381b8e80941Smrg{ 382b8e80941Smrg /* SSBO stores are weird in that their index is in src[1] */ 383b8e80941Smrg const unsigned src = instr->intrinsic == nir_intrinsic_store_ssbo ? 
1 : 0; 384b8e80941Smrg 385b8e80941Smrg src_reg surf_index; 386b8e80941Smrg if (nir_src_is_const(instr->src[src])) { 387b8e80941Smrg unsigned index = prog_data->base.binding_table.ssbo_start + 388b8e80941Smrg nir_src_as_uint(instr->src[src]); 389b8e80941Smrg surf_index = brw_imm_ud(index); 390b8e80941Smrg } else { 391b8e80941Smrg surf_index = src_reg(this, glsl_type::uint_type); 392b8e80941Smrg emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[src], 1), 393b8e80941Smrg brw_imm_ud(prog_data->base.binding_table.ssbo_start))); 394b8e80941Smrg surf_index = emit_uniformize(surf_index); 395b8e80941Smrg } 396b8e80941Smrg 397b8e80941Smrg return surf_index; 398b8e80941Smrg} 399b8e80941Smrg 400b8e80941Smrgvoid 401b8e80941Smrgvec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) 402b8e80941Smrg{ 403b8e80941Smrg dst_reg dest; 404b8e80941Smrg src_reg src; 405b8e80941Smrg 406b8e80941Smrg switch (instr->intrinsic) { 407b8e80941Smrg 408b8e80941Smrg case nir_intrinsic_load_input: { 409b8e80941Smrg /* We set EmitNoIndirectInput for VS */ 410b8e80941Smrg unsigned load_offset = nir_src_as_uint(instr->src[0]); 411b8e80941Smrg 412b8e80941Smrg dest = get_nir_dest(instr->dest); 413b8e80941Smrg dest.writemask = brw_writemask_for_size(instr->num_components); 414b8e80941Smrg 415b8e80941Smrg src = src_reg(ATTR, instr->const_index[0] + load_offset, 416b8e80941Smrg glsl_type::uvec4_type); 417b8e80941Smrg src = retype(src, dest.type); 418b8e80941Smrg 419b8e80941Smrg bool is_64bit = nir_dest_bit_size(instr->dest) == 64; 420b8e80941Smrg if (is_64bit) { 421b8e80941Smrg dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); 422b8e80941Smrg src.swizzle = BRW_SWIZZLE_XYZW; 423b8e80941Smrg shuffle_64bit_data(tmp, src, false); 424b8e80941Smrg emit(MOV(dest, src_reg(tmp))); 425b8e80941Smrg } else { 426b8e80941Smrg /* Swizzle source based on component layout qualifier */ 427b8e80941Smrg src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); 428b8e80941Smrg emit(MOV(dest, src)); 
429b8e80941Smrg } 430b8e80941Smrg break; 431b8e80941Smrg } 432b8e80941Smrg 433b8e80941Smrg case nir_intrinsic_store_output: { 434b8e80941Smrg unsigned store_offset = nir_src_as_uint(instr->src[1]); 435b8e80941Smrg int varying = instr->const_index[0] + store_offset; 436b8e80941Smrg 437b8e80941Smrg bool is_64bit = nir_src_bit_size(instr->src[0]) == 64; 438b8e80941Smrg if (is_64bit) { 439b8e80941Smrg src_reg data; 440b8e80941Smrg src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_DF, 441b8e80941Smrg instr->num_components); 442b8e80941Smrg data = src_reg(this, glsl_type::dvec4_type); 443b8e80941Smrg shuffle_64bit_data(dst_reg(data), src, true); 444b8e80941Smrg src = retype(data, BRW_REGISTER_TYPE_F); 445b8e80941Smrg } else { 446b8e80941Smrg src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, 447b8e80941Smrg instr->num_components); 448b8e80941Smrg } 449b8e80941Smrg 450b8e80941Smrg unsigned c = nir_intrinsic_component(instr); 451b8e80941Smrg output_reg[varying][c] = dst_reg(src); 452b8e80941Smrg output_num_components[varying][c] = instr->num_components; 453b8e80941Smrg 454b8e80941Smrg unsigned num_components = instr->num_components; 455b8e80941Smrg if (is_64bit) 456b8e80941Smrg num_components *= 2; 457b8e80941Smrg 458b8e80941Smrg output_reg[varying][c] = dst_reg(src); 459b8e80941Smrg output_num_components[varying][c] = MIN2(4, num_components); 460b8e80941Smrg 461b8e80941Smrg if (is_64bit && num_components > 4) { 462b8e80941Smrg assert(num_components <= 8); 463b8e80941Smrg output_reg[varying + 1][c] = byte_offset(dst_reg(src), REG_SIZE); 464b8e80941Smrg output_num_components[varying + 1][c] = num_components - 4; 465b8e80941Smrg } 466b8e80941Smrg break; 467b8e80941Smrg } 468b8e80941Smrg 469b8e80941Smrg case nir_intrinsic_get_buffer_size: { 470b8e80941Smrg assert(nir_src_num_components(instr->src[0]) == 1); 471b8e80941Smrg unsigned ssbo_index = nir_src_is_const(instr->src[0]) ? 
472b8e80941Smrg nir_src_as_uint(instr->src[0]) : 0; 473b8e80941Smrg 474b8e80941Smrg const unsigned index = 475b8e80941Smrg prog_data->base.binding_table.ssbo_start + ssbo_index; 476b8e80941Smrg dst_reg result_dst = get_nir_dest(instr->dest); 477b8e80941Smrg vec4_instruction *inst = new(mem_ctx) 478b8e80941Smrg vec4_instruction(SHADER_OPCODE_GET_BUFFER_SIZE, result_dst); 479b8e80941Smrg 480b8e80941Smrg inst->base_mrf = 2; 481b8e80941Smrg inst->mlen = 1; /* always at least one */ 482b8e80941Smrg inst->src[1] = brw_imm_ud(index); 483b8e80941Smrg 484b8e80941Smrg /* MRF for the first parameter */ 485b8e80941Smrg src_reg lod = brw_imm_d(0); 486b8e80941Smrg int param_base = inst->base_mrf; 487b8e80941Smrg int writemask = WRITEMASK_X; 488b8e80941Smrg emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod)); 489b8e80941Smrg 490b8e80941Smrg emit(inst); 491b8e80941Smrg break; 492b8e80941Smrg } 493b8e80941Smrg 494b8e80941Smrg case nir_intrinsic_store_ssbo: { 495b8e80941Smrg assert(devinfo->gen >= 7); 496b8e80941Smrg 497b8e80941Smrg /* brw_nir_lower_mem_access_bit_sizes takes care of this */ 498b8e80941Smrg assert(nir_src_bit_size(instr->src[0]) == 32); 499b8e80941Smrg assert(nir_intrinsic_write_mask(instr) == 500b8e80941Smrg (1u << instr->num_components) - 1); 501b8e80941Smrg 502b8e80941Smrg src_reg surf_index = get_nir_ssbo_intrinsic_index(instr); 503b8e80941Smrg src_reg offset_reg = retype(get_nir_src_imm(instr->src[2]), 504b8e80941Smrg BRW_REGISTER_TYPE_UD); 505b8e80941Smrg 506b8e80941Smrg /* Value */ 507b8e80941Smrg src_reg val_reg = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, 4); 508b8e80941Smrg 509b8e80941Smrg /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped 510b8e80941Smrg * writes will use SIMD8 mode. 
In order to hide this and keep symmetry across 511b8e80941Smrg * typed and untyped messages and across hardware platforms, the 512b8e80941Smrg * current implementation of the untyped messages will transparently convert 513b8e80941Smrg * the SIMD4x2 payload into an equivalent SIMD8 payload by transposing it 514b8e80941Smrg * and enabling only channel X on the SEND instruction. 515b8e80941Smrg * 516b8e80941Smrg * The above, works well for full vector writes, but not for partial writes 517b8e80941Smrg * where we want to write some channels and not others, like when we have 518b8e80941Smrg * code such as v.xyw = vec3(1,2,4). Because the untyped write messages are 519b8e80941Smrg * quite restrictive with regards to the channel enables we can configure in 520b8e80941Smrg * the message descriptor (not all combinations are allowed) we cannot simply 521b8e80941Smrg * implement these scenarios with a single message while keeping the 522b8e80941Smrg * aforementioned symmetry in the implementation. For now we de decided that 523b8e80941Smrg * it is better to keep the symmetry to reduce complexity, so in situations 524b8e80941Smrg * such as the one described we end up emitting two untyped write messages 525b8e80941Smrg * (one for xy and another for w). 526b8e80941Smrg * 527b8e80941Smrg * The code below packs consecutive channels into a single write message, 528b8e80941Smrg * detects gaps in the vector write and if needed, sends a second message 529b8e80941Smrg * with the remaining channels. If in the future we decide that we want to 530b8e80941Smrg * emit a single message at the expense of losing the symmetry in the 531b8e80941Smrg * implementation we can: 532b8e80941Smrg * 533b8e80941Smrg * 1) For IvyBridge: Only use the red channel of the untyped write SIMD8 534b8e80941Smrg * message payload. 
In this mode we can write up to 8 offsets and dwords 535b8e80941Smrg * to the red channel only (for the two vec4s in the SIMD4x2 execution) 536b8e80941Smrg * and select which of the 8 channels carry data to write by setting the 537b8e80941Smrg * appropriate writemask in the dst register of the SEND instruction. 538b8e80941Smrg * It would require to write a new generator opcode specifically for 539b8e80941Smrg * IvyBridge since we would need to prepare a SIMD8 payload that could 540b8e80941Smrg * use any channel, not just X. 541b8e80941Smrg * 542b8e80941Smrg * 2) For Haswell+: Simply send a single write message but set the writemask 543b8e80941Smrg * on the dst of the SEND instruction to select the channels we want to 544b8e80941Smrg * write. It would require to modify the current messages to receive 545b8e80941Smrg * and honor the writemask provided. 546b8e80941Smrg */ 547b8e80941Smrg const vec4_builder bld = vec4_builder(this).at_end() 548b8e80941Smrg .annotate(current_annotation, base_ir); 549b8e80941Smrg 550b8e80941Smrg emit_untyped_write(bld, surf_index, offset_reg, val_reg, 551b8e80941Smrg 1 /* dims */, instr->num_components /* size */, 552b8e80941Smrg BRW_PREDICATE_NONE); 553b8e80941Smrg break; 554b8e80941Smrg } 555b8e80941Smrg 556b8e80941Smrg case nir_intrinsic_load_ssbo: { 557b8e80941Smrg assert(devinfo->gen >= 7); 558b8e80941Smrg 559b8e80941Smrg /* brw_nir_lower_mem_access_bit_sizes takes care of this */ 560b8e80941Smrg assert(nir_dest_bit_size(instr->dest) == 32); 561b8e80941Smrg 562b8e80941Smrg src_reg surf_index = get_nir_ssbo_intrinsic_index(instr); 563b8e80941Smrg src_reg offset_reg = retype(get_nir_src_imm(instr->src[1]), 564b8e80941Smrg BRW_REGISTER_TYPE_UD); 565b8e80941Smrg 566b8e80941Smrg /* Read the vector */ 567b8e80941Smrg const vec4_builder bld = vec4_builder(this).at_end() 568b8e80941Smrg .annotate(current_annotation, base_ir); 569b8e80941Smrg 570b8e80941Smrg src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, 571b8e80941Smrg 
1 /* dims */, 4 /* size*/, 572b8e80941Smrg BRW_PREDICATE_NONE); 573b8e80941Smrg dst_reg dest = get_nir_dest(instr->dest); 574b8e80941Smrg read_result.type = dest.type; 575b8e80941Smrg read_result.swizzle = brw_swizzle_for_size(instr->num_components); 576b8e80941Smrg emit(MOV(dest, read_result)); 577b8e80941Smrg break; 578b8e80941Smrg } 579b8e80941Smrg 580b8e80941Smrg case nir_intrinsic_ssbo_atomic_add: { 581b8e80941Smrg int op = BRW_AOP_ADD; 582b8e80941Smrg 583b8e80941Smrg if (nir_src_is_const(instr->src[2])) { 584b8e80941Smrg int add_val = nir_src_as_int(instr->src[2]); 585b8e80941Smrg if (add_val == 1) 586b8e80941Smrg op = BRW_AOP_INC; 587b8e80941Smrg else if (add_val == -1) 588b8e80941Smrg op = BRW_AOP_DEC; 589b8e80941Smrg } 590b8e80941Smrg 591b8e80941Smrg nir_emit_ssbo_atomic(op, instr); 592b8e80941Smrg break; 593b8e80941Smrg } 594b8e80941Smrg case nir_intrinsic_ssbo_atomic_imin: 595b8e80941Smrg nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr); 596b8e80941Smrg break; 597b8e80941Smrg case nir_intrinsic_ssbo_atomic_umin: 598b8e80941Smrg nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr); 599b8e80941Smrg break; 600b8e80941Smrg case nir_intrinsic_ssbo_atomic_imax: 601b8e80941Smrg nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr); 602b8e80941Smrg break; 603b8e80941Smrg case nir_intrinsic_ssbo_atomic_umax: 604b8e80941Smrg nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr); 605b8e80941Smrg break; 606b8e80941Smrg case nir_intrinsic_ssbo_atomic_and: 607b8e80941Smrg nir_emit_ssbo_atomic(BRW_AOP_AND, instr); 608b8e80941Smrg break; 609b8e80941Smrg case nir_intrinsic_ssbo_atomic_or: 610b8e80941Smrg nir_emit_ssbo_atomic(BRW_AOP_OR, instr); 611b8e80941Smrg break; 612b8e80941Smrg case nir_intrinsic_ssbo_atomic_xor: 613b8e80941Smrg nir_emit_ssbo_atomic(BRW_AOP_XOR, instr); 614b8e80941Smrg break; 615b8e80941Smrg case nir_intrinsic_ssbo_atomic_exchange: 616b8e80941Smrg nir_emit_ssbo_atomic(BRW_AOP_MOV, instr); 617b8e80941Smrg break; 618b8e80941Smrg case nir_intrinsic_ssbo_atomic_comp_swap: 619b8e80941Smrg 
nir_emit_ssbo_atomic(BRW_AOP_CMPWR, instr); 620b8e80941Smrg break; 621b8e80941Smrg 622b8e80941Smrg case nir_intrinsic_load_vertex_id: 623b8e80941Smrg unreachable("should be lowered by lower_vertex_id()"); 624b8e80941Smrg 625b8e80941Smrg case nir_intrinsic_load_vertex_id_zero_base: 626b8e80941Smrg case nir_intrinsic_load_base_vertex: 627b8e80941Smrg case nir_intrinsic_load_instance_id: 628b8e80941Smrg case nir_intrinsic_load_base_instance: 629b8e80941Smrg case nir_intrinsic_load_draw_id: 630b8e80941Smrg case nir_intrinsic_load_invocation_id: 631b8e80941Smrg unreachable("should be lowered by brw_nir_lower_vs_inputs()"); 632b8e80941Smrg 633b8e80941Smrg case nir_intrinsic_load_uniform: { 634b8e80941Smrg /* Offsets are in bytes but they should always be multiples of 4 */ 635b8e80941Smrg assert(nir_intrinsic_base(instr) % 4 == 0); 636b8e80941Smrg 637b8e80941Smrg dest = get_nir_dest(instr->dest); 638b8e80941Smrg 639b8e80941Smrg src = src_reg(dst_reg(UNIFORM, nir_intrinsic_base(instr) / 16)); 640b8e80941Smrg src.type = dest.type; 641b8e80941Smrg 642b8e80941Smrg /* Uniforms don't actually have to be vec4 aligned. In the case that 643b8e80941Smrg * it isn't, we have to use a swizzle to shift things around. They 644b8e80941Smrg * do still have the std140 alignment requirement that vec2's have to 645b8e80941Smrg * be vec2-aligned and vec3's and vec4's have to be vec4-aligned. 646b8e80941Smrg * 647b8e80941Smrg * The swizzle also works in the indirect case as the generator adds 648b8e80941Smrg * the swizzle to the offset for us. 
649b8e80941Smrg */ 650b8e80941Smrg const int type_size = type_sz(src.type); 651b8e80941Smrg unsigned shift = (nir_intrinsic_base(instr) % 16) / type_size; 652b8e80941Smrg assert(shift + instr->num_components <= 4); 653b8e80941Smrg 654b8e80941Smrg if (nir_src_is_const(instr->src[0])) { 655b8e80941Smrg const unsigned load_offset = nir_src_as_uint(instr->src[0]); 656b8e80941Smrg /* Offsets are in bytes but they should always be multiples of 4 */ 657b8e80941Smrg assert(load_offset % 4 == 0); 658b8e80941Smrg 659b8e80941Smrg src.swizzle = brw_swizzle_for_size(instr->num_components); 660b8e80941Smrg dest.writemask = brw_writemask_for_size(instr->num_components); 661b8e80941Smrg unsigned offset = load_offset + shift * type_size; 662b8e80941Smrg src.offset = ROUND_DOWN_TO(offset, 16); 663b8e80941Smrg shift = (offset % 16) / type_size; 664b8e80941Smrg assert(shift + instr->num_components <= 4); 665b8e80941Smrg src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift); 666b8e80941Smrg 667b8e80941Smrg emit(MOV(dest, src)); 668b8e80941Smrg } else { 669b8e80941Smrg /* Uniform arrays are vec4 aligned, because of std140 alignment 670b8e80941Smrg * rules. 
671b8e80941Smrg */ 672b8e80941Smrg assert(shift == 0); 673b8e80941Smrg 674b8e80941Smrg src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); 675b8e80941Smrg 676b8e80941Smrg /* MOV_INDIRECT is going to stomp the whole thing anyway */ 677b8e80941Smrg dest.writemask = WRITEMASK_XYZW; 678b8e80941Smrg 679b8e80941Smrg emit(SHADER_OPCODE_MOV_INDIRECT, dest, src, 680b8e80941Smrg indirect, brw_imm_ud(instr->const_index[1])); 681b8e80941Smrg } 682b8e80941Smrg break; 683b8e80941Smrg } 684b8e80941Smrg 685b8e80941Smrg case nir_intrinsic_load_ubo: { 686b8e80941Smrg src_reg surf_index; 687b8e80941Smrg 688b8e80941Smrg dest = get_nir_dest(instr->dest); 689b8e80941Smrg 690b8e80941Smrg if (nir_src_is_const(instr->src[0])) { 691b8e80941Smrg /* The block index is a constant, so just emit the binding table entry 692b8e80941Smrg * as an immediate. 693b8e80941Smrg */ 694b8e80941Smrg const unsigned index = prog_data->base.binding_table.ubo_start + 695b8e80941Smrg nir_src_as_uint(instr->src[0]); 696b8e80941Smrg surf_index = brw_imm_ud(index); 697b8e80941Smrg } else { 698b8e80941Smrg /* The block index is not a constant. Evaluate the index expression 699b8e80941Smrg * per-channel and add the base UBO index; we have to select a value 700b8e80941Smrg * from any live channel. 
701b8e80941Smrg */ 702b8e80941Smrg surf_index = src_reg(this, glsl_type::uint_type); 703b8e80941Smrg emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int32, 704b8e80941Smrg instr->num_components), 705b8e80941Smrg brw_imm_ud(prog_data->base.binding_table.ubo_start))); 706b8e80941Smrg surf_index = emit_uniformize(surf_index); 707b8e80941Smrg } 708b8e80941Smrg 709b8e80941Smrg src_reg offset_reg; 710b8e80941Smrg if (nir_src_is_const(instr->src[1])) { 711b8e80941Smrg unsigned load_offset = nir_src_as_uint(instr->src[1]); 712b8e80941Smrg offset_reg = brw_imm_ud(load_offset & ~15); 713b8e80941Smrg } else { 714b8e80941Smrg offset_reg = src_reg(this, glsl_type::uint_type); 715b8e80941Smrg emit(MOV(dst_reg(offset_reg), 716b8e80941Smrg get_nir_src(instr->src[1], nir_type_uint32, 1))); 717b8e80941Smrg } 718b8e80941Smrg 719b8e80941Smrg src_reg packed_consts; 720b8e80941Smrg if (nir_dest_bit_size(instr->dest) == 32) { 721b8e80941Smrg packed_consts = src_reg(this, glsl_type::vec4_type); 722b8e80941Smrg emit_pull_constant_load_reg(dst_reg(packed_consts), 723b8e80941Smrg surf_index, 724b8e80941Smrg offset_reg, 725b8e80941Smrg NULL, NULL /* before_block/inst */); 726b8e80941Smrg } else { 727b8e80941Smrg src_reg temp = src_reg(this, glsl_type::dvec4_type); 728b8e80941Smrg src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F); 729b8e80941Smrg 730b8e80941Smrg emit_pull_constant_load_reg(dst_reg(temp_float), 731b8e80941Smrg surf_index, offset_reg, NULL, NULL); 732b8e80941Smrg if (offset_reg.file == IMM) 733b8e80941Smrg offset_reg.ud += 16; 734b8e80941Smrg else 735b8e80941Smrg emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u))); 736b8e80941Smrg emit_pull_constant_load_reg(dst_reg(byte_offset(temp_float, REG_SIZE)), 737b8e80941Smrg surf_index, offset_reg, NULL, NULL); 738b8e80941Smrg 739b8e80941Smrg packed_consts = src_reg(this, glsl_type::dvec4_type); 740b8e80941Smrg shuffle_64bit_data(dst_reg(packed_consts), temp, false); 741b8e80941Smrg } 742b8e80941Smrg 
743b8e80941Smrg packed_consts.swizzle = brw_swizzle_for_size(instr->num_components); 744b8e80941Smrg if (nir_src_is_const(instr->src[1])) { 745b8e80941Smrg unsigned load_offset = nir_src_as_uint(instr->src[1]); 746b8e80941Smrg unsigned type_size = type_sz(dest.type); 747b8e80941Smrg packed_consts.swizzle += 748b8e80941Smrg BRW_SWIZZLE4(load_offset % 16 / type_size, 749b8e80941Smrg load_offset % 16 / type_size, 750b8e80941Smrg load_offset % 16 / type_size, 751b8e80941Smrg load_offset % 16 / type_size); 752b8e80941Smrg } 753b8e80941Smrg 754b8e80941Smrg emit(MOV(dest, retype(packed_consts, dest.type))); 755b8e80941Smrg 756b8e80941Smrg break; 757b8e80941Smrg } 758b8e80941Smrg 759b8e80941Smrg case nir_intrinsic_memory_barrier: { 760b8e80941Smrg const vec4_builder bld = 761b8e80941Smrg vec4_builder(this).at_end().annotate(current_annotation, base_ir); 762b8e80941Smrg const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); 763b8e80941Smrg bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0)) 764b8e80941Smrg ->size_written = 2 * REG_SIZE; 765b8e80941Smrg break; 766b8e80941Smrg } 767b8e80941Smrg 768b8e80941Smrg case nir_intrinsic_shader_clock: { 769b8e80941Smrg /* We cannot do anything if there is an event, so ignore it for now */ 770b8e80941Smrg const src_reg shader_clock = get_timestamp(); 771b8e80941Smrg const enum brw_reg_type type = brw_type_for_base_type(glsl_type::uvec2_type); 772b8e80941Smrg 773b8e80941Smrg dest = get_nir_dest(instr->dest, type); 774b8e80941Smrg emit(MOV(dest, shader_clock)); 775b8e80941Smrg break; 776b8e80941Smrg } 777b8e80941Smrg 778b8e80941Smrg default: 779b8e80941Smrg unreachable("Unknown intrinsic"); 780b8e80941Smrg } 781b8e80941Smrg} 782b8e80941Smrg 783b8e80941Smrgvoid 784b8e80941Smrgvec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr) 785b8e80941Smrg{ 786b8e80941Smrg dst_reg dest; 787b8e80941Smrg if (nir_intrinsic_infos[instr->intrinsic].has_dest) 788b8e80941Smrg dest = get_nir_dest(instr->dest); 789b8e80941Smrg 
790b8e80941Smrg src_reg surface = get_nir_ssbo_intrinsic_index(instr); 791b8e80941Smrg src_reg offset = get_nir_src(instr->src[1], 1); 792b8e80941Smrg src_reg data1; 793b8e80941Smrg if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) 794b8e80941Smrg data1 = get_nir_src(instr->src[2], 1); 795b8e80941Smrg src_reg data2; 796b8e80941Smrg if (op == BRW_AOP_CMPWR) 797b8e80941Smrg data2 = get_nir_src(instr->src[3], 1); 798b8e80941Smrg 799b8e80941Smrg /* Emit the actual atomic operation operation */ 800b8e80941Smrg const vec4_builder bld = 801b8e80941Smrg vec4_builder(this).at_end().annotate(current_annotation, base_ir); 802b8e80941Smrg 803b8e80941Smrg src_reg atomic_result = emit_untyped_atomic(bld, surface, offset, 804b8e80941Smrg data1, data2, 805b8e80941Smrg 1 /* dims */, 1 /* rsize */, 806b8e80941Smrg op, 807b8e80941Smrg BRW_PREDICATE_NONE); 808b8e80941Smrg dest.type = atomic_result.type; 809b8e80941Smrg bld.MOV(dest, atomic_result); 810b8e80941Smrg} 811b8e80941Smrg 812b8e80941Smrgstatic unsigned 813b8e80941Smrgbrw_swizzle_for_nir_swizzle(uint8_t swizzle[4]) 814b8e80941Smrg{ 815b8e80941Smrg return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 816b8e80941Smrg} 817b8e80941Smrg 818b8e80941Smrgstatic enum brw_conditional_mod 819b8e80941Smrgbrw_conditional_for_nir_comparison(nir_op op) 820b8e80941Smrg{ 821b8e80941Smrg switch (op) { 822b8e80941Smrg case nir_op_flt32: 823b8e80941Smrg case nir_op_ilt32: 824b8e80941Smrg case nir_op_ult32: 825b8e80941Smrg return BRW_CONDITIONAL_L; 826b8e80941Smrg 827b8e80941Smrg case nir_op_fge32: 828b8e80941Smrg case nir_op_ige32: 829b8e80941Smrg case nir_op_uge32: 830b8e80941Smrg return BRW_CONDITIONAL_GE; 831b8e80941Smrg 832b8e80941Smrg case nir_op_feq32: 833b8e80941Smrg case nir_op_ieq32: 834b8e80941Smrg case nir_op_b32all_fequal2: 835b8e80941Smrg case nir_op_b32all_iequal2: 836b8e80941Smrg case nir_op_b32all_fequal3: 837b8e80941Smrg case nir_op_b32all_iequal3: 838b8e80941Smrg case nir_op_b32all_fequal4: 
839b8e80941Smrg case nir_op_b32all_iequal4: 840b8e80941Smrg return BRW_CONDITIONAL_Z; 841b8e80941Smrg 842b8e80941Smrg case nir_op_fne32: 843b8e80941Smrg case nir_op_ine32: 844b8e80941Smrg case nir_op_b32any_fnequal2: 845b8e80941Smrg case nir_op_b32any_inequal2: 846b8e80941Smrg case nir_op_b32any_fnequal3: 847b8e80941Smrg case nir_op_b32any_inequal3: 848b8e80941Smrg case nir_op_b32any_fnequal4: 849b8e80941Smrg case nir_op_b32any_inequal4: 850b8e80941Smrg return BRW_CONDITIONAL_NZ; 851b8e80941Smrg 852b8e80941Smrg default: 853b8e80941Smrg unreachable("not reached: bad operation for comparison"); 854b8e80941Smrg } 855b8e80941Smrg} 856b8e80941Smrg 857b8e80941Smrgbool 858b8e80941Smrgvec4_visitor::optimize_predicate(nir_alu_instr *instr, 859b8e80941Smrg enum brw_predicate *predicate) 860b8e80941Smrg{ 861b8e80941Smrg if (!instr->src[0].src.is_ssa || 862b8e80941Smrg instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu) 863b8e80941Smrg return false; 864b8e80941Smrg 865b8e80941Smrg nir_alu_instr *cmp_instr = 866b8e80941Smrg nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); 867b8e80941Smrg 868b8e80941Smrg switch (cmp_instr->op) { 869b8e80941Smrg case nir_op_b32any_fnequal2: 870b8e80941Smrg case nir_op_b32any_inequal2: 871b8e80941Smrg case nir_op_b32any_fnequal3: 872b8e80941Smrg case nir_op_b32any_inequal3: 873b8e80941Smrg case nir_op_b32any_fnequal4: 874b8e80941Smrg case nir_op_b32any_inequal4: 875b8e80941Smrg *predicate = BRW_PREDICATE_ALIGN16_ANY4H; 876b8e80941Smrg break; 877b8e80941Smrg case nir_op_b32all_fequal2: 878b8e80941Smrg case nir_op_b32all_iequal2: 879b8e80941Smrg case nir_op_b32all_fequal3: 880b8e80941Smrg case nir_op_b32all_iequal3: 881b8e80941Smrg case nir_op_b32all_fequal4: 882b8e80941Smrg case nir_op_b32all_iequal4: 883b8e80941Smrg *predicate = BRW_PREDICATE_ALIGN16_ALL4H; 884b8e80941Smrg break; 885b8e80941Smrg default: 886b8e80941Smrg return false; 887b8e80941Smrg } 888b8e80941Smrg 889b8e80941Smrg unsigned size_swizzle = 890b8e80941Smrg 
brw_swizzle_for_size(nir_op_infos[cmp_instr->op].input_sizes[0]); 891b8e80941Smrg 892b8e80941Smrg src_reg op[2]; 893b8e80941Smrg assert(nir_op_infos[cmp_instr->op].num_inputs == 2); 894b8e80941Smrg for (unsigned i = 0; i < 2; i++) { 895b8e80941Smrg nir_alu_type type = nir_op_infos[cmp_instr->op].input_types[i]; 896b8e80941Smrg unsigned bit_size = nir_src_bit_size(cmp_instr->src[i].src); 897b8e80941Smrg type = (nir_alu_type) (((unsigned) type) | bit_size); 898b8e80941Smrg op[i] = get_nir_src(cmp_instr->src[i].src, type, 4); 899b8e80941Smrg unsigned base_swizzle = 900b8e80941Smrg brw_swizzle_for_nir_swizzle(cmp_instr->src[i].swizzle); 901b8e80941Smrg op[i].swizzle = brw_compose_swizzle(size_swizzle, base_swizzle); 902b8e80941Smrg op[i].abs = cmp_instr->src[i].abs; 903b8e80941Smrg op[i].negate = cmp_instr->src[i].negate; 904b8e80941Smrg } 905b8e80941Smrg 906b8e80941Smrg emit(CMP(dst_null_d(), op[0], op[1], 907b8e80941Smrg brw_conditional_for_nir_comparison(cmp_instr->op))); 908b8e80941Smrg 909b8e80941Smrg return true; 910b8e80941Smrg} 911b8e80941Smrg 912b8e80941Smrgstatic void 913b8e80941Smrgemit_find_msb_using_lzd(const vec4_builder &bld, 914b8e80941Smrg const dst_reg &dst, 915b8e80941Smrg const src_reg &src, 916b8e80941Smrg bool is_signed) 917b8e80941Smrg{ 918b8e80941Smrg vec4_instruction *inst; 919b8e80941Smrg src_reg temp = src; 920b8e80941Smrg 921b8e80941Smrg if (is_signed) { 922b8e80941Smrg /* LZD of an absolute value source almost always does the right 923b8e80941Smrg * thing. There are two problem values: 924b8e80941Smrg * 925b8e80941Smrg * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns 926b8e80941Smrg * 0. However, findMSB(int(0x80000000)) == 30. 927b8e80941Smrg * 928b8e80941Smrg * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns 929b8e80941Smrg * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: 930b8e80941Smrg * 931b8e80941Smrg * For a value of zero or negative one, -1 will be returned. 
932b8e80941Smrg * 933b8e80941Smrg * * Negative powers of two. LZD(abs(-(1<<x))) returns x, but 934b8e80941Smrg * findMSB(-(1<<x)) should return x-1. 935b8e80941Smrg * 936b8e80941Smrg * For all negative number cases, including 0x80000000 and 937b8e80941Smrg * 0xffffffff, the correct value is obtained from LZD if instead of 938b8e80941Smrg * negating the (already negative) value the logical-not is used. A 939b8e80941Smrg * conditonal logical-not can be achieved in two instructions. 940b8e80941Smrg */ 941b8e80941Smrg temp = src_reg(bld.vgrf(BRW_REGISTER_TYPE_D)); 942b8e80941Smrg 943b8e80941Smrg bld.ASR(dst_reg(temp), src, brw_imm_d(31)); 944b8e80941Smrg bld.XOR(dst_reg(temp), temp, src); 945b8e80941Smrg } 946b8e80941Smrg 947b8e80941Smrg bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD), 948b8e80941Smrg retype(temp, BRW_REGISTER_TYPE_UD)); 949b8e80941Smrg 950b8e80941Smrg /* LZD counts from the MSB side, while GLSL's findMSB() wants the count 951b8e80941Smrg * from the LSB side. Subtract the result from 31 to convert the MSB count 952b8e80941Smrg * into an LSB count. If no bits are set, LZD will return 32. 31-32 = -1, 953b8e80941Smrg * which is exactly what findMSB() is supposed to return. 954b8e80941Smrg */ 955b8e80941Smrg inst = bld.ADD(dst, retype(src_reg(dst), BRW_REGISTER_TYPE_D), 956b8e80941Smrg brw_imm_d(31)); 957b8e80941Smrg inst->src[0].negate = true; 958b8e80941Smrg} 959b8e80941Smrg 960b8e80941Smrgvoid 961b8e80941Smrgvec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src, 962b8e80941Smrg bool saturate) 963b8e80941Smrg{ 964b8e80941Smrg /* BDW PRM vol 15 - workarounds: 965b8e80941Smrg * DF->f format conversion for Align16 has wrong emask calculation when 966b8e80941Smrg * source is immediate. 
967b8e80941Smrg */ 968b8e80941Smrg if (devinfo->gen == 8 && dst.type == BRW_REGISTER_TYPE_F && 969b8e80941Smrg src.file == BRW_IMMEDIATE_VALUE) { 970b8e80941Smrg vec4_instruction *inst = emit(MOV(dst, brw_imm_f(src.df))); 971b8e80941Smrg inst->saturate = saturate; 972b8e80941Smrg return; 973b8e80941Smrg } 974b8e80941Smrg 975b8e80941Smrg enum opcode op; 976b8e80941Smrg switch (dst.type) { 977b8e80941Smrg case BRW_REGISTER_TYPE_D: 978b8e80941Smrg op = VEC4_OPCODE_DOUBLE_TO_D32; 979b8e80941Smrg break; 980b8e80941Smrg case BRW_REGISTER_TYPE_UD: 981b8e80941Smrg op = VEC4_OPCODE_DOUBLE_TO_U32; 982b8e80941Smrg break; 983b8e80941Smrg case BRW_REGISTER_TYPE_F: 984b8e80941Smrg op = VEC4_OPCODE_DOUBLE_TO_F32; 985b8e80941Smrg break; 986b8e80941Smrg default: 987b8e80941Smrg unreachable("Unknown conversion"); 988b8e80941Smrg } 989b8e80941Smrg 990b8e80941Smrg dst_reg temp = dst_reg(this, glsl_type::dvec4_type); 991b8e80941Smrg emit(MOV(temp, src)); 992b8e80941Smrg dst_reg temp2 = dst_reg(this, glsl_type::dvec4_type); 993b8e80941Smrg emit(op, temp2, src_reg(temp)); 994b8e80941Smrg 995b8e80941Smrg emit(VEC4_OPCODE_PICK_LOW_32BIT, retype(temp2, dst.type), src_reg(temp2)); 996b8e80941Smrg vec4_instruction *inst = emit(MOV(dst, src_reg(retype(temp2, dst.type)))); 997b8e80941Smrg inst->saturate = saturate; 998b8e80941Smrg} 999b8e80941Smrg 1000b8e80941Smrgvoid 1001b8e80941Smrgvec4_visitor::emit_conversion_to_double(dst_reg dst, src_reg src, 1002b8e80941Smrg bool saturate) 1003b8e80941Smrg{ 1004b8e80941Smrg dst_reg tmp_dst = dst_reg(src_reg(this, glsl_type::dvec4_type)); 1005b8e80941Smrg src_reg tmp_src = retype(src_reg(this, glsl_type::vec4_type), src.type); 1006b8e80941Smrg emit(MOV(dst_reg(tmp_src), src)); 1007b8e80941Smrg emit(VEC4_OPCODE_TO_DOUBLE, tmp_dst, tmp_src); 1008b8e80941Smrg vec4_instruction *inst = emit(MOV(dst, src_reg(tmp_dst))); 1009b8e80941Smrg inst->saturate = saturate; 1010b8e80941Smrg} 1011b8e80941Smrg 1012b8e80941Smrg/** 1013b8e80941Smrg * Try to use an immediate 
value for source 1 1014b8e80941Smrg * 1015b8e80941Smrg * In cases of flow control, constant propagation is sometimes unable to 1016b8e80941Smrg * determine that a register contains a constant value. To work around this, 1017b8e80941Smrg * try to emit a literal as the second source here. 1018b8e80941Smrg */ 1019b8e80941Smrgstatic void 1020b8e80941Smrgtry_immediate_source(const nir_alu_instr *instr, src_reg *op, 1021b8e80941Smrg MAYBE_UNUSED const gen_device_info *devinfo) 1022b8e80941Smrg{ 1023b8e80941Smrg if (nir_src_num_components(instr->src[1].src) != 1 || 1024b8e80941Smrg nir_src_bit_size(instr->src[1].src) != 32 || 1025b8e80941Smrg !nir_src_is_const(instr->src[1].src)) 1026b8e80941Smrg return; 1027b8e80941Smrg 1028b8e80941Smrg const enum brw_reg_type old_type = op->type; 1029b8e80941Smrg 1030b8e80941Smrg switch (old_type) { 1031b8e80941Smrg case BRW_REGISTER_TYPE_D: 1032b8e80941Smrg case BRW_REGISTER_TYPE_UD: { 1033b8e80941Smrg int d = nir_src_as_int(instr->src[1].src); 1034b8e80941Smrg 1035b8e80941Smrg if (op->abs) 1036b8e80941Smrg d = MAX2(-d, d); 1037b8e80941Smrg 1038b8e80941Smrg if (op->negate) { 1039b8e80941Smrg /* On Gen8+ a negation source modifier on a logical operation means 1040b8e80941Smrg * something different. Nothing should generate this, so assert that 1041b8e80941Smrg * it does not occur. 
1042b8e80941Smrg */ 1043b8e80941Smrg assert(devinfo->gen < 8 || (instr->op != nir_op_iand && 1044b8e80941Smrg instr->op != nir_op_ior && 1045b8e80941Smrg instr->op != nir_op_ixor)); 1046b8e80941Smrg d = -d; 1047b8e80941Smrg } 1048b8e80941Smrg 1049b8e80941Smrg *op = retype(src_reg(brw_imm_d(d)), old_type); 1050b8e80941Smrg break; 1051b8e80941Smrg } 1052b8e80941Smrg 1053b8e80941Smrg case BRW_REGISTER_TYPE_F: { 1054b8e80941Smrg float f = nir_src_as_float(instr->src[1].src); 1055b8e80941Smrg 1056b8e80941Smrg if (op->abs) 1057b8e80941Smrg f = fabs(f); 1058b8e80941Smrg 1059b8e80941Smrg if (op->negate) 1060b8e80941Smrg f = -f; 1061b8e80941Smrg 1062b8e80941Smrg *op = src_reg(brw_imm_f(f)); 1063b8e80941Smrg assert(op->type == old_type); 1064b8e80941Smrg break; 1065b8e80941Smrg } 1066b8e80941Smrg 1067b8e80941Smrg default: 1068b8e80941Smrg unreachable("Non-32bit type."); 1069b8e80941Smrg } 1070b8e80941Smrg} 1071b8e80941Smrg 1072b8e80941Smrgvoid 1073b8e80941Smrgvec4_visitor::nir_emit_alu(nir_alu_instr *instr) 1074b8e80941Smrg{ 1075b8e80941Smrg vec4_instruction *inst; 1076b8e80941Smrg 1077b8e80941Smrg nir_alu_type dst_type = (nir_alu_type) (nir_op_infos[instr->op].output_type | 1078b8e80941Smrg nir_dest_bit_size(instr->dest.dest)); 1079b8e80941Smrg dst_reg dst = get_nir_dest(instr->dest.dest, dst_type); 1080b8e80941Smrg dst.writemask = instr->dest.write_mask; 1081b8e80941Smrg 1082b8e80941Smrg src_reg op[4]; 1083b8e80941Smrg for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { 1084b8e80941Smrg nir_alu_type src_type = (nir_alu_type) 1085b8e80941Smrg (nir_op_infos[instr->op].input_types[i] | 1086b8e80941Smrg nir_src_bit_size(instr->src[i].src)); 1087b8e80941Smrg op[i] = get_nir_src(instr->src[i].src, src_type, 4); 1088b8e80941Smrg op[i].swizzle = brw_swizzle_for_nir_swizzle(instr->src[i].swizzle); 1089b8e80941Smrg op[i].abs = instr->src[i].abs; 1090b8e80941Smrg op[i].negate = instr->src[i].negate; 1091b8e80941Smrg } 1092b8e80941Smrg 1093b8e80941Smrg switch 
(instr->op) { 1094b8e80941Smrg case nir_op_imov: 1095b8e80941Smrg case nir_op_fmov: 1096b8e80941Smrg inst = emit(MOV(dst, op[0])); 1097b8e80941Smrg inst->saturate = instr->dest.saturate; 1098b8e80941Smrg break; 1099b8e80941Smrg 1100b8e80941Smrg case nir_op_vec2: 1101b8e80941Smrg case nir_op_vec3: 1102b8e80941Smrg case nir_op_vec4: 1103b8e80941Smrg unreachable("not reached: should be handled by lower_vec_to_movs()"); 1104b8e80941Smrg 1105b8e80941Smrg case nir_op_i2f32: 1106b8e80941Smrg case nir_op_u2f32: 1107b8e80941Smrg inst = emit(MOV(dst, op[0])); 1108b8e80941Smrg inst->saturate = instr->dest.saturate; 1109b8e80941Smrg break; 1110b8e80941Smrg 1111b8e80941Smrg case nir_op_f2f32: 1112b8e80941Smrg case nir_op_f2i32: 1113b8e80941Smrg case nir_op_f2u32: 1114b8e80941Smrg if (nir_src_bit_size(instr->src[0].src) == 64) 1115b8e80941Smrg emit_conversion_from_double(dst, op[0], instr->dest.saturate); 1116b8e80941Smrg else 1117b8e80941Smrg inst = emit(MOV(dst, op[0])); 1118b8e80941Smrg break; 1119b8e80941Smrg 1120b8e80941Smrg case nir_op_f2f64: 1121b8e80941Smrg case nir_op_i2f64: 1122b8e80941Smrg case nir_op_u2f64: 1123b8e80941Smrg emit_conversion_to_double(dst, op[0], instr->dest.saturate); 1124b8e80941Smrg break; 1125b8e80941Smrg 1126b8e80941Smrg case nir_op_iadd: 1127b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1128b8e80941Smrg /* fall through */ 1129b8e80941Smrg case nir_op_fadd: 1130b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1131b8e80941Smrg inst = emit(ADD(dst, op[0], op[1])); 1132b8e80941Smrg inst->saturate = instr->dest.saturate; 1133b8e80941Smrg break; 1134b8e80941Smrg 1135b8e80941Smrg case nir_op_uadd_sat: 1136b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1137b8e80941Smrg inst = emit(ADD(dst, op[0], op[1])); 1138b8e80941Smrg inst->saturate = true; 1139b8e80941Smrg break; 1140b8e80941Smrg 1141b8e80941Smrg case nir_op_fmul: 1142b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1143b8e80941Smrg inst = 
emit(MUL(dst, op[0], op[1])); 1144b8e80941Smrg inst->saturate = instr->dest.saturate; 1145b8e80941Smrg break; 1146b8e80941Smrg 1147b8e80941Smrg case nir_op_imul: { 1148b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1149b8e80941Smrg if (devinfo->gen < 8) { 1150b8e80941Smrg /* For integer multiplication, the MUL uses the low 16 bits of one of 1151b8e80941Smrg * the operands (src0 through SNB, src1 on IVB and later). The MACH 1152b8e80941Smrg * accumulates in the contribution of the upper 16 bits of that 1153b8e80941Smrg * operand. If we can determine that one of the args is in the low 1154b8e80941Smrg * 16 bits, though, we can just emit a single MUL. 1155b8e80941Smrg */ 1156b8e80941Smrg if (nir_src_is_const(instr->src[0].src) && 1157b8e80941Smrg nir_alu_instr_src_read_mask(instr, 0) == 1 && 1158b8e80941Smrg nir_src_comp_as_uint(instr->src[0].src, 0) < (1 << 16)) { 1159b8e80941Smrg if (devinfo->gen < 7) 1160b8e80941Smrg emit(MUL(dst, op[0], op[1])); 1161b8e80941Smrg else 1162b8e80941Smrg emit(MUL(dst, op[1], op[0])); 1163b8e80941Smrg } else if (nir_src_is_const(instr->src[1].src) && 1164b8e80941Smrg nir_alu_instr_src_read_mask(instr, 1) == 1 && 1165b8e80941Smrg nir_src_comp_as_uint(instr->src[1].src, 0) < (1 << 16)) { 1166b8e80941Smrg if (devinfo->gen < 7) 1167b8e80941Smrg emit(MUL(dst, op[1], op[0])); 1168b8e80941Smrg else 1169b8e80941Smrg emit(MUL(dst, op[0], op[1])); 1170b8e80941Smrg } else { 1171b8e80941Smrg struct brw_reg acc = retype(brw_acc_reg(8), dst.type); 1172b8e80941Smrg 1173b8e80941Smrg emit(MUL(acc, op[0], op[1])); 1174b8e80941Smrg emit(MACH(dst_null_d(), op[0], op[1])); 1175b8e80941Smrg emit(MOV(dst, src_reg(acc))); 1176b8e80941Smrg } 1177b8e80941Smrg } else { 1178b8e80941Smrg emit(MUL(dst, op[0], op[1])); 1179b8e80941Smrg } 1180b8e80941Smrg break; 1181b8e80941Smrg } 1182b8e80941Smrg 1183b8e80941Smrg case nir_op_imul_high: 1184b8e80941Smrg case nir_op_umul_high: { 1185b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 
1186b8e80941Smrg struct brw_reg acc = retype(brw_acc_reg(8), dst.type); 1187b8e80941Smrg 1188b8e80941Smrg if (devinfo->gen >= 8) 1189b8e80941Smrg emit(MUL(acc, op[0], retype(op[1], BRW_REGISTER_TYPE_UW))); 1190b8e80941Smrg else 1191b8e80941Smrg emit(MUL(acc, op[0], op[1])); 1192b8e80941Smrg 1193b8e80941Smrg emit(MACH(dst, op[0], op[1])); 1194b8e80941Smrg break; 1195b8e80941Smrg } 1196b8e80941Smrg 1197b8e80941Smrg case nir_op_frcp: 1198b8e80941Smrg inst = emit_math(SHADER_OPCODE_RCP, dst, op[0]); 1199b8e80941Smrg inst->saturate = instr->dest.saturate; 1200b8e80941Smrg break; 1201b8e80941Smrg 1202b8e80941Smrg case nir_op_fexp2: 1203b8e80941Smrg inst = emit_math(SHADER_OPCODE_EXP2, dst, op[0]); 1204b8e80941Smrg inst->saturate = instr->dest.saturate; 1205b8e80941Smrg break; 1206b8e80941Smrg 1207b8e80941Smrg case nir_op_flog2: 1208b8e80941Smrg inst = emit_math(SHADER_OPCODE_LOG2, dst, op[0]); 1209b8e80941Smrg inst->saturate = instr->dest.saturate; 1210b8e80941Smrg break; 1211b8e80941Smrg 1212b8e80941Smrg case nir_op_fsin: 1213b8e80941Smrg inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); 1214b8e80941Smrg inst->saturate = instr->dest.saturate; 1215b8e80941Smrg break; 1216b8e80941Smrg 1217b8e80941Smrg case nir_op_fcos: 1218b8e80941Smrg inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); 1219b8e80941Smrg inst->saturate = instr->dest.saturate; 1220b8e80941Smrg break; 1221b8e80941Smrg 1222b8e80941Smrg case nir_op_idiv: 1223b8e80941Smrg case nir_op_udiv: 1224b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1225b8e80941Smrg emit_math(SHADER_OPCODE_INT_QUOTIENT, dst, op[0], op[1]); 1226b8e80941Smrg break; 1227b8e80941Smrg 1228b8e80941Smrg case nir_op_umod: 1229b8e80941Smrg case nir_op_irem: 1230b8e80941Smrg /* According to the sign table for INT DIV in the Ivy Bridge PRM, it 1231b8e80941Smrg * appears that our hardware just does the right thing for signed 1232b8e80941Smrg * remainder. 
1233b8e80941Smrg */ 1234b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1235b8e80941Smrg emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]); 1236b8e80941Smrg break; 1237b8e80941Smrg 1238b8e80941Smrg case nir_op_imod: { 1239b8e80941Smrg /* Get a regular C-style remainder. If a % b == 0, set the predicate. */ 1240b8e80941Smrg inst = emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]); 1241b8e80941Smrg 1242b8e80941Smrg /* Math instructions don't support conditional mod */ 1243b8e80941Smrg inst = emit(MOV(dst_null_d(), src_reg(dst))); 1244b8e80941Smrg inst->conditional_mod = BRW_CONDITIONAL_NZ; 1245b8e80941Smrg 1246b8e80941Smrg /* Now, we need to determine if signs of the sources are different. 1247b8e80941Smrg * When we XOR the sources, the top bit is 0 if they are the same and 1 1248b8e80941Smrg * if they are different. We can then use a conditional modifier to 1249b8e80941Smrg * turn that into a predicate. This leads us to an XOR.l instruction. 1250b8e80941Smrg * 1251b8e80941Smrg * Technically, according to the PRM, you're not allowed to use .l on a 1252b8e80941Smrg * XOR instruction. However, emperical experiments and Curro's reading 1253b8e80941Smrg * of the simulator source both indicate that it's safe. 1254b8e80941Smrg */ 1255b8e80941Smrg src_reg tmp = src_reg(this, glsl_type::ivec4_type); 1256b8e80941Smrg inst = emit(XOR(dst_reg(tmp), op[0], op[1])); 1257b8e80941Smrg inst->predicate = BRW_PREDICATE_NORMAL; 1258b8e80941Smrg inst->conditional_mod = BRW_CONDITIONAL_L; 1259b8e80941Smrg 1260b8e80941Smrg /* If the result of the initial remainder operation is non-zero and the 1261b8e80941Smrg * two sources have different signs, add in a copy of op[1] to get the 1262b8e80941Smrg * final integer modulus value. 
1263b8e80941Smrg */ 1264b8e80941Smrg inst = emit(ADD(dst, src_reg(dst), op[1])); 1265b8e80941Smrg inst->predicate = BRW_PREDICATE_NORMAL; 1266b8e80941Smrg break; 1267b8e80941Smrg } 1268b8e80941Smrg 1269b8e80941Smrg case nir_op_ldexp: 1270b8e80941Smrg unreachable("not reached: should be handled by ldexp_to_arith()"); 1271b8e80941Smrg 1272b8e80941Smrg case nir_op_fsqrt: 1273b8e80941Smrg inst = emit_math(SHADER_OPCODE_SQRT, dst, op[0]); 1274b8e80941Smrg inst->saturate = instr->dest.saturate; 1275b8e80941Smrg break; 1276b8e80941Smrg 1277b8e80941Smrg case nir_op_frsq: 1278b8e80941Smrg inst = emit_math(SHADER_OPCODE_RSQ, dst, op[0]); 1279b8e80941Smrg inst->saturate = instr->dest.saturate; 1280b8e80941Smrg break; 1281b8e80941Smrg 1282b8e80941Smrg case nir_op_fpow: 1283b8e80941Smrg inst = emit_math(SHADER_OPCODE_POW, dst, op[0], op[1]); 1284b8e80941Smrg inst->saturate = instr->dest.saturate; 1285b8e80941Smrg break; 1286b8e80941Smrg 1287b8e80941Smrg case nir_op_uadd_carry: { 1288b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1289b8e80941Smrg struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD); 1290b8e80941Smrg 1291b8e80941Smrg emit(ADDC(dst_null_ud(), op[0], op[1])); 1292b8e80941Smrg emit(MOV(dst, src_reg(acc))); 1293b8e80941Smrg break; 1294b8e80941Smrg } 1295b8e80941Smrg 1296b8e80941Smrg case nir_op_usub_borrow: { 1297b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1298b8e80941Smrg struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD); 1299b8e80941Smrg 1300b8e80941Smrg emit(SUBB(dst_null_ud(), op[0], op[1])); 1301b8e80941Smrg emit(MOV(dst, src_reg(acc))); 1302b8e80941Smrg break; 1303b8e80941Smrg } 1304b8e80941Smrg 1305b8e80941Smrg case nir_op_ftrunc: 1306b8e80941Smrg inst = emit(RNDZ(dst, op[0])); 1307b8e80941Smrg inst->saturate = instr->dest.saturate; 1308b8e80941Smrg break; 1309b8e80941Smrg 1310b8e80941Smrg case nir_op_fceil: { 1311b8e80941Smrg src_reg tmp = src_reg(this, glsl_type::float_type); 1312b8e80941Smrg 
tmp.swizzle = 1313b8e80941Smrg brw_swizzle_for_size(instr->src[0].src.is_ssa ? 1314b8e80941Smrg instr->src[0].src.ssa->num_components : 1315b8e80941Smrg instr->src[0].src.reg.reg->num_components); 1316b8e80941Smrg 1317b8e80941Smrg op[0].negate = !op[0].negate; 1318b8e80941Smrg emit(RNDD(dst_reg(tmp), op[0])); 1319b8e80941Smrg tmp.negate = true; 1320b8e80941Smrg inst = emit(MOV(dst, tmp)); 1321b8e80941Smrg inst->saturate = instr->dest.saturate; 1322b8e80941Smrg break; 1323b8e80941Smrg } 1324b8e80941Smrg 1325b8e80941Smrg case nir_op_ffloor: 1326b8e80941Smrg inst = emit(RNDD(dst, op[0])); 1327b8e80941Smrg inst->saturate = instr->dest.saturate; 1328b8e80941Smrg break; 1329b8e80941Smrg 1330b8e80941Smrg case nir_op_ffract: 1331b8e80941Smrg inst = emit(FRC(dst, op[0])); 1332b8e80941Smrg inst->saturate = instr->dest.saturate; 1333b8e80941Smrg break; 1334b8e80941Smrg 1335b8e80941Smrg case nir_op_fround_even: 1336b8e80941Smrg inst = emit(RNDE(dst, op[0])); 1337b8e80941Smrg inst->saturate = instr->dest.saturate; 1338b8e80941Smrg break; 1339b8e80941Smrg 1340b8e80941Smrg case nir_op_fquantize2f16: { 1341b8e80941Smrg /* See also vec4_visitor::emit_pack_half_2x16() */ 1342b8e80941Smrg src_reg tmp16 = src_reg(this, glsl_type::uvec4_type); 1343b8e80941Smrg src_reg tmp32 = src_reg(this, glsl_type::vec4_type); 1344b8e80941Smrg src_reg zero = src_reg(this, glsl_type::vec4_type); 1345b8e80941Smrg 1346b8e80941Smrg /* Check for denormal */ 1347b8e80941Smrg src_reg abs_src0 = op[0]; 1348b8e80941Smrg abs_src0.abs = true; 1349b8e80941Smrg emit(CMP(dst_null_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), 1350b8e80941Smrg BRW_CONDITIONAL_L)); 1351b8e80941Smrg /* Get the appropriately signed zero */ 1352b8e80941Smrg emit(AND(retype(dst_reg(zero), BRW_REGISTER_TYPE_UD), 1353b8e80941Smrg retype(op[0], BRW_REGISTER_TYPE_UD), 1354b8e80941Smrg brw_imm_ud(0x80000000))); 1355b8e80941Smrg /* Do the actual F32 -> F16 -> F32 conversion */ 1356b8e80941Smrg emit(F32TO16(dst_reg(tmp16), op[0])); 
1357b8e80941Smrg emit(F16TO32(dst_reg(tmp32), tmp16)); 1358b8e80941Smrg /* Select that or zero based on normal status */ 1359b8e80941Smrg inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32); 1360b8e80941Smrg inst->predicate = BRW_PREDICATE_NORMAL; 1361b8e80941Smrg inst->saturate = instr->dest.saturate; 1362b8e80941Smrg break; 1363b8e80941Smrg } 1364b8e80941Smrg 1365b8e80941Smrg case nir_op_imin: 1366b8e80941Smrg case nir_op_umin: 1367b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1368b8e80941Smrg /* fall through */ 1369b8e80941Smrg case nir_op_fmin: 1370b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1371b8e80941Smrg inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]); 1372b8e80941Smrg inst->saturate = instr->dest.saturate; 1373b8e80941Smrg break; 1374b8e80941Smrg 1375b8e80941Smrg case nir_op_imax: 1376b8e80941Smrg case nir_op_umax: 1377b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1378b8e80941Smrg /* fall through */ 1379b8e80941Smrg case nir_op_fmax: 1380b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1381b8e80941Smrg inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]); 1382b8e80941Smrg inst->saturate = instr->dest.saturate; 1383b8e80941Smrg break; 1384b8e80941Smrg 1385b8e80941Smrg case nir_op_fddx: 1386b8e80941Smrg case nir_op_fddx_coarse: 1387b8e80941Smrg case nir_op_fddx_fine: 1388b8e80941Smrg case nir_op_fddy: 1389b8e80941Smrg case nir_op_fddy_coarse: 1390b8e80941Smrg case nir_op_fddy_fine: 1391b8e80941Smrg unreachable("derivatives are not valid in vertex shaders"); 1392b8e80941Smrg 1393b8e80941Smrg case nir_op_ilt32: 1394b8e80941Smrg case nir_op_ult32: 1395b8e80941Smrg case nir_op_ige32: 1396b8e80941Smrg case nir_op_uge32: 1397b8e80941Smrg case nir_op_ieq32: 1398b8e80941Smrg case nir_op_ine32: 1399b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1400b8e80941Smrg /* Fallthrough */ 1401b8e80941Smrg case nir_op_flt32: 1402b8e80941Smrg case nir_op_fge32: 1403b8e80941Smrg case nir_op_feq32: 
1404b8e80941Smrg case nir_op_fne32: { 1405b8e80941Smrg enum brw_conditional_mod conditional_mod = 1406b8e80941Smrg brw_conditional_for_nir_comparison(instr->op); 1407b8e80941Smrg 1408b8e80941Smrg if (nir_src_bit_size(instr->src[0].src) < 64) { 1409b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1410b8e80941Smrg emit(CMP(dst, op[0], op[1], conditional_mod)); 1411b8e80941Smrg } else { 1412b8e80941Smrg /* Produce a 32-bit boolean result from the DF comparison by selecting 1413b8e80941Smrg * only the low 32-bit in each DF produced. Do this in a temporary 1414b8e80941Smrg * so we can then move from there to the result using align16 again 1415b8e80941Smrg * to honor the original writemask. 1416b8e80941Smrg */ 1417b8e80941Smrg dst_reg temp = dst_reg(this, glsl_type::dvec4_type); 1418b8e80941Smrg emit(CMP(temp, op[0], op[1], conditional_mod)); 1419b8e80941Smrg dst_reg result = dst_reg(this, glsl_type::bvec4_type); 1420b8e80941Smrg emit(VEC4_OPCODE_PICK_LOW_32BIT, result, src_reg(temp)); 1421b8e80941Smrg emit(MOV(dst, src_reg(result))); 1422b8e80941Smrg } 1423b8e80941Smrg break; 1424b8e80941Smrg } 1425b8e80941Smrg 1426b8e80941Smrg case nir_op_b32all_iequal2: 1427b8e80941Smrg case nir_op_b32all_iequal3: 1428b8e80941Smrg case nir_op_b32all_iequal4: 1429b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1430b8e80941Smrg /* Fallthrough */ 1431b8e80941Smrg case nir_op_b32all_fequal2: 1432b8e80941Smrg case nir_op_b32all_fequal3: 1433b8e80941Smrg case nir_op_b32all_fequal4: { 1434b8e80941Smrg unsigned swiz = 1435b8e80941Smrg brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); 1436b8e80941Smrg 1437b8e80941Smrg emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), 1438b8e80941Smrg brw_conditional_for_nir_comparison(instr->op))); 1439b8e80941Smrg emit(MOV(dst, brw_imm_d(0))); 1440b8e80941Smrg inst = emit(MOV(dst, brw_imm_d(~0))); 1441b8e80941Smrg inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; 1442b8e80941Smrg break; 1443b8e80941Smrg } 
1444b8e80941Smrg 1445b8e80941Smrg case nir_op_b32any_inequal2: 1446b8e80941Smrg case nir_op_b32any_inequal3: 1447b8e80941Smrg case nir_op_b32any_inequal4: 1448b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1449b8e80941Smrg /* Fallthrough */ 1450b8e80941Smrg case nir_op_b32any_fnequal2: 1451b8e80941Smrg case nir_op_b32any_fnequal3: 1452b8e80941Smrg case nir_op_b32any_fnequal4: { 1453b8e80941Smrg unsigned swiz = 1454b8e80941Smrg brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); 1455b8e80941Smrg 1456b8e80941Smrg emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), 1457b8e80941Smrg brw_conditional_for_nir_comparison(instr->op))); 1458b8e80941Smrg 1459b8e80941Smrg emit(MOV(dst, brw_imm_d(0))); 1460b8e80941Smrg inst = emit(MOV(dst, brw_imm_d(~0))); 1461b8e80941Smrg inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; 1462b8e80941Smrg break; 1463b8e80941Smrg } 1464b8e80941Smrg 1465b8e80941Smrg case nir_op_inot: 1466b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1467b8e80941Smrg if (devinfo->gen >= 8) { 1468b8e80941Smrg op[0] = resolve_source_modifiers(op[0]); 1469b8e80941Smrg } 1470b8e80941Smrg emit(NOT(dst, op[0])); 1471b8e80941Smrg break; 1472b8e80941Smrg 1473b8e80941Smrg case nir_op_ixor: 1474b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1475b8e80941Smrg if (devinfo->gen >= 8) { 1476b8e80941Smrg op[0] = resolve_source_modifiers(op[0]); 1477b8e80941Smrg op[1] = resolve_source_modifiers(op[1]); 1478b8e80941Smrg } 1479b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1480b8e80941Smrg emit(XOR(dst, op[0], op[1])); 1481b8e80941Smrg break; 1482b8e80941Smrg 1483b8e80941Smrg case nir_op_ior: 1484b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1485b8e80941Smrg if (devinfo->gen >= 8) { 1486b8e80941Smrg op[0] = resolve_source_modifiers(op[0]); 1487b8e80941Smrg op[1] = resolve_source_modifiers(op[1]); 1488b8e80941Smrg } 1489b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 
1490b8e80941Smrg emit(OR(dst, op[0], op[1])); 1491b8e80941Smrg break; 1492b8e80941Smrg 1493b8e80941Smrg case nir_op_iand: 1494b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1495b8e80941Smrg if (devinfo->gen >= 8) { 1496b8e80941Smrg op[0] = resolve_source_modifiers(op[0]); 1497b8e80941Smrg op[1] = resolve_source_modifiers(op[1]); 1498b8e80941Smrg } 1499b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1500b8e80941Smrg emit(AND(dst, op[0], op[1])); 1501b8e80941Smrg break; 1502b8e80941Smrg 1503b8e80941Smrg case nir_op_b2i32: 1504b8e80941Smrg case nir_op_b2f32: 1505b8e80941Smrg case nir_op_b2f64: 1506b8e80941Smrg if (nir_dest_bit_size(instr->dest.dest) > 32) { 1507b8e80941Smrg assert(dst.type == BRW_REGISTER_TYPE_DF); 1508b8e80941Smrg emit_conversion_to_double(dst, negate(op[0]), false); 1509b8e80941Smrg } else { 1510b8e80941Smrg emit(MOV(dst, negate(op[0]))); 1511b8e80941Smrg } 1512b8e80941Smrg break; 1513b8e80941Smrg 1514b8e80941Smrg case nir_op_f2b32: 1515b8e80941Smrg if (nir_src_bit_size(instr->src[0].src) == 64) { 1516b8e80941Smrg /* We use a MOV with conditional_mod to check if the provided value is 1517b8e80941Smrg * 0.0. We want this to flush denormalized numbers to zero, so we set a 1518b8e80941Smrg * source modifier on the source operand to trigger this, as source 1519b8e80941Smrg * modifiers don't affect the result of the testing against 0.0. 
1520b8e80941Smrg */ 1521b8e80941Smrg src_reg value = op[0]; 1522b8e80941Smrg value.abs = true; 1523b8e80941Smrg vec4_instruction *inst = emit(MOV(dst_null_df(), value)); 1524b8e80941Smrg inst->conditional_mod = BRW_CONDITIONAL_NZ; 1525b8e80941Smrg 1526b8e80941Smrg src_reg one = src_reg(this, glsl_type::ivec4_type); 1527b8e80941Smrg emit(MOV(dst_reg(one), brw_imm_d(~0))); 1528b8e80941Smrg inst = emit(BRW_OPCODE_SEL, dst, one, brw_imm_d(0)); 1529b8e80941Smrg inst->predicate = BRW_PREDICATE_NORMAL; 1530b8e80941Smrg } else { 1531b8e80941Smrg emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ)); 1532b8e80941Smrg } 1533b8e80941Smrg break; 1534b8e80941Smrg 1535b8e80941Smrg case nir_op_i2b32: 1536b8e80941Smrg emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); 1537b8e80941Smrg break; 1538b8e80941Smrg 1539b8e80941Smrg case nir_op_fnoise1_1: 1540b8e80941Smrg case nir_op_fnoise1_2: 1541b8e80941Smrg case nir_op_fnoise1_3: 1542b8e80941Smrg case nir_op_fnoise1_4: 1543b8e80941Smrg case nir_op_fnoise2_1: 1544b8e80941Smrg case nir_op_fnoise2_2: 1545b8e80941Smrg case nir_op_fnoise2_3: 1546b8e80941Smrg case nir_op_fnoise2_4: 1547b8e80941Smrg case nir_op_fnoise3_1: 1548b8e80941Smrg case nir_op_fnoise3_2: 1549b8e80941Smrg case nir_op_fnoise3_3: 1550b8e80941Smrg case nir_op_fnoise3_4: 1551b8e80941Smrg case nir_op_fnoise4_1: 1552b8e80941Smrg case nir_op_fnoise4_2: 1553b8e80941Smrg case nir_op_fnoise4_3: 1554b8e80941Smrg case nir_op_fnoise4_4: 1555b8e80941Smrg unreachable("not reached: should be handled by lower_noise"); 1556b8e80941Smrg 1557b8e80941Smrg case nir_op_unpack_half_2x16_split_x: 1558b8e80941Smrg case nir_op_unpack_half_2x16_split_y: 1559b8e80941Smrg case nir_op_pack_half_2x16_split: 1560b8e80941Smrg unreachable("not reached: should not occur in vertex shader"); 1561b8e80941Smrg 1562b8e80941Smrg case nir_op_unpack_snorm_2x16: 1563b8e80941Smrg case nir_op_unpack_unorm_2x16: 1564b8e80941Smrg case nir_op_pack_snorm_2x16: 1565b8e80941Smrg case nir_op_pack_unorm_2x16: 
1566b8e80941Smrg unreachable("not reached: should be handled by lower_packing_builtins"); 1567b8e80941Smrg 1568b8e80941Smrg case nir_op_pack_uvec4_to_uint: 1569b8e80941Smrg unreachable("not reached"); 1570b8e80941Smrg 1571b8e80941Smrg case nir_op_pack_uvec2_to_uint: { 1572b8e80941Smrg dst_reg tmp1 = dst_reg(this, glsl_type::uint_type); 1573b8e80941Smrg tmp1.writemask = WRITEMASK_X; 1574b8e80941Smrg op[0].swizzle = BRW_SWIZZLE_YYYY; 1575b8e80941Smrg emit(SHL(tmp1, op[0], src_reg(brw_imm_ud(16u)))); 1576b8e80941Smrg 1577b8e80941Smrg dst_reg tmp2 = dst_reg(this, glsl_type::uint_type); 1578b8e80941Smrg tmp2.writemask = WRITEMASK_X; 1579b8e80941Smrg op[0].swizzle = BRW_SWIZZLE_XXXX; 1580b8e80941Smrg emit(AND(tmp2, op[0], src_reg(brw_imm_ud(0xffffu)))); 1581b8e80941Smrg 1582b8e80941Smrg emit(OR(dst, src_reg(tmp1), src_reg(tmp2))); 1583b8e80941Smrg break; 1584b8e80941Smrg } 1585b8e80941Smrg 1586b8e80941Smrg case nir_op_pack_64_2x32_split: { 1587b8e80941Smrg dst_reg result = dst_reg(this, glsl_type::dvec4_type); 1588b8e80941Smrg dst_reg tmp = dst_reg(this, glsl_type::uvec4_type); 1589b8e80941Smrg emit(MOV(tmp, retype(op[0], BRW_REGISTER_TYPE_UD))); 1590b8e80941Smrg emit(VEC4_OPCODE_SET_LOW_32BIT, result, src_reg(tmp)); 1591b8e80941Smrg emit(MOV(tmp, retype(op[1], BRW_REGISTER_TYPE_UD))); 1592b8e80941Smrg emit(VEC4_OPCODE_SET_HIGH_32BIT, result, src_reg(tmp)); 1593b8e80941Smrg emit(MOV(dst, src_reg(result))); 1594b8e80941Smrg break; 1595b8e80941Smrg } 1596b8e80941Smrg 1597b8e80941Smrg case nir_op_unpack_64_2x32_split_x: 1598b8e80941Smrg case nir_op_unpack_64_2x32_split_y: { 1599b8e80941Smrg enum opcode oper = (instr->op == nir_op_unpack_64_2x32_split_x) ? 
1600b8e80941Smrg VEC4_OPCODE_PICK_LOW_32BIT : VEC4_OPCODE_PICK_HIGH_32BIT; 1601b8e80941Smrg dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); 1602b8e80941Smrg emit(MOV(tmp, op[0])); 1603b8e80941Smrg dst_reg tmp2 = dst_reg(this, glsl_type::uvec4_type); 1604b8e80941Smrg emit(oper, tmp2, src_reg(tmp)); 1605b8e80941Smrg emit(MOV(dst, src_reg(tmp2))); 1606b8e80941Smrg break; 1607b8e80941Smrg } 1608b8e80941Smrg 1609b8e80941Smrg case nir_op_unpack_half_2x16: 1610b8e80941Smrg /* As NIR does not guarantee that we have a correct swizzle outside the 1611b8e80941Smrg * boundaries of a vector, and the implementation of emit_unpack_half_2x16 1612b8e80941Smrg * uses the source operand in an operation with WRITEMASK_Y while our 1613b8e80941Smrg * source operand has only size 1, it accessed incorrect data producing 1614b8e80941Smrg * regressions in Piglit. We repeat the swizzle of the first component on the 1615b8e80941Smrg * rest of components to avoid regressions. In the vec4_visitor IR code path 1616b8e80941Smrg * this is not needed because the operand has already the correct swizzle. 
1617b8e80941Smrg */ 1618b8e80941Smrg op[0].swizzle = brw_compose_swizzle(BRW_SWIZZLE_XXXX, op[0].swizzle); 1619b8e80941Smrg emit_unpack_half_2x16(dst, op[0]); 1620b8e80941Smrg break; 1621b8e80941Smrg 1622b8e80941Smrg case nir_op_pack_half_2x16: 1623b8e80941Smrg emit_pack_half_2x16(dst, op[0]); 1624b8e80941Smrg break; 1625b8e80941Smrg 1626b8e80941Smrg case nir_op_unpack_unorm_4x8: 1627b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1628b8e80941Smrg emit_unpack_unorm_4x8(dst, op[0]); 1629b8e80941Smrg break; 1630b8e80941Smrg 1631b8e80941Smrg case nir_op_pack_unorm_4x8: 1632b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1633b8e80941Smrg emit_pack_unorm_4x8(dst, op[0]); 1634b8e80941Smrg break; 1635b8e80941Smrg 1636b8e80941Smrg case nir_op_unpack_snorm_4x8: 1637b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1638b8e80941Smrg emit_unpack_snorm_4x8(dst, op[0]); 1639b8e80941Smrg break; 1640b8e80941Smrg 1641b8e80941Smrg case nir_op_pack_snorm_4x8: 1642b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1643b8e80941Smrg emit_pack_snorm_4x8(dst, op[0]); 1644b8e80941Smrg break; 1645b8e80941Smrg 1646b8e80941Smrg case nir_op_bitfield_reverse: 1647b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1648b8e80941Smrg emit(BFREV(dst, op[0])); 1649b8e80941Smrg break; 1650b8e80941Smrg 1651b8e80941Smrg case nir_op_bit_count: 1652b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1653b8e80941Smrg emit(CBIT(dst, op[0])); 1654b8e80941Smrg break; 1655b8e80941Smrg 1656b8e80941Smrg case nir_op_ufind_msb: 1657b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1658b8e80941Smrg emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0], false); 1659b8e80941Smrg break; 1660b8e80941Smrg 1661b8e80941Smrg case nir_op_ifind_msb: { 1662b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1663b8e80941Smrg vec4_builder bld = vec4_builder(this).at_end(); 1664b8e80941Smrg src_reg src(dst); 
1665b8e80941Smrg 1666b8e80941Smrg if (devinfo->gen < 7) { 1667b8e80941Smrg emit_find_msb_using_lzd(bld, dst, op[0], true); 1668b8e80941Smrg } else { 1669b8e80941Smrg emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); 1670b8e80941Smrg 1671b8e80941Smrg /* FBH counts from the MSB side, while GLSL's findMSB() wants the 1672b8e80941Smrg * count from the LSB side. If FBH didn't return an error 1673b8e80941Smrg * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB 1674b8e80941Smrg * count into an LSB count. 1675b8e80941Smrg */ 1676b8e80941Smrg bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ); 1677b8e80941Smrg 1678b8e80941Smrg inst = bld.ADD(dst, src, brw_imm_d(31)); 1679b8e80941Smrg inst->predicate = BRW_PREDICATE_NORMAL; 1680b8e80941Smrg inst->src[0].negate = true; 1681b8e80941Smrg } 1682b8e80941Smrg break; 1683b8e80941Smrg } 1684b8e80941Smrg 1685b8e80941Smrg case nir_op_find_lsb: { 1686b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1687b8e80941Smrg vec4_builder bld = vec4_builder(this).at_end(); 1688b8e80941Smrg 1689b8e80941Smrg if (devinfo->gen < 7) { 1690b8e80941Smrg dst_reg temp = bld.vgrf(BRW_REGISTER_TYPE_D); 1691b8e80941Smrg 1692b8e80941Smrg /* (x & -x) generates a value that consists of only the LSB of x. 1693b8e80941Smrg * For all powers of 2, findMSB(y) == findLSB(y). 1694b8e80941Smrg */ 1695b8e80941Smrg src_reg src = src_reg(retype(op[0], BRW_REGISTER_TYPE_D)); 1696b8e80941Smrg src_reg negated_src = src; 1697b8e80941Smrg 1698b8e80941Smrg /* One must be negated, and the other must be non-negated. It 1699b8e80941Smrg * doesn't matter which is which. 
1700b8e80941Smrg */ 1701b8e80941Smrg negated_src.negate = true; 1702b8e80941Smrg src.negate = false; 1703b8e80941Smrg 1704b8e80941Smrg bld.AND(temp, src, negated_src); 1705b8e80941Smrg emit_find_msb_using_lzd(bld, dst, src_reg(temp), false); 1706b8e80941Smrg } else { 1707b8e80941Smrg bld.FBL(dst, op[0]); 1708b8e80941Smrg } 1709b8e80941Smrg break; 1710b8e80941Smrg } 1711b8e80941Smrg 1712b8e80941Smrg case nir_op_ubitfield_extract: 1713b8e80941Smrg case nir_op_ibitfield_extract: 1714b8e80941Smrg unreachable("should have been lowered"); 1715b8e80941Smrg case nir_op_ubfe: 1716b8e80941Smrg case nir_op_ibfe: 1717b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1718b8e80941Smrg op[0] = fix_3src_operand(op[0]); 1719b8e80941Smrg op[1] = fix_3src_operand(op[1]); 1720b8e80941Smrg op[2] = fix_3src_operand(op[2]); 1721b8e80941Smrg 1722b8e80941Smrg emit(BFE(dst, op[2], op[1], op[0])); 1723b8e80941Smrg break; 1724b8e80941Smrg 1725b8e80941Smrg case nir_op_bfm: 1726b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1727b8e80941Smrg emit(BFI1(dst, op[0], op[1])); 1728b8e80941Smrg break; 1729b8e80941Smrg 1730b8e80941Smrg case nir_op_bfi: 1731b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1732b8e80941Smrg op[0] = fix_3src_operand(op[0]); 1733b8e80941Smrg op[1] = fix_3src_operand(op[1]); 1734b8e80941Smrg op[2] = fix_3src_operand(op[2]); 1735b8e80941Smrg 1736b8e80941Smrg emit(BFI2(dst, op[0], op[1], op[2])); 1737b8e80941Smrg break; 1738b8e80941Smrg 1739b8e80941Smrg case nir_op_bitfield_insert: 1740b8e80941Smrg unreachable("not reached: should have been lowered"); 1741b8e80941Smrg 1742b8e80941Smrg case nir_op_fsign: 1743b8e80941Smrg assert(!instr->dest.saturate); 1744b8e80941Smrg if (op[0].abs) { 1745b8e80941Smrg /* Straightforward since the source can be assumed to be either 1746b8e80941Smrg * strictly >= 0 or strictly <= 0 depending on the setting of the 1747b8e80941Smrg * negate flag. 
1748b8e80941Smrg */ 1749b8e80941Smrg inst = emit(MOV(dst, op[0])); 1750b8e80941Smrg inst->conditional_mod = BRW_CONDITIONAL_NZ; 1751b8e80941Smrg 1752b8e80941Smrg inst = (op[0].negate) 1753b8e80941Smrg ? emit(MOV(dst, brw_imm_f(-1.0f))) 1754b8e80941Smrg : emit(MOV(dst, brw_imm_f(1.0f))); 1755b8e80941Smrg inst->predicate = BRW_PREDICATE_NORMAL; 1756b8e80941Smrg } else if (type_sz(op[0].type) < 8) { 1757b8e80941Smrg /* AND(val, 0x80000000) gives the sign bit. 1758b8e80941Smrg * 1759b8e80941Smrg * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not 1760b8e80941Smrg * zero. 1761b8e80941Smrg */ 1762b8e80941Smrg emit(CMP(dst_null_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ)); 1763b8e80941Smrg 1764b8e80941Smrg op[0].type = BRW_REGISTER_TYPE_UD; 1765b8e80941Smrg dst.type = BRW_REGISTER_TYPE_UD; 1766b8e80941Smrg emit(AND(dst, op[0], brw_imm_ud(0x80000000u))); 1767b8e80941Smrg 1768b8e80941Smrg inst = emit(OR(dst, src_reg(dst), brw_imm_ud(0x3f800000u))); 1769b8e80941Smrg inst->predicate = BRW_PREDICATE_NORMAL; 1770b8e80941Smrg dst.type = BRW_REGISTER_TYPE_F; 1771b8e80941Smrg } else { 1772b8e80941Smrg /* For doubles we do the same but we need to consider: 1773b8e80941Smrg * 1774b8e80941Smrg * - We use a MOV with conditional_mod instead of a CMP so that we can 1775b8e80941Smrg * skip loading a 0.0 immediate. We use a source modifier on the 1776b8e80941Smrg * source of the MOV so that we flush denormalized values to 0. 1777b8e80941Smrg * Since we want to compare against 0, this won't alter the result. 1778b8e80941Smrg * - We need to extract the high 32-bit of each DF where the sign 1779b8e80941Smrg * is stored. 1780b8e80941Smrg * - We need to produce a DF result. 
1781b8e80941Smrg */ 1782b8e80941Smrg 1783b8e80941Smrg /* Check for zero */ 1784b8e80941Smrg src_reg value = op[0]; 1785b8e80941Smrg value.abs = true; 1786b8e80941Smrg inst = emit(MOV(dst_null_df(), value)); 1787b8e80941Smrg inst->conditional_mod = BRW_CONDITIONAL_NZ; 1788b8e80941Smrg 1789b8e80941Smrg /* AND each high 32-bit channel with 0x80000000u */ 1790b8e80941Smrg dst_reg tmp = dst_reg(this, glsl_type::uvec4_type); 1791b8e80941Smrg emit(VEC4_OPCODE_PICK_HIGH_32BIT, tmp, op[0]); 1792b8e80941Smrg emit(AND(tmp, src_reg(tmp), brw_imm_ud(0x80000000u))); 1793b8e80941Smrg 1794b8e80941Smrg /* Add 1.0 to each channel, predicated to skip the cases where the 1795b8e80941Smrg * channel's value was 0 1796b8e80941Smrg */ 1797b8e80941Smrg inst = emit(OR(tmp, src_reg(tmp), brw_imm_ud(0x3f800000u))); 1798b8e80941Smrg inst->predicate = BRW_PREDICATE_NORMAL; 1799b8e80941Smrg 1800b8e80941Smrg /* Now convert the result from float to double */ 1801b8e80941Smrg emit_conversion_to_double(dst, retype(src_reg(tmp), 1802b8e80941Smrg BRW_REGISTER_TYPE_F), 1803b8e80941Smrg false); 1804b8e80941Smrg } 1805b8e80941Smrg break; 1806b8e80941Smrg 1807b8e80941Smrg case nir_op_ishl: 1808b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1809b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1810b8e80941Smrg emit(SHL(dst, op[0], op[1])); 1811b8e80941Smrg break; 1812b8e80941Smrg 1813b8e80941Smrg case nir_op_ishr: 1814b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1815b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1816b8e80941Smrg emit(ASR(dst, op[0], op[1])); 1817b8e80941Smrg break; 1818b8e80941Smrg 1819b8e80941Smrg case nir_op_ushr: 1820b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1821b8e80941Smrg try_immediate_source(instr, &op[1], devinfo); 1822b8e80941Smrg emit(SHR(dst, op[0], op[1])); 1823b8e80941Smrg break; 1824b8e80941Smrg 1825b8e80941Smrg case nir_op_ffma: 1826b8e80941Smrg if (type_sz(dst.type) == 8) { 1827b8e80941Smrg dst_reg 
mul_dst = dst_reg(this, glsl_type::dvec4_type); 1828b8e80941Smrg emit(MUL(mul_dst, op[1], op[0])); 1829b8e80941Smrg inst = emit(ADD(dst, src_reg(mul_dst), op[2])); 1830b8e80941Smrg inst->saturate = instr->dest.saturate; 1831b8e80941Smrg } else { 1832b8e80941Smrg op[0] = fix_3src_operand(op[0]); 1833b8e80941Smrg op[1] = fix_3src_operand(op[1]); 1834b8e80941Smrg op[2] = fix_3src_operand(op[2]); 1835b8e80941Smrg 1836b8e80941Smrg inst = emit(MAD(dst, op[2], op[1], op[0])); 1837b8e80941Smrg inst->saturate = instr->dest.saturate; 1838b8e80941Smrg } 1839b8e80941Smrg break; 1840b8e80941Smrg 1841b8e80941Smrg case nir_op_flrp: 1842b8e80941Smrg inst = emit_lrp(dst, op[0], op[1], op[2]); 1843b8e80941Smrg inst->saturate = instr->dest.saturate; 1844b8e80941Smrg break; 1845b8e80941Smrg 1846b8e80941Smrg case nir_op_b32csel: 1847b8e80941Smrg enum brw_predicate predicate; 1848b8e80941Smrg if (!optimize_predicate(instr, &predicate)) { 1849b8e80941Smrg emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); 1850b8e80941Smrg switch (dst.writemask) { 1851b8e80941Smrg case WRITEMASK_X: 1852b8e80941Smrg predicate = BRW_PREDICATE_ALIGN16_REPLICATE_X; 1853b8e80941Smrg break; 1854b8e80941Smrg case WRITEMASK_Y: 1855b8e80941Smrg predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Y; 1856b8e80941Smrg break; 1857b8e80941Smrg case WRITEMASK_Z: 1858b8e80941Smrg predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Z; 1859b8e80941Smrg break; 1860b8e80941Smrg case WRITEMASK_W: 1861b8e80941Smrg predicate = BRW_PREDICATE_ALIGN16_REPLICATE_W; 1862b8e80941Smrg break; 1863b8e80941Smrg default: 1864b8e80941Smrg predicate = BRW_PREDICATE_NORMAL; 1865b8e80941Smrg break; 1866b8e80941Smrg } 1867b8e80941Smrg } 1868b8e80941Smrg inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]); 1869b8e80941Smrg inst->predicate = predicate; 1870b8e80941Smrg break; 1871b8e80941Smrg 1872b8e80941Smrg case nir_op_fdot_replicated2: 1873b8e80941Smrg inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]); 1874b8e80941Smrg inst->saturate = 
instr->dest.saturate; 1875b8e80941Smrg break; 1876b8e80941Smrg 1877b8e80941Smrg case nir_op_fdot_replicated3: 1878b8e80941Smrg inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]); 1879b8e80941Smrg inst->saturate = instr->dest.saturate; 1880b8e80941Smrg break; 1881b8e80941Smrg 1882b8e80941Smrg case nir_op_fdot_replicated4: 1883b8e80941Smrg inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]); 1884b8e80941Smrg inst->saturate = instr->dest.saturate; 1885b8e80941Smrg break; 1886b8e80941Smrg 1887b8e80941Smrg case nir_op_fdph_replicated: 1888b8e80941Smrg inst = emit(BRW_OPCODE_DPH, dst, op[0], op[1]); 1889b8e80941Smrg inst->saturate = instr->dest.saturate; 1890b8e80941Smrg break; 1891b8e80941Smrg 1892b8e80941Smrg case nir_op_iabs: 1893b8e80941Smrg case nir_op_ineg: 1894b8e80941Smrg assert(nir_dest_bit_size(instr->dest.dest) < 64); 1895b8e80941Smrg /* fall through */ 1896b8e80941Smrg case nir_op_fabs: 1897b8e80941Smrg case nir_op_fneg: 1898b8e80941Smrg case nir_op_fsat: 1899b8e80941Smrg unreachable("not reached: should be lowered by lower_source mods"); 1900b8e80941Smrg 1901b8e80941Smrg case nir_op_fdiv: 1902b8e80941Smrg unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler"); 1903b8e80941Smrg 1904b8e80941Smrg case nir_op_fmod: 1905b8e80941Smrg unreachable("not reached: should be lowered by MOD_TO_FLOOR in the compiler"); 1906b8e80941Smrg 1907b8e80941Smrg case nir_op_fsub: 1908b8e80941Smrg case nir_op_isub: 1909b8e80941Smrg unreachable("not reached: should be handled by ir_sub_to_add_neg"); 1910b8e80941Smrg 1911b8e80941Smrg default: 1912b8e80941Smrg unreachable("Unimplemented ALU operation"); 1913b8e80941Smrg } 1914b8e80941Smrg 1915b8e80941Smrg /* If we need to do a boolean resolve, replace the result with -(x & 1) 1916b8e80941Smrg * to sign extend the low bit to 0/~0 1917b8e80941Smrg */ 1918b8e80941Smrg if (devinfo->gen <= 5 && 1919b8e80941Smrg (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == 1920b8e80941Smrg BRW_NIR_BOOLEAN_NEEDS_RESOLVE) { 
      /* masked = dst & 1: isolate the low bit of each channel. */
      dst_reg masked = dst_reg(this, glsl_type::int_type);
      masked.writemask = dst.writemask;
      emit(AND(masked, src_reg(dst), brw_imm_d(1)));
      src_reg masked_neg = src_reg(masked);
      masked_neg.negate = true;
      /* dst = -(dst & 1): sign-extend the low bit to 0 / ~0. */
      emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg));
   }
}

/* Translate a NIR jump instruction (loop break/continue) into the
 * corresponding BRW flow-control opcode.  Function returns are not
 * expected at this point and fall into unreachable().
 */
void
vec4_visitor::nir_emit_jump(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
      emit(BRW_OPCODE_BREAK);
      break;

   case nir_jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;

   case nir_jump_return:
      /* fall through */
   default:
      unreachable("unknown jump");
   }
}

/* Map a nir_texop onto the equivalent ir_texture_opcode that the vec4
 * backend's texture emission code understands.
 */
static enum ir_texture_opcode
ir_texture_opcode_for_nir_texop(nir_texop texop)
{
   enum ir_texture_opcode op;

   switch (texop) {
   case nir_texop_lod: op = ir_lod; break;
   case nir_texop_query_levels: op = ir_query_levels; break;
   case nir_texop_texture_samples: op = ir_texture_samples; break;
   case nir_texop_tex: op = ir_tex; break;
   case nir_texop_tg4: op = ir_tg4; break;
   case nir_texop_txb: op = ir_txb; break;
   case nir_texop_txd: op = ir_txd; break;
   case nir_texop_txf: op = ir_txf; break;
   case nir_texop_txf_ms: op = ir_txf_ms; break;
   case nir_texop_txl: op = ir_txl; break;
   case nir_texop_txs: op = ir_txs; break;
   case nir_texop_samples_identical: op = ir_samples_identical; break;
   default:
      unreachable("unknown texture opcode");
   }

   return op;
}

/* Build a glsl_type whose base type matches the given NIR ALU type, with
 * the requested number of components (always a single column).
 */
static const glsl_type *
glsl_type_for_nir_alu_type(nir_alu_type alu_type,
                           unsigned components)
{
   return glsl_type::get_instance(brw_glsl_base_type_for_nir_type(alu_type),
                                  components, 1);
}

/* Emit vec4 IR for a NIR texture instruction: collect all of the texture
 * operation sources (coordinate, comparator, LOD/gradients, sample index,
 * offsets, MCS data, dynamic texture/sampler indices) and hand them to
 * emit_texture().
 */
void
vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
{
   unsigned texture = instr->texture_index;
   unsigned sampler = instr->sampler_index;
   src_reg texture_reg = brw_imm_ud(texture);
   src_reg sampler_reg = brw_imm_ud(sampler);
   src_reg coordinate;
   const glsl_type *coord_type = NULL;
   src_reg shadow_comparator;
   src_reg offset_value;
   src_reg lod, lod2;
   src_reg sample_index;
   src_reg mcs;

   const glsl_type *dest_type =
      glsl_type_for_nir_alu_type(instr->dest_type,
                                 nir_tex_instr_dest_size(instr));
   dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);

   /* The hardware requires a LOD for buffer textures */
   if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
      lod = brw_imm_d(0);

   /* Load the texture operation sources */
   uint32_t constant_offset = 0;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_comparator:
         shadow_comparator = get_nir_src(instr->src[i].src,
                                         BRW_REGISTER_TYPE_F, 1);
         break;

      case nir_tex_src_coord: {
         unsigned src_size = nir_tex_instr_src_size(instr, i);

         switch (instr->op) {
         /* txf-style fetches take integer texel coordinates; everything
          * else takes normalized float coordinates.
          */
         case nir_texop_txf:
         case nir_texop_txf_ms:
         case nir_texop_samples_identical:
            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
                                     src_size);
            coord_type = glsl_type::ivec(src_size);
            break;

         default:
            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
                                     src_size);
            coord_type = glsl_type::vec(src_size);
            break;
         }
         break;
      }

      case nir_tex_src_ddx:
         lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
                           nir_tex_instr_src_size(instr, i));
         break;

      case nir_tex_src_ddy:
         lod2 = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
                            nir_tex_instr_src_size(instr, i));
         break;

      case nir_tex_src_lod:
         switch (instr->op) {
         /* txs/txf take an integer LOD; other opcodes take a float LOD. */
         case nir_texop_txs:
         case nir_texop_txf:
            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
            break;

         default:
            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, 1);
            break;
         }
         break;

      case nir_tex_src_ms_index: {
         sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
         break;
      }

      case nir_tex_src_offset:
         /* Constant offsets are folded into constant_offset; only a
          * non-constant offset needs a register source.
          */
         if (!brw_texture_offset(instr, i, &constant_offset)) {
            offset_value =
               get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
         }
         break;

      case nir_tex_src_texture_offset: {
         /* Emit code to evaluate the actual indexing expression */
         src_reg src = get_nir_src(instr->src[i].src, 1);
         src_reg temp(this, glsl_type::uint_type);
         emit(ADD(dst_reg(temp), src, brw_imm_ud(texture)));
         texture_reg = emit_uniformize(temp);
         break;
      }

      case nir_tex_src_sampler_offset: {
         /* Emit code to evaluate the actual indexing expression */
         src_reg src = get_nir_src(instr->src[i].src, 1);
         src_reg temp(this, glsl_type::uint_type);
         emit(ADD(dst_reg(temp), src, brw_imm_ud(sampler)));
         sampler_reg = emit_uniformize(temp);
         break;
      }

      case nir_tex_src_projector:
         unreachable("Should be lowered by do_lower_texture_projection");

      case nir_tex_src_bias:
         unreachable("LOD bias is not valid for vertex shaders.\n");

      default:
         unreachable("unknown texture source");
      }
   }

   /* Fetch MCS data for compressed multisample surfaces (Gen7+ with the
    * CMS layout); otherwise pass 0 as the MCS argument.
    */
   if (instr->op == nir_texop_txf_ms ||
       instr->op == nir_texop_samples_identical) {
      assert(coord_type != NULL);
      if (devinfo->gen >= 7 &&
          key_tex->compressed_multisample_layout_mask & (1 << texture)) {
         mcs = emit_mcs_fetch(coord_type, coordinate, texture_reg);
      } else {
         mcs = brw_imm_ud(0u);
      }
   }

   /* Stuff the channel select bits in the top of the texture offset */
   if (instr->op == nir_texop_tg4) {
      if (instr->component == 1 &&
          (key_tex->gather_channel_quirk_mask & (1 << texture))) {
         /* gather4 sampler is broken for green channel on RG32F --
          * we must ask for blue instead.
          */
         constant_offset |= 2 << 16;
      } else {
         constant_offset |= instr->component << 16;
      }
   }

   ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op);

   emit_texture(op, dest, dest_type, coordinate, instr->coord_components,
                shadow_comparator,
                lod, lod2, sample_index,
                constant_offset, offset_value, mcs,
                texture, texture_reg, sampler_reg);
}

/* Allocate fresh (uninitialized) VGRF space to stand in for a NIR SSA
 * undef value: one register per 32 bits of the def's bit size.
 */
void
vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
{
   nir_ssa_values[instr->def.index] =
      dst_reg(VGRF, alloc.allocate(DIV_ROUND_UP(instr->def.bit_size, 32)));
}

/* SIMD4x2 64bit data is stored in register space like this:
 *
 * r0.0:DF x0 y0 z0 w0
 * r1.0:DF x1 y1 z1 w1
 *
 * When we need to write data such as this to memory using 32-bit write
 * messages we need to shuffle it in this fashion:
 *
 * r0.0:DF x0 y0 x1 y1 (to be written at base offset)
 * r0.0:DF z0 w0 z1 w1 (to be written at base offset + 16)
 *
 * We need to do the inverse operation when we read using 32-bit messages,
 * which we can do by applying the same exact shuffling on the 64-bit data
 * read, only that because the data for each vertex is
positioned differently
 * we need to apply different channel enables.
 *
 * This function takes 64bit data and shuffles it as explained above.
 *
 * The @for_write parameter is used to specify if the shuffling is being done
 * for proper SIMD4x2 64-bit data that needs to be shuffled prior to a 32-bit
 * write message (for_write = true), or instead we are doing the inverse
 * operation and we have just read 64-bit data using a 32-bit messages that we
 * need to shuffle to create valid SIMD4x2 64-bit data (for_write = false).
 *
 * If @block and @ref are non-NULL, then the shuffling is done after @ref,
 * otherwise the instructions are emitted normally at the end. The function
 * returns the last instruction inserted.
 *
 * Notice that @src and @dst cannot be the same register.
 */
vec4_instruction *
vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write,
                                 bblock_t *block, vec4_instruction *ref)
{
   assert(type_sz(src.type) == 8);
   assert(type_sz(dst.type) == 8);
   /* src and dst must not alias (see function comment above). */
   assert(!regions_overlap(dst, 2 * REG_SIZE, src, 2 * REG_SIZE));
   /* block and ref must be provided together or not at all. */
   assert(!ref == !block);

   /* Insert after @ref when given, otherwise append at the end. */
   const vec4_builder bld = !ref ? vec4_builder(this).at_end() :
                            vec4_builder(this).at(block, ref->next);

   /* Resolve swizzle in src */
   vec4_instruction *inst;
   if (src.swizzle != BRW_SWIZZLE_XYZW) {
      dst_reg data = dst_reg(this, glsl_type::dvec4_type);
      inst = bld.MOV(data, src);
      src = src_reg(data);
   }

   /* dst+0.XY = src+0.XY */
   inst = bld.group(4, 0).MOV(writemask(dst, WRITEMASK_XY), src);

   /* dst+0.ZW = src+1.XY */
   inst = bld.group(4, for_write ? 1 : 0)
             .MOV(writemask(dst, WRITEMASK_ZW),
                  swizzle(byte_offset(src, REG_SIZE), BRW_SWIZZLE_XYXY));

   /* dst+1.XY = src+0.ZW */
   inst = bld.group(4, for_write ? 0 : 1)
             .MOV(writemask(byte_offset(dst, REG_SIZE), WRITEMASK_XY),
                  swizzle(src, BRW_SWIZZLE_ZWZW));

   /* dst+1.ZW = src+1.ZW */
   inst = bld.group(4, 1)
             .MOV(writemask(byte_offset(dst, REG_SIZE), WRITEMASK_ZW),
                  byte_offset(src, REG_SIZE));

   return inst;
}

} /* namespace brw */