1b8e80941Smrg/* -*- c++ -*- */ 2b8e80941Smrg/* 3b8e80941Smrg * Copyright © 2010-2015 Intel Corporation 4b8e80941Smrg * 5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 7b8e80941Smrg * to deal in the Software without restriction, including without limitation 8b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 10b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 11b8e80941Smrg * 12b8e80941Smrg * The above copyright notice and this permission notice (including the next 13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 14b8e80941Smrg * Software. 15b8e80941Smrg * 16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22b8e80941Smrg * IN THE SOFTWARE. 23b8e80941Smrg */ 24b8e80941Smrg 25b8e80941Smrg#ifndef BRW_VEC4_BUILDER_H 26b8e80941Smrg#define BRW_VEC4_BUILDER_H 27b8e80941Smrg 28b8e80941Smrg#include "brw_ir_vec4.h" 29b8e80941Smrg#include "brw_ir_allocator.h" 30b8e80941Smrg 31b8e80941Smrgnamespace brw { 32b8e80941Smrg /** 33b8e80941Smrg * Toolbox to assemble a VEC4 IR program out of individual instructions. 34b8e80941Smrg * 35b8e80941Smrg * This object is meant to have an interface consistent with 36b8e80941Smrg * brw::fs_builder. They cannot be fully interchangeable because 37b8e80941Smrg * brw::fs_builder generates scalar code while brw::vec4_builder generates 38b8e80941Smrg * vector code. 39b8e80941Smrg */ 40b8e80941Smrg class vec4_builder { 41b8e80941Smrg public: 42b8e80941Smrg /** Type used in this IR to represent a source of an instruction. */ 43b8e80941Smrg typedef brw::src_reg src_reg; 44b8e80941Smrg 45b8e80941Smrg /** Type used in this IR to represent the destination of an instruction. */ 46b8e80941Smrg typedef brw::dst_reg dst_reg; 47b8e80941Smrg 48b8e80941Smrg /** Type used in this IR to represent an instruction. */ 49b8e80941Smrg typedef vec4_instruction instruction; 50b8e80941Smrg 51b8e80941Smrg /** 52b8e80941Smrg * Construct a vec4_builder that inserts instructions into \p shader. 53b8e80941Smrg */ 54b8e80941Smrg vec4_builder(backend_shader *shader, unsigned dispatch_width = 8) : 55b8e80941Smrg shader(shader), block(NULL), cursor(NULL), 56b8e80941Smrg _dispatch_width(dispatch_width), _group(0), 57b8e80941Smrg force_writemask_all(false), 58b8e80941Smrg annotation() 59b8e80941Smrg { 60b8e80941Smrg } 61b8e80941Smrg 62b8e80941Smrg /** 63b8e80941Smrg * Construct a vec4_builder that inserts instructions into \p shader 64b8e80941Smrg * before instruction \p inst in basic block \p block. The default 65b8e80941Smrg * execution controls and debug annotation are initialized from the 66b8e80941Smrg * instruction passed as argument. 67b8e80941Smrg */ 68b8e80941Smrg vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) : 69b8e80941Smrg shader(shader), block(block), cursor(inst), 70b8e80941Smrg _dispatch_width(inst->exec_size), _group(inst->group), 71b8e80941Smrg force_writemask_all(inst->force_writemask_all) 72b8e80941Smrg { 73b8e80941Smrg annotation.str = inst->annotation; 74b8e80941Smrg annotation.ir = inst->ir; 75b8e80941Smrg } 76b8e80941Smrg 77b8e80941Smrg /** 78b8e80941Smrg * Construct a vec4_builder that inserts instructions before \p cursor 79b8e80941Smrg * in basic block \p block, inheriting other code generation parameters 80b8e80941Smrg * from this. 81b8e80941Smrg */ 82b8e80941Smrg vec4_builder 83b8e80941Smrg at(bblock_t *block, exec_node *cursor) const 84b8e80941Smrg { 85b8e80941Smrg vec4_builder bld = *this; 86b8e80941Smrg bld.block = block; 87b8e80941Smrg bld.cursor = cursor; 88b8e80941Smrg return bld; 89b8e80941Smrg } 90b8e80941Smrg 91b8e80941Smrg /** 92b8e80941Smrg * Construct a vec4_builder appending instructions at the end of the 93b8e80941Smrg * instruction list of the shader, inheriting other code generation 94b8e80941Smrg * parameters from this. 95b8e80941Smrg */ 96b8e80941Smrg vec4_builder 97b8e80941Smrg at_end() const 98b8e80941Smrg { 99b8e80941Smrg return at(NULL, (exec_node *)&shader->instructions.tail_sentinel); 100b8e80941Smrg } 101b8e80941Smrg 102b8e80941Smrg /** 103b8e80941Smrg * Construct a builder specifying the default SIMD width and group of 104b8e80941Smrg * channel enable signals, inheriting other code generation parameters 105b8e80941Smrg * from this. 106b8e80941Smrg * 107b8e80941Smrg * \p n gives the default SIMD width, \p i gives the slot group used for 108b8e80941Smrg * predication and control flow masking in multiples of \p n channels. 109b8e80941Smrg */ 110b8e80941Smrg vec4_builder 111b8e80941Smrg group(unsigned n, unsigned i) const 112b8e80941Smrg { 113b8e80941Smrg assert(force_writemask_all || 114b8e80941Smrg (n <= dispatch_width() && i < dispatch_width() / n)); 115b8e80941Smrg vec4_builder bld = *this; 116b8e80941Smrg bld._dispatch_width = n; 117b8e80941Smrg bld._group += i * n; 118b8e80941Smrg return bld; 119b8e80941Smrg } 120b8e80941Smrg 121b8e80941Smrg /** 122b8e80941Smrg * Construct a builder with per-channel control flow execution masking 123b8e80941Smrg * disabled if \p b is true. If control flow execution masking is 124b8e80941Smrg * already disabled this has no effect. 125b8e80941Smrg */ 126b8e80941Smrg vec4_builder 127b8e80941Smrg exec_all(bool b = true) const 128b8e80941Smrg { 129b8e80941Smrg vec4_builder bld = *this; 130b8e80941Smrg if (b) 131b8e80941Smrg bld.force_writemask_all = true; 132b8e80941Smrg return bld; 133b8e80941Smrg } 134b8e80941Smrg 135b8e80941Smrg /** 136b8e80941Smrg * Construct a builder with the given debug annotation info. 137b8e80941Smrg */ 138b8e80941Smrg vec4_builder 139b8e80941Smrg annotate(const char *str, const void *ir = NULL) const 140b8e80941Smrg { 141b8e80941Smrg vec4_builder bld = *this; 142b8e80941Smrg bld.annotation.str = str; 143b8e80941Smrg bld.annotation.ir = ir; 144b8e80941Smrg return bld; 145b8e80941Smrg } 146b8e80941Smrg 147b8e80941Smrg /** 148b8e80941Smrg * Get the SIMD width in use. 149b8e80941Smrg */ 150b8e80941Smrg unsigned 151b8e80941Smrg dispatch_width() const 152b8e80941Smrg { 153b8e80941Smrg return _dispatch_width; 154b8e80941Smrg } 155b8e80941Smrg 156b8e80941Smrg /** 157b8e80941Smrg * Get the channel group in use. 158b8e80941Smrg */ 159b8e80941Smrg unsigned 160b8e80941Smrg group() const 161b8e80941Smrg { 162b8e80941Smrg return _group; 163b8e80941Smrg } 164b8e80941Smrg 165b8e80941Smrg /** 166b8e80941Smrg * Allocate a virtual register of natural vector size (four for this IR) 167b8e80941Smrg * and SIMD width. \p n gives the amount of space to allocate in 168b8e80941Smrg * dispatch_width units (which is just enough space for four logical 169b8e80941Smrg * components in this IR). 170b8e80941Smrg */ 171b8e80941Smrg dst_reg 172b8e80941Smrg vgrf(enum brw_reg_type type, unsigned n = 1) const 173b8e80941Smrg { 174b8e80941Smrg assert(dispatch_width() <= 32); 175b8e80941Smrg 176b8e80941Smrg if (n > 0) 177b8e80941Smrg return retype(dst_reg(VGRF, shader->alloc.allocate( 178b8e80941Smrg n * DIV_ROUND_UP(type_sz(type), 4))), 179b8e80941Smrg type); 180b8e80941Smrg else 181b8e80941Smrg return retype(null_reg_ud(), type); 182b8e80941Smrg } 183b8e80941Smrg 184b8e80941Smrg /** 185b8e80941Smrg * Create a null register of floating type. 186b8e80941Smrg */ 187b8e80941Smrg dst_reg 188b8e80941Smrg null_reg_f() const 189b8e80941Smrg { 190b8e80941Smrg return dst_reg(retype(brw_null_vec(dispatch_width()), 191b8e80941Smrg BRW_REGISTER_TYPE_F)); 192b8e80941Smrg } 193b8e80941Smrg 194b8e80941Smrg /** 195b8e80941Smrg * Create a null register of signed integer type. 196b8e80941Smrg */ 197b8e80941Smrg dst_reg 198b8e80941Smrg null_reg_d() const 199b8e80941Smrg { 200b8e80941Smrg return dst_reg(retype(brw_null_vec(dispatch_width()), 201b8e80941Smrg BRW_REGISTER_TYPE_D)); 202b8e80941Smrg } 203b8e80941Smrg 204b8e80941Smrg /** 205b8e80941Smrg * Create a null register of unsigned integer type. 206b8e80941Smrg */ 207b8e80941Smrg dst_reg 208b8e80941Smrg null_reg_ud() const 209b8e80941Smrg { 210b8e80941Smrg return dst_reg(retype(brw_null_vec(dispatch_width()), 211b8e80941Smrg BRW_REGISTER_TYPE_UD)); 212b8e80941Smrg } 213b8e80941Smrg 214b8e80941Smrg /** 215b8e80941Smrg * Insert an instruction into the program. 216b8e80941Smrg */ 217b8e80941Smrg instruction * 218b8e80941Smrg emit(const instruction &inst) const 219b8e80941Smrg { 220b8e80941Smrg return emit(new(shader->mem_ctx) instruction(inst)); 221b8e80941Smrg } 222b8e80941Smrg 223b8e80941Smrg /** 224b8e80941Smrg * Create and insert a nullary control instruction into the program. 225b8e80941Smrg */ 226b8e80941Smrg instruction * 227b8e80941Smrg emit(enum opcode opcode) const 228b8e80941Smrg { 229b8e80941Smrg return emit(instruction(opcode)); 230b8e80941Smrg } 231b8e80941Smrg 232b8e80941Smrg /** 233b8e80941Smrg * Create and insert a nullary instruction into the program. 234b8e80941Smrg */ 235b8e80941Smrg instruction * 236b8e80941Smrg emit(enum opcode opcode, const dst_reg &dst) const 237b8e80941Smrg { 238b8e80941Smrg return emit(instruction(opcode, dst)); 239b8e80941Smrg } 240b8e80941Smrg 241b8e80941Smrg /** 242b8e80941Smrg * Create and insert a unary instruction into the program. 243b8e80941Smrg */ 244b8e80941Smrg instruction * 245b8e80941Smrg emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const 246b8e80941Smrg { 247b8e80941Smrg switch (opcode) { 248b8e80941Smrg case SHADER_OPCODE_RCP: 249b8e80941Smrg case SHADER_OPCODE_RSQ: 250b8e80941Smrg case SHADER_OPCODE_SQRT: 251b8e80941Smrg case SHADER_OPCODE_EXP2: 252b8e80941Smrg case SHADER_OPCODE_LOG2: 253b8e80941Smrg case SHADER_OPCODE_SIN: 254b8e80941Smrg case SHADER_OPCODE_COS: 255b8e80941Smrg return fix_math_instruction( 256b8e80941Smrg emit(instruction(opcode, dst, 257b8e80941Smrg fix_math_operand(src0)))); 258b8e80941Smrg 259b8e80941Smrg default: 260b8e80941Smrg return emit(instruction(opcode, dst, src0)); 261b8e80941Smrg } 262b8e80941Smrg } 263b8e80941Smrg 264b8e80941Smrg /** 265b8e80941Smrg * Create and insert a binary instruction into the program. 266b8e80941Smrg */ 267b8e80941Smrg instruction * 268b8e80941Smrg emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0, 269b8e80941Smrg const src_reg &src1) const 270b8e80941Smrg { 271b8e80941Smrg switch (opcode) { 272b8e80941Smrg case SHADER_OPCODE_POW: 273b8e80941Smrg case SHADER_OPCODE_INT_QUOTIENT: 274b8e80941Smrg case SHADER_OPCODE_INT_REMAINDER: 275b8e80941Smrg return fix_math_instruction( 276b8e80941Smrg emit(instruction(opcode, dst, 277b8e80941Smrg fix_math_operand(src0), 278b8e80941Smrg fix_math_operand(src1)))); 279b8e80941Smrg 280b8e80941Smrg default: 281b8e80941Smrg return emit(instruction(opcode, dst, src0, src1)); 282b8e80941Smrg } 283b8e80941Smrg } 284b8e80941Smrg 285b8e80941Smrg /** 286b8e80941Smrg * Create and insert a ternary instruction into the program. 287b8e80941Smrg */ 288b8e80941Smrg instruction * 289b8e80941Smrg emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0, 290b8e80941Smrg const src_reg &src1, const src_reg &src2) const 291b8e80941Smrg { 292b8e80941Smrg switch (opcode) { 293b8e80941Smrg case BRW_OPCODE_BFE: 294b8e80941Smrg case BRW_OPCODE_BFI2: 295b8e80941Smrg case BRW_OPCODE_MAD: 296b8e80941Smrg case BRW_OPCODE_LRP: 297b8e80941Smrg return emit(instruction(opcode, dst, 298b8e80941Smrg fix_3src_operand(src0), 299b8e80941Smrg fix_3src_operand(src1), 300b8e80941Smrg fix_3src_operand(src2))); 301b8e80941Smrg 302b8e80941Smrg default: 303b8e80941Smrg return emit(instruction(opcode, dst, src0, src1, src2)); 304b8e80941Smrg } 305b8e80941Smrg } 306b8e80941Smrg 307b8e80941Smrg /** 308b8e80941Smrg * Insert a preallocated instruction into the program. 309b8e80941Smrg */ 310b8e80941Smrg instruction * 311b8e80941Smrg emit(instruction *inst) const 312b8e80941Smrg { 313b8e80941Smrg inst->exec_size = dispatch_width(); 314b8e80941Smrg inst->group = group(); 315b8e80941Smrg inst->force_writemask_all = force_writemask_all; 316b8e80941Smrg inst->size_written = inst->exec_size * type_sz(inst->dst.type); 317b8e80941Smrg inst->annotation = annotation.str; 318b8e80941Smrg inst->ir = annotation.ir; 319b8e80941Smrg 320b8e80941Smrg if (block) 321b8e80941Smrg static_cast<instruction *>(cursor)->insert_before(block, inst); 322b8e80941Smrg else 323b8e80941Smrg cursor->insert_before(inst); 324b8e80941Smrg 325b8e80941Smrg return inst; 326b8e80941Smrg } 327b8e80941Smrg 328b8e80941Smrg /** 329b8e80941Smrg * Select \p src0 if the comparison of both sources with the given 330b8e80941Smrg * conditional mod evaluates to true, otherwise select \p src1. 331b8e80941Smrg * 332b8e80941Smrg * Generally useful to get the minimum or maximum of two values. 333b8e80941Smrg */ 334b8e80941Smrg instruction * 335b8e80941Smrg emit_minmax(const dst_reg &dst, const src_reg &src0, 336b8e80941Smrg const src_reg &src1, brw_conditional_mod mod) const 337b8e80941Smrg { 338b8e80941Smrg assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); 339b8e80941Smrg 340b8e80941Smrg return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), 341b8e80941Smrg fix_unsigned_negate(src1))); 342b8e80941Smrg } 343b8e80941Smrg 344b8e80941Smrg /** 345b8e80941Smrg * Copy any live channel from \p src to the first channel of the result. 346b8e80941Smrg */ 347b8e80941Smrg src_reg 348b8e80941Smrg emit_uniformize(const src_reg &src) const 349b8e80941Smrg { 350b8e80941Smrg const vec4_builder ubld = exec_all(); 351b8e80941Smrg const dst_reg chan_index = 352b8e80941Smrg writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X); 353b8e80941Smrg const dst_reg dst = vgrf(src.type); 354b8e80941Smrg 355b8e80941Smrg ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); 356b8e80941Smrg ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index)); 357b8e80941Smrg 358b8e80941Smrg return src_reg(dst); 359b8e80941Smrg } 360b8e80941Smrg 361b8e80941Smrg /** 362b8e80941Smrg * Assorted arithmetic ops. 363b8e80941Smrg * @{ 364b8e80941Smrg */ 365b8e80941Smrg#define ALU1(op) \ 366b8e80941Smrg instruction * \ 367b8e80941Smrg op(const dst_reg &dst, const src_reg &src0) const \ 368b8e80941Smrg { \ 369b8e80941Smrg return emit(BRW_OPCODE_##op, dst, src0); \ 370b8e80941Smrg } 371b8e80941Smrg 372b8e80941Smrg#define ALU2(op) \ 373b8e80941Smrg instruction * \ 374b8e80941Smrg op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ 375b8e80941Smrg { \ 376b8e80941Smrg return emit(BRW_OPCODE_##op, dst, src0, src1); \ 377b8e80941Smrg } 378b8e80941Smrg 379b8e80941Smrg#define ALU2_ACC(op) \ 380b8e80941Smrg instruction * \ 381b8e80941Smrg op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ 382b8e80941Smrg { \ 383b8e80941Smrg instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \ 384b8e80941Smrg inst->writes_accumulator = true; \ 385b8e80941Smrg return inst; \ 386b8e80941Smrg } 387b8e80941Smrg 388b8e80941Smrg#define ALU3(op) \ 389b8e80941Smrg instruction * \ 390b8e80941Smrg op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \ 391b8e80941Smrg const src_reg &src2) const \ 392b8e80941Smrg { \ 393b8e80941Smrg return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \ 394b8e80941Smrg } 395b8e80941Smrg 396b8e80941Smrg ALU2(ADD) 397b8e80941Smrg ALU2_ACC(ADDC) 398b8e80941Smrg ALU2(AND) 399b8e80941Smrg ALU2(ASR) 400b8e80941Smrg ALU2(AVG) 401b8e80941Smrg ALU3(BFE) 402b8e80941Smrg ALU2(BFI1) 403b8e80941Smrg ALU3(BFI2) 404b8e80941Smrg ALU1(BFREV) 405b8e80941Smrg ALU1(CBIT) 406b8e80941Smrg ALU2(CMPN) 407b8e80941Smrg ALU3(CSEL) 408b8e80941Smrg ALU1(DIM) 409b8e80941Smrg ALU2(DP2) 410b8e80941Smrg ALU2(DP3) 411b8e80941Smrg ALU2(DP4) 412b8e80941Smrg ALU2(DPH) 413b8e80941Smrg ALU1(F16TO32) 414b8e80941Smrg ALU1(F32TO16) 415b8e80941Smrg ALU1(FBH) 416b8e80941Smrg ALU1(FBL) 417b8e80941Smrg ALU1(FRC) 418b8e80941Smrg ALU2(LINE) 419b8e80941Smrg ALU1(LZD) 420b8e80941Smrg ALU2(MAC) 421b8e80941Smrg ALU2_ACC(MACH) 422b8e80941Smrg ALU3(MAD) 423b8e80941Smrg ALU1(MOV) 424b8e80941Smrg ALU2(MUL) 425b8e80941Smrg ALU1(NOT) 426b8e80941Smrg ALU2(OR) 427b8e80941Smrg ALU2(PLN) 428b8e80941Smrg ALU1(RNDD) 429b8e80941Smrg ALU1(RNDE) 430b8e80941Smrg ALU1(RNDU) 431b8e80941Smrg ALU1(RNDZ) 432b8e80941Smrg ALU2(SAD2) 433b8e80941Smrg ALU2_ACC(SADA2) 434b8e80941Smrg ALU2(SEL) 435b8e80941Smrg ALU2(SHL) 436b8e80941Smrg ALU2(SHR) 437b8e80941Smrg ALU2_ACC(SUBB) 438b8e80941Smrg ALU2(XOR) 439b8e80941Smrg 440b8e80941Smrg#undef ALU3 441b8e80941Smrg#undef ALU2_ACC 442b8e80941Smrg#undef ALU2 443b8e80941Smrg#undef ALU1 444b8e80941Smrg /** @} */ 445b8e80941Smrg 446b8e80941Smrg /** 447b8e80941Smrg * CMP: Sets the low bit of the destination channels with the result 448b8e80941Smrg * of the comparison, while the upper bits are undefined, and updates 449b8e80941Smrg * the flag register with the packed 16 bits of the result. 450b8e80941Smrg */ 451b8e80941Smrg instruction * 452b8e80941Smrg CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1, 453b8e80941Smrg brw_conditional_mod condition) const 454b8e80941Smrg { 455b8e80941Smrg /* Take the instruction: 456b8e80941Smrg * 457b8e80941Smrg * CMP null<d> src0<f> src1<f> 458b8e80941Smrg * 459b8e80941Smrg * Original gen4 does type conversion to the destination type 460b8e80941Smrg * before comparison, producing garbage results for floating 461b8e80941Smrg * point comparisons. 462b8e80941Smrg * 463b8e80941Smrg * The destination type doesn't matter on newer generations, 464b8e80941Smrg * so we set the type to match src0 so we can compact the 465b8e80941Smrg * instruction. 466b8e80941Smrg */ 467b8e80941Smrg return set_condmod(condition, 468b8e80941Smrg emit(BRW_OPCODE_CMP, retype(dst, src0.type), 469b8e80941Smrg fix_unsigned_negate(src0), 470b8e80941Smrg fix_unsigned_negate(src1))); 471b8e80941Smrg } 472b8e80941Smrg 473b8e80941Smrg /** 474b8e80941Smrg * Gen4 predicated IF. 475b8e80941Smrg */ 476b8e80941Smrg instruction * 477b8e80941Smrg IF(brw_predicate predicate) const 478b8e80941Smrg { 479b8e80941Smrg return set_predicate(predicate, emit(BRW_OPCODE_IF)); 480b8e80941Smrg } 481b8e80941Smrg 482b8e80941Smrg /** 483b8e80941Smrg * Gen6 IF with embedded comparison. 484b8e80941Smrg */ 485b8e80941Smrg instruction * 486b8e80941Smrg IF(const src_reg &src0, const src_reg &src1, 487b8e80941Smrg brw_conditional_mod condition) const 488b8e80941Smrg { 489b8e80941Smrg assert(shader->devinfo->gen == 6); 490b8e80941Smrg return set_condmod(condition, 491b8e80941Smrg emit(BRW_OPCODE_IF, 492b8e80941Smrg null_reg_d(), 493b8e80941Smrg fix_unsigned_negate(src0), 494b8e80941Smrg fix_unsigned_negate(src1))); 495b8e80941Smrg } 496b8e80941Smrg 497b8e80941Smrg /** 498b8e80941Smrg * Emit a linear interpolation instruction. 499b8e80941Smrg */ 500b8e80941Smrg instruction * 501b8e80941Smrg LRP(const dst_reg &dst, const src_reg &x, const src_reg &y, 502b8e80941Smrg const src_reg &a) const 503b8e80941Smrg { 504b8e80941Smrg if (shader->devinfo->gen >= 6 && shader->devinfo->gen <= 10) { 505b8e80941Smrg /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so 506b8e80941Smrg * we need to reorder the operands. 507b8e80941Smrg */ 508b8e80941Smrg return emit(BRW_OPCODE_LRP, dst, a, y, x); 509b8e80941Smrg 510b8e80941Smrg } else { 511b8e80941Smrg /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */ 512b8e80941Smrg const dst_reg y_times_a = vgrf(dst.type); 513b8e80941Smrg const dst_reg one_minus_a = vgrf(dst.type); 514b8e80941Smrg const dst_reg x_times_one_minus_a = vgrf(dst.type); 515b8e80941Smrg 516b8e80941Smrg MUL(y_times_a, y, a); 517b8e80941Smrg ADD(one_minus_a, negate(a), brw_imm_f(1.0f)); 518b8e80941Smrg MUL(x_times_one_minus_a, x, src_reg(one_minus_a)); 519b8e80941Smrg return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)); 520b8e80941Smrg } 521b8e80941Smrg } 522b8e80941Smrg 523b8e80941Smrg backend_shader *shader; 524b8e80941Smrg 525b8e80941Smrg protected: 526b8e80941Smrg /** 527b8e80941Smrg * Workaround for negation of UD registers. See comment in 528b8e80941Smrg * fs_generator::generate_code() for the details. 529b8e80941Smrg */ 530b8e80941Smrg src_reg 531b8e80941Smrg fix_unsigned_negate(const src_reg &src) const 532b8e80941Smrg { 533b8e80941Smrg if (src.type == BRW_REGISTER_TYPE_UD && src.negate) { 534b8e80941Smrg dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD); 535b8e80941Smrg MOV(temp, src); 536b8e80941Smrg return src_reg(temp); 537b8e80941Smrg } else { 538b8e80941Smrg return src; 539b8e80941Smrg } 540b8e80941Smrg } 541b8e80941Smrg 542b8e80941Smrg /** 543b8e80941Smrg * Workaround for register access modes not supported by the ternary 544b8e80941Smrg * instruction encoding. 545b8e80941Smrg */ 546b8e80941Smrg src_reg 547b8e80941Smrg fix_3src_operand(const src_reg &src) const 548b8e80941Smrg { 549b8e80941Smrg /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be 550b8e80941Smrg * able to use vertical stride of zero to replicate the vec4 uniform, like 551b8e80941Smrg * 552b8e80941Smrg * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7] 553b8e80941Smrg * 554b8e80941Smrg * But you can't, since vertical stride is always four in three-source 555b8e80941Smrg * instructions. Instead, insert a MOV instruction to do the replication so 556b8e80941Smrg * that the three-source instruction can consume it. 557b8e80941Smrg */ 558b8e80941Smrg 559b8e80941Smrg /* The MOV is only needed if the source is a uniform or immediate. */ 560b8e80941Smrg if (src.file != UNIFORM && src.file != IMM) 561b8e80941Smrg return src; 562b8e80941Smrg 563b8e80941Smrg if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle)) 564b8e80941Smrg return src; 565b8e80941Smrg 566b8e80941Smrg const dst_reg expanded = vgrf(src.type); 567b8e80941Smrg emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src); 568b8e80941Smrg return src_reg(expanded); 569b8e80941Smrg } 570b8e80941Smrg 571b8e80941Smrg /** 572b8e80941Smrg * Workaround for register access modes not supported by the math 573b8e80941Smrg * instruction. 574b8e80941Smrg */ 575b8e80941Smrg src_reg 576b8e80941Smrg fix_math_operand(const src_reg &src) const 577b8e80941Smrg { 578b8e80941Smrg /* The gen6 math instruction ignores the source modifiers -- 579b8e80941Smrg * swizzle, abs, negate, and at least some parts of the register 580b8e80941Smrg * region description. 581b8e80941Smrg * 582b8e80941Smrg * Rather than trying to enumerate all these cases, *always* expand the 583b8e80941Smrg * operand to a temp GRF for gen6. 584b8e80941Smrg * 585b8e80941Smrg * For gen7, keep the operand as-is, except if immediate, which gen7 still 586b8e80941Smrg * can't use. 587b8e80941Smrg */ 588b8e80941Smrg if (shader->devinfo->gen == 6 || 589b8e80941Smrg (shader->devinfo->gen == 7 && src.file == IMM)) { 590b8e80941Smrg const dst_reg tmp = vgrf(src.type); 591b8e80941Smrg MOV(tmp, src); 592b8e80941Smrg return src_reg(tmp); 593b8e80941Smrg } else { 594b8e80941Smrg return src; 595b8e80941Smrg } 596b8e80941Smrg } 597b8e80941Smrg 598b8e80941Smrg /** 599b8e80941Smrg * Workaround other weirdness of the math instruction. 600b8e80941Smrg */ 601b8e80941Smrg instruction * 602b8e80941Smrg fix_math_instruction(instruction *inst) const 603b8e80941Smrg { 604b8e80941Smrg if (shader->devinfo->gen == 6 && 605b8e80941Smrg inst->dst.writemask != WRITEMASK_XYZW) { 606b8e80941Smrg const dst_reg tmp = vgrf(inst->dst.type); 607b8e80941Smrg MOV(inst->dst, src_reg(tmp)); 608b8e80941Smrg inst->dst = tmp; 609b8e80941Smrg 610b8e80941Smrg } else if (shader->devinfo->gen < 6) { 611b8e80941Smrg const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2); 612b8e80941Smrg inst->base_mrf = 1; 613b8e80941Smrg inst->mlen = sources; 614b8e80941Smrg } 615b8e80941Smrg 616b8e80941Smrg return inst; 617b8e80941Smrg } 618b8e80941Smrg 619b8e80941Smrg bblock_t *block; 620b8e80941Smrg exec_node *cursor; 621b8e80941Smrg 622b8e80941Smrg unsigned _dispatch_width; 623b8e80941Smrg unsigned _group; 624b8e80941Smrg bool force_writemask_all; 625b8e80941Smrg 626b8e80941Smrg /** Debug annotation info. */ 627b8e80941Smrg struct { 628b8e80941Smrg const char *str; 629b8e80941Smrg const void *ir; 630b8e80941Smrg } annotation; 631b8e80941Smrg }; 632b8e80941Smrg} 633b8e80941Smrg 634b8e80941Smrg#endif 635