101e04c3fSmrg/* -*- c++ -*- */ 201e04c3fSmrg/* 301e04c3fSmrg * Copyright © 2010-2015 Intel Corporation 401e04c3fSmrg * 501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 601e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 701e04c3fSmrg * to deal in the Software without restriction, including without limitation 801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1101e04c3fSmrg * 1201e04c3fSmrg * The above copyright notice and this permission notice (including the next 1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1401e04c3fSmrg * Software. 1501e04c3fSmrg * 1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2201e04c3fSmrg * IN THE SOFTWARE. 2301e04c3fSmrg */ 2401e04c3fSmrg 2501e04c3fSmrg#ifndef BRW_VEC4_BUILDER_H 2601e04c3fSmrg#define BRW_VEC4_BUILDER_H 2701e04c3fSmrg 2801e04c3fSmrg#include "brw_ir_vec4.h" 2901e04c3fSmrg#include "brw_ir_allocator.h" 3001e04c3fSmrg 3101e04c3fSmrgnamespace brw { 3201e04c3fSmrg /** 3301e04c3fSmrg * Toolbox to assemble a VEC4 IR program out of individual instructions. 3401e04c3fSmrg * 3501e04c3fSmrg * This object is meant to have an interface consistent with 3601e04c3fSmrg * brw::fs_builder. They cannot be fully interchangeable because 3701e04c3fSmrg * brw::fs_builder generates scalar code while brw::vec4_builder generates 3801e04c3fSmrg * vector code. 3901e04c3fSmrg */ 4001e04c3fSmrg class vec4_builder { 4101e04c3fSmrg public: 4201e04c3fSmrg /** Type used in this IR to represent a source of an instruction. */ 4301e04c3fSmrg typedef brw::src_reg src_reg; 4401e04c3fSmrg 4501e04c3fSmrg /** Type used in this IR to represent the destination of an instruction. */ 4601e04c3fSmrg typedef brw::dst_reg dst_reg; 4701e04c3fSmrg 4801e04c3fSmrg /** Type used in this IR to represent an instruction. */ 4901e04c3fSmrg typedef vec4_instruction instruction; 5001e04c3fSmrg 5101e04c3fSmrg /** 5201e04c3fSmrg * Construct a vec4_builder that inserts instructions into \p shader. 5301e04c3fSmrg */ 5401e04c3fSmrg vec4_builder(backend_shader *shader, unsigned dispatch_width = 8) : 5501e04c3fSmrg shader(shader), block(NULL), cursor(NULL), 5601e04c3fSmrg _dispatch_width(dispatch_width), _group(0), 5701e04c3fSmrg force_writemask_all(false), 5801e04c3fSmrg annotation() 5901e04c3fSmrg { 6001e04c3fSmrg } 6101e04c3fSmrg 6201e04c3fSmrg /** 6301e04c3fSmrg * Construct a vec4_builder that inserts instructions into \p shader 6401e04c3fSmrg * before instruction \p inst in basic block \p block. The default 6501e04c3fSmrg * execution controls and debug annotation are initialized from the 6601e04c3fSmrg * instruction passed as argument. 6701e04c3fSmrg */ 6801e04c3fSmrg vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) : 6901e04c3fSmrg shader(shader), block(block), cursor(inst), 7001e04c3fSmrg _dispatch_width(inst->exec_size), _group(inst->group), 7101e04c3fSmrg force_writemask_all(inst->force_writemask_all) 7201e04c3fSmrg { 7301e04c3fSmrg annotation.str = inst->annotation; 7401e04c3fSmrg annotation.ir = inst->ir; 7501e04c3fSmrg } 7601e04c3fSmrg 7701e04c3fSmrg /** 7801e04c3fSmrg * Construct a vec4_builder that inserts instructions before \p cursor 7901e04c3fSmrg * in basic block \p block, inheriting other code generation parameters 8001e04c3fSmrg * from this. 8101e04c3fSmrg */ 8201e04c3fSmrg vec4_builder 8301e04c3fSmrg at(bblock_t *block, exec_node *cursor) const 8401e04c3fSmrg { 8501e04c3fSmrg vec4_builder bld = *this; 8601e04c3fSmrg bld.block = block; 8701e04c3fSmrg bld.cursor = cursor; 8801e04c3fSmrg return bld; 8901e04c3fSmrg } 9001e04c3fSmrg 9101e04c3fSmrg /** 9201e04c3fSmrg * Construct a vec4_builder appending instructions at the end of the 9301e04c3fSmrg * instruction list of the shader, inheriting other code generation 9401e04c3fSmrg * parameters from this. 9501e04c3fSmrg */ 9601e04c3fSmrg vec4_builder 9701e04c3fSmrg at_end() const 9801e04c3fSmrg { 9901e04c3fSmrg return at(NULL, (exec_node *)&shader->instructions.tail_sentinel); 10001e04c3fSmrg } 10101e04c3fSmrg 10201e04c3fSmrg /** 10301e04c3fSmrg * Construct a builder specifying the default SIMD width and group of 10401e04c3fSmrg * channel enable signals, inheriting other code generation parameters 10501e04c3fSmrg * from this. 10601e04c3fSmrg * 10701e04c3fSmrg * \p n gives the default SIMD width, \p i gives the slot group used for 10801e04c3fSmrg * predication and control flow masking in multiples of \p n channels. 10901e04c3fSmrg */ 11001e04c3fSmrg vec4_builder 11101e04c3fSmrg group(unsigned n, unsigned i) const 11201e04c3fSmrg { 11301e04c3fSmrg assert(force_writemask_all || 11401e04c3fSmrg (n <= dispatch_width() && i < dispatch_width() / n)); 11501e04c3fSmrg vec4_builder bld = *this; 11601e04c3fSmrg bld._dispatch_width = n; 11701e04c3fSmrg bld._group += i * n; 11801e04c3fSmrg return bld; 11901e04c3fSmrg } 12001e04c3fSmrg 12101e04c3fSmrg /** 12201e04c3fSmrg * Construct a builder with per-channel control flow execution masking 12301e04c3fSmrg * disabled if \p b is true. If control flow execution masking is 12401e04c3fSmrg * already disabled this has no effect. 12501e04c3fSmrg */ 12601e04c3fSmrg vec4_builder 12701e04c3fSmrg exec_all(bool b = true) const 12801e04c3fSmrg { 12901e04c3fSmrg vec4_builder bld = *this; 13001e04c3fSmrg if (b) 13101e04c3fSmrg bld.force_writemask_all = true; 13201e04c3fSmrg return bld; 13301e04c3fSmrg } 13401e04c3fSmrg 13501e04c3fSmrg /** 13601e04c3fSmrg * Construct a builder with the given debug annotation info. 13701e04c3fSmrg */ 13801e04c3fSmrg vec4_builder 13901e04c3fSmrg annotate(const char *str, const void *ir = NULL) const 14001e04c3fSmrg { 14101e04c3fSmrg vec4_builder bld = *this; 14201e04c3fSmrg bld.annotation.str = str; 14301e04c3fSmrg bld.annotation.ir = ir; 14401e04c3fSmrg return bld; 14501e04c3fSmrg } 14601e04c3fSmrg 14701e04c3fSmrg /** 14801e04c3fSmrg * Get the SIMD width in use. 14901e04c3fSmrg */ 15001e04c3fSmrg unsigned 15101e04c3fSmrg dispatch_width() const 15201e04c3fSmrg { 15301e04c3fSmrg return _dispatch_width; 15401e04c3fSmrg } 15501e04c3fSmrg 15601e04c3fSmrg /** 15701e04c3fSmrg * Get the channel group in use. 15801e04c3fSmrg */ 15901e04c3fSmrg unsigned 16001e04c3fSmrg group() const 16101e04c3fSmrg { 16201e04c3fSmrg return _group; 16301e04c3fSmrg } 16401e04c3fSmrg 16501e04c3fSmrg /** 16601e04c3fSmrg * Allocate a virtual register of natural vector size (four for this IR) 16701e04c3fSmrg * and SIMD width. \p n gives the amount of space to allocate in 16801e04c3fSmrg * dispatch_width units (which is just enough space for four logical 16901e04c3fSmrg * components in this IR). 17001e04c3fSmrg */ 17101e04c3fSmrg dst_reg 17201e04c3fSmrg vgrf(enum brw_reg_type type, unsigned n = 1) const 17301e04c3fSmrg { 17401e04c3fSmrg assert(dispatch_width() <= 32); 17501e04c3fSmrg 17601e04c3fSmrg if (n > 0) 17701e04c3fSmrg return retype(dst_reg(VGRF, shader->alloc.allocate( 17801e04c3fSmrg n * DIV_ROUND_UP(type_sz(type), 4))), 17901e04c3fSmrg type); 18001e04c3fSmrg else 18101e04c3fSmrg return retype(null_reg_ud(), type); 18201e04c3fSmrg } 18301e04c3fSmrg 18401e04c3fSmrg /** 18501e04c3fSmrg * Create a null register of floating type. 18601e04c3fSmrg */ 18701e04c3fSmrg dst_reg 18801e04c3fSmrg null_reg_f() const 18901e04c3fSmrg { 19001e04c3fSmrg return dst_reg(retype(brw_null_vec(dispatch_width()), 19101e04c3fSmrg BRW_REGISTER_TYPE_F)); 19201e04c3fSmrg } 19301e04c3fSmrg 19401e04c3fSmrg /** 19501e04c3fSmrg * Create a null register of signed integer type. 19601e04c3fSmrg */ 19701e04c3fSmrg dst_reg 19801e04c3fSmrg null_reg_d() const 19901e04c3fSmrg { 20001e04c3fSmrg return dst_reg(retype(brw_null_vec(dispatch_width()), 20101e04c3fSmrg BRW_REGISTER_TYPE_D)); 20201e04c3fSmrg } 20301e04c3fSmrg 20401e04c3fSmrg /** 20501e04c3fSmrg * Create a null register of unsigned integer type. 20601e04c3fSmrg */ 20701e04c3fSmrg dst_reg 20801e04c3fSmrg null_reg_ud() const 20901e04c3fSmrg { 21001e04c3fSmrg return dst_reg(retype(brw_null_vec(dispatch_width()), 21101e04c3fSmrg BRW_REGISTER_TYPE_UD)); 21201e04c3fSmrg } 21301e04c3fSmrg 21401e04c3fSmrg /** 21501e04c3fSmrg * Insert an instruction into the program. 21601e04c3fSmrg */ 21701e04c3fSmrg instruction * 21801e04c3fSmrg emit(const instruction &inst) const 21901e04c3fSmrg { 22001e04c3fSmrg return emit(new(shader->mem_ctx) instruction(inst)); 22101e04c3fSmrg } 22201e04c3fSmrg 22301e04c3fSmrg /** 22401e04c3fSmrg * Create and insert a nullary control instruction into the program. 22501e04c3fSmrg */ 22601e04c3fSmrg instruction * 22701e04c3fSmrg emit(enum opcode opcode) const 22801e04c3fSmrg { 22901e04c3fSmrg return emit(instruction(opcode)); 23001e04c3fSmrg } 23101e04c3fSmrg 23201e04c3fSmrg /** 23301e04c3fSmrg * Create and insert a nullary instruction into the program. 23401e04c3fSmrg */ 23501e04c3fSmrg instruction * 23601e04c3fSmrg emit(enum opcode opcode, const dst_reg &dst) const 23701e04c3fSmrg { 23801e04c3fSmrg return emit(instruction(opcode, dst)); 23901e04c3fSmrg } 24001e04c3fSmrg 24101e04c3fSmrg /** 24201e04c3fSmrg * Create and insert a unary instruction into the program. 24301e04c3fSmrg */ 24401e04c3fSmrg instruction * 24501e04c3fSmrg emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const 24601e04c3fSmrg { 24701e04c3fSmrg switch (opcode) { 24801e04c3fSmrg case SHADER_OPCODE_RCP: 24901e04c3fSmrg case SHADER_OPCODE_RSQ: 25001e04c3fSmrg case SHADER_OPCODE_SQRT: 25101e04c3fSmrg case SHADER_OPCODE_EXP2: 25201e04c3fSmrg case SHADER_OPCODE_LOG2: 25301e04c3fSmrg case SHADER_OPCODE_SIN: 25401e04c3fSmrg case SHADER_OPCODE_COS: 25501e04c3fSmrg return fix_math_instruction( 25601e04c3fSmrg emit(instruction(opcode, dst, 25701e04c3fSmrg fix_math_operand(src0)))); 25801e04c3fSmrg 25901e04c3fSmrg default: 26001e04c3fSmrg return emit(instruction(opcode, dst, src0)); 26101e04c3fSmrg } 26201e04c3fSmrg } 26301e04c3fSmrg 26401e04c3fSmrg /** 26501e04c3fSmrg * Create and insert a binary instruction into the program. 26601e04c3fSmrg */ 26701e04c3fSmrg instruction * 26801e04c3fSmrg emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0, 26901e04c3fSmrg const src_reg &src1) const 27001e04c3fSmrg { 27101e04c3fSmrg switch (opcode) { 27201e04c3fSmrg case SHADER_OPCODE_POW: 27301e04c3fSmrg case SHADER_OPCODE_INT_QUOTIENT: 27401e04c3fSmrg case SHADER_OPCODE_INT_REMAINDER: 27501e04c3fSmrg return fix_math_instruction( 27601e04c3fSmrg emit(instruction(opcode, dst, 27701e04c3fSmrg fix_math_operand(src0), 27801e04c3fSmrg fix_math_operand(src1)))); 27901e04c3fSmrg 28001e04c3fSmrg default: 28101e04c3fSmrg return emit(instruction(opcode, dst, src0, src1)); 28201e04c3fSmrg } 28301e04c3fSmrg } 28401e04c3fSmrg 28501e04c3fSmrg /** 28601e04c3fSmrg * Create and insert a ternary instruction into the program. 28701e04c3fSmrg */ 28801e04c3fSmrg instruction * 28901e04c3fSmrg emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0, 29001e04c3fSmrg const src_reg &src1, const src_reg &src2) const 29101e04c3fSmrg { 29201e04c3fSmrg switch (opcode) { 29301e04c3fSmrg case BRW_OPCODE_BFE: 29401e04c3fSmrg case BRW_OPCODE_BFI2: 29501e04c3fSmrg case BRW_OPCODE_MAD: 29601e04c3fSmrg case BRW_OPCODE_LRP: 29701e04c3fSmrg return emit(instruction(opcode, dst, 29801e04c3fSmrg fix_3src_operand(src0), 29901e04c3fSmrg fix_3src_operand(src1), 30001e04c3fSmrg fix_3src_operand(src2))); 30101e04c3fSmrg 30201e04c3fSmrg default: 30301e04c3fSmrg return emit(instruction(opcode, dst, src0, src1, src2)); 30401e04c3fSmrg } 30501e04c3fSmrg } 30601e04c3fSmrg 30701e04c3fSmrg /** 30801e04c3fSmrg * Insert a preallocated instruction into the program. 30901e04c3fSmrg */ 31001e04c3fSmrg instruction * 31101e04c3fSmrg emit(instruction *inst) const 31201e04c3fSmrg { 31301e04c3fSmrg inst->exec_size = dispatch_width(); 31401e04c3fSmrg inst->group = group(); 31501e04c3fSmrg inst->force_writemask_all = force_writemask_all; 31601e04c3fSmrg inst->size_written = inst->exec_size * type_sz(inst->dst.type); 31701e04c3fSmrg inst->annotation = annotation.str; 31801e04c3fSmrg inst->ir = annotation.ir; 31901e04c3fSmrg 32001e04c3fSmrg if (block) 32101e04c3fSmrg static_cast<instruction *>(cursor)->insert_before(block, inst); 32201e04c3fSmrg else 32301e04c3fSmrg cursor->insert_before(inst); 32401e04c3fSmrg 32501e04c3fSmrg return inst; 32601e04c3fSmrg } 32701e04c3fSmrg 32801e04c3fSmrg /** 32901e04c3fSmrg * Select \p src0 if the comparison of both sources with the given 33001e04c3fSmrg * conditional mod evaluates to true, otherwise select \p src1. 33101e04c3fSmrg * 33201e04c3fSmrg * Generally useful to get the minimum or maximum of two values. 33301e04c3fSmrg */ 33401e04c3fSmrg instruction * 33501e04c3fSmrg emit_minmax(const dst_reg &dst, const src_reg &src0, 33601e04c3fSmrg const src_reg &src1, brw_conditional_mod mod) const 33701e04c3fSmrg { 33801e04c3fSmrg assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); 33901e04c3fSmrg 34001e04c3fSmrg return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), 34101e04c3fSmrg fix_unsigned_negate(src1))); 34201e04c3fSmrg } 34301e04c3fSmrg 34401e04c3fSmrg /** 34501e04c3fSmrg * Copy any live channel from \p src to the first channel of the result. 34601e04c3fSmrg */ 34701e04c3fSmrg src_reg 34801e04c3fSmrg emit_uniformize(const src_reg &src) const 34901e04c3fSmrg { 35001e04c3fSmrg const vec4_builder ubld = exec_all(); 35101e04c3fSmrg const dst_reg chan_index = 35201e04c3fSmrg writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X); 35301e04c3fSmrg const dst_reg dst = vgrf(src.type); 35401e04c3fSmrg 35501e04c3fSmrg ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); 35601e04c3fSmrg ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index)); 35701e04c3fSmrg 35801e04c3fSmrg return src_reg(dst); 35901e04c3fSmrg } 36001e04c3fSmrg 36101e04c3fSmrg /** 36201e04c3fSmrg * Assorted arithmetic ops. 36301e04c3fSmrg * @{ 36401e04c3fSmrg */ 36501e04c3fSmrg#define ALU1(op) \ 36601e04c3fSmrg instruction * \ 36701e04c3fSmrg op(const dst_reg &dst, const src_reg &src0) const \ 36801e04c3fSmrg { \ 36901e04c3fSmrg return emit(BRW_OPCODE_##op, dst, src0); \ 37001e04c3fSmrg } 37101e04c3fSmrg 37201e04c3fSmrg#define ALU2(op) \ 37301e04c3fSmrg instruction * \ 37401e04c3fSmrg op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ 37501e04c3fSmrg { \ 37601e04c3fSmrg return emit(BRW_OPCODE_##op, dst, src0, src1); \ 37701e04c3fSmrg } 37801e04c3fSmrg 37901e04c3fSmrg#define ALU2_ACC(op) \ 38001e04c3fSmrg instruction * \ 38101e04c3fSmrg op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ 38201e04c3fSmrg { \ 38301e04c3fSmrg instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1); \ 38401e04c3fSmrg inst->writes_accumulator = true; \ 38501e04c3fSmrg return inst; \ 38601e04c3fSmrg } 38701e04c3fSmrg 38801e04c3fSmrg#define ALU3(op) \ 38901e04c3fSmrg instruction * \ 39001e04c3fSmrg op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \ 39101e04c3fSmrg const src_reg &src2) const \ 39201e04c3fSmrg { \ 39301e04c3fSmrg return emit(BRW_OPCODE_##op, dst, src0, src1, src2); \ 39401e04c3fSmrg } 39501e04c3fSmrg 39601e04c3fSmrg ALU2(ADD) 39701e04c3fSmrg ALU2_ACC(ADDC) 39801e04c3fSmrg ALU2(AND) 39901e04c3fSmrg ALU2(ASR) 40001e04c3fSmrg ALU2(AVG) 40101e04c3fSmrg ALU3(BFE) 40201e04c3fSmrg ALU2(BFI1) 40301e04c3fSmrg ALU3(BFI2) 40401e04c3fSmrg ALU1(BFREV) 40501e04c3fSmrg ALU1(CBIT) 40601e04c3fSmrg ALU3(CSEL) 40701e04c3fSmrg ALU1(DIM) 40801e04c3fSmrg ALU2(DP2) 40901e04c3fSmrg ALU2(DP3) 41001e04c3fSmrg ALU2(DP4) 41101e04c3fSmrg ALU2(DPH) 41201e04c3fSmrg ALU1(F16TO32) 41301e04c3fSmrg ALU1(F32TO16) 41401e04c3fSmrg ALU1(FBH) 41501e04c3fSmrg ALU1(FBL) 41601e04c3fSmrg ALU1(FRC) 41701e04c3fSmrg ALU2(LINE) 41801e04c3fSmrg ALU1(LZD) 41901e04c3fSmrg ALU2(MAC) 42001e04c3fSmrg ALU2_ACC(MACH) 42101e04c3fSmrg ALU3(MAD) 42201e04c3fSmrg ALU1(MOV) 42301e04c3fSmrg ALU2(MUL) 42401e04c3fSmrg ALU1(NOT) 42501e04c3fSmrg ALU2(OR) 42601e04c3fSmrg ALU2(PLN) 42701e04c3fSmrg ALU1(RNDD) 42801e04c3fSmrg ALU1(RNDE) 42901e04c3fSmrg ALU1(RNDU) 43001e04c3fSmrg ALU1(RNDZ) 43101e04c3fSmrg ALU2(SAD2) 43201e04c3fSmrg ALU2_ACC(SADA2) 43301e04c3fSmrg ALU2(SEL) 43401e04c3fSmrg ALU2(SHL) 43501e04c3fSmrg ALU2(SHR) 43601e04c3fSmrg ALU2_ACC(SUBB) 43701e04c3fSmrg ALU2(XOR) 43801e04c3fSmrg 43901e04c3fSmrg#undef ALU3 44001e04c3fSmrg#undef ALU2_ACC 44101e04c3fSmrg#undef ALU2 44201e04c3fSmrg#undef ALU1 44301e04c3fSmrg /** @} */ 44401e04c3fSmrg 44501e04c3fSmrg /** 44601e04c3fSmrg * CMP: Sets the low bit of the destination channels with the result 44701e04c3fSmrg * of the comparison, while the upper bits are undefined, and updates 44801e04c3fSmrg * the flag register with the packed 16 bits of the result. 44901e04c3fSmrg */ 45001e04c3fSmrg instruction * 45101e04c3fSmrg CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1, 45201e04c3fSmrg brw_conditional_mod condition) const 45301e04c3fSmrg { 45401e04c3fSmrg /* Take the instruction: 45501e04c3fSmrg * 45601e04c3fSmrg * CMP null<d> src0<f> src1<f> 45701e04c3fSmrg * 4587ec681f3Smrg * Original gfx4 does type conversion to the destination type 45901e04c3fSmrg * before comparison, producing garbage results for floating 46001e04c3fSmrg * point comparisons. 46101e04c3fSmrg * 46201e04c3fSmrg * The destination type doesn't matter on newer generations, 46301e04c3fSmrg * so we set the type to match src0 so we can compact the 46401e04c3fSmrg * instruction. 46501e04c3fSmrg */ 46601e04c3fSmrg return set_condmod(condition, 46701e04c3fSmrg emit(BRW_OPCODE_CMP, retype(dst, src0.type), 46801e04c3fSmrg fix_unsigned_negate(src0), 46901e04c3fSmrg fix_unsigned_negate(src1))); 47001e04c3fSmrg } 47101e04c3fSmrg 47201e04c3fSmrg /** 4737ec681f3Smrg * CMPN: Behaves like CMP, but produces true if src1 is NaN. 4747ec681f3Smrg */ 4757ec681f3Smrg instruction * 4767ec681f3Smrg CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1, 4777ec681f3Smrg brw_conditional_mod condition) const 4787ec681f3Smrg { 4797ec681f3Smrg /* Take the instruction: 4807ec681f3Smrg * 4817ec681f3Smrg * CMPN null<d> src0<f> src1<f> 4827ec681f3Smrg * 4837ec681f3Smrg * Original gfx4 does type conversion to the destination type 4847ec681f3Smrg * before comparison, producing garbage results for floating 4857ec681f3Smrg * point comparisons. 4867ec681f3Smrg * 4877ec681f3Smrg * The destination type doesn't matter on newer generations, 4887ec681f3Smrg * so we set the type to match src0 so we can compact the 4897ec681f3Smrg * instruction. 4907ec681f3Smrg */ 4917ec681f3Smrg return set_condmod(condition, 4927ec681f3Smrg emit(BRW_OPCODE_CMPN, retype(dst, src0.type), 4937ec681f3Smrg fix_unsigned_negate(src0), 4947ec681f3Smrg fix_unsigned_negate(src1))); 4957ec681f3Smrg } 4967ec681f3Smrg 4977ec681f3Smrg /** 4987ec681f3Smrg * Gfx4 predicated IF. 49901e04c3fSmrg */ 50001e04c3fSmrg instruction * 50101e04c3fSmrg IF(brw_predicate predicate) const 50201e04c3fSmrg { 50301e04c3fSmrg return set_predicate(predicate, emit(BRW_OPCODE_IF)); 50401e04c3fSmrg } 50501e04c3fSmrg 50601e04c3fSmrg /** 5077ec681f3Smrg * Gfx6 IF with embedded comparison. 50801e04c3fSmrg */ 50901e04c3fSmrg instruction * 51001e04c3fSmrg IF(const src_reg &src0, const src_reg &src1, 51101e04c3fSmrg brw_conditional_mod condition) const 51201e04c3fSmrg { 5137ec681f3Smrg assert(shader->devinfo->ver == 6); 51401e04c3fSmrg return set_condmod(condition, 51501e04c3fSmrg emit(BRW_OPCODE_IF, 51601e04c3fSmrg null_reg_d(), 51701e04c3fSmrg fix_unsigned_negate(src0), 51801e04c3fSmrg fix_unsigned_negate(src1))); 51901e04c3fSmrg } 52001e04c3fSmrg 52101e04c3fSmrg /** 52201e04c3fSmrg * Emit a linear interpolation instruction. 52301e04c3fSmrg */ 52401e04c3fSmrg instruction * 52501e04c3fSmrg LRP(const dst_reg &dst, const src_reg &x, const src_reg &y, 52601e04c3fSmrg const src_reg &a) const 52701e04c3fSmrg { 5287ec681f3Smrg /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so 5297ec681f3Smrg * we need to reorder the operands. 5307ec681f3Smrg */ 5317ec681f3Smrg assert(shader->devinfo->ver >= 6 && shader->devinfo->ver <= 9); 5327ec681f3Smrg return emit(BRW_OPCODE_LRP, dst, a, y, x); 53301e04c3fSmrg } 53401e04c3fSmrg 53501e04c3fSmrg backend_shader *shader; 53601e04c3fSmrg 53701e04c3fSmrg protected: 53801e04c3fSmrg /** 53901e04c3fSmrg * Workaround for negation of UD registers. See comment in 54001e04c3fSmrg * fs_generator::generate_code() for the details. 54101e04c3fSmrg */ 54201e04c3fSmrg src_reg 54301e04c3fSmrg fix_unsigned_negate(const src_reg &src) const 54401e04c3fSmrg { 54501e04c3fSmrg if (src.type == BRW_REGISTER_TYPE_UD && src.negate) { 54601e04c3fSmrg dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD); 54701e04c3fSmrg MOV(temp, src); 54801e04c3fSmrg return src_reg(temp); 54901e04c3fSmrg } else { 55001e04c3fSmrg return src; 55101e04c3fSmrg } 55201e04c3fSmrg } 55301e04c3fSmrg 55401e04c3fSmrg /** 55501e04c3fSmrg * Workaround for register access modes not supported by the ternary 55601e04c3fSmrg * instruction encoding. 55701e04c3fSmrg */ 55801e04c3fSmrg src_reg 55901e04c3fSmrg fix_3src_operand(const src_reg &src) const 56001e04c3fSmrg { 56101e04c3fSmrg /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be 56201e04c3fSmrg * able to use vertical stride of zero to replicate the vec4 uniform, like 56301e04c3fSmrg * 56401e04c3fSmrg * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7] 56501e04c3fSmrg * 56601e04c3fSmrg * But you can't, since vertical stride is always four in three-source 56701e04c3fSmrg * instructions. Instead, insert a MOV instruction to do the replication so 56801e04c3fSmrg * that the three-source instruction can consume it. 56901e04c3fSmrg */ 57001e04c3fSmrg 57101e04c3fSmrg /* The MOV is only needed if the source is a uniform or immediate. */ 57201e04c3fSmrg if (src.file != UNIFORM && src.file != IMM) 57301e04c3fSmrg return src; 57401e04c3fSmrg 57501e04c3fSmrg if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle)) 57601e04c3fSmrg return src; 57701e04c3fSmrg 57801e04c3fSmrg const dst_reg expanded = vgrf(src.type); 57901e04c3fSmrg emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src); 58001e04c3fSmrg return src_reg(expanded); 58101e04c3fSmrg } 58201e04c3fSmrg 58301e04c3fSmrg /** 58401e04c3fSmrg * Workaround for register access modes not supported by the math 58501e04c3fSmrg * instruction. 58601e04c3fSmrg */ 58701e04c3fSmrg src_reg 58801e04c3fSmrg fix_math_operand(const src_reg &src) const 58901e04c3fSmrg { 5907ec681f3Smrg /* The gfx6 math instruction ignores the source modifiers -- 59101e04c3fSmrg * swizzle, abs, negate, and at least some parts of the register 59201e04c3fSmrg * region description. 59301e04c3fSmrg * 59401e04c3fSmrg * Rather than trying to enumerate all these cases, *always* expand the 5957ec681f3Smrg * operand to a temp GRF for gfx6. 59601e04c3fSmrg * 5977ec681f3Smrg * For gfx7, keep the operand as-is, except if immediate, which gfx7 still 59801e04c3fSmrg * can't use. 59901e04c3fSmrg */ 6007ec681f3Smrg if (shader->devinfo->ver == 6 || 6017ec681f3Smrg (shader->devinfo->ver == 7 && src.file == IMM)) { 60201e04c3fSmrg const dst_reg tmp = vgrf(src.type); 60301e04c3fSmrg MOV(tmp, src); 60401e04c3fSmrg return src_reg(tmp); 60501e04c3fSmrg } else { 60601e04c3fSmrg return src; 60701e04c3fSmrg } 60801e04c3fSmrg } 60901e04c3fSmrg 61001e04c3fSmrg /** 61101e04c3fSmrg * Workaround other weirdness of the math instruction. 61201e04c3fSmrg */ 61301e04c3fSmrg instruction * 61401e04c3fSmrg fix_math_instruction(instruction *inst) const 61501e04c3fSmrg { 6167ec681f3Smrg if (shader->devinfo->ver == 6 && 61701e04c3fSmrg inst->dst.writemask != WRITEMASK_XYZW) { 61801e04c3fSmrg const dst_reg tmp = vgrf(inst->dst.type); 61901e04c3fSmrg MOV(inst->dst, src_reg(tmp)); 62001e04c3fSmrg inst->dst = tmp; 62101e04c3fSmrg 6227ec681f3Smrg } else if (shader->devinfo->ver < 6) { 62301e04c3fSmrg const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2); 62401e04c3fSmrg inst->base_mrf = 1; 62501e04c3fSmrg inst->mlen = sources; 62601e04c3fSmrg } 62701e04c3fSmrg 62801e04c3fSmrg return inst; 62901e04c3fSmrg } 63001e04c3fSmrg 63101e04c3fSmrg bblock_t *block; 63201e04c3fSmrg exec_node *cursor; 63301e04c3fSmrg 63401e04c3fSmrg unsigned _dispatch_width; 63501e04c3fSmrg unsigned _group; 63601e04c3fSmrg bool force_writemask_all; 63701e04c3fSmrg 63801e04c3fSmrg /** Debug annotation info. */ 63901e04c3fSmrg struct { 64001e04c3fSmrg const char *str; 64101e04c3fSmrg const void *ir; 64201e04c3fSmrg } annotation; 64301e04c3fSmrg }; 64401e04c3fSmrg} 64501e04c3fSmrg 64601e04c3fSmrg#endif 647