intel/compiler/brw_vec4_builder.h

b8e80941Smrg/* -*- c++ -*- */
b8e80941Smrg/*
b8e80941Smrg * Copyright © 2010-2015 Intel Corporation
b8e80941Smrg *
b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
b8e80941Smrg * to deal in the Software without restriction, including without limitation
b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
b8e80941Smrg *
b8e80941Smrg * The above copyright notice and this permission notice (including the next
b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
b8e80941Smrg * Software.
b8e80941Smrg *
b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
b8e80941Smrg * IN THE SOFTWARE.
b8e80941Smrg */
b8e80941Smrg
b8e80941Smrg#ifndef BRW_VEC4_BUILDER_H
b8e80941Smrg#define BRW_VEC4_BUILDER_H
b8e80941Smrg
b8e80941Smrg#include "brw_ir_vec4.h"
b8e80941Smrg#include "brw_ir_allocator.h"
b8e80941Smrg
b8e80941Smrgnamespace brw {
b8e80941Smrg   /**
b8e80941Smrg    * Toolbox to assemble a VEC4 IR program out of individual instructions.
b8e80941Smrg    *
b8e80941Smrg    * This object is meant to have an interface consistent with
b8e80941Smrg    * brw::fs_builder.  They cannot be fully interchangeable because
b8e80941Smrg    * brw::fs_builder generates scalar code while brw::vec4_builder generates
b8e80941Smrg    * vector code.
b8e80941Smrg    */
b8e80941Smrg   class vec4_builder {
b8e80941Smrg   public:
b8e80941Smrg      /**
b8e80941Smrg       * Type used in this IR to represent a source of an instruction
b8e80941Smrg       * (member alias of brw::src_reg).
b8e80941Smrg       */
b8e80941Smrg      typedef brw::src_reg src_reg;
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Type used in this IR to represent the destination of an instruction
b8e80941Smrg       * (member alias of brw::dst_reg).
b8e80941Smrg       */
b8e80941Smrg      typedef brw::dst_reg dst_reg;
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Type used in this IR to represent an instruction (member alias of
b8e80941Smrg       * vec4_instruction).
b8e80941Smrg       */
b8e80941Smrg      typedef vec4_instruction instruction;
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Build a vec4_builder that targets \p shader, using \p dispatch_width
b8e80941Smrg       * as the default SIMD width.
b8e80941Smrg       *
b8e80941Smrg       * No insertion point is selected yet: block and cursor start out NULL.
b8e80941Smrg       * The channel group is zero, force_writemask_all is off and the debug
b8e80941Smrg       * annotation is value-initialized.
b8e80941Smrg       */
b8e80941Smrg      vec4_builder(backend_shader *shader, unsigned dispatch_width = 8) :
b8e80941Smrg         shader(shader),
b8e80941Smrg         block(NULL),
b8e80941Smrg         cursor(NULL),
b8e80941Smrg         _dispatch_width(dispatch_width),
b8e80941Smrg         _group(0),
b8e80941Smrg         force_writemask_all(false),
b8e80941Smrg         annotation()
b8e80941Smrg      {
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Build a vec4_builder that inserts into \p shader right before
b8e80941Smrg       * instruction \p inst of basic block \p block.
b8e80941Smrg       *
b8e80941Smrg       * The SIMD width, channel group, force_writemask_all setting and debug
b8e80941Smrg       * annotation are all taken from \p inst, which is dereferenced
b8e80941Smrg       * unconditionally and therefore must not be NULL.
b8e80941Smrg       */
b8e80941Smrg      vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
b8e80941Smrg         shader(shader),
b8e80941Smrg         block(block),
b8e80941Smrg         cursor(inst),
b8e80941Smrg         _dispatch_width(inst->exec_size),
b8e80941Smrg         _group(inst->group),
b8e80941Smrg         force_writemask_all(inst->force_writemask_all)
b8e80941Smrg      {
b8e80941Smrg         /* Carry over the debug info of the reference instruction. */
b8e80941Smrg         annotation.ir = inst->ir;
b8e80941Smrg         annotation.str = inst->annotation;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return a copy of this builder whose insertion point is moved to just
b8e80941Smrg       * before \p cursor in basic block \p block.  All other code generation
b8e80941Smrg       * parameters are inherited from this builder, which is left untouched.
b8e80941Smrg       */
b8e80941Smrg      vec4_builder
b8e80941Smrg      at(bblock_t *block, exec_node *cursor) const
b8e80941Smrg      {
b8e80941Smrg         vec4_builder moved(*this);
b8e80941Smrg         moved.cursor = cursor;
b8e80941Smrg         moved.block = block;
b8e80941Smrg         return moved;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return a copy of this builder that appends at the end of the
b8e80941Smrg       * instruction list of the shader.  The cursor is the tail sentinel of
b8e80941Smrg       * shader->instructions and no basic block is set (block is NULL); all
b8e80941Smrg       * other code generation parameters are inherited from this builder.
b8e80941Smrg       */
b8e80941Smrg      vec4_builder
b8e80941Smrg      at_end() const
b8e80941Smrg      {
b8e80941Smrg         exec_node *const tail =
b8e80941Smrg            (exec_node *)&shader->instructions.tail_sentinel;
b8e80941Smrg
b8e80941Smrg         return at(NULL, tail);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return a copy of this builder with a different default SIMD width and
b8e80941Smrg       * group of channel enable signals; every other code generation
b8e80941Smrg       * parameter is inherited from this builder.
b8e80941Smrg       *
b8e80941Smrg       * \p n is the new default SIMD width and \p i selects the slot group
b8e80941Smrg       * used for predication and control flow masking, counted in multiples
b8e80941Smrg       * of \p n channels and relative to the current group.
b8e80941Smrg       *
b8e80941Smrg       * Unless force_writemask_all is set, it is asserted that the requested
b8e80941Smrg       * group fits inside the current dispatch width.
b8e80941Smrg       */
b8e80941Smrg      vec4_builder
b8e80941Smrg      group(unsigned n, unsigned i) const
b8e80941Smrg      {
b8e80941Smrg         assert(force_writemask_all ||
b8e80941Smrg                (n <= dispatch_width() && i < dispatch_width() / n));
b8e80941Smrg
b8e80941Smrg         vec4_builder sub(*this);
b8e80941Smrg         sub._group = _group + i * n;
b8e80941Smrg         sub._dispatch_width = n;
b8e80941Smrg         return sub;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return a copy of this builder that has per-channel control flow
b8e80941Smrg       * execution masking disabled when \p b is true.  Passing false, or
b8e80941Smrg       * calling this on a builder that already has masking disabled, yields
b8e80941Smrg       * an unchanged copy -- masking is never re-enabled here.
b8e80941Smrg       */
b8e80941Smrg      vec4_builder
b8e80941Smrg      exec_all(bool b = true) const
b8e80941Smrg      {
b8e80941Smrg         vec4_builder unmasked(*this);
b8e80941Smrg         unmasked.force_writemask_all = force_writemask_all || b;
b8e80941Smrg         return unmasked;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return a copy of this builder whose debug annotation is replaced by
b8e80941Smrg       * the string \p str and the IR pointer \p ir.  Both pointers are stored
b8e80941Smrg       * as given and later copied into every instruction emitted through the
b8e80941Smrg       * returned builder.
b8e80941Smrg       */
b8e80941Smrg      vec4_builder
b8e80941Smrg      annotate(const char *str, const void *ir = NULL) const
b8e80941Smrg      {
b8e80941Smrg         vec4_builder annotated(*this);
b8e80941Smrg         annotated.annotation.ir = ir;
b8e80941Smrg         annotated.annotation.str = str;
b8e80941Smrg         return annotated;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return the default SIMD width this builder gives to the instructions
b8e80941Smrg       * it emits.
b8e80941Smrg       */
b8e80941Smrg      unsigned
b8e80941Smrg      dispatch_width() const
b8e80941Smrg      {
b8e80941Smrg         return this->_dispatch_width;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return the channel group this builder gives to the instructions it
b8e80941Smrg       * emits.
b8e80941Smrg       */
b8e80941Smrg      unsigned
b8e80941Smrg      group() const
b8e80941Smrg      {
b8e80941Smrg         return this->_group;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Allocate a virtual register of natural vector size (four for this IR)
b8e80941Smrg       * and SIMD width, and return it retyped to \p type.
b8e80941Smrg       *
b8e80941Smrg       * \p n gives the amount of space to allocate in dispatch_width units
b8e80941Smrg       * (which is just enough space for four logical components in this IR).
b8e80941Smrg       * The size requested from the allocator is \p n times the size of
b8e80941Smrg       * \p type divided by four, rounded up.  When \p n is zero nothing is
b8e80941Smrg       * allocated and a null register of type \p type is returned instead.
b8e80941Smrg       */
b8e80941Smrg      dst_reg
b8e80941Smrg      vgrf(enum brw_reg_type type, unsigned n = 1) const
b8e80941Smrg      {
b8e80941Smrg         assert(dispatch_width() <= 32);
b8e80941Smrg
b8e80941Smrg         /* Nothing to allocate: hand back a typed null register. */
b8e80941Smrg         if (n == 0)
b8e80941Smrg            return retype(null_reg_ud(), type);
b8e80941Smrg
b8e80941Smrg         const unsigned size = n * DIV_ROUND_UP(type_sz(type), 4);
b8e80941Smrg         return retype(dst_reg(VGRF, shader->alloc.allocate(size)), type);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return a null destination register of floating point type
b8e80941Smrg       * (BRW_REGISTER_TYPE_F), sized for the current dispatch width.
b8e80941Smrg       */
b8e80941Smrg      dst_reg
b8e80941Smrg      null_reg_f() const
b8e80941Smrg      {
b8e80941Smrg         const enum brw_reg_type type = BRW_REGISTER_TYPE_F;
b8e80941Smrg         return dst_reg(retype(brw_null_vec(dispatch_width()), type));
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return a null destination register of signed integer type
b8e80941Smrg       * (BRW_REGISTER_TYPE_D), sized for the current dispatch width.
b8e80941Smrg       */
b8e80941Smrg      dst_reg
b8e80941Smrg      null_reg_d() const
b8e80941Smrg      {
b8e80941Smrg         const enum brw_reg_type type = BRW_REGISTER_TYPE_D;
b8e80941Smrg         return dst_reg(retype(brw_null_vec(dispatch_width()), type));
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Return a null destination register of unsigned integer type
b8e80941Smrg       * (BRW_REGISTER_TYPE_UD), sized for the current dispatch width.
b8e80941Smrg       */
b8e80941Smrg      dst_reg
b8e80941Smrg      null_reg_ud() const
b8e80941Smrg      {
b8e80941Smrg         const enum brw_reg_type type = BRW_REGISTER_TYPE_UD;
b8e80941Smrg         return dst_reg(retype(brw_null_vec(dispatch_width()), type));
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Insert a copy of \p inst into the program.  The copy is allocated
b8e80941Smrg       * out of shader->mem_ctx and then inserted like any preallocated
b8e80941Smrg       * instruction.
b8e80941Smrg       *
b8e80941Smrg       * \return the newly allocated, inserted instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      emit(const instruction &inst) const
b8e80941Smrg      {
b8e80941Smrg         instruction *const copy = new(shader->mem_ctx) instruction(inst);
b8e80941Smrg         return emit(copy);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Create a nullary control instruction with the given \p opcode (no
b8e80941Smrg       * destination, no sources) and insert it into the program.
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      emit(enum opcode opcode) const
b8e80941Smrg      {
b8e80941Smrg         const instruction proto = instruction(opcode);
b8e80941Smrg         return emit(proto);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Create a nullary instruction with the given \p opcode that writes
b8e80941Smrg       * \p dst (no sources) and insert it into the program.
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      emit(enum opcode opcode, const dst_reg &dst) const
b8e80941Smrg      {
b8e80941Smrg         const instruction proto = instruction(opcode, dst);
b8e80941Smrg         return emit(proto);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Create and insert a unary instruction into the program.
b8e80941Smrg       *
b8e80941Smrg       * The single-source math opcodes (RCP, RSQ, SQRT, EXP2, LOG2, SIN and
b8e80941Smrg       * COS) have their source passed through fix_math_operand() and the
b8e80941Smrg       * emitted instruction passed through fix_math_instruction().  Every
b8e80941Smrg       * other opcode is emitted as given.
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
b8e80941Smrg      {
b8e80941Smrg         switch (opcode) {
b8e80941Smrg         case SHADER_OPCODE_RCP:
b8e80941Smrg         case SHADER_OPCODE_RSQ:
b8e80941Smrg         case SHADER_OPCODE_SQRT:
b8e80941Smrg         case SHADER_OPCODE_EXP2:
b8e80941Smrg         case SHADER_OPCODE_LOG2:
b8e80941Smrg         case SHADER_OPCODE_SIN:
b8e80941Smrg         case SHADER_OPCODE_COS:
b8e80941Smrg            /* Math opcode: handled below the switch. */
b8e80941Smrg            break;
b8e80941Smrg
b8e80941Smrg         default:
b8e80941Smrg            return emit(instruction(opcode, dst, src0));
b8e80941Smrg         }
b8e80941Smrg
b8e80941Smrg         /* Any fix-up code for the operand is emitted ahead of the math
b8e80941Smrg          * instruction itself.
b8e80941Smrg          */
b8e80941Smrg         const src_reg operand = fix_math_operand(src0);
b8e80941Smrg         instruction *const math = emit(instruction(opcode, dst, operand));
b8e80941Smrg         return fix_math_instruction(math);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Create and insert a binary instruction into the program.
b8e80941Smrg       *
b8e80941Smrg       * For SHADER_OPCODE_POW, SHADER_OPCODE_INT_QUOTIENT and
b8e80941Smrg       * SHADER_OPCODE_INT_REMAINDER both sources are passed through
b8e80941Smrg       * fix_math_operand() (\p src0 first, then \p src1) and the emitted
b8e80941Smrg       * instruction is passed through fix_math_instruction().  Every other
b8e80941Smrg       * opcode is emitted as given.
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
b8e80941Smrg           const src_reg &src1) const
b8e80941Smrg      {
b8e80941Smrg         switch (opcode) {
b8e80941Smrg         case SHADER_OPCODE_POW:
b8e80941Smrg         case SHADER_OPCODE_INT_QUOTIENT:
b8e80941Smrg         case SHADER_OPCODE_INT_REMAINDER: {
b8e80941Smrg            /* fix_math_operand() may allocate a VGRF and emit a MOV.  Fix up
b8e80941Smrg             * the operands in separate statements: nesting both calls in one
b8e80941Smrg             * argument list would leave their relative order, and with it the
b8e80941Smrg             * order of the MOVs and the VGRF numbering, up to the compiler.
b8e80941Smrg             */
b8e80941Smrg            const src_reg op0 = fix_math_operand(src0);
b8e80941Smrg            const src_reg op1 = fix_math_operand(src1);
b8e80941Smrg
b8e80941Smrg            return fix_math_instruction(
b8e80941Smrg               emit(instruction(opcode, dst, op0, op1)));
b8e80941Smrg         }
b8e80941Smrg
b8e80941Smrg         default:
b8e80941Smrg            return emit(instruction(opcode, dst, src0, src1));
b8e80941Smrg         }
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Create and insert a ternary instruction into the program.
b8e80941Smrg       *
b8e80941Smrg       * For BRW_OPCODE_BFE, BRW_OPCODE_BFI2, BRW_OPCODE_MAD and
b8e80941Smrg       * BRW_OPCODE_LRP every source is passed through fix_3src_operand()
b8e80941Smrg       * (in the order \p src0, \p src1, \p src2) before the instruction is
b8e80941Smrg       * emitted.  Every other opcode is emitted as given.
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
b8e80941Smrg           const src_reg &src1, const src_reg &src2) const
b8e80941Smrg      {
b8e80941Smrg         switch (opcode) {
b8e80941Smrg         case BRW_OPCODE_BFE:
b8e80941Smrg         case BRW_OPCODE_BFI2:
b8e80941Smrg         case BRW_OPCODE_MAD:
b8e80941Smrg         case BRW_OPCODE_LRP: {
b8e80941Smrg            /* fix_3src_operand() may allocate a VGRF and emit an unpack
b8e80941Smrg             * instruction.  Fix up the operands in separate statements:
b8e80941Smrg             * nesting the calls in one argument list would leave their
b8e80941Smrg             * relative order, and with it the order of the emitted
b8e80941Smrg             * instructions and the VGRF numbering, up to the compiler.
b8e80941Smrg             */
b8e80941Smrg            const src_reg op0 = fix_3src_operand(src0);
b8e80941Smrg            const src_reg op1 = fix_3src_operand(src1);
b8e80941Smrg            const src_reg op2 = fix_3src_operand(src2);
b8e80941Smrg
b8e80941Smrg            return emit(instruction(opcode, dst, op0, op1, op2));
b8e80941Smrg         }
b8e80941Smrg
b8e80941Smrg         default:
b8e80941Smrg            return emit(instruction(opcode, dst, src0, src1, src2));
b8e80941Smrg         }
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Insert the preallocated instruction \p inst into the program.
b8e80941Smrg       *
b8e80941Smrg       * The execution controls of \p inst (exec_size, group and
b8e80941Smrg       * force_writemask_all), its size_written and its debug annotation are
b8e80941Smrg       * overwritten with the builder's current settings before it is
b8e80941Smrg       * inserted in front of the cursor.
b8e80941Smrg       *
b8e80941Smrg       * \return \p inst.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      emit(instruction *inst) const
b8e80941Smrg      {
b8e80941Smrg         /* Debug annotation. */
b8e80941Smrg         inst->ir = annotation.ir;
b8e80941Smrg         inst->annotation = annotation.str;
b8e80941Smrg
b8e80941Smrg         /* Execution controls.  size_written is derived from the exec_size
b8e80941Smrg          * just stored in the instruction, so it has to come after it.
b8e80941Smrg          */
b8e80941Smrg         inst->force_writemask_all = force_writemask_all;
b8e80941Smrg         inst->group = group();
b8e80941Smrg         inst->exec_size = dispatch_width();
b8e80941Smrg         inst->size_written = inst->exec_size * type_sz(inst->dst.type);
b8e80941Smrg
b8e80941Smrg         /* Without a basic block the cursor is used as a plain list node;
b8e80941Smrg          * with one, it is treated as an instruction of that block.
b8e80941Smrg          */
b8e80941Smrg         if (block == NULL)
b8e80941Smrg            cursor->insert_before(inst);
b8e80941Smrg         else
b8e80941Smrg            static_cast<instruction *>(cursor)->insert_before(block, inst);
b8e80941Smrg
b8e80941Smrg         return inst;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Select \p src0 if the comparison of both sources with the given
b8e80941Smrg       * conditional mod evaluates to true, otherwise select \p src1.
b8e80941Smrg       *
b8e80941Smrg       * Generally useful to get the minimum or maximum of two values.
b8e80941Smrg       * \p mod must be BRW_CONDITIONAL_GE or BRW_CONDITIONAL_L (asserted).
b8e80941Smrg       * Both sources are passed through fix_unsigned_negate(), \p src0
b8e80941Smrg       * first.
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted SEL instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      emit_minmax(const dst_reg &dst, const src_reg &src0,
b8e80941Smrg                  const src_reg &src1, brw_conditional_mod mod) const
b8e80941Smrg      {
b8e80941Smrg         assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
b8e80941Smrg
b8e80941Smrg         /* fix_unsigned_negate() may allocate a VGRF and emit a MOV.  Fix up
b8e80941Smrg          * the operands in separate statements: nesting both calls in one
b8e80941Smrg          * argument list would leave their relative order, and with it the
b8e80941Smrg          * order of the MOVs and the VGRF numbering, up to the compiler.
b8e80941Smrg          */
b8e80941Smrg         const src_reg op0 = fix_unsigned_negate(src0);
b8e80941Smrg         const src_reg op1 = fix_unsigned_negate(src1);
b8e80941Smrg
b8e80941Smrg         return set_condmod(mod, SEL(dst, op0, op1));
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Copy any live channel from \p src to the first channel of the result.
b8e80941Smrg       *
b8e80941Smrg       * Emits SHADER_OPCODE_FIND_LIVE_CHANNEL followed by
b8e80941Smrg       * SHADER_OPCODE_BROADCAST, both through an exec_all() copy of this
b8e80941Smrg       * builder, into freshly allocated registers.
b8e80941Smrg       *
b8e80941Smrg       * \return the register holding the broadcast value, of the same type
b8e80941Smrg       *         as \p src.
b8e80941Smrg       */
b8e80941Smrg      src_reg
b8e80941Smrg      emit_uniformize(const src_reg &src) const
b8e80941Smrg      {
b8e80941Smrg         /* Temporaries: the index of a live channel (X component only),
b8e80941Smrg          * then the broadcast result.
b8e80941Smrg          */
b8e80941Smrg         const dst_reg live_chan =
b8e80941Smrg            writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
b8e80941Smrg         const dst_reg uniform = vgrf(src.type);
b8e80941Smrg
b8e80941Smrg         const vec4_builder all = exec_all();
b8e80941Smrg         all.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, live_chan);
b8e80941Smrg         all.emit(SHADER_OPCODE_BROADCAST, uniform, src, src_reg(live_chan));
b8e80941Smrg
b8e80941Smrg         return src_reg(uniform);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Assorted arithmetic ops.
b8e80941Smrg       *
b8e80941Smrg       * Each ALU1/ALU2/ALU3 line below defines a const member function named
b8e80941Smrg       * after the opcode that takes a destination and one, two or three
b8e80941Smrg       * sources respectively, forwards them to emit() with the matching
b8e80941Smrg       * BRW_OPCODE_* value and returns the inserted instruction.  ALU2_ACC is
b8e80941Smrg       * like ALU2 but additionally sets writes_accumulator on the emitted
b8e80941Smrg       * instruction.  Since everything goes through emit(), the per-opcode
b8e80941Smrg       * operand fix-ups done there apply to these helpers as well.
b8e80941Smrg       *
b8e80941Smrg       * The helper macros are only needed to generate this list and are
b8e80941Smrg       * undefined again right after it.
b8e80941Smrg       * @{
b8e80941Smrg       */
b8e80941Smrg#define ALU1(op)                                        \
b8e80941Smrg      instruction *                                     \
b8e80941Smrg      op(const dst_reg &dst, const src_reg &src0) const \
b8e80941Smrg      {                                                 \
b8e80941Smrg         return emit(BRW_OPCODE_##op, dst, src0);       \
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg#define ALU2(op)                                                        \
b8e80941Smrg      instruction *                                                     \
b8e80941Smrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
b8e80941Smrg      {                                                                 \
b8e80941Smrg         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg#define ALU2_ACC(op)                                                    \
b8e80941Smrg      instruction *                                                     \
b8e80941Smrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
b8e80941Smrg      {                                                                 \
b8e80941Smrg         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
b8e80941Smrg         inst->writes_accumulator = true;                               \
b8e80941Smrg         return inst;                                                   \
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg#define ALU3(op)                                                        \
b8e80941Smrg      instruction *                                                     \
b8e80941Smrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
b8e80941Smrg         const src_reg &src2) const                                     \
b8e80941Smrg      {                                                                 \
b8e80941Smrg         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      ALU2(ADD)
b8e80941Smrg      ALU2_ACC(ADDC)
b8e80941Smrg      ALU2(AND)
b8e80941Smrg      ALU2(ASR)
b8e80941Smrg      ALU2(AVG)
b8e80941Smrg      ALU3(BFE)
b8e80941Smrg      ALU2(BFI1)
b8e80941Smrg      ALU3(BFI2)
b8e80941Smrg      ALU1(BFREV)
b8e80941Smrg      ALU1(CBIT)
b8e80941Smrg      ALU2(CMPN)
b8e80941Smrg      ALU3(CSEL)
b8e80941Smrg      ALU1(DIM)
b8e80941Smrg      ALU2(DP2)
b8e80941Smrg      ALU2(DP3)
b8e80941Smrg      ALU2(DP4)
b8e80941Smrg      ALU2(DPH)
b8e80941Smrg      ALU1(F16TO32)
b8e80941Smrg      ALU1(F32TO16)
b8e80941Smrg      ALU1(FBH)
b8e80941Smrg      ALU1(FBL)
b8e80941Smrg      ALU1(FRC)
b8e80941Smrg      ALU2(LINE)
b8e80941Smrg      ALU1(LZD)
b8e80941Smrg      ALU2(MAC)
b8e80941Smrg      ALU2_ACC(MACH)
b8e80941Smrg      ALU3(MAD)
b8e80941Smrg      ALU1(MOV)
b8e80941Smrg      ALU2(MUL)
b8e80941Smrg      ALU1(NOT)
b8e80941Smrg      ALU2(OR)
b8e80941Smrg      ALU2(PLN)
b8e80941Smrg      ALU1(RNDD)
b8e80941Smrg      ALU1(RNDE)
b8e80941Smrg      ALU1(RNDU)
b8e80941Smrg      ALU1(RNDZ)
b8e80941Smrg      ALU2(SAD2)
b8e80941Smrg      ALU2_ACC(SADA2)
b8e80941Smrg      ALU2(SEL)
b8e80941Smrg      ALU2(SHL)
b8e80941Smrg      ALU2(SHR)
b8e80941Smrg      ALU2_ACC(SUBB)
b8e80941Smrg      ALU2(XOR)
b8e80941Smrg
b8e80941Smrg#undef ALU3
b8e80941Smrg#undef ALU2_ACC
b8e80941Smrg#undef ALU2
b8e80941Smrg#undef ALU1
b8e80941Smrg      /** @} */
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * CMP: Sets the low bit of the destination channels with the result
b8e80941Smrg       * of the comparison, while the upper bits are undefined, and updates
b8e80941Smrg       * the flag register with the packed 16 bits of the result.
b8e80941Smrg       *
b8e80941Smrg       * \p condition is installed as the conditional mod of the instruction.
b8e80941Smrg       * Both sources are passed through fix_unsigned_negate(), \p src0
b8e80941Smrg       * first, and the destination is retyped to the type of \p src0.
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted CMP instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
b8e80941Smrg          brw_conditional_mod condition) const
b8e80941Smrg      {
b8e80941Smrg         /* Take the instruction:
b8e80941Smrg          *
b8e80941Smrg          * CMP null<d> src0<f> src1<f>
b8e80941Smrg          *
b8e80941Smrg          * Original gen4 does type conversion to the destination type
b8e80941Smrg          * before comparison, producing garbage results for floating
b8e80941Smrg          * point comparisons.
b8e80941Smrg          *
b8e80941Smrg          * The destination type doesn't matter on newer generations,
b8e80941Smrg          * so we set the type to match src0 so we can compact the
b8e80941Smrg          * instruction.
b8e80941Smrg          */
b8e80941Smrg         const dst_reg typed_dst = retype(dst, src0.type);
b8e80941Smrg
b8e80941Smrg         /* fix_unsigned_negate() may allocate a VGRF and emit a MOV.  Fix up
b8e80941Smrg          * the operands in separate statements: nesting both calls in one
b8e80941Smrg          * argument list would leave their relative order, and with it the
b8e80941Smrg          * order of the MOVs and the VGRF numbering, up to the compiler.
b8e80941Smrg          */
b8e80941Smrg         const src_reg op0 = fix_unsigned_negate(src0);
b8e80941Smrg         const src_reg op1 = fix_unsigned_negate(src1);
b8e80941Smrg
b8e80941Smrg         return set_condmod(condition,
b8e80941Smrg                            emit(BRW_OPCODE_CMP, typed_dst, op0, op1));
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Gen4 predicated IF: emit a BRW_OPCODE_IF instruction and set
b8e80941Smrg       * \p predicate on it.
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted IF instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      IF(brw_predicate predicate) const
b8e80941Smrg      {
b8e80941Smrg         instruction *const if_inst = emit(BRW_OPCODE_IF);
b8e80941Smrg         return set_predicate(predicate, if_inst);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Gen6 IF with embedded comparison.
b8e80941Smrg       *
b8e80941Smrg       * Emits a BRW_OPCODE_IF with a null destination of signed integer
b8e80941Smrg       * type that compares \p src0 against \p src1 using \p condition as
b8e80941Smrg       * conditional mod.  Both sources are passed through
b8e80941Smrg       * fix_unsigned_negate(), \p src0 first.  Only valid on gen6
b8e80941Smrg       * (asserted).
b8e80941Smrg       *
b8e80941Smrg       * \return the inserted IF instruction.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      IF(const src_reg &src0, const src_reg &src1,
b8e80941Smrg         brw_conditional_mod condition) const
b8e80941Smrg      {
b8e80941Smrg         assert(shader->devinfo->gen == 6);
b8e80941Smrg
b8e80941Smrg         /* fix_unsigned_negate() may allocate a VGRF and emit a MOV.  Fix up
b8e80941Smrg          * the operands in separate statements: nesting both calls in one
b8e80941Smrg          * argument list would leave their relative order, and with it the
b8e80941Smrg          * order of the MOVs and the VGRF numbering, up to the compiler.
b8e80941Smrg          */
b8e80941Smrg         const src_reg op0 = fix_unsigned_negate(src0);
b8e80941Smrg         const src_reg op1 = fix_unsigned_negate(src1);
b8e80941Smrg
b8e80941Smrg         return set_condmod(condition,
b8e80941Smrg                            emit(BRW_OPCODE_IF, null_reg_d(), op0, op1));
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Emit a linear interpolation, x*(1-a) + y*a, into \p dst.
b8e80941Smrg       *
b8e80941Smrg       * On gen6 through gen10 this is a single LRP instruction.  Elsewhere
b8e80941Smrg       * the value is computed with two MULs and two ADDs through temporary
b8e80941Smrg       * registers of the destination type.
b8e80941Smrg       *
b8e80941Smrg       * \return the instruction that writes \p dst.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
b8e80941Smrg          const src_reg &a) const
b8e80941Smrg      {
b8e80941Smrg         const bool has_lrp =
b8e80941Smrg            shader->devinfo->gen >= 6 && shader->devinfo->gen <= 10;
b8e80941Smrg
b8e80941Smrg         if (!has_lrp) {
b8e80941Smrg            /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
b8e80941Smrg            const dst_reg ya = vgrf(dst.type);
b8e80941Smrg            const dst_reg inv_a = vgrf(dst.type);
b8e80941Smrg            const dst_reg x_inv_a = vgrf(dst.type);
b8e80941Smrg
b8e80941Smrg            MUL(ya, y, a);
b8e80941Smrg            ADD(inv_a, negate(a), brw_imm_f(1.0f));
b8e80941Smrg            MUL(x_inv_a, x, src_reg(inv_a));
b8e80941Smrg            return ADD(dst, src_reg(x_inv_a), src_reg(ya));
b8e80941Smrg         }
b8e80941Smrg
b8e80941Smrg         /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
b8e80941Smrg          * we need to reorder the operands.
b8e80941Smrg          */
b8e80941Smrg         return emit(BRW_OPCODE_LRP, dst, a, y, x);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      backend_shader *shader; /**< Shader that instructions are inserted into and registers are allocated from. */
b8e80941Smrg
b8e80941Smrg   protected:
b8e80941Smrg      /**
b8e80941Smrg       * Workaround for negation of UD registers.  See comment in
b8e80941Smrg       * fs_generator::generate_code() for the details.
b8e80941Smrg       *
b8e80941Smrg       * A negated source of type UD is copied with a MOV into a fresh UD
b8e80941Smrg       * temporary, which is returned in its place.  Any other source is
b8e80941Smrg       * returned unchanged and nothing is emitted.
b8e80941Smrg       */
b8e80941Smrg      src_reg
b8e80941Smrg      fix_unsigned_negate(const src_reg &src) const
b8e80941Smrg      {
b8e80941Smrg         /* Nothing to work around. */
b8e80941Smrg         if (src.type != BRW_REGISTER_TYPE_UD || !src.negate)
b8e80941Smrg            return src;
b8e80941Smrg
b8e80941Smrg         dst_reg resolved = vgrf(BRW_REGISTER_TYPE_UD);
b8e80941Smrg         MOV(resolved, src);
b8e80941Smrg         return src_reg(resolved);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Workaround for register access modes not supported by the ternary
b8e80941Smrg       * instruction encoding.
b8e80941Smrg       *
b8e80941Smrg       * Immediates, and uniforms whose swizzle is not a single value, are
b8e80941Smrg       * expanded into a fresh temporary with VEC4_OPCODE_UNPACK_UNIFORM and
b8e80941Smrg       * the temporary is returned.  Any other source is returned unchanged.
b8e80941Smrg       */
b8e80941Smrg      src_reg
b8e80941Smrg      fix_3src_operand(const src_reg &src) const
b8e80941Smrg      {
b8e80941Smrg         /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
b8e80941Smrg          * able to use vertical stride of zero to replicate the vec4 uniform, like
b8e80941Smrg          *
b8e80941Smrg          *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
b8e80941Smrg          *
b8e80941Smrg          * But you can't, since vertical stride is always four in three-source
b8e80941Smrg          * instructions. Instead, insert a MOV instruction to do the replication so
b8e80941Smrg          * that the three-source instruction can consume it.
b8e80941Smrg          */
b8e80941Smrg         const bool is_uniform = (src.file == UNIFORM);
b8e80941Smrg
b8e80941Smrg         /* The MOV is only needed if the source is a uniform or immediate. */
b8e80941Smrg         if (!(is_uniform || src.file == IMM))
b8e80941Smrg            return src;
b8e80941Smrg
b8e80941Smrg         /* A uniform with a single-value swizzle is left alone as well. */
b8e80941Smrg         if (is_uniform && brw_is_single_value_swizzle(src.swizzle))
b8e80941Smrg            return src;
b8e80941Smrg
b8e80941Smrg         const dst_reg unpacked = vgrf(src.type);
b8e80941Smrg         emit(VEC4_OPCODE_UNPACK_UNIFORM, unpacked, src);
b8e80941Smrg         return src_reg(unpacked);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Workaround for register access modes not supported by the math
b8e80941Smrg       * instruction.
b8e80941Smrg       *
b8e80941Smrg       * On gen6 every operand, and on gen7 every immediate operand, is
b8e80941Smrg       * copied with a MOV into a fresh temporary of the same type, which is
b8e80941Smrg       * returned in its place.  Otherwise \p src is returned unchanged.
b8e80941Smrg       */
b8e80941Smrg      src_reg
b8e80941Smrg      fix_math_operand(const src_reg &src) const
b8e80941Smrg      {
b8e80941Smrg         /* The gen6 math instruction ignores the source modifiers --
b8e80941Smrg          * swizzle, abs, negate, and at least some parts of the register
b8e80941Smrg          * region description.
b8e80941Smrg          *
b8e80941Smrg          * Rather than trying to enumerate all these cases, *always* expand the
b8e80941Smrg          * operand to a temp GRF for gen6.
b8e80941Smrg          *
b8e80941Smrg          * For gen7, keep the operand as-is, except if immediate, which gen7 still
b8e80941Smrg          * can't use.
b8e80941Smrg          */
b8e80941Smrg         const bool needs_copy =
b8e80941Smrg            shader->devinfo->gen == 6 ||
b8e80941Smrg            (shader->devinfo->gen == 7 && src.file == IMM);
b8e80941Smrg
b8e80941Smrg         if (!needs_copy)
b8e80941Smrg            return src;
b8e80941Smrg
b8e80941Smrg         const dst_reg copy = vgrf(src.type);
b8e80941Smrg         MOV(copy, src);
b8e80941Smrg         return src_reg(copy);
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Workaround other weirdness of the math instruction.
b8e80941Smrg       *
b8e80941Smrg       * Before gen6 the instruction gets base_mrf 1 and an mlen equal to its
b8e80941Smrg       * number of sources (one or two).  On gen6 a math instruction with a
b8e80941Smrg       * partial destination writemask is redirected to a fresh temporary,
b8e80941Smrg       * and a MOV from that temporary into the original destination is
b8e80941Smrg       * emitted.  In all other cases the instruction is left alone.
b8e80941Smrg       *
b8e80941Smrg       * \return \p inst.
b8e80941Smrg       */
b8e80941Smrg      instruction *
b8e80941Smrg      fix_math_instruction(instruction *inst) const
b8e80941Smrg      {
b8e80941Smrg         if (shader->devinfo->gen < 6) {
b8e80941Smrg            /* One message register per source actually present. */
b8e80941Smrg            const bool unary = (inst->src[1].file == BAD_FILE);
b8e80941Smrg            const unsigned num_sources = unary ? 1 : 2;
b8e80941Smrg
b8e80941Smrg            inst->base_mrf = 1;
b8e80941Smrg            inst->mlen = num_sources;
b8e80941Smrg
b8e80941Smrg         } else if (shader->devinfo->gen == 6 &&
b8e80941Smrg                    inst->dst.writemask != WRITEMASK_XYZW) {
b8e80941Smrg            /* The MOV is built against the original destination, so it has
b8e80941Smrg             * to be emitted before the destination is swapped out.
b8e80941Smrg             */
b8e80941Smrg            const dst_reg full = vgrf(inst->dst.type);
b8e80941Smrg            MOV(inst->dst, src_reg(full));
b8e80941Smrg            inst->dst = full;
b8e80941Smrg         }
b8e80941Smrg
b8e80941Smrg         return inst;
b8e80941Smrg      }
b8e80941Smrg
b8e80941Smrg      bblock_t *block; /**< Basic block passed along when inserting; NULL selects the plain list insertion in emit(). */
b8e80941Smrg      exec_node *cursor; /**< Node in front of which emit() inserts new instructions. */
b8e80941Smrg
b8e80941Smrg      unsigned _dispatch_width; /**< Default SIMD width, stored as exec_size of emitted instructions. */
b8e80941Smrg      unsigned _group; /**< Channel group stored in emitted instructions. */
b8e80941Smrg      bool force_writemask_all; /**< Copied into force_writemask_all of emitted instructions. */
b8e80941Smrg
b8e80941Smrg      /**
b8e80941Smrg       * Debug annotation info, copied into the annotation and ir fields of
b8e80941Smrg       * every emitted instruction.
b8e80941Smrg       */
b8e80941Smrg      struct {
b8e80941Smrg         const char *str;
b8e80941Smrg         const void *ir;
b8e80941Smrg      } annotation;
b8e80941Smrg   };
b8e80941Smrg}
b8e80941Smrg
b8e80941Smrg#endif