intel/compiler/brw_vec4_builder.h

/* -*- c++ -*- */
/*
 * Copyright © 2010-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
01e04c3fSmrg
01e04c3fSmrg#ifndef BRW_VEC4_BUILDER_H
01e04c3fSmrg#define BRW_VEC4_BUILDER_H
01e04c3fSmrg
01e04c3fSmrg#include "brw_ir_vec4.h"
01e04c3fSmrg#include "brw_ir_allocator.h"
01e04c3fSmrg
01e04c3fSmrgnamespace brw {
01e04c3fSmrg   /**
01e04c3fSmrg    * Toolbox to assemble a VEC4 IR program out of individual instructions.
01e04c3fSmrg    *
01e04c3fSmrg    * This object is meant to have an interface consistent with
01e04c3fSmrg    * brw::fs_builder.  They cannot be fully interchangeable because
01e04c3fSmrg    * brw::fs_builder generates scalar code while brw::vec4_builder generates
01e04c3fSmrg    * vector code.
01e04c3fSmrg    */
01e04c3fSmrg   class vec4_builder {
01e04c3fSmrg   public:
01e04c3fSmrg      /** Type used in this IR to represent a source of an instruction. */
01e04c3fSmrg      typedef brw::src_reg src_reg;
01e04c3fSmrg
01e04c3fSmrg      /** Type used in this IR to represent the destination of an instruction. */
01e04c3fSmrg      typedef brw::dst_reg dst_reg;
01e04c3fSmrg
01e04c3fSmrg      /** Type used in this IR to represent an instruction. */
01e04c3fSmrg      typedef vec4_instruction instruction;
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Construct a vec4_builder that inserts instructions into \p shader.
01e04c3fSmrg       */
01e04c3fSmrg      vec4_builder(backend_shader *shader, unsigned dispatch_width = 8) :
01e04c3fSmrg         shader(shader), block(NULL), cursor(NULL),
01e04c3fSmrg         _dispatch_width(dispatch_width), _group(0),
01e04c3fSmrg         force_writemask_all(false),
01e04c3fSmrg         annotation()
01e04c3fSmrg      {
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Construct a vec4_builder that inserts instructions into \p shader
01e04c3fSmrg       * before instruction \p inst in basic block \p block.  The default
01e04c3fSmrg       * execution controls and debug annotation are initialized from the
01e04c3fSmrg       * instruction passed as argument.
01e04c3fSmrg       */
01e04c3fSmrg      vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
01e04c3fSmrg         shader(shader), block(block), cursor(inst),
01e04c3fSmrg         _dispatch_width(inst->exec_size), _group(inst->group),
01e04c3fSmrg         force_writemask_all(inst->force_writemask_all)
01e04c3fSmrg      {
01e04c3fSmrg         annotation.str = inst->annotation;
01e04c3fSmrg         annotation.ir = inst->ir;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Construct a vec4_builder that inserts instructions before \p cursor
01e04c3fSmrg       * in basic block \p block, inheriting other code generation parameters
01e04c3fSmrg       * from this.
01e04c3fSmrg       */
01e04c3fSmrg      vec4_builder
01e04c3fSmrg      at(bblock_t *block, exec_node *cursor) const
01e04c3fSmrg      {
01e04c3fSmrg         vec4_builder bld = *this;
01e04c3fSmrg         bld.block = block;
01e04c3fSmrg         bld.cursor = cursor;
01e04c3fSmrg         return bld;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Construct a vec4_builder appending instructions at the end of the
01e04c3fSmrg       * instruction list of the shader, inheriting other code generation
01e04c3fSmrg       * parameters from this.
01e04c3fSmrg       */
01e04c3fSmrg      vec4_builder
01e04c3fSmrg      at_end() const
01e04c3fSmrg      {
01e04c3fSmrg         return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Construct a builder specifying the default SIMD width and group of
01e04c3fSmrg       * channel enable signals, inheriting other code generation parameters
01e04c3fSmrg       * from this.
01e04c3fSmrg       *
01e04c3fSmrg       * \p n gives the default SIMD width, \p i gives the slot group used for
01e04c3fSmrg       * predication and control flow masking in multiples of \p n channels.
01e04c3fSmrg       */
01e04c3fSmrg      vec4_builder
01e04c3fSmrg      group(unsigned n, unsigned i) const
01e04c3fSmrg      {
01e04c3fSmrg         assert(force_writemask_all ||
01e04c3fSmrg                (n <= dispatch_width() && i < dispatch_width() / n));
01e04c3fSmrg         vec4_builder bld = *this;
01e04c3fSmrg         bld._dispatch_width = n;
01e04c3fSmrg         bld._group += i * n;
01e04c3fSmrg         return bld;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Construct a builder with per-channel control flow execution masking
01e04c3fSmrg       * disabled if \p b is true.  If control flow execution masking is
01e04c3fSmrg       * already disabled this has no effect.
01e04c3fSmrg       */
01e04c3fSmrg      vec4_builder
01e04c3fSmrg      exec_all(bool b = true) const
01e04c3fSmrg      {
01e04c3fSmrg         vec4_builder bld = *this;
01e04c3fSmrg         if (b)
01e04c3fSmrg            bld.force_writemask_all = true;
01e04c3fSmrg         return bld;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Construct a builder with the given debug annotation info.
01e04c3fSmrg       */
01e04c3fSmrg      vec4_builder
01e04c3fSmrg      annotate(const char *str, const void *ir = NULL) const
01e04c3fSmrg      {
01e04c3fSmrg         vec4_builder bld = *this;
01e04c3fSmrg         bld.annotation.str = str;
01e04c3fSmrg         bld.annotation.ir = ir;
01e04c3fSmrg         return bld;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Get the SIMD width in use.
01e04c3fSmrg       */
01e04c3fSmrg      unsigned
01e04c3fSmrg      dispatch_width() const
01e04c3fSmrg      {
01e04c3fSmrg         return _dispatch_width;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Get the channel group in use.
01e04c3fSmrg       */
01e04c3fSmrg      unsigned
01e04c3fSmrg      group() const
01e04c3fSmrg      {
01e04c3fSmrg         return _group;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Allocate a virtual register of natural vector size (four for this IR)
01e04c3fSmrg       * and SIMD width.  \p n gives the amount of space to allocate in
01e04c3fSmrg       * dispatch_width units (which is just enough space for four logical
01e04c3fSmrg       * components in this IR).
01e04c3fSmrg       */
01e04c3fSmrg      dst_reg
01e04c3fSmrg      vgrf(enum brw_reg_type type, unsigned n = 1) const
01e04c3fSmrg      {
01e04c3fSmrg         assert(dispatch_width() <= 32);
01e04c3fSmrg
01e04c3fSmrg         if (n > 0)
01e04c3fSmrg            return retype(dst_reg(VGRF, shader->alloc.allocate(
01e04c3fSmrg                                     n * DIV_ROUND_UP(type_sz(type), 4))),
01e04c3fSmrg                           type);
01e04c3fSmrg         else
01e04c3fSmrg            return retype(null_reg_ud(), type);
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Create a null register of floating type.
01e04c3fSmrg       */
01e04c3fSmrg      dst_reg
01e04c3fSmrg      null_reg_f() const
01e04c3fSmrg      {
01e04c3fSmrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
01e04c3fSmrg                               BRW_REGISTER_TYPE_F));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Create a null register of signed integer type.
01e04c3fSmrg       */
01e04c3fSmrg      dst_reg
01e04c3fSmrg      null_reg_d() const
01e04c3fSmrg      {
01e04c3fSmrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
01e04c3fSmrg                               BRW_REGISTER_TYPE_D));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Create a null register of unsigned integer type.
01e04c3fSmrg       */
01e04c3fSmrg      dst_reg
01e04c3fSmrg      null_reg_ud() const
01e04c3fSmrg      {
01e04c3fSmrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
01e04c3fSmrg                               BRW_REGISTER_TYPE_UD));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Insert an instruction into the program.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      emit(const instruction &inst) const
01e04c3fSmrg      {
01e04c3fSmrg         return emit(new(shader->mem_ctx) instruction(inst));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Create and insert a nullary control instruction into the program.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      emit(enum opcode opcode) const
01e04c3fSmrg      {
01e04c3fSmrg         return emit(instruction(opcode));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Create and insert a nullary instruction into the program.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst) const
01e04c3fSmrg      {
01e04c3fSmrg         return emit(instruction(opcode, dst));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Create and insert a unary instruction into the program.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
01e04c3fSmrg      {
01e04c3fSmrg         switch (opcode) {
01e04c3fSmrg         case SHADER_OPCODE_RCP:
01e04c3fSmrg         case SHADER_OPCODE_RSQ:
01e04c3fSmrg         case SHADER_OPCODE_SQRT:
01e04c3fSmrg         case SHADER_OPCODE_EXP2:
01e04c3fSmrg         case SHADER_OPCODE_LOG2:
01e04c3fSmrg         case SHADER_OPCODE_SIN:
01e04c3fSmrg         case SHADER_OPCODE_COS:
01e04c3fSmrg            return fix_math_instruction(
01e04c3fSmrg               emit(instruction(opcode, dst,
01e04c3fSmrg                                fix_math_operand(src0))));
01e04c3fSmrg
01e04c3fSmrg         default:
01e04c3fSmrg            return emit(instruction(opcode, dst, src0));
01e04c3fSmrg         }
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Create and insert a binary instruction into the program.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
01e04c3fSmrg           const src_reg &src1) const
01e04c3fSmrg      {
01e04c3fSmrg         switch (opcode) {
01e04c3fSmrg         case SHADER_OPCODE_POW:
01e04c3fSmrg         case SHADER_OPCODE_INT_QUOTIENT:
01e04c3fSmrg         case SHADER_OPCODE_INT_REMAINDER:
01e04c3fSmrg            return fix_math_instruction(
01e04c3fSmrg               emit(instruction(opcode, dst,
01e04c3fSmrg                                fix_math_operand(src0),
01e04c3fSmrg                                fix_math_operand(src1))));
01e04c3fSmrg
01e04c3fSmrg         default:
01e04c3fSmrg            return emit(instruction(opcode, dst, src0, src1));
01e04c3fSmrg         }
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Create and insert a ternary instruction into the program.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
01e04c3fSmrg           const src_reg &src1, const src_reg &src2) const
01e04c3fSmrg      {
01e04c3fSmrg         switch (opcode) {
01e04c3fSmrg         case BRW_OPCODE_BFE:
01e04c3fSmrg         case BRW_OPCODE_BFI2:
01e04c3fSmrg         case BRW_OPCODE_MAD:
01e04c3fSmrg         case BRW_OPCODE_LRP:
01e04c3fSmrg            return emit(instruction(opcode, dst,
01e04c3fSmrg                                    fix_3src_operand(src0),
01e04c3fSmrg                                    fix_3src_operand(src1),
01e04c3fSmrg                                    fix_3src_operand(src2)));
01e04c3fSmrg
01e04c3fSmrg         default:
01e04c3fSmrg            return emit(instruction(opcode, dst, src0, src1, src2));
01e04c3fSmrg         }
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Insert a preallocated instruction into the program.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      emit(instruction *inst) const
01e04c3fSmrg      {
01e04c3fSmrg         inst->exec_size = dispatch_width();
01e04c3fSmrg         inst->group = group();
01e04c3fSmrg         inst->force_writemask_all = force_writemask_all;
01e04c3fSmrg         inst->size_written = inst->exec_size * type_sz(inst->dst.type);
01e04c3fSmrg         inst->annotation = annotation.str;
01e04c3fSmrg         inst->ir = annotation.ir;
01e04c3fSmrg
01e04c3fSmrg         if (block)
01e04c3fSmrg            static_cast<instruction *>(cursor)->insert_before(block, inst);
01e04c3fSmrg         else
01e04c3fSmrg            cursor->insert_before(inst);
01e04c3fSmrg
01e04c3fSmrg         return inst;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Select \p src0 if the comparison of both sources with the given
01e04c3fSmrg       * conditional mod evaluates to true, otherwise select \p src1.
01e04c3fSmrg       *
01e04c3fSmrg       * Generally useful to get the minimum or maximum of two values.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      emit_minmax(const dst_reg &dst, const src_reg &src0,
01e04c3fSmrg                  const src_reg &src1, brw_conditional_mod mod) const
01e04c3fSmrg      {
01e04c3fSmrg         assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
01e04c3fSmrg
01e04c3fSmrg         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
01e04c3fSmrg                                     fix_unsigned_negate(src1)));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Copy any live channel from \p src to the first channel of the result.
01e04c3fSmrg       */
01e04c3fSmrg      src_reg
01e04c3fSmrg      emit_uniformize(const src_reg &src) const
01e04c3fSmrg      {
01e04c3fSmrg         const vec4_builder ubld = exec_all();
01e04c3fSmrg         const dst_reg chan_index =
01e04c3fSmrg            writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
01e04c3fSmrg         const dst_reg dst = vgrf(src.type);
01e04c3fSmrg
01e04c3fSmrg         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
01e04c3fSmrg         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
01e04c3fSmrg
01e04c3fSmrg         return src_reg(dst);
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Assorted arithmetic ops.
01e04c3fSmrg       * @{
01e04c3fSmrg       */
01e04c3fSmrg#define ALU1(op)                                        \
01e04c3fSmrg      instruction *                                     \
01e04c3fSmrg      op(const dst_reg &dst, const src_reg &src0) const \
01e04c3fSmrg      {                                                 \
01e04c3fSmrg         return emit(BRW_OPCODE_##op, dst, src0);       \
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg#define ALU2(op)                                                        \
01e04c3fSmrg      instruction *                                                     \
01e04c3fSmrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
01e04c3fSmrg      {                                                                 \
01e04c3fSmrg         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg#define ALU2_ACC(op)                                                    \
01e04c3fSmrg      instruction *                                                     \
01e04c3fSmrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
01e04c3fSmrg      {                                                                 \
01e04c3fSmrg         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
01e04c3fSmrg         inst->writes_accumulator = true;                               \
01e04c3fSmrg         return inst;                                                   \
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg#define ALU3(op)                                                        \
01e04c3fSmrg      instruction *                                                     \
01e04c3fSmrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
01e04c3fSmrg         const src_reg &src2) const                                     \
01e04c3fSmrg      {                                                                 \
01e04c3fSmrg         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      ALU2(ADD)
01e04c3fSmrg      ALU2_ACC(ADDC)
01e04c3fSmrg      ALU2(AND)
01e04c3fSmrg      ALU2(ASR)
01e04c3fSmrg      ALU2(AVG)
01e04c3fSmrg      ALU3(BFE)
01e04c3fSmrg      ALU2(BFI1)
01e04c3fSmrg      ALU3(BFI2)
01e04c3fSmrg      ALU1(BFREV)
01e04c3fSmrg      ALU1(CBIT)
01e04c3fSmrg      ALU3(CSEL)
01e04c3fSmrg      ALU1(DIM)
01e04c3fSmrg      ALU2(DP2)
01e04c3fSmrg      ALU2(DP3)
01e04c3fSmrg      ALU2(DP4)
01e04c3fSmrg      ALU2(DPH)
01e04c3fSmrg      ALU1(F16TO32)
01e04c3fSmrg      ALU1(F32TO16)
01e04c3fSmrg      ALU1(FBH)
01e04c3fSmrg      ALU1(FBL)
01e04c3fSmrg      ALU1(FRC)
01e04c3fSmrg      ALU2(LINE)
01e04c3fSmrg      ALU1(LZD)
01e04c3fSmrg      ALU2(MAC)
01e04c3fSmrg      ALU2_ACC(MACH)
01e04c3fSmrg      ALU3(MAD)
01e04c3fSmrg      ALU1(MOV)
01e04c3fSmrg      ALU2(MUL)
01e04c3fSmrg      ALU1(NOT)
01e04c3fSmrg      ALU2(OR)
01e04c3fSmrg      ALU2(PLN)
01e04c3fSmrg      ALU1(RNDD)
01e04c3fSmrg      ALU1(RNDE)
01e04c3fSmrg      ALU1(RNDU)
01e04c3fSmrg      ALU1(RNDZ)
01e04c3fSmrg      ALU2(SAD2)
01e04c3fSmrg      ALU2_ACC(SADA2)
01e04c3fSmrg      ALU2(SEL)
01e04c3fSmrg      ALU2(SHL)
01e04c3fSmrg      ALU2(SHR)
01e04c3fSmrg      ALU2_ACC(SUBB)
01e04c3fSmrg      ALU2(XOR)
01e04c3fSmrg
01e04c3fSmrg#undef ALU3
01e04c3fSmrg#undef ALU2_ACC
01e04c3fSmrg#undef ALU2
01e04c3fSmrg#undef ALU1
01e04c3fSmrg      /** @} */
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * CMP: Sets the low bit of the destination channels with the result
01e04c3fSmrg       * of the comparison, while the upper bits are undefined, and updates
01e04c3fSmrg       * the flag register with the packed 16 bits of the result.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
01e04c3fSmrg          brw_conditional_mod condition) const
01e04c3fSmrg      {
01e04c3fSmrg         /* Take the instruction:
01e04c3fSmrg          *
01e04c3fSmrg          * CMP null<d> src0<f> src1<f>
01e04c3fSmrg          *
7ec681f3Smrg          * Original gfx4 does type conversion to the destination type
01e04c3fSmrg          * before comparison, producing garbage results for floating
01e04c3fSmrg          * point comparisons.
01e04c3fSmrg          *
01e04c3fSmrg          * The destination type doesn't matter on newer generations,
01e04c3fSmrg          * so we set the type to match src0 so we can compact the
01e04c3fSmrg          * instruction.
01e04c3fSmrg          */
01e04c3fSmrg         return set_condmod(condition,
01e04c3fSmrg                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
01e04c3fSmrg                                 fix_unsigned_negate(src0),
01e04c3fSmrg                                 fix_unsigned_negate(src1)));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
7ec681f3Smrg       * CMPN: Behaves like CMP, but produces true if src1 is NaN.
7ec681f3Smrg       */
7ec681f3Smrg      instruction *
7ec681f3Smrg      CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
7ec681f3Smrg          brw_conditional_mod condition) const
7ec681f3Smrg      {
7ec681f3Smrg         /* Take the instruction:
7ec681f3Smrg          *
7ec681f3Smrg          * CMPN null<d> src0<f> src1<f>
7ec681f3Smrg          *
7ec681f3Smrg          * Original gfx4 does type conversion to the destination type
7ec681f3Smrg          * before comparison, producing garbage results for floating
7ec681f3Smrg          * point comparisons.
7ec681f3Smrg          *
7ec681f3Smrg          * The destination type doesn't matter on newer generations,
7ec681f3Smrg          * so we set the type to match src0 so we can compact the
7ec681f3Smrg          * instruction.
7ec681f3Smrg          */
7ec681f3Smrg         return set_condmod(condition,
7ec681f3Smrg                            emit(BRW_OPCODE_CMPN, retype(dst, src0.type),
7ec681f3Smrg                                 fix_unsigned_negate(src0),
7ec681f3Smrg                                 fix_unsigned_negate(src1)));
7ec681f3Smrg      }
7ec681f3Smrg
7ec681f3Smrg      /**
7ec681f3Smrg       * Gfx4 predicated IF.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      IF(brw_predicate predicate) const
01e04c3fSmrg      {
01e04c3fSmrg         return set_predicate(predicate, emit(BRW_OPCODE_IF));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
7ec681f3Smrg       * Gfx6 IF with embedded comparison.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      IF(const src_reg &src0, const src_reg &src1,
01e04c3fSmrg         brw_conditional_mod condition) const
01e04c3fSmrg      {
7ec681f3Smrg         assert(shader->devinfo->ver == 6);
01e04c3fSmrg         return set_condmod(condition,
01e04c3fSmrg                            emit(BRW_OPCODE_IF,
01e04c3fSmrg                                 null_reg_d(),
01e04c3fSmrg                                 fix_unsigned_negate(src0),
01e04c3fSmrg                                 fix_unsigned_negate(src1)));
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Emit a linear interpolation instruction.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
01e04c3fSmrg          const src_reg &a) const
01e04c3fSmrg      {
7ec681f3Smrg         /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
7ec681f3Smrg          * we need to reorder the operands.
7ec681f3Smrg          */
7ec681f3Smrg         assert(shader->devinfo->ver >= 6 && shader->devinfo->ver <= 9);
7ec681f3Smrg         return emit(BRW_OPCODE_LRP, dst, a, y, x);
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      backend_shader *shader;
01e04c3fSmrg
01e04c3fSmrg   protected:
01e04c3fSmrg      /**
01e04c3fSmrg       * Workaround for negation of UD registers.  See comment in
01e04c3fSmrg       * fs_generator::generate_code() for the details.
01e04c3fSmrg       */
01e04c3fSmrg      src_reg
01e04c3fSmrg      fix_unsigned_negate(const src_reg &src) const
01e04c3fSmrg      {
01e04c3fSmrg         if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
01e04c3fSmrg            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
01e04c3fSmrg            MOV(temp, src);
01e04c3fSmrg            return src_reg(temp);
01e04c3fSmrg         } else {
01e04c3fSmrg            return src;
01e04c3fSmrg         }
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Workaround for register access modes not supported by the ternary
01e04c3fSmrg       * instruction encoding.
01e04c3fSmrg       */
01e04c3fSmrg      src_reg
01e04c3fSmrg      fix_3src_operand(const src_reg &src) const
01e04c3fSmrg      {
01e04c3fSmrg         /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
01e04c3fSmrg          * able to use vertical stride of zero to replicate the vec4 uniform, like
01e04c3fSmrg          *
01e04c3fSmrg          *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
01e04c3fSmrg          *
01e04c3fSmrg          * But you can't, since vertical stride is always four in three-source
01e04c3fSmrg          * instructions. Instead, insert a MOV instruction to do the replication so
01e04c3fSmrg          * that the three-source instruction can consume it.
01e04c3fSmrg          */
01e04c3fSmrg
01e04c3fSmrg         /* The MOV is only needed if the source is a uniform or immediate. */
01e04c3fSmrg         if (src.file != UNIFORM && src.file != IMM)
01e04c3fSmrg            return src;
01e04c3fSmrg
01e04c3fSmrg         if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
01e04c3fSmrg            return src;
01e04c3fSmrg
01e04c3fSmrg         const dst_reg expanded = vgrf(src.type);
01e04c3fSmrg         emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
01e04c3fSmrg         return src_reg(expanded);
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Workaround for register access modes not supported by the math
01e04c3fSmrg       * instruction.
01e04c3fSmrg       */
01e04c3fSmrg      src_reg
01e04c3fSmrg      fix_math_operand(const src_reg &src) const
01e04c3fSmrg      {
7ec681f3Smrg         /* The gfx6 math instruction ignores the source modifiers --
01e04c3fSmrg          * swizzle, abs, negate, and at least some parts of the register
01e04c3fSmrg          * region description.
01e04c3fSmrg          *
01e04c3fSmrg          * Rather than trying to enumerate all these cases, *always* expand the
7ec681f3Smrg          * operand to a temp GRF for gfx6.
01e04c3fSmrg          *
7ec681f3Smrg          * For gfx7, keep the operand as-is, except if immediate, which gfx7 still
01e04c3fSmrg          * can't use.
01e04c3fSmrg          */
7ec681f3Smrg         if (shader->devinfo->ver == 6 ||
7ec681f3Smrg             (shader->devinfo->ver == 7 && src.file == IMM)) {
01e04c3fSmrg            const dst_reg tmp = vgrf(src.type);
01e04c3fSmrg            MOV(tmp, src);
01e04c3fSmrg            return src_reg(tmp);
01e04c3fSmrg         } else {
01e04c3fSmrg            return src;
01e04c3fSmrg         }
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      /**
01e04c3fSmrg       * Workaround other weirdness of the math instruction.
01e04c3fSmrg       */
01e04c3fSmrg      instruction *
01e04c3fSmrg      fix_math_instruction(instruction *inst) const
01e04c3fSmrg      {
7ec681f3Smrg         if (shader->devinfo->ver == 6 &&
01e04c3fSmrg             inst->dst.writemask != WRITEMASK_XYZW) {
01e04c3fSmrg            const dst_reg tmp = vgrf(inst->dst.type);
01e04c3fSmrg            MOV(inst->dst, src_reg(tmp));
01e04c3fSmrg            inst->dst = tmp;
01e04c3fSmrg
7ec681f3Smrg         } else if (shader->devinfo->ver < 6) {
01e04c3fSmrg            const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
01e04c3fSmrg            inst->base_mrf = 1;
01e04c3fSmrg            inst->mlen = sources;
01e04c3fSmrg         }
01e04c3fSmrg
01e04c3fSmrg         return inst;
01e04c3fSmrg      }
01e04c3fSmrg
01e04c3fSmrg      bblock_t *block;
01e04c3fSmrg      exec_node *cursor;
01e04c3fSmrg
01e04c3fSmrg      unsigned _dispatch_width;
01e04c3fSmrg      unsigned _group;
01e04c3fSmrg      bool force_writemask_all;
01e04c3fSmrg
01e04c3fSmrg      /** Debug annotation info. */
01e04c3fSmrg      struct {
01e04c3fSmrg         const char *str;
01e04c3fSmrg         const void *ir;
01e04c3fSmrg      } annotation;
01e04c3fSmrg   };
01e04c3fSmrg}
01e04c3fSmrg
01e04c3fSmrg#endif