/* -*- c++ -*- */
/*
 * Copyright © 2010-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

2501e04c3fSmrg#ifndef BRW_VEC4_BUILDER_H
2601e04c3fSmrg#define BRW_VEC4_BUILDER_H
2701e04c3fSmrg
2801e04c3fSmrg#include "brw_ir_vec4.h"
2901e04c3fSmrg#include "brw_ir_allocator.h"
3001e04c3fSmrg
3101e04c3fSmrgnamespace brw {
3201e04c3fSmrg   /**
3301e04c3fSmrg    * Toolbox to assemble a VEC4 IR program out of individual instructions.
3401e04c3fSmrg    *
3501e04c3fSmrg    * This object is meant to have an interface consistent with
3601e04c3fSmrg    * brw::fs_builder.  They cannot be fully interchangeable because
3701e04c3fSmrg    * brw::fs_builder generates scalar code while brw::vec4_builder generates
3801e04c3fSmrg    * vector code.
3901e04c3fSmrg    */
4001e04c3fSmrg   class vec4_builder {
4101e04c3fSmrg   public:
4201e04c3fSmrg      /** Type used in this IR to represent a source of an instruction. */
4301e04c3fSmrg      typedef brw::src_reg src_reg;
4401e04c3fSmrg
4501e04c3fSmrg      /** Type used in this IR to represent the destination of an instruction. */
4601e04c3fSmrg      typedef brw::dst_reg dst_reg;
4701e04c3fSmrg
4801e04c3fSmrg      /** Type used in this IR to represent an instruction. */
4901e04c3fSmrg      typedef vec4_instruction instruction;
5001e04c3fSmrg
5101e04c3fSmrg      /**
5201e04c3fSmrg       * Construct a vec4_builder that inserts instructions into \p shader.
5301e04c3fSmrg       */
5401e04c3fSmrg      vec4_builder(backend_shader *shader, unsigned dispatch_width = 8) :
5501e04c3fSmrg         shader(shader), block(NULL), cursor(NULL),
5601e04c3fSmrg         _dispatch_width(dispatch_width), _group(0),
5701e04c3fSmrg         force_writemask_all(false),
5801e04c3fSmrg         annotation()
5901e04c3fSmrg      {
6001e04c3fSmrg      }
6101e04c3fSmrg
6201e04c3fSmrg      /**
6301e04c3fSmrg       * Construct a vec4_builder that inserts instructions into \p shader
6401e04c3fSmrg       * before instruction \p inst in basic block \p block.  The default
6501e04c3fSmrg       * execution controls and debug annotation are initialized from the
6601e04c3fSmrg       * instruction passed as argument.
6701e04c3fSmrg       */
6801e04c3fSmrg      vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
6901e04c3fSmrg         shader(shader), block(block), cursor(inst),
7001e04c3fSmrg         _dispatch_width(inst->exec_size), _group(inst->group),
7101e04c3fSmrg         force_writemask_all(inst->force_writemask_all)
7201e04c3fSmrg      {
7301e04c3fSmrg         annotation.str = inst->annotation;
7401e04c3fSmrg         annotation.ir = inst->ir;
7501e04c3fSmrg      }
7601e04c3fSmrg
7701e04c3fSmrg      /**
7801e04c3fSmrg       * Construct a vec4_builder that inserts instructions before \p cursor
7901e04c3fSmrg       * in basic block \p block, inheriting other code generation parameters
8001e04c3fSmrg       * from this.
8101e04c3fSmrg       */
8201e04c3fSmrg      vec4_builder
8301e04c3fSmrg      at(bblock_t *block, exec_node *cursor) const
8401e04c3fSmrg      {
8501e04c3fSmrg         vec4_builder bld = *this;
8601e04c3fSmrg         bld.block = block;
8701e04c3fSmrg         bld.cursor = cursor;
8801e04c3fSmrg         return bld;
8901e04c3fSmrg      }
9001e04c3fSmrg
9101e04c3fSmrg      /**
9201e04c3fSmrg       * Construct a vec4_builder appending instructions at the end of the
9301e04c3fSmrg       * instruction list of the shader, inheriting other code generation
9401e04c3fSmrg       * parameters from this.
9501e04c3fSmrg       */
9601e04c3fSmrg      vec4_builder
9701e04c3fSmrg      at_end() const
9801e04c3fSmrg      {
9901e04c3fSmrg         return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
10001e04c3fSmrg      }
10101e04c3fSmrg
10201e04c3fSmrg      /**
10301e04c3fSmrg       * Construct a builder specifying the default SIMD width and group of
10401e04c3fSmrg       * channel enable signals, inheriting other code generation parameters
10501e04c3fSmrg       * from this.
10601e04c3fSmrg       *
10701e04c3fSmrg       * \p n gives the default SIMD width, \p i gives the slot group used for
10801e04c3fSmrg       * predication and control flow masking in multiples of \p n channels.
10901e04c3fSmrg       */
11001e04c3fSmrg      vec4_builder
11101e04c3fSmrg      group(unsigned n, unsigned i) const
11201e04c3fSmrg      {
11301e04c3fSmrg         assert(force_writemask_all ||
11401e04c3fSmrg                (n <= dispatch_width() && i < dispatch_width() / n));
11501e04c3fSmrg         vec4_builder bld = *this;
11601e04c3fSmrg         bld._dispatch_width = n;
11701e04c3fSmrg         bld._group += i * n;
11801e04c3fSmrg         return bld;
11901e04c3fSmrg      }
12001e04c3fSmrg
12101e04c3fSmrg      /**
12201e04c3fSmrg       * Construct a builder with per-channel control flow execution masking
12301e04c3fSmrg       * disabled if \p b is true.  If control flow execution masking is
12401e04c3fSmrg       * already disabled this has no effect.
12501e04c3fSmrg       */
12601e04c3fSmrg      vec4_builder
12701e04c3fSmrg      exec_all(bool b = true) const
12801e04c3fSmrg      {
12901e04c3fSmrg         vec4_builder bld = *this;
13001e04c3fSmrg         if (b)
13101e04c3fSmrg            bld.force_writemask_all = true;
13201e04c3fSmrg         return bld;
13301e04c3fSmrg      }
13401e04c3fSmrg
13501e04c3fSmrg      /**
13601e04c3fSmrg       * Construct a builder with the given debug annotation info.
13701e04c3fSmrg       */
13801e04c3fSmrg      vec4_builder
13901e04c3fSmrg      annotate(const char *str, const void *ir = NULL) const
14001e04c3fSmrg      {
14101e04c3fSmrg         vec4_builder bld = *this;
14201e04c3fSmrg         bld.annotation.str = str;
14301e04c3fSmrg         bld.annotation.ir = ir;
14401e04c3fSmrg         return bld;
14501e04c3fSmrg      }
14601e04c3fSmrg
14701e04c3fSmrg      /**
14801e04c3fSmrg       * Get the SIMD width in use.
14901e04c3fSmrg       */
15001e04c3fSmrg      unsigned
15101e04c3fSmrg      dispatch_width() const
15201e04c3fSmrg      {
15301e04c3fSmrg         return _dispatch_width;
15401e04c3fSmrg      }
15501e04c3fSmrg
15601e04c3fSmrg      /**
15701e04c3fSmrg       * Get the channel group in use.
15801e04c3fSmrg       */
15901e04c3fSmrg      unsigned
16001e04c3fSmrg      group() const
16101e04c3fSmrg      {
16201e04c3fSmrg         return _group;
16301e04c3fSmrg      }
16401e04c3fSmrg
16501e04c3fSmrg      /**
16601e04c3fSmrg       * Allocate a virtual register of natural vector size (four for this IR)
16701e04c3fSmrg       * and SIMD width.  \p n gives the amount of space to allocate in
16801e04c3fSmrg       * dispatch_width units (which is just enough space for four logical
16901e04c3fSmrg       * components in this IR).
17001e04c3fSmrg       */
17101e04c3fSmrg      dst_reg
17201e04c3fSmrg      vgrf(enum brw_reg_type type, unsigned n = 1) const
17301e04c3fSmrg      {
17401e04c3fSmrg         assert(dispatch_width() <= 32);
17501e04c3fSmrg
17601e04c3fSmrg         if (n > 0)
17701e04c3fSmrg            return retype(dst_reg(VGRF, shader->alloc.allocate(
17801e04c3fSmrg                                     n * DIV_ROUND_UP(type_sz(type), 4))),
17901e04c3fSmrg                           type);
18001e04c3fSmrg         else
18101e04c3fSmrg            return retype(null_reg_ud(), type);
18201e04c3fSmrg      }
18301e04c3fSmrg
18401e04c3fSmrg      /**
18501e04c3fSmrg       * Create a null register of floating type.
18601e04c3fSmrg       */
18701e04c3fSmrg      dst_reg
18801e04c3fSmrg      null_reg_f() const
18901e04c3fSmrg      {
19001e04c3fSmrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
19101e04c3fSmrg                               BRW_REGISTER_TYPE_F));
19201e04c3fSmrg      }
19301e04c3fSmrg
19401e04c3fSmrg      /**
19501e04c3fSmrg       * Create a null register of signed integer type.
19601e04c3fSmrg       */
19701e04c3fSmrg      dst_reg
19801e04c3fSmrg      null_reg_d() const
19901e04c3fSmrg      {
20001e04c3fSmrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
20101e04c3fSmrg                               BRW_REGISTER_TYPE_D));
20201e04c3fSmrg      }
20301e04c3fSmrg
20401e04c3fSmrg      /**
20501e04c3fSmrg       * Create a null register of unsigned integer type.
20601e04c3fSmrg       */
20701e04c3fSmrg      dst_reg
20801e04c3fSmrg      null_reg_ud() const
20901e04c3fSmrg      {
21001e04c3fSmrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
21101e04c3fSmrg                               BRW_REGISTER_TYPE_UD));
21201e04c3fSmrg      }
21301e04c3fSmrg
21401e04c3fSmrg      /**
21501e04c3fSmrg       * Insert an instruction into the program.
21601e04c3fSmrg       */
21701e04c3fSmrg      instruction *
21801e04c3fSmrg      emit(const instruction &inst) const
21901e04c3fSmrg      {
22001e04c3fSmrg         return emit(new(shader->mem_ctx) instruction(inst));
22101e04c3fSmrg      }
22201e04c3fSmrg
22301e04c3fSmrg      /**
22401e04c3fSmrg       * Create and insert a nullary control instruction into the program.
22501e04c3fSmrg       */
22601e04c3fSmrg      instruction *
22701e04c3fSmrg      emit(enum opcode opcode) const
22801e04c3fSmrg      {
22901e04c3fSmrg         return emit(instruction(opcode));
23001e04c3fSmrg      }
23101e04c3fSmrg
23201e04c3fSmrg      /**
23301e04c3fSmrg       * Create and insert a nullary instruction into the program.
23401e04c3fSmrg       */
23501e04c3fSmrg      instruction *
23601e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst) const
23701e04c3fSmrg      {
23801e04c3fSmrg         return emit(instruction(opcode, dst));
23901e04c3fSmrg      }
24001e04c3fSmrg
24101e04c3fSmrg      /**
24201e04c3fSmrg       * Create and insert a unary instruction into the program.
24301e04c3fSmrg       */
24401e04c3fSmrg      instruction *
24501e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
24601e04c3fSmrg      {
24701e04c3fSmrg         switch (opcode) {
24801e04c3fSmrg         case SHADER_OPCODE_RCP:
24901e04c3fSmrg         case SHADER_OPCODE_RSQ:
25001e04c3fSmrg         case SHADER_OPCODE_SQRT:
25101e04c3fSmrg         case SHADER_OPCODE_EXP2:
25201e04c3fSmrg         case SHADER_OPCODE_LOG2:
25301e04c3fSmrg         case SHADER_OPCODE_SIN:
25401e04c3fSmrg         case SHADER_OPCODE_COS:
25501e04c3fSmrg            return fix_math_instruction(
25601e04c3fSmrg               emit(instruction(opcode, dst,
25701e04c3fSmrg                                fix_math_operand(src0))));
25801e04c3fSmrg
25901e04c3fSmrg         default:
26001e04c3fSmrg            return emit(instruction(opcode, dst, src0));
26101e04c3fSmrg         }
26201e04c3fSmrg      }
26301e04c3fSmrg
26401e04c3fSmrg      /**
26501e04c3fSmrg       * Create and insert a binary instruction into the program.
26601e04c3fSmrg       */
26701e04c3fSmrg      instruction *
26801e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
26901e04c3fSmrg           const src_reg &src1) const
27001e04c3fSmrg      {
27101e04c3fSmrg         switch (opcode) {
27201e04c3fSmrg         case SHADER_OPCODE_POW:
27301e04c3fSmrg         case SHADER_OPCODE_INT_QUOTIENT:
27401e04c3fSmrg         case SHADER_OPCODE_INT_REMAINDER:
27501e04c3fSmrg            return fix_math_instruction(
27601e04c3fSmrg               emit(instruction(opcode, dst,
27701e04c3fSmrg                                fix_math_operand(src0),
27801e04c3fSmrg                                fix_math_operand(src1))));
27901e04c3fSmrg
28001e04c3fSmrg         default:
28101e04c3fSmrg            return emit(instruction(opcode, dst, src0, src1));
28201e04c3fSmrg         }
28301e04c3fSmrg      }
28401e04c3fSmrg
28501e04c3fSmrg      /**
28601e04c3fSmrg       * Create and insert a ternary instruction into the program.
28701e04c3fSmrg       */
28801e04c3fSmrg      instruction *
28901e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
29001e04c3fSmrg           const src_reg &src1, const src_reg &src2) const
29101e04c3fSmrg      {
29201e04c3fSmrg         switch (opcode) {
29301e04c3fSmrg         case BRW_OPCODE_BFE:
29401e04c3fSmrg         case BRW_OPCODE_BFI2:
29501e04c3fSmrg         case BRW_OPCODE_MAD:
29601e04c3fSmrg         case BRW_OPCODE_LRP:
29701e04c3fSmrg            return emit(instruction(opcode, dst,
29801e04c3fSmrg                                    fix_3src_operand(src0),
29901e04c3fSmrg                                    fix_3src_operand(src1),
30001e04c3fSmrg                                    fix_3src_operand(src2)));
30101e04c3fSmrg
30201e04c3fSmrg         default:
30301e04c3fSmrg            return emit(instruction(opcode, dst, src0, src1, src2));
30401e04c3fSmrg         }
30501e04c3fSmrg      }
30601e04c3fSmrg
30701e04c3fSmrg      /**
30801e04c3fSmrg       * Insert a preallocated instruction into the program.
30901e04c3fSmrg       */
31001e04c3fSmrg      instruction *
31101e04c3fSmrg      emit(instruction *inst) const
31201e04c3fSmrg      {
31301e04c3fSmrg         inst->exec_size = dispatch_width();
31401e04c3fSmrg         inst->group = group();
31501e04c3fSmrg         inst->force_writemask_all = force_writemask_all;
31601e04c3fSmrg         inst->size_written = inst->exec_size * type_sz(inst->dst.type);
31701e04c3fSmrg         inst->annotation = annotation.str;
31801e04c3fSmrg         inst->ir = annotation.ir;
31901e04c3fSmrg
32001e04c3fSmrg         if (block)
32101e04c3fSmrg            static_cast<instruction *>(cursor)->insert_before(block, inst);
32201e04c3fSmrg         else
32301e04c3fSmrg            cursor->insert_before(inst);
32401e04c3fSmrg
32501e04c3fSmrg         return inst;
32601e04c3fSmrg      }
32701e04c3fSmrg
32801e04c3fSmrg      /**
32901e04c3fSmrg       * Select \p src0 if the comparison of both sources with the given
33001e04c3fSmrg       * conditional mod evaluates to true, otherwise select \p src1.
33101e04c3fSmrg       *
33201e04c3fSmrg       * Generally useful to get the minimum or maximum of two values.
33301e04c3fSmrg       */
33401e04c3fSmrg      instruction *
33501e04c3fSmrg      emit_minmax(const dst_reg &dst, const src_reg &src0,
33601e04c3fSmrg                  const src_reg &src1, brw_conditional_mod mod) const
33701e04c3fSmrg      {
33801e04c3fSmrg         assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
33901e04c3fSmrg
34001e04c3fSmrg         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
34101e04c3fSmrg                                     fix_unsigned_negate(src1)));
34201e04c3fSmrg      }
34301e04c3fSmrg
34401e04c3fSmrg      /**
34501e04c3fSmrg       * Copy any live channel from \p src to the first channel of the result.
34601e04c3fSmrg       */
34701e04c3fSmrg      src_reg
34801e04c3fSmrg      emit_uniformize(const src_reg &src) const
34901e04c3fSmrg      {
35001e04c3fSmrg         const vec4_builder ubld = exec_all();
35101e04c3fSmrg         const dst_reg chan_index =
35201e04c3fSmrg            writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
35301e04c3fSmrg         const dst_reg dst = vgrf(src.type);
35401e04c3fSmrg
35501e04c3fSmrg         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
35601e04c3fSmrg         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
35701e04c3fSmrg
35801e04c3fSmrg         return src_reg(dst);
35901e04c3fSmrg      }
36001e04c3fSmrg
36101e04c3fSmrg      /**
36201e04c3fSmrg       * Assorted arithmetic ops.
36301e04c3fSmrg       * @{
36401e04c3fSmrg       */
36501e04c3fSmrg#define ALU1(op)                                        \
36601e04c3fSmrg      instruction *                                     \
36701e04c3fSmrg      op(const dst_reg &dst, const src_reg &src0) const \
36801e04c3fSmrg      {                                                 \
36901e04c3fSmrg         return emit(BRW_OPCODE_##op, dst, src0);       \
37001e04c3fSmrg      }
37101e04c3fSmrg
37201e04c3fSmrg#define ALU2(op)                                                        \
37301e04c3fSmrg      instruction *                                                     \
37401e04c3fSmrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
37501e04c3fSmrg      {                                                                 \
37601e04c3fSmrg         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
37701e04c3fSmrg      }
37801e04c3fSmrg
37901e04c3fSmrg#define ALU2_ACC(op)                                                    \
38001e04c3fSmrg      instruction *                                                     \
38101e04c3fSmrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
38201e04c3fSmrg      {                                                                 \
38301e04c3fSmrg         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
38401e04c3fSmrg         inst->writes_accumulator = true;                               \
38501e04c3fSmrg         return inst;                                                   \
38601e04c3fSmrg      }
38701e04c3fSmrg
38801e04c3fSmrg#define ALU3(op)                                                        \
38901e04c3fSmrg      instruction *                                                     \
39001e04c3fSmrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
39101e04c3fSmrg         const src_reg &src2) const                                     \
39201e04c3fSmrg      {                                                                 \
39301e04c3fSmrg         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
39401e04c3fSmrg      }
39501e04c3fSmrg
39601e04c3fSmrg      ALU2(ADD)
39701e04c3fSmrg      ALU2_ACC(ADDC)
39801e04c3fSmrg      ALU2(AND)
39901e04c3fSmrg      ALU2(ASR)
40001e04c3fSmrg      ALU2(AVG)
40101e04c3fSmrg      ALU3(BFE)
40201e04c3fSmrg      ALU2(BFI1)
40301e04c3fSmrg      ALU3(BFI2)
40401e04c3fSmrg      ALU1(BFREV)
40501e04c3fSmrg      ALU1(CBIT)
40601e04c3fSmrg      ALU3(CSEL)
40701e04c3fSmrg      ALU1(DIM)
40801e04c3fSmrg      ALU2(DP2)
40901e04c3fSmrg      ALU2(DP3)
41001e04c3fSmrg      ALU2(DP4)
41101e04c3fSmrg      ALU2(DPH)
41201e04c3fSmrg      ALU1(F16TO32)
41301e04c3fSmrg      ALU1(F32TO16)
41401e04c3fSmrg      ALU1(FBH)
41501e04c3fSmrg      ALU1(FBL)
41601e04c3fSmrg      ALU1(FRC)
41701e04c3fSmrg      ALU2(LINE)
41801e04c3fSmrg      ALU1(LZD)
41901e04c3fSmrg      ALU2(MAC)
42001e04c3fSmrg      ALU2_ACC(MACH)
42101e04c3fSmrg      ALU3(MAD)
42201e04c3fSmrg      ALU1(MOV)
42301e04c3fSmrg      ALU2(MUL)
42401e04c3fSmrg      ALU1(NOT)
42501e04c3fSmrg      ALU2(OR)
42601e04c3fSmrg      ALU2(PLN)
42701e04c3fSmrg      ALU1(RNDD)
42801e04c3fSmrg      ALU1(RNDE)
42901e04c3fSmrg      ALU1(RNDU)
43001e04c3fSmrg      ALU1(RNDZ)
43101e04c3fSmrg      ALU2(SAD2)
43201e04c3fSmrg      ALU2_ACC(SADA2)
43301e04c3fSmrg      ALU2(SEL)
43401e04c3fSmrg      ALU2(SHL)
43501e04c3fSmrg      ALU2(SHR)
43601e04c3fSmrg      ALU2_ACC(SUBB)
43701e04c3fSmrg      ALU2(XOR)
43801e04c3fSmrg
43901e04c3fSmrg#undef ALU3
44001e04c3fSmrg#undef ALU2_ACC
44101e04c3fSmrg#undef ALU2
44201e04c3fSmrg#undef ALU1
44301e04c3fSmrg      /** @} */
44401e04c3fSmrg
44501e04c3fSmrg      /**
44601e04c3fSmrg       * CMP: Sets the low bit of the destination channels with the result
44701e04c3fSmrg       * of the comparison, while the upper bits are undefined, and updates
44801e04c3fSmrg       * the flag register with the packed 16 bits of the result.
44901e04c3fSmrg       */
45001e04c3fSmrg      instruction *
45101e04c3fSmrg      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
45201e04c3fSmrg          brw_conditional_mod condition) const
45301e04c3fSmrg      {
45401e04c3fSmrg         /* Take the instruction:
45501e04c3fSmrg          *
45601e04c3fSmrg          * CMP null<d> src0<f> src1<f>
45701e04c3fSmrg          *
4587ec681f3Smrg          * Original gfx4 does type conversion to the destination type
45901e04c3fSmrg          * before comparison, producing garbage results for floating
46001e04c3fSmrg          * point comparisons.
46101e04c3fSmrg          *
46201e04c3fSmrg          * The destination type doesn't matter on newer generations,
46301e04c3fSmrg          * so we set the type to match src0 so we can compact the
46401e04c3fSmrg          * instruction.
46501e04c3fSmrg          */
46601e04c3fSmrg         return set_condmod(condition,
46701e04c3fSmrg                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
46801e04c3fSmrg                                 fix_unsigned_negate(src0),
46901e04c3fSmrg                                 fix_unsigned_negate(src1)));
47001e04c3fSmrg      }
47101e04c3fSmrg
47201e04c3fSmrg      /**
4737ec681f3Smrg       * CMPN: Behaves like CMP, but produces true if src1 is NaN.
4747ec681f3Smrg       */
4757ec681f3Smrg      instruction *
4767ec681f3Smrg      CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
4777ec681f3Smrg          brw_conditional_mod condition) const
4787ec681f3Smrg      {
4797ec681f3Smrg         /* Take the instruction:
4807ec681f3Smrg          *
4817ec681f3Smrg          * CMPN null<d> src0<f> src1<f>
4827ec681f3Smrg          *
4837ec681f3Smrg          * Original gfx4 does type conversion to the destination type
4847ec681f3Smrg          * before comparison, producing garbage results for floating
4857ec681f3Smrg          * point comparisons.
4867ec681f3Smrg          *
4877ec681f3Smrg          * The destination type doesn't matter on newer generations,
4887ec681f3Smrg          * so we set the type to match src0 so we can compact the
4897ec681f3Smrg          * instruction.
4907ec681f3Smrg          */
4917ec681f3Smrg         return set_condmod(condition,
4927ec681f3Smrg                            emit(BRW_OPCODE_CMPN, retype(dst, src0.type),
4937ec681f3Smrg                                 fix_unsigned_negate(src0),
4947ec681f3Smrg                                 fix_unsigned_negate(src1)));
4957ec681f3Smrg      }
4967ec681f3Smrg
4977ec681f3Smrg      /**
4987ec681f3Smrg       * Gfx4 predicated IF.
49901e04c3fSmrg       */
50001e04c3fSmrg      instruction *
50101e04c3fSmrg      IF(brw_predicate predicate) const
50201e04c3fSmrg      {
50301e04c3fSmrg         return set_predicate(predicate, emit(BRW_OPCODE_IF));
50401e04c3fSmrg      }
50501e04c3fSmrg
50601e04c3fSmrg      /**
5077ec681f3Smrg       * Gfx6 IF with embedded comparison.
50801e04c3fSmrg       */
50901e04c3fSmrg      instruction *
51001e04c3fSmrg      IF(const src_reg &src0, const src_reg &src1,
51101e04c3fSmrg         brw_conditional_mod condition) const
51201e04c3fSmrg      {
5137ec681f3Smrg         assert(shader->devinfo->ver == 6);
51401e04c3fSmrg         return set_condmod(condition,
51501e04c3fSmrg                            emit(BRW_OPCODE_IF,
51601e04c3fSmrg                                 null_reg_d(),
51701e04c3fSmrg                                 fix_unsigned_negate(src0),
51801e04c3fSmrg                                 fix_unsigned_negate(src1)));
51901e04c3fSmrg      }
52001e04c3fSmrg
52101e04c3fSmrg      /**
52201e04c3fSmrg       * Emit a linear interpolation instruction.
52301e04c3fSmrg       */
52401e04c3fSmrg      instruction *
52501e04c3fSmrg      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
52601e04c3fSmrg          const src_reg &a) const
52701e04c3fSmrg      {
5287ec681f3Smrg         /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
5297ec681f3Smrg          * we need to reorder the operands.
5307ec681f3Smrg          */
5317ec681f3Smrg         assert(shader->devinfo->ver >= 6 && shader->devinfo->ver <= 9);
5327ec681f3Smrg         return emit(BRW_OPCODE_LRP, dst, a, y, x);
53301e04c3fSmrg      }
53401e04c3fSmrg
53501e04c3fSmrg      backend_shader *shader;
53601e04c3fSmrg
53701e04c3fSmrg   protected:
53801e04c3fSmrg      /**
53901e04c3fSmrg       * Workaround for negation of UD registers.  See comment in
54001e04c3fSmrg       * fs_generator::generate_code() for the details.
54101e04c3fSmrg       */
54201e04c3fSmrg      src_reg
54301e04c3fSmrg      fix_unsigned_negate(const src_reg &src) const
54401e04c3fSmrg      {
54501e04c3fSmrg         if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
54601e04c3fSmrg            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
54701e04c3fSmrg            MOV(temp, src);
54801e04c3fSmrg            return src_reg(temp);
54901e04c3fSmrg         } else {
55001e04c3fSmrg            return src;
55101e04c3fSmrg         }
55201e04c3fSmrg      }
55301e04c3fSmrg
55401e04c3fSmrg      /**
55501e04c3fSmrg       * Workaround for register access modes not supported by the ternary
55601e04c3fSmrg       * instruction encoding.
55701e04c3fSmrg       */
55801e04c3fSmrg      src_reg
55901e04c3fSmrg      fix_3src_operand(const src_reg &src) const
56001e04c3fSmrg      {
56101e04c3fSmrg         /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
56201e04c3fSmrg          * able to use vertical stride of zero to replicate the vec4 uniform, like
56301e04c3fSmrg          *
56401e04c3fSmrg          *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
56501e04c3fSmrg          *
56601e04c3fSmrg          * But you can't, since vertical stride is always four in three-source
56701e04c3fSmrg          * instructions. Instead, insert a MOV instruction to do the replication so
56801e04c3fSmrg          * that the three-source instruction can consume it.
56901e04c3fSmrg          */
57001e04c3fSmrg
57101e04c3fSmrg         /* The MOV is only needed if the source is a uniform or immediate. */
57201e04c3fSmrg         if (src.file != UNIFORM && src.file != IMM)
57301e04c3fSmrg            return src;
57401e04c3fSmrg
57501e04c3fSmrg         if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
57601e04c3fSmrg            return src;
57701e04c3fSmrg
57801e04c3fSmrg         const dst_reg expanded = vgrf(src.type);
57901e04c3fSmrg         emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
58001e04c3fSmrg         return src_reg(expanded);
58101e04c3fSmrg      }
58201e04c3fSmrg
58301e04c3fSmrg      /**
58401e04c3fSmrg       * Workaround for register access modes not supported by the math
58501e04c3fSmrg       * instruction.
58601e04c3fSmrg       */
58701e04c3fSmrg      src_reg
58801e04c3fSmrg      fix_math_operand(const src_reg &src) const
58901e04c3fSmrg      {
5907ec681f3Smrg         /* The gfx6 math instruction ignores the source modifiers --
59101e04c3fSmrg          * swizzle, abs, negate, and at least some parts of the register
59201e04c3fSmrg          * region description.
59301e04c3fSmrg          *
59401e04c3fSmrg          * Rather than trying to enumerate all these cases, *always* expand the
5957ec681f3Smrg          * operand to a temp GRF for gfx6.
59601e04c3fSmrg          *
5977ec681f3Smrg          * For gfx7, keep the operand as-is, except if immediate, which gfx7 still
59801e04c3fSmrg          * can't use.
59901e04c3fSmrg          */
6007ec681f3Smrg         if (shader->devinfo->ver == 6 ||
6017ec681f3Smrg             (shader->devinfo->ver == 7 && src.file == IMM)) {
60201e04c3fSmrg            const dst_reg tmp = vgrf(src.type);
60301e04c3fSmrg            MOV(tmp, src);
60401e04c3fSmrg            return src_reg(tmp);
60501e04c3fSmrg         } else {
60601e04c3fSmrg            return src;
60701e04c3fSmrg         }
60801e04c3fSmrg      }
60901e04c3fSmrg
61001e04c3fSmrg      /**
61101e04c3fSmrg       * Workaround other weirdness of the math instruction.
61201e04c3fSmrg       */
61301e04c3fSmrg      instruction *
61401e04c3fSmrg      fix_math_instruction(instruction *inst) const
61501e04c3fSmrg      {
6167ec681f3Smrg         if (shader->devinfo->ver == 6 &&
61701e04c3fSmrg             inst->dst.writemask != WRITEMASK_XYZW) {
61801e04c3fSmrg            const dst_reg tmp = vgrf(inst->dst.type);
61901e04c3fSmrg            MOV(inst->dst, src_reg(tmp));
62001e04c3fSmrg            inst->dst = tmp;
62101e04c3fSmrg
6227ec681f3Smrg         } else if (shader->devinfo->ver < 6) {
62301e04c3fSmrg            const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
62401e04c3fSmrg            inst->base_mrf = 1;
62501e04c3fSmrg            inst->mlen = sources;
62601e04c3fSmrg         }
62701e04c3fSmrg
62801e04c3fSmrg         return inst;
62901e04c3fSmrg      }
63001e04c3fSmrg
63101e04c3fSmrg      bblock_t *block;
63201e04c3fSmrg      exec_node *cursor;
63301e04c3fSmrg
63401e04c3fSmrg      unsigned _dispatch_width;
63501e04c3fSmrg      unsigned _group;
63601e04c3fSmrg      bool force_writemask_all;
63701e04c3fSmrg
63801e04c3fSmrg      /** Debug annotation info. */
63901e04c3fSmrg      struct {
64001e04c3fSmrg         const char *str;
64101e04c3fSmrg         const void *ir;
64201e04c3fSmrg      } annotation;
64301e04c3fSmrg   };
64401e04c3fSmrg}
64501e04c3fSmrg
64601e04c3fSmrg#endif
647