101e04c3fSmrg/* -*- c++ -*- */
201e04c3fSmrg/*
301e04c3fSmrg * Copyright © 2010-2015 Intel Corporation
401e04c3fSmrg *
501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
601e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
701e04c3fSmrg * to deal in the Software without restriction, including without limitation
801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1101e04c3fSmrg *
1201e04c3fSmrg * The above copyright notice and this permission notice (including the next
1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1401e04c3fSmrg * Software.
1501e04c3fSmrg *
1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2201e04c3fSmrg * IN THE SOFTWARE.
2301e04c3fSmrg */
2401e04c3fSmrg
2501e04c3fSmrg#ifndef BRW_FS_BUILDER_H
2601e04c3fSmrg#define BRW_FS_BUILDER_H
2701e04c3fSmrg
2801e04c3fSmrg#include "brw_ir_fs.h"
2901e04c3fSmrg#include "brw_shader.h"
3001e04c3fSmrg
3101e04c3fSmrgnamespace brw {
3201e04c3fSmrg   /**
3301e04c3fSmrg    * Toolbox to assemble an FS IR program out of individual instructions.
3401e04c3fSmrg    *
3501e04c3fSmrg    * This object is meant to have an interface consistent with
3601e04c3fSmrg    * brw::vec4_builder.  They cannot be fully interchangeable because
3701e04c3fSmrg    * brw::fs_builder generates scalar code while brw::vec4_builder generates
3801e04c3fSmrg    * vector code.
3901e04c3fSmrg    */
4001e04c3fSmrg   class fs_builder {
4101e04c3fSmrg   public:
      /**
       * Type used in this IR to represent a source of an instruction.
       * Note that it is the same underlying type (fs_reg) as dst_reg.
       */
      typedef fs_reg src_reg;

      /**
       * Type used in this IR to represent the destination of an instruction.
       * Note that it is the same underlying type (fs_reg) as src_reg.
       */
      typedef fs_reg dst_reg;

      /** Type used in this IR to represent an instruction. */
      typedef fs_inst instruction;
5001e04c3fSmrg
5101e04c3fSmrg      /**
5201e04c3fSmrg       * Construct an fs_builder that inserts instructions into \p shader.
5301e04c3fSmrg       * \p dispatch_width gives the native execution width of the program.
5401e04c3fSmrg       */
5501e04c3fSmrg      fs_builder(backend_shader *shader,
5601e04c3fSmrg                 unsigned dispatch_width) :
5701e04c3fSmrg         shader(shader), block(NULL), cursor(NULL),
5801e04c3fSmrg         _dispatch_width(dispatch_width),
5901e04c3fSmrg         _group(0),
6001e04c3fSmrg         force_writemask_all(false),
6101e04c3fSmrg         annotation()
6201e04c3fSmrg      {
6301e04c3fSmrg      }
6401e04c3fSmrg
6501e04c3fSmrg      /**
6601e04c3fSmrg       * Construct an fs_builder that inserts instructions into \p shader
6701e04c3fSmrg       * before instruction \p inst in basic block \p block.  The default
6801e04c3fSmrg       * execution controls and debug annotation are initialized from the
6901e04c3fSmrg       * instruction passed as argument.
7001e04c3fSmrg       */
7101e04c3fSmrg      fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
7201e04c3fSmrg         shader(shader), block(block), cursor(inst),
7301e04c3fSmrg         _dispatch_width(inst->exec_size),
7401e04c3fSmrg         _group(inst->group),
7501e04c3fSmrg         force_writemask_all(inst->force_writemask_all)
7601e04c3fSmrg      {
7701e04c3fSmrg         annotation.str = inst->annotation;
7801e04c3fSmrg         annotation.ir = inst->ir;
7901e04c3fSmrg      }
8001e04c3fSmrg
8101e04c3fSmrg      /**
8201e04c3fSmrg       * Construct an fs_builder that inserts instructions before \p cursor in
8301e04c3fSmrg       * basic block \p block, inheriting other code generation parameters
8401e04c3fSmrg       * from this.
8501e04c3fSmrg       */
8601e04c3fSmrg      fs_builder
8701e04c3fSmrg      at(bblock_t *block, exec_node *cursor) const
8801e04c3fSmrg      {
8901e04c3fSmrg         fs_builder bld = *this;
9001e04c3fSmrg         bld.block = block;
9101e04c3fSmrg         bld.cursor = cursor;
9201e04c3fSmrg         return bld;
9301e04c3fSmrg      }
9401e04c3fSmrg
9501e04c3fSmrg      /**
9601e04c3fSmrg       * Construct an fs_builder appending instructions at the end of the
9701e04c3fSmrg       * instruction list of the shader, inheriting other code generation
9801e04c3fSmrg       * parameters from this.
9901e04c3fSmrg       */
10001e04c3fSmrg      fs_builder
10101e04c3fSmrg      at_end() const
10201e04c3fSmrg      {
10301e04c3fSmrg         return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
10401e04c3fSmrg      }
10501e04c3fSmrg
10601e04c3fSmrg      /**
10701e04c3fSmrg       * Construct a builder specifying the default SIMD width and group of
10801e04c3fSmrg       * channel enable signals, inheriting other code generation parameters
10901e04c3fSmrg       * from this.
11001e04c3fSmrg       *
11101e04c3fSmrg       * \p n gives the default SIMD width, \p i gives the slot group used for
11201e04c3fSmrg       * predication and control flow masking in multiples of \p n channels.
11301e04c3fSmrg       */
11401e04c3fSmrg      fs_builder
11501e04c3fSmrg      group(unsigned n, unsigned i) const
11601e04c3fSmrg      {
11701e04c3fSmrg         fs_builder bld = *this;
1189f464c52Smaya
1199f464c52Smaya         if (n <= dispatch_width() && i < dispatch_width() / n) {
1209f464c52Smaya            bld._group += i * n;
1219f464c52Smaya         } else {
1229f464c52Smaya            /* The requested channel group isn't a subset of the channel group
1239f464c52Smaya             * of this builder, which means that the resulting instructions
1249f464c52Smaya             * would use (potentially undefined) channel enable signals not
1259f464c52Smaya             * specified by the parent builder.  That's only valid if the
1269f464c52Smaya             * instruction doesn't have per-channel semantics, in which case
1279f464c52Smaya             * we should clear off the default group index in order to prevent
1289f464c52Smaya             * emitting instructions with channel group not aligned to their
1299f464c52Smaya             * own execution size.
1309f464c52Smaya             */
1319f464c52Smaya            assert(force_writemask_all);
1329f464c52Smaya            bld._group = 0;
1339f464c52Smaya         }
1349f464c52Smaya
13501e04c3fSmrg         bld._dispatch_width = n;
13601e04c3fSmrg         return bld;
13701e04c3fSmrg      }
13801e04c3fSmrg
13901e04c3fSmrg      /**
14001e04c3fSmrg       * Alias for group() with width equal to eight.
14101e04c3fSmrg       */
14201e04c3fSmrg      fs_builder
1437ec681f3Smrg      quarter(unsigned i) const
14401e04c3fSmrg      {
14501e04c3fSmrg         return group(8, i);
14601e04c3fSmrg      }
14701e04c3fSmrg
14801e04c3fSmrg      /**
14901e04c3fSmrg       * Construct a builder with per-channel control flow execution masking
15001e04c3fSmrg       * disabled if \p b is true.  If control flow execution masking is
15101e04c3fSmrg       * already disabled this has no effect.
15201e04c3fSmrg       */
15301e04c3fSmrg      fs_builder
15401e04c3fSmrg      exec_all(bool b = true) const
15501e04c3fSmrg      {
15601e04c3fSmrg         fs_builder bld = *this;
15701e04c3fSmrg         if (b)
15801e04c3fSmrg            bld.force_writemask_all = true;
15901e04c3fSmrg         return bld;
16001e04c3fSmrg      }
16101e04c3fSmrg
16201e04c3fSmrg      /**
16301e04c3fSmrg       * Construct a builder with the given debug annotation info.
16401e04c3fSmrg       */
16501e04c3fSmrg      fs_builder
16601e04c3fSmrg      annotate(const char *str, const void *ir = NULL) const
16701e04c3fSmrg      {
16801e04c3fSmrg         fs_builder bld = *this;
16901e04c3fSmrg         bld.annotation.str = str;
17001e04c3fSmrg         bld.annotation.ir = ir;
17101e04c3fSmrg         return bld;
17201e04c3fSmrg      }
17301e04c3fSmrg
17401e04c3fSmrg      /**
17501e04c3fSmrg       * Get the SIMD width in use.
17601e04c3fSmrg       */
17701e04c3fSmrg      unsigned
17801e04c3fSmrg      dispatch_width() const
17901e04c3fSmrg      {
18001e04c3fSmrg         return _dispatch_width;
18101e04c3fSmrg      }
18201e04c3fSmrg
18301e04c3fSmrg      /**
18401e04c3fSmrg       * Get the channel group in use.
18501e04c3fSmrg       */
18601e04c3fSmrg      unsigned
18701e04c3fSmrg      group() const
18801e04c3fSmrg      {
18901e04c3fSmrg         return _group;
19001e04c3fSmrg      }
19101e04c3fSmrg
19201e04c3fSmrg      /**
19301e04c3fSmrg       * Allocate a virtual register of natural vector size (one for this IR)
19401e04c3fSmrg       * and SIMD width.  \p n gives the amount of space to allocate in
19501e04c3fSmrg       * dispatch_width units (which is just enough space for one logical
19601e04c3fSmrg       * component in this IR).
19701e04c3fSmrg       */
19801e04c3fSmrg      dst_reg
19901e04c3fSmrg      vgrf(enum brw_reg_type type, unsigned n = 1) const
20001e04c3fSmrg      {
20101e04c3fSmrg         assert(dispatch_width() <= 32);
20201e04c3fSmrg
20301e04c3fSmrg         if (n > 0)
20401e04c3fSmrg            return dst_reg(VGRF, shader->alloc.allocate(
20501e04c3fSmrg                              DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
20601e04c3fSmrg                                           REG_SIZE)),
20701e04c3fSmrg                           type);
20801e04c3fSmrg         else
20901e04c3fSmrg            return retype(null_reg_ud(), type);
21001e04c3fSmrg      }
21101e04c3fSmrg
21201e04c3fSmrg      /**
21301e04c3fSmrg       * Create a null register of floating type.
21401e04c3fSmrg       */
21501e04c3fSmrg      dst_reg
21601e04c3fSmrg      null_reg_f() const
21701e04c3fSmrg      {
21801e04c3fSmrg         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_F));
21901e04c3fSmrg      }
22001e04c3fSmrg
22101e04c3fSmrg      dst_reg
22201e04c3fSmrg      null_reg_df() const
22301e04c3fSmrg      {
22401e04c3fSmrg         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
22501e04c3fSmrg      }
22601e04c3fSmrg
22701e04c3fSmrg      /**
22801e04c3fSmrg       * Create a null register of signed integer type.
22901e04c3fSmrg       */
23001e04c3fSmrg      dst_reg
23101e04c3fSmrg      null_reg_d() const
23201e04c3fSmrg      {
23301e04c3fSmrg         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
23401e04c3fSmrg      }
23501e04c3fSmrg
23601e04c3fSmrg      /**
23701e04c3fSmrg       * Create a null register of unsigned integer type.
23801e04c3fSmrg       */
23901e04c3fSmrg      dst_reg
24001e04c3fSmrg      null_reg_ud() const
24101e04c3fSmrg      {
24201e04c3fSmrg         return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
24301e04c3fSmrg      }
24401e04c3fSmrg
24501e04c3fSmrg      /**
24601e04c3fSmrg       * Insert an instruction into the program.
24701e04c3fSmrg       */
24801e04c3fSmrg      instruction *
24901e04c3fSmrg      emit(const instruction &inst) const
25001e04c3fSmrg      {
25101e04c3fSmrg         return emit(new(shader->mem_ctx) instruction(inst));
25201e04c3fSmrg      }
25301e04c3fSmrg
25401e04c3fSmrg      /**
25501e04c3fSmrg       * Create and insert a nullary control instruction into the program.
25601e04c3fSmrg       */
25701e04c3fSmrg      instruction *
25801e04c3fSmrg      emit(enum opcode opcode) const
25901e04c3fSmrg      {
26001e04c3fSmrg         return emit(instruction(opcode, dispatch_width()));
26101e04c3fSmrg      }
26201e04c3fSmrg
26301e04c3fSmrg      /**
       * Create and insert a nullary instruction (one that takes a
       * destination but no sources) into the program.
26501e04c3fSmrg       */
26601e04c3fSmrg      instruction *
26701e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst) const
26801e04c3fSmrg      {
26901e04c3fSmrg         return emit(instruction(opcode, dispatch_width(), dst));
27001e04c3fSmrg      }
27101e04c3fSmrg
27201e04c3fSmrg      /**
27301e04c3fSmrg       * Create and insert a unary instruction into the program.
27401e04c3fSmrg       */
27501e04c3fSmrg      instruction *
27601e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
27701e04c3fSmrg      {
27801e04c3fSmrg         switch (opcode) {
27901e04c3fSmrg         case SHADER_OPCODE_RCP:
28001e04c3fSmrg         case SHADER_OPCODE_RSQ:
28101e04c3fSmrg         case SHADER_OPCODE_SQRT:
28201e04c3fSmrg         case SHADER_OPCODE_EXP2:
28301e04c3fSmrg         case SHADER_OPCODE_LOG2:
28401e04c3fSmrg         case SHADER_OPCODE_SIN:
28501e04c3fSmrg         case SHADER_OPCODE_COS:
28601e04c3fSmrg            return emit(instruction(opcode, dispatch_width(), dst,
28701e04c3fSmrg                                    fix_math_operand(src0)));
28801e04c3fSmrg
28901e04c3fSmrg         default:
29001e04c3fSmrg            return emit(instruction(opcode, dispatch_width(), dst, src0));
29101e04c3fSmrg         }
29201e04c3fSmrg      }
29301e04c3fSmrg
29401e04c3fSmrg      /**
29501e04c3fSmrg       * Create and insert a binary instruction into the program.
29601e04c3fSmrg       */
29701e04c3fSmrg      instruction *
29801e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
29901e04c3fSmrg           const src_reg &src1) const
30001e04c3fSmrg      {
30101e04c3fSmrg         switch (opcode) {
30201e04c3fSmrg         case SHADER_OPCODE_POW:
30301e04c3fSmrg         case SHADER_OPCODE_INT_QUOTIENT:
30401e04c3fSmrg         case SHADER_OPCODE_INT_REMAINDER:
30501e04c3fSmrg            return emit(instruction(opcode, dispatch_width(), dst,
30601e04c3fSmrg                                    fix_math_operand(src0),
3077ec681f3Smrg                                    fix_math_operand(src1)));
30801e04c3fSmrg
30901e04c3fSmrg         default:
3109f464c52Smaya            return emit(instruction(opcode, dispatch_width(), dst,
3117ec681f3Smrg                                    src0, src1));
31201e04c3fSmrg
31301e04c3fSmrg         }
31401e04c3fSmrg      }
31501e04c3fSmrg
31601e04c3fSmrg      /**
31701e04c3fSmrg       * Create and insert a ternary instruction into the program.
31801e04c3fSmrg       */
31901e04c3fSmrg      instruction *
32001e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
32101e04c3fSmrg           const src_reg &src1, const src_reg &src2) const
32201e04c3fSmrg      {
32301e04c3fSmrg         switch (opcode) {
32401e04c3fSmrg         case BRW_OPCODE_BFE:
32501e04c3fSmrg         case BRW_OPCODE_BFI2:
32601e04c3fSmrg         case BRW_OPCODE_MAD:
32701e04c3fSmrg         case BRW_OPCODE_LRP:
32801e04c3fSmrg            return emit(instruction(opcode, dispatch_width(), dst,
32901e04c3fSmrg                                    fix_3src_operand(src0),
3307ec681f3Smrg                                    fix_3src_operand(src1),
3317ec681f3Smrg                                    fix_3src_operand(src2)));
33201e04c3fSmrg
33301e04c3fSmrg         default:
33401e04c3fSmrg            return emit(instruction(opcode, dispatch_width(), dst,
3357ec681f3Smrg                                    src0, src1, src2));
33601e04c3fSmrg         }
33701e04c3fSmrg      }
33801e04c3fSmrg
33901e04c3fSmrg      /**
34001e04c3fSmrg       * Create and insert an instruction with a variable number of sources
34101e04c3fSmrg       * into the program.
34201e04c3fSmrg       */
34301e04c3fSmrg      instruction *
34401e04c3fSmrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[],
34501e04c3fSmrg           unsigned n) const
34601e04c3fSmrg      {
3477ec681f3Smrg         /* Use the emit() methods for specific operand counts to ensure that
3487ec681f3Smrg          * opcode-specific operand fixups occur.
3497ec681f3Smrg          */
3507ec681f3Smrg         if (n == 2) {
3517ec681f3Smrg            return emit(opcode, dst, srcs[0], srcs[1]);
3527ec681f3Smrg         } else if (n == 3) {
3537ec681f3Smrg            return emit(opcode, dst, srcs[0], srcs[1], srcs[2]);
3547ec681f3Smrg         } else {
3557ec681f3Smrg            return emit(instruction(opcode, dispatch_width(), dst, srcs, n));
3567ec681f3Smrg         }
35701e04c3fSmrg      }
35801e04c3fSmrg
35901e04c3fSmrg      /**
36001e04c3fSmrg       * Insert a preallocated instruction into the program.
36101e04c3fSmrg       */
36201e04c3fSmrg      instruction *
36301e04c3fSmrg      emit(instruction *inst) const
36401e04c3fSmrg      {
36501e04c3fSmrg         assert(inst->exec_size <= 32);
36601e04c3fSmrg         assert(inst->exec_size == dispatch_width() ||
36701e04c3fSmrg                force_writemask_all);
36801e04c3fSmrg
36901e04c3fSmrg         inst->group = _group;
37001e04c3fSmrg         inst->force_writemask_all = force_writemask_all;
37101e04c3fSmrg         inst->annotation = annotation.str;
37201e04c3fSmrg         inst->ir = annotation.ir;
37301e04c3fSmrg
37401e04c3fSmrg         if (block)
37501e04c3fSmrg            static_cast<instruction *>(cursor)->insert_before(block, inst);
37601e04c3fSmrg         else
37701e04c3fSmrg            cursor->insert_before(inst);
37801e04c3fSmrg
37901e04c3fSmrg         return inst;
38001e04c3fSmrg      }
38101e04c3fSmrg
38201e04c3fSmrg      /**
38301e04c3fSmrg       * Select \p src0 if the comparison of both sources with the given
38401e04c3fSmrg       * conditional mod evaluates to true, otherwise select \p src1.
38501e04c3fSmrg       *
38601e04c3fSmrg       * Generally useful to get the minimum or maximum of two values.
38701e04c3fSmrg       */
38801e04c3fSmrg      instruction *
38901e04c3fSmrg      emit_minmax(const dst_reg &dst, const src_reg &src0,
39001e04c3fSmrg                  const src_reg &src1, brw_conditional_mod mod) const
39101e04c3fSmrg      {
39201e04c3fSmrg         assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
39301e04c3fSmrg
3949f464c52Smaya         /* In some cases we can't have bytes as operand for src1, so use the
3959f464c52Smaya          * same type for both operand.
3969f464c52Smaya          */
3977ec681f3Smrg         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
3987ec681f3Smrg                                     fix_unsigned_negate(src1)));
39901e04c3fSmrg      }
40001e04c3fSmrg
40101e04c3fSmrg      /**
40201e04c3fSmrg       * Copy any live channel from \p src to the first channel of the result.
40301e04c3fSmrg       */
40401e04c3fSmrg      src_reg
40501e04c3fSmrg      emit_uniformize(const src_reg &src) const
40601e04c3fSmrg      {
40701e04c3fSmrg         /* FIXME: We use a vector chan_index and dst to allow constant and
40801e04c3fSmrg          * copy propagration to move result all the way into the consuming
40901e04c3fSmrg          * instruction (typically a surface index or sampler index for a
41001e04c3fSmrg          * send). This uses 1 or 3 extra hw registers in 16 or 32 wide
41101e04c3fSmrg          * dispatch. Once we teach const/copy propagation about scalars we
41201e04c3fSmrg          * should go back to scalar destinations here.
41301e04c3fSmrg          */
41401e04c3fSmrg         const fs_builder ubld = exec_all();
41501e04c3fSmrg         const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
41601e04c3fSmrg         const dst_reg dst = vgrf(src.type);
41701e04c3fSmrg
4187ec681f3Smrg         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
41901e04c3fSmrg         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
42001e04c3fSmrg
42101e04c3fSmrg         return src_reg(component(dst, 0));
42201e04c3fSmrg      }
42301e04c3fSmrg
4249f464c52Smaya      src_reg
4259f464c52Smaya      move_to_vgrf(const src_reg &src, unsigned num_components) const
4269f464c52Smaya      {
4279f464c52Smaya         src_reg *const src_comps = new src_reg[num_components];
4289f464c52Smaya         for (unsigned i = 0; i < num_components; i++)
4299f464c52Smaya            src_comps[i] = offset(src, dispatch_width(), i);
4309f464c52Smaya
4319f464c52Smaya         const dst_reg dst = vgrf(src.type, num_components);
4329f464c52Smaya         LOAD_PAYLOAD(dst, src_comps, num_components, 0);
4339f464c52Smaya
4349f464c52Smaya         delete[] src_comps;
4359f464c52Smaya
4369f464c52Smaya         return src_reg(dst);
4379f464c52Smaya      }
4389f464c52Smaya
      /**
       * Emit one step of a scan over \p tmp: combine a "left" and a "right"
       * region of \p tmp with \p opcode (using conditional mod \p mod) and
       * write the result back over the right region.
       *
       * The left region is \p tmp moved by \p left_offset with stride
       * \p left_stride, the right region is \p tmp moved by \p right_offset
       * with stride \p right_stride (both built with horiz_offset() and
       * horiz_stride()).
       *
       * 64-bit integer types (Q/UQ) on devices without has_64bit_int are
       * special-cased: MUL is emitted as-is, SEL is expanded into 32-bit
       * comparisons plus predicated MOVs, and any other opcode is
       * unsupported.
       */
      void
      emit_scan_step(enum opcode opcode, brw_conditional_mod mod,
                     const dst_reg &tmp,
                     unsigned left_offset, unsigned left_stride,
                     unsigned right_offset, unsigned right_stride) const
      {
         dst_reg left, right;
         left = horiz_stride(horiz_offset(tmp, left_offset), left_stride);
         right = horiz_stride(horiz_offset(tmp, right_offset), right_stride);
         if ((tmp.type == BRW_REGISTER_TYPE_Q ||
              tmp.type == BRW_REGISTER_TYPE_UQ) &&
             !shader->devinfo->has_64bit_int) {
            switch (opcode) {
            case BRW_OPCODE_MUL:
               /* This will get lowered by integer MUL lowering */
               set_condmod(mod, emit(opcode, right, left, right));
               break;

            case BRW_OPCODE_SEL: {
               /* In order for the comparisons to work out right, we need our
                * comparisons to be strict.
                */
               assert(mod == BRW_CONDITIONAL_L || mod == BRW_CONDITIONAL_GE);
               /* L is already strict; GE is replaced by its strict form.
                * mod is a by-value parameter, so the caller is unaffected.
                */
               if (mod == BRW_CONDITIONAL_GE)
                  mod = BRW_CONDITIONAL_G;

               /* We treat the bottom 32 bits as unsigned regardless of
                * whether or not the integer as a whole is signed.
                */
               dst_reg right_low = subscript(right, BRW_REGISTER_TYPE_UD, 0);
               dst_reg left_low = subscript(left, BRW_REGISTER_TYPE_UD, 0);

               /* The upper bits get the same sign as the 64-bit type */
               brw_reg_type type32 = brw_reg_type_from_bit_size(32, tmp.type);
               dst_reg right_high = subscript(right, type32, 1);
               dst_reg left_high = subscript(left, type32, 1);

               /* Build up our comparison:
                *
                *   l_hi < r_hi || (l_hi == r_hi && l_low < r_low)
                *
                * This takes three CMPs emitted in this exact order: the
                * low halves first, then a predicated equality test of the
                * high halves, then an inverse-predicated comparison of the
                * high halves with the (strict) conditional mod.
                */
               CMP(null_reg_ud(), retype(left_low, BRW_REGISTER_TYPE_UD),
                                  retype(right_low, BRW_REGISTER_TYPE_UD), mod);
               set_predicate(BRW_PREDICATE_NORMAL,
                             CMP(null_reg_ud(), left_high, right_high,
                                 BRW_CONDITIONAL_EQ));
               set_predicate_inv(BRW_PREDICATE_NORMAL, true,
                                 CMP(null_reg_ud(), left_high, right_high, mod));

               /* We could use selects here or we could use predicated MOVs
                * because the destination and second source (if it were a SEL)
                * are the same.
                */
               set_predicate(BRW_PREDICATE_NORMAL, MOV(right_low, left_low));
               set_predicate(BRW_PREDICATE_NORMAL, MOV(right_high, left_high));
               break;
            }

            default:
               unreachable("Unsupported 64-bit scan op");
            }
         } else {
            /* Common case: a single instruction computing
             * right = opcode(left, right).
             */
            set_condmod(mod, emit(opcode, right, left, right));
         }
      }
5047ec681f3Smrg
      /**
       * Emit the instructions for a scan over the channels of \p tmp using
       * \p opcode and conditional mod \p mod, built out of emit_scan_step()
       * calls.  The result is accumulated in place in \p tmp.
       *
       * \p cluster_size bounds how far values are combined: the steps
       * joining wider groups of channels are only emitted while
       * \p cluster_size exceeds the group size already handled.
       *
       * Every step is emitted through an exec_all() builder of reduced
       * width.  Requires a dispatch width of at least 8.
       */
      void
      emit_scan(enum opcode opcode, const dst_reg &tmp,
                unsigned cluster_size, brw_conditional_mod mod) const
      {
         assert(dispatch_width() >= 8);

         /* The instruction splitting code isn't advanced enough to split
          * these so we need to handle that ourselves.
          */
         if (dispatch_width() * type_sz(tmp.type) > 2 * REG_SIZE) {
            /* Scan each half on its own (recursively), then, if clusters
             * span both halves, fold the last channel of the lower half
             * into the upper half.
             */
            const unsigned half_width = dispatch_width() / 2;
            const fs_builder ubld = exec_all().group(half_width, 0);
            dst_reg left = tmp;
            dst_reg right = horiz_offset(tmp, half_width);
            ubld.emit_scan(opcode, left, cluster_size, mod);
            ubld.emit_scan(opcode, right, cluster_size, mod);
            if (cluster_size > half_width) {
               ubld.emit_scan_step(opcode, mod, tmp,
                                   half_width - 1, 0, half_width, 1);
            }
            return;
         }

         /* Pairs of channels: offsets 0 and 1, both with stride 2. */
         if (cluster_size > 1) {
            const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0);
            ubld.emit_scan_step(opcode, mod, tmp, 0, 2, 1, 2);
         }

         /* Groups of four channels: offset 1 is combined into offsets 2
          * and 3 of each group.
          */
         if (cluster_size > 2) {
            if (type_sz(tmp.type) <= 4) {
               const fs_builder ubld =
                  exec_all().group(dispatch_width() / 4, 0);
               ubld.emit_scan_step(opcode, mod, tmp, 1, 4, 2, 4);
               ubld.emit_scan_step(opcode, mod, tmp, 1, 4, 3, 4);
            } else {
               /* For 64-bit types, we have to do things differently because
                * the code above would land us with destination strides that
                * the hardware can't handle.  Fortunately, we'll only be
                * 8-wide in that case and it's the same number of
                * instructions.
                */
               const fs_builder ubld = exec_all().group(2, 0);
               for (unsigned i = 0; i < dispatch_width(); i += 4)
                  ubld.emit_scan_step(opcode, mod, tmp, i + 1, 0, i + 2, 1);
            }
         }

         /* Remaining group sizes (4, 8, ...) up to the smaller of the
          * cluster size and the dispatch width.  Each step takes the channel
          * just below an odd multiple of i with stride 0 as the left operand
          * (NOTE(review): presumably a stride of 0 replicates that single
          * channel -- confirm against horiz_stride()) and the i channels
          * that follow it as the right operand.
          */
         for (unsigned i = 4;
              i < MIN2(cluster_size, dispatch_width());
              i *= 2) {
            const fs_builder ubld = exec_all().group(i, 0);
            ubld.emit_scan_step(opcode, mod, tmp, i - 1, 0, i, 1);

            if (dispatch_width() > i * 2)
               ubld.emit_scan_step(opcode, mod, tmp, i * 3 - 1, 0, i * 3, 1);

            if (dispatch_width() > i * 4) {
               ubld.emit_scan_step(opcode, mod, tmp, i * 5 - 1, 0, i * 5, 1);
               ubld.emit_scan_step(opcode, mod, tmp, i * 7 - 1, 0, i * 7, 1);
            }
         }
      }
56701e04c3fSmrg
56801e04c3fSmrg      /**
56901e04c3fSmrg       * Assorted arithmetic ops.
57001e04c3fSmrg       * @{
57101e04c3fSmrg       */
/* ALU1(op): define a method named op that emits the one-source instruction
 * BRW_OPCODE_<op> through emit() and returns the emitted instruction.
 */
#define ALU1(op)                                        \
      instruction *                                     \
      op(const dst_reg &dst, const src_reg &src0) const \
      {                                                 \
         return emit(BRW_OPCODE_##op, dst, src0);       \
      }

/* ALU2(op): same as ALU1 for a two-source instruction. */
#define ALU2(op)                                                        \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
      {                                                                 \
         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
      }

/* ALU2_ACC(op): same as ALU2, but additionally marks the emitted
 * instruction as writing the accumulator.
 */
#define ALU2_ACC(op)                                                    \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
      {                                                                 \
         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
         inst->writes_accumulator = true;                               \
         return inst;                                                   \
      }

/* ALU3(op): same as ALU1 for a three-source instruction. */
#define ALU3(op)                                                        \
      instruction *                                                     \
      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
         const src_reg &src2) const                                     \
      {                                                                 \
         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
      }

      /* One builder method per opcode, generated by the macros above. */
      ALU2(ADD)
      ALU3(ADD3)
      ALU2_ACC(ADDC)
      ALU2(AND)
      ALU2(ASR)
      ALU2(AVG)
      ALU3(BFE)
      ALU2(BFI1)
      ALU3(BFI2)
      ALU1(BFREV)
      ALU1(CBIT)
      ALU1(DIM)
      ALU2(DP2)
      ALU2(DP3)
      ALU2(DP4)
      ALU2(DPH)
      ALU1(F16TO32)
      ALU1(F32TO16)
      ALU1(FBH)
      ALU1(FBL)
      ALU1(FRC)
      ALU3(DP4A)
      ALU2(LINE)
      ALU1(LZD)
      ALU2(MAC)
      ALU2_ACC(MACH)
      ALU3(MAD)
      ALU1(MOV)
      ALU2(MUL)
      ALU1(NOT)
      ALU2(OR)
      ALU2(PLN)
      ALU1(RNDD)
      ALU1(RNDE)
      ALU1(RNDU)
      ALU1(RNDZ)
      ALU2(ROL)
      ALU2(ROR)
      ALU2(SAD2)
      ALU2_ACC(SADA2)
      ALU2(SEL)
      ALU2(SHL)
      ALU2(SHR)
      ALU2_ACC(SUBB)
      ALU2(XOR)

/* The helper macros are only meant for the list above. */
#undef ALU3
#undef ALU2_ACC
#undef ALU2
#undef ALU1
65301e04c3fSmrg      /** @} */
65401e04c3fSmrg
65501e04c3fSmrg      /**
65601e04c3fSmrg       * CMP: Sets the low bit of the destination channels with the result
65701e04c3fSmrg       * of the comparison, while the upper bits are undefined, and updates
65801e04c3fSmrg       * the flag register with the packed 16 bits of the result.
       *
       * @param dst        Destination register.  It is emitted retyped to the
       *                   type of src0, whatever type the caller gave it.
       * @param src0       First operand of the comparison.
       * @param src1       Second operand of the comparison.
       * @param condition  Conditional modifier attached to the emitted
       *                   instruction through set_condmod().
       * @return Whatever set_condmod() returns for the emitted
       *         BRW_OPCODE_CMP instruction.
65901e04c3fSmrg       */
66001e04c3fSmrg      instruction *
66101e04c3fSmrg      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
66201e04c3fSmrg          brw_conditional_mod condition) const
66301e04c3fSmrg      {
66401e04c3fSmrg         /* Take the instruction:
66501e04c3fSmrg          *
66601e04c3fSmrg          * CMP null<d> src0<f> src1<f>
66701e04c3fSmrg          *
6687ec681f3Smrg          * Original gfx4 does type conversion to the destination type
66901e04c3fSmrg          * before comparison, producing garbage results for floating
67001e04c3fSmrg          * point comparisons.
67101e04c3fSmrg          *
67201e04c3fSmrg          * The destination type doesn't matter on newer generations,
67301e04c3fSmrg          * so we set the type to match src0 so we can compact the
67401e04c3fSmrg          * instruction.
67501e04c3fSmrg          */
         /* Both sources go through fix_unsigned_negate(): a negated UD source
          * is first copied into a temporary with a MOV, and the CMP then reads
          * that temporary instead.
          */
67601e04c3fSmrg         return set_condmod(condition,
67701e04c3fSmrg                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
67801e04c3fSmrg                                 fix_unsigned_negate(src0),
67901e04c3fSmrg                                 fix_unsigned_negate(src1)));
68001e04c3fSmrg      }
68101e04c3fSmrg
68201e04c3fSmrg      /**
6837ec681f3Smrg       * CMPN: Behaves like CMP, but produces true if src1 is NaN.
       *
       * The operand handling is the same as in CMP(): the destination is
       * retyped to the type of src0 and both sources are passed through
       * fix_unsigned_negate() before BRW_OPCODE_CMPN is emitted.
       *
       * @param dst        Destination register (emitted with src0's type).
       * @param src0       First operand of the comparison.
       * @param src1       Second operand of the comparison.
       * @param condition  Conditional modifier attached to the emitted
       *                   instruction through set_condmod().
       * @return Whatever set_condmod() returns for the emitted
       *         BRW_OPCODE_CMPN instruction.
6847ec681f3Smrg       */
6857ec681f3Smrg      instruction *
6867ec681f3Smrg      CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
6877ec681f3Smrg           brw_conditional_mod condition) const
6887ec681f3Smrg      {
6897ec681f3Smrg         /* Take the instruction:
6907ec681f3Smrg          *
6917ec681f3Smrg          * CMP null<d> src0<f> src1<f>
6927ec681f3Smrg          *
6937ec681f3Smrg          * Original gfx4 does type conversion to the destination type
6947ec681f3Smrg          * before comparison, producing garbage results for floating
6957ec681f3Smrg          * point comparisons.
6967ec681f3Smrg          *
6977ec681f3Smrg          * The destination type doesn't matter on newer generations,
6987ec681f3Smrg          * so we set the type to match src0 so we can compact the
6997ec681f3Smrg          * instruction.
          *
          * NOTE(review): this note is worded for CMP (it is identical to the
          * one in CMP()); presumably the same reasoning applies to CMPN --
          * confirm.
7007ec681f3Smrg          */
7017ec681f3Smrg         return set_condmod(condition,
7027ec681f3Smrg                            emit(BRW_OPCODE_CMPN, retype(dst, src0.type),
7037ec681f3Smrg                                 fix_unsigned_negate(src0),
7047ec681f3Smrg                                 fix_unsigned_negate(src1)));
7057ec681f3Smrg      }
7067ec681f3Smrg
7077ec681f3Smrg      /**
7087ec681f3Smrg       * Gfx4 predicated IF.
       *
       * Emits a BRW_OPCODE_IF with no operands and attaches the given
       * predicate to it through set_predicate().
       *
       * @param predicate  Predicate mode handed to set_predicate().
       * @return Whatever set_predicate() returns for the emitted IF.
70901e04c3fSmrg       */
71001e04c3fSmrg      instruction *
71101e04c3fSmrg      IF(brw_predicate predicate) const
71201e04c3fSmrg      {
71301e04c3fSmrg         return set_predicate(predicate, emit(BRW_OPCODE_IF));
71401e04c3fSmrg      }
71501e04c3fSmrg
71601e04c3fSmrg      /**
71701e04c3fSmrg       * CSEL: dst = src2 <op> 0.0f ? src0 : src1
       *
       * @param dst        Destination; emitted retyped to BRW_REGISTER_TYPE_F.
       * @param src0       Value selected when the comparison holds; emitted
       *                   retyped to BRW_REGISTER_TYPE_F.
       * @param src1       Value selected otherwise; emitted retyped to
       *                   BRW_REGISTER_TYPE_F.
       * @param src2       Value compared against 0.0f.  It is not retyped; the
       *                   caller must already pass it as BRW_REGISTER_TYPE_F
       *                   (asserted).
       * @param condition  The <op> above, attached through set_condmod().
       * @return Whatever set_condmod() returns for the emitted
       *         BRW_OPCODE_CSEL instruction.
71801e04c3fSmrg       */
71901e04c3fSmrg      instruction *
72001e04c3fSmrg      CSEL(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
72101e04c3fSmrg           const src_reg &src2, brw_conditional_mod condition) const
72201e04c3fSmrg      {
72301e04c3fSmrg         /* CSEL only operates on floats, so we can't do integer </<=/>=/>
72401e04c3fSmrg          * comparisons.  Zero/non-zero (== and !=) comparisons almost work.
72501e04c3fSmrg          * 0x80000000 fails because it is -0.0, and -0.0 == 0.0.
72601e04c3fSmrg          */
72701e04c3fSmrg         assert(src2.type == BRW_REGISTER_TYPE_F);
72801e04c3fSmrg
         /* dst, src0 and src1 are only reinterpreted as float here (retype),
          * no conversion instruction is emitted for them in this function.
          */
72901e04c3fSmrg         return set_condmod(condition,
73001e04c3fSmrg                            emit(BRW_OPCODE_CSEL,
73101e04c3fSmrg                                 retype(dst, BRW_REGISTER_TYPE_F),
73201e04c3fSmrg                                 retype(src0, BRW_REGISTER_TYPE_F),
7337ec681f3Smrg                                 retype(src1, BRW_REGISTER_TYPE_F),
7347ec681f3Smrg                                 src2));
73501e04c3fSmrg      }
73601e04c3fSmrg
73701e04c3fSmrg      /**
73801e04c3fSmrg       * Emit a linear interpolation instruction.
       *
       * The value produced is x*(1-a) + y*a.  When the device version is in
       * the range 6..10 a single BRW_OPCODE_LRP is emitted; on any other
       * version the expression is expanded into MUL/ADD instructions that
       * use three temporaries allocated with vgrf(dst.type).
       *
       * @param dst  Destination register.
       * @param x    Value weighted by (1 - a).
       * @param y    Value weighted by a.
       * @param a    Interpolation factor.
       * @return The LRP instruction, or the final ADD of the expanded
       *         sequence.
73901e04c3fSmrg       */
74001e04c3fSmrg      instruction *
74101e04c3fSmrg      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
74201e04c3fSmrg          const src_reg &a) const
74301e04c3fSmrg      {
7447ec681f3Smrg         if (shader->devinfo->ver >= 6 && shader->devinfo->ver <= 10) {
74501e04c3fSmrg            /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
74601e04c3fSmrg             * we need to reorder the operands.
74701e04c3fSmrg             */
74801e04c3fSmrg            return emit(BRW_OPCODE_LRP, dst, a, y, x);
74901e04c3fSmrg
75001e04c3fSmrg         } else {
75101e04c3fSmrg            /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
75201e04c3fSmrg            const dst_reg y_times_a = vgrf(dst.type);
75301e04c3fSmrg            const dst_reg one_minus_a = vgrf(dst.type);
75401e04c3fSmrg            const dst_reg x_times_one_minus_a = vgrf(dst.type);
75501e04c3fSmrg
            /* y*a */
75601e04c3fSmrg            MUL(y_times_a, y, a);
            /* 1 - a, written as (-a) + 1.0f */
75701e04c3fSmrg            ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
            /* x*(1-a) */
75801e04c3fSmrg            MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
            /* Sum of the two products; this last instruction is returned. */
75901e04c3fSmrg            return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
76001e04c3fSmrg         }
76101e04c3fSmrg      }
76201e04c3fSmrg
76301e04c3fSmrg      /**
76401e04c3fSmrg       * Collect a number of registers in a contiguous range of registers.
       *
       * Emits SHADER_OPCODE_LOAD_PAYLOAD and fills in the header_size and
       * size_written fields of the new instruction.
       *
       * @param dst          Destination of the payload.
       * @param src          Array of source registers.
       * @param sources      Number of entries in src.
       * @param header_size  Number of leading entries of src that are
       *                     accounted as header; each one contributes
       *                     REG_SIZE to size_written regardless of its type.
       * @return The emitted LOAD_PAYLOAD instruction.
76501e04c3fSmrg       */
76601e04c3fSmrg      instruction *
76701e04c3fSmrg      LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src,
76801e04c3fSmrg                   unsigned sources, unsigned header_size) const
76901e04c3fSmrg      {
77001e04c3fSmrg         instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
77101e04c3fSmrg         inst->header_size = header_size;
         /* Header part first ... */
77201e04c3fSmrg         inst->size_written = header_size * REG_SIZE;
         /* ... then every remaining source adds dispatch_width() elements of
          * its own type, scaled by the destination stride and rounded up to a
          * multiple of REG_SIZE.
          */
77301e04c3fSmrg         for (unsigned i = header_size; i < sources; i++) {
77401e04c3fSmrg            inst->size_written +=
77501e04c3fSmrg               ALIGN(dispatch_width() * type_sz(src[i].type) * dst.stride,
77601e04c3fSmrg                     REG_SIZE);
77701e04c3fSmrg         }
77801e04c3fSmrg
77901e04c3fSmrg         return inst;
78001e04c3fSmrg      }
78101e04c3fSmrg
      /**
       * Emit a SHADER_OPCODE_UNDEF for the given register.
       *
       * The destination is emitted retyped to BRW_REGISTER_TYPE_UD, and
       * size_written is taken from the shader's allocation table entry for
       * dst.nr rather than from the type of dst.
       *
       * @param dst  Register to mark; must be in the VGRF file (asserted).
       * @return The emitted UNDEF instruction.
       */
7827ec681f3Smrg      instruction *
7837ec681f3Smrg      UNDEF(const dst_reg &dst) const
7849f464c52Smaya      {
7857ec681f3Smrg         assert(dst.file == VGRF);
7867ec681f3Smrg         instruction *inst = emit(SHADER_OPCODE_UNDEF,
7877ec681f3Smrg                                  retype(dst, BRW_REGISTER_TYPE_UD));
         /* NOTE(review): alloc.sizes[] is presumably the size of the VGRF in
          * registers, which would make this cover the whole allocation --
          * confirm.
          */
7887ec681f3Smrg         inst->size_written = shader->alloc.sizes[dst.nr] * REG_SIZE;
7899f464c52Smaya
7907ec681f3Smrg         return inst;
7919f464c52Smaya      }
7929f464c52Smaya
      /**
       * Backend shader object.  Within this class it is used to read
       * shader->devinfo (hardware version checks) and shader->alloc (VGRF
       * allocation sizes).
       */
7937ec681f3Smrg      backend_shader *shader;
7947ec681f3Smrg
79501e04c3fSmrg   private:
79601e04c3fSmrg      /**
79701e04c3fSmrg       * Workaround for negation of UD registers.  See comment in
79801e04c3fSmrg       * fs_generator::generate_code() for more details.
       *
       * @param src  Source operand to check.
       * @return src itself, unless it is of type BRW_REGISTER_TYPE_UD and has
       *         its negate flag set; in that case a MOV of src into a new UD
       *         temporary is emitted and the temporary is returned instead.
79901e04c3fSmrg       */
80001e04c3fSmrg      src_reg
80101e04c3fSmrg      fix_unsigned_negate(const src_reg &src) const
80201e04c3fSmrg      {
80301e04c3fSmrg         if (src.type == BRW_REGISTER_TYPE_UD &&
80401e04c3fSmrg             src.negate) {
            /* The MOV consumes the negated source; callers then read the
             * temporary.
             */
80501e04c3fSmrg            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
80601e04c3fSmrg            MOV(temp, src);
80701e04c3fSmrg            return src_reg(temp);
80801e04c3fSmrg         } else {
80901e04c3fSmrg            return src;
81001e04c3fSmrg         }
81101e04c3fSmrg      }
81201e04c3fSmrg
81301e04c3fSmrg      /**
81401e04c3fSmrg       * Workaround for source register modes not supported by the ternary
81501e04c3fSmrg       * instruction encoding.
       *
       * Sources in the ATTR, VGRF, UNIFORM and IMM files are returned
       * unchanged, as are FIXED_GRF sources whose region is vertical stride
       * 8, width 8, horizontal stride 1.  Every other source is copied with a
       * MOV into a new VGRF of the same type, and that VGRF is returned.
       *
       * @param src  Source operand to check.
       * @return src itself or the temporary holding its copy.
81601e04c3fSmrg       */
81701e04c3fSmrg      src_reg
81801e04c3fSmrg      fix_3src_operand(const src_reg &src) const
81901e04c3fSmrg      {
8209f464c52Smaya         switch (src.file) {
8219f464c52Smaya         case FIXED_GRF:
8229f464c52Smaya            /* FINISHME: Could handle scalar region, other stride=1 regions */
            /* Any other region leaves the switch and is expanded below. */
8239f464c52Smaya            if (src.vstride != BRW_VERTICAL_STRIDE_8 ||
8249f464c52Smaya                src.width != BRW_WIDTH_8 ||
8259f464c52Smaya                src.hstride != BRW_HORIZONTAL_STRIDE_1)
8269f464c52Smaya               break;
8277ec681f3Smrg            FALLTHROUGH;
8289f464c52Smaya         case ATTR:
8299f464c52Smaya         case VGRF:
8309f464c52Smaya         case UNIFORM:
8319f464c52Smaya         case IMM:
83201e04c3fSmrg            return src;
8339f464c52Smaya         default:
8349f464c52Smaya            break;
83501e04c3fSmrg         }
8369f464c52Smaya
         /* Not usable directly: copy it into a temporary of the same type. */
8379f464c52Smaya         dst_reg expanded = vgrf(src.type);
8389f464c52Smaya         MOV(expanded, src);
8399f464c52Smaya         return expanded;
84001e04c3fSmrg      }
84101e04c3fSmrg
84201e04c3fSmrg      /**
84301e04c3fSmrg       * Workaround for source register modes not supported by the math
84401e04c3fSmrg       * instruction.
       *
       * A copy through a temporary VGRF (same type as src) is made when
       *  - the device version is 6 and src is an IMM, a UNIFORM, or has the
       *    abs or negate modifier set, or
       *  - the device version is 7 and src is an IMM.
       * In every other case src is returned unchanged.
       *
       * @param src  Source operand to check.
       * @return src itself or the temporary holding its copy.
84501e04c3fSmrg       */
84601e04c3fSmrg      src_reg
84701e04c3fSmrg      fix_math_operand(const src_reg &src) const
84801e04c3fSmrg      {
8497ec681f3Smrg         /* Can't do hstride == 0 args on gfx6 math, so expand it out. We
85001e04c3fSmrg          * might be able to do better by doing execsize = 1 math and then
85101e04c3fSmrg          * expanding that result out, but we would need to be careful with
85201e04c3fSmrg          * masking.
85301e04c3fSmrg          *
8547ec681f3Smrg          * Gfx6 hardware ignores source modifiers (negate and abs) on math
85501e04c3fSmrg          * instructions, so we also move to a temp to set those up.
85601e04c3fSmrg          *
8577ec681f3Smrg          * Gfx7 relaxes most of the above restrictions, but still can't use IMM
85801e04c3fSmrg          * operands to math
85901e04c3fSmrg          */
8607ec681f3Smrg         if ((shader->devinfo->ver == 6 &&
86101e04c3fSmrg              (src.file == IMM || src.file == UNIFORM ||
86201e04c3fSmrg               src.abs || src.negate)) ||
8637ec681f3Smrg             (shader->devinfo->ver == 7 && src.file == IMM)) {
            /* The MOV reads src with its modifiers; the math instruction
             * then reads the temporary.
             */
86401e04c3fSmrg            const dst_reg tmp = vgrf(src.type);
86501e04c3fSmrg            MOV(tmp, src);
86601e04c3fSmrg            return tmp;
86701e04c3fSmrg         } else {
86801e04c3fSmrg            return src;
86901e04c3fSmrg         }
87001e04c3fSmrg      }
87101e04c3fSmrg
      /* NOTE(review): block and cursor are presumably the insertion point
       * used by emit(); their use is not visible in this part of the file --
       * confirm against emit().
       */
87201e04c3fSmrg      bblock_t *block;
87301e04c3fSmrg      exec_node *cursor;
87401e04c3fSmrg
      /* NOTE(review): presumably the state behind dispatch_width() and the
       * related accessors defined earlier in the class -- confirm.
       */
87501e04c3fSmrg      unsigned _dispatch_width;
87601e04c3fSmrg      unsigned _group;
87701e04c3fSmrg      bool force_writemask_all;
87801e04c3fSmrg
87901e04c3fSmrg      /** Debug annotation info. */
88001e04c3fSmrg      struct {
88101e04c3fSmrg         const char *str;
88201e04c3fSmrg         const void *ir;
88301e04c3fSmrg      } annotation;
88401e04c3fSmrg   };
88501e04c3fSmrg}
88601e04c3fSmrg
88701e04c3fSmrg#endif
888