/* -*- c++ -*- */
/*
 * Copyright © 2010-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24b8e80941Smrg
25b8e80941Smrg#ifndef BRW_VEC4_BUILDER_H
26b8e80941Smrg#define BRW_VEC4_BUILDER_H
27b8e80941Smrg
28b8e80941Smrg#include "brw_ir_vec4.h"
29b8e80941Smrg#include "brw_ir_allocator.h"
30b8e80941Smrg
31b8e80941Smrgnamespace brw {
32b8e80941Smrg   /**
33b8e80941Smrg    * Toolbox to assemble a VEC4 IR program out of individual instructions.
34b8e80941Smrg    *
35b8e80941Smrg    * This object is meant to have an interface consistent with
36b8e80941Smrg    * brw::fs_builder.  They cannot be fully interchangeable because
37b8e80941Smrg    * brw::fs_builder generates scalar code while brw::vec4_builder generates
38b8e80941Smrg    * vector code.
39b8e80941Smrg    */
40b8e80941Smrg   class vec4_builder {
41b8e80941Smrg   public:
42b8e80941Smrg      /** Type used in this IR to represent a source of an instruction. */
43b8e80941Smrg      typedef brw::src_reg src_reg;
44b8e80941Smrg
45b8e80941Smrg      /** Type used in this IR to represent the destination of an instruction. */
46b8e80941Smrg      typedef brw::dst_reg dst_reg;
47b8e80941Smrg
48b8e80941Smrg      /** Type used in this IR to represent an instruction. */
49b8e80941Smrg      typedef vec4_instruction instruction;
50b8e80941Smrg
51b8e80941Smrg      /**
52b8e80941Smrg       * Construct a vec4_builder that inserts instructions into \p shader.
53b8e80941Smrg       */
54b8e80941Smrg      vec4_builder(backend_shader *shader, unsigned dispatch_width = 8) :
55b8e80941Smrg         shader(shader), block(NULL), cursor(NULL),
56b8e80941Smrg         _dispatch_width(dispatch_width), _group(0),
57b8e80941Smrg         force_writemask_all(false),
58b8e80941Smrg         annotation()
59b8e80941Smrg      {
60b8e80941Smrg      }
61b8e80941Smrg
62b8e80941Smrg      /**
63b8e80941Smrg       * Construct a vec4_builder that inserts instructions into \p shader
64b8e80941Smrg       * before instruction \p inst in basic block \p block.  The default
65b8e80941Smrg       * execution controls and debug annotation are initialized from the
66b8e80941Smrg       * instruction passed as argument.
67b8e80941Smrg       */
68b8e80941Smrg      vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
69b8e80941Smrg         shader(shader), block(block), cursor(inst),
70b8e80941Smrg         _dispatch_width(inst->exec_size), _group(inst->group),
71b8e80941Smrg         force_writemask_all(inst->force_writemask_all)
72b8e80941Smrg      {
73b8e80941Smrg         annotation.str = inst->annotation;
74b8e80941Smrg         annotation.ir = inst->ir;
75b8e80941Smrg      }
76b8e80941Smrg
77b8e80941Smrg      /**
78b8e80941Smrg       * Construct a vec4_builder that inserts instructions before \p cursor
79b8e80941Smrg       * in basic block \p block, inheriting other code generation parameters
80b8e80941Smrg       * from this.
81b8e80941Smrg       */
82b8e80941Smrg      vec4_builder
83b8e80941Smrg      at(bblock_t *block, exec_node *cursor) const
84b8e80941Smrg      {
85b8e80941Smrg         vec4_builder bld = *this;
86b8e80941Smrg         bld.block = block;
87b8e80941Smrg         bld.cursor = cursor;
88b8e80941Smrg         return bld;
89b8e80941Smrg      }
90b8e80941Smrg
91b8e80941Smrg      /**
92b8e80941Smrg       * Construct a vec4_builder appending instructions at the end of the
93b8e80941Smrg       * instruction list of the shader, inheriting other code generation
94b8e80941Smrg       * parameters from this.
95b8e80941Smrg       */
96b8e80941Smrg      vec4_builder
97b8e80941Smrg      at_end() const
98b8e80941Smrg      {
99b8e80941Smrg         return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
100b8e80941Smrg      }
101b8e80941Smrg
102b8e80941Smrg      /**
103b8e80941Smrg       * Construct a builder specifying the default SIMD width and group of
104b8e80941Smrg       * channel enable signals, inheriting other code generation parameters
105b8e80941Smrg       * from this.
106b8e80941Smrg       *
107b8e80941Smrg       * \p n gives the default SIMD width, \p i gives the slot group used for
108b8e80941Smrg       * predication and control flow masking in multiples of \p n channels.
109b8e80941Smrg       */
110b8e80941Smrg      vec4_builder
111b8e80941Smrg      group(unsigned n, unsigned i) const
112b8e80941Smrg      {
113b8e80941Smrg         assert(force_writemask_all ||
114b8e80941Smrg                (n <= dispatch_width() && i < dispatch_width() / n));
115b8e80941Smrg         vec4_builder bld = *this;
116b8e80941Smrg         bld._dispatch_width = n;
117b8e80941Smrg         bld._group += i * n;
118b8e80941Smrg         return bld;
119b8e80941Smrg      }
120b8e80941Smrg
121b8e80941Smrg      /**
122b8e80941Smrg       * Construct a builder with per-channel control flow execution masking
123b8e80941Smrg       * disabled if \p b is true.  If control flow execution masking is
124b8e80941Smrg       * already disabled this has no effect.
125b8e80941Smrg       */
126b8e80941Smrg      vec4_builder
127b8e80941Smrg      exec_all(bool b = true) const
128b8e80941Smrg      {
129b8e80941Smrg         vec4_builder bld = *this;
130b8e80941Smrg         if (b)
131b8e80941Smrg            bld.force_writemask_all = true;
132b8e80941Smrg         return bld;
133b8e80941Smrg      }
134b8e80941Smrg
135b8e80941Smrg      /**
136b8e80941Smrg       * Construct a builder with the given debug annotation info.
137b8e80941Smrg       */
138b8e80941Smrg      vec4_builder
139b8e80941Smrg      annotate(const char *str, const void *ir = NULL) const
140b8e80941Smrg      {
141b8e80941Smrg         vec4_builder bld = *this;
142b8e80941Smrg         bld.annotation.str = str;
143b8e80941Smrg         bld.annotation.ir = ir;
144b8e80941Smrg         return bld;
145b8e80941Smrg      }
146b8e80941Smrg
147b8e80941Smrg      /**
148b8e80941Smrg       * Get the SIMD width in use.
149b8e80941Smrg       */
150b8e80941Smrg      unsigned
151b8e80941Smrg      dispatch_width() const
152b8e80941Smrg      {
153b8e80941Smrg         return _dispatch_width;
154b8e80941Smrg      }
155b8e80941Smrg
156b8e80941Smrg      /**
157b8e80941Smrg       * Get the channel group in use.
158b8e80941Smrg       */
159b8e80941Smrg      unsigned
160b8e80941Smrg      group() const
161b8e80941Smrg      {
162b8e80941Smrg         return _group;
163b8e80941Smrg      }
164b8e80941Smrg
165b8e80941Smrg      /**
166b8e80941Smrg       * Allocate a virtual register of natural vector size (four for this IR)
167b8e80941Smrg       * and SIMD width.  \p n gives the amount of space to allocate in
168b8e80941Smrg       * dispatch_width units (which is just enough space for four logical
169b8e80941Smrg       * components in this IR).
170b8e80941Smrg       */
171b8e80941Smrg      dst_reg
172b8e80941Smrg      vgrf(enum brw_reg_type type, unsigned n = 1) const
173b8e80941Smrg      {
174b8e80941Smrg         assert(dispatch_width() <= 32);
175b8e80941Smrg
176b8e80941Smrg         if (n > 0)
177b8e80941Smrg            return retype(dst_reg(VGRF, shader->alloc.allocate(
178b8e80941Smrg                                     n * DIV_ROUND_UP(type_sz(type), 4))),
179b8e80941Smrg                           type);
180b8e80941Smrg         else
181b8e80941Smrg            return retype(null_reg_ud(), type);
182b8e80941Smrg      }
183b8e80941Smrg
184b8e80941Smrg      /**
185b8e80941Smrg       * Create a null register of floating type.
186b8e80941Smrg       */
187b8e80941Smrg      dst_reg
188b8e80941Smrg      null_reg_f() const
189b8e80941Smrg      {
190b8e80941Smrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
191b8e80941Smrg                               BRW_REGISTER_TYPE_F));
192b8e80941Smrg      }
193b8e80941Smrg
194b8e80941Smrg      /**
195b8e80941Smrg       * Create a null register of signed integer type.
196b8e80941Smrg       */
197b8e80941Smrg      dst_reg
198b8e80941Smrg      null_reg_d() const
199b8e80941Smrg      {
200b8e80941Smrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
201b8e80941Smrg                               BRW_REGISTER_TYPE_D));
202b8e80941Smrg      }
203b8e80941Smrg
204b8e80941Smrg      /**
205b8e80941Smrg       * Create a null register of unsigned integer type.
206b8e80941Smrg       */
207b8e80941Smrg      dst_reg
208b8e80941Smrg      null_reg_ud() const
209b8e80941Smrg      {
210b8e80941Smrg         return dst_reg(retype(brw_null_vec(dispatch_width()),
211b8e80941Smrg                               BRW_REGISTER_TYPE_UD));
212b8e80941Smrg      }
213b8e80941Smrg
214b8e80941Smrg      /**
215b8e80941Smrg       * Insert an instruction into the program.
216b8e80941Smrg       */
217b8e80941Smrg      instruction *
218b8e80941Smrg      emit(const instruction &inst) const
219b8e80941Smrg      {
220b8e80941Smrg         return emit(new(shader->mem_ctx) instruction(inst));
221b8e80941Smrg      }
222b8e80941Smrg
223b8e80941Smrg      /**
224b8e80941Smrg       * Create and insert a nullary control instruction into the program.
225b8e80941Smrg       */
226b8e80941Smrg      instruction *
227b8e80941Smrg      emit(enum opcode opcode) const
228b8e80941Smrg      {
229b8e80941Smrg         return emit(instruction(opcode));
230b8e80941Smrg      }
231b8e80941Smrg
232b8e80941Smrg      /**
233b8e80941Smrg       * Create and insert a nullary instruction into the program.
234b8e80941Smrg       */
235b8e80941Smrg      instruction *
236b8e80941Smrg      emit(enum opcode opcode, const dst_reg &dst) const
237b8e80941Smrg      {
238b8e80941Smrg         return emit(instruction(opcode, dst));
239b8e80941Smrg      }
240b8e80941Smrg
241b8e80941Smrg      /**
242b8e80941Smrg       * Create and insert a unary instruction into the program.
243b8e80941Smrg       */
244b8e80941Smrg      instruction *
245b8e80941Smrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
246b8e80941Smrg      {
247b8e80941Smrg         switch (opcode) {
248b8e80941Smrg         case SHADER_OPCODE_RCP:
249b8e80941Smrg         case SHADER_OPCODE_RSQ:
250b8e80941Smrg         case SHADER_OPCODE_SQRT:
251b8e80941Smrg         case SHADER_OPCODE_EXP2:
252b8e80941Smrg         case SHADER_OPCODE_LOG2:
253b8e80941Smrg         case SHADER_OPCODE_SIN:
254b8e80941Smrg         case SHADER_OPCODE_COS:
255b8e80941Smrg            return fix_math_instruction(
256b8e80941Smrg               emit(instruction(opcode, dst,
257b8e80941Smrg                                fix_math_operand(src0))));
258b8e80941Smrg
259b8e80941Smrg         default:
260b8e80941Smrg            return emit(instruction(opcode, dst, src0));
261b8e80941Smrg         }
262b8e80941Smrg      }
263b8e80941Smrg
264b8e80941Smrg      /**
265b8e80941Smrg       * Create and insert a binary instruction into the program.
266b8e80941Smrg       */
267b8e80941Smrg      instruction *
268b8e80941Smrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
269b8e80941Smrg           const src_reg &src1) const
270b8e80941Smrg      {
271b8e80941Smrg         switch (opcode) {
272b8e80941Smrg         case SHADER_OPCODE_POW:
273b8e80941Smrg         case SHADER_OPCODE_INT_QUOTIENT:
274b8e80941Smrg         case SHADER_OPCODE_INT_REMAINDER:
275b8e80941Smrg            return fix_math_instruction(
276b8e80941Smrg               emit(instruction(opcode, dst,
277b8e80941Smrg                                fix_math_operand(src0),
278b8e80941Smrg                                fix_math_operand(src1))));
279b8e80941Smrg
280b8e80941Smrg         default:
281b8e80941Smrg            return emit(instruction(opcode, dst, src0, src1));
282b8e80941Smrg         }
283b8e80941Smrg      }
284b8e80941Smrg
285b8e80941Smrg      /**
286b8e80941Smrg       * Create and insert a ternary instruction into the program.
287b8e80941Smrg       */
288b8e80941Smrg      instruction *
289b8e80941Smrg      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
290b8e80941Smrg           const src_reg &src1, const src_reg &src2) const
291b8e80941Smrg      {
292b8e80941Smrg         switch (opcode) {
293b8e80941Smrg         case BRW_OPCODE_BFE:
294b8e80941Smrg         case BRW_OPCODE_BFI2:
295b8e80941Smrg         case BRW_OPCODE_MAD:
296b8e80941Smrg         case BRW_OPCODE_LRP:
297b8e80941Smrg            return emit(instruction(opcode, dst,
298b8e80941Smrg                                    fix_3src_operand(src0),
299b8e80941Smrg                                    fix_3src_operand(src1),
300b8e80941Smrg                                    fix_3src_operand(src2)));
301b8e80941Smrg
302b8e80941Smrg         default:
303b8e80941Smrg            return emit(instruction(opcode, dst, src0, src1, src2));
304b8e80941Smrg         }
305b8e80941Smrg      }
306b8e80941Smrg
307b8e80941Smrg      /**
308b8e80941Smrg       * Insert a preallocated instruction into the program.
309b8e80941Smrg       */
310b8e80941Smrg      instruction *
311b8e80941Smrg      emit(instruction *inst) const
312b8e80941Smrg      {
313b8e80941Smrg         inst->exec_size = dispatch_width();
314b8e80941Smrg         inst->group = group();
315b8e80941Smrg         inst->force_writemask_all = force_writemask_all;
316b8e80941Smrg         inst->size_written = inst->exec_size * type_sz(inst->dst.type);
317b8e80941Smrg         inst->annotation = annotation.str;
318b8e80941Smrg         inst->ir = annotation.ir;
319b8e80941Smrg
320b8e80941Smrg         if (block)
321b8e80941Smrg            static_cast<instruction *>(cursor)->insert_before(block, inst);
322b8e80941Smrg         else
323b8e80941Smrg            cursor->insert_before(inst);
324b8e80941Smrg
325b8e80941Smrg         return inst;
326b8e80941Smrg      }
327b8e80941Smrg
328b8e80941Smrg      /**
329b8e80941Smrg       * Select \p src0 if the comparison of both sources with the given
330b8e80941Smrg       * conditional mod evaluates to true, otherwise select \p src1.
331b8e80941Smrg       *
332b8e80941Smrg       * Generally useful to get the minimum or maximum of two values.
333b8e80941Smrg       */
334b8e80941Smrg      instruction *
335b8e80941Smrg      emit_minmax(const dst_reg &dst, const src_reg &src0,
336b8e80941Smrg                  const src_reg &src1, brw_conditional_mod mod) const
337b8e80941Smrg      {
338b8e80941Smrg         assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
339b8e80941Smrg
340b8e80941Smrg         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
341b8e80941Smrg                                     fix_unsigned_negate(src1)));
342b8e80941Smrg      }
343b8e80941Smrg
344b8e80941Smrg      /**
345b8e80941Smrg       * Copy any live channel from \p src to the first channel of the result.
346b8e80941Smrg       */
347b8e80941Smrg      src_reg
348b8e80941Smrg      emit_uniformize(const src_reg &src) const
349b8e80941Smrg      {
350b8e80941Smrg         const vec4_builder ubld = exec_all();
351b8e80941Smrg         const dst_reg chan_index =
352b8e80941Smrg            writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
353b8e80941Smrg         const dst_reg dst = vgrf(src.type);
354b8e80941Smrg
355b8e80941Smrg         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
356b8e80941Smrg         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
357b8e80941Smrg
358b8e80941Smrg         return src_reg(dst);
359b8e80941Smrg      }
360b8e80941Smrg
361b8e80941Smrg      /**
362b8e80941Smrg       * Assorted arithmetic ops.
363b8e80941Smrg       * @{
364b8e80941Smrg       */
365b8e80941Smrg#define ALU1(op)                                        \
366b8e80941Smrg      instruction *                                     \
367b8e80941Smrg      op(const dst_reg &dst, const src_reg &src0) const \
368b8e80941Smrg      {                                                 \
369b8e80941Smrg         return emit(BRW_OPCODE_##op, dst, src0);       \
370b8e80941Smrg      }
371b8e80941Smrg
372b8e80941Smrg#define ALU2(op)                                                        \
373b8e80941Smrg      instruction *                                                     \
374b8e80941Smrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
375b8e80941Smrg      {                                                                 \
376b8e80941Smrg         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
377b8e80941Smrg      }
378b8e80941Smrg
379b8e80941Smrg#define ALU2_ACC(op)                                                    \
380b8e80941Smrg      instruction *                                                     \
381b8e80941Smrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
382b8e80941Smrg      {                                                                 \
383b8e80941Smrg         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
384b8e80941Smrg         inst->writes_accumulator = true;                               \
385b8e80941Smrg         return inst;                                                   \
386b8e80941Smrg      }
387b8e80941Smrg
388b8e80941Smrg#define ALU3(op)                                                        \
389b8e80941Smrg      instruction *                                                     \
390b8e80941Smrg      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
391b8e80941Smrg         const src_reg &src2) const                                     \
392b8e80941Smrg      {                                                                 \
393b8e80941Smrg         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
394b8e80941Smrg      }
395b8e80941Smrg
396b8e80941Smrg      ALU2(ADD)
397b8e80941Smrg      ALU2_ACC(ADDC)
398b8e80941Smrg      ALU2(AND)
399b8e80941Smrg      ALU2(ASR)
400b8e80941Smrg      ALU2(AVG)
401b8e80941Smrg      ALU3(BFE)
402b8e80941Smrg      ALU2(BFI1)
403b8e80941Smrg      ALU3(BFI2)
404b8e80941Smrg      ALU1(BFREV)
405b8e80941Smrg      ALU1(CBIT)
406b8e80941Smrg      ALU2(CMPN)
407b8e80941Smrg      ALU3(CSEL)
408b8e80941Smrg      ALU1(DIM)
409b8e80941Smrg      ALU2(DP2)
410b8e80941Smrg      ALU2(DP3)
411b8e80941Smrg      ALU2(DP4)
412b8e80941Smrg      ALU2(DPH)
413b8e80941Smrg      ALU1(F16TO32)
414b8e80941Smrg      ALU1(F32TO16)
415b8e80941Smrg      ALU1(FBH)
416b8e80941Smrg      ALU1(FBL)
417b8e80941Smrg      ALU1(FRC)
418b8e80941Smrg      ALU2(LINE)
419b8e80941Smrg      ALU1(LZD)
420b8e80941Smrg      ALU2(MAC)
421b8e80941Smrg      ALU2_ACC(MACH)
422b8e80941Smrg      ALU3(MAD)
423b8e80941Smrg      ALU1(MOV)
424b8e80941Smrg      ALU2(MUL)
425b8e80941Smrg      ALU1(NOT)
426b8e80941Smrg      ALU2(OR)
427b8e80941Smrg      ALU2(PLN)
428b8e80941Smrg      ALU1(RNDD)
429b8e80941Smrg      ALU1(RNDE)
430b8e80941Smrg      ALU1(RNDU)
431b8e80941Smrg      ALU1(RNDZ)
432b8e80941Smrg      ALU2(SAD2)
433b8e80941Smrg      ALU2_ACC(SADA2)
434b8e80941Smrg      ALU2(SEL)
435b8e80941Smrg      ALU2(SHL)
436b8e80941Smrg      ALU2(SHR)
437b8e80941Smrg      ALU2_ACC(SUBB)
438b8e80941Smrg      ALU2(XOR)
439b8e80941Smrg
440b8e80941Smrg#undef ALU3
441b8e80941Smrg#undef ALU2_ACC
442b8e80941Smrg#undef ALU2
443b8e80941Smrg#undef ALU1
444b8e80941Smrg      /** @} */
445b8e80941Smrg
446b8e80941Smrg      /**
447b8e80941Smrg       * CMP: Sets the low bit of the destination channels with the result
448b8e80941Smrg       * of the comparison, while the upper bits are undefined, and updates
449b8e80941Smrg       * the flag register with the packed 16 bits of the result.
450b8e80941Smrg       */
451b8e80941Smrg      instruction *
452b8e80941Smrg      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
453b8e80941Smrg          brw_conditional_mod condition) const
454b8e80941Smrg      {
455b8e80941Smrg         /* Take the instruction:
456b8e80941Smrg          *
457b8e80941Smrg          * CMP null<d> src0<f> src1<f>
458b8e80941Smrg          *
459b8e80941Smrg          * Original gen4 does type conversion to the destination type
460b8e80941Smrg          * before comparison, producing garbage results for floating
461b8e80941Smrg          * point comparisons.
462b8e80941Smrg          *
463b8e80941Smrg          * The destination type doesn't matter on newer generations,
464b8e80941Smrg          * so we set the type to match src0 so we can compact the
465b8e80941Smrg          * instruction.
466b8e80941Smrg          */
467b8e80941Smrg         return set_condmod(condition,
468b8e80941Smrg                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
469b8e80941Smrg                                 fix_unsigned_negate(src0),
470b8e80941Smrg                                 fix_unsigned_negate(src1)));
471b8e80941Smrg      }
472b8e80941Smrg
473b8e80941Smrg      /**
474b8e80941Smrg       * Gen4 predicated IF.
475b8e80941Smrg       */
476b8e80941Smrg      instruction *
477b8e80941Smrg      IF(brw_predicate predicate) const
478b8e80941Smrg      {
479b8e80941Smrg         return set_predicate(predicate, emit(BRW_OPCODE_IF));
480b8e80941Smrg      }
481b8e80941Smrg
482b8e80941Smrg      /**
483b8e80941Smrg       * Gen6 IF with embedded comparison.
484b8e80941Smrg       */
485b8e80941Smrg      instruction *
486b8e80941Smrg      IF(const src_reg &src0, const src_reg &src1,
487b8e80941Smrg         brw_conditional_mod condition) const
488b8e80941Smrg      {
489b8e80941Smrg         assert(shader->devinfo->gen == 6);
490b8e80941Smrg         return set_condmod(condition,
491b8e80941Smrg                            emit(BRW_OPCODE_IF,
492b8e80941Smrg                                 null_reg_d(),
493b8e80941Smrg                                 fix_unsigned_negate(src0),
494b8e80941Smrg                                 fix_unsigned_negate(src1)));
495b8e80941Smrg      }
496b8e80941Smrg
497b8e80941Smrg      /**
498b8e80941Smrg       * Emit a linear interpolation instruction.
499b8e80941Smrg       */
500b8e80941Smrg      instruction *
501b8e80941Smrg      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
502b8e80941Smrg          const src_reg &a) const
503b8e80941Smrg      {
504b8e80941Smrg         if (shader->devinfo->gen >= 6 && shader->devinfo->gen <= 10) {
505b8e80941Smrg            /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
506b8e80941Smrg             * we need to reorder the operands.
507b8e80941Smrg             */
508b8e80941Smrg            return emit(BRW_OPCODE_LRP, dst, a, y, x);
509b8e80941Smrg
510b8e80941Smrg         } else {
511b8e80941Smrg            /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
512b8e80941Smrg            const dst_reg y_times_a = vgrf(dst.type);
513b8e80941Smrg            const dst_reg one_minus_a = vgrf(dst.type);
514b8e80941Smrg            const dst_reg x_times_one_minus_a = vgrf(dst.type);
515b8e80941Smrg
516b8e80941Smrg            MUL(y_times_a, y, a);
517b8e80941Smrg            ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
518b8e80941Smrg            MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
519b8e80941Smrg            return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
520b8e80941Smrg         }
521b8e80941Smrg      }
522b8e80941Smrg
523b8e80941Smrg      backend_shader *shader;
524b8e80941Smrg
525b8e80941Smrg   protected:
526b8e80941Smrg      /**
527b8e80941Smrg       * Workaround for negation of UD registers.  See comment in
528b8e80941Smrg       * fs_generator::generate_code() for the details.
529b8e80941Smrg       */
530b8e80941Smrg      src_reg
531b8e80941Smrg      fix_unsigned_negate(const src_reg &src) const
532b8e80941Smrg      {
533b8e80941Smrg         if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
534b8e80941Smrg            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
535b8e80941Smrg            MOV(temp, src);
536b8e80941Smrg            return src_reg(temp);
537b8e80941Smrg         } else {
538b8e80941Smrg            return src;
539b8e80941Smrg         }
540b8e80941Smrg      }
541b8e80941Smrg
542b8e80941Smrg      /**
543b8e80941Smrg       * Workaround for register access modes not supported by the ternary
544b8e80941Smrg       * instruction encoding.
545b8e80941Smrg       */
546b8e80941Smrg      src_reg
547b8e80941Smrg      fix_3src_operand(const src_reg &src) const
548b8e80941Smrg      {
549b8e80941Smrg         /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
550b8e80941Smrg          * able to use vertical stride of zero to replicate the vec4 uniform, like
551b8e80941Smrg          *
552b8e80941Smrg          *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
553b8e80941Smrg          *
554b8e80941Smrg          * But you can't, since vertical stride is always four in three-source
555b8e80941Smrg          * instructions. Instead, insert a MOV instruction to do the replication so
556b8e80941Smrg          * that the three-source instruction can consume it.
557b8e80941Smrg          */
558b8e80941Smrg
559b8e80941Smrg         /* The MOV is only needed if the source is a uniform or immediate. */
560b8e80941Smrg         if (src.file != UNIFORM && src.file != IMM)
561b8e80941Smrg            return src;
562b8e80941Smrg
563b8e80941Smrg         if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
564b8e80941Smrg            return src;
565b8e80941Smrg
566b8e80941Smrg         const dst_reg expanded = vgrf(src.type);
567b8e80941Smrg         emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
568b8e80941Smrg         return src_reg(expanded);
569b8e80941Smrg      }
570b8e80941Smrg
571b8e80941Smrg      /**
572b8e80941Smrg       * Workaround for register access modes not supported by the math
573b8e80941Smrg       * instruction.
574b8e80941Smrg       */
575b8e80941Smrg      src_reg
576b8e80941Smrg      fix_math_operand(const src_reg &src) const
577b8e80941Smrg      {
578b8e80941Smrg         /* The gen6 math instruction ignores the source modifiers --
579b8e80941Smrg          * swizzle, abs, negate, and at least some parts of the register
580b8e80941Smrg          * region description.
581b8e80941Smrg          *
582b8e80941Smrg          * Rather than trying to enumerate all these cases, *always* expand the
583b8e80941Smrg          * operand to a temp GRF for gen6.
584b8e80941Smrg          *
585b8e80941Smrg          * For gen7, keep the operand as-is, except if immediate, which gen7 still
586b8e80941Smrg          * can't use.
587b8e80941Smrg          */
588b8e80941Smrg         if (shader->devinfo->gen == 6 ||
589b8e80941Smrg             (shader->devinfo->gen == 7 && src.file == IMM)) {
590b8e80941Smrg            const dst_reg tmp = vgrf(src.type);
591b8e80941Smrg            MOV(tmp, src);
592b8e80941Smrg            return src_reg(tmp);
593b8e80941Smrg         } else {
594b8e80941Smrg            return src;
595b8e80941Smrg         }
596b8e80941Smrg      }
597b8e80941Smrg
598b8e80941Smrg      /**
599b8e80941Smrg       * Workaround other weirdness of the math instruction.
600b8e80941Smrg       */
601b8e80941Smrg      instruction *
602b8e80941Smrg      fix_math_instruction(instruction *inst) const
603b8e80941Smrg      {
604b8e80941Smrg         if (shader->devinfo->gen == 6 &&
605b8e80941Smrg             inst->dst.writemask != WRITEMASK_XYZW) {
606b8e80941Smrg            const dst_reg tmp = vgrf(inst->dst.type);
607b8e80941Smrg            MOV(inst->dst, src_reg(tmp));
608b8e80941Smrg            inst->dst = tmp;
609b8e80941Smrg
610b8e80941Smrg         } else if (shader->devinfo->gen < 6) {
611b8e80941Smrg            const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
612b8e80941Smrg            inst->base_mrf = 1;
613b8e80941Smrg            inst->mlen = sources;
614b8e80941Smrg         }
615b8e80941Smrg
616b8e80941Smrg         return inst;
617b8e80941Smrg      }
618b8e80941Smrg
619b8e80941Smrg      bblock_t *block;
620b8e80941Smrg      exec_node *cursor;
621b8e80941Smrg
622b8e80941Smrg      unsigned _dispatch_width;
623b8e80941Smrg      unsigned _group;
624b8e80941Smrg      bool force_writemask_all;
625b8e80941Smrg
626b8e80941Smrg      /** Debug annotation info. */
627b8e80941Smrg      struct {
628b8e80941Smrg         const char *str;
629b8e80941Smrg         const void *ir;
630b8e80941Smrg      } annotation;
631b8e80941Smrg   };
632b8e80941Smrg}
633b8e80941Smrg
634b8e80941Smrg#endif
635