1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2010 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include "brw_cfg.h"
25b8e80941Smrg#include "brw_eu.h"
26b8e80941Smrg#include "brw_fs.h"
27b8e80941Smrg#include "brw_nir.h"
28b8e80941Smrg#include "brw_vec4_tes.h"
29b8e80941Smrg#include "dev/gen_debug.h"
30b8e80941Smrg#include "main/uniforms.h"
31b8e80941Smrg#include "util/macros.h"
32b8e80941Smrg
33b8e80941Smrgenum brw_reg_type
34b8e80941Smrgbrw_type_for_base_type(const struct glsl_type *type)
35b8e80941Smrg{
36b8e80941Smrg   switch (type->base_type) {
37b8e80941Smrg   case GLSL_TYPE_FLOAT16:
38b8e80941Smrg      return BRW_REGISTER_TYPE_HF;
39b8e80941Smrg   case GLSL_TYPE_FLOAT:
40b8e80941Smrg      return BRW_REGISTER_TYPE_F;
41b8e80941Smrg   case GLSL_TYPE_INT:
42b8e80941Smrg   case GLSL_TYPE_BOOL:
43b8e80941Smrg   case GLSL_TYPE_SUBROUTINE:
44b8e80941Smrg      return BRW_REGISTER_TYPE_D;
45b8e80941Smrg   case GLSL_TYPE_INT16:
46b8e80941Smrg      return BRW_REGISTER_TYPE_W;
47b8e80941Smrg   case GLSL_TYPE_INT8:
48b8e80941Smrg      return BRW_REGISTER_TYPE_B;
49b8e80941Smrg   case GLSL_TYPE_UINT:
50b8e80941Smrg      return BRW_REGISTER_TYPE_UD;
51b8e80941Smrg   case GLSL_TYPE_UINT16:
52b8e80941Smrg      return BRW_REGISTER_TYPE_UW;
53b8e80941Smrg   case GLSL_TYPE_UINT8:
54b8e80941Smrg      return BRW_REGISTER_TYPE_UB;
55b8e80941Smrg   case GLSL_TYPE_ARRAY:
56b8e80941Smrg      return brw_type_for_base_type(type->fields.array);
57b8e80941Smrg   case GLSL_TYPE_STRUCT:
58b8e80941Smrg   case GLSL_TYPE_INTERFACE:
59b8e80941Smrg   case GLSL_TYPE_SAMPLER:
60b8e80941Smrg   case GLSL_TYPE_ATOMIC_UINT:
61b8e80941Smrg      /* These should be overridden with the type of the member when
62b8e80941Smrg       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
63b8e80941Smrg       * way to trip up if we don't.
64b8e80941Smrg       */
65b8e80941Smrg      return BRW_REGISTER_TYPE_UD;
66b8e80941Smrg   case GLSL_TYPE_IMAGE:
67b8e80941Smrg      return BRW_REGISTER_TYPE_UD;
68b8e80941Smrg   case GLSL_TYPE_DOUBLE:
69b8e80941Smrg      return BRW_REGISTER_TYPE_DF;
70b8e80941Smrg   case GLSL_TYPE_UINT64:
71b8e80941Smrg      return BRW_REGISTER_TYPE_UQ;
72b8e80941Smrg   case GLSL_TYPE_INT64:
73b8e80941Smrg      return BRW_REGISTER_TYPE_Q;
74b8e80941Smrg   case GLSL_TYPE_VOID:
75b8e80941Smrg   case GLSL_TYPE_ERROR:
76b8e80941Smrg   case GLSL_TYPE_FUNCTION:
77b8e80941Smrg      unreachable("not reached");
78b8e80941Smrg   }
79b8e80941Smrg
80b8e80941Smrg   return BRW_REGISTER_TYPE_F;
81b8e80941Smrg}
82b8e80941Smrg
83b8e80941Smrgenum brw_conditional_mod
84b8e80941Smrgbrw_conditional_for_comparison(unsigned int op)
85b8e80941Smrg{
86b8e80941Smrg   switch (op) {
87b8e80941Smrg   case ir_binop_less:
88b8e80941Smrg      return BRW_CONDITIONAL_L;
89b8e80941Smrg   case ir_binop_gequal:
90b8e80941Smrg      return BRW_CONDITIONAL_GE;
91b8e80941Smrg   case ir_binop_equal:
92b8e80941Smrg   case ir_binop_all_equal: /* same as equal for scalars */
93b8e80941Smrg      return BRW_CONDITIONAL_Z;
94b8e80941Smrg   case ir_binop_nequal:
95b8e80941Smrg   case ir_binop_any_nequal: /* same as nequal for scalars */
96b8e80941Smrg      return BRW_CONDITIONAL_NZ;
97b8e80941Smrg   default:
98b8e80941Smrg      unreachable("not reached: bad operation for comparison");
99b8e80941Smrg   }
100b8e80941Smrg}
101b8e80941Smrg
102b8e80941Smrguint32_t
103b8e80941Smrgbrw_math_function(enum opcode op)
104b8e80941Smrg{
105b8e80941Smrg   switch (op) {
106b8e80941Smrg   case SHADER_OPCODE_RCP:
107b8e80941Smrg      return BRW_MATH_FUNCTION_INV;
108b8e80941Smrg   case SHADER_OPCODE_RSQ:
109b8e80941Smrg      return BRW_MATH_FUNCTION_RSQ;
110b8e80941Smrg   case SHADER_OPCODE_SQRT:
111b8e80941Smrg      return BRW_MATH_FUNCTION_SQRT;
112b8e80941Smrg   case SHADER_OPCODE_EXP2:
113b8e80941Smrg      return BRW_MATH_FUNCTION_EXP;
114b8e80941Smrg   case SHADER_OPCODE_LOG2:
115b8e80941Smrg      return BRW_MATH_FUNCTION_LOG;
116b8e80941Smrg   case SHADER_OPCODE_POW:
117b8e80941Smrg      return BRW_MATH_FUNCTION_POW;
118b8e80941Smrg   case SHADER_OPCODE_SIN:
119b8e80941Smrg      return BRW_MATH_FUNCTION_SIN;
120b8e80941Smrg   case SHADER_OPCODE_COS:
121b8e80941Smrg      return BRW_MATH_FUNCTION_COS;
122b8e80941Smrg   case SHADER_OPCODE_INT_QUOTIENT:
123b8e80941Smrg      return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
124b8e80941Smrg   case SHADER_OPCODE_INT_REMAINDER:
125b8e80941Smrg      return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
126b8e80941Smrg   default:
127b8e80941Smrg      unreachable("not reached: unknown math function");
128b8e80941Smrg   }
129b8e80941Smrg}
130b8e80941Smrg
131b8e80941Smrgbool
132b8e80941Smrgbrw_texture_offset(const nir_tex_instr *tex, unsigned src,
133b8e80941Smrg                   uint32_t *offset_bits_out)
134b8e80941Smrg{
135b8e80941Smrg   if (!nir_src_is_const(tex->src[src].src))
136b8e80941Smrg      return false;
137b8e80941Smrg
138b8e80941Smrg   const unsigned num_components = nir_tex_instr_src_size(tex, src);
139b8e80941Smrg
140b8e80941Smrg   /* Combine all three offsets into a single unsigned dword:
141b8e80941Smrg    *
142b8e80941Smrg    *    bits 11:8 - U Offset (X component)
143b8e80941Smrg    *    bits  7:4 - V Offset (Y component)
144b8e80941Smrg    *    bits  3:0 - R Offset (Z component)
145b8e80941Smrg    */
146b8e80941Smrg   uint32_t offset_bits = 0;
147b8e80941Smrg   for (unsigned i = 0; i < num_components; i++) {
148b8e80941Smrg      int offset = nir_src_comp_as_int(tex->src[src].src, i);
149b8e80941Smrg
150b8e80941Smrg      /* offset out of bounds; caller will handle it. */
151b8e80941Smrg      if (offset > 7 || offset < -8)
152b8e80941Smrg         return false;
153b8e80941Smrg
154b8e80941Smrg      const unsigned shift = 4 * (2 - i);
155b8e80941Smrg      offset_bits |= (offset << shift) & (0xF << shift);
156b8e80941Smrg   }
157b8e80941Smrg
158b8e80941Smrg   *offset_bits_out = offset_bits;
159b8e80941Smrg
160b8e80941Smrg   return true;
161b8e80941Smrg}
162b8e80941Smrg
163b8e80941Smrgconst char *
164b8e80941Smrgbrw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
165b8e80941Smrg{
166b8e80941Smrg   switch (op) {
167b8e80941Smrg   case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP:
168b8e80941Smrg      /* The DO instruction doesn't exist on Gen6+, but we use it to mark the
169b8e80941Smrg       * start of a loop in the IR.
170b8e80941Smrg       */
171b8e80941Smrg      if (devinfo->gen >= 6 && op == BRW_OPCODE_DO)
172b8e80941Smrg         return "do";
173b8e80941Smrg
174b8e80941Smrg      /* The following conversion opcodes doesn't exist on Gen8+, but we use
175b8e80941Smrg       * then to mark that we want to do the conversion.
176b8e80941Smrg       */
177b8e80941Smrg      if (devinfo->gen > 7 && op == BRW_OPCODE_F32TO16)
178b8e80941Smrg         return "f32to16";
179b8e80941Smrg
180b8e80941Smrg      if (devinfo->gen > 7 && op == BRW_OPCODE_F16TO32)
181b8e80941Smrg         return "f16to32";
182b8e80941Smrg
183b8e80941Smrg      assert(brw_opcode_desc(devinfo, op)->name);
184b8e80941Smrg      return brw_opcode_desc(devinfo, op)->name;
185b8e80941Smrg   case FS_OPCODE_FB_WRITE:
186b8e80941Smrg      return "fb_write";
187b8e80941Smrg   case FS_OPCODE_FB_WRITE_LOGICAL:
188b8e80941Smrg      return "fb_write_logical";
189b8e80941Smrg   case FS_OPCODE_REP_FB_WRITE:
190b8e80941Smrg      return "rep_fb_write";
191b8e80941Smrg   case FS_OPCODE_FB_READ:
192b8e80941Smrg      return "fb_read";
193b8e80941Smrg   case FS_OPCODE_FB_READ_LOGICAL:
194b8e80941Smrg      return "fb_read_logical";
195b8e80941Smrg
196b8e80941Smrg   case SHADER_OPCODE_RCP:
197b8e80941Smrg      return "rcp";
198b8e80941Smrg   case SHADER_OPCODE_RSQ:
199b8e80941Smrg      return "rsq";
200b8e80941Smrg   case SHADER_OPCODE_SQRT:
201b8e80941Smrg      return "sqrt";
202b8e80941Smrg   case SHADER_OPCODE_EXP2:
203b8e80941Smrg      return "exp2";
204b8e80941Smrg   case SHADER_OPCODE_LOG2:
205b8e80941Smrg      return "log2";
206b8e80941Smrg   case SHADER_OPCODE_POW:
207b8e80941Smrg      return "pow";
208b8e80941Smrg   case SHADER_OPCODE_INT_QUOTIENT:
209b8e80941Smrg      return "int_quot";
210b8e80941Smrg   case SHADER_OPCODE_INT_REMAINDER:
211b8e80941Smrg      return "int_rem";
212b8e80941Smrg   case SHADER_OPCODE_SIN:
213b8e80941Smrg      return "sin";
214b8e80941Smrg   case SHADER_OPCODE_COS:
215b8e80941Smrg      return "cos";
216b8e80941Smrg
217b8e80941Smrg   case SHADER_OPCODE_SEND:
218b8e80941Smrg      return "send";
219b8e80941Smrg
220b8e80941Smrg   case SHADER_OPCODE_TEX:
221b8e80941Smrg      return "tex";
222b8e80941Smrg   case SHADER_OPCODE_TEX_LOGICAL:
223b8e80941Smrg      return "tex_logical";
224b8e80941Smrg   case SHADER_OPCODE_TXD:
225b8e80941Smrg      return "txd";
226b8e80941Smrg   case SHADER_OPCODE_TXD_LOGICAL:
227b8e80941Smrg      return "txd_logical";
228b8e80941Smrg   case SHADER_OPCODE_TXF:
229b8e80941Smrg      return "txf";
230b8e80941Smrg   case SHADER_OPCODE_TXF_LOGICAL:
231b8e80941Smrg      return "txf_logical";
232b8e80941Smrg   case SHADER_OPCODE_TXF_LZ:
233b8e80941Smrg      return "txf_lz";
234b8e80941Smrg   case SHADER_OPCODE_TXL:
235b8e80941Smrg      return "txl";
236b8e80941Smrg   case SHADER_OPCODE_TXL_LOGICAL:
237b8e80941Smrg      return "txl_logical";
238b8e80941Smrg   case SHADER_OPCODE_TXL_LZ:
239b8e80941Smrg      return "txl_lz";
240b8e80941Smrg   case SHADER_OPCODE_TXS:
241b8e80941Smrg      return "txs";
242b8e80941Smrg   case SHADER_OPCODE_TXS_LOGICAL:
243b8e80941Smrg      return "txs_logical";
244b8e80941Smrg   case FS_OPCODE_TXB:
245b8e80941Smrg      return "txb";
246b8e80941Smrg   case FS_OPCODE_TXB_LOGICAL:
247b8e80941Smrg      return "txb_logical";
248b8e80941Smrg   case SHADER_OPCODE_TXF_CMS:
249b8e80941Smrg      return "txf_cms";
250b8e80941Smrg   case SHADER_OPCODE_TXF_CMS_LOGICAL:
251b8e80941Smrg      return "txf_cms_logical";
252b8e80941Smrg   case SHADER_OPCODE_TXF_CMS_W:
253b8e80941Smrg      return "txf_cms_w";
254b8e80941Smrg   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
255b8e80941Smrg      return "txf_cms_w_logical";
256b8e80941Smrg   case SHADER_OPCODE_TXF_UMS:
257b8e80941Smrg      return "txf_ums";
258b8e80941Smrg   case SHADER_OPCODE_TXF_UMS_LOGICAL:
259b8e80941Smrg      return "txf_ums_logical";
260b8e80941Smrg   case SHADER_OPCODE_TXF_MCS:
261b8e80941Smrg      return "txf_mcs";
262b8e80941Smrg   case SHADER_OPCODE_TXF_MCS_LOGICAL:
263b8e80941Smrg      return "txf_mcs_logical";
264b8e80941Smrg   case SHADER_OPCODE_LOD:
265b8e80941Smrg      return "lod";
266b8e80941Smrg   case SHADER_OPCODE_LOD_LOGICAL:
267b8e80941Smrg      return "lod_logical";
268b8e80941Smrg   case SHADER_OPCODE_TG4:
269b8e80941Smrg      return "tg4";
270b8e80941Smrg   case SHADER_OPCODE_TG4_LOGICAL:
271b8e80941Smrg      return "tg4_logical";
272b8e80941Smrg   case SHADER_OPCODE_TG4_OFFSET:
273b8e80941Smrg      return "tg4_offset";
274b8e80941Smrg   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
275b8e80941Smrg      return "tg4_offset_logical";
276b8e80941Smrg   case SHADER_OPCODE_SAMPLEINFO:
277b8e80941Smrg      return "sampleinfo";
278b8e80941Smrg   case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
279b8e80941Smrg      return "sampleinfo_logical";
280b8e80941Smrg
281b8e80941Smrg   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
282b8e80941Smrg      return "image_size_logical";
283b8e80941Smrg
284b8e80941Smrg   case SHADER_OPCODE_SHADER_TIME_ADD:
285b8e80941Smrg      return "shader_time_add";
286b8e80941Smrg
287b8e80941Smrg   case VEC4_OPCODE_UNTYPED_ATOMIC:
288b8e80941Smrg      return "untyped_atomic";
289b8e80941Smrg   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
290b8e80941Smrg      return "untyped_atomic_logical";
291b8e80941Smrg   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
292b8e80941Smrg      return "untyped_atomic_float_logical";
293b8e80941Smrg   case VEC4_OPCODE_UNTYPED_SURFACE_READ:
294b8e80941Smrg      return "untyped_surface_read";
295b8e80941Smrg   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
296b8e80941Smrg      return "untyped_surface_read_logical";
297b8e80941Smrg   case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
298b8e80941Smrg      return "untyped_surface_write";
299b8e80941Smrg   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
300b8e80941Smrg      return "untyped_surface_write_logical";
301b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
302b8e80941Smrg      return "a64_untyped_read_logical";
303b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
304b8e80941Smrg      return "a64_untyped_write_logical";
305b8e80941Smrg   case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
306b8e80941Smrg      return "a64_byte_scattered_read_logical";
307b8e80941Smrg   case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
308b8e80941Smrg      return "a64_byte_scattered_write_logical";
309b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
310b8e80941Smrg      return "a64_untyped_atomic_logical";
311b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
312b8e80941Smrg      return "a64_untyped_atomic_int64_logical";
313b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
314b8e80941Smrg      return "a64_untyped_atomic_float_logical";
315b8e80941Smrg   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
316b8e80941Smrg      return "typed_atomic_logical";
317b8e80941Smrg   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
318b8e80941Smrg      return "typed_surface_read_logical";
319b8e80941Smrg   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
320b8e80941Smrg      return "typed_surface_write_logical";
321b8e80941Smrg   case SHADER_OPCODE_MEMORY_FENCE:
322b8e80941Smrg      return "memory_fence";
323b8e80941Smrg   case SHADER_OPCODE_INTERLOCK:
324b8e80941Smrg      /* For an interlock we actually issue a memory fence via sendc. */
325b8e80941Smrg      return "interlock";
326b8e80941Smrg
327b8e80941Smrg   case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
328b8e80941Smrg      return "byte_scattered_read_logical";
329b8e80941Smrg   case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
330b8e80941Smrg      return "byte_scattered_write_logical";
331b8e80941Smrg
332b8e80941Smrg   case SHADER_OPCODE_LOAD_PAYLOAD:
333b8e80941Smrg      return "load_payload";
334b8e80941Smrg   case FS_OPCODE_PACK:
335b8e80941Smrg      return "pack";
336b8e80941Smrg
337b8e80941Smrg   case SHADER_OPCODE_GEN4_SCRATCH_READ:
338b8e80941Smrg      return "gen4_scratch_read";
339b8e80941Smrg   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
340b8e80941Smrg      return "gen4_scratch_write";
341b8e80941Smrg   case SHADER_OPCODE_GEN7_SCRATCH_READ:
342b8e80941Smrg      return "gen7_scratch_read";
343b8e80941Smrg   case SHADER_OPCODE_URB_WRITE_SIMD8:
344b8e80941Smrg      return "gen8_urb_write_simd8";
345b8e80941Smrg   case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
346b8e80941Smrg      return "gen8_urb_write_simd8_per_slot";
347b8e80941Smrg   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
348b8e80941Smrg      return "gen8_urb_write_simd8_masked";
349b8e80941Smrg   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
350b8e80941Smrg      return "gen8_urb_write_simd8_masked_per_slot";
351b8e80941Smrg   case SHADER_OPCODE_URB_READ_SIMD8:
352b8e80941Smrg      return "urb_read_simd8";
353b8e80941Smrg   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
354b8e80941Smrg      return "urb_read_simd8_per_slot";
355b8e80941Smrg
356b8e80941Smrg   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
357b8e80941Smrg      return "find_live_channel";
358b8e80941Smrg   case SHADER_OPCODE_BROADCAST:
359b8e80941Smrg      return "broadcast";
360b8e80941Smrg   case SHADER_OPCODE_SHUFFLE:
361b8e80941Smrg      return "shuffle";
362b8e80941Smrg   case SHADER_OPCODE_SEL_EXEC:
363b8e80941Smrg      return "sel_exec";
364b8e80941Smrg   case SHADER_OPCODE_QUAD_SWIZZLE:
365b8e80941Smrg      return "quad_swizzle";
366b8e80941Smrg   case SHADER_OPCODE_CLUSTER_BROADCAST:
367b8e80941Smrg      return "cluster_broadcast";
368b8e80941Smrg
369b8e80941Smrg   case SHADER_OPCODE_GET_BUFFER_SIZE:
370b8e80941Smrg      return "get_buffer_size";
371b8e80941Smrg
372b8e80941Smrg   case VEC4_OPCODE_MOV_BYTES:
373b8e80941Smrg      return "mov_bytes";
374b8e80941Smrg   case VEC4_OPCODE_PACK_BYTES:
375b8e80941Smrg      return "pack_bytes";
376b8e80941Smrg   case VEC4_OPCODE_UNPACK_UNIFORM:
377b8e80941Smrg      return "unpack_uniform";
378b8e80941Smrg   case VEC4_OPCODE_DOUBLE_TO_F32:
379b8e80941Smrg      return "double_to_f32";
380b8e80941Smrg   case VEC4_OPCODE_DOUBLE_TO_D32:
381b8e80941Smrg      return "double_to_d32";
382b8e80941Smrg   case VEC4_OPCODE_DOUBLE_TO_U32:
383b8e80941Smrg      return "double_to_u32";
384b8e80941Smrg   case VEC4_OPCODE_TO_DOUBLE:
385b8e80941Smrg      return "single_to_double";
386b8e80941Smrg   case VEC4_OPCODE_PICK_LOW_32BIT:
387b8e80941Smrg      return "pick_low_32bit";
388b8e80941Smrg   case VEC4_OPCODE_PICK_HIGH_32BIT:
389b8e80941Smrg      return "pick_high_32bit";
390b8e80941Smrg   case VEC4_OPCODE_SET_LOW_32BIT:
391b8e80941Smrg      return "set_low_32bit";
392b8e80941Smrg   case VEC4_OPCODE_SET_HIGH_32BIT:
393b8e80941Smrg      return "set_high_32bit";
394b8e80941Smrg
395b8e80941Smrg   case FS_OPCODE_DDX_COARSE:
396b8e80941Smrg      return "ddx_coarse";
397b8e80941Smrg   case FS_OPCODE_DDX_FINE:
398b8e80941Smrg      return "ddx_fine";
399b8e80941Smrg   case FS_OPCODE_DDY_COARSE:
400b8e80941Smrg      return "ddy_coarse";
401b8e80941Smrg   case FS_OPCODE_DDY_FINE:
402b8e80941Smrg      return "ddy_fine";
403b8e80941Smrg
404b8e80941Smrg   case FS_OPCODE_LINTERP:
405b8e80941Smrg      return "linterp";
406b8e80941Smrg
407b8e80941Smrg   case FS_OPCODE_PIXEL_X:
408b8e80941Smrg      return "pixel_x";
409b8e80941Smrg   case FS_OPCODE_PIXEL_Y:
410b8e80941Smrg      return "pixel_y";
411b8e80941Smrg
412b8e80941Smrg   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
413b8e80941Smrg      return "uniform_pull_const";
414b8e80941Smrg   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
415b8e80941Smrg      return "uniform_pull_const_gen7";
416b8e80941Smrg   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
417b8e80941Smrg      return "varying_pull_const_gen4";
418b8e80941Smrg   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
419b8e80941Smrg      return "varying_pull_const_logical";
420b8e80941Smrg
421b8e80941Smrg   case FS_OPCODE_DISCARD_JUMP:
422b8e80941Smrg      return "discard_jump";
423b8e80941Smrg
424b8e80941Smrg   case FS_OPCODE_SET_SAMPLE_ID:
425b8e80941Smrg      return "set_sample_id";
426b8e80941Smrg
427b8e80941Smrg   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
428b8e80941Smrg      return "pack_half_2x16_split";
429b8e80941Smrg
430b8e80941Smrg   case FS_OPCODE_PLACEHOLDER_HALT:
431b8e80941Smrg      return "placeholder_halt";
432b8e80941Smrg
433b8e80941Smrg   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
434b8e80941Smrg      return "interp_sample";
435b8e80941Smrg   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
436b8e80941Smrg      return "interp_shared_offset";
437b8e80941Smrg   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
438b8e80941Smrg      return "interp_per_slot_offset";
439b8e80941Smrg
440b8e80941Smrg   case VS_OPCODE_URB_WRITE:
441b8e80941Smrg      return "vs_urb_write";
442b8e80941Smrg   case VS_OPCODE_PULL_CONSTANT_LOAD:
443b8e80941Smrg      return "pull_constant_load";
444b8e80941Smrg   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
445b8e80941Smrg      return "pull_constant_load_gen7";
446b8e80941Smrg
447b8e80941Smrg   case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
448b8e80941Smrg      return "set_simd4x2_header_gen9";
449b8e80941Smrg
450b8e80941Smrg   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
451b8e80941Smrg      return "unpack_flags_simd4x2";
452b8e80941Smrg
453b8e80941Smrg   case GS_OPCODE_URB_WRITE:
454b8e80941Smrg      return "gs_urb_write";
455b8e80941Smrg   case GS_OPCODE_URB_WRITE_ALLOCATE:
456b8e80941Smrg      return "gs_urb_write_allocate";
457b8e80941Smrg   case GS_OPCODE_THREAD_END:
458b8e80941Smrg      return "gs_thread_end";
459b8e80941Smrg   case GS_OPCODE_SET_WRITE_OFFSET:
460b8e80941Smrg      return "set_write_offset";
461b8e80941Smrg   case GS_OPCODE_SET_VERTEX_COUNT:
462b8e80941Smrg      return "set_vertex_count";
463b8e80941Smrg   case GS_OPCODE_SET_DWORD_2:
464b8e80941Smrg      return "set_dword_2";
465b8e80941Smrg   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
466b8e80941Smrg      return "prepare_channel_masks";
467b8e80941Smrg   case GS_OPCODE_SET_CHANNEL_MASKS:
468b8e80941Smrg      return "set_channel_masks";
469b8e80941Smrg   case GS_OPCODE_GET_INSTANCE_ID:
470b8e80941Smrg      return "get_instance_id";
471b8e80941Smrg   case GS_OPCODE_FF_SYNC:
472b8e80941Smrg      return "ff_sync";
473b8e80941Smrg   case GS_OPCODE_SET_PRIMITIVE_ID:
474b8e80941Smrg      return "set_primitive_id";
475b8e80941Smrg   case GS_OPCODE_SVB_WRITE:
476b8e80941Smrg      return "gs_svb_write";
477b8e80941Smrg   case GS_OPCODE_SVB_SET_DST_INDEX:
478b8e80941Smrg      return "gs_svb_set_dst_index";
479b8e80941Smrg   case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
480b8e80941Smrg      return "gs_ff_sync_set_primitives";
481b8e80941Smrg   case CS_OPCODE_CS_TERMINATE:
482b8e80941Smrg      return "cs_terminate";
483b8e80941Smrg   case SHADER_OPCODE_BARRIER:
484b8e80941Smrg      return "barrier";
485b8e80941Smrg   case SHADER_OPCODE_MULH:
486b8e80941Smrg      return "mulh";
487b8e80941Smrg   case SHADER_OPCODE_MOV_INDIRECT:
488b8e80941Smrg      return "mov_indirect";
489b8e80941Smrg
490b8e80941Smrg   case VEC4_OPCODE_URB_READ:
491b8e80941Smrg      return "urb_read";
492b8e80941Smrg   case TCS_OPCODE_GET_INSTANCE_ID:
493b8e80941Smrg      return "tcs_get_instance_id";
494b8e80941Smrg   case TCS_OPCODE_URB_WRITE:
495b8e80941Smrg      return "tcs_urb_write";
496b8e80941Smrg   case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
497b8e80941Smrg      return "tcs_set_input_urb_offsets";
498b8e80941Smrg   case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
499b8e80941Smrg      return "tcs_set_output_urb_offsets";
500b8e80941Smrg   case TCS_OPCODE_GET_PRIMITIVE_ID:
501b8e80941Smrg      return "tcs_get_primitive_id";
502b8e80941Smrg   case TCS_OPCODE_CREATE_BARRIER_HEADER:
503b8e80941Smrg      return "tcs_create_barrier_header";
504b8e80941Smrg   case TCS_OPCODE_SRC0_010_IS_ZERO:
505b8e80941Smrg      return "tcs_src0<0,1,0>_is_zero";
506b8e80941Smrg   case TCS_OPCODE_RELEASE_INPUT:
507b8e80941Smrg      return "tcs_release_input";
508b8e80941Smrg   case TCS_OPCODE_THREAD_END:
509b8e80941Smrg      return "tcs_thread_end";
510b8e80941Smrg   case TES_OPCODE_CREATE_INPUT_READ_HEADER:
511b8e80941Smrg      return "tes_create_input_read_header";
512b8e80941Smrg   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
513b8e80941Smrg      return "tes_add_indirect_urb_offset";
514b8e80941Smrg   case TES_OPCODE_GET_PRIMITIVE_ID:
515b8e80941Smrg      return "tes_get_primitive_id";
516b8e80941Smrg
517b8e80941Smrg   case SHADER_OPCODE_RND_MODE:
518b8e80941Smrg      return "rnd_mode";
519b8e80941Smrg   }
520b8e80941Smrg
521b8e80941Smrg   unreachable("not reached");
522b8e80941Smrg}
523b8e80941Smrg
524b8e80941Smrgbool
525b8e80941Smrgbrw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
526b8e80941Smrg{
527b8e80941Smrg   union {
528b8e80941Smrg      unsigned ud;
529b8e80941Smrg      int d;
530b8e80941Smrg      float f;
531b8e80941Smrg      double df;
532b8e80941Smrg   } imm, sat_imm = { 0 };
533b8e80941Smrg
534b8e80941Smrg   const unsigned size = type_sz(type);
535b8e80941Smrg
536b8e80941Smrg   /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
537b8e80941Smrg    * irrelevant, so just check the size of the type and copy from/to an
538b8e80941Smrg    * appropriately sized field.
539b8e80941Smrg    */
540b8e80941Smrg   if (size < 8)
541b8e80941Smrg      imm.ud = reg->ud;
542b8e80941Smrg   else
543b8e80941Smrg      imm.df = reg->df;
544b8e80941Smrg
545b8e80941Smrg   switch (type) {
546b8e80941Smrg   case BRW_REGISTER_TYPE_UD:
547b8e80941Smrg   case BRW_REGISTER_TYPE_D:
548b8e80941Smrg   case BRW_REGISTER_TYPE_UW:
549b8e80941Smrg   case BRW_REGISTER_TYPE_W:
550b8e80941Smrg   case BRW_REGISTER_TYPE_UQ:
551b8e80941Smrg   case BRW_REGISTER_TYPE_Q:
552b8e80941Smrg      /* Nothing to do. */
553b8e80941Smrg      return false;
554b8e80941Smrg   case BRW_REGISTER_TYPE_F:
555b8e80941Smrg      sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
556b8e80941Smrg      break;
557b8e80941Smrg   case BRW_REGISTER_TYPE_DF:
558b8e80941Smrg      sat_imm.df = CLAMP(imm.df, 0.0, 1.0);
559b8e80941Smrg      break;
560b8e80941Smrg   case BRW_REGISTER_TYPE_UB:
561b8e80941Smrg   case BRW_REGISTER_TYPE_B:
562b8e80941Smrg      unreachable("no UB/B immediates");
563b8e80941Smrg   case BRW_REGISTER_TYPE_V:
564b8e80941Smrg   case BRW_REGISTER_TYPE_UV:
565b8e80941Smrg   case BRW_REGISTER_TYPE_VF:
566b8e80941Smrg      unreachable("unimplemented: saturate vector immediate");
567b8e80941Smrg   case BRW_REGISTER_TYPE_HF:
568b8e80941Smrg      unreachable("unimplemented: saturate HF immediate");
569b8e80941Smrg   case BRW_REGISTER_TYPE_NF:
570b8e80941Smrg      unreachable("no NF immediates");
571b8e80941Smrg   }
572b8e80941Smrg
573b8e80941Smrg   if (size < 8) {
574b8e80941Smrg      if (imm.ud != sat_imm.ud) {
575b8e80941Smrg         reg->ud = sat_imm.ud;
576b8e80941Smrg         return true;
577b8e80941Smrg      }
578b8e80941Smrg   } else {
579b8e80941Smrg      if (imm.df != sat_imm.df) {
580b8e80941Smrg         reg->df = sat_imm.df;
581b8e80941Smrg         return true;
582b8e80941Smrg      }
583b8e80941Smrg   }
584b8e80941Smrg   return false;
585b8e80941Smrg}
586b8e80941Smrg
587b8e80941Smrgbool
588b8e80941Smrgbrw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
589b8e80941Smrg{
590b8e80941Smrg   switch (type) {
591b8e80941Smrg   case BRW_REGISTER_TYPE_D:
592b8e80941Smrg   case BRW_REGISTER_TYPE_UD:
593b8e80941Smrg      reg->d = -reg->d;
594b8e80941Smrg      return true;
595b8e80941Smrg   case BRW_REGISTER_TYPE_W:
596b8e80941Smrg   case BRW_REGISTER_TYPE_UW: {
597b8e80941Smrg      uint16_t value = -(int16_t)reg->ud;
598b8e80941Smrg      reg->ud = value | (uint32_t)value << 16;
599b8e80941Smrg      return true;
600b8e80941Smrg   }
601b8e80941Smrg   case BRW_REGISTER_TYPE_F:
602b8e80941Smrg      reg->f = -reg->f;
603b8e80941Smrg      return true;
604b8e80941Smrg   case BRW_REGISTER_TYPE_VF:
605b8e80941Smrg      reg->ud ^= 0x80808080;
606b8e80941Smrg      return true;
607b8e80941Smrg   case BRW_REGISTER_TYPE_DF:
608b8e80941Smrg      reg->df = -reg->df;
609b8e80941Smrg      return true;
610b8e80941Smrg   case BRW_REGISTER_TYPE_UQ:
611b8e80941Smrg   case BRW_REGISTER_TYPE_Q:
612b8e80941Smrg      reg->d64 = -reg->d64;
613b8e80941Smrg      return true;
614b8e80941Smrg   case BRW_REGISTER_TYPE_UB:
615b8e80941Smrg   case BRW_REGISTER_TYPE_B:
616b8e80941Smrg      unreachable("no UB/B immediates");
617b8e80941Smrg   case BRW_REGISTER_TYPE_UV:
618b8e80941Smrg   case BRW_REGISTER_TYPE_V:
619b8e80941Smrg      assert(!"unimplemented: negate UV/V immediate");
620b8e80941Smrg   case BRW_REGISTER_TYPE_HF:
621b8e80941Smrg      reg->ud ^= 0x80008000;
622b8e80941Smrg      return true;
623b8e80941Smrg   case BRW_REGISTER_TYPE_NF:
624b8e80941Smrg      unreachable("no NF immediates");
625b8e80941Smrg   }
626b8e80941Smrg
627b8e80941Smrg   return false;
628b8e80941Smrg}
629b8e80941Smrg
630b8e80941Smrgbool
631b8e80941Smrgbrw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
632b8e80941Smrg{
633b8e80941Smrg   switch (type) {
634b8e80941Smrg   case BRW_REGISTER_TYPE_D:
635b8e80941Smrg      reg->d = abs(reg->d);
636b8e80941Smrg      return true;
637b8e80941Smrg   case BRW_REGISTER_TYPE_W: {
638b8e80941Smrg      uint16_t value = abs((int16_t)reg->ud);
639b8e80941Smrg      reg->ud = value | (uint32_t)value << 16;
640b8e80941Smrg      return true;
641b8e80941Smrg   }
642b8e80941Smrg   case BRW_REGISTER_TYPE_F:
643b8e80941Smrg      reg->f = fabsf(reg->f);
644b8e80941Smrg      return true;
645b8e80941Smrg   case BRW_REGISTER_TYPE_DF:
646b8e80941Smrg      reg->df = fabs(reg->df);
647b8e80941Smrg      return true;
648b8e80941Smrg   case BRW_REGISTER_TYPE_VF:
649b8e80941Smrg      reg->ud &= ~0x80808080;
650b8e80941Smrg      return true;
651b8e80941Smrg   case BRW_REGISTER_TYPE_Q:
652b8e80941Smrg      reg->d64 = imaxabs(reg->d64);
653b8e80941Smrg      return true;
654b8e80941Smrg   case BRW_REGISTER_TYPE_UB:
655b8e80941Smrg   case BRW_REGISTER_TYPE_B:
656b8e80941Smrg      unreachable("no UB/B immediates");
657b8e80941Smrg   case BRW_REGISTER_TYPE_UQ:
658b8e80941Smrg   case BRW_REGISTER_TYPE_UD:
659b8e80941Smrg   case BRW_REGISTER_TYPE_UW:
660b8e80941Smrg   case BRW_REGISTER_TYPE_UV:
661b8e80941Smrg      /* Presumably the absolute value modifier on an unsigned source is a
662b8e80941Smrg       * nop, but it would be nice to confirm.
663b8e80941Smrg       */
664b8e80941Smrg      assert(!"unimplemented: abs unsigned immediate");
665b8e80941Smrg   case BRW_REGISTER_TYPE_V:
666b8e80941Smrg      assert(!"unimplemented: abs V immediate");
667b8e80941Smrg   case BRW_REGISTER_TYPE_HF:
668b8e80941Smrg      reg->ud &= ~0x80008000;
669b8e80941Smrg      return true;
670b8e80941Smrg   case BRW_REGISTER_TYPE_NF:
671b8e80941Smrg      unreachable("no NF immediates");
672b8e80941Smrg   }
673b8e80941Smrg
674b8e80941Smrg   return false;
675b8e80941Smrg}
676b8e80941Smrg
677b8e80941Smrgbackend_shader::backend_shader(const struct brw_compiler *compiler,
678b8e80941Smrg                               void *log_data,
679b8e80941Smrg                               void *mem_ctx,
680b8e80941Smrg                               const nir_shader *shader,
681b8e80941Smrg                               struct brw_stage_prog_data *stage_prog_data)
682b8e80941Smrg   : compiler(compiler),
683b8e80941Smrg     log_data(log_data),
684b8e80941Smrg     devinfo(compiler->devinfo),
685b8e80941Smrg     nir(shader),
686b8e80941Smrg     stage_prog_data(stage_prog_data),
687b8e80941Smrg     mem_ctx(mem_ctx),
688b8e80941Smrg     cfg(NULL),
689b8e80941Smrg     stage(shader->info.stage)
690b8e80941Smrg{
691b8e80941Smrg   debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
692b8e80941Smrg   stage_name = _mesa_shader_stage_to_string(stage);
693b8e80941Smrg   stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
694b8e80941Smrg}
695b8e80941Smrg
696b8e80941Smrgbackend_shader::~backend_shader()
697b8e80941Smrg{
698b8e80941Smrg}
699b8e80941Smrg
700b8e80941Smrgbool
701b8e80941Smrgbackend_reg::equals(const backend_reg &r) const
702b8e80941Smrg{
703b8e80941Smrg   return brw_regs_equal(this, &r) && offset == r.offset;
704b8e80941Smrg}
705b8e80941Smrg
706b8e80941Smrgbool
707b8e80941Smrgbackend_reg::negative_equals(const backend_reg &r) const
708b8e80941Smrg{
709b8e80941Smrg   return brw_regs_negative_equal(this, &r) && offset == r.offset;
710b8e80941Smrg}
711b8e80941Smrg
712b8e80941Smrgbool
713b8e80941Smrgbackend_reg::is_zero() const
714b8e80941Smrg{
715b8e80941Smrg   if (file != IMM)
716b8e80941Smrg      return false;
717b8e80941Smrg
718b8e80941Smrg   assert(type_sz(type) > 1);
719b8e80941Smrg
720b8e80941Smrg   switch (type) {
721b8e80941Smrg   case BRW_REGISTER_TYPE_HF:
722b8e80941Smrg      assert((d & 0xffff) == ((d >> 16) & 0xffff));
723b8e80941Smrg      return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
724b8e80941Smrg   case BRW_REGISTER_TYPE_F:
725b8e80941Smrg      return f == 0;
726b8e80941Smrg   case BRW_REGISTER_TYPE_DF:
727b8e80941Smrg      return df == 0;
728b8e80941Smrg   case BRW_REGISTER_TYPE_W:
729b8e80941Smrg   case BRW_REGISTER_TYPE_UW:
730b8e80941Smrg      assert((d & 0xffff) == ((d >> 16) & 0xffff));
731b8e80941Smrg      return (d & 0xffff) == 0;
732b8e80941Smrg   case BRW_REGISTER_TYPE_D:
733b8e80941Smrg   case BRW_REGISTER_TYPE_UD:
734b8e80941Smrg      return d == 0;
735b8e80941Smrg   case BRW_REGISTER_TYPE_UQ:
736b8e80941Smrg   case BRW_REGISTER_TYPE_Q:
737b8e80941Smrg      return u64 == 0;
738b8e80941Smrg   default:
739b8e80941Smrg      return false;
740b8e80941Smrg   }
741b8e80941Smrg}
742b8e80941Smrg
743b8e80941Smrgbool
744b8e80941Smrgbackend_reg::is_one() const
745b8e80941Smrg{
746b8e80941Smrg   if (file != IMM)
747b8e80941Smrg      return false;
748b8e80941Smrg
749b8e80941Smrg   assert(type_sz(type) > 1);
750b8e80941Smrg
751b8e80941Smrg   switch (type) {
752b8e80941Smrg   case BRW_REGISTER_TYPE_HF:
753b8e80941Smrg      assert((d & 0xffff) == ((d >> 16) & 0xffff));
754b8e80941Smrg      return (d & 0xffff) == 0x3c00;
755b8e80941Smrg   case BRW_REGISTER_TYPE_F:
756b8e80941Smrg      return f == 1.0f;
757b8e80941Smrg   case BRW_REGISTER_TYPE_DF:
758b8e80941Smrg      return df == 1.0;
759b8e80941Smrg   case BRW_REGISTER_TYPE_W:
760b8e80941Smrg   case BRW_REGISTER_TYPE_UW:
761b8e80941Smrg      assert((d & 0xffff) == ((d >> 16) & 0xffff));
762b8e80941Smrg      return (d & 0xffff) == 1;
763b8e80941Smrg   case BRW_REGISTER_TYPE_D:
764b8e80941Smrg   case BRW_REGISTER_TYPE_UD:
765b8e80941Smrg      return d == 1;
766b8e80941Smrg   case BRW_REGISTER_TYPE_UQ:
767b8e80941Smrg   case BRW_REGISTER_TYPE_Q:
768b8e80941Smrg      return u64 == 1;
769b8e80941Smrg   default:
770b8e80941Smrg      return false;
771b8e80941Smrg   }
772b8e80941Smrg}
773b8e80941Smrg
774b8e80941Smrgbool
775b8e80941Smrgbackend_reg::is_negative_one() const
776b8e80941Smrg{
777b8e80941Smrg   if (file != IMM)
778b8e80941Smrg      return false;
779b8e80941Smrg
780b8e80941Smrg   assert(type_sz(type) > 1);
781b8e80941Smrg
782b8e80941Smrg   switch (type) {
783b8e80941Smrg   case BRW_REGISTER_TYPE_HF:
784b8e80941Smrg      assert((d & 0xffff) == ((d >> 16) & 0xffff));
785b8e80941Smrg      return (d & 0xffff) == 0xbc00;
786b8e80941Smrg   case BRW_REGISTER_TYPE_F:
787b8e80941Smrg      return f == -1.0;
788b8e80941Smrg   case BRW_REGISTER_TYPE_DF:
789b8e80941Smrg      return df == -1.0;
790b8e80941Smrg   case BRW_REGISTER_TYPE_W:
791b8e80941Smrg      assert((d & 0xffff) == ((d >> 16) & 0xffff));
792b8e80941Smrg      return (d & 0xffff) == 0xffff;
793b8e80941Smrg   case BRW_REGISTER_TYPE_D:
794b8e80941Smrg      return d == -1;
795b8e80941Smrg   case BRW_REGISTER_TYPE_Q:
796b8e80941Smrg      return d64 == -1;
797b8e80941Smrg   default:
798b8e80941Smrg      return false;
799b8e80941Smrg   }
800b8e80941Smrg}
801b8e80941Smrg
802b8e80941Smrgbool
803b8e80941Smrgbackend_reg::is_null() const
804b8e80941Smrg{
805b8e80941Smrg   return file == ARF && nr == BRW_ARF_NULL;
806b8e80941Smrg}
807b8e80941Smrg
808b8e80941Smrg
809b8e80941Smrgbool
810b8e80941Smrgbackend_reg::is_accumulator() const
811b8e80941Smrg{
812b8e80941Smrg   return file == ARF && nr == BRW_ARF_ACCUMULATOR;
813b8e80941Smrg}
814b8e80941Smrg
815b8e80941Smrgbool
816b8e80941Smrgbackend_instruction::is_commutative() const
817b8e80941Smrg{
818b8e80941Smrg   switch (opcode) {
819b8e80941Smrg   case BRW_OPCODE_AND:
820b8e80941Smrg   case BRW_OPCODE_OR:
821b8e80941Smrg   case BRW_OPCODE_XOR:
822b8e80941Smrg   case BRW_OPCODE_ADD:
823b8e80941Smrg   case BRW_OPCODE_MUL:
824b8e80941Smrg   case SHADER_OPCODE_MULH:
825b8e80941Smrg      return true;
826b8e80941Smrg   case BRW_OPCODE_SEL:
827b8e80941Smrg      /* MIN and MAX are commutative. */
828b8e80941Smrg      if (conditional_mod == BRW_CONDITIONAL_GE ||
829b8e80941Smrg          conditional_mod == BRW_CONDITIONAL_L) {
830b8e80941Smrg         return true;
831b8e80941Smrg      }
832b8e80941Smrg      /* fallthrough */
833b8e80941Smrg   default:
834b8e80941Smrg      return false;
835b8e80941Smrg   }
836b8e80941Smrg}
837b8e80941Smrg
838b8e80941Smrgbool
839b8e80941Smrgbackend_instruction::is_3src(const struct gen_device_info *devinfo) const
840b8e80941Smrg{
841b8e80941Smrg   return ::is_3src(devinfo, opcode);
842b8e80941Smrg}
843b8e80941Smrg
844b8e80941Smrgbool
845b8e80941Smrgbackend_instruction::is_tex() const
846b8e80941Smrg{
847b8e80941Smrg   return (opcode == SHADER_OPCODE_TEX ||
848b8e80941Smrg           opcode == FS_OPCODE_TXB ||
849b8e80941Smrg           opcode == SHADER_OPCODE_TXD ||
850b8e80941Smrg           opcode == SHADER_OPCODE_TXF ||
851b8e80941Smrg           opcode == SHADER_OPCODE_TXF_LZ ||
852b8e80941Smrg           opcode == SHADER_OPCODE_TXF_CMS ||
853b8e80941Smrg           opcode == SHADER_OPCODE_TXF_CMS_W ||
854b8e80941Smrg           opcode == SHADER_OPCODE_TXF_UMS ||
855b8e80941Smrg           opcode == SHADER_OPCODE_TXF_MCS ||
856b8e80941Smrg           opcode == SHADER_OPCODE_TXL ||
857b8e80941Smrg           opcode == SHADER_OPCODE_TXL_LZ ||
858b8e80941Smrg           opcode == SHADER_OPCODE_TXS ||
859b8e80941Smrg           opcode == SHADER_OPCODE_LOD ||
860b8e80941Smrg           opcode == SHADER_OPCODE_TG4 ||
861b8e80941Smrg           opcode == SHADER_OPCODE_TG4_OFFSET ||
862b8e80941Smrg           opcode == SHADER_OPCODE_SAMPLEINFO);
863b8e80941Smrg}
864b8e80941Smrg
865b8e80941Smrgbool
866b8e80941Smrgbackend_instruction::is_math() const
867b8e80941Smrg{
868b8e80941Smrg   return (opcode == SHADER_OPCODE_RCP ||
869b8e80941Smrg           opcode == SHADER_OPCODE_RSQ ||
870b8e80941Smrg           opcode == SHADER_OPCODE_SQRT ||
871b8e80941Smrg           opcode == SHADER_OPCODE_EXP2 ||
872b8e80941Smrg           opcode == SHADER_OPCODE_LOG2 ||
873b8e80941Smrg           opcode == SHADER_OPCODE_SIN ||
874b8e80941Smrg           opcode == SHADER_OPCODE_COS ||
875b8e80941Smrg           opcode == SHADER_OPCODE_INT_QUOTIENT ||
876b8e80941Smrg           opcode == SHADER_OPCODE_INT_REMAINDER ||
877b8e80941Smrg           opcode == SHADER_OPCODE_POW);
878b8e80941Smrg}
879b8e80941Smrg
880b8e80941Smrgbool
881b8e80941Smrgbackend_instruction::is_control_flow() const
882b8e80941Smrg{
883b8e80941Smrg   switch (opcode) {
884b8e80941Smrg   case BRW_OPCODE_DO:
885b8e80941Smrg   case BRW_OPCODE_WHILE:
886b8e80941Smrg   case BRW_OPCODE_IF:
887b8e80941Smrg   case BRW_OPCODE_ELSE:
888b8e80941Smrg   case BRW_OPCODE_ENDIF:
889b8e80941Smrg   case BRW_OPCODE_BREAK:
890b8e80941Smrg   case BRW_OPCODE_CONTINUE:
891b8e80941Smrg      return true;
892b8e80941Smrg   default:
893b8e80941Smrg      return false;
894b8e80941Smrg   }
895b8e80941Smrg}
896b8e80941Smrg
897b8e80941Smrgbool
898b8e80941Smrgbackend_instruction::can_do_source_mods() const
899b8e80941Smrg{
900b8e80941Smrg   switch (opcode) {
901b8e80941Smrg   case BRW_OPCODE_ADDC:
902b8e80941Smrg   case BRW_OPCODE_BFE:
903b8e80941Smrg   case BRW_OPCODE_BFI1:
904b8e80941Smrg   case BRW_OPCODE_BFI2:
905b8e80941Smrg   case BRW_OPCODE_BFREV:
906b8e80941Smrg   case BRW_OPCODE_CBIT:
907b8e80941Smrg   case BRW_OPCODE_FBH:
908b8e80941Smrg   case BRW_OPCODE_FBL:
909b8e80941Smrg   case BRW_OPCODE_SUBB:
910b8e80941Smrg   case SHADER_OPCODE_BROADCAST:
911b8e80941Smrg   case SHADER_OPCODE_CLUSTER_BROADCAST:
912b8e80941Smrg   case SHADER_OPCODE_MOV_INDIRECT:
913b8e80941Smrg      return false;
914b8e80941Smrg   default:
915b8e80941Smrg      return true;
916b8e80941Smrg   }
917b8e80941Smrg}
918b8e80941Smrg
919b8e80941Smrgbool
920b8e80941Smrgbackend_instruction::can_do_saturate() const
921b8e80941Smrg{
922b8e80941Smrg   switch (opcode) {
923b8e80941Smrg   case BRW_OPCODE_ADD:
924b8e80941Smrg   case BRW_OPCODE_ASR:
925b8e80941Smrg   case BRW_OPCODE_AVG:
926b8e80941Smrg   case BRW_OPCODE_DP2:
927b8e80941Smrg   case BRW_OPCODE_DP3:
928b8e80941Smrg   case BRW_OPCODE_DP4:
929b8e80941Smrg   case BRW_OPCODE_DPH:
930b8e80941Smrg   case BRW_OPCODE_F16TO32:
931b8e80941Smrg   case BRW_OPCODE_F32TO16:
932b8e80941Smrg   case BRW_OPCODE_LINE:
933b8e80941Smrg   case BRW_OPCODE_LRP:
934b8e80941Smrg   case BRW_OPCODE_MAC:
935b8e80941Smrg   case BRW_OPCODE_MAD:
936b8e80941Smrg   case BRW_OPCODE_MATH:
937b8e80941Smrg   case BRW_OPCODE_MOV:
938b8e80941Smrg   case BRW_OPCODE_MUL:
939b8e80941Smrg   case SHADER_OPCODE_MULH:
940b8e80941Smrg   case BRW_OPCODE_PLN:
941b8e80941Smrg   case BRW_OPCODE_RNDD:
942b8e80941Smrg   case BRW_OPCODE_RNDE:
943b8e80941Smrg   case BRW_OPCODE_RNDU:
944b8e80941Smrg   case BRW_OPCODE_RNDZ:
945b8e80941Smrg   case BRW_OPCODE_SEL:
946b8e80941Smrg   case BRW_OPCODE_SHL:
947b8e80941Smrg   case BRW_OPCODE_SHR:
948b8e80941Smrg   case FS_OPCODE_LINTERP:
949b8e80941Smrg   case SHADER_OPCODE_COS:
950b8e80941Smrg   case SHADER_OPCODE_EXP2:
951b8e80941Smrg   case SHADER_OPCODE_LOG2:
952b8e80941Smrg   case SHADER_OPCODE_POW:
953b8e80941Smrg   case SHADER_OPCODE_RCP:
954b8e80941Smrg   case SHADER_OPCODE_RSQ:
955b8e80941Smrg   case SHADER_OPCODE_SIN:
956b8e80941Smrg   case SHADER_OPCODE_SQRT:
957b8e80941Smrg      return true;
958b8e80941Smrg   default:
959b8e80941Smrg      return false;
960b8e80941Smrg   }
961b8e80941Smrg}
962b8e80941Smrg
963b8e80941Smrgbool
964b8e80941Smrgbackend_instruction::can_do_cmod() const
965b8e80941Smrg{
966b8e80941Smrg   switch (opcode) {
967b8e80941Smrg   case BRW_OPCODE_ADD:
968b8e80941Smrg   case BRW_OPCODE_ADDC:
969b8e80941Smrg   case BRW_OPCODE_AND:
970b8e80941Smrg   case BRW_OPCODE_ASR:
971b8e80941Smrg   case BRW_OPCODE_AVG:
972b8e80941Smrg   case BRW_OPCODE_CMP:
973b8e80941Smrg   case BRW_OPCODE_CMPN:
974b8e80941Smrg   case BRW_OPCODE_DP2:
975b8e80941Smrg   case BRW_OPCODE_DP3:
976b8e80941Smrg   case BRW_OPCODE_DP4:
977b8e80941Smrg   case BRW_OPCODE_DPH:
978b8e80941Smrg   case BRW_OPCODE_F16TO32:
979b8e80941Smrg   case BRW_OPCODE_F32TO16:
980b8e80941Smrg   case BRW_OPCODE_FRC:
981b8e80941Smrg   case BRW_OPCODE_LINE:
982b8e80941Smrg   case BRW_OPCODE_LRP:
983b8e80941Smrg   case BRW_OPCODE_LZD:
984b8e80941Smrg   case BRW_OPCODE_MAC:
985b8e80941Smrg   case BRW_OPCODE_MACH:
986b8e80941Smrg   case BRW_OPCODE_MAD:
987b8e80941Smrg   case BRW_OPCODE_MOV:
988b8e80941Smrg   case BRW_OPCODE_MUL:
989b8e80941Smrg   case BRW_OPCODE_NOT:
990b8e80941Smrg   case BRW_OPCODE_OR:
991b8e80941Smrg   case BRW_OPCODE_PLN:
992b8e80941Smrg   case BRW_OPCODE_RNDD:
993b8e80941Smrg   case BRW_OPCODE_RNDE:
994b8e80941Smrg   case BRW_OPCODE_RNDU:
995b8e80941Smrg   case BRW_OPCODE_RNDZ:
996b8e80941Smrg   case BRW_OPCODE_SAD2:
997b8e80941Smrg   case BRW_OPCODE_SADA2:
998b8e80941Smrg   case BRW_OPCODE_SHL:
999b8e80941Smrg   case BRW_OPCODE_SHR:
1000b8e80941Smrg   case BRW_OPCODE_SUBB:
1001b8e80941Smrg   case BRW_OPCODE_XOR:
1002b8e80941Smrg   case FS_OPCODE_LINTERP:
1003b8e80941Smrg      return true;
1004b8e80941Smrg   default:
1005b8e80941Smrg      return false;
1006b8e80941Smrg   }
1007b8e80941Smrg}
1008b8e80941Smrg
1009b8e80941Smrgbool
1010b8e80941Smrgbackend_instruction::reads_accumulator_implicitly() const
1011b8e80941Smrg{
1012b8e80941Smrg   switch (opcode) {
1013b8e80941Smrg   case BRW_OPCODE_MAC:
1014b8e80941Smrg   case BRW_OPCODE_MACH:
1015b8e80941Smrg   case BRW_OPCODE_SADA2:
1016b8e80941Smrg      return true;
1017b8e80941Smrg   default:
1018b8e80941Smrg      return false;
1019b8e80941Smrg   }
1020b8e80941Smrg}
1021b8e80941Smrg
1022b8e80941Smrgbool
1023b8e80941Smrgbackend_instruction::writes_accumulator_implicitly(const struct gen_device_info *devinfo) const
1024b8e80941Smrg{
1025b8e80941Smrg   return writes_accumulator ||
1026b8e80941Smrg          (devinfo->gen < 6 &&
1027b8e80941Smrg           ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
1028b8e80941Smrg            (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) ||
1029b8e80941Smrg          (opcode == FS_OPCODE_LINTERP &&
1030b8e80941Smrg           (!devinfo->has_pln || devinfo->gen <= 6));
1031b8e80941Smrg}
1032b8e80941Smrg
1033b8e80941Smrgbool
1034b8e80941Smrgbackend_instruction::has_side_effects() const
1035b8e80941Smrg{
1036b8e80941Smrg   switch (opcode) {
1037b8e80941Smrg   case SHADER_OPCODE_SEND:
1038b8e80941Smrg      return send_has_side_effects;
1039b8e80941Smrg
1040b8e80941Smrg   case VEC4_OPCODE_UNTYPED_ATOMIC:
1041b8e80941Smrg   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
1042b8e80941Smrg   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
1043b8e80941Smrg   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
1044b8e80941Smrg   case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
1045b8e80941Smrg   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
1046b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
1047b8e80941Smrg   case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
1048b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
1049b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
1050b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
1051b8e80941Smrg   case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
1052b8e80941Smrg   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
1053b8e80941Smrg   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
1054b8e80941Smrg   case SHADER_OPCODE_MEMORY_FENCE:
1055b8e80941Smrg   case SHADER_OPCODE_INTERLOCK:
1056b8e80941Smrg   case SHADER_OPCODE_URB_WRITE_SIMD8:
1057b8e80941Smrg   case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
1058b8e80941Smrg   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
1059b8e80941Smrg   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
1060b8e80941Smrg   case FS_OPCODE_FB_WRITE:
1061b8e80941Smrg   case FS_OPCODE_FB_WRITE_LOGICAL:
1062b8e80941Smrg   case FS_OPCODE_REP_FB_WRITE:
1063b8e80941Smrg   case SHADER_OPCODE_BARRIER:
1064b8e80941Smrg   case TCS_OPCODE_URB_WRITE:
1065b8e80941Smrg   case TCS_OPCODE_RELEASE_INPUT:
1066b8e80941Smrg   case SHADER_OPCODE_RND_MODE:
1067b8e80941Smrg      return true;
1068b8e80941Smrg   default:
1069b8e80941Smrg      return eot;
1070b8e80941Smrg   }
1071b8e80941Smrg}
1072b8e80941Smrg
1073b8e80941Smrgbool
1074b8e80941Smrgbackend_instruction::is_volatile() const
1075b8e80941Smrg{
1076b8e80941Smrg   switch (opcode) {
1077b8e80941Smrg   case SHADER_OPCODE_SEND:
1078b8e80941Smrg      return send_is_volatile;
1079b8e80941Smrg
1080b8e80941Smrg   case VEC4_OPCODE_UNTYPED_SURFACE_READ:
1081b8e80941Smrg   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
1082b8e80941Smrg   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
1083b8e80941Smrg   case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
1084b8e80941Smrg   case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
1085b8e80941Smrg   case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
1086b8e80941Smrg   case SHADER_OPCODE_URB_READ_SIMD8:
1087b8e80941Smrg   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
1088b8e80941Smrg   case VEC4_OPCODE_URB_READ:
1089b8e80941Smrg      return true;
1090b8e80941Smrg   default:
1091b8e80941Smrg      return false;
1092b8e80941Smrg   }
1093b8e80941Smrg}
1094b8e80941Smrg
1095b8e80941Smrg#ifndef NDEBUG
1096b8e80941Smrgstatic bool
1097b8e80941Smrginst_is_in_block(const bblock_t *block, const backend_instruction *inst)
1098b8e80941Smrg{
1099b8e80941Smrg   bool found = false;
1100b8e80941Smrg   foreach_inst_in_block (backend_instruction, i, block) {
1101b8e80941Smrg      if (inst == i) {
1102b8e80941Smrg         found = true;
1103b8e80941Smrg      }
1104b8e80941Smrg   }
1105b8e80941Smrg   return found;
1106b8e80941Smrg}
1107b8e80941Smrg#endif
1108b8e80941Smrg
1109b8e80941Smrgstatic void
1110b8e80941Smrgadjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
1111b8e80941Smrg{
1112b8e80941Smrg   for (bblock_t *block_iter = start_block->next();
1113b8e80941Smrg        block_iter;
1114b8e80941Smrg        block_iter = block_iter->next()) {
1115b8e80941Smrg      block_iter->start_ip += ip_adjustment;
1116b8e80941Smrg      block_iter->end_ip += ip_adjustment;
1117b8e80941Smrg   }
1118b8e80941Smrg}
1119b8e80941Smrg
1120b8e80941Smrgvoid
1121b8e80941Smrgbackend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
1122b8e80941Smrg{
1123b8e80941Smrg   assert(this != inst);
1124b8e80941Smrg
1125b8e80941Smrg   if (!this->is_head_sentinel())
1126b8e80941Smrg      assert(inst_is_in_block(block, this) || !"Instruction not in block");
1127b8e80941Smrg
1128b8e80941Smrg   block->end_ip++;
1129b8e80941Smrg
1130b8e80941Smrg   adjust_later_block_ips(block, 1);
1131b8e80941Smrg
1132b8e80941Smrg   exec_node::insert_after(inst);
1133b8e80941Smrg}
1134b8e80941Smrg
1135b8e80941Smrgvoid
1136b8e80941Smrgbackend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
1137b8e80941Smrg{
1138b8e80941Smrg   assert(this != inst);
1139b8e80941Smrg
1140b8e80941Smrg   if (!this->is_tail_sentinel())
1141b8e80941Smrg      assert(inst_is_in_block(block, this) || !"Instruction not in block");
1142b8e80941Smrg
1143b8e80941Smrg   block->end_ip++;
1144b8e80941Smrg
1145b8e80941Smrg   adjust_later_block_ips(block, 1);
1146b8e80941Smrg
1147b8e80941Smrg   exec_node::insert_before(inst);
1148b8e80941Smrg}
1149b8e80941Smrg
1150b8e80941Smrgvoid
1151b8e80941Smrgbackend_instruction::insert_before(bblock_t *block, exec_list *list)
1152b8e80941Smrg{
1153b8e80941Smrg   assert(inst_is_in_block(block, this) || !"Instruction not in block");
1154b8e80941Smrg
1155b8e80941Smrg   unsigned num_inst = list->length();
1156b8e80941Smrg
1157b8e80941Smrg   block->end_ip += num_inst;
1158b8e80941Smrg
1159b8e80941Smrg   adjust_later_block_ips(block, num_inst);
1160b8e80941Smrg
1161b8e80941Smrg   exec_node::insert_before(list);
1162b8e80941Smrg}
1163b8e80941Smrg
1164b8e80941Smrgvoid
1165b8e80941Smrgbackend_instruction::remove(bblock_t *block)
1166b8e80941Smrg{
1167b8e80941Smrg   assert(inst_is_in_block(block, this) || !"Instruction not in block");
1168b8e80941Smrg
1169b8e80941Smrg   adjust_later_block_ips(block, -1);
1170b8e80941Smrg
1171b8e80941Smrg   if (block->start_ip == block->end_ip) {
1172b8e80941Smrg      block->cfg->remove_block(block);
1173b8e80941Smrg   } else {
1174b8e80941Smrg      block->end_ip--;
1175b8e80941Smrg   }
1176b8e80941Smrg
1177b8e80941Smrg   exec_node::remove();
1178b8e80941Smrg}
1179b8e80941Smrg
1180b8e80941Smrgvoid
1181b8e80941Smrgbackend_shader::dump_instructions()
1182b8e80941Smrg{
1183b8e80941Smrg   dump_instructions(NULL);
1184b8e80941Smrg}
1185b8e80941Smrg
1186b8e80941Smrgvoid
1187b8e80941Smrgbackend_shader::dump_instructions(const char *name)
1188b8e80941Smrg{
1189b8e80941Smrg   FILE *file = stderr;
1190b8e80941Smrg   if (name && geteuid() != 0) {
1191b8e80941Smrg      file = fopen(name, "w");
1192b8e80941Smrg      if (!file)
1193b8e80941Smrg         file = stderr;
1194b8e80941Smrg   }
1195b8e80941Smrg
1196b8e80941Smrg   if (cfg) {
1197b8e80941Smrg      int ip = 0;
1198b8e80941Smrg      foreach_block_and_inst(block, backend_instruction, inst, cfg) {
1199b8e80941Smrg         if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
1200b8e80941Smrg            fprintf(file, "%4d: ", ip++);
1201b8e80941Smrg         dump_instruction(inst, file);
1202b8e80941Smrg      }
1203b8e80941Smrg   } else {
1204b8e80941Smrg      int ip = 0;
1205b8e80941Smrg      foreach_in_list(backend_instruction, inst, &instructions) {
1206b8e80941Smrg         if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER))
1207b8e80941Smrg            fprintf(file, "%4d: ", ip++);
1208b8e80941Smrg         dump_instruction(inst, file);
1209b8e80941Smrg      }
1210b8e80941Smrg   }
1211b8e80941Smrg
1212b8e80941Smrg   if (file != stderr) {
1213b8e80941Smrg      fclose(file);
1214b8e80941Smrg   }
1215b8e80941Smrg}
1216b8e80941Smrg
1217b8e80941Smrgvoid
1218b8e80941Smrgbackend_shader::calculate_cfg()
1219b8e80941Smrg{
1220b8e80941Smrg   if (this->cfg)
1221b8e80941Smrg      return;
1222b8e80941Smrg   cfg = new(mem_ctx) cfg_t(&this->instructions);
1223b8e80941Smrg}
1224b8e80941Smrg
1225b8e80941Smrgextern "C" const unsigned *
1226b8e80941Smrgbrw_compile_tes(const struct brw_compiler *compiler,
1227b8e80941Smrg                void *log_data,
1228b8e80941Smrg                void *mem_ctx,
1229b8e80941Smrg                const struct brw_tes_prog_key *key,
1230b8e80941Smrg                const struct brw_vue_map *input_vue_map,
1231b8e80941Smrg                struct brw_tes_prog_data *prog_data,
1232b8e80941Smrg                nir_shader *nir,
1233b8e80941Smrg                struct gl_program *prog,
1234b8e80941Smrg                int shader_time_index,
1235b8e80941Smrg                char **error_str)
1236b8e80941Smrg{
1237b8e80941Smrg   const struct gen_device_info *devinfo = compiler->devinfo;
1238b8e80941Smrg   const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
1239b8e80941Smrg   const unsigned *assembly;
1240b8e80941Smrg
1241b8e80941Smrg   nir->info.inputs_read = key->inputs_read;
1242b8e80941Smrg   nir->info.patch_inputs_read = key->patch_inputs_read;
1243b8e80941Smrg
1244b8e80941Smrg   nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar);
1245b8e80941Smrg   brw_nir_lower_tes_inputs(nir, input_vue_map);
1246b8e80941Smrg   brw_nir_lower_vue_outputs(nir);
1247b8e80941Smrg   nir = brw_postprocess_nir(nir, compiler, is_scalar);
1248b8e80941Smrg
1249b8e80941Smrg   brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
1250b8e80941Smrg                       nir->info.outputs_written,
1251b8e80941Smrg                       nir->info.separate_shader);
1252b8e80941Smrg
1253b8e80941Smrg   unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
1254b8e80941Smrg
1255b8e80941Smrg   assert(output_size_bytes >= 1);
1256b8e80941Smrg   if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
1257b8e80941Smrg      if (error_str)
1258b8e80941Smrg         *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size");
1259b8e80941Smrg      return NULL;
1260b8e80941Smrg   }
1261b8e80941Smrg
1262b8e80941Smrg   prog_data->base.clip_distance_mask =
1263b8e80941Smrg      ((1 << nir->info.clip_distance_array_size) - 1);
1264b8e80941Smrg   prog_data->base.cull_distance_mask =
1265b8e80941Smrg      ((1 << nir->info.cull_distance_array_size) - 1) <<
1266b8e80941Smrg      nir->info.clip_distance_array_size;
1267b8e80941Smrg
1268b8e80941Smrg   /* URB entry sizes are stored as a multiple of 64 bytes. */
1269b8e80941Smrg   prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
1270b8e80941Smrg
1271b8e80941Smrg   /* On Cannonlake software shall not program an allocation size that
1272b8e80941Smrg    * specifies a size that is a multiple of 3 64B (512-bit) cachelines.
1273b8e80941Smrg    */
1274b8e80941Smrg   if (devinfo->gen == 10 &&
1275b8e80941Smrg       prog_data->base.urb_entry_size % 3 == 0)
1276b8e80941Smrg      prog_data->base.urb_entry_size++;
1277b8e80941Smrg
1278b8e80941Smrg   prog_data->base.urb_read_length = 0;
1279b8e80941Smrg
1280b8e80941Smrg   STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
1281b8e80941Smrg   STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL ==
1282b8e80941Smrg                 TESS_SPACING_FRACTIONAL_ODD - 1);
1283b8e80941Smrg   STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL ==
1284b8e80941Smrg                 TESS_SPACING_FRACTIONAL_EVEN - 1);
1285b8e80941Smrg
1286b8e80941Smrg   prog_data->partitioning =
1287b8e80941Smrg      (enum brw_tess_partitioning) (nir->info.tess.spacing - 1);
1288b8e80941Smrg
1289b8e80941Smrg   switch (nir->info.tess.primitive_mode) {
1290b8e80941Smrg   case GL_QUADS:
1291b8e80941Smrg      prog_data->domain = BRW_TESS_DOMAIN_QUAD;
1292b8e80941Smrg      break;
1293b8e80941Smrg   case GL_TRIANGLES:
1294b8e80941Smrg      prog_data->domain = BRW_TESS_DOMAIN_TRI;
1295b8e80941Smrg      break;
1296b8e80941Smrg   case GL_ISOLINES:
1297b8e80941Smrg      prog_data->domain = BRW_TESS_DOMAIN_ISOLINE;
1298b8e80941Smrg      break;
1299b8e80941Smrg   default:
1300b8e80941Smrg      unreachable("invalid domain shader primitive mode");
1301b8e80941Smrg   }
1302b8e80941Smrg
1303b8e80941Smrg   if (nir->info.tess.point_mode) {
1304b8e80941Smrg      prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT;
1305b8e80941Smrg   } else if (nir->info.tess.primitive_mode == GL_ISOLINES) {
1306b8e80941Smrg      prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE;
1307b8e80941Smrg   } else {
1308b8e80941Smrg      /* Hardware winding order is backwards from OpenGL */
1309b8e80941Smrg      prog_data->output_topology =
1310b8e80941Smrg         nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW
1311b8e80941Smrg                             : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
1312b8e80941Smrg   }
1313b8e80941Smrg
1314b8e80941Smrg   if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
1315b8e80941Smrg      fprintf(stderr, "TES Input ");
1316b8e80941Smrg      brw_print_vue_map(stderr, input_vue_map);
1317b8e80941Smrg      fprintf(stderr, "TES Output ");
1318b8e80941Smrg      brw_print_vue_map(stderr, &prog_data->base.vue_map);
1319b8e80941Smrg   }
1320b8e80941Smrg
1321b8e80941Smrg   if (is_scalar) {
1322b8e80941Smrg      fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
1323b8e80941Smrg                   &prog_data->base.base, NULL, nir, 8,
1324b8e80941Smrg                   shader_time_index, input_vue_map);
1325b8e80941Smrg      if (!v.run_tes()) {
1326b8e80941Smrg         if (error_str)
1327b8e80941Smrg            *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
1328b8e80941Smrg         return NULL;
1329b8e80941Smrg      }
1330b8e80941Smrg
1331b8e80941Smrg      prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
1332b8e80941Smrg      prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
1333b8e80941Smrg
1334b8e80941Smrg      fs_generator g(compiler, log_data, mem_ctx,
1335b8e80941Smrg                     &prog_data->base.base, v.promoted_constants, false,
1336b8e80941Smrg                     MESA_SHADER_TESS_EVAL);
1337b8e80941Smrg      if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
1338b8e80941Smrg         g.enable_debug(ralloc_asprintf(mem_ctx,
1339b8e80941Smrg                                        "%s tessellation evaluation shader %s",
1340b8e80941Smrg                                        nir->info.label ? nir->info.label
1341b8e80941Smrg                                                        : "unnamed",
1342b8e80941Smrg                                        nir->info.name));
1343b8e80941Smrg      }
1344b8e80941Smrg
1345b8e80941Smrg      g.generate_code(v.cfg, 8);
1346b8e80941Smrg
1347b8e80941Smrg      assembly = g.get_assembly();
1348b8e80941Smrg   } else {
1349b8e80941Smrg      brw::vec4_tes_visitor v(compiler, log_data, key, prog_data,
1350b8e80941Smrg			      nir, mem_ctx, shader_time_index);
1351b8e80941Smrg      if (!v.run()) {
1352b8e80941Smrg	 if (error_str)
1353b8e80941Smrg	    *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
1354b8e80941Smrg	 return NULL;
1355b8e80941Smrg      }
1356b8e80941Smrg
1357b8e80941Smrg      if (unlikely(INTEL_DEBUG & DEBUG_TES))
1358b8e80941Smrg	 v.dump_instructions();
1359b8e80941Smrg
1360b8e80941Smrg      assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
1361b8e80941Smrg                                            &prog_data->base, v.cfg);
1362b8e80941Smrg   }
1363b8e80941Smrg
1364b8e80941Smrg   return assembly;
1365b8e80941Smrg}
1366