/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_eu_defines.h"
#include "brw_eu.h"

#include "util/ralloc.h"

/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 */
void
gen6_resolve_implied_move(struct brw_codegen *p,
                          struct brw_reg *src,
                          unsigned msg_reg_nr)
{
   const struct gen_device_info *devinfo = p->devinfo;
   if (devinfo->gen < 6)
      return;

   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
              retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   *src = brw_message_reg(msg_reg_nr);
}
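
/* A minimal usage sketch (illustrative only, with a hypothetical payload
 * register): a generator that still thinks in terms of implicit moves can
 * resolve them right before emitting the SEND, e.g.
 *
 *    struct brw_reg payload = brw_vec8_grf(2, 0);   // hypothetical payload
 *    gen6_resolve_implied_move(p, &payload, 1);     // payload now aliases m1
 *    // ... emit the SEND with payload as src0 ...
 *
 * On Gen4/5 the call returns immediately and the hardware performs the move
 * itself.
 */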

static void
gen7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
{
   /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
    * "The send with EOT should use register space R112-R127 for <src>. This is
    *  to enable loading of a new thread into the same slot while the message
    *  with EOT for current thread is pending dispatch."
    *
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
    * registers required for messages with EOT.
    */
   const struct gen_device_info *devinfo = p->devinfo;
   if (devinfo->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
      reg->file = BRW_GENERAL_REGISTER_FILE;
      reg->nr += GEN7_MRF_HACK_START;
   }
}
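
/* Illustrative note: with GEN7_MRF_HACK_START anchoring the fake MRF range at
 * the top of the GRF space, a nominal message register such as m2 is simply
 * remapped to GRF r(GEN7_MRF_HACK_START + 2), which keeps EOT sends inside
 * the R112-R127 range quoted above without callers having to know about it.
 */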

void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
   const struct gen_device_info *devinfo = p->devinfo;

   if (dest.file == BRW_MESSAGE_REGISTER_FILE)
      assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
   else if (dest.file == BRW_GENERAL_REGISTER_FILE)
      assert(dest.nr < 128);

   /* The hardware has a restriction where if the destination is Byte,
    * the instruction needs to have a stride of 2 (except for packed byte
    * MOV). This seems to be required even if the destination is the NULL
    * register.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == BRW_ARF_NULL &&
       type_sz(dest.type) == 1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   gen7_convert_mrf_to_grf(p, &dest);

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
   } else {
      brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
      brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == BRW_GENERAL_REGISTER_FILE ||
                dest.file == BRW_MESSAGE_REGISTER_FILE) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW needs
             *    this to be programmed as "01".
             */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);

         /* These are different sizes in align1 vs align16:
          */
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                          dest.indirect_offset);
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                           dest.indirect_offset);
            /* Even though HorzStride is ignored in DA16 mode, hardware still
             * requires it to be programmed as '01'.
             */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }

   /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
    * or 16 (SIMD16), as that's normally correct.  However, when dealing with
    * small registers, it can be useful for us to automatically reduce it to
    * match the register size.
    */
   if (p->automatic_exec_sizes) {
      /*
       * On platforms that support fp64, we can emit instructions with a width
       * of 4 that need two SIMD8 registers and an exec_size of 8 or 16.  In
       * those cases the emitter must set the exec size correctly when the
       * instruction is emitted; this code cannot be relied on to fix it up.
       */
      bool fix_exec_size;
      if (devinfo->gen >= 6)
         fix_exec_size = dest.width < BRW_EXECUTE_4;
      else
         fix_exec_size = dest.width < BRW_EXECUTE_8;

      if (fix_exec_size)
         brw_inst_set_exec_size(devinfo, inst, dest.width);
   }
}
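
/* Illustrative example of the automatic exec-size reduction above: with
 * p->automatic_exec_sizes enabled on Gen6+, a MOV whose destination region is
 * declared <1>-wide (a scalar) has its execution size reduced to 1 to match,
 * while a full SIMD8/SIMD16 destination keeps whatever default the generator
 * set.
 */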

void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct gen_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_MESSAGE_REGISTER_FILE)
      assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
   else if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   if (devinfo->gen >= 6 &&
       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the MRF/GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
      assert(reg.file == BRW_GENERAL_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             reg.vstride == reg.width + 1);
      assert(!reg.negate && !reg.abs);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         if (reg.type == BRW_REGISTER_TYPE_DF ||
             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
            brw_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_REGISTER_TYPE_UQ ||
                  reg.type == BRW_REGISTER_TYPE_Q)
            brw_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_inst_set_imm_ud(devinfo, inst, reg.ud);

         if (type_sz(reg.type) < 8) {
            brw_inst_set_src1_reg_file(devinfo, inst,
                                       BRW_ARCHITECTURE_REGISTER_FILE);
            brw_inst_set_src1_reg_hw_type(devinfo, inst,
                                          brw_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
            } else {
               brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);

            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src0_width(devinfo, inst, reg.width);
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src0_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src0_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src0_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src0_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity that arises because we use the same region
                * descriptions for registers in align_16 as in align_1:
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
                       reg.type == BRW_REGISTER_TYPE_DF &&
                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
               /* From SNB PRM:
                *
                * "For Align16 access mode, only encodings of 0000 and 0011
                *  are allowed. Other codes are reserved."
                *
                * Presumably the DevSNB behavior applies to IVB as well.
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}


void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct gen_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < 128);

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
      assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
             reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             reg.vstride == reg.width + 1);
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *    operands only."
       */
      assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
             reg.nr != BRW_ARF_ACCUMULATOR);

      gen7_convert_mrf_to_grf(p, &reg);
      assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

      brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be immediate in two-argument instructions.
       */
      assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(type_sz(reg.type) < 8);
         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

         brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
         } else {
            brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src1_width(devinfo, inst, reg.width);
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src1_da16_swiz_x(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src1_da16_swiz_y(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src1_da16_swiz_z(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src1_da16_swiz_w(devinfo, inst,
               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity that arises because we use the same region
                * descriptions for registers in align_16 as in align_1:
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
                       reg.type == BRW_REGISTER_TYPE_DF &&
                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
               /* From SNB PRM:
                *
                * "For Align16 access mode, only encodings of 0000 and 0011
                *  are allowed. Other codes are reserved."
                *
                * Presumably the DevSNB behavior applies to IVB as well.
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}

/**
 * Specify the descriptor and extended descriptor immediate for a SEND(C)
 * message instruction.
 */
void
brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
                unsigned desc, unsigned ex_desc)
{
   const struct gen_device_info *devinfo = p->devinfo;
   assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
          brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC);
   brw_inst_set_src1_file_type(devinfo, inst,
                               BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
   brw_inst_set_send_desc(devinfo, inst, desc);
   if (devinfo->gen >= 9)
      brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
}
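
/* A minimal sketch of how callers in this file compose descriptors: the
 * message-independent part (message length, response length, header present)
 * comes from brw_message_desc(), and the message-specific bits are then set
 * with the brw_inst_set_*_msg_*() helpers, e.g.
 *
 *    brw_set_desc(p, inst,
 *                 brw_message_desc(devinfo, msg_length, response_length, true));
 *    brw_inst_set_sfid(devinfo, inst, BRW_SFID_URB);
 *
 * brw_set_desc() covers the common case; brw_set_desc_ex() is used when an
 * extended descriptor immediate is also needed.
 */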

static void brw_set_math_message( struct brw_codegen *p,
                                  brw_inst *inst,
                                  unsigned function,
                                  unsigned integer_type,
                                  bool low_precision,
                                  unsigned dataType )
{
   const struct gen_device_info *devinfo = p->devinfo;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_desc(p, inst, brw_message_desc(
                   devinfo, msg_length, response_length, false));

   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MATH);
   brw_inst_set_math_msg_function(devinfo, inst, function);
   brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type);
   brw_inst_set_math_msg_precision(devinfo, inst, low_precision);
   brw_inst_set_math_msg_saturate(devinfo, inst, brw_inst_saturate(devinfo, inst));
   brw_inst_set_math_msg_data_type(devinfo, inst, dataType);
   brw_inst_set_saturate(devinfo, inst, 0);
}
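
/* For example, a POW message carries two source operands, so msg_length is
 * inferred as 2 with a single-GRF response, while
 * INT_DIV_QUOTIENT_AND_REMAINDER both consumes two operands and returns two
 * results (quotient and remainder), giving msg_length = 2 and
 * response_length = 2.
 */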


static void brw_set_ff_sync_message(struct brw_codegen *p,
                                    brw_inst *insn,
                                    bool allocate,
                                    unsigned response_length,
                                    bool end_of_thread)
{
   const struct gen_device_info *devinfo = p->devinfo;

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, 1, response_length, true));

   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
   brw_inst_set_eot(devinfo, insn, end_of_thread);
   brw_inst_set_urb_opcode(devinfo, insn, 1); /* FF_SYNC */
   brw_inst_set_urb_allocate(devinfo, insn, allocate);
   /* The following fields are not used by FF_SYNC: */
   brw_inst_set_urb_global_offset(devinfo, insn, 0);
   brw_inst_set_urb_swizzle_control(devinfo, insn, 0);
   brw_inst_set_urb_used(devinfo, insn, 0);
   brw_inst_set_urb_complete(devinfo, insn, 0);
}

static void brw_set_urb_message( struct brw_codegen *p,
                                 brw_inst *insn,
                                 enum brw_urb_write_flags flags,
                                 unsigned msg_length,
                                 unsigned response_length,
                                 unsigned offset,
                                 unsigned swizzle_control )
{
   const struct gen_device_info *devinfo = p->devinfo;

   assert(devinfo->gen < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
   assert(devinfo->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
   assert(devinfo->gen >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, msg_length, response_length, true));

   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
   brw_inst_set_eot(devinfo, insn, !!(flags & BRW_URB_WRITE_EOT));

   if (flags & BRW_URB_WRITE_OWORD) {
      assert(msg_length == 2); /* header + one OWORD of data */
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_OWORD);
   } else {
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_HWORD);
   }

   brw_inst_set_urb_global_offset(devinfo, insn, offset);
   brw_inst_set_urb_swizzle_control(devinfo, insn, swizzle_control);

   if (devinfo->gen < 8) {
      brw_inst_set_urb_complete(devinfo, insn, !!(flags & BRW_URB_WRITE_COMPLETE));
   }

   if (devinfo->gen < 7) {
      brw_inst_set_urb_allocate(devinfo, insn, !!(flags & BRW_URB_WRITE_ALLOCATE));
      brw_inst_set_urb_used(devinfo, insn, !(flags & BRW_URB_WRITE_UNUSED));
   } else {
      brw_inst_set_urb_per_slot_offset(devinfo, insn,
         !!(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
   }
}

static void
gen7_set_dp_scratch_message(struct brw_codegen *p,
                            brw_inst *inst,
                            bool write,
                            bool dword,
                            bool invalidate_after_read,
                            unsigned num_regs,
                            unsigned addr_offset,
                            unsigned mlen,
                            unsigned rlen,
                            bool header_present)
{
   const struct gen_device_info *devinfo = p->devinfo;
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
          (devinfo->gen >= 8 && num_regs == 8));
   const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) :
                                num_regs - 1);

   brw_set_desc(p, inst, brw_message_desc(
                   devinfo, mlen, rlen, header_present));

   brw_inst_set_sfid(devinfo, inst, GEN7_SFID_DATAPORT_DATA_CACHE);
   brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
   brw_inst_set_scratch_read_write(devinfo, inst, write);
   brw_inst_set_scratch_type(devinfo, inst, dword);
   brw_inst_set_scratch_invalidate_after_read(devinfo, inst, invalidate_after_read);
   brw_inst_set_scratch_block_size(devinfo, inst, block_size);
   brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
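
/* Note on the block size encoding above: on Gen8+ the field holds
 * log2(num_regs), so 1/2/4/8 registers encode as 0/1/2/3, while on Gen7 it is
 * simply num_regs - 1 and only 1, 2, or 4 registers are representable.
 */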

static void
brw_inst_set_state(const struct gen_device_info *devinfo,
                   brw_inst *insn,
                   const struct brw_insn_state *state)
{
   brw_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_inst_set_group(devinfo, insn, state->group);
   brw_inst_set_compression(devinfo, insn, state->compressed);
   brw_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_inst_set_mask_control(devinfo, insn, state->mask_control);
   brw_inst_set_saturate(devinfo, insn, state->saturate);
   brw_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   if (is_3src(devinfo, brw_inst_opcode(devinfo, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      if (devinfo->gen >= 7)
         brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      if (devinfo->gen >= 7)
         brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->gen >= 6)
      brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}

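/* Append a new instruction of the given opcode to the program store, growing
 * the store (by doubling) when necessary, and apply the current default
 * instruction state.  Each slot in p->store holds one uncompacted, 16-byte
 * instruction, hence the fixed 16-byte advance of next_insn_offset.
 */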
#define next_insn brw_next_insn
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   if (p->nr_insn + 1 > p->store_size) {
      p->store_size <<= 1;
      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
   }

   p->next_insn_offset += 16;
   insn = &p->store[p->nr_insn++];

   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(devinfo, insn, opcode);

   /* Apply the default instruction state */
   brw_inst_set_state(devinfo, insn, p->current);

   return insn;
}

static brw_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src)
{
   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

static brw_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
   /* 64-bit immediates are only supported on 1-src instructions */
   assert(src0.file != BRW_IMMEDIATE_VALUE || type_sz(src0.type) <= 4);
   assert(src1.file != BRW_IMMEDIATE_VALUE || type_sz(src1.type) <= 4);

   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}

static int
get_3src_subreg_nr(struct brw_reg reg)
{
   /* Normally, SubRegNum is in bytes (0..31).  However, 3-src instructions
    * use 32-bit units (components 0..7).  Since they only support F/D/UD
    * types, this doesn't lose any flexibility, but uses fewer bits.
    */
   return reg.subnr / 4;
}
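
/* For example, a register with a byte subnr of 8 encodes as component 2
 * (the third 32-bit element) in a 3-source instruction.
 */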

static enum gen10_align1_3src_vertical_stride
to_3src_align1_vstride(enum brw_vertical_stride vstride)
{
   switch (vstride) {
   case BRW_VERTICAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
   case BRW_VERTICAL_STRIDE_2:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
   case BRW_VERTICAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
   case BRW_VERTICAL_STRIDE_8:
   case BRW_VERTICAL_STRIDE_16:
      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
   default:
      unreachable("invalid vstride");
   }
}


static enum gen10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
{
   switch (hstride) {
   case BRW_HORIZONTAL_STRIDE_0:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
   case BRW_HORIZONTAL_STRIDE_1:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
   case BRW_HORIZONTAL_STRIDE_2:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
   case BRW_HORIZONTAL_STRIDE_4:
      return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
   default:
      unreachable("invalid hstride");
   }
}

static brw_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct gen_device_info *devinfo = p->devinfo;
   brw_inst *inst = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(dest.nr < 128);
   assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128);
   assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128);
   assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);

      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
         brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
                                           BRW_ALIGN1_3SRC_ACCUMULATOR);
         brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
                                           BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
         brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      }
      brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8);

      brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);

      if (brw_reg_type_is_floating_point(dest.type)) {
         brw_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
      } else {
         brw_inst_set_3src_a1_exec_type(devinfo, inst,
                                        BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
      }

      brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
      brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
      brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
      brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);

      brw_inst_set_3src_a1_src0_vstride(devinfo, inst,
                                        to_3src_align1_vstride(src0.vstride));
      brw_inst_set_3src_a1_src1_vstride(devinfo, inst,
                                        to_3src_align1_vstride(src1.vstride));
      /* no vstride on src2 */

      brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
                                        to_3src_align1_hstride(src0.hstride));
      brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
                                        to_3src_align1_hstride(src1.hstride));
      brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
                                        to_3src_align1_hstride(src2.hstride));

      brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr);
      if (src0.type == BRW_REGISTER_TYPE_NF) {
         brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      }
      brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);

      brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr);
      if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) {
         brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
      } else {
         brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      }
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);

      brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr);
      brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);

      assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
             src0.file == BRW_IMMEDIATE_VALUE ||
             (src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
              src0.type == BRW_REGISTER_TYPE_NF));
      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
             src1.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(src2.file == BRW_GENERAL_REGISTER_FILE ||
             src2.file == BRW_IMMEDIATE_VALUE);

      brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
                                         src0.file == BRW_GENERAL_REGISTER_FILE ?
                                         BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                         BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
      brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
                                         src1.file == BRW_GENERAL_REGISTER_FILE ?
                                         BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                         BRW_ALIGN1_3SRC_ACCUMULATOR);
      brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
                                         src2.file == BRW_GENERAL_REGISTER_FILE ?
                                         BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
                                         BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
   } else {
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_MESSAGE_REGISTER_FILE);
      assert(dest.type == BRW_REGISTER_TYPE_F  ||
             dest.type == BRW_REGISTER_TYPE_DF ||
             dest.type == BRW_REGISTER_TYPE_D  ||
             dest.type == BRW_REGISTER_TYPE_UD ||
             (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8));
      if (devinfo->gen == 6) {
         brw_inst_set_3src_a16_dst_reg_file(devinfo, inst,
                                            dest.file == BRW_MESSAGE_REGISTER_FILE);
      }
      brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);

      assert(src0.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
      brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
      brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
      brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
      brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
      brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
                                          src0.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src1.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
      brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
      brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
      brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
      brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
      brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
                                          src1.vstride == BRW_VERTICAL_STRIDE_0);

      assert(src2.file == BRW_GENERAL_REGISTER_FILE);
      brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
      brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
      brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
      brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
      brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
      brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
                                          src2.vstride == BRW_VERTICAL_STRIDE_0);

      if (devinfo->gen >= 7) {
         /* Set both the source and destination types based on dest.type,
          * ignoring the source register types.  The MAD and LRP emitters ensure
          * that all four types are float.  The BFE and BFI2 emitters, however,
          * may send us mixed D and UD types and want us to ignore that and use
          * the destination type.
          */
         brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
         brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);

         /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
          *
          *    "Three source instructions can use operands with mixed-mode
          *     precision. When SrcType field is set to :f or :hf it defines
          *     precision for source 0 only, and fields Src1Type and Src2Type
          *     define precision for other source operands:
          *
          *     0b = :f. Single precision Float (32-bit).
          *     1b = :hf. Half precision Float (16-bit)."
          */
         if (src1.type == BRW_REGISTER_TYPE_HF)
            brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);

         if (src2.type == BRW_REGISTER_TYPE_HF)
            brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
      }
   }

   return inst;
}


/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)                                        \
brw_inst *brw_##OP(struct brw_codegen *p,               \
              struct brw_reg dest,                      \
              struct brw_reg src0)                      \
{                                                       \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);     \
}

#define ALU2(OP)                                        \
brw_inst *brw_##OP(struct brw_codegen *p,               \
              struct brw_reg dest,                      \
              struct brw_reg src0,                      \
              struct brw_reg src1)                      \
{                                                       \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
}

#define ALU3(OP)                                        \
brw_inst *brw_##OP(struct brw_codegen *p,               \
              struct brw_reg dest,                      \
              struct brw_reg src0,                      \
              struct brw_reg src1,                      \
              struct brw_reg src2)                      \
{                                                       \
   if (p->current->access_mode == BRW_ALIGN_16) {       \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)        \
         src0.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)        \
         src1.swizzle = BRW_SWIZZLE_XXXX;               \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)        \
         src2.swizzle = BRW_SWIZZLE_XXXX;               \
   }                                                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

#define ALU3F(OP)                                               \
brw_inst *brw_##OP(struct brw_codegen *p,                       \
                                 struct brw_reg dest,           \
                                 struct brw_reg src0,           \
                                 struct brw_reg src1,           \
                                 struct brw_reg src2)           \
{                                                               \
   assert(dest.type == BRW_REGISTER_TYPE_F ||                   \
          dest.type == BRW_REGISTER_TYPE_DF);                   \
   if (dest.type == BRW_REGISTER_TYPE_F) {                      \
      assert(src0.type == BRW_REGISTER_TYPE_F);                 \
      assert(src1.type == BRW_REGISTER_TYPE_F);                 \
      assert(src2.type == BRW_REGISTER_TYPE_F);                 \
   } else if (dest.type == BRW_REGISTER_TYPE_DF) {              \
      assert(src0.type == BRW_REGISTER_TYPE_DF);                \
      assert(src1.type == BRW_REGISTER_TYPE_DF);                \
      assert(src2.type == BRW_REGISTER_TYPE_DF);                \
   }                                                            \
                                                                \
   if (p->current->access_mode == BRW_ALIGN_16) {               \
      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                \
         src0.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                \
         src1.swizzle = BRW_SWIZZLE_XXXX;                       \
      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                \
         src2.swizzle = BRW_SWIZZLE_XXXX;                       \
   }                                                            \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                                             \
void brw_##OP(struct brw_codegen *p,                                          \
              struct brw_reg dest,                                            \
              struct brw_reg src)                                             \
{                                                                             \
   const struct gen_device_info *devinfo = p->devinfo;                        \
   brw_inst *rnd, *add;                                                       \
   rnd = next_insn(p, BRW_OPCODE_##OP);                                       \
   brw_set_dest(p, rnd, dest);                                                \
   brw_set_src0(p, rnd, src);                                                 \
                                                                              \
   if (devinfo->gen < 6) {                                                    \
      /* turn on round-increments */                                          \
      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);            \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                          \
      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);          \
   }                                                                          \
}
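
/* A rough sketch of the sequence ROUND(RNDZ) emits on pre-Gen6 hardware
 * (disassembly-style notation, for illustration):
 *
 *    rndz.r(8)      dst  src          // sets the per-channel increment flag
 *    (+f0) add(8)   dst  dst  1.0F    // predicated fix-up
 *
 * On Gen6+ only the first instruction is emitted, since the hardware rounds
 * correctly on its own.
 */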


ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

ROUND(RNDZ)
ROUND(RNDE)

brw_inst *
brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
{
   const struct gen_device_info *devinfo = p->devinfo;

   /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
    * To avoid the problems that causes, we use an <X,2,0> source region to
    * read each element twice.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       brw_get_default_access_mode(p) == BRW_ALIGN_1 &&
       dest.type == BRW_REGISTER_TYPE_DF &&
       (src0.type == BRW_REGISTER_TYPE_F ||
        src0.type == BRW_REGISTER_TYPE_D ||
        src0.type == BRW_REGISTER_TYPE_UD) &&
       !has_scalar_region(src0)) {
      assert(src0.vstride == src0.width + src0.hstride);
      src0.vstride = src0.hstride;
      src0.width = BRW_WIDTH_2;
      src0.hstride = BRW_HORIZONTAL_STRIDE_0;
   }

   return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
}
1033b8e80941Smrg
1034b8e80941Smrgbrw_inst *
1035b8e80941Smrgbrw_ADD(struct brw_codegen *p, struct brw_reg dest,
1036b8e80941Smrg        struct brw_reg src0, struct brw_reg src1)
1037b8e80941Smrg{
1038b8e80941Smrg   /* 6.2.2: add */
1039b8e80941Smrg   if (src0.type == BRW_REGISTER_TYPE_F ||
1040b8e80941Smrg       (src0.file == BRW_IMMEDIATE_VALUE &&
1041b8e80941Smrg	src0.type == BRW_REGISTER_TYPE_VF)) {
1042b8e80941Smrg      assert(src1.type != BRW_REGISTER_TYPE_UD);
1043b8e80941Smrg      assert(src1.type != BRW_REGISTER_TYPE_D);
1044b8e80941Smrg   }
1045b8e80941Smrg
1046b8e80941Smrg   if (src1.type == BRW_REGISTER_TYPE_F ||
1047b8e80941Smrg       (src1.file == BRW_IMMEDIATE_VALUE &&
1048b8e80941Smrg	src1.type == BRW_REGISTER_TYPE_VF)) {
1049b8e80941Smrg      assert(src0.type != BRW_REGISTER_TYPE_UD);
1050b8e80941Smrg      assert(src0.type != BRW_REGISTER_TYPE_D);
1051b8e80941Smrg   }
1052b8e80941Smrg
1053b8e80941Smrg   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1054b8e80941Smrg}
1055b8e80941Smrg
1056b8e80941Smrgbrw_inst *
1057b8e80941Smrgbrw_AVG(struct brw_codegen *p, struct brw_reg dest,
1058b8e80941Smrg        struct brw_reg src0, struct brw_reg src1)
1059b8e80941Smrg{
1060b8e80941Smrg   assert(dest.type == src0.type);
1061b8e80941Smrg   assert(src0.type == src1.type);
1062b8e80941Smrg   switch (src0.type) {
1063b8e80941Smrg   case BRW_REGISTER_TYPE_B:
1064b8e80941Smrg   case BRW_REGISTER_TYPE_UB:
1065b8e80941Smrg   case BRW_REGISTER_TYPE_W:
1066b8e80941Smrg   case BRW_REGISTER_TYPE_UW:
1067b8e80941Smrg   case BRW_REGISTER_TYPE_D:
1068b8e80941Smrg   case BRW_REGISTER_TYPE_UD:
1069b8e80941Smrg      break;
1070b8e80941Smrg   default:
1071b8e80941Smrg      unreachable("Bad type for brw_AVG");
1072b8e80941Smrg   }
1073b8e80941Smrg
1074b8e80941Smrg   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1075b8e80941Smrg}
1076b8e80941Smrg
1077b8e80941Smrgbrw_inst *
1078b8e80941Smrgbrw_MUL(struct brw_codegen *p, struct brw_reg dest,
1079b8e80941Smrg        struct brw_reg src0, struct brw_reg src1)
1080b8e80941Smrg{
1081b8e80941Smrg   /* 6.32.38: mul */
1082b8e80941Smrg   if (src0.type == BRW_REGISTER_TYPE_D ||
1083b8e80941Smrg       src0.type == BRW_REGISTER_TYPE_UD ||
1084b8e80941Smrg       src1.type == BRW_REGISTER_TYPE_D ||
1085b8e80941Smrg       src1.type == BRW_REGISTER_TYPE_UD) {
1086b8e80941Smrg      assert(dest.type != BRW_REGISTER_TYPE_F);
1087b8e80941Smrg   }
1088b8e80941Smrg
1089b8e80941Smrg   if (src0.type == BRW_REGISTER_TYPE_F ||
1090b8e80941Smrg       (src0.file == BRW_IMMEDIATE_VALUE &&
1091b8e80941Smrg	src0.type == BRW_REGISTER_TYPE_VF)) {
1092b8e80941Smrg      assert(src1.type != BRW_REGISTER_TYPE_UD);
1093b8e80941Smrg      assert(src1.type != BRW_REGISTER_TYPE_D);
1094b8e80941Smrg   }
1095b8e80941Smrg
1096b8e80941Smrg   if (src1.type == BRW_REGISTER_TYPE_F ||
1097b8e80941Smrg       (src1.file == BRW_IMMEDIATE_VALUE &&
1098b8e80941Smrg	src1.type == BRW_REGISTER_TYPE_VF)) {
1099b8e80941Smrg      assert(src0.type != BRW_REGISTER_TYPE_UD);
1100b8e80941Smrg      assert(src0.type != BRW_REGISTER_TYPE_D);
1101b8e80941Smrg   }
1102b8e80941Smrg
1103b8e80941Smrg   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1104b8e80941Smrg	  src0.nr != BRW_ARF_ACCUMULATOR);
1105b8e80941Smrg   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1106b8e80941Smrg	  src1.nr != BRW_ARF_ACCUMULATOR);
1107b8e80941Smrg
1108b8e80941Smrg   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1109b8e80941Smrg}
1110b8e80941Smrg
1111b8e80941Smrgbrw_inst *
1112b8e80941Smrgbrw_LINE(struct brw_codegen *p, struct brw_reg dest,
1113b8e80941Smrg         struct brw_reg src0, struct brw_reg src1)
1114b8e80941Smrg{
1115b8e80941Smrg   src0.vstride = BRW_VERTICAL_STRIDE_0;
1116b8e80941Smrg   src0.width = BRW_WIDTH_1;
1117b8e80941Smrg   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1118b8e80941Smrg   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
1119b8e80941Smrg}
1120b8e80941Smrg
1121b8e80941Smrgbrw_inst *
1122b8e80941Smrgbrw_PLN(struct brw_codegen *p, struct brw_reg dest,
1123b8e80941Smrg        struct brw_reg src0, struct brw_reg src1)
1124b8e80941Smrg{
1125b8e80941Smrg   src0.vstride = BRW_VERTICAL_STRIDE_0;
1126b8e80941Smrg   src0.width = BRW_WIDTH_1;
1127b8e80941Smrg   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1128b8e80941Smrg   src1.vstride = BRW_VERTICAL_STRIDE_8;
1129b8e80941Smrg   src1.width = BRW_WIDTH_8;
1130b8e80941Smrg   src1.hstride = BRW_HORIZONTAL_STRIDE_1;
1131b8e80941Smrg   return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
1132b8e80941Smrg}
1133b8e80941Smrg
1134b8e80941Smrgbrw_inst *
1135b8e80941Smrgbrw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
1136b8e80941Smrg{
1137b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1138b8e80941Smrg   const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
   /* The F32TO16 instruction doesn't support 32-bit destination types in
    * Align1 mode, and neither does the Gen8 implementation of it as a
    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
    * an undocumented feature.
    */
1144b8e80941Smrg   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
1145b8e80941Smrg                                 (!align16 || devinfo->gen >= 8));
1146b8e80941Smrg   brw_inst *inst;
1147b8e80941Smrg
1148b8e80941Smrg   if (align16) {
1149b8e80941Smrg      assert(dst.type == BRW_REGISTER_TYPE_UD);
1150b8e80941Smrg   } else {
1151b8e80941Smrg      assert(dst.type == BRW_REGISTER_TYPE_UD ||
1152b8e80941Smrg             dst.type == BRW_REGISTER_TYPE_W ||
1153b8e80941Smrg             dst.type == BRW_REGISTER_TYPE_UW ||
1154b8e80941Smrg             dst.type == BRW_REGISTER_TYPE_HF);
1155b8e80941Smrg   }
1156b8e80941Smrg
1157b8e80941Smrg   brw_push_insn_state(p);
1158b8e80941Smrg
1159b8e80941Smrg   if (needs_zero_fill) {
1160b8e80941Smrg      brw_set_default_access_mode(p, BRW_ALIGN_1);
1161b8e80941Smrg      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
1162b8e80941Smrg   }
1163b8e80941Smrg
1164b8e80941Smrg   if (devinfo->gen >= 8) {
1165b8e80941Smrg      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
1166b8e80941Smrg   } else {
1167b8e80941Smrg      assert(devinfo->gen == 7);
1168b8e80941Smrg      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
1169b8e80941Smrg   }
1170b8e80941Smrg
1171b8e80941Smrg   if (needs_zero_fill) {
1172b8e80941Smrg      brw_inst_set_no_dd_clear(devinfo, inst, true);
1173b8e80941Smrg      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
1174b8e80941Smrg      brw_inst_set_no_dd_check(devinfo, inst, true);
1175b8e80941Smrg   }
1176b8e80941Smrg
1177b8e80941Smrg   brw_pop_insn_state(p);
1178b8e80941Smrg   return inst;
1179b8e80941Smrg}
1180b8e80941Smrg
1181b8e80941Smrgbrw_inst *
1182b8e80941Smrgbrw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
1183b8e80941Smrg{
1184b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1185b8e80941Smrg   bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
1186b8e80941Smrg
1187b8e80941Smrg   if (align16) {
1188b8e80941Smrg      assert(src.type == BRW_REGISTER_TYPE_UD);
1189b8e80941Smrg   } else {
1190b8e80941Smrg      /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
1191b8e80941Smrg       *
1192b8e80941Smrg       *   Because this instruction does not have a 16-bit floating-point
1193b8e80941Smrg       *   type, the source data type must be Word (W). The destination type
1194b8e80941Smrg       *   must be F (Float).
1195b8e80941Smrg       */
1196b8e80941Smrg      if (src.type == BRW_REGISTER_TYPE_UD)
1197b8e80941Smrg         src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
1198b8e80941Smrg
1199b8e80941Smrg      assert(src.type == BRW_REGISTER_TYPE_W ||
1200b8e80941Smrg             src.type == BRW_REGISTER_TYPE_UW ||
1201b8e80941Smrg             src.type == BRW_REGISTER_TYPE_HF);
1202b8e80941Smrg   }
1203b8e80941Smrg
1204b8e80941Smrg   if (devinfo->gen >= 8) {
1205b8e80941Smrg      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
1206b8e80941Smrg   } else {
1207b8e80941Smrg      assert(devinfo->gen == 7);
1208b8e80941Smrg      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
1209b8e80941Smrg   }
1210b8e80941Smrg}
1211b8e80941Smrg
1212b8e80941Smrg
1213b8e80941Smrgvoid brw_NOP(struct brw_codegen *p)
1214b8e80941Smrg{
1215b8e80941Smrg   brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
1216b8e80941Smrg   memset(insn, 0, sizeof(*insn));
1217b8e80941Smrg   brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP);
1218b8e80941Smrg}
1219b8e80941Smrg
1220b8e80941Smrg
1221b8e80941Smrg
1222b8e80941Smrg
1223b8e80941Smrg
1224b8e80941Smrg/***********************************************************************
1225b8e80941Smrg * Comparisons, if/else/endif
1226b8e80941Smrg */
1227b8e80941Smrg
1228b8e80941Smrgbrw_inst *
1229b8e80941Smrgbrw_JMPI(struct brw_codegen *p, struct brw_reg index,
1230b8e80941Smrg         unsigned predicate_control)
1231b8e80941Smrg{
1232b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1233b8e80941Smrg   struct brw_reg ip = brw_ip_reg();
1234b8e80941Smrg   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
1235b8e80941Smrg
1236b8e80941Smrg   brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
1237b8e80941Smrg   brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
1238b8e80941Smrg   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1239b8e80941Smrg   brw_inst_set_pred_control(devinfo, inst, predicate_control);
1240b8e80941Smrg
1241b8e80941Smrg   return inst;
1242b8e80941Smrg}
1243b8e80941Smrg
1244b8e80941Smrgstatic void
1245b8e80941Smrgpush_if_stack(struct brw_codegen *p, brw_inst *inst)
1246b8e80941Smrg{
1247b8e80941Smrg   p->if_stack[p->if_stack_depth] = inst - p->store;
1248b8e80941Smrg
1249b8e80941Smrg   p->if_stack_depth++;
1250b8e80941Smrg   if (p->if_stack_array_size <= p->if_stack_depth) {
1251b8e80941Smrg      p->if_stack_array_size *= 2;
1252b8e80941Smrg      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1253b8e80941Smrg			     p->if_stack_array_size);
1254b8e80941Smrg   }
1255b8e80941Smrg}
1256b8e80941Smrg
1257b8e80941Smrgstatic brw_inst *
1258b8e80941Smrgpop_if_stack(struct brw_codegen *p)
1259b8e80941Smrg{
1260b8e80941Smrg   p->if_stack_depth--;
1261b8e80941Smrg   return &p->store[p->if_stack[p->if_stack_depth]];
1262b8e80941Smrg}
1263b8e80941Smrg
1264b8e80941Smrgstatic void
1265b8e80941Smrgpush_loop_stack(struct brw_codegen *p, brw_inst *inst)
1266b8e80941Smrg{
1267b8e80941Smrg   if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
1268b8e80941Smrg      p->loop_stack_array_size *= 2;
1269b8e80941Smrg      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1270b8e80941Smrg			       p->loop_stack_array_size);
1271b8e80941Smrg      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1272b8e80941Smrg				     p->loop_stack_array_size);
1273b8e80941Smrg   }
1274b8e80941Smrg
1275b8e80941Smrg   p->loop_stack[p->loop_stack_depth] = inst - p->store;
1276b8e80941Smrg   p->loop_stack_depth++;
1277b8e80941Smrg   p->if_depth_in_loop[p->loop_stack_depth] = 0;
1278b8e80941Smrg}
1279b8e80941Smrg
1280b8e80941Smrgstatic brw_inst *
1281b8e80941Smrgget_inner_do_insn(struct brw_codegen *p)
1282b8e80941Smrg{
1283b8e80941Smrg   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1284b8e80941Smrg}
1285b8e80941Smrg
/* The EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, e.g. on each of the
 * 16 pixel values in our WM programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
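/* A usage sketch (illustrative; the comparison operands are placeholders):
 * non-uniform control flow is typically emitted as
 *
 *    brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_D),
 *            BRW_CONDITIONAL_NZ, cond, brw_imm_d(0));
 *    brw_IF(p, BRW_EXECUTE_8);
 *       ... then-block instructions ...
 *    brw_ELSE(p);
 *       ... else-block instructions ...
 *    brw_ENDIF(p);
 *
 * brw_ENDIF() pops the IF (and optional ELSE) off the if-stack and patches
 * their jump targets via patch_IF_ELSE().
 */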
1299b8e80941Smrgbrw_inst *
1300b8e80941Smrgbrw_IF(struct brw_codegen *p, unsigned execute_size)
1301b8e80941Smrg{
1302b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1303b8e80941Smrg   brw_inst *insn;
1304b8e80941Smrg
1305b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_IF);
1306b8e80941Smrg
1307b8e80941Smrg   /* Override the defaults for this instruction:
1308b8e80941Smrg    */
1309b8e80941Smrg   if (devinfo->gen < 6) {
1310b8e80941Smrg      brw_set_dest(p, insn, brw_ip_reg());
1311b8e80941Smrg      brw_set_src0(p, insn, brw_ip_reg());
1312b8e80941Smrg      brw_set_src1(p, insn, brw_imm_d(0x0));
1313b8e80941Smrg   } else if (devinfo->gen == 6) {
1314b8e80941Smrg      brw_set_dest(p, insn, brw_imm_w(0));
1315b8e80941Smrg      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
1316b8e80941Smrg      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1317b8e80941Smrg      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1318b8e80941Smrg   } else if (devinfo->gen == 7) {
1319b8e80941Smrg      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1320b8e80941Smrg      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1321b8e80941Smrg      brw_set_src1(p, insn, brw_imm_w(0));
1322b8e80941Smrg      brw_inst_set_jip(devinfo, insn, 0);
1323b8e80941Smrg      brw_inst_set_uip(devinfo, insn, 0);
1324b8e80941Smrg   } else {
1325b8e80941Smrg      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1326b8e80941Smrg      brw_set_src0(p, insn, brw_imm_d(0));
1327b8e80941Smrg      brw_inst_set_jip(devinfo, insn, 0);
1328b8e80941Smrg      brw_inst_set_uip(devinfo, insn, 0);
1329b8e80941Smrg   }
1330b8e80941Smrg
1331b8e80941Smrg   brw_inst_set_exec_size(devinfo, insn, execute_size);
1332b8e80941Smrg   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1333b8e80941Smrg   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
1334b8e80941Smrg   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1335b8e80941Smrg   if (!p->single_program_flow && devinfo->gen < 6)
1336b8e80941Smrg      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
1337b8e80941Smrg
1338b8e80941Smrg   push_if_stack(p, insn);
1339b8e80941Smrg   p->if_depth_in_loop[p->loop_stack_depth]++;
1340b8e80941Smrg   return insn;
1341b8e80941Smrg}
1342b8e80941Smrg
1343b8e80941Smrg/* This function is only used for gen6-style IF instructions with an
1344b8e80941Smrg * embedded comparison (conditional modifier).  It is not used on gen7.
1345b8e80941Smrg */
1346b8e80941Smrgbrw_inst *
1347b8e80941Smrggen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
1348b8e80941Smrg	struct brw_reg src0, struct brw_reg src1)
1349b8e80941Smrg{
1350b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1351b8e80941Smrg   brw_inst *insn;
1352b8e80941Smrg
1353b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_IF);
1354b8e80941Smrg
1355b8e80941Smrg   brw_set_dest(p, insn, brw_imm_w(0));
1356b8e80941Smrg   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1357b8e80941Smrg   brw_inst_set_gen6_jump_count(devinfo, insn, 0);
1358b8e80941Smrg   brw_set_src0(p, insn, src0);
1359b8e80941Smrg   brw_set_src1(p, insn, src1);
1360b8e80941Smrg
1361b8e80941Smrg   assert(brw_inst_qtr_control(devinfo, insn) == BRW_COMPRESSION_NONE);
1362b8e80941Smrg   assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
1363b8e80941Smrg   brw_inst_set_cond_modifier(devinfo, insn, conditional);
1364b8e80941Smrg
1365b8e80941Smrg   push_if_stack(p, insn);
1366b8e80941Smrg   return insn;
1367b8e80941Smrg}
1368b8e80941Smrg
1369b8e80941Smrg/**
1370b8e80941Smrg * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1371b8e80941Smrg */
1372b8e80941Smrgstatic void
1373b8e80941Smrgconvert_IF_ELSE_to_ADD(struct brw_codegen *p,
1374b8e80941Smrg                       brw_inst *if_inst, brw_inst *else_inst)
1375b8e80941Smrg{
1376b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1377b8e80941Smrg
1378b8e80941Smrg   /* The next instruction (where the ENDIF would be, if it existed) */
1379b8e80941Smrg   brw_inst *next_inst = &p->store[p->nr_insn];
1380b8e80941Smrg
1381b8e80941Smrg   assert(p->single_program_flow);
1382b8e80941Smrg   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
1383b8e80941Smrg   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);
1384b8e80941Smrg   assert(brw_inst_exec_size(devinfo, if_inst) == BRW_EXECUTE_1);
1385b8e80941Smrg
1386b8e80941Smrg   /* Convert IF to an ADD instruction that moves the instruction pointer
1387b8e80941Smrg    * to the first instruction of the ELSE block.  If there is no ELSE
1388b8e80941Smrg    * block, point to where ENDIF would be.  Reverse the predicate.
1389b8e80941Smrg    *
1390b8e80941Smrg    * There's no need to execute an ENDIF since we don't need to do any
1391b8e80941Smrg    * stack operations, and if we're currently executing, we just want to
1392b8e80941Smrg    * continue normally.
1393b8e80941Smrg    */
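   /* Informational note: the immediates below are byte offsets added to IP;
    * each native instruction on these platforms is 16 bytes, hence the
    * "* 16" scaling.
    */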
1394b8e80941Smrg   brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_ADD);
1395b8e80941Smrg   brw_inst_set_pred_inv(devinfo, if_inst, true);
1396b8e80941Smrg
1397b8e80941Smrg   if (else_inst != NULL) {
1398b8e80941Smrg      /* Convert ELSE to an ADD instruction that points where the ENDIF
1399b8e80941Smrg       * would be.
1400b8e80941Smrg       */
1401b8e80941Smrg      brw_inst_set_opcode(devinfo, else_inst, BRW_OPCODE_ADD);
1402b8e80941Smrg
1403b8e80941Smrg      brw_inst_set_imm_ud(devinfo, if_inst, (else_inst - if_inst + 1) * 16);
1404b8e80941Smrg      brw_inst_set_imm_ud(devinfo, else_inst, (next_inst - else_inst) * 16);
1405b8e80941Smrg   } else {
1406b8e80941Smrg      brw_inst_set_imm_ud(devinfo, if_inst, (next_inst - if_inst) * 16);
1407b8e80941Smrg   }
1408b8e80941Smrg}
1409b8e80941Smrg
1410b8e80941Smrg/**
1411b8e80941Smrg * Patch IF and ELSE instructions with appropriate jump targets.
1412b8e80941Smrg */
1413b8e80941Smrgstatic void
1414b8e80941Smrgpatch_IF_ELSE(struct brw_codegen *p,
1415b8e80941Smrg              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
1416b8e80941Smrg{
1417b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1418b8e80941Smrg
1419b8e80941Smrg   /* We shouldn't be patching IF and ELSE instructions in single program flow
1420b8e80941Smrg    * mode when gen < 6, because in single program flow mode on those
1421b8e80941Smrg    * platforms, we convert flow control instructions to conditional ADDs that
1422b8e80941Smrg    * operate on IP (see brw_ENDIF).
1423b8e80941Smrg    *
1424b8e80941Smrg    * However, on Gen6, writing to IP doesn't work in single program flow mode
1425b8e80941Smrg    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1426b8e80941Smrg    * not be updated by non-flow control instructions.").  And on later
1427b8e80941Smrg    * platforms, there is no significant benefit to converting control flow
1428b8e80941Smrg    * instructions to conditional ADDs.  So we do patch IF and ELSE
1429b8e80941Smrg    * instructions in single program flow mode on those platforms.
1430b8e80941Smrg    */
1431b8e80941Smrg   if (devinfo->gen < 6)
1432b8e80941Smrg      assert(!p->single_program_flow);
1433b8e80941Smrg
1434b8e80941Smrg   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
1435b8e80941Smrg   assert(endif_inst != NULL);
1436b8e80941Smrg   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);
1437b8e80941Smrg
1438b8e80941Smrg   unsigned br = brw_jump_scale(devinfo);
1439b8e80941Smrg
1440b8e80941Smrg   assert(brw_inst_opcode(devinfo, endif_inst) == BRW_OPCODE_ENDIF);
1441b8e80941Smrg   brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));
1442b8e80941Smrg
1443b8e80941Smrg   if (else_inst == NULL) {
1444b8e80941Smrg      /* Patch IF -> ENDIF */
1445b8e80941Smrg      if (devinfo->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations when
          * all channels are false, just a jump past the ENDIF.
          */
1449b8e80941Smrg         brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_IFF);
1450b8e80941Smrg         brw_inst_set_gen4_jump_count(devinfo, if_inst,
1451b8e80941Smrg                                      br * (endif_inst - if_inst + 1));
1452b8e80941Smrg         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
1453b8e80941Smrg      } else if (devinfo->gen == 6) {
1454b8e80941Smrg	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1455b8e80941Smrg         brw_inst_set_gen6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst));
1456b8e80941Smrg      } else {
1457b8e80941Smrg         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
1458b8e80941Smrg         brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
1459b8e80941Smrg      }
1460b8e80941Smrg   } else {
1461b8e80941Smrg      brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));
1462b8e80941Smrg
1463b8e80941Smrg      /* Patch IF -> ELSE */
1464b8e80941Smrg      if (devinfo->gen < 6) {
1465b8e80941Smrg         brw_inst_set_gen4_jump_count(devinfo, if_inst,
1466b8e80941Smrg                                      br * (else_inst - if_inst));
1467b8e80941Smrg         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
1468b8e80941Smrg      } else if (devinfo->gen == 6) {
1469b8e80941Smrg         brw_inst_set_gen6_jump_count(devinfo, if_inst,
1470b8e80941Smrg                                      br * (else_inst - if_inst + 1));
1471b8e80941Smrg      }
1472b8e80941Smrg
1473b8e80941Smrg      /* Patch ELSE -> ENDIF */
1474b8e80941Smrg      if (devinfo->gen < 6) {
1475b8e80941Smrg	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1476b8e80941Smrg	  * matching ENDIF.
1477b8e80941Smrg	  */
1478b8e80941Smrg         brw_inst_set_gen4_jump_count(devinfo, else_inst,
1479b8e80941Smrg                                      br * (endif_inst - else_inst + 1));
1480b8e80941Smrg         brw_inst_set_gen4_pop_count(devinfo, else_inst, 1);
1481b8e80941Smrg      } else if (devinfo->gen == 6) {
1482b8e80941Smrg	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1483b8e80941Smrg         brw_inst_set_gen6_jump_count(devinfo, else_inst,
1484b8e80941Smrg                                      br * (endif_inst - else_inst));
1485b8e80941Smrg      } else {
1486b8e80941Smrg	 /* The IF instruction's JIP should point just past the ELSE */
1487b8e80941Smrg         brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
1488b8e80941Smrg	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1489b8e80941Smrg         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
1490b8e80941Smrg         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
1491b8e80941Smrg         if (devinfo->gen >= 8) {
1492b8e80941Smrg            /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
1493b8e80941Smrg             * should point to ENDIF.
1494b8e80941Smrg             */
1495b8e80941Smrg            brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
1496b8e80941Smrg         }
1497b8e80941Smrg      }
1498b8e80941Smrg   }
1499b8e80941Smrg}
1500b8e80941Smrg
1501b8e80941Smrgvoid
1502b8e80941Smrgbrw_ELSE(struct brw_codegen *p)
1503b8e80941Smrg{
1504b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1505b8e80941Smrg   brw_inst *insn;
1506b8e80941Smrg
1507b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_ELSE);
1508b8e80941Smrg
1509b8e80941Smrg   if (devinfo->gen < 6) {
1510b8e80941Smrg      brw_set_dest(p, insn, brw_ip_reg());
1511b8e80941Smrg      brw_set_src0(p, insn, brw_ip_reg());
1512b8e80941Smrg      brw_set_src1(p, insn, brw_imm_d(0x0));
1513b8e80941Smrg   } else if (devinfo->gen == 6) {
1514b8e80941Smrg      brw_set_dest(p, insn, brw_imm_w(0));
1515b8e80941Smrg      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
1516b8e80941Smrg      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1517b8e80941Smrg      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1518b8e80941Smrg   } else if (devinfo->gen == 7) {
1519b8e80941Smrg      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1520b8e80941Smrg      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1521b8e80941Smrg      brw_set_src1(p, insn, brw_imm_w(0));
1522b8e80941Smrg      brw_inst_set_jip(devinfo, insn, 0);
1523b8e80941Smrg      brw_inst_set_uip(devinfo, insn, 0);
1524b8e80941Smrg   } else {
1525b8e80941Smrg      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1526b8e80941Smrg      brw_set_src0(p, insn, brw_imm_d(0));
1527b8e80941Smrg      brw_inst_set_jip(devinfo, insn, 0);
1528b8e80941Smrg      brw_inst_set_uip(devinfo, insn, 0);
1529b8e80941Smrg   }
1530b8e80941Smrg
1531b8e80941Smrg   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1532b8e80941Smrg   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1533b8e80941Smrg   if (!p->single_program_flow && devinfo->gen < 6)
1534b8e80941Smrg      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
1535b8e80941Smrg
1536b8e80941Smrg   push_if_stack(p, insn);
1537b8e80941Smrg}
1538b8e80941Smrg
1539b8e80941Smrgvoid
1540b8e80941Smrgbrw_ENDIF(struct brw_codegen *p)
1541b8e80941Smrg{
1542b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1543b8e80941Smrg   brw_inst *insn = NULL;
1544b8e80941Smrg   brw_inst *else_inst = NULL;
1545b8e80941Smrg   brw_inst *if_inst = NULL;
1546b8e80941Smrg   brw_inst *tmp;
1547b8e80941Smrg   bool emit_endif = true;
1548b8e80941Smrg
1549b8e80941Smrg   /* In single program flow mode, we can express IF and ELSE instructions
1550b8e80941Smrg    * equivalently as ADD instructions that operate on IP.  On platforms prior
1551b8e80941Smrg    * to Gen6, flow control instructions cause an implied thread switch, so
1552b8e80941Smrg    * this is a significant savings.
1553b8e80941Smrg    *
1554b8e80941Smrg    * However, on Gen6, writing to IP doesn't work in single program flow mode
1555b8e80941Smrg    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1556b8e80941Smrg    * not be updated by non-flow control instructions.").  And on later
1557b8e80941Smrg    * platforms, there is no significant benefit to converting control flow
1558b8e80941Smrg    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
1559b8e80941Smrg    * Gen5.
1560b8e80941Smrg    */
1561b8e80941Smrg   if (devinfo->gen < 6 && p->single_program_flow)
1562b8e80941Smrg      emit_endif = false;
1563b8e80941Smrg
   /*
    * A single next_insn() may change the base address of the instruction
    * store (p->store), so call it first, before converting any stored index
    * back into an instruction pointer.
    */
1569b8e80941Smrg   if (emit_endif)
1570b8e80941Smrg      insn = next_insn(p, BRW_OPCODE_ENDIF);
1571b8e80941Smrg
1572b8e80941Smrg   /* Pop the IF and (optional) ELSE instructions from the stack */
1573b8e80941Smrg   p->if_depth_in_loop[p->loop_stack_depth]--;
1574b8e80941Smrg   tmp = pop_if_stack(p);
1575b8e80941Smrg   if (brw_inst_opcode(devinfo, tmp) == BRW_OPCODE_ELSE) {
1576b8e80941Smrg      else_inst = tmp;
1577b8e80941Smrg      tmp = pop_if_stack(p);
1578b8e80941Smrg   }
1579b8e80941Smrg   if_inst = tmp;
1580b8e80941Smrg
1581b8e80941Smrg   if (!emit_endif) {
1582b8e80941Smrg      /* ENDIF is useless; don't bother emitting it. */
1583b8e80941Smrg      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
1584b8e80941Smrg      return;
1585b8e80941Smrg   }
1586b8e80941Smrg
1587b8e80941Smrg   if (devinfo->gen < 6) {
1588b8e80941Smrg      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1589b8e80941Smrg      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1590b8e80941Smrg      brw_set_src1(p, insn, brw_imm_d(0x0));
1591b8e80941Smrg   } else if (devinfo->gen == 6) {
1592b8e80941Smrg      brw_set_dest(p, insn, brw_imm_w(0));
1593b8e80941Smrg      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1594b8e80941Smrg      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1595b8e80941Smrg   } else if (devinfo->gen == 7) {
1596b8e80941Smrg      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1597b8e80941Smrg      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1598b8e80941Smrg      brw_set_src1(p, insn, brw_imm_w(0));
1599b8e80941Smrg   } else {
1600b8e80941Smrg      brw_set_src0(p, insn, brw_imm_d(0));
1601b8e80941Smrg   }
1602b8e80941Smrg
1603b8e80941Smrg   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1604b8e80941Smrg   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1605b8e80941Smrg   if (devinfo->gen < 6)
1606b8e80941Smrg      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
1607b8e80941Smrg
1608b8e80941Smrg   /* Also pop item off the stack in the endif instruction: */
1609b8e80941Smrg   if (devinfo->gen < 6) {
1610b8e80941Smrg      brw_inst_set_gen4_jump_count(devinfo, insn, 0);
1611b8e80941Smrg      brw_inst_set_gen4_pop_count(devinfo, insn, 1);
1612b8e80941Smrg   } else if (devinfo->gen == 6) {
1613b8e80941Smrg      brw_inst_set_gen6_jump_count(devinfo, insn, 2);
1614b8e80941Smrg   } else {
1615b8e80941Smrg      brw_inst_set_jip(devinfo, insn, 2);
1616b8e80941Smrg   }
1617b8e80941Smrg   patch_IF_ELSE(p, if_inst, else_inst, insn);
1618b8e80941Smrg}
1619b8e80941Smrg
1620b8e80941Smrgbrw_inst *
1621b8e80941Smrgbrw_BREAK(struct brw_codegen *p)
1622b8e80941Smrg{
1623b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1624b8e80941Smrg   brw_inst *insn;
1625b8e80941Smrg
1626b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_BREAK);
1627b8e80941Smrg   if (devinfo->gen >= 8) {
1628b8e80941Smrg      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1629b8e80941Smrg      brw_set_src0(p, insn, brw_imm_d(0x0));
1630b8e80941Smrg   } else if (devinfo->gen >= 6) {
1631b8e80941Smrg      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1632b8e80941Smrg      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1633b8e80941Smrg      brw_set_src1(p, insn, brw_imm_d(0x0));
1634b8e80941Smrg   } else {
1635b8e80941Smrg      brw_set_dest(p, insn, brw_ip_reg());
1636b8e80941Smrg      brw_set_src0(p, insn, brw_ip_reg());
1637b8e80941Smrg      brw_set_src1(p, insn, brw_imm_d(0x0));
1638b8e80941Smrg      brw_inst_set_gen4_pop_count(devinfo, insn,
1639b8e80941Smrg                                  p->if_depth_in_loop[p->loop_stack_depth]);
1640b8e80941Smrg   }
1641b8e80941Smrg   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1642b8e80941Smrg   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1643b8e80941Smrg
1644b8e80941Smrg   return insn;
1645b8e80941Smrg}
1646b8e80941Smrg
1647b8e80941Smrgbrw_inst *
1648b8e80941Smrgbrw_CONT(struct brw_codegen *p)
1649b8e80941Smrg{
1650b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1651b8e80941Smrg   brw_inst *insn;
1652b8e80941Smrg
1653b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_CONTINUE);
1654b8e80941Smrg   brw_set_dest(p, insn, brw_ip_reg());
1655b8e80941Smrg   if (devinfo->gen >= 8) {
1656b8e80941Smrg      brw_set_src0(p, insn, brw_imm_d(0x0));
1657b8e80941Smrg   } else {
1658b8e80941Smrg      brw_set_src0(p, insn, brw_ip_reg());
1659b8e80941Smrg      brw_set_src1(p, insn, brw_imm_d(0x0));
1660b8e80941Smrg   }
1661b8e80941Smrg
1662b8e80941Smrg   if (devinfo->gen < 6) {
1663b8e80941Smrg      brw_inst_set_gen4_pop_count(devinfo, insn,
1664b8e80941Smrg                                  p->if_depth_in_loop[p->loop_stack_depth]);
1665b8e80941Smrg   }
1666b8e80941Smrg   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1667b8e80941Smrg   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1668b8e80941Smrg   return insn;
1669b8e80941Smrg}
1670b8e80941Smrg
1671b8e80941Smrgbrw_inst *
1672b8e80941Smrggen6_HALT(struct brw_codegen *p)
1673b8e80941Smrg{
1674b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1675b8e80941Smrg   brw_inst *insn;
1676b8e80941Smrg
1677b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_HALT);
1678b8e80941Smrg   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1679b8e80941Smrg   if (devinfo->gen >= 8) {
1680b8e80941Smrg      brw_set_src0(p, insn, brw_imm_d(0x0));
1681b8e80941Smrg   } else {
1682b8e80941Smrg      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1683b8e80941Smrg      brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1684b8e80941Smrg   }
1685b8e80941Smrg
1686b8e80941Smrg   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1687b8e80941Smrg   brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1688b8e80941Smrg   return insn;
1689b8e80941Smrg}
1690b8e80941Smrg
1691b8e80941Smrg/* DO/WHILE loop:
1692b8e80941Smrg *
1693b8e80941Smrg * The DO/WHILE is just an unterminated loop -- break or continue are
1694b8e80941Smrg * used for control within the loop.  We have a few ways they can be
1695b8e80941Smrg * done.
1696b8e80941Smrg *
1697b8e80941Smrg * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1698b8e80941Smrg * jip and no DO instruction.
1699b8e80941Smrg *
1700b8e80941Smrg * For non-uniform control flow pre-gen6, there's a DO instruction to
1701b8e80941Smrg * push the mask, and a WHILE to jump back, and BREAK to get out and
1702b8e80941Smrg * pop the mask.
1703b8e80941Smrg *
1704b8e80941Smrg * For gen6, there's no more mask stack, so no need for DO.  WHILE
1705b8e80941Smrg * just points back to the first instruction of the loop.
1706b8e80941Smrg */
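/* A usage sketch (illustrative): a non-uniform loop is emitted as
 *
 *    brw_DO(p, BRW_EXECUTE_8);
 *       ... loop body, with brw_BREAK(p) / brw_CONT(p) where needed ...
 *    brw_WHILE(p);
 *
 * brw_WHILE() pops the loop stack, and on pre-gen6 it also walks back over
 * the body to patch the BREAK/CONT jump counts (see brw_patch_break_cont
 * below).
 */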
1707b8e80941Smrgbrw_inst *
1708b8e80941Smrgbrw_DO(struct brw_codegen *p, unsigned execute_size)
1709b8e80941Smrg{
1710b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1711b8e80941Smrg
1712b8e80941Smrg   if (devinfo->gen >= 6 || p->single_program_flow) {
1713b8e80941Smrg      push_loop_stack(p, &p->store[p->nr_insn]);
1714b8e80941Smrg      return &p->store[p->nr_insn];
1715b8e80941Smrg   } else {
1716b8e80941Smrg      brw_inst *insn = next_insn(p, BRW_OPCODE_DO);
1717b8e80941Smrg
1718b8e80941Smrg      push_loop_stack(p, insn);
1719b8e80941Smrg
1720b8e80941Smrg      /* Override the defaults for this instruction:
1721b8e80941Smrg       */
1722b8e80941Smrg      brw_set_dest(p, insn, brw_null_reg());
1723b8e80941Smrg      brw_set_src0(p, insn, brw_null_reg());
1724b8e80941Smrg      brw_set_src1(p, insn, brw_null_reg());
1725b8e80941Smrg
1726b8e80941Smrg      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1727b8e80941Smrg      brw_inst_set_exec_size(devinfo, insn, execute_size);
1728b8e80941Smrg      brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE);
1729b8e80941Smrg
1730b8e80941Smrg      return insn;
1731b8e80941Smrg   }
1732b8e80941Smrg}
1733b8e80941Smrg
1734b8e80941Smrg/**
1735b8e80941Smrg * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1736b8e80941Smrg * instruction here.
1737b8e80941Smrg *
1738b8e80941Smrg * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1739b8e80941Smrg * nesting, since it can always just point to the end of the block/current loop.
1740b8e80941Smrg */
1741b8e80941Smrgstatic void
1742b8e80941Smrgbrw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst)
1743b8e80941Smrg{
1744b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1745b8e80941Smrg   brw_inst *do_inst = get_inner_do_insn(p);
1746b8e80941Smrg   brw_inst *inst;
1747b8e80941Smrg   unsigned br = brw_jump_scale(devinfo);
1748b8e80941Smrg
1749b8e80941Smrg   assert(devinfo->gen < 6);
1750b8e80941Smrg
1751b8e80941Smrg   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is nonzero, this instruction has already been
       * patched because it is part of a loop nested inside the one we're
       * patching.
       */
1756b8e80941Smrg      if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_BREAK &&
1757b8e80941Smrg          brw_inst_gen4_jump_count(devinfo, inst) == 0) {
1758b8e80941Smrg         brw_inst_set_gen4_jump_count(devinfo, inst, br*((while_inst - inst) + 1));
1759b8e80941Smrg      } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CONTINUE &&
1760b8e80941Smrg                 brw_inst_gen4_jump_count(devinfo, inst) == 0) {
1761b8e80941Smrg         brw_inst_set_gen4_jump_count(devinfo, inst, br * (while_inst - inst));
1762b8e80941Smrg      }
1763b8e80941Smrg   }
1764b8e80941Smrg}
1765b8e80941Smrg
1766b8e80941Smrgbrw_inst *
1767b8e80941Smrgbrw_WHILE(struct brw_codegen *p)
1768b8e80941Smrg{
1769b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1770b8e80941Smrg   brw_inst *insn, *do_insn;
1771b8e80941Smrg   unsigned br = brw_jump_scale(devinfo);
1772b8e80941Smrg
1773b8e80941Smrg   if (devinfo->gen >= 6) {
1774b8e80941Smrg      insn = next_insn(p, BRW_OPCODE_WHILE);
1775b8e80941Smrg      do_insn = get_inner_do_insn(p);
1776b8e80941Smrg
1777b8e80941Smrg      if (devinfo->gen >= 8) {
1778b8e80941Smrg         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1779b8e80941Smrg         brw_set_src0(p, insn, brw_imm_d(0));
1780b8e80941Smrg         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
1781b8e80941Smrg      } else if (devinfo->gen == 7) {
1782b8e80941Smrg         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1783b8e80941Smrg         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1784b8e80941Smrg         brw_set_src1(p, insn, brw_imm_w(0));
1785b8e80941Smrg         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
1786b8e80941Smrg      } else {
1787b8e80941Smrg         brw_set_dest(p, insn, brw_imm_w(0));
1788b8e80941Smrg         brw_inst_set_gen6_jump_count(devinfo, insn, br * (do_insn - insn));
1789b8e80941Smrg         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1790b8e80941Smrg         brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1791b8e80941Smrg      }
1792b8e80941Smrg
1793b8e80941Smrg      brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
1794b8e80941Smrg
1795b8e80941Smrg   } else {
1796b8e80941Smrg      if (p->single_program_flow) {
1797b8e80941Smrg	 insn = next_insn(p, BRW_OPCODE_ADD);
1798b8e80941Smrg         do_insn = get_inner_do_insn(p);
1799b8e80941Smrg
1800b8e80941Smrg	 brw_set_dest(p, insn, brw_ip_reg());
1801b8e80941Smrg	 brw_set_src0(p, insn, brw_ip_reg());
1802b8e80941Smrg	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1803b8e80941Smrg         brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
1804b8e80941Smrg      } else {
1805b8e80941Smrg	 insn = next_insn(p, BRW_OPCODE_WHILE);
1806b8e80941Smrg         do_insn = get_inner_do_insn(p);
1807b8e80941Smrg
1808b8e80941Smrg         assert(brw_inst_opcode(devinfo, do_insn) == BRW_OPCODE_DO);
1809b8e80941Smrg
1810b8e80941Smrg	 brw_set_dest(p, insn, brw_ip_reg());
1811b8e80941Smrg	 brw_set_src0(p, insn, brw_ip_reg());
1812b8e80941Smrg	 brw_set_src1(p, insn, brw_imm_d(0));
1813b8e80941Smrg
1814b8e80941Smrg         brw_inst_set_exec_size(devinfo, insn, brw_inst_exec_size(devinfo, do_insn));
1815b8e80941Smrg         brw_inst_set_gen4_jump_count(devinfo, insn, br * (do_insn - insn + 1));
1816b8e80941Smrg         brw_inst_set_gen4_pop_count(devinfo, insn, 0);
1817b8e80941Smrg
1818b8e80941Smrg	 brw_patch_break_cont(p, insn);
1819b8e80941Smrg      }
1820b8e80941Smrg   }
1821b8e80941Smrg   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1822b8e80941Smrg
1823b8e80941Smrg   p->loop_stack_depth--;
1824b8e80941Smrg
1825b8e80941Smrg   return insn;
1826b8e80941Smrg}
1827b8e80941Smrg
1828b8e80941Smrg/* FORWARD JUMPS:
1829b8e80941Smrg */
1830b8e80941Smrgvoid brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx)
1831b8e80941Smrg{
1832b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1833b8e80941Smrg   brw_inst *jmp_insn = &p->store[jmp_insn_idx];
1834b8e80941Smrg   unsigned jmpi = 1;
1835b8e80941Smrg
1836b8e80941Smrg   if (devinfo->gen >= 5)
1837b8e80941Smrg      jmpi = 2;
1838b8e80941Smrg
1839b8e80941Smrg   assert(brw_inst_opcode(devinfo, jmp_insn) == BRW_OPCODE_JMPI);
1840b8e80941Smrg   assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE);
1841b8e80941Smrg
1842b8e80941Smrg   brw_inst_set_gen4_jump_count(devinfo, jmp_insn,
1843b8e80941Smrg                                jmpi * (p->nr_insn - jmp_insn_idx - 1));
1844b8e80941Smrg}
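
/* A usage sketch (illustrative): record the index of an emitted JMPI, emit
 * the instructions that may be skipped, then land the jump at the current
 * end of the program:
 *
 *    int jmp = p->nr_insn;
 *    brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL);
 *    ... conditionally executed instructions ...
 *    brw_land_fwd_jump(p, jmp);
 */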
1845b8e80941Smrg
1846b8e80941Smrg/* To integrate with the above, it makes sense that the comparison
1847b8e80941Smrg * instruction should populate the flag register.  It might be simpler
1848b8e80941Smrg * just to use the flag reg for most WM tasks?
1849b8e80941Smrg */
1850b8e80941Smrgvoid brw_CMP(struct brw_codegen *p,
1851b8e80941Smrg	     struct brw_reg dest,
1852b8e80941Smrg	     unsigned conditional,
1853b8e80941Smrg	     struct brw_reg src0,
1854b8e80941Smrg	     struct brw_reg src1)
1855b8e80941Smrg{
1856b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1857b8e80941Smrg   brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1858b8e80941Smrg
1859b8e80941Smrg   brw_inst_set_cond_modifier(devinfo, insn, conditional);
1860b8e80941Smrg   brw_set_dest(p, insn, dest);
1861b8e80941Smrg   brw_set_src0(p, insn, src0);
1862b8e80941Smrg   brw_set_src1(p, insn, src1);
1863b8e80941Smrg
1864b8e80941Smrg   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1865b8e80941Smrg    * page says:
1866b8e80941Smrg    *    "Any CMP instruction with a null destination must use a {switch}."
1867b8e80941Smrg    *
1868b8e80941Smrg    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1869b8e80941Smrg    * mentioned on their work-arounds pages.
1870b8e80941Smrg    */
1871b8e80941Smrg   if (devinfo->gen == 7) {
1872b8e80941Smrg      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1873b8e80941Smrg          dest.nr == BRW_ARF_NULL) {
1874b8e80941Smrg         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
1875b8e80941Smrg      }
1876b8e80941Smrg   }
1877b8e80941Smrg}
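
/* A usage sketch (illustrative; src_a, src_b, dst and src are placeholder
 * registers): compare two values into the flag register, then predicate a
 * following MOV on the result:
 *
 *    brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_D),
 *            BRW_CONDITIONAL_GE, src_a, src_b);
 *    brw_inst *mov = brw_MOV(p, dst, src);
 *    brw_inst_set_pred_control(p->devinfo, mov, BRW_PREDICATE_NORMAL);
 */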
1878b8e80941Smrg
1879b8e80941Smrg/***********************************************************************
1880b8e80941Smrg * Helpers for the various SEND message types:
1881b8e80941Smrg */
1882b8e80941Smrg
1883b8e80941Smrg/** Extended math function, float[8].
1884b8e80941Smrg */
1885b8e80941Smrgvoid gen4_math(struct brw_codegen *p,
1886b8e80941Smrg	       struct brw_reg dest,
1887b8e80941Smrg	       unsigned function,
1888b8e80941Smrg	       unsigned msg_reg_nr,
1889b8e80941Smrg	       struct brw_reg src,
1890b8e80941Smrg	       unsigned precision )
1891b8e80941Smrg{
1892b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1893b8e80941Smrg   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
1894b8e80941Smrg   unsigned data_type;
1895b8e80941Smrg   if (has_scalar_region(src)) {
1896b8e80941Smrg      data_type = BRW_MATH_DATA_SCALAR;
1897b8e80941Smrg   } else {
1898b8e80941Smrg      data_type = BRW_MATH_DATA_VECTOR;
1899b8e80941Smrg   }
1900b8e80941Smrg
1901b8e80941Smrg   assert(devinfo->gen < 6);
1902b8e80941Smrg
1903b8e80941Smrg   /* Example code doesn't set predicate_control for send
1904b8e80941Smrg    * instructions.
1905b8e80941Smrg    */
1906b8e80941Smrg   brw_inst_set_pred_control(devinfo, insn, 0);
1907b8e80941Smrg   brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
1908b8e80941Smrg
1909b8e80941Smrg   brw_set_dest(p, insn, dest);
1910b8e80941Smrg   brw_set_src0(p, insn, src);
1911b8e80941Smrg   brw_set_math_message(p,
1912b8e80941Smrg                        insn,
1913b8e80941Smrg                        function,
1914b8e80941Smrg                        src.type == BRW_REGISTER_TYPE_D,
1915b8e80941Smrg                        precision,
1916b8e80941Smrg                        data_type);
1917b8e80941Smrg}
1918b8e80941Smrg
1919b8e80941Smrgvoid gen6_math(struct brw_codegen *p,
1920b8e80941Smrg	       struct brw_reg dest,
1921b8e80941Smrg	       unsigned function,
1922b8e80941Smrg	       struct brw_reg src0,
1923b8e80941Smrg	       struct brw_reg src1)
1924b8e80941Smrg{
1925b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1926b8e80941Smrg   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
1927b8e80941Smrg
1928b8e80941Smrg   assert(devinfo->gen >= 6);
1929b8e80941Smrg
1930b8e80941Smrg   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
1931b8e80941Smrg          (devinfo->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
1932b8e80941Smrg
1933b8e80941Smrg   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1934b8e80941Smrg   if (devinfo->gen == 6) {
1935b8e80941Smrg      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1936b8e80941Smrg      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1937b8e80941Smrg   }
1938b8e80941Smrg
1939b8e80941Smrg   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1940b8e80941Smrg       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1941b8e80941Smrg       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1942b8e80941Smrg      assert(src0.type != BRW_REGISTER_TYPE_F);
1943b8e80941Smrg      assert(src1.type != BRW_REGISTER_TYPE_F);
1944b8e80941Smrg      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
1945b8e80941Smrg             (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
1946b8e80941Smrg   } else {
1947b8e80941Smrg      assert(src0.type == BRW_REGISTER_TYPE_F ||
1948b8e80941Smrg             (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
1949b8e80941Smrg      assert(src1.type == BRW_REGISTER_TYPE_F ||
1950b8e80941Smrg             (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
1951b8e80941Smrg   }
1952b8e80941Smrg
1953b8e80941Smrg   /* Source modifiers are ignored for extended math instructions on Gen6. */
1954b8e80941Smrg   if (devinfo->gen == 6) {
1955b8e80941Smrg      assert(!src0.negate);
1956b8e80941Smrg      assert(!src0.abs);
1957b8e80941Smrg      assert(!src1.negate);
1958b8e80941Smrg      assert(!src1.abs);
1959b8e80941Smrg   }
1960b8e80941Smrg
1961b8e80941Smrg   brw_inst_set_math_function(devinfo, insn, function);
1962b8e80941Smrg
1963b8e80941Smrg   brw_set_dest(p, insn, dest);
1964b8e80941Smrg   brw_set_src0(p, insn, src0);
1965b8e80941Smrg   brw_set_src1(p, insn, src1);
1966b8e80941Smrg}
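
/* A usage sketch (illustrative; the GRF numbers are placeholders): a
 * single-source math operation such as reciprocal square root passes a null
 * src1:
 *
 *    gen6_math(p, brw_vec8_grf(20, 0), BRW_MATH_FUNCTION_RSQ,
 *              brw_vec8_grf(10, 0), brw_null_reg());
 */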
1967b8e80941Smrg
1968b8e80941Smrg/**
1969b8e80941Smrg * Return the right surface index to access the thread scratch space using
1970b8e80941Smrg * stateless dataport messages.
1971b8e80941Smrg */
1972b8e80941Smrgunsigned
1973b8e80941Smrgbrw_scratch_surface_idx(const struct brw_codegen *p)
1974b8e80941Smrg{
1975b8e80941Smrg   /* The scratch space is thread-local so IA coherency is unnecessary. */
1976b8e80941Smrg   if (p->devinfo->gen >= 8)
1977b8e80941Smrg      return GEN8_BTI_STATELESS_NON_COHERENT;
1978b8e80941Smrg   else
1979b8e80941Smrg      return BRW_BTI_STATELESS;
1980b8e80941Smrg}
1981b8e80941Smrg
1982b8e80941Smrg/**
1983b8e80941Smrg * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1984b8e80941Smrg * using a constant offset per channel.
1985b8e80941Smrg *
1986b8e80941Smrg * The offset must be aligned to oword size (16 bytes).  Used for
1987b8e80941Smrg * register spilling.
1988b8e80941Smrg */
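/* A usage sketch (illustrative; the offset and MRF number are placeholders):
 * spilling two GRFs worth of data staged at m1 to byte offset 64 of the
 * scratch buffer:
 *
 *    brw_oword_block_write_scratch(p, brw_message_reg(1), 2, 64);
 *
 * The offset must be a multiple of 16, and the resulting SEND uses
 * mlen = 1 + num_regs (header plus payload).
 */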
1989b8e80941Smrgvoid brw_oword_block_write_scratch(struct brw_codegen *p,
1990b8e80941Smrg				   struct brw_reg mrf,
1991b8e80941Smrg				   int num_regs,
1992b8e80941Smrg				   unsigned offset)
1993b8e80941Smrg{
1994b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
1995b8e80941Smrg   const unsigned target_cache =
1996b8e80941Smrg      (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
1997b8e80941Smrg       devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
1998b8e80941Smrg       BRW_SFID_DATAPORT_WRITE);
1999b8e80941Smrg   uint32_t msg_type;
2000b8e80941Smrg
2001b8e80941Smrg   if (devinfo->gen >= 6)
2002b8e80941Smrg      offset /= 16;
2003b8e80941Smrg
2004b8e80941Smrg   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2005b8e80941Smrg
2006b8e80941Smrg   const unsigned mlen = 1 + num_regs;
2007b8e80941Smrg
2008b8e80941Smrg   /* Set up the message header.  This is g0, with g0.2 filled with
2009b8e80941Smrg    * the offset.  We don't want to leave our offset around in g0 or
2010b8e80941Smrg    * it'll screw up texture samples, so set it up inside the message
2011b8e80941Smrg    * reg.
2012b8e80941Smrg    */
2013b8e80941Smrg   {
2014b8e80941Smrg      brw_push_insn_state(p);
2015b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_8);
2016b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2017b8e80941Smrg      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2018b8e80941Smrg
2019b8e80941Smrg      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2020b8e80941Smrg
2021b8e80941Smrg      /* set message header global offset field (reg 0, element 2) */
2022b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2023b8e80941Smrg      brw_MOV(p,
2024b8e80941Smrg	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2025b8e80941Smrg				  mrf.nr,
2026b8e80941Smrg				  2), BRW_REGISTER_TYPE_UD),
2027b8e80941Smrg	      brw_imm_ud(offset));
2028b8e80941Smrg
2029b8e80941Smrg      brw_pop_insn_state(p);
2030b8e80941Smrg   }
2031b8e80941Smrg
2032b8e80941Smrg   {
2033b8e80941Smrg      struct brw_reg dest;
2034b8e80941Smrg      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2035b8e80941Smrg      int send_commit_msg;
2036b8e80941Smrg      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
2037b8e80941Smrg					 BRW_REGISTER_TYPE_UW);
2038b8e80941Smrg
2039b8e80941Smrg      brw_inst_set_sfid(devinfo, insn, target_cache);
2040b8e80941Smrg      brw_inst_set_compression(devinfo, insn, false);
2041b8e80941Smrg
2042b8e80941Smrg      if (brw_inst_exec_size(devinfo, insn) >= 16)
2043b8e80941Smrg	 src_header = vec16(src_header);
2044b8e80941Smrg
2045b8e80941Smrg      assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
2046b8e80941Smrg      if (devinfo->gen < 6)
2047b8e80941Smrg         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
2048b8e80941Smrg
2049b8e80941Smrg      /* Until gen6, writes followed by reads from the same location
2050b8e80941Smrg       * are not guaranteed to be ordered unless write_commit is set.
2051b8e80941Smrg       * If set, then a no-op write is issued to the destination
2052b8e80941Smrg       * register to set a dependency, and a read from the destination
2053b8e80941Smrg       * can be used to ensure the ordering.
2054b8e80941Smrg       *
2055b8e80941Smrg       * For gen6, only writes between different threads need ordering
2056b8e80941Smrg       * protection.  Our use of DP writes is all about register
2057b8e80941Smrg       * spilling within a thread.
2058b8e80941Smrg       */
2059b8e80941Smrg      if (devinfo->gen >= 6) {
2060b8e80941Smrg	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2061b8e80941Smrg	 send_commit_msg = 0;
2062b8e80941Smrg      } else {
2063b8e80941Smrg	 dest = src_header;
2064b8e80941Smrg	 send_commit_msg = 1;
2065b8e80941Smrg      }
2066b8e80941Smrg
2067b8e80941Smrg      brw_set_dest(p, insn, dest);
2068b8e80941Smrg      if (devinfo->gen >= 6) {
2069b8e80941Smrg	 brw_set_src0(p, insn, mrf);
2070b8e80941Smrg      } else {
2071b8e80941Smrg	 brw_set_src0(p, insn, brw_null_reg());
2072b8e80941Smrg      }
2073b8e80941Smrg
2074b8e80941Smrg      if (devinfo->gen >= 6)
2075b8e80941Smrg	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2076b8e80941Smrg      else
2077b8e80941Smrg	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2078b8e80941Smrg
2079b8e80941Smrg      brw_set_desc(p, insn,
2080b8e80941Smrg                   brw_message_desc(devinfo, mlen, send_commit_msg, true) |
2081b8e80941Smrg                   brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p),
2082b8e80941Smrg                                     BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
2083b8e80941Smrg                                     msg_type, 0, /* not a render target */
2084b8e80941Smrg                                     send_commit_msg));
2085b8e80941Smrg   }
2086b8e80941Smrg}
2087b8e80941Smrg
2088b8e80941Smrg
2089b8e80941Smrg/**
2090b8e80941Smrg * Read a block of owords (half a GRF each) from the scratch buffer
2091b8e80941Smrg * using a constant index per channel.
2092b8e80941Smrg *
2093b8e80941Smrg * Offset must be aligned to oword size (16 bytes).  Used for register
2094b8e80941Smrg * spilling.
2095b8e80941Smrg */
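/* A usage sketch (illustrative; register and offset are placeholders):
 * reloading those two GRFs into g4 and g5:
 *
 *    brw_oword_block_read_scratch(p, brw_vec8_grf(4, 0), brw_message_reg(1),
 *                                 2, 64);
 *
 * On gen7+ the mrf argument is effectively ignored, since the message is
 * sent from the destination register itself.
 */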
2096b8e80941Smrgvoid
2097b8e80941Smrgbrw_oword_block_read_scratch(struct brw_codegen *p,
2098b8e80941Smrg			     struct brw_reg dest,
2099b8e80941Smrg			     struct brw_reg mrf,
2100b8e80941Smrg			     int num_regs,
2101b8e80941Smrg			     unsigned offset)
2102b8e80941Smrg{
2103b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2104b8e80941Smrg
2105b8e80941Smrg   if (devinfo->gen >= 6)
2106b8e80941Smrg      offset /= 16;
2107b8e80941Smrg
2108b8e80941Smrg   if (p->devinfo->gen >= 7) {
2109b8e80941Smrg      /* On gen 7 and above, we no longer have message registers and we can
2110b8e80941Smrg       * send from any register we want.  By using the destination register
2111b8e80941Smrg       * for the message, we guarantee that the implied message write won't
2112b8e80941Smrg       * accidentally overwrite anything.  This has been a problem because
2113b8e80941Smrg       * the MRF registers and source for the final FB write are both fixed
2114b8e80941Smrg       * and may overlap.
2115b8e80941Smrg       */
2116b8e80941Smrg      mrf = retype(dest, BRW_REGISTER_TYPE_UD);
2117b8e80941Smrg   } else {
2118b8e80941Smrg      mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2119b8e80941Smrg   }
2120b8e80941Smrg   dest = retype(dest, BRW_REGISTER_TYPE_UW);
2121b8e80941Smrg
2122b8e80941Smrg   const unsigned rlen = num_regs;
2123b8e80941Smrg   const unsigned target_cache =
2124b8e80941Smrg      (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
2125b8e80941Smrg       devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
2126b8e80941Smrg       BRW_SFID_DATAPORT_READ);
2127b8e80941Smrg
2128b8e80941Smrg   {
2129b8e80941Smrg      brw_push_insn_state(p);
2130b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_8);
2131b8e80941Smrg      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2132b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2133b8e80941Smrg
2134b8e80941Smrg      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2135b8e80941Smrg
2136b8e80941Smrg      /* set message header global offset field (reg 0, element 2) */
2137b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2138b8e80941Smrg      brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
2139b8e80941Smrg
2140b8e80941Smrg      brw_pop_insn_state(p);
2141b8e80941Smrg   }
2142b8e80941Smrg
2143b8e80941Smrg   {
2144b8e80941Smrg      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2145b8e80941Smrg
2146b8e80941Smrg      brw_inst_set_sfid(devinfo, insn, target_cache);
2147b8e80941Smrg      assert(brw_inst_pred_control(devinfo, insn) == 0);
2148b8e80941Smrg      brw_inst_set_compression(devinfo, insn, false);
2149b8e80941Smrg
2150b8e80941Smrg      brw_set_dest(p, insn, dest);	/* UW? */
2151b8e80941Smrg      if (devinfo->gen >= 6) {
2152b8e80941Smrg	 brw_set_src0(p, insn, mrf);
2153b8e80941Smrg      } else {
2154b8e80941Smrg	 brw_set_src0(p, insn, brw_null_reg());
2155b8e80941Smrg         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
2156b8e80941Smrg      }
2157b8e80941Smrg
2158b8e80941Smrg      brw_set_desc(p, insn,
2159b8e80941Smrg                   brw_message_desc(devinfo, 1, rlen, true) |
2160b8e80941Smrg                   brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p),
2161b8e80941Smrg                                    BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
2162b8e80941Smrg                                    BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2163b8e80941Smrg                                    BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
2164b8e80941Smrg   }
2165b8e80941Smrg}
2166b8e80941Smrg
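/*
 * Illustrative sketch (editorial, not part of the driver): reading two
 * registers' worth of scratch data starting at byte offset 64 with the
 * helper above.  On Gen6+ the header's global offset field ends up holding
 * 64 / 16 = 4 OWords, rlen is num_regs = 2, and the block size encodes
 * num_regs * 8 = 16 DWords.  "p", "dest" and the MRF are assumed to have
 * been set up by the caller.
 *
 *    brw_oword_block_read_scratch(p, dest, brw_message_reg(1), 2, 64);
 */
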
2167b8e80941Smrgvoid
2168b8e80941Smrggen7_block_read_scratch(struct brw_codegen *p,
2169b8e80941Smrg                        struct brw_reg dest,
2170b8e80941Smrg                        int num_regs,
2171b8e80941Smrg                        unsigned offset)
2172b8e80941Smrg{
2173b8e80941Smrg   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2174b8e80941Smrg   assert(brw_inst_pred_control(p->devinfo, insn) == BRW_PREDICATE_NONE);
2175b8e80941Smrg
2176b8e80941Smrg   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));
2177b8e80941Smrg
2178b8e80941Smrg   /* The HW requires that the header is present; this is to get the g0.5
2179b8e80941Smrg    * scratch offset.
2180b8e80941Smrg    */
2181b8e80941Smrg   brw_set_src0(p, insn, brw_vec8_grf(0, 0));
2182b8e80941Smrg
2183b8e80941Smrg   /* According to the docs, offset is "A 12-bit HWord offset into the memory
2184b8e80941Smrg    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
2185b8e80941Smrg    * is 32 bytes, which happens to be the size of a register.
2186b8e80941Smrg    */
2187b8e80941Smrg   offset /= REG_SIZE;
2188b8e80941Smrg   assert(offset < (1 << 12));
2189b8e80941Smrg
2190b8e80941Smrg   gen7_set_dp_scratch_message(p, insn,
2191b8e80941Smrg                               false, /* scratch read */
2192b8e80941Smrg                               false, /* OWords */
2193b8e80941Smrg                               false, /* invalidate after read */
2194b8e80941Smrg                               num_regs,
2195b8e80941Smrg                               offset,
2196b8e80941Smrg                               1,        /* mlen: just g0 */
2197b8e80941Smrg                               num_regs, /* rlen */
2198b8e80941Smrg                               true);    /* header present */
2199b8e80941Smrg}
2200b8e80941Smrg
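/*
 * Illustrative sketch (editorial): the offset conversion above for a Gen7+
 * scratch read of 4 registers at byte offset 0x400.  The encoded HWord
 * offset is 0x400 / REG_SIZE = 32, comfortably below the 1 << 12 limit,
 * with mlen = 1 (just the g0 header) and rlen = 4.
 *
 *    gen7_block_read_scratch(p, dest, 4, 0x400);
 */
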
2201b8e80941Smrg/**
2202b8e80941Smrg * Read float[4] vectors from the data port constant cache.
2203b8e80941Smrg * Location (in buffer) should be a multiple of 16.
2204b8e80941Smrg * Used for fetching shader constants.
2205b8e80941Smrg */
2206b8e80941Smrgvoid brw_oword_block_read(struct brw_codegen *p,
2207b8e80941Smrg			  struct brw_reg dest,
2208b8e80941Smrg			  struct brw_reg mrf,
2209b8e80941Smrg			  uint32_t offset,
2210b8e80941Smrg			  uint32_t bind_table_index)
2211b8e80941Smrg{
2212b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2213b8e80941Smrg   const unsigned target_cache =
2214b8e80941Smrg      (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
2215b8e80941Smrg       BRW_SFID_DATAPORT_READ);
2216b8e80941Smrg   const unsigned exec_size = 1 << brw_get_default_exec_size(p);
2217b8e80941Smrg
2218b8e80941Smrg   /* On Gen6+ hardware, the offset is in units of OWords (16 bytes). */
2219b8e80941Smrg   if (devinfo->gen >= 6)
2220b8e80941Smrg      offset /= 16;
2221b8e80941Smrg
2222b8e80941Smrg   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2223b8e80941Smrg
2224b8e80941Smrg   brw_push_insn_state(p);
2225b8e80941Smrg   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2226b8e80941Smrg   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2227b8e80941Smrg   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2228b8e80941Smrg
2229b8e80941Smrg   brw_push_insn_state(p);
2230b8e80941Smrg   brw_set_default_exec_size(p, BRW_EXECUTE_8);
2231b8e80941Smrg   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2232b8e80941Smrg
2233b8e80941Smrg   /* set message header global offset field (reg 0, element 2) */
2234b8e80941Smrg   brw_set_default_exec_size(p, BRW_EXECUTE_1);
2235b8e80941Smrg   brw_MOV(p,
2236b8e80941Smrg	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2237b8e80941Smrg			       mrf.nr,
2238b8e80941Smrg			       2), BRW_REGISTER_TYPE_UD),
2239b8e80941Smrg	   brw_imm_ud(offset));
2240b8e80941Smrg   brw_pop_insn_state(p);
2241b8e80941Smrg
2242b8e80941Smrg   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2243b8e80941Smrg
2244b8e80941Smrg   brw_inst_set_sfid(devinfo, insn, target_cache);
2245b8e80941Smrg
2246b8e80941Smrg   /* cast dest to a uword[8] vector */
2247b8e80941Smrg   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2248b8e80941Smrg
2249b8e80941Smrg   brw_set_dest(p, insn, dest);
2250b8e80941Smrg   if (devinfo->gen >= 6) {
2251b8e80941Smrg      brw_set_src0(p, insn, mrf);
2252b8e80941Smrg   } else {
2253b8e80941Smrg      brw_set_src0(p, insn, brw_null_reg());
2254b8e80941Smrg      brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
2255b8e80941Smrg   }
2256b8e80941Smrg
2257b8e80941Smrg   brw_set_desc(p, insn,
2258b8e80941Smrg                brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) |
2259b8e80941Smrg                brw_dp_read_desc(devinfo, bind_table_index,
2260b8e80941Smrg                                 BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
2261b8e80941Smrg                                 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2262b8e80941Smrg                                 BRW_DATAPORT_READ_TARGET_DATA_CACHE));
2263b8e80941Smrg
2264b8e80941Smrg   brw_pop_insn_state(p);
2265b8e80941Smrg}
2266b8e80941Smrg
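/*
 * Illustrative sketch (editorial): a constant-cache read at byte offset 256
 * through the helper above, with the default execution size assumed to be
 * SIMD16.  On Gen6+ the header offset becomes 256 / 16 = 16 OWords, the
 * block size encodes exec_size = 16 DWords, and the response length is
 * DIV_ROUND_UP(16, 8) = 2 registers.  "surf_index" is a hypothetical
 * binding table index for the caller's constant buffer.
 *
 *    brw_oword_block_read(p, dest, brw_message_reg(1), 256, surf_index);
 */
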
2267b8e80941Smrgbrw_inst *
2268b8e80941Smrgbrw_fb_WRITE(struct brw_codegen *p,
2269b8e80941Smrg             struct brw_reg payload,
2270b8e80941Smrg             struct brw_reg implied_header,
2271b8e80941Smrg             unsigned msg_control,
2272b8e80941Smrg             unsigned binding_table_index,
2273b8e80941Smrg             unsigned msg_length,
2274b8e80941Smrg             unsigned response_length,
2275b8e80941Smrg             bool eot,
2276b8e80941Smrg             bool last_render_target,
2277b8e80941Smrg             bool header_present)
2278b8e80941Smrg{
2279b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2280b8e80941Smrg   const unsigned target_cache =
2281b8e80941Smrg      (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
2282b8e80941Smrg       BRW_SFID_DATAPORT_WRITE);
2283b8e80941Smrg   brw_inst *insn;
2284b8e80941Smrg   unsigned msg_type;
2285b8e80941Smrg   struct brw_reg dest, src0;
2286b8e80941Smrg
2287b8e80941Smrg   if (brw_get_default_exec_size(p) >= BRW_EXECUTE_16)
2288b8e80941Smrg      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2289b8e80941Smrg   else
2290b8e80941Smrg      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2291b8e80941Smrg
2292b8e80941Smrg   if (devinfo->gen >= 6) {
2293b8e80941Smrg      insn = next_insn(p, BRW_OPCODE_SENDC);
2294b8e80941Smrg   } else {
2295b8e80941Smrg      insn = next_insn(p, BRW_OPCODE_SEND);
2296b8e80941Smrg   }
2297b8e80941Smrg   brw_inst_set_sfid(devinfo, insn, target_cache);
2298b8e80941Smrg   brw_inst_set_compression(devinfo, insn, false);
2299b8e80941Smrg
2300b8e80941Smrg   if (devinfo->gen >= 6) {
2301b8e80941Smrg      /* headerless version, just submit color payload */
2302b8e80941Smrg      src0 = payload;
2303b8e80941Smrg
2304b8e80941Smrg      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2305b8e80941Smrg   } else {
2306b8e80941Smrg      assert(payload.file == BRW_MESSAGE_REGISTER_FILE);
2307b8e80941Smrg      brw_inst_set_base_mrf(devinfo, insn, payload.nr);
2308b8e80941Smrg      src0 = implied_header;
2309b8e80941Smrg
2310b8e80941Smrg      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2311b8e80941Smrg   }
2312b8e80941Smrg
2313b8e80941Smrg   brw_set_dest(p, insn, dest);
2314b8e80941Smrg   brw_set_src0(p, insn, src0);
2315b8e80941Smrg   brw_set_desc(p, insn,
2316b8e80941Smrg                brw_message_desc(devinfo, msg_length, response_length,
2317b8e80941Smrg                                 header_present) |
2318b8e80941Smrg                brw_dp_write_desc(devinfo, binding_table_index, msg_control,
2319b8e80941Smrg                                  msg_type, last_render_target,
2320b8e80941Smrg                                  0 /* send_commit_msg */));
2321b8e80941Smrg   brw_inst_set_eot(devinfo, insn, eot);
2322b8e80941Smrg
2323b8e80941Smrg   return insn;
2324b8e80941Smrg}
2325b8e80941Smrg
2326b8e80941Smrgbrw_inst *
2327b8e80941Smrggen9_fb_READ(struct brw_codegen *p,
2328b8e80941Smrg             struct brw_reg dst,
2329b8e80941Smrg             struct brw_reg payload,
2330b8e80941Smrg             unsigned binding_table_index,
2331b8e80941Smrg             unsigned msg_length,
2332b8e80941Smrg             unsigned response_length,
2333b8e80941Smrg             bool per_sample)
2334b8e80941Smrg{
2335b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2336b8e80941Smrg   assert(devinfo->gen >= 9);
2337b8e80941Smrg   const unsigned msg_subtype =
2338b8e80941Smrg      brw_get_default_exec_size(p) == BRW_EXECUTE_16 ? 0 : 1;
2339b8e80941Smrg   brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);
2340b8e80941Smrg
2341b8e80941Smrg   brw_inst_set_sfid(devinfo, insn, GEN6_SFID_DATAPORT_RENDER_CACHE);
2342b8e80941Smrg   brw_set_dest(p, insn, dst);
2343b8e80941Smrg   brw_set_src0(p, insn, payload);
2344b8e80941Smrg   brw_set_desc(
2345b8e80941Smrg      p, insn,
2346b8e80941Smrg      brw_message_desc(devinfo, msg_length, response_length, true) |
2347b8e80941Smrg      brw_dp_read_desc(devinfo, binding_table_index,
2348b8e80941Smrg                       per_sample << 5 | msg_subtype,
2349b8e80941Smrg                       GEN9_DATAPORT_RC_RENDER_TARGET_READ,
2350b8e80941Smrg                       BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
2351b8e80941Smrg   brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16);
2352b8e80941Smrg
2353b8e80941Smrg   return insn;
2354b8e80941Smrg}
2355b8e80941Smrg
2356b8e80941Smrg/**
2357b8e80941Smrg * Texture sample instruction.
2358b8e80941Smrg * Note: the msg_type plus msg_length values determine exactly what kind
2359b8e80941Smrg * of sampling operation is performed.  See volume 4, page 161 of docs.
2360b8e80941Smrg */
2361b8e80941Smrgvoid brw_SAMPLE(struct brw_codegen *p,
2362b8e80941Smrg		struct brw_reg dest,
2363b8e80941Smrg		unsigned msg_reg_nr,
2364b8e80941Smrg		struct brw_reg src0,
2365b8e80941Smrg		unsigned binding_table_index,
2366b8e80941Smrg		unsigned sampler,
2367b8e80941Smrg		unsigned msg_type,
2368b8e80941Smrg		unsigned response_length,
2369b8e80941Smrg		unsigned msg_length,
2370b8e80941Smrg		unsigned header_present,
2371b8e80941Smrg		unsigned simd_mode,
2372b8e80941Smrg		unsigned return_format)
2373b8e80941Smrg{
2374b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2375b8e80941Smrg   brw_inst *insn;
2376b8e80941Smrg
2377b8e80941Smrg   if (msg_reg_nr != -1)
2378b8e80941Smrg      gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2379b8e80941Smrg
2380b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_SEND);
2381b8e80941Smrg   brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER);
2382b8e80941Smrg   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */
2383b8e80941Smrg
2384b8e80941Smrg   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2385b8e80941Smrg    *
2386b8e80941Smrg    *    "Instruction compression is not allowed for this instruction (that
2387b8e80941Smrg    *     is, send). The hardware behavior is undefined if this instruction is
2388b8e80941Smrg    *     set as compressed. However, compress control can be set to "SecHalf"
2389b8e80941Smrg    *     to affect the EMask generation."
2390b8e80941Smrg    *
2391b8e80941Smrg    * No similar wording is found in later PRMs, but there are examples
2392b8e80941Smrg    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
2393b8e80941Smrg    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
2394b8e80941Smrg    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2395b8e80941Smrg    */
2396b8e80941Smrg   brw_inst_set_compression(devinfo, insn, false);
2397b8e80941Smrg
2398b8e80941Smrg   if (devinfo->gen < 6)
2399b8e80941Smrg      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
2400b8e80941Smrg
2401b8e80941Smrg   brw_set_dest(p, insn, dest);
2402b8e80941Smrg   brw_set_src0(p, insn, src0);
2403b8e80941Smrg   brw_set_desc(p, insn,
2404b8e80941Smrg                brw_message_desc(devinfo, msg_length, response_length,
2405b8e80941Smrg                                 header_present) |
2406b8e80941Smrg                brw_sampler_desc(devinfo, binding_table_index, sampler,
2407b8e80941Smrg                                 msg_type, simd_mode, return_format));
2408b8e80941Smrg}
2409b8e80941Smrg
2410b8e80941Smrg/* Adjust the message header's sampler state pointer to
2411b8e80941Smrg * select the correct group of 16 samplers.
2412b8e80941Smrg */
2413b8e80941Smrgvoid brw_adjust_sampler_state_pointer(struct brw_codegen *p,
2414b8e80941Smrg                                      struct brw_reg header,
2415b8e80941Smrg                                      struct brw_reg sampler_index)
2416b8e80941Smrg{
2417b8e80941Smrg   /* The "Sampler Index" field can only store values between 0 and 15.
2418b8e80941Smrg    * However, we can add an offset to the "Sampler State Pointer"
2419b8e80941Smrg    * field, effectively selecting a different set of 16 samplers.
2420b8e80941Smrg    *
2421b8e80941Smrg    * The "Sampler State Pointer" needs to be aligned to a 32-byte
2422b8e80941Smrg    * offset, and each sampler state is only 16 bytes, so we can't
2423b8e80941Smrg    * exclusively use the offset - we have to use both.
2424b8e80941Smrg    */
2425b8e80941Smrg
2426b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2427b8e80941Smrg
2428b8e80941Smrg   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
2429b8e80941Smrg      const int sampler_state_size = 16; /* 16 bytes */
2430b8e80941Smrg      uint32_t sampler = sampler_index.ud;
2431b8e80941Smrg
2432b8e80941Smrg      if (sampler >= 16) {
2433b8e80941Smrg         assert(devinfo->is_haswell || devinfo->gen >= 8);
2434b8e80941Smrg         brw_ADD(p,
2435b8e80941Smrg                 get_element_ud(header, 3),
2436b8e80941Smrg                 get_element_ud(brw_vec8_grf(0, 0), 3),
2437b8e80941Smrg                 brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
2438b8e80941Smrg      }
2439b8e80941Smrg   } else {
2440b8e80941Smrg      /* Non-const sampler array indexing case */
2441b8e80941Smrg      if (devinfo->gen < 8 && !devinfo->is_haswell) {
2442b8e80941Smrg         return;
2443b8e80941Smrg      }
2444b8e80941Smrg
2445b8e80941Smrg      struct brw_reg temp = get_element_ud(header, 3);
2446b8e80941Smrg
2447b8e80941Smrg      brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
2448b8e80941Smrg      brw_SHL(p, temp, temp, brw_imm_ud(4));
2449b8e80941Smrg      brw_ADD(p,
2450b8e80941Smrg              get_element_ud(header, 3),
2451b8e80941Smrg              get_element_ud(brw_vec8_grf(0, 0), 3),
2452b8e80941Smrg              temp);
2453b8e80941Smrg   }
2454b8e80941Smrg}
2455b8e80941Smrg
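/*
 * Worked example of the arithmetic above (editorial note): for sampler
 * index 20, the immediate path adds 16 * (20 / 16) * 16 = 256 bytes to the
 * sampler state pointer, i.e. it skips one group of sixteen 16-byte
 * sampler states.  The register path computes the same value as
 * (20 & 0xf0) << 4 = 256, so both paths select the second group of 16
 * samplers while the 4-bit "Sampler Index" field addresses within it.
 */
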
2456b8e80941Smrg/* All these variables are pretty confusing - we might be better off
2457b8e80941Smrg * using bitmasks and macros for this, in the old style.  Or perhaps
2458b8e80941Smrg * just having the caller instantiate the fields in dword3 itself.
2459b8e80941Smrg */
2460b8e80941Smrgvoid brw_urb_WRITE(struct brw_codegen *p,
2461b8e80941Smrg		   struct brw_reg dest,
2462b8e80941Smrg		   unsigned msg_reg_nr,
2463b8e80941Smrg		   struct brw_reg src0,
2464b8e80941Smrg                   enum brw_urb_write_flags flags,
2465b8e80941Smrg		   unsigned msg_length,
2466b8e80941Smrg		   unsigned response_length,
2467b8e80941Smrg		   unsigned offset,
2468b8e80941Smrg		   unsigned swizzle)
2469b8e80941Smrg{
2470b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2471b8e80941Smrg   brw_inst *insn;
2472b8e80941Smrg
2473b8e80941Smrg   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2474b8e80941Smrg
2475b8e80941Smrg   if (devinfo->gen >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
2476b8e80941Smrg      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2477b8e80941Smrg      brw_push_insn_state(p);
2478b8e80941Smrg      brw_set_default_access_mode(p, BRW_ALIGN_1);
2479b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2480b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2481b8e80941Smrg      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2482b8e80941Smrg		       BRW_REGISTER_TYPE_UD),
2483b8e80941Smrg	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2484b8e80941Smrg		brw_imm_ud(0xff00));
2485b8e80941Smrg      brw_pop_insn_state(p);
2486b8e80941Smrg   }
2487b8e80941Smrg
2488b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_SEND);
2489b8e80941Smrg
2490b8e80941Smrg   assert(msg_length < BRW_MAX_MRF(devinfo->gen));
2491b8e80941Smrg
2492b8e80941Smrg   brw_set_dest(p, insn, dest);
2493b8e80941Smrg   brw_set_src0(p, insn, src0);
2494b8e80941Smrg   brw_set_src1(p, insn, brw_imm_d(0));
2495b8e80941Smrg
2496b8e80941Smrg   if (devinfo->gen < 6)
2497b8e80941Smrg      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
2498b8e80941Smrg
2499b8e80941Smrg   brw_set_urb_message(p,
2500b8e80941Smrg		       insn,
2501b8e80941Smrg		       flags,
2502b8e80941Smrg		       msg_length,
2503b8e80941Smrg		       response_length,
2504b8e80941Smrg		       offset,
2505b8e80941Smrg		       swizzle);
2506b8e80941Smrg}
2507b8e80941Smrg
2508b8e80941Smrgvoid
2509b8e80941Smrgbrw_send_indirect_message(struct brw_codegen *p,
2510b8e80941Smrg                          unsigned sfid,
2511b8e80941Smrg                          struct brw_reg dst,
2512b8e80941Smrg                          struct brw_reg payload,
2513b8e80941Smrg                          struct brw_reg desc,
2514b8e80941Smrg                          unsigned desc_imm,
2515b8e80941Smrg                          bool eot)
2516b8e80941Smrg{
2517b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2518b8e80941Smrg   struct brw_inst *send;
2519b8e80941Smrg
2520b8e80941Smrg   dst = retype(dst, BRW_REGISTER_TYPE_UW);
2521b8e80941Smrg
2522b8e80941Smrg   assert(desc.type == BRW_REGISTER_TYPE_UD);
2523b8e80941Smrg
2524b8e80941Smrg   if (desc.file == BRW_IMMEDIATE_VALUE) {
2525b8e80941Smrg      send = next_insn(p, BRW_OPCODE_SEND);
2526b8e80941Smrg      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
2527b8e80941Smrg      brw_set_desc(p, send, desc.ud | desc_imm);
2528b8e80941Smrg   } else {
2529b8e80941Smrg      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
2530b8e80941Smrg
2531b8e80941Smrg      brw_push_insn_state(p);
2532b8e80941Smrg      brw_set_default_access_mode(p, BRW_ALIGN_1);
2533b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2534b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2535b8e80941Smrg      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2536b8e80941Smrg
2537b8e80941Smrg      /* Load the indirect descriptor to an address register using OR so the
2538b8e80941Smrg       * caller can specify additional descriptor bits with the desc_imm
2539b8e80941Smrg       * immediate.
2540b8e80941Smrg       */
2541b8e80941Smrg      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
2542b8e80941Smrg
2543b8e80941Smrg      brw_pop_insn_state(p);
2544b8e80941Smrg
2545b8e80941Smrg      send = next_insn(p, BRW_OPCODE_SEND);
2546b8e80941Smrg      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
2547b8e80941Smrg      brw_set_src1(p, send, addr);
2548b8e80941Smrg   }
2549b8e80941Smrg
2550b8e80941Smrg   brw_set_dest(p, send, dst);
2551b8e80941Smrg   brw_inst_set_sfid(devinfo, send, sfid);
2552b8e80941Smrg   brw_inst_set_eot(devinfo, send, eot);
2553b8e80941Smrg}
2554b8e80941Smrg
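/*
 * Illustrative sketch (editorial): emitting a send whose descriptor is only
 * partly known at compile time.  With a register descriptor the helper ORs
 * "desc" with "desc_imm" into a0.0 and the SEND reads its descriptor from
 * the address register; with an immediate descriptor the OR is skipped and
 * the combined value is encoded directly in the instruction.  "dst",
 * "payload" and "desc_reg" are assumed to be set up by the caller.
 *
 *    brw_send_indirect_message(p, GEN7_SFID_DATAPORT_DATA_CACHE, dst,
 *                              payload, desc_reg, desc_imm, false);
 */
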
2555b8e80941Smrgvoid
2556b8e80941Smrgbrw_send_indirect_split_message(struct brw_codegen *p,
2557b8e80941Smrg                                unsigned sfid,
2558b8e80941Smrg                                struct brw_reg dst,
2559b8e80941Smrg                                struct brw_reg payload0,
2560b8e80941Smrg                                struct brw_reg payload1,
2561b8e80941Smrg                                struct brw_reg desc,
2562b8e80941Smrg                                unsigned desc_imm,
2563b8e80941Smrg                                struct brw_reg ex_desc,
2564b8e80941Smrg                                unsigned ex_desc_imm,
2565b8e80941Smrg                                bool eot)
2566b8e80941Smrg{
2567b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2568b8e80941Smrg   struct brw_inst *send;
2569b8e80941Smrg
2570b8e80941Smrg   dst = retype(dst, BRW_REGISTER_TYPE_UW);
2571b8e80941Smrg
2572b8e80941Smrg   assert(desc.type == BRW_REGISTER_TYPE_UD);
2573b8e80941Smrg
2574b8e80941Smrg   if (desc.file == BRW_IMMEDIATE_VALUE) {
2575b8e80941Smrg      desc.ud |= desc_imm;
2576b8e80941Smrg   } else {
2577b8e80941Smrg      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
2578b8e80941Smrg
2579b8e80941Smrg      brw_push_insn_state(p);
2580b8e80941Smrg      brw_set_default_access_mode(p, BRW_ALIGN_1);
2581b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2582b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2583b8e80941Smrg      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2584b8e80941Smrg
2585b8e80941Smrg      /* Load the indirect descriptor to an address register using OR so the
2586b8e80941Smrg       * caller can specify additional descriptor bits with the desc_imm
2587b8e80941Smrg       * immediate.
2588b8e80941Smrg       */
2589b8e80941Smrg      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
2590b8e80941Smrg
2591b8e80941Smrg      brw_pop_insn_state(p);
2592b8e80941Smrg      desc = addr;
2593b8e80941Smrg   }
2594b8e80941Smrg
2595b8e80941Smrg   if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
2596b8e80941Smrg      ex_desc.ud |= ex_desc_imm;
2597b8e80941Smrg   } else {
2598b8e80941Smrg      struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
2599b8e80941Smrg
2600b8e80941Smrg      brw_push_insn_state(p);
2601b8e80941Smrg      brw_set_default_access_mode(p, BRW_ALIGN_1);
2602b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2603b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2604b8e80941Smrg      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2605b8e80941Smrg
2606b8e80941Smrg      /* Load the indirect extended descriptor to an address register using OR
2607b8e80941Smrg       * so the caller can specify additional descriptor bits with the
2608b8e80941Smrg       * desc_imm immediate.
2609b8e80941Smrg       *
2610b8e80941Smrg       * Even though the instruction dispatcher always pulls the SFID and EOT
2611b8e80941Smrg       * fields from the instruction itself, the actual external unit that
2612b8e80941Smrg       * processes the message gets the SFID and EOT from the extended
2613b8e80941Smrg       * descriptor which comes from the address register.  If we don't OR
2614b8e80941Smrg       * those two bits in, the external unit may get confused and hang.
2615b8e80941Smrg       */
2616b8e80941Smrg      brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid | eot << 5));
2617b8e80941Smrg
2618b8e80941Smrg      brw_pop_insn_state(p);
2619b8e80941Smrg      ex_desc = addr;
2620b8e80941Smrg   }
2621b8e80941Smrg
2622b8e80941Smrg   send = next_insn(p, BRW_OPCODE_SENDS);
2623b8e80941Smrg   brw_set_dest(p, send, dst);
2624b8e80941Smrg   brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
2625b8e80941Smrg   brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
2626b8e80941Smrg
2627b8e80941Smrg   if (desc.file == BRW_IMMEDIATE_VALUE) {
2628b8e80941Smrg      brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
2629b8e80941Smrg      brw_inst_set_send_desc(devinfo, send, desc.ud);
2630b8e80941Smrg   } else {
2631b8e80941Smrg      assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
2632b8e80941Smrg      assert(desc.nr == BRW_ARF_ADDRESS);
2633b8e80941Smrg      assert(desc.subnr == 0);
2634b8e80941Smrg      brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
2635b8e80941Smrg   }
2636b8e80941Smrg
2637b8e80941Smrg   if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
2638b8e80941Smrg      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
2639b8e80941Smrg      brw_inst_set_send_ex_desc(devinfo, send, ex_desc.ud);
2640b8e80941Smrg   } else {
2641b8e80941Smrg      assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
2642b8e80941Smrg      assert(ex_desc.nr == BRW_ARF_ADDRESS);
2643b8e80941Smrg      assert((ex_desc.subnr & 0x3) == 0);
2644b8e80941Smrg      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
2645b8e80941Smrg      brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
2646b8e80941Smrg   }
2647b8e80941Smrg
2648b8e80941Smrg   brw_inst_set_sfid(devinfo, send, sfid);
2649b8e80941Smrg   brw_inst_set_eot(devinfo, send, eot);
2650b8e80941Smrg}
2651b8e80941Smrg
2652b8e80941Smrgstatic void
2653b8e80941Smrgbrw_send_indirect_surface_message(struct brw_codegen *p,
2654b8e80941Smrg                                  unsigned sfid,
2655b8e80941Smrg                                  struct brw_reg dst,
2656b8e80941Smrg                                  struct brw_reg payload,
2657b8e80941Smrg                                  struct brw_reg surface,
2658b8e80941Smrg                                  unsigned desc_imm)
2659b8e80941Smrg{
2660b8e80941Smrg   if (surface.file != BRW_IMMEDIATE_VALUE) {
2661b8e80941Smrg      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
2662b8e80941Smrg
2663b8e80941Smrg      brw_push_insn_state(p);
2664b8e80941Smrg      brw_set_default_access_mode(p, BRW_ALIGN_1);
2665b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2666b8e80941Smrg      brw_set_default_exec_size(p, BRW_EXECUTE_1);
2667b8e80941Smrg      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2668b8e80941Smrg
2669b8e80941Smrg      /* Mask out invalid bits from the surface index to avoid hangs e.g. when
2670b8e80941Smrg       * some surface array is accessed out of bounds.
2671b8e80941Smrg       */
2672b8e80941Smrg      brw_AND(p, addr,
2673b8e80941Smrg              suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
2674b8e80941Smrg                        BRW_GET_SWZ(surface.swizzle, 0)),
2675b8e80941Smrg              brw_imm_ud(0xff));
2676b8e80941Smrg
2677b8e80941Smrg      brw_pop_insn_state(p);
2678b8e80941Smrg
2679b8e80941Smrg      surface = addr;
2680b8e80941Smrg   }
2681b8e80941Smrg
2682b8e80941Smrg   brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
2683b8e80941Smrg}
2684b8e80941Smrg
2685b8e80941Smrgstatic bool
2686b8e80941Smrgwhile_jumps_before_offset(const struct gen_device_info *devinfo,
2687b8e80941Smrg                          brw_inst *insn, int while_offset, int start_offset)
2688b8e80941Smrg{
2689b8e80941Smrg   int scale = 16 / brw_jump_scale(devinfo);
2690b8e80941Smrg   int jip = devinfo->gen == 6 ? brw_inst_gen6_jump_count(devinfo, insn)
2691b8e80941Smrg                               : brw_inst_jip(devinfo, insn);
2692b8e80941Smrg   assert(jip < 0);
2693b8e80941Smrg   return while_offset + jip * scale <= start_offset;
2694b8e80941Smrg}
2695b8e80941Smrg
2696b8e80941Smrg
2697b8e80941Smrgstatic int
2698b8e80941Smrgbrw_find_next_block_end(struct brw_codegen *p, int start_offset)
2699b8e80941Smrg{
2700b8e80941Smrg   int offset;
2701b8e80941Smrg   void *store = p->store;
2702b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2703b8e80941Smrg
2704b8e80941Smrg   int depth = 0;
2705b8e80941Smrg
2706b8e80941Smrg   for (offset = next_offset(devinfo, store, start_offset);
2707b8e80941Smrg        offset < p->next_insn_offset;
2708b8e80941Smrg        offset = next_offset(devinfo, store, offset)) {
2709b8e80941Smrg      brw_inst *insn = store + offset;
2710b8e80941Smrg
2711b8e80941Smrg      switch (brw_inst_opcode(devinfo, insn)) {
2712b8e80941Smrg      case BRW_OPCODE_IF:
2713b8e80941Smrg         depth++;
2714b8e80941Smrg         break;
2715b8e80941Smrg      case BRW_OPCODE_ENDIF:
2716b8e80941Smrg         if (depth == 0)
2717b8e80941Smrg            return offset;
2718b8e80941Smrg         depth--;
2719b8e80941Smrg         break;
2720b8e80941Smrg      case BRW_OPCODE_WHILE:
2721b8e80941Smrg         /* If the while doesn't jump before our instruction, it's the end
2722b8e80941Smrg          * of a sibling do...while loop.  Ignore it.
2723b8e80941Smrg          */
2724b8e80941Smrg         if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
2725b8e80941Smrg            continue;
2726b8e80941Smrg         /* fallthrough */
2727b8e80941Smrg      case BRW_OPCODE_ELSE:
2728b8e80941Smrg      case BRW_OPCODE_HALT:
2729b8e80941Smrg         if (depth == 0)
2730b8e80941Smrg            return offset;
2731b8e80941Smrg      }
2732b8e80941Smrg   }
2733b8e80941Smrg
2734b8e80941Smrg   return 0;
2735b8e80941Smrg}
2736b8e80941Smrg
2737b8e80941Smrg/* There is no DO instruction on gen6, so to find the end of the loop
2738b8e80941Smrg * we have to see if the loop is jumping back before our start
2739b8e80941Smrg * instruction.
2740b8e80941Smrg */
2741b8e80941Smrgstatic int
2742b8e80941Smrgbrw_find_loop_end(struct brw_codegen *p, int start_offset)
2743b8e80941Smrg{
2744b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2745b8e80941Smrg   int offset;
2746b8e80941Smrg   void *store = p->store;
2747b8e80941Smrg
2748b8e80941Smrg   assert(devinfo->gen >= 6);
2749b8e80941Smrg
2750b8e80941Smrg   /* Always start after the instruction (such as a WHILE) we're trying to fix
2751b8e80941Smrg    * up.
2752b8e80941Smrg    */
2753b8e80941Smrg   for (offset = next_offset(devinfo, store, start_offset);
2754b8e80941Smrg        offset < p->next_insn_offset;
2755b8e80941Smrg        offset = next_offset(devinfo, store, offset)) {
2756b8e80941Smrg      brw_inst *insn = store + offset;
2757b8e80941Smrg
2758b8e80941Smrg      if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE) {
2759b8e80941Smrg	 if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
2760b8e80941Smrg	    return offset;
2761b8e80941Smrg      }
2762b8e80941Smrg   }
2763b8e80941Smrg   assert(!"not reached");
2764b8e80941Smrg   return start_offset;
2765b8e80941Smrg}
2766b8e80941Smrg
2767b8e80941Smrg/* After program generation, go back and update the UIP and JIP of
2768b8e80941Smrg * BREAK, CONT, and HALT instructions to their correct locations.
2769b8e80941Smrg */
2770b8e80941Smrgvoid
2771b8e80941Smrgbrw_set_uip_jip(struct brw_codegen *p, int start_offset)
2772b8e80941Smrg{
2773b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2774b8e80941Smrg   int offset;
2775b8e80941Smrg   int br = brw_jump_scale(devinfo);
2776b8e80941Smrg   int scale = 16 / br;
2777b8e80941Smrg   void *store = p->store;
2778b8e80941Smrg
2779b8e80941Smrg   if (devinfo->gen < 6)
2780b8e80941Smrg      return;
2781b8e80941Smrg
2782b8e80941Smrg   for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
2783b8e80941Smrg      brw_inst *insn = store + offset;
2784b8e80941Smrg      assert(brw_inst_cmpt_control(devinfo, insn) == 0);
2785b8e80941Smrg
2786b8e80941Smrg      int block_end_offset = brw_find_next_block_end(p, offset);
2787b8e80941Smrg      switch (brw_inst_opcode(devinfo, insn)) {
2788b8e80941Smrg      case BRW_OPCODE_BREAK:
2789b8e80941Smrg         assert(block_end_offset != 0);
2790b8e80941Smrg         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
2791b8e80941Smrg	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2792b8e80941Smrg         brw_inst_set_uip(devinfo, insn,
2793b8e80941Smrg	    (brw_find_loop_end(p, offset) - offset +
2794b8e80941Smrg             (devinfo->gen == 6 ? 16 : 0)) / scale);
2795b8e80941Smrg	 break;
2796b8e80941Smrg      case BRW_OPCODE_CONTINUE:
2797b8e80941Smrg         assert(block_end_offset != 0);
2798b8e80941Smrg         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
2799b8e80941Smrg         brw_inst_set_uip(devinfo, insn,
2800b8e80941Smrg            (brw_find_loop_end(p, offset) - offset) / scale);
2801b8e80941Smrg
2802b8e80941Smrg         assert(brw_inst_uip(devinfo, insn) != 0);
2803b8e80941Smrg         assert(brw_inst_jip(devinfo, insn) != 0);
2804b8e80941Smrg	 break;
2805b8e80941Smrg
2806b8e80941Smrg      case BRW_OPCODE_ENDIF: {
2807b8e80941Smrg         int32_t jump = (block_end_offset == 0) ?
2808b8e80941Smrg                        1 * br : (block_end_offset - offset) / scale;
2809b8e80941Smrg         if (devinfo->gen >= 7)
2810b8e80941Smrg            brw_inst_set_jip(devinfo, insn, jump);
2811b8e80941Smrg         else
2812b8e80941Smrg            brw_inst_set_gen6_jump_count(devinfo, insn, jump);
2813b8e80941Smrg	 break;
2814b8e80941Smrg      }
2815b8e80941Smrg
2816b8e80941Smrg      case BRW_OPCODE_HALT:
2817b8e80941Smrg	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2818b8e80941Smrg	  *
2819b8e80941Smrg	  *    "In case of the halt instruction not inside any conditional
2820b8e80941Smrg	  *     code block, the value of <JIP> and <UIP> should be the
2821b8e80941Smrg	  *     same. In case of the halt instruction inside conditional code
2822b8e80941Smrg	  *     block, the <UIP> should be the end of the program, and the
2823b8e80941Smrg	  *     <JIP> should be end of the most inner conditional code block."
2824b8e80941Smrg	  *
2825b8e80941Smrg	  * The uip will have already been set by whoever set up the
2826b8e80941Smrg	  * instruction.
2827b8e80941Smrg	  */
2828b8e80941Smrg	 if (block_end_offset == 0) {
2829b8e80941Smrg            brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
2830b8e80941Smrg	 } else {
2831b8e80941Smrg            brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
2832b8e80941Smrg	 }
2833b8e80941Smrg         assert(brw_inst_uip(devinfo, insn) != 0);
2834b8e80941Smrg         assert(brw_inst_jip(devinfo, insn) != 0);
2835b8e80941Smrg	 break;
2836b8e80941Smrg      }
2837b8e80941Smrg   }
2838b8e80941Smrg}
2839b8e80941Smrg
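/*
 * Worked example of the fixup above (editorial note): consider a BREAK at
 * offset 0x40 whose innermost block ends with an ENDIF at 0x80, inside a
 * loop whose WHILE sits at 0xc0.  The pass sets
 * JIP = (0x80 - 0x40) / scale and, on Gen7+, UIP = (0xc0 - 0x40) / scale;
 * on Gen6 another 16 bytes are added first so that UIP points just past
 * the WHILE, as noted in the BREAK case.
 */
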
2840b8e80941Smrgvoid brw_ff_sync(struct brw_codegen *p,
2841b8e80941Smrg		   struct brw_reg dest,
2842b8e80941Smrg		   unsigned msg_reg_nr,
2843b8e80941Smrg		   struct brw_reg src0,
2844b8e80941Smrg		   bool allocate,
2845b8e80941Smrg		   unsigned response_length,
2846b8e80941Smrg		   bool eot)
2847b8e80941Smrg{
2848b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2849b8e80941Smrg   brw_inst *insn;
2850b8e80941Smrg
2851b8e80941Smrg   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2852b8e80941Smrg
2853b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_SEND);
2854b8e80941Smrg   brw_set_dest(p, insn, dest);
2855b8e80941Smrg   brw_set_src0(p, insn, src0);
2856b8e80941Smrg   brw_set_src1(p, insn, brw_imm_d(0));
2857b8e80941Smrg
2858b8e80941Smrg   if (devinfo->gen < 6)
2859b8e80941Smrg      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
2860b8e80941Smrg
2861b8e80941Smrg   brw_set_ff_sync_message(p,
2862b8e80941Smrg			   insn,
2863b8e80941Smrg			   allocate,
2864b8e80941Smrg			   response_length,
2865b8e80941Smrg			   eot);
2866b8e80941Smrg}
2867b8e80941Smrg
2868b8e80941Smrg/**
2869b8e80941Smrg * Emit the SEND instruction necessary to generate stream output data on Gen6
2870b8e80941Smrg * (for transform feedback).
2871b8e80941Smrg *
2872b8e80941Smrg * If send_commit_msg is true, this is the last piece of stream output data
2873b8e80941Smrg * from this thread, so send the data as a committed write.  According to the
2874b8e80941Smrg * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2875b8e80941Smrg *
2876b8e80941Smrg *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2877b8e80941Smrg *   writes are complete by sending the final write as a committed write."
2878b8e80941Smrg */
2879b8e80941Smrgvoid
2880b8e80941Smrgbrw_svb_write(struct brw_codegen *p,
2881b8e80941Smrg              struct brw_reg dest,
2882b8e80941Smrg              unsigned msg_reg_nr,
2883b8e80941Smrg              struct brw_reg src0,
2884b8e80941Smrg              unsigned binding_table_index,
2885b8e80941Smrg              bool   send_commit_msg)
2886b8e80941Smrg{
2887b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2888b8e80941Smrg   const unsigned target_cache =
2889b8e80941Smrg      (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
2890b8e80941Smrg       devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
2891b8e80941Smrg       BRW_SFID_DATAPORT_WRITE);
2892b8e80941Smrg   brw_inst *insn;
2893b8e80941Smrg
2894b8e80941Smrg   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2895b8e80941Smrg
2896b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_SEND);
2897b8e80941Smrg   brw_inst_set_sfid(devinfo, insn, target_cache);
2898b8e80941Smrg   brw_set_dest(p, insn, dest);
2899b8e80941Smrg   brw_set_src0(p, insn, src0);
2900b8e80941Smrg   brw_set_desc(p, insn,
2901b8e80941Smrg                brw_message_desc(devinfo, 1, send_commit_msg, true) |
2902b8e80941Smrg                brw_dp_write_desc(devinfo, binding_table_index,
2903b8e80941Smrg                                  0, /* msg_control: ignored */
2904b8e80941Smrg                                  GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2905b8e80941Smrg                                  0, /* last_render_target: ignored */
2906b8e80941Smrg                                  send_commit_msg)); /* send_commit_msg */
2907b8e80941Smrg}
2908b8e80941Smrg
2909b8e80941Smrgstatic unsigned
2910b8e80941Smrgbrw_surface_payload_size(struct brw_codegen *p,
2911b8e80941Smrg                         unsigned num_channels,
2912b8e80941Smrg                         unsigned exec_size /**< 0 for SIMD4x2 */)
2913b8e80941Smrg{
2914b8e80941Smrg   if (exec_size == 0)
2915b8e80941Smrg      return 1; /* SIMD4x2 */
2916b8e80941Smrg   else if (exec_size <= 8)
2917b8e80941Smrg      return num_channels;
2918b8e80941Smrg   else
2919b8e80941Smrg      return 2 * num_channels;
2920b8e80941Smrg}
2921b8e80941Smrg
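/*
 * Quick reference for the payload sizing above (editorial note): an
 * exec_size of 0 (SIMD4x2) always yields 1 register; a SIMD8 message with
 * 4 channels yields 4 registers; a SIMD16 message with 4 channels yields
 * 2 * 4 = 8 registers, since each channel of each component occupies one
 * DWord.
 */
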
2922b8e80941Smrgvoid
2923b8e80941Smrgbrw_untyped_atomic(struct brw_codegen *p,
2924b8e80941Smrg                   struct brw_reg dst,
2925b8e80941Smrg                   struct brw_reg payload,
2926b8e80941Smrg                   struct brw_reg surface,
2927b8e80941Smrg                   unsigned atomic_op,
2928b8e80941Smrg                   unsigned msg_length,
2929b8e80941Smrg                   bool response_expected,
2930b8e80941Smrg                   bool header_present)
2931b8e80941Smrg{
2932b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2933b8e80941Smrg   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
2934b8e80941Smrg                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
2935b8e80941Smrg                          GEN7_SFID_DATAPORT_DATA_CACHE);
2936b8e80941Smrg   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
2937b8e80941Smrg   /* SIMD4x2 untyped atomic instructions only exist on HSW+ */
2938b8e80941Smrg   const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
2939b8e80941Smrg   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
2940b8e80941Smrg                              has_simd4x2 ? 0 : 8;
2941b8e80941Smrg   const unsigned response_length =
2942b8e80941Smrg      brw_surface_payload_size(p, response_expected, exec_size);
2943b8e80941Smrg   const unsigned desc =
2944b8e80941Smrg      brw_message_desc(devinfo, msg_length, response_length, header_present) |
2945b8e80941Smrg      brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op,
2946b8e80941Smrg                                 response_expected);
2947b8e80941Smrg   /* Mask out unused components -- This is especially important in Align16
2948b8e80941Smrg    * mode on generations that don't have native support for SIMD4x2 atomics,
2949b8e80941Smrg    * because unused but enabled components will cause the dataport to perform
2950b8e80941Smrg    * additional atomic operations on the addresses that happen to be in the
2951b8e80941Smrg    * uninitialized Y, Z and W coordinates of the payload.
2952b8e80941Smrg    */
2953b8e80941Smrg   const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
2954b8e80941Smrg
2955b8e80941Smrg   brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask),
2956b8e80941Smrg                                     payload, surface, desc);
2957b8e80941Smrg}
2958b8e80941Smrg
2959b8e80941Smrgvoid
2960b8e80941Smrgbrw_untyped_surface_read(struct brw_codegen *p,
2961b8e80941Smrg                         struct brw_reg dst,
2962b8e80941Smrg                         struct brw_reg payload,
2963b8e80941Smrg                         struct brw_reg surface,
2964b8e80941Smrg                         unsigned msg_length,
2965b8e80941Smrg                         unsigned num_channels)
2966b8e80941Smrg{
2967b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2968b8e80941Smrg   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
2969b8e80941Smrg                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
2970b8e80941Smrg                          GEN7_SFID_DATAPORT_DATA_CACHE);
2971b8e80941Smrg   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
2972b8e80941Smrg   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0;
2973b8e80941Smrg   const unsigned response_length =
2974b8e80941Smrg      brw_surface_payload_size(p, num_channels, exec_size);
2975b8e80941Smrg   const unsigned desc =
2976b8e80941Smrg      brw_message_desc(devinfo, msg_length, response_length, false) |
2977b8e80941Smrg      brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false);
2978b8e80941Smrg
2979b8e80941Smrg   brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc);
2980b8e80941Smrg}
2981b8e80941Smrg
2982b8e80941Smrgvoid
2983b8e80941Smrgbrw_untyped_surface_write(struct brw_codegen *p,
2984b8e80941Smrg                          struct brw_reg payload,
2985b8e80941Smrg                          struct brw_reg surface,
2986b8e80941Smrg                          unsigned msg_length,
2987b8e80941Smrg                          unsigned num_channels,
2988b8e80941Smrg                          bool header_present)
2989b8e80941Smrg{
2990b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
2991b8e80941Smrg   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
2992b8e80941Smrg                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
2993b8e80941Smrg                          GEN7_SFID_DATAPORT_DATA_CACHE);
2994b8e80941Smrg   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
2995b8e80941Smrg   /* SIMD4x2 untyped surface write instructions only exist on HSW+ */
2996b8e80941Smrg   const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
2997b8e80941Smrg   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
2998b8e80941Smrg                              has_simd4x2 ? 0 : 8;
2999b8e80941Smrg   const unsigned desc =
3000b8e80941Smrg      brw_message_desc(devinfo, msg_length, 0, header_present) |
3001b8e80941Smrg      brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true);
3002b8e80941Smrg   /* Mask out unused components -- See comment in brw_untyped_atomic(). */
3003b8e80941Smrg   const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW;
3004b8e80941Smrg
3005b8e80941Smrg   brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask),
3006b8e80941Smrg                                     payload, surface, desc);
3007b8e80941Smrg}
3008b8e80941Smrg
3009b8e80941Smrgstatic void
3010b8e80941Smrgbrw_set_memory_fence_message(struct brw_codegen *p,
3011b8e80941Smrg                             struct brw_inst *insn,
3012b8e80941Smrg                             enum brw_message_target sfid,
3013b8e80941Smrg                             bool commit_enable)
3014b8e80941Smrg{
3015b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
3016b8e80941Smrg
3017b8e80941Smrg   brw_set_desc(p, insn, brw_message_desc(
3018b8e80941Smrg                   devinfo, 1, (commit_enable ? 1 : 0), true));
3019b8e80941Smrg
3020b8e80941Smrg   brw_inst_set_sfid(devinfo, insn, sfid);
3021b8e80941Smrg
3022b8e80941Smrg   switch (sfid) {
3023b8e80941Smrg   case GEN6_SFID_DATAPORT_RENDER_CACHE:
3024b8e80941Smrg      brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_RC_MEMORY_FENCE);
3025b8e80941Smrg      break;
3026b8e80941Smrg   case GEN7_SFID_DATAPORT_DATA_CACHE:
3027b8e80941Smrg      brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_DC_MEMORY_FENCE);
3028b8e80941Smrg      break;
3029b8e80941Smrg   default:
3030b8e80941Smrg      unreachable("Not reached");
3031b8e80941Smrg   }
3032b8e80941Smrg
3033b8e80941Smrg   if (commit_enable)
3034b8e80941Smrg      brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
3035b8e80941Smrg}
3036b8e80941Smrg
3037b8e80941Smrgvoid
3038b8e80941Smrgbrw_memory_fence(struct brw_codegen *p,
3039b8e80941Smrg                 struct brw_reg dst,
3040b8e80941Smrg                 struct brw_reg src,
3041b8e80941Smrg                 enum opcode send_op,
3042b8e80941Smrg                 bool stall)
3043b8e80941Smrg{
3044b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
3045b8e80941Smrg   const bool commit_enable = stall ||
3046b8e80941Smrg      devinfo->gen >= 10 || /* HSD ES # 1404612949 */
3047b8e80941Smrg      (devinfo->gen == 7 && !devinfo->is_haswell);
3048b8e80941Smrg   struct brw_inst *insn;
3049b8e80941Smrg
3050b8e80941Smrg   brw_push_insn_state(p);
3051b8e80941Smrg   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
3052b8e80941Smrg   brw_set_default_exec_size(p, BRW_EXECUTE_1);
3053b8e80941Smrg   dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
3054b8e80941Smrg   src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
3055b8e80941Smrg
3056b8e80941Smrg   /* Set dst as the destination for dependency tracking; the MEMORY_FENCE
3057b8e80941Smrg    * message doesn't write anything back.
3058b8e80941Smrg    */
3059b8e80941Smrg   insn = next_insn(p, send_op);
3060b8e80941Smrg   brw_set_dest(p, insn, dst);
3061b8e80941Smrg   brw_set_src0(p, insn, src);
3062b8e80941Smrg   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
3063b8e80941Smrg                                commit_enable);
3064b8e80941Smrg
3065b8e80941Smrg   if (devinfo->gen == 7 && !devinfo->is_haswell) {
3066b8e80941Smrg      /* IVB does typed surface access through the render cache, so we need to
3067b8e80941Smrg       * flush it too.  Use a different register so both flushes can be
3068b8e80941Smrg       * pipelined by the hardware.
3069b8e80941Smrg       */
3070b8e80941Smrg      insn = next_insn(p, send_op);
3071b8e80941Smrg      brw_set_dest(p, insn, offset(dst, 1));
3072b8e80941Smrg      brw_set_src0(p, insn, src);
3073b8e80941Smrg      brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
3074b8e80941Smrg                                   commit_enable);
3075b8e80941Smrg
3076b8e80941Smrg      /* Now write the response of the second message into the response of the
3077b8e80941Smrg       * first to trigger a pipeline stall -- This way future render and data
3078b8e80941Smrg       * cache messages will be properly ordered with respect to past data and
3079b8e80941Smrg       * render cache messages.
3080b8e80941Smrg       */
3081b8e80941Smrg      brw_MOV(p, dst, offset(dst, 1));
3082b8e80941Smrg   }
3083b8e80941Smrg
3084b8e80941Smrg   if (stall)
3085b8e80941Smrg      brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
3086b8e80941Smrg
3087b8e80941Smrg   brw_pop_insn_state(p);
3088b8e80941Smrg}
3089b8e80941Smrg
3090b8e80941Smrgvoid
3091b8e80941Smrgbrw_pixel_interpolator_query(struct brw_codegen *p,
3092b8e80941Smrg                             struct brw_reg dest,
3093b8e80941Smrg                             struct brw_reg mrf,
3094b8e80941Smrg                             bool noperspective,
3095b8e80941Smrg                             unsigned mode,
3096b8e80941Smrg                             struct brw_reg data,
3097b8e80941Smrg                             unsigned msg_length,
3098b8e80941Smrg                             unsigned response_length)
3099b8e80941Smrg{
3100b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
3101b8e80941Smrg   const uint16_t exec_size = brw_get_default_exec_size(p);
3102b8e80941Smrg   const unsigned slot_group = brw_get_default_group(p) / 16;
3103b8e80941Smrg   const unsigned simd_mode = (exec_size == BRW_EXECUTE_16);
3104b8e80941Smrg   const unsigned desc =
3105b8e80941Smrg      brw_message_desc(devinfo, msg_length, response_length, false) |
3106b8e80941Smrg      brw_pixel_interp_desc(devinfo, mode, noperspective, simd_mode,
3107b8e80941Smrg                            slot_group);
3108b8e80941Smrg
3109b8e80941Smrg   /* brw_send_indirect_message will automatically use a direct send message
3110b8e80941Smrg    * if data is actually immediate.
3111b8e80941Smrg    */
3112b8e80941Smrg   brw_send_indirect_message(p,
3113b8e80941Smrg                             GEN7_SFID_PIXEL_INTERPOLATOR,
3114b8e80941Smrg                             dest,
3115b8e80941Smrg                             mrf,
3116b8e80941Smrg                             vec1(data),
3117b8e80941Smrg                             desc,
3118b8e80941Smrg                             false);
3119b8e80941Smrg}
3120b8e80941Smrg
3121b8e80941Smrgvoid
3122b8e80941Smrgbrw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
3123b8e80941Smrg                      struct brw_reg mask)
3124b8e80941Smrg{
3125b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
3126b8e80941Smrg   const unsigned exec_size = 1 << brw_get_default_exec_size(p);
3127b8e80941Smrg   const unsigned qtr_control = brw_get_default_group(p) / 8;
3128b8e80941Smrg   brw_inst *inst;
3129b8e80941Smrg
3130b8e80941Smrg   assert(devinfo->gen >= 7);
3131b8e80941Smrg   assert(mask.type == BRW_REGISTER_TYPE_UD);
3132b8e80941Smrg
3133b8e80941Smrg   brw_push_insn_state(p);
3134b8e80941Smrg
3135b8e80941Smrg   /* The flag register is only used on Gen7 in align1 mode, so avoid setting
3136b8e80941Smrg    * unnecessary bits in the instruction words, get the information we need
3137b8e80941Smrg    * and reset the default flag register. This allows more instructions to be
3138b8e80941Smrg    * compacted.
3139b8e80941Smrg    */
3140b8e80941Smrg   const unsigned flag_subreg = p->current->flag_subreg;
3141b8e80941Smrg   brw_set_default_flag_reg(p, 0, 0);
3142b8e80941Smrg
3143b8e80941Smrg   if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
3144b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
3145b8e80941Smrg
3146b8e80941Smrg      if (devinfo->gen >= 8) {
3147b8e80941Smrg         /* Getting the first active channel index is easy on Gen8: Just find
3148b8e80941Smrg          * the first bit set in the execution mask.  The register exists on
3149b8e80941Smrg          * HSW already but it reads back as all ones when the current
3150b8e80941Smrg          * instruction has execution masking disabled, so it's kind of
3151b8e80941Smrg          * useless.
3152b8e80941Smrg          */
3153b8e80941Smrg         struct brw_reg exec_mask =
3154b8e80941Smrg            retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD);
3155b8e80941Smrg
3156b8e80941Smrg         brw_set_default_exec_size(p, BRW_EXECUTE_1);
3157b8e80941Smrg         if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) {
3158b8e80941Smrg            /* Unfortunately, ce0 does not take into account the thread
3159b8e80941Smrg             * dispatch mask, which may be a problem in cases where it's not
3160b8e80941Smrg             * tightly packed (i.e. it doesn't have the form '2^n - 1' for
3161b8e80941Smrg             * some n).  Combine ce0 with the given dispatch (or vector) mask
3162b8e80941Smrg             * to mask off those channels which were never dispatched by the
3163b8e80941Smrg             * hardware.
3164b8e80941Smrg             */
3165b8e80941Smrg            brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8));
3166b8e80941Smrg            brw_AND(p, vec1(dst), exec_mask, vec1(dst));
3167b8e80941Smrg            exec_mask = vec1(dst);
3168b8e80941Smrg         }
3169b8e80941Smrg
3170b8e80941Smrg         /* Quarter control has the effect of magically shifting the value of
3171b8e80941Smrg          * ce0 so you'll get the first active channel relative to the
3172b8e80941Smrg          * specified quarter control as the result.
3173b8e80941Smrg          */
3174b8e80941Smrg         inst = brw_FBL(p, vec1(dst), exec_mask);
3175b8e80941Smrg      } else {
3176b8e80941Smrg         const struct brw_reg flag = brw_flag_subreg(flag_subreg);
3177b8e80941Smrg
3178b8e80941Smrg         brw_set_default_exec_size(p, BRW_EXECUTE_1);
3179b8e80941Smrg         brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
3180b8e80941Smrg
3181b8e80941Smrg         /* Run enough instructions returning zero with execution masking and
3182b8e80941Smrg          * a conditional modifier enabled in order to get the full execution
3183b8e80941Smrg          * mask in f1.0.  We could use a single 32-wide move here if it
3184b8e80941Smrg          * weren't because of the hardware bug that causes channel enables to
3185b8e80941Smrg          * be applied incorrectly to the second half of 32-wide instructions
3186b8e80941Smrg          * on Gen7.
3187b8e80941Smrg          */
3188b8e80941Smrg         const unsigned lower_size = MIN2(16, exec_size);
3189b8e80941Smrg         for (unsigned i = 0; i < exec_size / lower_size; i++) {
3190b8e80941Smrg            inst = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW),
3191b8e80941Smrg                           brw_imm_uw(0));
3192b8e80941Smrg            brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
3193b8e80941Smrg            brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control);
3194b8e80941Smrg            brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
3195b8e80941Smrg            brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1);
3196b8e80941Smrg            brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2);
3197b8e80941Smrg            brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2);
3198b8e80941Smrg         }
3199b8e80941Smrg
3200b8e80941Smrg         /* Find the first bit set in the exec_size-wide portion of the flag
3201b8e80941Smrg          * register that was updated by the last sequence of MOV
3202b8e80941Smrg          * instructions.
3203b8e80941Smrg          */
3204b8e80941Smrg         const enum brw_reg_type type = brw_int_type(exec_size / 8, false);
3205b8e80941Smrg         brw_set_default_exec_size(p, BRW_EXECUTE_1);
3206b8e80941Smrg         brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
3207b8e80941Smrg      }
3208b8e80941Smrg   } else {
3209b8e80941Smrg      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
3210b8e80941Smrg
3211b8e80941Smrg      if (devinfo->gen >= 8 &&
3212b8e80941Smrg          mask.file == BRW_IMMEDIATE_VALUE && mask.ud == 0xffffffff) {
3213b8e80941Smrg         /* In SIMD4x2 mode the first active channel index is just the
3214b8e80941Smrg          * negation of the first bit of the mask register.  Note that ce0
3215b8e80941Smrg          * doesn't take into account the dispatch mask, so the Gen7 path
3216b8e80941Smrg          * should be used instead unless you have the guarantee that the
3217b8e80941Smrg          * dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
3218b8e80941Smrg          * for some n).
3219b8e80941Smrg          */
3220b8e80941Smrg         inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
3221b8e80941Smrg                        negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
3222b8e80941Smrg                        brw_imm_ud(1));
3223b8e80941Smrg
3224b8e80941Smrg      } else {
3225b8e80941Smrg         /* Overwrite the destination without and with execution masking to
3226b8e80941Smrg          * find out which of the channels is active.
3227b8e80941Smrg          */
3228b8e80941Smrg         brw_push_insn_state(p);
3229b8e80941Smrg         brw_set_default_exec_size(p, BRW_EXECUTE_4);
3230b8e80941Smrg         brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
3231b8e80941Smrg                 brw_imm_ud(1));
3232b8e80941Smrg
3233b8e80941Smrg         inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
3234b8e80941Smrg                        brw_imm_ud(0));
3235b8e80941Smrg         brw_pop_insn_state(p);
3236b8e80941Smrg         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
3237b8e80941Smrg      }
3238b8e80941Smrg   }
3239b8e80941Smrg
3240b8e80941Smrg   brw_pop_insn_state(p);
3241b8e80941Smrg}
3242b8e80941Smrg
3243b8e80941Smrgvoid
3244b8e80941Smrgbrw_broadcast(struct brw_codegen *p,
3245b8e80941Smrg              struct brw_reg dst,
3246b8e80941Smrg              struct brw_reg src,
3247b8e80941Smrg              struct brw_reg idx)
3248b8e80941Smrg{
3249b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
3250b8e80941Smrg   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
3251b8e80941Smrg   brw_inst *inst;
3252b8e80941Smrg
3253b8e80941Smrg   brw_push_insn_state(p);
3254b8e80941Smrg   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
3255b8e80941Smrg   brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);
3256b8e80941Smrg
3257b8e80941Smrg   assert(src.file == BRW_GENERAL_REGISTER_FILE &&
3258b8e80941Smrg          src.address_mode == BRW_ADDRESS_DIRECT);
3259b8e80941Smrg   assert(!src.abs && !src.negate);
3260b8e80941Smrg   assert(src.type == dst.type);
3261b8e80941Smrg
3262b8e80941Smrg   if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
3263b8e80941Smrg       idx.file == BRW_IMMEDIATE_VALUE) {
3264b8e80941Smrg      /* Trivial, the source is already uniform or the index is a constant.
3265b8e80941Smrg       * We will typically not get here if the optimizer is doing its job, but
3266b8e80941Smrg       * asserting would be mean.
3267b8e80941Smrg       */
3268b8e80941Smrg      const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
3269b8e80941Smrg      brw_MOV(p, dst,
3270b8e80941Smrg              (align1 ? stride(suboffset(src, i), 0, 1, 0) :
3271b8e80941Smrg               stride(suboffset(src, 4 * i), 0, 4, 1)));
3272b8e80941Smrg   } else {
3273b8e80941Smrg      /* From the Haswell PRM section "Register Region Restrictions":
3274b8e80941Smrg       *
3275b8e80941Smrg       *    "The lower bits of the AddressImmediate must not overflow to
3276b8e80941Smrg       *    change the register address.  The lower 5 bits of Address
3277b8e80941Smrg       *    Immediate when added to lower 5 bits of address register gives
3278b8e80941Smrg       *    the sub-register offset. The upper bits of Address Immediate
3279b8e80941Smrg       *    when added to upper bits of address register gives the register
3280b8e80941Smrg       *    address. Any overflow from sub-register offset is dropped."
3281b8e80941Smrg       *
3282b8e80941Smrg       * Fortunately, for broadcast, we never have a sub-register offset so
3283b8e80941Smrg       * this isn't an issue.
3284b8e80941Smrg       */
3285b8e80941Smrg      assert(src.subnr == 0);
3286b8e80941Smrg
3287b8e80941Smrg      if (align1) {
3288b8e80941Smrg         const struct brw_reg addr =
3289b8e80941Smrg            retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
3290b8e80941Smrg         unsigned offset = src.nr * REG_SIZE + src.subnr;
3291b8e80941Smrg         /* Limit in bytes of the signed indirect addressing immediate. */
3292b8e80941Smrg         const unsigned limit = 512;
3293b8e80941Smrg
3294b8e80941Smrg         brw_push_insn_state(p);
3295b8e80941Smrg         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
3296b8e80941Smrg         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
3297b8e80941Smrg
3298b8e80941Smrg         /* Take into account the component size and horizontal stride. */
3299b8e80941Smrg         assert(src.vstride == src.hstride + src.width);
3300b8e80941Smrg         brw_SHL(p, addr, vec1(idx),
3301b8e80941Smrg                 brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
3302b8e80941Smrg                            src.hstride - 1));
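         /* For example, for a tightly packed dword source (type_sz == 4,
          * hstride == 1) the shift amount is 2, so the address register ends
          * up holding idx * 4, the byte offset of the selected component.
          */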
3303b8e80941Smrg
3304b8e80941Smrg         /* We can only address up to limit bytes using the indirect
3305b8e80941Smrg          * addressing immediate, so account for the difference if the source
3306b8e80941Smrg          * register is above this limit.
3307b8e80941Smrg          */
3308b8e80941Smrg         if (offset >= limit) {
3309b8e80941Smrg            brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
3310b8e80941Smrg            offset = offset % limit;
3311b8e80941Smrg         }
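         /* For example, a source in g20 gives offset == 640: the ADD above
          * folds 512 of it into the address register and leaves offset == 128
          * to be encoded as the indirect addressing immediate below.
          */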
3312b8e80941Smrg
3313b8e80941Smrg         brw_pop_insn_state(p);
3314b8e80941Smrg
3315b8e80941Smrg         /* Use indirect addressing to fetch the specified component. */
3316b8e80941Smrg         if (type_sz(src.type) > 4 &&
3317b8e80941Smrg             (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
3318b8e80941Smrg            /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
3319b8e80941Smrg             *
3320b8e80941Smrg             *    "When source or destination datatype is 64b or operation is
3321b8e80941Smrg             *    integer DWord multiply, indirect addressing must not be
3322b8e80941Smrg             *    used."
3323b8e80941Smrg             *
3324b8e80941Smrg             * To work around this issue, we do two integer MOVs
3325b8e80941Smrg             * instead of one 64-bit MOV.  Because no double value should ever
3326b8e80941Smrg             * cross a register boundary, it's safe to use the immediate
3327b8e80941Smrg             * offset in the indirect here to handle adding 4 bytes to the
3328b8e80941Smrg             * offset and avoid the extra ADD to the register file.
3329b8e80941Smrg             */
3330b8e80941Smrg            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
3331b8e80941Smrg                       retype(brw_vec1_indirect(addr.subnr, offset),
3332b8e80941Smrg                              BRW_REGISTER_TYPE_D));
3333b8e80941Smrg            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
3334b8e80941Smrg                       retype(brw_vec1_indirect(addr.subnr, offset + 4),
3335b8e80941Smrg                              BRW_REGISTER_TYPE_D));
3336b8e80941Smrg         } else {
3337b8e80941Smrg            brw_MOV(p, dst,
3338b8e80941Smrg                    retype(brw_vec1_indirect(addr.subnr, offset), src.type));
3339b8e80941Smrg         }
3340b8e80941Smrg      } else {
3341b8e80941Smrg         /* In SIMD4x2 mode the index can be either zero or one.  Replicate it
3342b8e80941Smrg          * to all bits of a flag register,
3343b8e80941Smrg          */
3344b8e80941Smrg         inst = brw_MOV(p,
3345b8e80941Smrg                        brw_null_reg(),
3346b8e80941Smrg                        stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
3347b8e80941Smrg         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
3348b8e80941Smrg         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
3349b8e80941Smrg         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
3350b8e80941Smrg
3351b8e80941Smrg         /* and use predicated SEL to pick the right channel. */
3352b8e80941Smrg         inst = brw_SEL(p, dst,
3353b8e80941Smrg                        stride(suboffset(src, 4), 4, 4, 1),
3354b8e80941Smrg                        stride(src, 4, 4, 1));
3355b8e80941Smrg         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
3356b8e80941Smrg         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
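         /* If the flag is set (idx was one) the predicated SEL picks its
          * first source, the vec4 at element offset 4 (the second channel);
          * otherwise it picks the first channel.
          */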
3357b8e80941Smrg      }
3358b8e80941Smrg   }
3359b8e80941Smrg
3360b8e80941Smrg   brw_pop_insn_state(p);
3361b8e80941Smrg}
3362b8e80941Smrg
3363b8e80941Smrg/**
3364b8e80941Smrg * This instruction is generated as a single-channel align1 instruction by
3365b8e80941Smrg * both the VS and FS stages when using INTEL_DEBUG=shader_time.
3366b8e80941Smrg *
3367b8e80941Smrg * We can't use the typed atomic op in the FS because that has the execution
3368b8e80941Smrg * mask ANDed with the pixel mask, but we just want to write the one dword for
3369b8e80941Smrg * all the pixels.
3370b8e80941Smrg *
3371b8e80941Smrg * We don't use the SIMD4x2 atomic ops in the VS because we just want to write
3372b8e80941Smrg * one u32.  So we use the same untyped atomic write message as the pixel
3373b8e80941Smrg * shader.
3374b8e80941Smrg *
3375b8e80941Smrg * The untyped atomic operation requires a BUFFER surface type with RAW
3376b8e80941Smrg * format, and is only accessible through the legacy DATA_CACHE dataport
3377b8e80941Smrg * messages.
3378b8e80941Smrg */
3379b8e80941Smrgvoid brw_shader_time_add(struct brw_codegen *p,
3380b8e80941Smrg                         struct brw_reg payload,
3381b8e80941Smrg                         uint32_t surf_index)
3382b8e80941Smrg{
3383b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
3384b8e80941Smrg   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
3385b8e80941Smrg                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
3386b8e80941Smrg                          GEN7_SFID_DATAPORT_DATA_CACHE);
3387b8e80941Smrg   assert(devinfo->gen >= 7);
3388b8e80941Smrg
3389b8e80941Smrg   brw_push_insn_state(p);
3390b8e80941Smrg   brw_set_default_access_mode(p, BRW_ALIGN_1);
3391b8e80941Smrg   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
3392b8e80941Smrg   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
3393b8e80941Smrg   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
3394b8e80941Smrg
3395b8e80941Smrg   /* We use brw_vec1_reg and unmasked because we want the increment at the
3396b8e80941Smrg    * given offset to happen exactly once.
3397b8e80941Smrg    */
3398b8e80941Smrg   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
3399b8e80941Smrg                                      BRW_ARF_NULL, 0));
3400b8e80941Smrg   brw_set_src0(p, send, brw_vec1_reg(payload.file,
3401b8e80941Smrg                                      payload.nr, 0));
3402b8e80941Smrg   brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) |
3403b8e80941Smrg                          brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD,
3404b8e80941Smrg                                                     false)));
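   /* The descriptor requests a single-channel untyped atomic ADD with a
    * two-GRF payload and no return value (response length 0).
    */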
3405b8e80941Smrg
3406b8e80941Smrg   brw_inst_set_sfid(devinfo, send, sfid);
3407b8e80941Smrg   brw_inst_set_binding_table_index(devinfo, send, surf_index);
3408b8e80941Smrg
3409b8e80941Smrg   brw_pop_insn_state(p);
3410b8e80941Smrg}
3411b8e80941Smrg
3412b8e80941Smrg
3413b8e80941Smrg/**
3414b8e80941Smrg * Emit the SEND message for a barrier
3415b8e80941Smrg */
3416b8e80941Smrgvoid
3417b8e80941Smrgbrw_barrier(struct brw_codegen *p, struct brw_reg src)
3418b8e80941Smrg{
3419b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
3420b8e80941Smrg   struct brw_inst *inst;
3421b8e80941Smrg
3422b8e80941Smrg   assert(devinfo->gen >= 7);
3423b8e80941Smrg
3424b8e80941Smrg   brw_push_insn_state(p);
3425b8e80941Smrg   brw_set_default_access_mode(p, BRW_ALIGN_1);
3426b8e80941Smrg   inst = next_insn(p, BRW_OPCODE_SEND);
3427b8e80941Smrg   brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
3428b8e80941Smrg   brw_set_src0(p, inst, src);
3429b8e80941Smrg   brw_set_src1(p, inst, brw_null_reg());
3430b8e80941Smrg   brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false));
3431b8e80941Smrg
3432b8e80941Smrg   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
3433b8e80941Smrg   brw_inst_set_gateway_notify(devinfo, inst, 1);
3434b8e80941Smrg   brw_inst_set_gateway_subfuncid(devinfo, inst,
3435b8e80941Smrg                                  BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
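   /* This message tells the gateway that the thread has reached the barrier;
    * the thread then typically blocks on the notification register via
    * brw_WAIT() below until the whole group has arrived.
    */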
3436b8e80941Smrg
3437b8e80941Smrg   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
3438b8e80941Smrg   brw_pop_insn_state(p);
3439b8e80941Smrg}
3440b8e80941Smrg
3441b8e80941Smrg
3442b8e80941Smrg/**
3443b8e80941Smrg * Emit the wait instruction for a barrier
3444b8e80941Smrg */
3445b8e80941Smrgvoid
3446b8e80941Smrgbrw_WAIT(struct brw_codegen *p)
3447b8e80941Smrg{
3448b8e80941Smrg   const struct gen_device_info *devinfo = p->devinfo;
3449b8e80941Smrg   struct brw_inst *insn;
3450b8e80941Smrg
3451b8e80941Smrg   struct brw_reg src = brw_notification_reg();
3452b8e80941Smrg
3453b8e80941Smrg   insn = next_insn(p, BRW_OPCODE_WAIT);
3454b8e80941Smrg   brw_set_dest(p, insn, src);
3455b8e80941Smrg   brw_set_src0(p, insn, src);
3456b8e80941Smrg   brw_set_src1(p, insn, brw_null_reg());
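   /* WAIT stalls the thread until the selected notification subregister
    * becomes non-zero, then decrements it and resumes execution.
    */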
3457b8e80941Smrg
3458b8e80941Smrg   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
3459b8e80941Smrg   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
3460b8e80941Smrg}
3461b8e80941Smrg
3462b8e80941Smrg/**
3463b8e80941Smrg * Changes the floating-point rounding mode by updating the control register
3464b8e80941Smrg * field defined at cr0.0 bits [5-6]. This function supports switching to
3465b8e80941Smrg * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations.
3466b8e80941Smrg * Only RTNE and RTZ rounding are enabled in NIR.
3467b8e80941Smrg */
3468b8e80941Smrgvoid
3469b8e80941Smrgbrw_rounding_mode(struct brw_codegen *p,
3470b8e80941Smrg                  enum brw_rnd_mode mode)
3471b8e80941Smrg{
3472b8e80941Smrg   const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT;
3473b8e80941Smrg
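   /* The clear and set steps are emitted separately so that each can be
    * skipped when it would be a no-op: switching to RTNE (bits == 0) only
    * needs the AND below, and switching to RTZ (bits == BRW_CR0_RND_MODE_MASK)
    * only needs the OR.
    */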
3474b8e80941Smrg   if (bits != BRW_CR0_RND_MODE_MASK) {
3475b8e80941Smrg      brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
3476b8e80941Smrg                               brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
3477b8e80941Smrg      brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
3478b8e80941Smrg
3479b8e80941Smrg      /* From the Skylake PRM, Volume 7, page 760:
3480b8e80941Smrg       *  "Implementation Restriction on Register Access: When the control
3481b8e80941Smrg       *   register is used as an explicit source and/or destination, hardware
3482b8e80941Smrg       *   does not ensure execution pipeline coherency. Software must set the
3483b8e80941Smrg       *   thread control field to ‘switch’ for an instruction that uses
3484b8e80941Smrg       *   control register as an explicit operand."
3485b8e80941Smrg       */
3486b8e80941Smrg      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
3487b8e80941Smrg   }
3488b8e80941Smrg
3489b8e80941Smrg   if (bits) {
3490b8e80941Smrg      brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
3491b8e80941Smrg                              brw_imm_ud(bits));
3492b8e80941Smrg      brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
3493b8e80941Smrg      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
3494b8e80941Smrg   }
3495b8e80941Smrg}