1428d7b3dSmrg/*
2428d7b3dSmrg   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3428d7b3dSmrg   Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4428d7b3dSmrg   develop this 3D driver.
5428d7b3dSmrg
6428d7b3dSmrg   Permission is hereby granted, free of charge, to any person obtaining
7428d7b3dSmrg   a copy of this software and associated documentation files (the
8428d7b3dSmrg   "Software"), to deal in the Software without restriction, including
9428d7b3dSmrg   without limitation the rights to use, copy, modify, merge, publish,
10428d7b3dSmrg   distribute, sublicense, and/or sell copies of the Software, and to
11428d7b3dSmrg   permit persons to whom the Software is furnished to do so, subject to
12428d7b3dSmrg   the following conditions:
13428d7b3dSmrg
14428d7b3dSmrg   The above copyright notice and this permission notice (including the
15428d7b3dSmrg   next paragraph) shall be included in all copies or substantial
16428d7b3dSmrg   portions of the Software.
17428d7b3dSmrg
18428d7b3dSmrg   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19428d7b3dSmrg   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20428d7b3dSmrg   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21428d7b3dSmrg   IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22428d7b3dSmrg   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23428d7b3dSmrg   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24428d7b3dSmrg   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25428d7b3dSmrg
26428d7b3dSmrg **********************************************************************/
27428d7b3dSmrg/*
28428d7b3dSmrg * Authors:
29428d7b3dSmrg *   Keith Whitwell <keith@tungstengraphics.com>
30428d7b3dSmrg */
31428d7b3dSmrg
32428d7b3dSmrg#include "brw_eu.h"
33428d7b3dSmrg
34428d7b3dSmrg#include <string.h>
35428d7b3dSmrg#include <stdlib.h>
36428d7b3dSmrg
/* Element count of A.  A must be a real array (not a pointer); the argument
 * is parenthesized so that expression arguments index correctly. */
#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
38428d7b3dSmrg
39428d7b3dSmrg/***********************************************************************
40428d7b3dSmrg * Internal helper for constructing instructions
41428d7b3dSmrg */
42428d7b3dSmrg
43428d7b3dSmrgstatic void guess_execution_size(struct brw_compile *p,
44428d7b3dSmrg				 struct brw_instruction *insn,
45428d7b3dSmrg				 struct brw_reg reg)
46428d7b3dSmrg{
47428d7b3dSmrg	if (reg.width == BRW_WIDTH_8 && p->compressed)
48428d7b3dSmrg		insn->header.execution_size = BRW_EXECUTE_16;
49428d7b3dSmrg	else
50428d7b3dSmrg		insn->header.execution_size = reg.width;
51428d7b3dSmrg}
52428d7b3dSmrg
53428d7b3dSmrg
54428d7b3dSmrg/**
55428d7b3dSmrg * Prior to Sandybridge, the SEND instruction accepted non-MRF source
56428d7b3dSmrg * registers, implicitly moving the operand to a message register.
57428d7b3dSmrg *
58428d7b3dSmrg * On Sandybridge, this is no longer the case.  This function performs the
59428d7b3dSmrg * explicit move; it should be called before emitting a SEND instruction.
60428d7b3dSmrg */
61428d7b3dSmrgvoid
62428d7b3dSmrggen6_resolve_implied_move(struct brw_compile *p,
63428d7b3dSmrg			  struct brw_reg *src,
64428d7b3dSmrg			  unsigned msg_reg_nr)
65428d7b3dSmrg{
66428d7b3dSmrg	if (p->gen < 060)
67428d7b3dSmrg		return;
68428d7b3dSmrg
69428d7b3dSmrg	if (src->file == BRW_MESSAGE_REGISTER_FILE)
70428d7b3dSmrg		return;
71428d7b3dSmrg
72428d7b3dSmrg	if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
73428d7b3dSmrg		brw_push_insn_state(p);
74428d7b3dSmrg		brw_set_mask_control(p, BRW_MASK_DISABLE);
75428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
76428d7b3dSmrg		brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
77428d7b3dSmrg		brw_pop_insn_state(p);
78428d7b3dSmrg	}
79428d7b3dSmrg	*src = brw_message_reg(msg_reg_nr);
80428d7b3dSmrg}
81428d7b3dSmrg
82428d7b3dSmrgstatic void
83428d7b3dSmrggen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
84428d7b3dSmrg{
85428d7b3dSmrg	/* From the BSpec / ISA Reference / send - [DevIVB+]:
86428d7b3dSmrg	 * "The send with EOT should use register space R112-R127 for <src>. This is
87428d7b3dSmrg	 *  to enable loading of a new thread into the same slot while the message
88428d7b3dSmrg	 *  with EOT for current thread is pending dispatch."
89428d7b3dSmrg	 *
90428d7b3dSmrg	 * Since we're pretending to have 16 MRFs anyway, we may as well use the
91428d7b3dSmrg	 * registers required for messages with EOT.
92428d7b3dSmrg	 */
93428d7b3dSmrg	if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
94428d7b3dSmrg		reg->file = BRW_GENERAL_REGISTER_FILE;
95428d7b3dSmrg		reg->nr += 111;
96428d7b3dSmrg	}
97428d7b3dSmrg}
98428d7b3dSmrg
99428d7b3dSmrgvoid
100428d7b3dSmrgbrw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
101428d7b3dSmrg	     struct brw_reg dest)
102428d7b3dSmrg{
103428d7b3dSmrg	if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
104428d7b3dSmrg	    dest.file != BRW_MESSAGE_REGISTER_FILE)
105428d7b3dSmrg		assert(dest.nr < 128);
106428d7b3dSmrg
107428d7b3dSmrg	gen7_convert_mrf_to_grf(p, &dest);
108428d7b3dSmrg
109428d7b3dSmrg	insn->bits1.da1.dest_reg_file = dest.file;
110428d7b3dSmrg	insn->bits1.da1.dest_reg_type = dest.type;
111428d7b3dSmrg	insn->bits1.da1.dest_address_mode = dest.address_mode;
112428d7b3dSmrg
113428d7b3dSmrg	if (dest.address_mode == BRW_ADDRESS_DIRECT) {
114428d7b3dSmrg		insn->bits1.da1.dest_reg_nr = dest.nr;
115428d7b3dSmrg
116428d7b3dSmrg		if (insn->header.access_mode == BRW_ALIGN_1) {
117428d7b3dSmrg			insn->bits1.da1.dest_subreg_nr = dest.subnr;
118428d7b3dSmrg			if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
119428d7b3dSmrg				dest.hstride = BRW_HORIZONTAL_STRIDE_1;
120428d7b3dSmrg			insn->bits1.da1.dest_horiz_stride = dest.hstride;
121428d7b3dSmrg		} else {
122428d7b3dSmrg			insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
123428d7b3dSmrg			insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
124428d7b3dSmrg			/* even ignored in da16, still need to set as '01' */
125428d7b3dSmrg			insn->bits1.da16.dest_horiz_stride = 1;
126428d7b3dSmrg		}
127428d7b3dSmrg	} else {
128428d7b3dSmrg		insn->bits1.ia1.dest_subreg_nr = dest.subnr;
129428d7b3dSmrg
130428d7b3dSmrg		/* These are different sizes in align1 vs align16:
131428d7b3dSmrg		*/
132428d7b3dSmrg		if (insn->header.access_mode == BRW_ALIGN_1) {
133428d7b3dSmrg			insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
134428d7b3dSmrg			if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
135428d7b3dSmrg				dest.hstride = BRW_HORIZONTAL_STRIDE_1;
136428d7b3dSmrg			insn->bits1.ia1.dest_horiz_stride = dest.hstride;
137428d7b3dSmrg		}
138428d7b3dSmrg		else {
139428d7b3dSmrg			insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
140428d7b3dSmrg			/* even ignored in da16, still need to set as '01' */
141428d7b3dSmrg			insn->bits1.ia16.dest_horiz_stride = 1;
142428d7b3dSmrg		}
143428d7b3dSmrg	}
144428d7b3dSmrg
145428d7b3dSmrg	guess_execution_size(p, insn, dest);
146428d7b3dSmrg}
147428d7b3dSmrg
/*
 * Element size for each 3-bit register type encoding, indexed by the
 * encoding itself (in bytes, judging by the word-stride check that uses
 * it).  Encoding 6 has no size assigned and reads as zero.
 */
static const int reg_type_size[8] = {
	4, 4,	/* encodings 0 and 1 */
	2, 2,	/* encodings 2 and 3 */
	1, 1,	/* encodings 4 and 5 */
	0,	/* encoding 6: left at zero */
	4	/* encoding 7 */
};
157428d7b3dSmrg
158428d7b3dSmrgstatic void
159428d7b3dSmrgvalidate_reg(struct brw_instruction *insn, struct brw_reg reg)
160428d7b3dSmrg{
161428d7b3dSmrg	int hstride_for_reg[] = {0, 1, 2, 4};
162428d7b3dSmrg	int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
163428d7b3dSmrg	int width_for_reg[] = {1, 2, 4, 8, 16};
164428d7b3dSmrg	int execsize_for_reg[] = {1, 2, 4, 8, 16};
165428d7b3dSmrg	int width, hstride, vstride, execsize;
166428d7b3dSmrg
167428d7b3dSmrg	if (reg.file == BRW_IMMEDIATE_VALUE) {
168428d7b3dSmrg		/* 3.3.6: Region Parameters.  Restriction: Immediate vectors
169428d7b3dSmrg		 * mean the destination has to be 128-bit aligned and the
170428d7b3dSmrg		 * destination horiz stride has to be a word.
171428d7b3dSmrg		 */
172428d7b3dSmrg		if (reg.type == BRW_REGISTER_TYPE_V) {
173428d7b3dSmrg			assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
174428d7b3dSmrg			       reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
175428d7b3dSmrg		}
176428d7b3dSmrg
177428d7b3dSmrg		return;
178428d7b3dSmrg	}
179428d7b3dSmrg
180428d7b3dSmrg	if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
181428d7b3dSmrg	    reg.file == BRW_ARF_NULL)
182428d7b3dSmrg		return;
183428d7b3dSmrg
184428d7b3dSmrg	assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
185428d7b3dSmrg	assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
186428d7b3dSmrg	assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
187428d7b3dSmrg	assert(insn->header.execution_size >= 0 && insn->header.execution_size < ARRAY_SIZE(execsize_for_reg));
188428d7b3dSmrg
189428d7b3dSmrg	hstride = hstride_for_reg[reg.hstride];
190428d7b3dSmrg
191428d7b3dSmrg	if (reg.vstride == 0xf) {
192428d7b3dSmrg		vstride = -1;
193428d7b3dSmrg	} else {
194428d7b3dSmrg		vstride = vstride_for_reg[reg.vstride];
195428d7b3dSmrg	}
196428d7b3dSmrg
197428d7b3dSmrg	width = width_for_reg[reg.width];
198428d7b3dSmrg
199428d7b3dSmrg	execsize = execsize_for_reg[insn->header.execution_size];
200428d7b3dSmrg
201428d7b3dSmrg	/* Restrictions from 3.3.10: Register Region Restrictions. */
202428d7b3dSmrg	/* 3. */
203428d7b3dSmrg	assert(execsize >= width);
204428d7b3dSmrg
205428d7b3dSmrg	/* 4. */
206428d7b3dSmrg	if (execsize == width && hstride != 0) {
207428d7b3dSmrg		assert(vstride == -1 || vstride == width * hstride);
208428d7b3dSmrg	}
209428d7b3dSmrg
210428d7b3dSmrg	/* 5. */
211428d7b3dSmrg	if (execsize == width && hstride == 0) {
212428d7b3dSmrg		/* no restriction on vstride. */
213428d7b3dSmrg	}
214428d7b3dSmrg
215428d7b3dSmrg	/* 6. */
216428d7b3dSmrg	if (width == 1) {
217428d7b3dSmrg		assert(hstride == 0);
218428d7b3dSmrg	}
219428d7b3dSmrg
220428d7b3dSmrg	/* 7. */
221428d7b3dSmrg	if (execsize == 1 && width == 1) {
222428d7b3dSmrg		assert(hstride == 0);
223428d7b3dSmrg		assert(vstride == 0);
224428d7b3dSmrg	}
225428d7b3dSmrg
226428d7b3dSmrg	/* 8. */
227428d7b3dSmrg	if (vstride == 0 && hstride == 0) {
228428d7b3dSmrg		assert(width == 1);
229428d7b3dSmrg	}
230428d7b3dSmrg
231428d7b3dSmrg	/* 10. Check destination issues. */
232428d7b3dSmrg}
233428d7b3dSmrg
234428d7b3dSmrgvoid
235428d7b3dSmrgbrw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
236428d7b3dSmrg	     struct brw_reg reg)
237428d7b3dSmrg{
238428d7b3dSmrg	if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
239428d7b3dSmrg		assert(reg.nr < 128);
240428d7b3dSmrg
241428d7b3dSmrg	gen7_convert_mrf_to_grf(p, &reg);
242428d7b3dSmrg
243428d7b3dSmrg	validate_reg(insn, reg);
244428d7b3dSmrg
245428d7b3dSmrg	insn->bits1.da1.src0_reg_file = reg.file;
246428d7b3dSmrg	insn->bits1.da1.src0_reg_type = reg.type;
247428d7b3dSmrg	insn->bits2.da1.src0_abs = reg.abs;
248428d7b3dSmrg	insn->bits2.da1.src0_negate = reg.negate;
249428d7b3dSmrg	insn->bits2.da1.src0_address_mode = reg.address_mode;
250428d7b3dSmrg
251428d7b3dSmrg	if (reg.file == BRW_IMMEDIATE_VALUE) {
252428d7b3dSmrg		insn->bits3.ud = reg.dw1.ud;
253428d7b3dSmrg
254428d7b3dSmrg		/* Required to set some fields in src1 as well:
255428d7b3dSmrg		*/
256428d7b3dSmrg		insn->bits1.da1.src1_reg_file = 0; /* arf */
257428d7b3dSmrg		insn->bits1.da1.src1_reg_type = reg.type;
258428d7b3dSmrg	} else {
259428d7b3dSmrg		if (reg.address_mode == BRW_ADDRESS_DIRECT) {
260428d7b3dSmrg			if (insn->header.access_mode == BRW_ALIGN_1) {
261428d7b3dSmrg				insn->bits2.da1.src0_subreg_nr = reg.subnr;
262428d7b3dSmrg				insn->bits2.da1.src0_reg_nr = reg.nr;
263428d7b3dSmrg			} else {
264428d7b3dSmrg				insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
265428d7b3dSmrg				insn->bits2.da16.src0_reg_nr = reg.nr;
266428d7b3dSmrg			}
267428d7b3dSmrg		} else {
268428d7b3dSmrg			insn->bits2.ia1.src0_subreg_nr = reg.subnr;
269428d7b3dSmrg
270428d7b3dSmrg			if (insn->header.access_mode == BRW_ALIGN_1) {
271428d7b3dSmrg				insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
272428d7b3dSmrg			} else {
273428d7b3dSmrg				insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
274428d7b3dSmrg			}
275428d7b3dSmrg		}
276428d7b3dSmrg
277428d7b3dSmrg		if (insn->header.access_mode == BRW_ALIGN_1) {
278428d7b3dSmrg			if (reg.width == BRW_WIDTH_1 &&
279428d7b3dSmrg			    insn->header.execution_size == BRW_EXECUTE_1) {
280428d7b3dSmrg				insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
281428d7b3dSmrg				insn->bits2.da1.src0_width = BRW_WIDTH_1;
282428d7b3dSmrg				insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
283428d7b3dSmrg			} else {
284428d7b3dSmrg				insn->bits2.da1.src0_horiz_stride = reg.hstride;
285428d7b3dSmrg				insn->bits2.da1.src0_width = reg.width;
286428d7b3dSmrg				insn->bits2.da1.src0_vert_stride = reg.vstride;
287428d7b3dSmrg			}
288428d7b3dSmrg		} else {
289428d7b3dSmrg			insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
290428d7b3dSmrg			insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
291428d7b3dSmrg			insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
292428d7b3dSmrg			insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
293428d7b3dSmrg
294428d7b3dSmrg			/* This is an oddity of the fact we're using the same
295428d7b3dSmrg			 * descriptions for registers in align_16 as align_1:
296428d7b3dSmrg			 */
297428d7b3dSmrg			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
298428d7b3dSmrg				insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
299428d7b3dSmrg			else
300428d7b3dSmrg				insn->bits2.da16.src0_vert_stride = reg.vstride;
301428d7b3dSmrg		}
302428d7b3dSmrg	}
303428d7b3dSmrg}
304428d7b3dSmrg
305428d7b3dSmrgvoid brw_set_src1(struct brw_compile *p,
306428d7b3dSmrg		  struct brw_instruction *insn,
307428d7b3dSmrg		  struct brw_reg reg)
308428d7b3dSmrg{
309428d7b3dSmrg	assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
310428d7b3dSmrg	assert(reg.nr < 128);
311428d7b3dSmrg
312428d7b3dSmrg	gen7_convert_mrf_to_grf(p, &reg);
313428d7b3dSmrg
314428d7b3dSmrg	validate_reg(insn, reg);
315428d7b3dSmrg
316428d7b3dSmrg	insn->bits1.da1.src1_reg_file = reg.file;
317428d7b3dSmrg	insn->bits1.da1.src1_reg_type = reg.type;
318428d7b3dSmrg	insn->bits3.da1.src1_abs = reg.abs;
319428d7b3dSmrg	insn->bits3.da1.src1_negate = reg.negate;
320428d7b3dSmrg
321428d7b3dSmrg	/* Only src1 can be immediate in two-argument instructions. */
322428d7b3dSmrg	assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
323428d7b3dSmrg
324428d7b3dSmrg	if (reg.file == BRW_IMMEDIATE_VALUE) {
325428d7b3dSmrg		insn->bits3.ud = reg.dw1.ud;
326428d7b3dSmrg	} else {
327428d7b3dSmrg		/* This is a hardware restriction, which may or may not be lifted
328428d7b3dSmrg		 * in the future:
329428d7b3dSmrg		 */
330428d7b3dSmrg		assert (reg.address_mode == BRW_ADDRESS_DIRECT);
331428d7b3dSmrg		/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
332428d7b3dSmrg
333428d7b3dSmrg		if (insn->header.access_mode == BRW_ALIGN_1) {
334428d7b3dSmrg			insn->bits3.da1.src1_subreg_nr = reg.subnr;
335428d7b3dSmrg			insn->bits3.da1.src1_reg_nr = reg.nr;
336428d7b3dSmrg		} else {
337428d7b3dSmrg			insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
338428d7b3dSmrg			insn->bits3.da16.src1_reg_nr = reg.nr;
339428d7b3dSmrg		}
340428d7b3dSmrg
341428d7b3dSmrg		if (insn->header.access_mode == BRW_ALIGN_1) {
342428d7b3dSmrg			if (reg.width == BRW_WIDTH_1 &&
343428d7b3dSmrg			    insn->header.execution_size == BRW_EXECUTE_1) {
344428d7b3dSmrg				insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
345428d7b3dSmrg				insn->bits3.da1.src1_width = BRW_WIDTH_1;
346428d7b3dSmrg				insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
347428d7b3dSmrg			} else {
348428d7b3dSmrg				insn->bits3.da1.src1_horiz_stride = reg.hstride;
349428d7b3dSmrg				insn->bits3.da1.src1_width = reg.width;
350428d7b3dSmrg				insn->bits3.da1.src1_vert_stride = reg.vstride;
351428d7b3dSmrg			}
352428d7b3dSmrg		} else {
353428d7b3dSmrg			insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
354428d7b3dSmrg			insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
355428d7b3dSmrg			insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
356428d7b3dSmrg			insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
357428d7b3dSmrg
358428d7b3dSmrg			/* This is an oddity of the fact we're using the same
359428d7b3dSmrg			 * descriptions for registers in align_16 as align_1:
360428d7b3dSmrg			 */
361428d7b3dSmrg			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
362428d7b3dSmrg				insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
363428d7b3dSmrg			else
364428d7b3dSmrg				insn->bits3.da16.src1_vert_stride = reg.vstride;
365428d7b3dSmrg		}
366428d7b3dSmrg	}
367428d7b3dSmrg}
368428d7b3dSmrg
369428d7b3dSmrg/**
370428d7b3dSmrg * Set the Message Descriptor and Extended Message Descriptor fields
371428d7b3dSmrg * for SEND messages.
372428d7b3dSmrg *
373428d7b3dSmrg * \note This zeroes out the Function Control bits, so it must be called
374428d7b3dSmrg *       \b before filling out any message-specific data.  Callers can
375428d7b3dSmrg *       choose not to fill in irrelevant bits; they will be zero.
376428d7b3dSmrg */
377428d7b3dSmrgstatic void
378428d7b3dSmrgbrw_set_message_descriptor(struct brw_compile *p,
379428d7b3dSmrg			   struct brw_instruction *inst,
380428d7b3dSmrg			   enum brw_message_target sfid,
381428d7b3dSmrg			   unsigned msg_length,
382428d7b3dSmrg			   unsigned response_length,
383428d7b3dSmrg			   bool header_present,
384428d7b3dSmrg			   bool end_of_thread)
385428d7b3dSmrg{
386428d7b3dSmrg	brw_set_src1(p, inst, brw_imm_d(0));
387428d7b3dSmrg
388428d7b3dSmrg	if (p->gen >= 050) {
389428d7b3dSmrg		inst->bits3.generic_gen5.header_present = header_present;
390428d7b3dSmrg		inst->bits3.generic_gen5.response_length = response_length;
391428d7b3dSmrg		inst->bits3.generic_gen5.msg_length = msg_length;
392428d7b3dSmrg		inst->bits3.generic_gen5.end_of_thread = end_of_thread;
393428d7b3dSmrg
394428d7b3dSmrg		if (p->gen >= 060) {
395428d7b3dSmrg			/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
396428d7b3dSmrg			inst->header.destreg__conditionalmod = sfid;
397428d7b3dSmrg		} else {
398428d7b3dSmrg			/* Set Extended Message Descriptor (ex_desc) */
399428d7b3dSmrg			inst->bits2.send_gen5.sfid = sfid;
400428d7b3dSmrg			inst->bits2.send_gen5.end_of_thread = end_of_thread;
401428d7b3dSmrg		}
402428d7b3dSmrg	} else {
403428d7b3dSmrg		inst->bits3.generic.response_length = response_length;
404428d7b3dSmrg		inst->bits3.generic.msg_length = msg_length;
405428d7b3dSmrg		inst->bits3.generic.msg_target = sfid;
406428d7b3dSmrg		inst->bits3.generic.end_of_thread = end_of_thread;
407428d7b3dSmrg	}
408428d7b3dSmrg}
409428d7b3dSmrg
410428d7b3dSmrg
411428d7b3dSmrgstatic void brw_set_math_message(struct brw_compile *p,
412428d7b3dSmrg				 struct brw_instruction *insn,
413428d7b3dSmrg				 unsigned function,
414428d7b3dSmrg				 unsigned integer_type,
415428d7b3dSmrg				 bool low_precision,
416428d7b3dSmrg				 bool saturate,
417428d7b3dSmrg				 unsigned dataType)
418428d7b3dSmrg{
419428d7b3dSmrg	unsigned msg_length;
420428d7b3dSmrg	unsigned response_length;
421428d7b3dSmrg
422428d7b3dSmrg	/* Infer message length from the function */
423428d7b3dSmrg	switch (function) {
424428d7b3dSmrg	case BRW_MATH_FUNCTION_POW:
425428d7b3dSmrg	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
426428d7b3dSmrg	case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
427428d7b3dSmrg	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
428428d7b3dSmrg		msg_length = 2;
429428d7b3dSmrg		break;
430428d7b3dSmrg	default:
431428d7b3dSmrg		msg_length = 1;
432428d7b3dSmrg		break;
433428d7b3dSmrg	}
434428d7b3dSmrg
435428d7b3dSmrg	/* Infer response length from the function */
436428d7b3dSmrg	switch (function) {
437428d7b3dSmrg	case BRW_MATH_FUNCTION_SINCOS:
438428d7b3dSmrg	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
439428d7b3dSmrg		response_length = 2;
440428d7b3dSmrg		break;
441428d7b3dSmrg	default:
442428d7b3dSmrg		response_length = 1;
443428d7b3dSmrg		break;
444428d7b3dSmrg	}
445428d7b3dSmrg
446428d7b3dSmrg	brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
447428d7b3dSmrg				   msg_length, response_length,
448428d7b3dSmrg				   false, false);
449428d7b3dSmrg	if (p->gen == 050) {
450428d7b3dSmrg		insn->bits3.math_gen5.function = function;
451428d7b3dSmrg		insn->bits3.math_gen5.int_type = integer_type;
452428d7b3dSmrg		insn->bits3.math_gen5.precision = low_precision;
453428d7b3dSmrg		insn->bits3.math_gen5.saturate = saturate;
454428d7b3dSmrg		insn->bits3.math_gen5.data_type = dataType;
455428d7b3dSmrg		insn->bits3.math_gen5.snapshot = 0;
456428d7b3dSmrg	} else {
457428d7b3dSmrg		insn->bits3.math.function = function;
458428d7b3dSmrg		insn->bits3.math.int_type = integer_type;
459428d7b3dSmrg		insn->bits3.math.precision = low_precision;
460428d7b3dSmrg		insn->bits3.math.saturate = saturate;
461428d7b3dSmrg		insn->bits3.math.data_type = dataType;
462428d7b3dSmrg	}
463428d7b3dSmrg}
464428d7b3dSmrg
465428d7b3dSmrgstatic void brw_set_ff_sync_message(struct brw_compile *p,
466428d7b3dSmrg				    struct brw_instruction *insn,
467428d7b3dSmrg				    bool allocate,
468428d7b3dSmrg				    unsigned response_length,
469428d7b3dSmrg				    bool end_of_thread)
470428d7b3dSmrg{
471428d7b3dSmrg	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
472428d7b3dSmrg				   1, response_length,
473428d7b3dSmrg				   true, end_of_thread);
474428d7b3dSmrg	insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
475428d7b3dSmrg	insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
476428d7b3dSmrg	insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
477428d7b3dSmrg	insn->bits3.urb_gen5.allocate = allocate;
478428d7b3dSmrg	insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
479428d7b3dSmrg	insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
480428d7b3dSmrg}
481428d7b3dSmrg
482428d7b3dSmrgstatic void brw_set_urb_message(struct brw_compile *p,
483428d7b3dSmrg				struct brw_instruction *insn,
484428d7b3dSmrg				bool allocate,
485428d7b3dSmrg				bool used,
486428d7b3dSmrg				unsigned msg_length,
487428d7b3dSmrg				unsigned response_length,
488428d7b3dSmrg				bool end_of_thread,
489428d7b3dSmrg				bool complete,
490428d7b3dSmrg				unsigned offset,
491428d7b3dSmrg				unsigned swizzle_control)
492428d7b3dSmrg{
493428d7b3dSmrg	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
494428d7b3dSmrg				   msg_length, response_length, true, end_of_thread);
495428d7b3dSmrg	if (p->gen >= 070) {
496428d7b3dSmrg		insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
497428d7b3dSmrg		insn->bits3.urb_gen7.offset = offset;
498428d7b3dSmrg		assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
499428d7b3dSmrg		insn->bits3.urb_gen7.swizzle_control = swizzle_control;
500428d7b3dSmrg		/* per_slot_offset = 0 makes it ignore offsets in message header */
501428d7b3dSmrg		insn->bits3.urb_gen7.per_slot_offset = 0;
502428d7b3dSmrg		insn->bits3.urb_gen7.complete = complete;
503428d7b3dSmrg	} else if (p->gen >= 050) {
504428d7b3dSmrg		insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
505428d7b3dSmrg		insn->bits3.urb_gen5.offset = offset;
506428d7b3dSmrg		insn->bits3.urb_gen5.swizzle_control = swizzle_control;
507428d7b3dSmrg		insn->bits3.urb_gen5.allocate = allocate;
508428d7b3dSmrg		insn->bits3.urb_gen5.used = used;	/* ? */
509428d7b3dSmrg		insn->bits3.urb_gen5.complete = complete;
510428d7b3dSmrg	} else {
511428d7b3dSmrg		insn->bits3.urb.opcode = 0;	/* ? */
512428d7b3dSmrg		insn->bits3.urb.offset = offset;
513428d7b3dSmrg		insn->bits3.urb.swizzle_control = swizzle_control;
514428d7b3dSmrg		insn->bits3.urb.allocate = allocate;
515428d7b3dSmrg		insn->bits3.urb.used = used;	/* ? */
516428d7b3dSmrg		insn->bits3.urb.complete = complete;
517428d7b3dSmrg	}
518428d7b3dSmrg}
519428d7b3dSmrg
520428d7b3dSmrgvoid
521428d7b3dSmrgbrw_set_dp_write_message(struct brw_compile *p,
522428d7b3dSmrg			 struct brw_instruction *insn,
523428d7b3dSmrg			 unsigned binding_table_index,
524428d7b3dSmrg			 unsigned msg_control,
525428d7b3dSmrg			 unsigned msg_type,
526428d7b3dSmrg			 unsigned msg_length,
527428d7b3dSmrg			 bool header_present,
528428d7b3dSmrg			 bool last_render_target,
529428d7b3dSmrg			 unsigned response_length,
530428d7b3dSmrg			 bool end_of_thread,
531428d7b3dSmrg			 bool send_commit_msg)
532428d7b3dSmrg{
533428d7b3dSmrg	unsigned sfid;
534428d7b3dSmrg
535428d7b3dSmrg	if (p->gen >= 070) {
536428d7b3dSmrg		/* Use the Render Cache for RT writes; otherwise use the Data Cache */
537428d7b3dSmrg		if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
538428d7b3dSmrg			sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
539428d7b3dSmrg		else
540428d7b3dSmrg			sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
541428d7b3dSmrg	} else if (p->gen >= 060) {
542428d7b3dSmrg		/* Use the render cache for all write messages. */
543428d7b3dSmrg		sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
544428d7b3dSmrg	} else {
545428d7b3dSmrg		sfid = BRW_SFID_DATAPORT_WRITE;
546428d7b3dSmrg	}
547428d7b3dSmrg
548428d7b3dSmrg	brw_set_message_descriptor(p, insn, sfid,
549428d7b3dSmrg				   msg_length, response_length,
550428d7b3dSmrg				   header_present, end_of_thread);
551428d7b3dSmrg
552428d7b3dSmrg	if (p->gen >= 070) {
553428d7b3dSmrg		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
554428d7b3dSmrg		insn->bits3.gen7_dp.msg_control = msg_control;
555428d7b3dSmrg		insn->bits3.gen7_dp.last_render_target = last_render_target;
556428d7b3dSmrg		insn->bits3.gen7_dp.msg_type = msg_type;
557428d7b3dSmrg	} else if (p->gen >= 060) {
558428d7b3dSmrg		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
559428d7b3dSmrg		insn->bits3.gen6_dp.msg_control = msg_control;
560428d7b3dSmrg		insn->bits3.gen6_dp.last_render_target = last_render_target;
561428d7b3dSmrg		insn->bits3.gen6_dp.msg_type = msg_type;
562428d7b3dSmrg		insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
563428d7b3dSmrg	} else if (p->gen >= 050) {
564428d7b3dSmrg		insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
565428d7b3dSmrg		insn->bits3.dp_write_gen5.msg_control = msg_control;
566428d7b3dSmrg		insn->bits3.dp_write_gen5.last_render_target = last_render_target;
567428d7b3dSmrg		insn->bits3.dp_write_gen5.msg_type = msg_type;
568428d7b3dSmrg		insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
569428d7b3dSmrg	} else {
570428d7b3dSmrg		insn->bits3.dp_write.binding_table_index = binding_table_index;
571428d7b3dSmrg		insn->bits3.dp_write.msg_control = msg_control;
572428d7b3dSmrg		insn->bits3.dp_write.last_render_target = last_render_target;
573428d7b3dSmrg		insn->bits3.dp_write.msg_type = msg_type;
574428d7b3dSmrg		insn->bits3.dp_write.send_commit_msg = send_commit_msg;
575428d7b3dSmrg	}
576428d7b3dSmrg}
577428d7b3dSmrg
578428d7b3dSmrgvoid
579428d7b3dSmrgbrw_set_dp_read_message(struct brw_compile *p,
580428d7b3dSmrg			struct brw_instruction *insn,
581428d7b3dSmrg			unsigned binding_table_index,
582428d7b3dSmrg			unsigned msg_control,
583428d7b3dSmrg			unsigned msg_type,
584428d7b3dSmrg			unsigned target_cache,
585428d7b3dSmrg			unsigned msg_length,
586428d7b3dSmrg			unsigned response_length)
587428d7b3dSmrg{
588428d7b3dSmrg	unsigned sfid;
589428d7b3dSmrg
590428d7b3dSmrg	if (p->gen >= 070) {
591428d7b3dSmrg		sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
592428d7b3dSmrg	} else if (p->gen >= 060) {
593428d7b3dSmrg		if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
594428d7b3dSmrg			sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
595428d7b3dSmrg		else
596428d7b3dSmrg			sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
597428d7b3dSmrg	} else {
598428d7b3dSmrg		sfid = BRW_SFID_DATAPORT_READ;
599428d7b3dSmrg	}
600428d7b3dSmrg
601428d7b3dSmrg	brw_set_message_descriptor(p, insn, sfid,
602428d7b3dSmrg				   msg_length, response_length,
603428d7b3dSmrg				   true, false);
604428d7b3dSmrg
605428d7b3dSmrg	if (p->gen >= 070) {
606428d7b3dSmrg		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
607428d7b3dSmrg		insn->bits3.gen7_dp.msg_control = msg_control;
608428d7b3dSmrg		insn->bits3.gen7_dp.last_render_target = 0;
609428d7b3dSmrg		insn->bits3.gen7_dp.msg_type = msg_type;
610428d7b3dSmrg	} else if (p->gen >= 060) {
611428d7b3dSmrg		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
612428d7b3dSmrg		insn->bits3.gen6_dp.msg_control = msg_control;
613428d7b3dSmrg		insn->bits3.gen6_dp.last_render_target = 0;
614428d7b3dSmrg		insn->bits3.gen6_dp.msg_type = msg_type;
615428d7b3dSmrg		insn->bits3.gen6_dp.send_commit_msg = 0;
616428d7b3dSmrg	} else if (p->gen >= 050) {
617428d7b3dSmrg		insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
618428d7b3dSmrg		insn->bits3.dp_read_gen5.msg_control = msg_control;
619428d7b3dSmrg		insn->bits3.dp_read_gen5.msg_type = msg_type;
620428d7b3dSmrg		insn->bits3.dp_read_gen5.target_cache = target_cache;
621428d7b3dSmrg	} else if (p->gen >= 045) {
622428d7b3dSmrg		insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
623428d7b3dSmrg		insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
624428d7b3dSmrg		insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
625428d7b3dSmrg		insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
626428d7b3dSmrg	} else {
627428d7b3dSmrg		insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
628428d7b3dSmrg		insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
629428d7b3dSmrg		insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
630428d7b3dSmrg		insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
631428d7b3dSmrg	}
632428d7b3dSmrg}
633428d7b3dSmrg
634428d7b3dSmrgstatic void brw_set_sampler_message(struct brw_compile *p,
635428d7b3dSmrg                                    struct brw_instruction *insn,
636428d7b3dSmrg                                    unsigned binding_table_index,
637428d7b3dSmrg                                    unsigned sampler,
638428d7b3dSmrg                                    unsigned msg_type,
639428d7b3dSmrg                                    unsigned response_length,
640428d7b3dSmrg                                    unsigned msg_length,
641428d7b3dSmrg                                    bool header_present,
642428d7b3dSmrg                                    unsigned simd_mode)
643428d7b3dSmrg{
644428d7b3dSmrg	brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
645428d7b3dSmrg				   msg_length, response_length,
646428d7b3dSmrg				   header_present, false);
647428d7b3dSmrg
648428d7b3dSmrg	if (p->gen >= 070) {
649428d7b3dSmrg		insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
650428d7b3dSmrg		insn->bits3.sampler_gen7.sampler = sampler;
651428d7b3dSmrg		insn->bits3.sampler_gen7.msg_type = msg_type;
652428d7b3dSmrg		insn->bits3.sampler_gen7.simd_mode = simd_mode;
653428d7b3dSmrg	} else if (p->gen >= 050) {
654428d7b3dSmrg		insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
655428d7b3dSmrg		insn->bits3.sampler_gen5.sampler = sampler;
656428d7b3dSmrg		insn->bits3.sampler_gen5.msg_type = msg_type;
657428d7b3dSmrg		insn->bits3.sampler_gen5.simd_mode = simd_mode;
658428d7b3dSmrg	} else if (p->gen >= 045) {
659428d7b3dSmrg		insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
660428d7b3dSmrg		insn->bits3.sampler_g4x.sampler = sampler;
661428d7b3dSmrg		insn->bits3.sampler_g4x.msg_type = msg_type;
662428d7b3dSmrg	} else {
663428d7b3dSmrg		insn->bits3.sampler.binding_table_index = binding_table_index;
664428d7b3dSmrg		insn->bits3.sampler.sampler = sampler;
665428d7b3dSmrg		insn->bits3.sampler.msg_type = msg_type;
666428d7b3dSmrg		insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
667428d7b3dSmrg	}
668428d7b3dSmrg}
669428d7b3dSmrg
670428d7b3dSmrg
671428d7b3dSmrgvoid brw_NOP(struct brw_compile *p)
672428d7b3dSmrg{
673428d7b3dSmrg	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
674428d7b3dSmrg	brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
675428d7b3dSmrg	brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
676428d7b3dSmrg	brw_set_src1(p, insn, brw_imm_ud(0x0));
677428d7b3dSmrg}
678428d7b3dSmrg
679428d7b3dSmrg/***********************************************************************
680428d7b3dSmrg * Comparisons, if/else/endif
681428d7b3dSmrg */
682428d7b3dSmrg
683428d7b3dSmrgstatic void
684428d7b3dSmrgpush_if_stack(struct brw_compile *p, struct brw_instruction *inst)
685428d7b3dSmrg{
686428d7b3dSmrg	p->if_stack[p->if_stack_depth] = inst;
687428d7b3dSmrg
688428d7b3dSmrg	p->if_stack_depth++;
689428d7b3dSmrg	if (p->if_stack_array_size <= p->if_stack_depth) {
690428d7b3dSmrg		p->if_stack_array_size *= 2;
691428d7b3dSmrg		p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
692428d7b3dSmrg	}
693428d7b3dSmrg}
694428d7b3dSmrg
695428d7b3dSmrg/* EU takes the value from the flag register and pushes it onto some
696428d7b3dSmrg * sort of a stack (presumably merging with any flag value already on
697428d7b3dSmrg * the stack).  Within an if block, the flags at the top of the stack
698428d7b3dSmrg * control execution on each channel of the unit, eg. on each of the
699428d7b3dSmrg * 16 pixel values in our wm programs.
700428d7b3dSmrg *
701428d7b3dSmrg * When the matching 'else' instruction is reached (presumably by
702428d7b3dSmrg * countdown of the instruction count patched in by our ELSE/ENDIF
703428d7b3dSmrg * functions), the relevent flags are inverted.
704428d7b3dSmrg *
705428d7b3dSmrg * When the matching 'endif' instruction is reached, the flags are
706428d7b3dSmrg * popped off.  If the stack is now empty, normal execution resumes.
707428d7b3dSmrg */
708428d7b3dSmrgstruct brw_instruction *
709428d7b3dSmrgbrw_IF(struct brw_compile *p, unsigned execute_size)
710428d7b3dSmrg{
711428d7b3dSmrg	struct brw_instruction *insn;
712428d7b3dSmrg
713428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_IF);
714428d7b3dSmrg
715428d7b3dSmrg	/* Override the defaults for this instruction: */
716428d7b3dSmrg	if (p->gen < 060) {
717428d7b3dSmrg		brw_set_dest(p, insn, brw_ip_reg());
718428d7b3dSmrg		brw_set_src0(p, insn, brw_ip_reg());
719428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_d(0x0));
720428d7b3dSmrg	} else if (p->gen < 070) {
721428d7b3dSmrg		brw_set_dest(p, insn, brw_imm_w(0));
722428d7b3dSmrg		insn->bits1.branch_gen6.jump_count = 0;
723428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
724428d7b3dSmrg		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
725428d7b3dSmrg	} else {
726428d7b3dSmrg		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
727428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
728428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_ud(0));
729428d7b3dSmrg		insn->bits3.break_cont.jip = 0;
730428d7b3dSmrg		insn->bits3.break_cont.uip = 0;
731428d7b3dSmrg	}
732428d7b3dSmrg
733428d7b3dSmrg	insn->header.execution_size = execute_size;
734428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
735428d7b3dSmrg	insn->header.predicate_control = BRW_PREDICATE_NORMAL;
736428d7b3dSmrg	insn->header.mask_control = BRW_MASK_ENABLE;
737428d7b3dSmrg	if (!p->single_program_flow)
738428d7b3dSmrg		insn->header.thread_control = BRW_THREAD_SWITCH;
739428d7b3dSmrg
740428d7b3dSmrg	p->current->header.predicate_control = BRW_PREDICATE_NONE;
741428d7b3dSmrg
742428d7b3dSmrg	push_if_stack(p, insn);
743428d7b3dSmrg	return insn;
744428d7b3dSmrg}
745428d7b3dSmrg
746428d7b3dSmrg/* This function is only used for gen6-style IF instructions with an
747428d7b3dSmrg * embedded comparison (conditional modifier).  It is not used on gen7.
748428d7b3dSmrg */
749428d7b3dSmrgstruct brw_instruction *
750428d7b3dSmrggen6_IF(struct brw_compile *p, uint32_t conditional,
751428d7b3dSmrg	struct brw_reg src0, struct brw_reg src1)
752428d7b3dSmrg{
753428d7b3dSmrg	struct brw_instruction *insn;
754428d7b3dSmrg
755428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_IF);
756428d7b3dSmrg
757428d7b3dSmrg	brw_set_dest(p, insn, brw_imm_w(0));
758428d7b3dSmrg	if (p->compressed) {
759428d7b3dSmrg		insn->header.execution_size = BRW_EXECUTE_16;
760428d7b3dSmrg	} else {
761428d7b3dSmrg		insn->header.execution_size = BRW_EXECUTE_8;
762428d7b3dSmrg	}
763428d7b3dSmrg	insn->bits1.branch_gen6.jump_count = 0;
764428d7b3dSmrg	brw_set_src0(p, insn, src0);
765428d7b3dSmrg	brw_set_src1(p, insn, src1);
766428d7b3dSmrg
767428d7b3dSmrg	assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
768428d7b3dSmrg	assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
769428d7b3dSmrg	insn->header.destreg__conditionalmod = conditional;
770428d7b3dSmrg
771428d7b3dSmrg	if (!p->single_program_flow)
772428d7b3dSmrg		insn->header.thread_control = BRW_THREAD_SWITCH;
773428d7b3dSmrg
774428d7b3dSmrg	push_if_stack(p, insn);
775428d7b3dSmrg	return insn;
776428d7b3dSmrg}
777428d7b3dSmrg
778428d7b3dSmrg/**
779428d7b3dSmrg * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
780428d7b3dSmrg */
781428d7b3dSmrgstatic void
782428d7b3dSmrgconvert_IF_ELSE_to_ADD(struct brw_compile *p,
783428d7b3dSmrg		       struct brw_instruction *if_inst,
784428d7b3dSmrg		       struct brw_instruction *else_inst)
785428d7b3dSmrg{
786428d7b3dSmrg	/* The next instruction (where the ENDIF would be, if it existed) */
787428d7b3dSmrg	struct brw_instruction *next_inst = &p->store[p->nr_insn];
788428d7b3dSmrg
789428d7b3dSmrg	assert(p->single_program_flow);
790428d7b3dSmrg	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
791428d7b3dSmrg	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
792428d7b3dSmrg	assert(if_inst->header.execution_size == BRW_EXECUTE_1);
793428d7b3dSmrg
794428d7b3dSmrg	/* Convert IF to an ADD instruction that moves the instruction pointer
795428d7b3dSmrg	 * to the first instruction of the ELSE block.  If there is no ELSE
796428d7b3dSmrg	 * block, point to where ENDIF would be.  Reverse the predicate.
797428d7b3dSmrg	 *
798428d7b3dSmrg	 * There's no need to execute an ENDIF since we don't need to do any
799428d7b3dSmrg	 * stack operations, and if we're currently executing, we just want to
800428d7b3dSmrg	 * continue normally.
801428d7b3dSmrg	 */
802428d7b3dSmrg	if_inst->header.opcode = BRW_OPCODE_ADD;
803428d7b3dSmrg	if_inst->header.predicate_inverse = 1;
804428d7b3dSmrg
805428d7b3dSmrg	if (else_inst != NULL) {
806428d7b3dSmrg		/* Convert ELSE to an ADD instruction that points where the ENDIF
807428d7b3dSmrg		 * would be.
808428d7b3dSmrg		 */
809428d7b3dSmrg		else_inst->header.opcode = BRW_OPCODE_ADD;
810428d7b3dSmrg
811428d7b3dSmrg		if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
812428d7b3dSmrg		else_inst->bits3.ud = (next_inst - else_inst) * 16;
813428d7b3dSmrg	} else {
814428d7b3dSmrg		if_inst->bits3.ud = (next_inst - if_inst) * 16;
815428d7b3dSmrg	}
816428d7b3dSmrg}
817428d7b3dSmrg
818428d7b3dSmrg/**
819428d7b3dSmrg * Patch IF and ELSE instructions with appropriate jump targets.
820428d7b3dSmrg */
821428d7b3dSmrgstatic void
822428d7b3dSmrgpatch_IF_ELSE(struct brw_compile *p,
823428d7b3dSmrg	      struct brw_instruction *if_inst,
824428d7b3dSmrg	      struct brw_instruction *else_inst,
825428d7b3dSmrg	      struct brw_instruction *endif_inst)
826428d7b3dSmrg{
827428d7b3dSmrg	unsigned br = 1;
828428d7b3dSmrg
829428d7b3dSmrg	assert(!p->single_program_flow);
830428d7b3dSmrg	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
831428d7b3dSmrg	assert(endif_inst != NULL);
832428d7b3dSmrg	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
833428d7b3dSmrg
834428d7b3dSmrg	/* Jump count is for 64bit data chunk each, so one 128bit instruction
835428d7b3dSmrg	 * requires 2 chunks.
836428d7b3dSmrg	 */
837428d7b3dSmrg	if (p->gen >= 050)
838428d7b3dSmrg		br = 2;
839428d7b3dSmrg
840428d7b3dSmrg	assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
841428d7b3dSmrg	endif_inst->header.execution_size = if_inst->header.execution_size;
842428d7b3dSmrg
843428d7b3dSmrg	if (else_inst == NULL) {
844428d7b3dSmrg		/* Patch IF -> ENDIF */
845428d7b3dSmrg		if (p->gen < 060) {
846428d7b3dSmrg			/* Turn it into an IFF, which means no mask stack operations for
847428d7b3dSmrg			 * all-false and jumping past the ENDIF.
848428d7b3dSmrg			 */
849428d7b3dSmrg			if_inst->header.opcode = BRW_OPCODE_IFF;
850428d7b3dSmrg			if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
851428d7b3dSmrg			if_inst->bits3.if_else.pop_count = 0;
852428d7b3dSmrg			if_inst->bits3.if_else.pad0 = 0;
853428d7b3dSmrg		} else if (p->gen < 070) {
854428d7b3dSmrg			/* As of gen6, there is no IFF and IF must point to the ENDIF. */
855428d7b3dSmrg			if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
856428d7b3dSmrg		} else {
857428d7b3dSmrg			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
858428d7b3dSmrg			if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
859428d7b3dSmrg		}
860428d7b3dSmrg	} else {
861428d7b3dSmrg		else_inst->header.execution_size = if_inst->header.execution_size;
862428d7b3dSmrg
863428d7b3dSmrg		/* Patch IF -> ELSE */
864428d7b3dSmrg		if (p->gen < 060) {
865428d7b3dSmrg			if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
866428d7b3dSmrg			if_inst->bits3.if_else.pop_count = 0;
867428d7b3dSmrg			if_inst->bits3.if_else.pad0 = 0;
868428d7b3dSmrg		} else if (p->gen <= 070) {
869428d7b3dSmrg			if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
870428d7b3dSmrg		}
871428d7b3dSmrg
872428d7b3dSmrg		/* Patch ELSE -> ENDIF */
873428d7b3dSmrg		if (p->gen < 060) {
874428d7b3dSmrg			/* BRW_OPCODE_ELSE pre-gen6 should point just past the
875428d7b3dSmrg			 * matching ENDIF.
876428d7b3dSmrg			 */
877428d7b3dSmrg			else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
878428d7b3dSmrg			else_inst->bits3.if_else.pop_count = 1;
879428d7b3dSmrg			else_inst->bits3.if_else.pad0 = 0;
880428d7b3dSmrg		} else if (p->gen < 070) {
881428d7b3dSmrg			/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
882428d7b3dSmrg			else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
883428d7b3dSmrg		} else {
884428d7b3dSmrg			/* The IF instruction's JIP should point just past the ELSE */
885428d7b3dSmrg			if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
886428d7b3dSmrg			/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
887428d7b3dSmrg			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
888428d7b3dSmrg			else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
889428d7b3dSmrg		}
890428d7b3dSmrg	}
891428d7b3dSmrg}
892428d7b3dSmrg
893428d7b3dSmrgvoid
894428d7b3dSmrgbrw_ELSE(struct brw_compile *p)
895428d7b3dSmrg{
896428d7b3dSmrg	struct brw_instruction *insn;
897428d7b3dSmrg
898428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_ELSE);
899428d7b3dSmrg
900428d7b3dSmrg	if (p->gen < 060) {
901428d7b3dSmrg		brw_set_dest(p, insn, brw_ip_reg());
902428d7b3dSmrg		brw_set_src0(p, insn, brw_ip_reg());
903428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_d(0x0));
904428d7b3dSmrg	} else if (p->gen < 070) {
905428d7b3dSmrg		brw_set_dest(p, insn, brw_imm_w(0));
906428d7b3dSmrg		insn->bits1.branch_gen6.jump_count = 0;
907428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
908428d7b3dSmrg		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
909428d7b3dSmrg	} else {
910428d7b3dSmrg		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
911428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
912428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_ud(0));
913428d7b3dSmrg		insn->bits3.break_cont.jip = 0;
914428d7b3dSmrg		insn->bits3.break_cont.uip = 0;
915428d7b3dSmrg	}
916428d7b3dSmrg
917428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
918428d7b3dSmrg	insn->header.mask_control = BRW_MASK_ENABLE;
919428d7b3dSmrg	if (!p->single_program_flow)
920428d7b3dSmrg		insn->header.thread_control = BRW_THREAD_SWITCH;
921428d7b3dSmrg
922428d7b3dSmrg	push_if_stack(p, insn);
923428d7b3dSmrg}
924428d7b3dSmrg
925428d7b3dSmrgvoid
926428d7b3dSmrgbrw_ENDIF(struct brw_compile *p)
927428d7b3dSmrg{
928428d7b3dSmrg	struct brw_instruction *insn;
929428d7b3dSmrg	struct brw_instruction *else_inst = NULL;
930428d7b3dSmrg	struct brw_instruction *if_inst = NULL;
931428d7b3dSmrg
932428d7b3dSmrg	/* Pop the IF and (optional) ELSE instructions from the stack */
933428d7b3dSmrg	p->if_stack_depth--;
934428d7b3dSmrg	if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
935428d7b3dSmrg		else_inst = p->if_stack[p->if_stack_depth];
936428d7b3dSmrg		p->if_stack_depth--;
937428d7b3dSmrg	}
938428d7b3dSmrg	if_inst = p->if_stack[p->if_stack_depth];
939428d7b3dSmrg
940428d7b3dSmrg	if (p->single_program_flow) {
941428d7b3dSmrg		/* ENDIF is useless; don't bother emitting it. */
942428d7b3dSmrg		convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
943428d7b3dSmrg		return;
944428d7b3dSmrg	}
945428d7b3dSmrg
946428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
947428d7b3dSmrg
948428d7b3dSmrg	if (p->gen < 060) {
949428d7b3dSmrg		brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
950428d7b3dSmrg		brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
951428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_d(0x0));
952428d7b3dSmrg	} else if (p->gen < 070) {
953428d7b3dSmrg		brw_set_dest(p, insn, brw_imm_w(0));
954428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
955428d7b3dSmrg		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
956428d7b3dSmrg	} else {
957428d7b3dSmrg		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
958428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
959428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_ud(0));
960428d7b3dSmrg	}
961428d7b3dSmrg
962428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
963428d7b3dSmrg	insn->header.mask_control = BRW_MASK_ENABLE;
964428d7b3dSmrg	insn->header.thread_control = BRW_THREAD_SWITCH;
965428d7b3dSmrg
966428d7b3dSmrg	/* Also pop item off the stack in the endif instruction: */
967428d7b3dSmrg	if (p->gen < 060) {
968428d7b3dSmrg		insn->bits3.if_else.jump_count = 0;
969428d7b3dSmrg		insn->bits3.if_else.pop_count = 1;
970428d7b3dSmrg		insn->bits3.if_else.pad0 = 0;
971428d7b3dSmrg	} else if (p->gen < 070) {
972428d7b3dSmrg		insn->bits1.branch_gen6.jump_count = 2;
973428d7b3dSmrg	} else {
974428d7b3dSmrg		insn->bits3.break_cont.jip = 2;
975428d7b3dSmrg	}
976428d7b3dSmrg	patch_IF_ELSE(p, if_inst, else_inst, insn);
977428d7b3dSmrg}
978428d7b3dSmrg
979428d7b3dSmrgstruct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
980428d7b3dSmrg{
981428d7b3dSmrg	struct brw_instruction *insn;
982428d7b3dSmrg
983428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_BREAK);
984428d7b3dSmrg	if (p->gen >= 060) {
985428d7b3dSmrg		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
986428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
987428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_d(0x0));
988428d7b3dSmrg	} else {
989428d7b3dSmrg		brw_set_dest(p, insn, brw_ip_reg());
990428d7b3dSmrg		brw_set_src0(p, insn, brw_ip_reg());
991428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_d(0x0));
992428d7b3dSmrg		insn->bits3.if_else.pad0 = 0;
993428d7b3dSmrg		insn->bits3.if_else.pop_count = pop_count;
994428d7b3dSmrg	}
995428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
996428d7b3dSmrg	insn->header.execution_size = BRW_EXECUTE_8;
997428d7b3dSmrg
998428d7b3dSmrg	return insn;
999428d7b3dSmrg}
1000428d7b3dSmrg
1001428d7b3dSmrgstruct brw_instruction *gen6_CONT(struct brw_compile *p,
1002428d7b3dSmrg				  struct brw_instruction *do_insn)
1003428d7b3dSmrg{
1004428d7b3dSmrg	struct brw_instruction *insn;
1005428d7b3dSmrg
1006428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1007428d7b3dSmrg	brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1008428d7b3dSmrg	brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1009428d7b3dSmrg	brw_set_dest(p, insn, brw_ip_reg());
1010428d7b3dSmrg	brw_set_src0(p, insn, brw_ip_reg());
1011428d7b3dSmrg	brw_set_src1(p, insn, brw_imm_d(0x0));
1012428d7b3dSmrg
1013428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
1014428d7b3dSmrg	insn->header.execution_size = BRW_EXECUTE_8;
1015428d7b3dSmrg	return insn;
1016428d7b3dSmrg}
1017428d7b3dSmrg
1018428d7b3dSmrgstruct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1019428d7b3dSmrg{
1020428d7b3dSmrg	struct brw_instruction *insn;
1021428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1022428d7b3dSmrg	brw_set_dest(p, insn, brw_ip_reg());
1023428d7b3dSmrg	brw_set_src0(p, insn, brw_ip_reg());
1024428d7b3dSmrg	brw_set_src1(p, insn, brw_imm_d(0x0));
1025428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
1026428d7b3dSmrg	insn->header.execution_size = BRW_EXECUTE_8;
1027428d7b3dSmrg	/* insn->header.mask_control = BRW_MASK_DISABLE; */
1028428d7b3dSmrg	insn->bits3.if_else.pad0 = 0;
1029428d7b3dSmrg	insn->bits3.if_else.pop_count = pop_count;
1030428d7b3dSmrg	return insn;
1031428d7b3dSmrg}
1032428d7b3dSmrg
1033428d7b3dSmrg/* DO/WHILE loop:
1034428d7b3dSmrg *
1035428d7b3dSmrg * The DO/WHILE is just an unterminated loop -- break or continue are
1036428d7b3dSmrg * used for control within the loop.  We have a few ways they can be
1037428d7b3dSmrg * done.
1038428d7b3dSmrg *
1039428d7b3dSmrg * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1040428d7b3dSmrg * jip and no DO instruction.
1041428d7b3dSmrg *
1042428d7b3dSmrg * For non-uniform control flow pre-gen6, there's a DO instruction to
1043428d7b3dSmrg * push the mask, and a WHILE to jump back, and BREAK to get out and
1044428d7b3dSmrg * pop the mask.
1045428d7b3dSmrg *
1046428d7b3dSmrg * For gen6, there's no more mask stack, so no need for DO.  WHILE
1047428d7b3dSmrg * just points back to the first instruction of the loop.
1048428d7b3dSmrg */
1049428d7b3dSmrgstruct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
1050428d7b3dSmrg{
1051428d7b3dSmrg	if (p->gen >= 060 || p->single_program_flow) {
1052428d7b3dSmrg		return &p->store[p->nr_insn];
1053428d7b3dSmrg	} else {
1054428d7b3dSmrg		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
1055428d7b3dSmrg
1056428d7b3dSmrg		/* Override the defaults for this instruction:
1057428d7b3dSmrg		*/
1058428d7b3dSmrg		brw_set_dest(p, insn, brw_null_reg());
1059428d7b3dSmrg		brw_set_src0(p, insn, brw_null_reg());
1060428d7b3dSmrg		brw_set_src1(p, insn, brw_null_reg());
1061428d7b3dSmrg
1062428d7b3dSmrg		insn->header.compression_control = BRW_COMPRESSION_NONE;
1063428d7b3dSmrg		insn->header.execution_size = execute_size;
1064428d7b3dSmrg		insn->header.predicate_control = BRW_PREDICATE_NONE;
1065428d7b3dSmrg		/* insn->header.mask_control = BRW_MASK_ENABLE; */
1066428d7b3dSmrg		/* insn->header.mask_control = BRW_MASK_DISABLE; */
1067428d7b3dSmrg
1068428d7b3dSmrg		return insn;
1069428d7b3dSmrg	}
1070428d7b3dSmrg}
1071428d7b3dSmrg
1072428d7b3dSmrgstruct brw_instruction *brw_WHILE(struct brw_compile *p,
1073428d7b3dSmrg                                  struct brw_instruction *do_insn)
1074428d7b3dSmrg{
1075428d7b3dSmrg	struct brw_instruction *insn;
1076428d7b3dSmrg	unsigned br = 1;
1077428d7b3dSmrg
1078428d7b3dSmrg	if (p->gen >= 050)
1079428d7b3dSmrg		br = 2;
1080428d7b3dSmrg
1081428d7b3dSmrg	if (p->gen >= 070) {
1082428d7b3dSmrg		insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1083428d7b3dSmrg
1084428d7b3dSmrg		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1085428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1086428d7b3dSmrg		brw_set_src1(p, insn, brw_imm_ud(0));
1087428d7b3dSmrg		insn->bits3.break_cont.jip = br * (do_insn - insn);
1088428d7b3dSmrg
1089428d7b3dSmrg		insn->header.execution_size = BRW_EXECUTE_8;
1090428d7b3dSmrg	} else if (p->gen >= 060) {
1091428d7b3dSmrg		insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1092428d7b3dSmrg
1093428d7b3dSmrg		brw_set_dest(p, insn, brw_imm_w(0));
1094428d7b3dSmrg		insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1095428d7b3dSmrg		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1096428d7b3dSmrg		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
1097428d7b3dSmrg
1098428d7b3dSmrg		insn->header.execution_size = BRW_EXECUTE_8;
1099428d7b3dSmrg	} else {
1100428d7b3dSmrg		if (p->single_program_flow) {
1101428d7b3dSmrg			insn = brw_next_insn(p, BRW_OPCODE_ADD);
1102428d7b3dSmrg
1103428d7b3dSmrg			brw_set_dest(p, insn, brw_ip_reg());
1104428d7b3dSmrg			brw_set_src0(p, insn, brw_ip_reg());
1105428d7b3dSmrg			brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1106428d7b3dSmrg			insn->header.execution_size = BRW_EXECUTE_1;
1107428d7b3dSmrg		} else {
1108428d7b3dSmrg			insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1109428d7b3dSmrg
1110428d7b3dSmrg			assert(do_insn->header.opcode == BRW_OPCODE_DO);
1111428d7b3dSmrg
1112428d7b3dSmrg			brw_set_dest(p, insn, brw_ip_reg());
1113428d7b3dSmrg			brw_set_src0(p, insn, brw_ip_reg());
1114428d7b3dSmrg			brw_set_src1(p, insn, brw_imm_d(0));
1115428d7b3dSmrg
1116428d7b3dSmrg			insn->header.execution_size = do_insn->header.execution_size;
1117428d7b3dSmrg			insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1118428d7b3dSmrg			insn->bits3.if_else.pop_count = 0;
1119428d7b3dSmrg			insn->bits3.if_else.pad0 = 0;
1120428d7b3dSmrg		}
1121428d7b3dSmrg	}
1122428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
1123428d7b3dSmrg	p->current->header.predicate_control = BRW_PREDICATE_NONE;
1124428d7b3dSmrg
1125428d7b3dSmrg	return insn;
1126428d7b3dSmrg}
1127428d7b3dSmrg
1128428d7b3dSmrg/* FORWARD JUMPS:
1129428d7b3dSmrg */
1130428d7b3dSmrgvoid brw_land_fwd_jump(struct brw_compile *p,
1131428d7b3dSmrg		       struct brw_instruction *jmp_insn)
1132428d7b3dSmrg{
1133428d7b3dSmrg	struct brw_instruction *landing = &p->store[p->nr_insn];
1134428d7b3dSmrg	unsigned jmpi = 1;
1135428d7b3dSmrg
1136428d7b3dSmrg	if (p->gen >= 050)
1137428d7b3dSmrg		jmpi = 2;
1138428d7b3dSmrg
1139428d7b3dSmrg	assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1140428d7b3dSmrg	assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1141428d7b3dSmrg
1142428d7b3dSmrg	jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1143428d7b3dSmrg}
1144428d7b3dSmrg
1145428d7b3dSmrg
1146428d7b3dSmrg
1147428d7b3dSmrg/* To integrate with the above, it makes sense that the comparison
1148428d7b3dSmrg * instruction should populate the flag register.  It might be simpler
1149428d7b3dSmrg * just to use the flag reg for most WM tasks?
1150428d7b3dSmrg */
1151428d7b3dSmrgvoid brw_CMP(struct brw_compile *p,
1152428d7b3dSmrg	     struct brw_reg dest,
1153428d7b3dSmrg	     unsigned conditional,
1154428d7b3dSmrg	     struct brw_reg src0,
1155428d7b3dSmrg	     struct brw_reg src1)
1156428d7b3dSmrg{
1157428d7b3dSmrg	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);
1158428d7b3dSmrg
1159428d7b3dSmrg	insn->header.destreg__conditionalmod = conditional;
1160428d7b3dSmrg	brw_set_dest(p, insn, dest);
1161428d7b3dSmrg	brw_set_src0(p, insn, src0);
1162428d7b3dSmrg	brw_set_src1(p, insn, src1);
1163428d7b3dSmrg
1164428d7b3dSmrg	/* Make it so that future instructions will use the computed flag
1165428d7b3dSmrg	 * value until brw_set_predicate_control_flag_value() is called
1166428d7b3dSmrg	 * again.
1167428d7b3dSmrg	 */
1168428d7b3dSmrg	if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1169428d7b3dSmrg	    dest.nr == 0) {
1170428d7b3dSmrg		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1171428d7b3dSmrg		p->flag_value = 0xff;
1172428d7b3dSmrg	}
1173428d7b3dSmrg}
1174428d7b3dSmrg
1175428d7b3dSmrg/* Issue 'wait' instruction for n1, host could program MMIO
1176428d7b3dSmrg   to wake up thread. */
1177428d7b3dSmrgvoid brw_WAIT(struct brw_compile *p)
1178428d7b3dSmrg{
1179428d7b3dSmrg	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
1180428d7b3dSmrg	struct brw_reg src = brw_notification_1_reg();
1181428d7b3dSmrg
1182428d7b3dSmrg	brw_set_dest(p, insn, src);
1183428d7b3dSmrg	brw_set_src0(p, insn, src);
1184428d7b3dSmrg	brw_set_src1(p, insn, brw_null_reg());
1185428d7b3dSmrg	insn->header.execution_size = 0; /* must */
1186428d7b3dSmrg	insn->header.predicate_control = 0;
1187428d7b3dSmrg	insn->header.compression_control = 0;
1188428d7b3dSmrg}
1189428d7b3dSmrg
1190428d7b3dSmrg/***********************************************************************
1191428d7b3dSmrg * Helpers for the various SEND message types:
1192428d7b3dSmrg */
1193428d7b3dSmrg
1194428d7b3dSmrg/** Extended math function, float[8].
1195428d7b3dSmrg */
1196428d7b3dSmrgvoid brw_math(struct brw_compile *p,
1197428d7b3dSmrg	      struct brw_reg dest,
1198428d7b3dSmrg	      unsigned function,
1199428d7b3dSmrg	      unsigned saturate,
1200428d7b3dSmrg	      unsigned msg_reg_nr,
1201428d7b3dSmrg	      struct brw_reg src,
1202428d7b3dSmrg	      unsigned data_type,
1203428d7b3dSmrg	      unsigned precision)
1204428d7b3dSmrg{
1205428d7b3dSmrg	if (p->gen >= 060) {
1206428d7b3dSmrg		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1207428d7b3dSmrg
1208428d7b3dSmrg		assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1209428d7b3dSmrg		assert(src.file == BRW_GENERAL_REGISTER_FILE);
1210428d7b3dSmrg
1211428d7b3dSmrg		assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1212428d7b3dSmrg		assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1213428d7b3dSmrg
1214428d7b3dSmrg		/* Source modifiers are ignored for extended math instructions. */
1215428d7b3dSmrg		assert(!src.negate);
1216428d7b3dSmrg		assert(!src.abs);
1217428d7b3dSmrg
1218428d7b3dSmrg		if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1219428d7b3dSmrg		    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1220428d7b3dSmrg			assert(src.type == BRW_REGISTER_TYPE_F);
1221428d7b3dSmrg		}
1222428d7b3dSmrg
1223428d7b3dSmrg		/* Math is the same ISA format as other opcodes, except that CondModifier
1224428d7b3dSmrg		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1225428d7b3dSmrg		 */
1226428d7b3dSmrg		insn->header.destreg__conditionalmod = function;
1227428d7b3dSmrg		insn->header.saturate = saturate;
1228428d7b3dSmrg
1229428d7b3dSmrg		brw_set_dest(p, insn, dest);
1230428d7b3dSmrg		brw_set_src0(p, insn, src);
1231428d7b3dSmrg		brw_set_src1(p, insn, brw_null_reg());
1232428d7b3dSmrg	} else {
1233428d7b3dSmrg		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1234428d7b3dSmrg		/* Example code doesn't set predicate_control for send
1235428d7b3dSmrg		 * instructions.
1236428d7b3dSmrg		 */
1237428d7b3dSmrg		insn->header.predicate_control = 0;
1238428d7b3dSmrg		insn->header.destreg__conditionalmod = msg_reg_nr;
1239428d7b3dSmrg
1240428d7b3dSmrg		brw_set_dest(p, insn, dest);
1241428d7b3dSmrg		brw_set_src0(p, insn, src);
1242428d7b3dSmrg		brw_set_math_message(p, insn, function,
1243428d7b3dSmrg				     src.type == BRW_REGISTER_TYPE_D,
1244428d7b3dSmrg				     precision,
1245428d7b3dSmrg				     saturate,
1246428d7b3dSmrg				     data_type);
1247428d7b3dSmrg	}
1248428d7b3dSmrg}
1249428d7b3dSmrg
1250428d7b3dSmrg/** Extended math function, float[8].
1251428d7b3dSmrg */
1252428d7b3dSmrgvoid brw_math2(struct brw_compile *p,
1253428d7b3dSmrg	       struct brw_reg dest,
1254428d7b3dSmrg	       unsigned function,
1255428d7b3dSmrg	       struct brw_reg src0,
1256428d7b3dSmrg	       struct brw_reg src1)
1257428d7b3dSmrg{
1258428d7b3dSmrg	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1259428d7b3dSmrg
1260428d7b3dSmrg	assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1261428d7b3dSmrg	assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1262428d7b3dSmrg	assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1263428d7b3dSmrg
1264428d7b3dSmrg	assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1265428d7b3dSmrg	assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1266428d7b3dSmrg	assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1267428d7b3dSmrg
1268428d7b3dSmrg	if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1269428d7b3dSmrg	    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1270428d7b3dSmrg		assert(src0.type == BRW_REGISTER_TYPE_F);
1271428d7b3dSmrg		assert(src1.type == BRW_REGISTER_TYPE_F);
1272428d7b3dSmrg	}
1273428d7b3dSmrg
1274428d7b3dSmrg	/* Source modifiers are ignored for extended math instructions. */
1275428d7b3dSmrg	assert(!src0.negate);
1276428d7b3dSmrg	assert(!src0.abs);
1277428d7b3dSmrg	assert(!src1.negate);
1278428d7b3dSmrg	assert(!src1.abs);
1279428d7b3dSmrg
1280428d7b3dSmrg	/* Math is the same ISA format as other opcodes, except that CondModifier
1281428d7b3dSmrg	 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1282428d7b3dSmrg	 */
1283428d7b3dSmrg	insn->header.destreg__conditionalmod = function;
1284428d7b3dSmrg
1285428d7b3dSmrg	brw_set_dest(p, insn, dest);
1286428d7b3dSmrg	brw_set_src0(p, insn, src0);
1287428d7b3dSmrg	brw_set_src1(p, insn, src1);
1288428d7b3dSmrg}
1289428d7b3dSmrg
1290428d7b3dSmrg/**
1291428d7b3dSmrg * Extended math function, float[16].
1292428d7b3dSmrg * Use 2 send instructions.
1293428d7b3dSmrg */
1294428d7b3dSmrgvoid brw_math_16(struct brw_compile *p,
1295428d7b3dSmrg		 struct brw_reg dest,
1296428d7b3dSmrg		 unsigned function,
1297428d7b3dSmrg		 unsigned saturate,
1298428d7b3dSmrg		 unsigned msg_reg_nr,
1299428d7b3dSmrg		 struct brw_reg src,
1300428d7b3dSmrg		 unsigned precision)
1301428d7b3dSmrg{
1302428d7b3dSmrg	struct brw_instruction *insn;
1303428d7b3dSmrg
1304428d7b3dSmrg	if (p->gen >= 060) {
1305428d7b3dSmrg		insn = brw_next_insn(p, BRW_OPCODE_MATH);
1306428d7b3dSmrg
1307428d7b3dSmrg		/* Math is the same ISA format as other opcodes, except that CondModifier
1308428d7b3dSmrg		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1309428d7b3dSmrg		 */
1310428d7b3dSmrg		insn->header.destreg__conditionalmod = function;
1311428d7b3dSmrg		insn->header.saturate = saturate;
1312428d7b3dSmrg
1313428d7b3dSmrg		/* Source modifiers are ignored for extended math instructions. */
1314428d7b3dSmrg		assert(!src.negate);
1315428d7b3dSmrg		assert(!src.abs);
1316428d7b3dSmrg
1317428d7b3dSmrg		brw_set_dest(p, insn, dest);
1318428d7b3dSmrg		brw_set_src0(p, insn, src);
1319428d7b3dSmrg		brw_set_src1(p, insn, brw_null_reg());
1320428d7b3dSmrg		return;
1321428d7b3dSmrg	}
1322428d7b3dSmrg
1323428d7b3dSmrg	/* First instruction:
1324428d7b3dSmrg	*/
1325428d7b3dSmrg	brw_push_insn_state(p);
1326428d7b3dSmrg	brw_set_predicate_control_flag_value(p, 0xff);
1327428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1328428d7b3dSmrg
1329428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1330428d7b3dSmrg	insn->header.destreg__conditionalmod = msg_reg_nr;
1331428d7b3dSmrg
1332428d7b3dSmrg	brw_set_dest(p, insn, dest);
1333428d7b3dSmrg	brw_set_src0(p, insn, src);
1334428d7b3dSmrg	brw_set_math_message(p, insn, function,
1335428d7b3dSmrg			     BRW_MATH_INTEGER_UNSIGNED,
1336428d7b3dSmrg			     precision,
1337428d7b3dSmrg			     saturate,
1338428d7b3dSmrg			     BRW_MATH_DATA_VECTOR);
1339428d7b3dSmrg
1340428d7b3dSmrg	/* Second instruction:
1341428d7b3dSmrg	*/
1342428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1343428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1344428d7b3dSmrg	insn->header.destreg__conditionalmod = msg_reg_nr+1;
1345428d7b3dSmrg
1346428d7b3dSmrg	brw_set_dest(p, insn, __offset(dest,1));
1347428d7b3dSmrg	brw_set_src0(p, insn, src);
1348428d7b3dSmrg	brw_set_math_message(p, insn, function,
1349428d7b3dSmrg			     BRW_MATH_INTEGER_UNSIGNED,
1350428d7b3dSmrg			     precision,
1351428d7b3dSmrg			     saturate,
1352428d7b3dSmrg			     BRW_MATH_DATA_VECTOR);
1353428d7b3dSmrg
1354428d7b3dSmrg	brw_pop_insn_state(p);
1355428d7b3dSmrg}
1356428d7b3dSmrg
1357428d7b3dSmrg/**
 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
1359428d7b3dSmrg * using a constant offset per channel.
1360428d7b3dSmrg *
1361428d7b3dSmrg * The offset must be aligned to oword size (16 bytes).  Used for
1362428d7b3dSmrg * register spilling.
1363428d7b3dSmrg */
1364428d7b3dSmrgvoid brw_oword_block_write_scratch(struct brw_compile *p,
1365428d7b3dSmrg				   struct brw_reg mrf,
1366428d7b3dSmrg				   int num_regs,
1367428d7b3dSmrg				   unsigned offset)
1368428d7b3dSmrg{
1369428d7b3dSmrg	uint32_t msg_control, msg_type;
1370428d7b3dSmrg	int mlen;
1371428d7b3dSmrg
1372428d7b3dSmrg	if (p->gen >= 060)
1373428d7b3dSmrg		offset /= 16;
1374428d7b3dSmrg
1375428d7b3dSmrg	mrf = __retype_ud(mrf);
1376428d7b3dSmrg
1377428d7b3dSmrg	if (num_regs == 1) {
1378428d7b3dSmrg		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1379428d7b3dSmrg		mlen = 2;
1380428d7b3dSmrg	} else {
1381428d7b3dSmrg		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1382428d7b3dSmrg		mlen = 3;
1383428d7b3dSmrg	}
1384428d7b3dSmrg
1385428d7b3dSmrg	/* Set up the message header.  This is g0, with g0.2 filled with
1386428d7b3dSmrg	 * the offset.  We don't want to leave our offset around in g0 or
1387428d7b3dSmrg	 * it'll screw up texture samples, so set it up inside the message
1388428d7b3dSmrg	 * reg.
1389428d7b3dSmrg	 */
1390428d7b3dSmrg	{
1391428d7b3dSmrg		brw_push_insn_state(p);
1392428d7b3dSmrg		brw_set_mask_control(p, BRW_MASK_DISABLE);
1393428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1394428d7b3dSmrg
1395428d7b3dSmrg		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1396428d7b3dSmrg
1397428d7b3dSmrg		/* set message header global offset field (reg 0, element 2) */
1398428d7b3dSmrg		brw_MOV(p,
1399428d7b3dSmrg			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1400428d7b3dSmrg			brw_imm_ud(offset));
1401428d7b3dSmrg
1402428d7b3dSmrg		brw_pop_insn_state(p);
1403428d7b3dSmrg	}
1404428d7b3dSmrg
1405428d7b3dSmrg	{
1406428d7b3dSmrg		struct brw_reg dest;
1407428d7b3dSmrg		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1408428d7b3dSmrg		int send_commit_msg;
1409428d7b3dSmrg		struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
1410428d7b3dSmrg
1411428d7b3dSmrg		if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1412428d7b3dSmrg			insn->header.compression_control = BRW_COMPRESSION_NONE;
1413428d7b3dSmrg			src_header = vec16(src_header);
1414428d7b3dSmrg		}
1415428d7b3dSmrg		assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1416428d7b3dSmrg		insn->header.destreg__conditionalmod = mrf.nr;
1417428d7b3dSmrg
1418428d7b3dSmrg		/* Until gen6, writes followed by reads from the same location
1419428d7b3dSmrg		 * are not guaranteed to be ordered unless write_commit is set.
1420428d7b3dSmrg		 * If set, then a no-op write is issued to the destination
1421428d7b3dSmrg		 * register to set a dependency, and a read from the destination
1422428d7b3dSmrg		 * can be used to ensure the ordering.
1423428d7b3dSmrg		 *
1424428d7b3dSmrg		 * For gen6, only writes between different threads need ordering
1425428d7b3dSmrg		 * protection.  Our use of DP writes is all about register
1426428d7b3dSmrg		 * spilling within a thread.
1427428d7b3dSmrg		 */
1428428d7b3dSmrg		if (p->gen >= 060) {
1429428d7b3dSmrg			dest = __retype_uw(vec16(brw_null_reg()));
1430428d7b3dSmrg			send_commit_msg = 0;
1431428d7b3dSmrg		} else {
1432428d7b3dSmrg			dest = src_header;
1433428d7b3dSmrg			send_commit_msg = 1;
1434428d7b3dSmrg		}
1435428d7b3dSmrg
1436428d7b3dSmrg		brw_set_dest(p, insn, dest);
1437428d7b3dSmrg		if (p->gen >= 060) {
1438428d7b3dSmrg			brw_set_src0(p, insn, mrf);
1439428d7b3dSmrg		} else {
1440428d7b3dSmrg			brw_set_src0(p, insn, brw_null_reg());
1441428d7b3dSmrg		}
1442428d7b3dSmrg
1443428d7b3dSmrg		if (p->gen >= 060)
1444428d7b3dSmrg			msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1445428d7b3dSmrg		else
1446428d7b3dSmrg			msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1447428d7b3dSmrg
1448428d7b3dSmrg		brw_set_dp_write_message(p,
1449428d7b3dSmrg					 insn,
1450428d7b3dSmrg					 255, /* binding table index (255=stateless) */
1451428d7b3dSmrg					 msg_control,
1452428d7b3dSmrg					 msg_type,
1453428d7b3dSmrg					 mlen,
1454428d7b3dSmrg					 true, /* header_present */
1455428d7b3dSmrg					 0, /* pixel scoreboard */
1456428d7b3dSmrg					 send_commit_msg, /* response_length */
1457428d7b3dSmrg					 0, /* eot */
1458428d7b3dSmrg					 send_commit_msg);
1459428d7b3dSmrg	}
1460428d7b3dSmrg}
1461428d7b3dSmrg
1462428d7b3dSmrg
1463428d7b3dSmrg/**
1464428d7b3dSmrg * Read a block of owords (half a GRF each) from the scratch buffer
1465428d7b3dSmrg * using a constant index per channel.
1466428d7b3dSmrg *
1467428d7b3dSmrg * Offset must be aligned to oword size (16 bytes).  Used for register
1468428d7b3dSmrg * spilling.
1469428d7b3dSmrg */
1470428d7b3dSmrgvoid
1471428d7b3dSmrgbrw_oword_block_read_scratch(struct brw_compile *p,
1472428d7b3dSmrg			     struct brw_reg dest,
1473428d7b3dSmrg			     struct brw_reg mrf,
1474428d7b3dSmrg			     int num_regs,
1475428d7b3dSmrg			     unsigned offset)
1476428d7b3dSmrg{
1477428d7b3dSmrg	uint32_t msg_control;
1478428d7b3dSmrg	int rlen;
1479428d7b3dSmrg
1480428d7b3dSmrg	if (p->gen >= 060)
1481428d7b3dSmrg		offset /= 16;
1482428d7b3dSmrg
1483428d7b3dSmrg	mrf = __retype_ud(mrf);
1484428d7b3dSmrg	dest = __retype_uw(dest);
1485428d7b3dSmrg
1486428d7b3dSmrg	if (num_regs == 1) {
1487428d7b3dSmrg		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1488428d7b3dSmrg		rlen = 1;
1489428d7b3dSmrg	} else {
1490428d7b3dSmrg		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1491428d7b3dSmrg		rlen = 2;
1492428d7b3dSmrg	}
1493428d7b3dSmrg
1494428d7b3dSmrg	{
1495428d7b3dSmrg		brw_push_insn_state(p);
1496428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1497428d7b3dSmrg		brw_set_mask_control(p, BRW_MASK_DISABLE);
1498428d7b3dSmrg
1499428d7b3dSmrg		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1500428d7b3dSmrg
1501428d7b3dSmrg		/* set message header global offset field (reg 0, element 2) */
1502428d7b3dSmrg		brw_MOV(p,
1503428d7b3dSmrg			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1504428d7b3dSmrg			brw_imm_ud(offset));
1505428d7b3dSmrg
1506428d7b3dSmrg		brw_pop_insn_state(p);
1507428d7b3dSmrg	}
1508428d7b3dSmrg
1509428d7b3dSmrg	{
1510428d7b3dSmrg		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1511428d7b3dSmrg
1512428d7b3dSmrg		assert(insn->header.predicate_control == 0);
1513428d7b3dSmrg		insn->header.compression_control = BRW_COMPRESSION_NONE;
1514428d7b3dSmrg		insn->header.destreg__conditionalmod = mrf.nr;
1515428d7b3dSmrg
1516428d7b3dSmrg		brw_set_dest(p, insn, dest); /* UW? */
1517428d7b3dSmrg		if (p->gen >= 060) {
1518428d7b3dSmrg			brw_set_src0(p, insn, mrf);
1519428d7b3dSmrg		} else {
1520428d7b3dSmrg			brw_set_src0(p, insn, brw_null_reg());
1521428d7b3dSmrg		}
1522428d7b3dSmrg
1523428d7b3dSmrg		brw_set_dp_read_message(p,
1524428d7b3dSmrg					insn,
1525428d7b3dSmrg					255, /* binding table index (255=stateless) */
1526428d7b3dSmrg					msg_control,
1527428d7b3dSmrg					BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1528428d7b3dSmrg					BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1529428d7b3dSmrg					1, /* msg_length */
1530428d7b3dSmrg					rlen);
1531428d7b3dSmrg	}
1532428d7b3dSmrg}
1533428d7b3dSmrg
1534428d7b3dSmrg/**
1535428d7b3dSmrg * Read a float[4] vector from the data port Data Cache (const buffer).
1536428d7b3dSmrg * Location (in buffer) should be a multiple of 16.
1537428d7b3dSmrg * Used for fetching shader constants.
1538428d7b3dSmrg */
1539428d7b3dSmrgvoid brw_oword_block_read(struct brw_compile *p,
1540428d7b3dSmrg			  struct brw_reg dest,
1541428d7b3dSmrg			  struct brw_reg mrf,
1542428d7b3dSmrg			  uint32_t offset,
1543428d7b3dSmrg			  uint32_t bind_table_index)
1544428d7b3dSmrg{
1545428d7b3dSmrg	struct brw_instruction *insn;
1546428d7b3dSmrg
1547428d7b3dSmrg	/* On newer hardware, offset is in units of owords. */
1548428d7b3dSmrg	if (p->gen >= 060)
1549428d7b3dSmrg		offset /= 16;
1550428d7b3dSmrg
1551428d7b3dSmrg	mrf = __retype_ud(mrf);
1552428d7b3dSmrg
1553428d7b3dSmrg	brw_push_insn_state(p);
1554428d7b3dSmrg	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1555428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1556428d7b3dSmrg	brw_set_mask_control(p, BRW_MASK_DISABLE);
1557428d7b3dSmrg
1558428d7b3dSmrg	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1559428d7b3dSmrg
1560428d7b3dSmrg	/* set message header global offset field (reg 0, element 2) */
1561428d7b3dSmrg	brw_MOV(p,
1562428d7b3dSmrg		__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1563428d7b3dSmrg		brw_imm_ud(offset));
1564428d7b3dSmrg
1565428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1566428d7b3dSmrg	insn->header.destreg__conditionalmod = mrf.nr;
1567428d7b3dSmrg
1568428d7b3dSmrg	/* cast dest to a uword[8] vector */
1569428d7b3dSmrg	dest = __retype_uw(vec8(dest));
1570428d7b3dSmrg
1571428d7b3dSmrg	brw_set_dest(p, insn, dest);
1572428d7b3dSmrg	if (p->gen >= 060) {
1573428d7b3dSmrg		brw_set_src0(p, insn, mrf);
1574428d7b3dSmrg	} else {
1575428d7b3dSmrg		brw_set_src0(p, insn, brw_null_reg());
1576428d7b3dSmrg	}
1577428d7b3dSmrg
1578428d7b3dSmrg	brw_set_dp_read_message(p,
1579428d7b3dSmrg				insn,
1580428d7b3dSmrg				bind_table_index,
1581428d7b3dSmrg				BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1582428d7b3dSmrg				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1583428d7b3dSmrg				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1584428d7b3dSmrg				1, /* msg_length */
1585428d7b3dSmrg				1); /* response_length (1 reg, 2 owords!) */
1586428d7b3dSmrg
1587428d7b3dSmrg	brw_pop_insn_state(p);
1588428d7b3dSmrg}
1589428d7b3dSmrg
1590428d7b3dSmrg/**
1591428d7b3dSmrg * Read a set of dwords from the data port Data Cache (const buffer).
1592428d7b3dSmrg *
1593428d7b3dSmrg * Location (in buffer) appears as UD offsets in the register after
1594428d7b3dSmrg * the provided mrf header reg.
1595428d7b3dSmrg */
1596428d7b3dSmrgvoid brw_dword_scattered_read(struct brw_compile *p,
1597428d7b3dSmrg			      struct brw_reg dest,
1598428d7b3dSmrg			      struct brw_reg mrf,
1599428d7b3dSmrg			      uint32_t bind_table_index)
1600428d7b3dSmrg{
1601428d7b3dSmrg	struct brw_instruction *insn;
1602428d7b3dSmrg
1603428d7b3dSmrg	mrf = __retype_ud(mrf);
1604428d7b3dSmrg
1605428d7b3dSmrg	brw_push_insn_state(p);
1606428d7b3dSmrg	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1607428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1608428d7b3dSmrg	brw_set_mask_control(p, BRW_MASK_DISABLE);
1609428d7b3dSmrg	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1610428d7b3dSmrg	brw_pop_insn_state(p);
1611428d7b3dSmrg
1612428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1613428d7b3dSmrg	insn->header.destreg__conditionalmod = mrf.nr;
1614428d7b3dSmrg
1615428d7b3dSmrg	/* cast dest to a uword[8] vector */
1616428d7b3dSmrg	dest = __retype_uw(vec8(dest));
1617428d7b3dSmrg
1618428d7b3dSmrg	brw_set_dest(p, insn, dest);
1619428d7b3dSmrg	brw_set_src0(p, insn, brw_null_reg());
1620428d7b3dSmrg
1621428d7b3dSmrg	brw_set_dp_read_message(p,
1622428d7b3dSmrg				insn,
1623428d7b3dSmrg				bind_table_index,
1624428d7b3dSmrg				BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1625428d7b3dSmrg				BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1626428d7b3dSmrg				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1627428d7b3dSmrg				2, /* msg_length */
1628428d7b3dSmrg				1); /* response_length */
1629428d7b3dSmrg}
1630428d7b3dSmrg
1631428d7b3dSmrg/**
1632428d7b3dSmrg * Read float[4] constant(s) from VS constant buffer.
1633428d7b3dSmrg * For relative addressing, two float[4] constants will be read into 'dest'.
1634428d7b3dSmrg * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1635428d7b3dSmrg */
1636428d7b3dSmrgvoid brw_dp_READ_4_vs(struct brw_compile *p,
1637428d7b3dSmrg                      struct brw_reg dest,
1638428d7b3dSmrg                      unsigned location,
1639428d7b3dSmrg                      unsigned bind_table_index)
1640428d7b3dSmrg{
1641428d7b3dSmrg	struct brw_instruction *insn;
1642428d7b3dSmrg	unsigned msg_reg_nr = 1;
1643428d7b3dSmrg
1644428d7b3dSmrg	if (p->gen >= 060)
1645428d7b3dSmrg		location /= 16;
1646428d7b3dSmrg
1647428d7b3dSmrg	/* Setup MRF[1] with location/offset into const buffer */
1648428d7b3dSmrg	brw_push_insn_state(p);
1649428d7b3dSmrg	brw_set_access_mode(p, BRW_ALIGN_1);
1650428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1651428d7b3dSmrg	brw_set_mask_control(p, BRW_MASK_DISABLE);
1652428d7b3dSmrg	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1653428d7b3dSmrg	brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
1654428d7b3dSmrg		brw_imm_ud(location));
1655428d7b3dSmrg	brw_pop_insn_state(p);
1656428d7b3dSmrg
1657428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1658428d7b3dSmrg
1659428d7b3dSmrg	insn->header.predicate_control = BRW_PREDICATE_NONE;
1660428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
1661428d7b3dSmrg	insn->header.destreg__conditionalmod = msg_reg_nr;
1662428d7b3dSmrg	insn->header.mask_control = BRW_MASK_DISABLE;
1663428d7b3dSmrg
1664428d7b3dSmrg	brw_set_dest(p, insn, dest);
1665428d7b3dSmrg	if (p->gen >= 060) {
1666428d7b3dSmrg		brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1667428d7b3dSmrg	} else {
1668428d7b3dSmrg		brw_set_src0(p, insn, brw_null_reg());
1669428d7b3dSmrg	}
1670428d7b3dSmrg
1671428d7b3dSmrg	brw_set_dp_read_message(p,
1672428d7b3dSmrg				insn,
1673428d7b3dSmrg				bind_table_index,
1674428d7b3dSmrg				0,
1675428d7b3dSmrg				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1676428d7b3dSmrg				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1677428d7b3dSmrg				1, /* msg_length */
1678428d7b3dSmrg				1); /* response_length (1 Oword) */
1679428d7b3dSmrg}
1680428d7b3dSmrg
1681428d7b3dSmrg/**
1682428d7b3dSmrg * Read a float[4] constant per vertex from VS constant buffer, with
1683428d7b3dSmrg * relative addressing.
1684428d7b3dSmrg */
1685428d7b3dSmrgvoid brw_dp_READ_4_vs_relative(struct brw_compile *p,
1686428d7b3dSmrg			       struct brw_reg dest,
1687428d7b3dSmrg			       struct brw_reg addr_reg,
1688428d7b3dSmrg			       unsigned offset,
1689428d7b3dSmrg			       unsigned bind_table_index)
1690428d7b3dSmrg{
1691428d7b3dSmrg	struct brw_reg src = brw_vec8_grf(0, 0);
1692428d7b3dSmrg	struct brw_instruction *insn;
1693428d7b3dSmrg	int msg_type;
1694428d7b3dSmrg
1695428d7b3dSmrg	/* Setup MRF[1] with offset into const buffer */
1696428d7b3dSmrg	brw_push_insn_state(p);
1697428d7b3dSmrg	brw_set_access_mode(p, BRW_ALIGN_1);
1698428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1699428d7b3dSmrg	brw_set_mask_control(p, BRW_MASK_DISABLE);
1700428d7b3dSmrg	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1701428d7b3dSmrg
1702428d7b3dSmrg	/* M1.0 is block offset 0, M1.4 is block offset 1, all other
1703428d7b3dSmrg	 * fields ignored.
1704428d7b3dSmrg	 */
1705428d7b3dSmrg	brw_ADD(p, __retype_d(brw_message_reg(1)),
1706428d7b3dSmrg		addr_reg, brw_imm_d(offset));
1707428d7b3dSmrg	brw_pop_insn_state(p);
1708428d7b3dSmrg
1709428d7b3dSmrg	gen6_resolve_implied_move(p, &src, 0);
1710428d7b3dSmrg
1711428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1712428d7b3dSmrg	insn->header.predicate_control = BRW_PREDICATE_NONE;
1713428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
1714428d7b3dSmrg	insn->header.destreg__conditionalmod = 0;
1715428d7b3dSmrg	insn->header.mask_control = BRW_MASK_DISABLE;
1716428d7b3dSmrg
1717428d7b3dSmrg	brw_set_dest(p, insn, dest);
1718428d7b3dSmrg	brw_set_src0(p, insn, src);
1719428d7b3dSmrg
1720428d7b3dSmrg	if (p->gen >= 060)
1721428d7b3dSmrg		msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1722428d7b3dSmrg	else if (p->gen >= 045)
1723428d7b3dSmrg		msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1724428d7b3dSmrg	else
1725428d7b3dSmrg		msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1726428d7b3dSmrg
1727428d7b3dSmrg	brw_set_dp_read_message(p,
1728428d7b3dSmrg				insn,
1729428d7b3dSmrg				bind_table_index,
1730428d7b3dSmrg				BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1731428d7b3dSmrg				msg_type,
1732428d7b3dSmrg				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1733428d7b3dSmrg				2, /* msg_length */
1734428d7b3dSmrg				1); /* response_length */
1735428d7b3dSmrg}
1736428d7b3dSmrg
1737428d7b3dSmrgvoid brw_fb_WRITE(struct brw_compile *p,
1738428d7b3dSmrg		  int dispatch_width,
1739428d7b3dSmrg                  unsigned msg_reg_nr,
1740428d7b3dSmrg                  struct brw_reg src0,
1741428d7b3dSmrg                  unsigned msg_control,
1742428d7b3dSmrg                  unsigned binding_table_index,
1743428d7b3dSmrg                  unsigned msg_length,
1744428d7b3dSmrg                  unsigned response_length,
1745428d7b3dSmrg                  bool eot,
1746428d7b3dSmrg                  bool header_present)
1747428d7b3dSmrg{
1748428d7b3dSmrg	struct brw_instruction *insn;
1749428d7b3dSmrg	unsigned msg_type;
1750428d7b3dSmrg	struct brw_reg dest;
1751428d7b3dSmrg
1752428d7b3dSmrg	if (dispatch_width == 16)
1753428d7b3dSmrg		dest = __retype_uw(vec16(brw_null_reg()));
1754428d7b3dSmrg	else
1755428d7b3dSmrg		dest = __retype_uw(vec8(brw_null_reg()));
1756428d7b3dSmrg
1757428d7b3dSmrg	if (p->gen >= 060 && binding_table_index == 0) {
1758428d7b3dSmrg		insn = brw_next_insn(p, BRW_OPCODE_SENDC);
1759428d7b3dSmrg	} else {
1760428d7b3dSmrg		insn = brw_next_insn(p, BRW_OPCODE_SEND);
1761428d7b3dSmrg	}
1762428d7b3dSmrg	/* The execution mask is ignored for render target writes. */
1763428d7b3dSmrg	insn->header.predicate_control = 0;
1764428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
1765428d7b3dSmrg
1766428d7b3dSmrg	if (p->gen >= 060) {
1767428d7b3dSmrg		/* headerless version, just submit color payload */
1768428d7b3dSmrg		src0 = brw_message_reg(msg_reg_nr);
1769428d7b3dSmrg
1770428d7b3dSmrg		msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1771428d7b3dSmrg	} else {
1772428d7b3dSmrg		insn->header.destreg__conditionalmod = msg_reg_nr;
1773428d7b3dSmrg
1774428d7b3dSmrg		msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1775428d7b3dSmrg	}
1776428d7b3dSmrg
1777428d7b3dSmrg	brw_set_dest(p, insn, dest);
1778428d7b3dSmrg	brw_set_src0(p, insn, src0);
1779428d7b3dSmrg	brw_set_dp_write_message(p,
1780428d7b3dSmrg				 insn,
1781428d7b3dSmrg				 binding_table_index,
1782428d7b3dSmrg				 msg_control,
1783428d7b3dSmrg				 msg_type,
1784428d7b3dSmrg				 msg_length,
1785428d7b3dSmrg				 header_present,
1786428d7b3dSmrg				 eot,
1787428d7b3dSmrg				 response_length,
1788428d7b3dSmrg				 eot,
1789428d7b3dSmrg				 0 /* send_commit_msg */);
1790428d7b3dSmrg}
1791428d7b3dSmrg
1792428d7b3dSmrg/**
1793428d7b3dSmrg * Texture sample instruction.
1794428d7b3dSmrg * Note: the msg_type plus msg_length values determine exactly what kind
1795428d7b3dSmrg * of sampling operation is performed.  See volume 4, page 161 of docs.
1796428d7b3dSmrg */
1797428d7b3dSmrgvoid brw_SAMPLE(struct brw_compile *p,
1798428d7b3dSmrg		struct brw_reg dest,
1799428d7b3dSmrg		unsigned msg_reg_nr,
1800428d7b3dSmrg		struct brw_reg src0,
1801428d7b3dSmrg		unsigned binding_table_index,
1802428d7b3dSmrg		unsigned sampler,
1803428d7b3dSmrg		unsigned writemask,
1804428d7b3dSmrg		unsigned msg_type,
1805428d7b3dSmrg		unsigned response_length,
1806428d7b3dSmrg		unsigned msg_length,
1807428d7b3dSmrg		bool header_present,
1808428d7b3dSmrg		unsigned simd_mode)
1809428d7b3dSmrg{
1810428d7b3dSmrg	assert(writemask);
1811428d7b3dSmrg
1812428d7b3dSmrg	if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
1813428d7b3dSmrg		struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1814428d7b3dSmrg
1815428d7b3dSmrg		writemask = ~writemask & WRITEMASK_XYZW;
1816428d7b3dSmrg
1817428d7b3dSmrg		brw_push_insn_state(p);
1818428d7b3dSmrg
1819428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1820428d7b3dSmrg		brw_set_mask_control(p, BRW_MASK_DISABLE);
1821428d7b3dSmrg
1822428d7b3dSmrg		brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
1823428d7b3dSmrg		brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
1824428d7b3dSmrg
1825428d7b3dSmrg		brw_pop_insn_state(p);
1826428d7b3dSmrg
1827428d7b3dSmrg		src0 = __retype_uw(brw_null_reg());
1828428d7b3dSmrg	}
1829428d7b3dSmrg
1830428d7b3dSmrg	{
1831428d7b3dSmrg		struct brw_instruction *insn;
1832428d7b3dSmrg
1833428d7b3dSmrg		gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1834428d7b3dSmrg
1835428d7b3dSmrg		insn = brw_next_insn(p, BRW_OPCODE_SEND);
1836428d7b3dSmrg		insn->header.predicate_control = 0; /* XXX */
1837428d7b3dSmrg		insn->header.compression_control = BRW_COMPRESSION_NONE;
1838428d7b3dSmrg		if (p->gen < 060)
1839428d7b3dSmrg			insn->header.destreg__conditionalmod = msg_reg_nr;
1840428d7b3dSmrg
1841428d7b3dSmrg		brw_set_dest(p, insn, dest);
1842428d7b3dSmrg		brw_set_src0(p, insn, src0);
1843428d7b3dSmrg		brw_set_sampler_message(p, insn,
1844428d7b3dSmrg					binding_table_index,
1845428d7b3dSmrg					sampler,
1846428d7b3dSmrg					msg_type,
1847428d7b3dSmrg					response_length,
1848428d7b3dSmrg					msg_length,
1849428d7b3dSmrg					header_present,
1850428d7b3dSmrg					simd_mode);
1851428d7b3dSmrg	}
1852428d7b3dSmrg}
1853428d7b3dSmrg
1854428d7b3dSmrg/* All these variables are pretty confusing - we might be better off
1855428d7b3dSmrg * using bitmasks and macros for this, in the old style.  Or perhaps
1856428d7b3dSmrg * just having the caller instantiate the fields in dword3 itself.
1857428d7b3dSmrg */
1858428d7b3dSmrgvoid brw_urb_WRITE(struct brw_compile *p,
1859428d7b3dSmrg		   struct brw_reg dest,
1860428d7b3dSmrg		   unsigned msg_reg_nr,
1861428d7b3dSmrg		   struct brw_reg src0,
1862428d7b3dSmrg		   bool allocate,
1863428d7b3dSmrg		   bool used,
1864428d7b3dSmrg		   unsigned msg_length,
1865428d7b3dSmrg		   unsigned response_length,
1866428d7b3dSmrg		   bool eot,
1867428d7b3dSmrg		   bool writes_complete,
1868428d7b3dSmrg		   unsigned offset,
1869428d7b3dSmrg		   unsigned swizzle)
1870428d7b3dSmrg{
1871428d7b3dSmrg	struct brw_instruction *insn;
1872428d7b3dSmrg
1873428d7b3dSmrg	gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1874428d7b3dSmrg
1875428d7b3dSmrg	if (p->gen >= 070) {
1876428d7b3dSmrg		/* Enable Channel Masks in the URB_WRITE_HWORD message header */
1877428d7b3dSmrg		brw_push_insn_state(p);
1878428d7b3dSmrg		brw_set_access_mode(p, BRW_ALIGN_1);
1879428d7b3dSmrg		brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
1880428d7b3dSmrg		       __retype_ud(brw_vec1_grf(0, 5)),
1881428d7b3dSmrg		       brw_imm_ud(0xff00));
1882428d7b3dSmrg		brw_pop_insn_state(p);
1883428d7b3dSmrg	}
1884428d7b3dSmrg
1885428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1886428d7b3dSmrg
1887428d7b3dSmrg	assert(msg_length < BRW_MAX_MRF);
1888428d7b3dSmrg
1889428d7b3dSmrg	brw_set_dest(p, insn, dest);
1890428d7b3dSmrg	brw_set_src0(p, insn, src0);
1891428d7b3dSmrg	brw_set_src1(p, insn, brw_imm_d(0));
1892428d7b3dSmrg
1893428d7b3dSmrg	if (p->gen <= 060)
1894428d7b3dSmrg		insn->header.destreg__conditionalmod = msg_reg_nr;
1895428d7b3dSmrg
1896428d7b3dSmrg	brw_set_urb_message(p,
1897428d7b3dSmrg			    insn,
1898428d7b3dSmrg			    allocate,
1899428d7b3dSmrg			    used,
1900428d7b3dSmrg			    msg_length,
1901428d7b3dSmrg			    response_length,
1902428d7b3dSmrg			    eot,
1903428d7b3dSmrg			    writes_complete,
1904428d7b3dSmrg			    offset,
1905428d7b3dSmrg			    swizzle);
1906428d7b3dSmrg}
1907428d7b3dSmrg
1908428d7b3dSmrgstatic int
1909428d7b3dSmrgbrw_find_next_block_end(struct brw_compile *p, int start)
1910428d7b3dSmrg{
1911428d7b3dSmrg	int ip;
1912428d7b3dSmrg
1913428d7b3dSmrg	for (ip = start + 1; ip < p->nr_insn; ip++) {
1914428d7b3dSmrg		struct brw_instruction *insn = &p->store[ip];
1915428d7b3dSmrg
1916428d7b3dSmrg		switch (insn->header.opcode) {
1917428d7b3dSmrg		case BRW_OPCODE_ENDIF:
1918428d7b3dSmrg		case BRW_OPCODE_ELSE:
1919428d7b3dSmrg		case BRW_OPCODE_WHILE:
1920428d7b3dSmrg			return ip;
1921428d7b3dSmrg		}
1922428d7b3dSmrg	}
1923428d7b3dSmrg	assert(!"not reached");
1924428d7b3dSmrg	return start + 1;
1925428d7b3dSmrg}
1926428d7b3dSmrg
1927428d7b3dSmrg/* There is no DO instruction on gen6, so to find the end of the loop
1928428d7b3dSmrg * we have to see if the loop is jumping back before our start
1929428d7b3dSmrg * instruction.
1930428d7b3dSmrg */
1931428d7b3dSmrgstatic int
1932428d7b3dSmrgbrw_find_loop_end(struct brw_compile *p, int start)
1933428d7b3dSmrg{
1934428d7b3dSmrg	int ip;
1935428d7b3dSmrg	int br = 2;
1936428d7b3dSmrg
1937428d7b3dSmrg	for (ip = start + 1; ip < p->nr_insn; ip++) {
1938428d7b3dSmrg		struct brw_instruction *insn = &p->store[ip];
1939428d7b3dSmrg
1940428d7b3dSmrg		if (insn->header.opcode == BRW_OPCODE_WHILE) {
1941428d7b3dSmrg			int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
1942428d7b3dSmrg				: insn->bits3.break_cont.jip;
1943428d7b3dSmrg			if (ip + jip / br <= start)
1944428d7b3dSmrg				return ip;
1945428d7b3dSmrg		}
1946428d7b3dSmrg	}
1947428d7b3dSmrg	assert(!"not reached");
1948428d7b3dSmrg	return start + 1;
1949428d7b3dSmrg}
1950428d7b3dSmrg
1951428d7b3dSmrg/* After program generation, go back and update the UIP and JIP of
1952428d7b3dSmrg * BREAK and CONT instructions to their correct locations.
1953428d7b3dSmrg */
1954428d7b3dSmrgvoid
1955428d7b3dSmrgbrw_set_uip_jip(struct brw_compile *p)
1956428d7b3dSmrg{
1957428d7b3dSmrg	int ip;
1958428d7b3dSmrg	int br = 2;
1959428d7b3dSmrg
1960428d7b3dSmrg	if (p->gen <= 060)
1961428d7b3dSmrg		return;
1962428d7b3dSmrg
1963428d7b3dSmrg	for (ip = 0; ip < p->nr_insn; ip++) {
1964428d7b3dSmrg		struct brw_instruction *insn = &p->store[ip];
1965428d7b3dSmrg
1966428d7b3dSmrg		switch (insn->header.opcode) {
1967428d7b3dSmrg		case BRW_OPCODE_BREAK:
1968428d7b3dSmrg			insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1969428d7b3dSmrg			/* Gen7 UIP points to WHILE; Gen6 points just after it */
1970428d7b3dSmrg			insn->bits3.break_cont.uip =
1971428d7b3dSmrg				br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
1972428d7b3dSmrg			break;
1973428d7b3dSmrg		case BRW_OPCODE_CONTINUE:
1974428d7b3dSmrg			insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1975428d7b3dSmrg			insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
1976428d7b3dSmrg
1977428d7b3dSmrg			assert(insn->bits3.break_cont.uip != 0);
1978428d7b3dSmrg			assert(insn->bits3.break_cont.jip != 0);
1979428d7b3dSmrg			break;
1980428d7b3dSmrg		}
1981428d7b3dSmrg	}
1982428d7b3dSmrg}
1983428d7b3dSmrg
1984428d7b3dSmrgvoid brw_ff_sync(struct brw_compile *p,
1985428d7b3dSmrg		   struct brw_reg dest,
1986428d7b3dSmrg		   unsigned msg_reg_nr,
1987428d7b3dSmrg		   struct brw_reg src0,
1988428d7b3dSmrg		   bool allocate,
1989428d7b3dSmrg		   unsigned response_length,
1990428d7b3dSmrg		   bool eot)
1991428d7b3dSmrg{
1992428d7b3dSmrg	struct brw_instruction *insn;
1993428d7b3dSmrg
1994428d7b3dSmrg	gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1995428d7b3dSmrg
1996428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1997428d7b3dSmrg	brw_set_dest(p, insn, dest);
1998428d7b3dSmrg	brw_set_src0(p, insn, src0);
1999428d7b3dSmrg	brw_set_src1(p, insn, brw_imm_d(0));
2000428d7b3dSmrg
2001428d7b3dSmrg	if (p->gen < 060)
2002428d7b3dSmrg		insn->header.destreg__conditionalmod = msg_reg_nr;
2003428d7b3dSmrg
2004428d7b3dSmrg	brw_set_ff_sync_message(p,
2005428d7b3dSmrg				insn,
2006428d7b3dSmrg				allocate,
2007428d7b3dSmrg				response_length,
2008428d7b3dSmrg				eot);
2009428d7b3dSmrg}
2010