1428d7b3dSmrg#include "brw.h"
2428d7b3dSmrg
3428d7b3dSmrg#define X16 8
4428d7b3dSmrg#define Y16 10
5428d7b3dSmrg
6428d7b3dSmrgstatic void brw_wm_xy(struct brw_compile *p, int dw)
7428d7b3dSmrg{
8428d7b3dSmrg	struct brw_reg r1 = brw_vec1_grf(1, 0);
9428d7b3dSmrg	struct brw_reg r1_uw = __retype_uw(r1);
10428d7b3dSmrg	struct brw_reg x_uw, y_uw;
11428d7b3dSmrg
12428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
13428d7b3dSmrg
14428d7b3dSmrg	if (dw == 16) {
15428d7b3dSmrg		x_uw = brw_uw16_grf(30, 0);
16428d7b3dSmrg		y_uw = brw_uw16_grf(28, 0);
17428d7b3dSmrg	} else {
18428d7b3dSmrg		x_uw = brw_uw8_grf(30, 0);
19428d7b3dSmrg		y_uw = brw_uw8_grf(28, 0);
20428d7b3dSmrg	}
21428d7b3dSmrg
22428d7b3dSmrg	brw_ADD(p,
23428d7b3dSmrg		x_uw,
24428d7b3dSmrg		__stride(__suboffset(r1_uw, 4), 2, 4, 0),
25428d7b3dSmrg		brw_imm_v(0x10101010));
26428d7b3dSmrg	brw_ADD(p,
27428d7b3dSmrg		y_uw,
28428d7b3dSmrg		__stride(__suboffset(r1_uw, 5), 2, 4, 0),
29428d7b3dSmrg		brw_imm_v(0x11001100));
30428d7b3dSmrg
31428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
32428d7b3dSmrg
33428d7b3dSmrg	brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1));
34428d7b3dSmrg	brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
35428d7b3dSmrg}
36428d7b3dSmrg
37428d7b3dSmrgstatic void brw_wm_affine_st(struct brw_compile *p, int dw,
38428d7b3dSmrg			     int channel, int msg)
39428d7b3dSmrg{
40428d7b3dSmrg	int uv;
41428d7b3dSmrg
42428d7b3dSmrg	if (dw == 16) {
43428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
44428d7b3dSmrg		uv = p->gen >= 060 ? 6 : 3;
45428d7b3dSmrg	} else {
46428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
47428d7b3dSmrg		uv = p->gen >= 060 ? 4 : 3;
48428d7b3dSmrg	}
49428d7b3dSmrg	uv += 2*channel;
50428d7b3dSmrg
51428d7b3dSmrg	msg++;
52428d7b3dSmrg	if (p->gen >= 060) {
53428d7b3dSmrg		brw_PLN(p,
54428d7b3dSmrg			brw_message_reg(msg),
55428d7b3dSmrg			brw_vec1_grf(uv, 0),
56428d7b3dSmrg			brw_vec8_grf(2, 0));
57428d7b3dSmrg		msg += dw/8;
58428d7b3dSmrg
59428d7b3dSmrg		brw_PLN(p,
60428d7b3dSmrg			brw_message_reg(msg),
61428d7b3dSmrg			brw_vec1_grf(uv, 4),
62428d7b3dSmrg			brw_vec8_grf(2, 0));
63428d7b3dSmrg	} else {
64428d7b3dSmrg		struct brw_reg r = brw_vec1_grf(uv, 0);
65428d7b3dSmrg
66428d7b3dSmrg		brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
67428d7b3dSmrg		brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
68428d7b3dSmrg		msg += dw/8;
69428d7b3dSmrg
70428d7b3dSmrg		brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
71428d7b3dSmrg		brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
72428d7b3dSmrg	}
73428d7b3dSmrg}
74428d7b3dSmrg
75428d7b3dSmrgstatic inline unsigned simd(int dw)
76428d7b3dSmrg{
77428d7b3dSmrg	return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8;
78428d7b3dSmrg}
79428d7b3dSmrg
80428d7b3dSmrgstatic inline struct brw_reg sample_result(int dw, int result)
81428d7b3dSmrg{
82428d7b3dSmrg	return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
83428d7b3dSmrg		       BRW_REGISTER_TYPE_UW,
84428d7b3dSmrg		       dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
85428d7b3dSmrg		       dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
86428d7b3dSmrg		       BRW_HORIZONTAL_STRIDE_1,
87428d7b3dSmrg		       BRW_SWIZZLE_XYZW,
88428d7b3dSmrg		       WRITEMASK_XYZW);
89428d7b3dSmrg}
90428d7b3dSmrg
91428d7b3dSmrgstatic int brw_wm_sample(struct brw_compile *p, int dw,
92428d7b3dSmrg			 int channel, int msg, int result)
93428d7b3dSmrg{
94428d7b3dSmrg	struct brw_reg src0;
95428d7b3dSmrg	bool header;
96428d7b3dSmrg	int len;
97428d7b3dSmrg
98428d7b3dSmrg	len = dw == 16 ? 4 : 2;
99428d7b3dSmrg	if (p->gen >= 060) {
100428d7b3dSmrg		header = false;
101428d7b3dSmrg		src0 = brw_message_reg(++msg);
102428d7b3dSmrg	} else {
103428d7b3dSmrg		header = true;
104428d7b3dSmrg		src0 = brw_vec8_grf(0, 0);
105428d7b3dSmrg	}
106428d7b3dSmrg
107428d7b3dSmrg	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
108428d7b3dSmrg		   channel+1, channel, WRITEMASK_XYZW, 0,
109428d7b3dSmrg		   2*len, len+header, header, simd(dw));
110428d7b3dSmrg	return result;
111428d7b3dSmrg}
112428d7b3dSmrg
113428d7b3dSmrgstatic int brw_wm_sample__alpha(struct brw_compile *p, int dw,
114428d7b3dSmrg				int channel, int msg, int result)
115428d7b3dSmrg{
116428d7b3dSmrg	struct brw_reg src0;
117428d7b3dSmrg	int mlen, rlen;
118428d7b3dSmrg
119428d7b3dSmrg	if (dw == 8) {
120428d7b3dSmrg		/* SIMD8 sample return is not masked */
121428d7b3dSmrg		mlen = 3;
122428d7b3dSmrg		rlen = 4;
123428d7b3dSmrg	} else {
124428d7b3dSmrg		mlen = 5;
125428d7b3dSmrg		rlen = 2;
126428d7b3dSmrg	}
127428d7b3dSmrg
128428d7b3dSmrg	if (p->gen >= 060)
129428d7b3dSmrg		src0 = brw_message_reg(msg);
130428d7b3dSmrg	else
131428d7b3dSmrg		src0 = brw_vec8_grf(0, 0);
132428d7b3dSmrg
133428d7b3dSmrg	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
134428d7b3dSmrg		   channel+1, channel, WRITEMASK_W, 0,
135428d7b3dSmrg		   rlen, mlen, true, simd(dw));
136428d7b3dSmrg
137428d7b3dSmrg	if (dw == 8)
138428d7b3dSmrg		result += 3;
139428d7b3dSmrg
140428d7b3dSmrg	return result;
141428d7b3dSmrg}
142428d7b3dSmrg
/*
 * Emit the affine coordinate setup for `channel`, then a four-component
 * sample of it.  Returns the value reported by brw_wm_sample().
 */
static int brw_wm_affine(struct brw_compile *p, int dw,
			 int channel, int msg, int result)
{
	int out;

	brw_wm_affine_st(p, dw, channel, msg);
	out = brw_wm_sample(p, dw, channel, msg, result);

	return out;
}
149428d7b3dSmrg
/*
 * Emit the affine coordinate setup for `channel`, then the W-only sample.
 * Returns the value reported by brw_wm_sample__alpha().
 */
static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	int out;

	brw_wm_affine_st(p, dw, channel, msg);
	out = brw_wm_sample__alpha(p, dw, channel, msg, result);

	return out;
}
156428d7b3dSmrg
157428d7b3dSmrgstatic inline struct brw_reg null_result(int dw)
158428d7b3dSmrg{
159428d7b3dSmrg	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
160428d7b3dSmrg		       BRW_REGISTER_TYPE_UW,
161428d7b3dSmrg		       dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
162428d7b3dSmrg		       dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
163428d7b3dSmrg		       BRW_HORIZONTAL_STRIDE_1,
164428d7b3dSmrg		       BRW_SWIZZLE_XYZW,
165428d7b3dSmrg		       WRITEMASK_XYZW);
166428d7b3dSmrg}
167428d7b3dSmrg
168428d7b3dSmrgstatic void brw_fb_write(struct brw_compile *p, int dw)
169428d7b3dSmrg{
170428d7b3dSmrg	struct brw_instruction *insn;
171428d7b3dSmrg	unsigned msg_control, msg_type, msg_len;
172428d7b3dSmrg	struct brw_reg src0;
173428d7b3dSmrg	bool header;
174428d7b3dSmrg
175428d7b3dSmrg	if (dw == 16) {
176428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
177428d7b3dSmrg		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
178428d7b3dSmrg		msg_len = 8;
179428d7b3dSmrg	} else {
180428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
181428d7b3dSmrg		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
182428d7b3dSmrg		msg_len = 4;
183428d7b3dSmrg	}
184428d7b3dSmrg
185428d7b3dSmrg	if (p->gen < 060) {
186428d7b3dSmrg		brw_push_insn_state(p);
187428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
188428d7b3dSmrg		brw_set_mask_control(p, BRW_MASK_DISABLE);
189428d7b3dSmrg		brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
190428d7b3dSmrg		brw_pop_insn_state(p);
191428d7b3dSmrg
192428d7b3dSmrg		msg_len += 2;
193428d7b3dSmrg	}
194428d7b3dSmrg
195428d7b3dSmrg	/* The execution mask is ignored for render target writes. */
196428d7b3dSmrg	insn = brw_next_insn(p, BRW_OPCODE_SEND);
197428d7b3dSmrg	insn->header.predicate_control = 0;
198428d7b3dSmrg	insn->header.compression_control = BRW_COMPRESSION_NONE;
199428d7b3dSmrg
200428d7b3dSmrg	if (p->gen >= 060) {
201428d7b3dSmrg		msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
202428d7b3dSmrg		src0 = brw_message_reg(2);
203428d7b3dSmrg		header = false;
204428d7b3dSmrg	} else {
205428d7b3dSmrg		insn->header.destreg__conditionalmod = 0;
206428d7b3dSmrg		msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
207428d7b3dSmrg		src0 = __retype_uw(brw_vec8_grf(0, 0));
208428d7b3dSmrg		header = true;
209428d7b3dSmrg	}
210428d7b3dSmrg
211428d7b3dSmrg	brw_set_dest(p, insn, null_result(dw));
212428d7b3dSmrg	brw_set_src0(p, insn, src0);
213428d7b3dSmrg	brw_set_dp_write_message(p, insn, 0,
214428d7b3dSmrg				 msg_control, msg_type, msg_len,
215428d7b3dSmrg				 header, true, 0, true, false);
216428d7b3dSmrg}
217428d7b3dSmrg
218428d7b3dSmrgstatic void brw_wm_write(struct brw_compile *p, int dw, int src)
219428d7b3dSmrg{
220428d7b3dSmrg	int n;
221428d7b3dSmrg
222428d7b3dSmrg	if (dw == 8 && p->gen >= 060) {
223428d7b3dSmrg		/* XXX pixel execution mask? */
224428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
225428d7b3dSmrg
226428d7b3dSmrg		brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0));
227428d7b3dSmrg		brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0));
228428d7b3dSmrg		brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
229428d7b3dSmrg		brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0));
230428d7b3dSmrg		goto done;
231428d7b3dSmrg	}
232428d7b3dSmrg
233428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
234428d7b3dSmrg
235428d7b3dSmrg	for (n = 0; n < 4; n++) {
236428d7b3dSmrg		if (p->gen >= 060) {
237428d7b3dSmrg			brw_MOV(p,
238428d7b3dSmrg				brw_message_reg(2 + 2*n),
239428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0));
240428d7b3dSmrg		} else if (p->gen >= 045 && dw == 16) {
241428d7b3dSmrg			brw_MOV(p,
242428d7b3dSmrg				brw_message_reg(2 + n + BRW_MRF_COMPR4),
243428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0));
244428d7b3dSmrg		} else {
245428d7b3dSmrg			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
246428d7b3dSmrg			brw_MOV(p,
247428d7b3dSmrg				brw_message_reg(2 + n),
248428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0));
249428d7b3dSmrg
250428d7b3dSmrg			if (dw == 16) {
251428d7b3dSmrg				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
252428d7b3dSmrg				brw_MOV(p,
253428d7b3dSmrg					brw_message_reg(2 + n + 4),
254428d7b3dSmrg					brw_vec8_grf(src + 2*n+1, 0));
255428d7b3dSmrg			}
256428d7b3dSmrg		}
257428d7b3dSmrg	}
258428d7b3dSmrg
259428d7b3dSmrgdone:
260428d7b3dSmrg	brw_fb_write(p, dw);
261428d7b3dSmrg}
262428d7b3dSmrg
263428d7b3dSmrgstatic void brw_wm_write__mask(struct brw_compile *p, int dw,
264428d7b3dSmrg			       int src, int mask)
265428d7b3dSmrg{
266428d7b3dSmrg	int n;
267428d7b3dSmrg
268428d7b3dSmrg	if (dw == 8 && p->gen >= 060) {
269428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
270428d7b3dSmrg
271428d7b3dSmrg		brw_MUL(p,
272428d7b3dSmrg			brw_message_reg(2),
273428d7b3dSmrg			brw_vec8_grf(src+0, 0),
274428d7b3dSmrg			brw_vec8_grf(mask, 0));
275428d7b3dSmrg		brw_MUL(p,
276428d7b3dSmrg			brw_message_reg(3),
277428d7b3dSmrg			brw_vec8_grf(src+1, 0),
278428d7b3dSmrg			brw_vec8_grf(mask, 0));
279428d7b3dSmrg		brw_MUL(p,
280428d7b3dSmrg			brw_message_reg(4),
281428d7b3dSmrg			brw_vec8_grf(src+2, 0),
282428d7b3dSmrg			brw_vec8_grf(mask, 0));
283428d7b3dSmrg		brw_MUL(p,
284428d7b3dSmrg			brw_message_reg(5),
285428d7b3dSmrg			brw_vec8_grf(src+3, 0),
286428d7b3dSmrg			brw_vec8_grf(mask, 0));
287428d7b3dSmrg
288428d7b3dSmrg		goto done;
289428d7b3dSmrg	}
290428d7b3dSmrg
291428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
292428d7b3dSmrg
293428d7b3dSmrg	for (n = 0; n < 4; n++) {
294428d7b3dSmrg		if (p->gen >= 060) {
295428d7b3dSmrg			brw_MUL(p,
296428d7b3dSmrg				brw_message_reg(2 + 2*n),
297428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
298428d7b3dSmrg				brw_vec8_grf(mask, 0));
299428d7b3dSmrg		} else if (p->gen >= 045 && dw == 16) {
300428d7b3dSmrg			brw_MUL(p,
301428d7b3dSmrg				brw_message_reg(2 + n + BRW_MRF_COMPR4),
302428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
303428d7b3dSmrg				brw_vec8_grf(mask, 0));
304428d7b3dSmrg		} else {
305428d7b3dSmrg			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
306428d7b3dSmrg			brw_MUL(p,
307428d7b3dSmrg				brw_message_reg(2 + n),
308428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
309428d7b3dSmrg				brw_vec8_grf(mask, 0));
310428d7b3dSmrg
311428d7b3dSmrg			if (dw == 16) {
312428d7b3dSmrg				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
313428d7b3dSmrg				brw_MUL(p,
314428d7b3dSmrg					brw_message_reg(2 + n + 4),
315428d7b3dSmrg					brw_vec8_grf(src + 2*n+1, 0),
316428d7b3dSmrg					brw_vec8_grf(mask+1, 0));
317428d7b3dSmrg			}
318428d7b3dSmrg		}
319428d7b3dSmrg	}
320428d7b3dSmrg
321428d7b3dSmrgdone:
322428d7b3dSmrg	brw_fb_write(p, dw);
323428d7b3dSmrg}
324428d7b3dSmrg
325428d7b3dSmrgstatic void brw_wm_write__opacity(struct brw_compile *p, int dw,
326428d7b3dSmrg				  int src, int mask)
327428d7b3dSmrg{
328428d7b3dSmrg	int n;
329428d7b3dSmrg
330428d7b3dSmrg	if (dw == 8 && p->gen >= 060) {
331428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
332428d7b3dSmrg
333428d7b3dSmrg		brw_MUL(p,
334428d7b3dSmrg			brw_message_reg(2),
335428d7b3dSmrg			brw_vec8_grf(src+0, 0),
336428d7b3dSmrg			brw_vec1_grf(mask, 3));
337428d7b3dSmrg		brw_MUL(p,
338428d7b3dSmrg			brw_message_reg(3),
339428d7b3dSmrg			brw_vec8_grf(src+1, 0),
340428d7b3dSmrg			brw_vec1_grf(mask, 3));
341428d7b3dSmrg		brw_MUL(p,
342428d7b3dSmrg			brw_message_reg(4),
343428d7b3dSmrg			brw_vec8_grf(src+2, 0),
344428d7b3dSmrg			brw_vec1_grf(mask, 3));
345428d7b3dSmrg		brw_MUL(p,
346428d7b3dSmrg			brw_message_reg(5),
347428d7b3dSmrg			brw_vec8_grf(src+3, 0),
348428d7b3dSmrg			brw_vec1_grf(mask, 3));
349428d7b3dSmrg
350428d7b3dSmrg		goto done;
351428d7b3dSmrg	}
352428d7b3dSmrg
353428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
354428d7b3dSmrg
355428d7b3dSmrg	for (n = 0; n < 4; n++) {
356428d7b3dSmrg		if (p->gen >= 060) {
357428d7b3dSmrg			brw_MUL(p,
358428d7b3dSmrg				brw_message_reg(2 + 2*n),
359428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
360428d7b3dSmrg				brw_vec1_grf(mask, 3));
361428d7b3dSmrg		} else if (p->gen >= 045 && dw == 16) {
362428d7b3dSmrg			brw_MUL(p,
363428d7b3dSmrg				brw_message_reg(2 + n + BRW_MRF_COMPR4),
364428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
365428d7b3dSmrg				brw_vec1_grf(mask, 3));
366428d7b3dSmrg		} else {
367428d7b3dSmrg			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
368428d7b3dSmrg			brw_MUL(p,
369428d7b3dSmrg				brw_message_reg(2 + n),
370428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
371428d7b3dSmrg				brw_vec1_grf(mask, 3));
372428d7b3dSmrg
373428d7b3dSmrg			if (dw == 16) {
374428d7b3dSmrg				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
375428d7b3dSmrg				brw_MUL(p,
376428d7b3dSmrg					brw_message_reg(2 + n + 4),
377428d7b3dSmrg					brw_vec8_grf(src + 2*n+1, 0),
378428d7b3dSmrg					brw_vec1_grf(mask, 3));
379428d7b3dSmrg			}
380428d7b3dSmrg		}
381428d7b3dSmrg	}
382428d7b3dSmrg
383428d7b3dSmrgdone:
384428d7b3dSmrg	brw_fb_write(p, dw);
385428d7b3dSmrg}
386428d7b3dSmrg
387428d7b3dSmrgstatic void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
388428d7b3dSmrg				  int src, int mask)
389428d7b3dSmrg{
390428d7b3dSmrg	int n;
391428d7b3dSmrg
392428d7b3dSmrg	if (dw == 8 && p->gen >= 060) {
393428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
394428d7b3dSmrg
395428d7b3dSmrg		brw_MUL(p,
396428d7b3dSmrg			brw_message_reg(2),
397428d7b3dSmrg			brw_vec8_grf(src  + 0, 0),
398428d7b3dSmrg			brw_vec8_grf(mask + 0, 0));
399428d7b3dSmrg		brw_MUL(p,
400428d7b3dSmrg			brw_message_reg(3),
401428d7b3dSmrg			brw_vec8_grf(src  + 1, 0),
402428d7b3dSmrg			brw_vec8_grf(mask + 1, 0));
403428d7b3dSmrg		brw_MUL(p,
404428d7b3dSmrg			brw_message_reg(4),
405428d7b3dSmrg			brw_vec8_grf(src  + 2, 0),
406428d7b3dSmrg			brw_vec8_grf(mask + 2, 0));
407428d7b3dSmrg		brw_MUL(p,
408428d7b3dSmrg			brw_message_reg(5),
409428d7b3dSmrg			brw_vec8_grf(src  + 3, 0),
410428d7b3dSmrg			brw_vec8_grf(mask + 3, 0));
411428d7b3dSmrg
412428d7b3dSmrg		goto done;
413428d7b3dSmrg	}
414428d7b3dSmrg
415428d7b3dSmrg	brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
416428d7b3dSmrg
417428d7b3dSmrg	for (n = 0; n < 4; n++) {
418428d7b3dSmrg		if (p->gen >= 060) {
419428d7b3dSmrg			brw_MUL(p,
420428d7b3dSmrg				brw_message_reg(2 + 2*n),
421428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
422428d7b3dSmrg				brw_vec8_grf(mask + 2*n, 0));
423428d7b3dSmrg		} else if (p->gen >= 045 && dw == 16) {
424428d7b3dSmrg			brw_MUL(p,
425428d7b3dSmrg				brw_message_reg(2 + n + BRW_MRF_COMPR4),
426428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
427428d7b3dSmrg				brw_vec8_grf(mask + 2*n, 0));
428428d7b3dSmrg		} else {
429428d7b3dSmrg			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
430428d7b3dSmrg			brw_MUL(p,
431428d7b3dSmrg				brw_message_reg(2 + n),
432428d7b3dSmrg				brw_vec8_grf(src + 2*n, 0),
433428d7b3dSmrg				brw_vec8_grf(mask + 2*n, 0));
434428d7b3dSmrg
435428d7b3dSmrg			if (dw == 16) {
436428d7b3dSmrg				brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
437428d7b3dSmrg				brw_MUL(p,
438428d7b3dSmrg					brw_message_reg(2 + n + 4),
439428d7b3dSmrg					brw_vec8_grf(src + 2*n + 1, 0),
440428d7b3dSmrg					brw_vec8_grf(mask + 2*n + 1, 0));
441428d7b3dSmrg			}
442428d7b3dSmrg		}
443428d7b3dSmrg	}
444428d7b3dSmrg
445428d7b3dSmrgdone:
446428d7b3dSmrg	brw_fb_write(p, dw);
447428d7b3dSmrg}
448428d7b3dSmrg
449428d7b3dSmrgbool
450428d7b3dSmrgbrw_wm_kernel__affine(struct brw_compile *p, int dispatch)
451428d7b3dSmrg{
452428d7b3dSmrg	if (p->gen < 060)
453428d7b3dSmrg		brw_wm_xy(p, dispatch);
454428d7b3dSmrg	brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12));
455428d7b3dSmrg
456428d7b3dSmrg	return true;
457428d7b3dSmrg}
458428d7b3dSmrg
459428d7b3dSmrgbool
460428d7b3dSmrgbrw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
461428d7b3dSmrg{
462428d7b3dSmrg	int src, mask;
463428d7b3dSmrg
464428d7b3dSmrg	if (p->gen < 060)
465428d7b3dSmrg		brw_wm_xy(p, dispatch);
466428d7b3dSmrg
467428d7b3dSmrg	src = brw_wm_affine(p, dispatch, 0, 1, 12);
468428d7b3dSmrg	mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20);
469428d7b3dSmrg	brw_wm_write__mask(p, dispatch, src, mask);
470428d7b3dSmrg
471428d7b3dSmrg	return true;
472428d7b3dSmrg}
473428d7b3dSmrg
474428d7b3dSmrgbool
475428d7b3dSmrgbrw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
476428d7b3dSmrg{
477428d7b3dSmrg	int src, mask;
478428d7b3dSmrg
479428d7b3dSmrg	if (p->gen < 060)
480428d7b3dSmrg		brw_wm_xy(p, dispatch);
481428d7b3dSmrg
482428d7b3dSmrg	src = brw_wm_affine(p, dispatch, 0, 1, 12);
483428d7b3dSmrg	mask = brw_wm_affine(p, dispatch, 1, 6, 20);
484428d7b3dSmrg	brw_wm_write__mask_ca(p, dispatch, src, mask);
485428d7b3dSmrg
486428d7b3dSmrg	return true;
487428d7b3dSmrg}
488428d7b3dSmrg
489428d7b3dSmrgbool
490428d7b3dSmrgbrw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
491428d7b3dSmrg{
492428d7b3dSmrg	int src, mask;
493428d7b3dSmrg
494428d7b3dSmrg	if (p->gen < 060)
495428d7b3dSmrg		brw_wm_xy(p, dispatch);
496428d7b3dSmrg
497428d7b3dSmrg	src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
498428d7b3dSmrg	mask = brw_wm_affine(p, dispatch, 1, 6, 16);
499428d7b3dSmrg	brw_wm_write__mask(p, dispatch, mask, src);
500428d7b3dSmrg
501428d7b3dSmrg	return true;
502428d7b3dSmrg}
503428d7b3dSmrg
504428d7b3dSmrg/* Projective variants */
505428d7b3dSmrg
506428d7b3dSmrgstatic void brw_wm_projective_st(struct brw_compile *p, int dw,
507428d7b3dSmrg				 int channel, int msg)
508428d7b3dSmrg{
509428d7b3dSmrg	int uv;
510428d7b3dSmrg
511428d7b3dSmrg	if (dw == 16) {
512428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
513428d7b3dSmrg		uv = p->gen >= 060 ? 6 : 3;
514428d7b3dSmrg	} else {
515428d7b3dSmrg		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
516428d7b3dSmrg		uv = p->gen >= 060 ? 4 : 3;
517428d7b3dSmrg	}
518428d7b3dSmrg	uv += 2*channel;
519428d7b3dSmrg
520428d7b3dSmrg	msg++;
521428d7b3dSmrg	if (p->gen >= 060) {
522428d7b3dSmrg		/* First compute 1/z */
523428d7b3dSmrg		brw_PLN(p,
524428d7b3dSmrg			brw_vec8_grf(30, 0),
525428d7b3dSmrg			brw_vec1_grf(uv+1, 0),
526428d7b3dSmrg			brw_vec8_grf(2, 0));
527428d7b3dSmrg
528428d7b3dSmrg		if (dw == 16) {
529428d7b3dSmrg			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
530428d7b3dSmrg			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
531428d7b3dSmrg			brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
532428d7b3dSmrg			brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
533428d7b3dSmrg		} else
534428d7b3dSmrg			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
535428d7b3dSmrg
536428d7b3dSmrg		brw_PLN(p,
537428d7b3dSmrg			brw_vec8_grf(26, 0),
538428d7b3dSmrg			brw_vec1_grf(uv, 0),
539428d7b3dSmrg			brw_vec8_grf(2, 0));
540428d7b3dSmrg		brw_PLN(p,
541428d7b3dSmrg			brw_vec8_grf(28, 0),
542428d7b3dSmrg			brw_vec1_grf(uv, 4),
543428d7b3dSmrg			brw_vec8_grf(2, 0));
544428d7b3dSmrg
545428d7b3dSmrg		brw_MUL(p,
546428d7b3dSmrg			brw_message_reg(msg),
547428d7b3dSmrg			brw_vec8_grf(26, 0),
548428d7b3dSmrg			brw_vec8_grf(30, 0));
549428d7b3dSmrg		brw_MUL(p,
550428d7b3dSmrg			brw_message_reg(msg + dw/8),
551428d7b3dSmrg			brw_vec8_grf(28, 0),
552428d7b3dSmrg			brw_vec8_grf(30, 0));
553428d7b3dSmrg	} else {
554428d7b3dSmrg		struct brw_reg r = brw_vec1_grf(uv, 0);
555428d7b3dSmrg
556428d7b3dSmrg		/* First compute 1/z */
557428d7b3dSmrg		brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
558428d7b3dSmrg		brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));
559428d7b3dSmrg
560428d7b3dSmrg		if (dw == 16) {
561428d7b3dSmrg			brw_set_compression_control(p, BRW_COMPRESSION_NONE);
562428d7b3dSmrg			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
563428d7b3dSmrg			brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
564428d7b3dSmrg			brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
565428d7b3dSmrg		} else
566428d7b3dSmrg			brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
567428d7b3dSmrg
568428d7b3dSmrg		/* Now compute the output s,t values */
569428d7b3dSmrg		brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
570428d7b3dSmrg		brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
571428d7b3dSmrg		brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
572428d7b3dSmrg		msg += dw/8;
573428d7b3dSmrg
574428d7b3dSmrg		brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
575428d7b3dSmrg		brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
576428d7b3dSmrg		brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
577428d7b3dSmrg	}
578428d7b3dSmrg}
579428d7b3dSmrg
/*
 * Emit the projective coordinate setup for `channel`, then a four-component
 * sample of it.  Returns the value reported by brw_wm_sample().
 */
static int brw_wm_projective(struct brw_compile *p, int dw,
			     int channel, int msg, int result)
{
	int out;

	brw_wm_projective_st(p, dw, channel, msg);
	out = brw_wm_sample(p, dw, channel, msg, result);

	return out;
}
586428d7b3dSmrg
/*
 * Emit the projective coordinate setup for `channel`, then the W-only
 * sample.  Returns the value reported by brw_wm_sample__alpha().
 */
static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
				     int channel, int msg, int result)
{
	int out;

	brw_wm_projective_st(p, dw, channel, msg);
	out = brw_wm_sample__alpha(p, dw, channel, msg, result);

	return out;
}
593428d7b3dSmrg
594428d7b3dSmrgbool
595428d7b3dSmrgbrw_wm_kernel__projective(struct brw_compile *p, int dispatch)
596428d7b3dSmrg{
597428d7b3dSmrg	if (p->gen < 060)
598428d7b3dSmrg		brw_wm_xy(p, dispatch);
599428d7b3dSmrg	brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12));
600428d7b3dSmrg
601428d7b3dSmrg	return true;
602428d7b3dSmrg}
603428d7b3dSmrg
604428d7b3dSmrgbool
605428d7b3dSmrgbrw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
606428d7b3dSmrg{
607428d7b3dSmrg	int src, mask;
608428d7b3dSmrg
609428d7b3dSmrg	if (p->gen < 060)
610428d7b3dSmrg		brw_wm_xy(p, dispatch);
611428d7b3dSmrg
612428d7b3dSmrg	src = brw_wm_projective(p, dispatch, 0, 1, 12);
613428d7b3dSmrg	mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20);
614428d7b3dSmrg	brw_wm_write__mask(p, dispatch, src, mask);
615428d7b3dSmrg
616428d7b3dSmrg	return true;
617428d7b3dSmrg}
618428d7b3dSmrg
619428d7b3dSmrgbool
620428d7b3dSmrgbrw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
621428d7b3dSmrg{
622428d7b3dSmrg	int src, mask;
623428d7b3dSmrg
624428d7b3dSmrg	if (p->gen < 060)
625428d7b3dSmrg		brw_wm_xy(p, dispatch);
626428d7b3dSmrg
627428d7b3dSmrg	src = brw_wm_projective(p, dispatch, 0, 1, 12);
628428d7b3dSmrg	mask = brw_wm_projective(p, dispatch, 1, 6, 20);
629428d7b3dSmrg	brw_wm_write__mask_ca(p, dispatch, src, mask);
630428d7b3dSmrg
631428d7b3dSmrg	return true;
632428d7b3dSmrg}
633428d7b3dSmrg
634428d7b3dSmrgbool
635428d7b3dSmrgbrw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
636428d7b3dSmrg{
637428d7b3dSmrg	int src, mask;
638428d7b3dSmrg
639428d7b3dSmrg	if (p->gen < 060)
640428d7b3dSmrg		brw_wm_xy(p, dispatch);
641428d7b3dSmrg
642428d7b3dSmrg	src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
643428d7b3dSmrg	mask = brw_wm_projective(p, dispatch, 1, 6, 16);
644428d7b3dSmrg	brw_wm_write__mask(p, dispatch, mask, src);
645428d7b3dSmrg
646428d7b3dSmrg	return true;
647428d7b3dSmrg}
648428d7b3dSmrg
649428d7b3dSmrgbool
650428d7b3dSmrgbrw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
651428d7b3dSmrg{
652428d7b3dSmrg	int src, mask;
653428d7b3dSmrg
654428d7b3dSmrg	if (p->gen < 060) {
655428d7b3dSmrg		brw_wm_xy(p, dispatch);
656428d7b3dSmrg		mask = 5;
657428d7b3dSmrg	} else
658428d7b3dSmrg		mask = dispatch == 16 ? 8 : 6;
659428d7b3dSmrg
660428d7b3dSmrg	src = brw_wm_affine(p, dispatch, 0, 1, 12);
661428d7b3dSmrg	brw_wm_write__opacity(p, dispatch, src, mask);
662428d7b3dSmrg
663428d7b3dSmrg	return true;
664428d7b3dSmrg}
665428d7b3dSmrg
666428d7b3dSmrgbool
667428d7b3dSmrgbrw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
668428d7b3dSmrg{
669428d7b3dSmrg	int src, mask;
670428d7b3dSmrg
671428d7b3dSmrg	if (p->gen < 060) {
672428d7b3dSmrg		brw_wm_xy(p, dispatch);
673428d7b3dSmrg		mask = 5;
674428d7b3dSmrg	} else
675428d7b3dSmrg		mask = dispatch == 16 ? 8 : 6;
676428d7b3dSmrg
677428d7b3dSmrg	src = brw_wm_projective(p, dispatch, 0, 1, 12);
678428d7b3dSmrg	brw_wm_write__opacity(p, dispatch, src, mask);
679428d7b3dSmrg
680428d7b3dSmrg	return true;
681428d7b3dSmrg}
682