gen8_eu.c revision 42542f5f
1/*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#include <string.h>
29
30#include "compiler.h"
31#include "brw/brw.h"
32#include "gen8_eu.h"
33
#ifndef ARRAY_SIZE
/* Element count of a true array (never a pointer).  The argument is
 * parenthesized before subscripting so that any array-valued expression,
 * not just a plain identifier, expands correctly.
 */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
37
38/* EU ISA */
39
/* Message registers are remapped by this file onto general registers,
 * starting at this register number.
 */
#define MRF_HACK_START 111

/* One EU instruction: 128 bits held as four 32-bit dwords, so that
 * instruction bit N lives in data[N / 32].
 */
struct gen8_instruction {
	uint32_t data[4];
};
45
/**
 * Build a right-aligned mask with (high - low + 1) low bits set.
 *
 * \param high  index of the most significant bit of the field
 * \param low   index of the least significant bit of the field
 * \return the mask, not yet shifted into position
 *
 * The arithmetic is done on an unsigned constant and a field covering a
 * whole dword is special-cased: shifting a signed 1 by 31 overflows and
 * shifting by 32 is undefined.
 */
static inline unsigned
__gen8_mask(unsigned high, unsigned low)
{
	unsigned width;

	assert(high >= low);
	width = high - low + 1;

	if (width >= 32)
		return 0xffffffffu;

	return (1u << width) - 1;
}
52
53/**
54 * Fetch a set of contiguous bits from the instruction.
55 *
56 * Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
57 */
58static inline unsigned
59__gen8_bits(struct gen8_instruction *insn, unsigned high, unsigned low)
60{
61	/* We assume the field doesn't cross 32-bit boundaries. */
62	const unsigned word = high / 32;
63
64	assert(word == low / 32);
65
66	high %= 32;
67	low %= 32;
68
69	return (insn->data[word] >> low) & __gen8_mask(high, low);
70}
71
72/**
73 * Set bits in the instruction, with proper shifting and masking.
74 *
75 * Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
76 */
77static inline void
78__gen8_set_bits(struct gen8_instruction *insn,
79		unsigned high,
80		unsigned low,
81		unsigned value)
82{
83	const unsigned word = high / 32;
84	unsigned mask;
85
86	assert(word == low / 32);
87
88	high %= 32;
89	low %= 32;
90	assert(value < __gen8_mask(high, low) + 1);
91
92	mask = __gen8_mask(high, low) << low;
93	insn->data[word] &= ~mask;
94	insn->data[word] |= (value << low) & mask;
95
96	assert(__gen8_bits(insn, 32*word+high, 32*word+low) == value);
97}
98
/*
 * F(field, hi, lo) emits an accessor pair for the instruction bits hi..lo:
 *   __gen8_set_<field>(insn, v)  stores v into the field
 *   __gen8_<field>(insn)         reads the field back
 * Each use expands to two complete function definitions.
 */
#define F(field, hi, lo) \
static inline void \
__gen8_set_##field(struct gen8_instruction *insn, unsigned v) \
{ \
	__gen8_set_bits(insn, hi, lo, v); \
} \
static inline unsigned \
__gen8_##field(struct gen8_instruction *insn) \
{ \
	return __gen8_bits(insn, hi, lo); \
}
108
109/**
110* Direct addressing only:
111*  @{
112*/
113F(src1_da_reg_nr,      108, 101);
114F(src0_da_reg_nr,       76,  69);
115F(dst_da1_hstride,      62,  61);
116F(dst_da_reg_nr,        60,  53);
117F(dst_da16_subreg_nr,   52,  52);
118F(dst_da1_subreg_nr,    52,  48);
119F(da16_writemask,       51,  48); /* Dst.ChanEn */
120/** @} */
121
122F(src1_vert_stride,    120, 117)
123F(src1_da1_width,      116, 114)
124F(src1_da16_swiz_w,    115, 114)
125F(src1_da16_swiz_z,    113, 112)
126F(src1_da1_hstride,    113, 112)
127F(src1_address_mode,   111, 111)
128/** Src1.SrcMod @{ */
129F(src1_negate,         110, 110)
130F(src1_abs,            109, 109)
131/** @} */
132F(src1_da16_subreg_nr, 100, 100)
133F(src1_da1_subreg_nr,  100,  96)
134F(src1_da16_swiz_y,     99,  98)
135F(src1_da16_swiz_x,     97,  96)
136F(src1_reg_type,        94,  91)
137F(src1_reg_file,        90,  89)
138F(src0_vert_stride,     88,  85)
139F(src0_da1_width,       84,  82)
140F(src0_da16_swiz_w,     83,  82)
141F(src0_da16_swiz_z,     81,  80)
142F(src0_da1_hstride,     81,  80)
143F(src0_address_mode,    79,  79)
144/** Src0.SrcMod @{ */
145F(src0_negate,          78,  78)
146F(src0_abs,             77,  77)
147/** @} */
148F(src0_da16_subreg_nr,  68,  68)
149F(src0_da1_subreg_nr,   68,  64)
150F(src0_da16_swiz_y,     67,  66)
151F(src0_da16_swiz_x,     65,  64)
152F(dst_address_mode,     63,  63)
153F(src0_reg_type,        46,  43)
154F(src0_reg_file,        42,  41)
155F(dst_reg_type,         40,  37)
156F(dst_reg_file,         36,  35)
157F(mask_control,         34,  34)
158F(flag_reg_nr,          33,  33)
159F(flag_subreg_nr,       32,  32)
160F(saturate,             31,  31)
161F(branch_control,       30,  30)
162F(debug_control,        30,  30)
163F(cmpt_control,         29,  29)
164F(acc_wr_control,       28,  28)
165F(cond_modifier,        27,  24)
166F(exec_size,            23,  21)
167F(pred_inv,             20,  20)
168F(pred_control,         19,  16)
169F(thread_control,       15,  14)
170F(qtr_control,          13,  12)
171F(nib_control,          11,  11)
172F(dep_control,          10,   9)
173F(access_mode,           8,   8)
174/* Bit 7 is Reserved (for future Opcode expansion) */
175F(opcode,                6,   0)
176
177/**
178* Three-source instructions:
179*  @{
180*/
181F(src2_3src_reg_nr,    125, 118)
182F(src2_3src_subreg_nr, 117, 115)
183F(src2_3src_swizzle,   114, 107)
184F(src2_3src_rep_ctrl,  106, 106)
185F(src1_3src_reg_nr,    104,  97)
186F(src1_3src_subreg_hi,  96,  96)
187F(src1_3src_subreg_lo,  95,  94)
188F(src1_3src_swizzle,    93,  86)
189F(src1_3src_rep_ctrl,   85,  85)
190F(src0_3src_reg_nr,     83,  76)
191F(src0_3src_subreg_nr,  75,  73)
192F(src0_3src_swizzle,    72,  65)
193F(src0_3src_rep_ctrl,   64,  64)
194F(dst_3src_reg_nr,      63,  56)
195F(dst_3src_subreg_nr,   55,  53)
196F(dst_3src_writemask,   52,  49)
197F(dst_3src_type,        48,  46)
198F(src_3src_type,        45,  43)
199F(src2_3src_negate,     42,  42)
200F(src2_3src_abs,        41,  41)
201F(src1_3src_negate,     40,  40)
202F(src1_3src_abs,        39,  39)
203F(src0_3src_negate,     38,  38)
204F(src0_3src_abs,        37,  37)
205/** @} */
206
207/**
208* Fields for SEND messages:
209*  @{
210*/
211F(eot,                 127, 127)
212F(mlen,                124, 121)
213F(rlen,                120, 116)
214F(header_present,      115, 115)
215F(function_control,    114,  96)
216F(sfid,                 27,  24)
217F(math_function,        27,  24)
218/** @} */
219
220/**
221* URB message function control bits:
222*  @{
223*/
224F(urb_per_slot_offset, 113, 113)
225F(urb_interleave,      111, 111)
226F(urb_global_offset,   110, 100)
227F(urb_opcode,           99,  96)
228/** @} */
229
230/**
231* Sampler message function control bits:
232*  @{
233*/
234F(sampler_simd_mode,   114, 113)
235F(sampler_msg_type,    112, 108)
236F(sampler,             107, 104)
237F(binding_table_index, 103,  96)
238/** @} */
239
240/**
241 * Data port message function control bits:
242 *  @ {
243 */
244F(dp_category,            114, 114)
245F(dp_message_type,        113, 110)
246F(dp_message_control,     109, 104)
247F(dp_binding_table_index, 103,  96)
248/** @} */
249
250/**
251 * Thread Spawn message function control bits:
252 *  @ {
253 */
254F(ts_resource_select,     100, 100)
255F(ts_request_type,         97,  97)
256F(ts_opcode,               96,  96)
257/** @} */
258
259/**
260 * Video Motion Estimation message function control bits:
261 *  @ {
262 */
263F(vme_message_type,        110, 109)
264F(vme_binding_table_index, 103,  96)
265/** @} */
266
267/**
268 * Check & Refinement Engine message function control bits:
269 *  @ {
270 */
271F(cre_message_type,        110, 109)
272F(cre_binding_table_index, 103,  96)
273/** @} */
274
275#undef F
276
277/**
278* Flow control instruction bits:
279*  @{
280*/
281static inline unsigned __gen8_uip(struct gen8_instruction *insn)
282{
283	return insn->data[2];
284}
285
286static inline void __gen8_set_uip(struct gen8_instruction *insn, unsigned uip)
287{
288	insn->data[2] = uip;
289}
290
291static inline unsigned __gen8_jip(struct gen8_instruction *insn)
292{
293	return insn->data[3];
294}
295
296static inline void __gen8_set_jip(struct gen8_instruction *insn, unsigned jip)
297{
298	insn->data[3] = jip;
299}
300/** @} */
301
302static inline int __gen8_src1_imm_d(struct gen8_instruction *insn)
303{
304	return insn->data[3];
305}
306
307static inline unsigned __gen8_src1_imm_ud(struct gen8_instruction *insn)
308{
309	return insn->data[3];
310}
311
312static inline float __gen8_src1_imm_f(struct gen8_instruction *insn)
313{
314	union {
315		uint32_t u;
316		float f;
317	} ft = { insn->data[3] };
318	return ft.f;
319}
320
321static void
322__gen8_set_dst(struct brw_compile *p,
323	       struct gen8_instruction *inst,
324	       struct brw_reg reg)
325{
326	/* MRFs haven't existed since Gen7, so we better not be using them. */
327	if (reg.file == BRW_MESSAGE_REGISTER_FILE) {
328		reg.file = BRW_GENERAL_REGISTER_FILE;
329		reg.nr += MRF_HACK_START;
330	}
331
332	assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
333
334	if (reg.file == BRW_GENERAL_REGISTER_FILE)
335		assert(reg.nr < BRW_MAX_GRF);
336
337	__gen8_set_dst_reg_file(inst, reg.file);
338	__gen8_set_dst_reg_type(inst, reg.type);
339
340	assert(reg.address_mode == BRW_ADDRESS_DIRECT);
341
342	__gen8_set_dst_da_reg_nr(inst, reg.nr);
343
344	if (__gen8_access_mode(inst) == BRW_ALIGN_1) {
345		/* Set Dst.SubRegNum[4:0] */
346		__gen8_set_dst_da1_subreg_nr(inst, reg.subnr);
347
348		/* Set Dst.HorzStride */
349		if (reg.hstride == BRW_HORIZONTAL_STRIDE_0)
350			reg.hstride = BRW_HORIZONTAL_STRIDE_1;
351		__gen8_set_dst_da1_hstride(inst, reg.hstride);
352	} else {
353		/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
354		assert(reg.subnr == 0 || reg.subnr == 16);
355		__gen8_set_dst_da16_subreg_nr(inst, reg.subnr >> 4);
356		__gen8_set_da16_writemask(inst, reg.dw1.bits.writemask);
357	}
358
359#if 1
360	if (reg.width == BRW_WIDTH_8 && p->compressed)
361		__gen8_set_exec_size(inst, BRW_EXECUTE_16);
362	else
363		__gen8_set_exec_size(inst, reg.width);
364#else
365	if (reg.width < BRW_EXECUTE_8)
366		__gen8_set_exec_size(inst, reg.width);
367#endif
368}
369
370static void
371__gen8_validate_reg(struct gen8_instruction *inst, struct brw_reg reg)
372{
373	int hstride_for_reg[] = {0, 1, 2, 4};
374	int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
375	int width_for_reg[] = {1, 2, 4, 8, 16};
376	int execsize_for_reg[] = {1, 2, 4, 8, 16};
377	int width, hstride, vstride, execsize;
378
379	if (reg.file == BRW_IMMEDIATE_VALUE) {
380		/* TODO: check immediate vectors */
381		return;
382	}
383
384	if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE)
385		return;
386
387	assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
388	hstride = hstride_for_reg[reg.hstride];
389
390	if (reg.vstride == 0xf) {
391		vstride = -1;
392	} else {
393		assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
394		vstride = vstride_for_reg[reg.vstride];
395	}
396
397	assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
398	width = width_for_reg[reg.width];
399
400	assert(__gen8_exec_size(inst) >= 0 &&
401	       __gen8_exec_size(inst) < ARRAY_SIZE(execsize_for_reg));
402	execsize = execsize_for_reg[__gen8_exec_size(inst)];
403
404	/* Restrictions from 3.3.10: Register Region Restrictions. */
405	/* 3. */
406	assert(execsize >= width);
407
408	/* 4. */
409	if (execsize == width && hstride != 0) {
410		assert(vstride == -1 || vstride == width * hstride);
411	}
412
413	/* 5. */
414	if (execsize == width && hstride == 0) {
415		/* no restriction on vstride. */
416	}
417
418	/* 6. */
419	if (width == 1) {
420		assert(hstride == 0);
421	}
422
423	/* 7. */
424	if (execsize == 1 && width == 1) {
425		assert(hstride == 0);
426		assert(vstride == 0);
427	}
428
429	/* 8. */
430	if (vstride == 0 && hstride == 0) {
431		assert(width == 1);
432	}
433
434	/* 10. Check destination issues. */
435}
436
437static void
438__gen8_set_src0(struct gen8_instruction *inst, struct brw_reg reg)
439{
440	/* MRFs haven't existed since Gen7, so we better not be using them. */
441	if (reg.file == BRW_MESSAGE_REGISTER_FILE) {
442		reg.file = BRW_GENERAL_REGISTER_FILE;
443		reg.nr += MRF_HACK_START;
444	}
445
446	if (reg.file == BRW_GENERAL_REGISTER_FILE)
447		assert(reg.nr < BRW_MAX_GRF);
448
449	__gen8_validate_reg(inst, reg);
450
451	__gen8_set_src0_reg_file(inst, reg.file);
452	__gen8_set_src0_reg_type(inst, reg.type);
453	__gen8_set_src0_abs(inst, reg.abs);
454	__gen8_set_src0_negate(inst, reg.negate);
455
456	assert(reg.address_mode == BRW_ADDRESS_DIRECT);
457
458	if (reg.file == BRW_IMMEDIATE_VALUE) {
459		inst->data[3] = reg.dw1.ud;
460
461		/* Required to set some fields in src1 as well: */
462		__gen8_set_src1_reg_file(inst, 0); /* arf */
463		__gen8_set_src1_reg_type(inst, reg.type);
464	} else {
465		__gen8_set_src0_da_reg_nr(inst, reg.nr);
466
467		if (__gen8_access_mode(inst) == BRW_ALIGN_1) {
468			/* Set Src0.SubRegNum[4:0] */
469			__gen8_set_src0_da1_subreg_nr(inst, reg.subnr);
470
471			if (reg.width == BRW_WIDTH_1 &&
472			    __gen8_exec_size(inst) == BRW_EXECUTE_1) {
473				__gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
474				__gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
475			} else {
476				__gen8_set_src0_da1_hstride(inst, reg.hstride);
477				__gen8_set_src0_vert_stride(inst, reg.vstride);
478			}
479			__gen8_set_src0_da1_width(inst, reg.width);
480		} else {
481			/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
482			assert(reg.subnr == 0 || reg.subnr == 16);
483			__gen8_set_src0_da16_subreg_nr(inst, reg.subnr >> 4);
484
485			__gen8_set_src0_da16_swiz_x(inst,
486						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
487							      BRW_CHANNEL_X));
488			__gen8_set_src0_da16_swiz_y(inst,
489						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
490							      BRW_CHANNEL_Y));
491			__gen8_set_src0_da16_swiz_z(inst,
492						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
493							      BRW_CHANNEL_Z));
494			__gen8_set_src0_da16_swiz_w(inst,
495						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
496							      BRW_CHANNEL_W));
497
498			/* This is an oddity of the fact that we're using the same
499			 * descriptions for registers in both Align16 and Align1 modes.
500			 */
501			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
502				__gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
503			else
504				__gen8_set_src0_vert_stride(inst, reg.vstride);
505		}
506	}
507}
508
509static void
510__gen8_set_src1(struct gen8_instruction *inst, struct brw_reg reg)
511{
512	/* MRFs haven't existed since Gen7, so we better not be using them. */
513	if (reg.file == BRW_MESSAGE_REGISTER_FILE) {
514		reg.file = BRW_GENERAL_REGISTER_FILE;
515		reg.nr += MRF_HACK_START;
516	}
517
518	if (reg.file == BRW_GENERAL_REGISTER_FILE)
519		assert(reg.nr < BRW_MAX_GRF);
520
521	__gen8_validate_reg(inst, reg);
522
523	__gen8_set_src1_reg_file(inst, reg.file);
524	__gen8_set_src1_reg_type(inst, reg.type);
525	__gen8_set_src1_abs(inst, reg.abs);
526	__gen8_set_src1_negate(inst, reg.negate);
527
528	/* Only src1 can be an immediate in two-argument instructions. */
529	assert(__gen8_src0_reg_file(inst) != BRW_IMMEDIATE_VALUE);
530
531	assert(reg.address_mode == BRW_ADDRESS_DIRECT);
532
533	if (reg.file == BRW_IMMEDIATE_VALUE) {
534		inst->data[3] = reg.dw1.ud;
535	} else {
536		__gen8_set_src1_da_reg_nr(inst, reg.nr);
537
538		if (__gen8_access_mode(inst) == BRW_ALIGN_1) {
539			/* Set Src0.SubRegNum[4:0] */
540			__gen8_set_src1_da1_subreg_nr(inst, reg.subnr);
541
542			if (reg.width == BRW_WIDTH_1 &&
543			    __gen8_exec_size(inst) == BRW_EXECUTE_1) {
544				__gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
545				__gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
546			} else {
547				__gen8_set_src1_da1_hstride(inst, reg.hstride);
548				__gen8_set_src1_vert_stride(inst, reg.vstride);
549			}
550			__gen8_set_src1_da1_width(inst, reg.width);
551		} else {
552			/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
553			assert(reg.subnr == 0 || reg.subnr == 16);
554			__gen8_set_src1_da16_subreg_nr(inst, reg.subnr >> 4);
555
556			__gen8_set_src1_da16_swiz_x(inst,
557						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
558							      BRW_CHANNEL_X));
559			__gen8_set_src1_da16_swiz_y(inst,
560						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
561							      BRW_CHANNEL_Y));
562			__gen8_set_src1_da16_swiz_z(inst,
563						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
564							      BRW_CHANNEL_Z));
565			__gen8_set_src1_da16_swiz_w(inst,
566						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
567							      BRW_CHANNEL_W));
568
569			/* This is an oddity of the fact that we're using the same
570			 * descriptions for registers in both Align16 and Align1 modes.
571			 */
572			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
573				__gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
574			else
575				__gen8_set_src1_vert_stride(inst, reg.vstride);
576		}
577	}
578}
579
580/**
581 * Set the Message Descriptor and Extended Message Descriptor fields
582 * for SEND messages.
583 *
584 * \note This zeroes out the Function Control bits, so it must be called
585 *       \b before filling out any message-specific data.  Callers can
586 *       choose not to fill in irrelevant bits; they will be zero.
587 */
588static void
589__gen8_set_message_descriptor(struct gen8_instruction *inst,
590			      enum brw_message_target sfid,
591			      unsigned msg_length,
592			      unsigned response_length,
593			      bool header_present,
594			      bool end_of_thread)
595{
596	__gen8_set_src1(inst, brw_imm_d(0));
597
598	__gen8_set_sfid(inst, sfid);
599	__gen8_set_mlen(inst, msg_length);
600	__gen8_set_rlen(inst, response_length);
601	__gen8_set_header_present(inst, header_present);
602	__gen8_set_eot(inst, end_of_thread);
603}
604
#if 0
/* Disabled: URB write message setup, kept for reference.
 * NOTE(review): the opcode argument is ignored; the URB opcode field is
 * always written as 0.
 */
static void
__gen8_set_urb_message(struct gen8_instruction *inst,
		       unsigned opcode,
		       unsigned msg_length,
		       unsigned response_length,
		       bool end_of_thread,
		       unsigned offset,
		       bool interleave)
{
	__gen8_set_message_descriptor(inst, BRW_SFID_URB,
				      msg_length, response_length,
				      true, end_of_thread);
	__gen8_set_src0(inst, brw_vec8_grf(MRF_HACK_START + 1, 0));

	__gen8_set_urb_opcode(inst, 0); /* URB_WRITE_HWORD */
	__gen8_set_urb_global_offset(inst, offset);
	__gen8_set_urb_interleave(inst, interleave);

	/* per_slot_offset = 0 makes it ignore offsets in message header */
	__gen8_set_urb_per_slot_offset(inst, 0);
}
#endif
625
626static void
627__gen8_set_sampler_message(struct gen8_instruction *inst,
628			   unsigned binding_table_index,
629			   unsigned sampler,
630			   unsigned msg_type,
631			   unsigned response_length,
632			   unsigned msg_length,
633			   bool header_present,
634			   unsigned simd_mode)
635{
636	__gen8_set_message_descriptor(inst, BRW_SFID_SAMPLER, msg_length,
637				      response_length, header_present, false);
638
639	__gen8_set_binding_table_index(inst, binding_table_index);
640	__gen8_set_sampler(inst, sampler);
641	__gen8_set_sampler_msg_type(inst, msg_type);
642	__gen8_set_sampler_simd_mode(inst, simd_mode);
643}
644
645static void
646__gen8_set_dp_message(struct gen8_instruction *inst,
647		      enum brw_message_target sfid,
648		      unsigned binding_table_index,
649		      unsigned msg_type,
650		      unsigned msg_control,
651		      unsigned mlen,
652		      unsigned rlen,
653		      bool header_present,
654		      bool end_of_thread)
655{
656	/* Binding table index is from 0..255 */
657	assert((binding_table_index & 0xff) == binding_table_index);
658
659	/* Message Type is only 5 bits */
660	assert((msg_type & 0x1f) == msg_type);
661
662	/* Message Control is only 6 bits */
663	assert((msg_control & 0x3f) == msg_control);
664
665	__gen8_set_message_descriptor(inst, sfid, mlen, rlen, header_present,
666				      end_of_thread);
667	__gen8_set_function_control(inst,
668				    binding_table_index | msg_type << 14 | msg_control << 8);
669}
670
671static inline struct gen8_instruction *
672gen8_next_insn(struct brw_compile *p, int opcode)
673{
674	struct gen8_instruction *insn;
675
676	assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
677
678	insn = memcpy(&p->store[p->nr_insn++], p->current, sizeof(*insn));
679	__gen8_set_opcode(insn, opcode);
680
681	return insn;
682}
683
684static void gen8_math(struct brw_compile *p,
685		      struct brw_reg dst,
686		      unsigned function,
687		      unsigned saturate,
688		      unsigned msg_reg_nr,
689		      struct brw_reg src,
690		      unsigned data_type,
691		      unsigned precision)
692{
693	struct gen8_instruction *insn = gen8_next_insn(p, BRW_OPCODE_MATH);
694
695	assert(dst.file == BRW_GENERAL_REGISTER_FILE);
696	assert(src.file == BRW_GENERAL_REGISTER_FILE);
697
698	assert(dst.hstride == BRW_HORIZONTAL_STRIDE_1);
699	assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
700
701	/* Source modifiers are ignored for extended math instructions. */
702	assert(!src.negate);
703	assert(!src.abs);
704
705	if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
706	    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
707		assert(src.type == BRW_REGISTER_TYPE_F);
708	}
709
710	/* Math is the same ISA format as other opcodes, except that CondModifier
711	 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
712	 */
713	__gen8_set_cond_modifier(insn, function);
714	__gen8_set_saturate(insn, saturate);
715
716	__gen8_set_dst(p, insn, dst);
717	__gen8_set_src0(insn, src);
718	__gen8_set_src1(insn, brw_null_reg());
719}
720
721static inline void gen8_math_invert(struct brw_compile *p,
722				    struct brw_reg dst,
723				    struct brw_reg src)
724{
725	gen8_math(p,
726		  dst,
727		  BRW_MATH_FUNCTION_INV,
728		  BRW_MATH_SATURATE_NONE,
729		  0,
730		  src,
731		  BRW_MATH_PRECISION_FULL,
732		  BRW_MATH_DATA_VECTOR);
733
734}
735
736/* Helpers for regular instructions: */
737static inline struct gen8_instruction *gen8_alu1(struct brw_compile *p,
738						 unsigned opcode,
739						 struct brw_reg dst,
740						 struct brw_reg src)
741{
742	struct gen8_instruction *insn = gen8_next_insn(p, opcode);
743	__gen8_set_dst(p, insn, dst);
744	__gen8_set_src0(insn, src);
745	return insn;
746}
747
748static inline struct gen8_instruction *gen8_alu2(struct brw_compile *p,
749						 unsigned opcode,
750						 struct brw_reg dst,
751						 struct brw_reg src0,
752						 struct brw_reg src1)
753{
754	struct gen8_instruction *insn = gen8_next_insn(p, opcode);
755	__gen8_set_dst(p, insn, dst);
756	__gen8_set_src0(insn, src0);
757	__gen8_set_src1(insn, src1);
758	return insn;
759}
760
/* ALU1(OP) defines gen8_OP(p, dst, src0), a wrapper that emits the
 * one-source instruction BRW_OPCODE_OP.
 */
#define ALU1(OP) \
static inline struct gen8_instruction * \
gen8_##OP(struct brw_compile *p, \
	  struct brw_reg dst, \
	  struct brw_reg src0) \
{ \
	return gen8_alu1(p, BRW_OPCODE_##OP, dst, src0); \
}
768
/* ALU2(OP) defines gen8_OP(p, dst, src0, src1), a wrapper that emits the
 * two-source instruction BRW_OPCODE_OP.
 */
#define ALU2(OP) \
static inline struct gen8_instruction * \
gen8_##OP(struct brw_compile *p, \
	  struct brw_reg dst, \
	  struct brw_reg src0, \
	  struct brw_reg src1) \
{ \
	return gen8_alu2(p, BRW_OPCODE_##OP, dst, src0, src1); \
}
777
778static inline struct gen8_instruction *gen8_ADD(struct brw_compile *p,
779						struct brw_reg dst,
780						struct brw_reg src0,
781						struct brw_reg src1)
782{
783	/* 6.2.2: add */
784	if (src0.type == BRW_REGISTER_TYPE_F ||
785	    (src0.file == BRW_IMMEDIATE_VALUE &&
786	     src0.type == BRW_REGISTER_TYPE_VF)) {
787		assert(src1.type != BRW_REGISTER_TYPE_UD);
788		assert(src1.type != BRW_REGISTER_TYPE_D);
789	}
790
791	if (src1.type == BRW_REGISTER_TYPE_F ||
792	    (src1.file == BRW_IMMEDIATE_VALUE &&
793	     src1.type == BRW_REGISTER_TYPE_VF)) {
794		assert(src0.type != BRW_REGISTER_TYPE_UD);
795		assert(src0.type != BRW_REGISTER_TYPE_D);
796	}
797
798	return gen8_alu2(p, BRW_OPCODE_ADD, dst, src0, src1);
799}
800
801static inline struct gen8_instruction *gen8_MUL(struct brw_compile *p,
802						struct brw_reg dst,
803						struct brw_reg src0,
804						struct brw_reg src1)
805{
806	/* 6.32.38: mul */
807	if (src0.type == BRW_REGISTER_TYPE_D ||
808	    src0.type == BRW_REGISTER_TYPE_UD ||
809	    src1.type == BRW_REGISTER_TYPE_D ||
810	    src1.type == BRW_REGISTER_TYPE_UD) {
811		assert(dst.type != BRW_REGISTER_TYPE_F);
812	}
813
814	if (src0.type == BRW_REGISTER_TYPE_F ||
815	    (src0.file == BRW_IMMEDIATE_VALUE &&
816	     src0.type == BRW_REGISTER_TYPE_VF)) {
817		assert(src1.type != BRW_REGISTER_TYPE_UD);
818		assert(src1.type != BRW_REGISTER_TYPE_D);
819	}
820
821	if (src1.type == BRW_REGISTER_TYPE_F ||
822	    (src1.file == BRW_IMMEDIATE_VALUE &&
823	     src1.type == BRW_REGISTER_TYPE_VF)) {
824		assert(src0.type != BRW_REGISTER_TYPE_UD);
825		assert(src0.type != BRW_REGISTER_TYPE_D);
826	}
827
828	assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
829	       src0.nr != BRW_ARF_ACCUMULATOR);
830	assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
831	       src1.nr != BRW_ARF_ACCUMULATOR);
832
833	return gen8_alu2(p, BRW_OPCODE_MUL, dst, src0, src1);
834}
835
836ALU1(MOV);
837ALU2(SEL);
838ALU1(NOT);
839ALU2(AND);
840ALU2(OR);
841ALU2(XOR);
842ALU2(SHR);
843ALU2(SHL);
844ALU2(RSR);
845ALU2(RSL);
846ALU2(ASR);
847ALU1(FRC);
848ALU1(RNDD);
849ALU2(MAC);
850ALU2(MACH);
851ALU1(LZD);
852ALU2(DP4);
853ALU2(DPH);
854ALU2(DP3);
855ALU2(DP2);
856ALU2(LINE);
857ALU2(PLN);
858
859ALU1(RNDZ);
860ALU1(RNDE);
861
862#undef ALU1
863#undef ALU2
864
865static void gen8_set_compression_control(struct brw_compile *p,
866					 enum brw_compression compression_control)
867{
868	unsigned v;
869
870	p->compressed = compression_control == BRW_COMPRESSION_COMPRESSED;
871
872	switch (compression_control) {
873	default: assert(0);
874	case BRW_COMPRESSION_NONE:       v = GEN6_COMPRESSION_1Q; break;
875	case BRW_COMPRESSION_2NDHALF:    v = GEN6_COMPRESSION_2Q; break;
876	case BRW_COMPRESSION_COMPRESSED: v = GEN6_COMPRESSION_1H; break;
877	}
878	__gen8_set_cmpt_control((struct gen8_instruction *)p->current, v);
879}
880
881static inline void gen8_set_mask_control(struct brw_compile *p, unsigned value)
882{
883	__gen8_set_mask_control((struct gen8_instruction *)p->current, value);
884}
885
886static inline void gen8_set_saturate(struct brw_compile *p, unsigned value)
887{
888	__gen8_set_saturate((struct gen8_instruction *)p->current, value);
889}
890
891static inline void gen8_set_acc_write_control(struct brw_compile *p, unsigned value)
892{
893	__gen8_set_acc_wr_control((struct gen8_instruction *)p->current, value);
894}
895
896static void gen8_SAMPLE(struct brw_compile *p,
897			struct brw_reg dst,
898			unsigned msg_reg_nr,
899			unsigned binding_table_index,
900			unsigned sampler,
901			unsigned writemask,
902			unsigned msg_type,
903			unsigned response_length,
904			unsigned msg_length,
905			bool header_present,
906			unsigned simd_mode)
907{
908	struct brw_reg src0 = brw_message_reg(msg_reg_nr);
909
910	assert(writemask);
911
912	if (writemask != WRITEMASK_XYZW) {
913		writemask = ~writemask & WRITEMASK_XYZW;
914
915		brw_push_insn_state(p);
916
917		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
918		gen8_set_mask_control(p, BRW_MASK_DISABLE);
919
920		gen8_MOV(p, __retype_ud(src0), __retype_ud(brw_vec8_grf(0,0)));
921		gen8_MOV(p, get_element_ud(src0, 2), brw_imm_ud(writemask << 12));
922
923		brw_pop_insn_state(p);
924	}
925
926	{
927		struct gen8_instruction *insn;
928
929		insn = gen8_next_insn(p, BRW_OPCODE_SEND);
930		__gen8_set_pred_control(insn, 0); /* XXX */
931		__gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q);
932
933		__gen8_set_dst(p, insn, dst);
934		__gen8_set_src0(insn, src0);
935		__gen8_set_sampler_message(insn,
936					   binding_table_index,
937					   sampler,
938					   msg_type,
939					   response_length,
940					   msg_length,
941					   header_present,
942					   simd_mode);
943	}
944}
945
946/* shader logic */
947
948static void wm_affine_st(struct brw_compile *p, int dw, int channel, int msg)
949{
950	int uv;
951
952	if (dw == 16) {
953		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
954		uv = 6;
955	} else {
956		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
957		uv = 4;
958	}
959	uv += 2*channel;
960
961	msg++;
962	gen8_PLN(p,
963		 brw_message_reg(msg),
964		 brw_vec1_grf(uv, 0),
965		 brw_vec8_grf(2, 0));
966	msg += dw/8;
967
968	gen8_PLN(p,
969		 brw_message_reg(msg),
970		 brw_vec1_grf(uv, 4),
971		 brw_vec8_grf(2, 0));
972}
973
974static inline unsigned simd(int dw)
975{
976	return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8;
977}
978
979static inline struct brw_reg sample_result(int dw, int result)
980{
981	return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
982		       BRW_REGISTER_TYPE_UW,
983		       dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
984		       dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
985		       BRW_HORIZONTAL_STRIDE_1,
986		       BRW_SWIZZLE_XYZW,
987		       WRITEMASK_XYZW);
988}
989
990static int wm_sample(struct brw_compile *p, int dw,
991		     int channel, int msg, int result)
992{
993	int len = dw == 16 ? 4 : 2;
994	gen8_SAMPLE(p, sample_result(dw, result), ++msg,
995		    channel+1, channel, WRITEMASK_XYZW, 0,
996		    2*len, len, false, simd(dw));
997	return result;
998}
999
1000static int wm_sample__alpha(struct brw_compile *p, int dw,
1001			    int channel, int msg, int result)
1002{
1003	int mlen, rlen;
1004
1005	if (dw == 8) {
1006		mlen = 3;
1007		rlen = 1;
1008	} else {
1009		mlen = 5;
1010		rlen = 2;
1011	}
1012
1013	gen8_SAMPLE(p, sample_result(dw, result), msg,
1014		    channel+1, channel, WRITEMASK_W, 0,
1015		    rlen, mlen, true, simd(dw));
1016
1017	return result;
1018}
1019
/* Fill the payload for 'channel' with wm_affine_st(), then sample all
 * four channels into GRF 'result'.  Returns 'result'.
 */
static int wm_affine(struct brw_compile *p, int dw,
		     int channel, int msg, int result)
{
	int reg;

	wm_affine_st(p, dw, channel, msg);
	reg = wm_sample(p, dw, channel, msg, result);

	return reg;
}
1026
/* Fill the payload for 'channel' with wm_affine_st(), then sample only
 * the W channel into GRF 'result'.  Returns 'result'.
 */
static int wm_affine__alpha(struct brw_compile *p, int dw,
			    int channel, int msg, int result)
{
	int reg;

	wm_affine_st(p, dw, channel, msg);
	reg = wm_sample__alpha(p, dw, channel, msg, result);

	return reg;
}
1033
1034static inline struct brw_reg null_result(int dw)
1035{
1036	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
1037		       BRW_REGISTER_TYPE_UW,
1038		       dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
1039		       dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
1040		       BRW_HORIZONTAL_STRIDE_1,
1041		       BRW_SWIZZLE_XYZW,
1042		       WRITEMASK_XYZW);
1043}
1044
1045static void fb_write(struct brw_compile *p, int dw)
1046{
1047	struct gen8_instruction *insn;
1048	unsigned msg_control, msg_len;
1049	struct brw_reg src0;
1050
1051	if (dw == 16) {
1052		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1053		msg_len = 8;
1054	} else {
1055		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1056		msg_len = 4;
1057	}
1058	msg_control |= 1 << 4; /* Last Render Target */
1059
1060	/* The execution mask is ignored for render target writes. */
1061	insn = gen8_next_insn(p, BRW_OPCODE_SEND);
1062	__gen8_set_pred_control(insn, 0);
1063	__gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q);
1064
1065	src0 = brw_message_reg(2);
1066
1067	__gen8_set_dst(p, insn, null_result(dw));
1068	__gen8_set_src0(insn, src0);
1069	__gen8_set_dp_message(insn,
1070			      GEN6_SFID_DATAPORT_RENDER_CACHE,
1071			      0,
1072			      GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
1073			      msg_control,
1074			      msg_len, 0,
1075			      false, true);
1076}
1077
1078static void wm_write(struct brw_compile *p, int dw, int src)
1079{
1080	int n;
1081
1082	if (dw == 8) {
1083		/* XXX pixel execution mask? */
1084		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1085		for (n = 0; n < 4; n++)
1086			gen8_MOV(p,
1087				 brw_message_reg(2 + n),
1088				 brw_vec8_grf(src + n, 0));
1089	} else {
1090		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1091		for (n = 0; n < 4; n++)
1092			gen8_MOV(p,
1093				 brw_message_reg(2 + 2*n),
1094				 brw_vec8_grf(src + 2*n, 0));
1095	}
1096
1097	fb_write(p, dw);
1098}
1099
1100static void wm_write__mask(struct brw_compile *p, int dw,
1101			   int src, int mask)
1102{
1103	int n;
1104
1105	if (dw == 8) {
1106		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1107		for (n = 0; n < 4; n++)
1108			gen8_MUL(p,
1109				 brw_message_reg(2 + n),
1110				 brw_vec8_grf(src + n, 0),
1111				 brw_vec8_grf(mask, 0));
1112	} else {
1113		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1114		for (n = 0; n < 4; n++)
1115			gen8_MUL(p,
1116				 brw_message_reg(2 + 2*n),
1117				 brw_vec8_grf(src + 2*n, 0),
1118				 brw_vec8_grf(mask, 0));
1119	}
1120
1121	fb_write(p, dw);
1122}
1123
1124static void wm_write__opacity(struct brw_compile *p, int dw, int src, int mask)
1125{
1126	int n;
1127
1128	if (dw == 8) {
1129		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1130		for (n = 0; n < 4; n++)
1131			gen8_MUL(p,
1132				 brw_message_reg(2 + n),
1133				 brw_vec8_grf(src + n, 0),
1134				 brw_vec1_grf(mask, 3));
1135	} else {
1136		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1137		for (n = 0; n < 4; n++)
1138			gen8_MUL(p,
1139				 brw_message_reg(2 + 2*n),
1140				 brw_vec8_grf(src + 2*n, 0),
1141				 brw_vec1_grf(mask, 3));
1142	}
1143
1144	fb_write(p, dw);
1145}
1146
1147static void wm_write__mask_ca(struct brw_compile *p, int dw,
1148			      int src, int mask)
1149{
1150	int n;
1151
1152	if (dw == 8) {
1153		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1154		for (n = 0; n < 4; n++)
1155			gen8_MUL(p,
1156				 brw_message_reg(2 + n),
1157				 brw_vec8_grf(src + n, 0),
1158				 brw_vec8_grf(mask + n, 0));
1159	} else {
1160		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1161		for (n = 0; n < 4; n++)
1162			gen8_MUL(p,
1163				 brw_message_reg(2 + 2*n),
1164				 brw_vec8_grf(src + 2*n, 0),
1165				 brw_vec8_grf(mask + 2*n, 0));
1166	}
1167
1168	fb_write(p, dw);
1169}
1170
1171static void gen8_compile_init(struct brw_compile *p)
1172{
1173	struct gen8_instruction *insn = memset(p->current, 0, sizeof(*insn));
1174	COMPILE_TIME_ASSERT(sizeof(*insn) == sizeof(*p->current));
1175	__gen8_set_mask_control(insn, BRW_MASK_ENABLE);
1176	__gen8_set_saturate(insn, 0);
1177	__gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q);
1178	//__gen8_set_pred_control(insn, 0xf);
1179}
1180
/*
 * Kernel with an affine source and no mask: sample channel 0 via
 * wm_affine() (msg 1, result 12) and pass the returned register to
 * wm_write().
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine(struct brw_compile *p, int dispatch)
{
	int src_reg;

	gen8_compile_init(p);

	src_reg = wm_affine(p, dispatch, 0, 1, 12);
	wm_write(p, dispatch, src_reg);

	return true;
}
1189
/*
 * Kernel with an affine source and an affine mask.
 *
 * Channel 0 is fetched with wm_affine() (msg 1, result 12) and channel 1
 * with wm_affine__alpha() (msg 6, result 20); the two returned registers
 * are combined and written by wm_write__mask().
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
	int src_reg, mask_reg;

	gen8_compile_init(p);

	src_reg = wm_affine(p, dispatch, 0, 1, 12);
	mask_reg = wm_affine__alpha(p, dispatch, 1, 6, 20);

	wm_write__mask(p, dispatch, src_reg, mask_reg);
	return true;
}
1203
/*
 * Kernel with an affine source and an affine mask, combined register by
 * register.
 *
 * Both channels are fetched with wm_affine() (channel 0: msg 1, result 12;
 * channel 1: msg 6, result 20) and handed to wm_write__mask_ca().
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
	int src_reg, mask_reg;

	gen8_compile_init(p);

	src_reg = wm_affine(p, dispatch, 0, 1, 12);
	mask_reg = wm_affine(p, dispatch, 1, 6, 20);

	wm_write__mask_ca(p, dispatch, src_reg, mask_reg);
	return true;
}
1217
/*
 * Kernel with an affine source and an affine mask where the source is
 * fetched with the alpha variant.
 *
 * Channel 0 goes through wm_affine__alpha() (msg 1, result 12) and
 * channel 1 through wm_affine() (msg 6, result 16).  Note the operand
 * order of the final call: the channel 1 register is passed as the `src`
 * argument of wm_write__mask() and the channel 0 register as its `mask`.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
	int src_alpha_reg, mask_reg;

	gen8_compile_init(p);

	src_alpha_reg = wm_affine__alpha(p, dispatch, 0, 1, 12);
	mask_reg = wm_affine(p, dispatch, 1, 6, 16);

	wm_write__mask(p, dispatch, mask_reg, src_alpha_reg);
	return true;
}
1231
1232/* Projective variants */
1233
1234static void wm_projective_st(struct brw_compile *p, int dw,
1235			     int channel, int msg)
1236{
1237	int uv;
1238
1239	gen8_compile_init(p);
1240
1241	if (dw == 16) {
1242		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1243		uv = 6;
1244	} else {
1245		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1246		uv = 4;
1247	}
1248	uv += 2*channel;
1249
1250	msg++;
1251	/* First compute 1/z */
1252	gen8_PLN(p,
1253		 brw_vec8_grf(30, 0),
1254		 brw_vec1_grf(uv+1, 0),
1255		 brw_vec8_grf(2, 0));
1256
1257	if (dw == 16) {
1258		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1259		gen8_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
1260		gen8_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
1261		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1262	} else
1263		gen8_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
1264
1265	gen8_PLN(p,
1266		 brw_vec8_grf(26, 0),
1267		 brw_vec1_grf(uv, 0),
1268		 brw_vec8_grf(2, 0));
1269	gen8_PLN(p,
1270		 brw_vec8_grf(28, 0),
1271		 brw_vec1_grf(uv, 0),
1272		 brw_vec8_grf(4, 0));
1273
1274	gen8_MUL(p,
1275		 brw_message_reg(msg),
1276		 brw_vec8_grf(26, 0),
1277		 brw_vec8_grf(30, 0));
1278	gen8_MUL(p,
1279		 brw_message_reg(msg + dw/8),
1280		 brw_vec8_grf(28, 0),
1281		 brw_vec8_grf(30, 0));
1282}
1283
/*
 * Projective channel fetch: reinitialise p->current, run
 * wm_projective_st() and then wm_sample() with the same dw/channel/msg
 * arguments.
 *
 * Returns whatever wm_sample() returns.
 */
static int wm_projective(struct brw_compile *p, int dw,
			 int channel, int msg, int result)
{
	int sampled;

	gen8_compile_init(p);
	wm_projective_st(p, dw, channel, msg);
	sampled = wm_sample(p, dw, channel, msg, result);

	return sampled;
}
1292
/*
 * Same sequence as wm_projective(), except that the final step is
 * wm_sample__alpha() instead of wm_sample().
 *
 * Returns whatever wm_sample__alpha() returns.
 */
static int wm_projective__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	int sampled;

	gen8_compile_init(p);
	wm_projective_st(p, dw, channel, msg);
	sampled = wm_sample__alpha(p, dw, channel, msg, result);

	return sampled;
}
1301
/*
 * Kernel with a projective source and no mask: sample channel 0 via
 * wm_projective() (msg 1, result 12) and pass the returned register to
 * wm_write().
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective(struct brw_compile *p, int dispatch)
{
	int src_reg;

	gen8_compile_init(p);

	src_reg = wm_projective(p, dispatch, 0, 1, 12);
	wm_write(p, dispatch, src_reg);

	return true;
}
1310
/*
 * Kernel with a projective source and a projective mask.
 *
 * Channel 0 is fetched with wm_projective() (msg 1, result 12) and
 * channel 1 with wm_projective__alpha() (msg 6, result 20); the two
 * returned registers are combined and written by wm_write__mask().
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
	int src_reg, mask_reg;

	gen8_compile_init(p);

	src_reg = wm_projective(p, dispatch, 0, 1, 12);
	mask_reg = wm_projective__alpha(p, dispatch, 1, 6, 20);

	wm_write__mask(p, dispatch, src_reg, mask_reg);
	return true;
}
1324
/*
 * Kernel with a projective source and a projective mask, combined
 * register by register.
 *
 * Both channels are fetched with wm_projective() (channel 0: msg 1,
 * result 12; channel 1: msg 6, result 20) and handed to
 * wm_write__mask_ca().
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
	int src_reg, mask_reg;

	gen8_compile_init(p);

	src_reg = wm_projective(p, dispatch, 0, 1, 12);
	mask_reg = wm_projective(p, dispatch, 1, 6, 20);

	wm_write__mask_ca(p, dispatch, src_reg, mask_reg);
	return true;
}
1338
/*
 * Kernel with a projective source and a projective mask where the source
 * is fetched with the alpha variant.
 *
 * Channel 0 goes through wm_projective__alpha() (msg 1, result 12) and
 * channel 1 through wm_projective() (msg 6, result 16).  Note the operand
 * order of the final call: the channel 1 register is passed as the `src`
 * argument of wm_write__mask() and the channel 0 register as its `mask`.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
	int src_alpha_reg, mask_reg;

	gen8_compile_init(p);

	src_alpha_reg = wm_projective__alpha(p, dispatch, 0, 1, 12);
	mask_reg = wm_projective(p, dispatch, 1, 6, 16);

	wm_write__mask(p, dispatch, mask_reg, src_alpha_reg);
	return true;
}
1352
/*
 * Kernel with an affine source scaled by a single opacity value.
 *
 * Channel 0 is fetched with wm_affine() (msg 1, result 12).  The register
 * number handed to wm_write__opacity() as `mask` is 8 when dispatch == 16
 * and 6 for any other dispatch width.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
{
	int src_reg, opacity_reg;

	gen8_compile_init(p);

	src_reg = wm_affine(p, dispatch, 0, 1, 12);

	if (dispatch == 16)
		opacity_reg = 8;
	else
		opacity_reg = 6;

	wm_write__opacity(p, dispatch, src_reg, opacity_reg);
	return true;
}
1366
/*
 * Kernel with a projective source scaled by a single opacity value.
 *
 * Channel 0 is fetched with wm_projective() (msg 1, result 12).  The
 * register number handed to wm_write__opacity() as `mask` is 8 when
 * dispatch == 16 and 6 for any other dispatch width.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
{
	int src_reg, opacity_reg;

	gen8_compile_init(p);

	if (dispatch == 16)
		opacity_reg = 8;
	else
		opacity_reg = 6;

	src_reg = wm_projective(p, dispatch, 0, 1, 12);

	wm_write__opacity(p, dispatch, src_reg, opacity_reg);
	return true;
}
1380