1428d7b3dSmrg/*
2428d7b3dSmrg * Copyright © 2013 Intel Corporation
3428d7b3dSmrg *
4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"),
6428d7b3dSmrg * to deal in the Software without restriction, including without limitation
7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the
9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions:
10428d7b3dSmrg *
11428d7b3dSmrg * The above copyright notice and this permission notice (including the next
12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the
13428d7b3dSmrg * Software.
14428d7b3dSmrg *
15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20428d7b3dSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21428d7b3dSmrg * IN THE SOFTWARE.
22428d7b3dSmrg */
23428d7b3dSmrg
24428d7b3dSmrg#ifdef HAVE_CONFIG_H
25428d7b3dSmrg#include "config.h"
26428d7b3dSmrg#endif
27428d7b3dSmrg
28428d7b3dSmrg#include <string.h>
29428d7b3dSmrg
30428d7b3dSmrg#include "compiler.h"
31428d7b3dSmrg#include "brw/brw.h"
32428d7b3dSmrg#include "gen8_eu.h"
33428d7b3dSmrg
#ifndef ARRAY_SIZE
/* Element count of a true array (not a pointer).  The argument is
 * parenthesized before indexing so that expression arguments bind as
 * intended.
 */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
37428d7b3dSmrg
38428d7b3dSmrg/* EU ISA */
39428d7b3dSmrg
/* Offset added to a message-register number when the operand setters
 * rewrite a BRW_MESSAGE_REGISTER_FILE operand into the general register
 * file.
 */
#define MRF_HACK_START 111
41428d7b3dSmrg
/**
 * One EU instruction: 128 bits stored as four 32-bit dwords.
 *
 * Instruction bit N lives in data[N / 32] at bit position (N % 32).
 */
struct gen8_instruction {
	uint32_t data[4];
};

/**
 * Build a right-aligned mask covering bit positions low..high (inclusive).
 *
 * The arithmetic is unsigned and a full 32-bit field is special-cased, so
 * no shift ever reaches the sign bit or the width of the type (both are
 * undefined behaviour in C).
 */
static inline unsigned
__gen8_mask(unsigned high, unsigned low)
{
	unsigned width;

	assert(high >= low);

	width = high - low + 1;
	if (width >= 32)
		return 0xffffffffu;

	return (1u << width) - 1;
}

/**
 * Fetch a set of contiguous bits from the instruction.
 *
 * Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
 */
static inline unsigned
__gen8_bits(struct gen8_instruction *insn, unsigned high, unsigned low)
{
	/* We assume the field doesn't cross 32-bit boundaries. */
	const unsigned word = high / 32;

	assert(word < 4);
	assert(word == low / 32);

	high %= 32;
	low %= 32;

	return (insn->data[word] >> low) & __gen8_mask(high, low);
}

/**
 * Set bits in the instruction, with proper shifting and masking.
 *
 * Bits indexes range from 0..127; fields may not cross 32-bit boundaries.
 * \p value must fit in the field (asserted).
 */
static inline void
__gen8_set_bits(struct gen8_instruction *insn,
		unsigned high,
		unsigned low,
		unsigned value)
{
	const unsigned word = high / 32;
	unsigned mask;

	assert(word < 4);
	assert(word == low / 32);

	high %= 32;
	low %= 32;

	/* Compare against the mask itself: "mask + 1" wraps to zero for a
	 * field that spans the whole dword.
	 */
	assert(value <= __gen8_mask(high, low));

	mask = __gen8_mask(high, low) << low;
	insn->data[word] &= ~mask;
	insn->data[word] |= (value << low) & mask;

	/* Read the field back to confirm the store round-trips. */
	assert(__gen8_bits(insn, 32*word+high, 32*word+low) == value);
}
98428d7b3dSmrg
/*
 * F(field, hi, lo) emits the accessor pair for one instruction field that
 * occupies bits lo..hi:
 *
 *   __gen8_<field>(insn)          reads the field via __gen8_bits()
 *   __gen8_set_<field>(insn, v)   writes it via __gen8_set_bits()
 *
 * The expansion is two complete function definitions, so an invocation
 * needs no trailing semicolon.
 */
#define F(field, hi, lo)						\
static inline unsigned __gen8_##field(struct gen8_instruction *insn)	\
{									\
	return __gen8_bits(insn, hi, lo);				\
}									\
static inline void __gen8_set_##field(struct gen8_instruction *insn,	\
				      unsigned v)			\
{									\
	__gen8_set_bits(insn, hi, lo, v);				\
}
108428d7b3dSmrg
109428d7b3dSmrg/**
110428d7b3dSmrg* Direct addressing only:
111428d7b3dSmrg*  @{
112428d7b3dSmrg*/
113428d7b3dSmrgF(src1_da_reg_nr,      108, 101);
114428d7b3dSmrgF(src0_da_reg_nr,       76,  69);
115428d7b3dSmrgF(dst_da1_hstride,      62,  61);
116428d7b3dSmrgF(dst_da_reg_nr,        60,  53);
117428d7b3dSmrgF(dst_da16_subreg_nr,   52,  52);
118428d7b3dSmrgF(dst_da1_subreg_nr,    52,  48);
119428d7b3dSmrgF(da16_writemask,       51,  48); /* Dst.ChanEn */
120428d7b3dSmrg/** @} */
121428d7b3dSmrg
122428d7b3dSmrgF(src1_vert_stride,    120, 117)
123428d7b3dSmrgF(src1_da1_width,      116, 114)
124428d7b3dSmrgF(src1_da16_swiz_w,    115, 114)
125428d7b3dSmrgF(src1_da16_swiz_z,    113, 112)
126428d7b3dSmrgF(src1_da1_hstride,    113, 112)
127428d7b3dSmrgF(src1_address_mode,   111, 111)
128428d7b3dSmrg/** Src1.SrcMod @{ */
129428d7b3dSmrgF(src1_negate,         110, 110)
130428d7b3dSmrgF(src1_abs,            109, 109)
131428d7b3dSmrg/** @} */
132428d7b3dSmrgF(src1_da16_subreg_nr, 100, 100)
133428d7b3dSmrgF(src1_da1_subreg_nr,  100,  96)
134428d7b3dSmrgF(src1_da16_swiz_y,     99,  98)
135428d7b3dSmrgF(src1_da16_swiz_x,     97,  96)
136428d7b3dSmrgF(src1_reg_type,        94,  91)
137428d7b3dSmrgF(src1_reg_file,        90,  89)
138428d7b3dSmrgF(src0_vert_stride,     88,  85)
139428d7b3dSmrgF(src0_da1_width,       84,  82)
140428d7b3dSmrgF(src0_da16_swiz_w,     83,  82)
141428d7b3dSmrgF(src0_da16_swiz_z,     81,  80)
142428d7b3dSmrgF(src0_da1_hstride,     81,  80)
143428d7b3dSmrgF(src0_address_mode,    79,  79)
144428d7b3dSmrg/** Src0.SrcMod @{ */
145428d7b3dSmrgF(src0_negate,          78,  78)
146428d7b3dSmrgF(src0_abs,             77,  77)
147428d7b3dSmrg/** @} */
148428d7b3dSmrgF(src0_da16_subreg_nr,  68,  68)
149428d7b3dSmrgF(src0_da1_subreg_nr,   68,  64)
150428d7b3dSmrgF(src0_da16_swiz_y,     67,  66)
151428d7b3dSmrgF(src0_da16_swiz_x,     65,  64)
152428d7b3dSmrgF(dst_address_mode,     63,  63)
153428d7b3dSmrgF(src0_reg_type,        46,  43)
154428d7b3dSmrgF(src0_reg_file,        42,  41)
155428d7b3dSmrgF(dst_reg_type,         40,  37)
156428d7b3dSmrgF(dst_reg_file,         36,  35)
157428d7b3dSmrgF(mask_control,         34,  34)
158428d7b3dSmrgF(flag_reg_nr,          33,  33)
159428d7b3dSmrgF(flag_subreg_nr,       32,  32)
160428d7b3dSmrgF(saturate,             31,  31)
161428d7b3dSmrgF(branch_control,       30,  30)
162428d7b3dSmrgF(debug_control,        30,  30)
163428d7b3dSmrgF(cmpt_control,         29,  29)
164428d7b3dSmrgF(acc_wr_control,       28,  28)
165428d7b3dSmrgF(cond_modifier,        27,  24)
166428d7b3dSmrgF(exec_size,            23,  21)
167428d7b3dSmrgF(pred_inv,             20,  20)
168428d7b3dSmrgF(pred_control,         19,  16)
169428d7b3dSmrgF(thread_control,       15,  14)
170428d7b3dSmrgF(qtr_control,          13,  12)
171428d7b3dSmrgF(nib_control,          11,  11)
172428d7b3dSmrgF(dep_control,          10,   9)
173428d7b3dSmrgF(access_mode,           8,   8)
174428d7b3dSmrg/* Bit 7 is Reserved (for future Opcode expansion) */
175428d7b3dSmrgF(opcode,                6,   0)
176428d7b3dSmrg
177428d7b3dSmrg/**
178428d7b3dSmrg* Three-source instructions:
179428d7b3dSmrg*  @{
180428d7b3dSmrg*/
181428d7b3dSmrgF(src2_3src_reg_nr,    125, 118)
182428d7b3dSmrgF(src2_3src_subreg_nr, 117, 115)
183428d7b3dSmrgF(src2_3src_swizzle,   114, 107)
184428d7b3dSmrgF(src2_3src_rep_ctrl,  106, 106)
185428d7b3dSmrgF(src1_3src_reg_nr,    104,  97)
186428d7b3dSmrgF(src1_3src_subreg_hi,  96,  96)
187428d7b3dSmrgF(src1_3src_subreg_lo,  95,  94)
188428d7b3dSmrgF(src1_3src_swizzle,    93,  86)
189428d7b3dSmrgF(src1_3src_rep_ctrl,   85,  85)
190428d7b3dSmrgF(src0_3src_reg_nr,     83,  76)
191428d7b3dSmrgF(src0_3src_subreg_nr,  75,  73)
192428d7b3dSmrgF(src0_3src_swizzle,    72,  65)
193428d7b3dSmrgF(src0_3src_rep_ctrl,   64,  64)
194428d7b3dSmrgF(dst_3src_reg_nr,      63,  56)
195428d7b3dSmrgF(dst_3src_subreg_nr,   55,  53)
196428d7b3dSmrgF(dst_3src_writemask,   52,  49)
197428d7b3dSmrgF(dst_3src_type,        48,  46)
198428d7b3dSmrgF(src_3src_type,        45,  43)
199428d7b3dSmrgF(src2_3src_negate,     42,  42)
200428d7b3dSmrgF(src2_3src_abs,        41,  41)
201428d7b3dSmrgF(src1_3src_negate,     40,  40)
202428d7b3dSmrgF(src1_3src_abs,        39,  39)
203428d7b3dSmrgF(src0_3src_negate,     38,  38)
204428d7b3dSmrgF(src0_3src_abs,        37,  37)
205428d7b3dSmrg/** @} */
206428d7b3dSmrg
207428d7b3dSmrg/**
208428d7b3dSmrg* Fields for SEND messages:
209428d7b3dSmrg*  @{
210428d7b3dSmrg*/
211428d7b3dSmrgF(eot,                 127, 127)
212428d7b3dSmrgF(mlen,                124, 121)
213428d7b3dSmrgF(rlen,                120, 116)
214428d7b3dSmrgF(header_present,      115, 115)
215428d7b3dSmrgF(function_control,    114,  96)
216428d7b3dSmrgF(sfid,                 27,  24)
217428d7b3dSmrgF(math_function,        27,  24)
218428d7b3dSmrg/** @} */
219428d7b3dSmrg
220428d7b3dSmrg/**
221428d7b3dSmrg* URB message function control bits:
222428d7b3dSmrg*  @{
223428d7b3dSmrg*/
224428d7b3dSmrgF(urb_per_slot_offset, 113, 113)
225428d7b3dSmrgF(urb_interleave,      111, 111)
226428d7b3dSmrgF(urb_global_offset,   110, 100)
227428d7b3dSmrgF(urb_opcode,           99,  96)
228428d7b3dSmrg/** @} */
229428d7b3dSmrg
230428d7b3dSmrg/**
231428d7b3dSmrg* Sampler message function control bits:
232428d7b3dSmrg*  @{
233428d7b3dSmrg*/
234428d7b3dSmrgF(sampler_simd_mode,   114, 113)
235428d7b3dSmrgF(sampler_msg_type,    112, 108)
236428d7b3dSmrgF(sampler,             107, 104)
237428d7b3dSmrgF(binding_table_index, 103,  96)
238428d7b3dSmrg/** @} */
239428d7b3dSmrg
240428d7b3dSmrg/**
241428d7b3dSmrg * Data port message function control bits:
242428d7b3dSmrg *  @ {
243428d7b3dSmrg */
244428d7b3dSmrgF(dp_category,            114, 114)
245428d7b3dSmrgF(dp_message_type,        113, 110)
246428d7b3dSmrgF(dp_message_control,     109, 104)
247428d7b3dSmrgF(dp_binding_table_index, 103,  96)
248428d7b3dSmrg/** @} */
249428d7b3dSmrg
250428d7b3dSmrg/**
251428d7b3dSmrg * Thread Spawn message function control bits:
252428d7b3dSmrg *  @ {
253428d7b3dSmrg */
254428d7b3dSmrgF(ts_resource_select,     100, 100)
255428d7b3dSmrgF(ts_request_type,         97,  97)
256428d7b3dSmrgF(ts_opcode,               96,  96)
257428d7b3dSmrg/** @} */
258428d7b3dSmrg
259428d7b3dSmrg/**
260428d7b3dSmrg * Video Motion Estimation message function control bits:
261428d7b3dSmrg *  @ {
262428d7b3dSmrg */
263428d7b3dSmrgF(vme_message_type,        110, 109)
264428d7b3dSmrgF(vme_binding_table_index, 103,  96)
265428d7b3dSmrg/** @} */
266428d7b3dSmrg
267428d7b3dSmrg/**
268428d7b3dSmrg * Check & Refinement Engine message function control bits:
269428d7b3dSmrg *  @ {
270428d7b3dSmrg */
271428d7b3dSmrgF(cre_message_type,        110, 109)
272428d7b3dSmrgF(cre_binding_table_index, 103,  96)
273428d7b3dSmrg/** @} */
274428d7b3dSmrg
275428d7b3dSmrg#undef F
276428d7b3dSmrg
277428d7b3dSmrg/**
278428d7b3dSmrg* Flow control instruction bits:
279428d7b3dSmrg*  @{
280428d7b3dSmrg*/
281428d7b3dSmrgstatic inline unsigned __gen8_uip(struct gen8_instruction *insn)
282428d7b3dSmrg{
283428d7b3dSmrg	return insn->data[2];
284428d7b3dSmrg}
285428d7b3dSmrg
286428d7b3dSmrgstatic inline void __gen8_set_uip(struct gen8_instruction *insn, unsigned uip)
287428d7b3dSmrg{
288428d7b3dSmrg	insn->data[2] = uip;
289428d7b3dSmrg}
290428d7b3dSmrg
291428d7b3dSmrgstatic inline unsigned __gen8_jip(struct gen8_instruction *insn)
292428d7b3dSmrg{
293428d7b3dSmrg	return insn->data[3];
294428d7b3dSmrg}
295428d7b3dSmrg
296428d7b3dSmrgstatic inline void __gen8_set_jip(struct gen8_instruction *insn, unsigned jip)
297428d7b3dSmrg{
298428d7b3dSmrg	insn->data[3] = jip;
299428d7b3dSmrg}
300428d7b3dSmrg/** @} */
301428d7b3dSmrg
302428d7b3dSmrgstatic inline int __gen8_src1_imm_d(struct gen8_instruction *insn)
303428d7b3dSmrg{
304428d7b3dSmrg	return insn->data[3];
305428d7b3dSmrg}
306428d7b3dSmrg
307428d7b3dSmrgstatic inline unsigned __gen8_src1_imm_ud(struct gen8_instruction *insn)
308428d7b3dSmrg{
309428d7b3dSmrg	return insn->data[3];
310428d7b3dSmrg}
311428d7b3dSmrg
312428d7b3dSmrgstatic inline float __gen8_src1_imm_f(struct gen8_instruction *insn)
313428d7b3dSmrg{
314428d7b3dSmrg	union {
315428d7b3dSmrg		uint32_t u;
316428d7b3dSmrg		float f;
317428d7b3dSmrg	} ft = { insn->data[3] };
318428d7b3dSmrg	return ft.f;
319428d7b3dSmrg}
320428d7b3dSmrg
321428d7b3dSmrgstatic void
322428d7b3dSmrg__gen8_set_dst(struct brw_compile *p,
323428d7b3dSmrg	       struct gen8_instruction *inst,
324428d7b3dSmrg	       struct brw_reg reg)
325428d7b3dSmrg{
326428d7b3dSmrg	/* MRFs haven't existed since Gen7, so we better not be using them. */
327428d7b3dSmrg	if (reg.file == BRW_MESSAGE_REGISTER_FILE) {
328428d7b3dSmrg		reg.file = BRW_GENERAL_REGISTER_FILE;
329428d7b3dSmrg		reg.nr += MRF_HACK_START;
330428d7b3dSmrg	}
331428d7b3dSmrg
332428d7b3dSmrg	assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
333428d7b3dSmrg
334428d7b3dSmrg	if (reg.file == BRW_GENERAL_REGISTER_FILE)
335428d7b3dSmrg		assert(reg.nr < BRW_MAX_GRF);
336428d7b3dSmrg
337428d7b3dSmrg	__gen8_set_dst_reg_file(inst, reg.file);
338428d7b3dSmrg	__gen8_set_dst_reg_type(inst, reg.type);
339428d7b3dSmrg
340428d7b3dSmrg	assert(reg.address_mode == BRW_ADDRESS_DIRECT);
341428d7b3dSmrg
342428d7b3dSmrg	__gen8_set_dst_da_reg_nr(inst, reg.nr);
343428d7b3dSmrg
344428d7b3dSmrg	if (__gen8_access_mode(inst) == BRW_ALIGN_1) {
345428d7b3dSmrg		/* Set Dst.SubRegNum[4:0] */
346428d7b3dSmrg		__gen8_set_dst_da1_subreg_nr(inst, reg.subnr);
347428d7b3dSmrg
348428d7b3dSmrg		/* Set Dst.HorzStride */
349428d7b3dSmrg		if (reg.hstride == BRW_HORIZONTAL_STRIDE_0)
350428d7b3dSmrg			reg.hstride = BRW_HORIZONTAL_STRIDE_1;
351428d7b3dSmrg		__gen8_set_dst_da1_hstride(inst, reg.hstride);
352428d7b3dSmrg	} else {
353428d7b3dSmrg		/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
354428d7b3dSmrg		assert(reg.subnr == 0 || reg.subnr == 16);
355428d7b3dSmrg		__gen8_set_dst_da16_subreg_nr(inst, reg.subnr >> 4);
356428d7b3dSmrg		__gen8_set_da16_writemask(inst, reg.dw1.bits.writemask);
357428d7b3dSmrg	}
358428d7b3dSmrg
359428d7b3dSmrg#if 1
360428d7b3dSmrg	if (reg.width == BRW_WIDTH_8 && p->compressed)
361428d7b3dSmrg		__gen8_set_exec_size(inst, BRW_EXECUTE_16);
362428d7b3dSmrg	else
363428d7b3dSmrg		__gen8_set_exec_size(inst, reg.width);
364428d7b3dSmrg#else
365428d7b3dSmrg	if (reg.width < BRW_EXECUTE_8)
366428d7b3dSmrg		__gen8_set_exec_size(inst, reg.width);
367428d7b3dSmrg#endif
368428d7b3dSmrg}
369428d7b3dSmrg
370428d7b3dSmrgstatic void
371428d7b3dSmrg__gen8_validate_reg(struct gen8_instruction *inst, struct brw_reg reg)
372428d7b3dSmrg{
373428d7b3dSmrg	int hstride_for_reg[] = {0, 1, 2, 4};
374428d7b3dSmrg	int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
375428d7b3dSmrg	int width_for_reg[] = {1, 2, 4, 8, 16};
376428d7b3dSmrg	int execsize_for_reg[] = {1, 2, 4, 8, 16};
377428d7b3dSmrg	int width, hstride, vstride, execsize;
378428d7b3dSmrg
379428d7b3dSmrg	if (reg.file == BRW_IMMEDIATE_VALUE) {
380428d7b3dSmrg		/* TODO: check immediate vectors */
381428d7b3dSmrg		return;
382428d7b3dSmrg	}
383428d7b3dSmrg
384428d7b3dSmrg	if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE)
385428d7b3dSmrg		return;
386428d7b3dSmrg
387428d7b3dSmrg	assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
388428d7b3dSmrg	hstride = hstride_for_reg[reg.hstride];
389428d7b3dSmrg
390428d7b3dSmrg	if (reg.vstride == 0xf) {
391428d7b3dSmrg		vstride = -1;
392428d7b3dSmrg	} else {
393428d7b3dSmrg		assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
394428d7b3dSmrg		vstride = vstride_for_reg[reg.vstride];
395428d7b3dSmrg	}
396428d7b3dSmrg
397428d7b3dSmrg	assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
398428d7b3dSmrg	width = width_for_reg[reg.width];
399428d7b3dSmrg
400428d7b3dSmrg	assert(__gen8_exec_size(inst) >= 0 &&
401428d7b3dSmrg	       __gen8_exec_size(inst) < ARRAY_SIZE(execsize_for_reg));
402428d7b3dSmrg	execsize = execsize_for_reg[__gen8_exec_size(inst)];
403428d7b3dSmrg
404428d7b3dSmrg	/* Restrictions from 3.3.10: Register Region Restrictions. */
405428d7b3dSmrg	/* 3. */
406428d7b3dSmrg	assert(execsize >= width);
407428d7b3dSmrg
408428d7b3dSmrg	/* 4. */
409428d7b3dSmrg	if (execsize == width && hstride != 0) {
410428d7b3dSmrg		assert(vstride == -1 || vstride == width * hstride);
411428d7b3dSmrg	}
412428d7b3dSmrg
413428d7b3dSmrg	/* 5. */
414428d7b3dSmrg	if (execsize == width && hstride == 0) {
415428d7b3dSmrg		/* no restriction on vstride. */
416428d7b3dSmrg	}
417428d7b3dSmrg
418428d7b3dSmrg	/* 6. */
419428d7b3dSmrg	if (width == 1) {
420428d7b3dSmrg		assert(hstride == 0);
421428d7b3dSmrg	}
422428d7b3dSmrg
423428d7b3dSmrg	/* 7. */
424428d7b3dSmrg	if (execsize == 1 && width == 1) {
425428d7b3dSmrg		assert(hstride == 0);
426428d7b3dSmrg		assert(vstride == 0);
427428d7b3dSmrg	}
428428d7b3dSmrg
429428d7b3dSmrg	/* 8. */
430428d7b3dSmrg	if (vstride == 0 && hstride == 0) {
431428d7b3dSmrg		assert(width == 1);
432428d7b3dSmrg	}
433428d7b3dSmrg
434428d7b3dSmrg	/* 10. Check destination issues. */
435428d7b3dSmrg}
436428d7b3dSmrg
437428d7b3dSmrgstatic void
438428d7b3dSmrg__gen8_set_src0(struct gen8_instruction *inst, struct brw_reg reg)
439428d7b3dSmrg{
440428d7b3dSmrg	/* MRFs haven't existed since Gen7, so we better not be using them. */
441428d7b3dSmrg	if (reg.file == BRW_MESSAGE_REGISTER_FILE) {
442428d7b3dSmrg		reg.file = BRW_GENERAL_REGISTER_FILE;
443428d7b3dSmrg		reg.nr += MRF_HACK_START;
444428d7b3dSmrg	}
445428d7b3dSmrg
446428d7b3dSmrg	if (reg.file == BRW_GENERAL_REGISTER_FILE)
447428d7b3dSmrg		assert(reg.nr < BRW_MAX_GRF);
448428d7b3dSmrg
449428d7b3dSmrg	__gen8_validate_reg(inst, reg);
450428d7b3dSmrg
451428d7b3dSmrg	__gen8_set_src0_reg_file(inst, reg.file);
452428d7b3dSmrg	__gen8_set_src0_reg_type(inst, reg.type);
453428d7b3dSmrg	__gen8_set_src0_abs(inst, reg.abs);
454428d7b3dSmrg	__gen8_set_src0_negate(inst, reg.negate);
455428d7b3dSmrg
456428d7b3dSmrg	assert(reg.address_mode == BRW_ADDRESS_DIRECT);
457428d7b3dSmrg
458428d7b3dSmrg	if (reg.file == BRW_IMMEDIATE_VALUE) {
459428d7b3dSmrg		inst->data[3] = reg.dw1.ud;
460428d7b3dSmrg
461428d7b3dSmrg		/* Required to set some fields in src1 as well: */
462428d7b3dSmrg		__gen8_set_src1_reg_file(inst, 0); /* arf */
463428d7b3dSmrg		__gen8_set_src1_reg_type(inst, reg.type);
464428d7b3dSmrg	} else {
465428d7b3dSmrg		__gen8_set_src0_da_reg_nr(inst, reg.nr);
466428d7b3dSmrg
467428d7b3dSmrg		if (__gen8_access_mode(inst) == BRW_ALIGN_1) {
468428d7b3dSmrg			/* Set Src0.SubRegNum[4:0] */
469428d7b3dSmrg			__gen8_set_src0_da1_subreg_nr(inst, reg.subnr);
470428d7b3dSmrg
471428d7b3dSmrg			if (reg.width == BRW_WIDTH_1 &&
472428d7b3dSmrg			    __gen8_exec_size(inst) == BRW_EXECUTE_1) {
473428d7b3dSmrg				__gen8_set_src0_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
474428d7b3dSmrg				__gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
475428d7b3dSmrg			} else {
476428d7b3dSmrg				__gen8_set_src0_da1_hstride(inst, reg.hstride);
477428d7b3dSmrg				__gen8_set_src0_vert_stride(inst, reg.vstride);
478428d7b3dSmrg			}
479428d7b3dSmrg			__gen8_set_src0_da1_width(inst, reg.width);
480428d7b3dSmrg		} else {
481428d7b3dSmrg			/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
482428d7b3dSmrg			assert(reg.subnr == 0 || reg.subnr == 16);
483428d7b3dSmrg			__gen8_set_src0_da16_subreg_nr(inst, reg.subnr >> 4);
484428d7b3dSmrg
485428d7b3dSmrg			__gen8_set_src0_da16_swiz_x(inst,
486428d7b3dSmrg						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
487428d7b3dSmrg							      BRW_CHANNEL_X));
488428d7b3dSmrg			__gen8_set_src0_da16_swiz_y(inst,
489428d7b3dSmrg						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
490428d7b3dSmrg							      BRW_CHANNEL_Y));
491428d7b3dSmrg			__gen8_set_src0_da16_swiz_z(inst,
492428d7b3dSmrg						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
493428d7b3dSmrg							      BRW_CHANNEL_Z));
494428d7b3dSmrg			__gen8_set_src0_da16_swiz_w(inst,
495428d7b3dSmrg						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
496428d7b3dSmrg							      BRW_CHANNEL_W));
497428d7b3dSmrg
498428d7b3dSmrg			/* This is an oddity of the fact that we're using the same
499428d7b3dSmrg			 * descriptions for registers in both Align16 and Align1 modes.
500428d7b3dSmrg			 */
501428d7b3dSmrg			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
502428d7b3dSmrg				__gen8_set_src0_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
503428d7b3dSmrg			else
504428d7b3dSmrg				__gen8_set_src0_vert_stride(inst, reg.vstride);
505428d7b3dSmrg		}
506428d7b3dSmrg	}
507428d7b3dSmrg}
508428d7b3dSmrg
509428d7b3dSmrgstatic void
510428d7b3dSmrg__gen8_set_src1(struct gen8_instruction *inst, struct brw_reg reg)
511428d7b3dSmrg{
512428d7b3dSmrg	/* MRFs haven't existed since Gen7, so we better not be using them. */
513428d7b3dSmrg	if (reg.file == BRW_MESSAGE_REGISTER_FILE) {
514428d7b3dSmrg		reg.file = BRW_GENERAL_REGISTER_FILE;
515428d7b3dSmrg		reg.nr += MRF_HACK_START;
516428d7b3dSmrg	}
517428d7b3dSmrg
518428d7b3dSmrg	if (reg.file == BRW_GENERAL_REGISTER_FILE)
519428d7b3dSmrg		assert(reg.nr < BRW_MAX_GRF);
520428d7b3dSmrg
521428d7b3dSmrg	__gen8_validate_reg(inst, reg);
522428d7b3dSmrg
523428d7b3dSmrg	__gen8_set_src1_reg_file(inst, reg.file);
524428d7b3dSmrg	__gen8_set_src1_reg_type(inst, reg.type);
525428d7b3dSmrg	__gen8_set_src1_abs(inst, reg.abs);
526428d7b3dSmrg	__gen8_set_src1_negate(inst, reg.negate);
527428d7b3dSmrg
528428d7b3dSmrg	/* Only src1 can be an immediate in two-argument instructions. */
529428d7b3dSmrg	assert(__gen8_src0_reg_file(inst) != BRW_IMMEDIATE_VALUE);
530428d7b3dSmrg
531428d7b3dSmrg	assert(reg.address_mode == BRW_ADDRESS_DIRECT);
532428d7b3dSmrg
533428d7b3dSmrg	if (reg.file == BRW_IMMEDIATE_VALUE) {
534428d7b3dSmrg		inst->data[3] = reg.dw1.ud;
535428d7b3dSmrg	} else {
536428d7b3dSmrg		__gen8_set_src1_da_reg_nr(inst, reg.nr);
537428d7b3dSmrg
538428d7b3dSmrg		if (__gen8_access_mode(inst) == BRW_ALIGN_1) {
539428d7b3dSmrg			/* Set Src0.SubRegNum[4:0] */
540428d7b3dSmrg			__gen8_set_src1_da1_subreg_nr(inst, reg.subnr);
541428d7b3dSmrg
542428d7b3dSmrg			if (reg.width == BRW_WIDTH_1 &&
543428d7b3dSmrg			    __gen8_exec_size(inst) == BRW_EXECUTE_1) {
544428d7b3dSmrg				__gen8_set_src1_da1_hstride(inst, BRW_HORIZONTAL_STRIDE_0);
545428d7b3dSmrg				__gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_0);
546428d7b3dSmrg			} else {
547428d7b3dSmrg				__gen8_set_src1_da1_hstride(inst, reg.hstride);
548428d7b3dSmrg				__gen8_set_src1_vert_stride(inst, reg.vstride);
549428d7b3dSmrg			}
550428d7b3dSmrg			__gen8_set_src1_da1_width(inst, reg.width);
551428d7b3dSmrg		} else {
552428d7b3dSmrg			/* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
553428d7b3dSmrg			assert(reg.subnr == 0 || reg.subnr == 16);
554428d7b3dSmrg			__gen8_set_src1_da16_subreg_nr(inst, reg.subnr >> 4);
555428d7b3dSmrg
556428d7b3dSmrg			__gen8_set_src1_da16_swiz_x(inst,
557428d7b3dSmrg						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
558428d7b3dSmrg							      BRW_CHANNEL_X));
559428d7b3dSmrg			__gen8_set_src1_da16_swiz_y(inst,
560428d7b3dSmrg						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
561428d7b3dSmrg							      BRW_CHANNEL_Y));
562428d7b3dSmrg			__gen8_set_src1_da16_swiz_z(inst,
563428d7b3dSmrg						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
564428d7b3dSmrg							      BRW_CHANNEL_Z));
565428d7b3dSmrg			__gen8_set_src1_da16_swiz_w(inst,
566428d7b3dSmrg						  BRW_GET_SWZ(reg.dw1.bits.swizzle,
567428d7b3dSmrg							      BRW_CHANNEL_W));
568428d7b3dSmrg
569428d7b3dSmrg			/* This is an oddity of the fact that we're using the same
570428d7b3dSmrg			 * descriptions for registers in both Align16 and Align1 modes.
571428d7b3dSmrg			 */
572428d7b3dSmrg			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
573428d7b3dSmrg				__gen8_set_src1_vert_stride(inst, BRW_VERTICAL_STRIDE_4);
574428d7b3dSmrg			else
575428d7b3dSmrg				__gen8_set_src1_vert_stride(inst, reg.vstride);
576428d7b3dSmrg		}
577428d7b3dSmrg	}
578428d7b3dSmrg}
579428d7b3dSmrg
580428d7b3dSmrg/**
581428d7b3dSmrg * Set the Message Descriptor and Extended Message Descriptor fields
582428d7b3dSmrg * for SEND messages.
583428d7b3dSmrg *
584428d7b3dSmrg * \note This zeroes out the Function Control bits, so it must be called
585428d7b3dSmrg *       \b before filling out any message-specific data.  Callers can
586428d7b3dSmrg *       choose not to fill in irrelevant bits; they will be zero.
587428d7b3dSmrg */
588428d7b3dSmrgstatic void
589428d7b3dSmrg__gen8_set_message_descriptor(struct gen8_instruction *inst,
590428d7b3dSmrg			      enum brw_message_target sfid,
591428d7b3dSmrg			      unsigned msg_length,
592428d7b3dSmrg			      unsigned response_length,
593428d7b3dSmrg			      bool header_present,
594428d7b3dSmrg			      bool end_of_thread)
595428d7b3dSmrg{
596428d7b3dSmrg	__gen8_set_src1(inst, brw_imm_d(0));
597428d7b3dSmrg
598428d7b3dSmrg	__gen8_set_sfid(inst, sfid);
599428d7b3dSmrg	__gen8_set_mlen(inst, msg_length);
600428d7b3dSmrg	__gen8_set_rlen(inst, response_length);
601428d7b3dSmrg	__gen8_set_header_present(inst, header_present);
602428d7b3dSmrg	__gen8_set_eot(inst, end_of_thread);
603428d7b3dSmrg}
604428d7b3dSmrg
#if 0
/*
 * Disabled: describe a URB write message.
 *
 * NOTE(review): the 'opcode' argument is ignored; the URB opcode field is
 * always programmed with 0 (URB_WRITE_HWORD).  Confirm that this is
 * intended before re-enabling.
 */
static void
__gen8_set_urb_message(struct gen8_instruction *inst,
		       unsigned opcode,
		       unsigned msg_length,
		       unsigned response_length,
		       bool end_of_thread,
		       unsigned offset,
		       bool interleave)
{
	/* URB messages always carry a header (header_present = true). */
	__gen8_set_message_descriptor(inst, BRW_SFID_URB,
				      msg_length, response_length,
				      true, end_of_thread);

	__gen8_set_src0(inst, brw_vec8_grf(MRF_HACK_START + 1, 0));

	__gen8_set_urb_opcode(inst, 0); /* URB_WRITE_HWORD */
	__gen8_set_urb_global_offset(inst, offset);
	__gen8_set_urb_interleave(inst, interleave);

	/* per_slot_offset = 0 makes it ignore offsets in message header */
	__gen8_set_urb_per_slot_offset(inst, 0);
}
#endif
625428d7b3dSmrg
626428d7b3dSmrgstatic void
627428d7b3dSmrg__gen8_set_sampler_message(struct gen8_instruction *inst,
628428d7b3dSmrg			   unsigned binding_table_index,
629428d7b3dSmrg			   unsigned sampler,
630428d7b3dSmrg			   unsigned msg_type,
631428d7b3dSmrg			   unsigned response_length,
632428d7b3dSmrg			   unsigned msg_length,
633428d7b3dSmrg			   bool header_present,
634428d7b3dSmrg			   unsigned simd_mode)
635428d7b3dSmrg{
636428d7b3dSmrg	__gen8_set_message_descriptor(inst, BRW_SFID_SAMPLER, msg_length,
637428d7b3dSmrg				      response_length, header_present, false);
638428d7b3dSmrg
639428d7b3dSmrg	__gen8_set_binding_table_index(inst, binding_table_index);
640428d7b3dSmrg	__gen8_set_sampler(inst, sampler);
641428d7b3dSmrg	__gen8_set_sampler_msg_type(inst, msg_type);
642428d7b3dSmrg	__gen8_set_sampler_simd_mode(inst, simd_mode);
643428d7b3dSmrg}
644428d7b3dSmrg
645428d7b3dSmrgstatic void
646428d7b3dSmrg__gen8_set_dp_message(struct gen8_instruction *inst,
647428d7b3dSmrg		      enum brw_message_target sfid,
648428d7b3dSmrg		      unsigned binding_table_index,
649428d7b3dSmrg		      unsigned msg_type,
650428d7b3dSmrg		      unsigned msg_control,
651428d7b3dSmrg		      unsigned mlen,
652428d7b3dSmrg		      unsigned rlen,
653428d7b3dSmrg		      bool header_present,
654428d7b3dSmrg		      bool end_of_thread)
655428d7b3dSmrg{
656428d7b3dSmrg	/* Binding table index is from 0..255 */
657428d7b3dSmrg	assert((binding_table_index & 0xff) == binding_table_index);
658428d7b3dSmrg
659428d7b3dSmrg	/* Message Type is only 5 bits */
660428d7b3dSmrg	assert((msg_type & 0x1f) == msg_type);
661428d7b3dSmrg
662428d7b3dSmrg	/* Message Control is only 6 bits */
663428d7b3dSmrg	assert((msg_control & 0x3f) == msg_control);
664428d7b3dSmrg
665428d7b3dSmrg	__gen8_set_message_descriptor(inst, sfid, mlen, rlen, header_present,
666428d7b3dSmrg				      end_of_thread);
667428d7b3dSmrg	__gen8_set_function_control(inst,
668428d7b3dSmrg				    binding_table_index | msg_type << 14 | msg_control << 8);
669428d7b3dSmrg}
670428d7b3dSmrg
671428d7b3dSmrgstatic inline struct gen8_instruction *
672428d7b3dSmrggen8_next_insn(struct brw_compile *p, int opcode)
673428d7b3dSmrg{
674428d7b3dSmrg	struct gen8_instruction *insn;
675428d7b3dSmrg
676428d7b3dSmrg	assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
677428d7b3dSmrg
678428d7b3dSmrg	insn = memcpy(&p->store[p->nr_insn++], p->current, sizeof(*insn));
679428d7b3dSmrg	__gen8_set_opcode(insn, opcode);
680428d7b3dSmrg
681428d7b3dSmrg	return insn;
682428d7b3dSmrg}
683428d7b3dSmrg
684428d7b3dSmrgstatic void gen8_math(struct brw_compile *p,
685428d7b3dSmrg		      struct brw_reg dst,
686428d7b3dSmrg		      unsigned function,
687428d7b3dSmrg		      unsigned saturate,
688428d7b3dSmrg		      unsigned msg_reg_nr,
689428d7b3dSmrg		      struct brw_reg src,
690428d7b3dSmrg		      unsigned data_type,
691428d7b3dSmrg		      unsigned precision)
692428d7b3dSmrg{
693428d7b3dSmrg	struct gen8_instruction *insn = gen8_next_insn(p, BRW_OPCODE_MATH);
694428d7b3dSmrg
695428d7b3dSmrg	assert(dst.file == BRW_GENERAL_REGISTER_FILE);
696428d7b3dSmrg	assert(src.file == BRW_GENERAL_REGISTER_FILE);
697428d7b3dSmrg
698428d7b3dSmrg	assert(dst.hstride == BRW_HORIZONTAL_STRIDE_1);
699428d7b3dSmrg	assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
700428d7b3dSmrg
701428d7b3dSmrg	/* Source modifiers are ignored for extended math instructions. */
702428d7b3dSmrg	assert(!src.negate);
703428d7b3dSmrg	assert(!src.abs);
704428d7b3dSmrg
705428d7b3dSmrg	if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
706428d7b3dSmrg	    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
707428d7b3dSmrg		assert(src.type == BRW_REGISTER_TYPE_F);
708428d7b3dSmrg	}
709428d7b3dSmrg
710428d7b3dSmrg	/* Math is the same ISA format as other opcodes, except that CondModifier
711428d7b3dSmrg	 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
712428d7b3dSmrg	 */
713428d7b3dSmrg	__gen8_set_cond_modifier(insn, function);
714428d7b3dSmrg	__gen8_set_saturate(insn, saturate);
715428d7b3dSmrg
716428d7b3dSmrg	__gen8_set_dst(p, insn, dst);
717428d7b3dSmrg	__gen8_set_src0(insn, src);
718428d7b3dSmrg	__gen8_set_src1(insn, brw_null_reg());
719428d7b3dSmrg}
720428d7b3dSmrg
721428d7b3dSmrgstatic inline void gen8_math_invert(struct brw_compile *p,
722428d7b3dSmrg				    struct brw_reg dst,
723428d7b3dSmrg				    struct brw_reg src)
724428d7b3dSmrg{
725428d7b3dSmrg	gen8_math(p,
726428d7b3dSmrg		  dst,
727428d7b3dSmrg		  BRW_MATH_FUNCTION_INV,
728428d7b3dSmrg		  BRW_MATH_SATURATE_NONE,
729428d7b3dSmrg		  0,
730428d7b3dSmrg		  src,
731428d7b3dSmrg		  BRW_MATH_PRECISION_FULL,
732428d7b3dSmrg		  BRW_MATH_DATA_VECTOR);
733428d7b3dSmrg
734428d7b3dSmrg}
735428d7b3dSmrg
736428d7b3dSmrg/* Helpers for regular instructions: */
737428d7b3dSmrgstatic inline struct gen8_instruction *gen8_alu1(struct brw_compile *p,
738428d7b3dSmrg						 unsigned opcode,
739428d7b3dSmrg						 struct brw_reg dst,
740428d7b3dSmrg						 struct brw_reg src)
741428d7b3dSmrg{
742428d7b3dSmrg	struct gen8_instruction *insn = gen8_next_insn(p, opcode);
743428d7b3dSmrg	__gen8_set_dst(p, insn, dst);
744428d7b3dSmrg	__gen8_set_src0(insn, src);
745428d7b3dSmrg	return insn;
746428d7b3dSmrg}
747428d7b3dSmrg
748428d7b3dSmrgstatic inline struct gen8_instruction *gen8_alu2(struct brw_compile *p,
749428d7b3dSmrg						 unsigned opcode,
750428d7b3dSmrg						 struct brw_reg dst,
751428d7b3dSmrg						 struct brw_reg src0,
752428d7b3dSmrg						 struct brw_reg src1)
753428d7b3dSmrg{
754428d7b3dSmrg	struct gen8_instruction *insn = gen8_next_insn(p, opcode);
755428d7b3dSmrg	__gen8_set_dst(p, insn, dst);
756428d7b3dSmrg	__gen8_set_src0(insn, src0);
757428d7b3dSmrg	__gen8_set_src1(insn, src1);
758428d7b3dSmrg	return insn;
759428d7b3dSmrg}
760428d7b3dSmrg
/*
 * ALU1(OP) defines gen8_<OP>(p, dst, src0): a thin wrapper that emits the
 * one-source instruction BRW_OPCODE_<OP> through gen8_alu1() and returns
 * the emitted instruction.
 */
#define ALU1(OP)							\
static inline struct gen8_instruction *					\
gen8_##OP(struct brw_compile *p,					\
	  struct brw_reg dst,						\
	  struct brw_reg src0)						\
{									\
	return gen8_alu1(p, BRW_OPCODE_##OP, dst, src0);		\
}
768428d7b3dSmrg
/*
 * ALU2(OP) defines gen8_<OP>(p, dst, src0, src1): a thin wrapper that emits
 * the two-source instruction BRW_OPCODE_<OP> through gen8_alu2() and
 * returns the emitted instruction.
 */
#define ALU2(OP)							\
static inline struct gen8_instruction *					\
gen8_##OP(struct brw_compile *p,					\
	  struct brw_reg dst,						\
	  struct brw_reg src0,						\
	  struct brw_reg src1)						\
{									\
	return gen8_alu2(p, BRW_OPCODE_##OP, dst, src0, src1);		\
}
777428d7b3dSmrg
778428d7b3dSmrgstatic inline struct gen8_instruction *gen8_ADD(struct brw_compile *p,
779428d7b3dSmrg						struct brw_reg dst,
780428d7b3dSmrg						struct brw_reg src0,
781428d7b3dSmrg						struct brw_reg src1)
782428d7b3dSmrg{
783428d7b3dSmrg	/* 6.2.2: add */
784428d7b3dSmrg	if (src0.type == BRW_REGISTER_TYPE_F ||
785428d7b3dSmrg	    (src0.file == BRW_IMMEDIATE_VALUE &&
786428d7b3dSmrg	     src0.type == BRW_REGISTER_TYPE_VF)) {
787428d7b3dSmrg		assert(src1.type != BRW_REGISTER_TYPE_UD);
788428d7b3dSmrg		assert(src1.type != BRW_REGISTER_TYPE_D);
789428d7b3dSmrg	}
790428d7b3dSmrg
791428d7b3dSmrg	if (src1.type == BRW_REGISTER_TYPE_F ||
792428d7b3dSmrg	    (src1.file == BRW_IMMEDIATE_VALUE &&
793428d7b3dSmrg	     src1.type == BRW_REGISTER_TYPE_VF)) {
794428d7b3dSmrg		assert(src0.type != BRW_REGISTER_TYPE_UD);
795428d7b3dSmrg		assert(src0.type != BRW_REGISTER_TYPE_D);
796428d7b3dSmrg	}
797428d7b3dSmrg
798428d7b3dSmrg	return gen8_alu2(p, BRW_OPCODE_ADD, dst, src0, src1);
799428d7b3dSmrg}
800428d7b3dSmrg
801428d7b3dSmrgstatic inline struct gen8_instruction *gen8_MUL(struct brw_compile *p,
802428d7b3dSmrg						struct brw_reg dst,
803428d7b3dSmrg						struct brw_reg src0,
804428d7b3dSmrg						struct brw_reg src1)
805428d7b3dSmrg{
806428d7b3dSmrg	/* 6.32.38: mul */
807428d7b3dSmrg	if (src0.type == BRW_REGISTER_TYPE_D ||
808428d7b3dSmrg	    src0.type == BRW_REGISTER_TYPE_UD ||
809428d7b3dSmrg	    src1.type == BRW_REGISTER_TYPE_D ||
810428d7b3dSmrg	    src1.type == BRW_REGISTER_TYPE_UD) {
811428d7b3dSmrg		assert(dst.type != BRW_REGISTER_TYPE_F);
812428d7b3dSmrg	}
813428d7b3dSmrg
814428d7b3dSmrg	if (src0.type == BRW_REGISTER_TYPE_F ||
815428d7b3dSmrg	    (src0.file == BRW_IMMEDIATE_VALUE &&
816428d7b3dSmrg	     src0.type == BRW_REGISTER_TYPE_VF)) {
817428d7b3dSmrg		assert(src1.type != BRW_REGISTER_TYPE_UD);
818428d7b3dSmrg		assert(src1.type != BRW_REGISTER_TYPE_D);
819428d7b3dSmrg	}
820428d7b3dSmrg
821428d7b3dSmrg	if (src1.type == BRW_REGISTER_TYPE_F ||
822428d7b3dSmrg	    (src1.file == BRW_IMMEDIATE_VALUE &&
823428d7b3dSmrg	     src1.type == BRW_REGISTER_TYPE_VF)) {
824428d7b3dSmrg		assert(src0.type != BRW_REGISTER_TYPE_UD);
825428d7b3dSmrg		assert(src0.type != BRW_REGISTER_TYPE_D);
826428d7b3dSmrg	}
827428d7b3dSmrg
828428d7b3dSmrg	assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
829428d7b3dSmrg	       src0.nr != BRW_ARF_ACCUMULATOR);
830428d7b3dSmrg	assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
831428d7b3dSmrg	       src1.nr != BRW_ARF_ACCUMULATOR);
832428d7b3dSmrg
833428d7b3dSmrg	return gen8_alu2(p, BRW_OPCODE_MUL, dst, src0, src1);
834428d7b3dSmrg}
835428d7b3dSmrg
836428d7b3dSmrgALU1(MOV);
837428d7b3dSmrgALU2(SEL);
838428d7b3dSmrgALU1(NOT);
839428d7b3dSmrgALU2(AND);
840428d7b3dSmrgALU2(OR);
841428d7b3dSmrgALU2(XOR);
842428d7b3dSmrgALU2(SHR);
843428d7b3dSmrgALU2(SHL);
844428d7b3dSmrgALU2(RSR);
845428d7b3dSmrgALU2(RSL);
846428d7b3dSmrgALU2(ASR);
847428d7b3dSmrgALU1(FRC);
848428d7b3dSmrgALU1(RNDD);
849428d7b3dSmrgALU2(MAC);
850428d7b3dSmrgALU2(MACH);
851428d7b3dSmrgALU1(LZD);
852428d7b3dSmrgALU2(DP4);
853428d7b3dSmrgALU2(DPH);
854428d7b3dSmrgALU2(DP3);
855428d7b3dSmrgALU2(DP2);
856428d7b3dSmrgALU2(LINE);
857428d7b3dSmrgALU2(PLN);
858428d7b3dSmrg
859428d7b3dSmrgALU1(RNDZ);
860428d7b3dSmrgALU1(RNDE);
861428d7b3dSmrg
862428d7b3dSmrg#undef ALU1
863428d7b3dSmrg#undef ALU2
864428d7b3dSmrg
865428d7b3dSmrgstatic void gen8_set_compression_control(struct brw_compile *p,
866428d7b3dSmrg					 enum brw_compression compression_control)
867428d7b3dSmrg{
868428d7b3dSmrg	unsigned v;
869428d7b3dSmrg
870428d7b3dSmrg	p->compressed = compression_control == BRW_COMPRESSION_COMPRESSED;
871428d7b3dSmrg
872428d7b3dSmrg	switch (compression_control) {
873428d7b3dSmrg	default: assert(0);
874428d7b3dSmrg	case BRW_COMPRESSION_NONE:       v = GEN6_COMPRESSION_1Q; break;
875428d7b3dSmrg	case BRW_COMPRESSION_2NDHALF:    v = GEN6_COMPRESSION_2Q; break;
876428d7b3dSmrg	case BRW_COMPRESSION_COMPRESSED: v = GEN6_COMPRESSION_1H; break;
877428d7b3dSmrg	}
878428d7b3dSmrg	__gen8_set_cmpt_control((struct gen8_instruction *)p->current, v);
879428d7b3dSmrg}
880428d7b3dSmrg
881428d7b3dSmrgstatic inline void gen8_set_mask_control(struct brw_compile *p, unsigned value)
882428d7b3dSmrg{
883428d7b3dSmrg	__gen8_set_mask_control((struct gen8_instruction *)p->current, value);
884428d7b3dSmrg}
885428d7b3dSmrg
886428d7b3dSmrgstatic inline void gen8_set_saturate(struct brw_compile *p, unsigned value)
887428d7b3dSmrg{
888428d7b3dSmrg	__gen8_set_saturate((struct gen8_instruction *)p->current, value);
889428d7b3dSmrg}
890428d7b3dSmrg
891428d7b3dSmrgstatic inline void gen8_set_acc_write_control(struct brw_compile *p, unsigned value)
892428d7b3dSmrg{
893428d7b3dSmrg	__gen8_set_acc_wr_control((struct gen8_instruction *)p->current, value);
894428d7b3dSmrg}
895428d7b3dSmrg
896428d7b3dSmrgstatic void gen8_SAMPLE(struct brw_compile *p,
897428d7b3dSmrg			struct brw_reg dst,
898428d7b3dSmrg			unsigned msg_reg_nr,
899428d7b3dSmrg			unsigned binding_table_index,
900428d7b3dSmrg			unsigned sampler,
901428d7b3dSmrg			unsigned writemask,
902428d7b3dSmrg			unsigned msg_type,
903428d7b3dSmrg			unsigned response_length,
904428d7b3dSmrg			unsigned msg_length,
905428d7b3dSmrg			bool header_present,
906428d7b3dSmrg			unsigned simd_mode)
907428d7b3dSmrg{
908428d7b3dSmrg	struct brw_reg src0 = brw_message_reg(msg_reg_nr);
909428d7b3dSmrg
910428d7b3dSmrg	assert(writemask);
911428d7b3dSmrg
912428d7b3dSmrg	if (writemask != WRITEMASK_XYZW) {
913428d7b3dSmrg		writemask = ~writemask & WRITEMASK_XYZW;
914428d7b3dSmrg
915428d7b3dSmrg		brw_push_insn_state(p);
916428d7b3dSmrg
917428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
918428d7b3dSmrg		gen8_set_mask_control(p, BRW_MASK_DISABLE);
919428d7b3dSmrg
920428d7b3dSmrg		gen8_MOV(p, __retype_ud(src0), __retype_ud(brw_vec8_grf(0,0)));
921428d7b3dSmrg		gen8_MOV(p, get_element_ud(src0, 2), brw_imm_ud(writemask << 12));
922428d7b3dSmrg
923428d7b3dSmrg		brw_pop_insn_state(p);
924428d7b3dSmrg	}
925428d7b3dSmrg
926428d7b3dSmrg	{
927428d7b3dSmrg		struct gen8_instruction *insn;
928428d7b3dSmrg
929428d7b3dSmrg		insn = gen8_next_insn(p, BRW_OPCODE_SEND);
930428d7b3dSmrg		__gen8_set_pred_control(insn, 0); /* XXX */
931428d7b3dSmrg		__gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q);
932428d7b3dSmrg
933428d7b3dSmrg		__gen8_set_dst(p, insn, dst);
934428d7b3dSmrg		__gen8_set_src0(insn, src0);
935428d7b3dSmrg		__gen8_set_sampler_message(insn,
936428d7b3dSmrg					   binding_table_index,
937428d7b3dSmrg					   sampler,
938428d7b3dSmrg					   msg_type,
939428d7b3dSmrg					   response_length,
940428d7b3dSmrg					   msg_length,
941428d7b3dSmrg					   header_present,
942428d7b3dSmrg					   simd_mode);
943428d7b3dSmrg	}
944428d7b3dSmrg}
945428d7b3dSmrg
946428d7b3dSmrg/* shader logic */
947428d7b3dSmrg
948428d7b3dSmrgstatic void wm_affine_st(struct brw_compile *p, int dw, int channel, int msg)
949428d7b3dSmrg{
950428d7b3dSmrg	int uv;
951428d7b3dSmrg
952428d7b3dSmrg	if (dw == 16) {
953428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
954428d7b3dSmrg		uv = 6;
955428d7b3dSmrg	} else {
956428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
957428d7b3dSmrg		uv = 4;
958428d7b3dSmrg	}
959428d7b3dSmrg	uv += 2*channel;
960428d7b3dSmrg
961428d7b3dSmrg	msg++;
962428d7b3dSmrg	gen8_PLN(p,
963428d7b3dSmrg		 brw_message_reg(msg),
964428d7b3dSmrg		 brw_vec1_grf(uv, 0),
965428d7b3dSmrg		 brw_vec8_grf(2, 0));
966428d7b3dSmrg	msg += dw/8;
967428d7b3dSmrg
968428d7b3dSmrg	gen8_PLN(p,
969428d7b3dSmrg		 brw_message_reg(msg),
970428d7b3dSmrg		 brw_vec1_grf(uv, 4),
971428d7b3dSmrg		 brw_vec8_grf(2, 0));
972428d7b3dSmrg}
973428d7b3dSmrg
974428d7b3dSmrgstatic inline unsigned simd(int dw)
975428d7b3dSmrg{
976428d7b3dSmrg	return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8;
977428d7b3dSmrg}
978428d7b3dSmrg
979428d7b3dSmrgstatic inline struct brw_reg sample_result(int dw, int result)
980428d7b3dSmrg{
981428d7b3dSmrg	return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
982428d7b3dSmrg		       BRW_REGISTER_TYPE_UW,
983428d7b3dSmrg		       dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
984428d7b3dSmrg		       dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
985428d7b3dSmrg		       BRW_HORIZONTAL_STRIDE_1,
986428d7b3dSmrg		       BRW_SWIZZLE_XYZW,
987428d7b3dSmrg		       WRITEMASK_XYZW);
988428d7b3dSmrg}
989428d7b3dSmrg
990428d7b3dSmrgstatic int wm_sample(struct brw_compile *p, int dw,
991428d7b3dSmrg		     int channel, int msg, int result)
992428d7b3dSmrg{
993428d7b3dSmrg	int len = dw == 16 ? 4 : 2;
994428d7b3dSmrg	gen8_SAMPLE(p, sample_result(dw, result), ++msg,
995428d7b3dSmrg		    channel+1, channel, WRITEMASK_XYZW, 0,
996428d7b3dSmrg		    2*len, len, false, simd(dw));
997428d7b3dSmrg	return result;
998428d7b3dSmrg}
999428d7b3dSmrg
1000428d7b3dSmrgstatic int wm_sample__alpha(struct brw_compile *p, int dw,
1001428d7b3dSmrg			    int channel, int msg, int result)
1002428d7b3dSmrg{
1003428d7b3dSmrg	int mlen, rlen;
1004428d7b3dSmrg
1005428d7b3dSmrg	if (dw == 8) {
1006428d7b3dSmrg		mlen = 3;
1007428d7b3dSmrg		rlen = 1;
1008428d7b3dSmrg	} else {
1009428d7b3dSmrg		mlen = 5;
1010428d7b3dSmrg		rlen = 2;
1011428d7b3dSmrg	}
1012428d7b3dSmrg
1013428d7b3dSmrg	gen8_SAMPLE(p, sample_result(dw, result), msg,
1014428d7b3dSmrg		    channel+1, channel, WRITEMASK_W, 0,
1015428d7b3dSmrg		    rlen, mlen, true, simd(dw));
1016428d7b3dSmrg
1017428d7b3dSmrg	return result;
1018428d7b3dSmrg}
1019428d7b3dSmrg
/*
 * Compute the affine texture coordinates for `channel` and sample all four
 * channels of the texture.
 *
 * Returns the GRF number holding the sampler response (`result`).
 */
static int wm_affine(struct brw_compile *p, int dw,
		     int channel, int msg, int result)
{
	int reg;

	wm_affine_st(p, dw, channel, msg);
	reg = wm_sample(p, dw, channel, msg, result);

	return reg;
}
1026428d7b3dSmrg
/*
 * Compute the affine texture coordinates for `channel` and sample only the
 * W channel of the texture.
 *
 * Returns the GRF number holding the sampler response (`result`).
 */
static int wm_affine__alpha(struct brw_compile *p, int dw,
			    int channel, int msg, int result)
{
	int reg;

	wm_affine_st(p, dw, channel, msg);
	reg = wm_sample__alpha(p, dw, channel, msg, result);

	return reg;
}
1033428d7b3dSmrg
1034428d7b3dSmrgstatic inline struct brw_reg null_result(int dw)
1035428d7b3dSmrg{
1036428d7b3dSmrg	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
1037428d7b3dSmrg		       BRW_REGISTER_TYPE_UW,
1038428d7b3dSmrg		       dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
1039428d7b3dSmrg		       dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
1040428d7b3dSmrg		       BRW_HORIZONTAL_STRIDE_1,
1041428d7b3dSmrg		       BRW_SWIZZLE_XYZW,
1042428d7b3dSmrg		       WRITEMASK_XYZW);
1043428d7b3dSmrg}
1044428d7b3dSmrg
1045428d7b3dSmrgstatic void fb_write(struct brw_compile *p, int dw)
1046428d7b3dSmrg{
1047428d7b3dSmrg	struct gen8_instruction *insn;
1048428d7b3dSmrg	unsigned msg_control, msg_len;
1049428d7b3dSmrg	struct brw_reg src0;
1050428d7b3dSmrg
1051428d7b3dSmrg	if (dw == 16) {
1052428d7b3dSmrg		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
1053428d7b3dSmrg		msg_len = 8;
1054428d7b3dSmrg	} else {
1055428d7b3dSmrg		msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1056428d7b3dSmrg		msg_len = 4;
1057428d7b3dSmrg	}
1058428d7b3dSmrg	msg_control |= 1 << 4; /* Last Render Target */
1059428d7b3dSmrg
1060428d7b3dSmrg	/* The execution mask is ignored for render target writes. */
1061428d7b3dSmrg	insn = gen8_next_insn(p, BRW_OPCODE_SEND);
1062428d7b3dSmrg	__gen8_set_pred_control(insn, 0);
1063428d7b3dSmrg	__gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q);
1064428d7b3dSmrg
1065428d7b3dSmrg	src0 = brw_message_reg(2);
1066428d7b3dSmrg
1067428d7b3dSmrg	__gen8_set_dst(p, insn, null_result(dw));
1068428d7b3dSmrg	__gen8_set_src0(insn, src0);
1069428d7b3dSmrg	__gen8_set_dp_message(insn,
1070428d7b3dSmrg			      GEN6_SFID_DATAPORT_RENDER_CACHE,
1071428d7b3dSmrg			      0,
1072428d7b3dSmrg			      GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
1073428d7b3dSmrg			      msg_control,
1074428d7b3dSmrg			      msg_len, 0,
1075428d7b3dSmrg			      false, true);
1076428d7b3dSmrg}
1077428d7b3dSmrg
1078428d7b3dSmrgstatic void wm_write__mask(struct brw_compile *p, int dw,
1079428d7b3dSmrg			   int src, int mask)
1080428d7b3dSmrg{
1081428d7b3dSmrg	int n;
1082428d7b3dSmrg
1083428d7b3dSmrg	if (dw == 8) {
1084428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1085428d7b3dSmrg		for (n = 0; n < 4; n++)
1086428d7b3dSmrg			gen8_MUL(p,
1087428d7b3dSmrg				 brw_message_reg(2 + n),
1088428d7b3dSmrg				 brw_vec8_grf(src + n, 0),
1089428d7b3dSmrg				 brw_vec8_grf(mask, 0));
1090428d7b3dSmrg	} else {
1091428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1092428d7b3dSmrg		for (n = 0; n < 4; n++)
1093428d7b3dSmrg			gen8_MUL(p,
1094428d7b3dSmrg				 brw_message_reg(2 + 2*n),
1095428d7b3dSmrg				 brw_vec8_grf(src + 2*n, 0),
1096428d7b3dSmrg				 brw_vec8_grf(mask, 0));
1097428d7b3dSmrg	}
1098428d7b3dSmrg
1099428d7b3dSmrg	fb_write(p, dw);
1100428d7b3dSmrg}
1101428d7b3dSmrg
1102428d7b3dSmrgstatic void wm_write__opacity(struct brw_compile *p, int dw, int src, int mask)
1103428d7b3dSmrg{
1104428d7b3dSmrg	int n;
1105428d7b3dSmrg
1106428d7b3dSmrg	if (dw == 8) {
1107428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1108428d7b3dSmrg		for (n = 0; n < 4; n++)
1109428d7b3dSmrg			gen8_MUL(p,
1110428d7b3dSmrg				 brw_message_reg(2 + n),
1111428d7b3dSmrg				 brw_vec8_grf(src + n, 0),
1112428d7b3dSmrg				 brw_vec1_grf(mask, 3));
1113428d7b3dSmrg	} else {
1114428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1115428d7b3dSmrg		for (n = 0; n < 4; n++)
1116428d7b3dSmrg			gen8_MUL(p,
1117428d7b3dSmrg				 brw_message_reg(2 + 2*n),
1118428d7b3dSmrg				 brw_vec8_grf(src + 2*n, 0),
1119428d7b3dSmrg				 brw_vec1_grf(mask, 3));
1120428d7b3dSmrg	}
1121428d7b3dSmrg
1122428d7b3dSmrg	fb_write(p, dw);
1123428d7b3dSmrg}
1124428d7b3dSmrg
1125428d7b3dSmrgstatic void wm_write__mask_ca(struct brw_compile *p, int dw,
1126428d7b3dSmrg			      int src, int mask)
1127428d7b3dSmrg{
1128428d7b3dSmrg	int n;
1129428d7b3dSmrg
1130428d7b3dSmrg	if (dw == 8) {
1131428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1132428d7b3dSmrg		for (n = 0; n < 4; n++)
1133428d7b3dSmrg			gen8_MUL(p,
1134428d7b3dSmrg				 brw_message_reg(2 + n),
1135428d7b3dSmrg				 brw_vec8_grf(src + n, 0),
1136428d7b3dSmrg				 brw_vec8_grf(mask + n, 0));
1137428d7b3dSmrg	} else {
1138428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1139428d7b3dSmrg		for (n = 0; n < 4; n++)
1140428d7b3dSmrg			gen8_MUL(p,
1141428d7b3dSmrg				 brw_message_reg(2 + 2*n),
1142428d7b3dSmrg				 brw_vec8_grf(src + 2*n, 0),
1143428d7b3dSmrg				 brw_vec8_grf(mask + 2*n, 0));
1144428d7b3dSmrg	}
1145428d7b3dSmrg
1146428d7b3dSmrg	fb_write(p, dw);
1147428d7b3dSmrg}
1148428d7b3dSmrg
1149428d7b3dSmrgstatic void gen8_compile_init(struct brw_compile *p)
1150428d7b3dSmrg{
1151428d7b3dSmrg	struct gen8_instruction *insn = memset(p->current, 0, sizeof(*insn));
1152428d7b3dSmrg	COMPILE_TIME_ASSERT(sizeof(*insn) == sizeof(*p->current));
1153428d7b3dSmrg	__gen8_set_mask_control(insn, BRW_MASK_ENABLE);
1154428d7b3dSmrg	__gen8_set_saturate(insn, 0);
1155428d7b3dSmrg	__gen8_set_cmpt_control(insn, GEN6_COMPRESSION_1Q);
1156428d7b3dSmrg	//__gen8_set_pred_control(insn, 0xf);
1157428d7b3dSmrg}
1158428d7b3dSmrg
1159428d7b3dSmrgbool
1160428d7b3dSmrggen8_wm_kernel__affine(struct brw_compile *p, int dispatch)
1161428d7b3dSmrg{
1162428d7b3dSmrg	gen8_compile_init(p);
1163428d7b3dSmrg
1164428d7b3dSmrg	wm_affine(p, dispatch, 0, 10, MRF_HACK_START+2);
1165428d7b3dSmrg	fb_write(p, dispatch);
1166428d7b3dSmrg	return true;
1167428d7b3dSmrg}
1168428d7b3dSmrg
/*
 * Assemble the WM kernel for an affine source multiplied by the alpha of an
 * affine mask.
 *
 * Channel 0 (source) is sampled in full into GRF 12 using message base 1;
 * channel 1 (mask) has only its alpha sampled into GRF 20 using message
 * base 6. The product is written to the render target.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
	int src_reg, mask_reg;

	gen8_compile_init(p);

	src_reg = wm_affine(p, dispatch, 0, 1, 12);
	mask_reg = wm_affine__alpha(p, dispatch, 1, 6, 20);

	wm_write__mask(p, dispatch, src_reg, mask_reg);
	return true;
}
1182428d7b3dSmrg
/*
 * Assemble the WM kernel for an affine source multiplied per channel by an
 * affine component-alpha mask.
 *
 * Channel 0 (source) is sampled in full into GRF 12 using message base 1;
 * channel 1 (mask) is sampled in full into GRF 20 using message base 6.
 * The per-channel product is written to the render target.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
	int src_reg, mask_reg;

	gen8_compile_init(p);

	src_reg = wm_affine(p, dispatch, 0, 1, 12);
	mask_reg = wm_affine(p, dispatch, 1, 6, 20);

	wm_write__mask_ca(p, dispatch, src_reg, mask_reg);
	return true;
}
1196428d7b3dSmrg
/*
 * Assemble the WM kernel that multiplies an affine mask by the alpha of an
 * affine source.
 *
 * Channel 0 (source) has only its alpha sampled into GRF 12 using message
 * base 1; channel 1 (mask) is sampled in full into GRF 16 using message
 * base 6. Note the swapped roles in the final write: the mask supplies the
 * colour channels and the source alpha is the multiplier.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
	int src_alpha, mask_reg;

	gen8_compile_init(p);

	src_alpha = wm_affine__alpha(p, dispatch, 0, 1, 12);
	mask_reg = wm_affine(p, dispatch, 1, 6, 16);

	wm_write__mask(p, dispatch, mask_reg, src_alpha);
	return true;
}
1210428d7b3dSmrg
1211428d7b3dSmrg/* Projective variants */
1212428d7b3dSmrg
1213428d7b3dSmrgstatic void wm_projective_st(struct brw_compile *p, int dw,
1214428d7b3dSmrg			     int channel, int msg)
1215428d7b3dSmrg{
1216428d7b3dSmrg	int uv;
1217428d7b3dSmrg
1218428d7b3dSmrg	gen8_compile_init(p);
1219428d7b3dSmrg
1220428d7b3dSmrg	if (dw == 16) {
1221428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1222428d7b3dSmrg		uv = 6;
1223428d7b3dSmrg	} else {
1224428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1225428d7b3dSmrg		uv = 4;
1226428d7b3dSmrg	}
1227428d7b3dSmrg	uv += 2*channel;
1228428d7b3dSmrg
1229428d7b3dSmrg	msg++;
1230428d7b3dSmrg	/* First compute 1/z */
1231428d7b3dSmrg	gen8_PLN(p,
1232428d7b3dSmrg		 brw_vec8_grf(30, 0),
1233428d7b3dSmrg		 brw_vec1_grf(uv+1, 0),
1234428d7b3dSmrg		 brw_vec8_grf(2, 0));
1235428d7b3dSmrg
1236428d7b3dSmrg	if (dw == 16) {
1237428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_NONE);
1238428d7b3dSmrg		gen8_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
1239428d7b3dSmrg		gen8_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
1240428d7b3dSmrg		gen8_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1241428d7b3dSmrg	} else
1242428d7b3dSmrg		gen8_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
1243428d7b3dSmrg
1244428d7b3dSmrg	gen8_PLN(p,
1245428d7b3dSmrg		 brw_vec8_grf(26, 0),
1246428d7b3dSmrg		 brw_vec1_grf(uv, 0),
1247428d7b3dSmrg		 brw_vec8_grf(2, 0));
1248428d7b3dSmrg	gen8_PLN(p,
1249428d7b3dSmrg		 brw_vec8_grf(28, 0),
1250428d7b3dSmrg		 brw_vec1_grf(uv, 4),
1251428d7b3dSmrg		 brw_vec8_grf(2, 0));
1252428d7b3dSmrg
1253428d7b3dSmrg	gen8_MUL(p,
1254428d7b3dSmrg		 brw_message_reg(msg),
1255428d7b3dSmrg		 brw_vec8_grf(26, 0),
1256428d7b3dSmrg		 brw_vec8_grf(30, 0));
1257428d7b3dSmrg	gen8_MUL(p,
1258428d7b3dSmrg		 brw_message_reg(msg + dw/8),
1259428d7b3dSmrg		 brw_vec8_grf(28, 0),
1260428d7b3dSmrg		 brw_vec8_grf(30, 0));
1261428d7b3dSmrg}
1262428d7b3dSmrg
/*
 * Compute the projective texture coordinates for `channel` and sample all
 * four channels of the texture.
 *
 * p        program being assembled
 * dw       dispatch width (8 or 16)
 * channel  texture channel index
 * msg      message base register for the sampler payload
 * result   GRF number that receives the sampler response
 *
 * Returns the GRF number holding the sampler response (`result`).
 */
static int wm_projective(struct brw_compile *p, int dw,
			 int channel, int msg, int result)
{
	/* wm_projective_st() resets the default instruction state as its
	 * very first action, so a separate gen8_compile_init() call here
	 * would only repeat the same reset back to back.
	 */
	wm_projective_st(p, dw, channel, msg);
	return wm_sample(p, dw, channel, msg, result);
}
1271428d7b3dSmrg
/*
 * Compute the projective texture coordinates for `channel` and sample only
 * the W channel of the texture.
 *
 * p        program being assembled
 * dw       dispatch width (8 or 16)
 * channel  texture channel index
 * msg      message base register for the sampler message
 * result   GRF number that receives the sampler response
 *
 * Returns the GRF number holding the sampler response (`result`).
 */
static int wm_projective__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	/* wm_projective_st() resets the default instruction state as its
	 * very first action, so a separate gen8_compile_init() call here
	 * would only repeat the same reset back to back.
	 */
	wm_projective_st(p, dw, channel, msg);
	return wm_sample__alpha(p, dw, channel, msg, result);
}
1280428d7b3dSmrg
1281428d7b3dSmrgbool
1282428d7b3dSmrggen8_wm_kernel__projective(struct brw_compile *p, int dispatch)
1283428d7b3dSmrg{
1284428d7b3dSmrg	gen8_compile_init(p);
1285428d7b3dSmrg
1286428d7b3dSmrg	wm_projective(p, dispatch, 0, 10, MRF_HACK_START+2);
1287428d7b3dSmrg	fb_write(p, dispatch);
1288428d7b3dSmrg	return true;
1289428d7b3dSmrg}
1290428d7b3dSmrg
/*
 * Assemble the WM kernel for a projective source multiplied by the alpha of
 * a projective mask.
 *
 * Channel 0 (source) is sampled in full into GRF 12 using message base 1;
 * channel 1 (mask) has only its alpha sampled into GRF 20 using message
 * base 6. The product is written to the render target.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
	int src_reg, mask_reg;

	gen8_compile_init(p);

	src_reg = wm_projective(p, dispatch, 0, 1, 12);
	mask_reg = wm_projective__alpha(p, dispatch, 1, 6, 20);

	wm_write__mask(p, dispatch, src_reg, mask_reg);
	return true;
}
1304428d7b3dSmrg
/*
 * Assemble the WM kernel for a projective source multiplied per channel by
 * a projective component-alpha mask.
 *
 * Channel 0 (source) is sampled in full into GRF 12 using message base 1;
 * channel 1 (mask) is sampled in full into GRF 20 using message base 6.
 * The per-channel product is written to the render target.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
	int src_reg, mask_reg;

	gen8_compile_init(p);

	src_reg = wm_projective(p, dispatch, 0, 1, 12);
	mask_reg = wm_projective(p, dispatch, 1, 6, 20);

	wm_write__mask_ca(p, dispatch, src_reg, mask_reg);
	return true;
}
1318428d7b3dSmrg
/*
 * Assemble the WM kernel that multiplies a projective mask by the alpha of
 * a projective source.
 *
 * Channel 0 (source) has only its alpha sampled into GRF 12 using message
 * base 1; channel 1 (mask) is sampled in full into GRF 16 using message
 * base 6. Note the swapped roles in the final write: the mask supplies the
 * colour channels and the source alpha is the multiplier.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
	int src_alpha, mask_reg;

	gen8_compile_init(p);

	src_alpha = wm_projective__alpha(p, dispatch, 0, 1, 12);
	mask_reg = wm_projective(p, dispatch, 1, 6, 16);

	wm_write__mask(p, dispatch, mask_reg, src_alpha);
	return true;
}
1332428d7b3dSmrg
/*
 * Assemble the WM kernel for an affine source scaled by a constant opacity.
 *
 * Channel 0 (source) is sampled in full into GRF 12 using message base 1.
 * The opacity is not sampled; it is read from a fixed GRF: 8 for a dispatch
 * width of 16, 6 otherwise.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
{
	int src_reg, opacity_reg;

	gen8_compile_init(p);

	src_reg = wm_affine(p, dispatch, 0, 1, 12);
	if (dispatch == 16)
		opacity_reg = 8;
	else
		opacity_reg = 6;

	wm_write__opacity(p, dispatch, src_reg, opacity_reg);
	return true;
}
1346428d7b3dSmrg
/*
 * Assemble the WM kernel for a projective source scaled by a constant
 * opacity.
 *
 * Channel 0 (source) is sampled in full into GRF 12 using message base 1.
 * The opacity is not sampled; it is read from a fixed GRF: 8 for a dispatch
 * width of 16, 6 otherwise.
 *
 * Always returns true.
 */
bool
gen8_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
{
	int src_reg, opacity_reg;

	gen8_compile_init(p);

	if (dispatch == 16)
		opacity_reg = 8;
	else
		opacity_reg = 6;
	src_reg = wm_projective(p, dispatch, 0, 1, 12);

	wm_write__opacity(p, dispatch, src_reg, opacity_reg);
	return true;
}
1360