1/*
2 * Copyright 2007 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "nv_include.h"
24
25#include "hwdefs/nv_object.xml.h"
26#include "hwdefs/nv30-40_3d.xml.h"
27#include "nv04_accel.h"
28
/* Pairs a Render picture format with the render-target colour format
 * word used when that picture is the destination of a composite.
 */
typedef struct nv_pict_surface_format {
	int	 pict_fmt;	/* Render PICT_* code; -1 marks the end of a table */
	uint32_t card_fmt;	/* NV30_3D_RT_FORMAT_COLOR_* value for this format */
} nv_pict_surface_format_t;
33
/* Pairs a Render picture format with the texture format and swizzle
 * words used when that picture is sampled as a source or mask.
 */
typedef struct nv_pict_texture_format {
	int	 pict_fmt;	/* Render PICT_* code; -1 marks the end of a table */
	uint32_t card_fmt;	/* NV40_3D_TEX_FORMAT_FORMAT_* value */
	uint32_t card_swz;	/* OR of the NV30_3D_TEX_SWIZZLE_* fields */
} nv_pict_texture_format_t;
39
/* Blend setup for one Render operator; NV40PictOp[] holds one of these
 * per operator.
 */
typedef struct nv_pict_op {
	Bool	 src_alpha;	/* set on the rows whose dst factor is a SRC_ALPHA variant */
	Bool	 dst_alpha;	/* set on the rows whose src factor is a DST_ALPHA variant */
	uint32_t src_card_op;	/* source blend factor word, built with SF() */
	uint32_t dst_card_op;	/* destination blend factor word, built with DF() */
} nv_pict_op_t;
46
47static nv_pict_surface_format_t
48NV40SurfaceFormat[] = {
49	{ PICT_a8r8g8b8	, NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 },
50	{ PICT_x8r8g8b8	, NV30_3D_RT_FORMAT_COLOR_X8R8G8B8 },
51	{ PICT_r5g6b5	, NV30_3D_RT_FORMAT_COLOR_R5G6B5   },
52	{ PICT_a8       , NV30_3D_RT_FORMAT_COLOR_B8       },
53	{ -1, ~0 }
54};
55
56static nv_pict_surface_format_t *
57NV40_GetPictSurfaceFormat(int format)
58{
59	int i = 0;
60
61	while (NV40SurfaceFormat[i].pict_fmt != -1) {
62		if (NV40SurfaceFormat[i].pict_fmt == format)
63			return &NV40SurfaceFormat[i];
64		i++;
65	}
66
67	return NULL;
68}
69
/*
 * Row builder for NV40TextureFormat[] (undefined again right after the
 * table):
 *   r          - Render format suffix, pasted onto PICT_
 *   tf         - texture format suffix, pasted onto
 *                NV40_3D_TEX_FORMAT_FORMAT_
 *   ts0x..ts0w - suffixes for the four SWIZZLE_S0 fields (the rows below
 *                use ZERO, ONE and S1)
 *   ts1x..ts1w - suffixes for the four SWIZZLE_S1 fields (the rows below
 *                use X, Y, Z and W)
 * NOTE(review): presumably S0 picks between a constant and the component
 * named by the matching S1 field -- confirm against the hardware docs.
 */
#define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                        \
  {                                                                            \
  PICT_##r, NV40_3D_TEX_FORMAT_FORMAT_##tf,                                    \
  NV30_3D_TEX_SWIZZLE_S0_X_##ts0x | NV30_3D_TEX_SWIZZLE_S0_Y_##ts0y |          \
  NV30_3D_TEX_SWIZZLE_S0_Z_##ts0z | NV30_3D_TEX_SWIZZLE_S0_W_##ts0w |          \
  NV30_3D_TEX_SWIZZLE_S1_X_##ts1x | NV30_3D_TEX_SWIZZLE_S1_Y_##ts1y |          \
  NV30_3D_TEX_SWIZZLE_S1_Z_##ts1z | NV30_3D_TEX_SWIZZLE_S1_W_##ts1w,           \
  }
/*
 * Render picture formats that can be sampled as a texture.  The x-formats
 * reuse the hardware format of their a-counterpart with the S0 W field set
 * to ONE; a8 is sampled through the L8 format.  The list is terminated by
 * the entry whose pict_fmt is -1.
 */
static nv_pict_texture_format_t
NV40TextureFormat[] = {
        _(a8r8g8b8, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W),
        _(x8r8g8b8, A8R8G8B8,   S1,   S1,   S1,  ONE, X, Y, Z, W),
        _(x8b8g8r8, A8R8G8B8,   S1,   S1,   S1,  ONE, Z, Y, X, W),
        _(a1r5g5b5, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
        _(x1r5g5b5, A1R5G5B5,   S1,   S1,   S1,  ONE, X, Y, Z, W),
        _(  r5g6b5,   R5G6B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
        _(      a8,       L8, ZERO, ZERO, ZERO,   S1, X, X, X, X),
        { -1, ~0, ~0 }
};
#undef _
90
91static nv_pict_texture_format_t *
92NV40_GetPictTextureFormat(int format)
93{
94	int i = 0;
95
96	while (NV40TextureFormat[i].pict_fmt != -1) {
97		if (NV40TextureFormat[i].pict_fmt == format)
98			return &NV40TextureFormat[i];
99		i++;
100	}
101
102	return NULL;
103}
104
/*
 * SF(bf) / DF(bf) build a source / destination blend-factor word that
 * uses the same factor 'bf' for both the RGB and the ALPHA field.  They
 * stay defined after the table and are used again further down the file.
 */
#define SF(bf) (NV30_3D_BLEND_FUNC_SRC_RGB_##bf |                              \
		NV30_3D_BLEND_FUNC_SRC_ALPHA_##bf)
#define DF(bf) (NV30_3D_BLEND_FUNC_DST_RGB_##bf |                              \
		NV30_3D_BLEND_FUNC_DST_ALPHA_##bf)
/*
 * Blend setup per Render operator, indexed by the operator number (see
 * NV40_GetPictOpRec()).  Columns: src_alpha, dst_alpha, source factor,
 * destination factor.  Row order must follow the operator numbering
 * named in the comments on the left.
 */
static nv_pict_op_t
NV40PictOp[] = {
/* Clear       */ { 0, 0, SF(               ZERO), DF(               ZERO) },
/* Src         */ { 0, 0, SF(                ONE), DF(               ZERO) },
/* Dst         */ { 0, 0, SF(               ZERO), DF(                ONE) },
/* Over        */ { 1, 0, SF(                ONE), DF(ONE_MINUS_SRC_ALPHA) },
/* OverReverse */ { 0, 1, SF(ONE_MINUS_DST_ALPHA), DF(                ONE) },
/* In          */ { 0, 1, SF(          DST_ALPHA), DF(               ZERO) },
/* InReverse   */ { 1, 0, SF(               ZERO), DF(          SRC_ALPHA) },
/* Out         */ { 0, 1, SF(ONE_MINUS_DST_ALPHA), DF(               ZERO) },
/* OutReverse  */ { 1, 0, SF(               ZERO), DF(ONE_MINUS_SRC_ALPHA) },
/* Atop        */ { 1, 1, SF(          DST_ALPHA), DF(ONE_MINUS_SRC_ALPHA) },
/* AtopReverse */ { 1, 1, SF(ONE_MINUS_DST_ALPHA), DF(          SRC_ALPHA) },
/* Xor         */ { 1, 1, SF(ONE_MINUS_DST_ALPHA), DF(ONE_MINUS_SRC_ALPHA) },
/* Add         */ { 0, 0, SF(                ONE), DF(                ONE) }
};
125
126static nv_pict_op_t *
127NV40_GetPictOpRec(int op)
128{
129	if (op >= PictOpSaturate)
130		return NULL;
131	return &NV40PictOp[op];
132}
133
134static void
135NV40_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
136		PictFormatShort dest_format, Bool component_alpha)
137{
138	NVPtr pNv = NVPTR(pScrn);
139	struct nouveau_pushbuf *push = pNv->pushbuf;
140	uint32_t sblend, dblend;
141
142	sblend = blend->src_card_op;
143	dblend = blend->dst_card_op;
144
145	if (blend->dst_alpha) {
146		if (!PICT_FORMAT_A(dest_format)) {
147			if (sblend == SF(DST_ALPHA)) {
148				sblend = SF(ONE);
149			} else if (sblend == SF(ONE_MINUS_DST_ALPHA)) {
150				sblend = SF(ZERO);
151			}
152		} else if (dest_format == PICT_a8) {
153			if (sblend == SF(DST_ALPHA)) {
154				sblend = SF(DST_COLOR);
155			} else if (sblend == SF(ONE_MINUS_DST_ALPHA)) {
156				sblend = SF(ONE_MINUS_DST_COLOR);
157			}
158		}
159	}
160
161	if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
162		if (dblend == DF(SRC_ALPHA)) {
163			dblend = DF(SRC_COLOR);
164		} else if (dblend == DF(ONE_MINUS_SRC_ALPHA)) {
165			dblend = DF(ONE_MINUS_SRC_COLOR);
166		}
167	}
168
169	if (sblend == SF(ONE) && dblend == DF(ZERO)) {
170		BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1);
171		PUSH_DATA (push, 0);
172	} else {
173		BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 5);
174		PUSH_DATA (push, 1);
175		PUSH_DATA (push, sblend);
176		PUSH_DATA (push, dblend);
177		PUSH_DATA (push, 0x00000000);
178		PUSH_DATA (push, NV40_3D_BLEND_EQUATION_ALPHA_FUNC_ADD |
179				 NV40_3D_BLEND_EQUATION_RGB_FUNC_ADD);
180	}
181}
182
183static Bool
184NV40EXAPictSolid(NVPtr pNv, PicturePtr pPict, int unit)
185{
186	struct nouveau_pushbuf *push = pNv->pushbuf;
187
188	PUSH_DATAu(push, pNv->scratch, SOLID(unit), 2);
189	PUSH_DATA (push, pPict->pSourcePict->solidFill.color);
190	PUSH_DATA (push, 0);
191	BEGIN_NV04(push, NV30_3D(TEX_OFFSET(unit)), 8);
192	PUSH_MTHDl(push, NV30_3D(TEX_OFFSET(unit)), pNv->scratch, SOLID(unit),
193			 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
194	PUSH_DATA (push, NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8 | 0x8000 |
195			 NV40_3D_TEX_FORMAT_LINEAR |
196			 NV30_3D_TEX_FORMAT_DIMS_2D |
197			 NV30_3D_TEX_FORMAT_NO_BORDER |
198			 (1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT) |
199			 NV30_3D_TEX_FORMAT_DMA0);
200	PUSH_DATA (push, NV30_3D_TEX_WRAP_S_REPEAT |
201			 NV30_3D_TEX_WRAP_T_REPEAT |
202			 NV30_3D_TEX_WRAP_R_REPEAT);
203	PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE);
204	PUSH_DATA (push, 0x0000aae4);
205	PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST |
206			 NV30_3D_TEX_FILTER_MAG_NEAREST | 0x3fd6);
207	PUSH_DATA (push, 0x00010001);
208	PUSH_DATA (push, 0x00000000);
209	BEGIN_NV04(push, NV40_3D(TEX_SIZE1(unit)), 1);
210	PUSH_DATA (push, 0x00100040);
211
212	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 17);
213	PUSH_DATA (push, unit * 4);
214	PUSH_DATAf(push, 1.0);
215	PUSH_DATAf(push, 0.0);
216	PUSH_DATAf(push, 0.0);
217	PUSH_DATAf(push, 0.0);
218	PUSH_DATAf(push, 0.0);
219	PUSH_DATAf(push, 1.0);
220	PUSH_DATAf(push, 0.0);
221	PUSH_DATAf(push, 0.0);
222	PUSH_DATAf(push, 0.0);
223	PUSH_DATAf(push, 0.0);
224	PUSH_DATAf(push, 1.0);
225	PUSH_DATAf(push, 0.0);
226	PUSH_DATAf(push, 1.0);
227	PUSH_DATAf(push, 1.0);
228	PUSH_DATAf(push, 0.0);
229	PUSH_DATAf(push, 0.0);
230	return TRUE;
231}
232
/*
 * Gradient source pictures are not implemented: none of the arguments is
 * used and the function always returns FALSE.
 */
static Bool
NV40EXAPictGradient(NVPtr pNv, PicturePtr pPict, int unit)
{
	return FALSE;
}
238
239static Bool
240NV40EXAPictTexture(NVPtr pNv, PixmapPtr pPix, PicturePtr pPict, int unit)
241{
242	unsigned reloc = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR;
243	struct nouveau_pushbuf *push = pNv->pushbuf;
244	struct nouveau_bo *bo = nouveau_pixmap_bo(pPix);
245	nv_pict_texture_format_t *fmt;
246
247	fmt = NV40_GetPictTextureFormat(pPict->format);
248	if (!fmt)
249		return FALSE;
250
251	BEGIN_NV04(push, NV30_3D(TEX_OFFSET(unit)), 8);
252	PUSH_MTHDl(push, NV30_3D(TEX_OFFSET(unit)), bo, 0, reloc);
253	PUSH_MTHDs(push, NV30_3D(TEX_FORMAT(unit)), bo, fmt->card_fmt |
254			 NV40_3D_TEX_FORMAT_LINEAR |
255			 NV30_3D_TEX_FORMAT_DIMS_2D | 0x8000 |
256			 NV30_3D_TEX_FORMAT_NO_BORDER |
257			 (1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT),
258			 reloc | NOUVEAU_BO_OR,
259			 NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
260	if (pPict->repeat) {
261		switch(pPict->repeatType) {
262		case RepeatPad:
263			PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE |
264					 NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE |
265					 NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE);
266			break;
267		case RepeatReflect:
268			PUSH_DATA (push, NV30_3D_TEX_WRAP_S_MIRRORED_REPEAT |
269					 NV30_3D_TEX_WRAP_T_MIRRORED_REPEAT |
270					 NV30_3D_TEX_WRAP_R_MIRRORED_REPEAT);
271			break;
272		case RepeatNormal:
273		default:
274			PUSH_DATA (push, NV30_3D_TEX_WRAP_S_REPEAT |
275					 NV30_3D_TEX_WRAP_T_REPEAT |
276					 NV30_3D_TEX_WRAP_R_REPEAT);
277			break;
278		}
279	} else {
280		PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_BORDER |
281				 NV30_3D_TEX_WRAP_T_CLAMP_TO_BORDER |
282				 NV30_3D_TEX_WRAP_R_CLAMP_TO_BORDER);
283	}
284	PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE);
285	PUSH_DATA (push, fmt->card_swz);
286	if (pPict->filter == PictFilterBilinear) {
287		PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_LINEAR |
288				 NV30_3D_TEX_FILTER_MAG_LINEAR | 0x3fd6);
289	} else {
290		PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST |
291				 NV30_3D_TEX_FILTER_MAG_NEAREST | 0x3fd6);
292	}
293	PUSH_DATA (push, (pPix->drawable.width << 16) | pPix->drawable.height);
294	PUSH_DATA (push, 0); /* border ARGB */
295	BEGIN_NV04(push, NV40_3D(TEX_SIZE1(unit)), 1);
296	PUSH_DATA (push, (1 << NV40_3D_TEX_SIZE1_DEPTH__SHIFT) |
297			 (uint32_t)exaGetPixmapPitch(pPix));
298
299	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 17);
300	PUSH_DATA (push, unit * 4);
301	if (pPict->transform) {
302		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[0][0]));
303		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[0][1]));
304		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[0][2]));
305		PUSH_DATAf(push, 0);
306		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[1][0]));
307		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[1][1]));
308		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[1][2]));
309		PUSH_DATAf(push, 0);
310		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[2][0]));
311		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[2][1]));
312		PUSH_DATAf(push, xFixedToFloat(pPict->transform->matrix[2][2]));
313		PUSH_DATAf(push, 0);
314	} else {
315		PUSH_DATAf(push, 1.0);
316		PUSH_DATAf(push, 0.0);
317		PUSH_DATAf(push, 0.0);
318		PUSH_DATAf(push, 0.0);
319		PUSH_DATAf(push, 0.0);
320		PUSH_DATAf(push, 1.0);
321		PUSH_DATAf(push, 0.0);
322		PUSH_DATAf(push, 0.0);
323		PUSH_DATAf(push, 0.0);
324		PUSH_DATAf(push, 0.0);
325		PUSH_DATAf(push, 1.0);
326		PUSH_DATAf(push, 0.0);
327	}
328	PUSH_DATAf(push, 1.0 / pPix->drawable.width);
329	PUSH_DATAf(push, 1.0 / pPix->drawable.height);
330	PUSH_DATAf(push, 0.0);
331	PUSH_DATAf(push, 1.0);
332
333	return TRUE;
334}
335
336static Bool
337NV40EXAPicture(NVPtr pNv, PixmapPtr ppix, PicturePtr ppict, int unit)
338{
339	if (ppict->pDrawable)
340		return NV40EXAPictTexture(pNv, ppix, ppict, unit);
341
342	switch (ppict->pSourcePict->type) {
343	case SourcePictTypeSolidFill:
344		return NV40EXAPictSolid(pNv, ppict, unit);
345	case SourcePictTypeLinear:
346		return NV40EXAPictGradient(pNv, ppict, unit);
347	default:
348		break;
349	}
350
351	return FALSE;
352}
353
354static Bool
355NV40_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PictFormatShort format)
356{
357	NVPtr pNv = NVPTR(pScrn);
358	struct nouveau_pushbuf *push = pNv->pushbuf;
359	struct nouveau_bo *bo = nouveau_pixmap_bo(pPix);
360	nv_pict_surface_format_t *fmt;
361
362	fmt = NV40_GetPictSurfaceFormat(format);
363	if (!fmt) {
364		ErrorF("AIII no format\n");
365		return FALSE;
366	}
367
368	BEGIN_NV04(push, NV30_3D(RT_FORMAT), 3);
369	PUSH_DATA (push, NV30_3D_RT_FORMAT_TYPE_LINEAR |
370			 NV30_3D_RT_FORMAT_ZETA_Z24S8 | fmt->card_fmt);
371	PUSH_DATA (push, exaGetPixmapPitch(pPix));
372	PUSH_MTHDl(push, NV30_3D(COLOR0_OFFSET), bo, 0,
373			 NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
374	return TRUE;
375}
376
377static Bool
378NV40EXACheckCompositeTexture(PicturePtr pPict, PicturePtr pdPict, int op)
379{
380	nv_pict_texture_format_t *fmt;
381	int w = 1, h = 1;
382
383	if (pPict->pDrawable) {
384		w = pPict->pDrawable->width;
385		h = pPict->pDrawable->height;
386	} else {
387		switch (pPict->pSourcePict->type) {
388		case SourcePictTypeSolidFill:
389			break;
390		default:
391			NOUVEAU_FALLBACK("gradient\n");
392		}
393	}
394
395	if ((w > 4096) || (h > 4096))
396		NOUVEAU_FALLBACK("picture too large, %dx%d\n", w, h);
397
398	fmt = NV40_GetPictTextureFormat(pPict->format);
399	if (!fmt)
400		NOUVEAU_FALLBACK("picture format 0x%08x not supported\n",
401				pPict->format);
402
403	if (pPict->filter != PictFilterNearest &&
404	    pPict->filter != PictFilterBilinear)
405		NOUVEAU_FALLBACK("filter 0x%x not supported\n", pPict->filter);
406
407	/* Opengl and Render disagree on what should be sampled outside an XRGB
408	 * texture (with no repeating). Opengl has a hardcoded alpha value of
409	 * 1.0, while render expects 0.0. We assume that clipping is done for
410	 * untranformed sources.
411	 */
412	if (NV40PictOp[op].src_alpha && !pPict->repeat &&
413		pPict->transform && (PICT_FORMAT_A(pPict->format) == 0)
414		&& (PICT_FORMAT_A(pdPict->format) != 0))
415		NOUVEAU_FALLBACK("REPEAT_NONE unsupported for XRGB source\n");
416
417	return TRUE;
418}
419
420Bool
421NV40EXACheckComposite(int op, PicturePtr psPict,
422			      PicturePtr pmPict,
423			      PicturePtr pdPict)
424{
425	nv_pict_surface_format_t *fmt;
426	nv_pict_op_t *opr;
427
428	opr = NV40_GetPictOpRec(op);
429	if (!opr)
430		NOUVEAU_FALLBACK("unsupported blend op 0x%x\n", op);
431
432	fmt = NV40_GetPictSurfaceFormat(pdPict->format);
433	if (!fmt)
434		NOUVEAU_FALLBACK("dst picture format 0x%08x not supported\n",
435				pdPict->format);
436
437	if (!NV40EXACheckCompositeTexture(psPict, pdPict, op))
438		NOUVEAU_FALLBACK("src picture\n");
439	if (pmPict) {
440		if (pmPict->componentAlpha &&
441		    PICT_FORMAT_RGB(pmPict->format) &&
442		    opr->src_alpha && opr->src_card_op != SF(ZERO))
443			NOUVEAU_FALLBACK("mask CA + SA\n");
444		if (!NV40EXACheckCompositeTexture(pmPict, pdPict, op))
445			NOUVEAU_FALLBACK("mask picture\n");
446	}
447
448	return TRUE;
449}
450
451Bool
452NV40EXAPrepareComposite(int op, PicturePtr psPict,
453				PicturePtr pmPict,
454				PicturePtr pdPict,
455				PixmapPtr  psPix,
456				PixmapPtr  pmPix,
457				PixmapPtr  pdPix)
458{
459	ScrnInfoPtr pScrn = xf86ScreenToScrn(pdPix->drawable.pScreen);
460	NVPtr pNv = NVPTR(pScrn);
461	nv_pict_op_t *blend = NV40_GetPictOpRec(op);
462	struct nouveau_pushbuf *push = pNv->pushbuf;
463	uint32_t fragprog;
464
465	if (!PUSH_SPACE(push, 128))
466		NOUVEAU_FALLBACK("space\n");
467	PUSH_RESET(push);
468
469	NV40_SetupBlend(pScrn, blend, pdPict->format,
470			(pmPict && pmPict->componentAlpha &&
471			 PICT_FORMAT_RGB(pmPict->format)));
472
473	if (!NV40_SetupSurface(pScrn, pdPix, pdPict->format) ||
474	    !NV40EXAPicture(pNv, psPix, psPict, 0))
475		return FALSE;
476
477	if (pmPict) {
478		if (!NV40EXAPicture(pNv, pmPix, pmPict, 1))
479			return FALSE;
480
481		if (pdPict->format == PICT_a8) {
482			fragprog = PFP_C_A8;
483		} else
484		if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
485			if (blend->src_alpha)
486				fragprog = PFP_CCASA;
487			else
488				fragprog = PFP_CCA;
489		} else {
490			fragprog = PFP_C;
491		}
492	} else {
493		if (pdPict->format == PICT_a8)
494			fragprog = PFP_S_A8;
495		else
496			fragprog = PFP_S;
497	}
498
499	BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1);
500	PUSH_MTHD (push, NV30_3D(FP_ACTIVE_PROGRAM), pNv->scratch, fragprog,
501			 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
502			 NOUVEAU_BO_OR,
503			 NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
504			 NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
505	BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1);
506	PUSH_DATA (push, 0x02000000);
507
508	/* Appears to be some kind of cache flush, needed here at least
509	 * sometimes.. funky text rendering otherwise :)
510	 */
511	BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1);
512	PUSH_DATA (push, 2);
513	BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1);
514	PUSH_DATA (push, 1);
515
516	nouveau_pushbuf_bufctx(push, pNv->bufctx);
517	if (nouveau_pushbuf_validate(push)) {
518		nouveau_pushbuf_bufctx(push, NULL);
519		return FALSE;
520	}
521
522	return TRUE;
523}
524
/*
 * Emit one vertex made of three 16-bit coordinate pairs.
 *
 *   x1, y1 - first pair, written at VTX_ATTR_2I(8)
 *   x2, y2 - second pair, written in the word that follows it
 *   dx, dy - third pair, written at VTX_ATTR_2I(0)
 *
 * Each pair is packed as (y << 16) | x with both values truncated to
 * 16 bits.  The shift is done on uint32_t: a masked value with bit 15
 * set (every negative coordinate) would otherwise be shifted into the
 * sign bit of an int, which is undefined behaviour in C.
 */
static __inline__ void
PUSH_VTX2s(struct nouveau_pushbuf *push,
	   int x1, int y1, int x2, int y2, int dx, int dy)
{
	BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(8)), 2);
	PUSH_DATA (push, ((uint32_t)(y1 & 0xffff) << 16) |
			 (uint32_t)(x1 & 0xffff));
	PUSH_DATA (push, ((uint32_t)(y2 & 0xffff) << 16) |
			 (uint32_t)(x2 & 0xffff));
	BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
	PUSH_DATA (push, ((uint32_t)(dy & 0xffff) << 16) |
			 (uint32_t)(dx & 0xffff));
}
535
536void
537NV40EXAComposite(PixmapPtr pdPix,
538		 int sx, int sy, int mx, int my, int dx, int dy, int w, int h)
539{
540	ScrnInfoPtr pScrn = xf86ScreenToScrn(pdPix->drawable.pScreen);
541	NVPtr pNv = NVPTR(pScrn);
542	struct nouveau_pushbuf *push = pNv->pushbuf;
543
544	if (!PUSH_SPACE(push, 64))
545		return;
546
547	/* We're drawing a triangle, we need to scissor it to a quad. */
548	/* The scissors are here for a good reason, we don't get the full
549	 * image, but just a part.
550	 */
551	/* Handling the cliprects is done for us already. */
552	BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2);
553	PUSH_DATA (push, (w << 16) | dx);
554	PUSH_DATA (push, (h << 16) | dy);
555	BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
556	PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_TRIANGLES);
557	PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2));
558	PUSH_VTX2s(push, sx, sy, mx, my, dx, dy);
559	PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy);
560	BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
561	PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
562}
563
564void
565NV40EXADoneComposite(PixmapPtr pdPix)
566{
567	ScrnInfoPtr pScrn = xf86ScreenToScrn(pdPix->drawable.pScreen);
568	nouveau_pushbuf_bufctx(NVPTR(pScrn)->pushbuf, NULL);
569}
570
/*
 * Bitmasks indexed by the low nibble of an NV4x chipset id.  In
 * NVAccelInitNV40TCL() a set bit in the first mask selects NV40_3D_CLASS;
 * otherwise a set bit in the second selects NV44_3D_CLASS.  A nibble in
 * neither mask is reported as an unknown chipset.
 */
#define NV30_3D_CHIPSET_4X_MASK 0x00000baf
#define NV44TCL_CHIPSET_4X_MASK 0x00005450
573Bool
574NVAccelInitNV40TCL(ScrnInfoPtr pScrn)
575{
576	NVPtr pNv = NVPTR(pScrn);
577	struct nouveau_pushbuf *push = pNv->pushbuf;
578	struct nv04_fifo *fifo = pNv->channel->data;
579	uint32_t class = 0, chipset;
580	int i;
581
582	NVXVComputeBicubicFilter(pNv->scratch, XV_TABLE, XV_TABLE_SIZE);
583
584	chipset = pNv->dev->chipset;
585	if ((chipset & 0xf0) == NV_ARCH_40) {
586		chipset &= 0xf;
587		if (NV30_3D_CHIPSET_4X_MASK & (1<<chipset))
588			class = NV40_3D_CLASS;
589		else if (NV44TCL_CHIPSET_4X_MASK & (1<<chipset))
590			class = NV44_3D_CLASS;
591		else {
592			xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
593					"NV40EXA: Unknown chipset NV4%1x\n", chipset);
594			return FALSE;
595		}
596	} else if ( (chipset & 0xf0) == 0x60) {
597		class = NV44_3D_CLASS;
598	} else
599		return TRUE;
600
601	if (nouveau_object_new(pNv->channel, Nv3D, class, NULL, 0, &pNv->Nv3D))
602		return FALSE;
603
604	if (!PUSH_SPACE(push, 256))
605		return FALSE;
606
607	BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1);
608	PUSH_DATA (push, pNv->Nv3D->handle);
609	BEGIN_NV04(push, NV30_3D(DMA_NOTIFY), 1);
610	PUSH_DATA (push, pNv->notify0->handle);
611	BEGIN_NV04(push, NV30_3D(DMA_TEXTURE0), 2);
612	PUSH_DATA (push, fifo->vram);
613	PUSH_DATA (push, fifo->gart);
614	BEGIN_NV04(push, NV30_3D(DMA_COLOR0), 2);
615	PUSH_DATA (push, fifo->vram);
616	PUSH_DATA (push, fifo->vram);
617
618	/* voodoo */
619	BEGIN_NV04(push, SUBC_3D(0x1ea4), 3);
620	PUSH_DATA (push, 0x00000010);
621	PUSH_DATA (push, 0x01000100);
622	PUSH_DATA (push, 0xff800006);
623	BEGIN_NV04(push, SUBC_3D(0x1fc4), 1);
624	PUSH_DATA (push, 0x06144321);
625	BEGIN_NV04(push, SUBC_3D(0x1fc8), 2);
626	PUSH_DATA (push, 0xedcba987);
627	PUSH_DATA (push, 0x00000021);
628	BEGIN_NV04(push, SUBC_3D(0x1fd0), 1);
629	PUSH_DATA (push, 0x00171615);
630	BEGIN_NV04(push, SUBC_3D(0x1fd4), 1);
631	PUSH_DATA (push, 0x001b1a19);
632	BEGIN_NV04(push, SUBC_3D(0x1ef8), 1);
633	PUSH_DATA (push, 0x0020ffff);
634	BEGIN_NV04(push, SUBC_3D(0x1d64), 1);
635	PUSH_DATA (push, 0x00d30000);
636	BEGIN_NV04(push, NV30_3D(ENGINE), 1);
637	PUSH_DATA (push, NV30_3D_ENGINE_FP);
638
	/* This removes the stair-shaped tearing that I get. */
640	/* Verified on one G70 card that it doesn't cause regressions for people without the problem. */
641	/* The blob sets this up by default for NV43. */
642	BEGIN_NV04(push, NV30_3D(FP_REG_CONTROL), 1);
643	PUSH_DATA (push, 0x0000000F);
644
645	BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8);
646	PUSH_DATAf(push, 0.0);
647	PUSH_DATAf(push, 0.0);
648	PUSH_DATAf(push, 0.0);
649	PUSH_DATAf(push, 0.0);
650	PUSH_DATAf(push, 1.0);
651	PUSH_DATAf(push, 1.0);
652	PUSH_DATAf(push, 1.0);
653	PUSH_DATAf(push, 0.0);
654
655	/* default 3D state */
656	/*XXX: replace with the same state that the DRI emits on startup */
657	BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(0)), 1);
658	PUSH_DATA (push, 0);
659	BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(1)), 1);
660	PUSH_DATA (push, 0);
661	BEGIN_NV04(push, NV30_3D(ALPHA_FUNC_ENABLE), 1);
662	PUSH_DATA (push, 0);
663	BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2);
664	PUSH_DATA (push, 0);
665	PUSH_DATA (push, 0);
666	BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1);
667	PUSH_DATA (push, 0x01010101); /* TR,TR,TR,TR */
668	BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1);
669	PUSH_DATA (push, 0);
670	BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1);
671	PUSH_DATA (push, 0);
672	BEGIN_NV04(push, NV30_3D(COLOR_LOGIC_OP_ENABLE), 2);
673	PUSH_DATA (push, 0);
674	PUSH_DATA (push, NV30_3D_COLOR_LOGIC_OP_OP_COPY);
675	BEGIN_NV04(push, NV30_3D(DITHER_ENABLE), 1);
676	PUSH_DATA (push, 0);
677	BEGIN_NV04(push, NV30_3D(SHADE_MODEL), 1);
678	PUSH_DATA (push, NV30_3D_SHADE_MODEL_SMOOTH);
679	BEGIN_NV04(push, NV30_3D(POLYGON_OFFSET_FACTOR),2);
680	PUSH_DATAf(push, 0.0);
681	PUSH_DATAf(push, 0.0);
682	BEGIN_NV04(push, NV30_3D(POLYGON_MODE_FRONT), 2);
683	PUSH_DATA (push, NV30_3D_POLYGON_MODE_FRONT_FILL);
684	PUSH_DATA (push, NV30_3D_POLYGON_MODE_BACK_FILL);
685	BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_PATTERN(0)), 0x20);
686	for (i=0;i<0x20;i++)
687		PUSH_DATA (push, 0xFFFFFFFF);
688	for (i=0;i<16;i++) {
689		BEGIN_NV04(push, NV30_3D(TEX_ENABLE(i)), 1);
690		PUSH_DATA (push, 0);
691	}
692
693	BEGIN_NV04(push, NV30_3D(DEPTH_CONTROL), 1);
694	PUSH_DATA (push, 0x110);
695
696	BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
697	PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0);
698
699	BEGIN_NV04(push, NV30_3D(RT_HORIZ), 2);
700	PUSH_DATA (push, (4096 << 16));
701	PUSH_DATA (push, (4096 << 16));
702	BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2);
703	PUSH_DATA (push, (4096 << 16));
704	PUSH_DATA (push, (4096 << 16));
705	BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2);
706	PUSH_DATA (push, (4096 << 16));
707	PUSH_DATA (push, (4096 << 16));
708	BEGIN_NV04(push, NV30_3D(VIEWPORT_CLIP_HORIZ(0)), 2);
709	PUSH_DATA (push, (4095 << 16));
710	PUSH_DATA (push, (4095 << 16));
711
712	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
713	PUSH_DATA (push, 0);
714	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
715	PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0] */
716	PUSH_DATA (push, 0x0040000d);
717	PUSH_DATA (push, 0x8106c083);
718	PUSH_DATA (push, 0x6041ef80);
719	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
720	PUSH_DATA (push, 0x00001c6c); /* mov r0.xyw, a[8].xyww */
721	PUSH_DATA (push, 0x0040080f);
722	PUSH_DATA (push, 0x8106c083);
723	PUSH_DATA (push, 0x6041affc);
724	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
725	PUSH_DATA (push, 0x00009c6c); /* dp3 r1.x, r0.xyw, c[0].xyz */
726	PUSH_DATA (push, 0x0140000f);
727	PUSH_DATA (push, 0x808680c3);
728	PUSH_DATA (push, 0x60410ffc);
729	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
730	PUSH_DATA (push, 0x00009c6c); /* dp3 r1.y, r0.xyw, c[1].xyz */
731	PUSH_DATA (push, 0x0140100f);
732	PUSH_DATA (push, 0x808680c3);
733	PUSH_DATA (push, 0x60408ffc);
734	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
735	PUSH_DATA (push, 0x00009c6c); /* dp3 r1.w, r0.xyw, c[2].xyz */
736	PUSH_DATA (push, 0x0140200f);
737	PUSH_DATA (push, 0x808680c3);
738	PUSH_DATA (push, 0x60402ffc);
739	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
740	PUSH_DATA (push, 0x401f9c6c); /* mul o[tex0].xyw, r1, c[3] */
741	PUSH_DATA (push, 0x0080300d);
742	PUSH_DATA (push, 0x8286c0c3);
743	PUSH_DATA (push, 0x6041af9c);
744	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
745	PUSH_DATA (push, 0x00001c6c); /* mov r0.xyw, a[9].xyww */
746	PUSH_DATA (push, 0x0040090f);
747	PUSH_DATA (push, 0x8106c083);
748	PUSH_DATA (push, 0x6041affc);
749	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
750	PUSH_DATA (push, 0x00009c6c); /* dp3 r1.x, r0.xyw, c[4].xyz */
751	PUSH_DATA (push, 0x0140400f);
752	PUSH_DATA (push, 0x808680c3);
753	PUSH_DATA (push, 0x60410ffc);
754	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
755	PUSH_DATA (push, 0x00009c6c); /* dp3 r1.y, r0.xyw, c[5].xyz */
756	PUSH_DATA (push, 0x0140500f);
757	PUSH_DATA (push, 0x808680c3);
758	PUSH_DATA (push, 0x60408ffc);
759	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
760	PUSH_DATA (push, 0x00009c6c); /* dp3 r1.w, r0.xyw, c[6].xyz */
761	PUSH_DATA (push, 0x0140600f);
762	PUSH_DATA (push, 0x808680c3);
763	PUSH_DATA (push, 0x60402ffc);
764	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
765	PUSH_DATA (push, 0x401f9c6c); /* exit mul o[tex1].xyw, r1, c[4] */
766	PUSH_DATA (push, 0x0080700d);
767	PUSH_DATA (push, 0x8286c0c3);
768	PUSH_DATA (push, 0x6041afa1);
769	BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
770	PUSH_DATA (push, 0x00000000); /* exit */
771	PUSH_DATA (push, 0x00000000);
772	PUSH_DATA (push, 0x00000000);
773	PUSH_DATA (push, 0x00000001);
774	BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
775	PUSH_DATA (push, 0);
776	BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
777	PUSH_DATA (push, 0x00000309);
778	PUSH_DATA (push, 0x0000c001);
779
780	PUSH_DATAu(push, pNv->scratch, PFP_PASS, 1 * 4);
781	PUSH_DATAs(push, 0x01403e81); /* mov r0, a[col0] */
782	PUSH_DATAs(push, 0x1c9dc801);
783	PUSH_DATAs(push, 0x0001c800);
784	PUSH_DATAs(push, 0x3fe1c800);
785
786	PUSH_DATAu(push, pNv->scratch, PFP_S, 2 * 4);
787	PUSH_DATAs(push, 0x18009e00); /* txp r0, a[tex0], t[0] */
788	PUSH_DATAs(push, 0x1c9dc801);
789	PUSH_DATAs(push, 0x0001c800);
790	PUSH_DATAs(push, 0x3fe1c800);
791	PUSH_DATAs(push, 0x01401e81); /* mov r0, r0 */
792	PUSH_DATAs(push, 0x1c9dc800);
793	PUSH_DATAs(push, 0x0001c800);
794	PUSH_DATAs(push, 0x0001c800);
795
796	PUSH_DATAu(push, pNv->scratch, PFP_S_A8, 2 * 4);
797	PUSH_DATAs(push, 0x18009000); /* txp r0.w, a[tex0], t[0] */
798	PUSH_DATAs(push, 0x1c9dc801);
799	PUSH_DATAs(push, 0x0001c800);
800	PUSH_DATAs(push, 0x3fe1c800);
801	PUSH_DATAs(push, 0x01401e81); /* mov r0, r0.w */
802	PUSH_DATAs(push, 0x1c9dfe00);
803	PUSH_DATAs(push, 0x0001c800);
804	PUSH_DATAs(push, 0x0001c800);
805
806	PUSH_DATAu(push, pNv->scratch, PFP_C, 3 * 4);
807	PUSH_DATAs(push, 0x1802b102); /* txpc0 r1.w, a[tex1], t[1] */
808	PUSH_DATAs(push, 0x1c9dc801);
809	PUSH_DATAs(push, 0x0001c800);
810	PUSH_DATAs(push, 0x3fe1c800);
811	PUSH_DATAs(push, 0x18009e00); /* txp r0 (ne0.w), a[tex0], t[0] */
812	PUSH_DATAs(push, 0x1ff5c801);
813	PUSH_DATAs(push, 0x0001c800);
814	PUSH_DATAs(push, 0x3fe1c800);
815	PUSH_DATAs(push, 0x02001e81); /* mul r0, r0, r1.w */
816	PUSH_DATAs(push, 0x1c9dc800);
817	PUSH_DATAs(push, 0x0001fe04);
818	PUSH_DATAs(push, 0x0001c800);
819
820	PUSH_DATAu(push, pNv->scratch, PFP_C_A8, 3 * 4);
821	PUSH_DATAs(push, 0x1802b102); /* txpc0 r1.w, a[tex1], t[1] */
822	PUSH_DATAs(push, 0x1c9dc801);
823	PUSH_DATAs(push, 0x0001c800);
824	PUSH_DATAs(push, 0x3fe1c800);
825	PUSH_DATAs(push, 0x18009000); /* txp r0.w (ne0.w), a[tex0], t[0] */
826	PUSH_DATAs(push, 0x1ff5c801);
827	PUSH_DATAs(push, 0x0001c800);
828	PUSH_DATAs(push, 0x3fe1c800);
829	PUSH_DATAs(push, 0x02001e81); /* mul r0, r0.w, r1.w */
830	PUSH_DATAs(push, 0x1c9dfe00);
831	PUSH_DATAs(push, 0x0001fe04);
832	PUSH_DATAs(push, 0x0001c800);
833
834	PUSH_DATAu(push, pNv->scratch, PFP_CCA, 3 * 4);
835	PUSH_DATAs(push, 0x18009f00); /* txpc0 r0, a[tex0], t[0] */
836	PUSH_DATAs(push, 0x1c9dc801);
837	PUSH_DATAs(push, 0x0001c800);
838	PUSH_DATAs(push, 0x3fe1c800);
839	PUSH_DATAs(push, 0x1802be02); /* txp r1 (ne0), a[tex1], t[1] */
840	PUSH_DATAs(push, 0x1c95c801);
841	PUSH_DATAs(push, 0x0001c800);
842	PUSH_DATAs(push, 0x3fe1c800);
843	PUSH_DATAs(push, 0x02001e81); /* mul r0, r0, r1 */
844	PUSH_DATAs(push, 0x1c9dc800);
845	PUSH_DATAs(push, 0x0001c804);
846	PUSH_DATAs(push, 0x0001c800);
847
848	PUSH_DATAu(push, pNv->scratch, PFP_CCASA, 3 * 4);
849	PUSH_DATAs(push, 0x18009102); /* txpc0 r1.w, a[tex0], t[0] */
850	PUSH_DATAs(push, 0x1c9dc801);
851	PUSH_DATAs(push, 0x0001c800);
852	PUSH_DATAs(push, 0x3fe1c800);
853	PUSH_DATAs(push, 0x1802be00); /* txp r0 (ne0.w), a[tex1], t[1] */
854	PUSH_DATAs(push, 0x1ff5c801);
855	PUSH_DATAs(push, 0x0001c800);
856	PUSH_DATAs(push, 0x3fe1c800);
857	PUSH_DATAs(push, 0x02001e81); /* mul r0, r1.w, r0 */
858	PUSH_DATAs(push, 0x1c9dfe04);
859	PUSH_DATAs(push, 0x0001c800);
860	PUSH_DATAs(push, 0x0001c800);
861
862	PUSH_DATAu(push, pNv->scratch, PFP_NV12_BILINEAR, 8 * 4);
863	PUSH_DATAs(push, 0x17028200); /* texr r0.x, a[tex0], t[1] */
864	PUSH_DATAs(push, 0x1c9dc801);
865	PUSH_DATAs(push, 0x0001c800);
866	PUSH_DATAs(push, 0x3fe1c800);
867	PUSH_DATAs(push, 0x04000e02); /* madr r1.xyz, r0.x, imm.x, imm.yzww */
868	PUSH_DATAs(push, 0x1c9c0000);
869	PUSH_DATAs(push, 0x00000002);
870	PUSH_DATAs(push, 0x0001f202);
871	PUSH_DATAs(push, 0x3f9507c8); /* { 1.16, -0.87, 0.53, -1.08 } */
872	PUSH_DATAs(push, 0xbf5ee393);
873	PUSH_DATAs(push, 0x3f078fef);
874	PUSH_DATAs(push, 0xbf8a6762);
875	PUSH_DATAs(push, 0x1704ac80); /* texr r0.yz, a[tex1], t[2] */
876	PUSH_DATAs(push, 0x1c9dc801);
877	PUSH_DATAs(push, 0x0001c800);
878	PUSH_DATAs(push, 0x3fe1c800);
879	PUSH_DATAs(push, 0x04000e02); /* madr r1.xyz, r0.y, imm, r1 */
880	PUSH_DATAs(push, 0x1c9cab00);
881	PUSH_DATAs(push, 0x0001c802);
882	PUSH_DATAs(push, 0x0001c804);
883	PUSH_DATAs(push, 0x00000000); /* { 0.00, -0.39, 2.02, 0.00 } */
884	PUSH_DATAs(push, 0xbec890d6);
885	PUSH_DATAs(push, 0x40011687);
886	PUSH_DATAs(push, 0x00000000);
887	PUSH_DATAs(push, 0x04000e81); /* madr r0.xyz, r0.z, imm, r1 */
888	PUSH_DATAs(push, 0x1c9d5500);
889	PUSH_DATAs(push, 0x0001c802);
890	PUSH_DATAs(push, 0x0001c804);
891	PUSH_DATAs(push, 0x3fcc432d); /* { 1.60, -0.81, 0.00, 0.00 } */
892	PUSH_DATAs(push, 0xbf501a37);
893	PUSH_DATAs(push, 0x00000000);
894	PUSH_DATAs(push, 0x00000000);
895
896
897	PUSH_DATAu(push, pNv->scratch, PFP_NV12_BICUBIC, 29 * 4);
898	PUSH_DATAs(push, 0x01008600); /* movr r0.xy, a[tex0] */
899	PUSH_DATAs(push, 0x1c9dc801);
900	PUSH_DATAs(push, 0x0001c800);
901	PUSH_DATAs(push, 0x3fe1c800);
902	PUSH_DATAs(push, 0x03000800); /* addr r0.z, r0.y, imm.x */
903	PUSH_DATAs(push, 0x1c9caa00);
904	PUSH_DATAs(push, 0x00000002);
905	PUSH_DATAs(push, 0x0001c800);
906	PUSH_DATAs(push, 0x3f000000); /* { 0.50, 0.00, 0.00, 0.00 } */
907	PUSH_DATAs(push, 0x00000000);
908	PUSH_DATAs(push, 0x00000000);
909	PUSH_DATAs(push, 0x00000000);
910	PUSH_DATAs(push, 0x03000202); /* addr r1.x, r0, imm.x */
911	PUSH_DATAs(push, 0x1c9dc800);
912	PUSH_DATAs(push, 0x00000002);
913	PUSH_DATAs(push, 0x0001c800);
914	PUSH_DATAs(push, 0x3f000000); /* { 0.50, 0.00, 0.00, 0.00 } */
915	PUSH_DATAs(push, 0x00000000);
916	PUSH_DATAs(push, 0x00000000);
917	PUSH_DATAs(push, 0x00000000);
918	PUSH_DATAs(push, 0x17000f82); /* texrc0 r1.xyz, r0.z, t[0] */
919	PUSH_DATAs(push, 0x1c9d5400);
920	PUSH_DATAs(push, 0x0001c800);
921	PUSH_DATAs(push, 0x0001c800);
922	PUSH_DATAs(push, 0x02001404); /* mulr r2.yw, r1.xxyy, imm.xxyy */
923	PUSH_DATAs(push, 0x1c9ca104);
924	PUSH_DATAs(push, 0x0000a002);
925	PUSH_DATAs(push, 0x0001c800);
926	PUSH_DATAs(push, 0xbf800000); /* { -1.00, 1.00, 0.00, 0.00 } */
927	PUSH_DATAs(push, 0x3f800000);
928	PUSH_DATAs(push, 0x00000000);
929	PUSH_DATAs(push, 0x00000000);
930	PUSH_DATAs(push, 0x17000e86); /* texr r3.xyz, r1, t[0] */
931	PUSH_DATAs(push, 0x1c9dc804);
932	PUSH_DATAs(push, 0x0001c800);
933	PUSH_DATAs(push, 0x0001c800);
934	PUSH_DATAs(push, 0x02000a04); /* mulr r2.xz, r3.xxyy, imm.xxyy */
935	PUSH_DATAs(push, 0x1c9ca10c);
936	PUSH_DATAs(push, 0x0000a002);
937	PUSH_DATAs(push, 0x0001c800);
938	PUSH_DATAs(push, 0xbf800000); /* { -1.00, 1.00, 0.00, 0.00 } */
939	PUSH_DATAs(push, 0x3f800000);
940	PUSH_DATAs(push, 0x00000000);
941	PUSH_DATAs(push, 0x00000000);
942	PUSH_DATAs(push, 0x03001e04); /* addr r2, r0.xyxy, r2 */
943	PUSH_DATAs(push, 0x1c9c8800);
944	PUSH_DATAs(push, 0x0001c808);
945	PUSH_DATAs(push, 0x0001c800);
946	PUSH_DATAs(push, 0x17020402); /* texr r1.y, r2.zwzz, -t[1] */
947	PUSH_DATAs(push, 0x1c9d5c08);
948	PUSH_DATAs(push, 0x0001c800);
949	PUSH_DATAs(push, 0x0001c800);
950	PUSH_DATAs(push, 0x04400282); /* madh r1.x, -r1.z, r1.y, r1.y */
951	PUSH_DATAs(push, 0x1c9f5504);
952	PUSH_DATAs(push, 0x0000aa04);
953	PUSH_DATAs(push, 0x0000aa04);
954	PUSH_DATAs(push, 0x17020400); /* texr r0.y, r2.xwxw, -t[1] */
955	PUSH_DATAs(push, 0x1c9d9808);
956	PUSH_DATAs(push, 0x0001c800);
957	PUSH_DATAs(push, 0x0001c800);
958	PUSH_DATAs(push, 0x04401080); /* madh r0.w, -r1.z, r0.y, r0.y */
959	PUSH_DATAs(push, 0x1c9f5504);
960	PUSH_DATAs(push, 0x0000aa00);
961	PUSH_DATAs(push, 0x0000aa00);
962	PUSH_DATAs(push, 0x17020200); /* texr r0.x, r2.zyxy, t[1] */
963	PUSH_DATAs(push, 0x1c9c8c08);
964	PUSH_DATAs(push, 0x0001c800);
965	PUSH_DATAs(push, 0x0001c800);
966	PUSH_DATAs(push, 0x04400282); /* madh r1.x, r1.z, r0, r1 */
967	PUSH_DATAs(push, 0x1c9d5504);
968	PUSH_DATAs(push, 0x0001c800);
969	PUSH_DATAs(push, 0x0001c904);
970	PUSH_DATAs(push, 0x17020200); /* texr r0.x (NE0.z), r2, t[1] */
971	PUSH_DATAs(push, 0x1555c808);
972	PUSH_DATAs(push, 0x0001c800);
973	PUSH_DATAs(push, 0x0001c800);
974	PUSH_DATAs(push, 0x04400280); /* madh r0.x, r1.z, r0, r0.w */
975	PUSH_DATAs(push, 0x1c9d5504);
976	PUSH_DATAs(push, 0x0001c800);
977	PUSH_DATAs(push, 0x0001ff00);
978	PUSH_DATAs(push, 0x04401080); /* madh r0.w, -r3.z, r1.x, r1.x */
979	PUSH_DATAs(push, 0x1c9f550c);
980	PUSH_DATAs(push, 0x00000104);
981	PUSH_DATAs(push, 0x00000104);
982	PUSH_DATAs(push, 0x1704ac80); /* texr r0.yz, a[tex1], t[2] */
983	PUSH_DATAs(push, 0x1c9dc801);
984	PUSH_DATAs(push, 0x0001c800);
985	PUSH_DATAs(push, 0x3fe1c800);
986	PUSH_DATAs(push, 0x04400280); /* madh r0.x, r3.z, r0, r0.w */
987	PUSH_DATAs(push, 0x1c9d550c);
988	PUSH_DATAs(push, 0x0001c900);
989	PUSH_DATAs(push, 0x0001ff00);
990	PUSH_DATAs(push, 0x04400e82); /* madh r1.xyz, r0.x, imm.x, imm.yzww */
991	PUSH_DATAs(push, 0x1c9c0100);
992	PUSH_DATAs(push, 0x00000002);
993	PUSH_DATAs(push, 0x0001f202);
994	PUSH_DATAs(push, 0x3f9507c8); /* { 1.16, -0.87, 0.53, -1.08 } */
995	PUSH_DATAs(push, 0xbf5ee393);
996	PUSH_DATAs(push, 0x3f078fef);
997	PUSH_DATAs(push, 0xbf8a6762);
998	PUSH_DATAs(push, 0x04400e82); /* madh r1.xyz, r0.y, imm, r1 */
999	PUSH_DATAs(push, 0x1c9cab00);
1000	PUSH_DATAs(push, 0x0001c802);
1001	PUSH_DATAs(push, 0x0001c904);
1002	PUSH_DATAs(push, 0x00000000); /* { 0.00, -0.39, 2.02, 0.00 } */
1003	PUSH_DATAs(push, 0xbec890d6);
1004	PUSH_DATAs(push, 0x40011687);
1005	PUSH_DATAs(push, 0x00000000);
1006	PUSH_DATAs(push, 0x04400e81); /* madh r0.xyz, r0.z, imm, r1 */
1007	PUSH_DATAs(push, 0x1c9d5500);
1008	PUSH_DATAs(push, 0x0001c802);
1009	PUSH_DATAs(push, 0x0001c904);
1010	PUSH_DATAs(push, 0x3fcc432d); /* { 1.60, -0.81, 0.00, 0.00 } */
1011	PUSH_DATAs(push, 0xbf501a37);
1012	PUSH_DATAs(push, 0x00000000);
1013	PUSH_DATAs(push, 0x00000000);
1014	return TRUE;
1015}
1016