radeon_textured_videofuncs.c revision 7821949a
1de2362d3Smrg/*
2de2362d3Smrg * Copyright 2008 Alex Deucher
3de2362d3Smrg *
4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5de2362d3Smrg * copy of this software and associated documentation files (the "Software"),
6de2362d3Smrg * to deal in the Software without restriction, including without limitation
7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the
9de2362d3Smrg * Software is furnished to do so, subject to the following conditions:
10de2362d3Smrg *
11de2362d3Smrg * The above copyright notice and this permission notice (including the next
12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the
13de2362d3Smrg * Software.
14de2362d3Smrg *
15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21de2362d3Smrg * SOFTWARE.
22de2362d3Smrg *
23de2362d3Smrg *
24de2362d3Smrg * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al.
25de2362d3Smrg *
26de2362d3Smrg */
27de2362d3Smrg
287821949aSmrg#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
297821949aSmrg#error Cannot define both MMIO and CP acceleration!
307821949aSmrg#endif
317821949aSmrg
327821949aSmrg#if !defined(UNIXCPP) || defined(ANSICPP)
337821949aSmrg#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
347821949aSmrg#else
357821949aSmrg#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
367821949aSmrg#endif
377821949aSmrg
387821949aSmrg#ifdef ACCEL_MMIO
397821949aSmrg#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
407821949aSmrg#else
417821949aSmrg#ifdef ACCEL_CP
427821949aSmrg#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
437821949aSmrg#else
447821949aSmrg#error No accel type defined!
457821949aSmrg#endif
467821949aSmrg#endif
477821949aSmrg
/*
 * Vertex emission helpers.
 *
 * VTX_OUT_4 writes one vertex as four floats (destination x/y followed by
 * one texture coordinate pair); VTX_OUT_6 writes the same four floats
 * followed by a second coordinate pair.  The values are emitted strictly
 * in argument order.
 */
#if !defined(ACCEL_CP)

/* MMIO backend: each float is written to RADEON_SE_PORT_DATA0. */
#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY)			\
do {								\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY);		\
} while (0)

#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT_4(_dstX, _dstY, _srcX, _srcY);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY);		\
} while (0)

#else /* ACCEL_CP */

/* CP backend: each float is written into the ring. */
#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY)			\
do {								\
    OUT_RING_F(_dstX);						\
    OUT_RING_F(_dstY);						\
    OUT_RING_F(_srcX);						\
    OUT_RING_F(_srcY);						\
} while (0)

#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT_4(_dstX, _dstY, _srcX, _srcY);			\
    OUT_RING_F(_maskX);						\
    OUT_RING_F(_maskY);						\
} while (0)

#endif /* ACCEL_CP */
89de2362d3Smrg
90de2362d3Smrgstatic Bool
917821949aSmrgFUNC_NAME(RADEONPrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
92de2362d3Smrg{
93de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
94de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
95de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
96de2362d3Smrg    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
977821949aSmrg    uint32_t txformat, txsize, txpitch, txoffset;
98de2362d3Smrg    uint32_t dst_pitch, dst_format;
99de2362d3Smrg    uint32_t colorpitch;
100de2362d3Smrg    int pixel_shift;
1017821949aSmrg    int scissor_w = MIN(pPixmap->drawable.width, 2047);
1027821949aSmrg    int scissor_h = MIN(pPixmap->drawable.height, 2047);
1037821949aSmrg    ACCEL_PREAMBLE();
104de2362d3Smrg
1057821949aSmrg#ifdef XF86DRM_MODE
1067821949aSmrg    if (info->cs) {
1077821949aSmrg	int ret;
108de2362d3Smrg
1097821949aSmrg	radeon_cs_space_reset_bos(info->cs);
1107821949aSmrg        radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
111de2362d3Smrg
1127821949aSmrg	if (pPriv->bicubic_enabled)
1137821949aSmrg	    radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
114de2362d3Smrg
1157821949aSmrg	driver_priv = exaGetPixmapDriverPrivate(pPixmap);
1167821949aSmrg	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
1177821949aSmrg
1187821949aSmrg	ret = radeon_cs_space_check(info->cs);
1197821949aSmrg	if (ret) {
1207821949aSmrg	    ErrorF("Not enough RAM to hw accel xv operation\n");
1217821949aSmrg	    return FALSE;
1227821949aSmrg	}
123de2362d3Smrg    }
1247821949aSmrg#else
1257821949aSmrg    (void)src_bo;
1267821949aSmrg#endif
127de2362d3Smrg
128de2362d3Smrg    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
129de2362d3Smrg
1307821949aSmrg
1317821949aSmrg#ifdef USE_EXA
1327821949aSmrg    if (info->useEXA) {
1337821949aSmrg	dst_pitch = exaGetPixmapPitch(pPixmap);
1347821949aSmrg    } else
1357821949aSmrg#endif
1367821949aSmrg    {
1377821949aSmrg        dst_pitch = pPixmap->devKind;
1387821949aSmrg    }
1397821949aSmrg
1407821949aSmrg#ifdef USE_EXA
1417821949aSmrg    if (info->useEXA) {
1427821949aSmrg	RADEON_SWITCH_TO_3D();
1437821949aSmrg    } else
1447821949aSmrg#endif
1457821949aSmrg    {
1467821949aSmrg	BEGIN_ACCEL(2);
1477821949aSmrg	OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
1487821949aSmrg	/* We must wait for 3d to idle, in case source was just written as a dest. */
1497821949aSmrg	OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
1507821949aSmrg		      RADEON_WAIT_HOST_IDLECLEAN |
1517821949aSmrg		      RADEON_WAIT_2D_IDLECLEAN |
1527821949aSmrg		      RADEON_WAIT_3D_IDLECLEAN |
1537821949aSmrg		      RADEON_WAIT_DMA_GUI_IDLE);
1547821949aSmrg	FINISH_ACCEL();
1557821949aSmrg
1567821949aSmrg	if (!info->accel_state->XInited3D)
1577821949aSmrg	    RADEONInit3DEngine(pScrn);
1587821949aSmrg    }
159de2362d3Smrg
160de2362d3Smrg    /* Same for R100/R200 */
161de2362d3Smrg    switch (pPixmap->drawable.bitsPerPixel) {
162de2362d3Smrg    case 16:
163de2362d3Smrg	if (pPixmap->drawable.depth == 15)
164de2362d3Smrg	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
165de2362d3Smrg	else
166de2362d3Smrg	    dst_format = RADEON_COLOR_FORMAT_RGB565;
167de2362d3Smrg	break;
168de2362d3Smrg    case 32:
169de2362d3Smrg	dst_format = RADEON_COLOR_FORMAT_ARGB8888;
170de2362d3Smrg	break;
171de2362d3Smrg    default:
172de2362d3Smrg	return FALSE;
173de2362d3Smrg    }
174de2362d3Smrg
175de2362d3Smrg    if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) {
176de2362d3Smrg	pPriv->is_planar = TRUE;
177de2362d3Smrg	txformat = RADEON_TXFORMAT_Y8;
178de2362d3Smrg    } else {
179de2362d3Smrg	pPriv->is_planar = FALSE;
180de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
181de2362d3Smrg	    txformat = RADEON_TXFORMAT_YVYU422;
182de2362d3Smrg	else
183de2362d3Smrg	    txformat = RADEON_TXFORMAT_VYUY422;
184de2362d3Smrg    }
185de2362d3Smrg
186de2362d3Smrg    txformat |= RADEON_TXFORMAT_NON_POWER2;
187de2362d3Smrg
188de2362d3Smrg    colorpitch = dst_pitch >> pixel_shift;
189de2362d3Smrg
190de2362d3Smrg    if (RADEONTilingEnabled(pScrn, pPixmap))
191de2362d3Smrg	colorpitch |= RADEON_COLOR_TILE_ENABLE;
192de2362d3Smrg
1937821949aSmrg    txoffset = info->cs ? 0 : pPriv->src_offset;
1947821949aSmrg
195de2362d3Smrg    BEGIN_ACCEL_RELOC(4,2);
196de2362d3Smrg
1977821949aSmrg    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format);
198de2362d3Smrg    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap);
199de2362d3Smrg    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap);
2007821949aSmrg    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
201de2362d3Smrg		  RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
202de2362d3Smrg
2037821949aSmrg    FINISH_ACCEL();
204de2362d3Smrg
205de2362d3Smrg    if (pPriv->is_planar) {
206de2362d3Smrg	/* need 2 texcoord sets (even though they are identical) due
207de2362d3Smrg	   to denormalization! hw apparently can't premultiply
208de2362d3Smrg	   same coord set by different texture size */
209de2362d3Smrg	pPriv->vtx_count = 6;
210de2362d3Smrg
211de2362d3Smrg	txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
212de2362d3Smrg		  (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
213de2362d3Smrg	txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64);
214de2362d3Smrg	txpitch -= 32;
215de2362d3Smrg
216de2362d3Smrg	BEGIN_ACCEL_RELOC(23, 3);
217de2362d3Smrg
2187821949aSmrg	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
219de2362d3Smrg					  RADEON_SE_VTX_FMT_ST0 |
220de2362d3Smrg					  RADEON_SE_VTX_FMT_ST1));
221de2362d3Smrg
2227821949aSmrg	OUT_ACCEL_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE |
223de2362d3Smrg				       RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
224de2362d3Smrg				       RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE |
225de2362d3Smrg				       RADEON_PLANAR_YUV_ENABLE));
226de2362d3Smrg
227de2362d3Smrg	/* Y */
2287821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
229de2362d3Smrg		      RADEON_MAG_FILTER_LINEAR |
230de2362d3Smrg		      RADEON_MIN_FILTER_LINEAR |
231de2362d3Smrg		      RADEON_CLAMP_S_CLAMP_LAST |
232de2362d3Smrg		      RADEON_CLAMP_T_CLAMP_LAST |
233de2362d3Smrg		      RADEON_YUV_TO_RGB);
2347821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
2357821949aSmrg	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo);
2367821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
237de2362d3Smrg		      RADEON_COLOR_ARG_A_ZERO |
238de2362d3Smrg		      RADEON_COLOR_ARG_B_ZERO |
239de2362d3Smrg		      RADEON_COLOR_ARG_C_T0_COLOR |
240de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
241de2362d3Smrg		      RADEON_CLAMP_TX);
2427821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
243de2362d3Smrg		      RADEON_ALPHA_ARG_A_ZERO |
244de2362d3Smrg		      RADEON_ALPHA_ARG_B_ZERO |
245de2362d3Smrg		      RADEON_ALPHA_ARG_C_T0_ALPHA |
246de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
247de2362d3Smrg		      RADEON_CLAMP_TX);
248de2362d3Smrg
2497821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
250de2362d3Smrg		      (pPriv->w - 1) |
251de2362d3Smrg		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
2527821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
253de2362d3Smrg		      pPriv->src_pitch - 32);
254de2362d3Smrg
255de2362d3Smrg	/* U */
2567821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1,
257de2362d3Smrg		      RADEON_MAG_FILTER_LINEAR |
258de2362d3Smrg		      RADEON_MIN_FILTER_LINEAR |
259de2362d3Smrg		      RADEON_CLAMP_S_CLAMP_LAST |
260de2362d3Smrg		      RADEON_CLAMP_T_CLAMP_LAST);
2617821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
2627821949aSmrg	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo);
2637821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1,
264de2362d3Smrg		      RADEON_COLOR_ARG_A_ZERO |
265de2362d3Smrg		      RADEON_COLOR_ARG_B_ZERO |
266de2362d3Smrg		      RADEON_COLOR_ARG_C_T0_COLOR |
267de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
268de2362d3Smrg		      RADEON_CLAMP_TX);
2697821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXABLEND_1,
270de2362d3Smrg		      RADEON_ALPHA_ARG_A_ZERO |
271de2362d3Smrg		      RADEON_ALPHA_ARG_B_ZERO |
272de2362d3Smrg		      RADEON_ALPHA_ARG_C_T0_ALPHA |
273de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
274de2362d3Smrg		      RADEON_CLAMP_TX);
275de2362d3Smrg
2767821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, txsize);
2777821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch);
278de2362d3Smrg
279de2362d3Smrg	/* V */
2807821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXFILTER_2,
281de2362d3Smrg		      RADEON_MAG_FILTER_LINEAR |
282de2362d3Smrg		      RADEON_MIN_FILTER_LINEAR |
283de2362d3Smrg		      RADEON_CLAMP_S_CLAMP_LAST |
284de2362d3Smrg		      RADEON_CLAMP_T_CLAMP_LAST);
2857821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
2867821949aSmrg	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo);
2877821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2,
288de2362d3Smrg		      RADEON_COLOR_ARG_A_ZERO |
289de2362d3Smrg		      RADEON_COLOR_ARG_B_ZERO |
290de2362d3Smrg		      RADEON_COLOR_ARG_C_T0_COLOR |
291de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
292de2362d3Smrg		      RADEON_CLAMP_TX);
2937821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXABLEND_2,
294de2362d3Smrg		      RADEON_ALPHA_ARG_A_ZERO |
295de2362d3Smrg		      RADEON_ALPHA_ARG_B_ZERO |
296de2362d3Smrg		      RADEON_ALPHA_ARG_C_T0_ALPHA |
297de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
298de2362d3Smrg		      RADEON_CLAMP_TX);
299de2362d3Smrg
3007821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_2, txsize);
3017821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_2, txpitch);
3027821949aSmrg	FINISH_ACCEL();
303de2362d3Smrg    } else {
304de2362d3Smrg	pPriv->vtx_count = 4;
305de2362d3Smrg	BEGIN_ACCEL_RELOC(9, 1);
306de2362d3Smrg
3077821949aSmrg	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
308de2362d3Smrg					  RADEON_SE_VTX_FMT_ST0));
309de2362d3Smrg
3107821949aSmrg	OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
311de2362d3Smrg
3127821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
313de2362d3Smrg		      RADEON_MAG_FILTER_LINEAR |
314de2362d3Smrg		      RADEON_MIN_FILTER_LINEAR |
315de2362d3Smrg		      RADEON_CLAMP_S_CLAMP_LAST |
316de2362d3Smrg		      RADEON_CLAMP_T_CLAMP_LAST |
317de2362d3Smrg		      RADEON_YUV_TO_RGB);
3187821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
3197821949aSmrg	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo);
3207821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
321de2362d3Smrg		      RADEON_COLOR_ARG_A_ZERO |
322de2362d3Smrg		      RADEON_COLOR_ARG_B_ZERO |
323de2362d3Smrg		      RADEON_COLOR_ARG_C_T0_COLOR |
324de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
325de2362d3Smrg		      RADEON_CLAMP_TX);
3267821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
327de2362d3Smrg		      RADEON_ALPHA_ARG_A_ZERO |
328de2362d3Smrg		      RADEON_ALPHA_ARG_B_ZERO |
329de2362d3Smrg		      RADEON_ALPHA_ARG_C_T0_ALPHA |
330de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
331de2362d3Smrg		      RADEON_CLAMP_TX);
332de2362d3Smrg
3337821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
334de2362d3Smrg		      (pPriv->w - 1) |
335de2362d3Smrg		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
3367821949aSmrg	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
337de2362d3Smrg		      pPriv->src_pitch - 32);
3387821949aSmrg	FINISH_ACCEL();
339de2362d3Smrg    }
340de2362d3Smrg
3417821949aSmrg    BEGIN_ACCEL(2);
3427821949aSmrg    OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0);
3437821949aSmrg    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) |
344de2362d3Smrg					   (scissor_h << RADEON_RE_HEIGHT_SHIFT)));
3457821949aSmrg    FINISH_ACCEL();
346de2362d3Smrg
347de2362d3Smrg    if (pPriv->vsync) {
348de2362d3Smrg	xf86CrtcPtr crtc;
349de2362d3Smrg	if (pPriv->desired_crtc)
350de2362d3Smrg	    crtc = pPriv->desired_crtc;
351de2362d3Smrg	else
3527821949aSmrg	    crtc = radeon_pick_best_crtc(pScrn,
353de2362d3Smrg					 pPriv->drw_x,
354de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
355de2362d3Smrg					 pPriv->drw_y,
356de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
357de2362d3Smrg	if (crtc)
3587821949aSmrg	    FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
3597821949aSmrg					  crtc,
3607821949aSmrg					  pPriv->drw_y - crtc->y,
3617821949aSmrg					  (pPriv->drw_y - crtc->y) + pPriv->dst_h);
362de2362d3Smrg    }
363de2362d3Smrg
364de2362d3Smrg    return TRUE;
365de2362d3Smrg}
366de2362d3Smrg
367de2362d3Smrgstatic void
3687821949aSmrgFUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
369de2362d3Smrg{
370de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
371de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
372de2362d3Smrg    int dstxoff, dstyoff;
373de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
374de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
3757821949aSmrg    ACCEL_PREAMBLE();
376de2362d3Smrg
377de2362d3Smrg#ifdef COMPOSITE
378de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
379de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
380de2362d3Smrg#else
381de2362d3Smrg    dstxoff = 0;
382de2362d3Smrg    dstyoff = 0;
383de2362d3Smrg#endif
384de2362d3Smrg
3857821949aSmrg    if (!FUNC_NAME(RADEONPrepareTexturedVideo)(pScrn, pPriv))
386de2362d3Smrg	return;
387de2362d3Smrg
388de2362d3Smrg    /*
389de2362d3Smrg     * Rendering of the actual polygon is done in two different
390de2362d3Smrg     * ways depending on chip generation:
391de2362d3Smrg     *
392de2362d3Smrg     * < R300:
393de2362d3Smrg     *
394de2362d3Smrg     *     These chips can render a rectangle in one pass, so
395de2362d3Smrg     *     handling is pretty straight-forward.
396de2362d3Smrg     *
397de2362d3Smrg     * >= R300:
398de2362d3Smrg     *
399de2362d3Smrg     *     These chips can accept a quad, but will render it as
400de2362d3Smrg     *     two triangles which results in a diagonal tear. Instead
401de2362d3Smrg     *     We render a single, large triangle and use the scissor
402de2362d3Smrg     *     functionality to restrict it to the desired rectangle.
403de2362d3Smrg     *     Due to guardband limits on r3xx/r4xx, we can only use
404de2362d3Smrg     *     the single triangle up to 2560/4021 pixels; above that we
405de2362d3Smrg     *     render as a quad.
406de2362d3Smrg     */
4077821949aSmrg#ifdef ACCEL_CP
408de2362d3Smrg    while (nBox) {
409de2362d3Smrg	int draw_size = 3 * pPriv->vtx_count + 5;
410de2362d3Smrg	int loop_boxes;
411de2362d3Smrg
412de2362d3Smrg	if (draw_size > radeon_cs_space_remaining(pScrn)) {
4137821949aSmrg	    if (info->cs)
4147821949aSmrg		radeon_cs_flush_indirect(pScrn);
4157821949aSmrg	    else
4167821949aSmrg		RADEONCPFlushIndirect(pScrn, 1);
4177821949aSmrg	    if (!FUNC_NAME(RADEONPrepareTexturedVideo)(pScrn, pPriv))
418de2362d3Smrg		return;
419de2362d3Smrg	}
420de2362d3Smrg	loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox);
421de2362d3Smrg	nBox -= loop_boxes;
422de2362d3Smrg
423de2362d3Smrg	BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 5);
424de2362d3Smrg	OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
425de2362d3Smrg			    loop_boxes * 3 * pPriv->vtx_count + 1));
426de2362d3Smrg	if (pPriv->is_planar)
427de2362d3Smrg	    OUT_RING(RADEON_CP_VC_FRMT_XY |
428de2362d3Smrg		     RADEON_CP_VC_FRMT_ST0 |
429de2362d3Smrg		     RADEON_CP_VC_FRMT_ST1);
430de2362d3Smrg	else
431de2362d3Smrg	    OUT_RING(RADEON_CP_VC_FRMT_XY |
432de2362d3Smrg		     RADEON_CP_VC_FRMT_ST0);
433de2362d3Smrg	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
434de2362d3Smrg		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
435de2362d3Smrg		 RADEON_CP_VC_CNTL_MAOS_ENABLE |
436de2362d3Smrg		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
437de2362d3Smrg		 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));
438de2362d3Smrg
439de2362d3Smrg	while (loop_boxes--) {
440de2362d3Smrg	    float srcX, srcY, srcw, srch;
441de2362d3Smrg	    int dstX, dstY, dstw, dsth;
442de2362d3Smrg	    dstX = pBox->x1 + dstxoff;
443de2362d3Smrg	    dstY = pBox->y1 + dstyoff;
444de2362d3Smrg	    dstw = pBox->x2 - pBox->x1;
445de2362d3Smrg	    dsth = pBox->y2 - pBox->y1;
446de2362d3Smrg
447de2362d3Smrg	    srcX = pPriv->src_x;
448de2362d3Smrg	    srcX += ((pBox->x1 - pPriv->drw_x) *
449de2362d3Smrg		     pPriv->src_w) / (float)pPriv->dst_w;
450de2362d3Smrg	    srcY = pPriv->src_y;
451de2362d3Smrg	    srcY += ((pBox->y1 - pPriv->drw_y) *
452de2362d3Smrg		     pPriv->src_h) / (float)pPriv->dst_h;
453de2362d3Smrg
454de2362d3Smrg	    srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
455de2362d3Smrg	    srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
456de2362d3Smrg
457de2362d3Smrg
458de2362d3Smrg	    if (pPriv->is_planar) {
459de2362d3Smrg		/*
460de2362d3Smrg		 * Just render a rect (using three coords).
461de2362d3Smrg		 */
462de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)(dstY + dsth),
463de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h,
464de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
465de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)(dstY + dsth),
466de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h,
467de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
468de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)dstY,
469de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h,
470de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
471de2362d3Smrg	    } else {
472de2362d3Smrg		/*
473de2362d3Smrg		 * Just render a rect (using three coords).
474de2362d3Smrg		 */
475de2362d3Smrg		VTX_OUT_4((float)dstX,                     (float)(dstY + dsth),
476de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
477de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)(dstY + dsth),
478de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
479de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)dstY,
480de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
481de2362d3Smrg	    }
482de2362d3Smrg
483de2362d3Smrg	    pBox++;
484de2362d3Smrg	}
485de2362d3Smrg
4867821949aSmrg	OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
487de2362d3Smrg	ADVANCE_RING();
488de2362d3Smrg    }
4897821949aSmrg#else /* ACCEL_CP */
4907821949aSmrg    BEGIN_ACCEL(nBox * pPriv->vtx_count * 3 + 2);
4917821949aSmrg    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
4927821949aSmrg				      RADEON_VF_PRIM_WALK_DATA |
4937821949aSmrg				      RADEON_VF_RADEON_MODE |
4947821949aSmrg				      ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT)));
4957821949aSmrg    while (nBox--) {
4967821949aSmrg	float srcX, srcY, srcw, srch;
4977821949aSmrg	int dstX, dstY, dstw, dsth;
4987821949aSmrg	dstX = pBox->x1 + dstxoff;
4997821949aSmrg	dstY = pBox->y1 + dstyoff;
5007821949aSmrg	dstw = pBox->x2 - pBox->x1;
5017821949aSmrg	dsth = pBox->y2 - pBox->y1;
5027821949aSmrg
5037821949aSmrg	srcX = pPriv->src_x;
5047821949aSmrg	srcX += ((pBox->x1 - pPriv->drw_x) *
5057821949aSmrg		 pPriv->src_w) / (float)pPriv->dst_w;
5067821949aSmrg	srcY = pPriv->src_y;
5077821949aSmrg	srcY += ((pBox->y1 - pPriv->drw_y) *
5087821949aSmrg		 pPriv->src_h) / (float)pPriv->dst_h;
5097821949aSmrg
5107821949aSmrg	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
5117821949aSmrg	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
5127821949aSmrg
5137821949aSmrg
5147821949aSmrg	if (pPriv->is_planar) {
5157821949aSmrg	    /*
5167821949aSmrg	     * Just render a rect (using three coords).
5177821949aSmrg	     */
5187821949aSmrg	    VTX_OUT_6((float)dstX,                     (float)(dstY + dsth),
5197821949aSmrg		      (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h,
5207821949aSmrg		      (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
5217821949aSmrg	    VTX_OUT_6((float)(dstX + dstw),            (float)(dstY + dsth),
5227821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h,
5237821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
5247821949aSmrg	    VTX_OUT_6((float)(dstX + dstw),            (float)dstY,
5257821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h,
5267821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
5277821949aSmrg	} else {
5287821949aSmrg	    /*
5297821949aSmrg	     * Just render a rect (using three coords).
5307821949aSmrg	     */
5317821949aSmrg	    VTX_OUT_4((float)dstX,                     (float)(dstY + dsth),
5327821949aSmrg		      (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
5337821949aSmrg	    VTX_OUT_4((float)(dstX + dstw),            (float)(dstY + dsth),
5347821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
5357821949aSmrg	    VTX_OUT_4((float)(dstX + dstw),            (float)dstY,
5367821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
5377821949aSmrg	}
5387821949aSmrg
5397821949aSmrg	pBox++;
5407821949aSmrg    }
5417821949aSmrg
5427821949aSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
5437821949aSmrg    FINISH_ACCEL();
5447821949aSmrg#endif /* !ACCEL_CP */
5457821949aSmrg
546de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
547de2362d3Smrg}
548de2362d3Smrg
549de2362d3Smrgstatic Bool
5507821949aSmrgFUNC_NAME(R200PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
551de2362d3Smrg{
552de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
553de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
554de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
555de2362d3Smrg    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
556de2362d3Smrg    uint32_t txformat;
5577821949aSmrg    uint32_t txfilter, txsize, txpitch, txoffset;
558de2362d3Smrg    uint32_t dst_pitch, dst_format;
559de2362d3Smrg    uint32_t colorpitch;
560de2362d3Smrg    int pixel_shift;
5617821949aSmrg    int scissor_w = MIN(pPixmap->drawable.width, 2047);
5627821949aSmrg    int scissor_h = MIN(pPixmap->drawable.height, 2047);
563de2362d3Smrg    /* note: in contrast to r300, use input biasing on uv components */
564de2362d3Smrg    const float Loff = -0.0627;
565de2362d3Smrg    float uvcosf, uvsinf;
566de2362d3Smrg    float yco, yoff;
567de2362d3Smrg    float uco[3], vco[3];
568de2362d3Smrg    float bright, cont, sat;
569de2362d3Smrg    int ref = pPriv->transform_index;
570de2362d3Smrg    float ucscale = 0.25, vcscale = 0.25;
571de2362d3Smrg    Bool needux8 = FALSE, needvx8 = FALSE;
5727821949aSmrg    ACCEL_PREAMBLE();
573de2362d3Smrg
5747821949aSmrg#ifdef XF86DRM_MODE
5757821949aSmrg    if (info->cs) {
5767821949aSmrg	int ret;
577de2362d3Smrg
5787821949aSmrg	radeon_cs_space_reset_bos(info->cs);
5797821949aSmrg        radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
5807821949aSmrg
5817821949aSmrg	if (pPriv->bicubic_enabled)
5827821949aSmrg	    radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
583de2362d3Smrg
5847821949aSmrg	driver_priv = exaGetPixmapDriverPrivate(pPixmap);
5857821949aSmrg	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
586de2362d3Smrg
5877821949aSmrg	ret = radeon_cs_space_check(info->cs);
5887821949aSmrg	if (ret) {
5897821949aSmrg	    ErrorF("Not enough RAM to hw accel xv operation\n");
5907821949aSmrg	    return FALSE;
5917821949aSmrg	}
592de2362d3Smrg    }
5937821949aSmrg#else
5947821949aSmrg    (void)src_bo;
5957821949aSmrg#endif
596de2362d3Smrg
597de2362d3Smrg    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
598de2362d3Smrg
5997821949aSmrg#ifdef USE_EXA
6007821949aSmrg    if (info->useEXA) {
6017821949aSmrg	dst_pitch = exaGetPixmapPitch(pPixmap);
6027821949aSmrg    } else
6037821949aSmrg#endif
6047821949aSmrg    {
6057821949aSmrg	dst_pitch = pPixmap->devKind;
6067821949aSmrg    }
607de2362d3Smrg
6087821949aSmrg#ifdef USE_EXA
6097821949aSmrg    if (info->useEXA) {
6107821949aSmrg	RADEON_SWITCH_TO_3D();
6117821949aSmrg    } else
6127821949aSmrg#endif
6137821949aSmrg    {
6147821949aSmrg	BEGIN_ACCEL(2);
6157821949aSmrg	OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
6167821949aSmrg	/* We must wait for 3d to idle, in case source was just written as a dest. */
6177821949aSmrg	OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
6187821949aSmrg		      RADEON_WAIT_HOST_IDLECLEAN |
6197821949aSmrg		      RADEON_WAIT_2D_IDLECLEAN |
6207821949aSmrg		      RADEON_WAIT_3D_IDLECLEAN |
6217821949aSmrg		      RADEON_WAIT_DMA_GUI_IDLE);
6227821949aSmrg	FINISH_ACCEL();
6237821949aSmrg
6247821949aSmrg	if (!info->accel_state->XInited3D)
6257821949aSmrg	    RADEONInit3DEngine(pScrn);
6267821949aSmrg    }
627de2362d3Smrg
628de2362d3Smrg    /* Same for R100/R200 */
629de2362d3Smrg    switch (pPixmap->drawable.bitsPerPixel) {
630de2362d3Smrg    case 16:
631de2362d3Smrg	if (pPixmap->drawable.depth == 15)
632de2362d3Smrg	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
633de2362d3Smrg	else
634de2362d3Smrg	    dst_format = RADEON_COLOR_FORMAT_RGB565;
635de2362d3Smrg	break;
636de2362d3Smrg    case 32:
637de2362d3Smrg	dst_format = RADEON_COLOR_FORMAT_ARGB8888;
638de2362d3Smrg	break;
639de2362d3Smrg    default:
640de2362d3Smrg	return FALSE;
641de2362d3Smrg    }
642de2362d3Smrg
643de2362d3Smrg    if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) {
644de2362d3Smrg	pPriv->is_planar = TRUE;
645de2362d3Smrg	txformat = RADEON_TXFORMAT_I8;
646de2362d3Smrg    } else {
647de2362d3Smrg	pPriv->is_planar = FALSE;
648de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
649de2362d3Smrg	    txformat = RADEON_TXFORMAT_YVYU422;
650de2362d3Smrg	else
651de2362d3Smrg	    txformat = RADEON_TXFORMAT_VYUY422;
652de2362d3Smrg    }
653de2362d3Smrg
654de2362d3Smrg    txformat |= RADEON_TXFORMAT_NON_POWER2;
655de2362d3Smrg
656de2362d3Smrg    colorpitch = dst_pitch >> pixel_shift;
657de2362d3Smrg
658de2362d3Smrg    if (RADEONTilingEnabled(pScrn, pPixmap))
659de2362d3Smrg	colorpitch |= RADEON_COLOR_TILE_ENABLE;
660de2362d3Smrg
661de2362d3Smrg    BEGIN_ACCEL_RELOC(4,2);
662de2362d3Smrg
6637821949aSmrg    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format);
664de2362d3Smrg    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap);
665de2362d3Smrg    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap);
666de2362d3Smrg
6677821949aSmrg    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
668de2362d3Smrg		  RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
669de2362d3Smrg
6707821949aSmrg    FINISH_ACCEL();
671de2362d3Smrg
672de2362d3Smrg    txfilter =  R200_MAG_FILTER_LINEAR |
673de2362d3Smrg	R200_MIN_FILTER_LINEAR |
674de2362d3Smrg	R200_CLAMP_S_CLAMP_LAST |
675de2362d3Smrg	R200_CLAMP_T_CLAMP_LAST;
676de2362d3Smrg
677de2362d3Smrg    /* contrast can cause constant overflow, clamp */
678de2362d3Smrg    cont = RTFContrast(pPriv->contrast);
679de2362d3Smrg    if (cont * trans[ref].RefLuma > 2.0)
680de2362d3Smrg	cont = 2.0 / trans[ref].RefLuma;
681de2362d3Smrg    /* brightness is only from -0.5 to 0.5 should be safe */
682de2362d3Smrg    bright = RTFBrightness(pPriv->brightness);
683de2362d3Smrg    /* saturation can also cause overflow, clamp */
684de2362d3Smrg    sat = RTFSaturation(pPriv->saturation);
685de2362d3Smrg    if (sat * trans[ref].RefBCb > 4.0)
686de2362d3Smrg	sat = 4.0 / trans[ref].RefBCb;
687de2362d3Smrg    uvcosf = sat * cos(RTFHue(pPriv->hue));
688de2362d3Smrg    uvsinf = sat * sin(RTFHue(pPriv->hue));
689de2362d3Smrg
690de2362d3Smrg    yco = trans[ref].RefLuma * cont;
691de2362d3Smrg    uco[0] = -trans[ref].RefRCr * uvsinf;
692de2362d3Smrg    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
693de2362d3Smrg    uco[2] = trans[ref].RefBCb * uvcosf;
694de2362d3Smrg    vco[0] = trans[ref].RefRCr * uvcosf;
695de2362d3Smrg    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
696de2362d3Smrg    vco[2] = trans[ref].RefBCb * uvsinf;
697de2362d3Smrg    yoff = Loff * yco + bright;
698de2362d3Smrg
699de2362d3Smrg    if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
700de2362d3Smrg	needux8 = TRUE;
701de2362d3Smrg	ucscale = 0.125;
702de2362d3Smrg    }
703de2362d3Smrg    if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
704de2362d3Smrg	needvx8 = TRUE;
705de2362d3Smrg	vcscale = 0.125;
706de2362d3Smrg    }
707de2362d3Smrg
7087821949aSmrg    txoffset = info->cs ? 0 : pPriv->src_offset;
7097821949aSmrg
710de2362d3Smrg    if (pPriv->is_planar) {
711de2362d3Smrg	/* need 2 texcoord sets (even though they are identical) due
712de2362d3Smrg	   to denormalization! hw apparently can't premultiply
713de2362d3Smrg	   same coord set by different texture size */
714de2362d3Smrg	pPriv->vtx_count = 6;
715de2362d3Smrg
716de2362d3Smrg	txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
717de2362d3Smrg		  (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
718de2362d3Smrg	txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64);
719de2362d3Smrg	txpitch -= 32;
720de2362d3Smrg
721de2362d3Smrg	BEGIN_ACCEL_RELOC(36, 3);
722de2362d3Smrg
7237821949aSmrg	OUT_ACCEL_REG(RADEON_PP_CNTL,
724de2362d3Smrg		      RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE |
725de2362d3Smrg		      RADEON_TEX_BLEND_0_ENABLE |
726de2362d3Smrg		      RADEON_TEX_BLEND_1_ENABLE |
727de2362d3Smrg		      RADEON_TEX_BLEND_2_ENABLE);
728de2362d3Smrg
7297821949aSmrg	OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
7307821949aSmrg	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
731de2362d3Smrg		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
732de2362d3Smrg		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
733de2362d3Smrg
7347821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
7357821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
7367821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
7377821949aSmrg	OUT_ACCEL_REG(R200_PP_TXSIZE_0,
738de2362d3Smrg		      (pPriv->w - 1) |
739de2362d3Smrg		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
7407821949aSmrg	OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
7417821949aSmrg	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo);
7427821949aSmrg
7437821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
7447821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
7457821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
7467821949aSmrg	OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize);
7477821949aSmrg	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
7487821949aSmrg	OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo);
7497821949aSmrg
7507821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
7517821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
7527821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
7537821949aSmrg	OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize);
7547821949aSmrg	OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
7557821949aSmrg	OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo);
756de2362d3Smrg
757de2362d3Smrg	/* similar to r300 code. Note the big problem is that hardware constants
758de2362d3Smrg	 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
759de2362d3Smrg	 * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually
760de2362d3Smrg	 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but
761de2362d3Smrg	 * the constants not. To get larger range can use output scale, but for
762de2362d3Smrg	 * that 2.018 value we need a total scale by 8, which means the constants
763de2362d3Smrg	 * really have no accuracy whatsoever (5 fractional bits only).
764de2362d3Smrg	 * The only direct way to get high  precision "constants" into the fragment
765de2362d3Smrg	 * pipe I know of is to use the texcoord interpolator (not color, this one
766de2362d3Smrg	 * is 8 bit only too), which seems a bit expensive. We're lucky though it
767de2362d3Smrg	 * seems the values we need seem to fit better than worst case (get about
768de2362d3Smrg	 * 6 fractional bits for this instead of 5, at least when not correcting for
769de2362d3Smrg	 * hue/saturation/contrast/brightness, which is the same as for vco - yco and
770de2362d3Smrg	 * yoff get 8 fractional bits). Try to preserve as much accuracy as possible
771de2362d3Smrg	 * even with non-default saturation/hue/contrast/brightness adjustments,
772de2362d3Smrg	 * it gets a little crazy and ultimately precision might still be lacking.
773de2362d3Smrg	 *
774de2362d3Smrg	 * A higher precision (8 fractional bits) version might just put uco into
775de2362d3Smrg	 * a texcoord, and calculate a new vcoconst in the shader, like so:
776de2362d3Smrg	 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable
777de2362d3Smrg	 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0}
778de2362d3Smrg	 * vcocalc = ADD temp, bias/scale(cohelper), vco
779de2362d3Smrg	 * would in total use 4 tex units, 4 instructions which seems fairly
780de2362d3Smrg	 * balanced for this architecture (instead of 3 + 3 for the solution here)
781de2362d3Smrg	 *
782de2362d3Smrg	 * temp = MAD(yco, yuv.yyyy, yoff)
783de2362d3Smrg	 * temp = MAD(uco, yuv.uuuu, temp)
784de2362d3Smrg	 * result = MAD(vco, yuv.vvvv, temp)
785de2362d3Smrg	 *
786de2362d3Smrg	 * note first mad produces actually scalar, hence we transform
787de2362d3Smrg	 * it into a dp2a to get 8 bit precision of yco instead of 7 -
788de2362d3Smrg	 * That's assuming hw correctly expands consts to internal precision.
789de2362d3Smrg	 * (y * 1 + y * (yco - 1) + yoff)
790de2362d3Smrg	 * temp = DP2A / 2 (yco, yuv.yyyy, yoff)
791de2362d3Smrg	 * temp = MAD (uco / 4, yuv.uuuu * 2, temp)
792de2362d3Smrg	 * result = MAD x2 (vco / 2, yuv.vvvv, temp)
793de2362d3Smrg	 *
794de2362d3Smrg	 * vco, uco need bias (and hence scale too)
795de2362d3Smrg	 *
796de2362d3Smrg	 */
797de2362d3Smrg
798de2362d3Smrg	/* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */
7997821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
800de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
801de2362d3Smrg		      R200_TXC_ARG_B_R0_COLOR |
802de2362d3Smrg		      R200_TXC_ARG_C_TFACTOR_COLOR |
803de2362d3Smrg		      (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
804de2362d3Smrg		      R200_TXC_OP_DOT2_ADD);
8057821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
806de2362d3Smrg		      (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
807de2362d3Smrg		      R200_TXC_SCALE_INV2 |
808de2362d3Smrg		      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
8097821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND_0,
810de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
811de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
812de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
813de2362d3Smrg		      R200_TXA_OP_MADD);
8147821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
815de2362d3Smrg		      R200_TXA_OUTPUT_REG_NONE);
816de2362d3Smrg
817de2362d3Smrg	/* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */
8187821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
819de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
820de2362d3Smrg		      R200_TXC_BIAS_ARG_A |
821de2362d3Smrg		      R200_TXC_SCALE_ARG_A |
822de2362d3Smrg		      R200_TXC_ARG_B_R1_COLOR |
823de2362d3Smrg		      R200_TXC_BIAS_ARG_B |
824de2362d3Smrg		      (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
825de2362d3Smrg		      R200_TXC_ARG_C_R0_COLOR |
826de2362d3Smrg		      R200_TXC_OP_MADD);
8277821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
828de2362d3Smrg		      (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
829de2362d3Smrg		      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
8307821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND_1,
831de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
832de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
833de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
834de2362d3Smrg		      R200_TXA_OP_MADD);
8357821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
836de2362d3Smrg		      R200_TXA_OUTPUT_REG_NONE);
837de2362d3Smrg
838de2362d3Smrg	/* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */
8397821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
840de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
841de2362d3Smrg		      R200_TXC_BIAS_ARG_A |
842de2362d3Smrg		      R200_TXC_SCALE_ARG_A |
843de2362d3Smrg		      R200_TXC_ARG_B_R2_COLOR |
844de2362d3Smrg		      R200_TXC_BIAS_ARG_B |
845de2362d3Smrg		      (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
846de2362d3Smrg		      R200_TXC_ARG_C_R0_COLOR |
847de2362d3Smrg		      R200_TXC_OP_MADD);
8487821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
849de2362d3Smrg		      (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
850de2362d3Smrg		      R200_TXC_SCALE_2X |
851de2362d3Smrg		      R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
8527821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND_2,
853de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
854de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
855de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
856de2362d3Smrg		      R200_TXA_COMP_ARG_C |
857de2362d3Smrg		      R200_TXA_OP_MADD);
8587821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
859de2362d3Smrg		      R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
860de2362d3Smrg
861de2362d3Smrg	/* shader constants */
8627821949aSmrg	OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
863de2362d3Smrg						      yco > 1.0 ? yco - 1.0: yco,
864de2362d3Smrg						      yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
865de2362d3Smrg						      0.0));
8667821949aSmrg	OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
867de2362d3Smrg						      uco[1] * ucscale + 0.5, /* or [-2, 2] */
868de2362d3Smrg						      uco[2] * ucscale + 0.5,
869de2362d3Smrg						      0.0));
8707821949aSmrg	OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
871de2362d3Smrg						      vco[1] * vcscale + 0.5, /* or [-4, 4] */
872de2362d3Smrg						      vco[2] * vcscale + 0.5,
873de2362d3Smrg						      0.0));
874de2362d3Smrg
8757821949aSmrg	FINISH_ACCEL();
876de2362d3Smrg    } else {
877de2362d3Smrg	pPriv->vtx_count = 4;
878de2362d3Smrg
879de2362d3Smrg	BEGIN_ACCEL_RELOC(24, 1);
880de2362d3Smrg
8817821949aSmrg	OUT_ACCEL_REG(RADEON_PP_CNTL,
882de2362d3Smrg		      RADEON_TEX_0_ENABLE |
883de2362d3Smrg		      RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
884de2362d3Smrg		      RADEON_TEX_BLEND_2_ENABLE);
885de2362d3Smrg
8867821949aSmrg	OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
8877821949aSmrg	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
888de2362d3Smrg		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
889de2362d3Smrg
8907821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
8917821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
8927821949aSmrg	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
8937821949aSmrg	OUT_ACCEL_REG(R200_PP_TXSIZE_0,
894de2362d3Smrg		      (pPriv->w - 1) |
895de2362d3Smrg		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
8967821949aSmrg	OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
8977821949aSmrg	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo);
898de2362d3Smrg
899de2362d3Smrg	/* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */
9007821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
901de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
902de2362d3Smrg		      R200_TXC_ARG_B_R0_COLOR |
903de2362d3Smrg		      R200_TXC_ARG_C_TFACTOR_COLOR |
904de2362d3Smrg		      (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
905de2362d3Smrg		      R200_TXC_OP_DOT2_ADD);
9067821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
907de2362d3Smrg		      (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
908de2362d3Smrg		      R200_TXC_SCALE_INV2 |
909de2362d3Smrg		      (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) |
910de2362d3Smrg		      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
9117821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND_0,
912de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
913de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
914de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
915de2362d3Smrg		      R200_TXA_OP_MADD);
9167821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
917de2362d3Smrg		      R200_TXA_OUTPUT_REG_NONE);
918de2362d3Smrg
919de2362d3Smrg	/* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */
9207821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
921de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
922de2362d3Smrg		      R200_TXC_BIAS_ARG_A |
923de2362d3Smrg		      R200_TXC_SCALE_ARG_A |
924de2362d3Smrg		      R200_TXC_ARG_B_R0_COLOR |
925de2362d3Smrg		      R200_TXC_BIAS_ARG_B |
926de2362d3Smrg		      (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
927de2362d3Smrg		      R200_TXC_ARG_C_R1_COLOR |
928de2362d3Smrg		      R200_TXC_OP_MADD);
9297821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
930de2362d3Smrg		      (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
931de2362d3Smrg		      (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) |
932de2362d3Smrg		      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
9337821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND_1,
934de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
935de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
936de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
937de2362d3Smrg		      R200_TXA_OP_MADD);
9387821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
939de2362d3Smrg		      R200_TXA_OUTPUT_REG_NONE);
940de2362d3Smrg
941de2362d3Smrg	/* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */
9427821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
943de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
944de2362d3Smrg		      R200_TXC_BIAS_ARG_A |
945de2362d3Smrg		      R200_TXC_SCALE_ARG_A |
946de2362d3Smrg		      R200_TXC_ARG_B_R0_COLOR |
947de2362d3Smrg		      R200_TXC_BIAS_ARG_B |
948de2362d3Smrg		      (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
949de2362d3Smrg		      R200_TXC_ARG_C_R1_COLOR |
950de2362d3Smrg		      R200_TXC_OP_MADD);
9517821949aSmrg	OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
952de2362d3Smrg		      (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
953de2362d3Smrg		      R200_TXC_SCALE_2X |
954de2362d3Smrg		      (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) |
955de2362d3Smrg		      R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
9567821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND_2,
957de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
958de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
959de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
960de2362d3Smrg		      R200_TXA_COMP_ARG_C |
961de2362d3Smrg		      R200_TXA_OP_MADD);
9627821949aSmrg	OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
963de2362d3Smrg		      R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
964de2362d3Smrg
965de2362d3Smrg	/* shader constants */
9667821949aSmrg	OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
967de2362d3Smrg						      yco > 1.0 ? yco - 1.0: yco,
968de2362d3Smrg						      yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
969de2362d3Smrg						      0.0));
9707821949aSmrg	OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
971de2362d3Smrg						      uco[1] * ucscale + 0.5, /* or [-2, 2] */
972de2362d3Smrg						      uco[2] * ucscale + 0.5,
973de2362d3Smrg						      0.0));
9747821949aSmrg	OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
975de2362d3Smrg						      vco[1] * vcscale + 0.5, /* or [-4, 4] */
976de2362d3Smrg						      vco[2] * vcscale + 0.5,
977de2362d3Smrg						      0.0));
978de2362d3Smrg
9797821949aSmrg	FINISH_ACCEL();
980de2362d3Smrg    }
981de2362d3Smrg
9827821949aSmrg    BEGIN_ACCEL(2);
9837821949aSmrg    OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0);
9847821949aSmrg    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) |
985de2362d3Smrg					   (scissor_h << RADEON_RE_HEIGHT_SHIFT)));
9867821949aSmrg    FINISH_ACCEL();
987de2362d3Smrg
988de2362d3Smrg    if (pPriv->vsync) {
989de2362d3Smrg	xf86CrtcPtr crtc;
990de2362d3Smrg	if (pPriv->desired_crtc)
991de2362d3Smrg	    crtc = pPriv->desired_crtc;
992de2362d3Smrg	else
9937821949aSmrg	    crtc = radeon_pick_best_crtc(pScrn,
994de2362d3Smrg					 pPriv->drw_x,
995de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
996de2362d3Smrg					 pPriv->drw_y,
997de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
998de2362d3Smrg	if (crtc)
9997821949aSmrg	    FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
10007821949aSmrg					  crtc,
10017821949aSmrg					  pPriv->drw_y - crtc->y,
10027821949aSmrg					  (pPriv->drw_y - crtc->y) + pPriv->dst_h);
1003de2362d3Smrg    }
1004de2362d3Smrg
1005de2362d3Smrg    return TRUE;
1006de2362d3Smrg}
1007de2362d3Smrg
1008de2362d3Smrgstatic void
10097821949aSmrgFUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
1010de2362d3Smrg{
1011de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1012de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
1013de2362d3Smrg    int dstxoff, dstyoff;
1014de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
1015de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
10167821949aSmrg    ACCEL_PREAMBLE();
1017de2362d3Smrg
1018de2362d3Smrg#ifdef COMPOSITE
1019de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
1020de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
1021de2362d3Smrg#else
1022de2362d3Smrg    dstxoff = 0;
1023de2362d3Smrg    dstyoff = 0;
1024de2362d3Smrg#endif
1025de2362d3Smrg
10267821949aSmrg    if (!FUNC_NAME(R200PrepareTexturedVideo)(pScrn, pPriv))
1027de2362d3Smrg	return;
1028de2362d3Smrg
1029de2362d3Smrg    /*
1030de2362d3Smrg     * Rendering of the actual polygon is done in two different
1031de2362d3Smrg     * ways depending on chip generation:
1032de2362d3Smrg     *
1033de2362d3Smrg     * < R300:
1034de2362d3Smrg     *
1035de2362d3Smrg     *     These chips can render a rectangle in one pass, so
1036de2362d3Smrg     *     handling is pretty straight-forward.
1037de2362d3Smrg     *
1038de2362d3Smrg     * >= R300:
1039de2362d3Smrg     *
1040de2362d3Smrg     *     These chips can accept a quad, but will render it as
1041de2362d3Smrg     *     two triangles which results in a diagonal tear. Instead
1042de2362d3Smrg     *     We render a single, large triangle and use the scissor
1043de2362d3Smrg     *     functionality to restrict it to the desired rectangle.
1044de2362d3Smrg     *     Due to guardband limits on r3xx/r4xx, we can only use
1045de2362d3Smrg     *     the single triangle up to 2560/4021 pixels; above that we
1046de2362d3Smrg     *     render as a quad.
1047de2362d3Smrg     */
1048de2362d3Smrg
10497821949aSmrg#ifdef ACCEL_CP
1050de2362d3Smrg    while (nBox) {
1051de2362d3Smrg	int draw_size = 3 * pPriv->vtx_count + 4;
1052de2362d3Smrg	int loop_boxes;
1053de2362d3Smrg
1054de2362d3Smrg	if (draw_size > radeon_cs_space_remaining(pScrn)) {
10557821949aSmrg	    if (info->cs)
10567821949aSmrg		radeon_cs_flush_indirect(pScrn);
10577821949aSmrg	    else
10587821949aSmrg		RADEONCPFlushIndirect(pScrn, 1);
10597821949aSmrg	    if (!FUNC_NAME(R200PrepareTexturedVideo)(pScrn, pPriv))
1060de2362d3Smrg		return;
1061de2362d3Smrg	}
1062de2362d3Smrg	loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox);
1063de2362d3Smrg	nBox -= loop_boxes;
1064de2362d3Smrg
1065de2362d3Smrg	BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 4);
1066de2362d3Smrg	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
1067de2362d3Smrg			    loop_boxes * 3 * pPriv->vtx_count));
1068de2362d3Smrg	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
1069de2362d3Smrg		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
1070de2362d3Smrg		 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));
1071de2362d3Smrg
1072de2362d3Smrg	while (loop_boxes--) {
1073de2362d3Smrg	    float srcX, srcY, srcw, srch;
1074de2362d3Smrg	    int dstX, dstY, dstw, dsth;
1075de2362d3Smrg	    dstX = pBox->x1 + dstxoff;
1076de2362d3Smrg	    dstY = pBox->y1 + dstyoff;
1077de2362d3Smrg	    dstw = pBox->x2 - pBox->x1;
1078de2362d3Smrg	    dsth = pBox->y2 - pBox->y1;
1079de2362d3Smrg
1080de2362d3Smrg	    srcX = pPriv->src_x;
1081de2362d3Smrg	    srcX += ((pBox->x1 - pPriv->drw_x) *
1082de2362d3Smrg		     pPriv->src_w) / (float)pPriv->dst_w;
1083de2362d3Smrg	    srcY = pPriv->src_y;
1084de2362d3Smrg	    srcY += ((pBox->y1 - pPriv->drw_y) *
1085de2362d3Smrg		     pPriv->src_h) / (float)pPriv->dst_h;
1086de2362d3Smrg
1087de2362d3Smrg	    srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
1088de2362d3Smrg	    srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
1089de2362d3Smrg
1090de2362d3Smrg	    if (pPriv->is_planar) {
1091de2362d3Smrg		/*
1092de2362d3Smrg		 * Just render a rect (using three coords).
1093de2362d3Smrg		 */
1094de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)(dstY + dsth),
1095de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h,
1096de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
1097de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)(dstY + dsth),
1098de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h,
1099de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
1100de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)dstY,
1101de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h,
1102de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
1103de2362d3Smrg	    } else {
1104de2362d3Smrg		/*
1105de2362d3Smrg		 * Just render a rect (using three coords).
1106de2362d3Smrg		 */
1107de2362d3Smrg		VTX_OUT_4((float)dstX,                     (float)(dstY + dsth),
1108de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
1109de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)(dstY + dsth),
1110de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
1111de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)dstY,
1112de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
1113de2362d3Smrg	    }
1114de2362d3Smrg
1115de2362d3Smrg	    pBox++;
1116de2362d3Smrg	}
1117de2362d3Smrg
11187821949aSmrg	OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
1119de2362d3Smrg	ADVANCE_RING();
1120de2362d3Smrg    }
11217821949aSmrg#else /* ACCEL_CP */
11227821949aSmrg    BEGIN_ACCEL(nBox * 3 * pPriv->vtx_count + 2);
11237821949aSmrg    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
11247821949aSmrg				      RADEON_VF_PRIM_WALK_DATA |
11257821949aSmrg				      ((nBox * 3) << RADEON_VF_NUM_VERTICES_SHIFT)));
11267821949aSmrg    while (nBox--) {
11277821949aSmrg	float srcX, srcY, srcw, srch;
11287821949aSmrg	int dstX, dstY, dstw, dsth;
11297821949aSmrg	dstX = pBox->x1 + dstxoff;
11307821949aSmrg	dstY = pBox->y1 + dstyoff;
11317821949aSmrg	dstw = pBox->x2 - pBox->x1;
11327821949aSmrg	dsth = pBox->y2 - pBox->y1;
11337821949aSmrg
11347821949aSmrg	srcX = pPriv->src_x;
11357821949aSmrg	srcX += ((pBox->x1 - pPriv->drw_x) *
11367821949aSmrg		 pPriv->src_w) / (float)pPriv->dst_w;
11377821949aSmrg	srcY = pPriv->src_y;
11387821949aSmrg	srcY += ((pBox->y1 - pPriv->drw_y) *
11397821949aSmrg		 pPriv->src_h) / (float)pPriv->dst_h;
11407821949aSmrg
11417821949aSmrg	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
11427821949aSmrg	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
11437821949aSmrg
11447821949aSmrg	if (pPriv->is_planar) {
11457821949aSmrg	    /*
11467821949aSmrg	     * Just render a rect (using three coords).
11477821949aSmrg	     */
11487821949aSmrg	    VTX_OUT_6((float)dstX,                     (float)(dstY + dsth),
11497821949aSmrg		      (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h,
11507821949aSmrg		      (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
11517821949aSmrg	    VTX_OUT_6((float)(dstX + dstw),            (float)(dstY + dsth),
11527821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h,
11537821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
11547821949aSmrg	    VTX_OUT_6((float)(dstX + dstw),            (float)dstY,
11557821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h,
11567821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
11577821949aSmrg	} else {
11587821949aSmrg	    /*
11597821949aSmrg	     * Just render a rect (using three coords).
11607821949aSmrg	     */
11617821949aSmrg	    VTX_OUT_4((float)dstX,                     (float)(dstY + dsth),
11627821949aSmrg		      (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
11637821949aSmrg	    VTX_OUT_4((float)(dstX + dstw),            (float)(dstY + dsth),
11647821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
11657821949aSmrg	    VTX_OUT_4((float)(dstX + dstw),            (float)dstY,
11667821949aSmrg		      (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
11677821949aSmrg	}
11687821949aSmrg
11697821949aSmrg	pBox++;
11707821949aSmrg    }
11717821949aSmrg
11727821949aSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
11737821949aSmrg    FINISH_ACCEL();
11747821949aSmrg#endif /* !ACCEL_CP */
1175de2362d3Smrg
1176de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
1177de2362d3Smrg}
1178de2362d3Smrg
1179de2362d3Smrgstatic Bool
11807821949aSmrgFUNC_NAME(R300PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
1181de2362d3Smrg{
1182de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1183de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
1184de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
1185de2362d3Smrg    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
11867821949aSmrg    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
1187de2362d3Smrg    uint32_t dst_pitch, dst_format;
11887821949aSmrg    uint32_t txenable, colorpitch, bicubic_offset;
1189de2362d3Smrg    uint32_t output_fmt;
1190de2362d3Smrg    int pixel_shift;
11917821949aSmrg    ACCEL_PREAMBLE();
1192de2362d3Smrg
11937821949aSmrg#ifdef XF86DRM_MODE
11947821949aSmrg    if (info->cs) {
11957821949aSmrg	int ret;
1196de2362d3Smrg
11977821949aSmrg	radeon_cs_space_reset_bos(info->cs);
11987821949aSmrg	radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
1199de2362d3Smrg
12007821949aSmrg	if (pPriv->bicubic_enabled)
12017821949aSmrg	  radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
1202de2362d3Smrg
12037821949aSmrg	driver_priv = exaGetPixmapDriverPrivate(pPixmap);
12047821949aSmrg	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
12057821949aSmrg
12067821949aSmrg	ret = radeon_cs_space_check(info->cs);
12077821949aSmrg	if (ret) {
12087821949aSmrg	    ErrorF("Not enough RAM to hw accel xv operation\n");
12097821949aSmrg	    return FALSE;
12107821949aSmrg	}
1211de2362d3Smrg    }
12127821949aSmrg#else
12137821949aSmrg    (void)src_bo;
12147821949aSmrg#endif
1215de2362d3Smrg
1216de2362d3Smrg    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
1217de2362d3Smrg
12187821949aSmrg#ifdef USE_EXA
12197821949aSmrg    if (info->useEXA) {
12207821949aSmrg	dst_pitch = exaGetPixmapPitch(pPixmap);
12217821949aSmrg    } else
12227821949aSmrg#endif
12237821949aSmrg    {
12247821949aSmrg	dst_pitch = pPixmap->devKind;
12257821949aSmrg    }
12267821949aSmrg
12277821949aSmrg#ifdef USE_EXA
12287821949aSmrg    if (info->useEXA) {
12297821949aSmrg	RADEON_SWITCH_TO_3D();
12307821949aSmrg    } else
12317821949aSmrg#endif
12327821949aSmrg    {
12337821949aSmrg	BEGIN_ACCEL(2);
12347821949aSmrg	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
12357821949aSmrg	/* We must wait for 3d to idle, in case source was just written as a dest. */
12367821949aSmrg	OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
12377821949aSmrg		      RADEON_WAIT_HOST_IDLECLEAN |
12387821949aSmrg		      RADEON_WAIT_2D_IDLECLEAN |
12397821949aSmrg		      RADEON_WAIT_3D_IDLECLEAN |
12407821949aSmrg		      RADEON_WAIT_DMA_GUI_IDLE);
12417821949aSmrg	FINISH_ACCEL();
12427821949aSmrg
12437821949aSmrg	if (!info->accel_state->XInited3D)
12447821949aSmrg	    RADEONInit3DEngine(pScrn);
12457821949aSmrg    }
1246de2362d3Smrg
1247de2362d3Smrg    if (pPriv->bicubic_enabled)
1248de2362d3Smrg	pPriv->vtx_count = 6;
1249de2362d3Smrg    else
1250de2362d3Smrg	pPriv->vtx_count = 4;
1251de2362d3Smrg
1252de2362d3Smrg    switch (pPixmap->drawable.bitsPerPixel) {
1253de2362d3Smrg    case 16:
1254de2362d3Smrg	if (pPixmap->drawable.depth == 15)
1255de2362d3Smrg	    dst_format = R300_COLORFORMAT_ARGB1555;
1256de2362d3Smrg	else
1257de2362d3Smrg	    dst_format = R300_COLORFORMAT_RGB565;
1258de2362d3Smrg	break;
1259de2362d3Smrg    case 32:
1260de2362d3Smrg	dst_format = R300_COLORFORMAT_ARGB8888;
1261de2362d3Smrg	break;
1262de2362d3Smrg    default:
1263de2362d3Smrg	return FALSE;
1264de2362d3Smrg    }
1265de2362d3Smrg
1266de2362d3Smrg    output_fmt = (R300_OUT_FMT_C4_8 |
1267de2362d3Smrg		  R300_OUT_FMT_C0_SEL_BLUE |
1268de2362d3Smrg		  R300_OUT_FMT_C1_SEL_GREEN |
1269de2362d3Smrg		  R300_OUT_FMT_C2_SEL_RED |
1270de2362d3Smrg		  R300_OUT_FMT_C3_SEL_ALPHA);
1271de2362d3Smrg
1272de2362d3Smrg    colorpitch = dst_pitch >> pixel_shift;
1273de2362d3Smrg    colorpitch |= dst_format;
1274de2362d3Smrg
1275de2362d3Smrg    if (RADEONTilingEnabled(pScrn, pPixmap))
1276de2362d3Smrg	colorpitch |= R300_COLORTILE;
1277de2362d3Smrg
1278de2362d3Smrg
1279de2362d3Smrg    if (((pPriv->bicubic_state == BICUBIC_OFF)) &&
1280de2362d3Smrg	(pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12))
1281de2362d3Smrg	pPriv->is_planar = TRUE;
1282de2362d3Smrg    else
1283de2362d3Smrg	pPriv->is_planar = FALSE;
1284de2362d3Smrg
1285de2362d3Smrg    if (pPriv->is_planar) {
1286de2362d3Smrg	txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
1287de2362d3Smrg	txpitch = pPriv->src_pitch;
1288de2362d3Smrg    } else {
1289de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
1290de2362d3Smrg	    txformat1 = R300_TX_FORMAT_YVYU422;
1291de2362d3Smrg	else
1292de2362d3Smrg	    txformat1 = R300_TX_FORMAT_VYUY422;
1293de2362d3Smrg
1294de2362d3Smrg	if (pPriv->bicubic_state != BICUBIC_OFF)
1295de2362d3Smrg	    txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
1296de2362d3Smrg
1297de2362d3Smrg	/* pitch is in pixels */
1298de2362d3Smrg	txpitch = pPriv->src_pitch / 2;
1299de2362d3Smrg    }
1300de2362d3Smrg    txpitch -= 1;
1301de2362d3Smrg
1302de2362d3Smrg    txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1303de2362d3Smrg		 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
1304de2362d3Smrg		 R300_TXPITCH_EN);
1305de2362d3Smrg
1306de2362d3Smrg    txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1307de2362d3Smrg		R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
1308de2362d3Smrg		R300_TX_MAG_FILTER_LINEAR |
1309de2362d3Smrg		R300_TX_MIN_FILTER_LINEAR |
1310de2362d3Smrg		(0 << R300_TX_ID_SHIFT));
1311de2362d3Smrg
13127821949aSmrg    txoffset = info->cs ? 0 : pPriv->src_offset;
13137821949aSmrg
1314de2362d3Smrg    BEGIN_ACCEL_RELOC(6, 1);
13157821949aSmrg    OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
13167821949aSmrg    OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
13177821949aSmrg    OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
1318de2362d3Smrg    if (pPriv->is_planar)
13197821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0);
1320de2362d3Smrg    else
13217821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
13227821949aSmrg    OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
13237821949aSmrg    OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo);
13247821949aSmrg    FINISH_ACCEL();
1325de2362d3Smrg
1326de2362d3Smrg    txenable = R300_TEX_0_ENABLE;
1327de2362d3Smrg
1328de2362d3Smrg    if (pPriv->is_planar) {
1329de2362d3Smrg	txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1330de2362d3Smrg		     (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
1331de2362d3Smrg		     R300_TXPITCH_EN);
1332de2362d3Smrg	txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64);
1333de2362d3Smrg	txpitch -= 1;
1334de2362d3Smrg	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1335de2362d3Smrg		    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
1336de2362d3Smrg		    R300_TX_MIN_FILTER_LINEAR |
1337de2362d3Smrg		    R300_TX_MAG_FILTER_LINEAR);
1338de2362d3Smrg
1339de2362d3Smrg	BEGIN_ACCEL_RELOC(12, 2);
13407821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
13417821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
13427821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
13437821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
13447821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
13457821949aSmrg	OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo);
13467821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
13477821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
13487821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
13497821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
13507821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
13517821949aSmrg	OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo);
13527821949aSmrg	FINISH_ACCEL();
1353de2362d3Smrg	txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
1354de2362d3Smrg    }
1355de2362d3Smrg
1356de2362d3Smrg    if (pPriv->bicubic_enabled) {
1357de2362d3Smrg	/* Size is 128x1 */
1358de2362d3Smrg	txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
1359de2362d3Smrg		     (0x0 << R300_TXHEIGHT_SHIFT) |
1360de2362d3Smrg		     R300_TXPITCH_EN);
1361de2362d3Smrg	/* Format is 32-bit floats, 4bpp */
1362de2362d3Smrg	txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
1363de2362d3Smrg	/* Pitch is 127 (128-1) */
1364de2362d3Smrg	txpitch = 0x7f;
1365de2362d3Smrg	/* Tex filter */
1366de2362d3Smrg	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
1367de2362d3Smrg		    R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
1368de2362d3Smrg		    R300_TX_MIN_FILTER_NEAREST |
1369de2362d3Smrg		    R300_TX_MAG_FILTER_NEAREST |
1370de2362d3Smrg		    (1 << R300_TX_ID_SHIFT));
1371de2362d3Smrg
13727821949aSmrg	if (info->cs)
13737821949aSmrg	    bicubic_offset = 0;
13747821949aSmrg	else
13757821949aSmrg	    bicubic_offset = pPriv->bicubic_src_offset;
13767821949aSmrg
1377de2362d3Smrg	BEGIN_ACCEL_RELOC(6, 1);
13787821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
13797821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
13807821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
13817821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
13827821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
13837821949aSmrg	OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo);
13847821949aSmrg	FINISH_ACCEL();
1385de2362d3Smrg
1386de2362d3Smrg	/* Enable tex 1 */
1387de2362d3Smrg	txenable |= R300_TEX_1_ENABLE;
1388de2362d3Smrg    }
1389de2362d3Smrg
1390de2362d3Smrg    /* setup the VAP */
1391de2362d3Smrg    if (info->accel_state->has_tcl) {
1392de2362d3Smrg	if (pPriv->bicubic_enabled)
13937821949aSmrg	    BEGIN_ACCEL(7);
1394de2362d3Smrg	else
13957821949aSmrg	    BEGIN_ACCEL(6);
1396de2362d3Smrg    } else {
1397de2362d3Smrg	if (pPriv->bicubic_enabled)
13987821949aSmrg	    BEGIN_ACCEL(5);
1399de2362d3Smrg	else
14007821949aSmrg	    BEGIN_ACCEL(4);
1401de2362d3Smrg    }
1402de2362d3Smrg
1403de2362d3Smrg    /* These registers define the number, type, and location of data submitted
1404de2362d3Smrg     * to the PVS unit or GA input (when PVS is disabled)
1405de2362d3Smrg     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
1406de2362d3Smrg     * enabled.  This memory provides the inputs to the vertex shader program
1407de2362d3Smrg     * and ordering is not important.  When PVS/TCL is disabled, this field maps
1408de2362d3Smrg     * directly to the GA input memory and the order is significant.  In
1409de2362d3Smrg     * PVS_BYPASS mode the order is as follows:
1410de2362d3Smrg     * Position
1411de2362d3Smrg     * Point Size
1412de2362d3Smrg     * Color 0-3
1413de2362d3Smrg     * Textures 0-7
1414de2362d3Smrg     * Fog
1415de2362d3Smrg     */
1416de2362d3Smrg    if (pPriv->bicubic_enabled) {
14177821949aSmrg	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1418de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1419de2362d3Smrg		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1420de2362d3Smrg		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1421de2362d3Smrg		       R300_SIGNED_0 |
1422de2362d3Smrg		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1423de2362d3Smrg		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1424de2362d3Smrg		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1425de2362d3Smrg		       R300_SIGNED_1));
14267821949aSmrg	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
1427de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1428de2362d3Smrg		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1429de2362d3Smrg		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1430de2362d3Smrg		       R300_LAST_VEC_2 |
1431de2362d3Smrg		       R300_SIGNED_2));
1432de2362d3Smrg    } else {
14337821949aSmrg	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1434de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1435de2362d3Smrg		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1436de2362d3Smrg		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1437de2362d3Smrg		       R300_SIGNED_0 |
1438de2362d3Smrg		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1439de2362d3Smrg		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1440de2362d3Smrg		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1441de2362d3Smrg		       R300_LAST_VEC_1 |
1442de2362d3Smrg		       R300_SIGNED_1));
1443de2362d3Smrg    }
1444de2362d3Smrg
1445de2362d3Smrg    /* load the vertex shader
1446de2362d3Smrg     * We pre-load vertex programs in RADEONInit3DEngine():
1447de2362d3Smrg     * - exa
1448de2362d3Smrg     * - Xv
1449de2362d3Smrg     * - Xv bicubic
1450de2362d3Smrg     * Here we select the offset of the vertex program we want to use
1451de2362d3Smrg     */
1452de2362d3Smrg    if (info->accel_state->has_tcl) {
1453de2362d3Smrg	if (pPriv->bicubic_enabled) {
14547821949aSmrg	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1455de2362d3Smrg			  ((11 << R300_PVS_FIRST_INST_SHIFT) |
1456de2362d3Smrg			   (13 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1457de2362d3Smrg			   (13 << R300_PVS_LAST_INST_SHIFT)));
14587821949aSmrg	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1459de2362d3Smrg			  (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1460de2362d3Smrg	} else {
14617821949aSmrg	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1462de2362d3Smrg			  ((9 << R300_PVS_FIRST_INST_SHIFT) |
1463de2362d3Smrg			   (10 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1464de2362d3Smrg			   (10 << R300_PVS_LAST_INST_SHIFT)));
14657821949aSmrg	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1466de2362d3Smrg			  (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1467de2362d3Smrg	}
1468de2362d3Smrg    }
1469de2362d3Smrg
1470de2362d3Smrg    /* Position and one set of 2 texture coordinates */
14717821949aSmrg    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1472de2362d3Smrg    if (pPriv->bicubic_enabled)
14737821949aSmrg	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1474de2362d3Smrg					       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1475de2362d3Smrg    else
14767821949aSmrg	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
1477de2362d3Smrg
14787821949aSmrg    OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
14797821949aSmrg    FINISH_ACCEL();
1480de2362d3Smrg
1481de2362d3Smrg    /* setup pixel shader */
1482de2362d3Smrg    if (pPriv->bicubic_state != BICUBIC_OFF) {
1483de2362d3Smrg	if (pPriv->bicubic_enabled) {
14847821949aSmrg	    BEGIN_ACCEL(79);
1485de2362d3Smrg
1486de2362d3Smrg	    /* 4 components: 2 for tex0 and 2 for tex1 */
14877821949aSmrg	    OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1488de2362d3Smrg					  R300_RS_COUNT_HIRES_EN));
1489de2362d3Smrg
1490de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
14917821949aSmrg	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1492de2362d3Smrg
1493de2362d3Smrg	    /* Pixel stack frame size. */
14947821949aSmrg	    OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
1495de2362d3Smrg
1496de2362d3Smrg	    /* Indirection levels */
14977821949aSmrg	    OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
1498de2362d3Smrg					   R300_FIRST_TEX));
1499de2362d3Smrg
1500de2362d3Smrg	    /* Set nodes. */
15017821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1502de2362d3Smrg						R300_ALU_CODE_SIZE(14) |
1503de2362d3Smrg						R300_TEX_CODE_OFFSET(0) |
1504de2362d3Smrg						R300_TEX_CODE_SIZE(6)));
1505de2362d3Smrg
1506de2362d3Smrg	    /* Nodes are allocated highest first, but executed lowest first */
15077821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0);
15087821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
1509de2362d3Smrg						R300_ALU_SIZE(0) |
1510de2362d3Smrg						R300_TEX_START(0) |
1511de2362d3Smrg						R300_TEX_SIZE(0)));
15127821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
1513de2362d3Smrg						R300_ALU_SIZE(9) |
1514de2362d3Smrg						R300_TEX_START(1) |
1515de2362d3Smrg						R300_TEX_SIZE(0)));
15167821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
1517de2362d3Smrg						R300_ALU_SIZE(2) |
1518de2362d3Smrg						R300_TEX_START(2) |
1519de2362d3Smrg						R300_TEX_SIZE(3) |
1520de2362d3Smrg						R300_RGBA_OUT));
1521de2362d3Smrg
1522de2362d3Smrg	    /* ** BICUBIC FP ** */
1523de2362d3Smrg
1524de2362d3Smrg	    /* texcoord0 => temp0
1525de2362d3Smrg	     * texcoord1 => temp1 */
1526de2362d3Smrg
1527de2362d3Smrg	    // first node
1528de2362d3Smrg	    /* TEX temp2, temp1.rrr0, tex1, 1D */
15297821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
1530de2362d3Smrg						R300_TEX_ID(1) |
1531de2362d3Smrg						R300_TEX_SRC_ADDR(1) |
1532de2362d3Smrg						R300_TEX_DST_ADDR(2)));
1533de2362d3Smrg
1534de2362d3Smrg	    /* MOV temp1.r, temp1.ggg0 */
15357821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1536de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
1537de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1538de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
15397821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
1540de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
1541de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
15427821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1543de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1544de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1545de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
15467821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
1547de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1548de2362d3Smrg
1549de2362d3Smrg
1550de2362d3Smrg	    // second node
1551de2362d3Smrg	    /* TEX temp1, temp1, tex1, 1D */
15527821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
1553de2362d3Smrg						R300_TEX_ID(1) |
1554de2362d3Smrg						R300_TEX_SRC_ADDR(1) |
1555de2362d3Smrg						R300_TEX_DST_ADDR(1)));
1556de2362d3Smrg
1557de2362d3Smrg	    /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
15587821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1559de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
1560de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1561de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
15627821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
1563de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
1564de2362d3Smrg						    R300_ALU_RGB_ADDRD(3) |
1565de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
15667821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1567de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1568de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1569de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
15707821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
1571de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1572de2362d3Smrg
1573de2362d3Smrg
1574de2362d3Smrg	    /* MUL temp2.rg, temp2.rrr0, const0.rgb */
15757821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1576de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
1577de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1578de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
15797821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
1580de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
1581de2362d3Smrg						    R300_ALU_RGB_ADDRD(2) |
1582de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
15837821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1584de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1585de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1586de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
15877821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
1588de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1589de2362d3Smrg
1590de2362d3Smrg	    /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
15917821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1592de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
1593de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1594de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
15957821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
1596de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
1597de2362d3Smrg						    R300_ALU_RGB_ADDR2(3) |
1598de2362d3Smrg						    R300_ALU_RGB_ADDRD(4) |
1599de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
16007821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1601de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1602de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1603de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
16047821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
1605de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1606de2362d3Smrg
1607de2362d3Smrg	    /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
16087821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1609de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
1610de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1611de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
16127821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
1613de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
1614de2362d3Smrg						    R300_ALU_RGB_ADDR2(2) |
1615de2362d3Smrg						    R300_ALU_RGB_ADDRD(5) |
1616de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
16177821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1618de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1619de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1620de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
16217821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
1622de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1623de2362d3Smrg
1624de2362d3Smrg	    /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
16257821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1626de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
1627de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1628de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
16297821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
1630de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
1631de2362d3Smrg						    R300_ALU_RGB_ADDR2(3) |
1632de2362d3Smrg						    R300_ALU_RGB_ADDRD(3) |
1633de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
16347821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1635de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1636de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1637de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
16387821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
1639de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1640de2362d3Smrg
1641de2362d3Smrg	    /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
16427821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1643de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
1644de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1645de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
16467821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
1647de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
1648de2362d3Smrg						    R300_ALU_RGB_ADDR2(2) |
1649de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
1650de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
16517821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1652de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1653de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1654de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
16557821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
1656de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1657de2362d3Smrg
1658de2362d3Smrg	    /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
16597821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1660de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1661de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1662de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
16637821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
1664de2362d3Smrg						    R300_ALU_RGB_ADDR2(1) |
1665de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
1666de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
16677821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1668de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1669de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1670de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
16717821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
1672de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1673de2362d3Smrg
1674de2362d3Smrg	    /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
16757821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1676de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1677de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1678de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
16797821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
1680de2362d3Smrg						    R300_ALU_RGB_ADDR2(3) |
1681de2362d3Smrg						    R300_ALU_RGB_ADDRD(2) |
1682de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
16837821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1684de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1685de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1686de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
16877821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
1688de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1689de2362d3Smrg
1690de2362d3Smrg	    /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
16917821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1692de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1693de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1694de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
16957821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
1696de2362d3Smrg						    R300_ALU_RGB_ADDR2(5) |
1697de2362d3Smrg						    R300_ALU_RGB_ADDRD(3) |
1698de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
16997821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1700de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1701de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1702de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
17037821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
1704de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1705de2362d3Smrg
1706de2362d3Smrg	    /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
17077821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1708de2362d3Smrg						     R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1709de2362d3Smrg						     R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1710de2362d3Smrg						     R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
17117821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
1712de2362d3Smrg						     R300_ALU_RGB_ADDR2(4) |
1713de2362d3Smrg						     R300_ALU_RGB_ADDRD(0) |
1714de2362d3Smrg						     R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
17157821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1716de2362d3Smrg						       R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1717de2362d3Smrg						       R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1718de2362d3Smrg						       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
17197821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
1720de2362d3Smrg						       R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1721de2362d3Smrg
1722de2362d3Smrg
1723de2362d3Smrg	    // third node
1724de2362d3Smrg	    /* TEX temp4, temp1.rg--, tex0, 1D */
17257821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
1726de2362d3Smrg						R300_TEX_ID(0) |
1727de2362d3Smrg						R300_TEX_SRC_ADDR(1) |
1728de2362d3Smrg						R300_TEX_DST_ADDR(4)));
1729de2362d3Smrg
1730de2362d3Smrg	    /* TEX temp3, temp3.rg--, tex0, 1D */
17317821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
1732de2362d3Smrg						R300_TEX_ID(0) |
1733de2362d3Smrg						R300_TEX_SRC_ADDR(3) |
1734de2362d3Smrg						R300_TEX_DST_ADDR(3)));
1735de2362d3Smrg
1736de2362d3Smrg	    /* TEX temp5, temp2.rg--, tex0, 1D */
17377821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
1738de2362d3Smrg						R300_TEX_ID(0) |
1739de2362d3Smrg						R300_TEX_SRC_ADDR(2) |
1740de2362d3Smrg						R300_TEX_DST_ADDR(5)));
1741de2362d3Smrg
1742de2362d3Smrg	    /* TEX temp0, temp0.rg--, tex0, 1D */
17437821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
1744de2362d3Smrg						R300_TEX_ID(0) |
1745de2362d3Smrg						R300_TEX_SRC_ADDR(0) |
1746de2362d3Smrg						R300_TEX_DST_ADDR(0)));
1747de2362d3Smrg
1748de2362d3Smrg	    /* LRP temp3, temp1.bbbb, temp4, temp3 ->
1749de2362d3Smrg	     * - PRESUB temps, temp4 - temp3
1750de2362d3Smrg	     * - MAD temp3, temp1.bbbb, temps, temp3 */
17517821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1752de2362d3Smrg						     R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
1753de2362d3Smrg						     R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
1754de2362d3Smrg						     R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
1755de2362d3Smrg						     R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
17567821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
1757de2362d3Smrg						     R300_ALU_RGB_ADDR1(4) |
1758de2362d3Smrg						     R300_ALU_RGB_ADDR2(1) |
1759de2362d3Smrg						     R300_ALU_RGB_ADDRD(3) |
1760de2362d3Smrg						     R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
17617821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1762de2362d3Smrg						       R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
1763de2362d3Smrg						       R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
1764de2362d3Smrg						       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
17657821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
1766de2362d3Smrg						       R300_ALU_ALPHA_ADDR1(4) |
1767de2362d3Smrg						       R300_ALU_ALPHA_ADDR2(1) |
1768de2362d3Smrg						       R300_ALU_ALPHA_ADDRD(3) |
1769de2362d3Smrg						       R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
1770de2362d3Smrg
1771de2362d3Smrg	    /* LRP temp0, temp1.bbbb, temp5, temp0 ->
1772de2362d3Smrg	     * - PRESUB temps, temp5 - temp0
1773de2362d3Smrg	     * - MAD temp0, temp1.bbbb, temps, temp0 */
17747821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1775de2362d3Smrg						     R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
1776de2362d3Smrg						     R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
1777de2362d3Smrg						     R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
1778de2362d3Smrg						     R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
1779de2362d3Smrg						     R300_ALU_RGB_INSERT_NOP));
17807821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
1781de2362d3Smrg						     R300_ALU_RGB_ADDR1(5) |
1782de2362d3Smrg						     R300_ALU_RGB_ADDR2(1) |
1783de2362d3Smrg						     R300_ALU_RGB_ADDRD(0) |
1784de2362d3Smrg						     R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
17857821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1786de2362d3Smrg						       R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
1787de2362d3Smrg						       R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
1788de2362d3Smrg						       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
17897821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
1790de2362d3Smrg						       R300_ALU_ALPHA_ADDR1(5) |
1791de2362d3Smrg						       R300_ALU_ALPHA_ADDR2(1) |
1792de2362d3Smrg						       R300_ALU_ALPHA_ADDRD(0) |
1793de2362d3Smrg						       R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
1794de2362d3Smrg
1795de2362d3Smrg	    /* LRP output, temp2.bbbb, temp3, temp0 ->
1796de2362d3Smrg	     * - PRESUB temps, temp3 - temp0
1797de2362d3Smrg	     * - MAD output, temp2.bbbb, temps, temp0 */
17987821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1799de2362d3Smrg						     R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
1800de2362d3Smrg						     R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
1801de2362d3Smrg						     R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
1802de2362d3Smrg						     R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
18037821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
1804de2362d3Smrg						     R300_ALU_RGB_ADDR1(3) |
1805de2362d3Smrg						     R300_ALU_RGB_ADDR2(2) |
1806de2362d3Smrg						     R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
18077821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1808de2362d3Smrg						       R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
1809de2362d3Smrg						       R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
1810de2362d3Smrg						       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
18117821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
1812de2362d3Smrg						       R300_ALU_ALPHA_ADDR1(3) |
1813de2362d3Smrg						       R300_ALU_ALPHA_ADDR2(2) |
1814de2362d3Smrg						       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A)));
1815de2362d3Smrg
1816de2362d3Smrg	    /* Shader constants. */
18177821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
18187821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0);
18197821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0);
18207821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0);
1821de2362d3Smrg
18227821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0);
18237821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
18247821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0);
18257821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
1826de2362d3Smrg
18277821949aSmrg	    FINISH_ACCEL();
1828de2362d3Smrg	} else {
18297821949aSmrg	    BEGIN_ACCEL(11);
1830de2362d3Smrg	    /* 2 components: 2 for tex0 */
18317821949aSmrg	    OUT_ACCEL_REG(R300_RS_COUNT,
1832de2362d3Smrg                          ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1833de2362d3Smrg                           R300_RS_COUNT_HIRES_EN));
1834de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
18357821949aSmrg	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1836de2362d3Smrg
18377821949aSmrg	    OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
1838de2362d3Smrg
1839de2362d3Smrg	    /* Indirection levels */
18407821949aSmrg	    OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
1841de2362d3Smrg					   R300_FIRST_TEX));
1842de2362d3Smrg
18437821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1844de2362d3Smrg						R300_ALU_CODE_SIZE(1) |
1845de2362d3Smrg						R300_TEX_CODE_OFFSET(0) |
1846de2362d3Smrg						R300_TEX_CODE_SIZE(1)));
1847de2362d3Smrg
18487821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
1849de2362d3Smrg						R300_ALU_SIZE(0) |
1850de2362d3Smrg						R300_TEX_START(0) |
1851de2362d3Smrg						R300_TEX_SIZE(0) |
1852de2362d3Smrg						R300_RGBA_OUT));
1853de2362d3Smrg
1854de2362d3Smrg	    /* tex inst */
18557821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
1856de2362d3Smrg					       R300_TEX_DST_ADDR(0) |
1857de2362d3Smrg					       R300_TEX_ID(0) |
1858de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
1859de2362d3Smrg
1860de2362d3Smrg	    /* ALU inst */
1861de2362d3Smrg	    /* RGB */
18627821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
1863de2362d3Smrg                                                   R300_ALU_RGB_ADDR1(0) |
1864de2362d3Smrg                                                   R300_ALU_RGB_ADDR2(0) |
1865de2362d3Smrg                                                   R300_ALU_RGB_ADDRD(0) |
1866de2362d3Smrg                                                   R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1867de2362d3Smrg								       R300_ALU_RGB_MASK_G |
1868de2362d3Smrg								       R300_ALU_RGB_MASK_B)) |
1869de2362d3Smrg                                                   R300_ALU_RGB_TARGET_A));
18707821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1871de2362d3Smrg                                                   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1872de2362d3Smrg                                                   R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1873de2362d3Smrg						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1874de2362d3Smrg                                                   R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1875de2362d3Smrg                                                   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1876de2362d3Smrg                                                   R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1877de2362d3Smrg                                                   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1878de2362d3Smrg                                                   R300_ALU_RGB_CLAMP));
1879de2362d3Smrg	    /* Alpha */
18807821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
1881de2362d3Smrg						     R300_ALU_ALPHA_ADDR1(0) |
1882de2362d3Smrg						     R300_ALU_ALPHA_ADDR2(0) |
1883de2362d3Smrg						     R300_ALU_ALPHA_ADDRD(0) |
1884de2362d3Smrg						     R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1885de2362d3Smrg						     R300_ALU_ALPHA_TARGET_A |
1886de2362d3Smrg						     R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
18877821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
1888de2362d3Smrg						     R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1889de2362d3Smrg						     R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
1890de2362d3Smrg						     R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1891de2362d3Smrg						     R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1892de2362d3Smrg						     R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1893de2362d3Smrg						     R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1894de2362d3Smrg						     R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1895de2362d3Smrg						     R300_ALU_ALPHA_CLAMP));
18967821949aSmrg	    FINISH_ACCEL();
1897de2362d3Smrg	}
1898de2362d3Smrg    } else {
1899de2362d3Smrg	/*
1900de2362d3Smrg	 * y' = y - .0625
1901de2362d3Smrg	 * u' = u - .5
1902de2362d3Smrg	 * v' = v - .5;
1903de2362d3Smrg	 *
1904de2362d3Smrg	 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
1905de2362d3Smrg	 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
1906de2362d3Smrg	 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
1907de2362d3Smrg	 *
1908de2362d3Smrg	 * DP3 might look like the straightforward solution
1909de2362d3Smrg	 * but we'd need to move the texture yuv values in
1910de2362d3Smrg	 * the same reg for this to work. Therefore use MADs.
1911de2362d3Smrg	 * Brightness just adds to the off constant.
1912de2362d3Smrg	 * Contrast is multiplication of luminance.
1913de2362d3Smrg	 * Saturation and hue change the u and v coeffs.
1914de2362d3Smrg	 * Default values (before adjustments - depend on colorspace):
1915de2362d3Smrg	 * yco = 1.1643
1916de2362d3Smrg	 * uco = 0, -0.39173, 2.017
1917de2362d3Smrg	 * vco = 1.5958, -0.8129, 0
1918de2362d3Smrg	 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
1919de2362d3Smrg	 *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
1920de2362d3Smrg	 *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
1921de2362d3Smrg	 *
1922de2362d3Smrg	 * temp = MAD(yco, yuv.yyyy, off)
1923de2362d3Smrg	 * temp = MAD(uco, yuv.uuuu, temp)
1924de2362d3Smrg	 * result = MAD(vco, yuv.vvvv, temp)
1925de2362d3Smrg	 */
1926de2362d3Smrg	/* TODO: don't recalc consts always */
1927de2362d3Smrg	const float Loff = -0.0627;
1928de2362d3Smrg	const float Coff = -0.502;
1929de2362d3Smrg	float uvcosf, uvsinf;
1930de2362d3Smrg	float yco;
1931de2362d3Smrg	float uco[3], vco[3], off[3];
1932de2362d3Smrg	float bright, cont, gamma;
1933de2362d3Smrg	int ref = pPriv->transform_index;
1934de2362d3Smrg	Bool needgamma = FALSE;
1935de2362d3Smrg
1936de2362d3Smrg	cont = RTFContrast(pPriv->contrast);
1937de2362d3Smrg	bright = RTFBrightness(pPriv->brightness);
1938de2362d3Smrg	gamma = (float)pPriv->gamma / 1000.0;
1939de2362d3Smrg	uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
1940de2362d3Smrg	uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
1941de2362d3Smrg	/* overlay video also does pre-gamma contrast/sat adjust, should we? */
1942de2362d3Smrg
1943de2362d3Smrg	yco = trans[ref].RefLuma * cont;
1944de2362d3Smrg	uco[0] = -trans[ref].RefRCr * uvsinf;
1945de2362d3Smrg	uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
1946de2362d3Smrg	uco[2] = trans[ref].RefBCb * uvcosf;
1947de2362d3Smrg	vco[0] = trans[ref].RefRCr * uvcosf;
1948de2362d3Smrg	vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
1949de2362d3Smrg	vco[2] = trans[ref].RefBCb * uvsinf;
1950de2362d3Smrg	off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
1951de2362d3Smrg	off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
1952de2362d3Smrg	off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
1953de2362d3Smrg
1954de2362d3Smrg	if (gamma != 1.0) {
1955de2362d3Smrg	    needgamma = TRUE;
1956de2362d3Smrg	    /* note: gamma correction is out = in ^ gamma;
1957de2362d3Smrg	       gpu can only do LG2/EX2 therefore we transform into
1958de2362d3Smrg	       in ^ gamma = 2 ^ (log2(in) * gamma).
1959de2362d3Smrg	       Lots of scalar ops, unfortunately (better solution?) -
1960de2362d3Smrg	       without gamma that's 3 inst, with gamma it's 10...
1961de2362d3Smrg	       could use different gamma factors per channel,
1962de2362d3Smrg	       if that's of any use. */
1963de2362d3Smrg	}
1964de2362d3Smrg
1965de2362d3Smrg	if (pPriv->is_planar) {
19667821949aSmrg	    BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
1967de2362d3Smrg	    /* 2 components: same 2 for tex0/1/2 */
19687821949aSmrg	    OUT_ACCEL_REG(R300_RS_COUNT,
1969de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1970de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
1971de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
19727821949aSmrg	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1973de2362d3Smrg
19747821949aSmrg	    OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
1975de2362d3Smrg
1976de2362d3Smrg	    /* Indirection levels */
19777821949aSmrg	    OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
1978de2362d3Smrg					   R300_FIRST_TEX));
1979de2362d3Smrg
19807821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1981de2362d3Smrg						R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
1982de2362d3Smrg						R300_TEX_CODE_OFFSET(0) |
1983de2362d3Smrg						R300_TEX_CODE_SIZE(3)));
1984de2362d3Smrg
19857821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
1986de2362d3Smrg						R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
1987de2362d3Smrg						R300_TEX_START(0) |
1988de2362d3Smrg						R300_TEX_SIZE(2) |
1989de2362d3Smrg						R300_RGBA_OUT));
1990de2362d3Smrg
1991de2362d3Smrg	    /* tex inst */
19927821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
1993de2362d3Smrg					       R300_TEX_DST_ADDR(2) |
1994de2362d3Smrg					       R300_TEX_ID(0) |
1995de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
19967821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
1997de2362d3Smrg					       R300_TEX_DST_ADDR(1) |
1998de2362d3Smrg					       R300_TEX_ID(1) |
1999de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
20007821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
2001de2362d3Smrg					       R300_TEX_DST_ADDR(0) |
2002de2362d3Smrg					       R300_TEX_ID(2) |
2003de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
2004de2362d3Smrg
2005de2362d3Smrg	    /* ALU inst */
2006de2362d3Smrg	    /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
20077821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
2008de2362d3Smrg						    R300_ALU_RGB_ADDR1(2) |
2009de2362d3Smrg						    R300_ALU_RGB_ADDR2(0) |
2010de2362d3Smrg						    R300_ALU_RGB_ADDRD(2) |
2011de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
20127821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
2013de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2014de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
2015de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2016de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
2017de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2018de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2019de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
2020de2362d3Smrg	    /* alpha nop, but need to set up alpha source for rgb usage */
20217821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
2022de2362d3Smrg						      R300_ALU_ALPHA_ADDR1(2) |
2023de2362d3Smrg						      R300_ALU_ALPHA_ADDR2(0) |
2024de2362d3Smrg						      R300_ALU_ALPHA_ADDRD(2) |
2025de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
20267821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2027de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2028de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2029de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2030de2362d3Smrg
2031de2362d3Smrg	    /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
20327821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
2033de2362d3Smrg						    R300_ALU_RGB_ADDR1(1) |
2034de2362d3Smrg						    R300_ALU_RGB_ADDR2(2) |
2035de2362d3Smrg						    R300_ALU_RGB_ADDRD(2) |
2036de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
20377821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
2038de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2039de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
2040de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2041de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
2042de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2043de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2044de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
2045de2362d3Smrg	    /* alpha nop */
20467821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) |
2047de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
20487821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2049de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2050de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2051de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2052de2362d3Smrg
2053de2362d3Smrg	    /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
20547821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
2055de2362d3Smrg						    R300_ALU_RGB_ADDR1(0) |
2056de2362d3Smrg						    R300_ALU_RGB_ADDR2(2) |
2057de2362d3Smrg						    R300_ALU_RGB_ADDRD(0) |
2058de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
2059de2362d3Smrg						    (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
20607821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
2061de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2062de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
2063de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2064de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
2065de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2066de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2067de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
2068de2362d3Smrg						    R300_ALU_RGB_CLAMP));
2069de2362d3Smrg	    /* write alpha 1 */
20707821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
2071de2362d3Smrg						      R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
2072de2362d3Smrg						      R300_ALU_ALPHA_TARGET_A));
20737821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2074de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2075de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2076de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
2077de2362d3Smrg
2078de2362d3Smrg	    if (needgamma) {
2079de2362d3Smrg		/* rgb temp0.r = op_sop, set up src0 reg */
20807821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
2081de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
20827821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
2083de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2084de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2085de2362d3Smrg		/* alpha lg2 temp0, temp0.r */
20867821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
2087de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
20887821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2089de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
2090de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2091de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2092de2362d3Smrg
2093de2362d3Smrg		/* rgb temp0.g = op_sop, set up src0 reg */
20947821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
2095de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
20967821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
2097de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2098de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2099de2362d3Smrg		/* alpha lg2 temp0, temp0.g */
21007821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
2101de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
21027821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2103de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
2104de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2105de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2106de2362d3Smrg
2107de2362d3Smrg		/* rgb temp0.b = op_sop, set up src0 reg */
21087821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
2109de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
21107821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
2111de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2112de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2113de2362d3Smrg		/* alpha lg2 temp0, temp0.b */
21147821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
2115de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
21167821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2117de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
2118de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2119de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2120de2362d3Smrg
2121de2362d3Smrg		/* MUL temp0.rgb, temp0.rgb, const1.aaa (encoded as MAD with a zero addend) */
21227821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
2123de2362d3Smrg							R300_ALU_RGB_ADDR1(0) |
2124de2362d3Smrg							R300_ALU_RGB_ADDR2(0) |
2125de2362d3Smrg							R300_ALU_RGB_ADDRD(0) |
2126de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
21277821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
2128de2362d3Smrg							R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2129de2362d3Smrg							R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
2130de2362d3Smrg							R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2131de2362d3Smrg							R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
2132de2362d3Smrg							R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2133de2362d3Smrg							R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2134de2362d3Smrg							R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
2135de2362d3Smrg		/* alpha nop, but set up const1 */
21367821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
2137de2362d3Smrg							  R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
2138de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
21397821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2140de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2141de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2142de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2143de2362d3Smrg
2144de2362d3Smrg		/* rgb out0.r = op_sop, set up src0 reg */
21457821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
2146de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
2147de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
21487821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
2149de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2150de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2151de2362d3Smrg		/* alpha ex2 temp0, temp0.r */
21527821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
2153de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
21547821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2155de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
2156de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2157de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2158de2362d3Smrg
2159de2362d3Smrg		/* rgb out0.g = op_sop, set up src0 reg */
21607821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
2161de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
2162de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
21637821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
2164de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2165de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2166de2362d3Smrg		/* alpha ex2 temp0, temp0.g */
21677821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
2168de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
21697821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2170de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
2171de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2172de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2173de2362d3Smrg
2174de2362d3Smrg		/* rgb out0.b = op_sop, set up src0 reg */
21757821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
2176de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
2177de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
21787821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
2179de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2180de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2181de2362d3Smrg		/* alpha ex2 temp0, temp0.b */
21827821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
2183de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
21847821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2185de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
2186de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2187de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2188de2362d3Smrg	    }
2189de2362d3Smrg	} else {
21907821949aSmrg	    BEGIN_ACCEL(needgamma ? 28 + 31 : 31);
2191de2362d3Smrg	    /* 2 components */
21927821949aSmrg	    OUT_ACCEL_REG(R300_RS_COUNT,
2193de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
2194de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
2195de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
21967821949aSmrg	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
2197de2362d3Smrg
21987821949aSmrg	    OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
2199de2362d3Smrg
2200de2362d3Smrg	    /* Indirection levels */
22017821949aSmrg	    OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
2202de2362d3Smrg					   R300_FIRST_TEX));
2203de2362d3Smrg
22047821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
2205de2362d3Smrg						R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
2206de2362d3Smrg						R300_TEX_CODE_OFFSET(0) |
2207de2362d3Smrg						R300_TEX_CODE_SIZE(1)));
2208de2362d3Smrg
22097821949aSmrg	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
2210de2362d3Smrg						R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
2211de2362d3Smrg						R300_TEX_START(0) |
2212de2362d3Smrg						R300_TEX_SIZE(0) |
2213de2362d3Smrg						R300_RGBA_OUT));
2214de2362d3Smrg
2215de2362d3Smrg	    /* tex inst */
22167821949aSmrg	    OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
2217de2362d3Smrg					       R300_TEX_DST_ADDR(0) |
2218de2362d3Smrg					       R300_TEX_ID(0) |
2219de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
2220de2362d3Smrg
2221de2362d3Smrg	    /* ALU inst */
2222de2362d3Smrg	    /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
22237821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
2224de2362d3Smrg						    R300_ALU_RGB_ADDR1(0) |
2225de2362d3Smrg						    R300_ALU_RGB_ADDR2(0) |
2226de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
2227de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
22287821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
2229de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2230de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) |
2231de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2232de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
2233de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2234de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2235de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
2236de2362d3Smrg	    /* alpha nop, but need to set up alpha source for rgb usage */
22377821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
2238de2362d3Smrg						      R300_ALU_ALPHA_ADDR1(0) |
2239de2362d3Smrg						      R300_ALU_ALPHA_ADDR2(0) |
2240de2362d3Smrg						      R300_ALU_ALPHA_ADDRD(0) |
2241de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
22427821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2243de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2244de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2245de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2246de2362d3Smrg
2247de2362d3Smrg	    /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
22487821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
2249de2362d3Smrg						    R300_ALU_RGB_ADDR1(0) |
2250de2362d3Smrg						    R300_ALU_RGB_ADDR2(1) |
2251de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
2252de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
22537821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
2254de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2255de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) |
2256de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2257de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
2258de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2259de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2260de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
2261de2362d3Smrg	    /* alpha nop */
22627821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
2263de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
22647821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2265de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2266de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2267de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2268de2362d3Smrg
2269de2362d3Smrg	    /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
22707821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
2271de2362d3Smrg						    R300_ALU_RGB_ADDR1(0) |
2272de2362d3Smrg						    R300_ALU_RGB_ADDR2(1) |
2273de2362d3Smrg						    R300_ALU_RGB_ADDRD(0) |
2274de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
2275de2362d3Smrg						    (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
22767821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
2277de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2278de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) |
2279de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2280de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
2281de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2282de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2283de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
2284de2362d3Smrg						    R300_ALU_RGB_CLAMP));
2285de2362d3Smrg	    /* write alpha 1 */
22867821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
2287de2362d3Smrg						      R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
2288de2362d3Smrg						      R300_ALU_ALPHA_TARGET_A));
22897821949aSmrg	    OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2290de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2291de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2292de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
2293de2362d3Smrg
2294de2362d3Smrg	    if (needgamma) {
2295de2362d3Smrg		/* rgb temp0.r = op_sop, set up src0 reg */
22967821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
2297de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
22987821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
2299de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2300de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2301de2362d3Smrg		/* alpha lg2 temp0, temp0.r */
23027821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
2303de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
23047821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2305de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
2306de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2307de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2308de2362d3Smrg
2309de2362d3Smrg		/* rgb temp0.g = op_sop, set up src0 reg */
23107821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
2311de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
23127821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
2313de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2314de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2315de2362d3Smrg		/* alpha lg2 temp0, temp0.g */
23167821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
2317de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
23187821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2319de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
2320de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2321de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2322de2362d3Smrg
2323de2362d3Smrg		/* rgb temp0.b = op_sop, set up src0 reg */
23247821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
2325de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
23267821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
2327de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2328de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2329de2362d3Smrg		/* alpha lg2 temp0, temp0.b */
23307821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
2331de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
23327821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2333de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
2334de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2335de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2336de2362d3Smrg
2337de2362d3Smrg		/* MUL temp0.rgb, temp0.rgb, const1.aaa (encoded as MAD with a zero addend) */
23387821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
2339de2362d3Smrg							R300_ALU_RGB_ADDR1(0) |
2340de2362d3Smrg							R300_ALU_RGB_ADDR2(0) |
2341de2362d3Smrg							R300_ALU_RGB_ADDRD(0) |
2342de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
23437821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
2344de2362d3Smrg							R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2345de2362d3Smrg							R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
2346de2362d3Smrg							R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2347de2362d3Smrg							R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
2348de2362d3Smrg							R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2349de2362d3Smrg							R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2350de2362d3Smrg							R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
2351de2362d3Smrg		/* alpha nop, but set up const1 */
23527821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
2353de2362d3Smrg							  R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
2354de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
23557821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2356de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2357de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2358de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2359de2362d3Smrg
2360de2362d3Smrg		/* rgb out0.r = op_sop, set up src0 reg */
23617821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
2362de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
2363de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
23647821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
2365de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2366de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2367de2362d3Smrg		/* alpha ex2 temp0, temp0.r */
23687821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
2369de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
23707821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2371de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
2372de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2373de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2374de2362d3Smrg
2375de2362d3Smrg		/* rgb out0.g = op_sop, set up src0 reg */
23767821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
2377de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
2378de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
23797821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
2380de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2381de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2382de2362d3Smrg		/* alpha ex2 temp0, temp0.g */
23837821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
2384de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
23857821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2386de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
2387de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2388de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2389de2362d3Smrg
2390de2362d3Smrg		/* rgb out0.b = op_sop, set up src0 reg */
23917821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
2392de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
2393de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
23947821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
2395de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2396de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2397de2362d3Smrg		/* alpha ex2 temp0, temp0.b */
23987821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
2399de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
24007821949aSmrg		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2401de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
2402de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2403de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2404de2362d3Smrg	    }
2405de2362d3Smrg	}
2406de2362d3Smrg
2407de2362d3Smrg	/* Shader constants. */
2408de2362d3Smrg	/* constant 0: off, yco */
24097821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
24107821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
24117821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
24127821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
2413de2362d3Smrg	/* constant 1: uco */
24147821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
24157821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
24167821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
24177821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
2418de2362d3Smrg	/* constant 2: vco */
24197821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
24207821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
24217821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
24227821949aSmrg	OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
2423de2362d3Smrg
24247821949aSmrg	FINISH_ACCEL();
2425de2362d3Smrg    }
2426de2362d3Smrg
2427de2362d3Smrg    BEGIN_ACCEL_RELOC(6, 2);
24287821949aSmrg    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
24297821949aSmrg    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
2430de2362d3Smrg
2431de2362d3Smrg    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap);
2432de2362d3Smrg    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap);
2433de2362d3Smrg
2434de2362d3Smrg    /* no need to enable blending */
24357821949aSmrg    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
2436de2362d3Smrg
24377821949aSmrg    OUT_ACCEL_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count);
24387821949aSmrg    FINISH_ACCEL();
2439de2362d3Smrg
2440de2362d3Smrg    if (pPriv->vsync) {
2441de2362d3Smrg	xf86CrtcPtr crtc;
2442de2362d3Smrg	if (pPriv->desired_crtc)
2443de2362d3Smrg	    crtc = pPriv->desired_crtc;
2444de2362d3Smrg	else
24457821949aSmrg	    crtc = radeon_pick_best_crtc(pScrn,
2446de2362d3Smrg					 pPriv->drw_x,
2447de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
2448de2362d3Smrg					 pPriv->drw_y,
2449de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
2450de2362d3Smrg	if (crtc)
24517821949aSmrg	    FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
24527821949aSmrg					  crtc,
24537821949aSmrg					  pPriv->drw_y - crtc->y,
24547821949aSmrg					  (pPriv->drw_y - crtc->y) + pPriv->dst_h);
2455de2362d3Smrg    }
2456de2362d3Smrg
2457de2362d3Smrg    return TRUE;
2458de2362d3Smrg}
2459de2362d3Smrg
/*
 * Draw the textured-video frame described by pPriv (R300 variant).
 *
 * Calls R300PrepareTexturedVideo() to emit the render state, then walks
 * every box of pPriv->clip.  For each box it programs the scissor
 * registers, emits one primitive (a quad, or a single oversized triangle
 * that the scissor cuts down to the box) and queues a destination-cache
 * flush.  After the last box it emits a final flush/wait sequence and
 * reports pPriv->clip as damage on pPriv->pDraw.
 *
 * pScrn - screen the port belongs to; handed to the helpers and used to
 *         look up the driver's RADEONInfoPtr.
 * pPriv - port state; supplies the destination pixmap, the clip region,
 *         the source/destination geometry, vtx_count and bicubic_enabled.
 *
 * Returns nothing.  If R300PrepareTexturedVideo() fails (initially, or
 * when it is re-run after a flush in the ACCEL_CP path) the function
 * returns immediately and DamageDamageRegion() is not called.
 */
2460de2362d3Smrgstatic void
24617821949aSmrgFUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
2462de2362d3Smrg{
2463de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2464de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
2465de2362d3Smrg    int dstxoff, dstyoff;
    /* Clip rectangles to draw and how many of them there are. */
2466de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
2467de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
24687821949aSmrg    ACCEL_PREAMBLE();
2469de2362d3Smrg
    /*
     * Offset added to every clip box before it is used as a destination
     * coordinate.  NOTE(review): presumably converts screen coordinates to
     * pixmap-relative ones for redirected windows -- confirm.
     */
2470de2362d3Smrg#ifdef COMPOSITE
2471de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
2472de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
2473de2362d3Smrg#else
2474de2362d3Smrg    dstxoff = 0;
2475de2362d3Smrg    dstyoff = 0;
2476de2362d3Smrg#endif
2477de2362d3Smrg
    /* Emit the render state; nothing is drawn if that fails. */
24787821949aSmrg    if (!FUNC_NAME(R300PrepareTexturedVideo)(pScrn, pPriv))
2479de2362d3Smrg	return;
2480de2362d3Smrg
2481de2362d3Smrg    /*
2482de2362d3Smrg     * Rendering of the actual polygon is done in two different
2483de2362d3Smrg     * ways depending on chip generation:
2484de2362d3Smrg     *
2485de2362d3Smrg     * < R300:
2486de2362d3Smrg     *
2487de2362d3Smrg     *     These chips can render a rectangle in one pass, so
2488de2362d3Smrg     *     handling is pretty straight-forward.
2489de2362d3Smrg     *
2490de2362d3Smrg     * >= R300:
2491de2362d3Smrg     *
2492de2362d3Smrg     *     These chips can accept a quad, but will render it as
2493de2362d3Smrg     *     two triangles which results in a diagonal tear. Instead
2494de2362d3Smrg     *     we render a single, large triangle and use the scissor
2495de2362d3Smrg     *     functionality to restrict it to the desired rectangle.
2496de2362d3Smrg     *     Due to guardband limits on r3xx/r4xx, we can only use
2497de2362d3Smrg     *     the single triangle up to 2560/4021 pixels; above that we
2498de2362d3Smrg     *     render as a quad.
2499de2362d3Smrg     */
2500de2362d3Smrg
    /* One primitive per clip box; pBox is advanced at the end of each pass. */
2501de2362d3Smrg    while (nBox--) {
2502de2362d3Smrg	float srcX, srcY, srcw, srch;
2503de2362d3Smrg	int dstX, dstY, dstw, dsth;
2504de2362d3Smrg	Bool use_quad = FALSE;
25057821949aSmrg#ifdef ACCEL_CP
	/*
	 * Space estimate for this box, sized for the larger (quad) case.
	 * If less than that remains, flush what has been queued and re-run
	 * the prepare step before continuing.
	 * NOTE(review): presumably the flush discards the emitted state,
	 * which is why prepare is repeated -- confirm.
	 */
2506de2362d3Smrg	int draw_size = 4 * pPriv->vtx_count + 4 + 2 + 3;
2507de2362d3Smrg
2508de2362d3Smrg	if (draw_size > radeon_cs_space_remaining(pScrn)) {
25097821949aSmrg	    if (info->cs)
25107821949aSmrg		radeon_cs_flush_indirect(pScrn);
25117821949aSmrg	    else
25127821949aSmrg		RADEONCPFlushIndirect(pScrn, 1);
25137821949aSmrg	    if (!FUNC_NAME(R300PrepareTexturedVideo)(pScrn, pPriv))
2514de2362d3Smrg		return;
2515de2362d3Smrg	}
25167821949aSmrg#endif
2517de2362d3Smrg
	/* Destination rectangle: the clip box shifted by dstxoff/dstyoff. */
2518de2362d3Smrg	dstX = pBox->x1 + dstxoff;
2519de2362d3Smrg	dstY = pBox->y1 + dstyoff;
2520de2362d3Smrg	dstw = pBox->x2 - pBox->x1;
2521de2362d3Smrg	dsth = pBox->y2 - pBox->y1;
2522de2362d3Smrg
	/*
	 * Source origin for this box: the box's offset from the drawable
	 * origin (drw_x, drw_y), scaled by src_w/dst_w and src_h/dst_h and
	 * added to (src_x, src_y).
	 */
2523de2362d3Smrg	srcX = pPriv->src_x;
2524de2362d3Smrg	srcX += ((pBox->x1 - pPriv->drw_x) *
2525de2362d3Smrg		 pPriv->src_w) / (float)pPriv->dst_w;
2526de2362d3Smrg	srcY = pPriv->src_y;
2527de2362d3Smrg	srcY += ((pBox->y1 - pPriv->drw_y) *
2528de2362d3Smrg		 pPriv->src_h) / (float)pPriv->dst_h;
2529de2362d3Smrg
	/* Source extent covered by the box, using the same scale factors. */
2530de2362d3Smrg	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
2531de2362d3Smrg	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
2532de2362d3Smrg
	/*
	 * The single-triangle path is only used while dstw + dsth stays
	 * within the limit (4021 when IS_R400_3D, otherwise 2560); larger
	 * boxes are drawn as a quad.
	 */
2533de2362d3Smrg	if (IS_R400_3D) {
2534de2362d3Smrg	    if ((dstw+dsth) > 4021)
2535de2362d3Smrg		use_quad = TRUE;
2536de2362d3Smrg	} else {
2537de2362d3Smrg	    if ((dstw+dsth) > 2560)
2538de2362d3Smrg		use_quad = TRUE;
2539de2362d3Smrg	}
2540de2362d3Smrg	/*
2541de2362d3Smrg	 * Set up the scissor area to that of the output size.
2542de2362d3Smrg	 */
25437821949aSmrg	BEGIN_ACCEL(2);
2544de2362d3Smrg	/* R300 has an offset */
25457821949aSmrg	OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1440) << R300_SCISSOR_X_SHIFT) |
2546de2362d3Smrg					 ((dstY + 1440) << R300_SCISSOR_Y_SHIFT)));
25477821949aSmrg	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2548de2362d3Smrg					 ((dstY + dsth + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
25497821949aSmrg	FINISH_ACCEL();
2550de2362d3Smrg
	/*
	 * Open the draw command: 4 vertices for a quad list, 3 for a
	 * triangle list.  The reservation opened here stays open across the
	 * vertex data and the destination-cache flush write below, and is
	 * closed by ADVANCE_RING() (CP) or FINISH_ACCEL() (non-CP).
	 */
25517821949aSmrg#ifdef ACCEL_CP
2552de2362d3Smrg	if (use_quad) {
2553de2362d3Smrg	    BEGIN_RING(4 * pPriv->vtx_count + 4);
2554de2362d3Smrg	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2555de2362d3Smrg				4 * pPriv->vtx_count));
2556de2362d3Smrg	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2557de2362d3Smrg		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2558de2362d3Smrg		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2559de2362d3Smrg	} else {
2560de2362d3Smrg	    BEGIN_RING(3 * pPriv->vtx_count + 4);
2561de2362d3Smrg	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2562de2362d3Smrg				3 * pPriv->vtx_count));
2563de2362d3Smrg	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
2564de2362d3Smrg		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2565de2362d3Smrg		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2566de2362d3Smrg	}
25677821949aSmrg#else /* ACCEL_CP */
25687821949aSmrg	if (use_quad)
25697821949aSmrg	    BEGIN_ACCEL(2 + pPriv->vtx_count * 4);
25707821949aSmrg	else
25717821949aSmrg	    BEGIN_ACCEL(2 + pPriv->vtx_count * 3);
2572de2362d3Smrg
25737821949aSmrg	if (use_quad)
25747821949aSmrg	    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
25757821949aSmrg					      RADEON_VF_PRIM_WALK_DATA |
25767821949aSmrg					      (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
25777821949aSmrg	else
25787821949aSmrg	    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
25797821949aSmrg					      RADEON_VF_PRIM_WALK_DATA |
25807821949aSmrg					      (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
25817821949aSmrg#endif
	/*
	 * Vertex data.  Each vertex carries the destination position and a
	 * source coordinate divided by pPriv->w / pPriv->h; the bicubic
	 * variant (VTX_OUT_6) adds a third pair, the undivided source
	 * coordinate plus 0.5.
	 * In the triangle case the two far vertices sit dstw + dsth away
	 * from the origin along one axis each, and their source coordinates
	 * are stretched by (dstw / dsth + 1) resp. (dsth / dstw + 1) to match.
	 */
2582de2362d3Smrg	if (pPriv->bicubic_enabled) {
2583de2362d3Smrg		/*
2584de2362d3Smrg		 * This code is only executed on >= R300, so we don't
2585de2362d3Smrg		 * have to deal with the legacy handling.
2586de2362d3Smrg		 */
2587de2362d3Smrg	    if (use_quad) {
2588de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)dstY,
2589de2362d3Smrg			  (float)srcX / pPriv->w,          (float)srcY / pPriv->h,
2590de2362d3Smrg			  (float)srcX + 0.5,               (float)srcY + 0.5);
2591de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)(dstY + dsth),
2592de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h,
2593de2362d3Smrg			  (float)srcX + 0.5,               (float)(srcY + srch) + 0.5);
2594de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)(dstY + dsth),
2595de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h,
2596de2362d3Smrg			  (float)(srcX + srcw) + 0.5,      (float)(srcY + srch) + 0.5);
2597de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)dstY,
2598de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h,
2599de2362d3Smrg			  (float)(srcX + srcw) + 0.5,      (float)srcY + 0.5);
2600de2362d3Smrg	    } else {
2601de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)dstY,
2602de2362d3Smrg			  (float)srcX / pPriv->w,          (float)srcY / pPriv->h,
2603de2362d3Smrg			  (float)srcX + 0.5,               (float)srcY + 0.5);
2604de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)(dstY + dstw + dsth),
2605de2362d3Smrg			  (float)srcX / pPriv->w,
2606de2362d3Smrg			  ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h,
2607de2362d3Smrg			  (float)srcX + 0.5,
2608de2362d3Smrg			  (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
2609de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw + dsth),     (float)dstY,
2610de2362d3Smrg			  ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
2611de2362d3Smrg			  (float)srcY / pPriv->h,
2612de2362d3Smrg			  (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
2613de2362d3Smrg			  (float)srcY + 0.5);
2614de2362d3Smrg	    }
2615de2362d3Smrg	} else {
2616de2362d3Smrg	    if (use_quad) {
2617de2362d3Smrg		VTX_OUT_4((float)dstX,                     (float)dstY,
2618de2362d3Smrg			  (float)srcX / pPriv->w,          (float)srcY / pPriv->h);
2619de2362d3Smrg		VTX_OUT_4((float)dstX,                     (float)(dstY + dsth),
2620de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
2621de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)(dstY + dsth),
2622de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
2623de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)dstY,
2624de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
2625de2362d3Smrg	    } else {
2626de2362d3Smrg		/*
2627de2362d3Smrg		 * Render a big, scissored triangle. This means
2628de2362d3Smrg		 * increasing the triangle size and adjusting
2629de2362d3Smrg		 * texture coordinates.
2630de2362d3Smrg		 */
2631de2362d3Smrg		VTX_OUT_4((float)dstX,                 (float)dstY,
2632de2362d3Smrg			  (float)srcX / pPriv->w,      (float)srcY / pPriv->h);
2633de2362d3Smrg		VTX_OUT_4((float)dstX,                 (float)(dstY + dsth + dstw),
2634de2362d3Smrg			  (float)srcX / pPriv->w,
2635de2362d3Smrg			  ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h);
2636de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY,
2637de2362d3Smrg			  ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
2638de2362d3Smrg			  (float)srcY / pPriv->h);
2639de2362d3Smrg	    }
2640de2362d3Smrg	}
2641de2362d3Smrg
2642de2362d3Smrg	/* flushing is pipelined, free/finish is not */
26437821949aSmrg	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
2644de2362d3Smrg
	/* Close the reservation opened for the draw command above. */
26457821949aSmrg#ifdef ACCEL_CP
2646de2362d3Smrg	ADVANCE_RING();
26477821949aSmrg#else
26487821949aSmrg	FINISH_ACCEL();
26497821949aSmrg#endif /* !ACCEL_CP */
2650de2362d3Smrg
2651de2362d3Smrg	pBox++;
2652de2362d3Smrg    }
2653de2362d3Smrg
    /*
     * After the last box: write the clip rule, flush the destination cache
     * and wait until the 3D engine is idle and clean.
     */
26547821949aSmrg    BEGIN_ACCEL(3);
26557821949aSmrg    OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
26567821949aSmrg    OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
26577821949aSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
26587821949aSmrg    FINISH_ACCEL();
2659de2362d3Smrg
    /* Report the whole clip region as damaged on the target drawable. */
2660de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
2661de2362d3Smrg}
2662de2362d3Smrg
2663de2362d3Smrgstatic Bool
26647821949aSmrgFUNC_NAME(R500PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
2665de2362d3Smrg{
2666de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2667de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
2668de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
2669de2362d3Smrg    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
26707821949aSmrg    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0;
2671de2362d3Smrg    uint32_t dst_pitch, dst_format;
26727821949aSmrg    uint32_t txenable, colorpitch, bicubic_offset;
2673de2362d3Smrg    uint32_t output_fmt;
2674de2362d3Smrg    int pixel_shift, out_size = 6;
26757821949aSmrg    ACCEL_PREAMBLE();
2676de2362d3Smrg
26777821949aSmrg#ifdef XF86DRM_MODE
26787821949aSmrg    if (info->cs) {
26797821949aSmrg	int ret;
2680de2362d3Smrg
26817821949aSmrg	radeon_cs_space_reset_bos(info->cs);
26827821949aSmrg	radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
26837821949aSmrg
26847821949aSmrg	if (pPriv->bicubic_enabled)
26857821949aSmrg	    radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
26867821949aSmrg
26877821949aSmrg	driver_priv = exaGetPixmapDriverPrivate(pPixmap);
26887821949aSmrg	radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo, 0, RADEON_GEM_DOMAIN_VRAM);
26897821949aSmrg
26907821949aSmrg	ret = radeon_cs_space_check(info->cs);
26917821949aSmrg	if (ret) {
26927821949aSmrg	    ErrorF("Not enough RAM to hw accel xv operation\n");
26937821949aSmrg	    return FALSE;
26947821949aSmrg	}
2695de2362d3Smrg    }
26967821949aSmrg#else
26977821949aSmrg    (void)src_bo;
26987821949aSmrg#endif
2699de2362d3Smrg
2700de2362d3Smrg    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
2701de2362d3Smrg
27027821949aSmrg#ifdef USE_EXA
27037821949aSmrg    if (info->useEXA) {
27047821949aSmrg	dst_pitch = exaGetPixmapPitch(pPixmap);
27057821949aSmrg    } else
27067821949aSmrg#endif
27077821949aSmrg    {
27087821949aSmrg	dst_pitch = pPixmap->devKind;
27097821949aSmrg    }
27107821949aSmrg
27117821949aSmrg#ifdef USE_EXA
27127821949aSmrg    if (info->useEXA) {
27137821949aSmrg	RADEON_SWITCH_TO_3D();
27147821949aSmrg    } else
27157821949aSmrg#endif
27167821949aSmrg    {
27177821949aSmrg	BEGIN_ACCEL(2);
27187821949aSmrg	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
27197821949aSmrg	/* We must wait for 3d to idle, in case source was just written as a dest. */
27207821949aSmrg	OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
27217821949aSmrg		      RADEON_WAIT_HOST_IDLECLEAN |
27227821949aSmrg		      RADEON_WAIT_2D_IDLECLEAN |
27237821949aSmrg		      RADEON_WAIT_3D_IDLECLEAN |
27247821949aSmrg		      RADEON_WAIT_DMA_GUI_IDLE);
27257821949aSmrg	FINISH_ACCEL();
27267821949aSmrg
27277821949aSmrg	if (!info->accel_state->XInited3D)
27287821949aSmrg	    RADEONInit3DEngine(pScrn);
27297821949aSmrg    }
2730de2362d3Smrg
2731de2362d3Smrg    if (pPriv->bicubic_enabled)
2732de2362d3Smrg	pPriv->vtx_count = 6;
2733de2362d3Smrg    else
2734de2362d3Smrg	pPriv->vtx_count = 4;
2735de2362d3Smrg
2736de2362d3Smrg    switch (pPixmap->drawable.bitsPerPixel) {
2737de2362d3Smrg    case 16:
2738de2362d3Smrg	if (pPixmap->drawable.depth == 15)
2739de2362d3Smrg	    dst_format = R300_COLORFORMAT_ARGB1555;
2740de2362d3Smrg	else
2741de2362d3Smrg	    dst_format = R300_COLORFORMAT_RGB565;
2742de2362d3Smrg	break;
2743de2362d3Smrg    case 32:
2744de2362d3Smrg	dst_format = R300_COLORFORMAT_ARGB8888;
2745de2362d3Smrg	break;
2746de2362d3Smrg    default:
2747de2362d3Smrg	return FALSE;
2748de2362d3Smrg    }
2749de2362d3Smrg
2750de2362d3Smrg    output_fmt = (R300_OUT_FMT_C4_8 |
2751de2362d3Smrg		  R300_OUT_FMT_C0_SEL_BLUE |
2752de2362d3Smrg		  R300_OUT_FMT_C1_SEL_GREEN |
2753de2362d3Smrg		  R300_OUT_FMT_C2_SEL_RED |
2754de2362d3Smrg		  R300_OUT_FMT_C3_SEL_ALPHA);
2755de2362d3Smrg
2756de2362d3Smrg    colorpitch = dst_pitch >> pixel_shift;
2757de2362d3Smrg    colorpitch |= dst_format;
2758de2362d3Smrg
2759de2362d3Smrg    if (RADEONTilingEnabled(pScrn, pPixmap))
2760de2362d3Smrg	colorpitch |= R300_COLORTILE;
2761de2362d3Smrg
2762de2362d3Smrg    if (((pPriv->bicubic_state == BICUBIC_OFF)) &&
2763de2362d3Smrg        (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12))
2764de2362d3Smrg	pPriv->is_planar = TRUE;
2765de2362d3Smrg    else
2766de2362d3Smrg	pPriv->is_planar = FALSE;
2767de2362d3Smrg
2768de2362d3Smrg    if (pPriv->is_planar) {
2769de2362d3Smrg	txformat1 = R300_TX_FORMAT_X8;
2770de2362d3Smrg	txpitch = pPriv->src_pitch;
2771de2362d3Smrg    } else {
2772de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
2773de2362d3Smrg	    txformat1 = R300_TX_FORMAT_YVYU422;
2774de2362d3Smrg	else
2775de2362d3Smrg	    txformat1 = R300_TX_FORMAT_VYUY422;
2776de2362d3Smrg
2777de2362d3Smrg	if (pPriv->bicubic_state != BICUBIC_OFF)
2778de2362d3Smrg	    txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
2779de2362d3Smrg
2780de2362d3Smrg	/* pitch is in pixels */
2781de2362d3Smrg	txpitch = pPriv->src_pitch / 2;
2782de2362d3Smrg    }
2783de2362d3Smrg    txpitch -= 1;
2784de2362d3Smrg
2785de2362d3Smrg    txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
2786de2362d3Smrg		 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
2787de2362d3Smrg		 R300_TXPITCH_EN);
2788de2362d3Smrg
2789de2362d3Smrg    txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
2790de2362d3Smrg		R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
2791de2362d3Smrg		R300_TX_MAG_FILTER_LINEAR |
2792de2362d3Smrg		R300_TX_MIN_FILTER_LINEAR |
2793de2362d3Smrg		(0 << R300_TX_ID_SHIFT));
2794de2362d3Smrg
2795de2362d3Smrg
2796de2362d3Smrg    if ((pPriv->w - 1) & 0x800)
2797de2362d3Smrg	txpitch |= R500_TXWIDTH_11;
2798de2362d3Smrg
2799de2362d3Smrg    if ((pPriv->h - 1) & 0x800)
2800de2362d3Smrg	txpitch |= R500_TXHEIGHT_11;
2801de2362d3Smrg
2802de2362d3Smrg    if (info->ChipFamily == CHIP_FAMILY_R520) {
2803de2362d3Smrg	unsigned us_width = (pPriv->w - 1) & 0x7ff;
2804de2362d3Smrg	unsigned us_height = (pPriv->h - 1) & 0x7ff;
2805de2362d3Smrg	unsigned us_depth = 0;
2806de2362d3Smrg
2807de2362d3Smrg	if (pPriv->w > 2048) {
2808de2362d3Smrg	    us_width = (0x7ff + us_width) >> 1;
2809de2362d3Smrg	    us_depth |= 0x0d;
2810de2362d3Smrg	}
2811de2362d3Smrg	if (pPriv->h > 2048) {
2812de2362d3Smrg	    us_height = (0x7ff + us_height) >> 1;
2813de2362d3Smrg	    us_depth |= 0x0e;
2814de2362d3Smrg	}
2815de2362d3Smrg	us_format = (us_width << R300_TXWIDTH_SHIFT) |
2816de2362d3Smrg		    (us_height << R300_TXHEIGHT_SHIFT) |
2817de2362d3Smrg		    (us_depth << R300_TXDEPTH_SHIFT);
2818de2362d3Smrg	out_size++;
2819de2362d3Smrg    }
2820de2362d3Smrg
28217821949aSmrg    txoffset = info->cs ? 0 : pPriv->src_offset;
28227821949aSmrg
2823de2362d3Smrg    BEGIN_ACCEL_RELOC(out_size, 1);
28247821949aSmrg    OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
28257821949aSmrg    OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
28267821949aSmrg    OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
28277821949aSmrg    OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
28287821949aSmrg    OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
28297821949aSmrg    OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo);
2830de2362d3Smrg    if (info->ChipFamily == CHIP_FAMILY_R520)
28317821949aSmrg	OUT_ACCEL_REG(R500_US_FORMAT0_0, us_format);
28327821949aSmrg    FINISH_ACCEL();
2833de2362d3Smrg
2834de2362d3Smrg    txenable = R300_TEX_0_ENABLE;
2835de2362d3Smrg
2836de2362d3Smrg    if (pPriv->is_planar) {
2837de2362d3Smrg	txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
2838de2362d3Smrg		     (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
2839de2362d3Smrg		     R300_TXPITCH_EN);
2840de2362d3Smrg	txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64);
2841de2362d3Smrg	txpitch -= 1;
2842de2362d3Smrg	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
2843de2362d3Smrg		    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
2844de2362d3Smrg		    R300_TX_MIN_FILTER_LINEAR |
2845de2362d3Smrg		    R300_TX_MAG_FILTER_LINEAR);
2846de2362d3Smrg
2847de2362d3Smrg	BEGIN_ACCEL_RELOC(12, 2);
28487821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
28497821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
28507821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
28517821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8);
28527821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
28537821949aSmrg	OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo);
28547821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
28557821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
28567821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
28577821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8);
28587821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
28597821949aSmrg	OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo);
28607821949aSmrg	FINISH_ACCEL();
2861de2362d3Smrg	txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
2862de2362d3Smrg    }
2863de2362d3Smrg
2864de2362d3Smrg    if (pPriv->bicubic_enabled) {
2865de2362d3Smrg	/* Size is 128x1 */
2866de2362d3Smrg	txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
2867de2362d3Smrg		     (0x0 << R300_TXHEIGHT_SHIFT) |
2868de2362d3Smrg		     R300_TXPITCH_EN);
2869de2362d3Smrg	/* Format is 32-bit floats, 4bpp */
2870de2362d3Smrg	txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
2871de2362d3Smrg	/* Pitch is 127 (128-1) */
2872de2362d3Smrg	txpitch = 0x7f;
2873de2362d3Smrg	/* Tex filter */
2874de2362d3Smrg	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
2875de2362d3Smrg		    R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
2876de2362d3Smrg		    R300_TX_MIN_FILTER_NEAREST |
2877de2362d3Smrg		    R300_TX_MAG_FILTER_NEAREST |
2878de2362d3Smrg		    (1 << R300_TX_ID_SHIFT));
2879de2362d3Smrg
28807821949aSmrg	if (info->cs)
28817821949aSmrg	    bicubic_offset = 0;
28827821949aSmrg	else
28837821949aSmrg	    bicubic_offset = pPriv->bicubic_src_offset;
28847821949aSmrg
2885de2362d3Smrg	BEGIN_ACCEL_RELOC(6, 1);
28867821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
28877821949aSmrg	OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
28887821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
28897821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
28907821949aSmrg	OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
28917821949aSmrg	OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, info->bicubic_bo);
28927821949aSmrg	FINISH_ACCEL();
2893de2362d3Smrg
2894de2362d3Smrg	/* Enable tex 1 */
2895de2362d3Smrg	txenable |= R300_TEX_1_ENABLE;
2896de2362d3Smrg    }
2897de2362d3Smrg
2898de2362d3Smrg    /* setup the VAP */
2899de2362d3Smrg    if (info->accel_state->has_tcl) {
2900de2362d3Smrg	if (pPriv->bicubic_enabled)
29017821949aSmrg	    BEGIN_ACCEL(7);
2902de2362d3Smrg	else
29037821949aSmrg	    BEGIN_ACCEL(6);
2904de2362d3Smrg    } else {
2905de2362d3Smrg	if (pPriv->bicubic_enabled)
29067821949aSmrg	    BEGIN_ACCEL(5);
2907de2362d3Smrg	else
29087821949aSmrg	    BEGIN_ACCEL(4);
2909de2362d3Smrg    }
2910de2362d3Smrg
2911de2362d3Smrg    /* These registers define the number, type, and location of data submitted
2912de2362d3Smrg     * to the PVS unit of GA input (when PVS is disabled)
2913de2362d3Smrg     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
2914de2362d3Smrg     * enabled.  This memory provides the inputs to the vertex shader program
2915de2362d3Smrg     * and ordering is not important.  When PVS/TCL is disabled, this field maps
2916de2362d3Smrg     * directly to the GA input memory and the order is significant.  In
2917de2362d3Smrg     * PVS_BYPASS mode the order is as follows:
2918de2362d3Smrg     * Position
2919de2362d3Smrg     * Point Size
2920de2362d3Smrg     * Color 0-3
2921de2362d3Smrg     * Textures 0-7
2922de2362d3Smrg     * Fog
2923de2362d3Smrg     */
2924de2362d3Smrg    if (pPriv->bicubic_enabled) {
29257821949aSmrg	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
2926de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
2927de2362d3Smrg		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
2928de2362d3Smrg		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
2929de2362d3Smrg		       R300_SIGNED_0 |
2930de2362d3Smrg		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
2931de2362d3Smrg		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
2932de2362d3Smrg		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
2933de2362d3Smrg		       R300_SIGNED_1));
29347821949aSmrg	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
2935de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
2936de2362d3Smrg		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
2937de2362d3Smrg		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
2938de2362d3Smrg		       R300_LAST_VEC_2 |
2939de2362d3Smrg		       R300_SIGNED_2));
2940de2362d3Smrg    } else {
29417821949aSmrg	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
2942de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
2943de2362d3Smrg		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
2944de2362d3Smrg		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
2945de2362d3Smrg		       R300_SIGNED_0 |
2946de2362d3Smrg		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
2947de2362d3Smrg		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
2948de2362d3Smrg		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
2949de2362d3Smrg		       R300_LAST_VEC_1 |
2950de2362d3Smrg		       R300_SIGNED_1));
2951de2362d3Smrg    }
2952de2362d3Smrg
2953de2362d3Smrg    /* load the vertex shader
2954de2362d3Smrg     * We pre-load vertex programs in RADEONInit3DEngine():
2955de2362d3Smrg     * - exa
2956de2362d3Smrg     * - Xv
2957de2362d3Smrg     * - Xv bicubic
2958de2362d3Smrg     * Here we select the offset of the vertex program we want to use
2959de2362d3Smrg     */
2960de2362d3Smrg    if (info->accel_state->has_tcl) {
2961de2362d3Smrg	if (pPriv->bicubic_enabled) {
29627821949aSmrg	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
2963de2362d3Smrg			  ((11 << R300_PVS_FIRST_INST_SHIFT) |
2964de2362d3Smrg			   (13 << R300_PVS_XYZW_VALID_INST_SHIFT) |
2965de2362d3Smrg			   (13 << R300_PVS_LAST_INST_SHIFT)));
29667821949aSmrg	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
2967de2362d3Smrg			  (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
2968de2362d3Smrg	} else {
29697821949aSmrg	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
2970de2362d3Smrg			  ((9 << R300_PVS_FIRST_INST_SHIFT) |
2971de2362d3Smrg			   (10 << R300_PVS_XYZW_VALID_INST_SHIFT) |
2972de2362d3Smrg			   (10 << R300_PVS_LAST_INST_SHIFT)));
29737821949aSmrg	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
2974de2362d3Smrg			  (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
2975de2362d3Smrg	}
2976de2362d3Smrg    }
2977de2362d3Smrg
2978de2362d3Smrg    /* Position and one set of 2 texture coordinates */
29797821949aSmrg    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
2980de2362d3Smrg    if (pPriv->bicubic_enabled)
29817821949aSmrg	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
2982de2362d3Smrg					       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
2983de2362d3Smrg    else
29847821949aSmrg	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
2985de2362d3Smrg
29867821949aSmrg    OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
29877821949aSmrg    FINISH_ACCEL();
2988de2362d3Smrg
2989de2362d3Smrg    /* setup pixel shader */
2990de2362d3Smrg    if (pPriv->bicubic_state != BICUBIC_OFF) {
2991de2362d3Smrg	if (pPriv->bicubic_enabled) {
29927821949aSmrg	    BEGIN_ACCEL(7);
2993de2362d3Smrg
2994de2362d3Smrg	    /* 4 components: 2 for tex0 and 2 for tex1 */
29957821949aSmrg	    OUT_ACCEL_REG(R300_RS_COUNT,
2996de2362d3Smrg			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
2997de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
2998de2362d3Smrg
2999de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
30007821949aSmrg	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
3001de2362d3Smrg
3002de2362d3Smrg	    /* Pixel stack frame size. */
30037821949aSmrg	    OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
3004de2362d3Smrg
3005de2362d3Smrg	    /* FP length. */
30067821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
3007de2362d3Smrg					      R500_US_CODE_END_ADDR(13)));
30087821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
3009de2362d3Smrg					       R500_US_CODE_RANGE_SIZE(13)));
3010de2362d3Smrg
3011de2362d3Smrg	    /* Prepare for FP emission. */
30127821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
30137821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
30147821949aSmrg	    FINISH_ACCEL();
3015de2362d3Smrg
30167821949aSmrg	    BEGIN_ACCEL(89);
3017de2362d3Smrg	    /* Pixel shader.
3018de2362d3Smrg	     * I've gone ahead and annotated each instruction, since this
3019de2362d3Smrg	     * thing is MASSIVE. :3
3020de2362d3Smrg	     * Note: In order to avoid buggies with temps and multiple
3021de2362d3Smrg	     * inputs, all temps are offset by 2. temp0 -> register2. */
3022de2362d3Smrg
3023de2362d3Smrg	    /* TEX temp2, input1.xxxx, tex1, 1D */
30247821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3025de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3026de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3027de2362d3Smrg						   R500_INST_RGB_WMASK_B));
30287821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
3029de2362d3Smrg						   R500_TEX_INST_LD |
3030de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
30317821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
3032de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3033de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_R |
3034de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_R |
3035de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_R |
3036de2362d3Smrg						   R500_TEX_DST_ADDR(2) |
3037de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3038de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3039de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3040de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
30417821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
30427821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
30437821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3044de2362d3Smrg
3045de2362d3Smrg	    /* TEX temp5, input1.yyyy, tex1, 1D */
30467821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3047de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3048de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3049de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3050de2362d3Smrg						   R500_INST_RGB_WMASK_B));
30517821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
3052de2362d3Smrg						   R500_TEX_INST_LD |
3053de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3054de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
30557821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
3056de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_G |
3057de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3058de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_G |
3059de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_G |
3060de2362d3Smrg						   R500_TEX_DST_ADDR(5) |
3061de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3062de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3063de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3064de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
30657821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
30667821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
30677821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3068de2362d3Smrg
3069de2362d3Smrg	    /* MUL temp4, const0.x0x0, temp2.yyxx */
30707821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3071de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3072de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3073de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3074de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3075de2362d3Smrg						   R500_INST_ALPHA_WMASK));
30767821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3077de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3078de2362d3Smrg						   R500_RGB_ADDR1(2)));
30797821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3080de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3081de2362d3Smrg						   R500_ALPHA_ADDR1(2)));
30827821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3083de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3084de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_0 |
3085de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_R |
3086de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3087de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_G |
3088de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3089de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_R));
30907821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
3091de2362d3Smrg						   R500_ALPHA_OP_MAD |
3092de2362d3Smrg						   R500_ALPHA_SEL_A_SRC0 |
3093de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3094de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
3095de2362d3Smrg						   R500_ALPHA_SWIZ_B_R));
30967821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
3097de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3098de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_0 |
3099de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_0 |
3100de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_0 |
3101de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3102de2362d3Smrg
3103de2362d3Smrg	    /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */
31047821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3105de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3106de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3107de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3108de2362d3Smrg						   R500_INST_ALPHA_WMASK));
31097821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3110de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3111de2362d3Smrg						   R500_RGB_ADDR1(5) |
3112de2362d3Smrg						   R500_RGB_ADDR2(4)));
31137821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3114de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3115de2362d3Smrg						   R500_ALPHA_ADDR1(5) |
3116de2362d3Smrg						   R500_ALPHA_ADDR2(4)));
31177821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3118de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_0 |
3119de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3120de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_0 |
3121de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3122de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3123de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_R |
3124de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_R));
31257821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
3126de2362d3Smrg						   R500_ALPHA_OP_MAD |
3127de2362d3Smrg						   R500_ALPHA_SEL_A_SRC0 |
3128de2362d3Smrg						   R500_ALPHA_SWIZ_A_G |
3129de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
3130de2362d3Smrg						   R500_ALPHA_SWIZ_B_R));
31317821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
3132de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3133de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3134de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3135de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3136de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3137de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
3138de2362d3Smrg
3139de2362d3Smrg	    /* ADD temp3, temp3, input0.xyxy */
31407821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3141de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3142de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3143de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3144de2362d3Smrg						   R500_INST_ALPHA_WMASK));
31457821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) |
3146de2362d3Smrg						   R500_RGB_ADDR2(0)));
31477821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) |
3148de2362d3Smrg						   R500_ALPHA_ADDR2(0)));
31497821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
3150de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_1 |
3151de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_1 |
3152de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3153de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3154de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3155de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
31567821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
3157de2362d3Smrg						   R500_ALPHA_OP_MAD |
3158de2362d3Smrg						   R500_ALPHA_SWIZ_A_1 |
3159de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
3160de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
31617821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
3162de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3163de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3164de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3165de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3166de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_R |
3167de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_G));
3168de2362d3Smrg
3169de2362d3Smrg	    /* TEX temp1, temp3.zwxy, tex0, 2D */
31707821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3171de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3172de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3173de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3174de2362d3Smrg						   R500_INST_ALPHA_WMASK));
31757821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3176de2362d3Smrg						   R500_TEX_INST_LD |
3177de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
31787821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
3179de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_B |
3180de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_A |
3181de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_R |
3182de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_G |
3183de2362d3Smrg						   R500_TEX_DST_ADDR(1) |
3184de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3185de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3186de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3187de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
31887821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
31897821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
31907821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3191de2362d3Smrg
3192de2362d3Smrg	    /* TEX temp3, temp3.xyzw, tex0, 2D */
31937821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3194de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3195de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3196de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3197de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3198de2362d3Smrg						   R500_INST_ALPHA_WMASK));
31997821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3200de2362d3Smrg						   R500_TEX_INST_LD |
3201de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3202de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
32037821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
3204de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3205de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3206de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_B |
3207de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_A |
3208de2362d3Smrg						   R500_TEX_DST_ADDR(3) |
3209de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3210de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3211de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3212de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
32137821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
32147821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
32157821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3216de2362d3Smrg
3217de2362d3Smrg	    /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */
32187821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3219de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3220de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3221de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3222de2362d3Smrg						   R500_INST_ALPHA_WMASK));
32237821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3224de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3225de2362d3Smrg						   R500_RGB_ADDR1(5) |
3226de2362d3Smrg						   R500_RGB_ADDR2(4)));
32277821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3228de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3229de2362d3Smrg						   R500_ALPHA_ADDR1(5) |
3230de2362d3Smrg						   R500_ALPHA_ADDR2(4)));
32317821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3232de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_0 |
3233de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3234de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_0 |
3235de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3236de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_G |
3237de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3238de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G));
32397821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
3240de2362d3Smrg						   R500_ALPHA_OP_MAD |
3241de2362d3Smrg						   R500_ALPHA_SEL_A_SRC0 |
3242de2362d3Smrg						   R500_ALPHA_SWIZ_A_G |
3243de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
3244de2362d3Smrg						   R500_ALPHA_SWIZ_B_G));
32457821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
3246de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3247de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3248de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3249de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3250de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3251de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
3252de2362d3Smrg
3253de2362d3Smrg	    /* ADD temp0, temp4, input0.xyxy */
32547821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3255de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3256de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3257de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3258de2362d3Smrg						   R500_INST_ALPHA_WMASK));
32597821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) |
3260de2362d3Smrg						   R500_RGB_ADDR2(0)));
32617821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) |
3262de2362d3Smrg						   R500_ALPHA_ADDR2(0)));
32637821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
3264de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_1 |
3265de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_1 |
3266de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3267de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3268de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3269de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
32707821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
3271de2362d3Smrg						   R500_ALPHA_OP_MAD |
3272de2362d3Smrg						   R500_ALPHA_SWIZ_A_1 |
3273de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
3274de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
32757821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
3276de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3277de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3278de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3279de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3280de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_R |
3281de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_G));
3282de2362d3Smrg
3283de2362d3Smrg	    /* TEX temp4, temp0.zwzw, tex0, 2D */
32847821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3285de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3286de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3287de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3288de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3289de2362d3Smrg						   R500_INST_ALPHA_WMASK));
32907821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3291de2362d3Smrg						   R500_TEX_INST_LD |
3292de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
32937821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3294de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_B |
3295de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_A |
3296de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_B |
3297de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_A |
3298de2362d3Smrg						   R500_TEX_DST_ADDR(4) |
3299de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3300de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3301de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3302de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
33037821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
33047821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
33057821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3306de2362d3Smrg
3307de2362d3Smrg	    /* TEX temp0, temp0.xyzw, tex0, 2D */
33087821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3309de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3310de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3311de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3312de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3313de2362d3Smrg						   R500_INST_ALPHA_WMASK));
33147821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3315de2362d3Smrg						   R500_TEX_INST_LD |
3316de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3317de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
33187821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3319de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3320de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3321de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_B |
3322de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_A |
3323de2362d3Smrg						   R500_TEX_DST_ADDR(0) |
3324de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3325de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3326de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3327de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
33287821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
33297821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
33307821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3331de2362d3Smrg
3332de2362d3Smrg	    /* LRP temp3, temp2.zzzz, temp1, temp3 ->
3333de2362d3Smrg	     * - PRESUB temps, temp1 - temp3
3334de2362d3Smrg	     * - MAD temp2.zzzz, temps, temp3 */
33357821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3336de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3337de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3338de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3339de2362d3Smrg						   R500_INST_ALPHA_WMASK));
33407821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) |
3341de2362d3Smrg						   R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
3342de2362d3Smrg						   R500_RGB_ADDR1(1) |
3343de2362d3Smrg						   R500_RGB_ADDR2(2)));
33447821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) |
3345de2362d3Smrg						   R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
3346de2362d3Smrg						   R500_ALPHA_ADDR1(1) |
3347de2362d3Smrg						   R500_ALPHA_ADDR2(2)));
33487821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
3349de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_B |
3350de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_B |
3351de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3352de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRCP |
3353de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3354de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3355de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
33567821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
3357de2362d3Smrg						   R500_ALPHA_OP_MAD |
3358de2362d3Smrg						   R500_ALPHA_SEL_A_SRC2 |
3359de2362d3Smrg						   R500_ALPHA_SWIZ_A_B |
3360de2362d3Smrg						   R500_ALPHA_SEL_B_SRCP |
3361de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
33627821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
3363de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3364de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3365de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3366de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3367de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3368de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
3369de2362d3Smrg
3370de2362d3Smrg	    /* LRP temp0, temp2.zzzz, temp4, temp0 ->
3371de2362d3Smrg	     * - PRESUB temps, temp4 - temp0
3372de2362d3Smrg	     * - MAD temp2.zzzz, temps, temp0 */
33737821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3374de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3375de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3376de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3377de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3378de2362d3Smrg						   R500_INST_ALPHA_WMASK));
33797821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3380de2362d3Smrg						   R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
3381de2362d3Smrg						   R500_RGB_ADDR1(4) |
3382de2362d3Smrg						   R500_RGB_ADDR2(2)));
33837821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3384de2362d3Smrg						   R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
3385de2362d3Smrg						   R500_ALPHA_ADDR1(4) |
3386de2362d3Smrg						   R500_ALPHA_ADDR2(2)));
33877821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
3388de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_B |
3389de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_B |
3390de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3391de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRCP |
3392de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3393de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3394de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
33957821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
3396de2362d3Smrg						   R500_ALPHA_OP_MAD |
3397de2362d3Smrg						   R500_ALPHA_SEL_A_SRC2 |
3398de2362d3Smrg						   R500_ALPHA_SWIZ_A_B |
3399de2362d3Smrg						   R500_ALPHA_SEL_B_SRCP |
3400de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
34017821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
3402de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3403de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3404de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3405de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3406de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3407de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
3408de2362d3Smrg
3409de2362d3Smrg	    /* LRP output, temp5.zzzz, temp3, temp0 ->
3410de2362d3Smrg	     * - PRESUB temps, temp3 - temp0
3411de2362d3Smrg	     * - MAD temp5.zzzz, temps, temp0 */
34127821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
3413de2362d3Smrg						   R500_INST_LAST |
3414de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3415de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3416de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3417de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3418de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3419de2362d3Smrg						   R500_INST_RGB_OMASK_R |
3420de2362d3Smrg						   R500_INST_RGB_OMASK_G |
3421de2362d3Smrg						   R500_INST_RGB_OMASK_B |
3422de2362d3Smrg						   R500_INST_ALPHA_OMASK));
34237821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3424de2362d3Smrg						   R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
3425de2362d3Smrg						   R500_RGB_ADDR1(3) |
3426de2362d3Smrg						   R500_RGB_ADDR2(5)));
34277821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3428de2362d3Smrg						   R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
3429de2362d3Smrg						   R500_ALPHA_ADDR1(3) |
3430de2362d3Smrg						   R500_ALPHA_ADDR2(5)));
34317821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
3432de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_B |
3433de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_B |
3434de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3435de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRCP |
3436de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3437de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3438de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
34397821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
3440de2362d3Smrg						   R500_ALPHA_OP_MAD |
3441de2362d3Smrg						   R500_ALPHA_SEL_A_SRC2 |
3442de2362d3Smrg						   R500_ALPHA_SWIZ_A_B |
3443de2362d3Smrg						   R500_ALPHA_SEL_B_SRCP |
3444de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
34457821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
3446de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3447de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3448de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3449de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3450de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3451de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
3452de2362d3Smrg
3453de2362d3Smrg	    /* Shader constants. */
34547821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
3455de2362d3Smrg
3456de2362d3Smrg	    /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */
3457de2362d3Smrg	    OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w));
3458de2362d3Smrg	    OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h));
3459de2362d3Smrg	    OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
3460de2362d3Smrg	    OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
3461de2362d3Smrg
34627821949aSmrg	    FINISH_ACCEL();
3463de2362d3Smrg	} else {
34647821949aSmrg	    BEGIN_ACCEL(19);
3465de2362d3Smrg	    /* 2 components: 2 for tex0 */
34667821949aSmrg	    OUT_ACCEL_REG(R300_RS_COUNT,
3467de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
3468de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
3469de2362d3Smrg
3470de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
34717821949aSmrg	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
3472de2362d3Smrg
3473de2362d3Smrg	    /* Pixel stack frame size. */
34747821949aSmrg	    OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
3475de2362d3Smrg
3476de2362d3Smrg	    /* FP length. */
34777821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
3478de2362d3Smrg					      R500_US_CODE_END_ADDR(1)));
34797821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
3480de2362d3Smrg					       R500_US_CODE_RANGE_SIZE(1)));
3481de2362d3Smrg
3482de2362d3Smrg	    /* Prepare for FP emission. */
34837821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
34847821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
3485de2362d3Smrg
3486de2362d3Smrg	    /* tex inst */
34877821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3488de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3489de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3490de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3491de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3492de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3493de2362d3Smrg						   R500_INST_RGB_CLAMP |
3494de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
34957821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3496de2362d3Smrg						   R500_TEX_INST_LD |
3497de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3498de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
34997821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3500de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3501de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3502de2362d3Smrg						   R500_TEX_DST_ADDR(0) |
3503de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3504de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3505de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3506de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
35077821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3508de2362d3Smrg						   R500_DX_S_SWIZ_R |
3509de2362d3Smrg						   R500_DX_T_SWIZ_R |
3510de2362d3Smrg						   R500_DX_R_SWIZ_R |
3511de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3512de2362d3Smrg						   R500_DY_ADDR(0) |
3513de2362d3Smrg						   R500_DY_S_SWIZ_R |
3514de2362d3Smrg						   R500_DY_T_SWIZ_R |
3515de2362d3Smrg						   R500_DY_R_SWIZ_R |
3516de2362d3Smrg						   R500_DY_Q_SWIZ_R));
35177821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
35187821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3519de2362d3Smrg
3520de2362d3Smrg	    /* ALU inst */
35217821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
3522de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3523de2362d3Smrg						   R500_INST_LAST |
3524de2362d3Smrg						   R500_INST_RGB_OMASK_R |
3525de2362d3Smrg						   R500_INST_RGB_OMASK_G |
3526de2362d3Smrg						   R500_INST_RGB_OMASK_B |
3527de2362d3Smrg						   R500_INST_ALPHA_OMASK |
3528de2362d3Smrg						   R500_INST_RGB_CLAMP |
3529de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
35307821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3531de2362d3Smrg						   R500_RGB_ADDR1(0) |
3532de2362d3Smrg						   R500_RGB_ADDR1_CONST |
3533de2362d3Smrg						   R500_RGB_ADDR2(0) |
3534de2362d3Smrg						   R500_RGB_ADDR2_CONST));
35357821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3536de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3537de2362d3Smrg						   R500_ALPHA_ADDR1_CONST |
3538de2362d3Smrg						   R500_ALPHA_ADDR2(0) |
3539de2362d3Smrg						   R500_ALPHA_ADDR2_CONST));
35407821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3541de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3542de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3543de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3544de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC0 |
3545de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_1 |
3546de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_1 |
3547de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_1));
35487821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3549de2362d3Smrg						   R500_ALPHA_SWIZ_A_A |
3550de2362d3Smrg						   R500_ALPHA_SWIZ_B_1));
35517821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3552de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_0 |
3553de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_0 |
3554de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_0 |
3555de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
35567821949aSmrg	    FINISH_ACCEL();
3557de2362d3Smrg	}
3558de2362d3Smrg    } else {
3559de2362d3Smrg	/*
3560de2362d3Smrg	 * y' = y - .0625
3561de2362d3Smrg	 * u' = u - .5
3562de2362d3Smrg	 * v' = v - .5;
3563de2362d3Smrg	 *
3564de2362d3Smrg	 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
3565de2362d3Smrg	 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
3566de2362d3Smrg	 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
3567de2362d3Smrg	 *
3568de2362d3Smrg	 * DP3 might look like the straightforward solution
3569de2362d3Smrg	 * but we'd need to move the texture yuv values in
3570de2362d3Smrg	 * the same reg for this to work. Therefore use MADs.
3571de2362d3Smrg	 * Brightness just adds to the off constant.
3572de2362d3Smrg	 * Contrast is multiplication of luminance.
3573de2362d3Smrg	 * Saturation and hue change the u and v coeffs.
3574de2362d3Smrg	 * Default values (before adjustments - depend on colorspace):
3575de2362d3Smrg	 * yco = 1.1643
3576de2362d3Smrg	 * uco = 0, -0.39173, 2.017
3577de2362d3Smrg	 * vco = 1.5958, -0.8129, 0
3578de2362d3Smrg	 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
3579de2362d3Smrg	 *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
3580de2362d3Smrg	 *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
3581de2362d3Smrg	 *
3582de2362d3Smrg	 * temp = MAD(yco, yuv.yyyy, off)
3583de2362d3Smrg	 * temp = MAD(uco, yuv.uuuu, temp)
3584de2362d3Smrg	 * result = MAD(vco, yuv.vvvv, temp)
3585de2362d3Smrg	 */
3586de2362d3Smrg	/* TODO: don't recalc consts always */
3587de2362d3Smrg	const float Loff = -0.0627;
3588de2362d3Smrg	const float Coff = -0.502;
3589de2362d3Smrg	float uvcosf, uvsinf;
3590de2362d3Smrg	float yco;
3591de2362d3Smrg	float uco[3], vco[3], off[3];
3592de2362d3Smrg	float bright, cont, gamma;
3593de2362d3Smrg	int ref = pPriv->transform_index;
35947821949aSmrg	Bool needgamma = FALSE;
3595de2362d3Smrg
3596de2362d3Smrg	cont = RTFContrast(pPriv->contrast);
3597de2362d3Smrg	bright = RTFBrightness(pPriv->brightness);
3598de2362d3Smrg	gamma = (float)pPriv->gamma / 1000.0;
3599de2362d3Smrg	uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
3600de2362d3Smrg	uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
3601de2362d3Smrg	/* overlay video also does pre-gamma contrast/sat adjust, should we? */
3602de2362d3Smrg
3603de2362d3Smrg	yco = trans[ref].RefLuma * cont;
3604de2362d3Smrg	uco[0] = -trans[ref].RefRCr * uvsinf;
3605de2362d3Smrg	uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
3606de2362d3Smrg	uco[2] = trans[ref].RefBCb * uvcosf;
3607de2362d3Smrg	vco[0] = trans[ref].RefRCr * uvcosf;
3608de2362d3Smrg	vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
3609de2362d3Smrg	vco[2] = trans[ref].RefBCb * uvsinf;
3610de2362d3Smrg	off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
3611de2362d3Smrg	off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
3612de2362d3Smrg	off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
3613de2362d3Smrg
3614de2362d3Smrg	//XXX gamma
3615de2362d3Smrg
36167821949aSmrg	if (gamma != 1.0) {
36177821949aSmrg	    needgamma = TRUE;
36187821949aSmrg	    /* note: gamma correction is out = in ^ gamma;
36197821949aSmrg	       gpu can only do LG2/EX2 therefore we transform into
36207821949aSmrg	       in ^ gamma = 2 ^ (log2(in) * gamma).
36217821949aSmrg	       Lots of scalar ops, unfortunately (better solution?) -
36227821949aSmrg	       without gamma that's 3 inst, with gamma it's 10...
36237821949aSmrg	       could use different gamma factors per channel,
36247821949aSmrg	       if that's of any use. */
36257821949aSmrg	}
36267821949aSmrg
3627de2362d3Smrg	if (pPriv->is_planar) {
36287821949aSmrg	    BEGIN_ACCEL(56);
3629de2362d3Smrg	    /* 2 components: 2 for tex0 */
36307821949aSmrg	    OUT_ACCEL_REG(R300_RS_COUNT,
3631de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
3632de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
3633de2362d3Smrg
3634de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
36357821949aSmrg	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
3636de2362d3Smrg
3637de2362d3Smrg	    /* Pixel stack frame size. */
36387821949aSmrg	    OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
3639de2362d3Smrg
3640de2362d3Smrg	    /* FP length. */
36417821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
3642de2362d3Smrg					      R500_US_CODE_END_ADDR(5)));
36437821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
3644de2362d3Smrg					       R500_US_CODE_RANGE_SIZE(5)));
3645de2362d3Smrg
3646de2362d3Smrg	    /* Prepare for FP emission. */
36477821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
36487821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
3649de2362d3Smrg
3650de2362d3Smrg	    /* tex inst */
36517821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3652de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3653de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3654de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3655de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3656de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3657de2362d3Smrg						   R500_INST_RGB_CLAMP |
3658de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
36597821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3660de2362d3Smrg						   R500_TEX_INST_LD |
3661de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
36627821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3663de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3664de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3665de2362d3Smrg						   R500_TEX_DST_ADDR(2) |
3666de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3667de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3668de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3669de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
36707821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3671de2362d3Smrg						   R500_DX_S_SWIZ_R |
3672de2362d3Smrg						   R500_DX_T_SWIZ_R |
3673de2362d3Smrg						   R500_DX_R_SWIZ_R |
3674de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3675de2362d3Smrg						   R500_DY_ADDR(0) |
3676de2362d3Smrg						   R500_DY_S_SWIZ_R |
3677de2362d3Smrg						   R500_DY_T_SWIZ_R |
3678de2362d3Smrg						   R500_DY_R_SWIZ_R |
3679de2362d3Smrg						   R500_DY_Q_SWIZ_R));
36807821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
36817821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3682de2362d3Smrg
3683de2362d3Smrg	    /* tex inst */
36847821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3685de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3686de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3687de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3688de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3689de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3690de2362d3Smrg						   R500_INST_RGB_CLAMP |
3691de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
36927821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
3693de2362d3Smrg						   R500_TEX_INST_LD |
3694de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
36957821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3696de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3697de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3698de2362d3Smrg						   R500_TEX_DST_ADDR(1) |
3699de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3700de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3701de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3702de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
37037821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3704de2362d3Smrg						   R500_DX_S_SWIZ_R |
3705de2362d3Smrg						   R500_DX_T_SWIZ_R |
3706de2362d3Smrg						   R500_DX_R_SWIZ_R |
3707de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3708de2362d3Smrg						   R500_DY_ADDR(0) |
3709de2362d3Smrg						   R500_DY_S_SWIZ_R |
3710de2362d3Smrg						   R500_DY_T_SWIZ_R |
3711de2362d3Smrg						   R500_DY_R_SWIZ_R |
3712de2362d3Smrg						   R500_DY_Q_SWIZ_R));
37137821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
37147821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3715de2362d3Smrg
3716de2362d3Smrg	    /* tex inst */
37177821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3718de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3719de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3720de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3721de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3722de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3723de2362d3Smrg						   R500_INST_RGB_CLAMP |
3724de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
37257821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) |
3726de2362d3Smrg						   R500_TEX_INST_LD |
3727de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3728de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
37297821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3730de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3731de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3732de2362d3Smrg						   R500_TEX_DST_ADDR(0) |
3733de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3734de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3735de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3736de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
37377821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3738de2362d3Smrg						   R500_DX_S_SWIZ_R |
3739de2362d3Smrg						   R500_DX_T_SWIZ_R |
3740de2362d3Smrg						   R500_DX_R_SWIZ_R |
3741de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3742de2362d3Smrg						   R500_DY_ADDR(0) |
3743de2362d3Smrg						   R500_DY_S_SWIZ_R |
3744de2362d3Smrg						   R500_DY_T_SWIZ_R |
3745de2362d3Smrg						   R500_DY_R_SWIZ_R |
3746de2362d3Smrg						   R500_DY_Q_SWIZ_R));
37477821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
37487821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3749de2362d3Smrg
3750de2362d3Smrg	    /* ALU inst */
3751de2362d3Smrg	    /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
37527821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3753de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3754de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3755de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3756de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3757de2362d3Smrg						   R500_INST_ALPHA_WMASK));
37587821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3759de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3760de2362d3Smrg						   R500_RGB_ADDR1(2) |
3761de2362d3Smrg						   R500_RGB_ADDR2(0) |
3762de2362d3Smrg						   R500_RGB_ADDR2_CONST));
37637821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3764de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3765de2362d3Smrg						   R500_ALPHA_ADDR1(2) |
3766de2362d3Smrg						   R500_ALPHA_ADDR2(0) |
3767de2362d3Smrg						   R500_ALPHA_ADDR2_CONST));
37687821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3769de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_A |
3770de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_A |
3771de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_A |
3772de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3773de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3774de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G |
3775de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_B));
37767821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3777de2362d3Smrg						   R500_ALPHA_ADDRD(2) |
3778de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3779de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
37807821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3781de2362d3Smrg						   R500_ALU_RGBA_ADDRD(2) |
3782de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3783de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3784de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3785de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3786de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3787de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3788de2362d3Smrg
3789de2362d3Smrg	    /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
37907821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3791de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3792de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3793de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3794de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3795de2362d3Smrg						   R500_INST_ALPHA_WMASK));
37967821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
3797de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3798de2362d3Smrg						   R500_RGB_ADDR1(1) |
3799de2362d3Smrg						   R500_RGB_ADDR2(2)));
38007821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
3801de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3802de2362d3Smrg						   R500_ALPHA_ADDR1(1) |
3803de2362d3Smrg						   R500_ALPHA_ADDR2(2)));
38047821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3805de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3806de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3807de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3808de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3809de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3810de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G |
3811de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_B));
38127821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3813de2362d3Smrg						   R500_ALPHA_ADDRD(2) |
3814de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3815de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
38167821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3817de2362d3Smrg						   R500_ALU_RGBA_ADDRD(2) |
3818de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3819de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3820de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3821de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3822de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3823de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3824de2362d3Smrg
3825de2362d3Smrg	    /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
38267821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
3827de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3828de2362d3Smrg						   R500_INST_LAST |
3829de2362d3Smrg						   R500_INST_RGB_OMASK_R |
3830de2362d3Smrg						   R500_INST_RGB_OMASK_G |
3831de2362d3Smrg						   R500_INST_RGB_OMASK_B |
3832de2362d3Smrg						   R500_INST_ALPHA_OMASK |
3833de2362d3Smrg						   R500_INST_RGB_CLAMP |
3834de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
38357821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
3836de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3837de2362d3Smrg						   R500_RGB_ADDR1(0) |
3838de2362d3Smrg						   R500_RGB_ADDR2(2)));
38397821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) |
3840de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3841de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3842de2362d3Smrg						   R500_ALPHA_ADDR2(2)));
38437821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3844de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3845de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3846de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3847de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3848de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3849de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G |
3850de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_B));
38517821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3852de2362d3Smrg						   R500_ALPHA_ADDRD(0) |
3853de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3854de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
38557821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3856de2362d3Smrg						   R500_ALU_RGBA_ADDRD(0) |
3857de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3858de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3859de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3860de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3861de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3862de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_1));
3863de2362d3Smrg
3864de2362d3Smrg	} else {
38657821949aSmrg	    BEGIN_ACCEL(44);
3866de2362d3Smrg	    /* 2 components: 2 for tex0/1/2 */
38677821949aSmrg	    OUT_ACCEL_REG(R300_RS_COUNT,
3868de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
3869de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
3870de2362d3Smrg
3871de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
38727821949aSmrg	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
3873de2362d3Smrg
3874de2362d3Smrg	    /* Pixel stack frame size. */
38757821949aSmrg	    OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
3876de2362d3Smrg
3877de2362d3Smrg	    /* FP length. */
38787821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
3879de2362d3Smrg					      R500_US_CODE_END_ADDR(3)));
38807821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
3881de2362d3Smrg					       R500_US_CODE_RANGE_SIZE(3)));
3882de2362d3Smrg
3883de2362d3Smrg	    /* Prepare for FP emission. */
38847821949aSmrg	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
38857821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
3886de2362d3Smrg
3887de2362d3Smrg	    /* tex inst */
38887821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3889de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3890de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3891de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3892de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3893de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3894de2362d3Smrg						   R500_INST_RGB_CLAMP |
3895de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
38967821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3897de2362d3Smrg						   R500_TEX_INST_LD |
3898de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3899de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
39007821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3901de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3902de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3903de2362d3Smrg						   R500_TEX_DST_ADDR(0) |
3904de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3905de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3906de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3907de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
39087821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3909de2362d3Smrg						   R500_DX_S_SWIZ_R |
3910de2362d3Smrg						   R500_DX_T_SWIZ_R |
3911de2362d3Smrg						   R500_DX_R_SWIZ_R |
3912de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3913de2362d3Smrg						   R500_DY_ADDR(0) |
3914de2362d3Smrg						   R500_DY_S_SWIZ_R |
3915de2362d3Smrg						   R500_DY_T_SWIZ_R |
3916de2362d3Smrg						   R500_DY_R_SWIZ_R |
3917de2362d3Smrg						   R500_DY_Q_SWIZ_R));
39187821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
39197821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3920de2362d3Smrg
3921de2362d3Smrg	    /* ALU inst */
3922de2362d3Smrg	    /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
39237821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3924de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3925de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3926de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3927de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3928de2362d3Smrg						   R500_INST_ALPHA_WMASK));
39297821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3930de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3931de2362d3Smrg						   R500_RGB_ADDR1(0) |
3932de2362d3Smrg						   R500_RGB_ADDR2(0) |
3933de2362d3Smrg						   R500_RGB_ADDR2_CONST));
39347821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3935de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3936de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3937de2362d3Smrg						   R500_ALPHA_ADDR2(0) |
3938de2362d3Smrg						   R500_ALPHA_ADDR2_CONST));
39397821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3940de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_A |
3941de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_A |
3942de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_A |
3943de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3944de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_G |
3945de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G |
3946de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G));
39477821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3948de2362d3Smrg						   R500_ALPHA_ADDRD(1) |
3949de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3950de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
39517821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3952de2362d3Smrg						   R500_ALU_RGBA_ADDRD(1) |
3953de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3954de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3955de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3956de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3957de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3958de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3959de2362d3Smrg
3960de2362d3Smrg	    /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
39617821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3962de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3963de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3964de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3965de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3966de2362d3Smrg						   R500_INST_ALPHA_WMASK));
39677821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
3968de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3969de2362d3Smrg						   R500_RGB_ADDR1(0) |
3970de2362d3Smrg						   R500_RGB_ADDR2(1)));
39717821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
3972de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3973de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3974de2362d3Smrg						   R500_ALPHA_ADDR2(1)));
39757821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3976de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3977de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3978de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3979de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3980de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_B |
3981de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B |
3982de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_B));
39837821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3984de2362d3Smrg						   R500_ALPHA_ADDRD(1) |
3985de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3986de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
39877821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3988de2362d3Smrg						   R500_ALU_RGBA_ADDRD(1) |
3989de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3990de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3991de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3992de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3993de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3994de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3995de2362d3Smrg
3996de2362d3Smrg	    /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
39977821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
3998de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3999de2362d3Smrg						   R500_INST_LAST |
4000de2362d3Smrg						   R500_INST_RGB_OMASK_R |
4001de2362d3Smrg						   R500_INST_RGB_OMASK_G |
4002de2362d3Smrg						   R500_INST_RGB_OMASK_B |
4003de2362d3Smrg						   R500_INST_ALPHA_OMASK |
4004de2362d3Smrg						   R500_INST_RGB_CLAMP |
4005de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
40067821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
4007de2362d3Smrg						   R500_RGB_ADDR0_CONST |
4008de2362d3Smrg						   R500_RGB_ADDR1(0) |
4009de2362d3Smrg						   R500_RGB_ADDR2(1)));
40107821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
4011de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
4012de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
4013de2362d3Smrg						   R500_ALPHA_ADDR2(1)));
40147821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
4015de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
4016de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
4017de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
4018de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
4019de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
4020de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_R |
4021de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_R));
40227821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
4023de2362d3Smrg						   R500_ALPHA_ADDRD(1) |
4024de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
4025de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
40267821949aSmrg	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
4027de2362d3Smrg						   R500_ALU_RGBA_ADDRD(1) |
4028de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
4029de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
4030de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
4031de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
4032de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
4033de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_1));
4034de2362d3Smrg	}
4035de2362d3Smrg
4036de2362d3Smrg	/* Shader constants. */
40377821949aSmrg	OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
4038de2362d3Smrg
4039de2362d3Smrg	/* constant 0: off, yco */
4040de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]);
4041de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]);
4042de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]);
4043de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco);
4044de2362d3Smrg	/* constant 1: uco */
4045de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]);
4046de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]);
4047de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]);
4048de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma);
4049de2362d3Smrg	/* constant 2: vco */
4050de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]);
4051de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]);
4052de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]);
4053de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0);
4054de2362d3Smrg
40557821949aSmrg	FINISH_ACCEL();
4056de2362d3Smrg    }
4057de2362d3Smrg
4058de2362d3Smrg    BEGIN_ACCEL_RELOC(6, 2);
40597821949aSmrg    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
40607821949aSmrg    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
4061de2362d3Smrg
4062de2362d3Smrg    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap);
4063de2362d3Smrg    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap);
4064de2362d3Smrg
4065de2362d3Smrg    /* no need to enable blending */
40667821949aSmrg    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
4067de2362d3Smrg
40687821949aSmrg    OUT_ACCEL_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count);
40697821949aSmrg    FINISH_ACCEL();
4070de2362d3Smrg
4071de2362d3Smrg    if (pPriv->vsync) {
4072de2362d3Smrg	xf86CrtcPtr crtc;
4073de2362d3Smrg	if (pPriv->desired_crtc)
4074de2362d3Smrg	    crtc = pPriv->desired_crtc;
4075de2362d3Smrg	else
40767821949aSmrg	    crtc = radeon_pick_best_crtc(pScrn,
4077de2362d3Smrg					 pPriv->drw_x,
4078de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
4079de2362d3Smrg					 pPriv->drw_y,
4080de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
4081de2362d3Smrg	if (crtc)
40827821949aSmrg	    FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
40837821949aSmrg					  crtc,
40847821949aSmrg					  pPriv->drw_y - crtc->y,
40857821949aSmrg					  (pPriv->drw_y - crtc->y) + pPriv->dst_h);
4086de2362d3Smrg    }
4087de2362d3Smrg
4088de2362d3Smrg    return TRUE;
4089de2362d3Smrg}
4090de2362d3Smrg
4091de2362d3Smrgstatic void
40927821949aSmrgFUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
4093de2362d3Smrg{
4094de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
4095de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
4096de2362d3Smrg    int dstxoff, dstyoff;
4097de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
4098de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
40997821949aSmrg    ACCEL_PREAMBLE();
4100de2362d3Smrg
4101de2362d3Smrg#ifdef COMPOSITE
4102de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
4103de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
4104de2362d3Smrg#else
4105de2362d3Smrg    dstxoff = 0;
4106de2362d3Smrg    dstyoff = 0;
4107de2362d3Smrg#endif
4108de2362d3Smrg
41097821949aSmrg    if (!FUNC_NAME(R500PrepareTexturedVideo)(pScrn, pPriv))
4110de2362d3Smrg	return;
4111de2362d3Smrg
4112de2362d3Smrg    /*
4113de2362d3Smrg     * Rendering of the actual polygon is done in two different
4114de2362d3Smrg     * ways depending on chip generation:
4115de2362d3Smrg     *
4116de2362d3Smrg     * < R300:
4117de2362d3Smrg     *
4118de2362d3Smrg     *     These chips can render a rectangle in one pass, so
4119de2362d3Smrg     *     handling is pretty straight-forward.
4120de2362d3Smrg     *
4121de2362d3Smrg     * >= R300:
4122de2362d3Smrg     *
4123de2362d3Smrg     *     These chips can accept a quad, but will render it as
4124de2362d3Smrg     *     two triangles which results in a diagonal tear. Instead
4125de2362d3Smrg     *     We render a single, large triangle and use the scissor
4126de2362d3Smrg     *     functionality to restrict it to the desired rectangle.
4127de2362d3Smrg     *     Due to guardband limits on r3xx/r4xx, we can only use
4128de2362d3Smrg     *     the single triangle up to 2880 pixels; above that we
4129de2362d3Smrg     *     render as a quad.
4130de2362d3Smrg     */
4131de2362d3Smrg
4132de2362d3Smrg    while (nBox--) {
4133de2362d3Smrg	float srcX, srcY, srcw, srch;
4134de2362d3Smrg	int dstX, dstY, dstw, dsth;
41357821949aSmrg#ifdef ACCEL_CP
4136de2362d3Smrg	int draw_size = 3 * pPriv->vtx_count + 4 + 2 + 3;
4137de2362d3Smrg
4138de2362d3Smrg	if (draw_size > radeon_cs_space_remaining(pScrn)) {
41397821949aSmrg	    if (info->cs)
41407821949aSmrg		radeon_cs_flush_indirect(pScrn);
41417821949aSmrg	    else
41427821949aSmrg		RADEONCPFlushIndirect(pScrn, 1);
41437821949aSmrg	    if (!FUNC_NAME(R500PrepareTexturedVideo)(pScrn, pPriv))
4144de2362d3Smrg		return;
4145de2362d3Smrg	}
41467821949aSmrg#endif
4147de2362d3Smrg
4148de2362d3Smrg	dstX = pBox->x1 + dstxoff;
4149de2362d3Smrg	dstY = pBox->y1 + dstyoff;
4150de2362d3Smrg	dstw = pBox->x2 - pBox->x1;
4151de2362d3Smrg	dsth = pBox->y2 - pBox->y1;
4152de2362d3Smrg
4153de2362d3Smrg	srcX = pPriv->src_x;
4154de2362d3Smrg	srcX += ((pBox->x1 - pPriv->drw_x) *
4155de2362d3Smrg		 pPriv->src_w) / (float)pPriv->dst_w;
4156de2362d3Smrg	srcY = pPriv->src_y;
4157de2362d3Smrg	srcY += ((pBox->y1 - pPriv->drw_y) *
4158de2362d3Smrg		 pPriv->src_h) / (float)pPriv->dst_h;
4159de2362d3Smrg
4160de2362d3Smrg	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
4161de2362d3Smrg	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
4162de2362d3Smrg
41637821949aSmrg	BEGIN_ACCEL(2);
41647821949aSmrg	OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) |
4165de2362d3Smrg					 ((dstY) << R300_SCISSOR_Y_SHIFT)));
41667821949aSmrg	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) |
4167de2362d3Smrg					 ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT)));
41687821949aSmrg	FINISH_ACCEL();
4169de2362d3Smrg
41707821949aSmrg#ifdef ACCEL_CP
4171de2362d3Smrg	BEGIN_RING(3 * pPriv->vtx_count + 4);
4172de2362d3Smrg	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
4173de2362d3Smrg			    3 * pPriv->vtx_count));
4174de2362d3Smrg	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
4175de2362d3Smrg		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
4176de2362d3Smrg		 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
41777821949aSmrg#else /* ACCEL_CP */
41787821949aSmrg	BEGIN_ACCEL(2 + pPriv->vtx_count * 3);
41797821949aSmrg	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
41807821949aSmrg					  RADEON_VF_PRIM_WALK_DATA |
41817821949aSmrg					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
41827821949aSmrg#endif
4183de2362d3Smrg	if (pPriv->bicubic_enabled) {
4184de2362d3Smrg	    VTX_OUT_6((float)dstX,            (float)dstY,
4185de2362d3Smrg		      (float)srcX / pPriv->w, (float)srcY / pPriv->h,
4186de2362d3Smrg		      (float)srcX + 0.5,      (float)srcY + 0.5);
4187de2362d3Smrg	    VTX_OUT_6((float)dstX,            (float)(dstY + dstw + dsth),
4188de2362d3Smrg		      (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h,
4189de2362d3Smrg		      (float)srcX + 0.5,      (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
4190de2362d3Smrg	    VTX_OUT_6((float)(dstX + dstw + dsth),                       (float)dstY,
4191de2362d3Smrg		      ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
4192de2362d3Smrg		      (float)srcY / pPriv->h,
4193de2362d3Smrg		      (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
4194de2362d3Smrg		      (float)srcY + 0.5);
4195de2362d3Smrg	} else {
4196de2362d3Smrg	    /*
4197de2362d3Smrg	     * Render a big, scissored triangle. This means
4198de2362d3Smrg	     * increasing the triangle size and adjusting
4199de2362d3Smrg	     * texture coordinates.
4200de2362d3Smrg	     */
4201de2362d3Smrg	    VTX_OUT_4((float)dstX,            (float)dstY,
4202de2362d3Smrg		      (float)srcX / pPriv->w, (float)srcY / pPriv->h);
4203de2362d3Smrg	    VTX_OUT_4((float)dstX,                              (float)(dstY + dsth + dstw),
4204de2362d3Smrg		      (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h);
4205de2362d3Smrg	    VTX_OUT_4((float)(dstX + dstw + dsth),              (float)dstY,
4206de2362d3Smrg		      ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
4207de2362d3Smrg		      (float)srcY / pPriv->h);
4208de2362d3Smrg	}
4209de2362d3Smrg
4210de2362d3Smrg	/* flushing is pipelined, free/finish is not */
42117821949aSmrg	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
4212de2362d3Smrg
42137821949aSmrg#ifdef ACCEL_CP
4214de2362d3Smrg	ADVANCE_RING();
42157821949aSmrg#else
42167821949aSmrg	FINISH_ACCEL();
42177821949aSmrg#endif /* !ACCEL_CP */
4218de2362d3Smrg
4219de2362d3Smrg	pBox++;
4220de2362d3Smrg    }
4221de2362d3Smrg
42227821949aSmrg    BEGIN_ACCEL(3);
42237821949aSmrg    OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
42247821949aSmrg    OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
42257821949aSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
42267821949aSmrg    FINISH_ACCEL();
4227de2362d3Smrg
4228de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
4229de2362d3Smrg}
4230de2362d3Smrg
/* Drop the file-local helper macros so they do not leak past this point. */
#undef FUNC_NAME
#undef VTX_OUT_6
#undef VTX_OUT_4
4234