radeon_textured_videofuncs.c revision 0a1d3ae0
/*
 * Copyright 2008 Alex Deucher
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 *
 * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al.
 *
 */
27de2362d3Smrg
/*
 * Write one six-component vertex to the ring: the destination position
 * followed by two texture coordinate pairs.  Every argument is passed
 * through F_TO_DW and written with OUT_RING, in argument order.
 */
#define VTX_OUT_6(dst_x, dst_y, tex0_s, tex0_t, tex1_s, tex1_t)	\
    do {								\
	OUT_RING(F_TO_DW(dst_x));					\
	OUT_RING(F_TO_DW(dst_y));					\
	OUT_RING(F_TO_DW(tex0_s));					\
	OUT_RING(F_TO_DW(tex0_t));					\
	OUT_RING(F_TO_DW(tex1_s));					\
	OUT_RING(F_TO_DW(tex1_t));					\
    } while (0)
37de2362d3Smrg
/*
 * Write one four-component vertex to the ring: the destination position
 * followed by a single texture coordinate pair.  Every argument is passed
 * through F_TO_DW and written with OUT_RING, in argument order.
 */
#define VTX_OUT_4(dst_x, dst_y, tex0_s, tex0_t)			\
    do {								\
	OUT_RING(F_TO_DW(dst_x));					\
	OUT_RING(F_TO_DW(dst_y));					\
	OUT_RING(F_TO_DW(tex0_s));					\
	OUT_RING(F_TO_DW(tex0_t));					\
    } while (0)
45de2362d3Smrg
46de2362d3Smrg
47de2362d3Smrgstatic Bool
4818781e08SmrgRADEONPrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
49de2362d3Smrg{
50de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
51de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
52de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
53de2362d3Smrg    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
5418781e08Smrg    uint32_t txformat, txsize, txpitch;
55de2362d3Smrg    uint32_t dst_pitch, dst_format;
56de2362d3Smrg    uint32_t colorpitch;
57de2362d3Smrg    int pixel_shift;
5818781e08Smrg    int scissor_w = MIN(pPixmap->drawable.width, 2048) - 1;
5918781e08Smrg    int scissor_h = MIN(pPixmap->drawable.height, 2048) - 1;
6018781e08Smrg    int ret;
61de2362d3Smrg
6218781e08Smrg    radeon_cs_space_reset_bos(info->cs);
6318781e08Smrg    radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
64de2362d3Smrg
6518781e08Smrg    if (pPriv->bicubic_enabled)
6639413783Smrg	radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo,
6739413783Smrg					  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
68de2362d3Smrg
6918781e08Smrg    driver_priv = exaGetPixmapDriverPrivate(pPixmap);
7039413783Smrg    radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0,
7139413783Smrg				      RADEON_GEM_DOMAIN_VRAM);
727821949aSmrg
7318781e08Smrg    ret = radeon_cs_space_check(info->cs);
7418781e08Smrg    if (ret) {
7518781e08Smrg	ErrorF("Not enough RAM to hw accel xv operation\n");
7618781e08Smrg	return FALSE;
77de2362d3Smrg    }
78de2362d3Smrg
79de2362d3Smrg    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
80de2362d3Smrg
8118781e08Smrg    dst_pitch = exaGetPixmapPitch(pPixmap);
8218781e08Smrg    RADEON_SWITCH_TO_3D();
83de2362d3Smrg
84de2362d3Smrg    /* Same for R100/R200 */
85de2362d3Smrg    switch (pPixmap->drawable.bitsPerPixel) {
86de2362d3Smrg    case 16:
87de2362d3Smrg	if (pPixmap->drawable.depth == 15)
88de2362d3Smrg	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
89de2362d3Smrg	else
90de2362d3Smrg	    dst_format = RADEON_COLOR_FORMAT_RGB565;
91de2362d3Smrg	break;
92de2362d3Smrg    case 32:
93de2362d3Smrg	dst_format = RADEON_COLOR_FORMAT_ARGB8888;
94de2362d3Smrg	break;
95de2362d3Smrg    default:
96de2362d3Smrg	return FALSE;
97de2362d3Smrg    }
98de2362d3Smrg
99de2362d3Smrg    if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) {
100de2362d3Smrg	pPriv->is_planar = TRUE;
101de2362d3Smrg	txformat = RADEON_TXFORMAT_Y8;
102de2362d3Smrg    } else {
103de2362d3Smrg	pPriv->is_planar = FALSE;
104de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
105de2362d3Smrg	    txformat = RADEON_TXFORMAT_YVYU422;
106de2362d3Smrg	else
107de2362d3Smrg	    txformat = RADEON_TXFORMAT_VYUY422;
108de2362d3Smrg    }
109de2362d3Smrg
110de2362d3Smrg    txformat |= RADEON_TXFORMAT_NON_POWER2;
111de2362d3Smrg
112de2362d3Smrg    colorpitch = dst_pitch >> pixel_shift;
113de2362d3Smrg
114de2362d3Smrg    if (RADEONTilingEnabled(pScrn, pPixmap))
115de2362d3Smrg	colorpitch |= RADEON_COLOR_TILE_ENABLE;
116de2362d3Smrg
117de2362d3Smrg    BEGIN_ACCEL_RELOC(4,2);
118de2362d3Smrg
11918781e08Smrg    OUT_RING_REG(RADEON_RB3D_CNTL, dst_format);
120de2362d3Smrg    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap);
121de2362d3Smrg    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap);
12218781e08Smrg    OUT_RING_REG(RADEON_RB3D_BLENDCNTL,
123de2362d3Smrg		  RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
124de2362d3Smrg
12518781e08Smrg    ADVANCE_RING();
126de2362d3Smrg
127de2362d3Smrg    if (pPriv->is_planar) {
128de2362d3Smrg	/* need 2 texcoord sets (even though they are identical) due
129de2362d3Smrg	   to denormalization! hw apparently can't premultiply
130de2362d3Smrg	   same coord set by different texture size */
131de2362d3Smrg	pPriv->vtx_count = 6;
132de2362d3Smrg
133de2362d3Smrg	txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
134de2362d3Smrg		  (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
135de2362d3Smrg	txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64);
136de2362d3Smrg	txpitch -= 32;
137de2362d3Smrg
138de2362d3Smrg	BEGIN_ACCEL_RELOC(23, 3);
139de2362d3Smrg
14018781e08Smrg	OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
141de2362d3Smrg					  RADEON_SE_VTX_FMT_ST0 |
142de2362d3Smrg					  RADEON_SE_VTX_FMT_ST1));
143de2362d3Smrg
14418781e08Smrg	OUT_RING_REG(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE |
145de2362d3Smrg				       RADEON_TEX_1_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
146de2362d3Smrg				       RADEON_TEX_2_ENABLE | RADEON_TEX_BLEND_2_ENABLE |
147de2362d3Smrg				       RADEON_PLANAR_YUV_ENABLE));
148de2362d3Smrg
149de2362d3Smrg	/* Y */
15018781e08Smrg	OUT_RING_REG(RADEON_PP_TXFILTER_0,
151de2362d3Smrg		      RADEON_MAG_FILTER_LINEAR |
152de2362d3Smrg		      RADEON_MIN_FILTER_LINEAR |
153de2362d3Smrg		      RADEON_CLAMP_S_CLAMP_LAST |
154de2362d3Smrg		      RADEON_CLAMP_T_CLAMP_LAST |
155de2362d3Smrg		      RADEON_YUV_TO_RGB);
15618781e08Smrg	OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
15718781e08Smrg	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, 0, src_bo);
15818781e08Smrg	OUT_RING_REG(RADEON_PP_TXCBLEND_0,
159de2362d3Smrg		      RADEON_COLOR_ARG_A_ZERO |
160de2362d3Smrg		      RADEON_COLOR_ARG_B_ZERO |
161de2362d3Smrg		      RADEON_COLOR_ARG_C_T0_COLOR |
162de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
163de2362d3Smrg		      RADEON_CLAMP_TX);
16418781e08Smrg	OUT_RING_REG(RADEON_PP_TXABLEND_0,
165de2362d3Smrg		      RADEON_ALPHA_ARG_A_ZERO |
166de2362d3Smrg		      RADEON_ALPHA_ARG_B_ZERO |
167de2362d3Smrg		      RADEON_ALPHA_ARG_C_T0_ALPHA |
168de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
169de2362d3Smrg		      RADEON_CLAMP_TX);
170de2362d3Smrg
17118781e08Smrg	OUT_RING_REG(RADEON_PP_TEX_SIZE_0,
172de2362d3Smrg		      (pPriv->w - 1) |
173de2362d3Smrg		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
17418781e08Smrg	OUT_RING_REG(RADEON_PP_TEX_PITCH_0,
175de2362d3Smrg		      pPriv->src_pitch - 32);
176de2362d3Smrg
177de2362d3Smrg	/* U */
17818781e08Smrg	OUT_RING_REG(RADEON_PP_TXFILTER_1,
179de2362d3Smrg		      RADEON_MAG_FILTER_LINEAR |
180de2362d3Smrg		      RADEON_MIN_FILTER_LINEAR |
181de2362d3Smrg		      RADEON_CLAMP_S_CLAMP_LAST |
182de2362d3Smrg		      RADEON_CLAMP_T_CLAMP_LAST);
18318781e08Smrg	OUT_RING_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
18418781e08Smrg	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, pPriv->planeu_offset, src_bo);
18518781e08Smrg	OUT_RING_REG(RADEON_PP_TXCBLEND_1,
186de2362d3Smrg		      RADEON_COLOR_ARG_A_ZERO |
187de2362d3Smrg		      RADEON_COLOR_ARG_B_ZERO |
188de2362d3Smrg		      RADEON_COLOR_ARG_C_T0_COLOR |
189de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
190de2362d3Smrg		      RADEON_CLAMP_TX);
19118781e08Smrg	OUT_RING_REG(RADEON_PP_TXABLEND_1,
192de2362d3Smrg		      RADEON_ALPHA_ARG_A_ZERO |
193de2362d3Smrg		      RADEON_ALPHA_ARG_B_ZERO |
194de2362d3Smrg		      RADEON_ALPHA_ARG_C_T0_ALPHA |
195de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
196de2362d3Smrg		      RADEON_CLAMP_TX);
197de2362d3Smrg
19818781e08Smrg	OUT_RING_REG(RADEON_PP_TEX_SIZE_1, txsize);
19918781e08Smrg	OUT_RING_REG(RADEON_PP_TEX_PITCH_1, txpitch);
200de2362d3Smrg
201de2362d3Smrg	/* V */
20218781e08Smrg	OUT_RING_REG(RADEON_PP_TXFILTER_2,
203de2362d3Smrg		      RADEON_MAG_FILTER_LINEAR |
204de2362d3Smrg		      RADEON_MIN_FILTER_LINEAR |
205de2362d3Smrg		      RADEON_CLAMP_S_CLAMP_LAST |
206de2362d3Smrg		      RADEON_CLAMP_T_CLAMP_LAST);
20718781e08Smrg	OUT_RING_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
20818781e08Smrg	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, pPriv->planev_offset, src_bo);
20918781e08Smrg	OUT_RING_REG(RADEON_PP_TXCBLEND_2,
210de2362d3Smrg		      RADEON_COLOR_ARG_A_ZERO |
211de2362d3Smrg		      RADEON_COLOR_ARG_B_ZERO |
212de2362d3Smrg		      RADEON_COLOR_ARG_C_T0_COLOR |
213de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
214de2362d3Smrg		      RADEON_CLAMP_TX);
21518781e08Smrg	OUT_RING_REG(RADEON_PP_TXABLEND_2,
216de2362d3Smrg		      RADEON_ALPHA_ARG_A_ZERO |
217de2362d3Smrg		      RADEON_ALPHA_ARG_B_ZERO |
218de2362d3Smrg		      RADEON_ALPHA_ARG_C_T0_ALPHA |
219de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
220de2362d3Smrg		      RADEON_CLAMP_TX);
221de2362d3Smrg
22218781e08Smrg	OUT_RING_REG(RADEON_PP_TEX_SIZE_2, txsize);
22318781e08Smrg	OUT_RING_REG(RADEON_PP_TEX_PITCH_2, txpitch);
22418781e08Smrg	ADVANCE_RING();
225de2362d3Smrg    } else {
226de2362d3Smrg	pPriv->vtx_count = 4;
227de2362d3Smrg	BEGIN_ACCEL_RELOC(9, 1);
228de2362d3Smrg
22918781e08Smrg	OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
230de2362d3Smrg					  RADEON_SE_VTX_FMT_ST0));
231de2362d3Smrg
23218781e08Smrg	OUT_RING_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
233de2362d3Smrg
23418781e08Smrg	OUT_RING_REG(RADEON_PP_TXFILTER_0,
235de2362d3Smrg		      RADEON_MAG_FILTER_LINEAR |
236de2362d3Smrg		      RADEON_MIN_FILTER_LINEAR |
237de2362d3Smrg		      RADEON_CLAMP_S_CLAMP_LAST |
238de2362d3Smrg		      RADEON_CLAMP_T_CLAMP_LAST |
239de2362d3Smrg		      RADEON_YUV_TO_RGB);
24018781e08Smrg	OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
24118781e08Smrg	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, 0, src_bo);
24218781e08Smrg	OUT_RING_REG(RADEON_PP_TXCBLEND_0,
243de2362d3Smrg		      RADEON_COLOR_ARG_A_ZERO |
244de2362d3Smrg		      RADEON_COLOR_ARG_B_ZERO |
245de2362d3Smrg		      RADEON_COLOR_ARG_C_T0_COLOR |
246de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
247de2362d3Smrg		      RADEON_CLAMP_TX);
24818781e08Smrg	OUT_RING_REG(RADEON_PP_TXABLEND_0,
249de2362d3Smrg		      RADEON_ALPHA_ARG_A_ZERO |
250de2362d3Smrg		      RADEON_ALPHA_ARG_B_ZERO |
251de2362d3Smrg		      RADEON_ALPHA_ARG_C_T0_ALPHA |
252de2362d3Smrg		      RADEON_BLEND_CTL_ADD |
253de2362d3Smrg		      RADEON_CLAMP_TX);
254de2362d3Smrg
25518781e08Smrg	OUT_RING_REG(RADEON_PP_TEX_SIZE_0,
256de2362d3Smrg		      (pPriv->w - 1) |
257de2362d3Smrg		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
25818781e08Smrg	OUT_RING_REG(RADEON_PP_TEX_PITCH_0,
259de2362d3Smrg		      pPriv->src_pitch - 32);
26018781e08Smrg	ADVANCE_RING();
261de2362d3Smrg    }
262de2362d3Smrg
26318781e08Smrg    BEGIN_RING(2*2);
26418781e08Smrg    OUT_RING_REG(RADEON_RE_TOP_LEFT, 0);
26518781e08Smrg    OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) |
266de2362d3Smrg					   (scissor_h << RADEON_RE_HEIGHT_SHIFT)));
26718781e08Smrg    ADVANCE_RING();
268de2362d3Smrg
269de2362d3Smrg    if (pPriv->vsync) {
270de2362d3Smrg	xf86CrtcPtr crtc;
271de2362d3Smrg	if (pPriv->desired_crtc)
272de2362d3Smrg	    crtc = pPriv->desired_crtc;
273de2362d3Smrg	else
27418781e08Smrg	    crtc = radeon_pick_best_crtc(pScrn, FALSE,
275de2362d3Smrg					 pPriv->drw_x,
276de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
277de2362d3Smrg					 pPriv->drw_y,
278de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
279de2362d3Smrg	if (crtc)
28018781e08Smrg	    RADEONWaitForVLine(pScrn, pPixmap,
28118781e08Smrg				 crtc,
28218781e08Smrg				 pPriv->drw_y - crtc->y,
28318781e08Smrg				 (pPriv->drw_y - crtc->y) + pPriv->dst_h);
284de2362d3Smrg    }
285de2362d3Smrg
286de2362d3Smrg    return TRUE;
287de2362d3Smrg}
288de2362d3Smrg
289de2362d3Smrgstatic void
29018781e08SmrgRADEONDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
291de2362d3Smrg{
292de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
293de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
294de2362d3Smrg    int dstxoff, dstyoff;
295de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
296de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
297de2362d3Smrg
298de2362d3Smrg#ifdef COMPOSITE
299de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
300de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
301de2362d3Smrg#else
302de2362d3Smrg    dstxoff = 0;
303de2362d3Smrg    dstyoff = 0;
304de2362d3Smrg#endif
305de2362d3Smrg
30618781e08Smrg    if (!RADEONPrepareTexturedVideo(pScrn, pPriv))
307de2362d3Smrg	return;
308de2362d3Smrg
309de2362d3Smrg    /*
310de2362d3Smrg     * Rendering of the actual polygon is done in two different
311de2362d3Smrg     * ways depending on chip generation:
312de2362d3Smrg     *
313de2362d3Smrg     * < R300:
314de2362d3Smrg     *
315de2362d3Smrg     *     These chips can render a rectangle in one pass, so
316de2362d3Smrg     *     handling is pretty straight-forward.
317de2362d3Smrg     *
318de2362d3Smrg     * >= R300:
319de2362d3Smrg     *
320de2362d3Smrg     *     These chips can accept a quad, but will render it as
321de2362d3Smrg     *     two triangles which results in a diagonal tear. Instead
322de2362d3Smrg     *     We render a single, large triangle and use the scissor
323de2362d3Smrg     *     functionality to restrict it to the desired rectangle.
324de2362d3Smrg     *     Due to guardband limits on r3xx/r4xx, we can only use
325de2362d3Smrg     *     the single triangle up to 2560/4021 pixels; above that we
326de2362d3Smrg     *     render as a quad.
327de2362d3Smrg     */
328de2362d3Smrg    while (nBox) {
329de2362d3Smrg	int draw_size = 3 * pPriv->vtx_count + 5;
330de2362d3Smrg	int loop_boxes;
331de2362d3Smrg
332de2362d3Smrg	if (draw_size > radeon_cs_space_remaining(pScrn)) {
33318781e08Smrg	    radeon_cs_flush_indirect(pScrn);
33418781e08Smrg	    if (!RADEONPrepareTexturedVideo(pScrn, pPriv))
335de2362d3Smrg		return;
336de2362d3Smrg	}
337de2362d3Smrg	loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox);
338de2362d3Smrg	nBox -= loop_boxes;
339de2362d3Smrg
340de2362d3Smrg	BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 5);
341de2362d3Smrg	OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
342de2362d3Smrg			    loop_boxes * 3 * pPriv->vtx_count + 1));
343de2362d3Smrg	if (pPriv->is_planar)
344de2362d3Smrg	    OUT_RING(RADEON_CP_VC_FRMT_XY |
345de2362d3Smrg		     RADEON_CP_VC_FRMT_ST0 |
346de2362d3Smrg		     RADEON_CP_VC_FRMT_ST1);
347de2362d3Smrg	else
348de2362d3Smrg	    OUT_RING(RADEON_CP_VC_FRMT_XY |
349de2362d3Smrg		     RADEON_CP_VC_FRMT_ST0);
350de2362d3Smrg	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
351de2362d3Smrg		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
352de2362d3Smrg		 RADEON_CP_VC_CNTL_MAOS_ENABLE |
353de2362d3Smrg		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
354de2362d3Smrg		 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));
355de2362d3Smrg
356de2362d3Smrg	while (loop_boxes--) {
357de2362d3Smrg	    float srcX, srcY, srcw, srch;
358de2362d3Smrg	    int dstX, dstY, dstw, dsth;
359de2362d3Smrg	    dstX = pBox->x1 + dstxoff;
360de2362d3Smrg	    dstY = pBox->y1 + dstyoff;
361de2362d3Smrg	    dstw = pBox->x2 - pBox->x1;
362de2362d3Smrg	    dsth = pBox->y2 - pBox->y1;
363de2362d3Smrg
364de2362d3Smrg	    srcX = pPriv->src_x;
365de2362d3Smrg	    srcX += ((pBox->x1 - pPriv->drw_x) *
366de2362d3Smrg		     pPriv->src_w) / (float)pPriv->dst_w;
367de2362d3Smrg	    srcY = pPriv->src_y;
368de2362d3Smrg	    srcY += ((pBox->y1 - pPriv->drw_y) *
369de2362d3Smrg		     pPriv->src_h) / (float)pPriv->dst_h;
370de2362d3Smrg
371de2362d3Smrg	    srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
372de2362d3Smrg	    srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
373de2362d3Smrg
374de2362d3Smrg
375de2362d3Smrg	    if (pPriv->is_planar) {
376de2362d3Smrg		/*
377de2362d3Smrg		 * Just render a rect (using three coords).
378de2362d3Smrg		 */
379de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)(dstY + dsth),
380de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h,
381de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
382de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)(dstY + dsth),
383de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h,
384de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
385de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)dstY,
386de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h,
387de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
388de2362d3Smrg	    } else {
389de2362d3Smrg		/*
390de2362d3Smrg		 * Just render a rect (using three coords).
391de2362d3Smrg		 */
392de2362d3Smrg		VTX_OUT_4((float)dstX,                     (float)(dstY + dsth),
393de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
394de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)(dstY + dsth),
395de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
396de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)dstY,
397de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
398de2362d3Smrg	    }
399de2362d3Smrg
400de2362d3Smrg	    pBox++;
401de2362d3Smrg	}
402de2362d3Smrg
40318781e08Smrg	OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
404de2362d3Smrg	ADVANCE_RING();
405de2362d3Smrg    }
406de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
407de2362d3Smrg}
408de2362d3Smrg
409de2362d3Smrgstatic Bool
41018781e08SmrgR200PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
411de2362d3Smrg{
412de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
413de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
414de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
415de2362d3Smrg    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
416de2362d3Smrg    uint32_t txformat;
41718781e08Smrg    uint32_t txfilter, txsize, txpitch;
418de2362d3Smrg    uint32_t dst_pitch, dst_format;
419de2362d3Smrg    uint32_t colorpitch;
420de2362d3Smrg    int pixel_shift;
42118781e08Smrg    int scissor_w = MIN(pPixmap->drawable.width, 2048) - 1;
42218781e08Smrg    int scissor_h = MIN(pPixmap->drawable.height, 2048) - 1;
423de2362d3Smrg    /* note: in contrast to r300, use input biasing on uv components */
424de2362d3Smrg    const float Loff = -0.0627;
425de2362d3Smrg    float uvcosf, uvsinf;
426de2362d3Smrg    float yco, yoff;
427de2362d3Smrg    float uco[3], vco[3];
428de2362d3Smrg    float bright, cont, sat;
429de2362d3Smrg    int ref = pPriv->transform_index;
430de2362d3Smrg    float ucscale = 0.25, vcscale = 0.25;
431de2362d3Smrg    Bool needux8 = FALSE, needvx8 = FALSE;
43218781e08Smrg    int ret;
433de2362d3Smrg
43418781e08Smrg    radeon_cs_space_reset_bos(info->cs);
43518781e08Smrg    radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
4367821949aSmrg
43718781e08Smrg    if (pPriv->bicubic_enabled)
43839413783Smrg	radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo,
43939413783Smrg					  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
440de2362d3Smrg
44118781e08Smrg    driver_priv = exaGetPixmapDriverPrivate(pPixmap);
44239413783Smrg    radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0,
44339413783Smrg				      RADEON_GEM_DOMAIN_VRAM);
444de2362d3Smrg
44518781e08Smrg    ret = radeon_cs_space_check(info->cs);
44618781e08Smrg    if (ret) {
44718781e08Smrg	ErrorF("Not enough RAM to hw accel xv operation\n");
44818781e08Smrg	return FALSE;
449de2362d3Smrg    }
450de2362d3Smrg
451de2362d3Smrg    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
452de2362d3Smrg
45318781e08Smrg    dst_pitch = exaGetPixmapPitch(pPixmap);
454de2362d3Smrg
45518781e08Smrg    RADEON_SWITCH_TO_3D();
456de2362d3Smrg
457de2362d3Smrg    /* Same for R100/R200 */
458de2362d3Smrg    switch (pPixmap->drawable.bitsPerPixel) {
459de2362d3Smrg    case 16:
460de2362d3Smrg	if (pPixmap->drawable.depth == 15)
461de2362d3Smrg	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
462de2362d3Smrg	else
463de2362d3Smrg	    dst_format = RADEON_COLOR_FORMAT_RGB565;
464de2362d3Smrg	break;
465de2362d3Smrg    case 32:
466de2362d3Smrg	dst_format = RADEON_COLOR_FORMAT_ARGB8888;
467de2362d3Smrg	break;
468de2362d3Smrg    default:
469de2362d3Smrg	return FALSE;
470de2362d3Smrg    }
471de2362d3Smrg
472de2362d3Smrg    if (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12) {
473de2362d3Smrg	pPriv->is_planar = TRUE;
474de2362d3Smrg	txformat = RADEON_TXFORMAT_I8;
475de2362d3Smrg    } else {
476de2362d3Smrg	pPriv->is_planar = FALSE;
477de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
478de2362d3Smrg	    txformat = RADEON_TXFORMAT_YVYU422;
479de2362d3Smrg	else
480de2362d3Smrg	    txformat = RADEON_TXFORMAT_VYUY422;
481de2362d3Smrg    }
482de2362d3Smrg
483de2362d3Smrg    txformat |= RADEON_TXFORMAT_NON_POWER2;
484de2362d3Smrg
485de2362d3Smrg    colorpitch = dst_pitch >> pixel_shift;
486de2362d3Smrg
487de2362d3Smrg    if (RADEONTilingEnabled(pScrn, pPixmap))
488de2362d3Smrg	colorpitch |= RADEON_COLOR_TILE_ENABLE;
489de2362d3Smrg
490de2362d3Smrg    BEGIN_ACCEL_RELOC(4,2);
491de2362d3Smrg
49218781e08Smrg    OUT_RING_REG(RADEON_RB3D_CNTL, dst_format);
493de2362d3Smrg    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap);
494de2362d3Smrg    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pPixmap);
495de2362d3Smrg
49618781e08Smrg    OUT_RING_REG(RADEON_RB3D_BLENDCNTL,
497de2362d3Smrg		  RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
498de2362d3Smrg
49918781e08Smrg    ADVANCE_RING();
500de2362d3Smrg
501de2362d3Smrg    txfilter =  R200_MAG_FILTER_LINEAR |
502de2362d3Smrg	R200_MIN_FILTER_LINEAR |
503de2362d3Smrg	R200_CLAMP_S_CLAMP_LAST |
504de2362d3Smrg	R200_CLAMP_T_CLAMP_LAST;
505de2362d3Smrg
506de2362d3Smrg    /* contrast can cause constant overflow, clamp */
507de2362d3Smrg    cont = RTFContrast(pPriv->contrast);
508de2362d3Smrg    if (cont * trans[ref].RefLuma > 2.0)
509de2362d3Smrg	cont = 2.0 / trans[ref].RefLuma;
510de2362d3Smrg    /* brightness is only from -0.5 to 0.5 should be safe */
511de2362d3Smrg    bright = RTFBrightness(pPriv->brightness);
512de2362d3Smrg    /* saturation can also cause overflow, clamp */
513de2362d3Smrg    sat = RTFSaturation(pPriv->saturation);
514de2362d3Smrg    if (sat * trans[ref].RefBCb > 4.0)
515de2362d3Smrg	sat = 4.0 / trans[ref].RefBCb;
516de2362d3Smrg    uvcosf = sat * cos(RTFHue(pPriv->hue));
517de2362d3Smrg    uvsinf = sat * sin(RTFHue(pPriv->hue));
518de2362d3Smrg
519de2362d3Smrg    yco = trans[ref].RefLuma * cont;
520de2362d3Smrg    uco[0] = -trans[ref].RefRCr * uvsinf;
521de2362d3Smrg    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
522de2362d3Smrg    uco[2] = trans[ref].RefBCb * uvcosf;
523de2362d3Smrg    vco[0] = trans[ref].RefRCr * uvcosf;
524de2362d3Smrg    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
525de2362d3Smrg    vco[2] = trans[ref].RefBCb * uvsinf;
526de2362d3Smrg    yoff = Loff * yco + bright;
527de2362d3Smrg
528de2362d3Smrg    if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
529de2362d3Smrg	needux8 = TRUE;
530de2362d3Smrg	ucscale = 0.125;
531de2362d3Smrg    }
532de2362d3Smrg    if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
533de2362d3Smrg	needvx8 = TRUE;
534de2362d3Smrg	vcscale = 0.125;
535de2362d3Smrg    }
536de2362d3Smrg
537de2362d3Smrg    if (pPriv->is_planar) {
538de2362d3Smrg	/* need 2 texcoord sets (even though they are identical) due
539de2362d3Smrg	   to denormalization! hw apparently can't premultiply
540de2362d3Smrg	   same coord set by different texture size */
541de2362d3Smrg	pPriv->vtx_count = 6;
542de2362d3Smrg
543de2362d3Smrg	txsize = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
544de2362d3Smrg		  (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
545de2362d3Smrg	txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64);
546de2362d3Smrg	txpitch -= 32;
547de2362d3Smrg
548de2362d3Smrg	BEGIN_ACCEL_RELOC(36, 3);
549de2362d3Smrg
55018781e08Smrg	OUT_RING_REG(RADEON_PP_CNTL,
551de2362d3Smrg		      RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE |
552de2362d3Smrg		      RADEON_TEX_BLEND_0_ENABLE |
553de2362d3Smrg		      RADEON_TEX_BLEND_1_ENABLE |
554de2362d3Smrg		      RADEON_TEX_BLEND_2_ENABLE);
555de2362d3Smrg
55618781e08Smrg	OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
55718781e08Smrg	OUT_RING_REG(R200_SE_VTX_FMT_1,
558de2362d3Smrg		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
559de2362d3Smrg		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
560de2362d3Smrg
56118781e08Smrg	OUT_RING_REG(R200_PP_TXFILTER_0, txfilter);
56218781e08Smrg	OUT_RING_REG(R200_PP_TXFORMAT_0, txformat);
56318781e08Smrg	OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0);
56418781e08Smrg	OUT_RING_REG(R200_PP_TXSIZE_0,
565de2362d3Smrg		      (pPriv->w - 1) |
566de2362d3Smrg		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
56718781e08Smrg	OUT_RING_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
56818781e08Smrg	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, 0, src_bo);
56918781e08Smrg
57018781e08Smrg	OUT_RING_REG(R200_PP_TXFILTER_1, txfilter);
57118781e08Smrg	OUT_RING_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
57218781e08Smrg	OUT_RING_REG(R200_PP_TXFORMAT_X_1, 0);
57318781e08Smrg	OUT_RING_REG(R200_PP_TXSIZE_1, txsize);
57418781e08Smrg	OUT_RING_REG(R200_PP_TXPITCH_1, txpitch);
57518781e08Smrg	OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, pPriv->planeu_offset, src_bo);
57618781e08Smrg
57718781e08Smrg	OUT_RING_REG(R200_PP_TXFILTER_2, txfilter);
57818781e08Smrg	OUT_RING_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
57918781e08Smrg	OUT_RING_REG(R200_PP_TXFORMAT_X_2, 0);
58018781e08Smrg	OUT_RING_REG(R200_PP_TXSIZE_2, txsize);
58118781e08Smrg	OUT_RING_REG(R200_PP_TXPITCH_2, txpitch);
58218781e08Smrg	OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, pPriv->planev_offset, src_bo);
583de2362d3Smrg
584de2362d3Smrg	/* similar to r300 code. Note the big problem is that hardware constants
585de2362d3Smrg	 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
586de2362d3Smrg	 * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually
587de2362d3Smrg	 * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but
588de2362d3Smrg	 * the constants not. To get larger range can use output scale, but for
589de2362d3Smrg	 * that 2.018 value we need a total scale by 8, which means the constants
590de2362d3Smrg	 * really have no accuracy whatsoever (5 fractional bits only).
591de2362d3Smrg	 * The only direct way to get high  precision "constants" into the fragment
592de2362d3Smrg	 * pipe I know of is to use the texcoord interpolator (not color, this one
593de2362d3Smrg	 * is 8 bit only too), which seems a bit expensive. We're lucky though it
594de2362d3Smrg	 * seems the values we need seem to fit better than worst case (get about
595de2362d3Smrg	 * 6 fractional bits for this instead of 5, at least when not correcting for
596de2362d3Smrg	 * hue/saturation/contrast/brightness, which is the same as for vco - yco and
597de2362d3Smrg	 * yoff get 8 fractional bits). Try to preserve as much accuracy as possible
598de2362d3Smrg	 * even with non-default saturation/hue/contrast/brightness adjustments,
599de2362d3Smrg	 * it gets a little crazy and ultimately precision might still be lacking.
600de2362d3Smrg	 *
601de2362d3Smrg	 * A higher precision (8 fractional bits) version might just put uco into
602de2362d3Smrg	 * a texcoord, and calculate a new vcoconst in the shader, like so:
603de2362d3Smrg	 * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable
604de2362d3Smrg	 * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0}
605de2362d3Smrg	 * vcocalc = ADD temp, bias/scale(cohelper), vco
606de2362d3Smrg	 * would in total use 4 tex units, 4 instructions which seems fairly
607de2362d3Smrg	 * balanced for this architecture (instead of 3 + 3 for the solution here)
608de2362d3Smrg	 *
609de2362d3Smrg	 * temp = MAD(yco, yuv.yyyy, yoff)
610de2362d3Smrg	 * temp = MAD(uco, yuv.uuuu, temp)
611de2362d3Smrg	 * result = MAD(vco, yuv.vvvv, temp)
612de2362d3Smrg	 *
613de2362d3Smrg	 * note first mad produces actually scalar, hence we transform
614de2362d3Smrg	 * it into a dp2a to get 8 bit precision of yco instead of 7 -
615de2362d3Smrg	 * That's assuming hw correctly expands consts to internal precision.
616de2362d3Smrg	 * (y * 1 + y * (yco - 1) + yoff)
617de2362d3Smrg	 * temp = DP2A / 2 (yco, yuv.yyyy, yoff)
618de2362d3Smrg	 * temp = MAD (uco / 4, yuv.uuuu * 2, temp)
619de2362d3Smrg	 * result = MAD x2 (vco / 2, yuv.vvvv, temp)
620de2362d3Smrg	 *
621de2362d3Smrg	 * vco, uco need bias (and hence scale too)
622de2362d3Smrg	 *
623de2362d3Smrg	 */
624de2362d3Smrg
625de2362d3Smrg	/* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */
62618781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND_0,
627de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
628de2362d3Smrg		      R200_TXC_ARG_B_R0_COLOR |
629de2362d3Smrg		      R200_TXC_ARG_C_TFACTOR_COLOR |
630de2362d3Smrg		      (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
631de2362d3Smrg		      R200_TXC_OP_DOT2_ADD);
63218781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND2_0,
633de2362d3Smrg		      (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
634de2362d3Smrg		      R200_TXC_SCALE_INV2 |
635de2362d3Smrg		      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
63618781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND_0,
637de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
638de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
639de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
640de2362d3Smrg		      R200_TXA_OP_MADD);
64118781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND2_0,
642de2362d3Smrg		      R200_TXA_OUTPUT_REG_NONE);
643de2362d3Smrg
644de2362d3Smrg	/* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */
64518781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND_1,
646de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
647de2362d3Smrg		      R200_TXC_BIAS_ARG_A |
648de2362d3Smrg		      R200_TXC_SCALE_ARG_A |
649de2362d3Smrg		      R200_TXC_ARG_B_R1_COLOR |
650de2362d3Smrg		      R200_TXC_BIAS_ARG_B |
651de2362d3Smrg		      (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
652de2362d3Smrg		      R200_TXC_ARG_C_R0_COLOR |
653de2362d3Smrg		      R200_TXC_OP_MADD);
65418781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND2_1,
655de2362d3Smrg		      (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
656de2362d3Smrg		      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
65718781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND_1,
658de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
659de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
660de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
661de2362d3Smrg		      R200_TXA_OP_MADD);
66218781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND2_1,
663de2362d3Smrg		      R200_TXA_OUTPUT_REG_NONE);
664de2362d3Smrg
665de2362d3Smrg	/* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */
66618781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND_2,
667de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
668de2362d3Smrg		      R200_TXC_BIAS_ARG_A |
669de2362d3Smrg		      R200_TXC_SCALE_ARG_A |
670de2362d3Smrg		      R200_TXC_ARG_B_R2_COLOR |
671de2362d3Smrg		      R200_TXC_BIAS_ARG_B |
672de2362d3Smrg		      (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
673de2362d3Smrg		      R200_TXC_ARG_C_R0_COLOR |
674de2362d3Smrg		      R200_TXC_OP_MADD);
67518781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND2_2,
676de2362d3Smrg		      (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
677de2362d3Smrg		      R200_TXC_SCALE_2X |
678de2362d3Smrg		      R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
67918781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND_2,
680de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
681de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
682de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
683de2362d3Smrg		      R200_TXA_COMP_ARG_C |
684de2362d3Smrg		      R200_TXA_OP_MADD);
68518781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND2_2,
686de2362d3Smrg		      R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
687de2362d3Smrg
688de2362d3Smrg	/* shader constants */
68918781e08Smrg	OUT_RING_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
690de2362d3Smrg						      yco > 1.0 ? yco - 1.0: yco,
691de2362d3Smrg						      yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
692de2362d3Smrg						      0.0));
69318781e08Smrg	OUT_RING_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
694de2362d3Smrg						      uco[1] * ucscale + 0.5, /* or [-2, 2] */
695de2362d3Smrg						      uco[2] * ucscale + 0.5,
696de2362d3Smrg						      0.0));
69718781e08Smrg	OUT_RING_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
698de2362d3Smrg						      vco[1] * vcscale + 0.5, /* or [-4, 4] */
699de2362d3Smrg						      vco[2] * vcscale + 0.5,
700de2362d3Smrg						      0.0));
701de2362d3Smrg
70218781e08Smrg	ADVANCE_RING();
703de2362d3Smrg    } else {
704de2362d3Smrg	pPriv->vtx_count = 4;
705de2362d3Smrg
706de2362d3Smrg	BEGIN_ACCEL_RELOC(24, 1);
707de2362d3Smrg
70818781e08Smrg	OUT_RING_REG(RADEON_PP_CNTL,
709de2362d3Smrg		      RADEON_TEX_0_ENABLE |
710de2362d3Smrg		      RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
711de2362d3Smrg		      RADEON_TEX_BLEND_2_ENABLE);
712de2362d3Smrg
71318781e08Smrg	OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
71418781e08Smrg	OUT_RING_REG(R200_SE_VTX_FMT_1,
715de2362d3Smrg		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
716de2362d3Smrg
71718781e08Smrg	OUT_RING_REG(R200_PP_TXFILTER_0, txfilter);
71818781e08Smrg	OUT_RING_REG(R200_PP_TXFORMAT_0, txformat);
71918781e08Smrg	OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0);
72018781e08Smrg	OUT_RING_REG(R200_PP_TXSIZE_0,
721de2362d3Smrg		      (pPriv->w - 1) |
722de2362d3Smrg		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
72318781e08Smrg	OUT_RING_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
72418781e08Smrg	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, 0, src_bo);
725de2362d3Smrg
726de2362d3Smrg	/* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */
72718781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND_0,
728de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
729de2362d3Smrg		      R200_TXC_ARG_B_R0_COLOR |
730de2362d3Smrg		      R200_TXC_ARG_C_TFACTOR_COLOR |
731de2362d3Smrg		      (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
732de2362d3Smrg		      R200_TXC_OP_DOT2_ADD);
73318781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND2_0,
734de2362d3Smrg		      (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
735de2362d3Smrg		      R200_TXC_SCALE_INV2 |
736de2362d3Smrg		      (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) |
737de2362d3Smrg		      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
73818781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND_0,
739de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
740de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
741de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
742de2362d3Smrg		      R200_TXA_OP_MADD);
74318781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND2_0,
744de2362d3Smrg		      R200_TXA_OUTPUT_REG_NONE);
745de2362d3Smrg
746de2362d3Smrg	/* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */
74718781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND_1,
748de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
749de2362d3Smrg		      R200_TXC_BIAS_ARG_A |
750de2362d3Smrg		      R200_TXC_SCALE_ARG_A |
751de2362d3Smrg		      R200_TXC_ARG_B_R0_COLOR |
752de2362d3Smrg		      R200_TXC_BIAS_ARG_B |
753de2362d3Smrg		      (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
754de2362d3Smrg		      R200_TXC_ARG_C_R1_COLOR |
755de2362d3Smrg		      R200_TXC_OP_MADD);
75618781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND2_1,
757de2362d3Smrg		      (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
758de2362d3Smrg		      (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) |
759de2362d3Smrg		      R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
76018781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND_1,
761de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
762de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
763de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
764de2362d3Smrg		      R200_TXA_OP_MADD);
76518781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND2_1,
766de2362d3Smrg		      R200_TXA_OUTPUT_REG_NONE);
767de2362d3Smrg
768de2362d3Smrg	/* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */
76918781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND_2,
770de2362d3Smrg		      R200_TXC_ARG_A_TFACTOR_COLOR |
771de2362d3Smrg		      R200_TXC_BIAS_ARG_A |
772de2362d3Smrg		      R200_TXC_SCALE_ARG_A |
773de2362d3Smrg		      R200_TXC_ARG_B_R0_COLOR |
774de2362d3Smrg		      R200_TXC_BIAS_ARG_B |
775de2362d3Smrg		      (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
776de2362d3Smrg		      R200_TXC_ARG_C_R1_COLOR |
777de2362d3Smrg		      R200_TXC_OP_MADD);
77818781e08Smrg	OUT_RING_REG(R200_PP_TXCBLEND2_2,
779de2362d3Smrg		      (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
780de2362d3Smrg		      R200_TXC_SCALE_2X |
781de2362d3Smrg		      (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) |
782de2362d3Smrg		      R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
78318781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND_2,
784de2362d3Smrg		      R200_TXA_ARG_A_ZERO |
785de2362d3Smrg		      R200_TXA_ARG_B_ZERO |
786de2362d3Smrg		      R200_TXA_ARG_C_ZERO |
787de2362d3Smrg		      R200_TXA_COMP_ARG_C |
788de2362d3Smrg		      R200_TXA_OP_MADD);
78918781e08Smrg	OUT_RING_REG(R200_PP_TXABLEND2_2,
790de2362d3Smrg		      R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
791de2362d3Smrg
792de2362d3Smrg	/* shader constants */
79318781e08Smrg	OUT_RING_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
794de2362d3Smrg						      yco > 1.0 ? yco - 1.0: yco,
795de2362d3Smrg						      yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
796de2362d3Smrg						      0.0));
79718781e08Smrg	OUT_RING_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
798de2362d3Smrg						      uco[1] * ucscale + 0.5, /* or [-2, 2] */
799de2362d3Smrg						      uco[2] * ucscale + 0.5,
800de2362d3Smrg						      0.0));
80118781e08Smrg	OUT_RING_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
802de2362d3Smrg						      vco[1] * vcscale + 0.5, /* or [-4, 4] */
803de2362d3Smrg						      vco[2] * vcscale + 0.5,
804de2362d3Smrg						      0.0));
805de2362d3Smrg
80618781e08Smrg	ADVANCE_RING();
807de2362d3Smrg    }
808de2362d3Smrg
80918781e08Smrg    BEGIN_RING(2*2);
81018781e08Smrg    OUT_RING_REG(RADEON_RE_TOP_LEFT, 0);
81118781e08Smrg    OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, ((scissor_w << RADEON_RE_WIDTH_SHIFT) |
812de2362d3Smrg					   (scissor_h << RADEON_RE_HEIGHT_SHIFT)));
81318781e08Smrg    ADVANCE_RING();
814de2362d3Smrg
815de2362d3Smrg    if (pPriv->vsync) {
816de2362d3Smrg	xf86CrtcPtr crtc;
817de2362d3Smrg	if (pPriv->desired_crtc)
818de2362d3Smrg	    crtc = pPriv->desired_crtc;
819de2362d3Smrg	else
82018781e08Smrg	    crtc = radeon_pick_best_crtc(pScrn, FALSE,
821de2362d3Smrg					 pPriv->drw_x,
822de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
823de2362d3Smrg					 pPriv->drw_y,
824de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
825de2362d3Smrg	if (crtc)
82618781e08Smrg	    RADEONWaitForVLine(pScrn, pPixmap,
82718781e08Smrg				 crtc,
82818781e08Smrg				 pPriv->drw_y - crtc->y,
82918781e08Smrg				 (pPriv->drw_y - crtc->y) + pPriv->dst_h);
830de2362d3Smrg    }
831de2362d3Smrg
832de2362d3Smrg    return TRUE;
833de2362d3Smrg}
834de2362d3Smrg
835de2362d3Smrgstatic void
83618781e08SmrgR200DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
837de2362d3Smrg{
838de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
839de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
840de2362d3Smrg    int dstxoff, dstyoff;
841de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
842de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
843de2362d3Smrg
844de2362d3Smrg#ifdef COMPOSITE
845de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
846de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
847de2362d3Smrg#else
848de2362d3Smrg    dstxoff = 0;
849de2362d3Smrg    dstyoff = 0;
850de2362d3Smrg#endif
851de2362d3Smrg
85218781e08Smrg    if (!R200PrepareTexturedVideo(pScrn, pPriv))
853de2362d3Smrg	return;
854de2362d3Smrg
855de2362d3Smrg    /*
856de2362d3Smrg     * Rendering of the actual polygon is done in two different
857de2362d3Smrg     * ways depending on chip generation:
858de2362d3Smrg     *
859de2362d3Smrg     * < R300:
860de2362d3Smrg     *
861de2362d3Smrg     *     These chips can render a rectangle in one pass, so
862de2362d3Smrg     *     handling is pretty straight-forward.
863de2362d3Smrg     *
864de2362d3Smrg     * >= R300:
865de2362d3Smrg     *
866de2362d3Smrg     *     These chips can accept a quad, but will render it as
867de2362d3Smrg     *     two triangles which results in a diagonal tear. Instead
868de2362d3Smrg     *     We render a single, large triangle and use the scissor
869de2362d3Smrg     *     functionality to restrict it to the desired rectangle.
870de2362d3Smrg     *     Due to guardband limits on r3xx/r4xx, we can only use
871de2362d3Smrg     *     the single triangle up to 2560/4021 pixels; above that we
872de2362d3Smrg     *     render as a quad.
873de2362d3Smrg     */
874de2362d3Smrg
875de2362d3Smrg    while (nBox) {
876de2362d3Smrg	int draw_size = 3 * pPriv->vtx_count + 4;
877de2362d3Smrg	int loop_boxes;
878de2362d3Smrg
879de2362d3Smrg	if (draw_size > radeon_cs_space_remaining(pScrn)) {
88018781e08Smrg	    radeon_cs_flush_indirect(pScrn);
88118781e08Smrg	    if (!R200PrepareTexturedVideo(pScrn, pPriv))
882de2362d3Smrg		return;
883de2362d3Smrg	}
884de2362d3Smrg	loop_boxes = MIN(radeon_cs_space_remaining(pScrn) / draw_size, nBox);
885de2362d3Smrg	nBox -= loop_boxes;
886de2362d3Smrg
887de2362d3Smrg	BEGIN_RING(loop_boxes * 3 * pPriv->vtx_count + 4);
888de2362d3Smrg	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
889de2362d3Smrg			    loop_boxes * 3 * pPriv->vtx_count));
890de2362d3Smrg	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
891de2362d3Smrg		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
892de2362d3Smrg		 ((loop_boxes * 3) << RADEON_CP_VC_CNTL_NUM_SHIFT));
893de2362d3Smrg
894de2362d3Smrg	while (loop_boxes--) {
895de2362d3Smrg	    float srcX, srcY, srcw, srch;
896de2362d3Smrg	    int dstX, dstY, dstw, dsth;
897de2362d3Smrg	    dstX = pBox->x1 + dstxoff;
898de2362d3Smrg	    dstY = pBox->y1 + dstyoff;
899de2362d3Smrg	    dstw = pBox->x2 - pBox->x1;
900de2362d3Smrg	    dsth = pBox->y2 - pBox->y1;
901de2362d3Smrg
902de2362d3Smrg	    srcX = pPriv->src_x;
903de2362d3Smrg	    srcX += ((pBox->x1 - pPriv->drw_x) *
904de2362d3Smrg		     pPriv->src_w) / (float)pPriv->dst_w;
905de2362d3Smrg	    srcY = pPriv->src_y;
906de2362d3Smrg	    srcY += ((pBox->y1 - pPriv->drw_y) *
907de2362d3Smrg		     pPriv->src_h) / (float)pPriv->dst_h;
908de2362d3Smrg
909de2362d3Smrg	    srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
910de2362d3Smrg	    srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
911de2362d3Smrg
912de2362d3Smrg	    if (pPriv->is_planar) {
913de2362d3Smrg		/*
914de2362d3Smrg		 * Just render a rect (using three coords).
915de2362d3Smrg		 */
916de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)(dstY + dsth),
917de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h,
918de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
919de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)(dstY + dsth),
920de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h,
921de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
922de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)dstY,
923de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h,
924de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
925de2362d3Smrg	    } else {
926de2362d3Smrg		/*
927de2362d3Smrg		 * Just render a rect (using three coords).
928de2362d3Smrg		 */
929de2362d3Smrg		VTX_OUT_4((float)dstX,                     (float)(dstY + dsth),
930de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
931de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)(dstY + dsth),
932de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
933de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)dstY,
934de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
935de2362d3Smrg	    }
936de2362d3Smrg
937de2362d3Smrg	    pBox++;
938de2362d3Smrg	}
939de2362d3Smrg
94018781e08Smrg	OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
941de2362d3Smrg	ADVANCE_RING();
942de2362d3Smrg    }
943de2362d3Smrg
944de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
945de2362d3Smrg}
946de2362d3Smrg
947de2362d3Smrgstatic Bool
94818781e08SmrgR300PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
949de2362d3Smrg{
950de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
951de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
952de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
953de2362d3Smrg    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
95418781e08Smrg    uint32_t txfilter, txformat0, txformat1, txpitch;
955de2362d3Smrg    uint32_t dst_pitch, dst_format;
95618781e08Smrg    uint32_t txenable, colorpitch;
957de2362d3Smrg    uint32_t output_fmt;
958de2362d3Smrg    int pixel_shift;
95918781e08Smrg    int ret;
960de2362d3Smrg
96118781e08Smrg    radeon_cs_space_reset_bos(info->cs);
96218781e08Smrg    radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
963de2362d3Smrg
96418781e08Smrg    if (pPriv->bicubic_enabled)
96539413783Smrg	radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo,
96639413783Smrg					  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
967de2362d3Smrg
96818781e08Smrg    driver_priv = exaGetPixmapDriverPrivate(pPixmap);
96939413783Smrg    radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0,
97039413783Smrg				      RADEON_GEM_DOMAIN_VRAM);
9717821949aSmrg
97218781e08Smrg    ret = radeon_cs_space_check(info->cs);
97318781e08Smrg    if (ret) {
97418781e08Smrg	ErrorF("Not enough RAM to hw accel xv operation\n");
97518781e08Smrg	return FALSE;
976de2362d3Smrg    }
977de2362d3Smrg
978de2362d3Smrg    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
979de2362d3Smrg
98018781e08Smrg    dst_pitch = exaGetPixmapPitch(pPixmap);
98118781e08Smrg    RADEON_SWITCH_TO_3D();
982de2362d3Smrg
983de2362d3Smrg    if (pPriv->bicubic_enabled)
984de2362d3Smrg	pPriv->vtx_count = 6;
985de2362d3Smrg    else
986de2362d3Smrg	pPriv->vtx_count = 4;
987de2362d3Smrg
988de2362d3Smrg    switch (pPixmap->drawable.bitsPerPixel) {
989de2362d3Smrg    case 16:
990de2362d3Smrg	if (pPixmap->drawable.depth == 15)
991de2362d3Smrg	    dst_format = R300_COLORFORMAT_ARGB1555;
992de2362d3Smrg	else
993de2362d3Smrg	    dst_format = R300_COLORFORMAT_RGB565;
994de2362d3Smrg	break;
995de2362d3Smrg    case 32:
996de2362d3Smrg	dst_format = R300_COLORFORMAT_ARGB8888;
997de2362d3Smrg	break;
998de2362d3Smrg    default:
999de2362d3Smrg	return FALSE;
1000de2362d3Smrg    }
1001de2362d3Smrg
1002de2362d3Smrg    output_fmt = (R300_OUT_FMT_C4_8 |
1003de2362d3Smrg		  R300_OUT_FMT_C0_SEL_BLUE |
1004de2362d3Smrg		  R300_OUT_FMT_C1_SEL_GREEN |
1005de2362d3Smrg		  R300_OUT_FMT_C2_SEL_RED |
1006de2362d3Smrg		  R300_OUT_FMT_C3_SEL_ALPHA);
1007de2362d3Smrg
1008de2362d3Smrg    colorpitch = dst_pitch >> pixel_shift;
1009de2362d3Smrg    colorpitch |= dst_format;
1010de2362d3Smrg
1011de2362d3Smrg    if (RADEONTilingEnabled(pScrn, pPixmap))
1012de2362d3Smrg	colorpitch |= R300_COLORTILE;
1013de2362d3Smrg
1014de2362d3Smrg
1015de2362d3Smrg    if (((pPriv->bicubic_state == BICUBIC_OFF)) &&
1016de2362d3Smrg	(pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12))
1017de2362d3Smrg	pPriv->is_planar = TRUE;
1018de2362d3Smrg    else
1019de2362d3Smrg	pPriv->is_planar = FALSE;
1020de2362d3Smrg
1021de2362d3Smrg    if (pPriv->is_planar) {
1022de2362d3Smrg	txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
1023de2362d3Smrg	txpitch = pPriv->src_pitch;
1024de2362d3Smrg    } else {
1025de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
1026de2362d3Smrg	    txformat1 = R300_TX_FORMAT_YVYU422;
1027de2362d3Smrg	else
1028de2362d3Smrg	    txformat1 = R300_TX_FORMAT_VYUY422;
1029de2362d3Smrg
1030de2362d3Smrg	if (pPriv->bicubic_state != BICUBIC_OFF)
1031de2362d3Smrg	    txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
1032de2362d3Smrg
1033de2362d3Smrg	/* pitch is in pixels */
1034de2362d3Smrg	txpitch = pPriv->src_pitch / 2;
1035de2362d3Smrg    }
1036de2362d3Smrg    txpitch -= 1;
1037de2362d3Smrg
1038de2362d3Smrg    txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1039de2362d3Smrg		 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
1040de2362d3Smrg		 R300_TXPITCH_EN);
1041de2362d3Smrg
1042de2362d3Smrg    txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1043de2362d3Smrg		R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
1044de2362d3Smrg		R300_TX_MAG_FILTER_LINEAR |
1045de2362d3Smrg		R300_TX_MIN_FILTER_LINEAR |
1046de2362d3Smrg		(0 << R300_TX_ID_SHIFT));
1047de2362d3Smrg
1048de2362d3Smrg    BEGIN_ACCEL_RELOC(6, 1);
104918781e08Smrg    OUT_RING_REG(R300_TX_FILTER0_0, txfilter);
105018781e08Smrg    OUT_RING_REG(R300_TX_FILTER1_0, 0);
105118781e08Smrg    OUT_RING_REG(R300_TX_FORMAT0_0, txformat0);
1052de2362d3Smrg    if (pPriv->is_planar)
105318781e08Smrg	OUT_RING_REG(R300_TX_FORMAT1_0, txformat1 | R300_TX_FORMAT_CACHE_HALF_REGION_0);
1054de2362d3Smrg    else
105518781e08Smrg	OUT_RING_REG(R300_TX_FORMAT1_0, txformat1);
105618781e08Smrg    OUT_RING_REG(R300_TX_FORMAT2_0, txpitch);
105718781e08Smrg    OUT_TEXTURE_REG(R300_TX_OFFSET_0, 0, src_bo);
105818781e08Smrg    ADVANCE_RING();
1059de2362d3Smrg
1060de2362d3Smrg    txenable = R300_TEX_0_ENABLE;
1061de2362d3Smrg
1062de2362d3Smrg    if (pPriv->is_planar) {
1063de2362d3Smrg	txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1064de2362d3Smrg		     (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
1065de2362d3Smrg		     R300_TXPITCH_EN);
1066de2362d3Smrg	txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64);
1067de2362d3Smrg	txpitch -= 1;
1068de2362d3Smrg	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1069de2362d3Smrg		    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
1070de2362d3Smrg		    R300_TX_MIN_FILTER_LINEAR |
1071de2362d3Smrg		    R300_TX_MAG_FILTER_LINEAR);
1072de2362d3Smrg
1073de2362d3Smrg	BEGIN_ACCEL_RELOC(12, 2);
107418781e08Smrg	OUT_RING_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
107518781e08Smrg	OUT_RING_REG(R300_TX_FILTER1_1, 0);
107618781e08Smrg	OUT_RING_REG(R300_TX_FORMAT0_1, txformat0);
107718781e08Smrg	OUT_RING_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
107818781e08Smrg	OUT_RING_REG(R300_TX_FORMAT2_1, txpitch);
107918781e08Smrg	OUT_TEXTURE_REG(R300_TX_OFFSET_1, pPriv->planeu_offset, src_bo);
108018781e08Smrg	OUT_RING_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
108118781e08Smrg	OUT_RING_REG(R300_TX_FILTER1_2, 0);
108218781e08Smrg	OUT_RING_REG(R300_TX_FORMAT0_2, txformat0);
108318781e08Smrg	OUT_RING_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
108418781e08Smrg	OUT_RING_REG(R300_TX_FORMAT2_2, txpitch);
108518781e08Smrg	OUT_TEXTURE_REG(R300_TX_OFFSET_2, pPriv->planev_offset, src_bo);
108618781e08Smrg	ADVANCE_RING();
1087de2362d3Smrg	txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
1088de2362d3Smrg    }
1089de2362d3Smrg
1090de2362d3Smrg    if (pPriv->bicubic_enabled) {
1091de2362d3Smrg	/* Size is 128x1 */
1092de2362d3Smrg	txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
1093de2362d3Smrg		     (0x0 << R300_TXHEIGHT_SHIFT) |
1094de2362d3Smrg		     R300_TXPITCH_EN);
1095de2362d3Smrg	/* Format is 32-bit floats, 4bpp */
1096de2362d3Smrg	txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
1097de2362d3Smrg	/* Pitch is 127 (128-1) */
1098de2362d3Smrg	txpitch = 0x7f;
1099de2362d3Smrg	/* Tex filter */
1100de2362d3Smrg	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
1101de2362d3Smrg		    R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
1102de2362d3Smrg		    R300_TX_MIN_FILTER_NEAREST |
1103de2362d3Smrg		    R300_TX_MAG_FILTER_NEAREST |
1104de2362d3Smrg		    (1 << R300_TX_ID_SHIFT));
1105de2362d3Smrg
1106de2362d3Smrg	BEGIN_ACCEL_RELOC(6, 1);
110718781e08Smrg	OUT_RING_REG(R300_TX_FILTER0_1, txfilter);
110818781e08Smrg	OUT_RING_REG(R300_TX_FILTER1_1, 0);
110918781e08Smrg	OUT_RING_REG(R300_TX_FORMAT0_1, txformat0);
111018781e08Smrg	OUT_RING_REG(R300_TX_FORMAT1_1, txformat1);
111118781e08Smrg	OUT_RING_REG(R300_TX_FORMAT2_1, txpitch);
111218781e08Smrg	OUT_TEXTURE_REG(R300_TX_OFFSET_1, 0, info->bicubic_bo);
111318781e08Smrg	ADVANCE_RING();
1114de2362d3Smrg
1115de2362d3Smrg	/* Enable tex 1 */
1116de2362d3Smrg	txenable |= R300_TEX_1_ENABLE;
1117de2362d3Smrg    }
1118de2362d3Smrg
1119de2362d3Smrg    /* setup the VAP */
1120de2362d3Smrg    if (info->accel_state->has_tcl) {
1121de2362d3Smrg	if (pPriv->bicubic_enabled)
112218781e08Smrg	    BEGIN_RING(2*7);
1123de2362d3Smrg	else
112418781e08Smrg	    BEGIN_RING(2*6);
1125de2362d3Smrg    } else {
1126de2362d3Smrg	if (pPriv->bicubic_enabled)
112718781e08Smrg	    BEGIN_RING(2*5);
1128de2362d3Smrg	else
112918781e08Smrg	    BEGIN_RING(2*4);
1130de2362d3Smrg    }
1131de2362d3Smrg
1132de2362d3Smrg    /* These registers define the number, type, and location of data submitted
1133de2362d3Smrg     * to the PVS unit or GA input (when PVS is disabled).
1134de2362d3Smrg     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
1135de2362d3Smrg     * enabled.  This memory provides the inputs to the vertex shader program
1136de2362d3Smrg     * and ordering is not important.  When PVS/TCL is disabled, this field maps
11370a1d3ae0Smrg     * directly to the GA input memory and the order is significant.  In
1138de2362d3Smrg     * PVS_BYPASS mode the order is as follows:
1139de2362d3Smrg     * Position
1140de2362d3Smrg     * Point Size
1141de2362d3Smrg     * Color 0-3
1142de2362d3Smrg     * Textures 0-7
1143de2362d3Smrg     * Fog
1144de2362d3Smrg     */
1145de2362d3Smrg    if (pPriv->bicubic_enabled) {
114618781e08Smrg	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0,
1147de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1148de2362d3Smrg		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1149de2362d3Smrg		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1150de2362d3Smrg		       R300_SIGNED_0 |
1151de2362d3Smrg		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1152de2362d3Smrg		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1153de2362d3Smrg		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1154de2362d3Smrg		       R300_SIGNED_1));
115518781e08Smrg	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1,
1156de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1157de2362d3Smrg		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1158de2362d3Smrg		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1159de2362d3Smrg		       R300_LAST_VEC_2 |
1160de2362d3Smrg		       R300_SIGNED_2));
1161de2362d3Smrg    } else {
116218781e08Smrg	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0,
1163de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1164de2362d3Smrg		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1165de2362d3Smrg		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1166de2362d3Smrg		       R300_SIGNED_0 |
1167de2362d3Smrg		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1168de2362d3Smrg		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1169de2362d3Smrg		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1170de2362d3Smrg		       R300_LAST_VEC_1 |
1171de2362d3Smrg		       R300_SIGNED_1));
1172de2362d3Smrg    }
1173de2362d3Smrg
1174de2362d3Smrg    /* load the vertex shader
1175de2362d3Smrg     * We pre-load vertex programs in RADEONInit3DEngine():
1176de2362d3Smrg     * - exa
1177de2362d3Smrg     * - Xv
1178de2362d3Smrg     * - Xv bicubic
1179de2362d3Smrg     * Here we select the offset of the vertex program we want to use
1180de2362d3Smrg     */
1181de2362d3Smrg    if (info->accel_state->has_tcl) {
1182de2362d3Smrg	if (pPriv->bicubic_enabled) {
118318781e08Smrg	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0,
1184de2362d3Smrg			  ((11 << R300_PVS_FIRST_INST_SHIFT) |
1185de2362d3Smrg			   (13 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1186de2362d3Smrg			   (13 << R300_PVS_LAST_INST_SHIFT)));
118718781e08Smrg	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1,
1188de2362d3Smrg			  (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1189de2362d3Smrg	} else {
119018781e08Smrg	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0,
1191de2362d3Smrg			  ((9 << R300_PVS_FIRST_INST_SHIFT) |
1192de2362d3Smrg			   (10 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1193de2362d3Smrg			   (10 << R300_PVS_LAST_INST_SHIFT)));
119418781e08Smrg	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1,
1195de2362d3Smrg			  (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1196de2362d3Smrg	}
1197de2362d3Smrg    }
1198de2362d3Smrg
1199de2362d3Smrg    /* Position and one set of 2 texture coordinates */
120018781e08Smrg    OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1201de2362d3Smrg    if (pPriv->bicubic_enabled)
120218781e08Smrg	OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1203de2362d3Smrg					       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1204de2362d3Smrg    else
120518781e08Smrg	OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
1206de2362d3Smrg
120718781e08Smrg    OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt);
120818781e08Smrg    ADVANCE_RING();
1209de2362d3Smrg
1210de2362d3Smrg    /* setup pixel shader */
1211de2362d3Smrg    if (pPriv->bicubic_state != BICUBIC_OFF) {
1212de2362d3Smrg	if (pPriv->bicubic_enabled) {
121318781e08Smrg	    BEGIN_RING(2*79);
1214de2362d3Smrg
1215de2362d3Smrg	    /* 4 components: 2 for tex0 and 2 for tex1 */
121618781e08Smrg	    OUT_RING_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1217de2362d3Smrg					  R300_RS_COUNT_HIRES_EN));
1218de2362d3Smrg
1219de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
122018781e08Smrg	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1221de2362d3Smrg
1222de2362d3Smrg	    /* Pixel stack frame size. */
122318781e08Smrg	    OUT_RING_REG(R300_US_PIXSIZE, 5);
1224de2362d3Smrg
1225de2362d3Smrg	    /* Indirection levels */
122618781e08Smrg	    OUT_RING_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
1227de2362d3Smrg					   R300_FIRST_TEX));
1228de2362d3Smrg
1229de2362d3Smrg	    /* Set nodes. */
123018781e08Smrg	    OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1231de2362d3Smrg						R300_ALU_CODE_SIZE(14) |
1232de2362d3Smrg						R300_TEX_CODE_OFFSET(0) |
1233de2362d3Smrg						R300_TEX_CODE_SIZE(6)));
1234de2362d3Smrg
1235de2362d3Smrg	    /* Nodes are allocated highest first, but executed lowest first */
123618781e08Smrg	    OUT_RING_REG(R300_US_CODE_ADDR_0, 0);
123718781e08Smrg	    OUT_RING_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
1238de2362d3Smrg						R300_ALU_SIZE(0) |
1239de2362d3Smrg						R300_TEX_START(0) |
1240de2362d3Smrg						R300_TEX_SIZE(0)));
124118781e08Smrg	    OUT_RING_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
1242de2362d3Smrg						R300_ALU_SIZE(9) |
1243de2362d3Smrg						R300_TEX_START(1) |
1244de2362d3Smrg						R300_TEX_SIZE(0)));
124518781e08Smrg	    OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
1246de2362d3Smrg						R300_ALU_SIZE(2) |
1247de2362d3Smrg						R300_TEX_START(2) |
1248de2362d3Smrg						R300_TEX_SIZE(3) |
1249de2362d3Smrg						R300_RGBA_OUT));
1250de2362d3Smrg
1251de2362d3Smrg	    /* ** BICUBIC FP ** */
1252de2362d3Smrg
1253de2362d3Smrg	    /* texcoord0 => temp0
1254de2362d3Smrg	     * texcoord1 => temp1 */
1255de2362d3Smrg
1256de2362d3Smrg	    // first node
1257de2362d3Smrg	    /* TEX temp2, temp1.rrr0, tex1, 1D */
125818781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
1259de2362d3Smrg						R300_TEX_ID(1) |
1260de2362d3Smrg						R300_TEX_SRC_ADDR(1) |
1261de2362d3Smrg						R300_TEX_DST_ADDR(2)));
1262de2362d3Smrg
1263de2362d3Smrg	    /* MOV temp1.r, temp1.ggg0 */
126418781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1265de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
1266de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1267de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
126818781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
1269de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
1270de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
127118781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1272de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1273de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1274de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
127518781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
1276de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1277de2362d3Smrg
1278de2362d3Smrg
1279de2362d3Smrg	    // second node
1280de2362d3Smrg	    /* TEX temp1, temp1, tex1, 1D */
128118781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
1282de2362d3Smrg						R300_TEX_ID(1) |
1283de2362d3Smrg						R300_TEX_SRC_ADDR(1) |
1284de2362d3Smrg						R300_TEX_DST_ADDR(1)));
1285de2362d3Smrg
1286de2362d3Smrg	    /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
128718781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1288de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
1289de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1290de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
129118781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
1292de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
1293de2362d3Smrg						    R300_ALU_RGB_ADDRD(3) |
1294de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
129518781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1296de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1297de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1298de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
129918781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
1300de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1301de2362d3Smrg
1302de2362d3Smrg
1303de2362d3Smrg	    /* MUL temp2.rg, temp2.rrr0, const0.rgb */
130418781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1305de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
1306de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1307de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
130818781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
1309de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
1310de2362d3Smrg						    R300_ALU_RGB_ADDRD(2) |
1311de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
131218781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1313de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1314de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1315de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
131618781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
1317de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1318de2362d3Smrg
1319de2362d3Smrg	    /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
132018781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1321de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
1322de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1323de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
132418781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
1325de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
1326de2362d3Smrg						    R300_ALU_RGB_ADDR2(3) |
1327de2362d3Smrg						    R300_ALU_RGB_ADDRD(4) |
1328de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
132918781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1330de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1331de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1332de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
133318781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
1334de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1335de2362d3Smrg
1336de2362d3Smrg	    /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
133718781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1338de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
1339de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1340de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
134118781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
1342de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
1343de2362d3Smrg						    R300_ALU_RGB_ADDR2(2) |
1344de2362d3Smrg						    R300_ALU_RGB_ADDRD(5) |
1345de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
134618781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1347de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1348de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1349de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
135018781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
1351de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1352de2362d3Smrg
1353de2362d3Smrg	    /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
135418781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1355de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
1356de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1357de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
135818781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
1359de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
1360de2362d3Smrg						    R300_ALU_RGB_ADDR2(3) |
1361de2362d3Smrg						    R300_ALU_RGB_ADDRD(3) |
1362de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
136318781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1364de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1365de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1366de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
136718781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
1368de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1369de2362d3Smrg
1370de2362d3Smrg	    /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
137118781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1372de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
1373de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1374de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
137518781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
1376de2362d3Smrg						    R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
1377de2362d3Smrg						    R300_ALU_RGB_ADDR2(2) |
1378de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
1379de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
138018781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1381de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1382de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1383de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
138418781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
1385de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1386de2362d3Smrg
1387de2362d3Smrg	    /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
138818781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1389de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1390de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1391de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
139218781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
1393de2362d3Smrg						    R300_ALU_RGB_ADDR2(1) |
1394de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
1395de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
139618781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1397de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1398de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1399de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
140018781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
1401de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1402de2362d3Smrg
1403de2362d3Smrg	    /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
140418781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1405de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1406de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1407de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
140818781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
1409de2362d3Smrg						    R300_ALU_RGB_ADDR2(3) |
1410de2362d3Smrg						    R300_ALU_RGB_ADDRD(2) |
1411de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
141218781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1413de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1414de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1415de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
141618781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
1417de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1418de2362d3Smrg
1419de2362d3Smrg	    /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
142018781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1421de2362d3Smrg						    R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1422de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1423de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
142418781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
1425de2362d3Smrg						    R300_ALU_RGB_ADDR2(5) |
1426de2362d3Smrg						    R300_ALU_RGB_ADDRD(3) |
1427de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
142818781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1429de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1430de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1431de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
143218781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
1433de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1434de2362d3Smrg
1435de2362d3Smrg	    /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
143618781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1437de2362d3Smrg						     R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1438de2362d3Smrg						     R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1439de2362d3Smrg						     R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
144018781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
1441de2362d3Smrg						     R300_ALU_RGB_ADDR2(4) |
1442de2362d3Smrg						     R300_ALU_RGB_ADDRD(0) |
1443de2362d3Smrg						     R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
144418781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1445de2362d3Smrg						       R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1446de2362d3Smrg						       R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1447de2362d3Smrg						       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
144818781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
1449de2362d3Smrg						       R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
1450de2362d3Smrg
1451de2362d3Smrg
1452de2362d3Smrg	    // third node
1453de2362d3Smrg	    /* TEX temp4, temp1.rg--, tex0, 1D */
145418781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
1455de2362d3Smrg						R300_TEX_ID(0) |
1456de2362d3Smrg						R300_TEX_SRC_ADDR(1) |
1457de2362d3Smrg						R300_TEX_DST_ADDR(4)));
1458de2362d3Smrg
1459de2362d3Smrg	    /* TEX temp3, temp3.rg--, tex0, 1D */
146018781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
1461de2362d3Smrg						R300_TEX_ID(0) |
1462de2362d3Smrg						R300_TEX_SRC_ADDR(3) |
1463de2362d3Smrg						R300_TEX_DST_ADDR(3)));
1464de2362d3Smrg
1465de2362d3Smrg	    /* TEX temp5, temp2.rg--, tex0, 1D */
146618781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
1467de2362d3Smrg						R300_TEX_ID(0) |
1468de2362d3Smrg						R300_TEX_SRC_ADDR(2) |
1469de2362d3Smrg						R300_TEX_DST_ADDR(5)));
1470de2362d3Smrg
1471de2362d3Smrg	    /* TEX temp0, temp0.rg--, tex0, 1D */
147218781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
1473de2362d3Smrg						R300_TEX_ID(0) |
1474de2362d3Smrg						R300_TEX_SRC_ADDR(0) |
1475de2362d3Smrg						R300_TEX_DST_ADDR(0)));
1476de2362d3Smrg
1477de2362d3Smrg	    /* LRP temp3, temp1.bbbb, temp4, temp3 ->
1478de2362d3Smrg	     * - PRESUB temps, temp4 - temp3
1479de2362d3Smrg	     * - MAD temp3, temp1.bbbb, temps, temp3 */
148018781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1481de2362d3Smrg						     R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
1482de2362d3Smrg						     R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
1483de2362d3Smrg						     R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
1484de2362d3Smrg						     R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
148518781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
1486de2362d3Smrg						     R300_ALU_RGB_ADDR1(4) |
1487de2362d3Smrg						     R300_ALU_RGB_ADDR2(1) |
1488de2362d3Smrg						     R300_ALU_RGB_ADDRD(3) |
1489de2362d3Smrg						     R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
149018781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1491de2362d3Smrg						       R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
1492de2362d3Smrg						       R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
1493de2362d3Smrg						       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
149418781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
1495de2362d3Smrg						       R300_ALU_ALPHA_ADDR1(4) |
1496de2362d3Smrg						       R300_ALU_ALPHA_ADDR2(1) |
1497de2362d3Smrg						       R300_ALU_ALPHA_ADDRD(3) |
1498de2362d3Smrg						       R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
1499de2362d3Smrg
1500de2362d3Smrg	    /* LRP temp0, temp1.bbbb, temp5, temp0 ->
1501de2362d3Smrg	     * - PRESUB temps, temp5 - temp0
1502de2362d3Smrg	     * - MAD temp0, temp1.bbbb, temps, temp0 */
150318781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1504de2362d3Smrg						     R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
1505de2362d3Smrg						     R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
1506de2362d3Smrg						     R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
1507de2362d3Smrg						     R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
1508de2362d3Smrg						     R300_ALU_RGB_INSERT_NOP));
150918781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
1510de2362d3Smrg						     R300_ALU_RGB_ADDR1(5) |
1511de2362d3Smrg						     R300_ALU_RGB_ADDR2(1) |
1512de2362d3Smrg						     R300_ALU_RGB_ADDRD(0) |
1513de2362d3Smrg						     R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
151418781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1515de2362d3Smrg						       R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
1516de2362d3Smrg						       R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
1517de2362d3Smrg						       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
151818781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
1519de2362d3Smrg						       R300_ALU_ALPHA_ADDR1(5) |
1520de2362d3Smrg						       R300_ALU_ALPHA_ADDR2(1) |
1521de2362d3Smrg						       R300_ALU_ALPHA_ADDRD(0) |
1522de2362d3Smrg						       R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
1523de2362d3Smrg
1524de2362d3Smrg	    /* LRP output, temp2.bbbb, temp3, temp0 ->
1525de2362d3Smrg	     * - PRESUB temps, temp3 - temp0
1526de2362d3Smrg	     * - MAD output, temp2.bbbb, temps, temp0 */
152718781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1528de2362d3Smrg						     R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
1529de2362d3Smrg						     R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
1530de2362d3Smrg						     R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
1531de2362d3Smrg						     R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
153218781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
1533de2362d3Smrg						     R300_ALU_RGB_ADDR1(3) |
1534de2362d3Smrg						     R300_ALU_RGB_ADDR2(2) |
1535de2362d3Smrg						     R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
153618781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1537de2362d3Smrg						       R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
1538de2362d3Smrg						       R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
1539de2362d3Smrg						       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
154018781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
1541de2362d3Smrg						       R300_ALU_ALPHA_ADDR1(3) |
1542de2362d3Smrg						       R300_ALU_ALPHA_ADDR2(2) |
1543de2362d3Smrg						       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A)));
1544de2362d3Smrg
1545de2362d3Smrg	    /* Shader constants. */
154618781e08Smrg	    OUT_RING_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
154718781e08Smrg	    OUT_RING_REG(R300_US_ALU_CONST_G(0), 0);
154818781e08Smrg	    OUT_RING_REG(R300_US_ALU_CONST_B(0), 0);
154918781e08Smrg	    OUT_RING_REG(R300_US_ALU_CONST_A(0), 0);
1550de2362d3Smrg
155118781e08Smrg	    OUT_RING_REG(R300_US_ALU_CONST_R(1), 0);
155218781e08Smrg	    OUT_RING_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
155318781e08Smrg	    OUT_RING_REG(R300_US_ALU_CONST_B(1), 0);
155418781e08Smrg	    OUT_RING_REG(R300_US_ALU_CONST_A(1), 0);
1555de2362d3Smrg
155618781e08Smrg	    ADVANCE_RING();
1557de2362d3Smrg	} else {
155818781e08Smrg	    BEGIN_RING(2*11);
1559de2362d3Smrg	    /* 2 components: 2 for tex0 */
156018781e08Smrg	    OUT_RING_REG(R300_RS_COUNT,
1561de2362d3Smrg                          ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1562de2362d3Smrg                           R300_RS_COUNT_HIRES_EN));
1563de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
156418781e08Smrg	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1565de2362d3Smrg
156618781e08Smrg	    OUT_RING_REG(R300_US_PIXSIZE, 0); /* highest temp used */
1567de2362d3Smrg
1568de2362d3Smrg	    /* Indirection levels */
156918781e08Smrg	    OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
1570de2362d3Smrg					   R300_FIRST_TEX));
1571de2362d3Smrg
157218781e08Smrg	    OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1573de2362d3Smrg						R300_ALU_CODE_SIZE(1) |
1574de2362d3Smrg						R300_TEX_CODE_OFFSET(0) |
1575de2362d3Smrg						R300_TEX_CODE_SIZE(1)));
1576de2362d3Smrg
157718781e08Smrg	    OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
1578de2362d3Smrg						R300_ALU_SIZE(0) |
1579de2362d3Smrg						R300_TEX_START(0) |
1580de2362d3Smrg						R300_TEX_SIZE(0) |
1581de2362d3Smrg						R300_RGBA_OUT));
1582de2362d3Smrg
1583de2362d3Smrg	    /* tex inst */
158418781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
1585de2362d3Smrg					       R300_TEX_DST_ADDR(0) |
1586de2362d3Smrg					       R300_TEX_ID(0) |
1587de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
1588de2362d3Smrg
1589de2362d3Smrg	    /* ALU inst */
1590de2362d3Smrg	    /* RGB */
159118781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
1592de2362d3Smrg                                                   R300_ALU_RGB_ADDR1(0) |
1593de2362d3Smrg                                                   R300_ALU_RGB_ADDR2(0) |
1594de2362d3Smrg                                                   R300_ALU_RGB_ADDRD(0) |
1595de2362d3Smrg                                                   R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1596de2362d3Smrg								       R300_ALU_RGB_MASK_G |
1597de2362d3Smrg								       R300_ALU_RGB_MASK_B)) |
1598de2362d3Smrg                                                   R300_ALU_RGB_TARGET_A));
159918781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1600de2362d3Smrg                                                   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1601de2362d3Smrg                                                   R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
1602de2362d3Smrg						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1603de2362d3Smrg                                                   R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1604de2362d3Smrg                                                   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1605de2362d3Smrg                                                   R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1606de2362d3Smrg                                                   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1607de2362d3Smrg                                                   R300_ALU_RGB_CLAMP));
1608de2362d3Smrg	    /* Alpha */
160918781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
1610de2362d3Smrg						     R300_ALU_ALPHA_ADDR1(0) |
1611de2362d3Smrg						     R300_ALU_ALPHA_ADDR2(0) |
1612de2362d3Smrg						     R300_ALU_ALPHA_ADDRD(0) |
1613de2362d3Smrg						     R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1614de2362d3Smrg						     R300_ALU_ALPHA_TARGET_A |
1615de2362d3Smrg						     R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
161618781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
1617de2362d3Smrg						     R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1618de2362d3Smrg						     R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
1619de2362d3Smrg						     R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1620de2362d3Smrg						     R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1621de2362d3Smrg						     R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1622de2362d3Smrg						     R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1623de2362d3Smrg						     R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1624de2362d3Smrg						     R300_ALU_ALPHA_CLAMP));
162518781e08Smrg	    ADVANCE_RING();
1626de2362d3Smrg	}
1627de2362d3Smrg    } else {
1628de2362d3Smrg	/*
1629de2362d3Smrg	 * y' = y - .0625
1630de2362d3Smrg	 * u' = u - .5
1631de2362d3Smrg	 * v' = v - .5;
1632de2362d3Smrg	 *
1633de2362d3Smrg	 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
1634de2362d3Smrg	 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
1635de2362d3Smrg	 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
1636de2362d3Smrg	 *
1637de2362d3Smrg	 * DP3 might look like the straightforward solution
1638de2362d3Smrg	 * but we'd need to move the texture yuv values in
1639de2362d3Smrg	 * the same reg for this to work. Therefore use MADs.
1640de2362d3Smrg	 * Brightness just adds to the off constant.
1641de2362d3Smrg	 * Contrast is multiplication of luminance.
1642de2362d3Smrg	 * Saturation and hue change the u and v coeffs.
1643de2362d3Smrg	 * Default values (before adjustments - depend on colorspace):
1644de2362d3Smrg	 * yco = 1.1643
1645de2362d3Smrg	 * uco = 0, -0.39173, 2.017
1646de2362d3Smrg	 * vco = 1.5958, -0.8129, 0
1647de2362d3Smrg	 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
1648de2362d3Smrg	 *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
1649de2362d3Smrg	 *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
1650de2362d3Smrg	 *
1651de2362d3Smrg	 * temp = MAD(yco, yuv.yyyy, off)
1652de2362d3Smrg	 * temp = MAD(uco, yuv.uuuu, temp)
1653de2362d3Smrg	 * result = MAD(vco, yuv.vvvv, temp)
1654de2362d3Smrg	 */
1655de2362d3Smrg	/* TODO: don't recalc consts always */
1656de2362d3Smrg	const float Loff = -0.0627;
1657de2362d3Smrg	const float Coff = -0.502;
1658de2362d3Smrg	float uvcosf, uvsinf;
1659de2362d3Smrg	float yco;
1660de2362d3Smrg	float uco[3], vco[3], off[3];
1661de2362d3Smrg	float bright, cont, gamma;
1662de2362d3Smrg	int ref = pPriv->transform_index;
1663de2362d3Smrg	Bool needgamma = FALSE;
1664de2362d3Smrg
1665de2362d3Smrg	cont = RTFContrast(pPriv->contrast);
1666de2362d3Smrg	bright = RTFBrightness(pPriv->brightness);
1667de2362d3Smrg	gamma = (float)pPriv->gamma / 1000.0;
1668de2362d3Smrg	uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
1669de2362d3Smrg	uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
1670de2362d3Smrg	/* overlay video also does pre-gamma contrast/sat adjust, should we? */
1671de2362d3Smrg
1672de2362d3Smrg	yco = trans[ref].RefLuma * cont;
1673de2362d3Smrg	uco[0] = -trans[ref].RefRCr * uvsinf;
1674de2362d3Smrg	uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
1675de2362d3Smrg	uco[2] = trans[ref].RefBCb * uvcosf;
1676de2362d3Smrg	vco[0] = trans[ref].RefRCr * uvcosf;
1677de2362d3Smrg	vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
1678de2362d3Smrg	vco[2] = trans[ref].RefBCb * uvsinf;
1679de2362d3Smrg	off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
1680de2362d3Smrg	off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
1681de2362d3Smrg	off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
1682de2362d3Smrg
1683de2362d3Smrg	if (gamma != 1.0) {
1684de2362d3Smrg	    needgamma = TRUE;
1685de2362d3Smrg	    /* note: gamma correction is out = in ^ gamma;
1686de2362d3Smrg	       gpu can only do LG2/EX2 therefore we transform into
1687de2362d3Smrg	       in ^ gamma = 2 ^ (log2(in) * gamma).
1688de2362d3Smrg	       Lots of scalar ops, unfortunately (better solution?) -
1689de2362d3Smrg	       without gamma that's 3 inst, with gamma it's 10...
1690de2362d3Smrg	       could use different gamma factors per channel,
1691de2362d3Smrg	       if that's of any use. */
1692de2362d3Smrg	}
1693de2362d3Smrg
1694de2362d3Smrg	if (pPriv->is_planar) {
169518781e08Smrg	    BEGIN_RING(2 * (needgamma ? (28 + 33) : 33));
1696de2362d3Smrg	    /* 2 components: same 2 for tex0/1/2 */
169718781e08Smrg	    OUT_RING_REG(R300_RS_COUNT,
1698de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1699de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
1700de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
170118781e08Smrg	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1702de2362d3Smrg
170318781e08Smrg	    OUT_RING_REG(R300_US_PIXSIZE, 2); /* highest temp used */
1704de2362d3Smrg
1705de2362d3Smrg	    /* Indirection levels */
170618781e08Smrg	    OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
1707de2362d3Smrg					   R300_FIRST_TEX));
1708de2362d3Smrg
170918781e08Smrg	    OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1710de2362d3Smrg						R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
1711de2362d3Smrg						R300_TEX_CODE_OFFSET(0) |
1712de2362d3Smrg						R300_TEX_CODE_SIZE(3)));
1713de2362d3Smrg
171418781e08Smrg	    OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
1715de2362d3Smrg						R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
1716de2362d3Smrg						R300_TEX_START(0) |
1717de2362d3Smrg						R300_TEX_SIZE(2) |
1718de2362d3Smrg						R300_RGBA_OUT));
1719de2362d3Smrg
1720de2362d3Smrg	    /* tex inst */
172118781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
1722de2362d3Smrg					       R300_TEX_DST_ADDR(2) |
1723de2362d3Smrg					       R300_TEX_ID(0) |
1724de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
172518781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
1726de2362d3Smrg					       R300_TEX_DST_ADDR(1) |
1727de2362d3Smrg					       R300_TEX_ID(1) |
1728de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
172918781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
1730de2362d3Smrg					       R300_TEX_DST_ADDR(0) |
1731de2362d3Smrg					       R300_TEX_ID(2) |
1732de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
1733de2362d3Smrg
1734de2362d3Smrg	    /* ALU inst */
1735de2362d3Smrg	    /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
173618781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
1737de2362d3Smrg						    R300_ALU_RGB_ADDR1(2) |
1738de2362d3Smrg						    R300_ALU_RGB_ADDR2(0) |
1739de2362d3Smrg						    R300_ALU_RGB_ADDRD(2) |
1740de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
174118781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
1742de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1743de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1744de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1745de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
1746de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1747de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1748de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
1749de2362d3Smrg	    /* alpha nop, but need to set up alpha source for rgb usage */
175018781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
1751de2362d3Smrg						      R300_ALU_ALPHA_ADDR1(2) |
1752de2362d3Smrg						      R300_ALU_ALPHA_ADDR2(0) |
1753de2362d3Smrg						      R300_ALU_ALPHA_ADDRD(2) |
1754de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
175518781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1756de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1757de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1758de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1759de2362d3Smrg
1760de2362d3Smrg	    /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
176118781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
1762de2362d3Smrg						    R300_ALU_RGB_ADDR1(1) |
1763de2362d3Smrg						    R300_ALU_RGB_ADDR2(2) |
1764de2362d3Smrg						    R300_ALU_RGB_ADDRD(2) |
1765de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
176618781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1767de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1768de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1769de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1770de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
1771de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1772de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1773de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
1774de2362d3Smrg	    /* alpha nop */
177518781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) |
1776de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
177718781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1778de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1779de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1780de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1781de2362d3Smrg
1782de2362d3Smrg	    /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
178318781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
1784de2362d3Smrg						    R300_ALU_RGB_ADDR1(0) |
1785de2362d3Smrg						    R300_ALU_RGB_ADDR2(2) |
1786de2362d3Smrg						    R300_ALU_RGB_ADDRD(0) |
1787de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
1788de2362d3Smrg						    (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
178918781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1790de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1791de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
1792de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1793de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
1794de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1795de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1796de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1797de2362d3Smrg						    R300_ALU_RGB_CLAMP));
1798de2362d3Smrg	    /* write alpha 1 */
179918781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
1800de2362d3Smrg						      R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1801de2362d3Smrg						      R300_ALU_ALPHA_TARGET_A));
180218781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1803de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1804de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1805de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
1806de2362d3Smrg
1807de2362d3Smrg	    if (needgamma) {
1808de2362d3Smrg		/* rgb temp0.r = op_sop, set up src0 reg */
180918781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
1810de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
181118781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(3),
1812de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
1813de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
1814de2362d3Smrg		/* alpha lg2 temp0, temp0.r */
181518781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
1816de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
181718781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
1818de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
1819de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1820de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1821de2362d3Smrg
1822de2362d3Smrg		/* rgb temp0.g = op_sop, set up src0 reg */
182318781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
1824de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
182518781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(4),
1826de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
1827de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
1828de2362d3Smrg		/* alpha lg2 temp0, temp0.g */
182918781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
1830de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
183118781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
1832de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
1833de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1834de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1835de2362d3Smrg
1836de2362d3Smrg		/* rgb temp0.b = op_sop, set up src0 reg */
183718781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
1838de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
183918781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(5),
1840de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
1841de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
1842de2362d3Smrg		/* alpha lg2 temp0, temp0.b */
184318781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
1844de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
184518781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
1846de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
1847de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1848de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1849de2362d3Smrg
1850de2362d3Smrg		/* MUL const1, temp1, temp0 */
185118781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
1852de2362d3Smrg							R300_ALU_RGB_ADDR1(0) |
1853de2362d3Smrg							R300_ALU_RGB_ADDR2(0) |
1854de2362d3Smrg							R300_ALU_RGB_ADDRD(0) |
1855de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
185618781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1857de2362d3Smrg							R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1858de2362d3Smrg							R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
1859de2362d3Smrg							R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1860de2362d3Smrg							R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1861de2362d3Smrg							R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1862de2362d3Smrg							R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1863de2362d3Smrg							R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
1864de2362d3Smrg		/* alpha nop, but set up const1 */
186518781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
1866de2362d3Smrg							  R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
1867de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
186818781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1869de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1870de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1871de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1872de2362d3Smrg
1873de2362d3Smrg		/* rgb out0.r = op_sop, set up src0 reg */
187418781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
1875de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
1876de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
187718781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(7),
1878de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
1879de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
1880de2362d3Smrg		/* alpha ex2 temp0, temp0.r */
188118781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
1882de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
188318781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
1884de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
1885de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1886de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1887de2362d3Smrg
1888de2362d3Smrg		/* rgb out0.g = op_sop, set up src0 reg */
188918781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
1890de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
1891de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
189218781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(8),
1893de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
1894de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
1895de2362d3Smrg		/* alpha ex2 temp0, temp0.g */
189618781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
1897de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
189818781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
1899de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
1900de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1901de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1902de2362d3Smrg
1903de2362d3Smrg		/* rgb out0.b = op_sop, set up src0 reg */
190418781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
1905de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
1906de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
190718781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(9),
1908de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
1909de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
1910de2362d3Smrg		/* alpha ex2 temp0, temp0.b */
191118781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
1912de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
191318781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
1914de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
1915de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1916de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1917de2362d3Smrg	    }
1918de2362d3Smrg	} else {
191918781e08Smrg	    BEGIN_RING(2 * (needgamma ? (28 + 31) : 31));
1920de2362d3Smrg	    /* 2 components */
192118781e08Smrg	    OUT_RING_REG(R300_RS_COUNT,
1922de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1923de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
1924de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
192518781e08Smrg	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1926de2362d3Smrg
192718781e08Smrg	    OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1928de2362d3Smrg
1929de2362d3Smrg	    /* Indirection levels */
193018781e08Smrg	    OUT_RING_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
1931de2362d3Smrg					   R300_FIRST_TEX));
1932de2362d3Smrg
193318781e08Smrg	    OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1934de2362d3Smrg						R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
1935de2362d3Smrg						R300_TEX_CODE_OFFSET(0) |
1936de2362d3Smrg						R300_TEX_CODE_SIZE(1)));
1937de2362d3Smrg
193818781e08Smrg	    OUT_RING_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
1939de2362d3Smrg						R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
1940de2362d3Smrg						R300_TEX_START(0) |
1941de2362d3Smrg						R300_TEX_SIZE(0) |
1942de2362d3Smrg						R300_RGBA_OUT));
1943de2362d3Smrg
1944de2362d3Smrg	    /* tex inst */
194518781e08Smrg	    OUT_RING_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
1946de2362d3Smrg					       R300_TEX_DST_ADDR(0) |
1947de2362d3Smrg					       R300_TEX_ID(0) |
1948de2362d3Smrg					       R300_TEX_INST(R300_TEX_INST_LD)));
1949de2362d3Smrg
1950de2362d3Smrg	    /* ALU inst */
1951de2362d3Smrg	    /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
195218781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
1953de2362d3Smrg						    R300_ALU_RGB_ADDR1(0) |
1954de2362d3Smrg						    R300_ALU_RGB_ADDR2(0) |
1955de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
1956de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
195718781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
1958de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1959de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) |
1960de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1961de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
1962de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1963de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1964de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
1965de2362d3Smrg	    /* alpha nop, but need to set up alpha source for rgb usage */
196618781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
1967de2362d3Smrg						      R300_ALU_ALPHA_ADDR1(0) |
1968de2362d3Smrg						      R300_ALU_ALPHA_ADDR2(0) |
1969de2362d3Smrg						      R300_ALU_ALPHA_ADDRD(0) |
1970de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
197118781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1972de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1973de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1974de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1975de2362d3Smrg
1976de2362d3Smrg	    /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
197718781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
1978de2362d3Smrg						    R300_ALU_RGB_ADDR1(0) |
1979de2362d3Smrg						    R300_ALU_RGB_ADDR2(1) |
1980de2362d3Smrg						    R300_ALU_RGB_ADDRD(1) |
1981de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
198218781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
1983de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1984de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) |
1985de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1986de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
1987de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1988de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1989de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
1990de2362d3Smrg	    /* alpha nop */
199118781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
1992de2362d3Smrg						      R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
199318781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1994de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
1995de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
1996de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
1997de2362d3Smrg
1998de2362d3Smrg	    /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
199918781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
2000de2362d3Smrg						    R300_ALU_RGB_ADDR1(0) |
2001de2362d3Smrg						    R300_ALU_RGB_ADDR2(1) |
2002de2362d3Smrg						    R300_ALU_RGB_ADDRD(0) |
2003de2362d3Smrg						    R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
2004de2362d3Smrg						    (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
200518781e08Smrg	    OUT_RING_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
2006de2362d3Smrg						    R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2007de2362d3Smrg						    R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) |
2008de2362d3Smrg						    R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2009de2362d3Smrg						    R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
2010de2362d3Smrg						    R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2011de2362d3Smrg						    R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2012de2362d3Smrg						    R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
2013de2362d3Smrg						    R300_ALU_RGB_CLAMP));
2014de2362d3Smrg	    /* write alpha 1 */
201518781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
2016de2362d3Smrg						      R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
2017de2362d3Smrg						      R300_ALU_ALPHA_TARGET_A));
201818781e08Smrg	    OUT_RING_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2019de2362d3Smrg						      R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2020de2362d3Smrg						      R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2021de2362d3Smrg						      R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
2022de2362d3Smrg
2023de2362d3Smrg	    if (needgamma) {
2024de2362d3Smrg		/* rgb temp0.r = op_sop, set up src0 reg */
202518781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
2026de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
202718781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(3),
2028de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2029de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2030de2362d3Smrg		/* alpha lg2 temp0, temp0.r */
203118781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
2032de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
203318781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2034de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
2035de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2036de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2037de2362d3Smrg
2038de2362d3Smrg		/* rgb temp0.g = op_sop, set up src0 reg */
203918781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
2040de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
204118781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(4),
2042de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2043de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2044de2362d3Smrg		/* alpha lg2 temp0, temp0.g */
204518781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
2046de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
204718781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2048de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
2049de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2050de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2051de2362d3Smrg
2052de2362d3Smrg		/* rgb temp0.b = op_sop, set up src0 reg */
205318781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
2054de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
205518781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(5),
2056de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2057de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2058de2362d3Smrg		/* alpha lg2 temp0, temp0.b */
205918781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
2060de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
206118781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
2062de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
2063de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2064de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2065de2362d3Smrg
2066de2362d3Smrg		/* MUL const1, temp1, temp0 */
206718781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
2068de2362d3Smrg							R300_ALU_RGB_ADDR1(0) |
2069de2362d3Smrg							R300_ALU_RGB_ADDR2(0) |
2070de2362d3Smrg							R300_ALU_RGB_ADDRD(0) |
2071de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
207218781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
2073de2362d3Smrg							R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
2074de2362d3Smrg							R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
2075de2362d3Smrg							R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
2076de2362d3Smrg							R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
2077de2362d3Smrg							R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
2078de2362d3Smrg							R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
2079de2362d3Smrg							R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
2080de2362d3Smrg		/* alpha nop, but set up const1 */
208118781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
2082de2362d3Smrg							  R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
2083de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
208418781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
2085de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
2086de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2087de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2088de2362d3Smrg
2089de2362d3Smrg		/* rgb out0.r = op_sop, set up src0 reg */
209018781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
2091de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
2092de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
209318781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(7),
2094de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2095de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2096de2362d3Smrg		/* alpha ex2 temp0, temp0.r */
209718781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
2098de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
209918781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2100de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
2101de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2102de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2103de2362d3Smrg
2104de2362d3Smrg		/* rgb out0.g = op_sop, set up src0 reg */
210518781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
2106de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
2107de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
210818781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(8),
2109de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2110de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2111de2362d3Smrg		/* alpha ex2 temp0, temp0.g */
211218781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
2113de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
211418781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2115de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
2116de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2117de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2118de2362d3Smrg
2119de2362d3Smrg		/* rgb out0.b = op_sop, set up src0 reg */
212018781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
2121de2362d3Smrg							R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
2122de2362d3Smrg							R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
212318781e08Smrg		OUT_RING_REG(R300_US_ALU_RGB_INST(9),
2124de2362d3Smrg			      R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
2125de2362d3Smrg			      R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
2126de2362d3Smrg		/* alpha ex2 temp0, temp0.b */
212718781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
2128de2362d3Smrg							  R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
212918781e08Smrg		OUT_RING_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
2130de2362d3Smrg							  R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
2131de2362d3Smrg							  R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
2132de2362d3Smrg							  R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
2133de2362d3Smrg	    }
2134de2362d3Smrg	}
2135de2362d3Smrg
2136de2362d3Smrg	/* Shader constants. */
2137de2362d3Smrg	/* constant 0: off, yco */
213818781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
213918781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
214018781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
214118781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
2142de2362d3Smrg	/* constant 1: uco */
214318781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
214418781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
214518781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
214618781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
2147de2362d3Smrg	/* constant 2: vco */
214818781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
214918781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
215018781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
215118781e08Smrg	OUT_RING_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
2152de2362d3Smrg
215318781e08Smrg	ADVANCE_RING();
2154de2362d3Smrg    }
2155de2362d3Smrg
2156de2362d3Smrg    BEGIN_ACCEL_RELOC(6, 2);
215718781e08Smrg    OUT_RING_REG(R300_TX_INVALTAGS, 0);
215818781e08Smrg    OUT_RING_REG(R300_TX_ENABLE, txenable);
2159de2362d3Smrg
2160de2362d3Smrg    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap);
2161de2362d3Smrg    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap);
2162de2362d3Smrg
2163de2362d3Smrg    /* no need to enable blending */
216418781e08Smrg    OUT_RING_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
2165de2362d3Smrg
216618781e08Smrg    OUT_RING_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count);
216718781e08Smrg    ADVANCE_RING();
2168de2362d3Smrg
2169de2362d3Smrg    if (pPriv->vsync) {
2170de2362d3Smrg	xf86CrtcPtr crtc;
2171de2362d3Smrg	if (pPriv->desired_crtc)
2172de2362d3Smrg	    crtc = pPriv->desired_crtc;
2173de2362d3Smrg	else
217418781e08Smrg	    crtc = radeon_pick_best_crtc(pScrn, FALSE,
2175de2362d3Smrg					 pPriv->drw_x,
2176de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
2177de2362d3Smrg					 pPriv->drw_y,
2178de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
2179de2362d3Smrg	if (crtc)
218018781e08Smrg	    RADEONWaitForVLine(pScrn, pPixmap,
218118781e08Smrg			       crtc,
218218781e08Smrg			       pPriv->drw_y - crtc->y,
218318781e08Smrg			       (pPriv->drw_y - crtc->y) + pPriv->dst_h);
2184de2362d3Smrg    }
2185de2362d3Smrg
2186de2362d3Smrg    return TRUE;
2187de2362d3Smrg}
2188de2362d3Smrg
2189de2362d3Smrgstatic void
219018781e08SmrgR300DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
2191de2362d3Smrg{
2192de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2193de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
2194de2362d3Smrg    int dstxoff, dstyoff;
2195de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
2196de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
2197de2362d3Smrg
2198de2362d3Smrg#ifdef COMPOSITE
2199de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
2200de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
2201de2362d3Smrg#else
2202de2362d3Smrg    dstxoff = 0;
2203de2362d3Smrg    dstyoff = 0;
2204de2362d3Smrg#endif
2205de2362d3Smrg
220618781e08Smrg    if (!R300PrepareTexturedVideo(pScrn, pPriv))
2207de2362d3Smrg	return;
2208de2362d3Smrg
2209de2362d3Smrg    /*
2210de2362d3Smrg     * Rendering of the actual polygon is done in two different
2211de2362d3Smrg     * ways depending on chip generation:
2212de2362d3Smrg     *
2213de2362d3Smrg     * < R300:
2214de2362d3Smrg     *
2215de2362d3Smrg     *     These chips can render a rectangle in one pass, so
2216de2362d3Smrg     *     handling is pretty straight-forward.
2217de2362d3Smrg     *
2218de2362d3Smrg     * >= R300:
2219de2362d3Smrg     *
2220de2362d3Smrg     *     These chips can accept a quad, but will render it as
2221de2362d3Smrg     *     two triangles which results in a diagonal tear. Instead
2222de2362d3Smrg     *     We render a single, large triangle and use the scissor
2223de2362d3Smrg     *     functionality to restrict it to the desired rectangle.
2224de2362d3Smrg     *     Due to guardband limits on r3xx/r4xx, we can only use
2225de2362d3Smrg     *     the single triangle up to 2560/4021 pixels; above that we
2226de2362d3Smrg     *     render as a quad.
2227de2362d3Smrg     */
2228de2362d3Smrg
2229de2362d3Smrg    while (nBox--) {
2230de2362d3Smrg	float srcX, srcY, srcw, srch;
2231de2362d3Smrg	int dstX, dstY, dstw, dsth;
2232de2362d3Smrg	Bool use_quad = FALSE;
2233de2362d3Smrg	int draw_size = 4 * pPriv->vtx_count + 4 + 2 + 3;
2234de2362d3Smrg
2235de2362d3Smrg	if (draw_size > radeon_cs_space_remaining(pScrn)) {
223618781e08Smrg	    radeon_cs_flush_indirect(pScrn);
223718781e08Smrg	    if (!R300PrepareTexturedVideo(pScrn, pPriv))
2238de2362d3Smrg		return;
2239de2362d3Smrg	}
2240de2362d3Smrg
2241de2362d3Smrg	dstX = pBox->x1 + dstxoff;
2242de2362d3Smrg	dstY = pBox->y1 + dstyoff;
2243de2362d3Smrg	dstw = pBox->x2 - pBox->x1;
2244de2362d3Smrg	dsth = pBox->y2 - pBox->y1;
2245de2362d3Smrg
2246de2362d3Smrg	srcX = pPriv->src_x;
2247de2362d3Smrg	srcX += ((pBox->x1 - pPriv->drw_x) *
2248de2362d3Smrg		 pPriv->src_w) / (float)pPriv->dst_w;
2249de2362d3Smrg	srcY = pPriv->src_y;
2250de2362d3Smrg	srcY += ((pBox->y1 - pPriv->drw_y) *
2251de2362d3Smrg		 pPriv->src_h) / (float)pPriv->dst_h;
2252de2362d3Smrg
2253de2362d3Smrg	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
2254de2362d3Smrg	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
2255de2362d3Smrg
2256de2362d3Smrg	if (IS_R400_3D) {
2257de2362d3Smrg	    if ((dstw+dsth) > 4021)
2258de2362d3Smrg		use_quad = TRUE;
2259de2362d3Smrg	} else {
2260de2362d3Smrg	    if ((dstw+dsth) > 2560)
2261de2362d3Smrg		use_quad = TRUE;
2262de2362d3Smrg	}
2263de2362d3Smrg	/*
2264de2362d3Smrg	 * Set up the scissor area to that of the output size.
2265de2362d3Smrg	 */
226618781e08Smrg	BEGIN_RING(2*2);
2267de2362d3Smrg	/* R300 has an offset */
226818781e08Smrg	OUT_RING_REG(R300_SC_SCISSOR0, (((dstX + 1440) << R300_SCISSOR_X_SHIFT) |
2269de2362d3Smrg					 ((dstY + 1440) << R300_SCISSOR_Y_SHIFT)));
227018781e08Smrg	OUT_RING_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2271de2362d3Smrg					 ((dstY + dsth + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
227218781e08Smrg	ADVANCE_RING();
2273de2362d3Smrg
2274de2362d3Smrg	if (use_quad) {
2275de2362d3Smrg	    BEGIN_RING(4 * pPriv->vtx_count + 4);
2276de2362d3Smrg	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2277de2362d3Smrg				4 * pPriv->vtx_count));
2278de2362d3Smrg	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2279de2362d3Smrg		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2280de2362d3Smrg		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2281de2362d3Smrg	} else {
2282de2362d3Smrg	    BEGIN_RING(3 * pPriv->vtx_count + 4);
2283de2362d3Smrg	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2284de2362d3Smrg				3 * pPriv->vtx_count));
2285de2362d3Smrg	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
2286de2362d3Smrg		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2287de2362d3Smrg		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2288de2362d3Smrg	}
2289de2362d3Smrg
2290de2362d3Smrg	if (pPriv->bicubic_enabled) {
2291de2362d3Smrg		/*
2292de2362d3Smrg		 * This code is only executed on >= R300, so we don't
2293de2362d3Smrg		 * have to deal with the legacy handling.
2294de2362d3Smrg		 */
2295de2362d3Smrg	    if (use_quad) {
2296de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)dstY,
2297de2362d3Smrg			  (float)srcX / pPriv->w,          (float)srcY / pPriv->h,
2298de2362d3Smrg			  (float)srcX + 0.5,               (float)srcY + 0.5);
2299de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)(dstY + dsth),
2300de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h,
2301de2362d3Smrg			  (float)srcX + 0.5,               (float)(srcY + srch) + 0.5);
2302de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)(dstY + dsth),
2303de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h,
2304de2362d3Smrg			  (float)(srcX + srcw) + 0.5,      (float)(srcY + srch) + 0.5);
2305de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw),            (float)dstY,
2306de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h,
2307de2362d3Smrg			  (float)(srcX + srcw) + 0.5,      (float)srcY + 0.5);
2308de2362d3Smrg	    } else {
2309de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)dstY,
2310de2362d3Smrg			  (float)srcX / pPriv->w,          (float)srcY / pPriv->h,
2311de2362d3Smrg			  (float)srcX + 0.5,               (float)srcY + 0.5);
2312de2362d3Smrg		VTX_OUT_6((float)dstX,                     (float)(dstY + dstw + dsth),
2313de2362d3Smrg			  (float)srcX / pPriv->w,
2314de2362d3Smrg			  ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h,
2315de2362d3Smrg			  (float)srcX + 0.5,
2316de2362d3Smrg			  (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
2317de2362d3Smrg		VTX_OUT_6((float)(dstX + dstw + dsth),     (float)dstY,
2318de2362d3Smrg			  ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
2319de2362d3Smrg			  (float)srcY / pPriv->h,
2320de2362d3Smrg			  (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
2321de2362d3Smrg			  (float)srcY + 0.5);
2322de2362d3Smrg	    }
2323de2362d3Smrg	} else {
2324de2362d3Smrg	    if (use_quad) {
2325de2362d3Smrg		VTX_OUT_4((float)dstX,                     (float)dstY,
2326de2362d3Smrg			  (float)srcX / pPriv->w,          (float)srcY / pPriv->h);
2327de2362d3Smrg		VTX_OUT_4((float)dstX,                     (float)(dstY + dsth),
2328de2362d3Smrg			  (float)srcX / pPriv->w,          (float)(srcY + srch) / pPriv->h);
2329de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)(dstY + dsth),
2330de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)(srcY + srch) / pPriv->h);
2331de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw),            (float)dstY,
2332de2362d3Smrg			  (float)(srcX + srcw) / pPriv->w, (float)srcY / pPriv->h);
2333de2362d3Smrg	    } else {
2334de2362d3Smrg		/*
2335de2362d3Smrg		 * Render a big, scissored triangle. This means
2336de2362d3Smrg		 * increasing the triangle size and adjusting
2337de2362d3Smrg		 * texture coordinates.
2338de2362d3Smrg		 */
2339de2362d3Smrg		VTX_OUT_4((float)dstX,                 (float)dstY,
2340de2362d3Smrg			  (float)srcX / pPriv->w,      (float)srcY / pPriv->h);
2341de2362d3Smrg		VTX_OUT_4((float)dstX,                 (float)(dstY + dsth + dstw),
2342de2362d3Smrg			  (float)srcX / pPriv->w,
2343de2362d3Smrg			  ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h);
2344de2362d3Smrg		VTX_OUT_4((float)(dstX + dstw + dsth), (float)dstY,
2345de2362d3Smrg			  ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
2346de2362d3Smrg			  (float)srcY / pPriv->h);
2347de2362d3Smrg	    }
2348de2362d3Smrg	}
2349de2362d3Smrg
2350de2362d3Smrg	/* flushing is pipelined, free/finish is not */
235118781e08Smrg	OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
2352de2362d3Smrg
2353de2362d3Smrg	ADVANCE_RING();
2354de2362d3Smrg
2355de2362d3Smrg	pBox++;
2356de2362d3Smrg    }
2357de2362d3Smrg
235818781e08Smrg    BEGIN_RING(2*3);
235918781e08Smrg    OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA);
236018781e08Smrg    OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
236118781e08Smrg    OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
236218781e08Smrg    ADVANCE_RING();
2363de2362d3Smrg
2364de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
2365de2362d3Smrg}
2366de2362d3Smrg
2367de2362d3Smrgstatic Bool
236818781e08SmrgR500PrepareTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
2369de2362d3Smrg{
2370de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2371de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
2372de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
2373de2362d3Smrg    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
237418781e08Smrg    uint32_t txfilter, txformat0, txformat1, txpitch, us_format = 0;
2375de2362d3Smrg    uint32_t dst_pitch, dst_format;
237618781e08Smrg    uint32_t txenable, colorpitch;
2377de2362d3Smrg    uint32_t output_fmt;
2378de2362d3Smrg    int pixel_shift, out_size = 6;
237918781e08Smrg    int ret;
2380de2362d3Smrg
238118781e08Smrg    radeon_cs_space_reset_bos(info->cs);
238218781e08Smrg    radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
23837821949aSmrg
238418781e08Smrg    if (pPriv->bicubic_enabled)
238539413783Smrg	radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo,
238639413783Smrg					  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
238718781e08Smrg
238818781e08Smrg    driver_priv = exaGetPixmapDriverPrivate(pPixmap);
238939413783Smrg    radeon_cs_space_add_persistent_bo(info->cs, driver_priv->bo->bo.radeon, 0,
239039413783Smrg				      RADEON_GEM_DOMAIN_VRAM);
239118781e08Smrg
239218781e08Smrg    ret = radeon_cs_space_check(info->cs);
239318781e08Smrg    if (ret) {
239418781e08Smrg	ErrorF("Not enough RAM to hw accel xv operation\n");
239518781e08Smrg	return FALSE;
2396de2362d3Smrg    }
2397de2362d3Smrg
2398de2362d3Smrg    pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
2399de2362d3Smrg
240018781e08Smrg    dst_pitch = exaGetPixmapPitch(pPixmap);
240118781e08Smrg    RADEON_SWITCH_TO_3D();
2402de2362d3Smrg
2403de2362d3Smrg    if (pPriv->bicubic_enabled)
2404de2362d3Smrg	pPriv->vtx_count = 6;
2405de2362d3Smrg    else
2406de2362d3Smrg	pPriv->vtx_count = 4;
2407de2362d3Smrg
2408de2362d3Smrg    switch (pPixmap->drawable.bitsPerPixel) {
2409de2362d3Smrg    case 16:
2410de2362d3Smrg	if (pPixmap->drawable.depth == 15)
2411de2362d3Smrg	    dst_format = R300_COLORFORMAT_ARGB1555;
2412de2362d3Smrg	else
2413de2362d3Smrg	    dst_format = R300_COLORFORMAT_RGB565;
2414de2362d3Smrg	break;
2415de2362d3Smrg    case 32:
2416de2362d3Smrg	dst_format = R300_COLORFORMAT_ARGB8888;
2417de2362d3Smrg	break;
2418de2362d3Smrg    default:
2419de2362d3Smrg	return FALSE;
2420de2362d3Smrg    }
2421de2362d3Smrg
2422de2362d3Smrg    output_fmt = (R300_OUT_FMT_C4_8 |
2423de2362d3Smrg		  R300_OUT_FMT_C0_SEL_BLUE |
2424de2362d3Smrg		  R300_OUT_FMT_C1_SEL_GREEN |
2425de2362d3Smrg		  R300_OUT_FMT_C2_SEL_RED |
2426de2362d3Smrg		  R300_OUT_FMT_C3_SEL_ALPHA);
2427de2362d3Smrg
2428de2362d3Smrg    colorpitch = dst_pitch >> pixel_shift;
2429de2362d3Smrg    colorpitch |= dst_format;
2430de2362d3Smrg
2431de2362d3Smrg    if (RADEONTilingEnabled(pScrn, pPixmap))
2432de2362d3Smrg	colorpitch |= R300_COLORTILE;
2433de2362d3Smrg
2434de2362d3Smrg    if (((pPriv->bicubic_state == BICUBIC_OFF)) &&
2435de2362d3Smrg        (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12))
2436de2362d3Smrg	pPriv->is_planar = TRUE;
2437de2362d3Smrg    else
2438de2362d3Smrg	pPriv->is_planar = FALSE;
2439de2362d3Smrg
2440de2362d3Smrg    if (pPriv->is_planar) {
2441de2362d3Smrg	txformat1 = R300_TX_FORMAT_X8;
2442de2362d3Smrg	txpitch = pPriv->src_pitch;
2443de2362d3Smrg    } else {
2444de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
2445de2362d3Smrg	    txformat1 = R300_TX_FORMAT_YVYU422;
2446de2362d3Smrg	else
2447de2362d3Smrg	    txformat1 = R300_TX_FORMAT_VYUY422;
2448de2362d3Smrg
2449de2362d3Smrg	if (pPriv->bicubic_state != BICUBIC_OFF)
2450de2362d3Smrg	    txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
2451de2362d3Smrg
2452de2362d3Smrg	/* pitch is in pixels */
2453de2362d3Smrg	txpitch = pPriv->src_pitch / 2;
2454de2362d3Smrg    }
2455de2362d3Smrg    txpitch -= 1;
2456de2362d3Smrg
2457de2362d3Smrg    txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
2458de2362d3Smrg		 (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
2459de2362d3Smrg		 R300_TXPITCH_EN);
2460de2362d3Smrg
2461de2362d3Smrg    txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
2462de2362d3Smrg		R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
2463de2362d3Smrg		R300_TX_MAG_FILTER_LINEAR |
2464de2362d3Smrg		R300_TX_MIN_FILTER_LINEAR |
2465de2362d3Smrg		(0 << R300_TX_ID_SHIFT));
2466de2362d3Smrg
2467de2362d3Smrg
2468de2362d3Smrg    if ((pPriv->w - 1) & 0x800)
2469de2362d3Smrg	txpitch |= R500_TXWIDTH_11;
2470de2362d3Smrg
2471de2362d3Smrg    if ((pPriv->h - 1) & 0x800)
2472de2362d3Smrg	txpitch |= R500_TXHEIGHT_11;
2473de2362d3Smrg
2474de2362d3Smrg    if (info->ChipFamily == CHIP_FAMILY_R520) {
2475de2362d3Smrg	unsigned us_width = (pPriv->w - 1) & 0x7ff;
2476de2362d3Smrg	unsigned us_height = (pPriv->h - 1) & 0x7ff;
2477de2362d3Smrg	unsigned us_depth = 0;
2478de2362d3Smrg
2479de2362d3Smrg	if (pPriv->w > 2048) {
2480de2362d3Smrg	    us_width = (0x7ff + us_width) >> 1;
2481de2362d3Smrg	    us_depth |= 0x0d;
2482de2362d3Smrg	}
2483de2362d3Smrg	if (pPriv->h > 2048) {
2484de2362d3Smrg	    us_height = (0x7ff + us_height) >> 1;
2485de2362d3Smrg	    us_depth |= 0x0e;
2486de2362d3Smrg	}
2487de2362d3Smrg	us_format = (us_width << R300_TXWIDTH_SHIFT) |
2488de2362d3Smrg		    (us_height << R300_TXHEIGHT_SHIFT) |
2489de2362d3Smrg		    (us_depth << R300_TXDEPTH_SHIFT);
2490de2362d3Smrg	out_size++;
2491de2362d3Smrg    }
2492de2362d3Smrg
2493de2362d3Smrg    BEGIN_ACCEL_RELOC(out_size, 1);
249418781e08Smrg    OUT_RING_REG(R300_TX_FILTER0_0, txfilter);
249518781e08Smrg    OUT_RING_REG(R300_TX_FILTER1_0, 0);
249618781e08Smrg    OUT_RING_REG(R300_TX_FORMAT0_0, txformat0);
249718781e08Smrg    OUT_RING_REG(R300_TX_FORMAT1_0, txformat1);
249818781e08Smrg    OUT_RING_REG(R300_TX_FORMAT2_0, txpitch);
249918781e08Smrg    OUT_TEXTURE_REG(R300_TX_OFFSET_0, 0, src_bo);
2500de2362d3Smrg    if (info->ChipFamily == CHIP_FAMILY_R520)
250118781e08Smrg	OUT_RING_REG(R500_US_FORMAT0_0, us_format);
250218781e08Smrg    ADVANCE_RING();
2503de2362d3Smrg
2504de2362d3Smrg    txenable = R300_TEX_0_ENABLE;
2505de2362d3Smrg
2506de2362d3Smrg    if (pPriv->is_planar) {
2507de2362d3Smrg	txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
2508de2362d3Smrg		     (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
2509de2362d3Smrg		     R300_TXPITCH_EN);
2510de2362d3Smrg	txpitch = RADEON_ALIGN(pPriv->src_pitch >> 1, 64);
2511de2362d3Smrg	txpitch -= 1;
2512de2362d3Smrg	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
2513de2362d3Smrg		    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
2514de2362d3Smrg		    R300_TX_MIN_FILTER_LINEAR |
2515de2362d3Smrg		    R300_TX_MAG_FILTER_LINEAR);
2516de2362d3Smrg
2517de2362d3Smrg	BEGIN_ACCEL_RELOC(12, 2);
251818781e08Smrg	OUT_RING_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
251918781e08Smrg	OUT_RING_REG(R300_TX_FILTER1_1, 0);
252018781e08Smrg	OUT_RING_REG(R300_TX_FORMAT0_1, txformat0);
252118781e08Smrg	OUT_RING_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8);
252218781e08Smrg	OUT_RING_REG(R300_TX_FORMAT2_1, txpitch);
252318781e08Smrg	OUT_TEXTURE_REG(R300_TX_OFFSET_1, pPriv->planeu_offset, src_bo);
252418781e08Smrg	OUT_RING_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
252518781e08Smrg	OUT_RING_REG(R300_TX_FILTER1_2, 0);
252618781e08Smrg	OUT_RING_REG(R300_TX_FORMAT0_2, txformat0);
252718781e08Smrg	OUT_RING_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8);
252818781e08Smrg	OUT_RING_REG(R300_TX_FORMAT2_2, txpitch);
252918781e08Smrg	OUT_TEXTURE_REG(R300_TX_OFFSET_2, pPriv->planev_offset, src_bo);
253018781e08Smrg	ADVANCE_RING();
2531de2362d3Smrg	txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
2532de2362d3Smrg    }
2533de2362d3Smrg
2534de2362d3Smrg    if (pPriv->bicubic_enabled) {
2535de2362d3Smrg	/* Size is 128x1 */
2536de2362d3Smrg	txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
2537de2362d3Smrg		     (0x0 << R300_TXHEIGHT_SHIFT) |
2538de2362d3Smrg		     R300_TXPITCH_EN);
2539de2362d3Smrg	/* Format is FL_R16G16B16A16: four float channels, 16 bits each (per the format name) */
2540de2362d3Smrg	txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
2541de2362d3Smrg	/* Pitch is 127 (128-1) */
2542de2362d3Smrg	txpitch = 0x7f;
2543de2362d3Smrg	/* Tex filter */
2544de2362d3Smrg	txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
2545de2362d3Smrg		    R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
2546de2362d3Smrg		    R300_TX_MIN_FILTER_NEAREST |
2547de2362d3Smrg		    R300_TX_MAG_FILTER_NEAREST |
2548de2362d3Smrg		    (1 << R300_TX_ID_SHIFT));
2549de2362d3Smrg
2550de2362d3Smrg	BEGIN_ACCEL_RELOC(6, 1);
255118781e08Smrg	OUT_RING_REG(R300_TX_FILTER0_1, txfilter);
255218781e08Smrg	OUT_RING_REG(R300_TX_FILTER1_1, 0);
255318781e08Smrg	OUT_RING_REG(R300_TX_FORMAT0_1, txformat0);
255418781e08Smrg	OUT_RING_REG(R300_TX_FORMAT1_1, txformat1);
255518781e08Smrg	OUT_RING_REG(R300_TX_FORMAT2_1, txpitch);
255618781e08Smrg	OUT_TEXTURE_REG(R300_TX_OFFSET_1, 0, info->bicubic_bo);
255718781e08Smrg	ADVANCE_RING();
2558de2362d3Smrg
2559de2362d3Smrg	/* Enable tex 1 */
2560de2362d3Smrg	txenable |= R300_TEX_1_ENABLE;
2561de2362d3Smrg    }
2562de2362d3Smrg
2563de2362d3Smrg    /* setup the VAP */
2564de2362d3Smrg    if (info->accel_state->has_tcl) {
2565de2362d3Smrg	if (pPriv->bicubic_enabled)
256618781e08Smrg	    BEGIN_RING(2*7);
2567de2362d3Smrg	else
256818781e08Smrg	    BEGIN_RING(2*6);
2569de2362d3Smrg    } else {
2570de2362d3Smrg	if (pPriv->bicubic_enabled)
257118781e08Smrg	    BEGIN_RING(2*5);
2572de2362d3Smrg	else
257318781e08Smrg	    BEGIN_RING(2*4);
2574de2362d3Smrg    }
2575de2362d3Smrg
2576de2362d3Smrg    /* These registers define the number, type, and location of data submitted
2577de2362d3Smrg     * to the PVS unit or GA input (when PVS is disabled).
2578de2362d3Smrg     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
2579de2362d3Smrg     * enabled.  This memory provides the inputs to the vertex shader program
2580de2362d3Smrg     * and ordering is not important.  When PVS/TCL is disabled, this field maps
25810a1d3ae0Smrg     * directly to the GA input memory and the order is significant.  In
2582de2362d3Smrg     * PVS_BYPASS mode the order is as follows:
2583de2362d3Smrg     * Position
2584de2362d3Smrg     * Point Size
2585de2362d3Smrg     * Color 0-3
2586de2362d3Smrg     * Textures 0-7
2587de2362d3Smrg     * Fog
2588de2362d3Smrg     */
2589de2362d3Smrg    if (pPriv->bicubic_enabled) {
259018781e08Smrg	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0,
2591de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
2592de2362d3Smrg		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
2593de2362d3Smrg		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
2594de2362d3Smrg		       R300_SIGNED_0 |
2595de2362d3Smrg		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
2596de2362d3Smrg		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
2597de2362d3Smrg		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
2598de2362d3Smrg		       R300_SIGNED_1));
259918781e08Smrg	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1,
2600de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
2601de2362d3Smrg		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
2602de2362d3Smrg		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
2603de2362d3Smrg		       R300_LAST_VEC_2 |
2604de2362d3Smrg		       R300_SIGNED_2));
2605de2362d3Smrg    } else {
260618781e08Smrg	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0,
2607de2362d3Smrg		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
2608de2362d3Smrg		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
2609de2362d3Smrg		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
2610de2362d3Smrg		       R300_SIGNED_0 |
2611de2362d3Smrg		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
2612de2362d3Smrg		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
2613de2362d3Smrg		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
2614de2362d3Smrg		       R300_LAST_VEC_1 |
2615de2362d3Smrg		       R300_SIGNED_1));
2616de2362d3Smrg    }
2617de2362d3Smrg
2618de2362d3Smrg    /* load the vertex shader
2619de2362d3Smrg     * We pre-load vertex programs in RADEONInit3DEngine():
2620de2362d3Smrg     * - exa
2621de2362d3Smrg     * - Xv
2622de2362d3Smrg     * - Xv bicubic
2623de2362d3Smrg     * Here we select the offset of the vertex program we want to use
2624de2362d3Smrg     */
2625de2362d3Smrg    if (info->accel_state->has_tcl) {
2626de2362d3Smrg	if (pPriv->bicubic_enabled) {
262718781e08Smrg	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0,
2628de2362d3Smrg			  ((11 << R300_PVS_FIRST_INST_SHIFT) |
2629de2362d3Smrg			   (13 << R300_PVS_XYZW_VALID_INST_SHIFT) |
2630de2362d3Smrg			   (13 << R300_PVS_LAST_INST_SHIFT)));
263118781e08Smrg	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1,
2632de2362d3Smrg			  (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
2633de2362d3Smrg	} else {
263418781e08Smrg	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0,
2635de2362d3Smrg			  ((9 << R300_PVS_FIRST_INST_SHIFT) |
2636de2362d3Smrg			   (10 << R300_PVS_XYZW_VALID_INST_SHIFT) |
2637de2362d3Smrg			   (10 << R300_PVS_LAST_INST_SHIFT)));
263818781e08Smrg	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1,
2639de2362d3Smrg			  (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
2640de2362d3Smrg	}
2641de2362d3Smrg    }
2642de2362d3Smrg
2643de2362d3Smrg    /* Position and one set of 2 texture coordinates (two sets when bicubic is enabled) */
264418781e08Smrg    OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
2645de2362d3Smrg    if (pPriv->bicubic_enabled)
264618781e08Smrg	OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
2647de2362d3Smrg					       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
2648de2362d3Smrg    else
264918781e08Smrg	OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
2650de2362d3Smrg
265118781e08Smrg    OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt);
265218781e08Smrg    ADVANCE_RING();
2653de2362d3Smrg
2654de2362d3Smrg    /* setup pixel shader */
2655de2362d3Smrg    if (pPriv->bicubic_state != BICUBIC_OFF) {
2656de2362d3Smrg	if (pPriv->bicubic_enabled) {
265718781e08Smrg	    BEGIN_RING(2*7);
2658de2362d3Smrg
2659de2362d3Smrg	    /* 4 components: 2 for tex0 and 2 for tex1 */
266018781e08Smrg	    OUT_RING_REG(R300_RS_COUNT,
2661de2362d3Smrg			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
2662de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
2663de2362d3Smrg
2664de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
266518781e08Smrg	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
2666de2362d3Smrg
2667de2362d3Smrg	    /* Pixel stack frame size. */
266818781e08Smrg	    OUT_RING_REG(R300_US_PIXSIZE, 5);
2669de2362d3Smrg
2670de2362d3Smrg	    /* FP length. */
267118781e08Smrg	    OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
2672de2362d3Smrg					      R500_US_CODE_END_ADDR(13)));
267318781e08Smrg	    OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
2674de2362d3Smrg					       R500_US_CODE_RANGE_SIZE(13)));
2675de2362d3Smrg
2676de2362d3Smrg	    /* Prepare for FP emission. */
267718781e08Smrg	    OUT_RING_REG(R500_US_CODE_OFFSET, 0);
267818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
267918781e08Smrg	    ADVANCE_RING();
2680de2362d3Smrg
268118781e08Smrg	    BEGIN_RING(2*89);
2682de2362d3Smrg	    /* Pixel shader.
2683de2362d3Smrg	     * I've gone ahead and annotated each instruction, since this
2684de2362d3Smrg	     * thing is MASSIVE. :3
2685de2362d3Smrg	     * Note: In order to avoid bugs with temps and multiple
2686de2362d3Smrg	     * inputs, all temps are offset by 2. temp0 -> register2. */
2687de2362d3Smrg
2688de2362d3Smrg	    /* TEX temp2, input1.xxxx, tex1, 1D */
268918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2690de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2691de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2692de2362d3Smrg						   R500_INST_RGB_WMASK_B));
269318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
2694de2362d3Smrg						   R500_TEX_INST_LD |
2695de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
269618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
2697de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
2698de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_R |
2699de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_R |
2700de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_R |
2701de2362d3Smrg						   R500_TEX_DST_ADDR(2) |
2702de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
2703de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
2704de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
2705de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
270618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
270718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
270818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2709de2362d3Smrg
2710de2362d3Smrg	    /* TEX temp5, input1.yyyy, tex1, 1D */
271118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2712de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
2713de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2714de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2715de2362d3Smrg						   R500_INST_RGB_WMASK_B));
271618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
2717de2362d3Smrg						   R500_TEX_INST_LD |
2718de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
2719de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
272018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
2721de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_G |
2722de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
2723de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_G |
2724de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_G |
2725de2362d3Smrg						   R500_TEX_DST_ADDR(5) |
2726de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
2727de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
2728de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
2729de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
273018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
273118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
273218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2733de2362d3Smrg
2734de2362d3Smrg	    /* MUL temp4, const0.x0x0, temp2.yyxx */
273518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
2736de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
2737de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2738de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2739de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2740de2362d3Smrg						   R500_INST_ALPHA_WMASK));
274118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2742de2362d3Smrg						   R500_RGB_ADDR0_CONST |
2743de2362d3Smrg						   R500_RGB_ADDR1(2)));
274418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2745de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
2746de2362d3Smrg						   R500_ALPHA_ADDR1(2)));
274718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2748de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
2749de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_0 |
2750de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_R |
2751de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
2752de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_G |
2753de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
2754de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_R));
275518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
2756de2362d3Smrg						   R500_ALPHA_OP_MAD |
2757de2362d3Smrg						   R500_ALPHA_SEL_A_SRC0 |
2758de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
2759de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
2760de2362d3Smrg						   R500_ALPHA_SWIZ_B_R));
276118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
2762de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
2763de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_0 |
2764de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_0 |
2765de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_0 |
2766de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
2767de2362d3Smrg
2768de2362d3Smrg	    /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */
276918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
2770de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2771de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2772de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2773de2362d3Smrg						   R500_INST_ALPHA_WMASK));
277418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2775de2362d3Smrg						   R500_RGB_ADDR0_CONST |
2776de2362d3Smrg						   R500_RGB_ADDR1(5) |
2777de2362d3Smrg						   R500_RGB_ADDR2(4)));
277818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2779de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
2780de2362d3Smrg						   R500_ALPHA_ADDR1(5) |
2781de2362d3Smrg						   R500_ALPHA_ADDR2(4)));
278218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2783de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_0 |
2784de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
2785de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_0 |
2786de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
2787de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
2788de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_R |
2789de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_R));
279018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
2791de2362d3Smrg						   R500_ALPHA_OP_MAD |
2792de2362d3Smrg						   R500_ALPHA_SEL_A_SRC0 |
2793de2362d3Smrg						   R500_ALPHA_SWIZ_A_G |
2794de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
2795de2362d3Smrg						   R500_ALPHA_SWIZ_B_R));
279618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
2797de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
2798de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
2799de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
2800de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
2801de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
2802de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
2803de2362d3Smrg
2804de2362d3Smrg	    /* ADD temp3, temp3, input0.xyxy */
280518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
2806de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2807de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2808de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2809de2362d3Smrg						   R500_INST_ALPHA_WMASK));
281018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) |
2811de2362d3Smrg						   R500_RGB_ADDR2(0)));
281218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) |
2813de2362d3Smrg						   R500_ALPHA_ADDR2(0)));
281418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
2815de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_1 |
2816de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_1 |
2817de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
2818de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
2819de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
2820de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
282118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
2822de2362d3Smrg						   R500_ALPHA_OP_MAD |
2823de2362d3Smrg						   R500_ALPHA_SWIZ_A_1 |
2824de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
2825de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
282618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
2827de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
2828de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
2829de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
2830de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
2831de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_R |
2832de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_G));
2833de2362d3Smrg
2834de2362d3Smrg	    /* TEX temp1, temp3.zwxy, tex0, 2D */
283518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2836de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2837de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2838de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2839de2362d3Smrg						   R500_INST_ALPHA_WMASK));
284018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
2841de2362d3Smrg						   R500_TEX_INST_LD |
2842de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
284318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
2844de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_B |
2845de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_A |
2846de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_R |
2847de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_G |
2848de2362d3Smrg						   R500_TEX_DST_ADDR(1) |
2849de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
2850de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
2851de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
2852de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
285318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
285418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
285518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2856de2362d3Smrg
2857de2362d3Smrg	    /* TEX temp3, temp3.xyzw, tex0, 2D */
285818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2859de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
2860de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2861de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2862de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2863de2362d3Smrg						   R500_INST_ALPHA_WMASK));
286418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
2865de2362d3Smrg						   R500_TEX_INST_LD |
2866de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
2867de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
286818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
2869de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
2870de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
2871de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_B |
2872de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_A |
2873de2362d3Smrg						   R500_TEX_DST_ADDR(3) |
2874de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
2875de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
2876de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
2877de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
287818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
287918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
288018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2881de2362d3Smrg
2882de2362d3Smrg	    /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */
288318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
2884de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2885de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2886de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2887de2362d3Smrg						   R500_INST_ALPHA_WMASK));
288818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2889de2362d3Smrg						   R500_RGB_ADDR0_CONST |
2890de2362d3Smrg						   R500_RGB_ADDR1(5) |
2891de2362d3Smrg						   R500_RGB_ADDR2(4)));
289218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2893de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
2894de2362d3Smrg						   R500_ALPHA_ADDR1(5) |
2895de2362d3Smrg						   R500_ALPHA_ADDR2(4)));
289618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2897de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_0 |
2898de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
2899de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_0 |
2900de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
2901de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_G |
2902de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
2903de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G));
290418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
2905de2362d3Smrg						   R500_ALPHA_OP_MAD |
2906de2362d3Smrg						   R500_ALPHA_SEL_A_SRC0 |
2907de2362d3Smrg						   R500_ALPHA_SWIZ_A_G |
2908de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
2909de2362d3Smrg						   R500_ALPHA_SWIZ_B_G));
291018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
2911de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
2912de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
2913de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
2914de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
2915de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
2916de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
2917de2362d3Smrg
2918de2362d3Smrg	    /* ADD temp0, temp4, input0.xyxy */
291918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
2920de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2921de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2922de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2923de2362d3Smrg						   R500_INST_ALPHA_WMASK));
292418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) |
2925de2362d3Smrg						   R500_RGB_ADDR2(0)));
292618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) |
2927de2362d3Smrg						   R500_ALPHA_ADDR2(0)));
292818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
2929de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_1 |
2930de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_1 |
2931de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
2932de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
2933de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
2934de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
293518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
2936de2362d3Smrg						   R500_ALPHA_OP_MAD |
2937de2362d3Smrg						   R500_ALPHA_SWIZ_A_1 |
2938de2362d3Smrg						   R500_ALPHA_SEL_B_SRC1 |
2939de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
294018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
2941de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
2942de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
2943de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
2944de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
2945de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_R |
2946de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_G));
2947de2362d3Smrg
2948de2362d3Smrg	    /* TEX temp4, temp0.zwzw, tex0, 2D */
294918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2950de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
2951de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2952de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2953de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2954de2362d3Smrg						   R500_INST_ALPHA_WMASK));
295518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
2956de2362d3Smrg						   R500_TEX_INST_LD |
2957de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
295818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
2959de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_B |
2960de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_A |
2961de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_B |
2962de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_A |
2963de2362d3Smrg						   R500_TEX_DST_ADDR(4) |
2964de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
2965de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
2966de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
2967de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
296818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
296918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
297018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2971de2362d3Smrg
2972de2362d3Smrg	    /* TEX temp0, temp0.xyzw, tex0, 2D */
297318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2974de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
2975de2362d3Smrg						   R500_INST_RGB_WMASK_R |
2976de2362d3Smrg						   R500_INST_RGB_WMASK_G |
2977de2362d3Smrg						   R500_INST_RGB_WMASK_B |
2978de2362d3Smrg						   R500_INST_ALPHA_WMASK));
297918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
2980de2362d3Smrg						   R500_TEX_INST_LD |
2981de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
2982de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
298318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
2984de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
2985de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
2986de2362d3Smrg						   R500_TEX_SRC_R_SWIZ_B |
2987de2362d3Smrg						   R500_TEX_SRC_Q_SWIZ_A |
2988de2362d3Smrg						   R500_TEX_DST_ADDR(0) |
2989de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
2990de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
2991de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
2992de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
299318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
299418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
299518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2996de2362d3Smrg
2997de2362d3Smrg	    /* LRP temp3, temp2.zzzz, temp1, temp3 ->
2998de2362d3Smrg	     * - PRESUB temps, temp1 - temp3
2999de2362d3Smrg	     * - MAD temp2.zzzz, temps, temp3 */
300018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3001de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3002de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3003de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3004de2362d3Smrg						   R500_INST_ALPHA_WMASK));
300518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) |
3006de2362d3Smrg						   R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
3007de2362d3Smrg						   R500_RGB_ADDR1(1) |
3008de2362d3Smrg						   R500_RGB_ADDR2(2)));
300918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) |
3010de2362d3Smrg						   R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
3011de2362d3Smrg						   R500_ALPHA_ADDR1(1) |
3012de2362d3Smrg						   R500_ALPHA_ADDR2(2)));
301318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
3014de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_B |
3015de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_B |
3016de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3017de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRCP |
3018de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3019de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3020de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
302118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
3022de2362d3Smrg						   R500_ALPHA_OP_MAD |
3023de2362d3Smrg						   R500_ALPHA_SEL_A_SRC2 |
3024de2362d3Smrg						   R500_ALPHA_SWIZ_A_B |
3025de2362d3Smrg						   R500_ALPHA_SEL_B_SRCP |
3026de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
302718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
3028de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3029de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3030de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3031de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3032de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3033de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
3034de2362d3Smrg
3035de2362d3Smrg	    /* LRP temp0, temp2.zzzz, temp4, temp0 ->
3036de2362d3Smrg	     * - PRESUB temps, temp4 - temp0
3037de2362d3Smrg	     * - MAD temp2.zzzz, temps, temp0 */
303818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3039de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3040de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3041de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3042de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3043de2362d3Smrg						   R500_INST_ALPHA_WMASK));
304418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3045de2362d3Smrg						   R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
3046de2362d3Smrg						   R500_RGB_ADDR1(4) |
3047de2362d3Smrg						   R500_RGB_ADDR2(2)));
304818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3049de2362d3Smrg						   R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
3050de2362d3Smrg						   R500_ALPHA_ADDR1(4) |
3051de2362d3Smrg						   R500_ALPHA_ADDR2(2)));
305218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
3053de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_B |
3054de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_B |
3055de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3056de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRCP |
3057de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3058de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3059de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
306018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
3061de2362d3Smrg						   R500_ALPHA_OP_MAD |
3062de2362d3Smrg						   R500_ALPHA_SEL_A_SRC2 |
3063de2362d3Smrg						   R500_ALPHA_SWIZ_A_B |
3064de2362d3Smrg						   R500_ALPHA_SEL_B_SRCP |
3065de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
306618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
3067de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3068de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3069de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3070de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3071de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3072de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
3073de2362d3Smrg
3074de2362d3Smrg	    /* LRP output, temp5.zzzz, temp3, temp0 ->
3075de2362d3Smrg	     * - PRESUB temps, temp3 - temp0
3076de2362d3Smrg	     * - MAD temp5.zzzz, temps, temp0 */
307718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
3078de2362d3Smrg						   R500_INST_LAST |
3079de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3080de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3081de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3082de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3083de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3084de2362d3Smrg						   R500_INST_RGB_OMASK_R |
3085de2362d3Smrg						   R500_INST_RGB_OMASK_G |
3086de2362d3Smrg						   R500_INST_RGB_OMASK_B |
3087de2362d3Smrg						   R500_INST_ALPHA_OMASK));
308818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3089de2362d3Smrg						   R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
3090de2362d3Smrg						   R500_RGB_ADDR1(3) |
3091de2362d3Smrg						   R500_RGB_ADDR2(5)));
309218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3093de2362d3Smrg						   R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
3094de2362d3Smrg						   R500_ALPHA_ADDR1(3) |
3095de2362d3Smrg						   R500_ALPHA_ADDR2(5)));
309618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
3097de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_B |
3098de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_B |
3099de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3100de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRCP |
3101de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3102de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G |
3103de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B));
310418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
3105de2362d3Smrg						   R500_ALPHA_OP_MAD |
3106de2362d3Smrg						   R500_ALPHA_SEL_A_SRC2 |
3107de2362d3Smrg						   R500_ALPHA_SWIZ_A_B |
3108de2362d3Smrg						   R500_ALPHA_SEL_B_SRCP |
3109de2362d3Smrg						   R500_ALPHA_SWIZ_B_A));
311018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
3111de2362d3Smrg						   R500_ALU_RGBA_OP_MAD |
3112de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3113de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3114de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3115de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3116de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_A));
3117de2362d3Smrg
3118de2362d3Smrg	    /* Shader constants. */
311918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
3120de2362d3Smrg
3121de2362d3Smrg	    /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */
3122de2362d3Smrg	    OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w));
3123de2362d3Smrg	    OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h));
3124de2362d3Smrg	    OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
3125de2362d3Smrg	    OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
3126de2362d3Smrg
312718781e08Smrg	    ADVANCE_RING();
3128de2362d3Smrg	} else {
312918781e08Smrg	    BEGIN_RING(2*19);
3130de2362d3Smrg	    /* 2 components: 2 for tex0 */
313118781e08Smrg	    OUT_RING_REG(R300_RS_COUNT,
3132de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
3133de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
3134de2362d3Smrg
3135de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
313618781e08Smrg	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
3137de2362d3Smrg
3138de2362d3Smrg	    /* Pixel stack frame size. */
313918781e08Smrg	    OUT_RING_REG(R300_US_PIXSIZE, 0); /* highest temp used */
3140de2362d3Smrg
3141de2362d3Smrg	    /* FP length. */
314218781e08Smrg	    OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
3143de2362d3Smrg					      R500_US_CODE_END_ADDR(1)));
314418781e08Smrg	    OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
3145de2362d3Smrg					       R500_US_CODE_RANGE_SIZE(1)));
3146de2362d3Smrg
3147de2362d3Smrg	    /* Prepare for FP emission. */
314818781e08Smrg	    OUT_RING_REG(R500_US_CODE_OFFSET, 0);
314918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
3150de2362d3Smrg
3151de2362d3Smrg	    /* tex inst */
315218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3153de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3154de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3155de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3156de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3157de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3158de2362d3Smrg						   R500_INST_RGB_CLAMP |
3159de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
316018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3161de2362d3Smrg						   R500_TEX_INST_LD |
3162de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3163de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
316418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3165de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3166de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3167de2362d3Smrg						   R500_TEX_DST_ADDR(0) |
3168de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3169de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3170de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3171de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
317218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3173de2362d3Smrg						   R500_DX_S_SWIZ_R |
3174de2362d3Smrg						   R500_DX_T_SWIZ_R |
3175de2362d3Smrg						   R500_DX_R_SWIZ_R |
3176de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3177de2362d3Smrg						   R500_DY_ADDR(0) |
3178de2362d3Smrg						   R500_DY_S_SWIZ_R |
3179de2362d3Smrg						   R500_DY_T_SWIZ_R |
3180de2362d3Smrg						   R500_DY_R_SWIZ_R |
3181de2362d3Smrg						   R500_DY_Q_SWIZ_R));
318218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
318318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3184de2362d3Smrg
3185de2362d3Smrg	    /* ALU inst */
318618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
3187de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3188de2362d3Smrg						   R500_INST_LAST |
3189de2362d3Smrg						   R500_INST_RGB_OMASK_R |
3190de2362d3Smrg						   R500_INST_RGB_OMASK_G |
3191de2362d3Smrg						   R500_INST_RGB_OMASK_B |
3192de2362d3Smrg						   R500_INST_ALPHA_OMASK |
3193de2362d3Smrg						   R500_INST_RGB_CLAMP |
3194de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
319518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3196de2362d3Smrg						   R500_RGB_ADDR1(0) |
3197de2362d3Smrg						   R500_RGB_ADDR1_CONST |
3198de2362d3Smrg						   R500_RGB_ADDR2(0) |
3199de2362d3Smrg						   R500_RGB_ADDR2_CONST));
320018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3201de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3202de2362d3Smrg						   R500_ALPHA_ADDR1_CONST |
3203de2362d3Smrg						   R500_ALPHA_ADDR2(0) |
3204de2362d3Smrg						   R500_ALPHA_ADDR2_CONST));
320518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3206de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3207de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3208de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3209de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC0 |
3210de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_1 |
3211de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_1 |
3212de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_1));
321318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3214de2362d3Smrg						   R500_ALPHA_SWIZ_A_A |
3215de2362d3Smrg						   R500_ALPHA_SWIZ_B_1));
321618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3217de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_0 |
3218de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_0 |
3219de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_0 |
3220de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
322118781e08Smrg	    ADVANCE_RING();
3222de2362d3Smrg	}
3223de2362d3Smrg    } else {
3224de2362d3Smrg	/*
3225de2362d3Smrg	 * y' = y - .0625
3226de2362d3Smrg	 * u' = u - .5
3227de2362d3Smrg	 * v' = v - .5;
3228de2362d3Smrg	 *
3229de2362d3Smrg	 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
3230de2362d3Smrg	 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
3231de2362d3Smrg	 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
3232de2362d3Smrg	 *
3233de2362d3Smrg	 * DP3 might look like the straightforward solution
3234de2362d3Smrg	 * but we'd need to move the texture yuv values in
3235de2362d3Smrg	 * the same reg for this to work. Therefore use MADs.
3236de2362d3Smrg	 * Brightness just adds to the off constant.
3237de2362d3Smrg	 * Contrast is multiplication of luminance.
3238de2362d3Smrg	 * Saturation and hue change the u and v coeffs.
3239de2362d3Smrg	 * Default values (before adjustments - depend on colorspace):
3240de2362d3Smrg	 * yco = 1.1643
3241de2362d3Smrg	 * uco = 0, -0.39173, 2.017
3242de2362d3Smrg	 * vco = 1.5958, -0.8129, 0
3243de2362d3Smrg	 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
3244de2362d3Smrg	 *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
3245de2362d3Smrg	 *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
3246de2362d3Smrg	 *
3247de2362d3Smrg	 * temp = MAD(yco, yuv.yyyy, off)
3248de2362d3Smrg	 * temp = MAD(uco, yuv.uuuu, temp)
3249de2362d3Smrg	 * result = MAD(vco, yuv.vvvv, temp)
3250de2362d3Smrg	 */
3251de2362d3Smrg	/* TODO: don't recalc consts always */
3252de2362d3Smrg	const float Loff = -0.0627;
3253de2362d3Smrg	const float Coff = -0.502;
3254de2362d3Smrg	float uvcosf, uvsinf;
3255de2362d3Smrg	float yco;
3256de2362d3Smrg	float uco[3], vco[3], off[3];
3257de2362d3Smrg	float bright, cont, gamma;
3258de2362d3Smrg	int ref = pPriv->transform_index;
3259de2362d3Smrg
3260de2362d3Smrg	cont = RTFContrast(pPriv->contrast);
3261de2362d3Smrg	bright = RTFBrightness(pPriv->brightness);
3262de2362d3Smrg	gamma = (float)pPriv->gamma / 1000.0;
3263de2362d3Smrg	uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
3264de2362d3Smrg	uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
3265de2362d3Smrg	/* overlay video also does pre-gamma contrast/sat adjust, should we? */
3266de2362d3Smrg
3267de2362d3Smrg	yco = trans[ref].RefLuma * cont;
3268de2362d3Smrg	uco[0] = -trans[ref].RefRCr * uvsinf;
3269de2362d3Smrg	uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
3270de2362d3Smrg	uco[2] = trans[ref].RefBCb * uvcosf;
3271de2362d3Smrg	vco[0] = trans[ref].RefRCr * uvcosf;
3272de2362d3Smrg	vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
3273de2362d3Smrg	vco[2] = trans[ref].RefBCb * uvsinf;
3274de2362d3Smrg	off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
3275de2362d3Smrg	off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
3276de2362d3Smrg	off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
3277de2362d3Smrg
3278de2362d3Smrg	//XXX gamma
3279de2362d3Smrg
3280de2362d3Smrg	if (pPriv->is_planar) {
328118781e08Smrg	    BEGIN_RING(2*56);
3282de2362d3Smrg	    /* 2 components: 2 for tex0/1/2 */
328318781e08Smrg	    OUT_RING_REG(R300_RS_COUNT,
3284de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
3285de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
3286de2362d3Smrg
3287de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
328818781e08Smrg	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
3289de2362d3Smrg
3290de2362d3Smrg	    /* Pixel stack frame size. */
329118781e08Smrg	    OUT_RING_REG(R300_US_PIXSIZE, 2); /* highest temp used */
3292de2362d3Smrg
3293de2362d3Smrg	    /* FP length. */
329418781e08Smrg	    OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
3295de2362d3Smrg					      R500_US_CODE_END_ADDR(5)));
329618781e08Smrg	    OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
3297de2362d3Smrg					       R500_US_CODE_RANGE_SIZE(5)));
3298de2362d3Smrg
3299de2362d3Smrg	    /* Prepare for FP emission. */
330018781e08Smrg	    OUT_RING_REG(R500_US_CODE_OFFSET, 0);
330118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
3302de2362d3Smrg
3303de2362d3Smrg	    /* tex inst */
330418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3305de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3306de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3307de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3308de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3309de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3310de2362d3Smrg						   R500_INST_RGB_CLAMP |
3311de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
331218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3313de2362d3Smrg						   R500_TEX_INST_LD |
3314de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
331518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3316de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3317de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3318de2362d3Smrg						   R500_TEX_DST_ADDR(2) |
3319de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3320de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3321de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3322de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
332318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3324de2362d3Smrg						   R500_DX_S_SWIZ_R |
3325de2362d3Smrg						   R500_DX_T_SWIZ_R |
3326de2362d3Smrg						   R500_DX_R_SWIZ_R |
3327de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3328de2362d3Smrg						   R500_DY_ADDR(0) |
3329de2362d3Smrg						   R500_DY_S_SWIZ_R |
3330de2362d3Smrg						   R500_DY_T_SWIZ_R |
3331de2362d3Smrg						   R500_DY_R_SWIZ_R |
3332de2362d3Smrg						   R500_DY_Q_SWIZ_R));
333318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
333418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3335de2362d3Smrg
3336de2362d3Smrg	    /* tex inst */
333718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3338de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3339de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3340de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3341de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3342de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3343de2362d3Smrg						   R500_INST_RGB_CLAMP |
3344de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
334518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
3346de2362d3Smrg						   R500_TEX_INST_LD |
3347de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
334818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3349de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3350de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3351de2362d3Smrg						   R500_TEX_DST_ADDR(1) |
3352de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3353de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3354de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3355de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
335618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3357de2362d3Smrg						   R500_DX_S_SWIZ_R |
3358de2362d3Smrg						   R500_DX_T_SWIZ_R |
3359de2362d3Smrg						   R500_DX_R_SWIZ_R |
3360de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3361de2362d3Smrg						   R500_DY_ADDR(0) |
3362de2362d3Smrg						   R500_DY_S_SWIZ_R |
3363de2362d3Smrg						   R500_DY_T_SWIZ_R |
3364de2362d3Smrg						   R500_DY_R_SWIZ_R |
3365de2362d3Smrg						   R500_DY_Q_SWIZ_R));
336618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
336718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3368de2362d3Smrg
3369de2362d3Smrg	    /* tex inst */
337018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3371de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3372de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3373de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3374de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3375de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3376de2362d3Smrg						   R500_INST_RGB_CLAMP |
3377de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
337818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(2) |
3379de2362d3Smrg						   R500_TEX_INST_LD |
3380de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3381de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
338218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3383de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3384de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3385de2362d3Smrg						   R500_TEX_DST_ADDR(0) |
3386de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3387de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3388de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3389de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
339018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3391de2362d3Smrg						   R500_DX_S_SWIZ_R |
3392de2362d3Smrg						   R500_DX_T_SWIZ_R |
3393de2362d3Smrg						   R500_DX_R_SWIZ_R |
3394de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3395de2362d3Smrg						   R500_DY_ADDR(0) |
3396de2362d3Smrg						   R500_DY_S_SWIZ_R |
3397de2362d3Smrg						   R500_DY_T_SWIZ_R |
3398de2362d3Smrg						   R500_DY_R_SWIZ_R |
3399de2362d3Smrg						   R500_DY_Q_SWIZ_R));
340018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
340118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3402de2362d3Smrg
3403de2362d3Smrg	    /* ALU inst */
3404de2362d3Smrg	    /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */
340518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3406de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3407de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3408de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3409de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3410de2362d3Smrg						   R500_INST_ALPHA_WMASK));
341118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3412de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3413de2362d3Smrg						   R500_RGB_ADDR1(2) |
3414de2362d3Smrg						   R500_RGB_ADDR2(0) |
3415de2362d3Smrg						   R500_RGB_ADDR2_CONST));
341618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3417de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3418de2362d3Smrg						   R500_ALPHA_ADDR1(2) |
3419de2362d3Smrg						   R500_ALPHA_ADDR2(0) |
3420de2362d3Smrg						   R500_ALPHA_ADDR2_CONST));
342118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3422de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_A |
3423de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_A |
3424de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_A |
3425de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3426de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3427de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G |
3428de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_B));
342918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3430de2362d3Smrg						   R500_ALPHA_ADDRD(2) |
3431de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3432de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
343318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3434de2362d3Smrg						   R500_ALU_RGBA_ADDRD(2) |
3435de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3436de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3437de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3438de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3439de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3440de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3441de2362d3Smrg
3442de2362d3Smrg	    /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */
344318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3444de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3445de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3446de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3447de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3448de2362d3Smrg						   R500_INST_ALPHA_WMASK));
344918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
3450de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3451de2362d3Smrg						   R500_RGB_ADDR1(1) |
3452de2362d3Smrg						   R500_RGB_ADDR2(2)));
345318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
3454de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3455de2362d3Smrg						   R500_ALPHA_ADDR1(1) |
3456de2362d3Smrg						   R500_ALPHA_ADDR2(2)));
345718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3458de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3459de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3460de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3461de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3462de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3463de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G |
3464de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_B));
346518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3466de2362d3Smrg						   R500_ALPHA_ADDRD(2) |
3467de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3468de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
346918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3470de2362d3Smrg						   R500_ALU_RGBA_ADDRD(2) |
3471de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3472de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3473de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3474de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3475de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3476de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3477de2362d3Smrg
3478de2362d3Smrg	    /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */
347918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
3480de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3481de2362d3Smrg						   R500_INST_LAST |
3482de2362d3Smrg						   R500_INST_RGB_OMASK_R |
3483de2362d3Smrg						   R500_INST_RGB_OMASK_G |
3484de2362d3Smrg						   R500_INST_RGB_OMASK_B |
3485de2362d3Smrg						   R500_INST_ALPHA_OMASK |
3486de2362d3Smrg						   R500_INST_RGB_CLAMP |
3487de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
348818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
3489de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3490de2362d3Smrg						   R500_RGB_ADDR1(0) |
3491de2362d3Smrg						   R500_RGB_ADDR2(2)));
349218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(2) |
3493de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3494de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3495de2362d3Smrg						   R500_ALPHA_ADDR2(2)));
349618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3497de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3498de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3499de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3500de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3501de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3502de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G |
3503de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_B));
350418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3505de2362d3Smrg						   R500_ALPHA_ADDRD(0) |
3506de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3507de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
350818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3509de2362d3Smrg						   R500_ALU_RGBA_ADDRD(0) |
3510de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3511de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3512de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3513de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3514de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3515de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_1));
3516de2362d3Smrg
3517de2362d3Smrg	} else {
351818781e08Smrg	    BEGIN_RING(2*44);
3519de2362d3Smrg	    /* 2 components: 2 for tex0 */
352018781e08Smrg	    OUT_RING_REG(R300_RS_COUNT,
3521de2362d3Smrg			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
3522de2362d3Smrg			   R300_RS_COUNT_HIRES_EN));
3523de2362d3Smrg
3524de2362d3Smrg	    /* R300_INST_COUNT_RS - highest RS instruction used */
352518781e08Smrg	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
3526de2362d3Smrg
3527de2362d3Smrg	    /* Pixel stack frame size. */
352818781e08Smrg	    OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */
3529de2362d3Smrg
3530de2362d3Smrg	    /* FP length. */
353118781e08Smrg	    OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
3532de2362d3Smrg					      R500_US_CODE_END_ADDR(3)));
353318781e08Smrg	    OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
3534de2362d3Smrg					       R500_US_CODE_RANGE_SIZE(3)));
3535de2362d3Smrg
3536de2362d3Smrg	    /* Prepare for FP emission. */
353718781e08Smrg	    OUT_RING_REG(R500_US_CODE_OFFSET, 0);
353818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
3539de2362d3Smrg
3540de2362d3Smrg	    /* tex inst */
354118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
3542de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3543de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3544de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3545de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3546de2362d3Smrg						   R500_INST_ALPHA_WMASK |
3547de2362d3Smrg						   R500_INST_RGB_CLAMP |
3548de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
354918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
3550de2362d3Smrg						   R500_TEX_INST_LD |
3551de2362d3Smrg						   R500_TEX_SEM_ACQUIRE |
3552de2362d3Smrg						   R500_TEX_IGNORE_UNCOVERED));
355318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
3554de2362d3Smrg						   R500_TEX_SRC_S_SWIZ_R |
3555de2362d3Smrg						   R500_TEX_SRC_T_SWIZ_G |
3556de2362d3Smrg						   R500_TEX_DST_ADDR(0) |
3557de2362d3Smrg						   R500_TEX_DST_R_SWIZ_R |
3558de2362d3Smrg						   R500_TEX_DST_G_SWIZ_G |
3559de2362d3Smrg						   R500_TEX_DST_B_SWIZ_B |
3560de2362d3Smrg						   R500_TEX_DST_A_SWIZ_A));
356118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
3562de2362d3Smrg						   R500_DX_S_SWIZ_R |
3563de2362d3Smrg						   R500_DX_T_SWIZ_R |
3564de2362d3Smrg						   R500_DX_R_SWIZ_R |
3565de2362d3Smrg						   R500_DX_Q_SWIZ_R |
3566de2362d3Smrg						   R500_DY_ADDR(0) |
3567de2362d3Smrg						   R500_DY_S_SWIZ_R |
3568de2362d3Smrg						   R500_DY_T_SWIZ_R |
3569de2362d3Smrg						   R500_DY_R_SWIZ_R |
3570de2362d3Smrg						   R500_DY_Q_SWIZ_R));
357118781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
357218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
3573de2362d3Smrg
3574de2362d3Smrg	    /* ALU inst */
3575de2362d3Smrg	    /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */
357618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3577de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3578de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3579de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3580de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3581de2362d3Smrg						   R500_INST_ALPHA_WMASK));
358218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
3583de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3584de2362d3Smrg						   R500_RGB_ADDR1(0) |
3585de2362d3Smrg						   R500_RGB_ADDR2(0) |
3586de2362d3Smrg						   R500_RGB_ADDR2_CONST));
358718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
3588de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3589de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3590de2362d3Smrg						   R500_ALPHA_ADDR2(0) |
3591de2362d3Smrg						   R500_ALPHA_ADDR2_CONST));
359218781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3593de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_A |
3594de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_A |
3595de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_A |
3596de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3597de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_G |
3598de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_G |
3599de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_G));
360018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3601de2362d3Smrg						   R500_ALPHA_ADDRD(1) |
3602de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3603de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
360418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3605de2362d3Smrg						   R500_ALU_RGBA_ADDRD(1) |
3606de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC0 |
3607de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3608de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3609de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3610de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3611de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3612de2362d3Smrg
3613de2362d3Smrg	    /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */
361418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
3615de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3616de2362d3Smrg						   R500_INST_RGB_WMASK_R |
3617de2362d3Smrg						   R500_INST_RGB_WMASK_G |
3618de2362d3Smrg						   R500_INST_RGB_WMASK_B |
3619de2362d3Smrg						   R500_INST_ALPHA_WMASK));
362018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(1) |
3621de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3622de2362d3Smrg						   R500_RGB_ADDR1(0) |
3623de2362d3Smrg						   R500_RGB_ADDR2(1)));
362418781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
3625de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3626de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3627de2362d3Smrg						   R500_ALPHA_ADDR2(1)));
362818781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3629de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3630de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3631de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3632de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3633de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_B |
3634de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_B |
3635de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_B));
363618781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3637de2362d3Smrg						   R500_ALPHA_ADDRD(1) |
3638de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3639de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
364018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3641de2362d3Smrg						   R500_ALU_RGBA_ADDRD(1) |
3642de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3643de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3644de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3645de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3646de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3647de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_0));
3648de2362d3Smrg
3649de2362d3Smrg	    /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */
365018781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
3651de2362d3Smrg						   R500_INST_TEX_SEM_WAIT |
3652de2362d3Smrg						   R500_INST_LAST |
3653de2362d3Smrg						   R500_INST_RGB_OMASK_R |
3654de2362d3Smrg						   R500_INST_RGB_OMASK_G |
3655de2362d3Smrg						   R500_INST_RGB_OMASK_B |
3656de2362d3Smrg						   R500_INST_ALPHA_OMASK |
3657de2362d3Smrg						   R500_INST_RGB_CLAMP |
3658de2362d3Smrg						   R500_INST_ALPHA_CLAMP));
365918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(2) |
3660de2362d3Smrg						   R500_RGB_ADDR0_CONST |
3661de2362d3Smrg						   R500_RGB_ADDR1(0) |
3662de2362d3Smrg						   R500_RGB_ADDR2(1)));
366318781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(1) |
3664de2362d3Smrg						   R500_ALPHA_ADDR0_CONST |
3665de2362d3Smrg						   R500_ALPHA_ADDR1(0) |
3666de2362d3Smrg						   R500_ALPHA_ADDR2(1)));
366718781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
3668de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_A_R |
3669de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_A_G |
3670de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_A_B |
3671de2362d3Smrg						   R500_ALU_RGB_SEL_B_SRC1 |
3672de2362d3Smrg						   R500_ALU_RGB_R_SWIZ_B_R |
3673de2362d3Smrg						   R500_ALU_RGB_B_SWIZ_B_R |
3674de2362d3Smrg						   R500_ALU_RGB_G_SWIZ_B_R));
367518781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
3676de2362d3Smrg						   R500_ALPHA_ADDRD(1) |
3677de2362d3Smrg						   R500_ALPHA_SWIZ_A_0 |
3678de2362d3Smrg						   R500_ALPHA_SWIZ_B_0));
367918781e08Smrg	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
3680de2362d3Smrg						   R500_ALU_RGBA_ADDRD(1) |
3681de2362d3Smrg						   R500_ALU_RGBA_SEL_C_SRC2 |
3682de2362d3Smrg						   R500_ALU_RGBA_R_SWIZ_R |
3683de2362d3Smrg						   R500_ALU_RGBA_G_SWIZ_G |
3684de2362d3Smrg						   R500_ALU_RGBA_B_SWIZ_B |
3685de2362d3Smrg						   R500_ALU_RGBA_ALPHA_SEL_C_SRC0 |
3686de2362d3Smrg						   R500_ALU_RGBA_A_SWIZ_1));
3687de2362d3Smrg	}
3688de2362d3Smrg
3689de2362d3Smrg	/* Shader constants. */
369018781e08Smrg	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
3691de2362d3Smrg
3692de2362d3Smrg	/* constant 0: off, yco */
3693de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[0]);
3694de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[1]);
3695de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, off[2]);
3696de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, yco);
3697de2362d3Smrg	/* constant 1: uco */
3698de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[0]);
3699de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[1]);
3700de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, uco[2]);
3701de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, gamma);
3702de2362d3Smrg	/* constant 2: vco */
3703de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[0]);
3704de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[1]);
3705de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, vco[2]);
3706de2362d3Smrg	OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0.0);
3707de2362d3Smrg
370818781e08Smrg	ADVANCE_RING();
3709de2362d3Smrg    }
3710de2362d3Smrg
3711de2362d3Smrg    BEGIN_ACCEL_RELOC(6, 2);
371218781e08Smrg    OUT_RING_REG(R300_TX_INVALTAGS, 0);
371318781e08Smrg    OUT_RING_REG(R300_TX_ENABLE, txenable);
3714de2362d3Smrg
3715de2362d3Smrg    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap);
3716de2362d3Smrg    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pPixmap);
3717de2362d3Smrg
3718de2362d3Smrg    /* no need to enable blending */
371918781e08Smrg    OUT_RING_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
3720de2362d3Smrg
372118781e08Smrg    OUT_RING_REG(R300_VAP_VTX_SIZE, pPriv->vtx_count);
372218781e08Smrg    ADVANCE_RING();
3723de2362d3Smrg
3724de2362d3Smrg    if (pPriv->vsync) {
3725de2362d3Smrg	xf86CrtcPtr crtc;
3726de2362d3Smrg	if (pPriv->desired_crtc)
3727de2362d3Smrg	    crtc = pPriv->desired_crtc;
3728de2362d3Smrg	else
372918781e08Smrg	    crtc = radeon_pick_best_crtc(pScrn, FALSE,
3730de2362d3Smrg					 pPriv->drw_x,
3731de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
3732de2362d3Smrg					 pPriv->drw_y,
3733de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
3734de2362d3Smrg	if (crtc)
373518781e08Smrg	    RADEONWaitForVLine(pScrn, pPixmap,
373618781e08Smrg			       crtc,
373718781e08Smrg			       pPriv->drw_y - crtc->y,
373818781e08Smrg			       (pPriv->drw_y - crtc->y) + pPriv->dst_h);
3739de2362d3Smrg    }
3740de2362d3Smrg
3741de2362d3Smrg    return TRUE;
3742de2362d3Smrg}
3743de2362d3Smrg
3744de2362d3Smrgstatic void
374518781e08SmrgR500DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
3746de2362d3Smrg{
3747de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
3748de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
3749de2362d3Smrg    int dstxoff, dstyoff;
3750de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
3751de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
3752de2362d3Smrg
3753de2362d3Smrg#ifdef COMPOSITE
3754de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
3755de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
3756de2362d3Smrg#else
3757de2362d3Smrg    dstxoff = 0;
3758de2362d3Smrg    dstyoff = 0;
3759de2362d3Smrg#endif
3760de2362d3Smrg
376118781e08Smrg    if (!R500PrepareTexturedVideo(pScrn, pPriv))
3762de2362d3Smrg	return;
3763de2362d3Smrg
3764de2362d3Smrg    /*
3765de2362d3Smrg     * Rendering of the actual polygon is done in two different
3766de2362d3Smrg     * ways depending on chip generation:
3767de2362d3Smrg     *
3768de2362d3Smrg     * < R300:
3769de2362d3Smrg     *
3770de2362d3Smrg     *     These chips can render a rectangle in one pass, so
3771de2362d3Smrg     *     handling is pretty straight-forward.
3772de2362d3Smrg     *
3773de2362d3Smrg     * >= R300:
3774de2362d3Smrg     *
3775de2362d3Smrg     *     These chips can accept a quad, but will render it as
3776de2362d3Smrg     *     two triangles which results in a diagonal tear. Instead
3777de2362d3Smrg     *     We render a single, large triangle and use the scissor
3778de2362d3Smrg     *     functionality to restrict it to the desired rectangle.
3779de2362d3Smrg     *     Due to guardband limits on r3xx/r4xx, we can only use
3780de2362d3Smrg     *     the single triangle up to 2880 pixels; above that we
3781de2362d3Smrg     *     render as a quad.
3782de2362d3Smrg     */
3783de2362d3Smrg
3784de2362d3Smrg    while (nBox--) {
3785de2362d3Smrg	float srcX, srcY, srcw, srch;
3786de2362d3Smrg	int dstX, dstY, dstw, dsth;
3787de2362d3Smrg	int draw_size = 3 * pPriv->vtx_count + 4 + 2 + 3;
3788de2362d3Smrg
3789de2362d3Smrg	if (draw_size > radeon_cs_space_remaining(pScrn)) {
379018781e08Smrg	    radeon_cs_flush_indirect(pScrn);
379118781e08Smrg	    if (!R500PrepareTexturedVideo(pScrn, pPriv))
3792de2362d3Smrg		return;
3793de2362d3Smrg	}
3794de2362d3Smrg
3795de2362d3Smrg	dstX = pBox->x1 + dstxoff;
3796de2362d3Smrg	dstY = pBox->y1 + dstyoff;
3797de2362d3Smrg	dstw = pBox->x2 - pBox->x1;
3798de2362d3Smrg	dsth = pBox->y2 - pBox->y1;
3799de2362d3Smrg
3800de2362d3Smrg	srcX = pPriv->src_x;
3801de2362d3Smrg	srcX += ((pBox->x1 - pPriv->drw_x) *
3802de2362d3Smrg		 pPriv->src_w) / (float)pPriv->dst_w;
3803de2362d3Smrg	srcY = pPriv->src_y;
3804de2362d3Smrg	srcY += ((pBox->y1 - pPriv->drw_y) *
3805de2362d3Smrg		 pPriv->src_h) / (float)pPriv->dst_h;
3806de2362d3Smrg
3807de2362d3Smrg	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
3808de2362d3Smrg	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
3809de2362d3Smrg
381018781e08Smrg	BEGIN_RING(2*2);
381118781e08Smrg	OUT_RING_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) |
3812de2362d3Smrg					 ((dstY) << R300_SCISSOR_Y_SHIFT)));
381318781e08Smrg	OUT_RING_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) |
3814de2362d3Smrg					 ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT)));
381518781e08Smrg	ADVANCE_RING();
3816de2362d3Smrg
3817de2362d3Smrg	BEGIN_RING(3 * pPriv->vtx_count + 4);
3818de2362d3Smrg	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
3819de2362d3Smrg			    3 * pPriv->vtx_count));
3820de2362d3Smrg	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
3821de2362d3Smrg		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
3822de2362d3Smrg		 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
382318781e08Smrg
3824de2362d3Smrg	if (pPriv->bicubic_enabled) {
3825de2362d3Smrg	    VTX_OUT_6((float)dstX,            (float)dstY,
3826de2362d3Smrg		      (float)srcX / pPriv->w, (float)srcY / pPriv->h,
3827de2362d3Smrg		      (float)srcX + 0.5,      (float)srcY + 0.5);
3828de2362d3Smrg	    VTX_OUT_6((float)dstX,            (float)(dstY + dstw + dsth),
3829de2362d3Smrg		      (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h,
3830de2362d3Smrg		      (float)srcX + 0.5,      (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
3831de2362d3Smrg	    VTX_OUT_6((float)(dstX + dstw + dsth),                       (float)dstY,
3832de2362d3Smrg		      ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
3833de2362d3Smrg		      (float)srcY / pPriv->h,
3834de2362d3Smrg		      (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
3835de2362d3Smrg		      (float)srcY + 0.5);
3836de2362d3Smrg	} else {
3837de2362d3Smrg	    /*
3838de2362d3Smrg	     * Render a big, scissored triangle. This means
3839de2362d3Smrg	     * increasing the triangle size and adjusting
3840de2362d3Smrg	     * texture coordinates.
3841de2362d3Smrg	     */
3842de2362d3Smrg	    VTX_OUT_4((float)dstX,            (float)dstY,
3843de2362d3Smrg		      (float)srcX / pPriv->w, (float)srcY / pPriv->h);
3844de2362d3Smrg	    VTX_OUT_4((float)dstX,                              (float)(dstY + dsth + dstw),
3845de2362d3Smrg		      (float)srcX / pPriv->w, ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / pPriv->h);
3846de2362d3Smrg	    VTX_OUT_4((float)(dstX + dstw + dsth),              (float)dstY,
3847de2362d3Smrg		      ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / pPriv->w,
3848de2362d3Smrg		      (float)srcY / pPriv->h);
3849de2362d3Smrg	}
3850de2362d3Smrg
3851de2362d3Smrg	/* flushing is pipelined, free/finish is not */
385218781e08Smrg	OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
3853de2362d3Smrg
3854de2362d3Smrg	ADVANCE_RING();
3855de2362d3Smrg
3856de2362d3Smrg	pBox++;
3857de2362d3Smrg    }
3858de2362d3Smrg
385918781e08Smrg    BEGIN_RING(2*3);
386018781e08Smrg    OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA);
386118781e08Smrg    OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
386218781e08Smrg    OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
386318781e08Smrg    ADVANCE_RING();
3864de2362d3Smrg
3865de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
3866de2362d3Smrg}
3867de2362d3Smrg
/*
 * Drop the vertex-emission helper macros so they do not stay defined past
 * this point.  VTX_OUT_6 is defined near the top of this file; VTX_OUT_4
 * is presumably defined alongside it.
 */
#undef VTX_OUT_4
#undef VTX_OUT_6