radeon_exa_render.c revision 18781e08
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *    Alex Deucher <alexander.deucher@amd.com>
30 *
31 */
32
/* Blend setup for one Render composite operator; see RadeonBlendOp[]. */
struct blendinfo {
    Bool dst_alpha;	/* set on entries whose source blend factor reads destination alpha */
    Bool src_alpha;	/* set on entries whose destination blend factor reads source alpha */
    uint32_t blend_cntl;	/* RADEON_SRC_BLEND_* | RADEON_DST_BLEND_* factor bits */
};
38
/*
 * Blend factors for each supported composite operator.
 *
 * The table is indexed directly by the operator number (RadeonBlendOp[op]),
 * so the entry order is significant; the comment on each entry names the
 * operator that slot belongs to.  Callers reject any op that is not smaller
 * than the number of entries before indexing.
 */
static struct blendinfo RadeonBlendOp[] = {
    /* Clear */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
    /* Src */
    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
    /* Dst */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
    /* Over */
    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* OverReverse */
    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
    /* In */
    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
    /* InReverse */
    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
    /* Out */
    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
    /* OutReverse */
    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* Atop */
    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* AtopReverse */
    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
    /* Xor */
    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* Add */
    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
};
67
/* One row of a texture format table: Render picture format -> hardware format. */
struct formatinfo {
    int fmt;		/* Render picture format (PICT_*) */
    uint32_t card_fmt;	/* matching hardware texture format bits */
};
72
/* Note on texture formats:
 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
 *
 * Picture formats usable as textures on R100.  The table is searched
 * linearly for a matching fmt by R100CheckCompositeTexture() and
 * R100TextureSetup().
 */
static struct formatinfo R100TexFormats[] = {
	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
};
84
/*
 * Picture formats usable as textures on R200.  The table is searched
 * linearly for a matching fmt by R200CheckCompositeTexture() and
 * R200TextureSetup().  Formats that carry alpha add R200_TXFORMAT_ALPHA_IN_MAP.
 */
static struct formatinfo R200TexFormats[] = {
    {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
    {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
    {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
    {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
    {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
    {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
    {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
    {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
};
95
/*
 * Picture formats and the R300 texture format word for each, built with
 * R300_EASY_TX_FORMAT.  The x* (no alpha) variants pass ONE as the fourth
 * macro argument where the a* variants pass a texel component; a8 passes
 * ZERO for the first three arguments.
 */
static struct formatinfo R300TexFormats[] = {
    {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
    {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
    {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
    {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
    {PICT_b8g8r8a8,	R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)},
    {PICT_b8g8r8x8,	R300_EASY_TX_FORMAT(W, Z, Y, ONE, W8Z8Y8X8)},
    {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
    {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
    {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
    {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
};
108
109/* Common Radeon setup code */
110
111static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
112{
113    switch (pDstPicture->format) {
114    case PICT_a8r8g8b8:
115    case PICT_x8r8g8b8:
116	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
117	break;
118    case PICT_r5g6b5:
119	*dst_format = RADEON_COLOR_FORMAT_RGB565;
120	break;
121    case PICT_a1r5g5b5:
122    case PICT_x1r5g5b5:
123	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
124	break;
125    case PICT_a8:
126	*dst_format = RADEON_COLOR_FORMAT_RGB8;
127	break;
128    default:
129	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
130			(int)pDstPicture->format));
131    }
132
133    return TRUE;
134}
135
136static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
137{
138    switch (pDstPicture->format) {
139    case PICT_a8r8g8b8:
140    case PICT_x8r8g8b8:
141    case PICT_a8b8g8r8:
142    case PICT_x8b8g8r8:
143    case PICT_b8g8r8a8:
144    case PICT_b8g8r8x8:
145	*dst_format = R300_COLORFORMAT_ARGB8888;
146	break;
147    case PICT_r5g6b5:
148	*dst_format = R300_COLORFORMAT_RGB565;
149	break;
150    case PICT_a1r5g5b5:
151    case PICT_x1r5g5b5:
152	*dst_format = R300_COLORFORMAT_ARGB1555;
153	break;
154    case PICT_a8:
155	*dst_format = R300_COLORFORMAT_I8;
156	break;
157    default:
158	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
159	       (int)pDstPicture->format));
160    }
161    return TRUE;
162}
163
164static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
165{
166    uint32_t sblend, dblend;
167
168    sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK;
169    dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK;
170
171    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
172     * it as always 1.
173     */
174    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
175	if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
176	    sblend = RADEON_SRC_BLEND_GL_ONE;
177	else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
178	    sblend = RADEON_SRC_BLEND_GL_ZERO;
179    }
180
181    /* If the source alpha is being used, then we should only be in a case where
182     * the source blend factor is 0, and the source blend value is the mask
183     * channels multiplied by the source picture's alpha.
184     */
185    if (pMask && pMask->componentAlpha && RadeonBlendOp[op].src_alpha) {
186	if (dblend == RADEON_DST_BLEND_GL_SRC_ALPHA) {
187	    dblend = RADEON_DST_BLEND_GL_SRC_COLOR;
188	} else if (dblend == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA) {
189	    dblend = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
190	}
191    }
192
193    return sblend | dblend;
194}
195
/* Overlays a float and a uint32_t on the same storage. */
union intfloat {
    float f;
    uint32_t i;
};
200
201/* Check if we need a software-fallback because of a repeating
202 *   non-power-of-two texture.
203 *
204 * canTile: whether we can emulate a repeat by drawing in tiles:
205 *   possible for the source, but not for the mask. (Actually
206 *   we could do tiling for the mask too, but dealing with the
207 *   combination of a tiled mask and a tiled source would be
208 *   a lot of complexity, so we handle only the most common
209 *   case of a repeating mask.)
210 */
211static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
212{
213    int w = pPict->pDrawable->width;
214    int h = pPict->pDrawable->height;
215    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
216
217    if ((repeatType == RepeatNormal || repeatType == RepeatReflect) &&
218	((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
219	!(repeatType == RepeatNormal && !pPict->transform && canTile))
220	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
221			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
222
223    return TRUE;
224}
225
226/* Determine if the pitch of the pixmap meets the criteria for being
227 * used as a repeating texture: no padding or only a single line texture.
228 */
229static Bool RADEONPitchMatches(PixmapPtr pPix)
230{
231    int w = pPix->drawable.width;
232    int h = pPix->drawable.height;
233    uint32_t txpitch = exaGetPixmapPitch(pPix);
234
235    if (h > 1 && (RADEON_ALIGN(w * pPix->drawable.bitsPerPixel / 8, 32)) != txpitch)
236	return FALSE;
237
238    return TRUE;
239}
240
241/* We can't turn on repeats normally for a non-power-of-two dimension,
242 * but if the source isn't transformed, we can get the same effect
243 * by drawing the image in multiple tiles. (A common case that it's
244 * important to get right is drawing a strip of a NPOTxPOT texture
245 * repeating in the POT direction. With tiling, this ends up as a
246 * a single tile on R300 and newer, which is perfect.)
247 *
248 * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
249 *   one direction and POT in the other in the POT direction; on
250 *   older chips we can only repeat at all if the texture is POT in
251 *   both directions.
252 *
253 * needMatchingPitch: On R100/R200, we can only repeat horizontally if
254 *   there is no padding in the texture. Textures with small POT widths
255 *   (1,2,4,8) thus can't be tiled.
256 */
257static Bool RADEONSetupSourceTile(PicturePtr pPict,
258				  PixmapPtr pPix,
259				  Bool canTile1d,
260				  Bool needMatchingPitch)
261{
262    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
263    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
264
265    info->accel_state->need_src_tile_x = info->accel_state->need_src_tile_y = FALSE;
266    info->accel_state->src_tile_width = info->accel_state->src_tile_height = 65536; /* "infinite" */
267
268    if (repeatType == RepeatNormal || repeatType == RepeatReflect) {
269	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
270
271	int w = pPict->pDrawable ? pPict->pDrawable->width : 1;
272	int h = pPict->pDrawable ? pPict->pDrawable->height : 1;
273
274	if (pPict->transform) {
275	    if (badPitch)
276		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
277				 w, (unsigned)exaGetPixmapPitch(pPix)));
278	} else {
279	    info->accel_state->need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
280	    info->accel_state->need_src_tile_y = (h & (h - 1)) != 0;
281
282	    if ((info->accel_state->need_src_tile_x ||
283		 info->accel_state->need_src_tile_y) &&
284		repeatType != RepeatNormal)
285		RADEON_FALLBACK(("Can only tile RepeatNormal at this time\n"));
286
287	    if (!canTile1d)
288		info->accel_state->need_src_tile_x =
289		    info->accel_state->need_src_tile_y =
290		    info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y;
291	}
292
293	if (info->accel_state->need_src_tile_x)
294	    info->accel_state->src_tile_width = w;
295	if (info->accel_state->need_src_tile_y)
296	    info->accel_state->src_tile_height = h;
297    }
298
299    return TRUE;
300}
301
302/* R100-specific code */
303
304static Bool R100CheckCompositeTexture(PicturePtr pPict,
305				      PicturePtr pDstPict,
306				      int op,
307				      int unit)
308{
309    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
310    int i;
311
312    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
313	if (R100TexFormats[i].fmt == pPict->format)
314	    break;
315    }
316    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
317	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
318			(int)pPict->format));
319
320    if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0))
321	return FALSE;
322
323    if (pPict->filter != PictFilterNearest &&
324	pPict->filter != PictFilterBilinear)
325    {
326	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
327    }
328
329    /* for REPEAT_NONE, Render semantics are that sampling outside the source
330     * picture results in alpha=0 pixels. We can implement this with a border color
331     * *if* our source texture has an alpha channel, otherwise we need to fall
332     * back. If we're not transformed then we hope that upper layers have clipped
333     * rendering to the bounds of the source drawable, in which case it doesn't
334     * matter. I have not, however, verified that the X server always does such
335     * clipping.
336     */
337    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
338	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
339	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
340    }
341
342    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
343	RADEON_FALLBACK(("non-affine transforms not supported\n"));
344
345    return TRUE;
346}
347
348static Bool R100TextureSetup(PicturePtr pPict, PixmapPtr pPix,
349					int unit)
350{
351    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
352    uint32_t txfilter, txformat, txoffset, txpitch;
353    unsigned int repeatType;
354    Bool repeat;
355    int i, w, h;
356    struct radeon_exa_pixmap_priv *driver_priv;
357
358    if (pPict->pDrawable) {
359	w = pPict->pDrawable->width;
360	h = pPict->pDrawable->height;
361	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
362    } else {
363	w = h = 1;
364	repeatType = RepeatNormal;
365    }
366
367    repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
368	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
369
370    txpitch = exaGetPixmapPitch(pPix);
371    txoffset = 0;
372
373    if ((txpitch & 0x1f) != 0)
374	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
375
376    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
377    {
378	if (R100TexFormats[i].fmt == pPict->format)
379	    break;
380    }
381    txformat = R100TexFormats[i].card_fmt;
382    if (RADEONPixmapIsColortiled(pPix))
383	txoffset |= RADEON_TXO_MACRO_TILE;
384
385    if (repeat) {
386	if (!RADEONPitchMatches(pPix))
387	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
388			     w, (unsigned)txpitch));
389
390	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
391	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
392    } else
393	txformat |= RADEON_TXFORMAT_NON_POWER2;
394    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
395
396    info->accel_state->texW[unit] = w;
397    info->accel_state->texH[unit] = h;
398
399    switch (pPict->filter) {
400    case PictFilterNearest:
401	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
402	break;
403    case PictFilterBilinear:
404	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
405	break;
406    default:
407	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
408    }
409
410    switch (repeatType) {
411	case RepeatNormal:
412	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
413		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
414	    else
415	        txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
416	    break;
417	case RepeatPad:
418	    txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
419	    break;
420	case RepeatReflect:
421	    txfilter |= RADEON_CLAMP_S_MIRROR | RADEON_CLAMP_T_MIRROR;
422	    break;
423	case RepeatNone:
424	    /* don't set an illegal clamp mode for rects */
425	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
426		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
427	    break;
428    }
429
430    BEGIN_ACCEL_RELOC(5, 1);
431    if (unit == 0) {
432	OUT_RING_REG(RADEON_PP_TXFILTER_0, txfilter);
433	OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat);
434	OUT_RING_REG(RADEON_PP_TEX_SIZE_0,
435	    (pPix->drawable.width - 1) |
436	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
437	OUT_RING_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
438
439	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_0, txoffset, pPix);
440	/* emit a texture relocation */
441    } else {
442	OUT_RING_REG(RADEON_PP_TXFILTER_1, txfilter);
443	OUT_RING_REG(RADEON_PP_TXFORMAT_1, txformat);
444
445	OUT_RING_REG(RADEON_PP_TEX_SIZE_1,
446	    (pPix->drawable.width - 1) |
447	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
448	OUT_RING_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
449	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_1, txoffset, pPix);
450	/* emit a texture relocation */
451    }
452    ADVANCE_RING();
453
454    if (pPict->transform != 0) {
455	info->accel_state->is_transform[unit] = TRUE;
456	info->accel_state->transform[unit] = pPict->transform;
457    } else {
458	info->accel_state->is_transform[unit] = FALSE;
459    }
460
461    return TRUE;
462}
463
464static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
465			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
466{
467    PixmapPtr pSrcPixmap, pDstPixmap;
468    uint32_t tmp1;
469
470    /* Check for unsupported compositing operations. */
471    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
472	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
473
474    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
475
476    if (pDstPixmap->drawable.width > 2048 ||
477	pDstPixmap->drawable.height > 2048) {
478	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
479			 pDstPixmap->drawable.width,
480			 pDstPixmap->drawable.height));
481    }
482
483    if (pSrcPicture->pDrawable) {
484	/* r100 limit should be 2048, there are issues with 2048
485	 * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
486	 */
487	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
488
489	if (pSrcPixmap->drawable.width > 2048 ||
490	    pSrcPixmap->drawable.height > 2048) {
491	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
492			     pSrcPixmap->drawable.width,
493			     pSrcPixmap->drawable.height));
494	}
495    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
496	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
497
498    if (pMaskPicture) {
499	PixmapPtr pMaskPixmap;
500
501	if (pMaskPicture->pDrawable) {
502	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
503
504	    if (pMaskPixmap->drawable.width > 2048 ||
505		pMaskPixmap->drawable.height > 2048) {
506		RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
507				 pMaskPixmap->drawable.width,
508				 pMaskPixmap->drawable.height));
509	    }
510	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
511	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
512
513	if (pMaskPicture->componentAlpha) {
514	    /* Check if it's component alpha that relies on a source alpha and
515	     * on the source value.  We can only get one of those into the
516	     * single source value that we get to blend with.
517	     */
518	    if (RadeonBlendOp[op].src_alpha &&
519		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
520		RADEON_SRC_BLEND_GL_ZERO) {
521		RADEON_FALLBACK(("Component alpha not supported with source "
522				 "alpha and source value blending.\n"));
523	    }
524	}
525
526	if (!R100CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
527	    return FALSE;
528    }
529
530    if (!R100CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
531	return FALSE;
532
533    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
534	return FALSE;
535
536    return TRUE;
537}
538
539static Bool
540RADEONPrepareCompositeCS(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
541			    PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask,
542			    PixmapPtr pDst)
543{
544    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
545    int ret;
546
547    info->accel_state->composite_op = op;
548    info->accel_state->dst_pic = pDstPicture;
549    info->accel_state->msk_pic = pMaskPicture;
550    info->accel_state->src_pic = pSrcPicture;
551    info->accel_state->dst_pix = pDst;
552    info->accel_state->msk_pix = pMask;
553    info->accel_state->src_pix = pSrc;
554
555    radeon_cs_space_reset_bos(info->cs);
556
557    radeon_add_pixmap(info->cs, pSrc,
558		      RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
559
560    if (pMask)
561	radeon_add_pixmap(info->cs, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
562
563    radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
564
565    ret = radeon_cs_space_check(info->cs);
566    if (ret)
567	RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
568
569    return TRUE;
570}
571
572static Bool R100PrepareComposite(int op,
573					    PicturePtr pSrcPicture,
574					    PicturePtr pMaskPicture,
575					    PicturePtr pDstPicture,
576					    PixmapPtr pSrc,
577					    PixmapPtr pMask,
578					    PixmapPtr pDst)
579{
580    ScreenPtr pScreen = pDst->drawable.pScreen;
581    RINFO_FROM_SCREEN(pScreen);
582    uint32_t dst_format, dst_pitch, colorpitch;
583    uint32_t pp_cntl, blendcntl, cblend, ablend;
584    int pixel_shift;
585    struct radeon_exa_pixmap_priv *driver_priv;
586
587    TRACE;
588
589    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
590	return FALSE;
591
592    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
593	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
594
595    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
596
597    dst_pitch = exaGetPixmapPitch(pDst);
598    colorpitch = dst_pitch >> pixel_shift;
599    if (RADEONPixmapIsColortiled(pDst))
600	colorpitch |= RADEON_COLOR_TILE_ENABLE;
601
602    if (!pSrc) {
603	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
604	if (!pSrc)
605	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
606    }
607
608    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
609	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
610
611    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
612	return FALSE;
613
614    if (pMaskPicture && !pMask) {
615	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
616	if (!pMask) {
617	    if (!pSrcPicture->pDrawable)
618		pScreen->DestroyPixmap(pSrc);
619	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
620	}
621    }
622
623    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
624			     pSrc, pMask, pDst);
625
626    /* switch to 3D after doing buffer space checks as the latter may flush */
627    RADEON_SWITCH_TO_3D();
628
629    if (!R100TextureSetup(pSrcPicture, pSrc, 0))
630	return FALSE;
631    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
632
633    if (pMask != NULL) {
634	if (!R100TextureSetup(pMaskPicture, pMask, 1))
635	    return FALSE;
636	pp_cntl |= RADEON_TEX_1_ENABLE;
637    } else {
638	info->accel_state->is_transform[1] = FALSE;
639    }
640
641    BEGIN_ACCEL_RELOC(10, 2);
642    OUT_RING_REG(RADEON_PP_CNTL, pp_cntl);
643    OUT_RING_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
644    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
645    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
646
647    /* IN operator: Multiply src by mask components or mask alpha.
648     * BLEND_CTL_ADD is A * B + C.
649     * If a source is a8, we have to explicitly zero its color values.
650     * If the destination is a8, we have to route the alpha to red, I think.
651     * If we're doing component alpha where the source for blending is going to
652     * be the source alpha (and there's no source value used), we have to zero
653     * the source's color values.
654     */
655    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
656    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
657
658    if (pDstPicture->format == PICT_a8 ||
659	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
660    {
661	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
662    } else if (pSrcPicture->format == PICT_a8)
663	cblend |= RADEON_COLOR_ARG_A_ZERO;
664    else
665	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
666    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
667
668    if (pMask) {
669	if (pMaskPicture->componentAlpha &&
670	    pDstPicture->format != PICT_a8)
671	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
672	else
673	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
674	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
675    } else {
676	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
677	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
678    }
679
680    OUT_RING_REG(RADEON_PP_TXCBLEND_0, cblend);
681    OUT_RING_REG(RADEON_PP_TXABLEND_0, ablend);
682    if (pMask)
683	OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
684					  RADEON_SE_VTX_FMT_ST0 |
685					  RADEON_SE_VTX_FMT_ST1));
686    else
687	OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
688					  RADEON_SE_VTX_FMT_ST0));
689    /* Op operator. */
690    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
691
692    OUT_RING_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
693
694    OUT_RING_REG(RADEON_RE_TOP_LEFT, 0);
695    OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width - 1) << RADEON_RE_WIDTH_SHIFT) |
696					   ((pDst->drawable.height - 1) << RADEON_RE_HEIGHT_SHIFT)));
697    ADVANCE_RING();
698
699    return TRUE;
700}
701
702static Bool R200CheckCompositeTexture(PicturePtr pPict,
703				      PicturePtr pDstPict,
704				      int op,
705				      int unit)
706{
707    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
708    int i;
709
710    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
711    {
712	if (R200TexFormats[i].fmt == pPict->format)
713	    break;
714    }
715    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
716	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
717			 (int)pPict->format));
718
719    if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0))
720	return FALSE;
721
722    if (pPict->filter != PictFilterNearest &&
723	pPict->filter != PictFilterBilinear)
724	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
725
726    /* for REPEAT_NONE, Render semantics are that sampling outside the source
727     * picture results in alpha=0 pixels. We can implement this with a border color
728     * *if* our source texture has an alpha channel, otherwise we need to fall
729     * back. If we're not transformed then we hope that upper layers have clipped
730     * rendering to the bounds of the source drawable, in which case it doesn't
731     * matter. I have not, however, verified that the X server always does such
732     * clipping.
733     */
734    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
735	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
736	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
737    }
738
739    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
740	RADEON_FALLBACK(("non-affine transforms not supported\n"));
741
742    return TRUE;
743}
744
745static Bool R200TextureSetup(PicturePtr pPict, PixmapPtr pPix,
746					int unit)
747{
748    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
749    uint32_t txfilter, txformat, txoffset, txpitch;
750    unsigned int repeatType;
751    Bool repeat;
752    int i, w, h;
753    struct radeon_exa_pixmap_priv *driver_priv;
754
755    if (pPict->pDrawable) {
756	w = pPict->pDrawable->width;
757	h = pPict->pDrawable->height;
758	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
759    } else {
760	w = h = 1;
761	repeatType = RepeatNormal;
762    }
763
764    repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
765	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
766
767    txpitch = exaGetPixmapPitch(pPix);
768
769    txoffset = 0;
770
771    if ((txpitch & 0x1f) != 0)
772	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
773
774    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
775    {
776	if (R200TexFormats[i].fmt == pPict->format)
777	    break;
778    }
779    txformat = R200TexFormats[i].card_fmt;
780    if (RADEONPixmapIsColortiled(pPix))
781	txoffset |= R200_TXO_MACRO_TILE;
782
783    if (repeat) {
784	if (!RADEONPitchMatches(pPix))
785	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
786			     w, (unsigned)txpitch));
787
788	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
789	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
790    } else
791	txformat |= R200_TXFORMAT_NON_POWER2;
792    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
793
794    info->accel_state->texW[unit] = w;
795    info->accel_state->texH[unit] = h;
796
797    switch (pPict->filter) {
798    case PictFilterNearest:
799	txfilter = (R200_MAG_FILTER_NEAREST |
800		    R200_MIN_FILTER_NEAREST);
801	break;
802    case PictFilterBilinear:
803	txfilter = (R200_MAG_FILTER_LINEAR |
804		    R200_MIN_FILTER_LINEAR);
805	break;
806    default:
807	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
808    }
809
810    switch (repeatType) {
811	case RepeatNormal:
812	    if (txformat & R200_TXFORMAT_NON_POWER2)
813		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
814	    else
815	        txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
816	    break;
817	case RepeatPad:
818	    txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
819	    break;
820	case RepeatReflect:
821	    txfilter |= R200_CLAMP_S_MIRROR | R200_CLAMP_T_MIRROR;
822	    break;
823	case RepeatNone:
824	    /* don't set an illegal clamp mode for rect textures */
825	    if (txformat & R200_TXFORMAT_NON_POWER2)
826		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
827	    break;
828    }
829
830    BEGIN_ACCEL_RELOC(6, 1);
831    if (unit == 0) {
832	OUT_RING_REG(R200_PP_TXFILTER_0, txfilter);
833	OUT_RING_REG(R200_PP_TXFORMAT_0, txformat);
834	OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0);
835	OUT_RING_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
836		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
837	OUT_RING_REG(R200_PP_TXPITCH_0, txpitch - 32);
838	EMIT_READ_OFFSET(R200_PP_TXOFFSET_0, txoffset, pPix);
839    } else {
840	OUT_RING_REG(R200_PP_TXFILTER_1, txfilter);
841	OUT_RING_REG(R200_PP_TXFORMAT_1, txformat);
842	OUT_RING_REG(R200_PP_TXFORMAT_X_1, 0);
843	OUT_RING_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
844		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
845	OUT_RING_REG(R200_PP_TXPITCH_1, txpitch - 32);
846	EMIT_READ_OFFSET(R200_PP_TXOFFSET_1, txoffset, pPix);
847	/* emit a texture relocation */
848    }
849    ADVANCE_RING();
850
851    if (pPict->transform != 0) {
852	info->accel_state->is_transform[unit] = TRUE;
853	info->accel_state->transform[unit] = pPict->transform;
854    } else {
855	info->accel_state->is_transform[unit] = FALSE;
856    }
857
858    return TRUE;
859}
860
861static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
862			       PicturePtr pDstPicture)
863{
864    PixmapPtr pSrcPixmap, pDstPixmap;
865    uint32_t tmp1;
866
867    TRACE;
868
869    /* Check for unsupported compositing operations. */
870    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
871	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
872
873    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
874
875    if (pDstPixmap->drawable.width > 2048 ||
876	pDstPixmap->drawable.height > 2048) {
877	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
878			 pDstPixmap->drawable.width,
879			 pDstPixmap->drawable.height));
880    }
881
882    if (pSrcPicture->pDrawable) {
883	/* r200 limit should be 2048, there are issues with 2048
884	 * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
885	 */
886	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
887
888	if (pSrcPixmap->drawable.width > 2048 ||
889	    pSrcPixmap->drawable.height > 2048) {
890	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
891			     pSrcPixmap->drawable.width,
892			     pSrcPixmap->drawable.height));
893	}
894    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
895	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
896
897    if (pMaskPicture) {
898	PixmapPtr pMaskPixmap;
899
900	if (pMaskPicture->pDrawable) {
901	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
902
903	    if (pMaskPixmap->drawable.width > 2048 ||
904		pMaskPixmap->drawable.height > 2048) {
905		RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
906				 pMaskPixmap->drawable.width,
907				 pMaskPixmap->drawable.height));
908	    }
909	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
910	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
911
912	if (pMaskPicture->componentAlpha) {
913	    /* Check if it's component alpha that relies on a source alpha and
914	     * on the source value.  We can only get one of those into the
915	     * single source value that we get to blend with.
916	     */
917	    if (RadeonBlendOp[op].src_alpha &&
918		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
919		RADEON_SRC_BLEND_GL_ZERO) {
920		RADEON_FALLBACK(("Component alpha not supported with source "
921				 "alpha and source value blending.\n"));
922	    }
923	}
924
925	if (!R200CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
926	    return FALSE;
927    }
928
929    if (!R200CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
930	return FALSE;
931
932    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
933	return FALSE;
934
935    return TRUE;
936}
937
938static Bool R200PrepareComposite(int op, PicturePtr pSrcPicture,
939				PicturePtr pMaskPicture, PicturePtr pDstPicture,
940				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
941{
942    ScreenPtr pScreen = pDst->drawable.pScreen;
943    RINFO_FROM_SCREEN(pScreen);
944    uint32_t dst_format, dst_pitch;
945    uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
946    int pixel_shift;
947    struct radeon_exa_pixmap_priv *driver_priv;
948
949    TRACE;
950
951    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
952	return FALSE;
953
954    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
955	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
956
957    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
958
959    dst_pitch = exaGetPixmapPitch(pDst);
960    colorpitch = dst_pitch >> pixel_shift;
961    if (RADEONPixmapIsColortiled(pDst))
962	colorpitch |= RADEON_COLOR_TILE_ENABLE;
963
964    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
965	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
966
967    if (!pSrc) {
968	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
969	if (!pSrc)
970	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
971    }
972
973    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
974	return FALSE;
975
976    if (pMaskPicture && !pMask) {
977	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
978	if (!pMask) {
979	    if (!pSrcPicture->pDrawable)
980		pScreen->DestroyPixmap(pSrc);
981	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
982	}
983    }
984
985    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
986			     pSrc, pMask, pDst);
987
988    /* switch to 3D after doing buffer space checks as it may flush */
989    RADEON_SWITCH_TO_3D();
990
991    if (!R200TextureSetup(pSrcPicture, pSrc, 0))
992	return FALSE;
993    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
994
995    if (pMask != NULL) {
996	if (!R200TextureSetup(pMaskPicture, pMask, 1))
997	    return FALSE;
998	pp_cntl |= RADEON_TEX_1_ENABLE;
999    } else {
1000	info->accel_state->is_transform[1] = FALSE;
1001    }
1002
1003    BEGIN_ACCEL_RELOC(12, 2);
1004
1005    OUT_RING_REG(RADEON_PP_CNTL, pp_cntl);
1006    OUT_RING_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
1007
1008    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
1009    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
1010
1011    OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
1012    if (pMask)
1013	OUT_RING_REG(R200_SE_VTX_FMT_1,
1014		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
1015		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
1016    else
1017	OUT_RING_REG(R200_SE_VTX_FMT_1,
1018		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
1019
1020
1021
1022    /* IN operator: Multiply src by mask components or mask alpha.
1023     * BLEND_CTL_ADD is A * B + C.
1024     * If a picture is a8, we have to explicitly zero its color values.
1025     * If the destination is a8, we have to route the alpha to red, I think.
1026     * If we're doing component alpha where the source for blending is going to
1027     * be the source alpha (and there's no source value used), we have to zero
1028     * the source's color values.
1029     */
1030    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
1031    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
1032
1033    if (pDstPicture->format == PICT_a8 ||
1034	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
1035    {
1036	cblend |= R200_TXC_ARG_A_R0_ALPHA;
1037    } else if (pSrcPicture->format == PICT_a8)
1038	cblend |= R200_TXC_ARG_A_ZERO;
1039    else
1040	cblend |= R200_TXC_ARG_A_R0_COLOR;
1041    ablend |= R200_TXA_ARG_A_R0_ALPHA;
1042
1043    if (pMask) {
1044	if (pMaskPicture->componentAlpha &&
1045	    pDstPicture->format != PICT_a8)
1046	    cblend |= R200_TXC_ARG_B_R1_COLOR;
1047	else
1048	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
1049	ablend |= R200_TXA_ARG_B_R1_ALPHA;
1050    } else {
1051	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
1052	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
1053    }
1054
1055    OUT_RING_REG(R200_PP_TXCBLEND_0, cblend);
1056    OUT_RING_REG(R200_PP_TXCBLEND2_0,
1057	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
1058    OUT_RING_REG(R200_PP_TXABLEND_0, ablend);
1059    OUT_RING_REG(R200_PP_TXABLEND2_0,
1060	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
1061
1062    /* Op operator. */
1063    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1064    OUT_RING_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
1065
1066    OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width - 1) << RADEON_RE_WIDTH_SHIFT) |
1067					   ((pDst->drawable.height - 1) << RADEON_RE_HEIGHT_SHIFT)));
1068
1069    ADVANCE_RING();
1070
1071    return TRUE;
1072}
1073
1074static Bool R300CheckCompositeTexture(PicturePtr pPict,
1075				      PicturePtr pDstPict,
1076				      int op,
1077				      int unit,
1078				      Bool is_r500)
1079{
1080    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1081    int i;
1082
1083    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1084    {
1085	if (R300TexFormats[i].fmt == pPict->format)
1086	    break;
1087    }
1088    if (i == sizeof(R300TexFormats) / sizeof(R300TexFormats[0]))
1089	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1090			 (int)pPict->format));
1091
1092    if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0)) {
1093#if 0
1094	      		struct radeon_exa_pixmap_priv *driver_priv;
1095		PixmapPtr pPix;
1096
1097    		pPix = RADEONGetDrawablePixmap(pPict->pDrawable);
1098		driver_priv = exaGetPixmapDriverPrivate(pPix);
1099		//TODOradeon_bufmgr_gem_force_gtt(driver_priv->bo);
1100#endif
1101	return FALSE;
1102    }
1103
1104    if (pPict->filter != PictFilterNearest &&
1105	pPict->filter != PictFilterBilinear)
1106	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1107
1108    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1109     * picture results in alpha=0 pixels. We can implement this with a border color
1110     * *if* our source texture has an alpha channel, otherwise we need to fall
1111     * back. If we're not transformed then we hope that upper layers have clipped
1112     * rendering to the bounds of the source drawable, in which case it doesn't
1113     * matter. I have not, however, verified that the X server always does such
1114     * clipping.
1115     */
1116    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
1117	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1118	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1119    }
1120
1121    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
1122	RADEON_FALLBACK(("non-affine transforms not supported\n"));
1123
1124    return TRUE;
1125}
1126
1127static Bool R300TextureSetup(PicturePtr pPict, PixmapPtr pPix,
1128					int unit)
1129{
1130    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1131    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0;
1132    int w, h;
1133    int i, pixel_shift, out_size = 6;
1134    unsigned int repeatType;
1135    struct radeon_exa_pixmap_priv *driver_priv;
1136
1137    TRACE;
1138
1139    if (pPict->pDrawable) {
1140	w = pPict->pDrawable->width;
1141	h = pPict->pDrawable->height;
1142	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1143    } else {
1144	w = h = 1;
1145	repeatType = RepeatNormal;
1146    }
1147
1148    txpitch = exaGetPixmapPitch(pPix);
1149    txoffset = 0;
1150
1151    if ((txpitch & 0x1f) != 0)
1152	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1153
1154    /* TXPITCH = pixels (texels) per line - 1 */
1155    pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1156    txpitch >>= pixel_shift;
1157    txpitch -= 1;
1158
1159    if (RADEONPixmapIsColortiled(pPix))
1160	txoffset |= R300_MACRO_TILE;
1161
1162    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1163    {
1164	if (R300TexFormats[i].fmt == pPict->format)
1165	    break;
1166    }
1167
1168    txformat1 = R300TexFormats[i].card_fmt;
1169
1170    if (IS_R300_3D) {
1171	if ((unit == 0) && info->accel_state->msk_pic)
1172	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_0;
1173	else if (unit == 1)
1174	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_1;
1175    }
1176
1177    txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1178		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1179
1180    if (IS_R500_3D && ((w - 1) & 0x800))
1181	txpitch |= R500_TXWIDTH_11;
1182
1183    if (IS_R500_3D && ((h - 1) & 0x800))
1184	txpitch |= R500_TXHEIGHT_11;
1185
1186    if (info->ChipFamily == CHIP_FAMILY_R520) {
1187	unsigned us_width = (w - 1) & 0x7ff;
1188	unsigned us_height = (h - 1) & 0x7ff;
1189	unsigned us_depth = 0;
1190
1191	if (w > 2048) {
1192	    us_width = (0x7ff + us_width) >> 1;
1193	    us_depth |= 0x0d;
1194	}
1195	if (h > 2048) {
1196	    us_height = (0x7ff + us_height) >> 1;
1197	    us_depth |= 0x0e;
1198	}
1199
1200	us_format = (us_width << R300_TXWIDTH_SHIFT) |
1201		    (us_height << R300_TXHEIGHT_SHIFT) |
1202		    (us_depth << R300_TXDEPTH_SHIFT);
1203	out_size++;
1204    }
1205
1206    /* Use TXPITCH instead of TXWIDTH for address computations: we could
1207     * omit this if there is no padding, but there is no apparent advantage
1208     * in doing so.
1209     */
1210    txformat0 |= R300_TXPITCH_EN;
1211
1212    txfilter = (unit << R300_TX_ID_SHIFT);
1213
1214    switch (repeatType) {
1215    case RepeatNormal:
1216	if (unit != 0 || !info->accel_state->need_src_tile_x)
1217	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1218	else
1219	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1220
1221	if (unit != 0 || !info->accel_state->need_src_tile_y)
1222	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1223	else
1224	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1225
1226	break;
1227    case RepeatPad:
1228	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1229	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST);
1230	break;
1231    case RepeatReflect:
1232	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_MIRROR) |
1233	    R300_TX_CLAMP_T(R300_TX_CLAMP_MIRROR);
1234	break;
1235    case RepeatNone:
1236	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1237	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1238	break;
1239    }
1240
1241    switch (pPict->filter) {
1242    case PictFilterNearest:
1243	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1244	break;
1245    case PictFilterBilinear:
1246	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1247	break;
1248    default:
1249	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1250    }
1251
1252    if (repeatType == RepeatNone)
1253	out_size++;
1254    BEGIN_ACCEL_RELOC(out_size, 1);
1255    OUT_RING_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1256    OUT_RING_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1257    OUT_RING_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1258    OUT_RING_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1259    OUT_RING_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1260
1261    EMIT_READ_OFFSET((R300_TX_OFFSET_0 + (unit * 4)), txoffset, pPix);
1262
1263    if (repeatType == RepeatNone)
1264	OUT_RING_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1265    if (info->ChipFamily == CHIP_FAMILY_R520)
1266	OUT_RING_REG(R500_US_FORMAT0_0 + (unit * 4), us_format);
1267    ADVANCE_RING();
1268
1269    if (pPict->transform != 0) {
1270	info->accel_state->is_transform[unit] = TRUE;
1271	info->accel_state->transform[unit] = pPict->transform;
1272
1273	/* setup the PVS consts */
1274	if (info->accel_state->has_tcl) {
1275	    info->accel_state->texW[unit] = 1;
1276	    info->accel_state->texH[unit] = 1;
1277	    BEGIN_RING(2*9);
1278	    if (IS_R300_3D)
1279		OUT_RING_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1280	    else
1281		OUT_RING_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1282
1283	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][0])));
1284	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][1])));
1285	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][2])));
1286	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1287
1288	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][0])));
1289	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][1])));
1290	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][2])));
1291	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1292
1293	    ADVANCE_RING();
1294	} else {
1295	    info->accel_state->texW[unit] = w;
1296	    info->accel_state->texH[unit] = h;
1297	}
1298    } else {
1299	info->accel_state->is_transform[unit] = FALSE;
1300
1301	/* setup the PVS consts */
1302	if (info->accel_state->has_tcl) {
1303	    info->accel_state->texW[unit] = 1;
1304	    info->accel_state->texH[unit] = 1;
1305
1306	    BEGIN_RING(2*9);
1307	    if (IS_R300_3D)
1308		OUT_RING_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1309	    else
1310		OUT_RING_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1311
1312	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1313	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1314	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1315	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1316
1317	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1318	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1319	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1320	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1321
1322	    ADVANCE_RING();
1323	} else {
1324	    info->accel_state->texW[unit] = w;
1325	    info->accel_state->texH[unit] = h;
1326	}
1327    }
1328
1329    return TRUE;
1330}
1331
1332static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1333			       PicturePtr pDstPicture)
1334{
1335    uint32_t tmp1;
1336    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
1337    PixmapPtr pSrcPixmap, pDstPixmap;
1338    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1339    RADEONInfoPtr info = RADEONPTR(pScrn);
1340    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1341
1342    TRACE;
1343
1344    /* Check for unsupported compositing operations. */
1345    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
1346	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1347
1348    if (IS_R500_3D) {
1349	max_tex_w = 4096;
1350	max_tex_h = 4096;
1351	max_dst_w = 4096;
1352	max_dst_h = 4096;
1353    } else {
1354	max_tex_w = 2048;
1355	max_tex_h = 2048;
1356	if (IS_R400_3D) {
1357	    max_dst_w = 4021;
1358	    max_dst_h = 4021;
1359	} else {
1360	    max_dst_w = 2560;
1361	    max_dst_h = 2560;
1362	}
1363    }
1364
1365    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1366
1367    if (pDstPixmap->drawable.width > max_dst_w ||
1368	pDstPixmap->drawable.height > max_dst_h) {
1369	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1370			 pDstPixmap->drawable.width,
1371			 pDstPixmap->drawable.height));
1372    }
1373
1374    if (pSrcPicture->pDrawable) {
1375	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1376
1377	if (pSrcPixmap->drawable.width > max_tex_w ||
1378	    pSrcPixmap->drawable.height > max_tex_h) {
1379	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1380			     pSrcPixmap->drawable.width,
1381			     pSrcPixmap->drawable.height));
1382	}
1383    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1384	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1385
1386    if (pMaskPicture) {
1387	PixmapPtr pMaskPixmap;
1388
1389	if (pMaskPicture->pDrawable) {
1390	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1391
1392	    if (pMaskPixmap->drawable.width > max_tex_w ||
1393		pMaskPixmap->drawable.height > max_tex_h) {
1394	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1395			       pMaskPixmap->drawable.width,
1396			       pMaskPixmap->drawable.height));
1397	    }
1398	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1399	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1400
1401	if (pMaskPicture->componentAlpha) {
1402	    /* Check if it's component alpha that relies on a source alpha and
1403	     * on the source value.  We can only get one of those into the
1404	     * single source value that we get to blend with.
1405	     */
1406	    if (RadeonBlendOp[op].src_alpha &&
1407		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
1408		RADEON_SRC_BLEND_GL_ZERO) {
1409		RADEON_FALLBACK(("Component alpha not supported with source "
1410				 "alpha and source value blending.\n"));
1411	    }
1412	}
1413
1414	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
1415	    return FALSE;
1416    }
1417
1418    if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D))
1419	return FALSE;
1420
1421    if (!R300GetDestFormat(pDstPicture, &tmp1))
1422	return FALSE;
1423
1424    return TRUE;
1425
1426}
1427
1428static Bool R300PrepareComposite(int op, PicturePtr pSrcPicture,
1429				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1430				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1431{
1432    ScreenPtr pScreen = pDst->drawable.pScreen;
1433    RINFO_FROM_SCREEN(pScreen);
1434    uint32_t dst_format, dst_pitch;
1435    uint32_t txenable, colorpitch;
1436    uint32_t blendcntl, output_fmt;
1437    uint32_t src_color, src_alpha;
1438    uint32_t mask_color, mask_alpha;
1439    int pixel_shift;
1440    struct radeon_exa_pixmap_priv *driver_priv;
1441    TRACE;
1442
1443    if (!R300GetDestFormat(pDstPicture, &dst_format))
1444	return FALSE;
1445
1446    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1447
1448    dst_pitch = exaGetPixmapPitch(pDst);
1449    colorpitch = dst_pitch >> pixel_shift;
1450
1451    if (RADEONPixmapIsColortiled(pDst))
1452	colorpitch |= R300_COLORTILE;
1453
1454    colorpitch |= dst_format;
1455
1456    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1457	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1458
1459    if (!pSrc) {
1460	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
1461	if (!pSrc)
1462	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
1463    }
1464
1465    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1466	return FALSE;
1467
1468    if (pMaskPicture && !pMask) {
1469	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
1470	if (!pMask) {
1471	    if (!pSrcPicture->pDrawable)
1472		pScreen->DestroyPixmap(pSrc);
1473	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
1474	}
1475    }
1476
1477    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1478			     pSrc, pMask, pDst);
1479
1480    /* have to execute switch after doing buffer sizing check as the latter flushes */
1481    RADEON_SWITCH_TO_3D();
1482
1483    if (!R300TextureSetup(pSrcPicture, pSrc, 0))
1484	return FALSE;
1485    txenable = R300_TEX_0_ENABLE;
1486
1487    if (pMask != NULL) {
1488	if (!R300TextureSetup(pMaskPicture, pMask, 1))
1489	    return FALSE;
1490	txenable |= R300_TEX_1_ENABLE;
1491    } else {
1492	info->accel_state->is_transform[1] = FALSE;
1493    }
1494
1495    /* setup the VAP */
1496    if (info->accel_state->has_tcl) {
1497	if (pMask)
1498	    BEGIN_RING(2*10);
1499	else
1500	    BEGIN_RING(2*9);
1501	OUT_RING_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
1502    } else {
1503	if (pMask)
1504	    BEGIN_RING(2*6);
1505	else
1506	    BEGIN_RING(2*5);
1507    }
1508
1509    /* These registers define the number, type, and location of data submitted
1510     * to the PVS unit or GA input (when PVS is disabled).
1511     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
1512     * enabled.  This memory provides the inputs to the vertex shader program
1513     * and ordering is not important.  When PVS/TCL is disabled, this field maps
1514     * directly to the GA input memory and the order is significant.  In
1515     * PVS_BYPASS mode the order is as follows:
1516     * Position
1517     * Point Size
1518     * Color 0-3
1519     * Textures 0-7
1520     * Fog
1521     */
1522    if (pMask) {
1523	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0,
1524		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1525		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1526		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1527		       R300_SIGNED_0 |
1528		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1529		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1530		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1531		       R300_SIGNED_1));
1532	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1,
1533		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1534		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1535		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1536		       R300_LAST_VEC_2 |
1537		       R300_SIGNED_2));
1538    } else
1539	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0,
1540		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1541		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1542		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1543		       R300_SIGNED_0 |
1544		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1545		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1546		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1547		       R300_LAST_VEC_1 |
1548		       R300_SIGNED_1));
1549
1550    /* load the vertex shader
1551     * We pre-load vertex programs in RADEONInit3DEngine():
1552     * - exa
1553     * - Xv
1554     * - Xv bicubic
1555     * Here we select the offset of the vertex program we want to use
1556     */
1557    if (info->accel_state->has_tcl) {
1558	if (pMask) {
1559	    /* consts used by vertex shaders */
1560	    OUT_RING_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1561						    R300_PVS_MAX_CONST_ADDR(3)));
1562	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0,
1563			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1564			   (8 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1565			   (8 << R300_PVS_LAST_INST_SHIFT)));
1566	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1,
1567			  (8 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1568	} else {
1569	    /* consts used by vertex shaders */
1570	    OUT_RING_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1571						    R300_PVS_MAX_CONST_ADDR(3)));
1572	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0,
1573			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1574			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1575			   (4 << R300_PVS_LAST_INST_SHIFT)));
1576	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1,
1577			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1578	}
1579    }
1580
1581    /* Position and one or two sets of 2 texture coordinates */
1582    OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1583    if (pMask)
1584	OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1,
1585		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1586		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1587    else
1588	OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1,
1589		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1590
1591    OUT_RING_REG(R300_TX_INVALTAGS, 0x0);
1592    OUT_RING_REG(R300_TX_ENABLE, txenable);
1593    ADVANCE_RING();
1594
1595    /* shader output swizzling */
1596    switch (pDstPicture->format) {
1597    case PICT_a8r8g8b8:
1598    case PICT_x8r8g8b8:
1599    default:
1600	output_fmt = (R300_OUT_FMT_C4_8 |
1601		      R300_OUT_FMT_C0_SEL_BLUE |
1602		      R300_OUT_FMT_C1_SEL_GREEN |
1603		      R300_OUT_FMT_C2_SEL_RED |
1604		      R300_OUT_FMT_C3_SEL_ALPHA);
1605	break;
1606    case PICT_a8b8g8r8:
1607    case PICT_x8b8g8r8:
1608	output_fmt = (R300_OUT_FMT_C4_8 |
1609		      R300_OUT_FMT_C0_SEL_RED |
1610		      R300_OUT_FMT_C1_SEL_GREEN |
1611		      R300_OUT_FMT_C2_SEL_BLUE |
1612		      R300_OUT_FMT_C3_SEL_ALPHA);
1613	break;
1614    case PICT_b8g8r8a8:
1615    case PICT_b8g8r8x8:
1616	output_fmt = (R300_OUT_FMT_C4_8 |
1617		      R300_OUT_FMT_C0_SEL_ALPHA |
1618		      R300_OUT_FMT_C1_SEL_RED |
1619		      R300_OUT_FMT_C2_SEL_GREEN |
1620		      R300_OUT_FMT_C3_SEL_BLUE);
1621	break;
1622    case PICT_a8:
1623	output_fmt = (R300_OUT_FMT_C4_8 |
1624		      R300_OUT_FMT_C0_SEL_ALPHA);
1625	break;
1626    }
1627
1628    /* setup pixel shader */
1629    if (IS_R300_3D) {
1630	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1631	    src_color = R300_ALU_RGB_0_0;
1632	else
1633	    src_color = R300_ALU_RGB_SRC0_RGB;
1634
1635	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1636	    src_alpha = R300_ALU_ALPHA_1_0;
1637	else
1638	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1639
1640	if (pMask) {
1641	    if (pMaskPicture->componentAlpha) {
1642		if (RadeonBlendOp[op].src_alpha) {
1643		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1644			src_color = R300_ALU_RGB_1_0;
1645		    else
1646			src_color = R300_ALU_RGB_SRC0_AAA;
1647		} else
1648		    src_color = R300_ALU_RGB_SRC0_RGB;
1649		mask_color = R300_ALU_RGB_SRC1_RGB;
1650	    } else {
1651		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1652		    mask_color = R300_ALU_RGB_1_0;
1653		else
1654		    mask_color = R300_ALU_RGB_SRC1_AAA;
1655	    }
1656	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1657		mask_alpha = R300_ALU_ALPHA_1_0;
1658	    else
1659		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1660	} else {
1661	    mask_color = R300_ALU_RGB_1_0;
1662	    mask_alpha = R300_ALU_ALPHA_1_0;
1663	}
1664
1665	/* setup the rasterizer, load FS */
1666	if (pMask) {
1667	    BEGIN_RING(2*16);
1668	    /* 4 components: 2 for tex0, 2 for tex1 */
1669	    OUT_RING_REG(R300_RS_COUNT,
1670			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1671			   R300_RS_COUNT_HIRES_EN));
1672
1673	    /* R300_INST_COUNT_RS - highest RS instruction used */
1674	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1675
1676	    OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1677						R300_ALU_CODE_SIZE(0) |
1678						R300_TEX_CODE_OFFSET(0) |
1679						R300_TEX_CODE_SIZE(1)));
1680
1681	    OUT_RING_REG(R300_US_CODE_ADDR_3,
1682			  (R300_ALU_START(0) |
1683			   R300_ALU_SIZE(0) |
1684			   R300_TEX_START(0) |
1685			   R300_TEX_SIZE(1) |
1686			   R300_RGBA_OUT));
1687
1688
1689	} else {
1690	    BEGIN_RING(2*15);
1691	    /* 2 components: 2 for tex0 */
1692	    OUT_RING_REG(R300_RS_COUNT,
1693			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1694			   R300_RS_COUNT_HIRES_EN));
1695
1696	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1697
1698	    OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1699						R300_ALU_CODE_SIZE(0) |
1700						R300_TEX_CODE_OFFSET(0) |
1701						R300_TEX_CODE_SIZE(0)));
1702
1703	    OUT_RING_REG(R300_US_CODE_ADDR_3,
1704			  (R300_ALU_START(0) |
1705			   R300_ALU_SIZE(0) |
1706			   R300_TEX_START(0) |
1707			   R300_TEX_SIZE(0) |
1708			   R300_RGBA_OUT));
1709
1710	}
1711
1712	OUT_RING_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
1713	OUT_RING_REG(R300_US_CODE_ADDR_0,
1714		      (R300_ALU_START(0) |
1715		       R300_ALU_SIZE(0) |
1716		       R300_TEX_START(0) |
1717		       R300_TEX_SIZE(0)));
1718	OUT_RING_REG(R300_US_CODE_ADDR_1,
1719		      (R300_ALU_START(0) |
1720		       R300_ALU_SIZE(0) |
1721		       R300_TEX_START(0) |
1722		       R300_TEX_SIZE(0)));
1723	OUT_RING_REG(R300_US_CODE_ADDR_2,
1724		      (R300_ALU_START(0) |
1725		       R300_ALU_SIZE(0) |
1726		       R300_TEX_START(0) |
1727		       R300_TEX_SIZE(0)));
1728
1729	OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1730	/* shader output swizzling */
1731	OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt);
1732
1733	/* tex inst for src texture */
1734	OUT_RING_REG(R300_US_TEX_INST(0),
1735		      (R300_TEX_SRC_ADDR(0) |
1736		       R300_TEX_DST_ADDR(0) |
1737		       R300_TEX_ID(0) |
1738		       R300_TEX_INST(R300_TEX_INST_LD)));
1739
1740	if (pMask) {
1741	    /* tex inst for mask texture */
1742	    OUT_RING_REG(R300_US_TEX_INST(1),
1743			  (R300_TEX_SRC_ADDR(1) |
1744			   R300_TEX_DST_ADDR(1) |
1745			   R300_TEX_ID(1) |
1746			   R300_TEX_INST(R300_TEX_INST_LD)));
1747	}
1748
1749	/* RGB inst
1750	 * temp addresses for texture inputs
1751	 * ALU_RGB_ADDR0 is src tex (temp 0)
1752	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1753	 * R300_ALU_RGB_OMASK - output components to write
1754	 * R300_ALU_RGB_TARGET_A - render target
1755	 */
1756	OUT_RING_REG(R300_US_ALU_RGB_ADDR(0),
1757		      (R300_ALU_RGB_ADDR0(0) |
1758		       R300_ALU_RGB_ADDR1(1) |
1759		       R300_ALU_RGB_ADDR2(0) |
1760		       R300_ALU_RGB_ADDRD(0) |
1761		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1762					   R300_ALU_RGB_MASK_G |
1763					   R300_ALU_RGB_MASK_B)) |
1764		       R300_ALU_RGB_TARGET_A));
1765	/* RGB inst
1766	 * ALU operation
1767	 */
1768	OUT_RING_REG(R300_US_ALU_RGB_INST(0),
1769		      (R300_ALU_RGB_SEL_A(src_color) |
1770		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1771		       R300_ALU_RGB_SEL_B(mask_color) |
1772		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1773		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1774		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1775		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1776		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1777		       R300_ALU_RGB_CLAMP));
1778	/* Alpha inst
1779	 * temp addresses for texture inputs
1780	 * ALU_ALPHA_ADDR0 is src tex (0)
1781	 * ALU_ALPHA_ADDR1 is mask tex (1)
1782	 * R300_ALU_ALPHA_OMASK - output components to write
1783	 * R300_ALU_ALPHA_TARGET_A - render target
1784	 */
1785	OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0),
1786		      (R300_ALU_ALPHA_ADDR0(0) |
1787		       R300_ALU_ALPHA_ADDR1(1) |
1788		       R300_ALU_ALPHA_ADDR2(0) |
1789		       R300_ALU_ALPHA_ADDRD(0) |
1790		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1791		       R300_ALU_ALPHA_TARGET_A |
1792		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1793	/* Alpha inst
1794	 * ALU operation
1795	 */
1796	OUT_RING_REG(R300_US_ALU_ALPHA_INST(0),
1797		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1798		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1799		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1800		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1801		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1802		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1803		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1804		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1805		       R300_ALU_ALPHA_CLAMP));
1806	ADVANCE_RING();
1807    } else {
1808	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1809	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1810			 R500_ALU_RGB_G_SWIZ_A_0 |
1811			 R500_ALU_RGB_B_SWIZ_A_0);
1812	else
1813	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1814			 R500_ALU_RGB_G_SWIZ_A_G |
1815			 R500_ALU_RGB_B_SWIZ_A_B);
1816
1817	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1818	    src_alpha = R500_ALPHA_SWIZ_A_1;
1819	else
1820	    src_alpha = R500_ALPHA_SWIZ_A_A;
1821
1822	if (pMask) {
1823	    if (pMaskPicture->componentAlpha) {
1824		if (RadeonBlendOp[op].src_alpha) {
1825		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1826			src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1827				     R500_ALU_RGB_G_SWIZ_A_1 |
1828				     R500_ALU_RGB_B_SWIZ_A_1);
1829		    else
1830			src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1831				     R500_ALU_RGB_G_SWIZ_A_A |
1832				     R500_ALU_RGB_B_SWIZ_A_A);
1833		} else
1834		    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1835				 R500_ALU_RGB_G_SWIZ_A_G |
1836				 R500_ALU_RGB_B_SWIZ_A_B);
1837
1838		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1839			      R500_ALU_RGB_G_SWIZ_B_G |
1840			      R500_ALU_RGB_B_SWIZ_B_B);
1841	    } else {
1842		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1843		    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1844				  R500_ALU_RGB_G_SWIZ_B_1 |
1845				  R500_ALU_RGB_B_SWIZ_B_1);
1846		else
1847		    mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1848				  R500_ALU_RGB_G_SWIZ_B_A |
1849				  R500_ALU_RGB_B_SWIZ_B_A);
1850	    }
1851	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1852		mask_alpha = R500_ALPHA_SWIZ_B_1;
1853	    else
1854		mask_alpha = R500_ALPHA_SWIZ_B_A;
1855	} else {
1856	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1857			  R500_ALU_RGB_G_SWIZ_B_1 |
1858			  R500_ALU_RGB_B_SWIZ_B_1);
1859	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1860	}
1861
1862	BEGIN_RING(2*7);
1863	if (pMask) {
1864	    /* 4 components: 2 for tex0, 2 for tex1 */
1865	    OUT_RING_REG(R300_RS_COUNT,
1866			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1867			   R300_RS_COUNT_HIRES_EN));
1868
1869	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1870	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1871
1872	    OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1873					      R500_US_CODE_END_ADDR(2)));
1874	    OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1875					       R500_US_CODE_RANGE_SIZE(2)));
1876	    OUT_RING_REG(R500_US_CODE_OFFSET, 0);
1877	} else {
1878	    OUT_RING_REG(R300_RS_COUNT,
1879			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1880			   R300_RS_COUNT_HIRES_EN));
1881
1882	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1883
1884	    OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1885					      R500_US_CODE_END_ADDR(1)));
1886	    OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1887					       R500_US_CODE_RANGE_SIZE(1)));
1888	    OUT_RING_REG(R500_US_CODE_OFFSET, 0);
1889	}
1890
1891	OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1892	OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt);
1893	ADVANCE_RING();
1894
1895	if (pMask) {
1896	    BEGIN_RING(2*19);
1897	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1898	    /* tex inst for src texture */
1899	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1900						   R500_INST_RGB_WMASK_R |
1901						   R500_INST_RGB_WMASK_G |
1902						   R500_INST_RGB_WMASK_B |
1903						   R500_INST_ALPHA_WMASK |
1904						   R500_INST_RGB_CLAMP |
1905						   R500_INST_ALPHA_CLAMP));
1906
1907	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1908						   R500_TEX_INST_LD |
1909						   R500_TEX_IGNORE_UNCOVERED));
1910
1911	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1912						   R500_TEX_SRC_S_SWIZ_R |
1913						   R500_TEX_SRC_T_SWIZ_G |
1914						   R500_TEX_DST_ADDR(0) |
1915						   R500_TEX_DST_R_SWIZ_R |
1916						   R500_TEX_DST_G_SWIZ_G |
1917						   R500_TEX_DST_B_SWIZ_B |
1918						   R500_TEX_DST_A_SWIZ_A));
1919	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1920						   R500_DX_S_SWIZ_R |
1921						   R500_DX_T_SWIZ_R |
1922						   R500_DX_R_SWIZ_R |
1923						   R500_DX_Q_SWIZ_R |
1924						   R500_DY_ADDR(0) |
1925						   R500_DY_S_SWIZ_R |
1926						   R500_DY_T_SWIZ_R |
1927						   R500_DY_R_SWIZ_R |
1928						   R500_DY_Q_SWIZ_R));
1929	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1930	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1931
1932	    /* tex inst for mask texture */
1933	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1934						   R500_INST_TEX_SEM_WAIT |
1935						   R500_INST_RGB_WMASK_R |
1936						   R500_INST_RGB_WMASK_G |
1937						   R500_INST_RGB_WMASK_B |
1938						   R500_INST_ALPHA_WMASK |
1939						   R500_INST_RGB_CLAMP |
1940						   R500_INST_ALPHA_CLAMP));
1941
1942	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
1943						   R500_TEX_INST_LD |
1944						   R500_TEX_SEM_ACQUIRE |
1945						   R500_TEX_IGNORE_UNCOVERED));
1946
1947	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
1948						   R500_TEX_SRC_S_SWIZ_R |
1949						   R500_TEX_SRC_T_SWIZ_G |
1950						   R500_TEX_DST_ADDR(1) |
1951						   R500_TEX_DST_R_SWIZ_R |
1952						   R500_TEX_DST_G_SWIZ_G |
1953						   R500_TEX_DST_B_SWIZ_B |
1954						   R500_TEX_DST_A_SWIZ_A));
1955	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
1956						   R500_DX_S_SWIZ_R |
1957						   R500_DX_T_SWIZ_R |
1958						   R500_DX_R_SWIZ_R |
1959						   R500_DX_Q_SWIZ_R |
1960						   R500_DY_ADDR(1) |
1961						   R500_DY_S_SWIZ_R |
1962						   R500_DY_T_SWIZ_R |
1963						   R500_DY_R_SWIZ_R |
1964						   R500_DY_Q_SWIZ_R));
1965	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1966	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1967	} else {
1968	    BEGIN_RING(2*13);
1969	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1970	    /* tex inst for src texture */
1971	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1972						   R500_INST_TEX_SEM_WAIT |
1973						   R500_INST_RGB_WMASK_R |
1974						   R500_INST_RGB_WMASK_G |
1975						   R500_INST_RGB_WMASK_B |
1976						   R500_INST_ALPHA_WMASK |
1977						   R500_INST_RGB_CLAMP |
1978						   R500_INST_ALPHA_CLAMP));
1979
1980	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1981						   R500_TEX_INST_LD |
1982						   R500_TEX_SEM_ACQUIRE |
1983						   R500_TEX_IGNORE_UNCOVERED));
1984
1985	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1986						   R500_TEX_SRC_S_SWIZ_R |
1987						   R500_TEX_SRC_T_SWIZ_G |
1988						   R500_TEX_DST_ADDR(0) |
1989						   R500_TEX_DST_R_SWIZ_R |
1990						   R500_TEX_DST_G_SWIZ_G |
1991						   R500_TEX_DST_B_SWIZ_B |
1992						   R500_TEX_DST_A_SWIZ_A));
1993	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1994						   R500_DX_S_SWIZ_R |
1995						   R500_DX_T_SWIZ_R |
1996						   R500_DX_R_SWIZ_R |
1997						   R500_DX_Q_SWIZ_R |
1998						   R500_DY_ADDR(0) |
1999						   R500_DY_S_SWIZ_R |
2000						   R500_DY_T_SWIZ_R |
2001						   R500_DY_R_SWIZ_R |
2002						   R500_DY_Q_SWIZ_R));
2003	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2004	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2005	}
2006
2007	/* ALU inst */
2008	/* *_OMASK* - output component write mask */
2009	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
2010					       R500_INST_TEX_SEM_WAIT |
2011					       R500_INST_LAST |
2012					       R500_INST_RGB_OMASK_R |
2013					       R500_INST_RGB_OMASK_G |
2014					       R500_INST_RGB_OMASK_B |
2015					       R500_INST_ALPHA_OMASK |
2016					       R500_INST_RGB_CLAMP |
2017					       R500_INST_ALPHA_CLAMP));
2018	/* ALU inst
2019	 * temp addresses for texture inputs
2020	 * RGB_ADDR0 is src tex (temp 0)
2021	 * RGB_ADDR1 is mask tex (temp 1)
2022	 */
2023	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2024					       R500_RGB_ADDR1(1) |
2025					       R500_RGB_ADDR2(0)));
2026	/* ALU inst
2027	 * temp addresses for texture inputs
2028	 * ALPHA_ADDR0 is src tex (temp 0)
2029	 * ALPHA_ADDR1 is mask tex (temp 1)
2030	 */
2031	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2032					       R500_ALPHA_ADDR1(1) |
2033					       R500_ALPHA_ADDR2(0)));
2034
2035	/* R500_ALU_RGB_TARGET - RGB render target */
2036	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2037					       src_color |
2038					       R500_ALU_RGB_SEL_B_SRC1 |
2039					       mask_color |
2040					       R500_ALU_RGB_TARGET(0)));
2041
2042	/* R500_ALPHA_RGB_TARGET - alpha render target */
2043	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
2044					       R500_ALPHA_ADDRD(0) |
2045					       R500_ALPHA_SEL_A_SRC0 |
2046					       src_alpha |
2047					       R500_ALPHA_SEL_B_SRC1 |
2048					       mask_alpha |
2049					       R500_ALPHA_TARGET(0)));
2050
2051	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
2052					       R500_ALU_RGBA_ADDRD(0) |
2053					       R500_ALU_RGBA_R_SWIZ_0 |
2054					       R500_ALU_RGBA_G_SWIZ_0 |
2055					       R500_ALU_RGBA_B_SWIZ_0 |
2056					       R500_ALU_RGBA_A_SWIZ_0));
2057	ADVANCE_RING();
2058    }
2059
2060    /* Clear out scissoring */
2061    BEGIN_RING(2*2);
2062    if (IS_R300_3D) {
2063	OUT_RING_REG(R300_SC_SCISSOR0, ((1440 << R300_SCISSOR_X_SHIFT) |
2064					 (1440 << R300_SCISSOR_Y_SHIFT)));
2065	OUT_RING_REG(R300_SC_SCISSOR1, (((pDst->drawable.width + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2066					 ((pDst->drawable.height + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
2067
2068    } else {
2069	OUT_RING_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
2070					 (0 << R300_SCISSOR_Y_SHIFT)));
2071	OUT_RING_REG(R300_SC_SCISSOR1, (((pDst->drawable.width - 1) << R300_SCISSOR_X_SHIFT) |
2072					 ((pDst->drawable.height - 1) << R300_SCISSOR_Y_SHIFT)));
2073    }
2074    ADVANCE_RING();
2075
2076
2077    BEGIN_ACCEL_RELOC(3, 2);
2078    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pDst);
2079    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pDst);
2080
2081    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
2082    OUT_RING_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
2083
2084    ADVANCE_RING();
2085
2086    BEGIN_RING(2*1);
2087    if (pMask)
2088	OUT_RING_REG(R300_VAP_VTX_SIZE, 6);
2089    else
2090	OUT_RING_REG(R300_VAP_VTX_SIZE, 4);
2091    ADVANCE_RING();
2092
2093    return TRUE;
2094}
2095
2096static void RadeonFinishComposite(PixmapPtr pDst)
2097{
2098    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2099
2100    ENTER_DRAW(0);
2101
2102    if (info->accel_state->draw_header) {
2103	if (info->ChipFamily < CHIP_FAMILY_R200) {
2104	    info->accel_state->draw_header[0] = CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2105							   info->accel_state->num_vtx *
2106							   info->accel_state->vtx_count + 1);
2107	    info->accel_state->draw_header[2] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2108						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2109						 RADEON_CP_VC_CNTL_MAOS_ENABLE |
2110						 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2111						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2112	} else if (IS_R300_3D || IS_R500_3D) {
2113	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2114							   info->accel_state->num_vtx *
2115							   info->accel_state->vtx_count);
2116	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2117						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2118						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2119	} else {
2120	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2121							   info->accel_state->num_vtx *
2122							   info->accel_state->vtx_count);
2123	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2124						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2125						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2126	}
2127	info->accel_state->draw_header = NULL;
2128    }
2129
2130    if (IS_R300_3D || IS_R500_3D) {
2131	BEGIN_RING(2*3);
2132	OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA);
2133	OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2134    } else
2135	BEGIN_RING(2*1);
2136    OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2137    ADVANCE_RING();
2138
2139    LEAVE_DRAW(0);
2140}
2141
2142static void RadeonDoneComposite(PixmapPtr pDst)
2143{
2144    ScreenPtr pScreen = pDst->drawable.pScreen;
2145    RINFO_FROM_SCREEN(pScreen);
2146    struct radeon_accel_state *accel_state = info->accel_state;
2147
2148    RadeonFinishComposite(pDst);
2149
2150    if (!accel_state->src_pic->pDrawable)
2151	pScreen->DestroyPixmap(accel_state->src_pix);
2152
2153    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
2154	pScreen->DestroyPixmap(accel_state->msk_pix);
2155}
2156
/*
 * Emit one vertex without mask coordinates: four ring dwords holding the
 * destination position followed by the source texture coordinate.  Every
 * value is passed through F_TO_DW() before being written with OUT_RING().
 */
#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)				\
do {									\
    OUT_RING(F_TO_DW(_dstX));						\
    OUT_RING(F_TO_DW(_dstY));						\
    OUT_RING(F_TO_DW(_srcX));						\
    OUT_RING(F_TO_DW(_srcY));						\
} while (0)

/*
 * Emit one vertex with mask coordinates: the four dwords of VTX_OUT()
 * followed by the mask texture coordinate, six ring dwords in total.
 */
#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {									\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);				\
    OUT_RING(F_TO_DW(_maskX));						\
    OUT_RING(F_TO_DW(_maskY));						\
} while (0)
2174
2175static inline void transformPoint(PictTransform *transform, xPointFixed *point)
2176{
2177    PictVector v;
2178    v.vector[0] = point->x;
2179    v.vector[1] = point->y;
2180    v.vector[2] = xFixed1;
2181    PictureTransformPoint(transform, &v);
2182    point->x = v.vector[0];
2183    point->y = v.vector[1];
2184}
2185
2186static void RadeonCompositeTile(ScrnInfoPtr pScrn,
2187					   RADEONInfoPtr info,
2188					   PixmapPtr pDst,
2189					   int srcX, int srcY,
2190					   int maskX, int maskY,
2191					   int dstX, int dstY,
2192					   int w, int h)
2193{
2194    int vtx_count;
2195    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
2196    static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
2197
2198    ENTER_DRAW(0);
2199
2200    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
2201       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
2202
2203    if (CS_FULL(info->cs)) {
2204	RadeonFinishComposite(info->accel_state->dst_pix);
2205	radeon_cs_flush_indirect(pScrn);
2206	info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
2207						 info->accel_state->src_pic,
2208						 info->accel_state->msk_pic,
2209						 info->accel_state->dst_pic,
2210						 info->accel_state->src_pix,
2211						 info->accel_state->msk_pix,
2212						 info->accel_state->dst_pix);
2213    }
2214
2215    srcTopLeft.x     = IntToxFixed(srcX);
2216    srcTopLeft.y     = IntToxFixed(srcY);
2217    srcTopRight.x    = IntToxFixed(srcX + w);
2218    srcTopRight.y    = IntToxFixed(srcY);
2219    srcBottomLeft.x  = IntToxFixed(srcX);
2220    srcBottomLeft.y  = IntToxFixed(srcY + h);
2221    srcBottomRight.x = IntToxFixed(srcX + w);
2222    srcBottomRight.y = IntToxFixed(srcY + h);
2223
2224    if (info->accel_state->is_transform[0]) {
2225	if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2226	    transformPoint(info->accel_state->transform[0], &srcTopLeft);
2227	    transformPoint(info->accel_state->transform[0], &srcTopRight);
2228	    transformPoint(info->accel_state->transform[0], &srcBottomLeft);
2229	    transformPoint(info->accel_state->transform[0], &srcBottomRight);
2230	}
2231    }
2232
2233    if (info->accel_state->msk_pic) {
2234	maskTopLeft.x     = IntToxFixed(maskX);
2235	maskTopLeft.y     = IntToxFixed(maskY);
2236	maskTopRight.x    = IntToxFixed(maskX + w);
2237	maskTopRight.y    = IntToxFixed(maskY);
2238	maskBottomLeft.x  = IntToxFixed(maskX);
2239	maskBottomLeft.y  = IntToxFixed(maskY + h);
2240	maskBottomRight.x = IntToxFixed(maskX + w);
2241	maskBottomRight.y = IntToxFixed(maskY + h);
2242
2243	if (info->accel_state->is_transform[1]) {
2244	    if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2245		transformPoint(info->accel_state->transform[1], &maskTopLeft);
2246		transformPoint(info->accel_state->transform[1], &maskTopRight);
2247		transformPoint(info->accel_state->transform[1], &maskBottomLeft);
2248		transformPoint(info->accel_state->transform[1], &maskBottomRight);
2249	    }
2250	}
2251
2252	vtx_count = 6;
2253    } else
2254	vtx_count = 4;
2255
2256    if (info->accel_state->vsync)
2257        RADEONWaitForVLine(pScrn, pDst,
2258			   radeon_pick_best_crtc(pScrn, FALSE, dstX, dstX + w, dstY, dstY + h),
2259			   dstY, dstY + h);
2260
2261    if (info->ChipFamily < CHIP_FAMILY_R200) {
2262	if (!info->accel_state->draw_header) {
2263	    BEGIN_RING(3);
2264
2265	    info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2266	    info->accel_state->num_vtx = 0;
2267	    info->accel_state->vtx_count = vtx_count;
2268
2269	    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2270				3 * vtx_count + 1));
2271	    if (info->accel_state->msk_pic)
2272		OUT_RING(RADEON_CP_VC_FRMT_XY |
2273			 RADEON_CP_VC_FRMT_ST0 |
2274			 RADEON_CP_VC_FRMT_ST1);
2275	    else
2276		OUT_RING(RADEON_CP_VC_FRMT_XY |
2277			 RADEON_CP_VC_FRMT_ST0);
2278	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2279		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2280		     RADEON_CP_VC_CNTL_MAOS_ENABLE |
2281		     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2282		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2283	    ADVANCE_RING();
2284	}
2285
2286	info->accel_state->num_vtx += 3;
2287	BEGIN_RING(3 * vtx_count);
2288    } else if (IS_R300_3D || IS_R500_3D) {
2289	if (!info->accel_state->draw_header) {
2290	    BEGIN_RING(2);
2291
2292	    info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2293	    info->accel_state->num_vtx = 0;
2294	    info->accel_state->vtx_count = vtx_count;
2295
2296	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2297				4 * vtx_count));
2298	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2299		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2300		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2301	    ADVANCE_RING();
2302	}
2303
2304	info->accel_state->num_vtx += 4;
2305	BEGIN_RING(4 * vtx_count);
2306    } else {
2307	if (!info->accel_state->draw_header) {
2308	    BEGIN_RING(2);
2309
2310	    info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2311	    info->accel_state->num_vtx = 0;
2312	    info->accel_state->vtx_count = vtx_count;
2313
2314	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2315				3 * vtx_count));
2316	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2317		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2318		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2319	    ADVANCE_RING();
2320	}
2321
2322	info->accel_state->num_vtx += 3;
2323	BEGIN_RING(3 * vtx_count);
2324    }
2325
2326    if (info->accel_state->msk_pic) {
2327	if (IS_R300_3D || IS_R500_3D) {
2328	    VTX_OUT_MASK((float)dstX,                                      (float)dstY,
2329			 xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0],
2330			 xFixedToFloat(maskTopLeft.x) / info->accel_state->texW[1],     xFixedToFloat(maskTopLeft.y) / info->accel_state->texH[1]);
2331	}
2332	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
2333		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0],
2334		xFixedToFloat(maskBottomLeft.x) / info->accel_state->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->accel_state->texH[1]);
2335	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
2336		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0],
2337		xFixedToFloat(maskBottomRight.x) / info->accel_state->texW[1], xFixedToFloat(maskBottomRight.y) / info->accel_state->texH[1]);
2338	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
2339		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0],
2340		xFixedToFloat(maskTopRight.x) / info->accel_state->texW[1],    xFixedToFloat(maskTopRight.y) / info->accel_state->texH[1]);
2341    } else {
2342	if (IS_R300_3D || IS_R500_3D) {
2343	    VTX_OUT((float)dstX,                                      (float)dstY,
2344		    xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0]);
2345	}
2346	VTX_OUT((float)dstX,                                      (float)(dstY + h),
2347		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0]);
2348	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
2349		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0]);
2350	VTX_OUT((float)(dstX + w),                                (float)dstY,
2351		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
2352    }
2353
2354    ADVANCE_RING();
2355
2356    LEAVE_DRAW(0);
2357}
2358#undef VTX_OUT
2359#undef VTX_OUT_MASK
2360
2361static void RadeonComposite(PixmapPtr pDst,
2362				       int srcX, int srcY,
2363				       int maskX, int maskY,
2364				       int dstX, int dstY,
2365				       int width, int height)
2366{
2367    int tileSrcY, tileMaskY, tileDstY;
2368    int remainingHeight;
2369    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2370
2371    if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) {
2372	RadeonCompositeTile(pScrn,
2373				       info,
2374				       pDst,
2375				       srcX, srcY,
2376				       maskX, maskY,
2377				       dstX, dstY,
2378				       width, height);
2379	return;
2380    }
2381
2382    /* Tiling logic borrowed from exaFillRegionTiled */
2383
2384    modulus(srcY, info->accel_state->src_tile_height, tileSrcY);
2385    tileMaskY = maskY;
2386    tileDstY = dstY;
2387
2388    remainingHeight = height;
2389    while (remainingHeight > 0) {
2390	int remainingWidth = width;
2391	int tileSrcX, tileMaskX, tileDstX;
2392	int h = info->accel_state->src_tile_height - tileSrcY;
2393
2394	if (h > remainingHeight)
2395	    h = remainingHeight;
2396	remainingHeight -= h;
2397
2398	modulus(srcX, info->accel_state->src_tile_width, tileSrcX);
2399	tileMaskX = maskX;
2400	tileDstX = dstX;
2401
2402	while (remainingWidth > 0) {
2403	    int w = info->accel_state->src_tile_width - tileSrcX;
2404	    if (w > remainingWidth)
2405		w = remainingWidth;
2406	    remainingWidth -= w;
2407
2408	    RadeonCompositeTile(pScrn,
2409					   info,
2410					   pDst,
2411					   tileSrcX, tileSrcY,
2412					   tileMaskX, tileMaskY,
2413					   tileDstX, tileDstY,
2414					   w, h);
2415
2416	    tileSrcX = 0;
2417	    tileMaskX += w;
2418	    tileDstX += w;
2419	}
2420	tileSrcY = 0;
2421	tileMaskY += h;
2422	tileDstY += h;
2423    }
2424}
2425
2426