radeon_exa_render.c revision 209ff23f
1fe4c343aSmrg/*
2fe4c343aSmrg * Copyright 2005 Eric Anholt
3fe4c343aSmrg * Copyright 2005 Benjamin Herrenschmidt
4fe4c343aSmrg * All Rights Reserved.
5fe4c343aSmrg *
6fe4c343aSmrg * Permission is hereby granted, free of charge, to any person obtaining a
7fe4c343aSmrg * copy of this software and associated documentation files (the "Software"),
8fe4c343aSmrg * to deal in the Software without restriction, including without limitation
9fe4c343aSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10fe4c343aSmrg * and/or sell copies of the Software, and to permit persons to whom the
11fe4c343aSmrg * Software is furnished to do so, subject to the following conditions:
12fe4c343aSmrg *
13fe4c343aSmrg * The above copyright notice and this permission notice (including the next
14fe4c343aSmrg * paragraph) shall be included in all copies or substantial portions of the
15fe4c343aSmrg * Software.
16fe4c343aSmrg *
17fe4c343aSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18fe4c343aSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19fe4c343aSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20fe4c343aSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21fe4c343aSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *    Alex Deucher <alexander.deucher@amd.com>
30 *
31 */
32
33#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
34#error Cannot define both MMIO and CP acceleration!
35#endif
36
37#if !defined(UNIXCPP) || defined(ANSICPP)
38#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
39#else
40#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
41#endif
42
43#ifdef ACCEL_MMIO
44#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
45#else
46#ifdef ACCEL_CP
47#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
48#else
49#error No accel type defined!
50#endif
51#endif
52
53#ifndef ACCEL_CP
54#define ONLY_ONCE
55#endif
56
57/* Only include the following (generic) bits once. */
58#ifdef ONLY_ONCE
59static Bool is_transform[2];
60static PictTransform *transform[2];
61static Bool has_mask;
62/* Whether we are tiling horizontally and vertically */
63static Bool need_src_tile_x;
64static Bool need_src_tile_y;
65/* Size of tiles ... set to 65536x65536 if not tiling in that direction */
66static Bool src_tile_width;
67static Bool src_tile_height;
68
69struct blendinfo {
70    Bool dst_alpha;
71    Bool src_alpha;
72    uint32_t blend_cntl;
73};
74
75static struct blendinfo RadeonBlendOp[] = {
76    /* Clear */
77    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
78    /* Src */
79    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
80    /* Dst */
81    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
82    /* Over */
83    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
84    /* OverReverse */
85    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
86    /* In */
87    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
88    /* InReverse */
89    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
90    /* Out */
91    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
92    /* OutReverse */
93    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
94    /* Atop */
95    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
96    /* AtopReverse */
97    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
98    /* Xor */
99    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
100    /* Add */
101    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
102};
103
/* Pairs a Render picture format with the matching hardware texture format. */
struct formatinfo {
    int fmt;             /* PICT_* picture format code */
    uint32_t card_fmt;   /* texture format bits for the chip family */
};
108
109/* Note on texture formats:
110 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
111 */
112static struct formatinfo R100TexFormats[] = {
113	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
114	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
115	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
116	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
117	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
118	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
119};
120
121static struct formatinfo R200TexFormats[] = {
122    {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
123    {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
124    {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
125    {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
126    {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
127    {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
128    {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
129    {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
130};
131
132static struct formatinfo R300TexFormats[] = {
133    {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
134    {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
135    {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
136    {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
137    {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
138    {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
139    {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
140    {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
141};
142
143/* Common Radeon setup code */
144
145static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
146{
147    switch (pDstPicture->format) {
148    case PICT_a8r8g8b8:
149    case PICT_x8r8g8b8:
150	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
151	break;
152    case PICT_r5g6b5:
153	*dst_format = RADEON_COLOR_FORMAT_RGB565;
154	break;
155    case PICT_a1r5g5b5:
156    case PICT_x1r5g5b5:
157	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
158	break;
159    case PICT_a8:
160	*dst_format = RADEON_COLOR_FORMAT_RGB8;
161	break;
162    default:
163	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
164			(int)pDstPicture->format));
165    }
166
167    return TRUE;
168}
169
170static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
171{
172    switch (pDstPicture->format) {
173    case PICT_a8r8g8b8:
174    case PICT_x8r8g8b8:
175	*dst_format = R300_COLORFORMAT_ARGB8888;
176	break;
177    case PICT_r5g6b5:
178	*dst_format = R300_COLORFORMAT_RGB565;
179	break;
180    case PICT_a1r5g5b5:
181    case PICT_x1r5g5b5:
182	*dst_format = R300_COLORFORMAT_ARGB1555;
183	break;
184    case PICT_a8:
185	*dst_format = R300_COLORFORMAT_I8;
186	break;
187    default:
188	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
189	       (int)pDstPicture->format));
190    }
191    return TRUE;
192}
193
194static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
195{
196    uint32_t sblend, dblend;
197
198    sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK;
199    dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK;
200
201    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
202     * it as always 1.
203     */
204    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
205	if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
206	    sblend = RADEON_SRC_BLEND_GL_ONE;
207	else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
208	    sblend = RADEON_SRC_BLEND_GL_ZERO;
209    }
210
211    /* If the source alpha is being used, then we should only be in a case where
212     * the source blend factor is 0, and the source blend value is the mask
213     * channels multiplied by the source picture's alpha.
214     */
215    if (pMask && pMask->componentAlpha && RadeonBlendOp[op].src_alpha) {
216	if (dblend == RADEON_DST_BLEND_GL_SRC_ALPHA) {
217	    dblend = RADEON_DST_BLEND_GL_SRC_COLOR;
218	} else if (dblend == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA) {
219	    dblend = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
220	}
221    }
222
223    return sblend | dblend;
224}
225
/* Overlays a float and a 32-bit integer so one can be read back as the other. */
union intfloat {
    float f;       /* value viewed as a float */
    uint32_t i;    /* same storage viewed as a 32-bit integer */
};
230
231/* Check if we need a software-fallback because of a repeating
232 *   non-power-of-two texture.
233 *
234 * canTile: whether we can emulate a repeat by drawing in tiles:
235 *   possible for the source, but not for the mask. (Actually
236 *   we could do tiling for the mask too, but dealing with the
237 *   combination of a tiled mask and a tiled source would be
238 *   a lot of complexity, so we handle only the most common
239 *   case of a repeating mask.)
240 */
241static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
242{
243    int w = pPict->pDrawable->width;
244    int h = pPict->pDrawable->height;
245
246    if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
247	!(!pPict->transform && canTile))
248	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
249			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
250
251    return TRUE;
252}
253
254/* Determine if the pitch of the pixmap meets the criteria for being
255 * used as a repeating texture: no padding or only a single line texture.
256 */
257static Bool RADEONPitchMatches(PixmapPtr pPix)
258{
259    int w = pPix->drawable.width;
260    int h = pPix->drawable.height;
261    uint32_t txpitch = exaGetPixmapPitch(pPix);
262
263    if (h > 1 && ((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch)
264	return FALSE;
265
266    return TRUE;
267}
268
269/* We can't turn on repeats normally for a non-power-of-two dimension,
270 * but if the source isn't transformed, we can get the same effect
271 * by drawing the image in multiple tiles. (A common case that it's
272 * important to get right is drawing a strip of a NPOTxPOT texture
273 * repeating in the POT direction. With tiling, this ends up as a
274 * a single tile on R300 and newer, which is perfect.)
275 *
276 * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
277 *   one direction and POT in the other in the POT direction; on
278 *   older chips we can only repeat at all if the texture is POT in
279 *   both directions.
280 *
281 * needMatchingPitch: On R100/R200, we can only repeat horizontally if
282 *   there is no padding in the texture. Textures with small POT widths
283 *   (1,2,4,8) thus can't be tiled.
284 */
285static Bool RADEONSetupSourceTile(PicturePtr pPict,
286				  PixmapPtr pPix,
287				  Bool canTile1d,
288				  Bool needMatchingPitch)
289{
290    need_src_tile_x = need_src_tile_y = FALSE;
291    src_tile_width = src_tile_height = 65536; /* "infinite" */
292
293    if (pPict->repeat) {
294	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
295
296	int w = pPict->pDrawable->width;
297	int h = pPict->pDrawable->height;
298
299	if (pPict->transform) {
300	    if (badPitch)
301		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
302				 w, (unsigned)exaGetPixmapPitch(pPix)));
303	} else {
304	    need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
305	    need_src_tile_y = (h & (h - 1)) != 0;
306
307	    if (!canTile1d)
308		need_src_tile_x = need_src_tile_y = need_src_tile_x || need_src_tile_y;
309	}
310
311	if (need_src_tile_x)
312	  src_tile_width = w;
313	if (need_src_tile_y)
314	  src_tile_height = h;
315    }
316
317    return TRUE;
318}
319
320/* R100-specific code */
321
322static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit)
323{
324    int w = pPict->pDrawable->width;
325    int h = pPict->pDrawable->height;
326    int i;
327
328    if ((w > 0x7ff) || (h > 0x7ff))
329	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
330
331    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
332	if (R100TexFormats[i].fmt == pPict->format)
333	    break;
334    }
335    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
336	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
337			(int)pPict->format));
338
339    if (!RADEONCheckTexturePOT(pPict, unit == 0))
340	return FALSE;
341
342    if (pPict->filter != PictFilterNearest &&
343	pPict->filter != PictFilterBilinear)
344    {
345	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
346    }
347
348    return TRUE;
349}
350
351#endif /* ONLY_ONCE */
352
353static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
354					int unit)
355{
356    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
357    uint32_t txfilter, txformat, txoffset, txpitch;
358    int w = pPict->pDrawable->width;
359    int h = pPict->pDrawable->height;
360    Bool repeat = pPict->repeat && !(unit == 0 && (need_src_tile_x || need_src_tile_y));
361    int i;
362    ACCEL_PREAMBLE();
363
364    txpitch = exaGetPixmapPitch(pPix);
365    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
366
367    if ((txoffset & 0x1f) != 0)
368	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
369    if ((txpitch & 0x1f) != 0)
370	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
371
372    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
373    {
374	if (R100TexFormats[i].fmt == pPict->format)
375	    break;
376    }
377    txformat = R100TexFormats[i].card_fmt;
378    if (RADEONPixmapIsColortiled(pPix))
379	txoffset |= RADEON_TXO_MACRO_TILE;
380
381    if (repeat) {
382	if (!RADEONPitchMatches(pPix))
383	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
384			     w, (unsigned)txpitch));
385
386	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
387	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
388    } else
389	txformat |= RADEON_TXFORMAT_NON_POWER2;
390    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
391
392    info->texW[unit] = 1;
393    info->texH[unit] = 1;
394
395    switch (pPict->filter) {
396    case PictFilterNearest:
397	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
398	break;
399    case PictFilterBilinear:
400	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
401	break;
402    default:
403	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
404    }
405
406    if (repeat)
407      txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
408
409    BEGIN_ACCEL(5);
410    if (unit == 0) {
411	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
412	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
413	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, txoffset);
414	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
415	    (pPix->drawable.width - 1) |
416	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
417	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
418    } else {
419	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
420	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
421	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, txoffset);
422	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
423	    (pPix->drawable.width - 1) |
424	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
425	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
426    }
427    FINISH_ACCEL();
428
429    if (pPict->transform != 0) {
430	is_transform[unit] = TRUE;
431	transform[unit] = pPict->transform;
432    } else {
433	is_transform[unit] = FALSE;
434    }
435
436    return TRUE;
437}
438
439#ifdef ONLY_ONCE
440
441static PixmapPtr
442RADEONGetDrawablePixmap(DrawablePtr pDrawable)
443{
444    if (pDrawable->type == DRAWABLE_WINDOW)
445	return pDrawable->pScreen->GetWindowPixmap((WindowPtr)pDrawable);
446    else
447	return (PixmapPtr)pDrawable;
448}
449
450static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
451			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
452{
453    PixmapPtr pSrcPixmap, pDstPixmap;
454    uint32_t tmp1;
455
456    /* Check for unsupported compositing operations. */
457    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
458	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
459
460    if (!pSrcPicture->pDrawable)
461	return FALSE;
462
463    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
464
465    if (pSrcPixmap->drawable.width >= 2048 ||
466	pSrcPixmap->drawable.height >= 2048) {
467	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
468			 pSrcPixmap->drawable.width,
469			 pSrcPixmap->drawable.height));
470    }
471
472    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
473
474    if (pDstPixmap->drawable.width >= 2048 ||
475	pDstPixmap->drawable.height >= 2048) {
476	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
477			 pDstPixmap->drawable.width,
478			 pDstPixmap->drawable.height));
479    }
480
481    if (pMaskPicture) {
482	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
483
484	if (pMaskPixmap->drawable.width >= 2048 ||
485	    pMaskPixmap->drawable.height >= 2048) {
486	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
487			     pMaskPixmap->drawable.width,
488			     pMaskPixmap->drawable.height));
489	}
490
491	if (pMaskPicture->componentAlpha) {
492	    /* Check if it's component alpha that relies on a source alpha and
493	     * on the source value.  We can only get one of those into the
494	     * single source value that we get to blend with.
495	     */
496	    if (RadeonBlendOp[op].src_alpha &&
497		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
498		RADEON_SRC_BLEND_GL_ZERO) {
499		RADEON_FALLBACK(("Component alpha not supported with source "
500				 "alpha and source value blending.\n"));
501	    }
502	}
503
504	if (!R100CheckCompositeTexture(pMaskPicture, 1))
505	    return FALSE;
506    }
507
508    if (!R100CheckCompositeTexture(pSrcPicture, 0))
509	return FALSE;
510
511    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
512	return FALSE;
513
514    return TRUE;
515}
516#endif /* ONLY_ONCE */
517
518static Bool FUNC_NAME(R100PrepareComposite)(int op,
519					    PicturePtr pSrcPicture,
520					    PicturePtr pMaskPicture,
521					    PicturePtr pDstPicture,
522					    PixmapPtr pSrc,
523					    PixmapPtr pMask,
524					    PixmapPtr pDst)
525{
526    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
527    uint32_t dst_format, dst_offset, dst_pitch, colorpitch;
528    uint32_t pp_cntl, blendcntl, cblend, ablend;
529    int pixel_shift;
530    ACCEL_PREAMBLE();
531
532    TRACE;
533
534    if (!info->XInited3D)
535	RADEONInit3DEngine(pScrn);
536
537    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
538	return FALSE;
539
540    if (pMask)
541	has_mask = TRUE;
542    else
543	has_mask = FALSE;
544
545    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
546
547    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
548    dst_pitch = exaGetPixmapPitch(pDst);
549    colorpitch = dst_pitch >> pixel_shift;
550    if (RADEONPixmapIsColortiled(pDst))
551	colorpitch |= RADEON_COLOR_TILE_ENABLE;
552
553    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
554    dst_pitch = exaGetPixmapPitch(pDst);
555    if ((dst_offset & 0x0f) != 0)
556	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
557    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
558	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
559
560    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
561	return FALSE;
562
563    if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
564	return FALSE;
565    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
566
567    if (pMask != NULL) {
568	if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1))
569	    return FALSE;
570	pp_cntl |= RADEON_TEX_1_ENABLE;
571    } else {
572	is_transform[1] = FALSE;
573    }
574
575    RADEON_SWITCH_TO_3D();
576
577    BEGIN_ACCEL(8);
578    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
579    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
580    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
581    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
582
583    /* IN operator: Multiply src by mask components or mask alpha.
584     * BLEND_CTL_ADD is A * B + C.
585     * If a source is a8, we have to explicitly zero its color values.
586     * If the destination is a8, we have to route the alpha to red, I think.
587     * If we're doing component alpha where the source for blending is going to
588     * be the source alpha (and there's no source value used), we have to zero
589     * the source's color values.
590     */
591    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
592    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
593
594    if (pDstPicture->format == PICT_a8 ||
595	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
596    {
597	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
598    } else if (pSrcPicture->format == PICT_a8)
599	cblend |= RADEON_COLOR_ARG_A_ZERO;
600    else
601	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
602    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
603
604    if (pMask) {
605	if (pMaskPicture->componentAlpha &&
606	    pDstPicture->format != PICT_a8)
607	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
608	else
609	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
610	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
611    } else {
612	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
613	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
614    }
615
616    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
617    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
618    if (pMask)
619	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
620					  RADEON_SE_VTX_FMT_ST0 |
621					  RADEON_SE_VTX_FMT_ST1));
622    else
623	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
624					  RADEON_SE_VTX_FMT_ST0));
625    /* Op operator. */
626    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
627
628    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
629    FINISH_ACCEL();
630
631    return TRUE;
632}
633
634#ifdef ONLY_ONCE
635
636static Bool R200CheckCompositeTexture(PicturePtr pPict, int unit)
637{
638    int w = pPict->pDrawable->width;
639    int h = pPict->pDrawable->height;
640    int i;
641
642    if ((w > 0x7ff) || (h > 0x7ff))
643	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
644
645    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
646    {
647	if (R200TexFormats[i].fmt == pPict->format)
648	    break;
649    }
650    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
651	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
652			 (int)pPict->format));
653
654    if (!RADEONCheckTexturePOT(pPict, unit == 0))
655	return FALSE;
656
657    if (pPict->filter != PictFilterNearest &&
658	pPict->filter != PictFilterBilinear)
659	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
660
661    return TRUE;
662}
663
664#endif /* ONLY_ONCE */
665
666static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
667					int unit)
668{
669    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
670    uint32_t txfilter, txformat, txoffset, txpitch;
671    int w = pPict->pDrawable->width;
672    int h = pPict->pDrawable->height;
673    Bool repeat = pPict->repeat && !(unit == 0 && (need_src_tile_x || need_src_tile_y));
674    int i;
675    ACCEL_PREAMBLE();
676
677    txpitch = exaGetPixmapPitch(pPix);
678    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
679
680    if ((txoffset & 0x1f) != 0)
681	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
682    if ((txpitch & 0x1f) != 0)
683	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
684
685    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
686    {
687	if (R200TexFormats[i].fmt == pPict->format)
688	    break;
689    }
690    txformat = R200TexFormats[i].card_fmt;
691    if (RADEONPixmapIsColortiled(pPix))
692	txoffset |= R200_TXO_MACRO_TILE;
693
694    if (repeat) {
695	if (!RADEONPitchMatches(pPix))
696	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
697			     w, (unsigned)txpitch));
698
699	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
700	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
701    } else
702	txformat |= R200_TXFORMAT_NON_POWER2;
703    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
704
705    info->texW[unit] = w;
706    info->texH[unit] = h;
707
708    switch (pPict->filter) {
709    case PictFilterNearest:
710	txfilter = (R200_MAG_FILTER_NEAREST |
711		    R200_MIN_FILTER_NEAREST);
712	break;
713    case PictFilterBilinear:
714	txfilter = (R200_MAG_FILTER_LINEAR |
715		    R200_MIN_FILTER_LINEAR);
716	break;
717    default:
718	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
719    }
720
721    if (repeat)
722      txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
723
724    BEGIN_ACCEL(6);
725    if (unit == 0) {
726	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
727	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
728	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
729	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
730		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
731	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
732	OUT_ACCEL_REG(R200_PP_TXOFFSET_0, txoffset);
733    } else {
734	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
735	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
736	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
737	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
738		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
739	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
740	OUT_ACCEL_REG(R200_PP_TXOFFSET_1, txoffset);
741    }
742    FINISH_ACCEL();
743
744    if (pPict->transform != 0) {
745	is_transform[unit] = TRUE;
746	transform[unit] = pPict->transform;
747    } else {
748	is_transform[unit] = FALSE;
749    }
750
751    return TRUE;
752}
753
754#ifdef ONLY_ONCE
755static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
756			       PicturePtr pDstPicture)
757{
758    PixmapPtr pSrcPixmap, pDstPixmap;
759    uint32_t tmp1;
760
761    TRACE;
762
763    if (!pSrcPicture->pDrawable)
764	return FALSE;
765
766    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
767
768    if (pSrcPixmap->drawable.width >= 2048 ||
769	pSrcPixmap->drawable.height >= 2048) {
770	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
771			 pSrcPixmap->drawable.width,
772			 pSrcPixmap->drawable.height));
773    }
774
775    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
776
777    if (pDstPixmap->drawable.width >= 2048 ||
778	pDstPixmap->drawable.height >= 2048) {
779	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
780			 pDstPixmap->drawable.width,
781			 pDstPixmap->drawable.height));
782    }
783
784    if (pMaskPicture) {
785	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
786
787	if (pMaskPixmap->drawable.width >= 2048 ||
788	    pMaskPixmap->drawable.height >= 2048) {
789	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
790			     pMaskPixmap->drawable.width,
791			     pMaskPixmap->drawable.height));
792	}
793
794	if (pMaskPicture->componentAlpha) {
795	    /* Check if it's component alpha that relies on a source alpha and
796	     * on the source value.  We can only get one of those into the
797	     * single source value that we get to blend with.
798	     */
799	    if (RadeonBlendOp[op].src_alpha &&
800		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
801		RADEON_SRC_BLEND_GL_ZERO) {
802		RADEON_FALLBACK(("Component alpha not supported with source "
803				 "alpha and source value blending.\n"));
804	    }
805	}
806
807	if (!R200CheckCompositeTexture(pMaskPicture, 1))
808	    return FALSE;
809    }
810
811    if (!R200CheckCompositeTexture(pSrcPicture, 0))
812	return FALSE;
813
814    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
815	return FALSE;
816
817    return TRUE;
818}
819#endif /* ONLY_ONCE */
820
821static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
822				PicturePtr pMaskPicture, PicturePtr pDstPicture,
823				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
824{
825    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
826    uint32_t dst_format, dst_offset, dst_pitch;
827    uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
828    int pixel_shift;
829    ACCEL_PREAMBLE();
830
831    TRACE;
832
833    if (!info->XInited3D)
834	RADEONInit3DEngine(pScrn);
835
836    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
837	return FALSE;
838
839    if (pMask)
840	has_mask = TRUE;
841    else
842	has_mask = FALSE;
843
844    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
845
846    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
847    dst_pitch = exaGetPixmapPitch(pDst);
848    colorpitch = dst_pitch >> pixel_shift;
849    if (RADEONPixmapIsColortiled(pDst))
850	colorpitch |= RADEON_COLOR_TILE_ENABLE;
851
852    if ((dst_offset & 0x0f) != 0)
853	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
854    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
855	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
856
857    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
858	return FALSE;
859
860    if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
861	return FALSE;
862    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
863
864    if (pMask != NULL) {
865	if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1))
866	    return FALSE;
867	pp_cntl |= RADEON_TEX_1_ENABLE;
868    } else {
869	is_transform[1] = FALSE;
870    }
871
872    RADEON_SWITCH_TO_3D();
873
874    BEGIN_ACCEL(11);
875
876    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
877    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
878    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
879
880    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
881    if (pMask)
882	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
883		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
884		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
885    else
886	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
887		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
888
889    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
890
891    /* IN operator: Multiply src by mask components or mask alpha.
892     * BLEND_CTL_ADD is A * B + C.
893     * If a picture is a8, we have to explicitly zero its color values.
894     * If the destination is a8, we have to route the alpha to red, I think.
895     * If we're doing component alpha where the source for blending is going to
896     * be the source alpha (and there's no source value used), we have to zero
897     * the source's color values.
898     */
899    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
900    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
901
902    if (pDstPicture->format == PICT_a8 ||
903	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
904    {
905	cblend |= R200_TXC_ARG_A_R0_ALPHA;
906    } else if (pSrcPicture->format == PICT_a8)
907	cblend |= R200_TXC_ARG_A_ZERO;
908    else
909	cblend |= R200_TXC_ARG_A_R0_COLOR;
910    ablend |= R200_TXA_ARG_A_R0_ALPHA;
911
912    if (pMask) {
913	if (pMaskPicture->componentAlpha &&
914	    pDstPicture->format != PICT_a8)
915	    cblend |= R200_TXC_ARG_B_R1_COLOR;
916	else
917	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
918	ablend |= R200_TXA_ARG_B_R1_ALPHA;
919    } else {
920	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
921	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
922    }
923
924    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend);
925    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
926	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
927    OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend);
928    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
929	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
930
931    /* Op operator. */
932    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
933    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
934    FINISH_ACCEL();
935
936    return TRUE;
937}
938
939#ifdef ONLY_ONCE
940
941static Bool R300CheckCompositeTexture(PicturePtr pPict,
942				      PicturePtr pDstPict,
943				      int op,
944				      int unit,
945				      Bool is_r500)
946{
947    int w = pPict->pDrawable->width;
948    int h = pPict->pDrawable->height;
949    int i;
950    int max_tex_w, max_tex_h;
951
952    if (is_r500) {
953	max_tex_w = 4096;
954	max_tex_h = 4096;
955    } else {
956	max_tex_w = 2048;
957	max_tex_h = 2048;
958    }
959
960    if ((w > max_tex_w) || (h > max_tex_h))
961	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
962
963    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
964    {
965	if (R300TexFormats[i].fmt == pPict->format)
966	    break;
967    }
968    if (i == sizeof(R300TexFormats) / sizeof(R300TexFormats[0]))
969	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
970			 (int)pPict->format));
971
972    if (!RADEONCheckTexturePOT(pPict, unit == 0))
973	return FALSE;
974
975    if (pPict->filter != PictFilterNearest &&
976	pPict->filter != PictFilterBilinear)
977	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
978
979    /* for REPEAT_NONE, Render semantics are that sampling outside the source
980     * picture results in alpha=0 pixels. We can implement this with a border color
981     * *if* our source texture has an alpha channel, otherwise we need to fall
982     * back. If we're not transformed then we hope that upper layers have clipped
983     * rendering to the bounds of the source drawable, in which case it doesn't
984     * matter. I have not, however, verified that the X server always does such
985     * clipping.
986     */
987    if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) {
988	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
989	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
990    }
991
992    return TRUE;
993}
994
995#endif /* ONLY_ONCE */
996
997static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
998					int unit)
999{
1000    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1001    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
1002    int w = pPict->pDrawable->width;
1003    int h = pPict->pDrawable->height;
1004    int i, pixel_shift;
1005    ACCEL_PREAMBLE();
1006
1007    TRACE;
1008
1009    txpitch = exaGetPixmapPitch(pPix);
1010    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
1011
1012    if ((txoffset & 0x1f) != 0)
1013	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
1014    if ((txpitch & 0x1f) != 0)
1015	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1016
1017    /* TXPITCH = pixels (texels) per line - 1 */
1018    pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1019    txpitch >>= pixel_shift;
1020    txpitch -= 1;
1021
1022    if (RADEONPixmapIsColortiled(pPix))
1023	txoffset |= R300_MACRO_TILE;
1024
1025    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1026    {
1027	if (R300TexFormats[i].fmt == pPict->format)
1028	    break;
1029    }
1030
1031    txformat1 = R300TexFormats[i].card_fmt;
1032
1033    txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1034		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1035
1036    if (IS_R500_3D && ((w - 1) & 0x800))
1037	txpitch |= R500_TXWIDTH_11;
1038
1039    if (IS_R500_3D && ((h - 1) & 0x800))
1040	txpitch |= R500_TXHEIGHT_11;
1041
1042    /* Use TXPITCH instead of TXWIDTH for address computations: we could
1043     * omit this if there is no padding, but there is no apparent advantage
1044     * in doing so.
1045     */
1046    txformat0 |= R300_TXPITCH_EN;
1047
1048    info->texW[unit] = w;
1049    info->texH[unit] = h;
1050
1051    if (pPict->repeat && !(unit == 0 && need_src_tile_x))
1052      txfilter = R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1053    else
1054      txfilter = R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1055
1056    if (pPict->repeat && !(unit == 0 && need_src_tile_y))
1057      txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1058    else
1059      txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1060
1061    txfilter |= (unit << R300_TX_ID_SHIFT);
1062
1063    switch (pPict->filter) {
1064    case PictFilterNearest:
1065	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1066	break;
1067    case PictFilterBilinear:
1068	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1069	break;
1070    default:
1071	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1072    }
1073
1074    BEGIN_ACCEL(pPict->repeat ? 6 : 7);
1075    OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1076    OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1077    OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1078    OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1079    OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1080    OUT_ACCEL_REG(R300_TX_OFFSET_0 + (unit * 4), txoffset);
1081    if (!pPict->repeat)
1082	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1083    FINISH_ACCEL();
1084
1085    if (pPict->transform != 0) {
1086	is_transform[unit] = TRUE;
1087	transform[unit] = pPict->transform;
1088    } else {
1089	is_transform[unit] = FALSE;
1090    }
1091
1092    return TRUE;
1093}
1094
1095#ifdef ONLY_ONCE
1096
1097static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1098			       PicturePtr pDstPicture)
1099{
1100    uint32_t tmp1;
1101    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
1102    PixmapPtr pSrcPixmap, pDstPixmap;
1103    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1104    RADEONInfoPtr info = RADEONPTR(pScrn);
1105    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1106
1107    TRACE;
1108
1109    /* Check for unsupported compositing operations. */
1110    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
1111	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1112
1113    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1114
1115    if (IS_R500_3D) {
1116	max_tex_w = 4096;
1117	max_tex_h = 4096;
1118	max_dst_w = 4096;
1119	max_dst_h = 4096;
1120    } else {
1121	max_tex_w = 2048;
1122	max_tex_h = 2048;
1123	max_dst_w = 2560;
1124	max_dst_h = 2560;
1125    }
1126
1127    if (pSrcPixmap->drawable.width >= max_tex_w ||
1128	pSrcPixmap->drawable.height >= max_tex_h) {
1129	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1130			 pSrcPixmap->drawable.width,
1131			 pSrcPixmap->drawable.height));
1132    }
1133
1134    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1135
1136    if (pDstPixmap->drawable.width >= max_dst_w ||
1137	pDstPixmap->drawable.height >= max_dst_h) {
1138	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1139			 pDstPixmap->drawable.width,
1140			 pDstPixmap->drawable.height));
1141    }
1142
1143    if (pMaskPicture) {
1144	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1145
1146	if (pMaskPixmap->drawable.width >= max_tex_w ||
1147	    pMaskPixmap->drawable.height >= max_tex_h) {
1148	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1149			     pMaskPixmap->drawable.width,
1150			     pMaskPixmap->drawable.height));
1151	}
1152
1153	if (pMaskPicture->componentAlpha) {
1154	    /* Check if it's component alpha that relies on a source alpha and
1155	     * on the source value.  We can only get one of those into the
1156	     * single source value that we get to blend with.
1157	     */
1158	    if (RadeonBlendOp[op].src_alpha &&
1159		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
1160		RADEON_SRC_BLEND_GL_ZERO) {
1161		RADEON_FALLBACK(("Component alpha not supported with source "
1162				 "alpha and source value blending.\n"));
1163	    }
1164	}
1165
1166	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
1167	    return FALSE;
1168    }
1169
1170    if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D))
1171	return FALSE;
1172
1173    if (!R300GetDestFormat(pDstPicture, &tmp1))
1174	return FALSE;
1175
1176    return TRUE;
1177
1178}
1179#endif /* ONLY_ONCE */
1180
1181static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
1182				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1183				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1184{
1185    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
1186    uint32_t dst_format, dst_offset, dst_pitch;
1187    uint32_t txenable, colorpitch;
1188    uint32_t blendcntl;
1189    int pixel_shift;
1190    ACCEL_PREAMBLE();
1191
1192    TRACE;
1193
1194    if (!info->XInited3D)
1195	RADEONInit3DEngine(pScrn);
1196
1197    if (!R300GetDestFormat(pDstPicture, &dst_format))
1198	return FALSE;
1199
1200    if (pMask)
1201	has_mask = TRUE;
1202    else
1203	has_mask = FALSE;
1204
1205    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1206
1207    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1208    dst_pitch = exaGetPixmapPitch(pDst);
1209    colorpitch = dst_pitch >> pixel_shift;
1210
1211    if (RADEONPixmapIsColortiled(pDst))
1212	colorpitch |= R300_COLORTILE;
1213
1214    colorpitch |= dst_format;
1215
1216    if ((dst_offset & 0x0f) != 0)
1217	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
1218    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1219	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1220
1221    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1222	return FALSE;
1223
1224    if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
1225	return FALSE;
1226    txenable = R300_TEX_0_ENABLE;
1227
1228    if (pMask != NULL) {
1229	if (!FUNC_NAME(R300TextureSetup)(pMaskPicture, pMask, 1))
1230	    return FALSE;
1231	txenable |= R300_TEX_1_ENABLE;
1232    } else {
1233	is_transform[1] = FALSE;
1234    }
1235
1236    RADEON_SWITCH_TO_3D();
1237
1238    /* setup the VAP */
1239    if (info->has_tcl) {
1240	if (pMask)
1241	    BEGIN_ACCEL(8);
1242	else
1243	    BEGIN_ACCEL(7);
1244    } else {
1245	if (pMask)
1246	    BEGIN_ACCEL(6);
1247	else
1248	    BEGIN_ACCEL(5);
1249    }
1250
1251    /* These registers define the number, type, and location of data submitted
1252     * to the PVS unit of GA input (when PVS is disabled)
1253     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
1254     * enabled.  This memory provides the imputs to the vertex shader program
1255     * and ordering is not important.  When PVS/TCL is disabled, this field maps
1256     * directly to the GA input memory and the order is signifigant.  In
1257     * PVS_BYPASS mode the order is as follows:
1258     * Position
1259     * Point Size
1260     * Color 0-3
1261     * Textures 0-7
1262     * Fog
1263     */
1264    if (pMask) {
1265	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1266		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1267		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1268		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1269		       R300_SIGNED_0 |
1270		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1271		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1272		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1273		       R300_SIGNED_1));
1274	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
1275		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1276		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1277		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1278		       R300_LAST_VEC_2 |
1279		       R300_SIGNED_2));
1280    } else
1281	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1282		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1283		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1284		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1285		       R300_SIGNED_0 |
1286		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1287		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1288		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1289		       R300_LAST_VEC_1 |
1290		       R300_SIGNED_1));
1291
1292    /* load the vertex shader
1293     * We pre-load vertex programs in RADEONInit3DEngine():
1294     * - exa no mask
1295     * - exa mask
1296     * - Xv
1297     * Here we select the offset of the vertex program we want to use
1298     */
1299    if (info->has_tcl) {
1300	if (pMask) {
1301	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1302			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1303			   (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1304			   (2 << R300_PVS_LAST_INST_SHIFT)));
1305	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1306			  (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1307	} else {
1308	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1309			  ((3 << R300_PVS_FIRST_INST_SHIFT) |
1310			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1311			   (4 << R300_PVS_LAST_INST_SHIFT)));
1312	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1313			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1314	}
1315    }
1316
1317    /* Position and one or two sets of 2 texture coordinates */
1318    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1319    if (pMask)
1320	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1321		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1322		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1323    else
1324	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1325		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1326
1327    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
1328    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
1329    FINISH_ACCEL();
1330
1331    /* setup pixel shader */
1332    if (IS_R300_3D) {
1333	uint32_t output_fmt;
1334	int src_color, src_alpha;
1335	int mask_color, mask_alpha;
1336
1337	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1338	    src_color = R300_ALU_RGB_0_0;
1339	else
1340	    src_color = R300_ALU_RGB_SRC0_RGB;
1341
1342	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1343	    src_alpha = R300_ALU_ALPHA_1_0;
1344	else
1345	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1346
1347	if (pMask && pMaskPicture->componentAlpha) {
1348	    if (RadeonBlendOp[op].src_alpha) {
1349		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
1350		    src_color = R300_ALU_RGB_1_0;
1351		    src_alpha = R300_ALU_ALPHA_1_0;
1352		} else {
1353		    src_color = R300_ALU_RGB_SRC0_AAA;
1354		    src_alpha = R300_ALU_ALPHA_SRC0_A;
1355		}
1356
1357		mask_color = R300_ALU_RGB_SRC1_RGB;
1358
1359		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1360		    mask_alpha = R300_ALU_ALPHA_1_0;
1361		else
1362		    mask_alpha = R300_ALU_ALPHA_SRC1_A;
1363
1364	    } else {
1365		src_color = R300_ALU_RGB_SRC0_RGB;
1366
1367		if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1368		    src_alpha = R300_ALU_ALPHA_1_0;
1369		else
1370		    src_alpha = R300_ALU_ALPHA_SRC0_A;
1371
1372		mask_color = R300_ALU_RGB_SRC1_RGB;
1373
1374		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1375		    mask_alpha = R300_ALU_ALPHA_1_0;
1376		else
1377		    mask_alpha = R300_ALU_ALPHA_SRC1_A;
1378
1379	    }
1380	} else if (pMask) {
1381	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1382		mask_color = R300_ALU_RGB_1_0;
1383	    else
1384		mask_color = R300_ALU_RGB_SRC1_AAA;
1385
1386	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1387		mask_alpha = R300_ALU_ALPHA_1_0;
1388	    else
1389		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1390	} else {
1391	    mask_color = R300_ALU_RGB_1_0;
1392	    mask_alpha = R300_ALU_ALPHA_1_0;
1393	}
1394
1395	/* shader output swizzling */
1396	switch (pDstPicture->format) {
1397	case PICT_a8r8g8b8:
1398	case PICT_x8r8g8b8:
1399	default:
1400	    output_fmt = (R300_OUT_FMT_C4_8 |
1401			  R300_OUT_FMT_C0_SEL_BLUE |
1402			  R300_OUT_FMT_C1_SEL_GREEN |
1403			  R300_OUT_FMT_C2_SEL_RED |
1404			  R300_OUT_FMT_C3_SEL_ALPHA);
1405	    break;
1406	case PICT_a8b8g8r8:
1407	case PICT_x8b8g8r8:
1408	    output_fmt = (R300_OUT_FMT_C4_8 |
1409			  R300_OUT_FMT_C0_SEL_RED |
1410			  R300_OUT_FMT_C1_SEL_GREEN |
1411			  R300_OUT_FMT_C2_SEL_BLUE |
1412			  R300_OUT_FMT_C3_SEL_ALPHA);
1413	    break;
1414	case PICT_a8:
1415	    output_fmt = (R300_OUT_FMT_C4_8 |
1416			  R300_OUT_FMT_C0_SEL_ALPHA);
1417	    break;
1418	}
1419
1420
1421	/* setup the rasterizer, load FS */
1422	BEGIN_ACCEL(9);
1423	if (pMask) {
1424	    /* 4 components: 2 for tex0, 2 for tex1 */
1425	    OUT_ACCEL_REG(R300_RS_COUNT,
1426			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1427			   R300_RS_COUNT_HIRES_EN));
1428
1429	    /* R300_INST_COUNT_RS - highest RS instruction used */
1430	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
1431
1432	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1433						R300_ALU_CODE_SIZE(0) |
1434						R300_TEX_CODE_OFFSET(0) |
1435						R300_TEX_CODE_SIZE(1)));
1436
1437	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1438			  (R300_ALU_START(0) |
1439			   R300_ALU_SIZE(0) |
1440			   R300_TEX_START(0) |
1441			   R300_TEX_SIZE(1) |
1442			   R300_RGBA_OUT));
1443	} else {
1444	    /* 2 components: 2 for tex0 */
1445	    OUT_ACCEL_REG(R300_RS_COUNT,
1446			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1447			   R300_RS_COUNT_HIRES_EN));
1448
1449	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
1450
1451	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1452						R300_ALU_CODE_SIZE(0) |
1453						R300_TEX_CODE_OFFSET(0) |
1454						R300_TEX_CODE_SIZE(0)));
1455
1456	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1457			  (R300_ALU_START(0) |
1458			   R300_ALU_SIZE(0) |
1459			   R300_TEX_START(0) |
1460			   R300_TEX_SIZE(0) |
1461			   R300_RGBA_OUT));
1462	}
1463
1464	/* shader output swizzling */
1465	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1466
1467	/* tex inst for src texture is pre-loaded in RADEONInit3DEngine() */
1468	/* tex inst for mask texture is pre-loaded in RADEONInit3DEngine() */
1469
1470	/* RGB inst
1471	 * temp addresses for texture inputs
1472	 * ALU_RGB_ADDR0 is src tex (temp 0)
1473	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1474	 * R300_ALU_RGB_OMASK - output components to write
1475	 * R300_ALU_RGB_TARGET_A - render target
1476	 */
1477	OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0,
1478		      (R300_ALU_RGB_ADDR0(0) |
1479		       R300_ALU_RGB_ADDR1(1) |
1480		       R300_ALU_RGB_ADDR2(0) |
1481		       R300_ALU_RGB_ADDRD(0) |
1482		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1483					   R300_ALU_RGB_MASK_G |
1484					   R300_ALU_RGB_MASK_B)) |
1485		       R300_ALU_RGB_TARGET_A));
1486	/* RGB inst
1487	 * ALU operation
1488	 */
1489	OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0,
1490		      (R300_ALU_RGB_SEL_A(src_color) |
1491		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1492		       R300_ALU_RGB_SEL_B(mask_color) |
1493		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1494		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1495		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1496		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1497		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1498		       R300_ALU_RGB_CLAMP));
1499	/* Alpha inst
1500	 * temp addresses for texture inputs
1501	 * ALU_ALPHA_ADDR0 is src tex (0)
1502	 * ALU_ALPHA_ADDR1 is mask tex (1)
1503	 * R300_ALU_ALPHA_OMASK - output components to write
1504	 * R300_ALU_ALPHA_TARGET_A - render target
1505	 */
1506	OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0,
1507		      (R300_ALU_ALPHA_ADDR0(0) |
1508		       R300_ALU_ALPHA_ADDR1(1) |
1509		       R300_ALU_ALPHA_ADDR2(0) |
1510		       R300_ALU_ALPHA_ADDRD(0) |
1511		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1512		       R300_ALU_ALPHA_TARGET_A |
1513		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1514	/* Alpha inst
1515	 * ALU operation
1516	 */
1517	OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0,
1518		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1519		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1520		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1521		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1522		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1523		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1524		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1525		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1526		       R300_ALU_ALPHA_CLAMP));
1527	FINISH_ACCEL();
1528    } else {
1529	uint32_t output_fmt;
1530	uint32_t src_color, src_alpha;
1531	uint32_t mask_color, mask_alpha;
1532
1533	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1534	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1535			 R500_ALU_RGB_G_SWIZ_A_0 |
1536			 R500_ALU_RGB_B_SWIZ_A_0);
1537	else
1538	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1539			 R500_ALU_RGB_G_SWIZ_A_G |
1540			 R500_ALU_RGB_B_SWIZ_A_B);
1541
1542	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1543	    src_alpha = R500_ALPHA_SWIZ_A_1;
1544	else
1545	    src_alpha = R500_ALPHA_SWIZ_A_A;
1546
1547	if (pMask && pMaskPicture->componentAlpha) {
1548	    if (RadeonBlendOp[op].src_alpha) {
1549		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
1550		    src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1551				 R500_ALU_RGB_G_SWIZ_A_1 |
1552				 R500_ALU_RGB_B_SWIZ_A_1);
1553		    src_alpha = R500_ALPHA_SWIZ_A_1;
1554		} else {
1555		    src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1556				 R500_ALU_RGB_G_SWIZ_A_A |
1557				 R500_ALU_RGB_B_SWIZ_A_A);
1558		    src_alpha = R500_ALPHA_SWIZ_A_A;
1559		}
1560
1561		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1562			      R500_ALU_RGB_G_SWIZ_B_G |
1563			      R500_ALU_RGB_B_SWIZ_B_B);
1564
1565		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1566		    mask_alpha = R500_ALPHA_SWIZ_B_1;
1567		else
1568		    mask_alpha = R500_ALPHA_SWIZ_B_A;
1569
1570	    } else {
1571		src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1572			     R500_ALU_RGB_G_SWIZ_A_G |
1573			     R500_ALU_RGB_B_SWIZ_A_B);
1574
1575		if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1576		    src_alpha = R500_ALPHA_SWIZ_A_1;
1577		else
1578		    src_alpha = R500_ALPHA_SWIZ_A_A;
1579
1580		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1581			      R500_ALU_RGB_G_SWIZ_B_G |
1582			      R500_ALU_RGB_B_SWIZ_B_B);
1583
1584		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1585		    mask_alpha = R500_ALPHA_SWIZ_B_1;
1586		else
1587		    mask_alpha = R500_ALPHA_SWIZ_B_A;
1588
1589	    }
1590	} else if (pMask) {
1591	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1592		mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1593			      R500_ALU_RGB_G_SWIZ_B_1 |
1594			      R500_ALU_RGB_B_SWIZ_B_1);
1595	    else
1596		mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1597			      R500_ALU_RGB_G_SWIZ_B_A |
1598			      R500_ALU_RGB_B_SWIZ_B_A);
1599
1600	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1601		mask_alpha = R500_ALPHA_SWIZ_B_1;
1602	    else
1603		mask_alpha = R500_ALPHA_SWIZ_B_A;
1604	} else {
1605	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1606			  R500_ALU_RGB_G_SWIZ_B_1 |
1607			  R500_ALU_RGB_B_SWIZ_B_1);
1608	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1609	}
1610
1611	/* shader output swizzling */
1612	switch (pDstPicture->format) {
1613	case PICT_a8r8g8b8:
1614	case PICT_x8r8g8b8:
1615	default:
1616	    output_fmt = (R300_OUT_FMT_C4_8 |
1617			  R300_OUT_FMT_C0_SEL_BLUE |
1618			  R300_OUT_FMT_C1_SEL_GREEN |
1619			  R300_OUT_FMT_C2_SEL_RED |
1620			  R300_OUT_FMT_C3_SEL_ALPHA);
1621	    break;
1622	case PICT_a8b8g8r8:
1623	case PICT_x8b8g8r8:
1624	    output_fmt = (R300_OUT_FMT_C4_8 |
1625			  R300_OUT_FMT_C0_SEL_RED |
1626			  R300_OUT_FMT_C1_SEL_GREEN |
1627			  R300_OUT_FMT_C2_SEL_BLUE |
1628			  R300_OUT_FMT_C3_SEL_ALPHA);
1629	    break;
1630	case PICT_a8:
1631	    output_fmt = (R300_OUT_FMT_C4_8 |
1632			  R300_OUT_FMT_C0_SEL_ALPHA);
1633	    break;
1634	}
1635
1636	BEGIN_ACCEL(6);
1637	if (pMask) {
1638	    /* 4 components: 2 for tex0, 2 for tex1 */
1639	    OUT_ACCEL_REG(R300_RS_COUNT,
1640			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1641			   R300_RS_COUNT_HIRES_EN));
1642
1643	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1644	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
1645
1646	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1647					      R500_US_CODE_END_ADDR(2)));
1648	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1649					       R500_US_CODE_RANGE_SIZE(2)));
1650	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1651	} else {
1652	    OUT_ACCEL_REG(R300_RS_COUNT,
1653			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1654			   R300_RS_COUNT_HIRES_EN));
1655
1656	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
1657
1658	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1659					      R500_US_CODE_END_ADDR(1)));
1660	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1661					       R500_US_CODE_RANGE_SIZE(1)));
1662	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1663	}
1664
1665	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1666	FINISH_ACCEL();
1667
1668	if (pMask) {
1669	    BEGIN_ACCEL(19);
1670	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, 0);
1671	    /* tex inst for src texture */
1672	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1673						   R500_INST_RGB_WMASK_R |
1674						   R500_INST_RGB_WMASK_G |
1675						   R500_INST_RGB_WMASK_B |
1676						   R500_INST_ALPHA_WMASK |
1677						   R500_INST_RGB_CLAMP |
1678						   R500_INST_ALPHA_CLAMP));
1679
1680	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1681						   R500_TEX_INST_LD |
1682						   R500_TEX_IGNORE_UNCOVERED));
1683
1684	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1685						   R500_TEX_SRC_S_SWIZ_R |
1686						   R500_TEX_SRC_T_SWIZ_G |
1687						   R500_TEX_DST_ADDR(0) |
1688						   R500_TEX_DST_R_SWIZ_R |
1689						   R500_TEX_DST_G_SWIZ_G |
1690						   R500_TEX_DST_B_SWIZ_B |
1691						   R500_TEX_DST_A_SWIZ_A));
1692	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1693						   R500_DX_S_SWIZ_R |
1694						   R500_DX_T_SWIZ_R |
1695						   R500_DX_R_SWIZ_R |
1696						   R500_DX_Q_SWIZ_R |
1697						   R500_DY_ADDR(0) |
1698						   R500_DY_S_SWIZ_R |
1699						   R500_DY_T_SWIZ_R |
1700						   R500_DY_R_SWIZ_R |
1701						   R500_DY_Q_SWIZ_R));
1702	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1703	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1704
1705	    /* tex inst for mask texture */
1706	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1707						   R500_INST_TEX_SEM_WAIT |
1708						   R500_INST_RGB_WMASK_R |
1709						   R500_INST_RGB_WMASK_G |
1710						   R500_INST_RGB_WMASK_B |
1711						   R500_INST_ALPHA_WMASK |
1712						   R500_INST_RGB_CLAMP |
1713						   R500_INST_ALPHA_CLAMP));
1714
1715	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
1716						   R500_TEX_INST_LD |
1717						   R500_TEX_SEM_ACQUIRE |
1718						   R500_TEX_IGNORE_UNCOVERED));
1719
1720	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
1721						   R500_TEX_SRC_S_SWIZ_R |
1722						   R500_TEX_SRC_T_SWIZ_G |
1723						   R500_TEX_DST_ADDR(1) |
1724						   R500_TEX_DST_R_SWIZ_R |
1725						   R500_TEX_DST_G_SWIZ_G |
1726						   R500_TEX_DST_B_SWIZ_B |
1727						   R500_TEX_DST_A_SWIZ_A));
1728	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
1729						   R500_DX_S_SWIZ_R |
1730						   R500_DX_T_SWIZ_R |
1731						   R500_DX_R_SWIZ_R |
1732						   R500_DX_Q_SWIZ_R |
1733						   R500_DY_ADDR(1) |
1734						   R500_DY_S_SWIZ_R |
1735						   R500_DY_T_SWIZ_R |
1736						   R500_DY_R_SWIZ_R |
1737						   R500_DY_Q_SWIZ_R));
1738	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1739	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1740	} else {
1741	    BEGIN_ACCEL(13);
1742	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, 0);
1743	    /* tex inst for src texture */
1744	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1745						   R500_INST_TEX_SEM_WAIT |
1746						   R500_INST_RGB_WMASK_R |
1747						   R500_INST_RGB_WMASK_G |
1748						   R500_INST_RGB_WMASK_B |
1749						   R500_INST_ALPHA_WMASK |
1750						   R500_INST_RGB_CLAMP |
1751						   R500_INST_ALPHA_CLAMP));
1752
1753	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1754						   R500_TEX_INST_LD |
1755						   R500_TEX_SEM_ACQUIRE |
1756						   R500_TEX_IGNORE_UNCOVERED));
1757
1758	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1759						   R500_TEX_SRC_S_SWIZ_R |
1760						   R500_TEX_SRC_T_SWIZ_G |
1761						   R500_TEX_DST_ADDR(0) |
1762						   R500_TEX_DST_R_SWIZ_R |
1763						   R500_TEX_DST_G_SWIZ_G |
1764						   R500_TEX_DST_B_SWIZ_B |
1765						   R500_TEX_DST_A_SWIZ_A));
1766	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1767						   R500_DX_S_SWIZ_R |
1768						   R500_DX_T_SWIZ_R |
1769						   R500_DX_R_SWIZ_R |
1770						   R500_DX_Q_SWIZ_R |
1771						   R500_DY_ADDR(0) |
1772						   R500_DY_S_SWIZ_R |
1773						   R500_DY_T_SWIZ_R |
1774						   R500_DY_R_SWIZ_R |
1775						   R500_DY_Q_SWIZ_R));
1776	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1777	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1778	}
1779
1780	/* ALU inst */
1781	/* *_OMASK* - output component write mask */
1782	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
1783					       R500_INST_TEX_SEM_WAIT |
1784					       R500_INST_LAST |
1785					       R500_INST_RGB_OMASK_R |
1786					       R500_INST_RGB_OMASK_G |
1787					       R500_INST_RGB_OMASK_B |
1788					       R500_INST_ALPHA_OMASK |
1789					       R500_INST_RGB_CLAMP |
1790					       R500_INST_ALPHA_CLAMP));
1791	/* ALU inst
1792	 * temp addresses for texture inputs
1793	 * RGB_ADDR0 is src tex (temp 0)
1794	 * RGB_ADDR1 is mask tex (temp 1)
1795	 */
1796	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
1797					       R500_RGB_ADDR1(1) |
1798					       R500_RGB_ADDR2(0)));
1799	/* ALU inst
1800	 * temp addresses for texture inputs
1801	 * ALPHA_ADDR0 is src tex (temp 0)
1802	 * ALPHA_ADDR1 is mask tex (temp 1)
1803	 */
1804	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
1805					       R500_ALPHA_ADDR1(1) |
1806					       R500_ALPHA_ADDR2(0)));
1807
1808	/* R500_ALU_RGB_TARGET - RGB render target */
1809	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
1810					       src_color |
1811					       R500_ALU_RGB_SEL_B_SRC1 |
1812					       mask_color |
1813					       R500_ALU_RGB_TARGET(0)));
1814
1815	/* R500_ALPHA_RGB_TARGET - alpha render target */
1816	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
1817					       R500_ALPHA_ADDRD(0) |
1818					       R500_ALPHA_SEL_A_SRC0 |
1819					       src_alpha |
1820					       R500_ALPHA_SEL_B_SRC1 |
1821					       mask_alpha |
1822					       R500_ALPHA_TARGET(0)));
1823
1824	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
1825					       R500_ALU_RGBA_ADDRD(0) |
1826					       R500_ALU_RGBA_R_SWIZ_0 |
1827					       R500_ALU_RGBA_G_SWIZ_0 |
1828					       R500_ALU_RGBA_B_SWIZ_0 |
1829					       R500_ALU_RGBA_A_SWIZ_0));
1830	FINISH_ACCEL();
1831    }
1832
1833    BEGIN_ACCEL(3);
1834
1835    OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
1836    OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
1837
1838    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1839    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
1840
1841    FINISH_ACCEL();
1842
1843    return TRUE;
1844}
1845
/* Floats per emitted vertex: x, y, src s, src t [, mask s, mask t]. */
#define VTX_COUNT_MASK 6
#define VTX_COUNT 4

/*
 * Vertex emission helpers.
 *   VTX_OUT       writes destination x/y followed by the source coordinates.
 *   VTX_OUT_MASK  writes the same four values followed by the mask coordinates.
 * With ACCEL_CP the values are emitted with OUT_RING_F; otherwise each value
 * is written to RADEON_SE_PORT_DATA0 with OUT_ACCEL_REG_F.
 */
#ifdef ACCEL_CP

#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)			\
do {								\
    OUT_RING_F(_dstX);						\
    OUT_RING_F(_dstY);						\
    OUT_RING_F(_srcX);						\
    OUT_RING_F(_srcY);						\
} while (0)

#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);			\
    OUT_RING_F(_maskX);						\
    OUT_RING_F(_maskY);						\
} while (0)

#else /* ACCEL_CP */

#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)			\
do {								\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY);		\
} while (0)

#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY);		\
} while (0)

#endif /* !ACCEL_CP */
1890
#ifdef ONLY_ONCE
/*
 * Transform a fixed-point point in place.
 *
 * The point is passed to PictureTransformPoint() as the vector
 * (x, y, xFixed1); the first two components of the result are written
 * back to *point.  The return value of PictureTransformPoint() is not
 * checked.
 *
 * NOTE(review): the ONLY_ONCE guard presumably keeps this helper from
 * being defined again when the file is included for a second
 * acceleration backend - confirm against the including file.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector vec;

    vec.vector[2] = xFixed1;
    vec.vector[1] = point->y;
    vec.vector[0] = point->x;

    PictureTransformPoint(transform, &vec);

    point->y = vec.vector[1];
    point->x = vec.vector[0];
}
#endif /* ONLY_ONCE */
1903
1904static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst,
1905					   int srcX, int srcY,
1906					   int maskX, int maskY,
1907					   int dstX, int dstY,
1908					   int w, int h)
1909{
1910    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
1911    int vtx_count;
1912    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
1913    xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
1914    ACCEL_PREAMBLE();
1915
1916    ENTER_DRAW(0);
1917
1918    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1919       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1920
1921    srcTopLeft.x     = IntToxFixed(srcX);
1922    srcTopLeft.y     = IntToxFixed(srcY);
1923    srcTopRight.x    = IntToxFixed(srcX + w);
1924    srcTopRight.y    = IntToxFixed(srcY);
1925    srcBottomLeft.x  = IntToxFixed(srcX);
1926    srcBottomLeft.y  = IntToxFixed(srcY + h);
1927    srcBottomRight.x = IntToxFixed(srcX + w);
1928    srcBottomRight.y = IntToxFixed(srcY + h);
1929
1930    maskTopLeft.x     = IntToxFixed(maskX);
1931    maskTopLeft.y     = IntToxFixed(maskY);
1932    maskTopRight.x    = IntToxFixed(maskX + w);
1933    maskTopRight.y    = IntToxFixed(maskY);
1934    maskBottomLeft.x  = IntToxFixed(maskX);
1935    maskBottomLeft.y  = IntToxFixed(maskY + h);
1936    maskBottomRight.x = IntToxFixed(maskX + w);
1937    maskBottomRight.y = IntToxFixed(maskY + h);
1938
1939    if (is_transform[0]) {
1940	transformPoint(transform[0], &srcTopLeft);
1941	transformPoint(transform[0], &srcTopRight);
1942	transformPoint(transform[0], &srcBottomLeft);
1943	transformPoint(transform[0], &srcBottomRight);
1944    }
1945    if (is_transform[1]) {
1946	transformPoint(transform[1], &maskTopLeft);
1947	transformPoint(transform[1], &maskTopRight);
1948	transformPoint(transform[1], &maskBottomLeft);
1949	transformPoint(transform[1], &maskBottomRight);
1950    }
1951
1952    if (has_mask)
1953	vtx_count = VTX_COUNT_MASK;
1954    else
1955	vtx_count = VTX_COUNT;
1956
1957    if (IS_R300_3D || IS_R500_3D) {
1958	BEGIN_ACCEL(1);
1959	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
1960	FINISH_ACCEL();
1961    }
1962
1963#ifdef ACCEL_CP
1964    if (info->ChipFamily < CHIP_FAMILY_R200) {
1965	BEGIN_RING(4 * vtx_count + 3);
1966	OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
1967			    4 * vtx_count + 1));
1968	if (has_mask)
1969	    OUT_RING(RADEON_CP_VC_FRMT_XY |
1970		     RADEON_CP_VC_FRMT_ST0 |
1971		     RADEON_CP_VC_FRMT_ST1);
1972	else
1973	    OUT_RING(RADEON_CP_VC_FRMT_XY |
1974		     RADEON_CP_VC_FRMT_ST0);
1975	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
1976		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
1977		 RADEON_CP_VC_CNTL_MAOS_ENABLE |
1978		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
1979		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
1980    } else {
1981	if (IS_R300_3D || IS_R500_3D)
1982	    BEGIN_RING(4 * vtx_count + 4);
1983	else
1984	    BEGIN_RING(4 * vtx_count + 2);
1985
1986	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
1987			    4 * vtx_count));
1988	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
1989		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
1990		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
1991    }
1992
1993#else /* ACCEL_CP */
1994    if (IS_R300_3D || IS_R500_3D)
1995	BEGIN_ACCEL(2 + vtx_count * 4);
1996    else
1997	BEGIN_ACCEL(1 + vtx_count * 4);
1998
1999    if (info->ChipFamily < CHIP_FAMILY_R200) {
2000	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_FAN |
2001					  RADEON_VF_PRIM_WALK_DATA |
2002					  RADEON_VF_RADEON_MODE |
2003					  4 << RADEON_VF_NUM_VERTICES_SHIFT));
2004    } else {
2005	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
2006					  RADEON_VF_PRIM_WALK_DATA |
2007					  4 << RADEON_VF_NUM_VERTICES_SHIFT));
2008    }
2009#endif
2010
2011    if (has_mask) {
2012	VTX_OUT_MASK((float)dstX,                                      (float)dstY,
2013		xFixedToFloat(srcTopLeft.x) / info->texW[0],      xFixedToFloat(srcTopLeft.y) / info->texH[0],
2014		xFixedToFloat(maskTopLeft.x) / info->texW[1],     xFixedToFloat(maskTopLeft.y) / info->texH[1]);
2015	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
2016		xFixedToFloat(srcBottomLeft.x) / info->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->texH[0],
2017		xFixedToFloat(maskBottomLeft.x) / info->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->texH[1]);
2018	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
2019		xFixedToFloat(srcBottomRight.x) / info->texW[0],  xFixedToFloat(srcBottomRight.y) / info->texH[0],
2020		xFixedToFloat(maskBottomRight.x) / info->texW[1], xFixedToFloat(maskBottomRight.y) / info->texH[1]);
2021	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
2022		xFixedToFloat(srcTopRight.x) / info->texW[0],     xFixedToFloat(srcTopRight.y) / info->texH[0],
2023		xFixedToFloat(maskTopRight.x) / info->texW[1],    xFixedToFloat(maskTopRight.y) / info->texH[1]);
2024    } else {
2025	VTX_OUT((float)dstX,                                      (float)dstY,
2026		xFixedToFloat(srcTopLeft.x) / info->texW[0],      xFixedToFloat(srcTopLeft.y) / info->texH[0]);
2027	VTX_OUT((float)dstX,                                      (float)(dstY + h),
2028		xFixedToFloat(srcBottomLeft.x) / info->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->texH[0]);
2029	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
2030		xFixedToFloat(srcBottomRight.x) / info->texW[0],  xFixedToFloat(srcBottomRight.y) / info->texH[0]);
2031	VTX_OUT((float)(dstX + w),                                (float)dstY,
2032		xFixedToFloat(srcTopRight.x) / info->texW[0],     xFixedToFloat(srcTopRight.y) / info->texH[0]);
2033    }
2034
2035    if (IS_R300_3D || IS_R500_3D)
2036	/* flushing is pipelined, free/finish is not */
2037	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
2038
2039#ifdef ACCEL_CP
2040    ADVANCE_RING();
2041#else
2042    FINISH_ACCEL();
2043#endif /* !ACCEL_CP */
2044
2045    LEAVE_DRAW(0);
2046}
2047#undef VTX_OUT
2048#undef VTX_OUT_MASK
2049
2050static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
2051				       int srcX, int srcY,
2052				       int maskX, int maskY,
2053				       int dstX, int dstY,
2054				       int width, int height)
2055{
2056    int tileSrcY, tileMaskY, tileDstY;
2057    int remainingHeight;
2058
2059    if (!need_src_tile_x && !need_src_tile_y) {
2060	FUNC_NAME(RadeonCompositeTile)(pDst,
2061				       srcX, srcY,
2062				       maskX, maskY,
2063				       dstX, dstY,
2064				       width, height);
2065	return;
2066    }
2067
2068    /* Tiling logic borrowed from exaFillRegionTiled */
2069
2070    modulus(srcY, src_tile_height, tileSrcY);
2071    tileMaskY = maskY;
2072    tileDstY = dstY;
2073
2074    remainingHeight = height;
2075    while (remainingHeight > 0) {
2076	int remainingWidth = width;
2077	int tileSrcX, tileMaskX, tileDstX;
2078	int h = src_tile_height - tileSrcY;
2079
2080	if (h > remainingHeight)
2081	    h = remainingHeight;
2082	remainingHeight -= h;
2083
2084	modulus(srcX, src_tile_width, tileSrcX);
2085	tileMaskX = maskX;
2086	tileDstX = dstX;
2087
2088	while (remainingWidth > 0) {
2089	    int w = src_tile_width - tileSrcX;
2090	    if (w > remainingWidth)
2091		w = remainingWidth;
2092	    remainingWidth -= w;
2093
2094	    FUNC_NAME(RadeonCompositeTile)(pDst,
2095					   tileSrcX, tileSrcY,
2096					   tileMaskX, tileMaskY,
2097					   tileDstX, tileDstY,
2098					   w, h);
2099
2100	    tileSrcX = 0;
2101	    tileMaskX += w;
2102	    tileDstX += w;
2103	}
2104	tileSrcY = 0;
2105	tileMaskY += h;
2106	tileDstY += h;
2107    }
2108}
2109
2110static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
2111{
2112    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2113    ACCEL_PREAMBLE();
2114
2115    ENTER_DRAW(0);
2116
2117    if (IS_R300_3D || IS_R500_3D) {
2118	BEGIN_ACCEL(2);
2119	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2120    } else
2121	BEGIN_ACCEL(1);
2122    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2123    FINISH_ACCEL();
2124
2125    LEAVE_DRAW(0);
2126}
2127
2128#undef ONLY_ONCE
2129#undef FUNC_NAME
2130