radeon_exa_render.c revision 7821949a
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *    Alex Deucher <alexander.deucher@amd.com>
30 *
31 */
32
33#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
34#error Cannot define both MMIO and CP acceleration!
35#endif
36
37#if !defined(UNIXCPP) || defined(ANSICPP)
38#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
39#else
40#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
41#endif
42
43#ifdef ACCEL_MMIO
44#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
45#else
46#ifdef ACCEL_CP
47#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
48#else
49#error No accel type defined!
50#endif
51#endif
52
53#ifndef ACCEL_CP
54#define ONLY_ONCE
55#endif
56
57/* Only include the following (generic) bits once. */
58#ifdef ONLY_ONCE
59
60struct blendinfo {
61    Bool dst_alpha;
62    Bool src_alpha;
63    uint32_t blend_cntl;
64};
65
66static struct blendinfo RadeonBlendOp[] = {
67    /* Clear */
68    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
69    /* Src */
70    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
71    /* Dst */
72    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
73    /* Over */
74    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
75    /* OverReverse */
76    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
77    /* In */
78    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
79    /* InReverse */
80    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
81    /* Out */
82    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
83    /* OutReverse */
84    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
85    /* Atop */
86    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
87    /* AtopReverse */
88    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
89    /* Xor */
90    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
91    /* Add */
92    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
93};
94
95struct formatinfo {
96    int fmt;
97    uint32_t card_fmt;
98};
99
100/* Note on texture formats:
101 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
102 */
103static struct formatinfo R100TexFormats[] = {
104	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
105	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
106	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
107	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
108	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
109	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
110};
111
112static struct formatinfo R200TexFormats[] = {
113    {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
114    {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
115    {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
116    {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
117    {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
118    {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
119    {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
120    {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
121};
122
123static struct formatinfo R300TexFormats[] = {
124    {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
125    {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
126    {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
127    {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
128#ifdef PICT_TYPE_BGRA
129    {PICT_b8g8r8a8,	R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)},
130    {PICT_b8g8r8x8,	R300_EASY_TX_FORMAT(W, Z, Y, ONE, W8Z8Y8X8)},
131#endif
132    {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
133    {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
134    {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
135    {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
136};
137
138/* Common Radeon setup code */
139
140static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
141{
142    switch (pDstPicture->format) {
143    case PICT_a8r8g8b8:
144    case PICT_x8r8g8b8:
145	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
146	break;
147    case PICT_r5g6b5:
148	*dst_format = RADEON_COLOR_FORMAT_RGB565;
149	break;
150    case PICT_a1r5g5b5:
151    case PICT_x1r5g5b5:
152	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
153	break;
154    case PICT_a8:
155	*dst_format = RADEON_COLOR_FORMAT_RGB8;
156	break;
157    default:
158	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
159			(int)pDstPicture->format));
160    }
161
162    return TRUE;
163}
164
165static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
166{
167    switch (pDstPicture->format) {
168    case PICT_a8r8g8b8:
169    case PICT_x8r8g8b8:
170    case PICT_a8b8g8r8:
171    case PICT_x8b8g8r8:
172#ifdef PICT_TYPE_BGRA
173    case PICT_b8g8r8a8:
174    case PICT_b8g8r8x8:
175#endif
176	*dst_format = R300_COLORFORMAT_ARGB8888;
177	break;
178    case PICT_r5g6b5:
179	*dst_format = R300_COLORFORMAT_RGB565;
180	break;
181    case PICT_a1r5g5b5:
182    case PICT_x1r5g5b5:
183	*dst_format = R300_COLORFORMAT_ARGB1555;
184	break;
185    case PICT_a8:
186	*dst_format = R300_COLORFORMAT_I8;
187	break;
188    default:
189	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
190	       (int)pDstPicture->format));
191    }
192    return TRUE;
193}
194
195static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
196{
197    uint32_t sblend, dblend;
198
199    sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK;
200    dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK;
201
202    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
203     * it as always 1.
204     */
205    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
206	if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
207	    sblend = RADEON_SRC_BLEND_GL_ONE;
208	else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
209	    sblend = RADEON_SRC_BLEND_GL_ZERO;
210    }
211
212    /* If the source alpha is being used, then we should only be in a case where
213     * the source blend factor is 0, and the source blend value is the mask
214     * channels multiplied by the source picture's alpha.
215     */
216    if (pMask && pMask->componentAlpha && RadeonBlendOp[op].src_alpha) {
217	if (dblend == RADEON_DST_BLEND_GL_SRC_ALPHA) {
218	    dblend = RADEON_DST_BLEND_GL_SRC_COLOR;
219	} else if (dblend == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA) {
220	    dblend = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
221	}
222    }
223
224    return sblend | dblend;
225}
226
/* Overlays a float and a 32-bit unsigned integer on the same storage, so a
 * value written through one member can be read back through the other.
 */
union intfloat {
    float f;
    uint32_t i;
};
231
232/* Check if we need a software-fallback because of a repeating
233 *   non-power-of-two texture.
234 *
235 * canTile: whether we can emulate a repeat by drawing in tiles:
236 *   possible for the source, but not for the mask. (Actually
237 *   we could do tiling for the mask too, but dealing with the
238 *   combination of a tiled mask and a tiled source would be
239 *   a lot of complexity, so we handle only the most common
240 *   case of a repeating mask.)
241 */
242static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
243{
244    int w = pPict->pDrawable->width;
245    int h = pPict->pDrawable->height;
246    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
247
248    if ((repeatType == RepeatNormal || repeatType == RepeatReflect) &&
249	((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
250	!(repeatType == RepeatNormal && !pPict->transform && canTile))
251	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
252			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
253
254    return TRUE;
255}
256
257/* Determine if the pitch of the pixmap meets the criteria for being
258 * used as a repeating texture: no padding or only a single line texture.
259 */
260static Bool RADEONPitchMatches(PixmapPtr pPix)
261{
262    int w = pPix->drawable.width;
263    int h = pPix->drawable.height;
264    uint32_t txpitch = exaGetPixmapPitch(pPix);
265
266    if (h > 1 && (RADEON_ALIGN(w * pPix->drawable.bitsPerPixel / 8, 32)) != txpitch)
267	return FALSE;
268
269    return TRUE;
270}
271
272/* We can't turn on repeats normally for a non-power-of-two dimension,
273 * but if the source isn't transformed, we can get the same effect
274 * by drawing the image in multiple tiles. (A common case that it's
275 * important to get right is drawing a strip of a NPOTxPOT texture
276 * repeating in the POT direction. With tiling, this ends up as a
277 * a single tile on R300 and newer, which is perfect.)
278 *
279 * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
280 *   one direction and POT in the other in the POT direction; on
281 *   older chips we can only repeat at all if the texture is POT in
282 *   both directions.
283 *
284 * needMatchingPitch: On R100/R200, we can only repeat horizontally if
285 *   there is no padding in the texture. Textures with small POT widths
286 *   (1,2,4,8) thus can't be tiled.
287 */
288static Bool RADEONSetupSourceTile(PicturePtr pPict,
289				  PixmapPtr pPix,
290				  Bool canTile1d,
291				  Bool needMatchingPitch)
292{
293    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
294    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
295
296    info->accel_state->need_src_tile_x = info->accel_state->need_src_tile_y = FALSE;
297    info->accel_state->src_tile_width = info->accel_state->src_tile_height = 65536; /* "infinite" */
298
299    if (repeatType == RepeatNormal || repeatType == RepeatReflect) {
300	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
301
302	int w = pPict->pDrawable ? pPict->pDrawable->width : 1;
303	int h = pPict->pDrawable ? pPict->pDrawable->height : 1;
304
305	if (pPict->transform) {
306	    if (badPitch)
307		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
308				 w, (unsigned)exaGetPixmapPitch(pPix)));
309	} else {
310	    info->accel_state->need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
311	    info->accel_state->need_src_tile_y = (h & (h - 1)) != 0;
312
313	    if ((info->accel_state->need_src_tile_x ||
314		 info->accel_state->need_src_tile_y) &&
315		repeatType != RepeatNormal)
316		RADEON_FALLBACK(("Can only tile RepeatNormal at this time\n"));
317
318	    if (!canTile1d)
319		info->accel_state->need_src_tile_x =
320		    info->accel_state->need_src_tile_y =
321		    info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y;
322	}
323
324	if (info->accel_state->need_src_tile_x)
325	    info->accel_state->src_tile_width = w;
326	if (info->accel_state->need_src_tile_y)
327	    info->accel_state->src_tile_height = h;
328    }
329
330    return TRUE;
331}
332
333/* R100-specific code */
334
335static Bool R100CheckCompositeTexture(PicturePtr pPict,
336				      PicturePtr pDstPict,
337				      int op,
338				      int unit)
339{
340    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
341    int i;
342
343    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
344	if (R100TexFormats[i].fmt == pPict->format)
345	    break;
346    }
347    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
348	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
349			(int)pPict->format));
350
351    if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0))
352	return FALSE;
353
354    if (pPict->filter != PictFilterNearest &&
355	pPict->filter != PictFilterBilinear)
356    {
357	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
358    }
359
360    /* for REPEAT_NONE, Render semantics are that sampling outside the source
361     * picture results in alpha=0 pixels. We can implement this with a border color
362     * *if* our source texture has an alpha channel, otherwise we need to fall
363     * back. If we're not transformed then we hope that upper layers have clipped
364     * rendering to the bounds of the source drawable, in which case it doesn't
365     * matter. I have not, however, verified that the X server always does such
366     * clipping.
367     */
368    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
369	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
370	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
371    }
372
373    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
374	RADEON_FALLBACK(("non-affine transforms not supported\n"));
375
376    return TRUE;
377}
378
379#endif /* ONLY_ONCE */
380
381static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
382					int unit)
383{
384    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
385    uint32_t txfilter, txformat, txoffset, txpitch;
386    unsigned int repeatType;
387    Bool repeat;
388    int i, w, h;
389    struct radeon_exa_pixmap_priv *driver_priv;
390    ACCEL_PREAMBLE();
391
392    if (pPict->pDrawable) {
393	w = pPict->pDrawable->width;
394	h = pPict->pDrawable->height;
395	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
396    } else {
397	w = h = 1;
398	repeatType = RepeatNormal;
399    }
400
401    repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
402	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
403
404    txpitch = exaGetPixmapPitch(pPix);
405    txoffset = 0;
406
407    CHECK_OFFSET(pPix, 0x1f, "texture");
408
409    if ((txpitch & 0x1f) != 0)
410	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
411
412    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
413    {
414	if (R100TexFormats[i].fmt == pPict->format)
415	    break;
416    }
417    txformat = R100TexFormats[i].card_fmt;
418    if (RADEONPixmapIsColortiled(pPix))
419	txoffset |= RADEON_TXO_MACRO_TILE;
420
421    if (repeat) {
422	if (!RADEONPitchMatches(pPix))
423	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
424			     w, (unsigned)txpitch));
425
426	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
427	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
428    } else
429	txformat |= RADEON_TXFORMAT_NON_POWER2;
430    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
431
432    info->accel_state->texW[unit] = w;
433    info->accel_state->texH[unit] = h;
434
435    switch (pPict->filter) {
436    case PictFilterNearest:
437	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
438	break;
439    case PictFilterBilinear:
440	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
441	break;
442    default:
443	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
444    }
445
446    switch (repeatType) {
447	case RepeatNormal:
448	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
449		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
450	    else
451	        txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
452	    break;
453	case RepeatPad:
454	    txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
455	    break;
456	case RepeatReflect:
457	    txfilter |= RADEON_CLAMP_S_MIRROR | RADEON_CLAMP_T_MIRROR;
458	    break;
459	case RepeatNone:
460	    /* don't set an illegal clamp mode for rects */
461	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
462		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
463	    break;
464    }
465
466    BEGIN_ACCEL_RELOC(5, 1);
467    if (unit == 0) {
468	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
469	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
470	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
471	    (pPix->drawable.width - 1) |
472	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
473	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
474
475	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_0, txoffset, pPix);
476	/* emit a texture relocation */
477    } else {
478	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
479	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
480
481	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
482	    (pPix->drawable.width - 1) |
483	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
484	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
485	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_1, txoffset, pPix);
486	/* emit a texture relocation */
487    }
488    FINISH_ACCEL();
489
490    if (pPict->transform != 0) {
491	info->accel_state->is_transform[unit] = TRUE;
492	info->accel_state->transform[unit] = pPict->transform;
493    } else {
494	info->accel_state->is_transform[unit] = FALSE;
495    }
496
497    return TRUE;
498}
499
500#ifdef ONLY_ONCE
501
502
503static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
504			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
505{
506    PixmapPtr pSrcPixmap, pDstPixmap;
507    uint32_t tmp1;
508
509    /* Check for unsupported compositing operations. */
510    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
511	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
512
513    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
514
515    if (pDstPixmap->drawable.width > 2047 ||
516	pDstPixmap->drawable.height > 2047) {
517	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
518			 pDstPixmap->drawable.width,
519			 pDstPixmap->drawable.height));
520    }
521
522    if (pSrcPicture->pDrawable) {
523	/* r100 limit should be 2048, there are issues with 2048
524	 * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
525	 */
526	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
527
528	if (pSrcPixmap->drawable.width > 2047 ||
529	    pSrcPixmap->drawable.height > 2047) {
530	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
531			     pSrcPixmap->drawable.width,
532			     pSrcPixmap->drawable.height));
533	}
534    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
535	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
536
537    if (pMaskPicture) {
538	PixmapPtr pMaskPixmap;
539
540	if (pMaskPicture->pDrawable) {
541	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
542
543	    if (pMaskPixmap->drawable.width > 2047 ||
544		pMaskPixmap->drawable.height > 2047) {
545		RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
546				 pMaskPixmap->drawable.width,
547				 pMaskPixmap->drawable.height));
548	    }
549	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
550	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
551
552	if (pMaskPicture->componentAlpha) {
553	    /* Check if it's component alpha that relies on a source alpha and
554	     * on the source value.  We can only get one of those into the
555	     * single source value that we get to blend with.
556	     */
557	    if (RadeonBlendOp[op].src_alpha &&
558		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
559		RADEON_SRC_BLEND_GL_ZERO) {
560		RADEON_FALLBACK(("Component alpha not supported with source "
561				 "alpha and source value blending.\n"));
562	    }
563	}
564
565	if (!R100CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
566	    return FALSE;
567    }
568
569    if (!R100CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
570	return FALSE;
571
572    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
573	return FALSE;
574
575    return TRUE;
576}
577
578static Bool
579RADEONPrepareCompositeCS(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
580			    PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask,
581			    PixmapPtr pDst)
582{
583    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
584
585    info->accel_state->composite_op = op;
586    info->accel_state->dst_pic = pDstPicture;
587    info->accel_state->msk_pic = pMaskPicture;
588    info->accel_state->src_pic = pSrcPicture;
589    info->accel_state->dst_pix = pDst;
590    info->accel_state->msk_pix = pMask;
591    info->accel_state->src_pix = pSrc;
592
593#ifdef XF86DRM_MODE
594    if (info->cs) {
595	int ret;
596
597	radeon_cs_space_reset_bos(info->cs);
598
599	radeon_add_pixmap(info->cs, pSrc,
600			  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
601
602	if (pMask)
603	    radeon_add_pixmap(info->cs, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
604
605	radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
606
607	ret = radeon_cs_space_check(info->cs);
608	if (ret)
609	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
610    }
611#endif
612
613    return TRUE;
614}
615
616#endif /* ONLY_ONCE */
617
618static Bool FUNC_NAME(R100PrepareComposite)(int op,
619					    PicturePtr pSrcPicture,
620					    PicturePtr pMaskPicture,
621					    PicturePtr pDstPicture,
622					    PixmapPtr pSrc,
623					    PixmapPtr pMask,
624					    PixmapPtr pDst)
625{
626    ScreenPtr pScreen = pDst->drawable.pScreen;
627    RINFO_FROM_SCREEN(pScreen);
628    uint32_t dst_format, dst_pitch, colorpitch;
629    uint32_t pp_cntl, blendcntl, cblend, ablend;
630    int pixel_shift;
631    struct radeon_exa_pixmap_priv *driver_priv;
632    ACCEL_PREAMBLE();
633
634    TRACE;
635
636    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
637	return FALSE;
638
639    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
640	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
641
642    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
643
644    dst_pitch = exaGetPixmapPitch(pDst);
645    colorpitch = dst_pitch >> pixel_shift;
646    if (RADEONPixmapIsColortiled(pDst))
647	colorpitch |= RADEON_COLOR_TILE_ENABLE;
648
649    CHECK_OFFSET(pDst, 0x0f, "destination");
650
651    if (!pSrc) {
652	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
653	if (!pSrc)
654	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
655    }
656
657    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
658	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
659
660    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
661	return FALSE;
662
663    if (pMaskPicture && !pMask) {
664	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
665	if (!pMask) {
666	    if (!pSrcPicture->pDrawable)
667		pScreen->DestroyPixmap(pSrc);
668	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
669	}
670    }
671
672    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
673			     pSrc, pMask, pDst);
674
675    /* switch to 3D after doing buffer space checks as the latter may flush */
676    RADEON_SWITCH_TO_3D();
677
678    if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
679	return FALSE;
680    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
681
682    if (pMask != NULL) {
683	if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1))
684	    return FALSE;
685	pp_cntl |= RADEON_TEX_1_ENABLE;
686    } else {
687	info->accel_state->is_transform[1] = FALSE;
688    }
689
690    BEGIN_ACCEL_RELOC(10, 2);
691    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
692    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
693    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
694    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
695
696    /* IN operator: Multiply src by mask components or mask alpha.
697     * BLEND_CTL_ADD is A * B + C.
698     * If a source is a8, we have to explicitly zero its color values.
699     * If the destination is a8, we have to route the alpha to red, I think.
700     * If we're doing component alpha where the source for blending is going to
701     * be the source alpha (and there's no source value used), we have to zero
702     * the source's color values.
703     */
704    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
705    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
706
707    if (pDstPicture->format == PICT_a8 ||
708	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
709    {
710	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
711    } else if (pSrcPicture->format == PICT_a8)
712	cblend |= RADEON_COLOR_ARG_A_ZERO;
713    else
714	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
715    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
716
717    if (pMask) {
718	if (pMaskPicture->componentAlpha &&
719	    pDstPicture->format != PICT_a8)
720	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
721	else
722	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
723	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
724    } else {
725	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
726	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
727    }
728
729    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
730    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
731    if (pMask)
732	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
733					  RADEON_SE_VTX_FMT_ST0 |
734					  RADEON_SE_VTX_FMT_ST1));
735    else
736	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
737					  RADEON_SE_VTX_FMT_ST0));
738    /* Op operator. */
739    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
740
741    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
742
743    OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0);
744    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) |
745					   ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT)));
746    FINISH_ACCEL();
747
748    return TRUE;
749}
750
751#ifdef ONLY_ONCE
752
753static Bool R200CheckCompositeTexture(PicturePtr pPict,
754				      PicturePtr pDstPict,
755				      int op,
756				      int unit)
757{
758    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
759    int i;
760
761    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
762    {
763	if (R200TexFormats[i].fmt == pPict->format)
764	    break;
765    }
766    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
767	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
768			 (int)pPict->format));
769
770    if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0))
771	return FALSE;
772
773    if (pPict->filter != PictFilterNearest &&
774	pPict->filter != PictFilterBilinear)
775	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
776
777    /* for REPEAT_NONE, Render semantics are that sampling outside the source
778     * picture results in alpha=0 pixels. We can implement this with a border color
779     * *if* our source texture has an alpha channel, otherwise we need to fall
780     * back. If we're not transformed then we hope that upper layers have clipped
781     * rendering to the bounds of the source drawable, in which case it doesn't
782     * matter. I have not, however, verified that the X server always does such
783     * clipping.
784     */
785    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
786	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
787	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
788    }
789
790    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
791	RADEON_FALLBACK(("non-affine transforms not supported\n"));
792
793    return TRUE;
794}
795
796#endif /* ONLY_ONCE */
797
798static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
799					int unit)
800{
801    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
802    uint32_t txfilter, txformat, txoffset, txpitch;
803    unsigned int repeatType;
804    Bool repeat;
805    int i, w, h;
806    struct radeon_exa_pixmap_priv *driver_priv;
807    ACCEL_PREAMBLE();
808
809    if (pPict->pDrawable) {
810	w = pPict->pDrawable->width;
811	h = pPict->pDrawable->height;
812	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
813    } else {
814	w = h = 1;
815	repeatType = RepeatNormal;
816    }
817
818    repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
819	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
820
821    txpitch = exaGetPixmapPitch(pPix);
822
823    txoffset = 0;
824    CHECK_OFFSET(pPix, 0x1f, "texture");
825
826    if ((txpitch & 0x1f) != 0)
827	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
828
829    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
830    {
831	if (R200TexFormats[i].fmt == pPict->format)
832	    break;
833    }
834    txformat = R200TexFormats[i].card_fmt;
835    if (RADEONPixmapIsColortiled(pPix))
836	txoffset |= R200_TXO_MACRO_TILE;
837
838    if (repeat) {
839	if (!RADEONPitchMatches(pPix))
840	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
841			     w, (unsigned)txpitch));
842
843	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
844	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
845    } else
846	txformat |= R200_TXFORMAT_NON_POWER2;
847    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
848
849    info->accel_state->texW[unit] = w;
850    info->accel_state->texH[unit] = h;
851
852    switch (pPict->filter) {
853    case PictFilterNearest:
854	txfilter = (R200_MAG_FILTER_NEAREST |
855		    R200_MIN_FILTER_NEAREST);
856	break;
857    case PictFilterBilinear:
858	txfilter = (R200_MAG_FILTER_LINEAR |
859		    R200_MIN_FILTER_LINEAR);
860	break;
861    default:
862	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
863    }
864
865    switch (repeatType) {
866	case RepeatNormal:
867	    if (txformat & R200_TXFORMAT_NON_POWER2)
868		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
869	    else
870	        txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
871	    break;
872	case RepeatPad:
873	    txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
874	    break;
875	case RepeatReflect:
876	    txfilter |= R200_CLAMP_S_MIRROR | R200_CLAMP_T_MIRROR;
877	    break;
878	case RepeatNone:
879	    /* don't set an illegal clamp mode for rect textures */
880	    if (txformat & R200_TXFORMAT_NON_POWER2)
881		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
882	    break;
883    }
884
885    BEGIN_ACCEL_RELOC(6, 1);
886    if (unit == 0) {
887	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
888	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
889	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
890	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
891		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
892	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
893	EMIT_READ_OFFSET(R200_PP_TXOFFSET_0, txoffset, pPix);
894    } else {
895	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
896	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
897	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
898	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
899		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
900	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
901	EMIT_READ_OFFSET(R200_PP_TXOFFSET_1, txoffset, pPix);
902	/* emit a texture relocation */
903    }
904    FINISH_ACCEL();
905
906    if (pPict->transform != 0) {
907	info->accel_state->is_transform[unit] = TRUE;
908	info->accel_state->transform[unit] = pPict->transform;
909    } else {
910	info->accel_state->is_transform[unit] = FALSE;
911    }
912
913    return TRUE;
914}
915
#ifdef ONLY_ONCE
/*
 * Decide whether a Composite request can be handled by the R200 render path.
 *
 * Falls back (returns FALSE) when:
 *  - op is outside the RadeonBlendOp table,
 *  - the destination, source or mask pixmap is wider or taller than 2047,
 *  - a drawable-less source or mask is anything but a solid fill,
 *  - a component-alpha mask is combined with an operator that needs the
 *    source alpha while its source blend factor is not GL_ZERO,
 *  - R200CheckCompositeTexture() rejects the mask (unit 1) or source (unit 0),
 *  - RADEONGetDestFormat() rejects the destination picture.
 * Returns TRUE otherwise.
 */
static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
			       PicturePtr pDstPicture)
{
    /* The r200 limit should be 2048, but there are issues with 2048
     * (see 197a62704742a4a19736c2637ac92d1dc5ab34ed), so 2047 is used.
     */
    const int max_dim = 2047;
    PixmapPtr dst_pix;
    uint32_t dst_fmt;

    TRACE;

    /* Operators without an entry in the blend table cannot be accelerated. */
    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0])) {
	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
    }

    dst_pix = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
    if (dst_pix->drawable.width > max_dim ||
	dst_pix->drawable.height > max_dim) {
	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
			 dst_pix->drawable.width,
			 dst_pix->drawable.height));
    }

    if (!pSrcPicture->pDrawable) {
	/* Source-only picture: only solid fills are handled. */
	if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill) {
	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
	}
    } else {
	PixmapPtr src_pix = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);

	if (src_pix->drawable.width > max_dim ||
	    src_pix->drawable.height > max_dim) {
	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
			     src_pix->drawable.width,
			     src_pix->drawable.height));
	}
    }

    if (pMaskPicture) {
	if (!pMaskPicture->pDrawable) {
	    if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill) {
		RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
	    }
	} else {
	    PixmapPtr mask_pix = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);

	    if (mask_pix->drawable.width > max_dim ||
		mask_pix->drawable.height > max_dim) {
		RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
				 mask_pix->drawable.width,
				 mask_pix->drawable.height));
	    }
	}

	/* Component alpha that relies on both the source alpha and the
	 * source value cannot be done: only one of those fits into the
	 * single source value that reaches the blender.
	 */
	if (pMaskPicture->componentAlpha &&
	    RadeonBlendOp[op].src_alpha &&
	    (RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
	    RADEON_SRC_BLEND_GL_ZERO) {
	    RADEON_FALLBACK(("Component alpha not supported with source "
			     "alpha and source value blending.\n"));
	}

	if (!R200CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
	    return FALSE;
    }

    /* Source texture first, then the destination format; the format value
     * written to dst_fmt is not used here, only the success of the lookup.
     */
    if (!R200CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0) ||
	!RADEONGetDestFormat(pDstPicture, &dst_fmt))
	return FALSE;

    return TRUE;
}
#endif /* ONLY_ONCE */
994
995static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
996				PicturePtr pMaskPicture, PicturePtr pDstPicture,
997				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
998{
999    ScreenPtr pScreen = pDst->drawable.pScreen;
1000    RINFO_FROM_SCREEN(pScreen);
1001    uint32_t dst_format, dst_pitch;
1002    uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
1003    int pixel_shift;
1004    struct radeon_exa_pixmap_priv *driver_priv;
1005    ACCEL_PREAMBLE();
1006
1007    TRACE;
1008
1009    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
1010	return FALSE;
1011
1012    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
1013	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
1014
1015    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1016
1017    dst_pitch = exaGetPixmapPitch(pDst);
1018    colorpitch = dst_pitch >> pixel_shift;
1019    if (RADEONPixmapIsColortiled(pDst))
1020	colorpitch |= RADEON_COLOR_TILE_ENABLE;
1021
1022    CHECK_OFFSET(pDst, 0xf, "destination");
1023
1024    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1025	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1026
1027    if (!pSrc) {
1028	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
1029	if (!pSrc)
1030	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1031    }
1032
1033    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
1034	return FALSE;
1035
1036    if (pMaskPicture && !pMask) {
1037	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
1038	if (!pMask) {
1039	    if (!pSrcPicture->pDrawable)
1040		pScreen->DestroyPixmap(pSrc);
1041	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1042	}
1043    }
1044
1045    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1046			     pSrc, pMask, pDst);
1047
1048    /* switch to 3D after doing buffer space checks as it may flush */
1049    RADEON_SWITCH_TO_3D();
1050
1051    if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
1052	return FALSE;
1053    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
1054
1055    if (pMask != NULL) {
1056	if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1))
1057	    return FALSE;
1058	pp_cntl |= RADEON_TEX_1_ENABLE;
1059    } else {
1060	info->accel_state->is_transform[1] = FALSE;
1061    }
1062
1063    BEGIN_ACCEL_RELOC(12, 2);
1064
1065    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
1066    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
1067
1068    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
1069    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
1070
1071    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
1072    if (pMask)
1073	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
1074		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
1075		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
1076    else
1077	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
1078		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
1079
1080
1081
1082    /* IN operator: Multiply src by mask components or mask alpha.
1083     * BLEND_CTL_ADD is A * B + C.
1084     * If a picture is a8, we have to explicitly zero its color values.
1085     * If the destination is a8, we have to route the alpha to red, I think.
1086     * If we're doing component alpha where the source for blending is going to
1087     * be the source alpha (and there's no source value used), we have to zero
1088     * the source's color values.
1089     */
1090    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
1091    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
1092
1093    if (pDstPicture->format == PICT_a8 ||
1094	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
1095    {
1096	cblend |= R200_TXC_ARG_A_R0_ALPHA;
1097    } else if (pSrcPicture->format == PICT_a8)
1098	cblend |= R200_TXC_ARG_A_ZERO;
1099    else
1100	cblend |= R200_TXC_ARG_A_R0_COLOR;
1101    ablend |= R200_TXA_ARG_A_R0_ALPHA;
1102
1103    if (pMask) {
1104	if (pMaskPicture->componentAlpha &&
1105	    pDstPicture->format != PICT_a8)
1106	    cblend |= R200_TXC_ARG_B_R1_COLOR;
1107	else
1108	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
1109	ablend |= R200_TXA_ARG_B_R1_ALPHA;
1110    } else {
1111	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
1112	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
1113    }
1114
1115    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend);
1116    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
1117	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
1118    OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend);
1119    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
1120	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
1121
1122    /* Op operator. */
1123    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1124    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
1125
1126    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) |
1127					   ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT)));
1128
1129    FINISH_ACCEL();
1130
1131    return TRUE;
1132}
1133
#ifdef ONLY_ONCE

/*
 * Check whether one picture can be used as a texture by the R300 render
 * path.
 *
 * pPict     picture to be sampled.
 * pDstPict  destination picture (supplies the screen and destination format).
 * op        Render operator.
 * unit      texture unit the picture would occupy (0 = source).
 * is_r500   not referenced in this function.
 *
 * Returns TRUE when the picture is usable, FALSE otherwise.
 */
static Bool R300CheckCompositeTexture(PicturePtr pPict,
				      PicturePtr pDstPict,
				      int op,
				      int unit,
				      Bool is_r500)
{
    ScreenPtr pScreen = pDstPict->pDrawable->pScreen;
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    unsigned int repeatType = RepeatNone;
    Bool fmt_known = FALSE;
    Bool needs_alpha_border, alpha_is_discarded;
    int idx;

    if (pPict->repeat)
	repeatType = pPict->repeatType;

    /* The picture format must have an entry in the texture format table. */
    for (idx = 0; idx < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); idx++) {
	if (R300TexFormats[idx].fmt == pPict->format) {
	    fmt_known = TRUE;
	    break;
	}
    }
    if (!fmt_known) {
	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
			 (int)pPict->format));
    }

    if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0)) {
	if (info->cs) {
	    struct radeon_exa_pixmap_priv *driver_priv;
	    PixmapPtr pPix = RADEONGetDrawablePixmap(pPict->pDrawable);

	    driver_priv = exaGetPixmapDriverPrivate(pPix);
	    /* TODO: radeon_bufmgr_gem_force_gtt(driver_priv->bo); */
	}
	return FALSE;
    }

    /* Only nearest and bilinear filtering are accepted. */
    switch (pPict->filter) {
    case PictFilterNearest:
    case PictFilterBilinear:
	break;
    default:
	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
    }

    /* For REPEAT_NONE, Render semantics are that sampling outside the source
     * picture yields alpha=0 pixels.  That can be done with a border color
     * *if* the source texture has an alpha channel; otherwise a fallback is
     * needed.  For untransformed pictures the hope is that upper layers have
     * clipped rendering to the bounds of the source drawable, in which case
     * it doesn't matter; it has not been verified that the X server always
     * does such clipping.
     */
    needs_alpha_border = pPict->transform != 0 &&
			 repeatType == RepeatNone &&
			 PICT_FORMAT_A(pPict->format) == 0;
    /* Src/Clear into a destination without alpha are exempt from that rule. */
    alpha_is_discarded = (op == PictOpSrc || op == PictOpClear) &&
			 PICT_FORMAT_A(pDstPict->format) == 0;
    if (needs_alpha_border && !alpha_is_discarded) {
	RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
    }

    if (!radeon_transform_is_affine_or_scaled(pPict->transform)) {
	RADEON_FALLBACK(("non-affine transforms not supported\n"));
    }

    return TRUE;
}

#endif /* ONLY_ONCE */
1193
1194static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
1195					int unit)
1196{
1197    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1198    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0;
1199    int w, h;
1200    int i, pixel_shift, out_size = 6;
1201    unsigned int repeatType;
1202    struct radeon_exa_pixmap_priv *driver_priv;
1203    ACCEL_PREAMBLE();
1204
1205    TRACE;
1206
1207    if (pPict->pDrawable) {
1208	w = pPict->pDrawable->width;
1209	h = pPict->pDrawable->height;
1210	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1211    } else {
1212	w = h = 1;
1213	repeatType = RepeatNormal;
1214    }
1215
1216    txpitch = exaGetPixmapPitch(pPix);
1217    txoffset = 0;
1218
1219    CHECK_OFFSET(pPix, 0x1f, "texture");
1220
1221    if ((txpitch & 0x1f) != 0)
1222	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1223
1224    /* TXPITCH = pixels (texels) per line - 1 */
1225    pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1226    txpitch >>= pixel_shift;
1227    txpitch -= 1;
1228
1229    if (RADEONPixmapIsColortiled(pPix))
1230	txoffset |= R300_MACRO_TILE;
1231
1232    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1233    {
1234	if (R300TexFormats[i].fmt == pPict->format)
1235	    break;
1236    }
1237
1238    txformat1 = R300TexFormats[i].card_fmt;
1239
1240    if (IS_R300_3D) {
1241	if ((unit == 0) && info->accel_state->msk_pic)
1242	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_0;
1243	else if (unit == 1)
1244	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_1;
1245    }
1246
1247    txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1248		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1249
1250    if (IS_R500_3D && ((w - 1) & 0x800))
1251	txpitch |= R500_TXWIDTH_11;
1252
1253    if (IS_R500_3D && ((h - 1) & 0x800))
1254	txpitch |= R500_TXHEIGHT_11;
1255
1256    if (info->ChipFamily == CHIP_FAMILY_R520) {
1257	unsigned us_width = (w - 1) & 0x7ff;
1258	unsigned us_height = (h - 1) & 0x7ff;
1259	unsigned us_depth = 0;
1260
1261	if (w > 2048) {
1262	    us_width = (0x7ff + us_width) >> 1;
1263	    us_depth |= 0x0d;
1264	}
1265	if (h > 2048) {
1266	    us_height = (0x7ff + us_height) >> 1;
1267	    us_depth |= 0x0e;
1268	}
1269
1270	us_format = (us_width << R300_TXWIDTH_SHIFT) |
1271		    (us_height << R300_TXHEIGHT_SHIFT) |
1272		    (us_depth << R300_TXDEPTH_SHIFT);
1273	out_size++;
1274    }
1275
1276    /* Use TXPITCH instead of TXWIDTH for address computations: we could
1277     * omit this if there is no padding, but there is no apparent advantage
1278     * in doing so.
1279     */
1280    txformat0 |= R300_TXPITCH_EN;
1281
1282    txfilter = (unit << R300_TX_ID_SHIFT);
1283
1284    switch (repeatType) {
1285    case RepeatNormal:
1286	if (unit != 0 || !info->accel_state->need_src_tile_x)
1287	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1288	else
1289	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1290
1291	if (unit != 0 || !info->accel_state->need_src_tile_y)
1292	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1293	else
1294	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1295
1296	break;
1297    case RepeatPad:
1298	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1299	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST);
1300	break;
1301    case RepeatReflect:
1302	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_MIRROR) |
1303	    R300_TX_CLAMP_T(R300_TX_CLAMP_MIRROR);
1304	break;
1305    case RepeatNone:
1306	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1307	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1308	break;
1309    }
1310
1311    switch (pPict->filter) {
1312    case PictFilterNearest:
1313	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1314	break;
1315    case PictFilterBilinear:
1316	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1317	break;
1318    default:
1319	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1320    }
1321
1322    if (repeatType == RepeatNone)
1323	out_size++;
1324    BEGIN_ACCEL_RELOC(out_size, 1);
1325    OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1326    OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1327    OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1328    OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1329    OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1330
1331    EMIT_READ_OFFSET((R300_TX_OFFSET_0 + (unit * 4)), txoffset, pPix);
1332
1333    if (repeatType == RepeatNone)
1334	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1335    if (info->ChipFamily == CHIP_FAMILY_R520)
1336	OUT_ACCEL_REG(R500_US_FORMAT0_0 + (unit * 4), us_format);
1337    FINISH_ACCEL();
1338
1339    if (pPict->transform != 0) {
1340	info->accel_state->is_transform[unit] = TRUE;
1341	info->accel_state->transform[unit] = pPict->transform;
1342
1343	/* setup the PVS consts */
1344	if (info->accel_state->has_tcl) {
1345	    info->accel_state->texW[unit] = 1;
1346	    info->accel_state->texH[unit] = 1;
1347	    BEGIN_ACCEL(9);
1348	    if (IS_R300_3D)
1349		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1350	    else
1351		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1352
1353	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][0])));
1354	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][1])));
1355	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][2])));
1356	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1357
1358	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][0])));
1359	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][1])));
1360	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][2])));
1361	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1362
1363	    FINISH_ACCEL();
1364	} else {
1365	    info->accel_state->texW[unit] = w;
1366	    info->accel_state->texH[unit] = h;
1367	}
1368    } else {
1369	info->accel_state->is_transform[unit] = FALSE;
1370
1371	/* setup the PVS consts */
1372	if (info->accel_state->has_tcl) {
1373	    info->accel_state->texW[unit] = 1;
1374	    info->accel_state->texH[unit] = 1;
1375
1376	    BEGIN_ACCEL(9);
1377	    if (IS_R300_3D)
1378		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1379	    else
1380		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1381
1382	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1383	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1384	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1385	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1386
1387	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1388	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1389	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1390	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1391
1392	    FINISH_ACCEL();
1393	} else {
1394	    info->accel_state->texW[unit] = w;
1395	    info->accel_state->texH[unit] = h;
1396	}
1397    }
1398
1399    return TRUE;
1400}
1401
#ifdef ONLY_ONCE

/*
 * Decide whether a Composite request can be handled by the R300/R500 render
 * path.
 *
 * Size limits depend on the engine generation: 4096 for textures and
 * destination on R500-class engines; otherwise 2048 for textures and 4021
 * (R400-class) or 2560 for the destination.
 *
 * Falls back (returns FALSE) when the operator is outside the RadeonBlendOp
 * table, when a pixmap exceeds its limit, when a drawable-less source or
 * mask is not a solid fill, when a component-alpha mask needs both source
 * alpha and source value, or when the texture / destination format checks
 * fail.  Returns TRUE otherwise.
 */
static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
			       PicturePtr pDstPicture)
{
    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    PixmapPtr dst_pix;
    uint32_t dst_fmt;
    int tex_limit, dst_limit;

    TRACE;

    /* Check for unsupported compositing operations. */
    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0])) {
	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
    }

    /* Width and height share the same limit in every configuration. */
    if (IS_R500_3D) {
	tex_limit = 4096;
	dst_limit = 4096;
    } else {
	tex_limit = 2048;
	if (IS_R400_3D)
	    dst_limit = 4021;
	else
	    dst_limit = 2560;
    }

    dst_pix = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
    if (dst_pix->drawable.width > dst_limit ||
	dst_pix->drawable.height > dst_limit) {
	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
			 dst_pix->drawable.width,
			 dst_pix->drawable.height));
    }

    if (!pSrcPicture->pDrawable) {
	/* Source-only picture: only solid fills are handled. */
	if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill) {
	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
	}
    } else {
	PixmapPtr src_pix = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);

	if (src_pix->drawable.width > tex_limit ||
	    src_pix->drawable.height > tex_limit) {
	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
			     src_pix->drawable.width,
			     src_pix->drawable.height));
	}
    }

    if (pMaskPicture) {
	if (!pMaskPicture->pDrawable) {
	    if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill) {
		RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
	    }
	} else {
	    PixmapPtr mask_pix = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);

	    if (mask_pix->drawable.width > tex_limit ||
		mask_pix->drawable.height > tex_limit) {
		RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
				 mask_pix->drawable.width,
				 mask_pix->drawable.height));
	    }
	}

	/* Component alpha that relies on both the source alpha and the
	 * source value cannot be done: only one of those fits into the
	 * single source value that reaches the blender.
	 */
	if (pMaskPicture->componentAlpha &&
	    RadeonBlendOp[op].src_alpha &&
	    (RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
	    RADEON_SRC_BLEND_GL_ZERO) {
	    RADEON_FALLBACK(("Component alpha not supported with source "
			     "alpha and source value blending.\n"));
	}

	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
	    return FALSE;
    }

    /* Source texture first, then the destination format; the format value
     * written to dst_fmt is not used here, only the success of the lookup.
     */
    if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D) ||
	!R300GetDestFormat(pDstPicture, &dst_fmt))
	return FALSE;

    return TRUE;
}
#endif /* ONLY_ONCE */
1500
1501static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
1502				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1503				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1504{
1505    ScreenPtr pScreen = pDst->drawable.pScreen;
1506    RINFO_FROM_SCREEN(pScreen);
1507    uint32_t dst_format, dst_pitch;
1508    uint32_t txenable, colorpitch;
1509    uint32_t blendcntl, output_fmt;
1510    uint32_t src_color, src_alpha;
1511    uint32_t mask_color, mask_alpha;
1512    int pixel_shift;
1513    struct radeon_exa_pixmap_priv *driver_priv;
1514    ACCEL_PREAMBLE();
1515    TRACE;
1516
1517    if (!R300GetDestFormat(pDstPicture, &dst_format))
1518	return FALSE;
1519
1520    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1521
1522    dst_pitch = exaGetPixmapPitch(pDst);
1523    colorpitch = dst_pitch >> pixel_shift;
1524
1525    if (RADEONPixmapIsColortiled(pDst))
1526	colorpitch |= R300_COLORTILE;
1527
1528    colorpitch |= dst_format;
1529
1530    CHECK_OFFSET(pDst, 0x0f, "destination");
1531
1532    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1533	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1534
1535    if (!pSrc) {
1536	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
1537	if (!pSrc)
1538	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1539    }
1540
1541    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1542	return FALSE;
1543
1544    if (pMaskPicture && !pMask) {
1545	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
1546	if (!pMask) {
1547	    if (!pSrcPicture->pDrawable)
1548		pScreen->DestroyPixmap(pSrc);
1549	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1550	}
1551    }
1552
1553    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1554			     pSrc, pMask, pDst);
1555
1556    /* have to execute switch after doing buffer sizing check as the latter flushes */
1557    RADEON_SWITCH_TO_3D();
1558
1559    if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
1560	return FALSE;
1561    txenable = R300_TEX_0_ENABLE;
1562
1563    if (pMask != NULL) {
1564	if (!FUNC_NAME(R300TextureSetup)(pMaskPicture, pMask, 1))
1565	    return FALSE;
1566	txenable |= R300_TEX_1_ENABLE;
1567    } else {
1568	info->accel_state->is_transform[1] = FALSE;
1569    }
1570
1571    /* setup the VAP */
1572    if (info->accel_state->has_tcl) {
1573	if (pMask)
1574	    BEGIN_ACCEL(10);
1575	else
1576	    BEGIN_ACCEL(9);
1577	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
1578    } else {
1579	if (pMask)
1580	    BEGIN_ACCEL(6);
1581	else
1582	    BEGIN_ACCEL(5);
1583    }
1584
1585    /* These registers define the number, type, and location of data submitted
1586     * to the PVS unit of GA input (when PVS is disabled)
1587     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
1588     * enabled.  This memory provides the imputs to the vertex shader program
1589     * and ordering is not important.  When PVS/TCL is disabled, this field maps
1590     * directly to the GA input memory and the order is signifigant.  In
1591     * PVS_BYPASS mode the order is as follows:
1592     * Position
1593     * Point Size
1594     * Color 0-3
1595     * Textures 0-7
1596     * Fog
1597     */
1598    if (pMask) {
1599	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1600		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1601		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1602		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1603		       R300_SIGNED_0 |
1604		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1605		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1606		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1607		       R300_SIGNED_1));
1608	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
1609		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1610		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1611		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1612		       R300_LAST_VEC_2 |
1613		       R300_SIGNED_2));
1614    } else
1615	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1616		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1617		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1618		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1619		       R300_SIGNED_0 |
1620		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1621		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1622		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1623		       R300_LAST_VEC_1 |
1624		       R300_SIGNED_1));
1625
1626    /* load the vertex shader
1627     * We pre-load vertex programs in RADEONInit3DEngine():
1628     * - exa
1629     * - Xv
1630     * - Xv bicubic
1631     * Here we select the offset of the vertex program we want to use
1632     */
1633    if (info->accel_state->has_tcl) {
1634	if (pMask) {
1635	    /* consts used by vertex shaders */
1636	    OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1637						    R300_PVS_MAX_CONST_ADDR(3)));
1638	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1639			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1640			   (8 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1641			   (8 << R300_PVS_LAST_INST_SHIFT)));
1642	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1643			  (8 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1644	} else {
1645	    /* consts used by vertex shaders */
1646	    OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1647						    R300_PVS_MAX_CONST_ADDR(3)));
1648	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1649			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1650			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1651			   (4 << R300_PVS_LAST_INST_SHIFT)));
1652	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1653			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1654	}
1655    }
1656
1657    /* Position and one or two sets of 2 texture coordinates */
1658    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1659    if (pMask)
1660	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1661		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1662		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1663    else
1664	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1665		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1666
1667    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
1668    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
1669    FINISH_ACCEL();
1670
1671    /* shader output swizzling */
1672    switch (pDstPicture->format) {
1673    case PICT_a8r8g8b8:
1674    case PICT_x8r8g8b8:
1675    default:
1676	output_fmt = (R300_OUT_FMT_C4_8 |
1677		      R300_OUT_FMT_C0_SEL_BLUE |
1678		      R300_OUT_FMT_C1_SEL_GREEN |
1679		      R300_OUT_FMT_C2_SEL_RED |
1680		      R300_OUT_FMT_C3_SEL_ALPHA);
1681	break;
1682    case PICT_a8b8g8r8:
1683    case PICT_x8b8g8r8:
1684	output_fmt = (R300_OUT_FMT_C4_8 |
1685		      R300_OUT_FMT_C0_SEL_RED |
1686		      R300_OUT_FMT_C1_SEL_GREEN |
1687		      R300_OUT_FMT_C2_SEL_BLUE |
1688		      R300_OUT_FMT_C3_SEL_ALPHA);
1689	break;
1690#ifdef PICT_TYPE_BGRA
1691    case PICT_b8g8r8a8:
1692    case PICT_b8g8r8x8:
1693	output_fmt = (R300_OUT_FMT_C4_8 |
1694		      R300_OUT_FMT_C0_SEL_ALPHA |
1695		      R300_OUT_FMT_C1_SEL_RED |
1696		      R300_OUT_FMT_C2_SEL_GREEN |
1697		      R300_OUT_FMT_C3_SEL_BLUE);
1698	break;
1699#endif
1700    case PICT_a8:
1701	output_fmt = (R300_OUT_FMT_C4_8 |
1702		      R300_OUT_FMT_C0_SEL_ALPHA);
1703	break;
1704    }
1705
1706    /* setup pixel shader */
1707    if (IS_R300_3D) {
1708	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1709	    src_color = R300_ALU_RGB_0_0;
1710	else
1711	    src_color = R300_ALU_RGB_SRC0_RGB;
1712
1713	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1714	    src_alpha = R300_ALU_ALPHA_1_0;
1715	else
1716	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1717
1718	if (pMask) {
1719	    if (pMaskPicture->componentAlpha) {
1720		if (RadeonBlendOp[op].src_alpha) {
1721		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1722			src_color = R300_ALU_RGB_1_0;
1723		    else
1724			src_color = R300_ALU_RGB_SRC0_AAA;
1725		} else
1726		    src_color = R300_ALU_RGB_SRC0_RGB;
1727		mask_color = R300_ALU_RGB_SRC1_RGB;
1728	    } else {
1729		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1730		    mask_color = R300_ALU_RGB_1_0;
1731		else
1732		    mask_color = R300_ALU_RGB_SRC1_AAA;
1733	    }
1734	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1735		mask_alpha = R300_ALU_ALPHA_1_0;
1736	    else
1737		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1738	} else {
1739	    mask_color = R300_ALU_RGB_1_0;
1740	    mask_alpha = R300_ALU_ALPHA_1_0;
1741	}
1742
1743	/* setup the rasterizer, load FS */
1744	if (pMask) {
1745	    BEGIN_ACCEL(16);
1746	    /* 4 components: 2 for tex0, 2 for tex1 */
1747	    OUT_ACCEL_REG(R300_RS_COUNT,
1748			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1749			   R300_RS_COUNT_HIRES_EN));
1750
1751	    /* R300_INST_COUNT_RS - highest RS instruction used */
1752	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1753
1754	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1755						R300_ALU_CODE_SIZE(0) |
1756						R300_TEX_CODE_OFFSET(0) |
1757						R300_TEX_CODE_SIZE(1)));
1758
1759	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1760			  (R300_ALU_START(0) |
1761			   R300_ALU_SIZE(0) |
1762			   R300_TEX_START(0) |
1763			   R300_TEX_SIZE(1) |
1764			   R300_RGBA_OUT));
1765
1766
1767	} else {
1768	    BEGIN_ACCEL(15);
1769	    /* 2 components: 2 for tex0 */
1770	    OUT_ACCEL_REG(R300_RS_COUNT,
1771			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1772			   R300_RS_COUNT_HIRES_EN));
1773
1774	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1775
1776	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1777						R300_ALU_CODE_SIZE(0) |
1778						R300_TEX_CODE_OFFSET(0) |
1779						R300_TEX_CODE_SIZE(0)));
1780
1781	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1782			  (R300_ALU_START(0) |
1783			   R300_ALU_SIZE(0) |
1784			   R300_TEX_START(0) |
1785			   R300_TEX_SIZE(0) |
1786			   R300_RGBA_OUT));
1787
1788	}
1789
1790	OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
1791	OUT_ACCEL_REG(R300_US_CODE_ADDR_0,
1792		      (R300_ALU_START(0) |
1793		       R300_ALU_SIZE(0) |
1794		       R300_TEX_START(0) |
1795		       R300_TEX_SIZE(0)));
1796	OUT_ACCEL_REG(R300_US_CODE_ADDR_1,
1797		      (R300_ALU_START(0) |
1798		       R300_ALU_SIZE(0) |
1799		       R300_TEX_START(0) |
1800		       R300_TEX_SIZE(0)));
1801	OUT_ACCEL_REG(R300_US_CODE_ADDR_2,
1802		      (R300_ALU_START(0) |
1803		       R300_ALU_SIZE(0) |
1804		       R300_TEX_START(0) |
1805		       R300_TEX_SIZE(0)));
1806
1807	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1808	/* shader output swizzling */
1809	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1810
1811	/* tex inst for src texture */
1812	OUT_ACCEL_REG(R300_US_TEX_INST(0),
1813		      (R300_TEX_SRC_ADDR(0) |
1814		       R300_TEX_DST_ADDR(0) |
1815		       R300_TEX_ID(0) |
1816		       R300_TEX_INST(R300_TEX_INST_LD)));
1817
1818	if (pMask) {
1819	    /* tex inst for mask texture */
1820	    OUT_ACCEL_REG(R300_US_TEX_INST(1),
1821			  (R300_TEX_SRC_ADDR(1) |
1822			   R300_TEX_DST_ADDR(1) |
1823			   R300_TEX_ID(1) |
1824			   R300_TEX_INST(R300_TEX_INST_LD)));
1825	}
1826
1827	/* RGB inst
1828	 * temp addresses for texture inputs
1829	 * ALU_RGB_ADDR0 is src tex (temp 0)
1830	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1831	 * R300_ALU_RGB_OMASK - output components to write
1832	 * R300_ALU_RGB_TARGET_A - render target
1833	 */
1834	OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0),
1835		      (R300_ALU_RGB_ADDR0(0) |
1836		       R300_ALU_RGB_ADDR1(1) |
1837		       R300_ALU_RGB_ADDR2(0) |
1838		       R300_ALU_RGB_ADDRD(0) |
1839		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1840					   R300_ALU_RGB_MASK_G |
1841					   R300_ALU_RGB_MASK_B)) |
1842		       R300_ALU_RGB_TARGET_A));
1843	/* RGB inst
1844	 * ALU operation
1845	 */
1846	OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0),
1847		      (R300_ALU_RGB_SEL_A(src_color) |
1848		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1849		       R300_ALU_RGB_SEL_B(mask_color) |
1850		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1851		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1852		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1853		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1854		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1855		       R300_ALU_RGB_CLAMP));
1856	/* Alpha inst
1857	 * temp addresses for texture inputs
1858	 * ALU_ALPHA_ADDR0 is src tex (0)
1859	 * ALU_ALPHA_ADDR1 is mask tex (1)
1860	 * R300_ALU_ALPHA_OMASK - output components to write
1861	 * R300_ALU_ALPHA_TARGET_A - render target
1862	 */
1863	OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0),
1864		      (R300_ALU_ALPHA_ADDR0(0) |
1865		       R300_ALU_ALPHA_ADDR1(1) |
1866		       R300_ALU_ALPHA_ADDR2(0) |
1867		       R300_ALU_ALPHA_ADDRD(0) |
1868		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1869		       R300_ALU_ALPHA_TARGET_A |
1870		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1871	/* Alpha inst
1872	 * ALU operation
1873	 */
1874	OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0),
1875		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1876		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1877		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1878		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1879		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1880		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1881		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1882		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1883		       R300_ALU_ALPHA_CLAMP));
1884	FINISH_ACCEL();
1885    } else {
1886	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1887	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1888			 R500_ALU_RGB_G_SWIZ_A_0 |
1889			 R500_ALU_RGB_B_SWIZ_A_0);
1890	else
1891	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1892			 R500_ALU_RGB_G_SWIZ_A_G |
1893			 R500_ALU_RGB_B_SWIZ_A_B);
1894
1895	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1896	    src_alpha = R500_ALPHA_SWIZ_A_1;
1897	else
1898	    src_alpha = R500_ALPHA_SWIZ_A_A;
1899
1900	if (pMask) {
1901	    if (pMaskPicture->componentAlpha) {
1902		if (RadeonBlendOp[op].src_alpha) {
1903		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1904			src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1905				     R500_ALU_RGB_G_SWIZ_A_1 |
1906				     R500_ALU_RGB_B_SWIZ_A_1);
1907		    else
1908			src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1909				     R500_ALU_RGB_G_SWIZ_A_A |
1910				     R500_ALU_RGB_B_SWIZ_A_A);
1911		} else
1912		    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1913				 R500_ALU_RGB_G_SWIZ_A_G |
1914				 R500_ALU_RGB_B_SWIZ_A_B);
1915
1916		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1917			      R500_ALU_RGB_G_SWIZ_B_G |
1918			      R500_ALU_RGB_B_SWIZ_B_B);
1919	    } else {
1920		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1921		    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1922				  R500_ALU_RGB_G_SWIZ_B_1 |
1923				  R500_ALU_RGB_B_SWIZ_B_1);
1924		else
1925		    mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1926				  R500_ALU_RGB_G_SWIZ_B_A |
1927				  R500_ALU_RGB_B_SWIZ_B_A);
1928	    }
1929	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1930		mask_alpha = R500_ALPHA_SWIZ_B_1;
1931	    else
1932		mask_alpha = R500_ALPHA_SWIZ_B_A;
1933	} else {
1934	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1935			  R500_ALU_RGB_G_SWIZ_B_1 |
1936			  R500_ALU_RGB_B_SWIZ_B_1);
1937	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1938	}
1939
1940	BEGIN_ACCEL(7);
1941	if (pMask) {
1942	    /* 4 components: 2 for tex0, 2 for tex1 */
1943	    OUT_ACCEL_REG(R300_RS_COUNT,
1944			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1945			   R300_RS_COUNT_HIRES_EN));
1946
1947	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1948	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1949
1950	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1951					      R500_US_CODE_END_ADDR(2)));
1952	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1953					       R500_US_CODE_RANGE_SIZE(2)));
1954	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1955	} else {
1956	    OUT_ACCEL_REG(R300_RS_COUNT,
1957			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1958			   R300_RS_COUNT_HIRES_EN));
1959
1960	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1961
1962	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1963					      R500_US_CODE_END_ADDR(1)));
1964	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1965					       R500_US_CODE_RANGE_SIZE(1)));
1966	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1967	}
1968
1969	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1970	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1971	FINISH_ACCEL();
1972
1973	if (pMask) {
1974	    BEGIN_ACCEL(19);
1975	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1976	    /* tex inst for src texture */
1977	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1978						   R500_INST_RGB_WMASK_R |
1979						   R500_INST_RGB_WMASK_G |
1980						   R500_INST_RGB_WMASK_B |
1981						   R500_INST_ALPHA_WMASK |
1982						   R500_INST_RGB_CLAMP |
1983						   R500_INST_ALPHA_CLAMP));
1984
1985	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1986						   R500_TEX_INST_LD |
1987						   R500_TEX_IGNORE_UNCOVERED));
1988
1989	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1990						   R500_TEX_SRC_S_SWIZ_R |
1991						   R500_TEX_SRC_T_SWIZ_G |
1992						   R500_TEX_DST_ADDR(0) |
1993						   R500_TEX_DST_R_SWIZ_R |
1994						   R500_TEX_DST_G_SWIZ_G |
1995						   R500_TEX_DST_B_SWIZ_B |
1996						   R500_TEX_DST_A_SWIZ_A));
1997	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1998						   R500_DX_S_SWIZ_R |
1999						   R500_DX_T_SWIZ_R |
2000						   R500_DX_R_SWIZ_R |
2001						   R500_DX_Q_SWIZ_R |
2002						   R500_DY_ADDR(0) |
2003						   R500_DY_S_SWIZ_R |
2004						   R500_DY_T_SWIZ_R |
2005						   R500_DY_R_SWIZ_R |
2006						   R500_DY_Q_SWIZ_R));
2007	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2008	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2009
2010	    /* tex inst for mask texture */
2011	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2012						   R500_INST_TEX_SEM_WAIT |
2013						   R500_INST_RGB_WMASK_R |
2014						   R500_INST_RGB_WMASK_G |
2015						   R500_INST_RGB_WMASK_B |
2016						   R500_INST_ALPHA_WMASK |
2017						   R500_INST_RGB_CLAMP |
2018						   R500_INST_ALPHA_CLAMP));
2019
2020	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
2021						   R500_TEX_INST_LD |
2022						   R500_TEX_SEM_ACQUIRE |
2023						   R500_TEX_IGNORE_UNCOVERED));
2024
2025	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
2026						   R500_TEX_SRC_S_SWIZ_R |
2027						   R500_TEX_SRC_T_SWIZ_G |
2028						   R500_TEX_DST_ADDR(1) |
2029						   R500_TEX_DST_R_SWIZ_R |
2030						   R500_TEX_DST_G_SWIZ_G |
2031						   R500_TEX_DST_B_SWIZ_B |
2032						   R500_TEX_DST_A_SWIZ_A));
2033	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
2034						   R500_DX_S_SWIZ_R |
2035						   R500_DX_T_SWIZ_R |
2036						   R500_DX_R_SWIZ_R |
2037						   R500_DX_Q_SWIZ_R |
2038						   R500_DY_ADDR(1) |
2039						   R500_DY_S_SWIZ_R |
2040						   R500_DY_T_SWIZ_R |
2041						   R500_DY_R_SWIZ_R |
2042						   R500_DY_Q_SWIZ_R));
2043	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2044	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2045	} else {
2046	    BEGIN_ACCEL(13);
2047	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
2048	    /* tex inst for src texture */
2049	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2050						   R500_INST_TEX_SEM_WAIT |
2051						   R500_INST_RGB_WMASK_R |
2052						   R500_INST_RGB_WMASK_G |
2053						   R500_INST_RGB_WMASK_B |
2054						   R500_INST_ALPHA_WMASK |
2055						   R500_INST_RGB_CLAMP |
2056						   R500_INST_ALPHA_CLAMP));
2057
2058	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
2059						   R500_TEX_INST_LD |
2060						   R500_TEX_SEM_ACQUIRE |
2061						   R500_TEX_IGNORE_UNCOVERED));
2062
2063	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
2064						   R500_TEX_SRC_S_SWIZ_R |
2065						   R500_TEX_SRC_T_SWIZ_G |
2066						   R500_TEX_DST_ADDR(0) |
2067						   R500_TEX_DST_R_SWIZ_R |
2068						   R500_TEX_DST_G_SWIZ_G |
2069						   R500_TEX_DST_B_SWIZ_B |
2070						   R500_TEX_DST_A_SWIZ_A));
2071	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
2072						   R500_DX_S_SWIZ_R |
2073						   R500_DX_T_SWIZ_R |
2074						   R500_DX_R_SWIZ_R |
2075						   R500_DX_Q_SWIZ_R |
2076						   R500_DY_ADDR(0) |
2077						   R500_DY_S_SWIZ_R |
2078						   R500_DY_T_SWIZ_R |
2079						   R500_DY_R_SWIZ_R |
2080						   R500_DY_Q_SWIZ_R));
2081	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2082	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2083	}
2084
2085	/* ALU inst */
2086	/* *_OMASK* - output component write mask */
2087	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
2088					       R500_INST_TEX_SEM_WAIT |
2089					       R500_INST_LAST |
2090					       R500_INST_RGB_OMASK_R |
2091					       R500_INST_RGB_OMASK_G |
2092					       R500_INST_RGB_OMASK_B |
2093					       R500_INST_ALPHA_OMASK |
2094					       R500_INST_RGB_CLAMP |
2095					       R500_INST_ALPHA_CLAMP));
2096	/* ALU inst
2097	 * temp addresses for texture inputs
2098	 * RGB_ADDR0 is src tex (temp 0)
2099	 * RGB_ADDR1 is mask tex (temp 1)
2100	 */
2101	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2102					       R500_RGB_ADDR1(1) |
2103					       R500_RGB_ADDR2(0)));
2104	/* ALU inst
2105	 * temp addresses for texture inputs
2106	 * ALPHA_ADDR0 is src tex (temp 0)
2107	 * ALPHA_ADDR1 is mask tex (temp 1)
2108	 */
2109	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2110					       R500_ALPHA_ADDR1(1) |
2111					       R500_ALPHA_ADDR2(0)));
2112
2113	/* R500_ALU_RGB_TARGET - RGB render target */
2114	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2115					       src_color |
2116					       R500_ALU_RGB_SEL_B_SRC1 |
2117					       mask_color |
2118					       R500_ALU_RGB_TARGET(0)));
2119
2120	/* R500_ALPHA_RGB_TARGET - alpha render target */
2121	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
2122					       R500_ALPHA_ADDRD(0) |
2123					       R500_ALPHA_SEL_A_SRC0 |
2124					       src_alpha |
2125					       R500_ALPHA_SEL_B_SRC1 |
2126					       mask_alpha |
2127					       R500_ALPHA_TARGET(0)));
2128
2129	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
2130					       R500_ALU_RGBA_ADDRD(0) |
2131					       R500_ALU_RGBA_R_SWIZ_0 |
2132					       R500_ALU_RGBA_G_SWIZ_0 |
2133					       R500_ALU_RGBA_B_SWIZ_0 |
2134					       R500_ALU_RGBA_A_SWIZ_0));
2135	FINISH_ACCEL();
2136    }
2137
2138    /* Clear out scissoring */
2139    BEGIN_ACCEL(2);
2140    if (IS_R300_3D) {
2141	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((1440 << R300_SCISSOR_X_SHIFT) |
2142					 (1440 << R300_SCISSOR_Y_SHIFT)));
2143	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2144					 ((pDst->drawable.height + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
2145
2146    } else {
2147	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
2148					 (0 << R300_SCISSOR_Y_SHIFT)));
2149	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width - 1) << R300_SCISSOR_X_SHIFT) |
2150					 ((pDst->drawable.height - 1) << R300_SCISSOR_Y_SHIFT)));
2151    }
2152    FINISH_ACCEL();
2153
2154
2155    BEGIN_ACCEL_RELOC(3, 2);
2156    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pDst);
2157    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pDst);
2158
2159    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
2160    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
2161
2162    FINISH_ACCEL();
2163
2164    BEGIN_ACCEL(1);
2165    if (pMask)
2166	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 6);
2167    else
2168	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 4);
2169    FINISH_ACCEL();
2170
2171    return TRUE;
2172}
2173
2174static void FUNC_NAME(RadeonFinishComposite)(PixmapPtr pDst)
2175{
2176    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2177    ACCEL_PREAMBLE();
2178
2179    ENTER_DRAW(0);
2180
2181    if (info->accel_state->draw_header) {
2182	if (info->ChipFamily < CHIP_FAMILY_R200) {
2183	    info->accel_state->draw_header[0] = CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2184							   info->accel_state->num_vtx *
2185							   info->accel_state->vtx_count + 1);
2186	    info->accel_state->draw_header[2] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2187						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2188						 RADEON_CP_VC_CNTL_MAOS_ENABLE |
2189						 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2190						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2191	} else if (IS_R300_3D || IS_R500_3D) {
2192	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2193							   info->accel_state->num_vtx *
2194							   info->accel_state->vtx_count);
2195	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2196						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2197						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2198	} else {
2199	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2200							   info->accel_state->num_vtx *
2201							   info->accel_state->vtx_count);
2202	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2203						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2204						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2205	}
2206	info->accel_state->draw_header = NULL;
2207    }
2208
2209    if (IS_R300_3D || IS_R500_3D) {
2210	BEGIN_ACCEL(3);
2211	OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
2212	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2213    } else
2214	BEGIN_ACCEL(1);
2215    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2216    FINISH_ACCEL();
2217
2218    LEAVE_DRAW(0);
2219}
2220
2221static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
2222{
2223    ScreenPtr pScreen = pDst->drawable.pScreen;
2224    RINFO_FROM_SCREEN(pScreen);
2225    struct radeon_accel_state *accel_state = info->accel_state;
2226
2227    FUNC_NAME(RadeonFinishComposite)(pDst);
2228
2229    if (!accel_state->src_pic->pDrawable)
2230	pScreen->DestroyPixmap(accel_state->src_pix);
2231
2232    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
2233	pScreen->DestroyPixmap(accel_state->msk_pix);
2234}
2235
/*
 * Vertex emission helpers.
 *
 * VTX_OUT emits one vertex as four floats in the order dst x, dst y,
 * src x, src y.  VTX_OUT_MASK emits the same four floats followed by the
 * mask x and mask y coordinates.  The CP build pushes the values with
 * OUT_RING_F; the other build writes each one to RADEON_SE_PORT_DATA0.
 */
#ifdef ACCEL_CP

#define VTX_OUT(dx, dy, sx, sy)					\
do {								\
    OUT_RING_F(dx);						\
    OUT_RING_F(dy);						\
    OUT_RING_F(sx);						\
    OUT_RING_F(sy);						\
} while (0)

#define VTX_OUT_MASK(dx, dy, sx, sy, mx, my)			\
do {								\
    VTX_OUT(dx, dy, sx, sy);					\
    OUT_RING_F(mx);						\
    OUT_RING_F(my);						\
} while (0)

#else /* ACCEL_CP */

#define VTX_OUT(dx, dy, sx, sy)					\
do {								\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, dx);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, dy);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, sx);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, sy);			\
} while (0)

#define VTX_OUT_MASK(dx, dy, sx, sy, mx, my)			\
do {								\
    VTX_OUT(dx, dy, sx, sy);					\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, mx);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, my);			\
} while (0)

#endif /* !ACCEL_CP */
2277
#ifdef ONLY_ONCE
/*
 * Apply a picture transform to a fixed-point 2D point, in place.
 *
 * The point is extended with a third component of xFixed1, passed through
 * PictureTransformPoint, and the first two resulting components are
 * stored back into *point.  The return value of PictureTransformPoint is
 * not examined.
 *
 * Only compiled while ONLY_ONCE is defined; the macro is #undef'd at the
 * end of this file, presumably so that a second inclusion of the file
 * does not define the function again.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector homogeneous;

    homogeneous.vector[0] = point->x;
    homogeneous.vector[1] = point->y;
    homogeneous.vector[2] = xFixed1;

    PictureTransformPoint(transform, &homogeneous);

    point->x = homogeneous.vector[0];
    point->y = homogeneous.vector[1];
}
#endif
2290
2291static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
2292					   RADEONInfoPtr info,
2293					   PixmapPtr pDst,
2294					   int srcX, int srcY,
2295					   int maskX, int maskY,
2296					   int dstX, int dstY,
2297					   int w, int h)
2298{
2299    int vtx_count;
2300    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
2301    static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
2302    ACCEL_PREAMBLE();
2303
2304    ENTER_DRAW(0);
2305
2306    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
2307       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
2308
2309#if defined(ACCEL_CP)
2310    if ((info->cs && CS_FULL(info->cs)) ||
2311	(!info->cs && (info->cp->indirectBuffer->used + 4 * 32) >
2312	 info->cp->indirectBuffer->total)) {
2313	FUNC_NAME(RadeonFinishComposite)(info->accel_state->dst_pix);
2314	if (info->cs)
2315	    radeon_cs_flush_indirect(pScrn);
2316	else
2317	    RADEONCPFlushIndirect(pScrn, 1);
2318	info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
2319						 info->accel_state->src_pic,
2320						 info->accel_state->msk_pic,
2321						 info->accel_state->dst_pic,
2322						 info->accel_state->src_pix,
2323						 info->accel_state->msk_pix,
2324						 info->accel_state->dst_pix);
2325    }
2326#endif
2327
2328    srcTopLeft.x     = IntToxFixed(srcX);
2329    srcTopLeft.y     = IntToxFixed(srcY);
2330    srcTopRight.x    = IntToxFixed(srcX + w);
2331    srcTopRight.y    = IntToxFixed(srcY);
2332    srcBottomLeft.x  = IntToxFixed(srcX);
2333    srcBottomLeft.y  = IntToxFixed(srcY + h);
2334    srcBottomRight.x = IntToxFixed(srcX + w);
2335    srcBottomRight.y = IntToxFixed(srcY + h);
2336
2337    if (info->accel_state->is_transform[0]) {
2338	if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2339	    transformPoint(info->accel_state->transform[0], &srcTopLeft);
2340	    transformPoint(info->accel_state->transform[0], &srcTopRight);
2341	    transformPoint(info->accel_state->transform[0], &srcBottomLeft);
2342	    transformPoint(info->accel_state->transform[0], &srcBottomRight);
2343	}
2344    }
2345
2346    if (info->accel_state->msk_pic) {
2347	maskTopLeft.x     = IntToxFixed(maskX);
2348	maskTopLeft.y     = IntToxFixed(maskY);
2349	maskTopRight.x    = IntToxFixed(maskX + w);
2350	maskTopRight.y    = IntToxFixed(maskY);
2351	maskBottomLeft.x  = IntToxFixed(maskX);
2352	maskBottomLeft.y  = IntToxFixed(maskY + h);
2353	maskBottomRight.x = IntToxFixed(maskX + w);
2354	maskBottomRight.y = IntToxFixed(maskY + h);
2355
2356	if (info->accel_state->is_transform[1]) {
2357	    if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2358		transformPoint(info->accel_state->transform[1], &maskTopLeft);
2359		transformPoint(info->accel_state->transform[1], &maskTopRight);
2360		transformPoint(info->accel_state->transform[1], &maskBottomLeft);
2361		transformPoint(info->accel_state->transform[1], &maskBottomRight);
2362	    }
2363	}
2364
2365	vtx_count = 6;
2366    } else
2367	vtx_count = 4;
2368
2369    if (info->accel_state->vsync)
2370	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
2371				      radeon_pick_best_crtc(pScrn, dstX, dstX + w, dstY, dstY + h),
2372				      dstY, dstY + h);
2373
2374#ifdef ACCEL_CP
2375    if (info->ChipFamily < CHIP_FAMILY_R200) {
2376	if (!info->accel_state->draw_header) {
2377	    BEGIN_RING(3);
2378
2379#ifdef XF86DRM_MODE
2380	    if (info->cs)
2381		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2382	    else
2383#endif
2384		info->accel_state->draw_header = __head;
2385	    info->accel_state->num_vtx = 0;
2386	    info->accel_state->vtx_count = vtx_count;
2387
2388	    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2389				3 * vtx_count + 1));
2390	    if (info->accel_state->msk_pic)
2391		OUT_RING(RADEON_CP_VC_FRMT_XY |
2392			 RADEON_CP_VC_FRMT_ST0 |
2393			 RADEON_CP_VC_FRMT_ST1);
2394	    else
2395		OUT_RING(RADEON_CP_VC_FRMT_XY |
2396			 RADEON_CP_VC_FRMT_ST0);
2397	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2398		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2399		     RADEON_CP_VC_CNTL_MAOS_ENABLE |
2400		     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2401		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2402	    ADVANCE_RING();
2403	}
2404
2405	info->accel_state->num_vtx += 3;
2406	BEGIN_RING(3 * vtx_count);
2407    } else if (IS_R300_3D || IS_R500_3D) {
2408	if (!info->accel_state->draw_header) {
2409	    BEGIN_RING(2);
2410
2411#ifdef XF86DRM_MODE
2412	    if (info->cs)
2413		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2414	    else
2415#endif
2416		info->accel_state->draw_header = __head;
2417	    info->accel_state->num_vtx = 0;
2418	    info->accel_state->vtx_count = vtx_count;
2419
2420	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2421				4 * vtx_count));
2422	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2423		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2424		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2425	    ADVANCE_RING();
2426	}
2427
2428	info->accel_state->num_vtx += 4;
2429	BEGIN_RING(4 * vtx_count);
2430    } else {
2431	if (!info->accel_state->draw_header) {
2432	    BEGIN_RING(2);
2433
2434#ifdef XF86DRM_MODE
2435	    if (info->cs)
2436		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2437	    else
2438#endif
2439		info->accel_state->draw_header = __head;
2440	    info->accel_state->num_vtx = 0;
2441	    info->accel_state->vtx_count = vtx_count;
2442
2443	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2444				3 * vtx_count));
2445	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2446		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2447		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2448	    ADVANCE_RING();
2449	}
2450
2451	info->accel_state->num_vtx += 3;
2452	BEGIN_RING(3 * vtx_count);
2453    }
2454
2455#else /* ACCEL_CP */
2456    if (IS_R300_3D || IS_R500_3D)
2457	BEGIN_ACCEL(2 + vtx_count * 4);
2458    else
2459	BEGIN_ACCEL(1 + vtx_count * 3);
2460
2461    if (info->ChipFamily < CHIP_FAMILY_R200)
2462	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2463					  RADEON_VF_PRIM_WALK_DATA |
2464					  RADEON_VF_RADEON_MODE |
2465					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2466    else if (IS_R300_3D || IS_R500_3D)
2467	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
2468					  RADEON_VF_PRIM_WALK_DATA |
2469					  (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
2470    else
2471	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2472					  RADEON_VF_PRIM_WALK_DATA |
2473					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2474
2475#endif
2476
2477    if (info->accel_state->msk_pic) {
2478	if (IS_R300_3D || IS_R500_3D) {
2479	    VTX_OUT_MASK((float)dstX,                                      (float)dstY,
2480			 xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0],
2481			 xFixedToFloat(maskTopLeft.x) / info->accel_state->texW[1],     xFixedToFloat(maskTopLeft.y) / info->accel_state->texH[1]);
2482	}
2483	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
2484		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0],
2485		xFixedToFloat(maskBottomLeft.x) / info->accel_state->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->accel_state->texH[1]);
2486	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
2487		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0],
2488		xFixedToFloat(maskBottomRight.x) / info->accel_state->texW[1], xFixedToFloat(maskBottomRight.y) / info->accel_state->texH[1]);
2489	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
2490		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0],
2491		xFixedToFloat(maskTopRight.x) / info->accel_state->texW[1],    xFixedToFloat(maskTopRight.y) / info->accel_state->texH[1]);
2492    } else {
2493	if (IS_R300_3D || IS_R500_3D) {
2494	    VTX_OUT((float)dstX,                                      (float)dstY,
2495		    xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0]);
2496	}
2497	VTX_OUT((float)dstX,                                      (float)(dstY + h),
2498		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0]);
2499	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
2500		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0]);
2501	VTX_OUT((float)(dstX + w),                                (float)dstY,
2502		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
2503    }
2504
2505#ifdef ACCEL_CP
2506    ADVANCE_RING();
2507#else
2508    FINISH_ACCEL();
2509#endif /* !ACCEL_CP */
2510
2511    LEAVE_DRAW(0);
2512}
2513#undef VTX_OUT
2514#undef VTX_OUT_MASK
2515
2516static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
2517				       int srcX, int srcY,
2518				       int maskX, int maskY,
2519				       int dstX, int dstY,
2520				       int width, int height)
2521{
2522    int tileSrcY, tileMaskY, tileDstY;
2523    int remainingHeight;
2524    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2525
2526    if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) {
2527	FUNC_NAME(RadeonCompositeTile)(pScrn,
2528				       info,
2529				       pDst,
2530				       srcX, srcY,
2531				       maskX, maskY,
2532				       dstX, dstY,
2533				       width, height);
2534	return;
2535    }
2536
2537    /* Tiling logic borrowed from exaFillRegionTiled */
2538
2539    modulus(srcY, info->accel_state->src_tile_height, tileSrcY);
2540    tileMaskY = maskY;
2541    tileDstY = dstY;
2542
2543    remainingHeight = height;
2544    while (remainingHeight > 0) {
2545	int remainingWidth = width;
2546	int tileSrcX, tileMaskX, tileDstX;
2547	int h = info->accel_state->src_tile_height - tileSrcY;
2548
2549	if (h > remainingHeight)
2550	    h = remainingHeight;
2551	remainingHeight -= h;
2552
2553	modulus(srcX, info->accel_state->src_tile_width, tileSrcX);
2554	tileMaskX = maskX;
2555	tileDstX = dstX;
2556
2557	while (remainingWidth > 0) {
2558	    int w = info->accel_state->src_tile_width - tileSrcX;
2559	    if (w > remainingWidth)
2560		w = remainingWidth;
2561	    remainingWidth -= w;
2562
2563	    FUNC_NAME(RadeonCompositeTile)(pScrn,
2564					   info,
2565					   pDst,
2566					   tileSrcX, tileSrcY,
2567					   tileMaskX, tileMaskY,
2568					   tileDstX, tileDstY,
2569					   w, h);
2570
2571	    tileSrcX = 0;
2572	    tileMaskX += w;
2573	    tileDstX += w;
2574	}
2575	tileSrcY = 0;
2576	tileMaskY += h;
2577	tileDstY += h;
2578    }
2579}
2580
2581#undef ONLY_ONCE
2582#undef FUNC_NAME
2583