radeon_exa_render.c revision 0974d292
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *    Alex Deucher <alexander.deucher@amd.com>
30 *
31 */
32
33#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
34#error Cannot define both MMIO and CP acceleration!
35#endif
36
37#if !defined(UNIXCPP) || defined(ANSICPP)
38#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
39#else
40#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
41#endif
42
43#ifdef ACCEL_MMIO
44#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
45#else
46#ifdef ACCEL_CP
47#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
48#else
49#error No accel type defined!
50#endif
51#endif
52
53#ifndef ACCEL_CP
54#define ONLY_ONCE
55#endif
56
57/* Only include the following (generic) bits once. */
58#ifdef ONLY_ONCE
59
60struct blendinfo {
61    Bool dst_alpha;
62    Bool src_alpha;
63    uint32_t blend_cntl;
64};
65
66static struct blendinfo RadeonBlendOp[] = {
67    /* Clear */
68    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
69    /* Src */
70    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
71    /* Dst */
72    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
73    /* Over */
74    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
75    /* OverReverse */
76    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
77    /* In */
78    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
79    /* InReverse */
80    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
81    /* Out */
82    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
83    /* OutReverse */
84    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
85    /* Atop */
86    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
87    /* AtopReverse */
88    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
89    /* Xor */
90    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
91    /* Add */
92    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
93};
94
/* Pairs a picture format code with the texture-format bits used for it. */
struct formatinfo {
    int fmt;		/* PICT_* picture format */
    uint32_t card_fmt;	/* texture format value programmed for that format */
};
99
100/* Note on texture formats:
101 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
102 */
103static struct formatinfo R100TexFormats[] = {
104	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
105	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
106	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
107	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
108	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
109	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
110};
111
112static struct formatinfo R200TexFormats[] = {
113    {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
114    {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
115    {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
116    {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
117    {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
118    {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
119    {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
120    {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
121};
122
123static struct formatinfo R300TexFormats[] = {
124    {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
125    {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
126    {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
127    {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
128#ifdef PICT_TYPE_BGRA
129    {PICT_b8g8r8a8,	R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)},
130    {PICT_b8g8r8x8,	R300_EASY_TX_FORMAT(W, Z, Y, ONE, W8Z8Y8X8)},
131#endif
132    {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
133    {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
134    {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
135    {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
136};
137
138/* Common Radeon setup code */
139
140static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
141{
142    switch (pDstPicture->format) {
143    case PICT_a8r8g8b8:
144    case PICT_x8r8g8b8:
145	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
146	break;
147    case PICT_r5g6b5:
148	*dst_format = RADEON_COLOR_FORMAT_RGB565;
149	break;
150    case PICT_a1r5g5b5:
151    case PICT_x1r5g5b5:
152	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
153	break;
154    case PICT_a8:
155	*dst_format = RADEON_COLOR_FORMAT_RGB8;
156	break;
157    default:
158	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
159			(int)pDstPicture->format));
160    }
161
162    return TRUE;
163}
164
165static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
166{
167    switch (pDstPicture->format) {
168    case PICT_a8r8g8b8:
169    case PICT_x8r8g8b8:
170    case PICT_a8b8g8r8:
171    case PICT_x8b8g8r8:
172#ifdef PICT_TYPE_BGRA
173    case PICT_b8g8r8a8:
174    case PICT_b8g8r8x8:
175#endif
176	*dst_format = R300_COLORFORMAT_ARGB8888;
177	break;
178    case PICT_r5g6b5:
179	*dst_format = R300_COLORFORMAT_RGB565;
180	break;
181    case PICT_a1r5g5b5:
182    case PICT_x1r5g5b5:
183	*dst_format = R300_COLORFORMAT_ARGB1555;
184	break;
185    case PICT_a8:
186	*dst_format = R300_COLORFORMAT_I8;
187	break;
188    default:
189	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
190	       (int)pDstPicture->format));
191    }
192    return TRUE;
193}
194
195static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
196{
197    uint32_t sblend, dblend;
198
199    sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK;
200    dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK;
201
202    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
203     * it as always 1.
204     */
205    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
206	if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
207	    sblend = RADEON_SRC_BLEND_GL_ONE;
208	else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
209	    sblend = RADEON_SRC_BLEND_GL_ZERO;
210    }
211
212    /* If the source alpha is being used, then we should only be in a case where
213     * the source blend factor is 0, and the source blend value is the mask
214     * channels multiplied by the source picture's alpha.
215     */
216    if (pMask && pMask->componentAlpha && RadeonBlendOp[op].src_alpha) {
217	if (dblend == RADEON_DST_BLEND_GL_SRC_ALPHA) {
218	    dblend = RADEON_DST_BLEND_GL_SRC_COLOR;
219	} else if (dblend == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA) {
220	    dblend = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
221	}
222    }
223
224    return sblend | dblend;
225}
226
/* Overlays a float with a uint32_t so either view of the same storage can be used. */
union intfloat {
    float f;		/* value seen as a float */
    uint32_t i;		/* the same storage seen as an unsigned 32-bit integer */
};
231
232/* Check if we need a software-fallback because of a repeating
233 *   non-power-of-two texture.
234 *
235 * canTile: whether we can emulate a repeat by drawing in tiles:
236 *   possible for the source, but not for the mask. (Actually
237 *   we could do tiling for the mask too, but dealing with the
238 *   combination of a tiled mask and a tiled source would be
239 *   a lot of complexity, so we handle only the most common
240 *   case of a repeating mask.)
241 */
242static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
243{
244    int w = pPict->pDrawable->width;
245    int h = pPict->pDrawable->height;
246    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
247
248    if ((repeatType == RepeatNormal || repeatType == RepeatReflect) &&
249	((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
250	!(repeatType == RepeatNormal && !pPict->transform && canTile))
251	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
252			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
253
254    return TRUE;
255}
256
257/* Determine if the pitch of the pixmap meets the criteria for being
258 * used as a repeating texture: no padding or only a single line texture.
259 */
260static Bool RADEONPitchMatches(PixmapPtr pPix)
261{
262    int w = pPix->drawable.width;
263    int h = pPix->drawable.height;
264    uint32_t txpitch = exaGetPixmapPitch(pPix);
265
266    if (h > 1 && (RADEON_ALIGN(w * pPix->drawable.bitsPerPixel / 8, 32)) != txpitch)
267	return FALSE;
268
269    return TRUE;
270}
271
272/* We can't turn on repeats normally for a non-power-of-two dimension,
273 * but if the source isn't transformed, we can get the same effect
274 * by drawing the image in multiple tiles. (A common case that it's
275 * important to get right is drawing a strip of a NPOTxPOT texture
276 * repeating in the POT direction. With tiling, this ends up as a
277 * a single tile on R300 and newer, which is perfect.)
278 *
279 * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
280 *   one direction and POT in the other in the POT direction; on
281 *   older chips we can only repeat at all if the texture is POT in
282 *   both directions.
283 *
284 * needMatchingPitch: On R100/R200, we can only repeat horizontally if
285 *   there is no padding in the texture. Textures with small POT widths
286 *   (1,2,4,8) thus can't be tiled.
287 */
288static Bool RADEONSetupSourceTile(PicturePtr pPict,
289				  PixmapPtr pPix,
290				  Bool canTile1d,
291				  Bool needMatchingPitch)
292{
293    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
294    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
295
296    info->accel_state->need_src_tile_x = info->accel_state->need_src_tile_y = FALSE;
297    info->accel_state->src_tile_width = info->accel_state->src_tile_height = 65536; /* "infinite" */
298
299    if (repeatType == RepeatNormal || repeatType == RepeatReflect) {
300	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
301
302	int w = pPict->pDrawable->width;
303	int h = pPict->pDrawable->height;
304
305	if (pPict->transform) {
306	    if (badPitch)
307		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
308				 w, (unsigned)exaGetPixmapPitch(pPix)));
309	} else {
310	    info->accel_state->need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
311	    info->accel_state->need_src_tile_y = (h & (h - 1)) != 0;
312
313	    if ((info->accel_state->need_src_tile_x ||
314		 info->accel_state->need_src_tile_y) &&
315		repeatType != RepeatNormal)
316		RADEON_FALLBACK(("Can only tile RepeatNormal at this time\n"));
317
318	    if (!canTile1d)
319		info->accel_state->need_src_tile_x =
320		    info->accel_state->need_src_tile_y =
321		    info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y;
322	}
323
324	if (info->accel_state->need_src_tile_x)
325	    info->accel_state->src_tile_width = w;
326	if (info->accel_state->need_src_tile_y)
327	    info->accel_state->src_tile_height = h;
328    }
329
330    return TRUE;
331}
332
333/* R100-specific code */
334
335static Bool R100CheckCompositeTexture(PicturePtr pPict,
336				      PicturePtr pDstPict,
337				      int op,
338				      int unit)
339{
340    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
341    int w = pPict->pDrawable->width;
342    int h = pPict->pDrawable->height;
343    int i;
344
345    /* r100 limit should be 2048, there are issues with 2048
346     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
347     */
348
349    if ((w > 2047) || (h > 2047))
350	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
351
352    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
353	if (R100TexFormats[i].fmt == pPict->format)
354	    break;
355    }
356    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
357	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
358			(int)pPict->format));
359
360    if (!RADEONCheckTexturePOT(pPict, unit == 0))
361	return FALSE;
362
363    if (pPict->filter != PictFilterNearest &&
364	pPict->filter != PictFilterBilinear)
365    {
366	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
367    }
368
369    /* for REPEAT_NONE, Render semantics are that sampling outside the source
370     * picture results in alpha=0 pixels. We can implement this with a border color
371     * *if* our source texture has an alpha channel, otherwise we need to fall
372     * back. If we're not transformed then we hope that upper layers have clipped
373     * rendering to the bounds of the source drawable, in which case it doesn't
374     * matter. I have not, however, verified that the X server always does such
375     * clipping.
376     */
377    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
378	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
379	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
380    }
381
382    return TRUE;
383}
384
385#endif /* ONLY_ONCE */
386
387static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
388					int unit)
389{
390    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
391    uint32_t txfilter, txformat, txoffset, txpitch;
392    int w = pPict->pDrawable->width;
393    int h = pPict->pDrawable->height;
394    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
395    Bool repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
396	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
397    int i;
398    struct radeon_exa_pixmap_priv *driver_priv;
399    ACCEL_PREAMBLE();
400
401    txpitch = exaGetPixmapPitch(pPix);
402    txoffset = 0;
403
404    CHECK_OFFSET(pPix, 0x1f, "texture");
405
406    if ((txpitch & 0x1f) != 0)
407	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
408
409    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
410    {
411	if (R100TexFormats[i].fmt == pPict->format)
412	    break;
413    }
414    txformat = R100TexFormats[i].card_fmt;
415    if (RADEONPixmapIsColortiled(pPix))
416	txoffset |= RADEON_TXO_MACRO_TILE;
417
418    if (repeat) {
419	if (!RADEONPitchMatches(pPix))
420	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
421			     w, (unsigned)txpitch));
422
423	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
424	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
425    } else
426	txformat |= RADEON_TXFORMAT_NON_POWER2;
427    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
428
429    info->accel_state->texW[unit] = w;
430    info->accel_state->texH[unit] = h;
431
432    switch (pPict->filter) {
433    case PictFilterNearest:
434	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
435	break;
436    case PictFilterBilinear:
437	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
438	break;
439    default:
440	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
441    }
442
443    switch (repeatType) {
444	case RepeatNormal:
445	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
446		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
447	    else
448	        txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
449	    break;
450	case RepeatPad:
451	    txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
452	    break;
453	case RepeatReflect:
454	    txfilter |= RADEON_CLAMP_S_MIRROR | RADEON_CLAMP_T_MIRROR;
455	    break;
456	case RepeatNone:
457	    /* don't set an illegal clamp mode for rects */
458	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
459		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
460	    break;
461    }
462
463    BEGIN_ACCEL_RELOC(5, 1);
464    if (unit == 0) {
465	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
466	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
467	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
468	    (pPix->drawable.width - 1) |
469	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
470	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
471
472	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_0, txoffset, pPix);
473	/* emit a texture relocation */
474    } else {
475	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
476	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
477
478	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
479	    (pPix->drawable.width - 1) |
480	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
481	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
482	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_1, txoffset, pPix);
483	/* emit a texture relocation */
484    }
485    FINISH_ACCEL();
486
487    if (pPict->transform != 0) {
488	info->accel_state->is_transform[unit] = TRUE;
489	info->accel_state->transform[unit] = pPict->transform;
490    } else {
491	info->accel_state->is_transform[unit] = FALSE;
492    }
493
494    return TRUE;
495}
496
497#ifdef ONLY_ONCE
498
499
500static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
501			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
502{
503    PixmapPtr pSrcPixmap, pDstPixmap;
504    uint32_t tmp1;
505
506    /* Check for unsupported compositing operations. */
507    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
508	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
509
510    if (!pSrcPicture->pDrawable)
511	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
512
513    /* r100 limit should be 2048, there are issues with 2048
514     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
515     */
516
517    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
518
519    if (pSrcPixmap->drawable.width > 2047 ||
520	pSrcPixmap->drawable.height > 2047) {
521	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
522			 pSrcPixmap->drawable.width,
523			 pSrcPixmap->drawable.height));
524    }
525
526    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
527
528    if (pDstPixmap->drawable.width > 2047 ||
529	pDstPixmap->drawable.height > 2047) {
530	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
531			 pDstPixmap->drawable.width,
532			 pDstPixmap->drawable.height));
533    }
534
535    if (pMaskPicture) {
536	PixmapPtr pMaskPixmap;
537
538	if (!pMaskPicture->pDrawable)
539	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
540
541	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
542
543	if (pMaskPixmap->drawable.width > 2047 ||
544	    pMaskPixmap->drawable.height > 2047) {
545	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
546			     pMaskPixmap->drawable.width,
547			     pMaskPixmap->drawable.height));
548	}
549
550	if (pMaskPicture->componentAlpha) {
551	    /* Check if it's component alpha that relies on a source alpha and
552	     * on the source value.  We can only get one of those into the
553	     * single source value that we get to blend with.
554	     */
555	    if (RadeonBlendOp[op].src_alpha &&
556		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
557		RADEON_SRC_BLEND_GL_ZERO) {
558		RADEON_FALLBACK(("Component alpha not supported with source "
559				 "alpha and source value blending.\n"));
560	    }
561	}
562
563	if (!R100CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
564	    return FALSE;
565    }
566
567    if (!R100CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
568	return FALSE;
569
570    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
571	return FALSE;
572
573    return TRUE;
574}
575
576static Bool
577RADEONPrepareCompositeCS(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
578			    PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask,
579			    PixmapPtr pDst)
580{
581    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
582
583    info->accel_state->composite_op = op;
584    info->accel_state->dst_pic = pDstPicture;
585    info->accel_state->msk_pic = pMaskPicture;
586    info->accel_state->src_pic = pSrcPicture;
587    info->accel_state->dst_pix = pDst;
588    info->accel_state->msk_pix = pMask;
589    info->accel_state->src_pix = pSrc;
590
591#ifdef XF86DRM_MODE
592    if (info->cs) {
593	int ret;
594
595	radeon_cs_space_reset_bos(info->cs);
596
597	radeon_add_pixmap(info->cs, pSrc,
598			  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
599
600	if (pMask)
601	    radeon_add_pixmap(info->cs, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
602
603	radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
604
605	ret = radeon_cs_space_check(info->cs);
606	if (ret)
607	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
608    }
609#endif
610
611    return TRUE;
612}
613
614#endif /* ONLY_ONCE */
615
616static Bool FUNC_NAME(R100PrepareComposite)(int op,
617					    PicturePtr pSrcPicture,
618					    PicturePtr pMaskPicture,
619					    PicturePtr pDstPicture,
620					    PixmapPtr pSrc,
621					    PixmapPtr pMask,
622					    PixmapPtr pDst)
623{
624    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
625    uint32_t dst_format, dst_pitch, colorpitch;
626    uint32_t pp_cntl, blendcntl, cblend, ablend;
627    int pixel_shift;
628    struct radeon_exa_pixmap_priv *driver_priv;
629    ACCEL_PREAMBLE();
630
631    TRACE;
632
633    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
634	return FALSE;
635
636    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
637	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
638
639    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
640
641    dst_pitch = exaGetPixmapPitch(pDst);
642    colorpitch = dst_pitch >> pixel_shift;
643    if (RADEONPixmapIsColortiled(pDst))
644	colorpitch |= RADEON_COLOR_TILE_ENABLE;
645
646    CHECK_OFFSET(pDst, 0x0f, "destination");
647
648    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
649	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
650
651    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
652	return FALSE;
653
654    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
655			     pSrc, pMask, pDst);
656
657    /* switch to 3D after doing buffer space checks as the latter may flush */
658    RADEON_SWITCH_TO_3D();
659
660    if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
661	return FALSE;
662    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
663
664    if (pMask != NULL) {
665	if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1))
666	    return FALSE;
667	pp_cntl |= RADEON_TEX_1_ENABLE;
668    } else {
669	info->accel_state->is_transform[1] = FALSE;
670    }
671
672    BEGIN_ACCEL_RELOC(10, 2);
673    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
674    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
675    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
676    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
677
678    /* IN operator: Multiply src by mask components or mask alpha.
679     * BLEND_CTL_ADD is A * B + C.
680     * If a source is a8, we have to explicitly zero its color values.
681     * If the destination is a8, we have to route the alpha to red, I think.
682     * If we're doing component alpha where the source for blending is going to
683     * be the source alpha (and there's no source value used), we have to zero
684     * the source's color values.
685     */
686    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
687    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
688
689    if (pDstPicture->format == PICT_a8 ||
690	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
691    {
692	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
693    } else if (pSrcPicture->format == PICT_a8)
694	cblend |= RADEON_COLOR_ARG_A_ZERO;
695    else
696	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
697    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
698
699    if (pMask) {
700	if (pMaskPicture->componentAlpha &&
701	    pDstPicture->format != PICT_a8)
702	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
703	else
704	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
705	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
706    } else {
707	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
708	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
709    }
710
711    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
712    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
713    if (pMask)
714	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
715					  RADEON_SE_VTX_FMT_ST0 |
716					  RADEON_SE_VTX_FMT_ST1));
717    else
718	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
719					  RADEON_SE_VTX_FMT_ST0));
720    /* Op operator. */
721    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
722
723    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
724
725    OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0);
726    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) |
727					   ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT)));
728    FINISH_ACCEL();
729
730    return TRUE;
731}
732
733#ifdef ONLY_ONCE
734
735static Bool R200CheckCompositeTexture(PicturePtr pPict,
736				      PicturePtr pDstPict,
737				      int op,
738				      int unit)
739{
740    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
741    int w = pPict->pDrawable->width;
742    int h = pPict->pDrawable->height;
743    int i;
744
745    /* r200 limit should be 2048, there are issues with 2048
746     * see bug 19269
747     */
748
749    if ((w > 2047) || (h > 2047))
750	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
751
752    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
753    {
754	if (R200TexFormats[i].fmt == pPict->format)
755	    break;
756    }
757    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
758	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
759			 (int)pPict->format));
760
761    if (!RADEONCheckTexturePOT(pPict, unit == 0))
762	return FALSE;
763
764    if (pPict->filter != PictFilterNearest &&
765	pPict->filter != PictFilterBilinear)
766	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
767
768    /* for REPEAT_NONE, Render semantics are that sampling outside the source
769     * picture results in alpha=0 pixels. We can implement this with a border color
770     * *if* our source texture has an alpha channel, otherwise we need to fall
771     * back. If we're not transformed then we hope that upper layers have clipped
772     * rendering to the bounds of the source drawable, in which case it doesn't
773     * matter. I have not, however, verified that the X server always does such
774     * clipping.
775     */
776    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
777	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
778	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
779    }
780
781    return TRUE;
782}
783
784#endif /* ONLY_ONCE */
785
786static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
787					int unit)
788{
789    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
790    uint32_t txfilter, txformat, txoffset, txpitch;
791    int w = pPict->pDrawable->width;
792    int h = pPict->pDrawable->height;
793    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
794    Bool repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
795	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
796    int i;
797    struct radeon_exa_pixmap_priv *driver_priv;
798    ACCEL_PREAMBLE();
799
800    txpitch = exaGetPixmapPitch(pPix);
801
802    txoffset = 0;
803    CHECK_OFFSET(pPix, 0x1f, "texture");
804
805    if ((txpitch & 0x1f) != 0)
806	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
807
808    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
809    {
810	if (R200TexFormats[i].fmt == pPict->format)
811	    break;
812    }
813    txformat = R200TexFormats[i].card_fmt;
814    if (RADEONPixmapIsColortiled(pPix))
815	txoffset |= R200_TXO_MACRO_TILE;
816
817    if (repeat) {
818	if (!RADEONPitchMatches(pPix))
819	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
820			     w, (unsigned)txpitch));
821
822	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
823	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
824    } else
825	txformat |= R200_TXFORMAT_NON_POWER2;
826    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
827
828    info->accel_state->texW[unit] = w;
829    info->accel_state->texH[unit] = h;
830
831    switch (pPict->filter) {
832    case PictFilterNearest:
833	txfilter = (R200_MAG_FILTER_NEAREST |
834		    R200_MIN_FILTER_NEAREST);
835	break;
836    case PictFilterBilinear:
837	txfilter = (R200_MAG_FILTER_LINEAR |
838		    R200_MIN_FILTER_LINEAR);
839	break;
840    default:
841	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
842    }
843
844    switch (repeatType) {
845	case RepeatNormal:
846	    if (txformat & R200_TXFORMAT_NON_POWER2)
847		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
848	    else
849	        txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
850	    break;
851	case RepeatPad:
852	    txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
853	    break;
854	case RepeatReflect:
855	    txfilter |= R200_CLAMP_S_MIRROR | R200_CLAMP_T_MIRROR;
856	    break;
857	case RepeatNone:
858	    /* don't set an illegal clamp mode for rect textures */
859	    if (txformat & R200_TXFORMAT_NON_POWER2)
860		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
861	    break;
862    }
863
864    BEGIN_ACCEL_RELOC(6, 1);
865    if (unit == 0) {
866	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
867	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
868	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
869	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
870		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
871	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
872	EMIT_READ_OFFSET(R200_PP_TXOFFSET_0, txoffset, pPix);
873    } else {
874	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
875	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
876	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
877	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
878		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
879	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
880	EMIT_READ_OFFSET(R200_PP_TXOFFSET_1, txoffset, pPix);
881	/* emit a texture relocation */
882    }
883    FINISH_ACCEL();
884
885    if (pPict->transform != 0) {
886	info->accel_state->is_transform[unit] = TRUE;
887	info->accel_state->transform[unit] = pPict->transform;
888    } else {
889	info->accel_state->is_transform[unit] = FALSE;
890    }
891
892    return TRUE;
893}
894
895#ifdef ONLY_ONCE
896static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
897			       PicturePtr pDstPicture)
898{
899    PixmapPtr pSrcPixmap, pDstPixmap;
900    uint32_t tmp1;
901
902    TRACE;
903
904    /* Check for unsupported compositing operations. */
905    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
906	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
907
908    if (!pSrcPicture->pDrawable)
909	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
910
911    /* r200 limit should be 2048, there are issues with 2048
912     * see bug 19269
913     */
914
915    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
916
917    if (pSrcPixmap->drawable.width > 2047 ||
918	pSrcPixmap->drawable.height > 2047) {
919	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
920			 pSrcPixmap->drawable.width,
921			 pSrcPixmap->drawable.height));
922    }
923
924    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
925
926    if (pDstPixmap->drawable.width > 2047 ||
927	pDstPixmap->drawable.height > 2047) {
928	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
929			 pDstPixmap->drawable.width,
930			 pDstPixmap->drawable.height));
931    }
932
933    if (pMaskPicture) {
934	PixmapPtr pMaskPixmap;
935
936	if (!pMaskPicture->pDrawable)
937	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
938
939	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
940
941	if (pMaskPixmap->drawable.width > 2047 ||
942	    pMaskPixmap->drawable.height > 2047) {
943	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
944			     pMaskPixmap->drawable.width,
945			     pMaskPixmap->drawable.height));
946	}
947
948	if (pMaskPicture->componentAlpha) {
949	    /* Check if it's component alpha that relies on a source alpha and
950	     * on the source value.  We can only get one of those into the
951	     * single source value that we get to blend with.
952	     */
953	    if (RadeonBlendOp[op].src_alpha &&
954		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
955		RADEON_SRC_BLEND_GL_ZERO) {
956		RADEON_FALLBACK(("Component alpha not supported with source "
957				 "alpha and source value blending.\n"));
958	    }
959	}
960
961	if (!R200CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
962	    return FALSE;
963    }
964
965    if (!R200CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
966	return FALSE;
967
968    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
969	return FALSE;
970
971    return TRUE;
972}
973#endif /* ONLY_ONCE */
974
975static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
976				PicturePtr pMaskPicture, PicturePtr pDstPicture,
977				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
978{
979    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
980    uint32_t dst_format, dst_pitch;
981    uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
982    int pixel_shift;
983    struct radeon_exa_pixmap_priv *driver_priv;
984    ACCEL_PREAMBLE();
985
986    TRACE;
987
988    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
989	return FALSE;
990
991    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
992	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
993
994    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
995
996    dst_pitch = exaGetPixmapPitch(pDst);
997    colorpitch = dst_pitch >> pixel_shift;
998    if (RADEONPixmapIsColortiled(pDst))
999	colorpitch |= RADEON_COLOR_TILE_ENABLE;
1000
1001    CHECK_OFFSET(pDst, 0xf, "destination");
1002
1003    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1004	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1005
1006    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
1007	return FALSE;
1008
1009    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1010			     pSrc, pMask, pDst);
1011
1012    /* switch to 3D after doing buffer space checks as it may flush */
1013    RADEON_SWITCH_TO_3D();
1014
1015    if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
1016	return FALSE;
1017    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
1018
1019    if (pMask != NULL) {
1020	if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1))
1021	    return FALSE;
1022	pp_cntl |= RADEON_TEX_1_ENABLE;
1023    } else {
1024	info->accel_state->is_transform[1] = FALSE;
1025    }
1026
1027    BEGIN_ACCEL_RELOC(12, 2);
1028
1029    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
1030    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
1031
1032    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
1033    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
1034
1035    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
1036    if (pMask)
1037	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
1038		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
1039		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
1040    else
1041	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
1042		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
1043
1044
1045
1046    /* IN operator: Multiply src by mask components or mask alpha.
1047     * BLEND_CTL_ADD is A * B + C.
1048     * If a picture is a8, we have to explicitly zero its color values.
1049     * If the destination is a8, we have to route the alpha to red, I think.
1050     * If we're doing component alpha where the source for blending is going to
1051     * be the source alpha (and there's no source value used), we have to zero
1052     * the source's color values.
1053     */
1054    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
1055    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
1056
1057    if (pDstPicture->format == PICT_a8 ||
1058	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
1059    {
1060	cblend |= R200_TXC_ARG_A_R0_ALPHA;
1061    } else if (pSrcPicture->format == PICT_a8)
1062	cblend |= R200_TXC_ARG_A_ZERO;
1063    else
1064	cblend |= R200_TXC_ARG_A_R0_COLOR;
1065    ablend |= R200_TXA_ARG_A_R0_ALPHA;
1066
1067    if (pMask) {
1068	if (pMaskPicture->componentAlpha &&
1069	    pDstPicture->format != PICT_a8)
1070	    cblend |= R200_TXC_ARG_B_R1_COLOR;
1071	else
1072	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
1073	ablend |= R200_TXA_ARG_B_R1_ALPHA;
1074    } else {
1075	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
1076	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
1077    }
1078
1079    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend);
1080    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
1081	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
1082    OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend);
1083    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
1084	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
1085
1086    /* Op operator. */
1087    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1088    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
1089
1090    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) |
1091					   ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT)));
1092
1093    FINISH_ACCEL();
1094
1095    return TRUE;
1096}
1097
1098#ifdef ONLY_ONCE
1099
1100static Bool R300CheckCompositeTexture(PicturePtr pPict,
1101				      PicturePtr pDstPict,
1102				      int op,
1103				      int unit,
1104				      Bool is_r500)
1105{
1106    ScreenPtr pScreen = pDstPict->pDrawable->pScreen;
1107    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1108    RADEONInfoPtr info = RADEONPTR(pScrn);
1109
1110    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1111    int w = pPict->pDrawable->width;
1112    int h = pPict->pDrawable->height;
1113    int i;
1114    int max_tex_w, max_tex_h;
1115
1116    if (is_r500) {
1117	max_tex_w = 4096;
1118	max_tex_h = 4096;
1119    } else {
1120	max_tex_w = 2048;
1121	max_tex_h = 2048;
1122    }
1123
1124    if ((w > max_tex_w) || (h > max_tex_h))
1125	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1126
1127    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1128    {
1129	if (R300TexFormats[i].fmt == pPict->format)
1130	    break;
1131    }
1132    if (i == sizeof(R300TexFormats) / sizeof(R300TexFormats[0]))
1133	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1134			 (int)pPict->format));
1135
1136    if (!RADEONCheckTexturePOT(pPict, unit == 0)) {
1137	if (info->cs) {
1138    		struct radeon_exa_pixmap_priv *driver_priv;
1139		PixmapPtr pPix;
1140
1141    		pPix = RADEONGetDrawablePixmap(pPict->pDrawable);
1142		driver_priv = exaGetPixmapDriverPrivate(pPix);
1143		//TODOradeon_bufmgr_gem_force_gtt(driver_priv->bo);
1144	}
1145	return FALSE;
1146    }
1147
1148    if (pPict->filter != PictFilterNearest &&
1149	pPict->filter != PictFilterBilinear)
1150	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1151
1152    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1153     * picture results in alpha=0 pixels. We can implement this with a border color
1154     * *if* our source texture has an alpha channel, otherwise we need to fall
1155     * back. If we're not transformed then we hope that upper layers have clipped
1156     * rendering to the bounds of the source drawable, in which case it doesn't
1157     * matter. I have not, however, verified that the X server always does such
1158     * clipping.
1159     */
1160    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
1161	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1162	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1163    }
1164
1165    return TRUE;
1166}
1167
1168#endif /* ONLY_ONCE */
1169
1170static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
1171					int unit)
1172{
1173    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1174    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
1175    int w = pPict->pDrawable->width;
1176    int h = pPict->pDrawable->height;
1177    int i, pixel_shift;
1178    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1179    struct radeon_exa_pixmap_priv *driver_priv;
1180    ACCEL_PREAMBLE();
1181
1182    TRACE;
1183
1184    txpitch = exaGetPixmapPitch(pPix);
1185    txoffset = 0;
1186
1187    CHECK_OFFSET(pPix, 0x1f, "texture");
1188
1189    if ((txpitch & 0x1f) != 0)
1190	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1191
1192    /* TXPITCH = pixels (texels) per line - 1 */
1193    pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1194    txpitch >>= pixel_shift;
1195    txpitch -= 1;
1196
1197    if (RADEONPixmapIsColortiled(pPix))
1198	txoffset |= R300_MACRO_TILE;
1199
1200    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1201    {
1202	if (R300TexFormats[i].fmt == pPict->format)
1203	    break;
1204    }
1205
1206    txformat1 = R300TexFormats[i].card_fmt;
1207
1208    if (IS_R300_3D) {
1209	if ((unit == 0) && info->accel_state->msk_pic)
1210	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_0;
1211	else if (unit == 1)
1212	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_1;
1213    }
1214
1215    txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1216		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1217
1218    if (IS_R500_3D && ((w - 1) & 0x800))
1219	txpitch |= R500_TXWIDTH_11;
1220
1221    if (IS_R500_3D && ((h - 1) & 0x800))
1222	txpitch |= R500_TXHEIGHT_11;
1223
1224    /* Use TXPITCH instead of TXWIDTH for address computations: we could
1225     * omit this if there is no padding, but there is no apparent advantage
1226     * in doing so.
1227     */
1228    txformat0 |= R300_TXPITCH_EN;
1229
1230    txfilter = (unit << R300_TX_ID_SHIFT);
1231
1232    switch (repeatType) {
1233    case RepeatNormal:
1234	if (unit != 0 || !info->accel_state->need_src_tile_x)
1235	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1236	else
1237	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1238
1239	if (unit != 0 || !info->accel_state->need_src_tile_y)
1240	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1241	else
1242	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1243
1244	break;
1245    case RepeatPad:
1246	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1247	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST);
1248	break;
1249    case RepeatReflect:
1250	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_MIRROR) |
1251	    R300_TX_CLAMP_T(R300_TX_CLAMP_MIRROR);
1252	break;
1253    case RepeatNone:
1254	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1255	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1256	break;
1257    }
1258
1259    switch (pPict->filter) {
1260    case PictFilterNearest:
1261	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1262	break;
1263    case PictFilterBilinear:
1264	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1265	break;
1266    default:
1267	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1268    }
1269
1270    BEGIN_ACCEL_RELOC(repeatType == RepeatNone ? 7 : 6, 1);
1271    OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1272    OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1273    OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1274    OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1275    OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1276
1277    EMIT_READ_OFFSET((R300_TX_OFFSET_0 + (unit * 4)), txoffset, pPix);
1278
1279    if (repeatType == RepeatNone)
1280	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1281    FINISH_ACCEL();
1282
1283    if (pPict->transform != 0) {
1284	info->accel_state->is_transform[unit] = TRUE;
1285	info->accel_state->transform[unit] = pPict->transform;
1286
1287	/* setup the PVS consts */
1288	if (info->accel_state->has_tcl) {
1289	    info->accel_state->texW[unit] = 1;
1290	    info->accel_state->texH[unit] = 1;
1291	    BEGIN_ACCEL(9);
1292	    if (IS_R300_3D)
1293		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1294	    else
1295		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1296
1297	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][0])));
1298	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][1])));
1299	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][2])));
1300	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1301
1302	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][0])));
1303	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][1])));
1304	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][2])));
1305	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1306
1307	    FINISH_ACCEL();
1308	} else {
1309	    info->accel_state->texW[unit] = w;
1310	    info->accel_state->texH[unit] = h;
1311	}
1312    } else {
1313	info->accel_state->is_transform[unit] = FALSE;
1314
1315	/* setup the PVS consts */
1316	if (info->accel_state->has_tcl) {
1317	    info->accel_state->texW[unit] = 1;
1318	    info->accel_state->texH[unit] = 1;
1319
1320	    BEGIN_ACCEL(9);
1321	    if (IS_R300_3D)
1322		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1323	    else
1324		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1325
1326	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1327	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1328	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1329	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1330
1331	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1332	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1333	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1334	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1335
1336	    FINISH_ACCEL();
1337	} else {
1338	    info->accel_state->texW[unit] = w;
1339	    info->accel_state->texH[unit] = h;
1340	}
1341    }
1342
1343    return TRUE;
1344}
1345
1346#ifdef ONLY_ONCE
1347
1348static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1349			       PicturePtr pDstPicture)
1350{
1351    uint32_t tmp1;
1352    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
1353    PixmapPtr pSrcPixmap, pDstPixmap;
1354    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1355    RADEONInfoPtr info = RADEONPTR(pScrn);
1356    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1357
1358    TRACE;
1359
1360    /* Check for unsupported compositing operations. */
1361    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
1362	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1363
1364    if (!pSrcPicture->pDrawable)
1365	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1366
1367    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1368
1369    if (IS_R500_3D) {
1370	max_tex_w = 4096;
1371	max_tex_h = 4096;
1372	max_dst_w = 4096;
1373	max_dst_h = 4096;
1374    } else {
1375	max_tex_w = 2048;
1376	max_tex_h = 2048;
1377	if (IS_R400_3D) {
1378	    max_dst_w = 4021;
1379	    max_dst_h = 4021;
1380	} else {
1381	    max_dst_w = 2560;
1382	    max_dst_h = 2560;
1383	}
1384    }
1385
1386    if (pSrcPixmap->drawable.width > max_tex_w ||
1387	pSrcPixmap->drawable.height > max_tex_h) {
1388	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1389			 pSrcPixmap->drawable.width,
1390			 pSrcPixmap->drawable.height));
1391    }
1392
1393    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1394
1395    if (pDstPixmap->drawable.width > max_dst_w ||
1396	pDstPixmap->drawable.height > max_dst_h) {
1397	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1398			 pDstPixmap->drawable.width,
1399			 pDstPixmap->drawable.height));
1400    }
1401
1402    if (pMaskPicture) {
1403	PixmapPtr pMaskPixmap;
1404
1405	if (!pMaskPicture->pDrawable)
1406	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1407
1408	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1409
1410	if (pMaskPixmap->drawable.width > max_tex_w ||
1411	    pMaskPixmap->drawable.height > max_tex_h) {
1412	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1413			     pMaskPixmap->drawable.width,
1414			     pMaskPixmap->drawable.height));
1415	}
1416
1417	if (pMaskPicture->componentAlpha) {
1418	    /* Check if it's component alpha that relies on a source alpha and
1419	     * on the source value.  We can only get one of those into the
1420	     * single source value that we get to blend with.
1421	     */
1422	    if (RadeonBlendOp[op].src_alpha &&
1423		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
1424		RADEON_SRC_BLEND_GL_ZERO) {
1425		RADEON_FALLBACK(("Component alpha not supported with source "
1426				 "alpha and source value blending.\n"));
1427	    }
1428	}
1429
1430	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
1431	    return FALSE;
1432    }
1433
1434    if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D))
1435	return FALSE;
1436
1437    if (!R300GetDestFormat(pDstPicture, &tmp1))
1438	return FALSE;
1439
1440    return TRUE;
1441
1442}
1443#endif /* ONLY_ONCE */
1444
1445static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
1446				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1447				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1448{
1449    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
1450    uint32_t dst_format, dst_pitch;
1451    uint32_t txenable, colorpitch;
1452    uint32_t blendcntl, output_fmt;
1453    uint32_t src_color, src_alpha;
1454    uint32_t mask_color, mask_alpha;
1455    int pixel_shift;
1456    struct radeon_exa_pixmap_priv *driver_priv;
1457    ACCEL_PREAMBLE();
1458    TRACE;
1459
1460    if (!R300GetDestFormat(pDstPicture, &dst_format))
1461	return FALSE;
1462
1463    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1464
1465    dst_pitch = exaGetPixmapPitch(pDst);
1466    colorpitch = dst_pitch >> pixel_shift;
1467
1468    if (RADEONPixmapIsColortiled(pDst))
1469	colorpitch |= R300_COLORTILE;
1470
1471    colorpitch |= dst_format;
1472
1473    CHECK_OFFSET(pDst, 0x0f, "destination");
1474
1475    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1476	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1477
1478    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1479	return FALSE;
1480
1481    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1482			     pSrc, pMask, pDst);
1483
1484    /* have to execute switch after doing buffer sizing check as the latter flushes */
1485    RADEON_SWITCH_TO_3D();
1486
1487    if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
1488	return FALSE;
1489    txenable = R300_TEX_0_ENABLE;
1490
1491    if (pMask != NULL) {
1492	if (!FUNC_NAME(R300TextureSetup)(pMaskPicture, pMask, 1))
1493	    return FALSE;
1494	txenable |= R300_TEX_1_ENABLE;
1495    } else {
1496	info->accel_state->is_transform[1] = FALSE;
1497    }
1498
1499    /* setup the VAP */
1500    if (info->accel_state->has_tcl) {
1501	if (pMask)
1502	    BEGIN_ACCEL(10);
1503	else
1504	    BEGIN_ACCEL(9);
1505	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
1506    } else {
1507	if (pMask)
1508	    BEGIN_ACCEL(6);
1509	else
1510	    BEGIN_ACCEL(5);
1511    }
1512
    /* These registers define the number, type, and location of data submitted
     * to the PVS unit or to the GA input (when PVS is disabled).
     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
     * enabled.  This memory provides the inputs to the vertex shader program
     * and ordering is not important.  When PVS/TCL is disabled, this field maps
     * directly to the GA input memory and the order is significant.  In
     * PVS_BYPASS mode the order is as follows:
1520     * Position
1521     * Point Size
1522     * Color 0-3
1523     * Textures 0-7
1524     * Fog
1525     */
1526    if (pMask) {
1527	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1528		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1529		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1530		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1531		       R300_SIGNED_0 |
1532		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1533		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1534		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1535		       R300_SIGNED_1));
1536	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
1537		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1538		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1539		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1540		       R300_LAST_VEC_2 |
1541		       R300_SIGNED_2));
1542    } else
1543	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1544		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1545		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1546		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1547		       R300_SIGNED_0 |
1548		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1549		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1550		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1551		       R300_LAST_VEC_1 |
1552		       R300_SIGNED_1));
1553
1554    /* load the vertex shader
1555     * We pre-load vertex programs in RADEONInit3DEngine():
1556     * - exa
1557     * - Xv
1558     * - Xv bicubic
1559     * Here we select the offset of the vertex program we want to use
1560     */
1561    if (info->accel_state->has_tcl) {
1562	if (pMask) {
1563	    /* consts used by vertex shaders */
1564	    OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1565						    R300_PVS_MAX_CONST_ADDR(3)));
1566	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1567			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1568			   (8 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1569			   (8 << R300_PVS_LAST_INST_SHIFT)));
1570	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1571			  (8 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1572	} else {
1573	    /* consts used by vertex shaders */
1574	    OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1575						    R300_PVS_MAX_CONST_ADDR(3)));
1576	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1577			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1578			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1579			   (4 << R300_PVS_LAST_INST_SHIFT)));
1580	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1581			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1582	}
1583    }
1584
1585    /* Position and one or two sets of 2 texture coordinates */
1586    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1587    if (pMask)
1588	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1589		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1590		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1591    else
1592	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1593		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1594
1595    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
1596    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
1597    FINISH_ACCEL();
1598
1599    /* shader output swizzling */
1600    switch (pDstPicture->format) {
1601    case PICT_a8r8g8b8:
1602    case PICT_x8r8g8b8:
1603    default:
1604	output_fmt = (R300_OUT_FMT_C4_8 |
1605		      R300_OUT_FMT_C0_SEL_BLUE |
1606		      R300_OUT_FMT_C1_SEL_GREEN |
1607		      R300_OUT_FMT_C2_SEL_RED |
1608		      R300_OUT_FMT_C3_SEL_ALPHA);
1609	break;
1610    case PICT_a8b8g8r8:
1611    case PICT_x8b8g8r8:
1612	output_fmt = (R300_OUT_FMT_C4_8 |
1613		      R300_OUT_FMT_C0_SEL_RED |
1614		      R300_OUT_FMT_C1_SEL_GREEN |
1615		      R300_OUT_FMT_C2_SEL_BLUE |
1616		      R300_OUT_FMT_C3_SEL_ALPHA);
1617	break;
1618#ifdef PICT_TYPE_BGRA
1619    case PICT_b8g8r8a8:
1620    case PICT_b8g8r8x8:
1621	output_fmt = (R300_OUT_FMT_C4_8 |
1622		      R300_OUT_FMT_C0_SEL_ALPHA |
1623		      R300_OUT_FMT_C1_SEL_RED |
1624		      R300_OUT_FMT_C2_SEL_GREEN |
1625		      R300_OUT_FMT_C3_SEL_BLUE);
1626	break;
1627#endif
1628    case PICT_a8:
1629	output_fmt = (R300_OUT_FMT_C4_8 |
1630		      R300_OUT_FMT_C0_SEL_ALPHA);
1631	break;
1632    }
1633
1634    /* setup pixel shader */
1635    if (IS_R300_3D) {
1636	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1637	    src_color = R300_ALU_RGB_0_0;
1638	else
1639	    src_color = R300_ALU_RGB_SRC0_RGB;
1640
1641	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1642	    src_alpha = R300_ALU_ALPHA_1_0;
1643	else
1644	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1645
1646	if (pMask) {
1647	    if (pMaskPicture->componentAlpha) {
1648		if (RadeonBlendOp[op].src_alpha) {
1649		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1650			src_color = R300_ALU_RGB_1_0;
1651		    else
1652			src_color = R300_ALU_RGB_SRC0_AAA;
1653		} else
1654		    src_color = R300_ALU_RGB_SRC0_RGB;
1655		mask_color = R300_ALU_RGB_SRC1_RGB;
1656	    } else {
1657		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1658		    mask_color = R300_ALU_RGB_1_0;
1659		else
1660		    mask_color = R300_ALU_RGB_SRC1_AAA;
1661	    }
1662	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1663		mask_alpha = R300_ALU_ALPHA_1_0;
1664	    else
1665		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1666	} else {
1667	    mask_color = R300_ALU_RGB_1_0;
1668	    mask_alpha = R300_ALU_ALPHA_1_0;
1669	}
1670
1671	/* setup the rasterizer, load FS */
1672	if (pMask) {
1673	    BEGIN_ACCEL(16);
1674	    /* 4 components: 2 for tex0, 2 for tex1 */
1675	    OUT_ACCEL_REG(R300_RS_COUNT,
1676			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1677			   R300_RS_COUNT_HIRES_EN));
1678
1679	    /* R300_INST_COUNT_RS - highest RS instruction used */
1680	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1681
1682	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1683						R300_ALU_CODE_SIZE(0) |
1684						R300_TEX_CODE_OFFSET(0) |
1685						R300_TEX_CODE_SIZE(1)));
1686
1687	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1688			  (R300_ALU_START(0) |
1689			   R300_ALU_SIZE(0) |
1690			   R300_TEX_START(0) |
1691			   R300_TEX_SIZE(1) |
1692			   R300_RGBA_OUT));
1693
1694
1695	} else {
1696	    BEGIN_ACCEL(15);
1697	    /* 2 components: 2 for tex0 */
1698	    OUT_ACCEL_REG(R300_RS_COUNT,
1699			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1700			   R300_RS_COUNT_HIRES_EN));
1701
1702	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1703
1704	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1705						R300_ALU_CODE_SIZE(0) |
1706						R300_TEX_CODE_OFFSET(0) |
1707						R300_TEX_CODE_SIZE(0)));
1708
1709	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1710			  (R300_ALU_START(0) |
1711			   R300_ALU_SIZE(0) |
1712			   R300_TEX_START(0) |
1713			   R300_TEX_SIZE(0) |
1714			   R300_RGBA_OUT));
1715
1716	}
1717
1718	OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
1719	OUT_ACCEL_REG(R300_US_CODE_ADDR_0,
1720		      (R300_ALU_START(0) |
1721		       R300_ALU_SIZE(0) |
1722		       R300_TEX_START(0) |
1723		       R300_TEX_SIZE(0)));
1724	OUT_ACCEL_REG(R300_US_CODE_ADDR_1,
1725		      (R300_ALU_START(0) |
1726		       R300_ALU_SIZE(0) |
1727		       R300_TEX_START(0) |
1728		       R300_TEX_SIZE(0)));
1729	OUT_ACCEL_REG(R300_US_CODE_ADDR_2,
1730		      (R300_ALU_START(0) |
1731		       R300_ALU_SIZE(0) |
1732		       R300_TEX_START(0) |
1733		       R300_TEX_SIZE(0)));
1734
1735	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1736	/* shader output swizzling */
1737	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1738
1739	/* tex inst for src texture */
1740	OUT_ACCEL_REG(R300_US_TEX_INST(0),
1741		      (R300_TEX_SRC_ADDR(0) |
1742		       R300_TEX_DST_ADDR(0) |
1743		       R300_TEX_ID(0) |
1744		       R300_TEX_INST(R300_TEX_INST_LD)));
1745
1746	if (pMask) {
1747	    /* tex inst for mask texture */
1748	    OUT_ACCEL_REG(R300_US_TEX_INST(1),
1749			  (R300_TEX_SRC_ADDR(1) |
1750			   R300_TEX_DST_ADDR(1) |
1751			   R300_TEX_ID(1) |
1752			   R300_TEX_INST(R300_TEX_INST_LD)));
1753	}
1754
1755	/* RGB inst
1756	 * temp addresses for texture inputs
1757	 * ALU_RGB_ADDR0 is src tex (temp 0)
1758	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1759	 * R300_ALU_RGB_OMASK - output components to write
1760	 * R300_ALU_RGB_TARGET_A - render target
1761	 */
1762	OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0),
1763		      (R300_ALU_RGB_ADDR0(0) |
1764		       R300_ALU_RGB_ADDR1(1) |
1765		       R300_ALU_RGB_ADDR2(0) |
1766		       R300_ALU_RGB_ADDRD(0) |
1767		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1768					   R300_ALU_RGB_MASK_G |
1769					   R300_ALU_RGB_MASK_B)) |
1770		       R300_ALU_RGB_TARGET_A));
1771	/* RGB inst
1772	 * ALU operation
1773	 */
1774	OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0),
1775		      (R300_ALU_RGB_SEL_A(src_color) |
1776		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1777		       R300_ALU_RGB_SEL_B(mask_color) |
1778		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1779		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1780		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1781		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1782		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1783		       R300_ALU_RGB_CLAMP));
1784	/* Alpha inst
1785	 * temp addresses for texture inputs
1786	 * ALU_ALPHA_ADDR0 is src tex (0)
1787	 * ALU_ALPHA_ADDR1 is mask tex (1)
1788	 * R300_ALU_ALPHA_OMASK - output components to write
1789	 * R300_ALU_ALPHA_TARGET_A - render target
1790	 */
1791	OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0),
1792		      (R300_ALU_ALPHA_ADDR0(0) |
1793		       R300_ALU_ALPHA_ADDR1(1) |
1794		       R300_ALU_ALPHA_ADDR2(0) |
1795		       R300_ALU_ALPHA_ADDRD(0) |
1796		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1797		       R300_ALU_ALPHA_TARGET_A |
1798		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1799	/* Alpha inst
1800	 * ALU operation
1801	 */
1802	OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0),
1803		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1804		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1805		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1806		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1807		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1808		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1809		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1810		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1811		       R300_ALU_ALPHA_CLAMP));
1812	FINISH_ACCEL();
1813    } else {
1814	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1815	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1816			 R500_ALU_RGB_G_SWIZ_A_0 |
1817			 R500_ALU_RGB_B_SWIZ_A_0);
1818	else
1819	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1820			 R500_ALU_RGB_G_SWIZ_A_G |
1821			 R500_ALU_RGB_B_SWIZ_A_B);
1822
1823	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1824	    src_alpha = R500_ALPHA_SWIZ_A_1;
1825	else
1826	    src_alpha = R500_ALPHA_SWIZ_A_A;
1827
1828	if (pMask) {
1829	    if (pMaskPicture->componentAlpha) {
1830		if (RadeonBlendOp[op].src_alpha) {
1831		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1832			src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1833				     R500_ALU_RGB_G_SWIZ_A_1 |
1834				     R500_ALU_RGB_B_SWIZ_A_1);
1835		    else
1836			src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1837				     R500_ALU_RGB_G_SWIZ_A_A |
1838				     R500_ALU_RGB_B_SWIZ_A_A);
1839		} else
1840		    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1841				 R500_ALU_RGB_G_SWIZ_A_G |
1842				 R500_ALU_RGB_B_SWIZ_A_B);
1843
1844		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1845			      R500_ALU_RGB_G_SWIZ_B_G |
1846			      R500_ALU_RGB_B_SWIZ_B_B);
1847	    } else {
1848		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1849		    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1850				  R500_ALU_RGB_G_SWIZ_B_1 |
1851				  R500_ALU_RGB_B_SWIZ_B_1);
1852		else
1853		    mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1854				  R500_ALU_RGB_G_SWIZ_B_A |
1855				  R500_ALU_RGB_B_SWIZ_B_A);
1856	    }
1857	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1858		mask_alpha = R500_ALPHA_SWIZ_B_1;
1859	    else
1860		mask_alpha = R500_ALPHA_SWIZ_B_A;
1861	} else {
1862	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1863			  R500_ALU_RGB_G_SWIZ_B_1 |
1864			  R500_ALU_RGB_B_SWIZ_B_1);
1865	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1866	}
1867
1868	BEGIN_ACCEL(7);
1869	if (pMask) {
1870	    /* 4 components: 2 for tex0, 2 for tex1 */
1871	    OUT_ACCEL_REG(R300_RS_COUNT,
1872			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1873			   R300_RS_COUNT_HIRES_EN));
1874
1875	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1876	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1877
1878	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1879					      R500_US_CODE_END_ADDR(2)));
1880	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1881					       R500_US_CODE_RANGE_SIZE(2)));
1882	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1883	} else {
1884	    OUT_ACCEL_REG(R300_RS_COUNT,
1885			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1886			   R300_RS_COUNT_HIRES_EN));
1887
1888	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1889
1890	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1891					      R500_US_CODE_END_ADDR(1)));
1892	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1893					       R500_US_CODE_RANGE_SIZE(1)));
1894	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1895	}
1896
1897	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1898	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1899	FINISH_ACCEL();
1900
1901	if (pMask) {
1902	    BEGIN_ACCEL(19);
1903	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1904	    /* tex inst for src texture */
1905	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1906						   R500_INST_RGB_WMASK_R |
1907						   R500_INST_RGB_WMASK_G |
1908						   R500_INST_RGB_WMASK_B |
1909						   R500_INST_ALPHA_WMASK |
1910						   R500_INST_RGB_CLAMP |
1911						   R500_INST_ALPHA_CLAMP));
1912
1913	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1914						   R500_TEX_INST_LD |
1915						   R500_TEX_IGNORE_UNCOVERED));
1916
1917	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1918						   R500_TEX_SRC_S_SWIZ_R |
1919						   R500_TEX_SRC_T_SWIZ_G |
1920						   R500_TEX_DST_ADDR(0) |
1921						   R500_TEX_DST_R_SWIZ_R |
1922						   R500_TEX_DST_G_SWIZ_G |
1923						   R500_TEX_DST_B_SWIZ_B |
1924						   R500_TEX_DST_A_SWIZ_A));
1925	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1926						   R500_DX_S_SWIZ_R |
1927						   R500_DX_T_SWIZ_R |
1928						   R500_DX_R_SWIZ_R |
1929						   R500_DX_Q_SWIZ_R |
1930						   R500_DY_ADDR(0) |
1931						   R500_DY_S_SWIZ_R |
1932						   R500_DY_T_SWIZ_R |
1933						   R500_DY_R_SWIZ_R |
1934						   R500_DY_Q_SWIZ_R));
1935	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1936	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1937
1938	    /* tex inst for mask texture */
1939	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1940						   R500_INST_TEX_SEM_WAIT |
1941						   R500_INST_RGB_WMASK_R |
1942						   R500_INST_RGB_WMASK_G |
1943						   R500_INST_RGB_WMASK_B |
1944						   R500_INST_ALPHA_WMASK |
1945						   R500_INST_RGB_CLAMP |
1946						   R500_INST_ALPHA_CLAMP));
1947
1948	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
1949						   R500_TEX_INST_LD |
1950						   R500_TEX_SEM_ACQUIRE |
1951						   R500_TEX_IGNORE_UNCOVERED));
1952
1953	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
1954						   R500_TEX_SRC_S_SWIZ_R |
1955						   R500_TEX_SRC_T_SWIZ_G |
1956						   R500_TEX_DST_ADDR(1) |
1957						   R500_TEX_DST_R_SWIZ_R |
1958						   R500_TEX_DST_G_SWIZ_G |
1959						   R500_TEX_DST_B_SWIZ_B |
1960						   R500_TEX_DST_A_SWIZ_A));
1961	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
1962						   R500_DX_S_SWIZ_R |
1963						   R500_DX_T_SWIZ_R |
1964						   R500_DX_R_SWIZ_R |
1965						   R500_DX_Q_SWIZ_R |
1966						   R500_DY_ADDR(1) |
1967						   R500_DY_S_SWIZ_R |
1968						   R500_DY_T_SWIZ_R |
1969						   R500_DY_R_SWIZ_R |
1970						   R500_DY_Q_SWIZ_R));
1971	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1972	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1973	} else {
1974	    BEGIN_ACCEL(13);
1975	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1976	    /* tex inst for src texture */
1977	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1978						   R500_INST_TEX_SEM_WAIT |
1979						   R500_INST_RGB_WMASK_R |
1980						   R500_INST_RGB_WMASK_G |
1981						   R500_INST_RGB_WMASK_B |
1982						   R500_INST_ALPHA_WMASK |
1983						   R500_INST_RGB_CLAMP |
1984						   R500_INST_ALPHA_CLAMP));
1985
1986	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1987						   R500_TEX_INST_LD |
1988						   R500_TEX_SEM_ACQUIRE |
1989						   R500_TEX_IGNORE_UNCOVERED));
1990
1991	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1992						   R500_TEX_SRC_S_SWIZ_R |
1993						   R500_TEX_SRC_T_SWIZ_G |
1994						   R500_TEX_DST_ADDR(0) |
1995						   R500_TEX_DST_R_SWIZ_R |
1996						   R500_TEX_DST_G_SWIZ_G |
1997						   R500_TEX_DST_B_SWIZ_B |
1998						   R500_TEX_DST_A_SWIZ_A));
1999	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
2000						   R500_DX_S_SWIZ_R |
2001						   R500_DX_T_SWIZ_R |
2002						   R500_DX_R_SWIZ_R |
2003						   R500_DX_Q_SWIZ_R |
2004						   R500_DY_ADDR(0) |
2005						   R500_DY_S_SWIZ_R |
2006						   R500_DY_T_SWIZ_R |
2007						   R500_DY_R_SWIZ_R |
2008						   R500_DY_Q_SWIZ_R));
2009	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2010	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2011	}
2012
2013	/* ALU inst */
2014	/* *_OMASK* - output component write mask */
2015	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
2016					       R500_INST_TEX_SEM_WAIT |
2017					       R500_INST_LAST |
2018					       R500_INST_RGB_OMASK_R |
2019					       R500_INST_RGB_OMASK_G |
2020					       R500_INST_RGB_OMASK_B |
2021					       R500_INST_ALPHA_OMASK |
2022					       R500_INST_RGB_CLAMP |
2023					       R500_INST_ALPHA_CLAMP));
2024	/* ALU inst
2025	 * temp addresses for texture inputs
2026	 * RGB_ADDR0 is src tex (temp 0)
2027	 * RGB_ADDR1 is mask tex (temp 1)
2028	 */
2029	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2030					       R500_RGB_ADDR1(1) |
2031					       R500_RGB_ADDR2(0)));
2032	/* ALU inst
2033	 * temp addresses for texture inputs
2034	 * ALPHA_ADDR0 is src tex (temp 0)
2035	 * ALPHA_ADDR1 is mask tex (temp 1)
2036	 */
2037	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2038					       R500_ALPHA_ADDR1(1) |
2039					       R500_ALPHA_ADDR2(0)));
2040
2041	/* R500_ALU_RGB_TARGET - RGB render target */
2042	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2043					       src_color |
2044					       R500_ALU_RGB_SEL_B_SRC1 |
2045					       mask_color |
2046					       R500_ALU_RGB_TARGET(0)));
2047
2048	/* R500_ALPHA_RGB_TARGET - alpha render target */
2049	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
2050					       R500_ALPHA_ADDRD(0) |
2051					       R500_ALPHA_SEL_A_SRC0 |
2052					       src_alpha |
2053					       R500_ALPHA_SEL_B_SRC1 |
2054					       mask_alpha |
2055					       R500_ALPHA_TARGET(0)));
2056
2057	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
2058					       R500_ALU_RGBA_ADDRD(0) |
2059					       R500_ALU_RGBA_R_SWIZ_0 |
2060					       R500_ALU_RGBA_G_SWIZ_0 |
2061					       R500_ALU_RGBA_B_SWIZ_0 |
2062					       R500_ALU_RGBA_A_SWIZ_0));
2063	FINISH_ACCEL();
2064    }
2065
2066    /* Clear out scissoring */
2067    BEGIN_ACCEL(2);
2068    if (IS_R300_3D) {
2069	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((1440 << R300_SCISSOR_X_SHIFT) |
2070					 (1440 << R300_SCISSOR_Y_SHIFT)));
2071	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2072					 ((pDst->drawable.height + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
2073
2074    } else {
2075	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
2076					 (0 << R300_SCISSOR_Y_SHIFT)));
2077	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width - 1) << R300_SCISSOR_X_SHIFT) |
2078					 ((pDst->drawable.height - 1) << R300_SCISSOR_Y_SHIFT)));
2079    }
2080    FINISH_ACCEL();
2081
2082
2083    BEGIN_ACCEL_RELOC(3, 2);
2084    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pDst);
2085    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pDst);
2086
2087    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
2088    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
2089
2090    FINISH_ACCEL();
2091
2092    BEGIN_ACCEL(1);
2093    if (pMask)
2094	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 6);
2095    else
2096	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 4);
2097    FINISH_ACCEL();
2098
2099    return TRUE;
2100}
2101
2102static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
2103{
2104    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2105    ACCEL_PREAMBLE();
2106
2107    ENTER_DRAW(0);
2108
2109    if (info->accel_state->draw_header) {
2110	if (info->ChipFamily < CHIP_FAMILY_R200) {
2111	    info->accel_state->draw_header[0] = CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2112							   info->accel_state->num_vtx *
2113							   info->accel_state->vtx_count + 1);
2114	    info->accel_state->draw_header[2] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2115						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2116						 RADEON_CP_VC_CNTL_MAOS_ENABLE |
2117						 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2118						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2119	} else if (IS_R300_3D || IS_R500_3D) {
2120	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2121							   info->accel_state->num_vtx *
2122							   info->accel_state->vtx_count);
2123	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2124						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2125						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2126	} else {
2127	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2128							   info->accel_state->num_vtx *
2129							   info->accel_state->vtx_count);
2130	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2131						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2132						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2133	}
2134	info->accel_state->draw_header = NULL;
2135    }
2136
2137    if (IS_R300_3D || IS_R500_3D) {
2138	BEGIN_ACCEL(3);
2139	OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
2140	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2141    } else
2142	BEGIN_ACCEL(1);
2143    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2144    FINISH_ACCEL();
2145
2146    LEAVE_DRAW(0);
2147}
2148
2149
/*
 * Vertex emission helpers.
 *
 * VTX_OUT writes one vertex as four floats (destination x/y, source x/y).
 * VTX_OUT_MASK writes the same four floats followed by the mask x/y pair.
 * The CP build pushes the values with OUT_RING_F; the other build writes
 * each value to RADEON_SE_PORT_DATA0 with OUT_ACCEL_REG_F.
 */
#ifdef ACCEL_CP

#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)	\
do {								\
    OUT_RING_F(_dstX);						\
    OUT_RING_F(_dstY);						\
    OUT_RING_F(_srcX);						\
    OUT_RING_F(_srcY);						\
} while (0)

#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);			\
    OUT_RING_F(_maskX);						\
    OUT_RING_F(_maskY);						\
} while (0)

#else /* !ACCEL_CP */

#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)	\
do {								\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY);		\
} while (0)

#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY);		\
} while (0)

#endif /* ACCEL_CP */
2191
#ifdef ONLY_ONCE
/*
 * Run a fixed-point 2D point through a picture transform, in place.
 *
 * The point is widened to a three-component vector whose last component is
 * xFixed1, handed to PictureTransformPoint(), and the first two components
 * of the result are copied back into *point.  The return value of
 * PictureTransformPoint() is not examined.
 *
 * Only compiled while ONLY_ONCE is defined.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector homogeneous;

    homogeneous.vector[0] = point->x;
    homogeneous.vector[1] = point->y;
    homogeneous.vector[2] = xFixed1;

    PictureTransformPoint(transform, &homogeneous);

    point->x = homogeneous.vector[0];
    point->y = homogeneous.vector[1];
}
#endif
2204
2205static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
2206					   RADEONInfoPtr info,
2207					   PixmapPtr pDst,
2208					   int srcX, int srcY,
2209					   int maskX, int maskY,
2210					   int dstX, int dstY,
2211					   int w, int h)
2212{
2213    int vtx_count;
2214    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
2215    static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
2216    ACCEL_PREAMBLE();
2217
2218    ENTER_DRAW(0);
2219
2220    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
2221       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
2222
2223#if defined(ACCEL_CP)
2224    if ((info->cs && CS_FULL(info->cs)) ||
2225	(!info->cs && (info->cp->indirectBuffer->used + 4 * 32) >
2226	 info->cp->indirectBuffer->total)) {
2227	FUNC_NAME(RadeonDoneComposite)(info->accel_state->dst_pix);
2228	if (info->cs)
2229	    radeon_cs_flush_indirect(pScrn);
2230	else
2231	    RADEONCPFlushIndirect(pScrn, 1);
2232	info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
2233						 info->accel_state->src_pic,
2234						 info->accel_state->msk_pic,
2235						 info->accel_state->dst_pic,
2236						 info->accel_state->src_pix,
2237						 info->accel_state->msk_pix,
2238						 info->accel_state->dst_pix);
2239    }
2240#endif
2241
2242    srcTopLeft.x     = IntToxFixed(srcX);
2243    srcTopLeft.y     = IntToxFixed(srcY);
2244    srcTopRight.x    = IntToxFixed(srcX + w);
2245    srcTopRight.y    = IntToxFixed(srcY);
2246    srcBottomLeft.x  = IntToxFixed(srcX);
2247    srcBottomLeft.y  = IntToxFixed(srcY + h);
2248    srcBottomRight.x = IntToxFixed(srcX + w);
2249    srcBottomRight.y = IntToxFixed(srcY + h);
2250
2251    if (info->accel_state->is_transform[0]) {
2252	if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2253	    transformPoint(info->accel_state->transform[0], &srcTopLeft);
2254	    transformPoint(info->accel_state->transform[0], &srcTopRight);
2255	    transformPoint(info->accel_state->transform[0], &srcBottomLeft);
2256	    transformPoint(info->accel_state->transform[0], &srcBottomRight);
2257	}
2258    }
2259
2260    if (info->accel_state->msk_pic) {
2261	maskTopLeft.x     = IntToxFixed(maskX);
2262	maskTopLeft.y     = IntToxFixed(maskY);
2263	maskTopRight.x    = IntToxFixed(maskX + w);
2264	maskTopRight.y    = IntToxFixed(maskY);
2265	maskBottomLeft.x  = IntToxFixed(maskX);
2266	maskBottomLeft.y  = IntToxFixed(maskY + h);
2267	maskBottomRight.x = IntToxFixed(maskX + w);
2268	maskBottomRight.y = IntToxFixed(maskY + h);
2269
2270	if (info->accel_state->is_transform[1]) {
2271	    if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2272		transformPoint(info->accel_state->transform[1], &maskTopLeft);
2273		transformPoint(info->accel_state->transform[1], &maskTopRight);
2274		transformPoint(info->accel_state->transform[1], &maskBottomLeft);
2275		transformPoint(info->accel_state->transform[1], &maskBottomRight);
2276	    }
2277	}
2278
2279	vtx_count = 6;
2280    } else
2281	vtx_count = 4;
2282
2283    if (info->accel_state->vsync)
2284	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
2285				      radeon_pick_best_crtc(pScrn, dstX, dstX + w, dstY, dstY + h),
2286				      dstY, dstY + h);
2287
2288#ifdef ACCEL_CP
2289    if (info->ChipFamily < CHIP_FAMILY_R200) {
2290	if (!info->accel_state->draw_header) {
2291	    BEGIN_RING(3);
2292
2293#ifdef XF86DRM_MODE
2294	    if (info->cs)
2295		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2296	    else
2297#endif
2298		info->accel_state->draw_header = __head;
2299	    info->accel_state->num_vtx = 0;
2300	    info->accel_state->vtx_count = vtx_count;
2301
2302	    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2303				3 * vtx_count + 1));
2304	    if (info->accel_state->msk_pic)
2305		OUT_RING(RADEON_CP_VC_FRMT_XY |
2306			 RADEON_CP_VC_FRMT_ST0 |
2307			 RADEON_CP_VC_FRMT_ST1);
2308	    else
2309		OUT_RING(RADEON_CP_VC_FRMT_XY |
2310			 RADEON_CP_VC_FRMT_ST0);
2311	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2312		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2313		     RADEON_CP_VC_CNTL_MAOS_ENABLE |
2314		     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2315		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2316	    ADVANCE_RING();
2317	}
2318
2319	info->accel_state->num_vtx += 3;
2320	BEGIN_RING(3 * vtx_count);
2321    } else if (IS_R300_3D || IS_R500_3D) {
2322	if (!info->accel_state->draw_header) {
2323	    BEGIN_RING(2);
2324
2325#ifdef XF86DRM_MODE
2326	    if (info->cs)
2327		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2328	    else
2329#endif
2330		info->accel_state->draw_header = __head;
2331	    info->accel_state->num_vtx = 0;
2332	    info->accel_state->vtx_count = vtx_count;
2333
2334	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2335				4 * vtx_count));
2336	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2337		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2338		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2339	    ADVANCE_RING();
2340	}
2341
2342	info->accel_state->num_vtx += 4;
2343	BEGIN_RING(4 * vtx_count);
2344    } else {
2345	if (!info->accel_state->draw_header) {
2346	    BEGIN_RING(2);
2347
2348#ifdef XF86DRM_MODE
2349	    if (info->cs)
2350		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2351	    else
2352#endif
2353		info->accel_state->draw_header = __head;
2354	    info->accel_state->num_vtx = 0;
2355	    info->accel_state->vtx_count = vtx_count;
2356
2357	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2358				3 * vtx_count));
2359	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2360		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2361		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2362	    ADVANCE_RING();
2363	}
2364
2365	info->accel_state->num_vtx += 3;
2366	BEGIN_RING(3 * vtx_count);
2367    }
2368
2369#else /* ACCEL_CP */
2370    if (IS_R300_3D || IS_R500_3D)
2371	BEGIN_ACCEL(2 + vtx_count * 4);
2372    else
2373	BEGIN_ACCEL(1 + vtx_count * 3);
2374
2375    if (info->ChipFamily < CHIP_FAMILY_R200)
2376	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2377					  RADEON_VF_PRIM_WALK_DATA |
2378					  RADEON_VF_RADEON_MODE |
2379					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2380    else if (IS_R300_3D || IS_R500_3D)
2381	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
2382					  RADEON_VF_PRIM_WALK_DATA |
2383					  (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
2384    else
2385	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2386					  RADEON_VF_PRIM_WALK_DATA |
2387					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2388
2389#endif
2390
2391    if (info->accel_state->msk_pic) {
2392	if (IS_R300_3D || IS_R500_3D) {
2393	    VTX_OUT_MASK((float)dstX,                                      (float)dstY,
2394			 xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0],
2395			 xFixedToFloat(maskTopLeft.x) / info->accel_state->texW[1],     xFixedToFloat(maskTopLeft.y) / info->accel_state->texH[1]);
2396	}
2397	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
2398		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0],
2399		xFixedToFloat(maskBottomLeft.x) / info->accel_state->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->accel_state->texH[1]);
2400	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
2401		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0],
2402		xFixedToFloat(maskBottomRight.x) / info->accel_state->texW[1], xFixedToFloat(maskBottomRight.y) / info->accel_state->texH[1]);
2403	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
2404		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0],
2405		xFixedToFloat(maskTopRight.x) / info->accel_state->texW[1],    xFixedToFloat(maskTopRight.y) / info->accel_state->texH[1]);
2406    } else {
2407	if (IS_R300_3D || IS_R500_3D) {
2408	    VTX_OUT((float)dstX,                                      (float)dstY,
2409		    xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0]);
2410	}
2411	VTX_OUT((float)dstX,                                      (float)(dstY + h),
2412		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0]);
2413	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
2414		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0]);
2415	VTX_OUT((float)(dstX + w),                                (float)dstY,
2416		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
2417    }
2418
2419#ifdef ACCEL_CP
2420    ADVANCE_RING();
2421#else
2422    FINISH_ACCEL();
2423#endif /* !ACCEL_CP */
2424
2425    LEAVE_DRAW(0);
2426}
2427#undef VTX_OUT
2428#undef VTX_OUT_MASK
2429
2430static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
2431				       int srcX, int srcY,
2432				       int maskX, int maskY,
2433				       int dstX, int dstY,
2434				       int width, int height)
2435{
2436    int tileSrcY, tileMaskY, tileDstY;
2437    int remainingHeight;
2438    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2439
2440    if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) {
2441	FUNC_NAME(RadeonCompositeTile)(pScrn,
2442				       info,
2443				       pDst,
2444				       srcX, srcY,
2445				       maskX, maskY,
2446				       dstX, dstY,
2447				       width, height);
2448	return;
2449    }
2450
2451    /* Tiling logic borrowed from exaFillRegionTiled */
2452
2453    modulus(srcY, info->accel_state->src_tile_height, tileSrcY);
2454    tileMaskY = maskY;
2455    tileDstY = dstY;
2456
2457    remainingHeight = height;
2458    while (remainingHeight > 0) {
2459	int remainingWidth = width;
2460	int tileSrcX, tileMaskX, tileDstX;
2461	int h = info->accel_state->src_tile_height - tileSrcY;
2462
2463	if (h > remainingHeight)
2464	    h = remainingHeight;
2465	remainingHeight -= h;
2466
2467	modulus(srcX, info->accel_state->src_tile_width, tileSrcX);
2468	tileMaskX = maskX;
2469	tileDstX = dstX;
2470
2471	while (remainingWidth > 0) {
2472	    int w = info->accel_state->src_tile_width - tileSrcX;
2473	    if (w > remainingWidth)
2474		w = remainingWidth;
2475	    remainingWidth -= w;
2476
2477	    FUNC_NAME(RadeonCompositeTile)(pScrn,
2478					   info,
2479					   pDst,
2480					   tileSrcX, tileSrcY,
2481					   tileMaskX, tileMaskY,
2482					   tileDstX, tileDstY,
2483					   w, h);
2484
2485	    tileSrcX = 0;
2486	    tileMaskX += w;
2487	    tileDstX += w;
2488	}
2489	tileSrcY = 0;
2490	tileMaskY += h;
2491	tileDstY += h;
2492    }
2493}
2494
2495#undef ONLY_ONCE
2496#undef FUNC_NAME
2497