radeon_exa_render.c revision b13dfe66
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *    Alex Deucher <alexander.deucher@amd.com>
30 *
31 */
32
/* Exactly one of ACCEL_MMIO / ACCEL_CP selects which flavour of the
 * accel functions is generated from the code below.
 */
#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
#error Cannot define both MMIO and CP acceleration!
#endif

/* FUNC_NAME_CAT pastes two tokens together: with the ANSI "##" operator
 * unless UNIXCPP is defined without ANSICPP, in which case the
 * empty-comment form is used instead.
 */
#if !defined(UNIXCPP) || defined(ANSICPP)
#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
#else
#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
#endif

/* FUNC_NAME(x) expands to xMMIO or xCP depending on the accel type;
 * it is a build error if neither accel type is defined.
 */
#ifdef ACCEL_MMIO
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
#else
#ifdef ACCEL_CP
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
#else
#error No accel type defined!
#endif
#endif

/* ONLY_ONCE is defined whenever ACCEL_CP is not; the accel-independent
 * definitions below are wrapped in #ifdef ONLY_ONCE.
 */
#ifndef ACCEL_CP
#define ONLY_ONCE
#endif
56
57/* Only include the following (generic) bits once. */
58#ifdef ONLY_ONCE
59
/* One entry of the blend-op table: the hardware blend factors for an
 * operator, plus flags recording which alpha channels those factors read.
 */
struct blendinfo {
    Bool dst_alpha;		/* source blend factor reads destination alpha */
    Bool src_alpha;		/* destination blend factor reads source alpha */
    uint32_t blend_cntl;	/* RADEON_SRC_BLEND_* | RADEON_DST_BLEND_* */
};
65
66static struct blendinfo RadeonBlendOp[] = {
67    /* Clear */
68    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
69    /* Src */
70    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
71    /* Dst */
72    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
73    /* Over */
74    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
75    /* OverReverse */
76    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
77    /* In */
78    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
79    /* InReverse */
80    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
81    /* Out */
82    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
83    /* OutReverse */
84    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
85    /* Atop */
86    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
87    /* AtopReverse */
88    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
89    /* Xor */
90    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
91    /* Add */
92    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
93};
94
/* Pairs a Render picture format with the texture format word that is
 * programmed into the card for it.
 */
struct formatinfo {
    int fmt;		/* picture format, compared against pPict->format */
    uint32_t card_fmt;	/* hardware texture format bits for that format */
};
99
100/* Note on texture formats:
101 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
102 */
103static struct formatinfo R100TexFormats[] = {
104	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
105	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
106	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
107	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
108	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
109	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
110};
111
112static struct formatinfo R200TexFormats[] = {
113    {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
114    {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
115    {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
116    {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
117    {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
118    {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
119    {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
120    {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
121};
122
123static struct formatinfo R300TexFormats[] = {
124    {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
125    {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
126    {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
127    {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
128#ifdef PICT_TYPE_BGRA
129    {PICT_b8g8r8a8,	R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)},
130    {PICT_b8g8r8x8,	R300_EASY_TX_FORMAT(W, Z, Y, ONE, W8Z8Y8X8)},
131#endif
132    {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
133    {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
134    {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
135    {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
136};
137
138/* Common Radeon setup code */
139
140static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
141{
142    switch (pDstPicture->format) {
143    case PICT_a8r8g8b8:
144    case PICT_x8r8g8b8:
145	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
146	break;
147    case PICT_r5g6b5:
148	*dst_format = RADEON_COLOR_FORMAT_RGB565;
149	break;
150    case PICT_a1r5g5b5:
151    case PICT_x1r5g5b5:
152	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
153	break;
154    case PICT_a8:
155	*dst_format = RADEON_COLOR_FORMAT_RGB8;
156	break;
157    default:
158	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
159			(int)pDstPicture->format));
160    }
161
162    return TRUE;
163}
164
165static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
166{
167    switch (pDstPicture->format) {
168    case PICT_a8r8g8b8:
169    case PICT_x8r8g8b8:
170    case PICT_a8b8g8r8:
171    case PICT_x8b8g8r8:
172#ifdef PICT_TYPE_BGRA
173    case PICT_b8g8r8a8:
174    case PICT_b8g8r8x8:
175#endif
176	*dst_format = R300_COLORFORMAT_ARGB8888;
177	break;
178    case PICT_r5g6b5:
179	*dst_format = R300_COLORFORMAT_RGB565;
180	break;
181    case PICT_a1r5g5b5:
182    case PICT_x1r5g5b5:
183	*dst_format = R300_COLORFORMAT_ARGB1555;
184	break;
185    case PICT_a8:
186	*dst_format = R300_COLORFORMAT_I8;
187	break;
188    default:
189	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
190	       (int)pDstPicture->format));
191    }
192    return TRUE;
193}
194
195static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
196{
197    uint32_t sblend, dblend;
198
199    sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK;
200    dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK;
201
202    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
203     * it as always 1.
204     */
205    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
206	if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
207	    sblend = RADEON_SRC_BLEND_GL_ONE;
208	else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
209	    sblend = RADEON_SRC_BLEND_GL_ZERO;
210    }
211
212    /* If the source alpha is being used, then we should only be in a case where
213     * the source blend factor is 0, and the source blend value is the mask
214     * channels multiplied by the source picture's alpha.
215     */
216    if (pMask && pMask->componentAlpha && RadeonBlendOp[op].src_alpha) {
217	if (dblend == RADEON_DST_BLEND_GL_SRC_ALPHA) {
218	    dblend = RADEON_DST_BLEND_GL_SRC_COLOR;
219	} else if (dblend == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA) {
220	    dblend = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
221	}
222    }
223
224    return sblend | dblend;
225}
226
/* Overlays a float and a uint32_t on the same storage, so a value can be
 * written through one member and read through the other.
 */
union intfloat {
    float f;
    uint32_t i;
};
231
232/* Check if we need a software-fallback because of a repeating
233 *   non-power-of-two texture.
234 *
235 * canTile: whether we can emulate a repeat by drawing in tiles:
236 *   possible for the source, but not for the mask. (Actually
237 *   we could do tiling for the mask too, but dealing with the
238 *   combination of a tiled mask and a tiled source would be
239 *   a lot of complexity, so we handle only the most common
240 *   case of a repeating mask.)
241 */
242static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
243{
244    int w = pPict->pDrawable->width;
245    int h = pPict->pDrawable->height;
246    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
247
248    if ((repeatType == RepeatNormal || repeatType == RepeatReflect) &&
249	((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
250	!(repeatType == RepeatNormal && !pPict->transform && canTile))
251	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
252			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
253
254    return TRUE;
255}
256
257/* Determine if the pitch of the pixmap meets the criteria for being
258 * used as a repeating texture: no padding or only a single line texture.
259 */
260static Bool RADEONPitchMatches(PixmapPtr pPix)
261{
262    int w = pPix->drawable.width;
263    int h = pPix->drawable.height;
264    uint32_t txpitch = exaGetPixmapPitch(pPix);
265
266    if (h > 1 && (RADEON_ALIGN(w * pPix->drawable.bitsPerPixel / 8, 32)) != txpitch)
267	return FALSE;
268
269    return TRUE;
270}
271
272/* We can't turn on repeats normally for a non-power-of-two dimension,
273 * but if the source isn't transformed, we can get the same effect
274 * by drawing the image in multiple tiles. (A common case that it's
275 * important to get right is drawing a strip of a NPOTxPOT texture
 * repeating in the POT direction. With tiling, this ends up as
 * a single tile on R300 and newer, which is perfect.)
278 *
279 * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
280 *   one direction and POT in the other in the POT direction; on
281 *   older chips we can only repeat at all if the texture is POT in
282 *   both directions.
283 *
284 * needMatchingPitch: On R100/R200, we can only repeat horizontally if
285 *   there is no padding in the texture. Textures with small POT widths
286 *   (1,2,4,8) thus can't be tiled.
287 */
288static Bool RADEONSetupSourceTile(PicturePtr pPict,
289				  PixmapPtr pPix,
290				  Bool canTile1d,
291				  Bool needMatchingPitch)
292{
293    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
294    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
295
296    info->accel_state->need_src_tile_x = info->accel_state->need_src_tile_y = FALSE;
297    info->accel_state->src_tile_width = info->accel_state->src_tile_height = 65536; /* "infinite" */
298
299    if (repeatType == RepeatNormal || repeatType == RepeatReflect) {
300	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
301
302	int w = pPict->pDrawable->width;
303	int h = pPict->pDrawable->height;
304
305	if (pPict->transform) {
306	    if (badPitch)
307		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
308				 w, (unsigned)exaGetPixmapPitch(pPix)));
309	} else {
310	    info->accel_state->need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
311	    info->accel_state->need_src_tile_y = (h & (h - 1)) != 0;
312
313	    if ((info->accel_state->need_src_tile_x ||
314		 info->accel_state->need_src_tile_y) &&
315		repeatType != RepeatNormal)
316		RADEON_FALLBACK(("Can only tile RepeatNormal at this time\n"));
317
318	    if (!canTile1d)
319		info->accel_state->need_src_tile_x =
320		    info->accel_state->need_src_tile_y =
321		    info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y;
322	}
323
324	if (info->accel_state->need_src_tile_x)
325	    info->accel_state->src_tile_width = w;
326	if (info->accel_state->need_src_tile_y)
327	    info->accel_state->src_tile_height = h;
328    }
329
330    return TRUE;
331}
332
333/* R100-specific code */
334
335static Bool R100CheckCompositeTexture(PicturePtr pPict,
336				      PicturePtr pDstPict,
337				      int op,
338				      int unit)
339{
340    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
341    int w = pPict->pDrawable->width;
342    int h = pPict->pDrawable->height;
343    int i;
344
345    /* r100 limit should be 2048, there are issues with 2048
346     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
347     */
348
349    if ((w > 2047) || (h > 2047))
350	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
351
352    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
353	if (R100TexFormats[i].fmt == pPict->format)
354	    break;
355    }
356    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
357	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
358			(int)pPict->format));
359
360    if (!RADEONCheckTexturePOT(pPict, unit == 0))
361	return FALSE;
362
363    if (pPict->filter != PictFilterNearest &&
364	pPict->filter != PictFilterBilinear)
365    {
366	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
367    }
368
369    /* for REPEAT_NONE, Render semantics are that sampling outside the source
370     * picture results in alpha=0 pixels. We can implement this with a border color
371     * *if* our source texture has an alpha channel, otherwise we need to fall
372     * back. If we're not transformed then we hope that upper layers have clipped
373     * rendering to the bounds of the source drawable, in which case it doesn't
374     * matter. I have not, however, verified that the X server always does such
375     * clipping.
376     */
377    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
378	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
379	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
380    }
381
382    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
383	RADEON_FALLBACK(("non-affine transforms not supported\n"));
384
385    return TRUE;
386}
387
388#endif /* ONLY_ONCE */
389
390static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
391					int unit)
392{
393    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
394    uint32_t txfilter, txformat, txoffset, txpitch;
395    int w = pPict->pDrawable->width;
396    int h = pPict->pDrawable->height;
397    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
398    Bool repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
399	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
400    int i;
401    struct radeon_exa_pixmap_priv *driver_priv;
402    ACCEL_PREAMBLE();
403
404    txpitch = exaGetPixmapPitch(pPix);
405    txoffset = 0;
406
407    CHECK_OFFSET(pPix, 0x1f, "texture");
408
409    if ((txpitch & 0x1f) != 0)
410	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
411
412    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
413    {
414	if (R100TexFormats[i].fmt == pPict->format)
415	    break;
416    }
417    txformat = R100TexFormats[i].card_fmt;
418    if (RADEONPixmapIsColortiled(pPix))
419	txoffset |= RADEON_TXO_MACRO_TILE;
420
421    if (repeat) {
422	if (!RADEONPitchMatches(pPix))
423	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
424			     w, (unsigned)txpitch));
425
426	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
427	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
428    } else
429	txformat |= RADEON_TXFORMAT_NON_POWER2;
430    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
431
432    info->accel_state->texW[unit] = w;
433    info->accel_state->texH[unit] = h;
434
435    switch (pPict->filter) {
436    case PictFilterNearest:
437	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
438	break;
439    case PictFilterBilinear:
440	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
441	break;
442    default:
443	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
444    }
445
446    switch (repeatType) {
447	case RepeatNormal:
448	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
449		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
450	    else
451	        txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
452	    break;
453	case RepeatPad:
454	    txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
455	    break;
456	case RepeatReflect:
457	    txfilter |= RADEON_CLAMP_S_MIRROR | RADEON_CLAMP_T_MIRROR;
458	    break;
459	case RepeatNone:
460	    /* don't set an illegal clamp mode for rects */
461	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
462		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
463	    break;
464    }
465
466    BEGIN_ACCEL_RELOC(5, 1);
467    if (unit == 0) {
468	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
469	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
470	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
471	    (pPix->drawable.width - 1) |
472	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
473	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
474
475	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_0, txoffset, pPix);
476	/* emit a texture relocation */
477    } else {
478	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
479	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
480
481	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
482	    (pPix->drawable.width - 1) |
483	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
484	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
485	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_1, txoffset, pPix);
486	/* emit a texture relocation */
487    }
488    FINISH_ACCEL();
489
490    if (pPict->transform != 0) {
491	info->accel_state->is_transform[unit] = TRUE;
492	info->accel_state->transform[unit] = pPict->transform;
493    } else {
494	info->accel_state->is_transform[unit] = FALSE;
495    }
496
497    return TRUE;
498}
499
500#ifdef ONLY_ONCE
501
502
503static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
504			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
505{
506    PixmapPtr pSrcPixmap, pDstPixmap;
507    uint32_t tmp1;
508
509    /* Check for unsupported compositing operations. */
510    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
511	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
512
513    if (!pSrcPicture->pDrawable)
514	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
515
516    /* r100 limit should be 2048, there are issues with 2048
517     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
518     */
519
520    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
521
522    if (pSrcPixmap->drawable.width > 2047 ||
523	pSrcPixmap->drawable.height > 2047) {
524	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
525			 pSrcPixmap->drawable.width,
526			 pSrcPixmap->drawable.height));
527    }
528
529    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
530
531    if (pDstPixmap->drawable.width > 2047 ||
532	pDstPixmap->drawable.height > 2047) {
533	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
534			 pDstPixmap->drawable.width,
535			 pDstPixmap->drawable.height));
536    }
537
538    if (pMaskPicture) {
539	PixmapPtr pMaskPixmap;
540
541	if (!pMaskPicture->pDrawable)
542	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
543
544	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
545
546	if (pMaskPixmap->drawable.width > 2047 ||
547	    pMaskPixmap->drawable.height > 2047) {
548	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
549			     pMaskPixmap->drawable.width,
550			     pMaskPixmap->drawable.height));
551	}
552
553	if (pMaskPicture->componentAlpha) {
554	    /* Check if it's component alpha that relies on a source alpha and
555	     * on the source value.  We can only get one of those into the
556	     * single source value that we get to blend with.
557	     */
558	    if (RadeonBlendOp[op].src_alpha &&
559		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
560		RADEON_SRC_BLEND_GL_ZERO) {
561		RADEON_FALLBACK(("Component alpha not supported with source "
562				 "alpha and source value blending.\n"));
563	    }
564	}
565
566	if (!R100CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
567	    return FALSE;
568    }
569
570    if (!R100CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
571	return FALSE;
572
573    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
574	return FALSE;
575
576    return TRUE;
577}
578
579static Bool
580RADEONPrepareCompositeCS(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
581			    PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask,
582			    PixmapPtr pDst)
583{
584    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
585
586    info->accel_state->composite_op = op;
587    info->accel_state->dst_pic = pDstPicture;
588    info->accel_state->msk_pic = pMaskPicture;
589    info->accel_state->src_pic = pSrcPicture;
590    info->accel_state->dst_pix = pDst;
591    info->accel_state->msk_pix = pMask;
592    info->accel_state->src_pix = pSrc;
593
594#ifdef XF86DRM_MODE
595    if (info->cs) {
596	int ret;
597
598	radeon_cs_space_reset_bos(info->cs);
599
600	radeon_add_pixmap(info->cs, pSrc,
601			  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
602
603	if (pMask)
604	    radeon_add_pixmap(info->cs, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
605
606	radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
607
608	ret = radeon_cs_space_check(info->cs);
609	if (ret)
610	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
611    }
612#endif
613
614    return TRUE;
615}
616
617#endif /* ONLY_ONCE */
618
619static Bool FUNC_NAME(R100PrepareComposite)(int op,
620					    PicturePtr pSrcPicture,
621					    PicturePtr pMaskPicture,
622					    PicturePtr pDstPicture,
623					    PixmapPtr pSrc,
624					    PixmapPtr pMask,
625					    PixmapPtr pDst)
626{
627    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
628    uint32_t dst_format, dst_pitch, colorpitch;
629    uint32_t pp_cntl, blendcntl, cblend, ablend;
630    int pixel_shift;
631    struct radeon_exa_pixmap_priv *driver_priv;
632    ACCEL_PREAMBLE();
633
634    TRACE;
635
636    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
637	return FALSE;
638
639    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
640	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
641
642    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
643
644    dst_pitch = exaGetPixmapPitch(pDst);
645    colorpitch = dst_pitch >> pixel_shift;
646    if (RADEONPixmapIsColortiled(pDst))
647	colorpitch |= RADEON_COLOR_TILE_ENABLE;
648
649    CHECK_OFFSET(pDst, 0x0f, "destination");
650
651    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
652	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
653
654    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
655	return FALSE;
656
657    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
658			     pSrc, pMask, pDst);
659
660    /* switch to 3D after doing buffer space checks as the latter may flush */
661    RADEON_SWITCH_TO_3D();
662
663    if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
664	return FALSE;
665    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
666
667    if (pMask != NULL) {
668	if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1))
669	    return FALSE;
670	pp_cntl |= RADEON_TEX_1_ENABLE;
671    } else {
672	info->accel_state->is_transform[1] = FALSE;
673    }
674
675    BEGIN_ACCEL_RELOC(10, 2);
676    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
677    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
678    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
679    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
680
681    /* IN operator: Multiply src by mask components or mask alpha.
682     * BLEND_CTL_ADD is A * B + C.
683     * If a source is a8, we have to explicitly zero its color values.
684     * If the destination is a8, we have to route the alpha to red, I think.
685     * If we're doing component alpha where the source for blending is going to
686     * be the source alpha (and there's no source value used), we have to zero
687     * the source's color values.
688     */
689    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
690    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
691
692    if (pDstPicture->format == PICT_a8 ||
693	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
694    {
695	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
696    } else if (pSrcPicture->format == PICT_a8)
697	cblend |= RADEON_COLOR_ARG_A_ZERO;
698    else
699	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
700    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
701
702    if (pMask) {
703	if (pMaskPicture->componentAlpha &&
704	    pDstPicture->format != PICT_a8)
705	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
706	else
707	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
708	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
709    } else {
710	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
711	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
712    }
713
714    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
715    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
716    if (pMask)
717	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
718					  RADEON_SE_VTX_FMT_ST0 |
719					  RADEON_SE_VTX_FMT_ST1));
720    else
721	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
722					  RADEON_SE_VTX_FMT_ST0));
723    /* Op operator. */
724    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
725
726    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
727
728    OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0);
729    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) |
730					   ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT)));
731    FINISH_ACCEL();
732
733    return TRUE;
734}
735
736#ifdef ONLY_ONCE
737
738static Bool R200CheckCompositeTexture(PicturePtr pPict,
739				      PicturePtr pDstPict,
740				      int op,
741				      int unit)
742{
743    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
744    int w = pPict->pDrawable->width;
745    int h = pPict->pDrawable->height;
746    int i;
747
748    /* r200 limit should be 2048, there are issues with 2048
749     * see bug 19269
750     */
751
752    if ((w > 2047) || (h > 2047))
753	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
754
755    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
756    {
757	if (R200TexFormats[i].fmt == pPict->format)
758	    break;
759    }
760    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
761	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
762			 (int)pPict->format));
763
764    if (!RADEONCheckTexturePOT(pPict, unit == 0))
765	return FALSE;
766
767    if (pPict->filter != PictFilterNearest &&
768	pPict->filter != PictFilterBilinear)
769	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
770
771    /* for REPEAT_NONE, Render semantics are that sampling outside the source
772     * picture results in alpha=0 pixels. We can implement this with a border color
773     * *if* our source texture has an alpha channel, otherwise we need to fall
774     * back. If we're not transformed then we hope that upper layers have clipped
775     * rendering to the bounds of the source drawable, in which case it doesn't
776     * matter. I have not, however, verified that the X server always does such
777     * clipping.
778     */
779    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
780	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
781	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
782    }
783
784    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
785	RADEON_FALLBACK(("non-affine transforms not supported\n"));
786
787    return TRUE;
788}
789
790#endif /* ONLY_ONCE */
791
792static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
793					int unit)
794{
795    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
796    uint32_t txfilter, txformat, txoffset, txpitch;
797    int w = pPict->pDrawable->width;
798    int h = pPict->pDrawable->height;
799    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
800    Bool repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
801	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
802    int i;
803    struct radeon_exa_pixmap_priv *driver_priv;
804    ACCEL_PREAMBLE();
805
806    txpitch = exaGetPixmapPitch(pPix);
807
808    txoffset = 0;
809    CHECK_OFFSET(pPix, 0x1f, "texture");
810
811    if ((txpitch & 0x1f) != 0)
812	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
813
814    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
815    {
816	if (R200TexFormats[i].fmt == pPict->format)
817	    break;
818    }
819    txformat = R200TexFormats[i].card_fmt;
820    if (RADEONPixmapIsColortiled(pPix))
821	txoffset |= R200_TXO_MACRO_TILE;
822
823    if (repeat) {
824	if (!RADEONPitchMatches(pPix))
825	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
826			     w, (unsigned)txpitch));
827
828	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
829	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
830    } else
831	txformat |= R200_TXFORMAT_NON_POWER2;
832    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
833
834    info->accel_state->texW[unit] = w;
835    info->accel_state->texH[unit] = h;
836
837    switch (pPict->filter) {
838    case PictFilterNearest:
839	txfilter = (R200_MAG_FILTER_NEAREST |
840		    R200_MIN_FILTER_NEAREST);
841	break;
842    case PictFilterBilinear:
843	txfilter = (R200_MAG_FILTER_LINEAR |
844		    R200_MIN_FILTER_LINEAR);
845	break;
846    default:
847	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
848    }
849
850    switch (repeatType) {
851	case RepeatNormal:
852	    if (txformat & R200_TXFORMAT_NON_POWER2)
853		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
854	    else
855	        txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
856	    break;
857	case RepeatPad:
858	    txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
859	    break;
860	case RepeatReflect:
861	    txfilter |= R200_CLAMP_S_MIRROR | R200_CLAMP_T_MIRROR;
862	    break;
863	case RepeatNone:
864	    /* don't set an illegal clamp mode for rect textures */
865	    if (txformat & R200_TXFORMAT_NON_POWER2)
866		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
867	    break;
868    }
869
870    BEGIN_ACCEL_RELOC(6, 1);
871    if (unit == 0) {
872	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
873	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
874	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
875	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
876		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
877	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
878	EMIT_READ_OFFSET(R200_PP_TXOFFSET_0, txoffset, pPix);
879    } else {
880	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
881	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
882	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
883	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
884		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
885	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
886	EMIT_READ_OFFSET(R200_PP_TXOFFSET_1, txoffset, pPix);
887	/* emit a texture relocation */
888    }
889    FINISH_ACCEL();
890
891    if (pPict->transform != 0) {
892	info->accel_state->is_transform[unit] = TRUE;
893	info->accel_state->transform[unit] = pPict->transform;
894    } else {
895	info->accel_state->is_transform[unit] = FALSE;
896    }
897
898    return TRUE;
899}
900
901#ifdef ONLY_ONCE
902static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
903			       PicturePtr pDstPicture)
904{
905    PixmapPtr pSrcPixmap, pDstPixmap;
906    uint32_t tmp1;
907
908    TRACE;
909
910    /* Check for unsupported compositing operations. */
911    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
912	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
913
914    if (!pSrcPicture->pDrawable)
915	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
916
917    /* r200 limit should be 2048, there are issues with 2048
918     * see bug 19269
919     */
920
921    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
922
923    if (pSrcPixmap->drawable.width > 2047 ||
924	pSrcPixmap->drawable.height > 2047) {
925	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
926			 pSrcPixmap->drawable.width,
927			 pSrcPixmap->drawable.height));
928    }
929
930    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
931
932    if (pDstPixmap->drawable.width > 2047 ||
933	pDstPixmap->drawable.height > 2047) {
934	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
935			 pDstPixmap->drawable.width,
936			 pDstPixmap->drawable.height));
937    }
938
939    if (pMaskPicture) {
940	PixmapPtr pMaskPixmap;
941
942	if (!pMaskPicture->pDrawable)
943	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
944
945	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
946
947	if (pMaskPixmap->drawable.width > 2047 ||
948	    pMaskPixmap->drawable.height > 2047) {
949	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
950			     pMaskPixmap->drawable.width,
951			     pMaskPixmap->drawable.height));
952	}
953
954	if (pMaskPicture->componentAlpha) {
955	    /* Check if it's component alpha that relies on a source alpha and
956	     * on the source value.  We can only get one of those into the
957	     * single source value that we get to blend with.
958	     */
959	    if (RadeonBlendOp[op].src_alpha &&
960		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
961		RADEON_SRC_BLEND_GL_ZERO) {
962		RADEON_FALLBACK(("Component alpha not supported with source "
963				 "alpha and source value blending.\n"));
964	    }
965	}
966
967	if (!R200CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
968	    return FALSE;
969    }
970
971    if (!R200CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
972	return FALSE;
973
974    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
975	return FALSE;
976
977    return TRUE;
978}
979#endif /* ONLY_ONCE */
980
981static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
982				PicturePtr pMaskPicture, PicturePtr pDstPicture,
983				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
984{
985    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
986    uint32_t dst_format, dst_pitch;
987    uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
988    int pixel_shift;
989    struct radeon_exa_pixmap_priv *driver_priv;
990    ACCEL_PREAMBLE();
991
992    TRACE;
993
994    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
995	return FALSE;
996
997    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
998	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
999
1000    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1001
1002    dst_pitch = exaGetPixmapPitch(pDst);
1003    colorpitch = dst_pitch >> pixel_shift;
1004    if (RADEONPixmapIsColortiled(pDst))
1005	colorpitch |= RADEON_COLOR_TILE_ENABLE;
1006
1007    CHECK_OFFSET(pDst, 0xf, "destination");
1008
1009    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1010	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1011
1012    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
1013	return FALSE;
1014
1015    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1016			     pSrc, pMask, pDst);
1017
1018    /* switch to 3D after doing buffer space checks as it may flush */
1019    RADEON_SWITCH_TO_3D();
1020
1021    if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
1022	return FALSE;
1023    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
1024
1025    if (pMask != NULL) {
1026	if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1))
1027	    return FALSE;
1028	pp_cntl |= RADEON_TEX_1_ENABLE;
1029    } else {
1030	info->accel_state->is_transform[1] = FALSE;
1031    }
1032
1033    BEGIN_ACCEL_RELOC(12, 2);
1034
1035    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
1036    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
1037
1038    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
1039    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
1040
1041    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
1042    if (pMask)
1043	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
1044		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
1045		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
1046    else
1047	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
1048		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
1049
1050
1051
1052    /* IN operator: Multiply src by mask components or mask alpha.
1053     * BLEND_CTL_ADD is A * B + C.
1054     * If a picture is a8, we have to explicitly zero its color values.
1055     * If the destination is a8, we have to route the alpha to red, I think.
1056     * If we're doing component alpha where the source for blending is going to
1057     * be the source alpha (and there's no source value used), we have to zero
1058     * the source's color values.
1059     */
1060    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
1061    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
1062
1063    if (pDstPicture->format == PICT_a8 ||
1064	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
1065    {
1066	cblend |= R200_TXC_ARG_A_R0_ALPHA;
1067    } else if (pSrcPicture->format == PICT_a8)
1068	cblend |= R200_TXC_ARG_A_ZERO;
1069    else
1070	cblend |= R200_TXC_ARG_A_R0_COLOR;
1071    ablend |= R200_TXA_ARG_A_R0_ALPHA;
1072
1073    if (pMask) {
1074	if (pMaskPicture->componentAlpha &&
1075	    pDstPicture->format != PICT_a8)
1076	    cblend |= R200_TXC_ARG_B_R1_COLOR;
1077	else
1078	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
1079	ablend |= R200_TXA_ARG_B_R1_ALPHA;
1080    } else {
1081	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
1082	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
1083    }
1084
1085    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend);
1086    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
1087	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
1088    OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend);
1089    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
1090	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
1091
1092    /* Op operator. */
1093    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1094    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
1095
1096    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) |
1097					   ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT)));
1098
1099    FINISH_ACCEL();
1100
1101    return TRUE;
1102}
1103
1104#ifdef ONLY_ONCE
1105
1106static Bool R300CheckCompositeTexture(PicturePtr pPict,
1107				      PicturePtr pDstPict,
1108				      int op,
1109				      int unit,
1110				      Bool is_r500)
1111{
1112    ScreenPtr pScreen = pDstPict->pDrawable->pScreen;
1113    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1114    RADEONInfoPtr info = RADEONPTR(pScrn);
1115
1116    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1117    int w = pPict->pDrawable->width;
1118    int h = pPict->pDrawable->height;
1119    int i;
1120    int max_tex_w, max_tex_h;
1121
1122    if (is_r500) {
1123	max_tex_w = 4096;
1124	max_tex_h = 4096;
1125    } else {
1126	max_tex_w = 2048;
1127	max_tex_h = 2048;
1128    }
1129
1130    if ((w > max_tex_w) || (h > max_tex_h))
1131	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1132
1133    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1134    {
1135	if (R300TexFormats[i].fmt == pPict->format)
1136	    break;
1137    }
1138    if (i == sizeof(R300TexFormats) / sizeof(R300TexFormats[0]))
1139	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1140			 (int)pPict->format));
1141
1142    if (!RADEONCheckTexturePOT(pPict, unit == 0)) {
1143	if (info->cs) {
1144    		struct radeon_exa_pixmap_priv *driver_priv;
1145		PixmapPtr pPix;
1146
1147    		pPix = RADEONGetDrawablePixmap(pPict->pDrawable);
1148		driver_priv = exaGetPixmapDriverPrivate(pPix);
1149		//TODOradeon_bufmgr_gem_force_gtt(driver_priv->bo);
1150	}
1151	return FALSE;
1152    }
1153
1154    if (pPict->filter != PictFilterNearest &&
1155	pPict->filter != PictFilterBilinear)
1156	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1157
1158    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1159     * picture results in alpha=0 pixels. We can implement this with a border color
1160     * *if* our source texture has an alpha channel, otherwise we need to fall
1161     * back. If we're not transformed then we hope that upper layers have clipped
1162     * rendering to the bounds of the source drawable, in which case it doesn't
1163     * matter. I have not, however, verified that the X server always does such
1164     * clipping.
1165     */
1166    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
1167	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1168	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1169    }
1170
1171    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
1172	RADEON_FALLBACK(("non-affine transforms not supported\n"));
1173
1174    return TRUE;
1175}
1176
1177#endif /* ONLY_ONCE */
1178
1179static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
1180					int unit)
1181{
1182    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1183    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
1184    int w = pPict->pDrawable->width;
1185    int h = pPict->pDrawable->height;
1186    int i, pixel_shift;
1187    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1188    struct radeon_exa_pixmap_priv *driver_priv;
1189    ACCEL_PREAMBLE();
1190
1191    TRACE;
1192
1193    txpitch = exaGetPixmapPitch(pPix);
1194    txoffset = 0;
1195
1196    CHECK_OFFSET(pPix, 0x1f, "texture");
1197
1198    if ((txpitch & 0x1f) != 0)
1199	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1200
1201    /* TXPITCH = pixels (texels) per line - 1 */
1202    pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1203    txpitch >>= pixel_shift;
1204    txpitch -= 1;
1205
1206    if (RADEONPixmapIsColortiled(pPix))
1207	txoffset |= R300_MACRO_TILE;
1208
1209    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1210    {
1211	if (R300TexFormats[i].fmt == pPict->format)
1212	    break;
1213    }
1214
1215    txformat1 = R300TexFormats[i].card_fmt;
1216
1217    if (IS_R300_3D) {
1218	if ((unit == 0) && info->accel_state->msk_pic)
1219	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_0;
1220	else if (unit == 1)
1221	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_1;
1222    }
1223
1224    txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1225		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1226
1227    if (IS_R500_3D && ((w - 1) & 0x800))
1228	txpitch |= R500_TXWIDTH_11;
1229
1230    if (IS_R500_3D && ((h - 1) & 0x800))
1231	txpitch |= R500_TXHEIGHT_11;
1232
1233    /* Use TXPITCH instead of TXWIDTH for address computations: we could
1234     * omit this if there is no padding, but there is no apparent advantage
1235     * in doing so.
1236     */
1237    txformat0 |= R300_TXPITCH_EN;
1238
1239    txfilter = (unit << R300_TX_ID_SHIFT);
1240
1241    switch (repeatType) {
1242    case RepeatNormal:
1243	if (unit != 0 || !info->accel_state->need_src_tile_x)
1244	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1245	else
1246	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1247
1248	if (unit != 0 || !info->accel_state->need_src_tile_y)
1249	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1250	else
1251	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1252
1253	break;
1254    case RepeatPad:
1255	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1256	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST);
1257	break;
1258    case RepeatReflect:
1259	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_MIRROR) |
1260	    R300_TX_CLAMP_T(R300_TX_CLAMP_MIRROR);
1261	break;
1262    case RepeatNone:
1263	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1264	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1265	break;
1266    }
1267
1268    switch (pPict->filter) {
1269    case PictFilterNearest:
1270	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1271	break;
1272    case PictFilterBilinear:
1273	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1274	break;
1275    default:
1276	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1277    }
1278
1279    BEGIN_ACCEL_RELOC(repeatType == RepeatNone ? 7 : 6, 1);
1280    OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1281    OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1282    OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1283    OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1284    OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1285
1286    EMIT_READ_OFFSET((R300_TX_OFFSET_0 + (unit * 4)), txoffset, pPix);
1287
1288    if (repeatType == RepeatNone)
1289	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1290    FINISH_ACCEL();
1291
1292    if (pPict->transform != 0) {
1293	info->accel_state->is_transform[unit] = TRUE;
1294	info->accel_state->transform[unit] = pPict->transform;
1295
1296	/* setup the PVS consts */
1297	if (info->accel_state->has_tcl) {
1298	    info->accel_state->texW[unit] = 1;
1299	    info->accel_state->texH[unit] = 1;
1300	    BEGIN_ACCEL(9);
1301	    if (IS_R300_3D)
1302		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1303	    else
1304		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1305
1306	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][0])));
1307	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][1])));
1308	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][2])));
1309	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1310
1311	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][0])));
1312	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][1])));
1313	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][2])));
1314	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1315
1316	    FINISH_ACCEL();
1317	} else {
1318	    info->accel_state->texW[unit] = w;
1319	    info->accel_state->texH[unit] = h;
1320	}
1321    } else {
1322	info->accel_state->is_transform[unit] = FALSE;
1323
1324	/* setup the PVS consts */
1325	if (info->accel_state->has_tcl) {
1326	    info->accel_state->texW[unit] = 1;
1327	    info->accel_state->texH[unit] = 1;
1328
1329	    BEGIN_ACCEL(9);
1330	    if (IS_R300_3D)
1331		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1332	    else
1333		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1334
1335	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1336	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1337	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1338	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1339
1340	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1341	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1342	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1343	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1344
1345	    FINISH_ACCEL();
1346	} else {
1347	    info->accel_state->texW[unit] = w;
1348	    info->accel_state->texH[unit] = h;
1349	}
1350    }
1351
1352    return TRUE;
1353}
1354
1355#ifdef ONLY_ONCE
1356
1357static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1358			       PicturePtr pDstPicture)
1359{
1360    uint32_t tmp1;
1361    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
1362    PixmapPtr pSrcPixmap, pDstPixmap;
1363    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1364    RADEONInfoPtr info = RADEONPTR(pScrn);
1365    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1366
1367    TRACE;
1368
1369    /* Check for unsupported compositing operations. */
1370    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
1371	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1372
1373    if (!pSrcPicture->pDrawable)
1374	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1375
1376    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1377
1378    if (IS_R500_3D) {
1379	max_tex_w = 4096;
1380	max_tex_h = 4096;
1381	max_dst_w = 4096;
1382	max_dst_h = 4096;
1383    } else {
1384	max_tex_w = 2048;
1385	max_tex_h = 2048;
1386	if (IS_R400_3D) {
1387	    max_dst_w = 4021;
1388	    max_dst_h = 4021;
1389	} else {
1390	    max_dst_w = 2560;
1391	    max_dst_h = 2560;
1392	}
1393    }
1394
1395    if (pSrcPixmap->drawable.width > max_tex_w ||
1396	pSrcPixmap->drawable.height > max_tex_h) {
1397	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1398			 pSrcPixmap->drawable.width,
1399			 pSrcPixmap->drawable.height));
1400    }
1401
1402    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1403
1404    if (pDstPixmap->drawable.width > max_dst_w ||
1405	pDstPixmap->drawable.height > max_dst_h) {
1406	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1407			 pDstPixmap->drawable.width,
1408			 pDstPixmap->drawable.height));
1409    }
1410
1411    if (pMaskPicture) {
1412	PixmapPtr pMaskPixmap;
1413
1414	if (!pMaskPicture->pDrawable)
1415	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1416
1417	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1418
1419	if (pMaskPixmap->drawable.width > max_tex_w ||
1420	    pMaskPixmap->drawable.height > max_tex_h) {
1421	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1422			     pMaskPixmap->drawable.width,
1423			     pMaskPixmap->drawable.height));
1424	}
1425
1426	if (pMaskPicture->componentAlpha) {
1427	    /* Check if it's component alpha that relies on a source alpha and
1428	     * on the source value.  We can only get one of those into the
1429	     * single source value that we get to blend with.
1430	     */
1431	    if (RadeonBlendOp[op].src_alpha &&
1432		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
1433		RADEON_SRC_BLEND_GL_ZERO) {
1434		RADEON_FALLBACK(("Component alpha not supported with source "
1435				 "alpha and source value blending.\n"));
1436	    }
1437	}
1438
1439	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
1440	    return FALSE;
1441    }
1442
1443    if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D))
1444	return FALSE;
1445
1446    if (!R300GetDestFormat(pDstPicture, &tmp1))
1447	return FALSE;
1448
1449    return TRUE;
1450
1451}
1452#endif /* ONLY_ONCE */
1453
1454static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
1455				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1456				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1457{
1458    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
1459    uint32_t dst_format, dst_pitch;
1460    uint32_t txenable, colorpitch;
1461    uint32_t blendcntl, output_fmt;
1462    uint32_t src_color, src_alpha;
1463    uint32_t mask_color, mask_alpha;
1464    int pixel_shift;
1465    struct radeon_exa_pixmap_priv *driver_priv;
1466    ACCEL_PREAMBLE();
1467    TRACE;
1468
1469    if (!R300GetDestFormat(pDstPicture, &dst_format))
1470	return FALSE;
1471
1472    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1473
1474    dst_pitch = exaGetPixmapPitch(pDst);
1475    colorpitch = dst_pitch >> pixel_shift;
1476
1477    if (RADEONPixmapIsColortiled(pDst))
1478	colorpitch |= R300_COLORTILE;
1479
1480    colorpitch |= dst_format;
1481
1482    CHECK_OFFSET(pDst, 0x0f, "destination");
1483
1484    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1485	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1486
1487    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1488	return FALSE;
1489
1490    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1491			     pSrc, pMask, pDst);
1492
1493    /* have to execute switch after doing buffer sizing check as the latter flushes */
1494    RADEON_SWITCH_TO_3D();
1495
1496    if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
1497	return FALSE;
1498    txenable = R300_TEX_0_ENABLE;
1499
1500    if (pMask != NULL) {
1501	if (!FUNC_NAME(R300TextureSetup)(pMaskPicture, pMask, 1))
1502	    return FALSE;
1503	txenable |= R300_TEX_1_ENABLE;
1504    } else {
1505	info->accel_state->is_transform[1] = FALSE;
1506    }
1507
1508    /* setup the VAP */
1509    if (info->accel_state->has_tcl) {
1510	if (pMask)
1511	    BEGIN_ACCEL(10);
1512	else
1513	    BEGIN_ACCEL(9);
1514	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
1515    } else {
1516	if (pMask)
1517	    BEGIN_ACCEL(6);
1518	else
1519	    BEGIN_ACCEL(5);
1520    }
1521
    /* These registers define the number, type, and location of data submitted
     * to the PVS unit or GA input (when PVS is disabled).
     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
     * enabled.  This memory provides the inputs to the vertex shader program
     * and ordering is not important.  When PVS/TCL is disabled, this field maps
     * directly to the GA input memory and the order is significant.  In
     * PVS_BYPASS mode the order is as follows:
     * Position
     * Point Size
     * Color 0-3
     * Textures 0-7
     * Fog
     */
1535    if (pMask) {
1536	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1537		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1538		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1539		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1540		       R300_SIGNED_0 |
1541		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1542		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1543		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1544		       R300_SIGNED_1));
1545	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
1546		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1547		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1548		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1549		       R300_LAST_VEC_2 |
1550		       R300_SIGNED_2));
1551    } else
1552	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1553		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1554		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1555		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1556		       R300_SIGNED_0 |
1557		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1558		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1559		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1560		       R300_LAST_VEC_1 |
1561		       R300_SIGNED_1));
1562
1563    /* load the vertex shader
1564     * We pre-load vertex programs in RADEONInit3DEngine():
1565     * - exa
1566     * - Xv
1567     * - Xv bicubic
1568     * Here we select the offset of the vertex program we want to use
1569     */
1570    if (info->accel_state->has_tcl) {
1571	if (pMask) {
1572	    /* consts used by vertex shaders */
1573	    OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1574						    R300_PVS_MAX_CONST_ADDR(3)));
1575	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1576			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1577			   (8 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1578			   (8 << R300_PVS_LAST_INST_SHIFT)));
1579	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1580			  (8 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1581	} else {
1582	    /* consts used by vertex shaders */
1583	    OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1584						    R300_PVS_MAX_CONST_ADDR(3)));
1585	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1586			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1587			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1588			   (4 << R300_PVS_LAST_INST_SHIFT)));
1589	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1590			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1591	}
1592    }
1593
1594    /* Position and one or two sets of 2 texture coordinates */
1595    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1596    if (pMask)
1597	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1598		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1599		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1600    else
1601	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1602		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1603
1604    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
1605    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
1606    FINISH_ACCEL();
1607
1608    /* shader output swizzling */
1609    switch (pDstPicture->format) {
1610    case PICT_a8r8g8b8:
1611    case PICT_x8r8g8b8:
1612    default:
1613	output_fmt = (R300_OUT_FMT_C4_8 |
1614		      R300_OUT_FMT_C0_SEL_BLUE |
1615		      R300_OUT_FMT_C1_SEL_GREEN |
1616		      R300_OUT_FMT_C2_SEL_RED |
1617		      R300_OUT_FMT_C3_SEL_ALPHA);
1618	break;
1619    case PICT_a8b8g8r8:
1620    case PICT_x8b8g8r8:
1621	output_fmt = (R300_OUT_FMT_C4_8 |
1622		      R300_OUT_FMT_C0_SEL_RED |
1623		      R300_OUT_FMT_C1_SEL_GREEN |
1624		      R300_OUT_FMT_C2_SEL_BLUE |
1625		      R300_OUT_FMT_C3_SEL_ALPHA);
1626	break;
1627#ifdef PICT_TYPE_BGRA
1628    case PICT_b8g8r8a8:
1629    case PICT_b8g8r8x8:
1630	output_fmt = (R300_OUT_FMT_C4_8 |
1631		      R300_OUT_FMT_C0_SEL_ALPHA |
1632		      R300_OUT_FMT_C1_SEL_RED |
1633		      R300_OUT_FMT_C2_SEL_GREEN |
1634		      R300_OUT_FMT_C3_SEL_BLUE);
1635	break;
1636#endif
1637    case PICT_a8:
1638	output_fmt = (R300_OUT_FMT_C4_8 |
1639		      R300_OUT_FMT_C0_SEL_ALPHA);
1640	break;
1641    }
1642
1643    /* setup pixel shader */
1644    if (IS_R300_3D) {
1645	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1646	    src_color = R300_ALU_RGB_0_0;
1647	else
1648	    src_color = R300_ALU_RGB_SRC0_RGB;
1649
1650	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1651	    src_alpha = R300_ALU_ALPHA_1_0;
1652	else
1653	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1654
1655	if (pMask) {
1656	    if (pMaskPicture->componentAlpha) {
1657		if (RadeonBlendOp[op].src_alpha) {
1658		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1659			src_color = R300_ALU_RGB_1_0;
1660		    else
1661			src_color = R300_ALU_RGB_SRC0_AAA;
1662		} else
1663		    src_color = R300_ALU_RGB_SRC0_RGB;
1664		mask_color = R300_ALU_RGB_SRC1_RGB;
1665	    } else {
1666		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1667		    mask_color = R300_ALU_RGB_1_0;
1668		else
1669		    mask_color = R300_ALU_RGB_SRC1_AAA;
1670	    }
1671	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1672		mask_alpha = R300_ALU_ALPHA_1_0;
1673	    else
1674		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1675	} else {
1676	    mask_color = R300_ALU_RGB_1_0;
1677	    mask_alpha = R300_ALU_ALPHA_1_0;
1678	}
1679
1680	/* setup the rasterizer, load FS */
1681	if (pMask) {
1682	    BEGIN_ACCEL(16);
1683	    /* 4 components: 2 for tex0, 2 for tex1 */
1684	    OUT_ACCEL_REG(R300_RS_COUNT,
1685			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1686			   R300_RS_COUNT_HIRES_EN));
1687
1688	    /* R300_INST_COUNT_RS - highest RS instruction used */
1689	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1690
1691	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1692						R300_ALU_CODE_SIZE(0) |
1693						R300_TEX_CODE_OFFSET(0) |
1694						R300_TEX_CODE_SIZE(1)));
1695
1696	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1697			  (R300_ALU_START(0) |
1698			   R300_ALU_SIZE(0) |
1699			   R300_TEX_START(0) |
1700			   R300_TEX_SIZE(1) |
1701			   R300_RGBA_OUT));
1702
1703
1704	} else {
1705	    BEGIN_ACCEL(15);
1706	    /* 2 components: 2 for tex0 */
1707	    OUT_ACCEL_REG(R300_RS_COUNT,
1708			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1709			   R300_RS_COUNT_HIRES_EN));
1710
1711	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1712
1713	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1714						R300_ALU_CODE_SIZE(0) |
1715						R300_TEX_CODE_OFFSET(0) |
1716						R300_TEX_CODE_SIZE(0)));
1717
1718	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1719			  (R300_ALU_START(0) |
1720			   R300_ALU_SIZE(0) |
1721			   R300_TEX_START(0) |
1722			   R300_TEX_SIZE(0) |
1723			   R300_RGBA_OUT));
1724
1725	}
1726
1727	OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
1728	OUT_ACCEL_REG(R300_US_CODE_ADDR_0,
1729		      (R300_ALU_START(0) |
1730		       R300_ALU_SIZE(0) |
1731		       R300_TEX_START(0) |
1732		       R300_TEX_SIZE(0)));
1733	OUT_ACCEL_REG(R300_US_CODE_ADDR_1,
1734		      (R300_ALU_START(0) |
1735		       R300_ALU_SIZE(0) |
1736		       R300_TEX_START(0) |
1737		       R300_TEX_SIZE(0)));
1738	OUT_ACCEL_REG(R300_US_CODE_ADDR_2,
1739		      (R300_ALU_START(0) |
1740		       R300_ALU_SIZE(0) |
1741		       R300_TEX_START(0) |
1742		       R300_TEX_SIZE(0)));
1743
1744	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1745	/* shader output swizzling */
1746	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1747
1748	/* tex inst for src texture */
1749	OUT_ACCEL_REG(R300_US_TEX_INST(0),
1750		      (R300_TEX_SRC_ADDR(0) |
1751		       R300_TEX_DST_ADDR(0) |
1752		       R300_TEX_ID(0) |
1753		       R300_TEX_INST(R300_TEX_INST_LD)));
1754
1755	if (pMask) {
1756	    /* tex inst for mask texture */
1757	    OUT_ACCEL_REG(R300_US_TEX_INST(1),
1758			  (R300_TEX_SRC_ADDR(1) |
1759			   R300_TEX_DST_ADDR(1) |
1760			   R300_TEX_ID(1) |
1761			   R300_TEX_INST(R300_TEX_INST_LD)));
1762	}
1763
1764	/* RGB inst
1765	 * temp addresses for texture inputs
1766	 * ALU_RGB_ADDR0 is src tex (temp 0)
1767	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1768	 * R300_ALU_RGB_OMASK - output components to write
1769	 * R300_ALU_RGB_TARGET_A - render target
1770	 */
1771	OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0),
1772		      (R300_ALU_RGB_ADDR0(0) |
1773		       R300_ALU_RGB_ADDR1(1) |
1774		       R300_ALU_RGB_ADDR2(0) |
1775		       R300_ALU_RGB_ADDRD(0) |
1776		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1777					   R300_ALU_RGB_MASK_G |
1778					   R300_ALU_RGB_MASK_B)) |
1779		       R300_ALU_RGB_TARGET_A));
1780	/* RGB inst
1781	 * ALU operation
1782	 */
1783	OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0),
1784		      (R300_ALU_RGB_SEL_A(src_color) |
1785		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1786		       R300_ALU_RGB_SEL_B(mask_color) |
1787		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1788		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1789		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1790		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1791		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1792		       R300_ALU_RGB_CLAMP));
1793	/* Alpha inst
1794	 * temp addresses for texture inputs
1795	 * ALU_ALPHA_ADDR0 is src tex (0)
1796	 * ALU_ALPHA_ADDR1 is mask tex (1)
1797	 * R300_ALU_ALPHA_OMASK - output components to write
1798	 * R300_ALU_ALPHA_TARGET_A - render target
1799	 */
1800	OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0),
1801		      (R300_ALU_ALPHA_ADDR0(0) |
1802		       R300_ALU_ALPHA_ADDR1(1) |
1803		       R300_ALU_ALPHA_ADDR2(0) |
1804		       R300_ALU_ALPHA_ADDRD(0) |
1805		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1806		       R300_ALU_ALPHA_TARGET_A |
1807		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1808	/* Alpha inst
1809	 * ALU operation
1810	 */
1811	OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0),
1812		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1813		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1814		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1815		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1816		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1817		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1818		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1819		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1820		       R300_ALU_ALPHA_CLAMP));
1821	FINISH_ACCEL();
1822    } else {
1823	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1824	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1825			 R500_ALU_RGB_G_SWIZ_A_0 |
1826			 R500_ALU_RGB_B_SWIZ_A_0);
1827	else
1828	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1829			 R500_ALU_RGB_G_SWIZ_A_G |
1830			 R500_ALU_RGB_B_SWIZ_A_B);
1831
1832	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1833	    src_alpha = R500_ALPHA_SWIZ_A_1;
1834	else
1835	    src_alpha = R500_ALPHA_SWIZ_A_A;
1836
1837	if (pMask) {
1838	    if (pMaskPicture->componentAlpha) {
1839		if (RadeonBlendOp[op].src_alpha) {
1840		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1841			src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1842				     R500_ALU_RGB_G_SWIZ_A_1 |
1843				     R500_ALU_RGB_B_SWIZ_A_1);
1844		    else
1845			src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1846				     R500_ALU_RGB_G_SWIZ_A_A |
1847				     R500_ALU_RGB_B_SWIZ_A_A);
1848		} else
1849		    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1850				 R500_ALU_RGB_G_SWIZ_A_G |
1851				 R500_ALU_RGB_B_SWIZ_A_B);
1852
1853		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1854			      R500_ALU_RGB_G_SWIZ_B_G |
1855			      R500_ALU_RGB_B_SWIZ_B_B);
1856	    } else {
1857		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1858		    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1859				  R500_ALU_RGB_G_SWIZ_B_1 |
1860				  R500_ALU_RGB_B_SWIZ_B_1);
1861		else
1862		    mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1863				  R500_ALU_RGB_G_SWIZ_B_A |
1864				  R500_ALU_RGB_B_SWIZ_B_A);
1865	    }
1866	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1867		mask_alpha = R500_ALPHA_SWIZ_B_1;
1868	    else
1869		mask_alpha = R500_ALPHA_SWIZ_B_A;
1870	} else {
1871	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1872			  R500_ALU_RGB_G_SWIZ_B_1 |
1873			  R500_ALU_RGB_B_SWIZ_B_1);
1874	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1875	}
1876
1877	BEGIN_ACCEL(7);
1878	if (pMask) {
1879	    /* 4 components: 2 for tex0, 2 for tex1 */
1880	    OUT_ACCEL_REG(R300_RS_COUNT,
1881			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1882			   R300_RS_COUNT_HIRES_EN));
1883
1884	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1885	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1886
1887	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1888					      R500_US_CODE_END_ADDR(2)));
1889	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1890					       R500_US_CODE_RANGE_SIZE(2)));
1891	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1892	} else {
1893	    OUT_ACCEL_REG(R300_RS_COUNT,
1894			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1895			   R300_RS_COUNT_HIRES_EN));
1896
1897	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1898
1899	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1900					      R500_US_CODE_END_ADDR(1)));
1901	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1902					       R500_US_CODE_RANGE_SIZE(1)));
1903	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1904	}
1905
1906	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1907	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1908	FINISH_ACCEL();
1909
1910	if (pMask) {
1911	    BEGIN_ACCEL(19);
1912	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1913	    /* tex inst for src texture */
1914	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1915						   R500_INST_RGB_WMASK_R |
1916						   R500_INST_RGB_WMASK_G |
1917						   R500_INST_RGB_WMASK_B |
1918						   R500_INST_ALPHA_WMASK |
1919						   R500_INST_RGB_CLAMP |
1920						   R500_INST_ALPHA_CLAMP));
1921
1922	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1923						   R500_TEX_INST_LD |
1924						   R500_TEX_IGNORE_UNCOVERED));
1925
1926	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1927						   R500_TEX_SRC_S_SWIZ_R |
1928						   R500_TEX_SRC_T_SWIZ_G |
1929						   R500_TEX_DST_ADDR(0) |
1930						   R500_TEX_DST_R_SWIZ_R |
1931						   R500_TEX_DST_G_SWIZ_G |
1932						   R500_TEX_DST_B_SWIZ_B |
1933						   R500_TEX_DST_A_SWIZ_A));
1934	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1935						   R500_DX_S_SWIZ_R |
1936						   R500_DX_T_SWIZ_R |
1937						   R500_DX_R_SWIZ_R |
1938						   R500_DX_Q_SWIZ_R |
1939						   R500_DY_ADDR(0) |
1940						   R500_DY_S_SWIZ_R |
1941						   R500_DY_T_SWIZ_R |
1942						   R500_DY_R_SWIZ_R |
1943						   R500_DY_Q_SWIZ_R));
1944	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1945	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1946
1947	    /* tex inst for mask texture */
1948	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1949						   R500_INST_TEX_SEM_WAIT |
1950						   R500_INST_RGB_WMASK_R |
1951						   R500_INST_RGB_WMASK_G |
1952						   R500_INST_RGB_WMASK_B |
1953						   R500_INST_ALPHA_WMASK |
1954						   R500_INST_RGB_CLAMP |
1955						   R500_INST_ALPHA_CLAMP));
1956
1957	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
1958						   R500_TEX_INST_LD |
1959						   R500_TEX_SEM_ACQUIRE |
1960						   R500_TEX_IGNORE_UNCOVERED));
1961
1962	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
1963						   R500_TEX_SRC_S_SWIZ_R |
1964						   R500_TEX_SRC_T_SWIZ_G |
1965						   R500_TEX_DST_ADDR(1) |
1966						   R500_TEX_DST_R_SWIZ_R |
1967						   R500_TEX_DST_G_SWIZ_G |
1968						   R500_TEX_DST_B_SWIZ_B |
1969						   R500_TEX_DST_A_SWIZ_A));
1970	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
1971						   R500_DX_S_SWIZ_R |
1972						   R500_DX_T_SWIZ_R |
1973						   R500_DX_R_SWIZ_R |
1974						   R500_DX_Q_SWIZ_R |
1975						   R500_DY_ADDR(1) |
1976						   R500_DY_S_SWIZ_R |
1977						   R500_DY_T_SWIZ_R |
1978						   R500_DY_R_SWIZ_R |
1979						   R500_DY_Q_SWIZ_R));
1980	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1981	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1982	} else {
1983	    BEGIN_ACCEL(13);
1984	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1985	    /* tex inst for src texture */
1986	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1987						   R500_INST_TEX_SEM_WAIT |
1988						   R500_INST_RGB_WMASK_R |
1989						   R500_INST_RGB_WMASK_G |
1990						   R500_INST_RGB_WMASK_B |
1991						   R500_INST_ALPHA_WMASK |
1992						   R500_INST_RGB_CLAMP |
1993						   R500_INST_ALPHA_CLAMP));
1994
1995	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1996						   R500_TEX_INST_LD |
1997						   R500_TEX_SEM_ACQUIRE |
1998						   R500_TEX_IGNORE_UNCOVERED));
1999
2000	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
2001						   R500_TEX_SRC_S_SWIZ_R |
2002						   R500_TEX_SRC_T_SWIZ_G |
2003						   R500_TEX_DST_ADDR(0) |
2004						   R500_TEX_DST_R_SWIZ_R |
2005						   R500_TEX_DST_G_SWIZ_G |
2006						   R500_TEX_DST_B_SWIZ_B |
2007						   R500_TEX_DST_A_SWIZ_A));
2008	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
2009						   R500_DX_S_SWIZ_R |
2010						   R500_DX_T_SWIZ_R |
2011						   R500_DX_R_SWIZ_R |
2012						   R500_DX_Q_SWIZ_R |
2013						   R500_DY_ADDR(0) |
2014						   R500_DY_S_SWIZ_R |
2015						   R500_DY_T_SWIZ_R |
2016						   R500_DY_R_SWIZ_R |
2017						   R500_DY_Q_SWIZ_R));
2018	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2019	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2020	}
2021
2022	/* ALU inst */
2023	/* *_OMASK* - output component write mask */
2024	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
2025					       R500_INST_TEX_SEM_WAIT |
2026					       R500_INST_LAST |
2027					       R500_INST_RGB_OMASK_R |
2028					       R500_INST_RGB_OMASK_G |
2029					       R500_INST_RGB_OMASK_B |
2030					       R500_INST_ALPHA_OMASK |
2031					       R500_INST_RGB_CLAMP |
2032					       R500_INST_ALPHA_CLAMP));
2033	/* ALU inst
2034	 * temp addresses for texture inputs
2035	 * RGB_ADDR0 is src tex (temp 0)
2036	 * RGB_ADDR1 is mask tex (temp 1)
2037	 */
2038	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2039					       R500_RGB_ADDR1(1) |
2040					       R500_RGB_ADDR2(0)));
2041	/* ALU inst
2042	 * temp addresses for texture inputs
2043	 * ALPHA_ADDR0 is src tex (temp 0)
2044	 * ALPHA_ADDR1 is mask tex (temp 1)
2045	 */
2046	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2047					       R500_ALPHA_ADDR1(1) |
2048					       R500_ALPHA_ADDR2(0)));
2049
2050	/* R500_ALU_RGB_TARGET - RGB render target */
2051	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2052					       src_color |
2053					       R500_ALU_RGB_SEL_B_SRC1 |
2054					       mask_color |
2055					       R500_ALU_RGB_TARGET(0)));
2056
2057	/* R500_ALPHA_RGB_TARGET - alpha render target */
2058	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
2059					       R500_ALPHA_ADDRD(0) |
2060					       R500_ALPHA_SEL_A_SRC0 |
2061					       src_alpha |
2062					       R500_ALPHA_SEL_B_SRC1 |
2063					       mask_alpha |
2064					       R500_ALPHA_TARGET(0)));
2065
2066	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
2067					       R500_ALU_RGBA_ADDRD(0) |
2068					       R500_ALU_RGBA_R_SWIZ_0 |
2069					       R500_ALU_RGBA_G_SWIZ_0 |
2070					       R500_ALU_RGBA_B_SWIZ_0 |
2071					       R500_ALU_RGBA_A_SWIZ_0));
2072	FINISH_ACCEL();
2073    }
2074
2075    /* Clear out scissoring */
2076    BEGIN_ACCEL(2);
2077    if (IS_R300_3D) {
2078	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((1440 << R300_SCISSOR_X_SHIFT) |
2079					 (1440 << R300_SCISSOR_Y_SHIFT)));
2080	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2081					 ((pDst->drawable.height + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
2082
2083    } else {
2084	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
2085					 (0 << R300_SCISSOR_Y_SHIFT)));
2086	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width - 1) << R300_SCISSOR_X_SHIFT) |
2087					 ((pDst->drawable.height - 1) << R300_SCISSOR_Y_SHIFT)));
2088    }
2089    FINISH_ACCEL();
2090
2091
2092    BEGIN_ACCEL_RELOC(3, 2);
2093    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pDst);
2094    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pDst);
2095
2096    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
2097    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
2098
2099    FINISH_ACCEL();
2100
2101    BEGIN_ACCEL(1);
2102    if (pMask)
2103	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 6);
2104    else
2105	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 4);
2106    FINISH_ACCEL();
2107
2108    return TRUE;
2109}
2110
2111static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
2112{
2113    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2114    ACCEL_PREAMBLE();
2115
2116    ENTER_DRAW(0);
2117
2118    if (info->accel_state->draw_header) {
2119	if (info->ChipFamily < CHIP_FAMILY_R200) {
2120	    info->accel_state->draw_header[0] = CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2121							   info->accel_state->num_vtx *
2122							   info->accel_state->vtx_count + 1);
2123	    info->accel_state->draw_header[2] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2124						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2125						 RADEON_CP_VC_CNTL_MAOS_ENABLE |
2126						 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2127						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2128	} else if (IS_R300_3D || IS_R500_3D) {
2129	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2130							   info->accel_state->num_vtx *
2131							   info->accel_state->vtx_count);
2132	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2133						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2134						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2135	} else {
2136	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2137							   info->accel_state->num_vtx *
2138							   info->accel_state->vtx_count);
2139	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2140						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2141						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2142	}
2143	info->accel_state->draw_header = NULL;
2144    }
2145
2146    if (IS_R300_3D || IS_R500_3D) {
2147	BEGIN_ACCEL(3);
2148	OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
2149	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2150    } else
2151	BEGIN_ACCEL(1);
2152    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2153    FINISH_ACCEL();
2154
2155    LEAVE_DRAW(0);
2156}
2157
2158
#if defined(ACCEL_CP)

/*
 * CP build: every vertex component is emitted with OUT_RING_F().
 *
 * VTX_OUT emits destination x/y followed by source x/y.
 */
#define VTX_OUT(dx, dy, sx, sy)					\
do {								\
    OUT_RING_F(dx);						\
    OUT_RING_F(dy);						\
    OUT_RING_F(sx);						\
    OUT_RING_F(sy);						\
} while (0)

/* VTX_OUT_MASK emits the four VTX_OUT values, then mask x/y. */
#define VTX_OUT_MASK(dx, dy, sx, sy, mx, my)			\
do {								\
    VTX_OUT(dx, dy, sx, sy);					\
    OUT_RING_F(mx);						\
    OUT_RING_F(my);						\
} while (0)

#else /* !ACCEL_CP */

/*
 * Non-CP build: every vertex component is written to RADEON_SE_PORT_DATA0
 * with OUT_ACCEL_REG_F().
 *
 * VTX_OUT emits destination x/y followed by source x/y.
 */
#define VTX_OUT(dx, dy, sx, sy)					\
do {								\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, dx);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, dy);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, sx);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, sy);			\
} while (0)

/* VTX_OUT_MASK emits the four VTX_OUT values, then mask x/y. */
#define VTX_OUT_MASK(dx, dy, sx, sy, mx, my)			\
do {								\
    VTX_OUT(dx, dy, sx, sy);					\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, mx);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, my);			\
} while (0)

#endif /* ACCEL_CP */
2200
#ifdef ONLY_ONCE
/*
 * Run *point through *transform, updating it in place.
 *
 * The point's x and y become the first two components of a PictVector whose
 * third component is xFixed1; that vector is handed to
 * PictureTransformPoint() and its first two components are copied back into
 * *point.  Any result returned by PictureTransformPoint() is ignored.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector vec;

    vec.vector[2] = xFixed1;
    vec.vector[1] = point->y;
    vec.vector[0] = point->x;

    PictureTransformPoint(transform, &vec);

    point->y = vec.vector[1];
    point->x = vec.vector[0];
}
#endif
2213
/*
 * Emit the vertices for one rectangle of the current composite operation.
 *
 * pScrn, info  - screen and driver state used by the emission macros.
 * pDst         - destination pixmap (passed on to the vline wait).
 * srcX, srcY   - top-left corner of the rectangle in source coordinates.
 * maskX, maskY - top-left corner in mask coordinates; only used when
 *                accel_state->msk_pic is set.
 * dstX, dstY   - top-left corner in destination coordinates.
 * w, h         - rectangle size, shared by source, mask and destination.
 *
 * R300/R500 class 3D engines get four vertices per rectangle (quad list);
 * all other chips get three (rectangle list).  In the CP build the draw
 * packet header is emitted only when none is pending; its location is kept
 * in accel_state->draw_header and the running vertex total in
 * accel_state->num_vtx, which RadeonDoneComposite uses to rewrite the header.
 */
2214static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
2215					   RADEONInfoPtr info,
2216					   PixmapPtr pDst,
2217					   int srcX, int srcY,
2218					   int maskX, int maskY,
2219					   int dstX, int dstY,
2220					   int w, int h)
2221{
    /* Number of values emitted per vertex: 4 without a mask, 6 with one. */
2222    int vtx_count;
2223    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
    /*
     * NOTE(review): the mask corners are static although they are fully
     * assigned before use whenever msk_pic is set - presumably this only
     * silences "may be used uninitialized" warnings; confirm before changing.
     */
2224    static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
2225    ACCEL_PREAMBLE();
2226
2227    ENTER_DRAW(0);
2228
2229    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
2230       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
2231
    /*
     * CP build only: when the command stream reports full, or the indirect
     * buffer has fewer than 4 * 32 units left, finish the current composite,
     * flush, and call PrepareComposite again with the operation, pictures
     * and pixmaps saved in accel_state.
     */
2232#if defined(ACCEL_CP)
2233    if ((info->cs && CS_FULL(info->cs)) ||
2234	(!info->cs && (info->cp->indirectBuffer->used + 4 * 32) >
2235	 info->cp->indirectBuffer->total)) {
2236	FUNC_NAME(RadeonDoneComposite)(info->accel_state->dst_pix);
2237	if (info->cs)
2238	    radeon_cs_flush_indirect(pScrn);
2239	else
2240	    RADEONCPFlushIndirect(pScrn, 1);
2241	info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
2242						 info->accel_state->src_pic,
2243						 info->accel_state->msk_pic,
2244						 info->accel_state->dst_pic,
2245						 info->accel_state->src_pix,
2246						 info->accel_state->msk_pix,
2247						 info->accel_state->dst_pix);
2248    }
2249#endif
2250
    /* Four corners of the source rectangle, converted with IntToxFixed(). */
2251    srcTopLeft.x     = IntToxFixed(srcX);
2252    srcTopLeft.y     = IntToxFixed(srcY);
2253    srcTopRight.x    = IntToxFixed(srcX + w);
2254    srcTopRight.y    = IntToxFixed(srcY);
2255    srcBottomLeft.x  = IntToxFixed(srcX);
2256    srcBottomLeft.y  = IntToxFixed(srcY + h);
2257    srcBottomRight.x = IntToxFixed(srcX + w);
2258    srcBottomRight.y = IntToxFixed(srcY + h);
2259
    /*
     * The source transform is applied here only on pre-R300 chips or when
     * has_tcl is not set.  NOTE(review): presumably the transform is applied
     * elsewhere (in hardware) in the remaining case - confirm.
     */
2260    if (info->accel_state->is_transform[0]) {
2261	if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2262	    transformPoint(info->accel_state->transform[0], &srcTopLeft);
2263	    transformPoint(info->accel_state->transform[0], &srcTopRight);
2264	    transformPoint(info->accel_state->transform[0], &srcBottomLeft);
2265	    transformPoint(info->accel_state->transform[0], &srcBottomRight);
2266	}
2267    }
2268
    /* Same corner setup and transform rule for the mask, when there is one. */
2269    if (info->accel_state->msk_pic) {
2270	maskTopLeft.x     = IntToxFixed(maskX);
2271	maskTopLeft.y     = IntToxFixed(maskY);
2272	maskTopRight.x    = IntToxFixed(maskX + w);
2273	maskTopRight.y    = IntToxFixed(maskY);
2274	maskBottomLeft.x  = IntToxFixed(maskX);
2275	maskBottomLeft.y  = IntToxFixed(maskY + h);
2276	maskBottomRight.x = IntToxFixed(maskX + w);
2277	maskBottomRight.y = IntToxFixed(maskY + h);
2278
2279	if (info->accel_state->is_transform[1]) {
2280	    if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2281		transformPoint(info->accel_state->transform[1], &maskTopLeft);
2282		transformPoint(info->accel_state->transform[1], &maskTopRight);
2283		transformPoint(info->accel_state->transform[1], &maskBottomLeft);
2284		transformPoint(info->accel_state->transform[1], &maskBottomRight);
2285	    }
2286	}
2287
2288	vtx_count = 6;
2289    } else
2290	vtx_count = 4;
2291
    /*
     * With vsync enabled, wait on the CRTC chosen by radeon_pick_best_crtc()
     * for the destination rectangle, passing the line range dstY .. dstY + h.
     */
2292    if (info->accel_state->vsync)
2293	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
2294				      radeon_pick_best_crtc(pScrn, dstX, dstX + w, dstY, dstY + h),
2295				      dstY, dstY + h);
2296
2297#ifdef ACCEL_CP
    /*
     * Each branch below emits a draw packet header only if none is pending,
     * records where it was written (cs->packets + cs->cdw, or __head -
     * presumably the ring position set up by BEGIN_RING; confirm), then adds
     * this rectangle's vertex count to num_vtx and reserves room for the
     * vertex data.
     */
2298    if (info->ChipFamily < CHIP_FAMILY_R200) {
	/* Pre-R200: three-word header including a vertex format word. */
2299	if (!info->accel_state->draw_header) {
2300	    BEGIN_RING(3);
2301
2302#ifdef XF86DRM_MODE
2303	    if (info->cs)
2304		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2305	    else
2306#endif
2307		info->accel_state->draw_header = __head;
2308	    info->accel_state->num_vtx = 0;
2309	    info->accel_state->vtx_count = vtx_count;
2310
2311	    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2312				3 * vtx_count + 1));
2313	    if (info->accel_state->msk_pic)
2314		OUT_RING(RADEON_CP_VC_FRMT_XY |
2315			 RADEON_CP_VC_FRMT_ST0 |
2316			 RADEON_CP_VC_FRMT_ST1);
2317	    else
2318		OUT_RING(RADEON_CP_VC_FRMT_XY |
2319			 RADEON_CP_VC_FRMT_ST0);
2320	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2321		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2322		     RADEON_CP_VC_CNTL_MAOS_ENABLE |
2323		     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2324		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2325	    ADVANCE_RING();
2326	}
2327
2328	info->accel_state->num_vtx += 3;
2329	BEGIN_RING(3 * vtx_count);
2330    } else if (IS_R300_3D || IS_R500_3D) {
	/* R300/R500: two-word header, quad list, four vertices per rectangle. */
2331	if (!info->accel_state->draw_header) {
2332	    BEGIN_RING(2);
2333
2334#ifdef XF86DRM_MODE
2335	    if (info->cs)
2336		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2337	    else
2338#endif
2339		info->accel_state->draw_header = __head;
2340	    info->accel_state->num_vtx = 0;
2341	    info->accel_state->vtx_count = vtx_count;
2342
2343	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2344				4 * vtx_count));
2345	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2346		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2347		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2348	    ADVANCE_RING();
2349	}
2350
2351	info->accel_state->num_vtx += 4;
2352	BEGIN_RING(4 * vtx_count);
2353    } else {
	/* Remaining chips: two-word header, rectangle list, three vertices. */
2354	if (!info->accel_state->draw_header) {
2355	    BEGIN_RING(2);
2356
2357#ifdef XF86DRM_MODE
2358	    if (info->cs)
2359		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2360	    else
2361#endif
2362		info->accel_state->draw_header = __head;
2363	    info->accel_state->num_vtx = 0;
2364	    info->accel_state->vtx_count = vtx_count;
2365
2366	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2367				3 * vtx_count));
2368	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2369		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2370		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2371	    ADVANCE_RING();
2372	}
2373
2374	info->accel_state->num_vtx += 3;
2375	BEGIN_RING(3 * vtx_count);
2376    }
2377
2378#else /* ACCEL_CP */
    /*
     * Non-CP build: one RADEON_SE_VF_CNTL write per rectangle followed by the
     * vertex data.  NOTE(review): the R300/R500 count is 2 + 4 * vtx_count
     * although only one control register write is visible here - confirm
     * whether the extra entry is intentional.
     */
2379    if (IS_R300_3D || IS_R500_3D)
2380	BEGIN_ACCEL(2 + vtx_count * 4);
2381    else
2382	BEGIN_ACCEL(1 + vtx_count * 3);
2383
2384    if (info->ChipFamily < CHIP_FAMILY_R200)
2385	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2386					  RADEON_VF_PRIM_WALK_DATA |
2387					  RADEON_VF_RADEON_MODE |
2388					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2389    else if (IS_R300_3D || IS_R500_3D)
2390	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
2391					  RADEON_VF_PRIM_WALK_DATA |
2392					  (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
2393    else
2394	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2395					  RADEON_VF_PRIM_WALK_DATA |
2396					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2397
2398#endif
2399
    /*
     * Vertex data.  Order: top-left (R300/R500 only), bottom-left,
     * bottom-right, top-right.  Destination coordinates are passed as floats;
     * source and mask coordinates are converted with xFixedToFloat() and
     * divided by the texW/texH entry of their texture unit (0 = source,
     * 1 = mask).
     */
2400    if (info->accel_state->msk_pic) {
2401	if (IS_R300_3D || IS_R500_3D) {
2402	    VTX_OUT_MASK((float)dstX,                                      (float)dstY,
2403			 xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0],
2404			 xFixedToFloat(maskTopLeft.x) / info->accel_state->texW[1],     xFixedToFloat(maskTopLeft.y) / info->accel_state->texH[1]);
2405	}
2406	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
2407		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0],
2408		xFixedToFloat(maskBottomLeft.x) / info->accel_state->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->accel_state->texH[1]);
2409	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
2410		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0],
2411		xFixedToFloat(maskBottomRight.x) / info->accel_state->texW[1], xFixedToFloat(maskBottomRight.y) / info->accel_state->texH[1]);
2412	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
2413		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0],
2414		xFixedToFloat(maskTopRight.x) / info->accel_state->texW[1],    xFixedToFloat(maskTopRight.y) / info->accel_state->texH[1]);
2415    } else {
2416	if (IS_R300_3D || IS_R500_3D) {
2417	    VTX_OUT((float)dstX,                                      (float)dstY,
2418		    xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0]);
2419	}
2420	VTX_OUT((float)dstX,                                      (float)(dstY + h),
2421		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0]);
2422	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
2423		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0]);
2424	VTX_OUT((float)(dstX + w),                                (float)dstY,
2425		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
2426    }
2427
    /* Close the vertex data block opened by BEGIN_RING / BEGIN_ACCEL above. */
2428#ifdef ACCEL_CP
2429    ADVANCE_RING();
2430#else
2431    FINISH_ACCEL();
2432#endif /* !ACCEL_CP */
2433
2434    LEAVE_DRAW(0);
2435}
/* The vertex emission macros are not used past this point. */
2436#undef VTX_OUT
2437#undef VTX_OUT_MASK
2438
2439static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
2440				       int srcX, int srcY,
2441				       int maskX, int maskY,
2442				       int dstX, int dstY,
2443				       int width, int height)
2444{
2445    int tileSrcY, tileMaskY, tileDstY;
2446    int remainingHeight;
2447    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2448
2449    if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) {
2450	FUNC_NAME(RadeonCompositeTile)(pScrn,
2451				       info,
2452				       pDst,
2453				       srcX, srcY,
2454				       maskX, maskY,
2455				       dstX, dstY,
2456				       width, height);
2457	return;
2458    }
2459
2460    /* Tiling logic borrowed from exaFillRegionTiled */
2461
2462    modulus(srcY, info->accel_state->src_tile_height, tileSrcY);
2463    tileMaskY = maskY;
2464    tileDstY = dstY;
2465
2466    remainingHeight = height;
2467    while (remainingHeight > 0) {
2468	int remainingWidth = width;
2469	int tileSrcX, tileMaskX, tileDstX;
2470	int h = info->accel_state->src_tile_height - tileSrcY;
2471
2472	if (h > remainingHeight)
2473	    h = remainingHeight;
2474	remainingHeight -= h;
2475
2476	modulus(srcX, info->accel_state->src_tile_width, tileSrcX);
2477	tileMaskX = maskX;
2478	tileDstX = dstX;
2479
2480	while (remainingWidth > 0) {
2481	    int w = info->accel_state->src_tile_width - tileSrcX;
2482	    if (w > remainingWidth)
2483		w = remainingWidth;
2484	    remainingWidth -= w;
2485
2486	    FUNC_NAME(RadeonCompositeTile)(pScrn,
2487					   info,
2488					   pDst,
2489					   tileSrcX, tileSrcY,
2490					   tileMaskX, tileMaskY,
2491					   tileDstX, tileDstY,
2492					   w, h);
2493
2494	    tileSrcX = 0;
2495	    tileMaskX += w;
2496	    tileDstX += w;
2497	}
2498	tileSrcY = 0;
2499	tileMaskY += h;
2500	tileDstY += h;
2501    }
2502}
2503
2504#undef ONLY_ONCE
2505#undef FUNC_NAME
2506