radeon_exa_render.c revision c4ae5be6
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *    Alex Deucher <alexander.deucher@amd.com>
30 *
31 */
32
33#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
34#error Cannot define both MMIO and CP acceleration!
35#endif
36
37#if !defined(UNIXCPP) || defined(ANSICPP)
38#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
39#else
40#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
41#endif
42
43#ifdef ACCEL_MMIO
44#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
45#else
46#ifdef ACCEL_CP
47#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
48#else
49#error No accel type defined!
50#endif
51#endif
52
53#ifndef ACCEL_CP
54#define ONLY_ONCE
55#endif
56
57/* Only include the following (generic) bits once. */
58#ifdef ONLY_ONCE
59
60struct blendinfo {
61    Bool dst_alpha;
62    Bool src_alpha;
63    uint32_t blend_cntl;
64};
65
66static struct blendinfo RadeonBlendOp[] = {
67    /* Clear */
68    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
69    /* Src */
70    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
71    /* Dst */
72    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
73    /* Over */
74    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
75    /* OverReverse */
76    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
77    /* In */
78    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
79    /* InReverse */
80    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
81    /* Out */
82    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
83    /* OutReverse */
84    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
85    /* Atop */
86    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
87    /* AtopReverse */
88    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
89    /* Xor */
90    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
91    /* Add */
92    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
93};
94
/* Associates a picture format with the texture format bits used for it. */
struct formatinfo {
    int      fmt;	/* PICT_* picture format */
    uint32_t card_fmt;	/* texture format value paired with that picture format */
};
99
100/* Note on texture formats:
101 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
102 */
103static struct formatinfo R100TexFormats[] = {
104	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
105	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
106	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
107	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
108	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
109	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
110};
111
112static struct formatinfo R200TexFormats[] = {
113    {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
114    {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
115    {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
116    {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
117    {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
118    {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
119    {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
120    {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
121};
122
123static struct formatinfo R300TexFormats[] = {
124    {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
125    {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
126    {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
127    {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
128#ifdef PICT_TYPE_BGRA
129    {PICT_b8g8r8a8,	R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)},
130    {PICT_b8g8r8x8,	R300_EASY_TX_FORMAT(W, Z, Y, ONE, W8Z8Y8X8)},
131#endif
132    {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
133    {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
134    {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
135    {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
136};
137
138/* Common Radeon setup code */
139
140static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
141{
142    switch (pDstPicture->format) {
143    case PICT_a8r8g8b8:
144    case PICT_x8r8g8b8:
145	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
146	break;
147    case PICT_r5g6b5:
148	*dst_format = RADEON_COLOR_FORMAT_RGB565;
149	break;
150    case PICT_a1r5g5b5:
151    case PICT_x1r5g5b5:
152	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
153	break;
154    case PICT_a8:
155	*dst_format = RADEON_COLOR_FORMAT_RGB8;
156	break;
157    default:
158	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
159			(int)pDstPicture->format));
160    }
161
162    return TRUE;
163}
164
165static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
166{
167    switch (pDstPicture->format) {
168    case PICT_a8r8g8b8:
169    case PICT_x8r8g8b8:
170    case PICT_a8b8g8r8:
171    case PICT_x8b8g8r8:
172#ifdef PICT_TYPE_BGRA
173    case PICT_b8g8r8a8:
174    case PICT_b8g8r8x8:
175#endif
176	*dst_format = R300_COLORFORMAT_ARGB8888;
177	break;
178    case PICT_r5g6b5:
179	*dst_format = R300_COLORFORMAT_RGB565;
180	break;
181    case PICT_a1r5g5b5:
182    case PICT_x1r5g5b5:
183	*dst_format = R300_COLORFORMAT_ARGB1555;
184	break;
185    case PICT_a8:
186	*dst_format = R300_COLORFORMAT_I8;
187	break;
188    default:
189	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
190	       (int)pDstPicture->format));
191    }
192    return TRUE;
193}
194
195static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
196{
197    uint32_t sblend, dblend;
198
199    sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK;
200    dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK;
201
202    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
203     * it as always 1.
204     */
205    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
206	if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
207	    sblend = RADEON_SRC_BLEND_GL_ONE;
208	else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
209	    sblend = RADEON_SRC_BLEND_GL_ZERO;
210    }
211
212    /* If the source alpha is being used, then we should only be in a case where
213     * the source blend factor is 0, and the source blend value is the mask
214     * channels multiplied by the source picture's alpha.
215     */
216    if (pMask && pMask->componentAlpha && RadeonBlendOp[op].src_alpha) {
217	if (dblend == RADEON_DST_BLEND_GL_SRC_ALPHA) {
218	    dblend = RADEON_DST_BLEND_GL_SRC_COLOR;
219	} else if (dblend == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA) {
220	    dblend = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
221	}
222    }
223
224    return sblend | dblend;
225}
226
/* A float and a 32-bit unsigned integer overlaid on the same storage. */
union intfloat {
    float    f;	/* value viewed as a float */
    uint32_t i;	/* same storage viewed as a 32-bit integer */
};
231
232/* Check if we need a software-fallback because of a repeating
233 *   non-power-of-two texture.
234 *
235 * canTile: whether we can emulate a repeat by drawing in tiles:
236 *   possible for the source, but not for the mask. (Actually
237 *   we could do tiling for the mask too, but dealing with the
238 *   combination of a tiled mask and a tiled source would be
239 *   a lot of complexity, so we handle only the most common
240 *   case of a repeating mask.)
241 */
242static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
243{
244    int w = pPict->pDrawable->width;
245    int h = pPict->pDrawable->height;
246    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
247
248    if ((repeatType == RepeatNormal || repeatType == RepeatReflect) &&
249	((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
250	!(repeatType == RepeatNormal && !pPict->transform && canTile))
251	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
252			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
253
254    return TRUE;
255}
256
257/* Determine if the pitch of the pixmap meets the criteria for being
258 * used as a repeating texture: no padding or only a single line texture.
259 */
260static Bool RADEONPitchMatches(PixmapPtr pPix)
261{
262    int w = pPix->drawable.width;
263    int h = pPix->drawable.height;
264    uint32_t txpitch = exaGetPixmapPitch(pPix);
265
266    if (h > 1 && (RADEON_ALIGN(w * pPix->drawable.bitsPerPixel / 8, 32)) != txpitch)
267	return FALSE;
268
269    return TRUE;
270}
271
272/* We can't turn on repeats normally for a non-power-of-two dimension,
273 * but if the source isn't transformed, we can get the same effect
274 * by drawing the image in multiple tiles. (A common case that it's
275 * important to get right is drawing a strip of a NPOTxPOT texture
276 * repeating in the POT direction. With tiling, this ends up as a
277 * a single tile on R300 and newer, which is perfect.)
278 *
279 * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
280 *   one direction and POT in the other in the POT direction; on
281 *   older chips we can only repeat at all if the texture is POT in
282 *   both directions.
283 *
284 * needMatchingPitch: On R100/R200, we can only repeat horizontally if
285 *   there is no padding in the texture. Textures with small POT widths
286 *   (1,2,4,8) thus can't be tiled.
287 */
288static Bool RADEONSetupSourceTile(PicturePtr pPict,
289				  PixmapPtr pPix,
290				  Bool canTile1d,
291				  Bool needMatchingPitch)
292{
293    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
294    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
295
296    info->accel_state->need_src_tile_x = info->accel_state->need_src_tile_y = FALSE;
297    info->accel_state->src_tile_width = info->accel_state->src_tile_height = 65536; /* "infinite" */
298
299    if (repeatType == RepeatNormal || repeatType == RepeatReflect) {
300	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
301
302	int w = pPict->pDrawable->width;
303	int h = pPict->pDrawable->height;
304
305	if (pPict->transform) {
306	    if (badPitch)
307		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
308				 w, (unsigned)exaGetPixmapPitch(pPix)));
309	} else {
310	    info->accel_state->need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
311	    info->accel_state->need_src_tile_y = (h & (h - 1)) != 0;
312
313	    if ((info->accel_state->need_src_tile_x ||
314		 info->accel_state->need_src_tile_y) &&
315		repeatType != RepeatNormal)
316		RADEON_FALLBACK(("Can only tile RepeatNormal at this time\n"));
317
318	    if (!canTile1d)
319		info->accel_state->need_src_tile_x =
320		    info->accel_state->need_src_tile_y =
321		    info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y;
322	}
323
324	if (info->accel_state->need_src_tile_x)
325	    info->accel_state->src_tile_width = w;
326	if (info->accel_state->need_src_tile_y)
327	    info->accel_state->src_tile_height = h;
328    }
329
330    return TRUE;
331}
332
333/* R100-specific code */
334
335static Bool R100CheckCompositeTexture(PicturePtr pPict,
336				      PicturePtr pDstPict,
337				      int op,
338				      int unit)
339{
340    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
341    int w = pPict->pDrawable->width;
342    int h = pPict->pDrawable->height;
343    int i;
344
345    /* r100 limit should be 2048, there are issues with 2048
346     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
347     */
348
349    if ((w > 2047) || (h > 2047))
350	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
351
352    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
353	if (R100TexFormats[i].fmt == pPict->format)
354	    break;
355    }
356    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
357	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
358			(int)pPict->format));
359
360    if (!RADEONCheckTexturePOT(pPict, unit == 0))
361	return FALSE;
362
363    if (pPict->filter != PictFilterNearest &&
364	pPict->filter != PictFilterBilinear)
365    {
366	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
367    }
368
369    /* for REPEAT_NONE, Render semantics are that sampling outside the source
370     * picture results in alpha=0 pixels. We can implement this with a border color
371     * *if* our source texture has an alpha channel, otherwise we need to fall
372     * back. If we're not transformed then we hope that upper layers have clipped
373     * rendering to the bounds of the source drawable, in which case it doesn't
374     * matter. I have not, however, verified that the X server always does such
375     * clipping.
376     */
377    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
378	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
379	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
380    }
381
382    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
383	RADEON_FALLBACK(("non-affine transforms not supported\n"));
384
385    return TRUE;
386}
387
388#endif /* ONLY_ONCE */
389
390static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
391					int unit)
392{
393    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
394    uint32_t txfilter, txformat, txoffset, txpitch;
395    int w = pPict->pDrawable->width;
396    int h = pPict->pDrawable->height;
397    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
398    Bool repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
399	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
400    int i;
401    struct radeon_exa_pixmap_priv *driver_priv;
402    ACCEL_PREAMBLE();
403
404    txpitch = exaGetPixmapPitch(pPix);
405    txoffset = 0;
406
407    CHECK_OFFSET(pPix, 0x1f, "texture");
408
409    if ((txpitch & 0x1f) != 0)
410	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
411
412    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
413    {
414	if (R100TexFormats[i].fmt == pPict->format)
415	    break;
416    }
417    txformat = R100TexFormats[i].card_fmt;
418    if (RADEONPixmapIsColortiled(pPix))
419	txoffset |= RADEON_TXO_MACRO_TILE;
420
421    if (repeat) {
422	if (!RADEONPitchMatches(pPix))
423	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
424			     w, (unsigned)txpitch));
425
426	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
427	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
428    } else
429	txformat |= RADEON_TXFORMAT_NON_POWER2;
430    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
431
432    info->accel_state->texW[unit] = w;
433    info->accel_state->texH[unit] = h;
434
435    switch (pPict->filter) {
436    case PictFilterNearest:
437	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
438	break;
439    case PictFilterBilinear:
440	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
441	break;
442    default:
443	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
444    }
445
446    switch (repeatType) {
447	case RepeatNormal:
448	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
449		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
450	    else
451	        txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
452	    break;
453	case RepeatPad:
454	    txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
455	    break;
456	case RepeatReflect:
457	    txfilter |= RADEON_CLAMP_S_MIRROR | RADEON_CLAMP_T_MIRROR;
458	    break;
459	case RepeatNone:
460	    /* don't set an illegal clamp mode for rects */
461	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
462		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
463	    break;
464    }
465
466    BEGIN_ACCEL_RELOC(5, 1);
467    if (unit == 0) {
468	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
469	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
470	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
471	    (pPix->drawable.width - 1) |
472	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
473	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
474
475	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_0, txoffset, pPix);
476	/* emit a texture relocation */
477    } else {
478	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
479	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
480
481	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
482	    (pPix->drawable.width - 1) |
483	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
484	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
485	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_1, txoffset, pPix);
486	/* emit a texture relocation */
487    }
488    FINISH_ACCEL();
489
490    if (pPict->transform != 0) {
491	info->accel_state->is_transform[unit] = TRUE;
492	info->accel_state->transform[unit] = pPict->transform;
493    } else {
494	info->accel_state->is_transform[unit] = FALSE;
495    }
496
497    return TRUE;
498}
499
500#ifdef ONLY_ONCE
501
502
503static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
504			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
505{
506    PixmapPtr pSrcPixmap, pDstPixmap;
507    uint32_t tmp1;
508
509    /* Check for unsupported compositing operations. */
510    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
511	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
512
513    if (!pSrcPicture->pDrawable)
514	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
515
516    /* r100 limit should be 2048, there are issues with 2048
517     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
518     */
519
520    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
521
522    if (pSrcPixmap->drawable.width > 2047 ||
523	pSrcPixmap->drawable.height > 2047) {
524	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
525			 pSrcPixmap->drawable.width,
526			 pSrcPixmap->drawable.height));
527    }
528
529    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
530
531    if (pDstPixmap->drawable.width > 2047 ||
532	pDstPixmap->drawable.height > 2047) {
533	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
534			 pDstPixmap->drawable.width,
535			 pDstPixmap->drawable.height));
536    }
537
538    if (pMaskPicture) {
539	PixmapPtr pMaskPixmap;
540
541	if (!pMaskPicture->pDrawable)
542	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
543
544	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
545
546	if (pMaskPixmap->drawable.width > 2047 ||
547	    pMaskPixmap->drawable.height > 2047) {
548	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
549			     pMaskPixmap->drawable.width,
550			     pMaskPixmap->drawable.height));
551	}
552
553	if (pMaskPicture->componentAlpha) {
554	    /* Check if it's component alpha that relies on a source alpha and
555	     * on the source value.  We can only get one of those into the
556	     * single source value that we get to blend with.
557	     */
558	    if (RadeonBlendOp[op].src_alpha &&
559		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
560		RADEON_SRC_BLEND_GL_ZERO) {
561		RADEON_FALLBACK(("Component alpha not supported with source "
562				 "alpha and source value blending.\n"));
563	    }
564	}
565
566	if (!R100CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
567	    return FALSE;
568    }
569
570    if (!R100CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
571	return FALSE;
572
573    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
574	return FALSE;
575
576    return TRUE;
577}
578
579static Bool
580RADEONPrepareCompositeCS(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
581			    PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask,
582			    PixmapPtr pDst)
583{
584    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
585
586    info->accel_state->composite_op = op;
587    info->accel_state->dst_pic = pDstPicture;
588    info->accel_state->msk_pic = pMaskPicture;
589    info->accel_state->src_pic = pSrcPicture;
590    info->accel_state->dst_pix = pDst;
591    info->accel_state->msk_pix = pMask;
592    info->accel_state->src_pix = pSrc;
593
594#ifdef XF86DRM_MODE
595    if (info->cs) {
596	int ret;
597
598	radeon_cs_space_reset_bos(info->cs);
599
600	radeon_add_pixmap(info->cs, pSrc,
601			  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
602
603	if (pMask)
604	    radeon_add_pixmap(info->cs, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
605
606	radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
607
608	ret = radeon_cs_space_check(info->cs);
609	if (ret)
610	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
611    }
612#endif
613
614    return TRUE;
615}
616
617#endif /* ONLY_ONCE */
618
619static Bool FUNC_NAME(R100PrepareComposite)(int op,
620					    PicturePtr pSrcPicture,
621					    PicturePtr pMaskPicture,
622					    PicturePtr pDstPicture,
623					    PixmapPtr pSrc,
624					    PixmapPtr pMask,
625					    PixmapPtr pDst)
626{
627    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
628    uint32_t dst_format, dst_pitch, colorpitch;
629    uint32_t pp_cntl, blendcntl, cblend, ablend;
630    int pixel_shift;
631    struct radeon_exa_pixmap_priv *driver_priv;
632    ACCEL_PREAMBLE();
633
634    TRACE;
635
636    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
637	return FALSE;
638
639    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
640	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
641
642    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
643
644    dst_pitch = exaGetPixmapPitch(pDst);
645    colorpitch = dst_pitch >> pixel_shift;
646    if (RADEONPixmapIsColortiled(pDst))
647	colorpitch |= RADEON_COLOR_TILE_ENABLE;
648
649    CHECK_OFFSET(pDst, 0x0f, "destination");
650
651    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
652	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
653
654    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
655	return FALSE;
656
657    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
658			     pSrc, pMask, pDst);
659
660    /* switch to 3D after doing buffer space checks as the latter may flush */
661    RADEON_SWITCH_TO_3D();
662
663    if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
664	return FALSE;
665    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
666
667    if (pMask != NULL) {
668	if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1))
669	    return FALSE;
670	pp_cntl |= RADEON_TEX_1_ENABLE;
671    } else {
672	info->accel_state->is_transform[1] = FALSE;
673    }
674
675    BEGIN_ACCEL_RELOC(10, 2);
676    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
677    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
678    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
679    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
680
681    /* IN operator: Multiply src by mask components or mask alpha.
682     * BLEND_CTL_ADD is A * B + C.
683     * If a source is a8, we have to explicitly zero its color values.
684     * If the destination is a8, we have to route the alpha to red, I think.
685     * If we're doing component alpha where the source for blending is going to
686     * be the source alpha (and there's no source value used), we have to zero
687     * the source's color values.
688     */
689    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
690    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
691
692    if (pDstPicture->format == PICT_a8 ||
693	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
694    {
695	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
696    } else if (pSrcPicture->format == PICT_a8)
697	cblend |= RADEON_COLOR_ARG_A_ZERO;
698    else
699	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
700    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
701
702    if (pMask) {
703	if (pMaskPicture->componentAlpha &&
704	    pDstPicture->format != PICT_a8)
705	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
706	else
707	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
708	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
709    } else {
710	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
711	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
712    }
713
714    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
715    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
716    if (pMask)
717	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
718					  RADEON_SE_VTX_FMT_ST0 |
719					  RADEON_SE_VTX_FMT_ST1));
720    else
721	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
722					  RADEON_SE_VTX_FMT_ST0));
723    /* Op operator. */
724    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
725
726    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
727
728    OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0);
729    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) |
730					   ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT)));
731    FINISH_ACCEL();
732
733    return TRUE;
734}
735
736#ifdef ONLY_ONCE
737
738static Bool R200CheckCompositeTexture(PicturePtr pPict,
739				      PicturePtr pDstPict,
740				      int op,
741				      int unit)
742{
743    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
744    int w = pPict->pDrawable->width;
745    int h = pPict->pDrawable->height;
746    int i;
747
748    /* r200 limit should be 2048, there are issues with 2048
749     * see bug 19269
750     */
751
752    if ((w > 2047) || (h > 2047))
753	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
754
755    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
756    {
757	if (R200TexFormats[i].fmt == pPict->format)
758	    break;
759    }
760    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
761	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
762			 (int)pPict->format));
763
764    if (!RADEONCheckTexturePOT(pPict, unit == 0))
765	return FALSE;
766
767    if (pPict->filter != PictFilterNearest &&
768	pPict->filter != PictFilterBilinear)
769	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
770
771    /* for REPEAT_NONE, Render semantics are that sampling outside the source
772     * picture results in alpha=0 pixels. We can implement this with a border color
773     * *if* our source texture has an alpha channel, otherwise we need to fall
774     * back. If we're not transformed then we hope that upper layers have clipped
775     * rendering to the bounds of the source drawable, in which case it doesn't
776     * matter. I have not, however, verified that the X server always does such
777     * clipping.
778     */
779    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
780	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
781	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
782    }
783
784    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
785	RADEON_FALLBACK(("non-affine transforms not supported\n"));
786
787    return TRUE;
788}
789
790#endif /* ONLY_ONCE */
791
792static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
793					int unit)
794{
795    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
796    uint32_t txfilter, txformat, txoffset, txpitch;
797    int w = pPict->pDrawable->width;
798    int h = pPict->pDrawable->height;
799    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
800    Bool repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
801	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
802    int i;
803    struct radeon_exa_pixmap_priv *driver_priv;
804    ACCEL_PREAMBLE();
805
806    txpitch = exaGetPixmapPitch(pPix);
807
808    txoffset = 0;
809    CHECK_OFFSET(pPix, 0x1f, "texture");
810
811    if ((txpitch & 0x1f) != 0)
812	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
813
814    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
815    {
816	if (R200TexFormats[i].fmt == pPict->format)
817	    break;
818    }
819    txformat = R200TexFormats[i].card_fmt;
820    if (RADEONPixmapIsColortiled(pPix))
821	txoffset |= R200_TXO_MACRO_TILE;
822
823    if (repeat) {
824	if (!RADEONPitchMatches(pPix))
825	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
826			     w, (unsigned)txpitch));
827
828	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
829	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
830    } else
831	txformat |= R200_TXFORMAT_NON_POWER2;
832    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
833
834    info->accel_state->texW[unit] = w;
835    info->accel_state->texH[unit] = h;
836
837    switch (pPict->filter) {
838    case PictFilterNearest:
839	txfilter = (R200_MAG_FILTER_NEAREST |
840		    R200_MIN_FILTER_NEAREST);
841	break;
842    case PictFilterBilinear:
843	txfilter = (R200_MAG_FILTER_LINEAR |
844		    R200_MIN_FILTER_LINEAR);
845	break;
846    default:
847	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
848    }
849
850    switch (repeatType) {
851	case RepeatNormal:
852	    if (txformat & R200_TXFORMAT_NON_POWER2)
853		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
854	    else
855	        txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
856	    break;
857	case RepeatPad:
858	    txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
859	    break;
860	case RepeatReflect:
861	    txfilter |= R200_CLAMP_S_MIRROR | R200_CLAMP_T_MIRROR;
862	    break;
863	case RepeatNone:
864	    /* don't set an illegal clamp mode for rect textures */
865	    if (txformat & R200_TXFORMAT_NON_POWER2)
866		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
867	    break;
868    }
869
870    BEGIN_ACCEL_RELOC(6, 1);
871    if (unit == 0) {
872	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
873	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
874	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
875	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
876		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
877	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
878	EMIT_READ_OFFSET(R200_PP_TXOFFSET_0, txoffset, pPix);
879    } else {
880	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
881	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
882	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
883	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
884		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
885	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
886	EMIT_READ_OFFSET(R200_PP_TXOFFSET_1, txoffset, pPix);
887	/* emit a texture relocation */
888    }
889    FINISH_ACCEL();
890
891    if (pPict->transform != 0) {
892	info->accel_state->is_transform[unit] = TRUE;
893	info->accel_state->transform[unit] = pPict->transform;
894    } else {
895	info->accel_state->is_transform[unit] = FALSE;
896    }
897
898    return TRUE;
899}
900
901#ifdef ONLY_ONCE
902static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
903			       PicturePtr pDstPicture)
904{
905    PixmapPtr pSrcPixmap, pDstPixmap;
906    uint32_t tmp1;
907
908    TRACE;
909
910    /* Check for unsupported compositing operations. */
911    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
912	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
913
914    if (!pSrcPicture->pDrawable)
915	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
916
917    /* r200 limit should be 2048, there are issues with 2048
918     * see bug 19269
919     */
920
921    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
922
923    if (pSrcPixmap->drawable.width > 2047 ||
924	pSrcPixmap->drawable.height > 2047) {
925	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
926			 pSrcPixmap->drawable.width,
927			 pSrcPixmap->drawable.height));
928    }
929
930    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
931
932    if (pDstPixmap->drawable.width > 2047 ||
933	pDstPixmap->drawable.height > 2047) {
934	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
935			 pDstPixmap->drawable.width,
936			 pDstPixmap->drawable.height));
937    }
938
939    if (pMaskPicture) {
940	PixmapPtr pMaskPixmap;
941
942	if (!pMaskPicture->pDrawable)
943	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
944
945	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
946
947	if (pMaskPixmap->drawable.width > 2047 ||
948	    pMaskPixmap->drawable.height > 2047) {
949	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
950			     pMaskPixmap->drawable.width,
951			     pMaskPixmap->drawable.height));
952	}
953
954	if (pMaskPicture->componentAlpha) {
955	    /* Check if it's component alpha that relies on a source alpha and
956	     * on the source value.  We can only get one of those into the
957	     * single source value that we get to blend with.
958	     */
959	    if (RadeonBlendOp[op].src_alpha &&
960		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
961		RADEON_SRC_BLEND_GL_ZERO) {
962		RADEON_FALLBACK(("Component alpha not supported with source "
963				 "alpha and source value blending.\n"));
964	    }
965	}
966
967	if (!R200CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
968	    return FALSE;
969    }
970
971    if (!R200CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
972	return FALSE;
973
974    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
975	return FALSE;
976
977    return TRUE;
978}
979#endif /* ONLY_ONCE */
980
981static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
982				PicturePtr pMaskPicture, PicturePtr pDstPicture,
983				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
984{
985    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
986    uint32_t dst_format, dst_pitch;
987    uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
988    int pixel_shift;
989    struct radeon_exa_pixmap_priv *driver_priv;
990    ACCEL_PREAMBLE();
991
992    TRACE;
993
994    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
995	return FALSE;
996
997    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
998	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
999
1000    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1001
1002    dst_pitch = exaGetPixmapPitch(pDst);
1003    colorpitch = dst_pitch >> pixel_shift;
1004    if (RADEONPixmapIsColortiled(pDst))
1005	colorpitch |= RADEON_COLOR_TILE_ENABLE;
1006
1007    CHECK_OFFSET(pDst, 0xf, "destination");
1008
1009    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1010	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1011
1012    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
1013	return FALSE;
1014
1015    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1016			     pSrc, pMask, pDst);
1017
1018    /* switch to 3D after doing buffer space checks as it may flush */
1019    RADEON_SWITCH_TO_3D();
1020
1021    if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
1022	return FALSE;
1023    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
1024
1025    if (pMask != NULL) {
1026	if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1))
1027	    return FALSE;
1028	pp_cntl |= RADEON_TEX_1_ENABLE;
1029    } else {
1030	info->accel_state->is_transform[1] = FALSE;
1031    }
1032
1033    BEGIN_ACCEL_RELOC(12, 2);
1034
1035    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
1036    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
1037
1038    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
1039    EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
1040
1041    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
1042    if (pMask)
1043	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
1044		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
1045		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
1046    else
1047	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
1048		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
1049
1050
1051
1052    /* IN operator: Multiply src by mask components or mask alpha.
1053     * BLEND_CTL_ADD is A * B + C.
1054     * If a picture is a8, we have to explicitly zero its color values.
1055     * If the destination is a8, we have to route the alpha to red, I think.
1056     * If we're doing component alpha where the source for blending is going to
1057     * be the source alpha (and there's no source value used), we have to zero
1058     * the source's color values.
1059     */
1060    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
1061    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
1062
1063    if (pDstPicture->format == PICT_a8 ||
1064	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
1065    {
1066	cblend |= R200_TXC_ARG_A_R0_ALPHA;
1067    } else if (pSrcPicture->format == PICT_a8)
1068	cblend |= R200_TXC_ARG_A_ZERO;
1069    else
1070	cblend |= R200_TXC_ARG_A_R0_COLOR;
1071    ablend |= R200_TXA_ARG_A_R0_ALPHA;
1072
1073    if (pMask) {
1074	if (pMaskPicture->componentAlpha &&
1075	    pDstPicture->format != PICT_a8)
1076	    cblend |= R200_TXC_ARG_B_R1_COLOR;
1077	else
1078	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
1079	ablend |= R200_TXA_ARG_B_R1_ALPHA;
1080    } else {
1081	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
1082	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
1083    }
1084
1085    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend);
1086    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
1087	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
1088    OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend);
1089    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
1090	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
1091
1092    /* Op operator. */
1093    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1094    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
1095
1096    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width) << RADEON_RE_WIDTH_SHIFT) |
1097					   ((pDst->drawable.height) << RADEON_RE_HEIGHT_SHIFT)));
1098
1099    FINISH_ACCEL();
1100
1101    return TRUE;
1102}
1103
1104#ifdef ONLY_ONCE
1105
1106static Bool R300CheckCompositeTexture(PicturePtr pPict,
1107				      PicturePtr pDstPict,
1108				      int op,
1109				      int unit,
1110				      Bool is_r500)
1111{
1112    ScreenPtr pScreen = pDstPict->pDrawable->pScreen;
1113    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1114    RADEONInfoPtr info = RADEONPTR(pScrn);
1115
1116    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1117    int w = pPict->pDrawable->width;
1118    int h = pPict->pDrawable->height;
1119    int i;
1120    int max_tex_w, max_tex_h;
1121
1122    if (is_r500) {
1123	max_tex_w = 4096;
1124	max_tex_h = 4096;
1125    } else {
1126	max_tex_w = 2048;
1127	max_tex_h = 2048;
1128    }
1129
1130    if ((w > max_tex_w) || (h > max_tex_h))
1131	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1132
1133    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1134    {
1135	if (R300TexFormats[i].fmt == pPict->format)
1136	    break;
1137    }
1138    if (i == sizeof(R300TexFormats) / sizeof(R300TexFormats[0]))
1139	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1140			 (int)pPict->format));
1141
1142    if (!RADEONCheckTexturePOT(pPict, unit == 0)) {
1143	if (info->cs) {
1144    		struct radeon_exa_pixmap_priv *driver_priv;
1145		PixmapPtr pPix;
1146
1147    		pPix = RADEONGetDrawablePixmap(pPict->pDrawable);
1148		driver_priv = exaGetPixmapDriverPrivate(pPix);
1149		//TODOradeon_bufmgr_gem_force_gtt(driver_priv->bo);
1150	}
1151	return FALSE;
1152    }
1153
1154    if (pPict->filter != PictFilterNearest &&
1155	pPict->filter != PictFilterBilinear)
1156	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1157
1158    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1159     * picture results in alpha=0 pixels. We can implement this with a border color
1160     * *if* our source texture has an alpha channel, otherwise we need to fall
1161     * back. If we're not transformed then we hope that upper layers have clipped
1162     * rendering to the bounds of the source drawable, in which case it doesn't
1163     * matter. I have not, however, verified that the X server always does such
1164     * clipping.
1165     */
1166    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
1167	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1168	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1169    }
1170
1171    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
1172	RADEON_FALLBACK(("non-affine transforms not supported\n"));
1173
1174    return TRUE;
1175}
1176
1177#endif /* ONLY_ONCE */
1178
1179static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
1180					int unit)
1181{
1182    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1183    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0;
1184    int w = pPict->pDrawable->width;
1185    int h = pPict->pDrawable->height;
1186    int i, pixel_shift, out_size = 6;
1187    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1188    struct radeon_exa_pixmap_priv *driver_priv;
1189    ACCEL_PREAMBLE();
1190
1191    TRACE;
1192
1193    txpitch = exaGetPixmapPitch(pPix);
1194    txoffset = 0;
1195
1196    CHECK_OFFSET(pPix, 0x1f, "texture");
1197
1198    if ((txpitch & 0x1f) != 0)
1199	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1200
1201    /* TXPITCH = pixels (texels) per line - 1 */
1202    pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1203    txpitch >>= pixel_shift;
1204    txpitch -= 1;
1205
1206    if (RADEONPixmapIsColortiled(pPix))
1207	txoffset |= R300_MACRO_TILE;
1208
1209    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1210    {
1211	if (R300TexFormats[i].fmt == pPict->format)
1212	    break;
1213    }
1214
1215    txformat1 = R300TexFormats[i].card_fmt;
1216
1217    if (IS_R300_3D) {
1218	if ((unit == 0) && info->accel_state->msk_pic)
1219	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_0;
1220	else if (unit == 1)
1221	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_1;
1222    }
1223
1224    txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1225		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1226
1227    if (IS_R500_3D && ((w - 1) & 0x800))
1228	txpitch |= R500_TXWIDTH_11;
1229
1230    if (IS_R500_3D && ((h - 1) & 0x800))
1231	txpitch |= R500_TXHEIGHT_11;
1232
1233    if (info->ChipFamily == CHIP_FAMILY_R520) {
1234	unsigned us_width = (w - 1) & 0x7ff;
1235	unsigned us_height = (h - 1) & 0x7ff;
1236	unsigned us_depth = 0;
1237
1238	if (w > 2048) {
1239	    us_width = (0x7ff + us_width) >> 1;
1240	    us_depth |= 0x0d;
1241	}
1242	if (h > 2048) {
1243	    us_height = (0x7ff + us_height) >> 1;
1244	    us_depth |= 0x0e;
1245	}
1246
1247	us_format = (us_width << R300_TXWIDTH_SHIFT) |
1248		    (us_height << R300_TXHEIGHT_SHIFT) |
1249		    (us_depth << R300_TXDEPTH_SHIFT);
1250	out_size++;
1251    }
1252
1253    /* Use TXPITCH instead of TXWIDTH for address computations: we could
1254     * omit this if there is no padding, but there is no apparent advantage
1255     * in doing so.
1256     */
1257    txformat0 |= R300_TXPITCH_EN;
1258
1259    txfilter = (unit << R300_TX_ID_SHIFT);
1260
1261    switch (repeatType) {
1262    case RepeatNormal:
1263	if (unit != 0 || !info->accel_state->need_src_tile_x)
1264	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1265	else
1266	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1267
1268	if (unit != 0 || !info->accel_state->need_src_tile_y)
1269	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1270	else
1271	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1272
1273	break;
1274    case RepeatPad:
1275	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1276	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST);
1277	break;
1278    case RepeatReflect:
1279	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_MIRROR) |
1280	    R300_TX_CLAMP_T(R300_TX_CLAMP_MIRROR);
1281	break;
1282    case RepeatNone:
1283	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1284	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1285	break;
1286    }
1287
1288    switch (pPict->filter) {
1289    case PictFilterNearest:
1290	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1291	break;
1292    case PictFilterBilinear:
1293	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1294	break;
1295    default:
1296	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1297    }
1298
1299    if (repeatType == RepeatNone)
1300	out_size++;
1301    BEGIN_ACCEL_RELOC(out_size, 1);
1302    OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1303    OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1304    OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1305    OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1306    OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1307
1308    EMIT_READ_OFFSET((R300_TX_OFFSET_0 + (unit * 4)), txoffset, pPix);
1309
1310    if (repeatType == RepeatNone)
1311	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1312    if (info->ChipFamily == CHIP_FAMILY_R520)
1313	OUT_ACCEL_REG(R500_US_FORMAT0_0 + (unit * 4), us_format);
1314    FINISH_ACCEL();
1315
1316    if (pPict->transform != 0) {
1317	info->accel_state->is_transform[unit] = TRUE;
1318	info->accel_state->transform[unit] = pPict->transform;
1319
1320	/* setup the PVS consts */
1321	if (info->accel_state->has_tcl) {
1322	    info->accel_state->texW[unit] = 1;
1323	    info->accel_state->texH[unit] = 1;
1324	    BEGIN_ACCEL(9);
1325	    if (IS_R300_3D)
1326		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1327	    else
1328		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1329
1330	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][0])));
1331	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][1])));
1332	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][2])));
1333	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1334
1335	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][0])));
1336	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][1])));
1337	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][2])));
1338	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1339
1340	    FINISH_ACCEL();
1341	} else {
1342	    info->accel_state->texW[unit] = w;
1343	    info->accel_state->texH[unit] = h;
1344	}
1345    } else {
1346	info->accel_state->is_transform[unit] = FALSE;
1347
1348	/* setup the PVS consts */
1349	if (info->accel_state->has_tcl) {
1350	    info->accel_state->texW[unit] = 1;
1351	    info->accel_state->texH[unit] = 1;
1352
1353	    BEGIN_ACCEL(9);
1354	    if (IS_R300_3D)
1355		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1356	    else
1357		OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1358
1359	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1360	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1361	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1362	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1363
1364	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1365	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1366	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1367	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1368
1369	    FINISH_ACCEL();
1370	} else {
1371	    info->accel_state->texW[unit] = w;
1372	    info->accel_state->texH[unit] = h;
1373	}
1374    }
1375
1376    return TRUE;
1377}
1378
1379#ifdef ONLY_ONCE
1380
1381static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1382			       PicturePtr pDstPicture)
1383{
1384    uint32_t tmp1;
1385    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
1386    PixmapPtr pSrcPixmap, pDstPixmap;
1387    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1388    RADEONInfoPtr info = RADEONPTR(pScrn);
1389    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1390
1391    TRACE;
1392
1393    /* Check for unsupported compositing operations. */
1394    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
1395	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1396
1397    if (!pSrcPicture->pDrawable)
1398	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1399
1400    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1401
1402    if (IS_R500_3D) {
1403	max_tex_w = 4096;
1404	max_tex_h = 4096;
1405	max_dst_w = 4096;
1406	max_dst_h = 4096;
1407    } else {
1408	max_tex_w = 2048;
1409	max_tex_h = 2048;
1410	if (IS_R400_3D) {
1411	    max_dst_w = 4021;
1412	    max_dst_h = 4021;
1413	} else {
1414	    max_dst_w = 2560;
1415	    max_dst_h = 2560;
1416	}
1417    }
1418
1419    if (pSrcPixmap->drawable.width > max_tex_w ||
1420	pSrcPixmap->drawable.height > max_tex_h) {
1421	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1422			 pSrcPixmap->drawable.width,
1423			 pSrcPixmap->drawable.height));
1424    }
1425
1426    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1427
1428    if (pDstPixmap->drawable.width > max_dst_w ||
1429	pDstPixmap->drawable.height > max_dst_h) {
1430	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1431			 pDstPixmap->drawable.width,
1432			 pDstPixmap->drawable.height));
1433    }
1434
1435    if (pMaskPicture) {
1436	PixmapPtr pMaskPixmap;
1437
1438	if (!pMaskPicture->pDrawable)
1439	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1440
1441	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1442
1443	if (pMaskPixmap->drawable.width > max_tex_w ||
1444	    pMaskPixmap->drawable.height > max_tex_h) {
1445	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1446			     pMaskPixmap->drawable.width,
1447			     pMaskPixmap->drawable.height));
1448	}
1449
1450	if (pMaskPicture->componentAlpha) {
1451	    /* Check if it's component alpha that relies on a source alpha and
1452	     * on the source value.  We can only get one of those into the
1453	     * single source value that we get to blend with.
1454	     */
1455	    if (RadeonBlendOp[op].src_alpha &&
1456		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
1457		RADEON_SRC_BLEND_GL_ZERO) {
1458		RADEON_FALLBACK(("Component alpha not supported with source "
1459				 "alpha and source value blending.\n"));
1460	    }
1461	}
1462
1463	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
1464	    return FALSE;
1465    }
1466
1467    if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D))
1468	return FALSE;
1469
1470    if (!R300GetDestFormat(pDstPicture, &tmp1))
1471	return FALSE;
1472
1473    return TRUE;
1474
1475}
1476#endif /* ONLY_ONCE */
1477
1478static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
1479				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1480				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1481{
1482    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
1483    uint32_t dst_format, dst_pitch;
1484    uint32_t txenable, colorpitch;
1485    uint32_t blendcntl, output_fmt;
1486    uint32_t src_color, src_alpha;
1487    uint32_t mask_color, mask_alpha;
1488    int pixel_shift;
1489    struct radeon_exa_pixmap_priv *driver_priv;
1490    ACCEL_PREAMBLE();
1491    TRACE;
1492
1493    if (!R300GetDestFormat(pDstPicture, &dst_format))
1494	return FALSE;
1495
1496    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1497
1498    dst_pitch = exaGetPixmapPitch(pDst);
1499    colorpitch = dst_pitch >> pixel_shift;
1500
1501    if (RADEONPixmapIsColortiled(pDst))
1502	colorpitch |= R300_COLORTILE;
1503
1504    colorpitch |= dst_format;
1505
1506    CHECK_OFFSET(pDst, 0x0f, "destination");
1507
1508    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1509	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1510
1511    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1512	return FALSE;
1513
1514    RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1515			     pSrc, pMask, pDst);
1516
1517    /* have to execute switch after doing buffer sizing check as the latter flushes */
1518    RADEON_SWITCH_TO_3D();
1519
1520    if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
1521	return FALSE;
1522    txenable = R300_TEX_0_ENABLE;
1523
1524    if (pMask != NULL) {
1525	if (!FUNC_NAME(R300TextureSetup)(pMaskPicture, pMask, 1))
1526	    return FALSE;
1527	txenable |= R300_TEX_1_ENABLE;
1528    } else {
1529	info->accel_state->is_transform[1] = FALSE;
1530    }
1531
1532    /* setup the VAP */
1533    if (info->accel_state->has_tcl) {
1534	if (pMask)
1535	    BEGIN_ACCEL(10);
1536	else
1537	    BEGIN_ACCEL(9);
1538	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
1539    } else {
1540	if (pMask)
1541	    BEGIN_ACCEL(6);
1542	else
1543	    BEGIN_ACCEL(5);
1544    }
1545
1546    /* These registers define the number, type, and location of data submitted
1547     * to the PVS unit of GA input (when PVS is disabled)
1548     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
1549     * enabled.  This memory provides the imputs to the vertex shader program
1550     * and ordering is not important.  When PVS/TCL is disabled, this field maps
1551     * directly to the GA input memory and the order is signifigant.  In
1552     * PVS_BYPASS mode the order is as follows:
1553     * Position
1554     * Point Size
1555     * Color 0-3
1556     * Textures 0-7
1557     * Fog
1558     */
1559    if (pMask) {
1560	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1561		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1562		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1563		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1564		       R300_SIGNED_0 |
1565		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1566		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1567		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1568		       R300_SIGNED_1));
1569	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
1570		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1571		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1572		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1573		       R300_LAST_VEC_2 |
1574		       R300_SIGNED_2));
1575    } else
1576	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1577		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1578		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1579		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1580		       R300_SIGNED_0 |
1581		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1582		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1583		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1584		       R300_LAST_VEC_1 |
1585		       R300_SIGNED_1));
1586
1587    /* load the vertex shader
1588     * We pre-load vertex programs in RADEONInit3DEngine():
1589     * - exa
1590     * - Xv
1591     * - Xv bicubic
1592     * Here we select the offset of the vertex program we want to use
1593     */
1594    if (info->accel_state->has_tcl) {
1595	if (pMask) {
1596	    /* consts used by vertex shaders */
1597	    OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1598						    R300_PVS_MAX_CONST_ADDR(3)));
1599	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1600			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1601			   (8 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1602			   (8 << R300_PVS_LAST_INST_SHIFT)));
1603	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1604			  (8 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1605	} else {
1606	    /* consts used by vertex shaders */
1607	    OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1608						    R300_PVS_MAX_CONST_ADDR(3)));
1609	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1610			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1611			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1612			   (4 << R300_PVS_LAST_INST_SHIFT)));
1613	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1614			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1615	}
1616    }
1617
1618    /* Position and one or two sets of 2 texture coordinates */
1619    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1620    if (pMask)
1621	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1622		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1623		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1624    else
1625	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1626		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1627
1628    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
1629    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
1630    FINISH_ACCEL();
1631
1632    /* shader output swizzling */
1633    switch (pDstPicture->format) {
1634    case PICT_a8r8g8b8:
1635    case PICT_x8r8g8b8:
1636    default:
1637	output_fmt = (R300_OUT_FMT_C4_8 |
1638		      R300_OUT_FMT_C0_SEL_BLUE |
1639		      R300_OUT_FMT_C1_SEL_GREEN |
1640		      R300_OUT_FMT_C2_SEL_RED |
1641		      R300_OUT_FMT_C3_SEL_ALPHA);
1642	break;
1643    case PICT_a8b8g8r8:
1644    case PICT_x8b8g8r8:
1645	output_fmt = (R300_OUT_FMT_C4_8 |
1646		      R300_OUT_FMT_C0_SEL_RED |
1647		      R300_OUT_FMT_C1_SEL_GREEN |
1648		      R300_OUT_FMT_C2_SEL_BLUE |
1649		      R300_OUT_FMT_C3_SEL_ALPHA);
1650	break;
1651#ifdef PICT_TYPE_BGRA
1652    case PICT_b8g8r8a8:
1653    case PICT_b8g8r8x8:
1654	output_fmt = (R300_OUT_FMT_C4_8 |
1655		      R300_OUT_FMT_C0_SEL_ALPHA |
1656		      R300_OUT_FMT_C1_SEL_RED |
1657		      R300_OUT_FMT_C2_SEL_GREEN |
1658		      R300_OUT_FMT_C3_SEL_BLUE);
1659	break;
1660#endif
1661    case PICT_a8:
1662	output_fmt = (R300_OUT_FMT_C4_8 |
1663		      R300_OUT_FMT_C0_SEL_ALPHA);
1664	break;
1665    }
1666
1667    /* setup pixel shader */
1668    if (IS_R300_3D) {
1669	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1670	    src_color = R300_ALU_RGB_0_0;
1671	else
1672	    src_color = R300_ALU_RGB_SRC0_RGB;
1673
1674	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1675	    src_alpha = R300_ALU_ALPHA_1_0;
1676	else
1677	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1678
1679	if (pMask) {
1680	    if (pMaskPicture->componentAlpha) {
1681		if (RadeonBlendOp[op].src_alpha) {
1682		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1683			src_color = R300_ALU_RGB_1_0;
1684		    else
1685			src_color = R300_ALU_RGB_SRC0_AAA;
1686		} else
1687		    src_color = R300_ALU_RGB_SRC0_RGB;
1688		mask_color = R300_ALU_RGB_SRC1_RGB;
1689	    } else {
1690		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1691		    mask_color = R300_ALU_RGB_1_0;
1692		else
1693		    mask_color = R300_ALU_RGB_SRC1_AAA;
1694	    }
1695	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1696		mask_alpha = R300_ALU_ALPHA_1_0;
1697	    else
1698		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1699	} else {
1700	    mask_color = R300_ALU_RGB_1_0;
1701	    mask_alpha = R300_ALU_ALPHA_1_0;
1702	}
1703
1704	/* setup the rasterizer, load FS */
1705	if (pMask) {
1706	    BEGIN_ACCEL(16);
1707	    /* 4 components: 2 for tex0, 2 for tex1 */
1708	    OUT_ACCEL_REG(R300_RS_COUNT,
1709			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1710			   R300_RS_COUNT_HIRES_EN));
1711
1712	    /* R300_INST_COUNT_RS - highest RS instruction used */
1713	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1714
1715	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1716						R300_ALU_CODE_SIZE(0) |
1717						R300_TEX_CODE_OFFSET(0) |
1718						R300_TEX_CODE_SIZE(1)));
1719
1720	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1721			  (R300_ALU_START(0) |
1722			   R300_ALU_SIZE(0) |
1723			   R300_TEX_START(0) |
1724			   R300_TEX_SIZE(1) |
1725			   R300_RGBA_OUT));
1726
1727
1728	} else {
1729	    BEGIN_ACCEL(15);
1730	    /* 2 components: 2 for tex0 */
1731	    OUT_ACCEL_REG(R300_RS_COUNT,
1732			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1733			   R300_RS_COUNT_HIRES_EN));
1734
1735	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1736
1737	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1738						R300_ALU_CODE_SIZE(0) |
1739						R300_TEX_CODE_OFFSET(0) |
1740						R300_TEX_CODE_SIZE(0)));
1741
1742	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1743			  (R300_ALU_START(0) |
1744			   R300_ALU_SIZE(0) |
1745			   R300_TEX_START(0) |
1746			   R300_TEX_SIZE(0) |
1747			   R300_RGBA_OUT));
1748
1749	}
1750
1751	OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
1752	OUT_ACCEL_REG(R300_US_CODE_ADDR_0,
1753		      (R300_ALU_START(0) |
1754		       R300_ALU_SIZE(0) |
1755		       R300_TEX_START(0) |
1756		       R300_TEX_SIZE(0)));
1757	OUT_ACCEL_REG(R300_US_CODE_ADDR_1,
1758		      (R300_ALU_START(0) |
1759		       R300_ALU_SIZE(0) |
1760		       R300_TEX_START(0) |
1761		       R300_TEX_SIZE(0)));
1762	OUT_ACCEL_REG(R300_US_CODE_ADDR_2,
1763		      (R300_ALU_START(0) |
1764		       R300_ALU_SIZE(0) |
1765		       R300_TEX_START(0) |
1766		       R300_TEX_SIZE(0)));
1767
1768	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1769	/* shader output swizzling */
1770	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1771
1772	/* tex inst for src texture */
1773	OUT_ACCEL_REG(R300_US_TEX_INST(0),
1774		      (R300_TEX_SRC_ADDR(0) |
1775		       R300_TEX_DST_ADDR(0) |
1776		       R300_TEX_ID(0) |
1777		       R300_TEX_INST(R300_TEX_INST_LD)));
1778
1779	if (pMask) {
1780	    /* tex inst for mask texture */
1781	    OUT_ACCEL_REG(R300_US_TEX_INST(1),
1782			  (R300_TEX_SRC_ADDR(1) |
1783			   R300_TEX_DST_ADDR(1) |
1784			   R300_TEX_ID(1) |
1785			   R300_TEX_INST(R300_TEX_INST_LD)));
1786	}
1787
1788	/* RGB inst
1789	 * temp addresses for texture inputs
1790	 * ALU_RGB_ADDR0 is src tex (temp 0)
1791	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1792	 * R300_ALU_RGB_OMASK - output components to write
1793	 * R300_ALU_RGB_TARGET_A - render target
1794	 */
1795	OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0),
1796		      (R300_ALU_RGB_ADDR0(0) |
1797		       R300_ALU_RGB_ADDR1(1) |
1798		       R300_ALU_RGB_ADDR2(0) |
1799		       R300_ALU_RGB_ADDRD(0) |
1800		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1801					   R300_ALU_RGB_MASK_G |
1802					   R300_ALU_RGB_MASK_B)) |
1803		       R300_ALU_RGB_TARGET_A));
1804	/* RGB inst
1805	 * ALU operation
1806	 */
1807	OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0),
1808		      (R300_ALU_RGB_SEL_A(src_color) |
1809		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1810		       R300_ALU_RGB_SEL_B(mask_color) |
1811		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1812		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1813		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1814		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1815		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1816		       R300_ALU_RGB_CLAMP));
1817	/* Alpha inst
1818	 * temp addresses for texture inputs
1819	 * ALU_ALPHA_ADDR0 is src tex (0)
1820	 * ALU_ALPHA_ADDR1 is mask tex (1)
1821	 * R300_ALU_ALPHA_OMASK - output components to write
1822	 * R300_ALU_ALPHA_TARGET_A - render target
1823	 */
1824	OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0),
1825		      (R300_ALU_ALPHA_ADDR0(0) |
1826		       R300_ALU_ALPHA_ADDR1(1) |
1827		       R300_ALU_ALPHA_ADDR2(0) |
1828		       R300_ALU_ALPHA_ADDRD(0) |
1829		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1830		       R300_ALU_ALPHA_TARGET_A |
1831		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1832	/* Alpha inst
1833	 * ALU operation
1834	 */
1835	OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0),
1836		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1837		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1838		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1839		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1840		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1841		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1842		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1843		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1844		       R300_ALU_ALPHA_CLAMP));
1845	FINISH_ACCEL();
1846    } else {
1847	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1848	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1849			 R500_ALU_RGB_G_SWIZ_A_0 |
1850			 R500_ALU_RGB_B_SWIZ_A_0);
1851	else
1852	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1853			 R500_ALU_RGB_G_SWIZ_A_G |
1854			 R500_ALU_RGB_B_SWIZ_A_B);
1855
1856	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1857	    src_alpha = R500_ALPHA_SWIZ_A_1;
1858	else
1859	    src_alpha = R500_ALPHA_SWIZ_A_A;
1860
1861	if (pMask) {
1862	    if (pMaskPicture->componentAlpha) {
1863		if (RadeonBlendOp[op].src_alpha) {
1864		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1865			src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1866				     R500_ALU_RGB_G_SWIZ_A_1 |
1867				     R500_ALU_RGB_B_SWIZ_A_1);
1868		    else
1869			src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1870				     R500_ALU_RGB_G_SWIZ_A_A |
1871				     R500_ALU_RGB_B_SWIZ_A_A);
1872		} else
1873		    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1874				 R500_ALU_RGB_G_SWIZ_A_G |
1875				 R500_ALU_RGB_B_SWIZ_A_B);
1876
1877		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1878			      R500_ALU_RGB_G_SWIZ_B_G |
1879			      R500_ALU_RGB_B_SWIZ_B_B);
1880	    } else {
1881		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1882		    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1883				  R500_ALU_RGB_G_SWIZ_B_1 |
1884				  R500_ALU_RGB_B_SWIZ_B_1);
1885		else
1886		    mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1887				  R500_ALU_RGB_G_SWIZ_B_A |
1888				  R500_ALU_RGB_B_SWIZ_B_A);
1889	    }
1890	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1891		mask_alpha = R500_ALPHA_SWIZ_B_1;
1892	    else
1893		mask_alpha = R500_ALPHA_SWIZ_B_A;
1894	} else {
1895	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1896			  R500_ALU_RGB_G_SWIZ_B_1 |
1897			  R500_ALU_RGB_B_SWIZ_B_1);
1898	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1899	}
1900
1901	BEGIN_ACCEL(7);
1902	if (pMask) {
1903	    /* 4 components: 2 for tex0, 2 for tex1 */
1904	    OUT_ACCEL_REG(R300_RS_COUNT,
1905			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1906			   R300_RS_COUNT_HIRES_EN));
1907
1908	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1909	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1910
1911	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1912					      R500_US_CODE_END_ADDR(2)));
1913	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1914					       R500_US_CODE_RANGE_SIZE(2)));
1915	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1916	} else {
1917	    OUT_ACCEL_REG(R300_RS_COUNT,
1918			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1919			   R300_RS_COUNT_HIRES_EN));
1920
1921	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1922
1923	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1924					      R500_US_CODE_END_ADDR(1)));
1925	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1926					       R500_US_CODE_RANGE_SIZE(1)));
1927	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1928	}
1929
1930	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1931	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1932	FINISH_ACCEL();
1933
1934	if (pMask) {
1935	    BEGIN_ACCEL(19);
1936	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1937	    /* tex inst for src texture */
1938	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1939						   R500_INST_RGB_WMASK_R |
1940						   R500_INST_RGB_WMASK_G |
1941						   R500_INST_RGB_WMASK_B |
1942						   R500_INST_ALPHA_WMASK |
1943						   R500_INST_RGB_CLAMP |
1944						   R500_INST_ALPHA_CLAMP));
1945
1946	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1947						   R500_TEX_INST_LD |
1948						   R500_TEX_IGNORE_UNCOVERED));
1949
1950	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1951						   R500_TEX_SRC_S_SWIZ_R |
1952						   R500_TEX_SRC_T_SWIZ_G |
1953						   R500_TEX_DST_ADDR(0) |
1954						   R500_TEX_DST_R_SWIZ_R |
1955						   R500_TEX_DST_G_SWIZ_G |
1956						   R500_TEX_DST_B_SWIZ_B |
1957						   R500_TEX_DST_A_SWIZ_A));
1958	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1959						   R500_DX_S_SWIZ_R |
1960						   R500_DX_T_SWIZ_R |
1961						   R500_DX_R_SWIZ_R |
1962						   R500_DX_Q_SWIZ_R |
1963						   R500_DY_ADDR(0) |
1964						   R500_DY_S_SWIZ_R |
1965						   R500_DY_T_SWIZ_R |
1966						   R500_DY_R_SWIZ_R |
1967						   R500_DY_Q_SWIZ_R));
1968	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1969	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1970
1971	    /* tex inst for mask texture */
1972	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1973						   R500_INST_TEX_SEM_WAIT |
1974						   R500_INST_RGB_WMASK_R |
1975						   R500_INST_RGB_WMASK_G |
1976						   R500_INST_RGB_WMASK_B |
1977						   R500_INST_ALPHA_WMASK |
1978						   R500_INST_RGB_CLAMP |
1979						   R500_INST_ALPHA_CLAMP));
1980
1981	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
1982						   R500_TEX_INST_LD |
1983						   R500_TEX_SEM_ACQUIRE |
1984						   R500_TEX_IGNORE_UNCOVERED));
1985
1986	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
1987						   R500_TEX_SRC_S_SWIZ_R |
1988						   R500_TEX_SRC_T_SWIZ_G |
1989						   R500_TEX_DST_ADDR(1) |
1990						   R500_TEX_DST_R_SWIZ_R |
1991						   R500_TEX_DST_G_SWIZ_G |
1992						   R500_TEX_DST_B_SWIZ_B |
1993						   R500_TEX_DST_A_SWIZ_A));
1994	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
1995						   R500_DX_S_SWIZ_R |
1996						   R500_DX_T_SWIZ_R |
1997						   R500_DX_R_SWIZ_R |
1998						   R500_DX_Q_SWIZ_R |
1999						   R500_DY_ADDR(1) |
2000						   R500_DY_S_SWIZ_R |
2001						   R500_DY_T_SWIZ_R |
2002						   R500_DY_R_SWIZ_R |
2003						   R500_DY_Q_SWIZ_R));
2004	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2005	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2006	} else {
2007	    BEGIN_ACCEL(13);
2008	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
2009	    /* tex inst for src texture */
2010	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
2011						   R500_INST_TEX_SEM_WAIT |
2012						   R500_INST_RGB_WMASK_R |
2013						   R500_INST_RGB_WMASK_G |
2014						   R500_INST_RGB_WMASK_B |
2015						   R500_INST_ALPHA_WMASK |
2016						   R500_INST_RGB_CLAMP |
2017						   R500_INST_ALPHA_CLAMP));
2018
2019	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
2020						   R500_TEX_INST_LD |
2021						   R500_TEX_SEM_ACQUIRE |
2022						   R500_TEX_IGNORE_UNCOVERED));
2023
2024	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
2025						   R500_TEX_SRC_S_SWIZ_R |
2026						   R500_TEX_SRC_T_SWIZ_G |
2027						   R500_TEX_DST_ADDR(0) |
2028						   R500_TEX_DST_R_SWIZ_R |
2029						   R500_TEX_DST_G_SWIZ_G |
2030						   R500_TEX_DST_B_SWIZ_B |
2031						   R500_TEX_DST_A_SWIZ_A));
2032	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
2033						   R500_DX_S_SWIZ_R |
2034						   R500_DX_T_SWIZ_R |
2035						   R500_DX_R_SWIZ_R |
2036						   R500_DX_Q_SWIZ_R |
2037						   R500_DY_ADDR(0) |
2038						   R500_DY_S_SWIZ_R |
2039						   R500_DY_T_SWIZ_R |
2040						   R500_DY_R_SWIZ_R |
2041						   R500_DY_Q_SWIZ_R));
2042	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2043	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2044	}
2045
2046	/* ALU inst */
2047	/* *_OMASK* - output component write mask */
2048	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
2049					       R500_INST_TEX_SEM_WAIT |
2050					       R500_INST_LAST |
2051					       R500_INST_RGB_OMASK_R |
2052					       R500_INST_RGB_OMASK_G |
2053					       R500_INST_RGB_OMASK_B |
2054					       R500_INST_ALPHA_OMASK |
2055					       R500_INST_RGB_CLAMP |
2056					       R500_INST_ALPHA_CLAMP));
2057	/* ALU inst
2058	 * temp addresses for texture inputs
2059	 * RGB_ADDR0 is src tex (temp 0)
2060	 * RGB_ADDR1 is mask tex (temp 1)
2061	 */
2062	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2063					       R500_RGB_ADDR1(1) |
2064					       R500_RGB_ADDR2(0)));
2065	/* ALU inst
2066	 * temp addresses for texture inputs
2067	 * ALPHA_ADDR0 is src tex (temp 0)
2068	 * ALPHA_ADDR1 is mask tex (temp 1)
2069	 */
2070	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2071					       R500_ALPHA_ADDR1(1) |
2072					       R500_ALPHA_ADDR2(0)));
2073
2074	/* R500_ALU_RGB_TARGET - RGB render target */
2075	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2076					       src_color |
2077					       R500_ALU_RGB_SEL_B_SRC1 |
2078					       mask_color |
2079					       R500_ALU_RGB_TARGET(0)));
2080
2081	/* R500_ALPHA_RGB_TARGET - alpha render target */
2082	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
2083					       R500_ALPHA_ADDRD(0) |
2084					       R500_ALPHA_SEL_A_SRC0 |
2085					       src_alpha |
2086					       R500_ALPHA_SEL_B_SRC1 |
2087					       mask_alpha |
2088					       R500_ALPHA_TARGET(0)));
2089
2090	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
2091					       R500_ALU_RGBA_ADDRD(0) |
2092					       R500_ALU_RGBA_R_SWIZ_0 |
2093					       R500_ALU_RGBA_G_SWIZ_0 |
2094					       R500_ALU_RGBA_B_SWIZ_0 |
2095					       R500_ALU_RGBA_A_SWIZ_0));
2096	FINISH_ACCEL();
2097    }
2098
2099    /* Clear out scissoring */
2100    BEGIN_ACCEL(2);
2101    if (IS_R300_3D) {
2102	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((1440 << R300_SCISSOR_X_SHIFT) |
2103					 (1440 << R300_SCISSOR_Y_SHIFT)));
2104	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2105					 ((pDst->drawable.height + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
2106
2107    } else {
2108	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
2109					 (0 << R300_SCISSOR_Y_SHIFT)));
2110	OUT_ACCEL_REG(R300_SC_SCISSOR1, (((pDst->drawable.width - 1) << R300_SCISSOR_X_SHIFT) |
2111					 ((pDst->drawable.height - 1) << R300_SCISSOR_Y_SHIFT)));
2112    }
2113    FINISH_ACCEL();
2114
2115
2116    BEGIN_ACCEL_RELOC(3, 2);
2117    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pDst);
2118    EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pDst);
2119
2120    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
2121    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
2122
2123    FINISH_ACCEL();
2124
2125    BEGIN_ACCEL(1);
2126    if (pMask)
2127	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 6);
2128    else
2129	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 4);
2130    FINISH_ACCEL();
2131
2132    return TRUE;
2133}
2134
2135static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
2136{
2137    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2138    ACCEL_PREAMBLE();
2139
2140    ENTER_DRAW(0);
2141
2142    if (info->accel_state->draw_header) {
2143	if (info->ChipFamily < CHIP_FAMILY_R200) {
2144	    info->accel_state->draw_header[0] = CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2145							   info->accel_state->num_vtx *
2146							   info->accel_state->vtx_count + 1);
2147	    info->accel_state->draw_header[2] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2148						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2149						 RADEON_CP_VC_CNTL_MAOS_ENABLE |
2150						 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2151						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2152	} else if (IS_R300_3D || IS_R500_3D) {
2153	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2154							   info->accel_state->num_vtx *
2155							   info->accel_state->vtx_count);
2156	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2157						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2158						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2159	} else {
2160	    info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2161							   info->accel_state->num_vtx *
2162							   info->accel_state->vtx_count);
2163	    info->accel_state->draw_header[1] = (RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2164						 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2165						 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
2166	}
2167	info->accel_state->draw_header = NULL;
2168    }
2169
2170    if (IS_R300_3D || IS_R500_3D) {
2171	BEGIN_ACCEL(3);
2172	OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
2173	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2174    } else
2175	BEGIN_ACCEL(1);
2176    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2177    FINISH_ACCEL();
2178
2179    LEAVE_DRAW(0);
2180}
2181
2182
#ifdef ACCEL_CP

/*
 * CP path: each vertex component is written to the ring as one float.
 * VTX_OUT emits the destination and source coordinates of a vertex;
 * VTX_OUT_MASK emits the same four values followed by the mask coordinates.
 */
#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)	\
do {								\
    OUT_RING_F(_dstX);						\
    OUT_RING_F(_dstY);						\
    OUT_RING_F(_srcX);						\
    OUT_RING_F(_srcY);						\
} while (0)

#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);			\
    OUT_RING_F(_maskX);						\
    OUT_RING_F(_maskY);						\
} while (0)

#else /* ACCEL_CP */

/*
 * MMIO path: each vertex component is written as one float to
 * RADEON_SE_PORT_DATA0, in the same order as on the CP path.
 */
#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)	\
do {								\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY);		\
} while (0)

#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY);		\
} while (0)

#endif /* !ACCEL_CP */
2224
#ifdef ONLY_ONCE
/*
 * Run *point through *transform in place.
 *
 * The point is fed to PictureTransformPoint as the vector (x, y, xFixed1);
 * only the first two components of the result are copied back, the third
 * is discarded.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector homogeneous;

    homogeneous.vector[0] = point->x;
    homogeneous.vector[1] = point->y;
    homogeneous.vector[2] = xFixed1;

    PictureTransformPoint(transform, &homogeneous);

    point->x = homogeneous.vector[0];
    point->y = homogeneous.vector[1];
}
#endif
2237
2238static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
2239					   RADEONInfoPtr info,
2240					   PixmapPtr pDst,
2241					   int srcX, int srcY,
2242					   int maskX, int maskY,
2243					   int dstX, int dstY,
2244					   int w, int h)
2245{
2246    int vtx_count;
2247    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
2248    static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
2249    ACCEL_PREAMBLE();
2250
2251    ENTER_DRAW(0);
2252
2253    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
2254       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
2255
2256#if defined(ACCEL_CP)
2257    if ((info->cs && CS_FULL(info->cs)) ||
2258	(!info->cs && (info->cp->indirectBuffer->used + 4 * 32) >
2259	 info->cp->indirectBuffer->total)) {
2260	FUNC_NAME(RadeonDoneComposite)(info->accel_state->dst_pix);
2261	if (info->cs)
2262	    radeon_cs_flush_indirect(pScrn);
2263	else
2264	    RADEONCPFlushIndirect(pScrn, 1);
2265	info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
2266						 info->accel_state->src_pic,
2267						 info->accel_state->msk_pic,
2268						 info->accel_state->dst_pic,
2269						 info->accel_state->src_pix,
2270						 info->accel_state->msk_pix,
2271						 info->accel_state->dst_pix);
2272    }
2273#endif
2274
2275    srcTopLeft.x     = IntToxFixed(srcX);
2276    srcTopLeft.y     = IntToxFixed(srcY);
2277    srcTopRight.x    = IntToxFixed(srcX + w);
2278    srcTopRight.y    = IntToxFixed(srcY);
2279    srcBottomLeft.x  = IntToxFixed(srcX);
2280    srcBottomLeft.y  = IntToxFixed(srcY + h);
2281    srcBottomRight.x = IntToxFixed(srcX + w);
2282    srcBottomRight.y = IntToxFixed(srcY + h);
2283
2284    if (info->accel_state->is_transform[0]) {
2285	if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2286	    transformPoint(info->accel_state->transform[0], &srcTopLeft);
2287	    transformPoint(info->accel_state->transform[0], &srcTopRight);
2288	    transformPoint(info->accel_state->transform[0], &srcBottomLeft);
2289	    transformPoint(info->accel_state->transform[0], &srcBottomRight);
2290	}
2291    }
2292
2293    if (info->accel_state->msk_pic) {
2294	maskTopLeft.x     = IntToxFixed(maskX);
2295	maskTopLeft.y     = IntToxFixed(maskY);
2296	maskTopRight.x    = IntToxFixed(maskX + w);
2297	maskTopRight.y    = IntToxFixed(maskY);
2298	maskBottomLeft.x  = IntToxFixed(maskX);
2299	maskBottomLeft.y  = IntToxFixed(maskY + h);
2300	maskBottomRight.x = IntToxFixed(maskX + w);
2301	maskBottomRight.y = IntToxFixed(maskY + h);
2302
2303	if (info->accel_state->is_transform[1]) {
2304	    if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
2305		transformPoint(info->accel_state->transform[1], &maskTopLeft);
2306		transformPoint(info->accel_state->transform[1], &maskTopRight);
2307		transformPoint(info->accel_state->transform[1], &maskBottomLeft);
2308		transformPoint(info->accel_state->transform[1], &maskBottomRight);
2309	    }
2310	}
2311
2312	vtx_count = 6;
2313    } else
2314	vtx_count = 4;
2315
2316    if (info->accel_state->vsync)
2317	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst,
2318				      radeon_pick_best_crtc(pScrn, dstX, dstX + w, dstY, dstY + h),
2319				      dstY, dstY + h);
2320
2321#ifdef ACCEL_CP
2322    if (info->ChipFamily < CHIP_FAMILY_R200) {
2323	if (!info->accel_state->draw_header) {
2324	    BEGIN_RING(3);
2325
2326#ifdef XF86DRM_MODE
2327	    if (info->cs)
2328		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2329	    else
2330#endif
2331		info->accel_state->draw_header = __head;
2332	    info->accel_state->num_vtx = 0;
2333	    info->accel_state->vtx_count = vtx_count;
2334
2335	    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2336				3 * vtx_count + 1));
2337	    if (info->accel_state->msk_pic)
2338		OUT_RING(RADEON_CP_VC_FRMT_XY |
2339			 RADEON_CP_VC_FRMT_ST0 |
2340			 RADEON_CP_VC_FRMT_ST1);
2341	    else
2342		OUT_RING(RADEON_CP_VC_FRMT_XY |
2343			 RADEON_CP_VC_FRMT_ST0);
2344	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2345		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2346		     RADEON_CP_VC_CNTL_MAOS_ENABLE |
2347		     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2348		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2349	    ADVANCE_RING();
2350	}
2351
2352	info->accel_state->num_vtx += 3;
2353	BEGIN_RING(3 * vtx_count);
2354    } else if (IS_R300_3D || IS_R500_3D) {
2355	if (!info->accel_state->draw_header) {
2356	    BEGIN_RING(2);
2357
2358#ifdef XF86DRM_MODE
2359	    if (info->cs)
2360		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2361	    else
2362#endif
2363		info->accel_state->draw_header = __head;
2364	    info->accel_state->num_vtx = 0;
2365	    info->accel_state->vtx_count = vtx_count;
2366
2367	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2368				4 * vtx_count));
2369	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2370		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2371		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2372	    ADVANCE_RING();
2373	}
2374
2375	info->accel_state->num_vtx += 4;
2376	BEGIN_RING(4 * vtx_count);
2377    } else {
2378	if (!info->accel_state->draw_header) {
2379	    BEGIN_RING(2);
2380
2381#ifdef XF86DRM_MODE
2382	    if (info->cs)
2383		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
2384	    else
2385#endif
2386		info->accel_state->draw_header = __head;
2387	    info->accel_state->num_vtx = 0;
2388	    info->accel_state->vtx_count = vtx_count;
2389
2390	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2391				3 * vtx_count));
2392	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2393		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2394		     (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2395	    ADVANCE_RING();
2396	}
2397
2398	info->accel_state->num_vtx += 3;
2399	BEGIN_RING(3 * vtx_count);
2400    }
2401
2402#else /* ACCEL_CP */
2403    if (IS_R300_3D || IS_R500_3D)
2404	BEGIN_ACCEL(2 + vtx_count * 4);
2405    else
2406	BEGIN_ACCEL(1 + vtx_count * 3);
2407
2408    if (info->ChipFamily < CHIP_FAMILY_R200)
2409	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2410					  RADEON_VF_PRIM_WALK_DATA |
2411					  RADEON_VF_RADEON_MODE |
2412					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2413    else if (IS_R300_3D || IS_R500_3D)
2414	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
2415					  RADEON_VF_PRIM_WALK_DATA |
2416					  (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
2417    else
2418	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2419					  RADEON_VF_PRIM_WALK_DATA |
2420					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2421
2422#endif
2423
2424    if (info->accel_state->msk_pic) {
2425	if (IS_R300_3D || IS_R500_3D) {
2426	    VTX_OUT_MASK((float)dstX,                                      (float)dstY,
2427			 xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0],
2428			 xFixedToFloat(maskTopLeft.x) / info->accel_state->texW[1],     xFixedToFloat(maskTopLeft.y) / info->accel_state->texH[1]);
2429	}
2430	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
2431		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0],
2432		xFixedToFloat(maskBottomLeft.x) / info->accel_state->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->accel_state->texH[1]);
2433	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
2434		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0],
2435		xFixedToFloat(maskBottomRight.x) / info->accel_state->texW[1], xFixedToFloat(maskBottomRight.y) / info->accel_state->texH[1]);
2436	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
2437		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0],
2438		xFixedToFloat(maskTopRight.x) / info->accel_state->texW[1],    xFixedToFloat(maskTopRight.y) / info->accel_state->texH[1]);
2439    } else {
2440	if (IS_R300_3D || IS_R500_3D) {
2441	    VTX_OUT((float)dstX,                                      (float)dstY,
2442		    xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0]);
2443	}
2444	VTX_OUT((float)dstX,                                      (float)(dstY + h),
2445		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0]);
2446	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
2447		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0]);
2448	VTX_OUT((float)(dstX + w),                                (float)dstY,
2449		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
2450    }
2451
2452#ifdef ACCEL_CP
2453    ADVANCE_RING();
2454#else
2455    FINISH_ACCEL();
2456#endif /* !ACCEL_CP */
2457
2458    LEAVE_DRAW(0);
2459}
2460#undef VTX_OUT
2461#undef VTX_OUT_MASK
2462
2463static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
2464				       int srcX, int srcY,
2465				       int maskX, int maskY,
2466				       int dstX, int dstY,
2467				       int width, int height)
2468{
2469    int tileSrcY, tileMaskY, tileDstY;
2470    int remainingHeight;
2471    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2472
2473    if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) {
2474	FUNC_NAME(RadeonCompositeTile)(pScrn,
2475				       info,
2476				       pDst,
2477				       srcX, srcY,
2478				       maskX, maskY,
2479				       dstX, dstY,
2480				       width, height);
2481	return;
2482    }
2483
2484    /* Tiling logic borrowed from exaFillRegionTiled */
2485
2486    modulus(srcY, info->accel_state->src_tile_height, tileSrcY);
2487    tileMaskY = maskY;
2488    tileDstY = dstY;
2489
2490    remainingHeight = height;
2491    while (remainingHeight > 0) {
2492	int remainingWidth = width;
2493	int tileSrcX, tileMaskX, tileDstX;
2494	int h = info->accel_state->src_tile_height - tileSrcY;
2495
2496	if (h > remainingHeight)
2497	    h = remainingHeight;
2498	remainingHeight -= h;
2499
2500	modulus(srcX, info->accel_state->src_tile_width, tileSrcX);
2501	tileMaskX = maskX;
2502	tileDstX = dstX;
2503
2504	while (remainingWidth > 0) {
2505	    int w = info->accel_state->src_tile_width - tileSrcX;
2506	    if (w > remainingWidth)
2507		w = remainingWidth;
2508	    remainingWidth -= w;
2509
2510	    FUNC_NAME(RadeonCompositeTile)(pScrn,
2511					   info,
2512					   pDst,
2513					   tileSrcX, tileSrcY,
2514					   tileMaskX, tileMaskY,
2515					   tileDstX, tileDstY,
2516					   w, h);
2517
2518	    tileSrcX = 0;
2519	    tileMaskX += w;
2520	    tileDstX += w;
2521	}
2522	tileSrcY = 0;
2523	tileMaskY += h;
2524	tileDstY += h;
2525    }
2526}
2527
2528#undef ONLY_ONCE
2529#undef FUNC_NAME
2530