radeon_exa_render.c revision c503f109
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *    Alex Deucher <alexander.deucher@amd.com>
30 *
31 */
32
33#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
34#error Cannot define both MMIO and CP acceleration!
35#endif
36
37#if !defined(UNIXCPP) || defined(ANSICPP)
38#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
39#else
40#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
41#endif
42
43#ifdef ACCEL_MMIO
44#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
45#else
46#ifdef ACCEL_CP
47#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
48#else
49#error No accel type defined!
50#endif
51#endif
52
53#ifndef ACCEL_CP
54#define ONLY_ONCE
55#endif
56
57/* Only include the following (generic) bits once. */
58#ifdef ONLY_ONCE
59
60struct blendinfo {
61    Bool dst_alpha;
62    Bool src_alpha;
63    uint32_t blend_cntl;
64};
65
66static struct blendinfo RadeonBlendOp[] = {
67    /* Clear */
68    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
69    /* Src */
70    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
71    /* Dst */
72    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
73    /* Over */
74    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
75    /* OverReverse */
76    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
77    /* In */
78    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
79    /* InReverse */
80    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
81    /* Out */
82    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
83    /* OutReverse */
84    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
85    /* Atop */
86    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
87    /* AtopReverse */
88    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
89    /* Xor */
90    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
91    /* Add */
92    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
93};
94
95struct formatinfo {
96    int fmt;
97    uint32_t card_fmt;
98};
99
100/* Note on texture formats:
101 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
102 */
103static struct formatinfo R100TexFormats[] = {
104	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
105	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
106	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
107	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
108	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
109	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
110};
111
112static struct formatinfo R200TexFormats[] = {
113    {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
114    {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
115    {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
116    {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
117    {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
118    {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
119    {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
120    {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
121};
122
123static struct formatinfo R300TexFormats[] = {
124    {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
125    {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
126    {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
127    {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
128    {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
129    {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
130    {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
131    {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
132};
133
134/* Common Radeon setup code */
135
136static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
137{
138    switch (pDstPicture->format) {
139    case PICT_a8r8g8b8:
140    case PICT_x8r8g8b8:
141	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
142	break;
143    case PICT_r5g6b5:
144	*dst_format = RADEON_COLOR_FORMAT_RGB565;
145	break;
146    case PICT_a1r5g5b5:
147    case PICT_x1r5g5b5:
148	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
149	break;
150    case PICT_a8:
151	*dst_format = RADEON_COLOR_FORMAT_RGB8;
152	break;
153    default:
154	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
155			(int)pDstPicture->format));
156    }
157
158    return TRUE;
159}
160
161static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
162{
163    switch (pDstPicture->format) {
164    case PICT_a8r8g8b8:
165    case PICT_x8r8g8b8:
166	*dst_format = R300_COLORFORMAT_ARGB8888;
167	break;
168    case PICT_r5g6b5:
169	*dst_format = R300_COLORFORMAT_RGB565;
170	break;
171    case PICT_a1r5g5b5:
172    case PICT_x1r5g5b5:
173	*dst_format = R300_COLORFORMAT_ARGB1555;
174	break;
175    case PICT_a8:
176	*dst_format = R300_COLORFORMAT_I8;
177	break;
178    default:
179	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
180	       (int)pDstPicture->format));
181    }
182    return TRUE;
183}
184
185static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
186{
187    uint32_t sblend, dblend;
188
189    sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK;
190    dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK;
191
192    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
193     * it as always 1.
194     */
195    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
196	if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
197	    sblend = RADEON_SRC_BLEND_GL_ONE;
198	else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
199	    sblend = RADEON_SRC_BLEND_GL_ZERO;
200    }
201
202    /* If the source alpha is being used, then we should only be in a case where
203     * the source blend factor is 0, and the source blend value is the mask
204     * channels multiplied by the source picture's alpha.
205     */
206    if (pMask && pMask->componentAlpha && RadeonBlendOp[op].src_alpha) {
207	if (dblend == RADEON_DST_BLEND_GL_SRC_ALPHA) {
208	    dblend = RADEON_DST_BLEND_GL_SRC_COLOR;
209	} else if (dblend == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA) {
210	    dblend = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
211	}
212    }
213
214    return sblend | dblend;
215}
216
/* A float and a 32-bit unsigned integer overlaid on the same storage, so
 * the bits of one can be read back as the other.
 */
union intfloat {
    float f;		/* floating-point view */
    uint32_t i;		/* integer view of the same storage */
};
221
222/* Check if we need a software-fallback because of a repeating
223 *   non-power-of-two texture.
224 *
225 * canTile: whether we can emulate a repeat by drawing in tiles:
226 *   possible for the source, but not for the mask. (Actually
227 *   we could do tiling for the mask too, but dealing with the
228 *   combination of a tiled mask and a tiled source would be
229 *   a lot of complexity, so we handle only the most common
230 *   case of a repeating mask.)
231 */
232static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
233{
234    int w = pPict->pDrawable->width;
235    int h = pPict->pDrawable->height;
236
237    if (pPict->repeat && pPict->repeatType != RepeatPad &&
238	((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
239	!(pPict->repeatType == RepeatNormal && !pPict->transform && canTile))
240	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
241			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
242
243    return TRUE;
244}
245
246/* Determine if the pitch of the pixmap meets the criteria for being
247 * used as a repeating texture: no padding or only a single line texture.
248 */
249static Bool RADEONPitchMatches(PixmapPtr pPix)
250{
251    int w = pPix->drawable.width;
252    int h = pPix->drawable.height;
253    uint32_t txpitch = exaGetPixmapPitch(pPix);
254
255    if (h > 1 && ((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch)
256	return FALSE;
257
258    return TRUE;
259}
260
261/* We can't turn on repeats normally for a non-power-of-two dimension,
262 * but if the source isn't transformed, we can get the same effect
263 * by drawing the image in multiple tiles. (A common case that it's
264 * important to get right is drawing a strip of a NPOTxPOT texture
265 * repeating in the POT direction. With tiling, this ends up as a
266 * a single tile on R300 and newer, which is perfect.)
267 *
268 * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
269 *   one direction and POT in the other in the POT direction; on
270 *   older chips we can only repeat at all if the texture is POT in
271 *   both directions.
272 *
273 * needMatchingPitch: On R100/R200, we can only repeat horizontally if
274 *   there is no padding in the texture. Textures with small POT widths
275 *   (1,2,4,8) thus can't be tiled.
276 */
277static Bool RADEONSetupSourceTile(PicturePtr pPict,
278				  PixmapPtr pPix,
279				  Bool canTile1d,
280				  Bool needMatchingPitch)
281{
282    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
283
284    info->accel_state->need_src_tile_x = info->accel_state->need_src_tile_y = FALSE;
285    info->accel_state->src_tile_width = info->accel_state->src_tile_height = 65536; /* "infinite" */
286
287    if (pPict->repeat && pPict->repeatType != RepeatPad) {
288	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
289
290	int w = pPict->pDrawable->width;
291	int h = pPict->pDrawable->height;
292
293	if (pPict->transform) {
294	    if (badPitch)
295		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
296				 w, (unsigned)exaGetPixmapPitch(pPix)));
297	} else {
298	    info->accel_state->need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
299	    info->accel_state->need_src_tile_y = (h & (h - 1)) != 0;
300
301	    if ((info->accel_state->need_src_tile_x ||
302		 info->accel_state->need_src_tile_y) &&
303		pPict->repeatType != RepeatNormal)
304		RADEON_FALLBACK(("Can only tile RepeatNormal at this time\n"));
305
306	    if (!canTile1d)
307		info->accel_state->need_src_tile_x =
308		    info->accel_state->need_src_tile_y =
309		    info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y;
310	}
311
312	if (info->accel_state->need_src_tile_x)
313	    info->accel_state->src_tile_width = w;
314	if (info->accel_state->need_src_tile_y)
315	    info->accel_state->src_tile_height = h;
316    }
317
318    return TRUE;
319}
320
321/* R100-specific code */
322
323static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit)
324{
325    int w = pPict->pDrawable->width;
326    int h = pPict->pDrawable->height;
327    int i;
328
329    /* r100 limit should be 2048, there are issues with 2048
330     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
331     */
332
333    if ((w > 2047) || (h > 2047))
334	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
335
336    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
337	if (R100TexFormats[i].fmt == pPict->format)
338	    break;
339    }
340    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
341	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
342			(int)pPict->format));
343
344    if (!RADEONCheckTexturePOT(pPict, unit == 0))
345	return FALSE;
346
347    if (pPict->filter != PictFilterNearest &&
348	pPict->filter != PictFilterBilinear)
349    {
350	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
351    }
352
353    return TRUE;
354}
355
356#endif /* ONLY_ONCE */
357
358static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
359					int unit)
360{
361    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
362    uint32_t txfilter, txformat, txoffset, txpitch;
363    int w = pPict->pDrawable->width;
364    int h = pPict->pDrawable->height;
365    Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad &&
366	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
367    int i;
368    ACCEL_PREAMBLE();
369
370    txpitch = exaGetPixmapPitch(pPix);
371    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
372
373    if ((txoffset & 0x1f) != 0)
374	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
375    if ((txpitch & 0x1f) != 0)
376	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
377
378    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
379    {
380	if (R100TexFormats[i].fmt == pPict->format)
381	    break;
382    }
383    txformat = R100TexFormats[i].card_fmt;
384    if (RADEONPixmapIsColortiled(pPix))
385	txoffset |= RADEON_TXO_MACRO_TILE;
386
387    if (repeat) {
388	if (!RADEONPitchMatches(pPix))
389	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
390			     w, (unsigned)txpitch));
391
392	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
393	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
394    } else
395	txformat |= RADEON_TXFORMAT_NON_POWER2;
396    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
397
398    info->accel_state->texW[unit] = 1;
399    info->accel_state->texH[unit] = 1;
400
401    switch (pPict->filter) {
402    case PictFilterNearest:
403	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
404	break;
405    case PictFilterBilinear:
406	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
407	break;
408    default:
409	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
410    }
411
412    switch (pPict->repeatType) {
413	case RepeatNormal:
414	    txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
415	    break;
416	case RepeatPad:
417	    txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
418	    break;
419	case RepeatReflect:
420	    txfilter |= RADEON_CLAMP_S_MIRROR | RADEON_CLAMP_T_MIRROR;
421	    break;
422	case RepeatNone:
423	    /* don't set an illegal clamp mode for rects */
424	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
425		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
426	    break;
427    }
428
429    BEGIN_ACCEL(5);
430    if (unit == 0) {
431	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
432	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
433	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, txoffset);
434	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
435	    (pPix->drawable.width - 1) |
436	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
437	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
438    } else {
439	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
440	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
441	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, txoffset);
442	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
443	    (pPix->drawable.width - 1) |
444	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
445	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
446    }
447    FINISH_ACCEL();
448
449    if (pPict->transform != 0) {
450	info->accel_state->is_transform[unit] = TRUE;
451	info->accel_state->transform[unit] = pPict->transform;
452    } else {
453	info->accel_state->is_transform[unit] = FALSE;
454    }
455
456    return TRUE;
457}
458
459#ifdef ONLY_ONCE
460
461PixmapPtr
462RADEONGetDrawablePixmap(DrawablePtr pDrawable)
463{
464    if (pDrawable->type == DRAWABLE_WINDOW)
465	return pDrawable->pScreen->GetWindowPixmap((WindowPtr)pDrawable);
466    else
467	return (PixmapPtr)pDrawable;
468}
469
470static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
471			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
472{
473    PixmapPtr pSrcPixmap, pDstPixmap;
474    uint32_t tmp1;
475
476    /* Check for unsupported compositing operations. */
477    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
478	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
479
480    if (!pSrcPicture->pDrawable)
481	return FALSE;
482
483    /* r100 limit should be 2048, there are issues with 2048
484     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
485     */
486
487    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
488
489    if (pSrcPixmap->drawable.width > 2047 ||
490	pSrcPixmap->drawable.height > 2047) {
491	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
492			 pSrcPixmap->drawable.width,
493			 pSrcPixmap->drawable.height));
494    }
495
496    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
497
498    if (pDstPixmap->drawable.width > 2047 ||
499	pDstPixmap->drawable.height > 2047) {
500	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
501			 pDstPixmap->drawable.width,
502			 pDstPixmap->drawable.height));
503    }
504
505    if (pMaskPicture) {
506	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
507
508	if (pMaskPixmap->drawable.width > 2047 ||
509	    pMaskPixmap->drawable.height > 2047) {
510	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
511			     pMaskPixmap->drawable.width,
512			     pMaskPixmap->drawable.height));
513	}
514
515	if (pMaskPicture->componentAlpha) {
516	    /* Check if it's component alpha that relies on a source alpha and
517	     * on the source value.  We can only get one of those into the
518	     * single source value that we get to blend with.
519	     */
520	    if (RadeonBlendOp[op].src_alpha &&
521		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
522		RADEON_SRC_BLEND_GL_ZERO) {
523		RADEON_FALLBACK(("Component alpha not supported with source "
524				 "alpha and source value blending.\n"));
525	    }
526	}
527
528	if (!R100CheckCompositeTexture(pMaskPicture, 1))
529	    return FALSE;
530    }
531
532    if (!R100CheckCompositeTexture(pSrcPicture, 0))
533	return FALSE;
534
535    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
536	return FALSE;
537
538    return TRUE;
539}
540#endif /* ONLY_ONCE */
541
542static Bool FUNC_NAME(R100PrepareComposite)(int op,
543					    PicturePtr pSrcPicture,
544					    PicturePtr pMaskPicture,
545					    PicturePtr pDstPicture,
546					    PixmapPtr pSrc,
547					    PixmapPtr pMask,
548					    PixmapPtr pDst)
549{
550    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
551    uint32_t dst_format, dst_offset, dst_pitch, colorpitch;
552    uint32_t pp_cntl, blendcntl, cblend, ablend;
553    int pixel_shift;
554    ACCEL_PREAMBLE();
555
556    TRACE;
557
558    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
559	return FALSE;
560
561    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
562	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
563
564    if (pMask)
565	info->accel_state->has_mask = TRUE;
566    else
567	info->accel_state->has_mask = FALSE;
568
569    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
570
571    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
572    dst_pitch = exaGetPixmapPitch(pDst);
573    colorpitch = dst_pitch >> pixel_shift;
574    if (RADEONPixmapIsColortiled(pDst))
575	colorpitch |= RADEON_COLOR_TILE_ENABLE;
576
577    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
578    dst_pitch = exaGetPixmapPitch(pDst);
579    if ((dst_offset & 0x0f) != 0)
580	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
581    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
582	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
583
584    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
585	return FALSE;
586
587    RADEON_SWITCH_TO_3D();
588
589    if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
590	return FALSE;
591    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
592
593    if (pMask != NULL) {
594	if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1))
595	    return FALSE;
596	pp_cntl |= RADEON_TEX_1_ENABLE;
597    } else {
598	info->accel_state->is_transform[1] = FALSE;
599    }
600
601    BEGIN_ACCEL(8);
602    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
603    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
604    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
605    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
606
607    /* IN operator: Multiply src by mask components or mask alpha.
608     * BLEND_CTL_ADD is A * B + C.
609     * If a source is a8, we have to explicitly zero its color values.
610     * If the destination is a8, we have to route the alpha to red, I think.
611     * If we're doing component alpha where the source for blending is going to
612     * be the source alpha (and there's no source value used), we have to zero
613     * the source's color values.
614     */
615    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
616    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
617
618    if (pDstPicture->format == PICT_a8 ||
619	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
620    {
621	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
622    } else if (pSrcPicture->format == PICT_a8)
623	cblend |= RADEON_COLOR_ARG_A_ZERO;
624    else
625	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
626    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
627
628    if (pMask) {
629	if (pMaskPicture->componentAlpha &&
630	    pDstPicture->format != PICT_a8)
631	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
632	else
633	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
634	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
635    } else {
636	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
637	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
638    }
639
640    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
641    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
642    if (pMask)
643	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
644					  RADEON_SE_VTX_FMT_ST0 |
645					  RADEON_SE_VTX_FMT_ST1));
646    else
647	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
648					  RADEON_SE_VTX_FMT_ST0));
649    /* Op operator. */
650    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
651
652    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
653    FINISH_ACCEL();
654
655    return TRUE;
656}
657
658#ifdef ONLY_ONCE
659
660static Bool R200CheckCompositeTexture(PicturePtr pPict, int unit)
661{
662    int w = pPict->pDrawable->width;
663    int h = pPict->pDrawable->height;
664    int i;
665
666    /* r200 limit should be 2048, there are issues with 2048
667     * see bug 19269
668     */
669
670    if ((w > 2047) || (h > 2047))
671	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
672
673    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
674    {
675	if (R200TexFormats[i].fmt == pPict->format)
676	    break;
677    }
678    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
679	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
680			 (int)pPict->format));
681
682    if (!RADEONCheckTexturePOT(pPict, unit == 0))
683	return FALSE;
684
685    if (pPict->filter != PictFilterNearest &&
686	pPict->filter != PictFilterBilinear)
687	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
688
689    return TRUE;
690}
691
692#endif /* ONLY_ONCE */
693
694static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
695					int unit)
696{
697    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
698    uint32_t txfilter, txformat, txoffset, txpitch;
699    int w = pPict->pDrawable->width;
700    int h = pPict->pDrawable->height;
701    Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad &&
702	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
703    int i;
704    ACCEL_PREAMBLE();
705
706    txpitch = exaGetPixmapPitch(pPix);
707    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
708
709    if ((txoffset & 0x1f) != 0)
710	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
711    if ((txpitch & 0x1f) != 0)
712	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
713
714    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
715    {
716	if (R200TexFormats[i].fmt == pPict->format)
717	    break;
718    }
719    txformat = R200TexFormats[i].card_fmt;
720    if (RADEONPixmapIsColortiled(pPix))
721	txoffset |= R200_TXO_MACRO_TILE;
722
723    if (repeat) {
724	if (!RADEONPitchMatches(pPix))
725	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
726			     w, (unsigned)txpitch));
727
728	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
729	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
730    } else
731	txformat |= R200_TXFORMAT_NON_POWER2;
732    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
733
734    info->accel_state->texW[unit] = w;
735    info->accel_state->texH[unit] = h;
736
737    switch (pPict->filter) {
738    case PictFilterNearest:
739	txfilter = (R200_MAG_FILTER_NEAREST |
740		    R200_MIN_FILTER_NEAREST);
741	break;
742    case PictFilterBilinear:
743	txfilter = (R200_MAG_FILTER_LINEAR |
744		    R200_MIN_FILTER_LINEAR);
745	break;
746    default:
747	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
748    }
749
750    switch (pPict->repeatType) {
751	case RepeatNormal:
752	    txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
753	    break;
754	case RepeatPad:
755	    txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
756	    break;
757	case RepeatReflect:
758	    txfilter |= R200_CLAMP_S_MIRROR | R200_CLAMP_T_MIRROR;
759	    break;
760	case RepeatNone:
761	    /* don't set an illegal clamp mode for rect textures */
762	    if (txformat & R200_TXFORMAT_NON_POWER2)
763		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
764	    break;
765    }
766
767    BEGIN_ACCEL(6);
768    if (unit == 0) {
769	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
770	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
771	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
772	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
773		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
774	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
775	OUT_ACCEL_REG(R200_PP_TXOFFSET_0, txoffset);
776    } else {
777	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
778	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
779	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
780	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
781		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
782	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
783	OUT_ACCEL_REG(R200_PP_TXOFFSET_1, txoffset);
784    }
785    FINISH_ACCEL();
786
787    if (pPict->transform != 0) {
788	info->accel_state->is_transform[unit] = TRUE;
789	info->accel_state->transform[unit] = pPict->transform;
790    } else {
791	info->accel_state->is_transform[unit] = FALSE;
792    }
793
794    return TRUE;
795}
796
797#ifdef ONLY_ONCE
798static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
799			       PicturePtr pDstPicture)
800{
801    PixmapPtr pSrcPixmap, pDstPixmap;
802    uint32_t tmp1;
803
804    TRACE;
805
806    /* Check for unsupported compositing operations. */
807    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
808	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
809
810    if (!pSrcPicture->pDrawable)
811	return FALSE;
812
813    /* r200 limit should be 2048, there are issues with 2048
814     * see bug 19269
815     */
816
817    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
818
819    if (pSrcPixmap->drawable.width > 2047 ||
820	pSrcPixmap->drawable.height > 2047) {
821	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
822			 pSrcPixmap->drawable.width,
823			 pSrcPixmap->drawable.height));
824    }
825
826    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
827
828    if (pDstPixmap->drawable.width > 2047 ||
829	pDstPixmap->drawable.height > 2047) {
830	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
831			 pDstPixmap->drawable.width,
832			 pDstPixmap->drawable.height));
833    }
834
835    if (pMaskPicture) {
836	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
837
838	if (pMaskPixmap->drawable.width > 2047 ||
839	    pMaskPixmap->drawable.height > 2047) {
840	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
841			     pMaskPixmap->drawable.width,
842			     pMaskPixmap->drawable.height));
843	}
844
845	if (pMaskPicture->componentAlpha) {
846	    /* Check if it's component alpha that relies on a source alpha and
847	     * on the source value.  We can only get one of those into the
848	     * single source value that we get to blend with.
849	     */
850	    if (RadeonBlendOp[op].src_alpha &&
851		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
852		RADEON_SRC_BLEND_GL_ZERO) {
853		RADEON_FALLBACK(("Component alpha not supported with source "
854				 "alpha and source value blending.\n"));
855	    }
856	}
857
858	if (!R200CheckCompositeTexture(pMaskPicture, 1))
859	    return FALSE;
860    }
861
862    if (!R200CheckCompositeTexture(pSrcPicture, 0))
863	return FALSE;
864
865    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
866	return FALSE;
867
868    return TRUE;
869}
870#endif /* ONLY_ONCE */
871
872static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
873				PicturePtr pMaskPicture, PicturePtr pDstPicture,
874				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
875{
876    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
877    uint32_t dst_format, dst_offset, dst_pitch;
878    uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
879    int pixel_shift;
880    ACCEL_PREAMBLE();
881
882    TRACE;
883
884    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
885	return FALSE;
886
887    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
888	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
889
890    if (pMask)
891	info->accel_state->has_mask = TRUE;
892    else
893	info->accel_state->has_mask = FALSE;
894
895    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
896
897    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
898    dst_pitch = exaGetPixmapPitch(pDst);
899    colorpitch = dst_pitch >> pixel_shift;
900    if (RADEONPixmapIsColortiled(pDst))
901	colorpitch |= RADEON_COLOR_TILE_ENABLE;
902
903    if ((dst_offset & 0x0f) != 0)
904	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
905    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
906	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
907
908    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
909	return FALSE;
910
911    RADEON_SWITCH_TO_3D();
912
913    if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
914	return FALSE;
915    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
916
917    if (pMask != NULL) {
918	if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1))
919	    return FALSE;
920	pp_cntl |= RADEON_TEX_1_ENABLE;
921    } else {
922	info->accel_state->is_transform[1] = FALSE;
923    }
924
925    BEGIN_ACCEL(11);
926
927    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
928    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
929    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
930
931    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
932    if (pMask)
933	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
934		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
935		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
936    else
937	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
938		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
939
940    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
941
942    /* IN operator: Multiply src by mask components or mask alpha.
943     * BLEND_CTL_ADD is A * B + C.
944     * If a picture is a8, we have to explicitly zero its color values.
945     * If the destination is a8, we have to route the alpha to red, I think.
946     * If we're doing component alpha where the source for blending is going to
947     * be the source alpha (and there's no source value used), we have to zero
948     * the source's color values.
949     */
950    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
951    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
952
953    if (pDstPicture->format == PICT_a8 ||
954	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
955    {
956	cblend |= R200_TXC_ARG_A_R0_ALPHA;
957    } else if (pSrcPicture->format == PICT_a8)
958	cblend |= R200_TXC_ARG_A_ZERO;
959    else
960	cblend |= R200_TXC_ARG_A_R0_COLOR;
961    ablend |= R200_TXA_ARG_A_R0_ALPHA;
962
963    if (pMask) {
964	if (pMaskPicture->componentAlpha &&
965	    pDstPicture->format != PICT_a8)
966	    cblend |= R200_TXC_ARG_B_R1_COLOR;
967	else
968	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
969	ablend |= R200_TXA_ARG_B_R1_ALPHA;
970    } else {
971	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
972	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
973    }
974
975    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend);
976    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
977	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
978    OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend);
979    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
980	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
981
982    /* Op operator. */
983    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
984    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
985    FINISH_ACCEL();
986
987    return TRUE;
988}
989
990#ifdef ONLY_ONCE
991
992static Bool R300CheckCompositeTexture(PicturePtr pPict,
993				      PicturePtr pDstPict,
994				      int op,
995				      int unit,
996				      Bool is_r500)
997{
998    int w = pPict->pDrawable->width;
999    int h = pPict->pDrawable->height;
1000    int i;
1001    int max_tex_w, max_tex_h;
1002
1003    if (is_r500) {
1004	max_tex_w = 4096;
1005	max_tex_h = 4096;
1006    } else {
1007	max_tex_w = 2048;
1008	max_tex_h = 2048;
1009    }
1010
1011    if ((w > max_tex_w) || (h > max_tex_h))
1012	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1013
1014    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1015    {
1016	if (R300TexFormats[i].fmt == pPict->format)
1017	    break;
1018    }
1019    if (i == sizeof(R300TexFormats) / sizeof(R300TexFormats[0]))
1020	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1021			 (int)pPict->format));
1022
1023    if (!RADEONCheckTexturePOT(pPict, unit == 0))
1024	return FALSE;
1025
1026    if (pPict->filter != PictFilterNearest &&
1027	pPict->filter != PictFilterBilinear)
1028	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1029
1030    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1031     * picture results in alpha=0 pixels. We can implement this with a border color
1032     * *if* our source texture has an alpha channel, otherwise we need to fall
1033     * back. If we're not transformed then we hope that upper layers have clipped
1034     * rendering to the bounds of the source drawable, in which case it doesn't
1035     * matter. I have not, however, verified that the X server always does such
1036     * clipping.
1037     */
1038    if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) {
1039	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1040	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1041    }
1042
1043    return TRUE;
1044}
1045
1046#endif /* ONLY_ONCE */
1047
1048static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
1049					int unit)
1050{
1051    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1052    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
1053    int w = pPict->pDrawable->width;
1054    int h = pPict->pDrawable->height;
1055    int i, pixel_shift;
1056    ACCEL_PREAMBLE();
1057
1058    TRACE;
1059
1060    txpitch = exaGetPixmapPitch(pPix);
1061    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
1062
1063    if ((txoffset & 0x1f) != 0)
1064	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
1065    if ((txpitch & 0x1f) != 0)
1066	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1067
1068    /* TXPITCH = pixels (texels) per line - 1 */
1069    pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1070    txpitch >>= pixel_shift;
1071    txpitch -= 1;
1072
1073    if (RADEONPixmapIsColortiled(pPix))
1074	txoffset |= R300_MACRO_TILE;
1075
1076    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1077    {
1078	if (R300TexFormats[i].fmt == pPict->format)
1079	    break;
1080    }
1081
1082    txformat1 = R300TexFormats[i].card_fmt;
1083
1084    txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1085		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1086
1087    if (IS_R500_3D && ((w - 1) & 0x800))
1088	txpitch |= R500_TXWIDTH_11;
1089
1090    if (IS_R500_3D && ((h - 1) & 0x800))
1091	txpitch |= R500_TXHEIGHT_11;
1092
1093    /* Use TXPITCH instead of TXWIDTH for address computations: we could
1094     * omit this if there is no padding, but there is no apparent advantage
1095     * in doing so.
1096     */
1097    txformat0 |= R300_TXPITCH_EN;
1098
1099    info->accel_state->texW[unit] = w;
1100    info->accel_state->texH[unit] = h;
1101
1102    txfilter = (unit << R300_TX_ID_SHIFT);
1103
1104    if (pPict->repeat) {
1105	switch (pPict->repeatType) {
1106	case RepeatNormal:
1107	    if (unit != 0 || !info->accel_state->need_src_tile_x)
1108		txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1109	    else
1110		txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1111
1112	    if (unit != 0 || !info->accel_state->need_src_tile_y)
1113		txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1114	    else
1115		txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1116
1117	    break;
1118	case RepeatPad:
1119	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1120		        R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST);
1121	    break;
1122	case RepeatReflect:
1123	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_MIRROR) |
1124		        R300_TX_CLAMP_T(R300_TX_CLAMP_MIRROR);
1125	    break;
1126	case RepeatNone:
1127	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1128		        R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1129	    break;
1130	}
1131    } else
1132	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1133	            R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1134
1135    switch (pPict->filter) {
1136    case PictFilterNearest:
1137	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1138	break;
1139    case PictFilterBilinear:
1140	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1141	break;
1142    default:
1143	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1144    }
1145
1146    BEGIN_ACCEL(pPict->repeat ? 6 : 7);
1147    OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1148    OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1149    OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1150    OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1151    OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1152    OUT_ACCEL_REG(R300_TX_OFFSET_0 + (unit * 4), txoffset);
1153    if (!pPict->repeat)
1154	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1155    FINISH_ACCEL();
1156
1157    if (pPict->transform != 0) {
1158	info->accel_state->is_transform[unit] = TRUE;
1159	info->accel_state->transform[unit] = pPict->transform;
1160    } else {
1161	info->accel_state->is_transform[unit] = FALSE;
1162    }
1163
1164    return TRUE;
1165}
1166
1167#ifdef ONLY_ONCE
1168
1169static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1170			       PicturePtr pDstPicture)
1171{
1172    uint32_t tmp1;
1173    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
1174    PixmapPtr pSrcPixmap, pDstPixmap;
1175    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1176    RADEONInfoPtr info = RADEONPTR(pScrn);
1177    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1178
1179    TRACE;
1180
1181    /* Check for unsupported compositing operations. */
1182    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
1183	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1184
1185    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1186
1187    if (IS_R500_3D) {
1188	max_tex_w = 4096;
1189	max_tex_h = 4096;
1190	max_dst_w = 4096;
1191	max_dst_h = 4096;
1192    } else {
1193	max_tex_w = 2048;
1194	max_tex_h = 2048;
1195	max_dst_w = 2560;
1196	max_dst_h = 2560;
1197    }
1198
1199    if (pSrcPixmap->drawable.width > max_tex_w ||
1200	pSrcPixmap->drawable.height > max_tex_h) {
1201	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1202			 pSrcPixmap->drawable.width,
1203			 pSrcPixmap->drawable.height));
1204    }
1205
1206    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1207
1208    if (pDstPixmap->drawable.width > max_dst_w ||
1209	pDstPixmap->drawable.height > max_dst_h) {
1210	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1211			 pDstPixmap->drawable.width,
1212			 pDstPixmap->drawable.height));
1213    }
1214
1215    if (pMaskPicture) {
1216	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1217
1218	if (pMaskPixmap->drawable.width > max_tex_w ||
1219	    pMaskPixmap->drawable.height > max_tex_h) {
1220	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1221			     pMaskPixmap->drawable.width,
1222			     pMaskPixmap->drawable.height));
1223	}
1224
1225	if (pMaskPicture->componentAlpha) {
1226	    /* Check if it's component alpha that relies on a source alpha and
1227	     * on the source value.  We can only get one of those into the
1228	     * single source value that we get to blend with.
1229	     */
1230	    if (RadeonBlendOp[op].src_alpha &&
1231		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
1232		RADEON_SRC_BLEND_GL_ZERO) {
1233		RADEON_FALLBACK(("Component alpha not supported with source "
1234				 "alpha and source value blending.\n"));
1235	    }
1236	}
1237
1238	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
1239	    return FALSE;
1240    }
1241
1242    if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D))
1243	return FALSE;
1244
1245    if (!R300GetDestFormat(pDstPicture, &tmp1))
1246	return FALSE;
1247
1248    return TRUE;
1249
1250}
1251#endif /* ONLY_ONCE */
1252
1253static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
1254				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1255				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1256{
1257    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
1258    uint32_t dst_format, dst_offset, dst_pitch;
1259    uint32_t txenable, colorpitch;
1260    uint32_t blendcntl;
1261    int pixel_shift;
1262    ACCEL_PREAMBLE();
1263
1264    TRACE;
1265
1266    if (!R300GetDestFormat(pDstPicture, &dst_format))
1267	return FALSE;
1268
1269    if (pMask)
1270	info->accel_state->has_mask = TRUE;
1271    else
1272	info->accel_state->has_mask = FALSE;
1273
1274    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1275
1276    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1277    dst_pitch = exaGetPixmapPitch(pDst);
1278    colorpitch = dst_pitch >> pixel_shift;
1279
1280    if (RADEONPixmapIsColortiled(pDst))
1281	colorpitch |= R300_COLORTILE;
1282
1283    colorpitch |= dst_format;
1284
1285    if ((dst_offset & 0x0f) != 0)
1286	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
1287    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1288	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1289
1290    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1291	return FALSE;
1292
1293    RADEON_SWITCH_TO_3D();
1294
1295    if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
1296	return FALSE;
1297    txenable = R300_TEX_0_ENABLE;
1298
1299    if (pMask != NULL) {
1300	if (!FUNC_NAME(R300TextureSetup)(pMaskPicture, pMask, 1))
1301	    return FALSE;
1302	txenable |= R300_TEX_1_ENABLE;
1303    } else {
1304	info->accel_state->is_transform[1] = FALSE;
1305    }
1306
1307    /* setup the VAP */
1308    if (info->accel_state->has_tcl) {
1309	if (pMask)
1310	    BEGIN_ACCEL(8);
1311	else
1312	    BEGIN_ACCEL(7);
1313    } else {
1314	if (pMask)
1315	    BEGIN_ACCEL(6);
1316	else
1317	    BEGIN_ACCEL(5);
1318    }
1319
    /* These registers define the number, type, and location of data submitted
     * to the PVS unit or GA input (when PVS is disabled).
     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
     * enabled.  This memory provides the inputs to the vertex shader program
     * and ordering is not important.  When PVS/TCL is disabled, this field maps
     * directly to the GA input memory and the order is significant.  In
     * PVS_BYPASS mode the order is as follows:
     * Position
     * Point Size
     * Color 0-3
     * Textures 0-7
     * Fog
     */
1333    if (pMask) {
1334	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1335		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1336		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1337		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1338		       R300_SIGNED_0 |
1339		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1340		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1341		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1342		       R300_SIGNED_1));
1343	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
1344		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1345		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1346		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1347		       R300_LAST_VEC_2 |
1348		       R300_SIGNED_2));
1349    } else
1350	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1351		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1352		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1353		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1354		       R300_SIGNED_0 |
1355		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1356		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1357		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1358		       R300_LAST_VEC_1 |
1359		       R300_SIGNED_1));
1360
1361    /* load the vertex shader
1362     * We pre-load vertex programs in RADEONInit3DEngine():
1363     * - exa no mask
1364     * - exa mask
1365     * - Xv
1366     * Here we select the offset of the vertex program we want to use
1367     */
1368    if (info->accel_state->has_tcl) {
1369	if (pMask) {
1370	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1371			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1372			   (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1373			   (2 << R300_PVS_LAST_INST_SHIFT)));
1374	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1375			  (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1376	} else {
1377	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1378			  ((3 << R300_PVS_FIRST_INST_SHIFT) |
1379			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1380			   (4 << R300_PVS_LAST_INST_SHIFT)));
1381	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1382			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1383	}
1384    }
1385
1386    /* Position and one or two sets of 2 texture coordinates */
1387    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1388    if (pMask)
1389	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1390		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1391		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1392    else
1393	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1394		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1395
1396    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
1397    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
1398    FINISH_ACCEL();
1399
1400    /* setup pixel shader */
1401    if (IS_R300_3D) {
1402	uint32_t output_fmt;
1403	int src_color, src_alpha;
1404	int mask_color, mask_alpha;
1405
1406	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1407	    src_color = R300_ALU_RGB_0_0;
1408	else
1409	    src_color = R300_ALU_RGB_SRC0_RGB;
1410
1411	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1412	    src_alpha = R300_ALU_ALPHA_1_0;
1413	else
1414	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1415
1416	if (pMask && pMaskPicture->componentAlpha) {
1417	    if (RadeonBlendOp[op].src_alpha) {
1418		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
1419		    src_color = R300_ALU_RGB_1_0;
1420		    src_alpha = R300_ALU_ALPHA_1_0;
1421		} else {
1422		    src_color = R300_ALU_RGB_SRC0_AAA;
1423		    src_alpha = R300_ALU_ALPHA_SRC0_A;
1424		}
1425
1426		mask_color = R300_ALU_RGB_SRC1_RGB;
1427
1428		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1429		    mask_alpha = R300_ALU_ALPHA_1_0;
1430		else
1431		    mask_alpha = R300_ALU_ALPHA_SRC1_A;
1432
1433	    } else {
1434		src_color = R300_ALU_RGB_SRC0_RGB;
1435
1436		if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1437		    src_alpha = R300_ALU_ALPHA_1_0;
1438		else
1439		    src_alpha = R300_ALU_ALPHA_SRC0_A;
1440
1441		mask_color = R300_ALU_RGB_SRC1_RGB;
1442
1443		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1444		    mask_alpha = R300_ALU_ALPHA_1_0;
1445		else
1446		    mask_alpha = R300_ALU_ALPHA_SRC1_A;
1447
1448	    }
1449	} else if (pMask) {
1450	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1451		mask_color = R300_ALU_RGB_1_0;
1452	    else
1453		mask_color = R300_ALU_RGB_SRC1_AAA;
1454
1455	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1456		mask_alpha = R300_ALU_ALPHA_1_0;
1457	    else
1458		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1459	} else {
1460	    mask_color = R300_ALU_RGB_1_0;
1461	    mask_alpha = R300_ALU_ALPHA_1_0;
1462	}
1463
1464	/* shader output swizzling */
1465	switch (pDstPicture->format) {
1466	case PICT_a8r8g8b8:
1467	case PICT_x8r8g8b8:
1468	default:
1469	    output_fmt = (R300_OUT_FMT_C4_8 |
1470			  R300_OUT_FMT_C0_SEL_BLUE |
1471			  R300_OUT_FMT_C1_SEL_GREEN |
1472			  R300_OUT_FMT_C2_SEL_RED |
1473			  R300_OUT_FMT_C3_SEL_ALPHA);
1474	    break;
1475	case PICT_a8b8g8r8:
1476	case PICT_x8b8g8r8:
1477	    output_fmt = (R300_OUT_FMT_C4_8 |
1478			  R300_OUT_FMT_C0_SEL_RED |
1479			  R300_OUT_FMT_C1_SEL_GREEN |
1480			  R300_OUT_FMT_C2_SEL_BLUE |
1481			  R300_OUT_FMT_C3_SEL_ALPHA);
1482	    break;
1483	case PICT_a8:
1484	    output_fmt = (R300_OUT_FMT_C4_8 |
1485			  R300_OUT_FMT_C0_SEL_ALPHA);
1486	    break;
1487	}
1488
1489
1490	/* setup the rasterizer, load FS */
1491	if (pMask) {
1492	    BEGIN_ACCEL(16);
1493	    /* 4 components: 2 for tex0, 2 for tex1 */
1494	    OUT_ACCEL_REG(R300_RS_COUNT,
1495			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1496			   R300_RS_COUNT_HIRES_EN));
1497
1498	    /* R300_INST_COUNT_RS - highest RS instruction used */
1499	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1500
1501	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1502						R300_ALU_CODE_SIZE(0) |
1503						R300_TEX_CODE_OFFSET(0) |
1504						R300_TEX_CODE_SIZE(1)));
1505
1506	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1507			  (R300_ALU_START(0) |
1508			   R300_ALU_SIZE(0) |
1509			   R300_TEX_START(0) |
1510			   R300_TEX_SIZE(1) |
1511			   R300_RGBA_OUT));
1512
1513
1514	} else {
1515	    BEGIN_ACCEL(15);
1516	    /* 2 components: 2 for tex0 */
1517	    OUT_ACCEL_REG(R300_RS_COUNT,
1518			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1519			   R300_RS_COUNT_HIRES_EN));
1520
1521	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1522
1523	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1524						R300_ALU_CODE_SIZE(0) |
1525						R300_TEX_CODE_OFFSET(0) |
1526						R300_TEX_CODE_SIZE(0)));
1527
1528	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1529			  (R300_ALU_START(0) |
1530			   R300_ALU_SIZE(0) |
1531			   R300_TEX_START(0) |
1532			   R300_TEX_SIZE(0) |
1533			   R300_RGBA_OUT));
1534
1535	}
1536
1537	OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
1538	OUT_ACCEL_REG(R300_US_CODE_ADDR_0,
1539		      (R300_ALU_START(0) |
1540		       R300_ALU_SIZE(0) |
1541		       R300_TEX_START(0) |
1542		       R300_TEX_SIZE(0)));
1543	OUT_ACCEL_REG(R300_US_CODE_ADDR_1,
1544		      (R300_ALU_START(0) |
1545		       R300_ALU_SIZE(0) |
1546		       R300_TEX_START(0) |
1547		       R300_TEX_SIZE(0)));
1548	OUT_ACCEL_REG(R300_US_CODE_ADDR_2,
1549		      (R300_ALU_START(0) |
1550		       R300_ALU_SIZE(0) |
1551		       R300_TEX_START(0) |
1552		       R300_TEX_SIZE(0)));
1553
1554	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1555	/* shader output swizzling */
1556	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1557
1558	/* tex inst for src texture */
1559	OUT_ACCEL_REG(R300_US_TEX_INST(0),
1560		      (R300_TEX_SRC_ADDR(0) |
1561		       R300_TEX_DST_ADDR(0) |
1562		       R300_TEX_ID(0) |
1563		       R300_TEX_INST(R300_TEX_INST_LD)));
1564
1565	if (pMask) {
1566	    /* tex inst for mask texture */
1567	    OUT_ACCEL_REG(R300_US_TEX_INST(1),
1568			  (R300_TEX_SRC_ADDR(1) |
1569			   R300_TEX_DST_ADDR(1) |
1570			   R300_TEX_ID(1) |
1571			   R300_TEX_INST(R300_TEX_INST_LD)));
1572	}
1573
1574	/* RGB inst
1575	 * temp addresses for texture inputs
1576	 * ALU_RGB_ADDR0 is src tex (temp 0)
1577	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1578	 * R300_ALU_RGB_OMASK - output components to write
1579	 * R300_ALU_RGB_TARGET_A - render target
1580	 */
1581	OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0),
1582		      (R300_ALU_RGB_ADDR0(0) |
1583		       R300_ALU_RGB_ADDR1(1) |
1584		       R300_ALU_RGB_ADDR2(0) |
1585		       R300_ALU_RGB_ADDRD(0) |
1586		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1587					   R300_ALU_RGB_MASK_G |
1588					   R300_ALU_RGB_MASK_B)) |
1589		       R300_ALU_RGB_TARGET_A));
1590	/* RGB inst
1591	 * ALU operation
1592	 */
1593	OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0),
1594		      (R300_ALU_RGB_SEL_A(src_color) |
1595		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1596		       R300_ALU_RGB_SEL_B(mask_color) |
1597		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1598		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1599		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1600		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1601		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1602		       R300_ALU_RGB_CLAMP));
1603	/* Alpha inst
1604	 * temp addresses for texture inputs
1605	 * ALU_ALPHA_ADDR0 is src tex (0)
1606	 * ALU_ALPHA_ADDR1 is mask tex (1)
1607	 * R300_ALU_ALPHA_OMASK - output components to write
1608	 * R300_ALU_ALPHA_TARGET_A - render target
1609	 */
1610	OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0),
1611		      (R300_ALU_ALPHA_ADDR0(0) |
1612		       R300_ALU_ALPHA_ADDR1(1) |
1613		       R300_ALU_ALPHA_ADDR2(0) |
1614		       R300_ALU_ALPHA_ADDRD(0) |
1615		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1616		       R300_ALU_ALPHA_TARGET_A |
1617		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1618	/* Alpha inst
1619	 * ALU operation
1620	 */
1621	OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0),
1622		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1623		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1624		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1625		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1626		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1627		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1628		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1629		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1630		       R300_ALU_ALPHA_CLAMP));
1631	FINISH_ACCEL();
1632    } else {
1633	uint32_t output_fmt;
1634	uint32_t src_color, src_alpha;
1635	uint32_t mask_color, mask_alpha;
1636
1637	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1638	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1639			 R500_ALU_RGB_G_SWIZ_A_0 |
1640			 R500_ALU_RGB_B_SWIZ_A_0);
1641	else
1642	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1643			 R500_ALU_RGB_G_SWIZ_A_G |
1644			 R500_ALU_RGB_B_SWIZ_A_B);
1645
1646	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1647	    src_alpha = R500_ALPHA_SWIZ_A_1;
1648	else
1649	    src_alpha = R500_ALPHA_SWIZ_A_A;
1650
1651	if (pMask && pMaskPicture->componentAlpha) {
1652	    if (RadeonBlendOp[op].src_alpha) {
1653		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
1654		    src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1655				 R500_ALU_RGB_G_SWIZ_A_1 |
1656				 R500_ALU_RGB_B_SWIZ_A_1);
1657		    src_alpha = R500_ALPHA_SWIZ_A_1;
1658		} else {
1659		    src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1660				 R500_ALU_RGB_G_SWIZ_A_A |
1661				 R500_ALU_RGB_B_SWIZ_A_A);
1662		    src_alpha = R500_ALPHA_SWIZ_A_A;
1663		}
1664
1665		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1666			      R500_ALU_RGB_G_SWIZ_B_G |
1667			      R500_ALU_RGB_B_SWIZ_B_B);
1668
1669		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1670		    mask_alpha = R500_ALPHA_SWIZ_B_1;
1671		else
1672		    mask_alpha = R500_ALPHA_SWIZ_B_A;
1673
1674	    } else {
1675		src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1676			     R500_ALU_RGB_G_SWIZ_A_G |
1677			     R500_ALU_RGB_B_SWIZ_A_B);
1678
1679		if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1680		    src_alpha = R500_ALPHA_SWIZ_A_1;
1681		else
1682		    src_alpha = R500_ALPHA_SWIZ_A_A;
1683
1684		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1685			      R500_ALU_RGB_G_SWIZ_B_G |
1686			      R500_ALU_RGB_B_SWIZ_B_B);
1687
1688		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1689		    mask_alpha = R500_ALPHA_SWIZ_B_1;
1690		else
1691		    mask_alpha = R500_ALPHA_SWIZ_B_A;
1692
1693	    }
1694	} else if (pMask) {
1695	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1696		mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1697			      R500_ALU_RGB_G_SWIZ_B_1 |
1698			      R500_ALU_RGB_B_SWIZ_B_1);
1699	    else
1700		mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1701			      R500_ALU_RGB_G_SWIZ_B_A |
1702			      R500_ALU_RGB_B_SWIZ_B_A);
1703
1704	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1705		mask_alpha = R500_ALPHA_SWIZ_B_1;
1706	    else
1707		mask_alpha = R500_ALPHA_SWIZ_B_A;
1708	} else {
1709	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1710			  R500_ALU_RGB_G_SWIZ_B_1 |
1711			  R500_ALU_RGB_B_SWIZ_B_1);
1712	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1713	}
1714
1715	/* shader output swizzling */
1716	switch (pDstPicture->format) {
1717	case PICT_a8r8g8b8:
1718	case PICT_x8r8g8b8:
1719	default:
1720	    output_fmt = (R300_OUT_FMT_C4_8 |
1721			  R300_OUT_FMT_C0_SEL_BLUE |
1722			  R300_OUT_FMT_C1_SEL_GREEN |
1723			  R300_OUT_FMT_C2_SEL_RED |
1724			  R300_OUT_FMT_C3_SEL_ALPHA);
1725	    break;
1726	case PICT_a8b8g8r8:
1727	case PICT_x8b8g8r8:
1728	    output_fmt = (R300_OUT_FMT_C4_8 |
1729			  R300_OUT_FMT_C0_SEL_RED |
1730			  R300_OUT_FMT_C1_SEL_GREEN |
1731			  R300_OUT_FMT_C2_SEL_BLUE |
1732			  R300_OUT_FMT_C3_SEL_ALPHA);
1733	    break;
1734	case PICT_a8:
1735	    output_fmt = (R300_OUT_FMT_C4_8 |
1736			  R300_OUT_FMT_C0_SEL_ALPHA);
1737	    break;
1738	}
1739
1740	BEGIN_ACCEL(7);
1741	if (pMask) {
1742	    /* 4 components: 2 for tex0, 2 for tex1 */
1743	    OUT_ACCEL_REG(R300_RS_COUNT,
1744			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1745			   R300_RS_COUNT_HIRES_EN));
1746
1747	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1748	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1749
1750	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1751					      R500_US_CODE_END_ADDR(2)));
1752	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1753					       R500_US_CODE_RANGE_SIZE(2)));
1754	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1755	} else {
1756	    OUT_ACCEL_REG(R300_RS_COUNT,
1757			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1758			   R300_RS_COUNT_HIRES_EN));
1759
1760	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1761
1762	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1763					      R500_US_CODE_END_ADDR(1)));
1764	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1765					       R500_US_CODE_RANGE_SIZE(1)));
1766	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1767	}
1768
1769	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1770	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1771	FINISH_ACCEL();
1772
1773	if (pMask) {
1774	    BEGIN_ACCEL(19);
1775	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1776	    /* tex inst for src texture */
1777	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1778						   R500_INST_RGB_WMASK_R |
1779						   R500_INST_RGB_WMASK_G |
1780						   R500_INST_RGB_WMASK_B |
1781						   R500_INST_ALPHA_WMASK |
1782						   R500_INST_RGB_CLAMP |
1783						   R500_INST_ALPHA_CLAMP));
1784
1785	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1786						   R500_TEX_INST_LD |
1787						   R500_TEX_IGNORE_UNCOVERED));
1788
1789	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1790						   R500_TEX_SRC_S_SWIZ_R |
1791						   R500_TEX_SRC_T_SWIZ_G |
1792						   R500_TEX_DST_ADDR(0) |
1793						   R500_TEX_DST_R_SWIZ_R |
1794						   R500_TEX_DST_G_SWIZ_G |
1795						   R500_TEX_DST_B_SWIZ_B |
1796						   R500_TEX_DST_A_SWIZ_A));
1797	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1798						   R500_DX_S_SWIZ_R |
1799						   R500_DX_T_SWIZ_R |
1800						   R500_DX_R_SWIZ_R |
1801						   R500_DX_Q_SWIZ_R |
1802						   R500_DY_ADDR(0) |
1803						   R500_DY_S_SWIZ_R |
1804						   R500_DY_T_SWIZ_R |
1805						   R500_DY_R_SWIZ_R |
1806						   R500_DY_Q_SWIZ_R));
1807	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1808	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1809
1810	    /* tex inst for mask texture */
1811	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1812						   R500_INST_TEX_SEM_WAIT |
1813						   R500_INST_RGB_WMASK_R |
1814						   R500_INST_RGB_WMASK_G |
1815						   R500_INST_RGB_WMASK_B |
1816						   R500_INST_ALPHA_WMASK |
1817						   R500_INST_RGB_CLAMP |
1818						   R500_INST_ALPHA_CLAMP));
1819
1820	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
1821						   R500_TEX_INST_LD |
1822						   R500_TEX_SEM_ACQUIRE |
1823						   R500_TEX_IGNORE_UNCOVERED));
1824
1825	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
1826						   R500_TEX_SRC_S_SWIZ_R |
1827						   R500_TEX_SRC_T_SWIZ_G |
1828						   R500_TEX_DST_ADDR(1) |
1829						   R500_TEX_DST_R_SWIZ_R |
1830						   R500_TEX_DST_G_SWIZ_G |
1831						   R500_TEX_DST_B_SWIZ_B |
1832						   R500_TEX_DST_A_SWIZ_A));
1833	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
1834						   R500_DX_S_SWIZ_R |
1835						   R500_DX_T_SWIZ_R |
1836						   R500_DX_R_SWIZ_R |
1837						   R500_DX_Q_SWIZ_R |
1838						   R500_DY_ADDR(1) |
1839						   R500_DY_S_SWIZ_R |
1840						   R500_DY_T_SWIZ_R |
1841						   R500_DY_R_SWIZ_R |
1842						   R500_DY_Q_SWIZ_R));
1843	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1844	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1845	} else {
1846	    BEGIN_ACCEL(13);
1847	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1848	    /* tex inst for src texture */
1849	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1850						   R500_INST_TEX_SEM_WAIT |
1851						   R500_INST_RGB_WMASK_R |
1852						   R500_INST_RGB_WMASK_G |
1853						   R500_INST_RGB_WMASK_B |
1854						   R500_INST_ALPHA_WMASK |
1855						   R500_INST_RGB_CLAMP |
1856						   R500_INST_ALPHA_CLAMP));
1857
1858	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1859						   R500_TEX_INST_LD |
1860						   R500_TEX_SEM_ACQUIRE |
1861						   R500_TEX_IGNORE_UNCOVERED));
1862
1863	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1864						   R500_TEX_SRC_S_SWIZ_R |
1865						   R500_TEX_SRC_T_SWIZ_G |
1866						   R500_TEX_DST_ADDR(0) |
1867						   R500_TEX_DST_R_SWIZ_R |
1868						   R500_TEX_DST_G_SWIZ_G |
1869						   R500_TEX_DST_B_SWIZ_B |
1870						   R500_TEX_DST_A_SWIZ_A));
1871	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1872						   R500_DX_S_SWIZ_R |
1873						   R500_DX_T_SWIZ_R |
1874						   R500_DX_R_SWIZ_R |
1875						   R500_DX_Q_SWIZ_R |
1876						   R500_DY_ADDR(0) |
1877						   R500_DY_S_SWIZ_R |
1878						   R500_DY_T_SWIZ_R |
1879						   R500_DY_R_SWIZ_R |
1880						   R500_DY_Q_SWIZ_R));
1881	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1882	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1883	}
1884
1885	/* ALU inst */
1886	/* *_OMASK* - output component write mask */
1887	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
1888					       R500_INST_TEX_SEM_WAIT |
1889					       R500_INST_LAST |
1890					       R500_INST_RGB_OMASK_R |
1891					       R500_INST_RGB_OMASK_G |
1892					       R500_INST_RGB_OMASK_B |
1893					       R500_INST_ALPHA_OMASK |
1894					       R500_INST_RGB_CLAMP |
1895					       R500_INST_ALPHA_CLAMP));
1896	/* ALU inst
1897	 * temp addresses for texture inputs
1898	 * RGB_ADDR0 is src tex (temp 0)
1899	 * RGB_ADDR1 is mask tex (temp 1)
1900	 */
1901	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
1902					       R500_RGB_ADDR1(1) |
1903					       R500_RGB_ADDR2(0)));
1904	/* ALU inst
1905	 * temp addresses for texture inputs
1906	 * ALPHA_ADDR0 is src tex (temp 0)
1907	 * ALPHA_ADDR1 is mask tex (temp 1)
1908	 */
1909	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
1910					       R500_ALPHA_ADDR1(1) |
1911					       R500_ALPHA_ADDR2(0)));
1912
1913	/* R500_ALU_RGB_TARGET - RGB render target */
1914	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
1915					       src_color |
1916					       R500_ALU_RGB_SEL_B_SRC1 |
1917					       mask_color |
1918					       R500_ALU_RGB_TARGET(0)));
1919
	/* R500_ALPHA_TARGET - alpha render target */
1921	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
1922					       R500_ALPHA_ADDRD(0) |
1923					       R500_ALPHA_SEL_A_SRC0 |
1924					       src_alpha |
1925					       R500_ALPHA_SEL_B_SRC1 |
1926					       mask_alpha |
1927					       R500_ALPHA_TARGET(0)));
1928
1929	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
1930					       R500_ALU_RGBA_ADDRD(0) |
1931					       R500_ALU_RGBA_R_SWIZ_0 |
1932					       R500_ALU_RGBA_G_SWIZ_0 |
1933					       R500_ALU_RGBA_B_SWIZ_0 |
1934					       R500_ALU_RGBA_A_SWIZ_0));
1935	FINISH_ACCEL();
1936    }
1937
1938    /* Clear out scissoring */
1939    BEGIN_ACCEL(2);
1940    if (IS_R300_3D)
1941	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((1440 << R300_SCISSOR_X_SHIFT) |
1942					 (1440 << R300_SCISSOR_Y_SHIFT)));
1943    else
1944	OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
1945					 (0 << R300_SCISSOR_Y_SHIFT)));
1946    OUT_ACCEL_REG(R300_SC_SCISSOR1, ((8191 << R300_SCISSOR_X_SHIFT) |
1947				     (8191 << R300_SCISSOR_Y_SHIFT)));
1948    FINISH_ACCEL();
1949
1950    BEGIN_ACCEL(3);
1951
1952    OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
1953    OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
1954
1955    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1956    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
1957
1958    FINISH_ACCEL();
1959
1960    BEGIN_ACCEL(1);
1961    if (info->accel_state->has_mask)
1962	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 6);
1963    else
1964	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 4);
1965    FINISH_ACCEL();
1966
1967    return TRUE;
1968}
1969
1970
/*
 * Vertex emission helpers.
 *
 * VTX_OUT submits one vertex as four values: destination x/y followed by
 * the source texture coordinates.  VTX_OUT_MASK submits the same four
 * values followed by the mask texture coordinates.
 *
 * The two acceleration back ends only differ in how a single value is
 * submitted: the CP build uses OUT_RING_F, the other build writes each
 * value to RADEON_SE_PORT_DATA0 through OUT_ACCEL_REG_F.
 */
#ifndef ACCEL_CP

#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)			\
do {								\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY);		\
} while (0)

/* Same as VTX_OUT, then the two mask coordinates. */
#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);			\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY);		\
} while (0)

#else /* ACCEL_CP */

#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)			\
do {								\
    OUT_RING_F(_dstX);						\
    OUT_RING_F(_dstY);						\
    OUT_RING_F(_srcX);						\
    OUT_RING_F(_srcY);						\
} while (0)

/* Same as VTX_OUT, then the two mask coordinates. */
#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);			\
    OUT_RING_F(_maskX);						\
    OUT_RING_F(_maskY);						\
} while (0)

#endif /* ACCEL_CP */
2012
#ifdef ONLY_ONCE
/*
 * Run a fixed-point point through a picture transform, in place.
 *
 * The point is extended to the vector (x, y, xFixed1), handed to
 * PictureTransformPoint(), and the first two components of the result are
 * written back to *point.  The return value of PictureTransformPoint() is
 * not checked.
 *
 * Only compiled while ONLY_ONCE is defined; ONLY_ONCE is #undef'd at the
 * end of this file.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector vec;

    vec.vector[2] = xFixed1;
    vec.vector[1] = point->y;
    vec.vector[0] = point->x;

    PictureTransformPoint(transform, &vec);

    point->y = vec.vector[1];
    point->x = vec.vector[0];
}
#endif
2025
2026static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
2027					   RADEONInfoPtr info,
2028					   PixmapPtr pDst,
2029					   int srcX, int srcY,
2030					   int maskX, int maskY,
2031					   int dstX, int dstY,
2032					   int w, int h)
2033{
2034    int vtx_count;
2035    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
2036    static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
2037    ACCEL_PREAMBLE();
2038
2039    ENTER_DRAW(0);
2040
2041    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
2042       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
2043
2044    srcTopLeft.x     = IntToxFixed(srcX);
2045    srcTopLeft.y     = IntToxFixed(srcY);
2046    srcTopRight.x    = IntToxFixed(srcX + w);
2047    srcTopRight.y    = IntToxFixed(srcY);
2048    srcBottomLeft.x  = IntToxFixed(srcX);
2049    srcBottomLeft.y  = IntToxFixed(srcY + h);
2050    srcBottomRight.x = IntToxFixed(srcX + w);
2051    srcBottomRight.y = IntToxFixed(srcY + h);
2052
2053    if (info->accel_state->is_transform[0]) {
2054	transformPoint(info->accel_state->transform[0], &srcTopLeft);
2055	transformPoint(info->accel_state->transform[0], &srcTopRight);
2056	transformPoint(info->accel_state->transform[0], &srcBottomLeft);
2057	transformPoint(info->accel_state->transform[0], &srcBottomRight);
2058    }
2059
2060    if (info->accel_state->has_mask) {
2061	maskTopLeft.x     = IntToxFixed(maskX);
2062	maskTopLeft.y     = IntToxFixed(maskY);
2063	maskTopRight.x    = IntToxFixed(maskX + w);
2064	maskTopRight.y    = IntToxFixed(maskY);
2065	maskBottomLeft.x  = IntToxFixed(maskX);
2066	maskBottomLeft.y  = IntToxFixed(maskY + h);
2067	maskBottomRight.x = IntToxFixed(maskX + w);
2068	maskBottomRight.y = IntToxFixed(maskY + h);
2069
2070	if (info->accel_state->is_transform[1]) {
2071	    transformPoint(info->accel_state->transform[1], &maskTopLeft);
2072	    transformPoint(info->accel_state->transform[1], &maskTopRight);
2073	    transformPoint(info->accel_state->transform[1], &maskBottomLeft);
2074	    transformPoint(info->accel_state->transform[1], &maskBottomRight);
2075	}
2076
2077	vtx_count = 6;
2078    } else
2079	vtx_count = 4;
2080
2081    if (info->accel_state->vsync)
2082	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h);
2083
2084#ifdef ACCEL_CP
2085    if (info->ChipFamily < CHIP_FAMILY_R200) {
2086	BEGIN_RING(3 * vtx_count + 3);
2087	OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2088			    3 * vtx_count + 1));
2089	if (info->accel_state->has_mask)
2090	    OUT_RING(RADEON_CP_VC_FRMT_XY |
2091		     RADEON_CP_VC_FRMT_ST0 |
2092		     RADEON_CP_VC_FRMT_ST1);
2093	else
2094	    OUT_RING(RADEON_CP_VC_FRMT_XY |
2095		     RADEON_CP_VC_FRMT_ST0);
2096	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2097		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2098		 RADEON_CP_VC_CNTL_MAOS_ENABLE |
2099		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2100		 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2101    } else if (IS_R300_3D || IS_R500_3D) {
2102	BEGIN_RING(4 * vtx_count + 4);
2103	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2104			    4 * vtx_count));
2105	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2106		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2107		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2108    } else {
2109	BEGIN_RING(3 * vtx_count + 2);
2110	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2111			    3 * vtx_count));
2112	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2113		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2114		 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2115    }
2116
2117#else /* ACCEL_CP */
2118    if (IS_R300_3D || IS_R500_3D)
2119	BEGIN_ACCEL(2 + vtx_count * 4);
2120    else
2121	BEGIN_ACCEL(1 + vtx_count * 3);
2122
2123    if (info->ChipFamily < CHIP_FAMILY_R200)
2124	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2125					  RADEON_VF_PRIM_WALK_DATA |
2126					  RADEON_VF_RADEON_MODE |
2127					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2128    else if (IS_R300_3D || IS_R500_3D)
2129	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
2130					  RADEON_VF_PRIM_WALK_DATA |
2131					  (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
2132    else
2133	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2134					  RADEON_VF_PRIM_WALK_DATA |
2135					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2136
2137#endif
2138
2139    if (info->accel_state->has_mask) {
2140	if (IS_R300_3D || IS_R500_3D) {
2141	    VTX_OUT_MASK((float)dstX,                                      (float)dstY,
2142			 xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0],
2143			 xFixedToFloat(maskTopLeft.x) / info->accel_state->texW[1],     xFixedToFloat(maskTopLeft.y) / info->accel_state->texH[1]);
2144	}
2145	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
2146		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0],
2147		xFixedToFloat(maskBottomLeft.x) / info->accel_state->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->accel_state->texH[1]);
2148	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
2149		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0],
2150		xFixedToFloat(maskBottomRight.x) / info->accel_state->texW[1], xFixedToFloat(maskBottomRight.y) / info->accel_state->texH[1]);
2151	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
2152		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0],
2153		xFixedToFloat(maskTopRight.x) / info->accel_state->texW[1],    xFixedToFloat(maskTopRight.y) / info->accel_state->texH[1]);
2154    } else {
2155	if (IS_R300_3D || IS_R500_3D) {
2156	    VTX_OUT((float)dstX,                                      (float)dstY,
2157		    xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0]);
2158	}
2159	VTX_OUT((float)dstX,                                      (float)(dstY + h),
2160		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0]);
2161	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
2162		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0]);
2163	VTX_OUT((float)(dstX + w),                                (float)dstY,
2164		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
2165    }
2166
2167    if (IS_R300_3D || IS_R500_3D)
2168	/* flushing is pipelined, free/finish is not */
2169	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
2170
2171#ifdef ACCEL_CP
2172    ADVANCE_RING();
2173#else
2174    FINISH_ACCEL();
2175#endif /* !ACCEL_CP */
2176
2177    LEAVE_DRAW(0);
2178}
2179#undef VTX_OUT
2180#undef VTX_OUT_MASK
2181
2182static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
2183				       int srcX, int srcY,
2184				       int maskX, int maskY,
2185				       int dstX, int dstY,
2186				       int width, int height)
2187{
2188    int tileSrcY, tileMaskY, tileDstY;
2189    int remainingHeight;
2190    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2191
2192    if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) {
2193	FUNC_NAME(RadeonCompositeTile)(pScrn,
2194				       info,
2195				       pDst,
2196				       srcX, srcY,
2197				       maskX, maskY,
2198				       dstX, dstY,
2199				       width, height);
2200	return;
2201    }
2202
2203    /* Tiling logic borrowed from exaFillRegionTiled */
2204
2205    modulus(srcY, info->accel_state->src_tile_height, tileSrcY);
2206    tileMaskY = maskY;
2207    tileDstY = dstY;
2208
2209    remainingHeight = height;
2210    while (remainingHeight > 0) {
2211	int remainingWidth = width;
2212	int tileSrcX, tileMaskX, tileDstX;
2213	int h = info->accel_state->src_tile_height - tileSrcY;
2214
2215	if (h > remainingHeight)
2216	    h = remainingHeight;
2217	remainingHeight -= h;
2218
2219	modulus(srcX, info->accel_state->src_tile_width, tileSrcX);
2220	tileMaskX = maskX;
2221	tileDstX = dstX;
2222
2223	while (remainingWidth > 0) {
2224	    int w = info->accel_state->src_tile_width - tileSrcX;
2225	    if (w > remainingWidth)
2226		w = remainingWidth;
2227	    remainingWidth -= w;
2228
2229	    FUNC_NAME(RadeonCompositeTile)(pScrn,
2230					   info,
2231					   pDst,
2232					   tileSrcX, tileSrcY,
2233					   tileMaskX, tileMaskY,
2234					   tileDstX, tileDstY,
2235					   w, h);
2236
2237	    tileSrcX = 0;
2238	    tileMaskX += w;
2239	    tileDstX += w;
2240	}
2241	tileSrcY = 0;
2242	tileMaskY += h;
2243	tileDstY += h;
2244    }
2245}
2246
2247static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
2248{
2249    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2250    ACCEL_PREAMBLE();
2251
2252    ENTER_DRAW(0);
2253
2254    if (IS_R300_3D || IS_R500_3D) {
2255	BEGIN_ACCEL(3);
2256	OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
2257	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2258    } else
2259	BEGIN_ACCEL(1);
2260    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2261    FINISH_ACCEL();
2262
2263    LEAVE_DRAW(0);
2264}
2265
2266#undef ONLY_ONCE
2267#undef FUNC_NAME
2268