radeon_exa_render.c revision b7e1c893
1/*
2 * Copyright 2005 Eric Anholt
3 * Copyright 2005 Benjamin Herrenschmidt
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 *    Eric Anholt <anholt@FreeBSD.org>
27 *    Zack Rusin <zrusin@trolltech.com>
28 *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29 *    Alex Deucher <alexander.deucher@amd.com>
30 *
31 */
32
/*
 * Acceleration-type selection.
 *
 * Exactly one of ACCEL_MMIO or ACCEL_CP must be defined before this code is
 * compiled; defining both or neither is a hard preprocessor error.
 * (Presumably the file is included once per acceleration type by another
 * translation unit -- confirm at the include site.)
 */
#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
#error Cannot define both MMIO and CP acceleration!
#endif

/*
 * Token pasting helper: uses the ANSI ## operator unless an old-style
 * (UNIXCPP without ANSICPP) preprocessor is indicated, in which case an
 * empty comment between the two tokens is used instead.
 */
#if !defined(UNIXCPP) || defined(ANSICPP)
#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
#else
#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
#endif

/*
 * FUNC_NAME(foo) expands to fooMMIO or fooCP depending on the selected
 * acceleration type, so the per-type functions below get distinct names.
 */
#ifdef ACCEL_MMIO
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
#else
#ifdef ACCEL_CP
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
#else
#error No accel type defined!
#endif
#endif

/*
 * ONLY_ONCE is defined whenever ACCEL_CP is not, i.e. for the MMIO build of
 * this code; it guards the definitions that do not use FUNC_NAME().
 */
#ifndef ACCEL_CP
#define ONLY_ONCE
#endif
56
57/* Only include the following (generic) bits once. */
58#ifdef ONLY_ONCE
59
/*
 * One row of the RadeonBlendOp table: the blend factors for a composite
 * operator plus flags telling which alpha channels those factors read.
 * Field order matters: the table uses positional initializers.
 */
struct blendinfo {
    /* Nonzero when the source blend factor depends on destination alpha. */
    Bool dst_alpha;
    /* Nonzero when the destination blend factor depends on source alpha. */
    Bool src_alpha;
    /* RADEON_SRC_BLEND_* | RADEON_DST_BLEND_* factor bits for the operator. */
    uint32_t blend_cntl;
};
65
/*
 * Blend setup per composite operator, indexed directly by the `op` value
 * passed to the composite functions in this file (callers must bounds-check
 * `op` against the table size before indexing).  The per-entry comments name
 * the operator each row implements; presumably the row order matches the
 * Render extension's PictOp numbering -- confirm against the PictOp* values.
 *
 * Columns: { dst_alpha, src_alpha, blend_cntl }.
 */
static struct blendinfo RadeonBlendOp[] = {
    /* Clear */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
    /* Src */
    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
    /* Dst */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
    /* Over */
    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* OverReverse */
    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
    /* In */
    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
    /* InReverse */
    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
    /* Out */
    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
    /* OutReverse */
    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* Atop */
    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* AtopReverse */
    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
    /* Xor */
    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* Add */
    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
};
94
/*
 * Maps a Render picture format to the value a chip family uses to describe
 * that format as a texture.  Field order matters: the per-family tables use
 * positional initializers.
 */
struct formatinfo {
    /* Render picture format (a PICT_* value), compared with PicturePtr->format. */
    int fmt;
    /* Chip-specific texture format bits to program for that picture format. */
    uint32_t card_fmt;
};
99
100/* Note on texture formats:
101 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
102 */
/*
 * Picture formats usable as textures on R100, with the TXFORMAT bits for
 * each.  Searched linearly by R100CheckCompositeTexture (validation) and
 * R100TextureSetup (programming).
 */
static struct formatinfo R100TexFormats[] = {
	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
};
111
/*
 * Picture formats usable as textures on R200, with the TXFORMAT bits for
 * each.  Compared with the R100 table this one also lists the a8b8g8r8 and
 * x8b8g8r8 formats.  Searched linearly by R200CheckCompositeTexture and
 * R200TextureSetup.
 */
static struct formatinfo R200TexFormats[] = {
    {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
    {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
    {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
    {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
    {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
    {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
    {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
    {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
};
122
/*
 * Picture formats usable as textures on R300, expressed through
 * R300_EASY_TX_FORMAT().  The first four macro arguments appear to be a
 * per-channel component selection (with ONE/ZERO forcing a constant) and the
 * last the storage layout -- TODO confirm against the macro's definition.
 * Not referenced within the portion of the file visible here.
 */
static struct formatinfo R300TexFormats[] = {
    {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
    {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
    {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
    {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
    {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
    {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
    {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
    {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
};
133
134/* Common Radeon setup code */
135
136static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
137{
138    switch (pDstPicture->format) {
139    case PICT_a8r8g8b8:
140    case PICT_x8r8g8b8:
141	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
142	break;
143    case PICT_r5g6b5:
144	*dst_format = RADEON_COLOR_FORMAT_RGB565;
145	break;
146    case PICT_a1r5g5b5:
147    case PICT_x1r5g5b5:
148	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
149	break;
150    case PICT_a8:
151	*dst_format = RADEON_COLOR_FORMAT_RGB8;
152	break;
153    default:
154	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
155			(int)pDstPicture->format));
156    }
157
158    return TRUE;
159}
160
161static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
162{
163    switch (pDstPicture->format) {
164    case PICT_a8r8g8b8:
165    case PICT_x8r8g8b8:
166	*dst_format = R300_COLORFORMAT_ARGB8888;
167	break;
168    case PICT_r5g6b5:
169	*dst_format = R300_COLORFORMAT_RGB565;
170	break;
171    case PICT_a1r5g5b5:
172    case PICT_x1r5g5b5:
173	*dst_format = R300_COLORFORMAT_ARGB1555;
174	break;
175    case PICT_a8:
176	*dst_format = R300_COLORFORMAT_I8;
177	break;
178    default:
179	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
180	       (int)pDstPicture->format));
181    }
182    return TRUE;
183}
184
185static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
186{
187    uint32_t sblend, dblend;
188
189    sblend = RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK;
190    dblend = RadeonBlendOp[op].blend_cntl & RADEON_DST_BLEND_MASK;
191
192    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
193     * it as always 1.
194     */
195    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
196	if (sblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
197	    sblend = RADEON_SRC_BLEND_GL_ONE;
198	else if (sblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
199	    sblend = RADEON_SRC_BLEND_GL_ZERO;
200    }
201
202    /* If the source alpha is being used, then we should only be in a case where
203     * the source blend factor is 0, and the source blend value is the mask
204     * channels multiplied by the source picture's alpha.
205     */
206    if (pMask && pMask->componentAlpha && RadeonBlendOp[op].src_alpha) {
207	if (dblend == RADEON_DST_BLEND_GL_SRC_ALPHA) {
208	    dblend = RADEON_DST_BLEND_GL_SRC_COLOR;
209	} else if (dblend == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA) {
210	    dblend = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
211	}
212    }
213
214    return sblend | dblend;
215}
216
/*
 * Overlays a float and a 32-bit unsigned integer in the same storage, so a
 * value written through one member can be read back through the other.
 * Not referenced in the portion of the file visible here; presumably used
 * to hand float bit patterns to code that takes 32-bit words -- confirm at
 * the use sites.
 */
union intfloat {
    float f;
    uint32_t i;
};
221
222/* Check if we need a software-fallback because of a repeating
223 *   non-power-of-two texture.
224 *
225 * canTile: whether we can emulate a repeat by drawing in tiles:
226 *   possible for the source, but not for the mask. (Actually
227 *   we could do tiling for the mask too, but dealing with the
228 *   combination of a tiled mask and a tiled source would be
229 *   a lot of complexity, so we handle only the most common
230 *   case of a repeating mask.)
231 */
232static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
233{
234    int w = pPict->pDrawable->width;
235    int h = pPict->pDrawable->height;
236
237    if (pPict->repeat && pPict->repeatType != RepeatPad &&
238	((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
239	!(pPict->repeatType == RepeatNormal && !pPict->transform && canTile))
240	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
241			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
242
243    return TRUE;
244}
245
246/* Determine if the pitch of the pixmap meets the criteria for being
247 * used as a repeating texture: no padding or only a single line texture.
248 */
249static Bool RADEONPitchMatches(PixmapPtr pPix)
250{
251    int w = pPix->drawable.width;
252    int h = pPix->drawable.height;
253    uint32_t txpitch = exaGetPixmapPitch(pPix);
254
255    if (h > 1 && ((w * pPix->drawable.bitsPerPixel / 8 + 31) & ~31) != txpitch)
256	return FALSE;
257
258    return TRUE;
259}
260
261/* We can't turn on repeats normally for a non-power-of-two dimension,
262 * but if the source isn't transformed, we can get the same effect
263 * by drawing the image in multiple tiles. (A common case that it's
264 * important to get right is drawing a strip of a NPOTxPOT texture
 * repeating in the POT direction. With tiling, this ends up as a
 * single tile on R300 and newer, which is perfect.)
267 *
268 * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
269 *   one direction and POT in the other in the POT direction; on
270 *   older chips we can only repeat at all if the texture is POT in
271 *   both directions.
272 *
273 * needMatchingPitch: On R100/R200, we can only repeat horizontally if
274 *   there is no padding in the texture. Textures with small POT widths
275 *   (1,2,4,8) thus can't be tiled.
276 */
277static Bool RADEONSetupSourceTile(PicturePtr pPict,
278				  PixmapPtr pPix,
279				  Bool canTile1d,
280				  Bool needMatchingPitch)
281{
282    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
283
284    info->accel_state->need_src_tile_x = info->accel_state->need_src_tile_y = FALSE;
285    info->accel_state->src_tile_width = info->accel_state->src_tile_height = 65536; /* "infinite" */
286
287    if (pPict->repeat && pPict->repeatType != RepeatPad) {
288	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
289
290	int w = pPict->pDrawable->width;
291	int h = pPict->pDrawable->height;
292
293	if (pPict->transform) {
294	    if (badPitch)
295		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
296				 w, (unsigned)exaGetPixmapPitch(pPix)));
297	} else {
298	    info->accel_state->need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
299	    info->accel_state->need_src_tile_y = (h & (h - 1)) != 0;
300
301	    if ((info->accel_state->need_src_tile_x ||
302		 info->accel_state->need_src_tile_y) &&
303		pPict->repeatType != RepeatNormal)
304		RADEON_FALLBACK(("Can only tile RepeatNormal at this time\n"));
305
306	    if (!canTile1d)
307		info->accel_state->need_src_tile_x =
308		    info->accel_state->need_src_tile_y =
309		    info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y;
310	}
311
312	if (info->accel_state->need_src_tile_x)
313	    info->accel_state->src_tile_width = w;
314	if (info->accel_state->need_src_tile_y)
315	    info->accel_state->src_tile_height = h;
316    }
317
318    return TRUE;
319}
320
321/* R100-specific code */
322
323static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit)
324{
325    int w = pPict->pDrawable->width;
326    int h = pPict->pDrawable->height;
327    int i;
328
329    /* r100 limit should be 2048, there are issues with 2048
330     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
331     */
332
333    if ((w > 2047) || (h > 2047))
334	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
335
336    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
337	if (R100TexFormats[i].fmt == pPict->format)
338	    break;
339    }
340    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
341	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
342			(int)pPict->format));
343
344    if (!RADEONCheckTexturePOT(pPict, unit == 0))
345	return FALSE;
346
347    if (pPict->filter != PictFilterNearest &&
348	pPict->filter != PictFilterBilinear)
349    {
350	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
351    }
352
353    return TRUE;
354}
355
356#endif /* ONLY_ONCE */
357
/*
 * Program one R100 texture unit for a composite operation.
 *
 * pPict: picture supplying format, filter, repeat mode and transform.
 * pPix:  pixmap backing the picture; supplies offset, pitch and size.
 * unit:  texture unit to program: 0 selects the *_0 registers, any other
 *        value the *_1 registers.
 *
 * Emits the filter, format, offset, size and pitch registers for the unit
 * and records the picture's transform in info->accel_state.  Returns TRUE on
 * success; unsupported offset/pitch/filter go through RADEON_FALLBACK
 * (defined elsewhere; presumably logs and returns FALSE).
 */
static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
					int unit)
{
    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
    uint32_t txfilter, txformat, txoffset, txpitch;
    int w = pPict->pDrawable->width;
    int h = pPict->pDrawable->height;
    /* Use hardware repeat only for a repeating, non-RepeatPad picture, and
     * for unit 0 only when RADEONSetupSourceTile did not request tiling.
     */
    Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad &&
	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
    int i;
    ACCEL_PREAMBLE();

    txpitch = exaGetPixmapPitch(pPix);
    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;

    /* Offset and pitch must both be multiples of 32. */
    if ((txoffset & 0x1f) != 0)
	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
    if ((txpitch & 0x1f) != 0)
	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));

    /* NOTE(review): no not-found check here; this relies on the format
     * having been validated beforehand (R100CheckCompositeTexture does the
     * same search with a bounds check).  An unlisted format would index one
     * past the end of the table.
     */
    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
    {
	if (R100TexFormats[i].fmt == pPict->format)
	    break;
    }
    txformat = R100TexFormats[i].card_fmt;
    if (RADEONPixmapIsColortiled(pPix))
	txoffset |= RADEON_TXO_MACRO_TILE;

    if (repeat) {
	if (!RADEONPitchMatches(pPix))
	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
			     w, (unsigned)txpitch));

	/* Repeating textures are described by log2 width/height. */
	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
    } else
	txformat |= RADEON_TXFORMAT_NON_POWER2;
    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */

    /* NOTE(review): R200TextureSetup stores the real w/h here; the R100 path
     * stores 1 -- presumably its texture coordinates are consumed
     * unnormalized.  Confirm where texW/texH are used.
     */
    info->accel_state->texW[unit] = 1;
    info->accel_state->texH[unit] = 1;

    switch (pPict->filter) {
    case PictFilterNearest:
	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
	break;
    case PictFilterBilinear:
	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
	break;
    default:
	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
    }

    /* Clamp/wrap mode.  `repeat` is already FALSE for RepeatPad, so that
     * case cannot be reached from here.
     */
    if (repeat) {
	switch (pPict->repeatType) {
	case RepeatNormal:
	    txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
	    break;
	case RepeatPad:
	    txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
	    break;
	case RepeatReflect:
	    txfilter |= RADEON_CLAMP_S_MIRROR | RADEON_CLAMP_T_MIRROR;
	    break;
	case RepeatNone:
	    /* Nothing to do */
	    break;
	}
    }

    /* Five register writes per unit; the count must match BEGIN_ACCEL. */
    BEGIN_ACCEL(5);
    if (unit == 0) {
	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, txoffset);
	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
	    (pPix->drawable.width - 1) |
	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
    } else {
	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, txoffset);
	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
	    (pPix->drawable.width - 1) |
	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
    }
    FINISH_ACCEL();

    /* Remember the transform (if any) for this unit. */
    if (pPict->transform != 0) {
	info->accel_state->is_transform[unit] = TRUE;
	info->accel_state->transform[unit] = pPict->transform;
    } else {
	info->accel_state->is_transform[unit] = FALSE;
    }

    return TRUE;
}
458
459#ifdef ONLY_ONCE
460
461PixmapPtr
462RADEONGetDrawablePixmap(DrawablePtr pDrawable)
463{
464    if (pDrawable->type == DRAWABLE_WINDOW)
465	return pDrawable->pScreen->GetWindowPixmap((WindowPtr)pDrawable);
466    else
467	return (PixmapPtr)pDrawable;
468}
469
470static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
471			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
472{
473    PixmapPtr pSrcPixmap, pDstPixmap;
474    uint32_t tmp1;
475
476    /* Check for unsupported compositing operations. */
477    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
478	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
479
480    if (!pSrcPicture->pDrawable)
481	return FALSE;
482
483    /* r100 limit should be 2048, there are issues with 2048
484     * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
485     */
486
487    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
488
489    if (pSrcPixmap->drawable.width > 2047 ||
490	pSrcPixmap->drawable.height > 2047) {
491	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
492			 pSrcPixmap->drawable.width,
493			 pSrcPixmap->drawable.height));
494    }
495
496    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
497
498    if (pDstPixmap->drawable.width > 2047 ||
499	pDstPixmap->drawable.height > 2047) {
500	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
501			 pDstPixmap->drawable.width,
502			 pDstPixmap->drawable.height));
503    }
504
505    if (pMaskPicture) {
506	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
507
508	if (pMaskPixmap->drawable.width > 2047 ||
509	    pMaskPixmap->drawable.height > 2047) {
510	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
511			     pMaskPixmap->drawable.width,
512			     pMaskPixmap->drawable.height));
513	}
514
515	if (pMaskPicture->componentAlpha) {
516	    /* Check if it's component alpha that relies on a source alpha and
517	     * on the source value.  We can only get one of those into the
518	     * single source value that we get to blend with.
519	     */
520	    if (RadeonBlendOp[op].src_alpha &&
521		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
522		RADEON_SRC_BLEND_GL_ZERO) {
523		RADEON_FALLBACK(("Component alpha not supported with source "
524				 "alpha and source value blending.\n"));
525	    }
526	}
527
528	if (!R100CheckCompositeTexture(pMaskPicture, 1))
529	    return FALSE;
530    }
531
532    if (!R100CheckCompositeTexture(pSrcPicture, 0))
533	return FALSE;
534
535    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
536	return FALSE;
537
538    return TRUE;
539}
540#endif /* ONLY_ONCE */
541
542static Bool FUNC_NAME(R100PrepareComposite)(int op,
543					    PicturePtr pSrcPicture,
544					    PicturePtr pMaskPicture,
545					    PicturePtr pDstPicture,
546					    PixmapPtr pSrc,
547					    PixmapPtr pMask,
548					    PixmapPtr pDst)
549{
550    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
551    uint32_t dst_format, dst_offset, dst_pitch, colorpitch;
552    uint32_t pp_cntl, blendcntl, cblend, ablend;
553    int pixel_shift;
554    ACCEL_PREAMBLE();
555
556    TRACE;
557
558    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
559	return FALSE;
560
561    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
562	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
563
564    if (pMask)
565	info->accel_state->has_mask = TRUE;
566    else
567	info->accel_state->has_mask = FALSE;
568
569    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
570
571    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
572    dst_pitch = exaGetPixmapPitch(pDst);
573    colorpitch = dst_pitch >> pixel_shift;
574    if (RADEONPixmapIsColortiled(pDst))
575	colorpitch |= RADEON_COLOR_TILE_ENABLE;
576
577    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
578    dst_pitch = exaGetPixmapPitch(pDst);
579    if ((dst_offset & 0x0f) != 0)
580	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
581    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
582	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
583
584    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
585	return FALSE;
586
587    RADEON_SWITCH_TO_3D();
588
589    if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
590	return FALSE;
591    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
592
593    if (pMask != NULL) {
594	if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1))
595	    return FALSE;
596	pp_cntl |= RADEON_TEX_1_ENABLE;
597    } else {
598	info->accel_state->is_transform[1] = FALSE;
599    }
600
601    BEGIN_ACCEL(8);
602    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
603    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
604    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
605    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
606
607    /* IN operator: Multiply src by mask components or mask alpha.
608     * BLEND_CTL_ADD is A * B + C.
609     * If a source is a8, we have to explicitly zero its color values.
610     * If the destination is a8, we have to route the alpha to red, I think.
611     * If we're doing component alpha where the source for blending is going to
612     * be the source alpha (and there's no source value used), we have to zero
613     * the source's color values.
614     */
615    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
616    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
617
618    if (pDstPicture->format == PICT_a8 ||
619	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
620    {
621	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
622    } else if (pSrcPicture->format == PICT_a8)
623	cblend |= RADEON_COLOR_ARG_A_ZERO;
624    else
625	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
626    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
627
628    if (pMask) {
629	if (pMaskPicture->componentAlpha &&
630	    pDstPicture->format != PICT_a8)
631	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
632	else
633	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
634	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
635    } else {
636	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
637	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
638    }
639
640    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
641    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
642    if (pMask)
643	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
644					  RADEON_SE_VTX_FMT_ST0 |
645					  RADEON_SE_VTX_FMT_ST1));
646    else
647	OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
648					  RADEON_SE_VTX_FMT_ST0));
649    /* Op operator. */
650    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
651
652    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
653    FINISH_ACCEL();
654
655    return TRUE;
656}
657
658#ifdef ONLY_ONCE
659
660static Bool R200CheckCompositeTexture(PicturePtr pPict, int unit)
661{
662    int w = pPict->pDrawable->width;
663    int h = pPict->pDrawable->height;
664    int i;
665
666    /* r200 limit should be 2048, there are issues with 2048
667     * see bug 19269
668     */
669
670    if ((w > 2047) || (h > 2047))
671	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
672
673    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
674    {
675	if (R200TexFormats[i].fmt == pPict->format)
676	    break;
677    }
678    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
679	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
680			 (int)pPict->format));
681
682    if (!RADEONCheckTexturePOT(pPict, unit == 0))
683	return FALSE;
684
685    if (pPict->filter != PictFilterNearest &&
686	pPict->filter != PictFilterBilinear)
687	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
688
689    return TRUE;
690}
691
692#endif /* ONLY_ONCE */
693
/*
 * Program one R200 texture unit for a composite operation.
 *
 * pPict: picture supplying format, filter, repeat mode and transform.
 * pPix:  pixmap backing the picture; supplies offset, pitch and size.
 * unit:  texture unit to program: 0 selects the *_0 registers, any other
 *        value the *_1 registers.
 *
 * Emits the filter, format, size, pitch and offset registers for the unit
 * and records the picture's size and transform in info->accel_state.
 * Returns TRUE on success; unsupported offset/pitch/filter go through
 * RADEON_FALLBACK (defined elsewhere; presumably logs and returns FALSE).
 */
static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
					int unit)
{
    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
    uint32_t txfilter, txformat, txoffset, txpitch;
    int w = pPict->pDrawable->width;
    int h = pPict->pDrawable->height;
    /* Use hardware repeat only for a repeating, non-RepeatPad picture, and
     * for unit 0 only when RADEONSetupSourceTile did not request tiling.
     */
    Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad &&
	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
    int i;
    ACCEL_PREAMBLE();

    txpitch = exaGetPixmapPitch(pPix);
    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;

    /* Offset and pitch must both be multiples of 32. */
    if ((txoffset & 0x1f) != 0)
	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
    if ((txpitch & 0x1f) != 0)
	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));

    /* NOTE(review): no not-found check here; this relies on the format
     * having been validated beforehand (R200CheckCompositeTexture does the
     * same search with a bounds check).  An unlisted format would index one
     * past the end of the table.
     */
    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
    {
	if (R200TexFormats[i].fmt == pPict->format)
	    break;
    }
    txformat = R200TexFormats[i].card_fmt;
    if (RADEONPixmapIsColortiled(pPix))
	txoffset |= R200_TXO_MACRO_TILE;

    if (repeat) {
	if (!RADEONPitchMatches(pPix))
	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
			     w, (unsigned)txpitch));

	/* Repeating textures are described by log2 width/height. */
	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
    } else
	txformat |= R200_TXFORMAT_NON_POWER2;
    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;

    /* Unlike the R100 path, the real picture size is recorded here. */
    info->accel_state->texW[unit] = w;
    info->accel_state->texH[unit] = h;

    switch (pPict->filter) {
    case PictFilterNearest:
	txfilter = (R200_MAG_FILTER_NEAREST |
		    R200_MIN_FILTER_NEAREST);
	break;
    case PictFilterBilinear:
	txfilter = (R200_MAG_FILTER_LINEAR |
		    R200_MIN_FILTER_LINEAR);
	break;
    default:
	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
    }

    /* Clamp/wrap mode.  `repeat` is already FALSE for RepeatPad, so that
     * case cannot be reached from here.
     */
    if (repeat) {
	switch (pPict->repeatType) {
	case RepeatNormal:
	    txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
	    break;
	case RepeatPad:
	    txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
	    break;
	case RepeatReflect:
	    txfilter |= R200_CLAMP_S_MIRROR | R200_CLAMP_T_MIRROR;
	    break;
	case RepeatNone:
	    /* Nothing to do */
	    break;
	}
    }

    /* Six register writes per unit; the count must match BEGIN_ACCEL. */
    BEGIN_ACCEL(6);
    if (unit == 0) {
	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
	OUT_ACCEL_REG(R200_PP_TXOFFSET_0, txoffset);
    } else {
	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
	OUT_ACCEL_REG(R200_PP_TXOFFSET_1, txoffset);
    }
    FINISH_ACCEL();

    /* Remember the transform (if any) for this unit. */
    if (pPict->transform != 0) {
	info->accel_state->is_transform[unit] = TRUE;
	info->accel_state->transform[unit] = pPict->transform;
    } else {
	info->accel_state->is_transform[unit] = FALSE;
    }

    return TRUE;
}
796
797#ifdef ONLY_ONCE
798static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
799			       PicturePtr pDstPicture)
800{
801    PixmapPtr pSrcPixmap, pDstPixmap;
802    uint32_t tmp1;
803
804    TRACE;
805
806    if (!pSrcPicture->pDrawable)
807	return FALSE;
808
809    /* r200 limit should be 2048, there are issues with 2048
810     * see bug 19269
811     */
812
813    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
814
815    if (pSrcPixmap->drawable.width > 2047 ||
816	pSrcPixmap->drawable.height > 2047) {
817	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
818			 pSrcPixmap->drawable.width,
819			 pSrcPixmap->drawable.height));
820    }
821
822    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
823
824    if (pDstPixmap->drawable.width > 2047 ||
825	pDstPixmap->drawable.height > 2047) {
826	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
827			 pDstPixmap->drawable.width,
828			 pDstPixmap->drawable.height));
829    }
830
831    if (pMaskPicture) {
832	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
833
834	if (pMaskPixmap->drawable.width > 2047 ||
835	    pMaskPixmap->drawable.height > 2047) {
836	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
837			     pMaskPixmap->drawable.width,
838			     pMaskPixmap->drawable.height));
839	}
840
841	if (pMaskPicture->componentAlpha) {
842	    /* Check if it's component alpha that relies on a source alpha and
843	     * on the source value.  We can only get one of those into the
844	     * single source value that we get to blend with.
845	     */
846	    if (RadeonBlendOp[op].src_alpha &&
847		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
848		RADEON_SRC_BLEND_GL_ZERO) {
849		RADEON_FALLBACK(("Component alpha not supported with source "
850				 "alpha and source value blending.\n"));
851	    }
852	}
853
854	if (!R200CheckCompositeTexture(pMaskPicture, 1))
855	    return FALSE;
856    }
857
858    if (!R200CheckCompositeTexture(pSrcPicture, 0))
859	return FALSE;
860
861    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
862	return FALSE;
863
864    return TRUE;
865}
866#endif /* ONLY_ONCE */
867
868static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
869				PicturePtr pMaskPicture, PicturePtr pDstPicture,
870				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
871{
872    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
873    uint32_t dst_format, dst_offset, dst_pitch;
874    uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
875    int pixel_shift;
876    ACCEL_PREAMBLE();
877
878    TRACE;
879
880    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
881	return FALSE;
882
883    if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
884	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
885
886    if (pMask)
887	info->accel_state->has_mask = TRUE;
888    else
889	info->accel_state->has_mask = FALSE;
890
891    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
892
893    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
894    dst_pitch = exaGetPixmapPitch(pDst);
895    colorpitch = dst_pitch >> pixel_shift;
896    if (RADEONPixmapIsColortiled(pDst))
897	colorpitch |= RADEON_COLOR_TILE_ENABLE;
898
899    if ((dst_offset & 0x0f) != 0)
900	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
901    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
902	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
903
904    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
905	return FALSE;
906
907    RADEON_SWITCH_TO_3D();
908
909    if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
910	return FALSE;
911    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
912
913    if (pMask != NULL) {
914	if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1))
915	    return FALSE;
916	pp_cntl |= RADEON_TEX_1_ENABLE;
917    } else {
918	info->accel_state->is_transform[1] = FALSE;
919    }
920
921    BEGIN_ACCEL(11);
922
923    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
924    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
925    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
926
927    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
928    if (pMask)
929	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
930		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
931		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
932    else
933	OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
934		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
935
936    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
937
938    /* IN operator: Multiply src by mask components or mask alpha.
939     * BLEND_CTL_ADD is A * B + C.
940     * If a picture is a8, we have to explicitly zero its color values.
941     * If the destination is a8, we have to route the alpha to red, I think.
942     * If we're doing component alpha where the source for blending is going to
943     * be the source alpha (and there's no source value used), we have to zero
944     * the source's color values.
945     */
946    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
947    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
948
949    if (pDstPicture->format == PICT_a8 ||
950	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
951    {
952	cblend |= R200_TXC_ARG_A_R0_ALPHA;
953    } else if (pSrcPicture->format == PICT_a8)
954	cblend |= R200_TXC_ARG_A_ZERO;
955    else
956	cblend |= R200_TXC_ARG_A_R0_COLOR;
957    ablend |= R200_TXA_ARG_A_R0_ALPHA;
958
959    if (pMask) {
960	if (pMaskPicture->componentAlpha &&
961	    pDstPicture->format != PICT_a8)
962	    cblend |= R200_TXC_ARG_B_R1_COLOR;
963	else
964	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
965	ablend |= R200_TXA_ARG_B_R1_ALPHA;
966    } else {
967	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
968	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
969    }
970
971    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend);
972    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
973	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
974    OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend);
975    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
976	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
977
978    /* Op operator. */
979    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
980    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
981    FINISH_ACCEL();
982
983    return TRUE;
984}
985
986#ifdef ONLY_ONCE
987
988static Bool R300CheckCompositeTexture(PicturePtr pPict,
989				      PicturePtr pDstPict,
990				      int op,
991				      int unit,
992				      Bool is_r500)
993{
994    int w = pPict->pDrawable->width;
995    int h = pPict->pDrawable->height;
996    int i;
997    int max_tex_w, max_tex_h;
998
999    if (is_r500) {
1000	max_tex_w = 4096;
1001	max_tex_h = 4096;
1002    } else {
1003	max_tex_w = 2048;
1004	max_tex_h = 2048;
1005    }
1006
1007    if ((w > max_tex_w) || (h > max_tex_h))
1008	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1009
1010    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1011    {
1012	if (R300TexFormats[i].fmt == pPict->format)
1013	    break;
1014    }
1015    if (i == sizeof(R300TexFormats) / sizeof(R300TexFormats[0]))
1016	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1017			 (int)pPict->format));
1018
1019    if (!RADEONCheckTexturePOT(pPict, unit == 0))
1020	return FALSE;
1021
1022    if (pPict->filter != PictFilterNearest &&
1023	pPict->filter != PictFilterBilinear)
1024	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1025
1026    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1027     * picture results in alpha=0 pixels. We can implement this with a border color
1028     * *if* our source texture has an alpha channel, otherwise we need to fall
1029     * back. If we're not transformed then we hope that upper layers have clipped
1030     * rendering to the bounds of the source drawable, in which case it doesn't
1031     * matter. I have not, however, verified that the X server always does such
1032     * clipping.
1033     */
1034    if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) {
1035	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1036	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1037    }
1038
1039    return TRUE;
1040}
1041
1042#endif /* ONLY_ONCE */
1043
1044static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
1045					int unit)
1046{
1047    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1048    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
1049    int w = pPict->pDrawable->width;
1050    int h = pPict->pDrawable->height;
1051    int i, pixel_shift;
1052    ACCEL_PREAMBLE();
1053
1054    TRACE;
1055
1056    txpitch = exaGetPixmapPitch(pPix);
1057    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
1058
1059    if ((txoffset & 0x1f) != 0)
1060	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
1061    if ((txpitch & 0x1f) != 0)
1062	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1063
1064    /* TXPITCH = pixels (texels) per line - 1 */
1065    pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1066    txpitch >>= pixel_shift;
1067    txpitch -= 1;
1068
1069    if (RADEONPixmapIsColortiled(pPix))
1070	txoffset |= R300_MACRO_TILE;
1071
1072    for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1073    {
1074	if (R300TexFormats[i].fmt == pPict->format)
1075	    break;
1076    }
1077
1078    txformat1 = R300TexFormats[i].card_fmt;
1079
1080    txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1081		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1082
1083    if (IS_R500_3D && ((w - 1) & 0x800))
1084	txpitch |= R500_TXWIDTH_11;
1085
1086    if (IS_R500_3D && ((h - 1) & 0x800))
1087	txpitch |= R500_TXHEIGHT_11;
1088
1089    /* Use TXPITCH instead of TXWIDTH for address computations: we could
1090     * omit this if there is no padding, but there is no apparent advantage
1091     * in doing so.
1092     */
1093    txformat0 |= R300_TXPITCH_EN;
1094
1095    info->accel_state->texW[unit] = w;
1096    info->accel_state->texH[unit] = h;
1097
1098    txfilter = (unit << R300_TX_ID_SHIFT);
1099
1100    if (pPict->repeat) {
1101	switch (pPict->repeatType) {
1102	case RepeatNormal:
1103	    if (unit != 0 || !info->accel_state->need_src_tile_x)
1104		txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1105	    else
1106		txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1107
1108	    if (unit != 0 || !info->accel_state->need_src_tile_y)
1109		txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1110	    else
1111		txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1112
1113	    break;
1114	case RepeatPad:
1115	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1116		        R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST);
1117	    break;
1118	case RepeatReflect:
1119	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_MIRROR) |
1120		        R300_TX_CLAMP_T(R300_TX_CLAMP_MIRROR);
1121	    break;
1122	case RepeatNone:
1123	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1124		        R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1125	    break;
1126	}
1127    } else
1128	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1129	            R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1130
1131    switch (pPict->filter) {
1132    case PictFilterNearest:
1133	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1134	break;
1135    case PictFilterBilinear:
1136	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1137	break;
1138    default:
1139	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1140    }
1141
1142    BEGIN_ACCEL(pPict->repeat ? 6 : 7);
1143    OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1144    OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1145    OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1146    OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1147    OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1148    OUT_ACCEL_REG(R300_TX_OFFSET_0 + (unit * 4), txoffset);
1149    if (!pPict->repeat)
1150	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1151    FINISH_ACCEL();
1152
1153    if (pPict->transform != 0) {
1154	info->accel_state->is_transform[unit] = TRUE;
1155	info->accel_state->transform[unit] = pPict->transform;
1156    } else {
1157	info->accel_state->is_transform[unit] = FALSE;
1158    }
1159
1160    return TRUE;
1161}
1162
1163#ifdef ONLY_ONCE
1164
1165static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1166			       PicturePtr pDstPicture)
1167{
1168    uint32_t tmp1;
1169    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
1170    PixmapPtr pSrcPixmap, pDstPixmap;
1171    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1172    RADEONInfoPtr info = RADEONPTR(pScrn);
1173    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1174
1175    TRACE;
1176
1177    /* Check for unsupported compositing operations. */
1178    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
1179	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1180
1181    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1182
1183    if (IS_R500_3D) {
1184	max_tex_w = 4096;
1185	max_tex_h = 4096;
1186	max_dst_w = 4096;
1187	max_dst_h = 4096;
1188    } else {
1189	max_tex_w = 2048;
1190	max_tex_h = 2048;
1191	max_dst_w = 2560;
1192	max_dst_h = 2560;
1193    }
1194
1195    if (pSrcPixmap->drawable.width > max_tex_w ||
1196	pSrcPixmap->drawable.height > max_tex_h) {
1197	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1198			 pSrcPixmap->drawable.width,
1199			 pSrcPixmap->drawable.height));
1200    }
1201
1202    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1203
1204    if (pDstPixmap->drawable.width > max_dst_w ||
1205	pDstPixmap->drawable.height > max_dst_h) {
1206	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1207			 pDstPixmap->drawable.width,
1208			 pDstPixmap->drawable.height));
1209    }
1210
1211    if (pMaskPicture) {
1212	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1213
1214	if (pMaskPixmap->drawable.width > max_tex_w ||
1215	    pMaskPixmap->drawable.height > max_tex_h) {
1216	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1217			     pMaskPixmap->drawable.width,
1218			     pMaskPixmap->drawable.height));
1219	}
1220
1221	if (pMaskPicture->componentAlpha) {
1222	    /* Check if it's component alpha that relies on a source alpha and
1223	     * on the source value.  We can only get one of those into the
1224	     * single source value that we get to blend with.
1225	     */
1226	    if (RadeonBlendOp[op].src_alpha &&
1227		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
1228		RADEON_SRC_BLEND_GL_ZERO) {
1229		RADEON_FALLBACK(("Component alpha not supported with source "
1230				 "alpha and source value blending.\n"));
1231	    }
1232	}
1233
1234	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
1235	    return FALSE;
1236    }
1237
1238    if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D))
1239	return FALSE;
1240
1241    if (!R300GetDestFormat(pDstPicture, &tmp1))
1242	return FALSE;
1243
1244    return TRUE;
1245
1246}
1247#endif /* ONLY_ONCE */
1248
1249static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
1250				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1251				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1252{
1253    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
1254    uint32_t dst_format, dst_offset, dst_pitch;
1255    uint32_t txenable, colorpitch;
1256    uint32_t blendcntl;
1257    int pixel_shift;
1258    ACCEL_PREAMBLE();
1259
1260    TRACE;
1261
1262    if (!R300GetDestFormat(pDstPicture, &dst_format))
1263	return FALSE;
1264
1265    if (pMask)
1266	info->accel_state->has_mask = TRUE;
1267    else
1268	info->accel_state->has_mask = FALSE;
1269
1270    pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1271
1272    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1273    dst_pitch = exaGetPixmapPitch(pDst);
1274    colorpitch = dst_pitch >> pixel_shift;
1275
1276    if (RADEONPixmapIsColortiled(pDst))
1277	colorpitch |= R300_COLORTILE;
1278
1279    colorpitch |= dst_format;
1280
1281    if ((dst_offset & 0x0f) != 0)
1282	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
1283    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1284	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1285
1286    if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1287	return FALSE;
1288
1289    RADEON_SWITCH_TO_3D();
1290
1291    if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
1292	return FALSE;
1293    txenable = R300_TEX_0_ENABLE;
1294
1295    if (pMask != NULL) {
1296	if (!FUNC_NAME(R300TextureSetup)(pMaskPicture, pMask, 1))
1297	    return FALSE;
1298	txenable |= R300_TEX_1_ENABLE;
1299    } else {
1300	info->accel_state->is_transform[1] = FALSE;
1301    }
1302
1303    /* setup the VAP */
1304    if (info->accel_state->has_tcl) {
1305	if (pMask)
1306	    BEGIN_ACCEL(8);
1307	else
1308	    BEGIN_ACCEL(7);
1309    } else {
1310	if (pMask)
1311	    BEGIN_ACCEL(6);
1312	else
1313	    BEGIN_ACCEL(5);
1314    }
1315
1316    /* These registers define the number, type, and location of data submitted
1317     * to the PVS unit or GA input (when PVS is disabled).
1318     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
1319     * enabled.  This memory provides the inputs to the vertex shader program
1320     * and ordering is not important.  When PVS/TCL is disabled, this field maps
1321     * directly to the GA input memory and the order is significant.  In
1322     * PVS_BYPASS mode the order is as follows:
1323     * Position
1324     * Point Size
1325     * Color 0-3
1326     * Textures 0-7
1327     * Fog
1328     */
1329    if (pMask) {
1330	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1331		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1332		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1333		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1334		       R300_SIGNED_0 |
1335		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1336		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1337		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1338		       R300_SIGNED_1));
1339	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
1340		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1341		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1342		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1343		       R300_LAST_VEC_2 |
1344		       R300_SIGNED_2));
1345    } else
1346	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
1347		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1348		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1349		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1350		       R300_SIGNED_0 |
1351		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1352		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1353		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1354		       R300_LAST_VEC_1 |
1355		       R300_SIGNED_1));
1356
1357    /* load the vertex shader
1358     * We pre-load vertex programs in RADEONInit3DEngine():
1359     * - exa no mask
1360     * - exa mask
1361     * - Xv
1362     * Here we select the offset of the vertex program we want to use
1363     */
1364    if (info->accel_state->has_tcl) {
1365	if (pMask) {
1366	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1367			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1368			   (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1369			   (2 << R300_PVS_LAST_INST_SHIFT)));
1370	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1371			  (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1372	} else {
1373	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
1374			  ((3 << R300_PVS_FIRST_INST_SHIFT) |
1375			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1376			   (4 << R300_PVS_LAST_INST_SHIFT)));
1377	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
1378			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1379	}
1380    }
1381
1382    /* Position and one or two sets of 2 texture coordinates */
1383    OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1384    if (pMask)
1385	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1386		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1387		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1388    else
1389	OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
1390		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1391
1392    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
1393    OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
1394    FINISH_ACCEL();
1395
1396    /* setup pixel shader */
1397    if (IS_R300_3D) {
1398	uint32_t output_fmt;
1399	int src_color, src_alpha;
1400	int mask_color, mask_alpha;
1401
1402	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1403	    src_color = R300_ALU_RGB_0_0;
1404	else
1405	    src_color = R300_ALU_RGB_SRC0_RGB;
1406
1407	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1408	    src_alpha = R300_ALU_ALPHA_1_0;
1409	else
1410	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1411
1412	if (pMask && pMaskPicture->componentAlpha) {
1413	    if (RadeonBlendOp[op].src_alpha) {
1414		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
1415		    src_color = R300_ALU_RGB_1_0;
1416		    src_alpha = R300_ALU_ALPHA_1_0;
1417		} else {
1418		    src_color = R300_ALU_RGB_SRC0_AAA;
1419		    src_alpha = R300_ALU_ALPHA_SRC0_A;
1420		}
1421
1422		mask_color = R300_ALU_RGB_SRC1_RGB;
1423
1424		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1425		    mask_alpha = R300_ALU_ALPHA_1_0;
1426		else
1427		    mask_alpha = R300_ALU_ALPHA_SRC1_A;
1428
1429	    } else {
1430		src_color = R300_ALU_RGB_SRC0_RGB;
1431
1432		if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1433		    src_alpha = R300_ALU_ALPHA_1_0;
1434		else
1435		    src_alpha = R300_ALU_ALPHA_SRC0_A;
1436
1437		mask_color = R300_ALU_RGB_SRC1_RGB;
1438
1439		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1440		    mask_alpha = R300_ALU_ALPHA_1_0;
1441		else
1442		    mask_alpha = R300_ALU_ALPHA_SRC1_A;
1443
1444	    }
1445	} else if (pMask) {
1446	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1447		mask_color = R300_ALU_RGB_1_0;
1448	    else
1449		mask_color = R300_ALU_RGB_SRC1_AAA;
1450
1451	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1452		mask_alpha = R300_ALU_ALPHA_1_0;
1453	    else
1454		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1455	} else {
1456	    mask_color = R300_ALU_RGB_1_0;
1457	    mask_alpha = R300_ALU_ALPHA_1_0;
1458	}
1459
1460	/* shader output swizzling */
1461	switch (pDstPicture->format) {
1462	case PICT_a8r8g8b8:
1463	case PICT_x8r8g8b8:
1464	default:
1465	    output_fmt = (R300_OUT_FMT_C4_8 |
1466			  R300_OUT_FMT_C0_SEL_BLUE |
1467			  R300_OUT_FMT_C1_SEL_GREEN |
1468			  R300_OUT_FMT_C2_SEL_RED |
1469			  R300_OUT_FMT_C3_SEL_ALPHA);
1470	    break;
1471	case PICT_a8b8g8r8:
1472	case PICT_x8b8g8r8:
1473	    output_fmt = (R300_OUT_FMT_C4_8 |
1474			  R300_OUT_FMT_C0_SEL_RED |
1475			  R300_OUT_FMT_C1_SEL_GREEN |
1476			  R300_OUT_FMT_C2_SEL_BLUE |
1477			  R300_OUT_FMT_C3_SEL_ALPHA);
1478	    break;
1479	case PICT_a8:
1480	    output_fmt = (R300_OUT_FMT_C4_8 |
1481			  R300_OUT_FMT_C0_SEL_ALPHA);
1482	    break;
1483	}
1484
1485
1486	/* setup the rasterizer, load FS */
1487	if (pMask) {
1488	    BEGIN_ACCEL(16);
1489	    /* 4 components: 2 for tex0, 2 for tex1 */
1490	    OUT_ACCEL_REG(R300_RS_COUNT,
1491			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1492			   R300_RS_COUNT_HIRES_EN));
1493
1494	    /* R300_INST_COUNT_RS - highest RS instruction used */
1495	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
1496
1497	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1498						R300_ALU_CODE_SIZE(0) |
1499						R300_TEX_CODE_OFFSET(0) |
1500						R300_TEX_CODE_SIZE(1)));
1501
1502	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1503			  (R300_ALU_START(0) |
1504			   R300_ALU_SIZE(0) |
1505			   R300_TEX_START(0) |
1506			   R300_TEX_SIZE(1) |
1507			   R300_RGBA_OUT));
1508
1509
1510	} else {
1511	    BEGIN_ACCEL(15);
1512	    /* 2 components: 2 for tex0 */
1513	    OUT_ACCEL_REG(R300_RS_COUNT,
1514			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1515			   R300_RS_COUNT_HIRES_EN));
1516
1517	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
1518
1519	    OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1520						R300_ALU_CODE_SIZE(0) |
1521						R300_TEX_CODE_OFFSET(0) |
1522						R300_TEX_CODE_SIZE(0)));
1523
1524	    OUT_ACCEL_REG(R300_US_CODE_ADDR_3,
1525			  (R300_ALU_START(0) |
1526			   R300_ALU_SIZE(0) |
1527			   R300_TEX_START(0) |
1528			   R300_TEX_SIZE(0) |
1529			   R300_RGBA_OUT));
1530
1531	}
1532
1533	OUT_ACCEL_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
1534	OUT_ACCEL_REG(R300_US_CODE_ADDR_0,
1535		      (R300_ALU_START(0) |
1536		       R300_ALU_SIZE(0) |
1537		       R300_TEX_START(0) |
1538		       R300_TEX_SIZE(0)));
1539	OUT_ACCEL_REG(R300_US_CODE_ADDR_1,
1540		      (R300_ALU_START(0) |
1541		       R300_ALU_SIZE(0) |
1542		       R300_TEX_START(0) |
1543		       R300_TEX_SIZE(0)));
1544	OUT_ACCEL_REG(R300_US_CODE_ADDR_2,
1545		      (R300_ALU_START(0) |
1546		       R300_ALU_SIZE(0) |
1547		       R300_TEX_START(0) |
1548		       R300_TEX_SIZE(0)));
1549
1550	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1551	/* shader output swizzling */
1552	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1553
1554	/* tex inst for src texture */
1555	OUT_ACCEL_REG(R300_US_TEX_INST(0),
1556		      (R300_TEX_SRC_ADDR(0) |
1557		       R300_TEX_DST_ADDR(0) |
1558		       R300_TEX_ID(0) |
1559		       R300_TEX_INST(R300_TEX_INST_LD)));
1560
1561	if (pMask) {
1562	    /* tex inst for mask texture */
1563	    OUT_ACCEL_REG(R300_US_TEX_INST(1),
1564			  (R300_TEX_SRC_ADDR(1) |
1565			   R300_TEX_DST_ADDR(1) |
1566			   R300_TEX_ID(1) |
1567			   R300_TEX_INST(R300_TEX_INST_LD)));
1568	}
1569
1570	/* RGB inst
1571	 * temp addresses for texture inputs
1572	 * ALU_RGB_ADDR0 is src tex (temp 0)
1573	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1574	 * R300_ALU_RGB_OMASK - output components to write
1575	 * R300_ALU_RGB_TARGET_A - render target
1576	 */
1577	OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0),
1578		      (R300_ALU_RGB_ADDR0(0) |
1579		       R300_ALU_RGB_ADDR1(1) |
1580		       R300_ALU_RGB_ADDR2(0) |
1581		       R300_ALU_RGB_ADDRD(0) |
1582		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1583					   R300_ALU_RGB_MASK_G |
1584					   R300_ALU_RGB_MASK_B)) |
1585		       R300_ALU_RGB_TARGET_A));
1586	/* RGB inst
1587	 * ALU operation
1588	 */
1589	OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0),
1590		      (R300_ALU_RGB_SEL_A(src_color) |
1591		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1592		       R300_ALU_RGB_SEL_B(mask_color) |
1593		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1594		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1595		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1596		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1597		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1598		       R300_ALU_RGB_CLAMP));
1599	/* Alpha inst
1600	 * temp addresses for texture inputs
1601	 * ALU_ALPHA_ADDR0 is src tex (0)
1602	 * ALU_ALPHA_ADDR1 is mask tex (1)
1603	 * R300_ALU_ALPHA_OMASK - output components to write
1604	 * R300_ALU_ALPHA_TARGET_A - render target
1605	 */
1606	OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0),
1607		      (R300_ALU_ALPHA_ADDR0(0) |
1608		       R300_ALU_ALPHA_ADDR1(1) |
1609		       R300_ALU_ALPHA_ADDR2(0) |
1610		       R300_ALU_ALPHA_ADDRD(0) |
1611		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1612		       R300_ALU_ALPHA_TARGET_A |
1613		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1614	/* Alpha inst
1615	 * ALU operation
1616	 */
1617	OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0),
1618		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1619		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1620		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1621		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1622		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1623		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1624		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1625		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1626		       R300_ALU_ALPHA_CLAMP));
1627	FINISH_ACCEL();
1628    } else {
1629	uint32_t output_fmt;
1630	uint32_t src_color, src_alpha;
1631	uint32_t mask_color, mask_alpha;
1632
1633	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1634	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1635			 R500_ALU_RGB_G_SWIZ_A_0 |
1636			 R500_ALU_RGB_B_SWIZ_A_0);
1637	else
1638	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1639			 R500_ALU_RGB_G_SWIZ_A_G |
1640			 R500_ALU_RGB_B_SWIZ_A_B);
1641
1642	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1643	    src_alpha = R500_ALPHA_SWIZ_A_1;
1644	else
1645	    src_alpha = R500_ALPHA_SWIZ_A_A;
1646
1647	if (pMask && pMaskPicture->componentAlpha) {
1648	    if (RadeonBlendOp[op].src_alpha) {
1649		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
1650		    src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1651				 R500_ALU_RGB_G_SWIZ_A_1 |
1652				 R500_ALU_RGB_B_SWIZ_A_1);
1653		    src_alpha = R500_ALPHA_SWIZ_A_1;
1654		} else {
1655		    src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1656				 R500_ALU_RGB_G_SWIZ_A_A |
1657				 R500_ALU_RGB_B_SWIZ_A_A);
1658		    src_alpha = R500_ALPHA_SWIZ_A_A;
1659		}
1660
1661		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1662			      R500_ALU_RGB_G_SWIZ_B_G |
1663			      R500_ALU_RGB_B_SWIZ_B_B);
1664
1665		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1666		    mask_alpha = R500_ALPHA_SWIZ_B_1;
1667		else
1668		    mask_alpha = R500_ALPHA_SWIZ_B_A;
1669
1670	    } else {
1671		src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1672			     R500_ALU_RGB_G_SWIZ_A_G |
1673			     R500_ALU_RGB_B_SWIZ_A_B);
1674
1675		if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1676		    src_alpha = R500_ALPHA_SWIZ_A_1;
1677		else
1678		    src_alpha = R500_ALPHA_SWIZ_A_A;
1679
1680		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1681			      R500_ALU_RGB_G_SWIZ_B_G |
1682			      R500_ALU_RGB_B_SWIZ_B_B);
1683
1684		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1685		    mask_alpha = R500_ALPHA_SWIZ_B_1;
1686		else
1687		    mask_alpha = R500_ALPHA_SWIZ_B_A;
1688
1689	    }
1690	} else if (pMask) {
1691	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1692		mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1693			      R500_ALU_RGB_G_SWIZ_B_1 |
1694			      R500_ALU_RGB_B_SWIZ_B_1);
1695	    else
1696		mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1697			      R500_ALU_RGB_G_SWIZ_B_A |
1698			      R500_ALU_RGB_B_SWIZ_B_A);
1699
1700	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1701		mask_alpha = R500_ALPHA_SWIZ_B_1;
1702	    else
1703		mask_alpha = R500_ALPHA_SWIZ_B_A;
1704	} else {
1705	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1706			  R500_ALU_RGB_G_SWIZ_B_1 |
1707			  R500_ALU_RGB_B_SWIZ_B_1);
1708	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1709	}
1710
1711	/* shader output swizzling */
1712	switch (pDstPicture->format) {
1713	case PICT_a8r8g8b8:
1714	case PICT_x8r8g8b8:
1715	default:
1716	    output_fmt = (R300_OUT_FMT_C4_8 |
1717			  R300_OUT_FMT_C0_SEL_BLUE |
1718			  R300_OUT_FMT_C1_SEL_GREEN |
1719			  R300_OUT_FMT_C2_SEL_RED |
1720			  R300_OUT_FMT_C3_SEL_ALPHA);
1721	    break;
1722	case PICT_a8b8g8r8:
1723	case PICT_x8b8g8r8:
1724	    output_fmt = (R300_OUT_FMT_C4_8 |
1725			  R300_OUT_FMT_C0_SEL_RED |
1726			  R300_OUT_FMT_C1_SEL_GREEN |
1727			  R300_OUT_FMT_C2_SEL_BLUE |
1728			  R300_OUT_FMT_C3_SEL_ALPHA);
1729	    break;
1730	case PICT_a8:
1731	    output_fmt = (R300_OUT_FMT_C4_8 |
1732			  R300_OUT_FMT_C0_SEL_ALPHA);
1733	    break;
1734	}
1735
1736	BEGIN_ACCEL(7);
1737	if (pMask) {
1738	    /* 4 components: 2 for tex0, 2 for tex1 */
1739	    OUT_ACCEL_REG(R300_RS_COUNT,
1740			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1741			   R300_RS_COUNT_HIRES_EN));
1742
1743	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1744	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
1745
1746	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1747					      R500_US_CODE_END_ADDR(2)));
1748	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1749					       R500_US_CODE_RANGE_SIZE(2)));
1750	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1751	} else {
1752	    OUT_ACCEL_REG(R300_RS_COUNT,
1753			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1754			   R300_RS_COUNT_HIRES_EN));
1755
1756	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
1757
1758	    OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1759					      R500_US_CODE_END_ADDR(1)));
1760	    OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1761					       R500_US_CODE_RANGE_SIZE(1)));
1762	    OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
1763	}
1764
1765	OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1766	OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
1767	FINISH_ACCEL();
1768
1769	if (pMask) {
1770	    BEGIN_ACCEL(19);
1771	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1772	    /* tex inst for src texture */
1773	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1774						   R500_INST_RGB_WMASK_R |
1775						   R500_INST_RGB_WMASK_G |
1776						   R500_INST_RGB_WMASK_B |
1777						   R500_INST_ALPHA_WMASK |
1778						   R500_INST_RGB_CLAMP |
1779						   R500_INST_ALPHA_CLAMP));
1780
1781	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1782						   R500_TEX_INST_LD |
1783						   R500_TEX_IGNORE_UNCOVERED));
1784
1785	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1786						   R500_TEX_SRC_S_SWIZ_R |
1787						   R500_TEX_SRC_T_SWIZ_G |
1788						   R500_TEX_DST_ADDR(0) |
1789						   R500_TEX_DST_R_SWIZ_R |
1790						   R500_TEX_DST_G_SWIZ_G |
1791						   R500_TEX_DST_B_SWIZ_B |
1792						   R500_TEX_DST_A_SWIZ_A));
1793	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1794						   R500_DX_S_SWIZ_R |
1795						   R500_DX_T_SWIZ_R |
1796						   R500_DX_R_SWIZ_R |
1797						   R500_DX_Q_SWIZ_R |
1798						   R500_DY_ADDR(0) |
1799						   R500_DY_S_SWIZ_R |
1800						   R500_DY_T_SWIZ_R |
1801						   R500_DY_R_SWIZ_R |
1802						   R500_DY_Q_SWIZ_R));
1803	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1804	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1805
1806	    /* tex inst for mask texture */
1807	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1808						   R500_INST_TEX_SEM_WAIT |
1809						   R500_INST_RGB_WMASK_R |
1810						   R500_INST_RGB_WMASK_G |
1811						   R500_INST_RGB_WMASK_B |
1812						   R500_INST_ALPHA_WMASK |
1813						   R500_INST_RGB_CLAMP |
1814						   R500_INST_ALPHA_CLAMP));
1815
1816	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
1817						   R500_TEX_INST_LD |
1818						   R500_TEX_SEM_ACQUIRE |
1819						   R500_TEX_IGNORE_UNCOVERED));
1820
1821	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
1822						   R500_TEX_SRC_S_SWIZ_R |
1823						   R500_TEX_SRC_T_SWIZ_G |
1824						   R500_TEX_DST_ADDR(1) |
1825						   R500_TEX_DST_R_SWIZ_R |
1826						   R500_TEX_DST_G_SWIZ_G |
1827						   R500_TEX_DST_B_SWIZ_B |
1828						   R500_TEX_DST_A_SWIZ_A));
1829	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
1830						   R500_DX_S_SWIZ_R |
1831						   R500_DX_T_SWIZ_R |
1832						   R500_DX_R_SWIZ_R |
1833						   R500_DX_Q_SWIZ_R |
1834						   R500_DY_ADDR(1) |
1835						   R500_DY_S_SWIZ_R |
1836						   R500_DY_T_SWIZ_R |
1837						   R500_DY_R_SWIZ_R |
1838						   R500_DY_Q_SWIZ_R));
1839	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1840	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1841	} else {
1842	    BEGIN_ACCEL(13);
1843	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1844	    /* tex inst for src texture */
1845	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1846						   R500_INST_TEX_SEM_WAIT |
1847						   R500_INST_RGB_WMASK_R |
1848						   R500_INST_RGB_WMASK_G |
1849						   R500_INST_RGB_WMASK_B |
1850						   R500_INST_ALPHA_WMASK |
1851						   R500_INST_RGB_CLAMP |
1852						   R500_INST_ALPHA_CLAMP));
1853
1854	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1855						   R500_TEX_INST_LD |
1856						   R500_TEX_SEM_ACQUIRE |
1857						   R500_TEX_IGNORE_UNCOVERED));
1858
1859	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1860						   R500_TEX_SRC_S_SWIZ_R |
1861						   R500_TEX_SRC_T_SWIZ_G |
1862						   R500_TEX_DST_ADDR(0) |
1863						   R500_TEX_DST_R_SWIZ_R |
1864						   R500_TEX_DST_G_SWIZ_G |
1865						   R500_TEX_DST_B_SWIZ_B |
1866						   R500_TEX_DST_A_SWIZ_A));
1867	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1868						   R500_DX_S_SWIZ_R |
1869						   R500_DX_T_SWIZ_R |
1870						   R500_DX_R_SWIZ_R |
1871						   R500_DX_Q_SWIZ_R |
1872						   R500_DY_ADDR(0) |
1873						   R500_DY_S_SWIZ_R |
1874						   R500_DY_T_SWIZ_R |
1875						   R500_DY_R_SWIZ_R |
1876						   R500_DY_Q_SWIZ_R));
1877	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1878	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1879	}
1880
1881	/* ALU inst */
1882	/* *_OMASK* - output component write mask */
1883	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
1884					       R500_INST_TEX_SEM_WAIT |
1885					       R500_INST_LAST |
1886					       R500_INST_RGB_OMASK_R |
1887					       R500_INST_RGB_OMASK_G |
1888					       R500_INST_RGB_OMASK_B |
1889					       R500_INST_ALPHA_OMASK |
1890					       R500_INST_RGB_CLAMP |
1891					       R500_INST_ALPHA_CLAMP));
1892	/* ALU inst
1893	 * temp addresses for texture inputs
1894	 * RGB_ADDR0 is src tex (temp 0)
1895	 * RGB_ADDR1 is mask tex (temp 1)
1896	 */
1897	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
1898					       R500_RGB_ADDR1(1) |
1899					       R500_RGB_ADDR2(0)));
1900	/* ALU inst
1901	 * temp addresses for texture inputs
1902	 * ALPHA_ADDR0 is src tex (temp 0)
1903	 * ALPHA_ADDR1 is mask tex (temp 1)
1904	 */
1905	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
1906					       R500_ALPHA_ADDR1(1) |
1907					       R500_ALPHA_ADDR2(0)));
1908
1909	/* R500_ALU_RGB_TARGET - RGB render target */
1910	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
1911					       src_color |
1912					       R500_ALU_RGB_SEL_B_SRC1 |
1913					       mask_color |
1914					       R500_ALU_RGB_TARGET(0)));
1915
1916	/* R500_ALPHA_RGB_TARGET - alpha render target */
1917	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
1918					       R500_ALPHA_ADDRD(0) |
1919					       R500_ALPHA_SEL_A_SRC0 |
1920					       src_alpha |
1921					       R500_ALPHA_SEL_B_SRC1 |
1922					       mask_alpha |
1923					       R500_ALPHA_TARGET(0)));
1924
1925	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
1926					       R500_ALU_RGBA_ADDRD(0) |
1927					       R500_ALU_RGBA_R_SWIZ_0 |
1928					       R500_ALU_RGBA_G_SWIZ_0 |
1929					       R500_ALU_RGBA_B_SWIZ_0 |
1930					       R500_ALU_RGBA_A_SWIZ_0));
1931	FINISH_ACCEL();
1932    }
1933
1934    /* Clear out scissoring */
1935    BEGIN_ACCEL(2);
1936    OUT_ACCEL_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
1937				     (0 << R300_SCISSOR_Y_SHIFT)));
1938    OUT_ACCEL_REG(R300_SC_SCISSOR1, ((8191 << R300_SCISSOR_X_SHIFT) |
1939				     (8191 << R300_SCISSOR_Y_SHIFT)));
1940    FINISH_ACCEL();
1941
1942    BEGIN_ACCEL(3);
1943
1944    OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
1945    OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
1946
1947    blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1948    OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
1949
1950    FINISH_ACCEL();
1951
1952    BEGIN_ACCEL(1);
1953    if (info->accel_state->has_mask)
1954	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 6);
1955    else
1956	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, 4);
1957    FINISH_ACCEL();
1958
1959    return TRUE;
1960}
1961
1962
/*
 * Per-vertex emit helpers used by RadeonCompositeTile below.
 *
 * VTX_OUT emits four floats per vertex: destination x, destination y and the
 * source texture coordinate pair.  VTX_OUT_MASK emits those same four values
 * followed by the mask texture coordinate pair (six floats in total).
 *
 * Without ACCEL_CP every float is written to RADEON_SE_PORT_DATA0 through
 * OUT_ACCEL_REG_F; with ACCEL_CP the floats are emitted with OUT_RING_F.
 * Both macros are #undef'd again after RadeonCompositeTile.
 */
#ifndef ACCEL_CP

#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)			\
do {								\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY);		\
} while (0)

/* Same as VTX_OUT, then the two mask coordinates. */
#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT((_dstX), (_dstY), (_srcX), (_srcY));		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskX);		\
    OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _maskY);		\
} while (0)

#else /* ACCEL_CP */

#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)			\
do {								\
    OUT_RING_F(_dstX);						\
    OUT_RING_F(_dstY);						\
    OUT_RING_F(_srcX);						\
    OUT_RING_F(_srcY);						\
} while (0)

/* Same as VTX_OUT, then the two mask coordinates. */
#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {								\
    VTX_OUT((_dstX), (_dstY), (_srcX), (_srcY));		\
    OUT_RING_F(_maskX);						\
    OUT_RING_F(_maskY);						\
} while (0)

#endif /* ACCEL_CP */
2004
#ifdef ONLY_ONCE
/*
 * Apply a picture transform to a fixed-point 2D point, in place.
 *
 * The point is expanded to a three-component vector whose last component is
 * xFixed1, handed to PictureTransformPoint(), and the first two components
 * of the result are stored back into *point.  The third component of the
 * result is discarded.
 *
 * NOTE(review): whatever status PictureTransformPoint() reports is not
 * checked here.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector homogeneous;

    homogeneous.vector[0] = point->x;
    homogeneous.vector[1] = point->y;
    homogeneous.vector[2] = xFixed1;

    PictureTransformPoint(transform, &homogeneous);

    point->x = homogeneous.vector[0];
    point->y = homogeneous.vector[1];
}
#endif /* ONLY_ONCE */
2017
2018static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
2019					   RADEONInfoPtr info,
2020					   PixmapPtr pDst,
2021					   int srcX, int srcY,
2022					   int maskX, int maskY,
2023					   int dstX, int dstY,
2024					   int w, int h)
2025{
2026    int vtx_count;
2027    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
2028    static xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
2029    ACCEL_PREAMBLE();
2030
2031    ENTER_DRAW(0);
2032
2033    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
2034       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
2035
2036    srcTopLeft.x     = IntToxFixed(srcX);
2037    srcTopLeft.y     = IntToxFixed(srcY);
2038    srcTopRight.x    = IntToxFixed(srcX + w);
2039    srcTopRight.y    = IntToxFixed(srcY);
2040    srcBottomLeft.x  = IntToxFixed(srcX);
2041    srcBottomLeft.y  = IntToxFixed(srcY + h);
2042    srcBottomRight.x = IntToxFixed(srcX + w);
2043    srcBottomRight.y = IntToxFixed(srcY + h);
2044
2045    if (info->accel_state->is_transform[0]) {
2046	transformPoint(info->accel_state->transform[0], &srcTopLeft);
2047	transformPoint(info->accel_state->transform[0], &srcTopRight);
2048	transformPoint(info->accel_state->transform[0], &srcBottomLeft);
2049	transformPoint(info->accel_state->transform[0], &srcBottomRight);
2050    }
2051
2052    if (info->accel_state->has_mask) {
2053	maskTopLeft.x     = IntToxFixed(maskX);
2054	maskTopLeft.y     = IntToxFixed(maskY);
2055	maskTopRight.x    = IntToxFixed(maskX + w);
2056	maskTopRight.y    = IntToxFixed(maskY);
2057	maskBottomLeft.x  = IntToxFixed(maskX);
2058	maskBottomLeft.y  = IntToxFixed(maskY + h);
2059	maskBottomRight.x = IntToxFixed(maskX + w);
2060	maskBottomRight.y = IntToxFixed(maskY + h);
2061
2062	if (info->accel_state->is_transform[1]) {
2063	    transformPoint(info->accel_state->transform[1], &maskTopLeft);
2064	    transformPoint(info->accel_state->transform[1], &maskTopRight);
2065	    transformPoint(info->accel_state->transform[1], &maskBottomLeft);
2066	    transformPoint(info->accel_state->transform[1], &maskBottomRight);
2067	}
2068
2069	vtx_count = 6;
2070    } else
2071	vtx_count = 4;
2072
2073    if (info->accel_state->vsync)
2074	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h);
2075
2076#ifdef ACCEL_CP
2077    if (info->ChipFamily < CHIP_FAMILY_R200) {
2078	BEGIN_RING(3 * vtx_count + 3);
2079	OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
2080			    3 * vtx_count + 1));
2081	if (info->accel_state->has_mask)
2082	    OUT_RING(RADEON_CP_VC_FRMT_XY |
2083		     RADEON_CP_VC_FRMT_ST0 |
2084		     RADEON_CP_VC_FRMT_ST1);
2085	else
2086	    OUT_RING(RADEON_CP_VC_FRMT_XY |
2087		     RADEON_CP_VC_FRMT_ST0);
2088	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2089		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2090		 RADEON_CP_VC_CNTL_MAOS_ENABLE |
2091		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
2092		 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2093    } else if (IS_R300_3D || IS_R500_3D) {
2094	BEGIN_RING(4 * vtx_count + 4);
2095	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2096			    4 * vtx_count));
2097	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
2098		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2099		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2100    } else {
2101	BEGIN_RING(3 * vtx_count + 2);
2102	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
2103			    3 * vtx_count));
2104	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
2105		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
2106		 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
2107    }
2108
2109#else /* ACCEL_CP */
2110    if (IS_R300_3D || IS_R500_3D)
2111	BEGIN_ACCEL(2 + vtx_count * 4);
2112    else
2113	BEGIN_ACCEL(1 + vtx_count * 3);
2114
2115    if (info->ChipFamily < CHIP_FAMILY_R200)
2116	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2117					  RADEON_VF_PRIM_WALK_DATA |
2118					  RADEON_VF_RADEON_MODE |
2119					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2120    else if (IS_R300_3D || IS_R500_3D)
2121	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
2122					  RADEON_VF_PRIM_WALK_DATA |
2123					  (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
2124    else
2125	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
2126					  RADEON_VF_PRIM_WALK_DATA |
2127					  (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
2128
2129#endif
2130
2131    if (info->accel_state->has_mask) {
2132	if (IS_R300_3D || IS_R500_3D) {
2133	    VTX_OUT_MASK((float)dstX,                                      (float)dstY,
2134			 xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0],
2135			 xFixedToFloat(maskTopLeft.x) / info->accel_state->texW[1],     xFixedToFloat(maskTopLeft.y) / info->accel_state->texH[1]);
2136	}
2137	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
2138		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0],
2139		xFixedToFloat(maskBottomLeft.x) / info->accel_state->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->accel_state->texH[1]);
2140	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
2141		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0],
2142		xFixedToFloat(maskBottomRight.x) / info->accel_state->texW[1], xFixedToFloat(maskBottomRight.y) / info->accel_state->texH[1]);
2143	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
2144		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0],
2145		xFixedToFloat(maskTopRight.x) / info->accel_state->texW[1],    xFixedToFloat(maskTopRight.y) / info->accel_state->texH[1]);
2146    } else {
2147	if (IS_R300_3D || IS_R500_3D) {
2148	    VTX_OUT((float)dstX,                                      (float)dstY,
2149		    xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0]);
2150	}
2151	VTX_OUT((float)dstX,                                      (float)(dstY + h),
2152		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0]);
2153	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
2154		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0]);
2155	VTX_OUT((float)(dstX + w),                                (float)dstY,
2156		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
2157    }
2158
2159    if (IS_R300_3D || IS_R500_3D)
2160	/* flushing is pipelined, free/finish is not */
2161	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
2162
2163#ifdef ACCEL_CP
2164    ADVANCE_RING();
2165#else
2166    FINISH_ACCEL();
2167#endif /* !ACCEL_CP */
2168
2169    LEAVE_DRAW(0);
2170}
2171#undef VTX_OUT
2172#undef VTX_OUT_MASK
2173
2174static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
2175				       int srcX, int srcY,
2176				       int maskX, int maskY,
2177				       int dstX, int dstY,
2178				       int width, int height)
2179{
2180    int tileSrcY, tileMaskY, tileDstY;
2181    int remainingHeight;
2182    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2183
2184    if (!info->accel_state->need_src_tile_x && !info->accel_state->need_src_tile_y) {
2185	FUNC_NAME(RadeonCompositeTile)(pScrn,
2186				       info,
2187				       pDst,
2188				       srcX, srcY,
2189				       maskX, maskY,
2190				       dstX, dstY,
2191				       width, height);
2192	return;
2193    }
2194
2195    /* Tiling logic borrowed from exaFillRegionTiled */
2196
2197    modulus(srcY, info->accel_state->src_tile_height, tileSrcY);
2198    tileMaskY = maskY;
2199    tileDstY = dstY;
2200
2201    remainingHeight = height;
2202    while (remainingHeight > 0) {
2203	int remainingWidth = width;
2204	int tileSrcX, tileMaskX, tileDstX;
2205	int h = info->accel_state->src_tile_height - tileSrcY;
2206
2207	if (h > remainingHeight)
2208	    h = remainingHeight;
2209	remainingHeight -= h;
2210
2211	modulus(srcX, info->accel_state->src_tile_width, tileSrcX);
2212	tileMaskX = maskX;
2213	tileDstX = dstX;
2214
2215	while (remainingWidth > 0) {
2216	    int w = info->accel_state->src_tile_width - tileSrcX;
2217	    if (w > remainingWidth)
2218		w = remainingWidth;
2219	    remainingWidth -= w;
2220
2221	    FUNC_NAME(RadeonCompositeTile)(pScrn,
2222					   info,
2223					   pDst,
2224					   tileSrcX, tileSrcY,
2225					   tileMaskX, tileMaskY,
2226					   tileDstX, tileDstY,
2227					   w, h);
2228
2229	    tileSrcX = 0;
2230	    tileMaskX += w;
2231	    tileDstX += w;
2232	}
2233	tileSrcY = 0;
2234	tileMaskY += h;
2235	tileDstY += h;
2236    }
2237}
2238
2239static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
2240{
2241    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
2242    ACCEL_PREAMBLE();
2243
2244    ENTER_DRAW(0);
2245
2246    if (IS_R300_3D || IS_R500_3D) {
2247	BEGIN_ACCEL(3);
2248	OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
2249	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
2250    } else
2251	BEGIN_ACCEL(1);
2252    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
2253    FINISH_ACCEL();
2254
2255    LEAVE_DRAW(0);
2256}
2257
2258#undef ONLY_ONCE
2259#undef FUNC_NAME
2260