radeon_render.c revision 209ff23f
1/*
2 * Copyright 2004 Eric Anholt
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 *    Eric Anholt <anholt@FreeBSD.org>
26 *    Hui Yu <hyu@ati.com>
27 *
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include <string.h>
35
36#ifdef USE_XAA
37
38#include "dixstruct.h"
39
40#include "xaa.h"
41#include "xaalocal.h"
42
43#ifndef RENDER_GENERIC_HELPER
44#define RENDER_GENERIC_HELPER
45
46struct blendinfo {
47	Bool dst_alpha;
48	Bool src_alpha;
49	uint32_t blend_cntl;
50};
51
52/* The first part of blend_cntl corresponds to Fa from the render "protocol"
53 * document, and the second part to Fb.
54 */
55static const struct blendinfo RadeonBlendOp[] = {
56    /* Clear */
57    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
58	   RADEON_DST_BLEND_GL_ZERO},
59    /* Src */
60    {0, 0, RADEON_SRC_BLEND_GL_ONE |
61	   RADEON_DST_BLEND_GL_ZERO},
62    /* Dst */
63    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
64	   RADEON_DST_BLEND_GL_ONE},
65    /* Over */
66    {0, 1, RADEON_SRC_BLEND_GL_ONE |
67	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
68    /* OverReverse */
69    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
70	   RADEON_DST_BLEND_GL_ONE},
71    /* In */
72    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA |
73	   RADEON_DST_BLEND_GL_ZERO},
74    /* InReverse */
75    {0, 1, RADEON_SRC_BLEND_GL_ZERO |
76	   RADEON_DST_BLEND_GL_SRC_ALPHA},
77    /* Out */
78    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
79	   RADEON_DST_BLEND_GL_ZERO},
80    /* OutReverse */
81    {0, 1, RADEON_SRC_BLEND_GL_ZERO |
82	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
83    /* Atop */
84    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA |
85	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
86    /* AtopReverse */
87    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
88	   RADEON_DST_BLEND_GL_SRC_ALPHA},
89    /* Xor */
90    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
91	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
92    /* Add */
93    {0, 0, RADEON_SRC_BLEND_GL_ONE |
94	   RADEON_DST_BLEND_GL_ONE},
95    /* Saturate */
96    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
97	   RADEON_DST_BLEND_GL_ONE},
98    {0, 0, 0},
99    {0, 0, 0},
100    /* DisjointClear */
101    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
102	   RADEON_DST_BLEND_GL_ZERO},
103    /* DisjointSrc */
104    {0, 0, RADEON_SRC_BLEND_GL_ONE |
105	   RADEON_DST_BLEND_GL_ZERO},
106    /* DisjointDst */
107    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
108	   RADEON_DST_BLEND_GL_ONE},
109    /* DisjointOver unsupported */
110    {0, 0, 0},
111    /* DisjointOverReverse */
112    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
113	   RADEON_DST_BLEND_GL_ONE},
114    /* DisjointIn unsupported */
115    {0, 0, 0},
116    /* DisjointInReverse unsupported */
117    {0, 0, 0},
118    /* DisjointOut unsupported */
119    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
120	   RADEON_DST_BLEND_GL_ZERO},
121    /* DisjointOutReverse unsupported */
122    {0, 0, 0},
123    /* DisjointAtop unsupported */
124    {0, 0, 0},
125    /* DisjointAtopReverse unsupported */
126    {0, 0, 0},
127    /* DisjointXor unsupported */
128    {0, 0, 0},
129    {0, 0, 0},
130    {0, 0, 0},
131    {0, 0, 0},
132    {0, 0, 0},
133    /* ConjointClear */
134    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
135	   RADEON_DST_BLEND_GL_ZERO},
136    /* ConjointSrc */
137    {0, 0, RADEON_SRC_BLEND_GL_ONE |
138	   RADEON_DST_BLEND_GL_ZERO},
139    /* ConjointDst */
140    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
141	   RADEON_DST_BLEND_GL_ONE},
142};
143#define RadeonOpMax (sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
144
145/* Note on texture formats:
146 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
147 * The RADEON and R200 TXFORMATS we use are the same on r100/r200.
148 */
149
150static CARD32 RADEONTextureFormats[] = {
151    PICT_a8r8g8b8,
152    PICT_a8,
153    PICT_x8r8g8b8,
154    PICT_r5g6b5,
155    PICT_a1r5g5b5,
156    PICT_x1r5g5b5,
157    0
158};
159
160static CARD32 RADEONDstFormats[] = {
161    PICT_a8r8g8b8,
162    PICT_x8r8g8b8,
163    PICT_r5g6b5,
164    PICT_a1r5g5b5,
165    PICT_x1r5g5b5,
166    0
167};
168
169static uint32_t
170RadeonGetTextureFormat(uint32_t format)
171{
172    switch (format) {
173    case PICT_a8r8g8b8:
174	return RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
175    case PICT_a8:
176	return RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
177    case PICT_x8r8g8b8:
178	return RADEON_TXFORMAT_ARGB8888;
179    case PICT_r5g6b5:
180	return RADEON_TXFORMAT_RGB565;
181    case PICT_a1r5g5b5:
182	return RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
183    case PICT_x1r5g5b5:
184	return RADEON_TXFORMAT_ARGB1555;
185    default:
186	return 0;
187    }
188}
189
190static uint32_t
191RadeonGetColorFormat(uint32_t format)
192{
193    switch (format) {
194    case PICT_a8r8g8b8:
195    case PICT_x8r8g8b8:
196	return RADEON_COLOR_FORMAT_ARGB8888;
197    case PICT_r5g6b5:
198	return RADEON_COLOR_FORMAT_RGB565;
199    case PICT_a1r5g5b5:
200    case PICT_x1r5g5b5:
201	return RADEON_COLOR_FORMAT_ARGB1555;
202    default:
203	return 0;
204    }
205}
206
207/* Returns a RADEON_RB3D_BLENDCNTL value, or 0 if the operation is not
208 * supported
209 */
210static uint32_t
211RadeonGetBlendCntl(uint8_t op, uint32_t dstFormat)
212{
213    uint32_t blend_cntl;
214
215    if (op >= RadeonOpMax || RadeonBlendOp[op].blend_cntl == 0)
216	return 0;
217
218    blend_cntl = RadeonBlendOp[op].blend_cntl;
219
220    if (RadeonBlendOp[op].dst_alpha && !PICT_FORMAT_A(dstFormat)) {
221	uint32_t srcblend = blend_cntl & RADEON_SRC_BLEND_MASK;
222
223	/* If there's no destination alpha channel, we need to wire the blending
224	 * to treat the alpha channel as always 1.
225	 */
226	if (srcblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA ||
227	    srcblend == RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE)
228	    blend_cntl = (blend_cntl & ~RADEON_SRC_BLEND_MASK) |
229			 RADEON_SRC_BLEND_GL_ZERO;
230	else if (srcblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
231	    blend_cntl = (blend_cntl & ~RADEON_SRC_BLEND_MASK) |
232			 RADEON_SRC_BLEND_GL_ONE;
233    }
234
235    return blend_cntl;
236}
237
/* Return the raw 32-bit pattern of a float, unchanged bit for bit, so it can
 * be written out as an integer word.
 */
static __inline__ uint32_t F_TO_DW(float val)
{
    uint32_t word;

    memcpy(&word, &val, sizeof(word));
    return word;
}
247
/* Compute floor(log2(val)), i.e. the index of the highest set bit.
 *
 * val is examined as an unsigned bit pattern, so the portable loop always
 * terminates (a signed right shift of a negative value may never reach 0).
 *
 * Returns -1 when val is 0.  The BSR instruction leaves its destination
 * undefined for a zero source, so that case is answered before the asm and
 * both code paths agree.
 */
static __inline__ int
ATILog2(int val)
{
	unsigned int v = (unsigned int)val;
	int bits;

	if (v == 0)
		return -1;

#if (defined __i386__ || defined __x86_64__) && (defined __GNUC__)
	__asm volatile("bsrl	%1, %0"
		: "=r" (bits)
		: "r" (v)
	);
	return bits;
#else
	for (bits = -1; v != 0; v >>= 1)
		++bits;
	return bits;
#endif
}
265
266static void
267RemoveLinear (FBLinearPtr linear)
268{
269   RADEONInfoPtr info = (RADEONInfoPtr)(linear->devPrivate.ptr);
270
271   info->RenderTex = NULL;
272}
273
274static void
275RenderCallback (ScrnInfoPtr pScrn)
276{
277    RADEONInfoPtr  info       = RADEONPTR(pScrn);
278
279    if ((currentTime.milliseconds > info->RenderTimeout) && info->RenderTex) {
280	xf86FreeOffscreenLinear(info->RenderTex);
281	info->RenderTex = NULL;
282    }
283
284    if (!info->RenderTex)
285	info->RenderCallback = NULL;
286}
287
288static Bool
289AllocateLinear (
290   ScrnInfoPtr pScrn,
291   int sizeNeeded
292){
293   RADEONInfoPtr  info       = RADEONPTR(pScrn);
294   int cpp = info->CurrentLayout.bitsPerPixel / 8;
295
296   info->RenderTimeout = currentTime.milliseconds + 30000;
297   info->RenderCallback = RenderCallback;
298
299   /* XAA allocates in units of pixels at the screen bpp, so adjust size
300    * appropriately.
301    */
302   sizeNeeded = (sizeNeeded + cpp - 1) / cpp;
303
304   if (info->RenderTex) {
305	if (info->RenderTex->size >= sizeNeeded)
306	   return TRUE;
307	else {
308	   if (xf86ResizeOffscreenLinear(info->RenderTex, sizeNeeded))
309		return TRUE;
310
311	   xf86FreeOffscreenLinear(info->RenderTex);
312	   info->RenderTex = NULL;
313	}
314   }
315
316   info->RenderTex = xf86AllocateOffscreenLinear(pScrn->pScreen, sizeNeeded, 32,
317						 NULL, RemoveLinear, info);
318
319   return (info->RenderTex != NULL);
320}
321
322#if X_BYTE_ORDER == X_BIG_ENDIAN
323static Bool RADEONSetupRenderByteswap(ScrnInfoPtr pScrn, int tex_bytepp)
324{
325    RADEONInfoPtr info = RADEONPTR(pScrn);
326    unsigned char *RADEONMMIO = info->MMIO;
327    uint32_t swapper = info->ModeReg->surface_cntl;
328
329    swapper &= ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP |
330		 RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP);
331
332    /* Set up byte swapping for the framebuffer aperture as needed */
333    switch (tex_bytepp) {
334    case 1:
335	break;
336    case 2:
337	swapper |= RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP;
338	break;
339    case 4:
340	swapper |= RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP;
341	break;
342    default:
343	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: Don't know what to do for "
344		   "tex_bytepp == %d!\n", __func__, tex_bytepp);
345	return FALSE;
346    }
347    OUTREG(RADEON_SURFACE_CNTL, swapper);
348    return TRUE;
349}
350
351static void RADEONRestoreByteswap(RADEONInfoPtr info)
352{
353    unsigned char *RADEONMMIO = info->MMIO;
354
355    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
356}
357#endif	/* X_BYTE_ORDER == X_BIG_ENDIAN */
358
359#endif	/* RENDER_GENERIC_HELPER */
360
361#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
362#error Cannot define both MMIO and CP acceleration!
363#endif
364
365#if !defined(UNIXCPP) || defined(ANSICPP)
366#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
367#else
368#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
369#endif
370
371#ifdef ACCEL_MMIO
372#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
373#else
374#ifdef ACCEL_CP
375#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
376#else
377#error No accel type defined!
378#endif
379#endif
380
381static Bool FUNC_NAME(R100SetupTexture)(
382	ScrnInfoPtr pScrn,
383	uint32_t format,
384	uint8_t *src,
385	int src_pitch,
386	unsigned int width,
387	unsigned int height,
388	int flags)
389{
390    RADEONInfoPtr info = RADEONPTR(pScrn);
391    uint8_t *dst;
392    uint32_t tex_size = 0, txformat;
393    int dst_pitch, offset, size, tex_bytepp;
394#ifdef ACCEL_CP
395    uint32_t buf_pitch, dst_pitch_off;
396    int x, y;
397    unsigned int hpass;
398    uint8_t *tmp_dst;
399#endif
400    ACCEL_PREAMBLE();
401
402    if ((width > 2047) || (height > 2047))
403	return FALSE;
404
405    txformat = RadeonGetTextureFormat(format);
406    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
407
408    dst_pitch = (width * tex_bytepp + 63) & ~63;
409    size = dst_pitch * height;
410
411    if ((flags & XAA_RENDER_REPEAT) && (height != 1) &&
412	(((width * tex_bytepp + 31) & ~31) != dst_pitch))
413	return FALSE;
414
415#ifndef ACCEL_CP
416
417#if X_BYTE_ORDER == X_BIG_ENDIAN
418    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
419	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
420		   "failed!\n", __func__);
421	return FALSE;
422    }
423#endif
424
425#endif
426
427    if (!AllocateLinear(pScrn, size))
428	return FALSE;
429
430    if (flags & XAA_RENDER_REPEAT) {
431	txformat |= ATILog2(width) << RADEON_TXFORMAT_WIDTH_SHIFT;
432	txformat |= ATILog2(height) << RADEON_TXFORMAT_HEIGHT_SHIFT;
433    } else {
434	tex_size = (height << 16) | width;
435	txformat |= RADEON_TXFORMAT_NON_POWER2;
436    }
437
438    offset = info->RenderTex->offset * pScrn->bitsPerPixel / 8;
439    dst = (uint8_t*)(info->FB + offset);
440
441    /* Upload texture to card. */
442
443#ifdef ACCEL_CP
444
445    RADEONHostDataParams( pScrn, dst, dst_pitch, tex_bytepp, &dst_pitch_off, &x, &y );
446
447    while ( height )
448    {
449    	tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
450				      dst_pitch_off, &buf_pitch,
451				      x, &y, &height, &hpass );
452	RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
453				    hpass, buf_pitch, src_pitch );
454	src += hpass * src_pitch;
455    }
456
457    RADEON_PURGE_CACHE();
458    RADEON_WAIT_UNTIL_IDLE();
459
460#else
461
462    if (info->accel->NeedToSync)
463	info->accel->Sync(pScrn);
464
465    while (height--) {
466	memcpy(dst, src, width * tex_bytepp);
467	src += src_pitch;
468	dst += dst_pitch;
469    }
470
471#if X_BYTE_ORDER == X_BIG_ENDIAN
472    RADEONRestoreByteswap(info);
473#endif
474
475#endif	/* ACCEL_CP */
476
477    BEGIN_ACCEL(5);
478    OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
479    OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, tex_size);
480    OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, dst_pitch - 32);
481    OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, offset + info->fbLocation +
482					pScrn->fbOffset);
483    OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, RADEON_MAG_FILTER_LINEAR |
484					RADEON_MIN_FILTER_LINEAR |
485					RADEON_CLAMP_S_WRAP |
486					RADEON_CLAMP_T_WRAP);
487    FINISH_ACCEL();
488
489    return TRUE;
490}
491
492static Bool
493FUNC_NAME(R100SetupForCPUToScreenAlphaTexture) (
494	ScrnInfoPtr	pScrn,
495	int		op,
496	CARD16		red,
497	CARD16		green,
498	CARD16		blue,
499	CARD16		alpha,
500	CARD32		maskFormat,
501	CARD32		dstFormat,
502	CARD8		*alphaPtr,
503	int		alphaPitch,
504	int		width,
505	int		height,
506	int		flags
507)
508{
509    RADEONInfoPtr info = RADEONPTR(pScrn);
510    uint32_t colorformat, srccolor, blend_cntl;
511    ACCEL_PREAMBLE();
512
513    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
514    if (blend_cntl == 0)
515	return FALSE;
516
517    if (!info->XInited3D)
518	RADEONInit3DEngine(pScrn);
519
520    if (!FUNC_NAME(R100SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
521				     width, height, flags))
522	return FALSE;
523
524    colorformat = RadeonGetColorFormat(dstFormat);
525
526    srccolor = ((alpha & 0xff00) << 16) | ((red & 0xff00) << 8) | (blue >> 8) |
527	(green & 0xff00);
528
529    BEGIN_ACCEL(7);
530    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
531    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
532				  RADEON_TEX_BLEND_0_ENABLE);
533    OUT_ACCEL_REG(RADEON_PP_TFACTOR_0, srccolor);
534    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_A_TFACTOR_COLOR |
535					RADEON_COLOR_ARG_B_T0_ALPHA);
536    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_A_TFACTOR_ALPHA |
537					RADEON_ALPHA_ARG_B_T0_ALPHA);
538    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
539				     RADEON_SE_VTX_FMT_ST0);
540    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
541    FINISH_ACCEL();
542
543    return TRUE;
544}
545
546
547static Bool
548FUNC_NAME(R100SetupForCPUToScreenTexture) (
549	ScrnInfoPtr	pScrn,
550	int		op,
551	CARD32		srcFormat,
552	CARD32		dstFormat,
553	CARD8		*texPtr,
554	int		texPitch,
555	int		width,
556	int		height,
557	int		flags
558)
559{
560    RADEONInfoPtr info = RADEONPTR(pScrn);
561    uint32_t colorformat, blend_cntl;
562    ACCEL_PREAMBLE();
563
564    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
565    if (blend_cntl == 0)
566	return FALSE;
567
568    if (!info->XInited3D)
569	RADEONInit3DEngine(pScrn);
570
571    if (!FUNC_NAME(R100SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
572				     height, flags))
573	return FALSE;
574
575    colorformat = RadeonGetColorFormat(dstFormat);
576
577    BEGIN_ACCEL(6);
578    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
579    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
580				  RADEON_TEX_BLEND_0_ENABLE);
581    if (srcFormat != PICT_a8)
582	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_C_T0_COLOR);
583    else
584	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_C_ZERO);
585    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_C_T0_ALPHA);
586    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
587				     RADEON_SE_VTX_FMT_ST0);
588    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
589    FINISH_ACCEL();
590
591    return TRUE;
592}
593
594
595static void
596FUNC_NAME(R100SubsequentCPUToScreenTexture) (
597	ScrnInfoPtr	pScrn,
598	int		dstx,
599	int		dsty,
600	int		srcx,
601	int		srcy,
602	int		width,
603	int		height
604)
605{
606    RADEONInfoPtr  info       = RADEONPTR(pScrn);
607    int byteshift;
608    uint32_t fboffset;
609    float l, t, r, b, fl, fr, ft, fb;
610
611    ACCEL_PREAMBLE();
612
613    /* Note: we can't simply set up the 3D surface at the same location as the
614     * front buffer, because the 2048x2048 limit on coordinates may be smaller
615     * than the (MergedFB) screen.
616     * Can't use arbitrary offsets for color tiling
617     */
618    if (info->tilingEnabled) {
619       /* can't play tricks with x coordinate, or could we - tiling is disabled anyway in that case */
620       fboffset = info->fbLocation + pScrn->fbOffset +
621          (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
622       l = dstx;
623       t = (dsty % 16);
624    }
625    else {
626       byteshift = (pScrn->bitsPerPixel >> 4);
627       fboffset = (info->fbLocation + pScrn->fbOffset +
628		((pScrn->displayWidth * dsty + dstx) << byteshift)) & ~15;
629       l = ((dstx << byteshift) % 16) >> byteshift;
630       t = 0.0;
631    }
632
633    r = width + l;
634    b = height + t;
635    fl = srcx;
636    fr = srcx + width;
637    ft = srcy;
638    fb = srcy + height;
639
640#ifdef ACCEL_CP
641    BEGIN_RING(25);
642
643    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
644	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
645    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
646    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 17));
647    /* RADEON_SE_VTX_FMT */
648    OUT_RING(RADEON_CP_VC_FRMT_XY |
649	     RADEON_CP_VC_FRMT_ST0);
650    /* SE_VF_CNTL */
651    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
652	     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
653	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
654	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
655	     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
656
657    OUT_RING(F_TO_DW(l));
658    OUT_RING(F_TO_DW(t));
659    OUT_RING(F_TO_DW(fl));
660    OUT_RING(F_TO_DW(ft));
661
662    OUT_RING(F_TO_DW(r));
663    OUT_RING(F_TO_DW(t));
664    OUT_RING(F_TO_DW(fr));
665    OUT_RING(F_TO_DW(ft));
666
667    OUT_RING(F_TO_DW(r));
668    OUT_RING(F_TO_DW(b));
669    OUT_RING(F_TO_DW(fr));
670    OUT_RING(F_TO_DW(fb));
671
672    OUT_RING(F_TO_DW(l));
673    OUT_RING(F_TO_DW(b));
674    OUT_RING(F_TO_DW(fl));
675    OUT_RING(F_TO_DW(fb));
676
677    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
678
679    ADVANCE_RING();
680#else
681    BEGIN_ACCEL(20);
682
683    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
684	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
685    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
686
687    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, RADEON_VF_PRIM_TYPE_TRIANGLE_FAN |
688				     RADEON_VF_PRIM_WALK_DATA |
689				     RADEON_VF_RADEON_MODE |
690				     (4 << RADEON_VF_NUM_VERTICES_SHIFT));
691
692    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
693    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
694    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
695    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
696
697    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
698    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
699    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
700    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
701
702    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
703    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
704    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
705    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
706
707    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
708    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
709    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
710    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
711
712    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
713    FINISH_ACCEL();
714#endif
715
716}
717
718static Bool FUNC_NAME(R200SetupTexture)(
719	ScrnInfoPtr pScrn,
720	uint32_t format,
721	uint8_t *src,
722	int src_pitch,
723	unsigned int width,
724	unsigned int height,
725	int flags)
726{
727    RADEONInfoPtr info = RADEONPTR(pScrn);
728    uint8_t *dst;
729    uint32_t tex_size = 0, txformat;
730    int dst_pitch, offset, size, tex_bytepp;
731#ifdef ACCEL_CP
732    uint32_t buf_pitch, dst_pitch_off;
733    int x, y;
734    unsigned int hpass;
735    uint8_t *tmp_dst;
736#endif
737    ACCEL_PREAMBLE();
738
739    if ((width > 2048) || (height > 2048))
740	return FALSE;
741
742    txformat = RadeonGetTextureFormat(format);
743    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
744
745    dst_pitch = (width * tex_bytepp + 63) & ~63;
746    size = dst_pitch * height;
747
748    if ((flags & XAA_RENDER_REPEAT) && (height != 1) &&
749	(((width * tex_bytepp + 31) & ~31) != dst_pitch))
750	return FALSE;
751
752#ifndef ACCEL_CP
753
754#if X_BYTE_ORDER == X_BIG_ENDIAN
755    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
756	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
757		   "failed!\n", __func__);
758	return FALSE;
759    }
760#endif
761
762#endif
763
764    if (!AllocateLinear(pScrn, size))
765	return FALSE;
766
767    if (flags & XAA_RENDER_REPEAT) {
768	txformat |= ATILog2(width) << R200_TXFORMAT_WIDTH_SHIFT;
769	txformat |= ATILog2(height) << R200_TXFORMAT_HEIGHT_SHIFT;
770    } else {
771	tex_size = ((height - 1) << 16) | (width - 1);
772	txformat |= RADEON_TXFORMAT_NON_POWER2;
773    }
774
775    info->texW[0] = width;
776    info->texH[0] = height;
777
778    offset = info->RenderTex->offset * pScrn->bitsPerPixel / 8;
779    dst = (uint8_t*)(info->FB + offset);
780
781    /* Upload texture to card. */
782
783#ifdef ACCEL_CP
784
785    RADEONHostDataParams( pScrn, dst, dst_pitch, tex_bytepp, &dst_pitch_off, &x, &y );
786
787    while ( height )
788    {
789        tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
790				      dst_pitch_off, &buf_pitch,
791				      x, &y, &height, &hpass );
792	RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
793				    hpass, buf_pitch, src_pitch );
794	src += hpass * src_pitch;
795    }
796
797    RADEON_PURGE_CACHE();
798    RADEON_WAIT_UNTIL_IDLE();
799
800#else
801
802    if (info->accel->NeedToSync)
803	info->accel->Sync(pScrn);
804
805    while (height--) {
806	memcpy(dst, src, width * tex_bytepp);
807	src += src_pitch;
808	dst += dst_pitch;
809    }
810
811#if X_BYTE_ORDER == X_BIG_ENDIAN
812    RADEONRestoreByteswap(info);
813#endif
814
815#endif	/* ACCEL_CP */
816
817    BEGIN_ACCEL(6);
818    OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
819    OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
820    OUT_ACCEL_REG(R200_PP_TXSIZE_0, tex_size);
821    OUT_ACCEL_REG(R200_PP_TXPITCH_0, dst_pitch - 32);
822    OUT_ACCEL_REG(R200_PP_TXOFFSET_0, offset + info->fbLocation +
823				      pScrn->fbOffset);
824    OUT_ACCEL_REG(R200_PP_TXFILTER_0, R200_MAG_FILTER_NEAREST |
825				      R200_MIN_FILTER_NEAREST |
826				      R200_CLAMP_S_WRAP |
827				      R200_CLAMP_T_WRAP);
828    FINISH_ACCEL();
829
830    return TRUE;
831}
832
833static Bool
834FUNC_NAME(R200SetupForCPUToScreenAlphaTexture) (
835	ScrnInfoPtr	pScrn,
836	int		op,
837	CARD16		red,
838	CARD16		green,
839	CARD16		blue,
840	CARD16		alpha,
841	CARD32		maskFormat,
842	CARD32		dstFormat,
843	CARD8		*alphaPtr,
844	int		alphaPitch,
845	int		width,
846	int		height,
847	int		flags
848)
849{
850    RADEONInfoPtr  info = RADEONPTR(pScrn);
851    uint32_t colorformat, srccolor, blend_cntl;
852    ACCEL_PREAMBLE();
853
854    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
855    if (blend_cntl == 0)
856	return FALSE;
857
858    if (!info->XInited3D)
859	RADEONInit3DEngine(pScrn);
860
861    if (!FUNC_NAME(R200SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
862				     width, height, flags))
863	return FALSE;
864
865    colorformat = RadeonGetColorFormat(dstFormat);
866
867    srccolor = ((alpha & 0xff00) << 16) | ((red & 0xff00) << 8) | (blue >> 8) |
868	(green & 0xff00);
869
870    BEGIN_ACCEL(10);
871    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
872    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
873				  RADEON_TEX_BLEND_0_ENABLE);
874    OUT_ACCEL_REG(R200_PP_TFACTOR_0, srccolor);
875    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_A_TFACTOR_COLOR |
876				      R200_TXC_ARG_B_R0_ALPHA);
877    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
878    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_A_TFACTOR_ALPHA |
879				      R200_TXA_ARG_B_R0_ALPHA);
880    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
881    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
882    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
883    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
884    FINISH_ACCEL();
885
886    return TRUE;
887}
888
889static Bool
890FUNC_NAME(R200SetupForCPUToScreenTexture) (
891	ScrnInfoPtr	pScrn,
892	int		op,
893	CARD32		srcFormat,
894	CARD32		dstFormat,
895	CARD8		*texPtr,
896	int		texPitch,
897	int		width,
898	int		height,
899	int		flags
900)
901{
902    RADEONInfoPtr  info       = RADEONPTR(pScrn);
903    uint32_t colorformat, blend_cntl;
904    ACCEL_PREAMBLE();
905
906    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
907    if (blend_cntl == 0)
908	return FALSE;
909
910    if (!info->XInited3D)
911	RADEONInit3DEngine(pScrn);
912
913    if (!FUNC_NAME(R200SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
914				     height, flags))
915	return FALSE;
916
917    colorformat = RadeonGetColorFormat(dstFormat);
918
919    BEGIN_ACCEL(9);
920    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
921    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
922				  RADEON_TEX_BLEND_0_ENABLE);
923    if (srcFormat != PICT_a8)
924	OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_C_R0_COLOR);
925    else
926	OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_C_ZERO);
927    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
928    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_C_R0_ALPHA);
929    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
930    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
931    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
932    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
933    FINISH_ACCEL();
934
935    return TRUE;
936}
937
938static void
939FUNC_NAME(R200SubsequentCPUToScreenTexture) (
940	ScrnInfoPtr	pScrn,
941	int		dstx,
942	int		dsty,
943	int		srcx,
944	int		srcy,
945	int		width,
946	int		height
947)
948{
949    RADEONInfoPtr  info       = RADEONPTR(pScrn);
950    int byteshift;
951    uint32_t fboffset;
952    float l, t, r, b, fl, fr, ft, fb;
953    ACCEL_PREAMBLE();
954
955    /* Note: we can't simply set up the 3D surface at the same location as the
956     * front buffer, because the 2048x2048 limit on coordinates may be smaller
957     * than the (MergedFB) screen.
958     * Can't use arbitrary offsets for color tiling
959     */
960    if (info->tilingEnabled) {
961       /* can't play tricks with x coordinate, or could we - tiling is disabled anyway in that case */
962       fboffset = info->fbLocation + pScrn->fbOffset +
963          (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
964       l = dstx;
965       t = (dsty % 16);
966    }
967    else {
968       byteshift = (pScrn->bitsPerPixel >> 4);
969       fboffset = (info->fbLocation + pScrn->fbOffset +
970		((pScrn->displayWidth * dsty + dstx) << byteshift)) & ~15;
971       l = ((dstx << byteshift) % 16) >> byteshift;
972       t = 0.0;
973    }
974
975    r = width + l;
976    b = height + t;
977    fl = (float)srcx / info->texW[0];
978    fr = (float)(srcx + width) / info->texW[0];
979    ft = (float)srcy / info->texH[0];
980    fb = (float)(srcy + height) / info->texH[0];
981
982#ifdef ACCEL_CP
983    BEGIN_RING(24);
984
985    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
986	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
987    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
988
989    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 16));
990    /* RADEON_SE_VF_CNTL */
991    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
992	     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
993	     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
994
995    OUT_RING(F_TO_DW(l));
996    OUT_RING(F_TO_DW(t));
997    OUT_RING(F_TO_DW(fl));
998    OUT_RING(F_TO_DW(ft));
999
1000    OUT_RING(F_TO_DW(r));
1001    OUT_RING(F_TO_DW(t));
1002    OUT_RING(F_TO_DW(fr));
1003    OUT_RING(F_TO_DW(ft));
1004
1005    OUT_RING(F_TO_DW(r));
1006    OUT_RING(F_TO_DW(b));
1007    OUT_RING(F_TO_DW(fr));
1008    OUT_RING(F_TO_DW(fb));
1009
1010    OUT_RING(F_TO_DW(l));
1011    OUT_RING(F_TO_DW(b));
1012    OUT_RING(F_TO_DW(fl));
1013    OUT_RING(F_TO_DW(fb));
1014
1015    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
1016
1017    ADVANCE_RING();
1018#else
1019    BEGIN_ACCEL(20);
1020
1021    /* Note: we can't simply setup 3D surface at the same location as the front buffer,
1022       some apps may draw offscreen pictures out of the limitation of radeon 3D surface.
1023    */
1024    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
1025	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
1026    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
1027
1028    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
1029				      RADEON_VF_PRIM_WALK_DATA |
1030				      4 << RADEON_VF_NUM_VERTICES_SHIFT));
1031
1032    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
1033    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
1034    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
1035    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
1036
1037    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
1038    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
1039    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
1040    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
1041
1042    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
1043    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
1044    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
1045    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
1046
1047    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
1048    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
1049    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
1050    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
1051
1052    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
1053
1054    FINISH_ACCEL();
1055#endif
1056}
1057
1058#undef FUNC_NAME
1059#endif /* USE_XAA */
1060