radeon_render.c revision b7e1c893
1/*
2 * Copyright 2004 Eric Anholt
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 *    Eric Anholt <anholt@FreeBSD.org>
26 *    Hui Yu <hyu@ati.com>
27 *
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include <string.h>
35
36#ifdef USE_XAA
37
38#include "dixstruct.h"
39
40#include "xaa.h"
41#include "xaalocal.h"
42
43#ifndef RENDER_GENERIC_HELPER
44#define RENDER_GENERIC_HELPER
45
46struct blendinfo {
47	Bool dst_alpha;
48	Bool src_alpha;
49	uint32_t blend_cntl;
50};
51
52/* The first part of blend_cntl corresponds to Fa from the render "protocol"
53 * document, and the second part to Fb.
54 */
55static const struct blendinfo RadeonBlendOp[] = {
56    /* Clear */
57    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
58	   RADEON_DST_BLEND_GL_ZERO},
59    /* Src */
60    {0, 0, RADEON_SRC_BLEND_GL_ONE |
61	   RADEON_DST_BLEND_GL_ZERO},
62    /* Dst */
63    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
64	   RADEON_DST_BLEND_GL_ONE},
65    /* Over */
66    {0, 1, RADEON_SRC_BLEND_GL_ONE |
67	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
68    /* OverReverse */
69    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
70	   RADEON_DST_BLEND_GL_ONE},
71    /* In */
72    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA |
73	   RADEON_DST_BLEND_GL_ZERO},
74    /* InReverse */
75    {0, 1, RADEON_SRC_BLEND_GL_ZERO |
76	   RADEON_DST_BLEND_GL_SRC_ALPHA},
77    /* Out */
78    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
79	   RADEON_DST_BLEND_GL_ZERO},
80    /* OutReverse */
81    {0, 1, RADEON_SRC_BLEND_GL_ZERO |
82	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
83    /* Atop */
84    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA |
85	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
86    /* AtopReverse */
87    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
88	   RADEON_DST_BLEND_GL_SRC_ALPHA},
89    /* Xor */
90    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
91	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
92    /* Add */
93    {0, 0, RADEON_SRC_BLEND_GL_ONE |
94	   RADEON_DST_BLEND_GL_ONE},
95    /* Saturate */
96    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
97	   RADEON_DST_BLEND_GL_ONE},
98    {0, 0, 0},
99    {0, 0, 0},
100    /* DisjointClear */
101    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
102	   RADEON_DST_BLEND_GL_ZERO},
103    /* DisjointSrc */
104    {0, 0, RADEON_SRC_BLEND_GL_ONE |
105	   RADEON_DST_BLEND_GL_ZERO},
106    /* DisjointDst */
107    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
108	   RADEON_DST_BLEND_GL_ONE},
109    /* DisjointOver unsupported */
110    {0, 0, 0},
111    /* DisjointOverReverse */
112    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
113	   RADEON_DST_BLEND_GL_ONE},
114    /* DisjointIn unsupported */
115    {0, 0, 0},
116    /* DisjointInReverse unsupported */
117    {0, 0, 0},
118    /* DisjointOut unsupported */
119    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
120	   RADEON_DST_BLEND_GL_ZERO},
121    /* DisjointOutReverse unsupported */
122    {0, 0, 0},
123    /* DisjointAtop unsupported */
124    {0, 0, 0},
125    /* DisjointAtopReverse unsupported */
126    {0, 0, 0},
127    /* DisjointXor unsupported */
128    {0, 0, 0},
129    {0, 0, 0},
130    {0, 0, 0},
131    {0, 0, 0},
132    {0, 0, 0},
133    /* ConjointClear */
134    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
135	   RADEON_DST_BLEND_GL_ZERO},
136    /* ConjointSrc */
137    {0, 0, RADEON_SRC_BLEND_GL_ONE |
138	   RADEON_DST_BLEND_GL_ZERO},
139    /* ConjointDst */
140    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
141	   RADEON_DST_BLEND_GL_ONE},
142};
143#define RadeonOpMax (sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
144
145/* Note on texture formats:
146 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
147 * The RADEON and R200 TXFORMATS we use are the same on r100/r200.
148 */
149
150static CARD32 RADEONTextureFormats[] = {
151    PICT_a8r8g8b8,
152    PICT_a8,
153    PICT_x8r8g8b8,
154    PICT_r5g6b5,
155    PICT_a1r5g5b5,
156    PICT_x1r5g5b5,
157    0
158};
159
160static CARD32 RADEONDstFormats[] = {
161    PICT_a8r8g8b8,
162    PICT_x8r8g8b8,
163    PICT_r5g6b5,
164    PICT_a1r5g5b5,
165    PICT_x1r5g5b5,
166    0
167};
168
169static uint32_t
170RadeonGetTextureFormat(uint32_t format)
171{
172    switch (format) {
173    case PICT_a8r8g8b8:
174	return RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
175    case PICT_a8:
176	return RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
177    case PICT_x8r8g8b8:
178	return RADEON_TXFORMAT_ARGB8888;
179    case PICT_r5g6b5:
180	return RADEON_TXFORMAT_RGB565;
181    case PICT_a1r5g5b5:
182	return RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
183    case PICT_x1r5g5b5:
184	return RADEON_TXFORMAT_ARGB1555;
185    default:
186	return 0;
187    }
188}
189
190static uint32_t
191RadeonGetColorFormat(uint32_t format)
192{
193    switch (format) {
194    case PICT_a8r8g8b8:
195    case PICT_x8r8g8b8:
196	return RADEON_COLOR_FORMAT_ARGB8888;
197    case PICT_r5g6b5:
198	return RADEON_COLOR_FORMAT_RGB565;
199    case PICT_a1r5g5b5:
200    case PICT_x1r5g5b5:
201	return RADEON_COLOR_FORMAT_ARGB1555;
202    default:
203	return 0;
204    }
205}
206
207/* Returns a RADEON_RB3D_BLENDCNTL value, or 0 if the operation is not
208 * supported
209 */
210static uint32_t
211RadeonGetBlendCntl(uint8_t op, uint32_t dstFormat)
212{
213    uint32_t blend_cntl;
214
215    if (op >= RadeonOpMax || RadeonBlendOp[op].blend_cntl == 0)
216	return 0;
217
218    blend_cntl = RadeonBlendOp[op].blend_cntl;
219
220    if (RadeonBlendOp[op].dst_alpha && !PICT_FORMAT_A(dstFormat)) {
221	uint32_t srcblend = blend_cntl & RADEON_SRC_BLEND_MASK;
222
223	/* If there's no destination alpha channel, we need to wire the blending
224	 * to treat the alpha channel as always 1.
225	 */
226	if (srcblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA ||
227	    srcblend == RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE)
228	    blend_cntl = (blend_cntl & ~RADEON_SRC_BLEND_MASK) |
229			 RADEON_SRC_BLEND_GL_ZERO;
230	else if (srcblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
231	    blend_cntl = (blend_cntl & ~RADEON_SRC_BLEND_MASK) |
232			 RADEON_SRC_BLEND_GL_ONE;
233    }
234
235    return blend_cntl;
236}
237
/* Return the raw 32-bit object representation of a float, unchanged, so it
 * can be emitted as a dword (used for vertex data in this file).
 */
static __inline__ uint32_t F_TO_DW(float val)
{
    uint32_t bits;

    memcpy(&bits, &val, sizeof(bits));
    return bits;
}
247
/* Compute log base 2 of val, rounded down: the index of the highest set bit.
 *
 * Returns -1 when val is 0.  The bit pattern is examined as unsigned, so a
 * negative val yields the index of the sign bit instead of looping forever
 * on an arithmetic right shift.
 *
 * This replaces an x86 "bsrl" fast path whose result was undefined for 0 and
 * therefore disagreed with the portable loop; every platform now gets the
 * same well-defined answer.
 */
static __inline__ int
ATILog2(int val)
{
    unsigned int remaining = (unsigned int)val;
    int bits = -1;

    while (remaining != 0) {
        remaining >>= 1;
        ++bits;
    }

    return bits;
}
265
266static void
267RemoveLinear (FBLinearPtr linear)
268{
269   RADEONInfoPtr info = (RADEONInfoPtr)(linear->devPrivate.ptr);
270
271   info->accel_state->RenderTex = NULL;
272}
273
274static void
275RenderCallback (ScrnInfoPtr pScrn)
276{
277    RADEONInfoPtr  info       = RADEONPTR(pScrn);
278
279    if ((currentTime.milliseconds > info->accel_state->RenderTimeout) &&
280	info->accel_state->RenderTex) {
281	xf86FreeOffscreenLinear(info->accel_state->RenderTex);
282	info->accel_state->RenderTex = NULL;
283    }
284
285    if (!info->accel_state->RenderTex)
286	info->accel_state->RenderCallback = NULL;
287}
288
289static Bool
290AllocateLinear (
291   ScrnInfoPtr pScrn,
292   int sizeNeeded
293){
294   RADEONInfoPtr  info       = RADEONPTR(pScrn);
295   int cpp = info->CurrentLayout.bitsPerPixel / 8;
296
297   info->accel_state->RenderTimeout = currentTime.milliseconds + 30000;
298   info->accel_state->RenderCallback = RenderCallback;
299
300   /* XAA allocates in units of pixels at the screen bpp, so adjust size
301    * appropriately.
302    */
303   sizeNeeded = (sizeNeeded + cpp - 1) / cpp;
304
305   if (info->accel_state->RenderTex) {
306	if (info->accel_state->RenderTex->size >= sizeNeeded)
307	   return TRUE;
308	else {
309	   if (xf86ResizeOffscreenLinear(info->accel_state->RenderTex, sizeNeeded))
310		return TRUE;
311
312	   xf86FreeOffscreenLinear(info->accel_state->RenderTex);
313	   info->accel_state->RenderTex = NULL;
314	}
315   }
316
317   info->accel_state->RenderTex = xf86AllocateOffscreenLinear(pScrn->pScreen, sizeNeeded, 32,
318							      NULL, RemoveLinear, info);
319
320   return (info->accel_state->RenderTex != NULL);
321}
322
323#if X_BYTE_ORDER == X_BIG_ENDIAN
324static Bool RADEONSetupRenderByteswap(ScrnInfoPtr pScrn, int tex_bytepp)
325{
326    RADEONInfoPtr info = RADEONPTR(pScrn);
327    unsigned char *RADEONMMIO = info->MMIO;
328    uint32_t swapper = info->ModeReg->surface_cntl;
329
330    swapper &= ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP |
331		 RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP);
332
333    /* Set up byte swapping for the framebuffer aperture as needed */
334    switch (tex_bytepp) {
335    case 1:
336	break;
337    case 2:
338	swapper |= RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP;
339	break;
340    case 4:
341	swapper |= RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP;
342	break;
343    default:
344	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: Don't know what to do for "
345		   "tex_bytepp == %d!\n", __func__, tex_bytepp);
346	return FALSE;
347    }
348    OUTREG(RADEON_SURFACE_CNTL, swapper);
349    return TRUE;
350}
351
352static void RADEONRestoreByteswap(RADEONInfoPtr info)
353{
354    unsigned char *RADEONMMIO = info->MMIO;
355
356    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
357}
358#endif	/* X_BYTE_ORDER == X_BIG_ENDIAN */
359
360#endif	/* RENDER_GENERIC_HELPER */
361
362#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
363#error Cannot define both MMIO and CP acceleration!
364#endif
365
366#if !defined(UNIXCPP) || defined(ANSICPP)
367#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
368#else
369#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
370#endif
371
372#ifdef ACCEL_MMIO
373#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
374#else
375#ifdef ACCEL_CP
376#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
377#else
378#error No accel type defined!
379#endif
380#endif
381
382static Bool FUNC_NAME(R100SetupTexture)(
383	ScrnInfoPtr pScrn,
384	uint32_t format,
385	uint8_t *src,
386	int src_pitch,
387	unsigned int width,
388	unsigned int height,
389	int flags)
390{
391    RADEONInfoPtr info = RADEONPTR(pScrn);
392    uint8_t *dst;
393    uint32_t tex_size = 0, txformat;
394    int dst_pitch, offset, size, tex_bytepp;
395#ifdef ACCEL_CP
396    uint32_t buf_pitch, dst_pitch_off;
397    int x, y;
398    unsigned int hpass;
399    uint8_t *tmp_dst;
400#endif
401    ACCEL_PREAMBLE();
402
403    if ((width > 2047) || (height > 2047))
404	return FALSE;
405
406    txformat = RadeonGetTextureFormat(format);
407    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
408
409    dst_pitch = (width * tex_bytepp + 63) & ~63;
410    size = dst_pitch * height;
411
412    if ((flags & XAA_RENDER_REPEAT) && (height != 1) &&
413	(((width * tex_bytepp + 31) & ~31) != dst_pitch))
414	return FALSE;
415
416#ifndef ACCEL_CP
417
418#if X_BYTE_ORDER == X_BIG_ENDIAN
419    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
420	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
421		   "failed!\n", __func__);
422	return FALSE;
423    }
424#endif
425
426#endif
427
428    if (!AllocateLinear(pScrn, size))
429	return FALSE;
430
431    if (flags & XAA_RENDER_REPEAT) {
432	txformat |= ATILog2(width) << RADEON_TXFORMAT_WIDTH_SHIFT;
433	txformat |= ATILog2(height) << RADEON_TXFORMAT_HEIGHT_SHIFT;
434    } else {
435	tex_size = (height << 16) | width;
436	txformat |= RADEON_TXFORMAT_NON_POWER2;
437    }
438
439    offset = info->accel_state->RenderTex->offset * pScrn->bitsPerPixel / 8;
440    dst = (uint8_t*)(info->FB + offset);
441
442    /* Upload texture to card. */
443
444#ifdef ACCEL_CP
445
446    RADEONHostDataParams( pScrn, dst, dst_pitch, tex_bytepp, &dst_pitch_off, &x, &y );
447
448    while ( height )
449    {
450    	tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
451				      dst_pitch_off, &buf_pitch,
452				      x, &y, &height, &hpass );
453	RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
454				    hpass, buf_pitch, src_pitch );
455	src += hpass * src_pitch;
456    }
457
458    RADEON_PURGE_CACHE();
459    RADEON_WAIT_UNTIL_IDLE();
460
461#else
462
463    if (info->accel_state->accel->NeedToSync)
464	info->accel_state->accel->Sync(pScrn);
465
466    while (height--) {
467	memcpy(dst, src, width * tex_bytepp);
468	src += src_pitch;
469	dst += dst_pitch;
470    }
471
472#if X_BYTE_ORDER == X_BIG_ENDIAN
473    RADEONRestoreByteswap(info);
474#endif
475
476#endif	/* ACCEL_CP */
477
478    BEGIN_ACCEL(5);
479    OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
480    OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, tex_size);
481    OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, dst_pitch - 32);
482    OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, offset + info->fbLocation +
483					pScrn->fbOffset);
484    OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, RADEON_MAG_FILTER_LINEAR |
485					RADEON_MIN_FILTER_LINEAR |
486					RADEON_CLAMP_S_WRAP |
487					RADEON_CLAMP_T_WRAP);
488    FINISH_ACCEL();
489
490    return TRUE;
491}
492
493static Bool
494FUNC_NAME(R100SetupForCPUToScreenAlphaTexture) (
495	ScrnInfoPtr	pScrn,
496	int		op,
497	CARD16		red,
498	CARD16		green,
499	CARD16		blue,
500	CARD16		alpha,
501	CARD32		maskFormat,
502	CARD32		dstFormat,
503	CARD8		*alphaPtr,
504	int		alphaPitch,
505	int		width,
506	int		height,
507	int		flags
508)
509{
510    RADEONInfoPtr info = RADEONPTR(pScrn);
511    uint32_t colorformat, srccolor, blend_cntl;
512    ACCEL_PREAMBLE();
513
514    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
515    if (blend_cntl == 0)
516	return FALSE;
517
518    if (!info->accel_state->XInited3D)
519	RADEONInit3DEngine(pScrn);
520
521    if (!FUNC_NAME(R100SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
522				     width, height, flags))
523	return FALSE;
524
525    colorformat = RadeonGetColorFormat(dstFormat);
526
527    srccolor = ((alpha & 0xff00) << 16) | ((red & 0xff00) << 8) | (blue >> 8) |
528	(green & 0xff00);
529
530    BEGIN_ACCEL(7);
531    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
532    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
533				  RADEON_TEX_BLEND_0_ENABLE);
534    OUT_ACCEL_REG(RADEON_PP_TFACTOR_0, srccolor);
535    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_A_TFACTOR_COLOR |
536					RADEON_COLOR_ARG_B_T0_ALPHA);
537    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_A_TFACTOR_ALPHA |
538					RADEON_ALPHA_ARG_B_T0_ALPHA);
539    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
540				     RADEON_SE_VTX_FMT_ST0);
541    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
542    FINISH_ACCEL();
543
544    return TRUE;
545}
546
547
548static Bool
549FUNC_NAME(R100SetupForCPUToScreenTexture) (
550	ScrnInfoPtr	pScrn,
551	int		op,
552	CARD32		srcFormat,
553	CARD32		dstFormat,
554	CARD8		*texPtr,
555	int		texPitch,
556	int		width,
557	int		height,
558	int		flags
559)
560{
561    RADEONInfoPtr info = RADEONPTR(pScrn);
562    uint32_t colorformat, blend_cntl;
563    ACCEL_PREAMBLE();
564
565    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
566    if (blend_cntl == 0)
567	return FALSE;
568
569    if (!info->accel_state->XInited3D)
570	RADEONInit3DEngine(pScrn);
571
572    if (!FUNC_NAME(R100SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
573				     height, flags))
574	return FALSE;
575
576    colorformat = RadeonGetColorFormat(dstFormat);
577
578    BEGIN_ACCEL(6);
579    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
580    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
581				  RADEON_TEX_BLEND_0_ENABLE);
582    if (srcFormat != PICT_a8)
583	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_C_T0_COLOR);
584    else
585	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_C_ZERO);
586    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_C_T0_ALPHA);
587    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
588				     RADEON_SE_VTX_FMT_ST0);
589    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
590    FINISH_ACCEL();
591
592    return TRUE;
593}
594
595
596static void
597FUNC_NAME(R100SubsequentCPUToScreenTexture) (
598	ScrnInfoPtr	pScrn,
599	int		dstx,
600	int		dsty,
601	int		srcx,
602	int		srcy,
603	int		width,
604	int		height
605)
606{
607    RADEONInfoPtr  info       = RADEONPTR(pScrn);
608    int byteshift;
609    uint32_t fboffset;
610    float l, t, r, b, fl, fr, ft, fb;
611
612    ACCEL_PREAMBLE();
613
614    /* Note: we can't simply set up the 3D surface at the same location as the
615     * front buffer, because the 2048x2048 limit on coordinates may be smaller
616     * than the (MergedFB) screen.
617     * Can't use arbitrary offsets for color tiling
618     */
619    if (info->tilingEnabled) {
620       /* can't play tricks with x coordinate, or could we - tiling is disabled anyway in that case */
621       fboffset = info->fbLocation + pScrn->fbOffset +
622          (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
623       l = dstx;
624       t = (dsty % 16);
625    }
626    else {
627       byteshift = (pScrn->bitsPerPixel >> 4);
628       fboffset = (info->fbLocation + pScrn->fbOffset +
629		((pScrn->displayWidth * dsty + dstx) << byteshift)) & ~15;
630       l = ((dstx << byteshift) % 16) >> byteshift;
631       t = 0.0;
632    }
633
634    r = width + l;
635    b = height + t;
636    fl = srcx;
637    fr = srcx + width;
638    ft = srcy;
639    fb = srcy + height;
640
641#ifdef ACCEL_CP
642    BEGIN_RING(25);
643
644    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
645	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
646    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
647    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 17));
648    /* RADEON_SE_VTX_FMT */
649    OUT_RING(RADEON_CP_VC_FRMT_XY |
650	     RADEON_CP_VC_FRMT_ST0);
651    /* SE_VF_CNTL */
652    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
653	     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
654	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
655	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
656	     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
657
658    OUT_RING(F_TO_DW(l));
659    OUT_RING(F_TO_DW(t));
660    OUT_RING(F_TO_DW(fl));
661    OUT_RING(F_TO_DW(ft));
662
663    OUT_RING(F_TO_DW(r));
664    OUT_RING(F_TO_DW(t));
665    OUT_RING(F_TO_DW(fr));
666    OUT_RING(F_TO_DW(ft));
667
668    OUT_RING(F_TO_DW(r));
669    OUT_RING(F_TO_DW(b));
670    OUT_RING(F_TO_DW(fr));
671    OUT_RING(F_TO_DW(fb));
672
673    OUT_RING(F_TO_DW(l));
674    OUT_RING(F_TO_DW(b));
675    OUT_RING(F_TO_DW(fl));
676    OUT_RING(F_TO_DW(fb));
677
678    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
679
680    ADVANCE_RING();
681#else
682    BEGIN_ACCEL(20);
683
684    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
685	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
686    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
687
688    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, RADEON_VF_PRIM_TYPE_TRIANGLE_FAN |
689				     RADEON_VF_PRIM_WALK_DATA |
690				     RADEON_VF_RADEON_MODE |
691				     (4 << RADEON_VF_NUM_VERTICES_SHIFT));
692
693    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
694    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
695    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
696    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
697
698    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
699    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
700    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
701    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
702
703    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
704    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
705    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
706    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
707
708    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
709    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
710    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
711    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
712
713    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
714    FINISH_ACCEL();
715#endif
716
717}
718
719static Bool FUNC_NAME(R200SetupTexture)(
720	ScrnInfoPtr pScrn,
721	uint32_t format,
722	uint8_t *src,
723	int src_pitch,
724	unsigned int width,
725	unsigned int height,
726	int flags)
727{
728    RADEONInfoPtr info = RADEONPTR(pScrn);
729    uint8_t *dst;
730    uint32_t tex_size = 0, txformat;
731    int dst_pitch, offset, size, tex_bytepp;
732#ifdef ACCEL_CP
733    uint32_t buf_pitch, dst_pitch_off;
734    int x, y;
735    unsigned int hpass;
736    uint8_t *tmp_dst;
737#endif
738    ACCEL_PREAMBLE();
739
740    if ((width > 2048) || (height > 2048))
741	return FALSE;
742
743    txformat = RadeonGetTextureFormat(format);
744    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
745
746    dst_pitch = (width * tex_bytepp + 63) & ~63;
747    size = dst_pitch * height;
748
749    if ((flags & XAA_RENDER_REPEAT) && (height != 1) &&
750	(((width * tex_bytepp + 31) & ~31) != dst_pitch))
751	return FALSE;
752
753#ifndef ACCEL_CP
754
755#if X_BYTE_ORDER == X_BIG_ENDIAN
756    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
757	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
758		   "failed!\n", __func__);
759	return FALSE;
760    }
761#endif
762
763#endif
764
765    if (!AllocateLinear(pScrn, size))
766	return FALSE;
767
768    if (flags & XAA_RENDER_REPEAT) {
769	txformat |= ATILog2(width) << R200_TXFORMAT_WIDTH_SHIFT;
770	txformat |= ATILog2(height) << R200_TXFORMAT_HEIGHT_SHIFT;
771    } else {
772	tex_size = ((height - 1) << 16) | (width - 1);
773	txformat |= RADEON_TXFORMAT_NON_POWER2;
774    }
775
776    info->accel_state->texW[0] = width;
777    info->accel_state->texH[0] = height;
778
779    offset = info->accel_state->RenderTex->offset * pScrn->bitsPerPixel / 8;
780    dst = (uint8_t*)(info->FB + offset);
781
782    /* Upload texture to card. */
783
784#ifdef ACCEL_CP
785
786    RADEONHostDataParams( pScrn, dst, dst_pitch, tex_bytepp, &dst_pitch_off, &x, &y );
787
788    while ( height )
789    {
790        tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
791				      dst_pitch_off, &buf_pitch,
792				      x, &y, &height, &hpass );
793	RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
794				    hpass, buf_pitch, src_pitch );
795	src += hpass * src_pitch;
796    }
797
798    RADEON_PURGE_CACHE();
799    RADEON_WAIT_UNTIL_IDLE();
800
801#else
802
803    if (info->accel_state->accel->NeedToSync)
804	info->accel_state->accel->Sync(pScrn);
805
806    while (height--) {
807	memcpy(dst, src, width * tex_bytepp);
808	src += src_pitch;
809	dst += dst_pitch;
810    }
811
812#if X_BYTE_ORDER == X_BIG_ENDIAN
813    RADEONRestoreByteswap(info);
814#endif
815
816#endif	/* ACCEL_CP */
817
818    BEGIN_ACCEL(6);
819    OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
820    OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
821    OUT_ACCEL_REG(R200_PP_TXSIZE_0, tex_size);
822    OUT_ACCEL_REG(R200_PP_TXPITCH_0, dst_pitch - 32);
823    OUT_ACCEL_REG(R200_PP_TXOFFSET_0, offset + info->fbLocation +
824				      pScrn->fbOffset);
825    OUT_ACCEL_REG(R200_PP_TXFILTER_0, R200_MAG_FILTER_NEAREST |
826				      R200_MIN_FILTER_NEAREST |
827				      R200_CLAMP_S_WRAP |
828				      R200_CLAMP_T_WRAP);
829    FINISH_ACCEL();
830
831    return TRUE;
832}
833
834static Bool
835FUNC_NAME(R200SetupForCPUToScreenAlphaTexture) (
836	ScrnInfoPtr	pScrn,
837	int		op,
838	CARD16		red,
839	CARD16		green,
840	CARD16		blue,
841	CARD16		alpha,
842	CARD32		maskFormat,
843	CARD32		dstFormat,
844	CARD8		*alphaPtr,
845	int		alphaPitch,
846	int		width,
847	int		height,
848	int		flags
849)
850{
851    RADEONInfoPtr  info = RADEONPTR(pScrn);
852    uint32_t colorformat, srccolor, blend_cntl;
853    ACCEL_PREAMBLE();
854
855    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
856    if (blend_cntl == 0)
857	return FALSE;
858
859    if (!info->accel_state->XInited3D)
860	RADEONInit3DEngine(pScrn);
861
862    if (!FUNC_NAME(R200SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
863				     width, height, flags))
864	return FALSE;
865
866    colorformat = RadeonGetColorFormat(dstFormat);
867
868    srccolor = ((alpha & 0xff00) << 16) | ((red & 0xff00) << 8) | (blue >> 8) |
869	(green & 0xff00);
870
871    BEGIN_ACCEL(10);
872    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
873    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
874				  RADEON_TEX_BLEND_0_ENABLE);
875    OUT_ACCEL_REG(R200_PP_TFACTOR_0, srccolor);
876    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_A_TFACTOR_COLOR |
877				      R200_TXC_ARG_B_R0_ALPHA);
878    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
879    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_A_TFACTOR_ALPHA |
880				      R200_TXA_ARG_B_R0_ALPHA);
881    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
882    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
883    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
884    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
885    FINISH_ACCEL();
886
887    return TRUE;
888}
889
890static Bool
891FUNC_NAME(R200SetupForCPUToScreenTexture) (
892	ScrnInfoPtr	pScrn,
893	int		op,
894	CARD32		srcFormat,
895	CARD32		dstFormat,
896	CARD8		*texPtr,
897	int		texPitch,
898	int		width,
899	int		height,
900	int		flags
901)
902{
903    RADEONInfoPtr  info       = RADEONPTR(pScrn);
904    uint32_t colorformat, blend_cntl;
905    ACCEL_PREAMBLE();
906
907    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
908    if (blend_cntl == 0)
909	return FALSE;
910
911    if (!info->accel_state->XInited3D)
912	RADEONInit3DEngine(pScrn);
913
914    if (!FUNC_NAME(R200SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
915				     height, flags))
916	return FALSE;
917
918    colorformat = RadeonGetColorFormat(dstFormat);
919
920    BEGIN_ACCEL(9);
921    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
922    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
923				  RADEON_TEX_BLEND_0_ENABLE);
924    if (srcFormat != PICT_a8)
925	OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_C_R0_COLOR);
926    else
927	OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_C_ZERO);
928    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
929    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_C_R0_ALPHA);
930    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
931    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
932    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
933    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
934    FINISH_ACCEL();
935
936    return TRUE;
937}
938
939static void
940FUNC_NAME(R200SubsequentCPUToScreenTexture) (
941	ScrnInfoPtr	pScrn,
942	int		dstx,
943	int		dsty,
944	int		srcx,
945	int		srcy,
946	int		width,
947	int		height
948)
949{
950    RADEONInfoPtr  info       = RADEONPTR(pScrn);
951    int byteshift;
952    uint32_t fboffset;
953    float l, t, r, b, fl, fr, ft, fb;
954    ACCEL_PREAMBLE();
955
956    /* Note: we can't simply set up the 3D surface at the same location as the
957     * front buffer, because the 2048x2048 limit on coordinates may be smaller
958     * than the (MergedFB) screen.
959     * Can't use arbitrary offsets for color tiling
960     */
961    if (info->tilingEnabled) {
962       /* can't play tricks with x coordinate, or could we - tiling is disabled anyway in that case */
963       fboffset = info->fbLocation + pScrn->fbOffset +
964          (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
965       l = dstx;
966       t = (dsty % 16);
967    }
968    else {
969       byteshift = (pScrn->bitsPerPixel >> 4);
970       fboffset = (info->fbLocation + pScrn->fbOffset +
971		((pScrn->displayWidth * dsty + dstx) << byteshift)) & ~15;
972       l = ((dstx << byteshift) % 16) >> byteshift;
973       t = 0.0;
974    }
975
976    r = width + l;
977    b = height + t;
978    fl = (float)srcx / info->accel_state->texW[0];
979    fr = (float)(srcx + width) / info->accel_state->texW[0];
980    ft = (float)srcy / info->accel_state->texH[0];
981    fb = (float)(srcy + height) / info->accel_state->texH[0];
982
983#ifdef ACCEL_CP
984    BEGIN_RING(24);
985
986    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
987	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
988    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
989
990    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 16));
991    /* RADEON_SE_VF_CNTL */
992    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
993	     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
994	     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
995
996    OUT_RING(F_TO_DW(l));
997    OUT_RING(F_TO_DW(t));
998    OUT_RING(F_TO_DW(fl));
999    OUT_RING(F_TO_DW(ft));
1000
1001    OUT_RING(F_TO_DW(r));
1002    OUT_RING(F_TO_DW(t));
1003    OUT_RING(F_TO_DW(fr));
1004    OUT_RING(F_TO_DW(ft));
1005
1006    OUT_RING(F_TO_DW(r));
1007    OUT_RING(F_TO_DW(b));
1008    OUT_RING(F_TO_DW(fr));
1009    OUT_RING(F_TO_DW(fb));
1010
1011    OUT_RING(F_TO_DW(l));
1012    OUT_RING(F_TO_DW(b));
1013    OUT_RING(F_TO_DW(fl));
1014    OUT_RING(F_TO_DW(fb));
1015
1016    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
1017
1018    ADVANCE_RING();
1019#else
1020    BEGIN_ACCEL(20);
1021
1022    /* Note: we can't simply setup 3D surface at the same location as the front buffer,
1023       some apps may draw offscreen pictures out of the limitation of radeon 3D surface.
1024    */
1025    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
1026	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
1027    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
1028
1029    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
1030				      RADEON_VF_PRIM_WALK_DATA |
1031				      4 << RADEON_VF_NUM_VERTICES_SHIFT));
1032
1033    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
1034    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
1035    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
1036    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
1037
1038    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
1039    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
1040    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
1041    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
1042
1043    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
1044    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
1045    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
1046    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
1047
1048    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
1049    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
1050    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
1051    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
1052
1053    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
1054
1055    FINISH_ACCEL();
1056#endif
1057}
1058
1059#undef FUNC_NAME
1060#endif /* USE_XAA */
1061