1/*
2 * Copyright 2004 Eric Anholt
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 *    Eric Anholt <anholt@FreeBSD.org>
26 *    Hui Yu <hyu@ati.com>
27 *
28 */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include <string.h>
35
36#ifdef USE_XAA
37
38#include "dixstruct.h"
39
40#include "xaa.h"
41#include "xaalocal.h"
42
43#ifndef RENDER_GENERIC_HELPER
44#define RENDER_GENERIC_HELPER
45
/* One row of the Render-operator -> blend-control lookup table. */
struct blendinfo {
	/* Nonzero when the entry depends on destination alpha; checked by
	 * RadeonGetBlendCntl() to rewire the source factor for alpha-less
	 * destinations.
	 */
	Bool dst_alpha;
	/* NOTE(review): presumably marks entries whose factors use source
	 * alpha; nothing in this section reads it -- confirm.
	 */
	Bool src_alpha;
	/* Source and destination blend factor bits.  A value of 0 makes
	 * RadeonGetBlendCntl() report the operator as unsupported.
	 */
	uint32_t blend_cntl;
};
51
52/* The first part of blend_cntl corresponds to Fa from the render "protocol"
53 * document, and the second part to Fb.
54 */
/* Indexed directly by the operator number handed to RadeonGetBlendCntl().
 * Entries whose blend_cntl is 0 are rejected there as unsupported.
 */
static const struct blendinfo RadeonBlendOp[] = {
    /* Clear */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
	   RADEON_DST_BLEND_GL_ZERO},
    /* Src */
    {0, 0, RADEON_SRC_BLEND_GL_ONE |
	   RADEON_DST_BLEND_GL_ZERO},
    /* Dst */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
	   RADEON_DST_BLEND_GL_ONE},
    /* Over */
    {0, 1, RADEON_SRC_BLEND_GL_ONE |
	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* OverReverse */
    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
	   RADEON_DST_BLEND_GL_ONE},
    /* In */
    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA |
	   RADEON_DST_BLEND_GL_ZERO},
    /* InReverse */
    {0, 1, RADEON_SRC_BLEND_GL_ZERO |
	   RADEON_DST_BLEND_GL_SRC_ALPHA},
    /* Out */
    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
	   RADEON_DST_BLEND_GL_ZERO},
    /* OutReverse */
    {0, 1, RADEON_SRC_BLEND_GL_ZERO |
	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* Atop */
    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA |
	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* AtopReverse */
    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
	   RADEON_DST_BLEND_GL_SRC_ALPHA},
    /* Xor */
    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
	   RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
    /* Add */
    {0, 0, RADEON_SRC_BLEND_GL_ONE |
	   RADEON_DST_BLEND_GL_ONE},
    /* Saturate */
    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
	   RADEON_DST_BLEND_GL_ONE},
    /* Indices 14 and 15: filler so that DisjointClear lands on index 16 */
    {0, 0, 0},
    {0, 0, 0},
    /* DisjointClear */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
	   RADEON_DST_BLEND_GL_ZERO},
    /* DisjointSrc */
    {0, 0, RADEON_SRC_BLEND_GL_ONE |
	   RADEON_DST_BLEND_GL_ZERO},
    /* DisjointDst */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
	   RADEON_DST_BLEND_GL_ONE},
    /* DisjointOver unsupported */
    {0, 0, 0},
    /* DisjointOverReverse */
    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
	   RADEON_DST_BLEND_GL_ONE},
    /* DisjointIn unsupported */
    {0, 0, 0},
    /* DisjointInReverse unsupported */
    {0, 0, 0},
    /* DisjointOut
     * NOTE(review): this entry was labelled "unsupported", yet it is
     * populated and is therefore accepted by RadeonGetBlendCntl() --
     * confirm which of the two is intended.
     */
    {1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
	   RADEON_DST_BLEND_GL_ZERO},
    /* DisjointOutReverse unsupported */
    {0, 0, 0},
    /* DisjointAtop unsupported */
    {0, 0, 0},
    /* DisjointAtopReverse unsupported */
    {0, 0, 0},
    /* DisjointXor unsupported */
    {0, 0, 0},
    /* Indices 28 to 31: filler so that ConjointClear lands on index 32 */
    {0, 0, 0},
    {0, 0, 0},
    {0, 0, 0},
    {0, 0, 0},
    /* ConjointClear */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
	   RADEON_DST_BLEND_GL_ZERO},
    /* ConjointSrc */
    {0, 0, RADEON_SRC_BLEND_GL_ONE |
	   RADEON_DST_BLEND_GL_ZERO},
    /* ConjointDst */
    {0, 0, RADEON_SRC_BLEND_GL_ZERO |
	   RADEON_DST_BLEND_GL_ONE},
};
/* Number of table entries; operator numbers at or above this are rejected. */
#define RadeonOpMax (sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
144
145/* Note on texture formats:
146 * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
147 * The RADEON and R200 TXFORMATS we use are the same on r100/r200.
148 */
149
/* Zero-terminated list of the PICT formats RadeonGetTextureFormat() can
 * translate.
 * NOTE(review): presumably advertised to XAA as the accepted source/mask
 * formats; the registration code is not in this section.
 */
static CARD32 RADEONTextureFormats[] = {
    PICT_a8r8g8b8,
    PICT_a8,
    PICT_x8r8g8b8,
    PICT_r5g6b5,
    PICT_a1r5g5b5,
    PICT_x1r5g5b5,
    0
};
159
/* Zero-terminated list of the PICT formats RadeonGetColorFormat() can
 * translate.
 * NOTE(review): presumably advertised to XAA as the accepted destination
 * formats; the registration code is not in this section.
 */
static CARD32 RADEONDstFormats[] = {
    PICT_a8r8g8b8,
    PICT_x8r8g8b8,
    PICT_r5g6b5,
    PICT_a1r5g5b5,
    PICT_x1r5g5b5,
    0
};
168
169static uint32_t
170RadeonGetTextureFormat(uint32_t format)
171{
172    switch (format) {
173    case PICT_a8r8g8b8:
174	return RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
175    case PICT_a8:
176	return RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
177    case PICT_x8r8g8b8:
178	return RADEON_TXFORMAT_ARGB8888;
179    case PICT_r5g6b5:
180	return RADEON_TXFORMAT_RGB565;
181    case PICT_a1r5g5b5:
182	return RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
183    case PICT_x1r5g5b5:
184	return RADEON_TXFORMAT_ARGB1555;
185    default:
186	return 0;
187    }
188}
189
190static uint32_t
191RadeonGetColorFormat(uint32_t format)
192{
193    switch (format) {
194    case PICT_a8r8g8b8:
195    case PICT_x8r8g8b8:
196	return RADEON_COLOR_FORMAT_ARGB8888;
197    case PICT_r5g6b5:
198	return RADEON_COLOR_FORMAT_RGB565;
199    case PICT_a1r5g5b5:
200    case PICT_x1r5g5b5:
201	return RADEON_COLOR_FORMAT_ARGB1555;
202    default:
203	return 0;
204    }
205}
206
207/* Returns a RADEON_RB3D_BLENDCNTL value, or 0 if the operation is not
208 * supported
209 */
210static uint32_t
211RadeonGetBlendCntl(uint8_t op, uint32_t dstFormat)
212{
213    uint32_t blend_cntl;
214
215    if (op >= RadeonOpMax || RadeonBlendOp[op].blend_cntl == 0)
216	return 0;
217
218    blend_cntl = RadeonBlendOp[op].blend_cntl;
219
220    if (RadeonBlendOp[op].dst_alpha && !PICT_FORMAT_A(dstFormat)) {
221	uint32_t srcblend = blend_cntl & RADEON_SRC_BLEND_MASK;
222
223	/* If there's no destination alpha channel, we need to wire the blending
224	 * to treat the alpha channel as always 1.
225	 */
226	if (srcblend == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA ||
227	    srcblend == RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE)
228	    blend_cntl = (blend_cntl & ~RADEON_SRC_BLEND_MASK) |
229			 RADEON_SRC_BLEND_GL_ZERO;
230	else if (srcblend == RADEON_SRC_BLEND_GL_DST_ALPHA)
231	    blend_cntl = (blend_cntl & ~RADEON_SRC_BLEND_MASK) |
232			 RADEON_SRC_BLEND_GL_ONE;
233    }
234
235    return blend_cntl;
236}
237
/* Return the raw 32-bit object representation of a float, unchanged, so
 * it can be written out as a dword (no numeric conversion takes place).
 */
static __inline__ uint32_t F_TO_DW(float val)
{
    uint32_t dword;

    memcpy(&dword, &val, sizeof(dword));
    return dword;
}
247
/* Compute floor(log2(val)), i.e. the index of the highest set bit.
 *
 * Returns -1 for val == 0 on every platform.  Previously the x86 "bsr"
 * path returned an undefined value for 0 while the portable loop returned
 * -1.  Negative inputs are treated as their unsigned bit pattern on both
 * paths, so the portable loop can no longer spin on an arithmetic shift.
 */
static __inline__ int
ATILog2(int val)
{
	int bits;

	/* bsr leaves its destination undefined for a zero source. */
	if (val == 0)
		return -1;

#if (defined __i386__ || defined __x86_64__) && (defined __GNUC__)
	__asm volatile("bsrl	%1, %0"
		: "=r" (bits)
		: "c" (val)
	);
	return bits;
#else
	{
		unsigned int v = (unsigned int)val;

		/* Count how many shifts it takes to clear every bit. */
		for (bits = 0; v != 0; v >>= 1, ++bits)
			;
	}
	return bits - 1;
#endif
}
265
266static void
267RemoveLinear (FBLinearPtr linear)
268{
269   RADEONInfoPtr info = (RADEONInfoPtr)(linear->devPrivate.ptr);
270
271   info->accel_state->RenderTex = NULL;
272}
273
274static void
275RenderCallback (ScrnInfoPtr pScrn)
276{
277    RADEONInfoPtr  info       = RADEONPTR(pScrn);
278
279    if ((currentTime.milliseconds > info->accel_state->RenderTimeout) &&
280	info->accel_state->RenderTex) {
281	xf86FreeOffscreenLinear(info->accel_state->RenderTex);
282	info->accel_state->RenderTex = NULL;
283    }
284
285    if (!info->accel_state->RenderTex)
286	info->accel_state->RenderCallback = NULL;
287}
288
289static Bool
290AllocateLinear (
291   ScrnInfoPtr pScrn,
292   int sizeNeeded
293){
294   RADEONInfoPtr  info       = RADEONPTR(pScrn);
295   int cpp = info->CurrentLayout.bitsPerPixel / 8;
296
297   info->accel_state->RenderTimeout = currentTime.milliseconds + 30000;
298   info->accel_state->RenderCallback = RenderCallback;
299
300   /* XAA allocates in units of pixels at the screen bpp, so adjust size
301    * appropriately.
302    */
303   sizeNeeded = (sizeNeeded + cpp - 1) / cpp;
304
305   if (info->accel_state->RenderTex) {
306	if (info->accel_state->RenderTex->size >= sizeNeeded)
307	   return TRUE;
308	else {
309	   if (xf86ResizeOffscreenLinear(info->accel_state->RenderTex, sizeNeeded))
310		return TRUE;
311
312	   xf86FreeOffscreenLinear(info->accel_state->RenderTex);
313	   info->accel_state->RenderTex = NULL;
314	}
315   }
316
317   info->accel_state->RenderTex = xf86AllocateOffscreenLinear(pScrn->pScreen, sizeNeeded, 32,
318							      NULL, RemoveLinear, info);
319
320   return (info->accel_state->RenderTex != NULL);
321}
322
323#if X_BYTE_ORDER == X_BIG_ENDIAN
324static Bool RADEONSetupRenderByteswap(ScrnInfoPtr pScrn, int tex_bytepp)
325{
326    RADEONInfoPtr info = RADEONPTR(pScrn);
327    unsigned char *RADEONMMIO = info->MMIO;
328    uint32_t swapper = info->ModeReg->surface_cntl;
329
330    swapper &= ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP |
331		 RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP);
332
333    /* Set up byte swapping for the framebuffer aperture as needed */
334    switch (tex_bytepp) {
335    case 1:
336	break;
337    case 2:
338	swapper |= RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP;
339	break;
340    case 4:
341	swapper |= RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP;
342	break;
343    default:
344	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: Don't know what to do for "
345		   "tex_bytepp == %d!\n", __func__, tex_bytepp);
346	return FALSE;
347    }
348    OUTREG(RADEON_SURFACE_CNTL, swapper);
349    return TRUE;
350}
351
352static void RADEONRestoreByteswap(RADEONInfoPtr info)
353{
354    unsigned char *RADEONMMIO = info->MMIO;
355
356    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
357}
358#endif	/* X_BYTE_ORDER == X_BIG_ENDIAN */
359
360#endif	/* RENDER_GENERIC_HELPER */
361
/* Exactly one of ACCEL_MMIO or ACCEL_CP must be defined when this code is
 * compiled; FUNC_NAME(x) then expands to xMMIO or xCP respectively, so
 * each variant gets its own set of function names.
 */
#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
#error Cannot define both MMIO and CP acceleration!
#endif

/* Token pasting: ## normally, with the empty-comment trick kept as the
 * alternative for preprocessors flagged by UNIXCPP without ANSICPP.
 */
#if !defined(UNIXCPP) || defined(ANSICPP)
#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
#else
#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
#endif

#ifdef ACCEL_MMIO
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
#else
#ifdef ACCEL_CP
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
#else
#error No accel type defined!
#endif
#endif
381
382static Bool FUNC_NAME(R100SetupTexture)(
383	ScrnInfoPtr pScrn,
384	uint32_t format,
385	uint8_t *src,
386	int src_pitch,
387	unsigned int width,
388	unsigned int height,
389	int flags)
390{
391    RADEONInfoPtr info = RADEONPTR(pScrn);
392    uint8_t *dst;
393    uint32_t tex_size = 0, txformat;
394    int dst_pitch, offset, size, tex_bytepp;
395#ifdef ACCEL_CP
396    uint32_t buf_pitch, dst_pitch_off;
397    int x, y;
398    unsigned int hpass;
399    uint8_t *tmp_dst;
400#endif
401    ACCEL_PREAMBLE();
402
403    if ((width > 2047) || (height > 2047))
404	return FALSE;
405
406    txformat = RadeonGetTextureFormat(format);
407    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
408
409    dst_pitch = RADEON_ALIGN(width * tex_bytepp, 64);
410    size = dst_pitch * height;
411
412    info->accel_state->texW[0] = width;
413    info->accel_state->texH[0] = height;
414
415    if ((flags & XAA_RENDER_REPEAT) && (height != 1) &&
416	(RADEON_ALIGN(width * tex_bytepp, 32) != dst_pitch))
417	return FALSE;
418
419#ifndef ACCEL_CP
420
421#if X_BYTE_ORDER == X_BIG_ENDIAN
422    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
423	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
424		   "failed!\n", __func__);
425	return FALSE;
426    }
427#endif
428
429#endif
430
431    if (!AllocateLinear(pScrn, size))
432	return FALSE;
433
434    if (flags & XAA_RENDER_REPEAT) {
435	txformat |= ATILog2(width) << RADEON_TXFORMAT_WIDTH_SHIFT;
436	txformat |= ATILog2(height) << RADEON_TXFORMAT_HEIGHT_SHIFT;
437    } else {
438	tex_size = ((height - 1) << 16) | (width - 1);
439	txformat |= RADEON_TXFORMAT_NON_POWER2;
440    }
441
442    offset = info->accel_state->RenderTex->offset * pScrn->bitsPerPixel / 8;
443    dst = (uint8_t*)(info->FB + offset);
444
445    /* Upload texture to card. */
446
447#ifdef ACCEL_CP
448
449    RADEONHostDataParams( pScrn, dst, dst_pitch, tex_bytepp, &dst_pitch_off, &x, &y );
450
451    while ( height )
452    {
453    	tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
454				      dst_pitch_off, &buf_pitch,
455				      x, &y, &height, &hpass );
456	RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
457				    hpass, buf_pitch, src_pitch );
458	src += hpass * src_pitch;
459    }
460
461    RADEON_PURGE_CACHE();
462    RADEON_WAIT_UNTIL_IDLE();
463
464#else
465
466    if (info->accel_state->accel->NeedToSync)
467	info->accel_state->accel->Sync(pScrn);
468    while (height--) {
469	memcpy(dst, src, width * tex_bytepp);
470	src += src_pitch;
471	dst += dst_pitch;
472    }
473
474#if X_BYTE_ORDER == X_BIG_ENDIAN
475    RADEONRestoreByteswap(info);
476#endif
477
478#endif	/* ACCEL_CP */
479
480    BEGIN_ACCEL(5);
481    OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
482    OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, tex_size);
483    OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, dst_pitch - 32);
484    OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, offset + info->fbLocation +
485					pScrn->fbOffset);
486    OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, RADEON_MAG_FILTER_LINEAR |
487					RADEON_MIN_FILTER_LINEAR |
488					RADEON_CLAMP_S_WRAP |
489					RADEON_CLAMP_T_WRAP);
490    FINISH_ACCEL();
491
492    return TRUE;
493}
494
495static Bool
496FUNC_NAME(R100SetupForCPUToScreenAlphaTexture) (
497	ScrnInfoPtr	pScrn,
498	int		op,
499	CARD16		red,
500	CARD16		green,
501	CARD16		blue,
502	CARD16		alpha,
503	CARD32		maskFormat,
504	CARD32		dstFormat,
505	CARD8		*alphaPtr,
506	int		alphaPitch,
507	int		width,
508	int		height,
509	int		flags
510)
511{
512    RADEONInfoPtr info = RADEONPTR(pScrn);
513    uint32_t colorformat, srccolor, blend_cntl;
514    ACCEL_PREAMBLE();
515
516    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
517    if (blend_cntl == 0)
518	return FALSE;
519
520    if (!info->accel_state->XInited3D)
521	RADEONInit3DEngine(pScrn);
522
523    if (!FUNC_NAME(R100SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
524				     width, height, flags))
525	return FALSE;
526
527    colorformat = RadeonGetColorFormat(dstFormat);
528
529    srccolor = ((alpha & 0xff00) << 16) | ((red & 0xff00) << 8) | (blue >> 8) |
530	(green & 0xff00);
531
532    BEGIN_ACCEL(7);
533    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
534    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
535				  RADEON_TEX_BLEND_0_ENABLE);
536    OUT_ACCEL_REG(RADEON_PP_TFACTOR_0, srccolor);
537    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_A_TFACTOR_COLOR |
538					RADEON_COLOR_ARG_B_T0_ALPHA);
539    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_A_TFACTOR_ALPHA |
540					RADEON_ALPHA_ARG_B_T0_ALPHA);
541    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
542				     RADEON_SE_VTX_FMT_ST0);
543    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
544    FINISH_ACCEL();
545
546    return TRUE;
547}
548
549
550static Bool
551FUNC_NAME(R100SetupForCPUToScreenTexture) (
552	ScrnInfoPtr	pScrn,
553	int		op,
554	CARD32		srcFormat,
555	CARD32		dstFormat,
556	CARD8		*texPtr,
557	int		texPitch,
558	int		width,
559	int		height,
560	int		flags
561)
562{
563    RADEONInfoPtr info = RADEONPTR(pScrn);
564    uint32_t colorformat, blend_cntl;
565    ACCEL_PREAMBLE();
566
567    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
568    if (blend_cntl == 0)
569	return FALSE;
570
571    if (!info->accel_state->XInited3D)
572	RADEONInit3DEngine(pScrn);
573
574    if (!FUNC_NAME(R100SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
575				     height, flags))
576	return FALSE;
577
578    colorformat = RadeonGetColorFormat(dstFormat);
579
580    BEGIN_ACCEL(6);
581    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
582    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
583				  RADEON_TEX_BLEND_0_ENABLE);
584    if (srcFormat != PICT_a8)
585	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_C_T0_COLOR);
586    else
587	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_C_ZERO);
588    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_C_T0_ALPHA);
589    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
590				     RADEON_SE_VTX_FMT_ST0);
591    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
592    FINISH_ACCEL();
593
594    return TRUE;
595}
596
597
598static void
599FUNC_NAME(R100SubsequentCPUToScreenTexture) (
600	ScrnInfoPtr	pScrn,
601	int		dstx,
602	int		dsty,
603	int		srcx,
604	int		srcy,
605	int		width,
606	int		height
607)
608{
609    RADEONInfoPtr  info       = RADEONPTR(pScrn);
610    int byteshift;
611    uint32_t fboffset;
612    float l, t, r, b, fl, fr, ft, fb;
613
614    ACCEL_PREAMBLE();
615
616    /* Note: we can't simply set up the 3D surface at the same location as the
617     * front buffer, because the 2048x2048 limit on coordinates may be smaller
618     * than the (MergedFB) screen.
619     * Can't use arbitrary offsets for color tiling
620     */
621    if (info->tilingEnabled) {
622       /* can't play tricks with x coordinate, or could we - tiling is disabled anyway in that case */
623       fboffset = info->fbLocation + pScrn->fbOffset +
624          (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
625       l = dstx;
626       t = (dsty % 16);
627    }
628    else {
629       byteshift = (pScrn->bitsPerPixel >> 4);
630       fboffset = (info->fbLocation + pScrn->fbOffset +
631		((pScrn->displayWidth * dsty + dstx) << byteshift)) & ~15;
632       l = ((dstx << byteshift) % 16) >> byteshift;
633       t = 0.0;
634    }
635
636    r = width + l;
637    b = height + t;
638    fl = (float)srcx / info->accel_state->texW[0];
639    fr = (float)(srcx + width) / info->accel_state->texW[0];
640    ft = (float)srcy / info->accel_state->texH[0];
641    fb = (float)(srcy + height) / info->accel_state->texH[0];
642
643#ifdef ACCEL_CP
644    BEGIN_RING(25);
645
646    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
647	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
648    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
649    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 17));
650    /* RADEON_SE_VTX_FMT */
651    OUT_RING(RADEON_CP_VC_FRMT_XY |
652	     RADEON_CP_VC_FRMT_ST0);
653    /* SE_VF_CNTL */
654    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
655	     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
656	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
657	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
658	     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
659
660    OUT_RING(F_TO_DW(l));
661    OUT_RING(F_TO_DW(t));
662    OUT_RING(F_TO_DW(fl));
663    OUT_RING(F_TO_DW(ft));
664
665    OUT_RING(F_TO_DW(r));
666    OUT_RING(F_TO_DW(t));
667    OUT_RING(F_TO_DW(fr));
668    OUT_RING(F_TO_DW(ft));
669
670    OUT_RING(F_TO_DW(r));
671    OUT_RING(F_TO_DW(b));
672    OUT_RING(F_TO_DW(fr));
673    OUT_RING(F_TO_DW(fb));
674
675    OUT_RING(F_TO_DW(l));
676    OUT_RING(F_TO_DW(b));
677    OUT_RING(F_TO_DW(fl));
678    OUT_RING(F_TO_DW(fb));
679
680    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
681
682    ADVANCE_RING();
683#else
684    BEGIN_ACCEL(20);
685
686    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
687	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
688    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
689
690    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, RADEON_VF_PRIM_TYPE_TRIANGLE_FAN |
691				     RADEON_VF_PRIM_WALK_DATA |
692				     RADEON_VF_RADEON_MODE |
693				     (4 << RADEON_VF_NUM_VERTICES_SHIFT));
694
695    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
696    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
697    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
698    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
699
700    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
701    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
702    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
703    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
704
705    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
706    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
707    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
708    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
709
710    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
711    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
712    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
713    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
714
715    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
716    FINISH_ACCEL();
717#endif
718
719}
720
721static Bool FUNC_NAME(R200SetupTexture)(
722	ScrnInfoPtr pScrn,
723	uint32_t format,
724	uint8_t *src,
725	int src_pitch,
726	unsigned int width,
727	unsigned int height,
728	int flags)
729{
730    RADEONInfoPtr info = RADEONPTR(pScrn);
731    uint8_t *dst;
732    uint32_t tex_size = 0, txformat;
733    int dst_pitch, offset, size, tex_bytepp;
734#ifdef ACCEL_CP
735    uint32_t buf_pitch, dst_pitch_off;
736    int x, y;
737    unsigned int hpass;
738    uint8_t *tmp_dst;
739#endif
740    ACCEL_PREAMBLE();
741
742    if ((width > 2048) || (height > 2048))
743	return FALSE;
744
745    txformat = RadeonGetTextureFormat(format);
746    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
747
748    dst_pitch = RADEON_ALIGN(width * tex_bytepp, 64);
749    size = dst_pitch * height;
750
751    if ((flags & XAA_RENDER_REPEAT) && (height != 1) &&
752	(RADEON_ALIGN(width * tex_bytepp, 32) != dst_pitch))
753	return FALSE;
754
755#ifndef ACCEL_CP
756
757#if X_BYTE_ORDER == X_BIG_ENDIAN
758    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
759	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
760		   "failed!\n", __func__);
761	return FALSE;
762    }
763#endif
764
765#endif
766
767    if (!AllocateLinear(pScrn, size))
768	return FALSE;
769
770    if (flags & XAA_RENDER_REPEAT) {
771	txformat |= ATILog2(width) << R200_TXFORMAT_WIDTH_SHIFT;
772	txformat |= ATILog2(height) << R200_TXFORMAT_HEIGHT_SHIFT;
773    } else {
774	tex_size = ((height - 1) << 16) | (width - 1);
775	txformat |= RADEON_TXFORMAT_NON_POWER2;
776    }
777
778    info->accel_state->texW[0] = width;
779    info->accel_state->texH[0] = height;
780
781    offset = info->accel_state->RenderTex->offset * pScrn->bitsPerPixel / 8;
782    dst = (uint8_t*)(info->FB + offset);
783
784    /* Upload texture to card. */
785
786#ifdef ACCEL_CP
787
788    RADEONHostDataParams( pScrn, dst, dst_pitch, tex_bytepp, &dst_pitch_off, &x, &y );
789
790    while ( height )
791    {
792        tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
793				      dst_pitch_off, &buf_pitch,
794				      x, &y, &height, &hpass );
795	RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
796				    hpass, buf_pitch, src_pitch );
797	src += hpass * src_pitch;
798    }
799
800    RADEON_PURGE_CACHE();
801    RADEON_WAIT_UNTIL_IDLE();
802
803#else
804
805    if (info->accel_state->accel->NeedToSync)
806	info->accel_state->accel->Sync(pScrn);
807
808    while (height--) {
809	memcpy(dst, src, width * tex_bytepp);
810	src += src_pitch;
811	dst += dst_pitch;
812    }
813
814#if X_BYTE_ORDER == X_BIG_ENDIAN
815    RADEONRestoreByteswap(info);
816#endif
817
818#endif	/* ACCEL_CP */
819
820    BEGIN_ACCEL(6);
821    OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
822    OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
823    OUT_ACCEL_REG(R200_PP_TXSIZE_0, tex_size);
824    OUT_ACCEL_REG(R200_PP_TXPITCH_0, dst_pitch - 32);
825    OUT_ACCEL_REG(R200_PP_TXOFFSET_0, offset + info->fbLocation +
826				      pScrn->fbOffset);
827    OUT_ACCEL_REG(R200_PP_TXFILTER_0, R200_MAG_FILTER_NEAREST |
828				      R200_MIN_FILTER_NEAREST |
829				      R200_CLAMP_S_WRAP |
830				      R200_CLAMP_T_WRAP);
831    FINISH_ACCEL();
832
833    return TRUE;
834}
835
836static Bool
837FUNC_NAME(R200SetupForCPUToScreenAlphaTexture) (
838	ScrnInfoPtr	pScrn,
839	int		op,
840	CARD16		red,
841	CARD16		green,
842	CARD16		blue,
843	CARD16		alpha,
844	CARD32		maskFormat,
845	CARD32		dstFormat,
846	CARD8		*alphaPtr,
847	int		alphaPitch,
848	int		width,
849	int		height,
850	int		flags
851)
852{
853    RADEONInfoPtr  info = RADEONPTR(pScrn);
854    uint32_t colorformat, srccolor, blend_cntl;
855    ACCEL_PREAMBLE();
856
857    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
858    if (blend_cntl == 0)
859	return FALSE;
860
861    if (!info->accel_state->XInited3D)
862	RADEONInit3DEngine(pScrn);
863
864    if (!FUNC_NAME(R200SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
865				     width, height, flags))
866	return FALSE;
867
868    colorformat = RadeonGetColorFormat(dstFormat);
869
870    srccolor = ((alpha & 0xff00) << 16) | ((red & 0xff00) << 8) | (blue >> 8) |
871	(green & 0xff00);
872
873    BEGIN_ACCEL(10);
874    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
875    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
876				  RADEON_TEX_BLEND_0_ENABLE);
877    OUT_ACCEL_REG(R200_PP_TFACTOR_0, srccolor);
878    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_A_TFACTOR_COLOR |
879				      R200_TXC_ARG_B_R0_ALPHA);
880    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
881    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_A_TFACTOR_ALPHA |
882				      R200_TXA_ARG_B_R0_ALPHA);
883    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
884    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
885    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
886    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
887    FINISH_ACCEL();
888
889    return TRUE;
890}
891
892static Bool
893FUNC_NAME(R200SetupForCPUToScreenTexture) (
894	ScrnInfoPtr	pScrn,
895	int		op,
896	CARD32		srcFormat,
897	CARD32		dstFormat,
898	CARD8		*texPtr,
899	int		texPitch,
900	int		width,
901	int		height,
902	int		flags
903)
904{
905    RADEONInfoPtr  info       = RADEONPTR(pScrn);
906    uint32_t colorformat, blend_cntl;
907    ACCEL_PREAMBLE();
908
909    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
910    if (blend_cntl == 0)
911	return FALSE;
912
913    if (!info->accel_state->XInited3D)
914	RADEONInit3DEngine(pScrn);
915
916    if (!FUNC_NAME(R200SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
917				     height, flags))
918	return FALSE;
919
920    colorformat = RadeonGetColorFormat(dstFormat);
921
922    BEGIN_ACCEL(9);
923    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
924    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
925				  RADEON_TEX_BLEND_0_ENABLE);
926    if (srcFormat != PICT_a8)
927	OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_C_R0_COLOR);
928    else
929	OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_C_ZERO);
930    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
931    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_C_R0_ALPHA);
932    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
933    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
934    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
935    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
936    FINISH_ACCEL();
937
938    return TRUE;
939}
940
941static void
942FUNC_NAME(R200SubsequentCPUToScreenTexture) (
943	ScrnInfoPtr	pScrn,
944	int		dstx,
945	int		dsty,
946	int		srcx,
947	int		srcy,
948	int		width,
949	int		height
950)
951{
952    RADEONInfoPtr  info       = RADEONPTR(pScrn);
953    int byteshift;
954    uint32_t fboffset;
955    float l, t, r, b, fl, fr, ft, fb;
956    ACCEL_PREAMBLE();
957
958    /* Note: we can't simply set up the 3D surface at the same location as the
959     * front buffer, because the 2048x2048 limit on coordinates may be smaller
960     * than the (MergedFB) screen.
961     * Can't use arbitrary offsets for color tiling
962     */
963    if (info->tilingEnabled) {
964       /* can't play tricks with x coordinate, or could we - tiling is disabled anyway in that case */
965       fboffset = info->fbLocation + pScrn->fbOffset +
966          (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
967       l = dstx;
968       t = (dsty % 16);
969    }
970    else {
971       byteshift = (pScrn->bitsPerPixel >> 4);
972       fboffset = (info->fbLocation + pScrn->fbOffset +
973		((pScrn->displayWidth * dsty + dstx) << byteshift)) & ~15;
974       l = ((dstx << byteshift) % 16) >> byteshift;
975       t = 0.0;
976    }
977
978    r = width + l;
979    b = height + t;
980    fl = (float)srcx / info->accel_state->texW[0];
981    fr = (float)(srcx + width) / info->accel_state->texW[0];
982    ft = (float)srcy / info->accel_state->texH[0];
983    fb = (float)(srcy + height) / info->accel_state->texH[0];
984
985#ifdef ACCEL_CP
986    BEGIN_RING(24);
987
988    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
989	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
990    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
991
992    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 16));
993    /* RADEON_SE_VF_CNTL */
994    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
995	     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
996	     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
997
998    OUT_RING(F_TO_DW(l));
999    OUT_RING(F_TO_DW(t));
1000    OUT_RING(F_TO_DW(fl));
1001    OUT_RING(F_TO_DW(ft));
1002
1003    OUT_RING(F_TO_DW(r));
1004    OUT_RING(F_TO_DW(t));
1005    OUT_RING(F_TO_DW(fr));
1006    OUT_RING(F_TO_DW(ft));
1007
1008    OUT_RING(F_TO_DW(r));
1009    OUT_RING(F_TO_DW(b));
1010    OUT_RING(F_TO_DW(fr));
1011    OUT_RING(F_TO_DW(fb));
1012
1013    OUT_RING(F_TO_DW(l));
1014    OUT_RING(F_TO_DW(b));
1015    OUT_RING(F_TO_DW(fl));
1016    OUT_RING(F_TO_DW(fb));
1017
1018    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
1019
1020    ADVANCE_RING();
1021#else
1022    BEGIN_ACCEL(20);
1023
1024    /* Note: we can't simply setup 3D surface at the same location as the front buffer,
1025       some apps may draw offscreen pictures out of the limitation of radeon 3D surface.
1026    */
1027    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, pScrn->displayWidth |
1028	((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0));
1029    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);
1030
1031    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
1032				      RADEON_VF_PRIM_WALK_DATA |
1033				      4 << RADEON_VF_NUM_VERTICES_SHIFT));
1034
1035    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
1036    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
1037    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
1038    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
1039
1040    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
1041    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(t));
1042    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
1043    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(ft));
1044
1045    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(r));
1046    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
1047    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fr));
1048    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
1049
1050    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(l));
1051    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(b));
1052    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fl));
1053    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(fb));
1054
1055    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
1056
1057    FINISH_ACCEL();
1058#endif
1059}
1060
1061#undef FUNC_NAME
1062#endif /* USE_XAA */
1063