radeon_exa_funcs.c revision 209ff23f
1209ff23fSmrg/*
2209ff23fSmrg * Copyright 2005 Eric Anholt
3209ff23fSmrg * Copyright 2005 Benjamin Herrenschmidt
4209ff23fSmrg * Copyright 2006 Tungsten Graphics, Inc.
5209ff23fSmrg * All Rights Reserved.
6209ff23fSmrg *
7209ff23fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
8209ff23fSmrg * copy of this software and associated documentation files (the "Software"),
9209ff23fSmrg * to deal in the Software without restriction, including without limitation
10209ff23fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11209ff23fSmrg * and/or sell copies of the Software, and to permit persons to whom the
12209ff23fSmrg * Software is furnished to do so, subject to the following conditions:
13209ff23fSmrg *
14209ff23fSmrg * The above copyright notice and this permission notice (including the next
15209ff23fSmrg * paragraph) shall be included in all copies or substantial portions of the
16209ff23fSmrg * Software.
17209ff23fSmrg *
18209ff23fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19209ff23fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20209ff23fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21209ff23fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22209ff23fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23209ff23fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24209ff23fSmrg * SOFTWARE.
25209ff23fSmrg *
26209ff23fSmrg * Authors:
27209ff23fSmrg *    Eric Anholt <anholt@FreeBSD.org>
28209ff23fSmrg *    Zack Rusin <zrusin@trolltech.com>
29209ff23fSmrg *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
30209ff23fSmrg *    Michel Dänzer <michel@tungstengraphics.com>
31209ff23fSmrg *
32209ff23fSmrg */
33209ff23fSmrg
34209ff23fSmrg#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
35209ff23fSmrg#error Cannot define both MMIO and CP acceleration!
36209ff23fSmrg#endif
37209ff23fSmrg
38209ff23fSmrg#if !defined(UNIXCPP) || defined(ANSICPP)
39209ff23fSmrg#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
40209ff23fSmrg#else
41209ff23fSmrg#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
42209ff23fSmrg#endif
43209ff23fSmrg
44209ff23fSmrg#ifdef ACCEL_MMIO
45209ff23fSmrg#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
46209ff23fSmrg#else
47209ff23fSmrg#ifdef ACCEL_CP
48209ff23fSmrg#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
49209ff23fSmrg#else
50209ff23fSmrg#error No accel type defined!
51209ff23fSmrg#endif
52209ff23fSmrg#endif
53209ff23fSmrg
54209ff23fSmrg#include <errno.h>
55209ff23fSmrg#include <string.h>
56209ff23fSmrg
57209ff23fSmrg#include "radeon.h"
58209ff23fSmrg
59209ff23fSmrg#include "exa.h"
60209ff23fSmrg
61209ff23fSmrgstatic int
62209ff23fSmrgFUNC_NAME(RADEONMarkSync)(ScreenPtr pScreen)
63209ff23fSmrg{
64209ff23fSmrg    RINFO_FROM_SCREEN(pScreen);
65209ff23fSmrg
66209ff23fSmrg    TRACE;
67209ff23fSmrg
68209ff23fSmrg    return ++info->exaSyncMarker;
69209ff23fSmrg}
70209ff23fSmrg
71209ff23fSmrgstatic void
72209ff23fSmrgFUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
73209ff23fSmrg{
74209ff23fSmrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
75209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
76209ff23fSmrg
77209ff23fSmrg    TRACE;
78209ff23fSmrg
79209ff23fSmrg    if (info->exaMarkerSynced != marker) {
80209ff23fSmrg	FUNC_NAME(RADEONWaitForIdle)(pScrn);
81209ff23fSmrg	info->exaMarkerSynced = marker;
82209ff23fSmrg    }
83209ff23fSmrg
84209ff23fSmrg    RADEONPTR(pScrn)->engineMode = EXA_ENGINEMODE_UNKNOWN;
85209ff23fSmrg}
86209ff23fSmrg
87209ff23fSmrgstatic Bool
88209ff23fSmrgFUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
89209ff23fSmrg{
90209ff23fSmrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
91209ff23fSmrg    uint32_t datatype, dst_pitch_offset;
92209ff23fSmrg    ACCEL_PREAMBLE();
93209ff23fSmrg
94209ff23fSmrg    TRACE;
95209ff23fSmrg
96209ff23fSmrg    if (pPix->drawable.bitsPerPixel == 24)
97209ff23fSmrg	RADEON_FALLBACK(("24bpp unsupported\n"));
98209ff23fSmrg    if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype))
99209ff23fSmrg	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
100209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pPix, &dst_pitch_offset))
101209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n"));
102209ff23fSmrg
103209ff23fSmrg    RADEON_SWITCH_TO_2D();
104209ff23fSmrg
105209ff23fSmrg    BEGIN_ACCEL(5);
106209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
107209ff23fSmrg	    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
108209ff23fSmrg	    RADEON_GMC_BRUSH_SOLID_COLOR |
109209ff23fSmrg	    (datatype << 8) |
110209ff23fSmrg	    RADEON_GMC_SRC_DATATYPE_COLOR |
111209ff23fSmrg	    RADEON_ROP[alu].pattern |
112209ff23fSmrg	    RADEON_GMC_CLR_CMP_CNTL_DIS);
113209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, fg);
114209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, pm);
115209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_CNTL,
116209ff23fSmrg	(RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM));
117209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
118209ff23fSmrg    FINISH_ACCEL();
119209ff23fSmrg
120209ff23fSmrg    return TRUE;
121209ff23fSmrg}
122209ff23fSmrg
123209ff23fSmrg
124209ff23fSmrgstatic void
125209ff23fSmrgFUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2)
126209ff23fSmrg{
127209ff23fSmrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
128209ff23fSmrg    ACCEL_PREAMBLE();
129209ff23fSmrg
130209ff23fSmrg    TRACE;
131209ff23fSmrg
132209ff23fSmrg    BEGIN_ACCEL(2);
133209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1);
134209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, ((y2 - y1) << 16) | (x2 - x1));
135209ff23fSmrg    FINISH_ACCEL();
136209ff23fSmrg}
137209ff23fSmrg
138209ff23fSmrgstatic void
139209ff23fSmrgFUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix)
140209ff23fSmrg{
141209ff23fSmrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
142209ff23fSmrg    ACCEL_PREAMBLE();
143209ff23fSmrg
144209ff23fSmrg    TRACE;
145209ff23fSmrg
146209ff23fSmrg    BEGIN_ACCEL(2);
147209ff23fSmrg    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
148209ff23fSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
149209ff23fSmrg                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
150209ff23fSmrg    FINISH_ACCEL();
151209ff23fSmrg}
152209ff23fSmrg
153209ff23fSmrgvoid
154209ff23fSmrgFUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
155209ff23fSmrg			       uint32_t dst_pitch_offset, uint32_t datatype, int rop,
156209ff23fSmrg			       Pixel planemask)
157209ff23fSmrg{
158209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
159209ff23fSmrg    ACCEL_PREAMBLE();
160209ff23fSmrg
161209ff23fSmrg    RADEON_SWITCH_TO_2D();
162209ff23fSmrg
163209ff23fSmrg    BEGIN_ACCEL(5);
164209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
165209ff23fSmrg	RADEON_GMC_DST_PITCH_OFFSET_CNTL |
166209ff23fSmrg	RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
167209ff23fSmrg	RADEON_GMC_BRUSH_NONE |
168209ff23fSmrg	(datatype << 8) |
169209ff23fSmrg	RADEON_GMC_SRC_DATATYPE_COLOR |
170209ff23fSmrg	RADEON_ROP[rop].rop |
171209ff23fSmrg	RADEON_DP_SRC_SOURCE_MEMORY |
172209ff23fSmrg	RADEON_GMC_CLR_CMP_CNTL_DIS);
173209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask);
174209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_CNTL,
175209ff23fSmrg	((info->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
176209ff23fSmrg	 (info->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)));
177209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
178209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
179209ff23fSmrg    FINISH_ACCEL();
180209ff23fSmrg}
181209ff23fSmrg
182209ff23fSmrgstatic Bool
183209ff23fSmrgFUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
184209ff23fSmrg			     int xdir, int ydir,
185209ff23fSmrg			     int rop,
186209ff23fSmrg			     Pixel planemask)
187209ff23fSmrg{
188209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
189209ff23fSmrg    uint32_t datatype, src_pitch_offset, dst_pitch_offset;
190209ff23fSmrg
191209ff23fSmrg    TRACE;
192209ff23fSmrg
193209ff23fSmrg    info->xdir = xdir;
194209ff23fSmrg    info->ydir = ydir;
195209ff23fSmrg
196209ff23fSmrg    if (pDst->drawable.bitsPerPixel == 24)
197209ff23fSmrg	RADEON_FALLBACK(("24bpp unsupported"));
198209ff23fSmrg    if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype))
199209ff23fSmrg	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
200209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset))
201209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n"));
202209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset))
203209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n"));
204209ff23fSmrg
205209ff23fSmrg    FUNC_NAME(RADEONDoPrepareCopy)(pScrn, src_pitch_offset, dst_pitch_offset,
206209ff23fSmrg				   datatype, rop, planemask);
207209ff23fSmrg
208209ff23fSmrg    return TRUE;
209209ff23fSmrg}
210209ff23fSmrg
211209ff23fSmrgvoid
212209ff23fSmrgFUNC_NAME(RADEONCopy)(PixmapPtr pDst,
213209ff23fSmrg		      int srcX, int srcY,
214209ff23fSmrg		      int dstX, int dstY,
215209ff23fSmrg		      int w, int h)
216209ff23fSmrg{
217209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
218209ff23fSmrg    ACCEL_PREAMBLE();
219209ff23fSmrg
220209ff23fSmrg    TRACE;
221209ff23fSmrg
222209ff23fSmrg    if (info->xdir < 0) {
223209ff23fSmrg	srcX += w - 1;
224209ff23fSmrg	dstX += w - 1;
225209ff23fSmrg    }
226209ff23fSmrg    if (info->ydir < 0) {
227209ff23fSmrg	srcY += h - 1;
228209ff23fSmrg	dstY += h - 1;
229209ff23fSmrg    }
230209ff23fSmrg
231209ff23fSmrg    BEGIN_ACCEL(3);
232209ff23fSmrg
233209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_Y_X,	   (srcY << 16) | srcX);
234209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X,	   (dstY << 16) | dstX);
235209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h  << 16) | w);
236209ff23fSmrg
237209ff23fSmrg    FINISH_ACCEL();
238209ff23fSmrg}
239209ff23fSmrg
240209ff23fSmrgstatic void
241209ff23fSmrgFUNC_NAME(RADEONDoneCopy)(PixmapPtr pDst)
242209ff23fSmrg{
243209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
244209ff23fSmrg    ACCEL_PREAMBLE();
245209ff23fSmrg
246209ff23fSmrg    TRACE;
247209ff23fSmrg
248209ff23fSmrg    BEGIN_ACCEL(2);
249209ff23fSmrg    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
250209ff23fSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
251209ff23fSmrg                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
252209ff23fSmrg    FINISH_ACCEL();
253209ff23fSmrg}
254209ff23fSmrg
255209ff23fSmrgstatic Bool
256209ff23fSmrgFUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, int x, int y, int w, int h,
257209ff23fSmrg				char *src, int src_pitch)
258209ff23fSmrg{
259209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
260209ff23fSmrg    uint8_t	   *dst	     = info->FB + exaGetPixmapOffset(pDst);
261209ff23fSmrg    unsigned int   dst_pitch = exaGetPixmapPitch(pDst);
262209ff23fSmrg    unsigned int   bpp	     = pDst->drawable.bitsPerPixel;
263209ff23fSmrg#ifdef ACCEL_CP
264209ff23fSmrg    unsigned int   hpass;
265209ff23fSmrg    uint32_t	   buf_pitch, dst_pitch_off;
266209ff23fSmrg#endif
267209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
268209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
269209ff23fSmrg    unsigned int swapper = info->ModeReg->surface_cntl &
270209ff23fSmrg	    ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
271209ff23fSmrg	      RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
272209ff23fSmrg#endif
273209ff23fSmrg
274209ff23fSmrg    TRACE;
275209ff23fSmrg
276209ff23fSmrg    if (bpp < 8)
277209ff23fSmrg	return FALSE;
278209ff23fSmrg
279209ff23fSmrg#ifdef ACCEL_CP
280209ff23fSmrg    if (info->directRenderingEnabled &&
281209ff23fSmrg	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_off)) {
282209ff23fSmrg	uint8_t *buf;
283209ff23fSmrg	int cpp = bpp / 8;
284209ff23fSmrg	ACCEL_PREAMBLE();
285209ff23fSmrg
286209ff23fSmrg	RADEON_SWITCH_TO_2D();
287209ff23fSmrg	while ((buf = RADEONHostDataBlit(pScrn,
288209ff23fSmrg					 cpp, w, dst_pitch_off, &buf_pitch,
289209ff23fSmrg					 x, &y, (unsigned int*)&h, &hpass)) != 0) {
290209ff23fSmrg	    RADEONHostDataBlitCopyPass(pScrn, cpp, buf, (uint8_t *)src,
291209ff23fSmrg				       hpass, buf_pitch, src_pitch);
292209ff23fSmrg	    src += hpass * src_pitch;
293209ff23fSmrg	}
294209ff23fSmrg
295209ff23fSmrg	exaMarkSync(pDst->drawable.pScreen);
296209ff23fSmrg	return TRUE;
297209ff23fSmrg  }
298209ff23fSmrg#endif
299209ff23fSmrg
300209ff23fSmrg    /* Do we need that sync here ? probably not .... */
301209ff23fSmrg    exaWaitSync(pDst->drawable.pScreen);
302209ff23fSmrg
303209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
304209ff23fSmrg    switch(bpp) {
305209ff23fSmrg    case 15:
306209ff23fSmrg    case 16:
307209ff23fSmrg	swapper |= RADEON_NONSURF_AP0_SWP_16BPP
308209ff23fSmrg		|  RADEON_NONSURF_AP1_SWP_16BPP;
309209ff23fSmrg	break;
310209ff23fSmrg    case 24:
311209ff23fSmrg    case 32:
312209ff23fSmrg	swapper |= RADEON_NONSURF_AP0_SWP_32BPP
313209ff23fSmrg		|  RADEON_NONSURF_AP1_SWP_32BPP;
314209ff23fSmrg	break;
315209ff23fSmrg    }
316209ff23fSmrg    OUTREG(RADEON_SURFACE_CNTL, swapper);
317209ff23fSmrg#endif
318209ff23fSmrg    w *= bpp / 8;
319209ff23fSmrg    dst += (x * bpp / 8) + (y * dst_pitch);
320209ff23fSmrg
321209ff23fSmrg    while (h--) {
322209ff23fSmrg	memcpy(dst, src, w);
323209ff23fSmrg	src += src_pitch;
324209ff23fSmrg	dst += dst_pitch;
325209ff23fSmrg    }
326209ff23fSmrg
327209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
328209ff23fSmrg    /* restore byte swapping */
329209ff23fSmrg    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
330209ff23fSmrg#endif
331209ff23fSmrg
332209ff23fSmrg    return TRUE;
333209ff23fSmrg}
334209ff23fSmrg
335209ff23fSmrg#ifdef ACCEL_CP
336209ff23fSmrg/* Emit blit with arbitrary source and destination offsets and pitches */
337209ff23fSmrgstatic void
338209ff23fSmrgRADEONBlitChunk(ScrnInfoPtr pScrn, uint32_t datatype, uint32_t src_pitch_offset,
339209ff23fSmrg		uint32_t dst_pitch_offset, int srcX, int srcY, int dstX, int dstY,
340209ff23fSmrg		int w, int h)
341209ff23fSmrg{
342209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
343209ff23fSmrg    ACCEL_PREAMBLE();
344209ff23fSmrg
345209ff23fSmrg    BEGIN_ACCEL(6);
346209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
347209ff23fSmrg		  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
348209ff23fSmrg		  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
349209ff23fSmrg		  RADEON_GMC_BRUSH_NONE |
350209ff23fSmrg		  (datatype << 8) |
351209ff23fSmrg		  RADEON_GMC_SRC_DATATYPE_COLOR |
352209ff23fSmrg		  RADEON_ROP3_S |
353209ff23fSmrg		  RADEON_DP_SRC_SOURCE_MEMORY |
354209ff23fSmrg		  RADEON_GMC_CLR_CMP_CNTL_DIS |
355209ff23fSmrg		  RADEON_GMC_WR_MSK_DIS);
356209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
357209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
358209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX);
359209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX);
360209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w);
361209ff23fSmrg    FINISH_ACCEL();
362209ff23fSmrg    BEGIN_ACCEL(2);
363209ff23fSmrg    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
364209ff23fSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
365209ff23fSmrg                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
366209ff23fSmrg    FINISH_ACCEL();
367209ff23fSmrg}
368209ff23fSmrg#endif
369209ff23fSmrg
370209ff23fSmrgstatic Bool
371209ff23fSmrgFUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h,
372209ff23fSmrg				    char *dst, int dst_pitch)
373209ff23fSmrg{
374209ff23fSmrg    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
375209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
376209ff23fSmrg    unsigned char *RADEONMMIO = info->MMIO;
377209ff23fSmrg    unsigned int swapper = info->ModeReg->surface_cntl &
378209ff23fSmrg	    ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
379209ff23fSmrg	      RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
380209ff23fSmrg#endif
381209ff23fSmrg    uint8_t	  *src	     = info->FB + exaGetPixmapOffset(pSrc);
382209ff23fSmrg    int		   src_pitch = exaGetPixmapPitch(pSrc);
383209ff23fSmrg    int		   bpp	     = pSrc->drawable.bitsPerPixel;
384209ff23fSmrg#ifdef ACCEL_CP
385209ff23fSmrg    uint32_t datatype, src_pitch_offset, scratch_pitch = (w * bpp/8 + 63) & ~63, scratch_off = 0;
386209ff23fSmrg    drmBufPtr scratch;
387209ff23fSmrg#endif
388209ff23fSmrg
389209ff23fSmrg    TRACE;
390209ff23fSmrg
391209ff23fSmrg#ifdef ACCEL_CP
392209ff23fSmrg    /*
393209ff23fSmrg     * Try to accelerate download. Use an indirect buffer as scratch space,
394209ff23fSmrg     * blitting the bits to one half while copying them out of the other one and
395209ff23fSmrg     * then swapping the halves.
396209ff23fSmrg     */
397209ff23fSmrg    if (info->accelDFS && bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) &&
398209ff23fSmrg	RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) &&
399209ff23fSmrg	(scratch = RADEONCPGetBuffer(pScrn)))
400209ff23fSmrg    {
401209ff23fSmrg	int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8;
402209ff23fSmrg	int hpass = min(h, scratch->total/2 / scratch_pitch);
403209ff23fSmrg	uint32_t scratch_pitch_offset = scratch_pitch << 16
404209ff23fSmrg				    | (info->gartLocation + info->bufStart
405209ff23fSmrg				       + scratch->idx * scratch->total) >> 10;
406209ff23fSmrg	drmRadeonIndirect indirect;
407209ff23fSmrg	ACCEL_PREAMBLE();
408209ff23fSmrg
409209ff23fSmrg	RADEON_SWITCH_TO_2D();
410209ff23fSmrg
411209ff23fSmrg	/* Kick the first blit as early as possible */
412209ff23fSmrg	RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset,
413209ff23fSmrg			x, y, 0, 0, w, hpass);
414209ff23fSmrg	FLUSH_RING();
415209ff23fSmrg
416209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
417209ff23fSmrg	switch (bpp) {
418209ff23fSmrg	case 16:
419209ff23fSmrg	  swap = RADEON_HOST_DATA_SWAP_16BIT;
420209ff23fSmrg	  break;
421209ff23fSmrg	case 32:
422209ff23fSmrg	  swap = RADEON_HOST_DATA_SWAP_32BIT;
423209ff23fSmrg	  break;
424209ff23fSmrg	}
425209ff23fSmrg#endif
426209ff23fSmrg
427209ff23fSmrg	while (h) {
428209ff23fSmrg	    int oldhpass = hpass, i = 0;
429209ff23fSmrg
430209ff23fSmrg	    src = (uint8_t*)scratch->address + scratch_off;
431209ff23fSmrg
432209ff23fSmrg	    y += oldhpass;
433209ff23fSmrg	    h -= oldhpass;
434209ff23fSmrg	    hpass = min(h, scratch->total/2 / scratch_pitch);
435209ff23fSmrg
436209ff23fSmrg	    /* Prepare next blit if anything's left */
437209ff23fSmrg	    if (hpass) {
438209ff23fSmrg		scratch_off = scratch->total/2 - scratch_off;
439209ff23fSmrg		RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset + (scratch_off >> 10),
440209ff23fSmrg				x, y, 0, 0, w, hpass);
441209ff23fSmrg	    }
442209ff23fSmrg
443209ff23fSmrg	    /*
444209ff23fSmrg	     * Wait for previous blit to complete.
445209ff23fSmrg	     *
446209ff23fSmrg	     * XXX: Doing here essentially the same things this ioctl does in
447209ff23fSmrg	     * the DRM results in corruption with 'small' transfers, apparently
448209ff23fSmrg	     * because the data doesn't actually land in system RAM before the
449209ff23fSmrg	     * memcpy. I suspect the ioctl helps mostly due to its latency; what
450209ff23fSmrg	     * we'd really need is a way to reliably wait for the host interface
451209ff23fSmrg	     * to be done with pushing the data to the host.
452209ff23fSmrg	     */
453209ff23fSmrg	    while ((drmCommandNone(info->drmFD, DRM_RADEON_CP_IDLE) == -EBUSY)
454209ff23fSmrg		   && (i++ < RADEON_TIMEOUT))
455209ff23fSmrg		;
456209ff23fSmrg
457209ff23fSmrg	    /* Kick next blit */
458209ff23fSmrg	    if (hpass)
459209ff23fSmrg		FLUSH_RING();
460209ff23fSmrg
461209ff23fSmrg	    /* Copy out data from previous blit */
462209ff23fSmrg	    if (wpass == scratch_pitch && wpass == dst_pitch) {
463209ff23fSmrg		RADEONCopySwap((uint8_t*)dst, src, wpass * oldhpass, swap);
464209ff23fSmrg		dst += dst_pitch * oldhpass;
465209ff23fSmrg	    } else while (oldhpass--) {
466209ff23fSmrg		RADEONCopySwap((uint8_t*)dst, src, wpass, swap);
467209ff23fSmrg		src += scratch_pitch;
468209ff23fSmrg		dst += dst_pitch;
469209ff23fSmrg	    }
470209ff23fSmrg	}
471209ff23fSmrg
472209ff23fSmrg	indirect.idx = scratch->idx;
473209ff23fSmrg	indirect.start = indirect.end = 0;
474209ff23fSmrg	indirect.discard = 1;
475209ff23fSmrg
476209ff23fSmrg	drmCommandWriteRead(info->drmFD, DRM_RADEON_INDIRECT,
477209ff23fSmrg			    &indirect, sizeof(drmRadeonIndirect));
478209ff23fSmrg
479209ff23fSmrg	info->exaMarkerSynced = info->exaSyncMarker;
480209ff23fSmrg
481209ff23fSmrg	return TRUE;
482209ff23fSmrg    }
483209ff23fSmrg#endif
484209ff23fSmrg
485209ff23fSmrg    /* Can't accelerate download */
486209ff23fSmrg    exaWaitSync(pSrc->drawable.pScreen);
487209ff23fSmrg
488209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
489209ff23fSmrg    switch(bpp) {
490209ff23fSmrg    case 15:
491209ff23fSmrg    case 16:
492209ff23fSmrg	swapper |= RADEON_NONSURF_AP0_SWP_16BPP
493209ff23fSmrg		|  RADEON_NONSURF_AP1_SWP_16BPP;
494209ff23fSmrg	break;
495209ff23fSmrg    case 24:
496209ff23fSmrg    case 32:
497209ff23fSmrg	swapper |= RADEON_NONSURF_AP0_SWP_32BPP
498209ff23fSmrg		|  RADEON_NONSURF_AP1_SWP_32BPP;
499209ff23fSmrg	break;
500209ff23fSmrg    }
501209ff23fSmrg    OUTREG(RADEON_SURFACE_CNTL, swapper);
502209ff23fSmrg#endif
503209ff23fSmrg
504209ff23fSmrg    src += (x * bpp / 8) + (y * src_pitch);
505209ff23fSmrg    w *= bpp / 8;
506209ff23fSmrg
507209ff23fSmrg    while (h--) {
508209ff23fSmrg	memcpy(dst, src, w);
509209ff23fSmrg	src += src_pitch;
510209ff23fSmrg	dst += dst_pitch;
511209ff23fSmrg    }
512209ff23fSmrg
513209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
514209ff23fSmrg    /* restore byte swapping */
515209ff23fSmrg    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
516209ff23fSmrg#endif
517209ff23fSmrg
518209ff23fSmrg    return TRUE;
519209ff23fSmrg}
520209ff23fSmrg
521209ff23fSmrgBool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
522209ff23fSmrg{
523209ff23fSmrg    RINFO_FROM_SCREEN(pScreen);
524209ff23fSmrg
525209ff23fSmrg    if (info->exa == NULL) {
526209ff23fSmrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
527209ff23fSmrg	return FALSE;
528209ff23fSmrg    }
529209ff23fSmrg
530209ff23fSmrg    info->exa->exa_major = EXA_VERSION_MAJOR;
531209ff23fSmrg    info->exa->exa_minor = EXA_VERSION_MINOR;
532209ff23fSmrg
533209ff23fSmrg    info->exa->PrepareSolid = FUNC_NAME(RADEONPrepareSolid);
534209ff23fSmrg    info->exa->Solid = FUNC_NAME(RADEONSolid);
535209ff23fSmrg    info->exa->DoneSolid = FUNC_NAME(RADEONDoneSolid);
536209ff23fSmrg
537209ff23fSmrg    info->exa->PrepareCopy = FUNC_NAME(RADEONPrepareCopy);
538209ff23fSmrg    info->exa->Copy = FUNC_NAME(RADEONCopy);
539209ff23fSmrg    info->exa->DoneCopy = FUNC_NAME(RADEONDoneCopy);
540209ff23fSmrg
541209ff23fSmrg    info->exa->MarkSync = FUNC_NAME(RADEONMarkSync);
542209ff23fSmrg    info->exa->WaitMarker = FUNC_NAME(RADEONSync);
543209ff23fSmrg    info->exa->UploadToScreen = FUNC_NAME(RADEONUploadToScreen);
544209ff23fSmrg    info->exa->DownloadFromScreen = FUNC_NAME(RADEONDownloadFromScreen);
545209ff23fSmrg
546209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
547209ff23fSmrg    info->exa->PrepareAccess = RADEONPrepareAccess;
548209ff23fSmrg    info->exa->FinishAccess = RADEONFinishAccess;
549209ff23fSmrg#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
550209ff23fSmrg
551209ff23fSmrg    info->exa->flags = EXA_OFFSCREEN_PIXMAPS;
552209ff23fSmrg    info->exa->pixmapOffsetAlign = RADEON_BUFFER_ALIGN + 1;
553209ff23fSmrg    info->exa->pixmapPitchAlign = 64;
554209ff23fSmrg
555209ff23fSmrg#ifdef RENDER
556209ff23fSmrg    if (info->RenderAccel) {
557209ff23fSmrg	if (info->ChipFamily >= CHIP_FAMILY_R600)
558209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
559209ff23fSmrg			       "unsupported on R600 and newer cards.\n");
560209ff23fSmrg	else if (IS_R300_3D || IS_R500_3D) {
561209ff23fSmrg	    if ((info->ChipFamily < CHIP_FAMILY_RS400)
562209ff23fSmrg#ifdef XF86DRI
563209ff23fSmrg		|| (info->directRenderingEnabled)
564209ff23fSmrg#endif
565209ff23fSmrg		) {
566209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
567209ff23fSmrg			       "enabled for R300/R400/R500 type cards.\n");
568209ff23fSmrg		info->exa->CheckComposite = R300CheckComposite;
569209ff23fSmrg		info->exa->PrepareComposite =
570209ff23fSmrg		    FUNC_NAME(R300PrepareComposite);
571209ff23fSmrg		info->exa->Composite = FUNC_NAME(RadeonComposite);
572209ff23fSmrg		info->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
573209ff23fSmrg	    } else
574209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n");
575209ff23fSmrg	} else if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
576209ff23fSmrg		   (info->ChipFamily == CHIP_FAMILY_RV280) ||
577209ff23fSmrg		   (info->ChipFamily == CHIP_FAMILY_RS300) ||
578209ff23fSmrg		   (info->ChipFamily == CHIP_FAMILY_R200)) {
579209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
580209ff23fSmrg			       "enabled for R200 type cards.\n");
581209ff23fSmrg		info->exa->CheckComposite = R200CheckComposite;
582209ff23fSmrg		info->exa->PrepareComposite =
583209ff23fSmrg		    FUNC_NAME(R200PrepareComposite);
584209ff23fSmrg		info->exa->Composite = FUNC_NAME(RadeonComposite);
585209ff23fSmrg		info->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
586209ff23fSmrg	} else {
587209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
588209ff23fSmrg			       "enabled for R100 type cards.\n");
589209ff23fSmrg		info->exa->CheckComposite = R100CheckComposite;
590209ff23fSmrg		info->exa->PrepareComposite =
591209ff23fSmrg		    FUNC_NAME(R100PrepareComposite);
592209ff23fSmrg		info->exa->Composite = FUNC_NAME(RadeonComposite);
593209ff23fSmrg		info->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
594209ff23fSmrg	}
595209ff23fSmrg    }
596209ff23fSmrg#endif
597209ff23fSmrg
598209ff23fSmrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
599209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
600209ff23fSmrg
601209ff23fSmrg    info->exa->maxPitchBytes = 16320;
602209ff23fSmrg    info->exa->maxX = 8192;
603209ff23fSmrg#else
604209ff23fSmrg    info->exa->maxX = 16320 / 4;
605209ff23fSmrg#endif
606209ff23fSmrg    info->exa->maxY = 8192;
607209ff23fSmrg
608209ff23fSmrg    RADEONEngineInit(pScrn);
609209ff23fSmrg
610209ff23fSmrg    if (!exaDriverInit(pScreen, info->exa)) {
611209ff23fSmrg	xfree(info->exa);
612209ff23fSmrg	return FALSE;
613209ff23fSmrg    }
614209ff23fSmrg    exaMarkSync(pScreen);
615209ff23fSmrg
616209ff23fSmrg    return TRUE;
617209ff23fSmrg}
618209ff23fSmrg
619209ff23fSmrg#undef FUNC_NAME
620