radeon_exa_funcs.c revision a4f79855
1209ff23fSmrg/*
2209ff23fSmrg * Copyright 2005 Eric Anholt
3209ff23fSmrg * Copyright 2005 Benjamin Herrenschmidt
4209ff23fSmrg * Copyright 2006 Tungsten Graphics, Inc.
5209ff23fSmrg * All Rights Reserved.
6209ff23fSmrg *
7209ff23fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
8209ff23fSmrg * copy of this software and associated documentation files (the "Software"),
9209ff23fSmrg * to deal in the Software without restriction, including without limitation
10209ff23fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11209ff23fSmrg * and/or sell copies of the Software, and to permit persons to whom the
12209ff23fSmrg * Software is furnished to do so, subject to the following conditions:
13209ff23fSmrg *
14209ff23fSmrg * The above copyright notice and this permission notice (including the next
15209ff23fSmrg * paragraph) shall be included in all copies or substantial portions of the
16209ff23fSmrg * Software.
17209ff23fSmrg *
18209ff23fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19209ff23fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20209ff23fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21209ff23fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22209ff23fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23209ff23fSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24209ff23fSmrg * SOFTWARE.
25209ff23fSmrg *
26209ff23fSmrg * Authors:
27209ff23fSmrg *    Eric Anholt <anholt@FreeBSD.org>
28209ff23fSmrg *    Zack Rusin <zrusin@trolltech.com>
29209ff23fSmrg *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
30209ff23fSmrg *    Michel Dänzer <michel@tungstengraphics.com>
31209ff23fSmrg *
32209ff23fSmrg */
33209ff23fSmrg
/*
 * This file is a template: it is compiled with exactly one of ACCEL_MMIO or
 * ACCEL_CP defined, and FUNC_NAME() gives every generated function a
 * matching "MMIO" or "CP" suffix.
 */
#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
#error Cannot define both MMIO and CP acceleration!
#endif

/*
 * Token pasting helper.  The ## operator is used unless UNIXCPP is defined
 * without ANSICPP, in which case the empty-comment form is used instead.
 */
#if defined(UNIXCPP) && !defined(ANSICPP)
#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
#else
#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
#endif

/* Select the suffix for the acceleration path being built. */
#if defined(ACCEL_MMIO)
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
#elif defined(ACCEL_CP)
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
#else
#error No accel type defined!
#endif
53209ff23fSmrg
54209ff23fSmrg#include <errno.h>
55209ff23fSmrg#include <string.h>
56209ff23fSmrg
57209ff23fSmrg#include "radeon.h"
58209ff23fSmrg
59209ff23fSmrg#include "exa.h"
60209ff23fSmrg
61209ff23fSmrgstatic int
62209ff23fSmrgFUNC_NAME(RADEONMarkSync)(ScreenPtr pScreen)
63209ff23fSmrg{
64209ff23fSmrg    RINFO_FROM_SCREEN(pScreen);
65209ff23fSmrg
66209ff23fSmrg    TRACE;
67209ff23fSmrg
68b7e1c893Smrg    return ++info->accel_state->exaSyncMarker;
69209ff23fSmrg}
70209ff23fSmrg
71209ff23fSmrgstatic void
72209ff23fSmrgFUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
73209ff23fSmrg{
74209ff23fSmrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
75209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
76209ff23fSmrg
77209ff23fSmrg    TRACE;
78209ff23fSmrg
79b7e1c893Smrg    if (info->accel_state->exaMarkerSynced != marker) {
80209ff23fSmrg	FUNC_NAME(RADEONWaitForIdle)(pScrn);
81b7e1c893Smrg	info->accel_state->exaMarkerSynced = marker;
82209ff23fSmrg    }
83209ff23fSmrg
84b7e1c893Smrg    RADEONPTR(pScrn)->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN;
85209ff23fSmrg}
86209ff23fSmrg
87209ff23fSmrgstatic Bool
88209ff23fSmrgFUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
89209ff23fSmrg{
90209ff23fSmrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
91209ff23fSmrg    uint32_t datatype, dst_pitch_offset;
92209ff23fSmrg    ACCEL_PREAMBLE();
93209ff23fSmrg
94209ff23fSmrg    TRACE;
95209ff23fSmrg
96209ff23fSmrg    if (pPix->drawable.bitsPerPixel == 24)
97209ff23fSmrg	RADEON_FALLBACK(("24bpp unsupported\n"));
98209ff23fSmrg    if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype))
99209ff23fSmrg	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
100209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pPix, &dst_pitch_offset))
101209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n"));
102209ff23fSmrg
103209ff23fSmrg    RADEON_SWITCH_TO_2D();
104209ff23fSmrg
105209ff23fSmrg    BEGIN_ACCEL(5);
106209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
107209ff23fSmrg	    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
108209ff23fSmrg	    RADEON_GMC_BRUSH_SOLID_COLOR |
109209ff23fSmrg	    (datatype << 8) |
110209ff23fSmrg	    RADEON_GMC_SRC_DATATYPE_COLOR |
111209ff23fSmrg	    RADEON_ROP[alu].pattern |
112209ff23fSmrg	    RADEON_GMC_CLR_CMP_CNTL_DIS);
113209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, fg);
114209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, pm);
115209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_CNTL,
116209ff23fSmrg	(RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM));
117209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
118209ff23fSmrg    FINISH_ACCEL();
119209ff23fSmrg
120209ff23fSmrg    return TRUE;
121209ff23fSmrg}
122209ff23fSmrg
123209ff23fSmrg
124209ff23fSmrgstatic void
125209ff23fSmrgFUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2)
126209ff23fSmrg{
127209ff23fSmrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
128209ff23fSmrg    ACCEL_PREAMBLE();
129209ff23fSmrg
130209ff23fSmrg    TRACE;
131209ff23fSmrg
132b7e1c893Smrg    if (info->accel_state->vsync)
133b7e1c893Smrg	FUNC_NAME(RADEONWaitForVLine)(pScrn, pPix, RADEONBiggerCrtcArea(pPix), y1, y2);
134b7e1c893Smrg
135209ff23fSmrg    BEGIN_ACCEL(2);
136209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1);
137209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, ((y2 - y1) << 16) | (x2 - x1));
138209ff23fSmrg    FINISH_ACCEL();
139209ff23fSmrg}
140209ff23fSmrg
141209ff23fSmrgstatic void
142209ff23fSmrgFUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix)
143209ff23fSmrg{
144209ff23fSmrg    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
145209ff23fSmrg    ACCEL_PREAMBLE();
146209ff23fSmrg
147209ff23fSmrg    TRACE;
148209ff23fSmrg
149209ff23fSmrg    BEGIN_ACCEL(2);
150209ff23fSmrg    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
151209ff23fSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
152209ff23fSmrg                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
153209ff23fSmrg    FINISH_ACCEL();
154209ff23fSmrg}
155209ff23fSmrg
156209ff23fSmrgvoid
157209ff23fSmrgFUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
158209ff23fSmrg			       uint32_t dst_pitch_offset, uint32_t datatype, int rop,
159209ff23fSmrg			       Pixel planemask)
160209ff23fSmrg{
161209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
162209ff23fSmrg    ACCEL_PREAMBLE();
163209ff23fSmrg
164209ff23fSmrg    RADEON_SWITCH_TO_2D();
165209ff23fSmrg
166209ff23fSmrg    BEGIN_ACCEL(5);
167209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
168209ff23fSmrg	RADEON_GMC_DST_PITCH_OFFSET_CNTL |
169209ff23fSmrg	RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
170209ff23fSmrg	RADEON_GMC_BRUSH_NONE |
171209ff23fSmrg	(datatype << 8) |
172209ff23fSmrg	RADEON_GMC_SRC_DATATYPE_COLOR |
173209ff23fSmrg	RADEON_ROP[rop].rop |
174209ff23fSmrg	RADEON_DP_SRC_SOURCE_MEMORY |
175209ff23fSmrg	RADEON_GMC_CLR_CMP_CNTL_DIS);
176209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask);
177209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_CNTL,
178b7e1c893Smrg	((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
179b7e1c893Smrg	 (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)));
180209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
181209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
182209ff23fSmrg    FINISH_ACCEL();
183209ff23fSmrg}
184209ff23fSmrg
185209ff23fSmrgstatic Bool
186209ff23fSmrgFUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
187209ff23fSmrg			     int xdir, int ydir,
188209ff23fSmrg			     int rop,
189209ff23fSmrg			     Pixel planemask)
190209ff23fSmrg{
191209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
192209ff23fSmrg    uint32_t datatype, src_pitch_offset, dst_pitch_offset;
193209ff23fSmrg
194209ff23fSmrg    TRACE;
195209ff23fSmrg
196b7e1c893Smrg    info->accel_state->xdir = xdir;
197b7e1c893Smrg    info->accel_state->ydir = ydir;
198209ff23fSmrg
199209ff23fSmrg    if (pDst->drawable.bitsPerPixel == 24)
200209ff23fSmrg	RADEON_FALLBACK(("24bpp unsupported"));
201209ff23fSmrg    if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype))
202209ff23fSmrg	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
203209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset))
204209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n"));
205209ff23fSmrg    if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset))
206209ff23fSmrg	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n"));
207209ff23fSmrg
208209ff23fSmrg    FUNC_NAME(RADEONDoPrepareCopy)(pScrn, src_pitch_offset, dst_pitch_offset,
209209ff23fSmrg				   datatype, rop, planemask);
210209ff23fSmrg
211209ff23fSmrg    return TRUE;
212209ff23fSmrg}
213209ff23fSmrg
214209ff23fSmrgvoid
215209ff23fSmrgFUNC_NAME(RADEONCopy)(PixmapPtr pDst,
216209ff23fSmrg		      int srcX, int srcY,
217209ff23fSmrg		      int dstX, int dstY,
218209ff23fSmrg		      int w, int h)
219209ff23fSmrg{
220209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
221209ff23fSmrg    ACCEL_PREAMBLE();
222209ff23fSmrg
223209ff23fSmrg    TRACE;
224209ff23fSmrg
225b7e1c893Smrg    if (info->accel_state->xdir < 0) {
226209ff23fSmrg	srcX += w - 1;
227209ff23fSmrg	dstX += w - 1;
228209ff23fSmrg    }
229b7e1c893Smrg    if (info->accel_state->ydir < 0) {
230209ff23fSmrg	srcY += h - 1;
231209ff23fSmrg	dstY += h - 1;
232209ff23fSmrg    }
233209ff23fSmrg
234b7e1c893Smrg    if (info->accel_state->vsync)
235b7e1c893Smrg	FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), dstY, dstY + h);
236b7e1c893Smrg
237209ff23fSmrg    BEGIN_ACCEL(3);
238209ff23fSmrg
239209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_Y_X,	   (srcY << 16) | srcX);
240209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X,	   (dstY << 16) | dstX);
241209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h  << 16) | w);
242209ff23fSmrg
243209ff23fSmrg    FINISH_ACCEL();
244209ff23fSmrg}
245209ff23fSmrg
246209ff23fSmrgstatic void
247209ff23fSmrgFUNC_NAME(RADEONDoneCopy)(PixmapPtr pDst)
248209ff23fSmrg{
249209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
250209ff23fSmrg    ACCEL_PREAMBLE();
251209ff23fSmrg
252209ff23fSmrg    TRACE;
253209ff23fSmrg
254209ff23fSmrg    BEGIN_ACCEL(2);
255209ff23fSmrg    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
256209ff23fSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
257209ff23fSmrg                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
258209ff23fSmrg    FINISH_ACCEL();
259209ff23fSmrg}
260209ff23fSmrg
261b7e1c893Smrg
262b7e1c893Smrg#ifdef ACCEL_CP
263b7e1c893Smrg
264209ff23fSmrgstatic Bool
265b7e1c893SmrgRADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h,
266b7e1c893Smrg		       char *src, int src_pitch)
267209ff23fSmrg{
268209ff23fSmrg    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
269209ff23fSmrg    unsigned int   bpp	     = pDst->drawable.bitsPerPixel;
270209ff23fSmrg    unsigned int   hpass;
271209ff23fSmrg    uint32_t	   buf_pitch, dst_pitch_off;
272209ff23fSmrg
273209ff23fSmrg    TRACE;
274209ff23fSmrg
275209ff23fSmrg    if (bpp < 8)
276209ff23fSmrg	return FALSE;
277209ff23fSmrg
278209ff23fSmrg    if (info->directRenderingEnabled &&
279209ff23fSmrg	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_off)) {
280209ff23fSmrg	uint8_t *buf;
281209ff23fSmrg	int cpp = bpp / 8;
282209ff23fSmrg	ACCEL_PREAMBLE();
283209ff23fSmrg
284209ff23fSmrg	RADEON_SWITCH_TO_2D();
285b7e1c893Smrg
286b7e1c893Smrg	if (info->accel_state->vsync)
287b7e1c893Smrg	    FUNC_NAME(RADEONWaitForVLine)(pScrn, pDst, RADEONBiggerCrtcArea(pDst), y, y + h);
288b7e1c893Smrg
289209ff23fSmrg	while ((buf = RADEONHostDataBlit(pScrn,
290209ff23fSmrg					 cpp, w, dst_pitch_off, &buf_pitch,
291209ff23fSmrg					 x, &y, (unsigned int*)&h, &hpass)) != 0) {
292209ff23fSmrg	    RADEONHostDataBlitCopyPass(pScrn, cpp, buf, (uint8_t *)src,
293209ff23fSmrg				       hpass, buf_pitch, src_pitch);
294209ff23fSmrg	    src += hpass * src_pitch;
295209ff23fSmrg	}
296209ff23fSmrg
297209ff23fSmrg	exaMarkSync(pDst->drawable.pScreen);
298209ff23fSmrg	return TRUE;
299209ff23fSmrg    }
300209ff23fSmrg
301b7e1c893Smrg    return FALSE;
302209ff23fSmrg}
303209ff23fSmrg
304209ff23fSmrg/* Emit blit with arbitrary source and destination offsets and pitches */
305209ff23fSmrgstatic void
306209ff23fSmrgRADEONBlitChunk(ScrnInfoPtr pScrn, uint32_t datatype, uint32_t src_pitch_offset,
307209ff23fSmrg		uint32_t dst_pitch_offset, int srcX, int srcY, int dstX, int dstY,
308209ff23fSmrg		int w, int h)
309209ff23fSmrg{
310209ff23fSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
311209ff23fSmrg    ACCEL_PREAMBLE();
312209ff23fSmrg
313209ff23fSmrg    BEGIN_ACCEL(6);
314209ff23fSmrg    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
315209ff23fSmrg		  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
316209ff23fSmrg		  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
317209ff23fSmrg		  RADEON_GMC_BRUSH_NONE |
318209ff23fSmrg		  (datatype << 8) |
319209ff23fSmrg		  RADEON_GMC_SRC_DATATYPE_COLOR |
320209ff23fSmrg		  RADEON_ROP3_S |
321209ff23fSmrg		  RADEON_DP_SRC_SOURCE_MEMORY |
322209ff23fSmrg		  RADEON_GMC_CLR_CMP_CNTL_DIS |
323209ff23fSmrg		  RADEON_GMC_WR_MSK_DIS);
324209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
325209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
326209ff23fSmrg    OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX);
327209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX);
328209ff23fSmrg    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w);
329209ff23fSmrg    FINISH_ACCEL();
330209ff23fSmrg    BEGIN_ACCEL(2);
331209ff23fSmrg    OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
332209ff23fSmrg    OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
333209ff23fSmrg                  RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE);
334209ff23fSmrg    FINISH_ACCEL();
335209ff23fSmrg}
336b7e1c893Smrg
337209ff23fSmrg
338209ff23fSmrgstatic Bool
339b7e1c893SmrgRADEONDownloadFromScreenCP(PixmapPtr pSrc, int x, int y, int w, int h,
340209ff23fSmrg				    char *dst, int dst_pitch)
341209ff23fSmrg{
342209ff23fSmrg    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
343209ff23fSmrg    uint8_t	  *src	     = info->FB + exaGetPixmapOffset(pSrc);
344209ff23fSmrg    int		   bpp	     = pSrc->drawable.bitsPerPixel;
345209ff23fSmrg    uint32_t datatype, src_pitch_offset, scratch_pitch = (w * bpp/8 + 63) & ~63, scratch_off = 0;
346209ff23fSmrg    drmBufPtr scratch;
347209ff23fSmrg
348209ff23fSmrg    TRACE;
349209ff23fSmrg
350209ff23fSmrg    /*
351209ff23fSmrg     * Try to accelerate download. Use an indirect buffer as scratch space,
352209ff23fSmrg     * blitting the bits to one half while copying them out of the other one and
353209ff23fSmrg     * then swapping the halves.
354209ff23fSmrg     */
355b7e1c893Smrg    if (bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) &&
356209ff23fSmrg	RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) &&
357209ff23fSmrg	(scratch = RADEONCPGetBuffer(pScrn)))
358209ff23fSmrg    {
359209ff23fSmrg	int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8;
360209ff23fSmrg	int hpass = min(h, scratch->total/2 / scratch_pitch);
361209ff23fSmrg	uint32_t scratch_pitch_offset = scratch_pitch << 16
362b7e1c893Smrg				    | (info->gartLocation + info->dri->bufStart
363209ff23fSmrg				       + scratch->idx * scratch->total) >> 10;
364b7e1c893Smrg	drm_radeon_indirect_t indirect;
365209ff23fSmrg	ACCEL_PREAMBLE();
366209ff23fSmrg
367209ff23fSmrg	RADEON_SWITCH_TO_2D();
368209ff23fSmrg
369209ff23fSmrg	/* Kick the first blit as early as possible */
370209ff23fSmrg	RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset,
371209ff23fSmrg			x, y, 0, 0, w, hpass);
372209ff23fSmrg	FLUSH_RING();
373209ff23fSmrg
374209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
375209ff23fSmrg	switch (bpp) {
376209ff23fSmrg	case 16:
377209ff23fSmrg	  swap = RADEON_HOST_DATA_SWAP_16BIT;
378209ff23fSmrg	  break;
379209ff23fSmrg	case 32:
380209ff23fSmrg	  swap = RADEON_HOST_DATA_SWAP_32BIT;
381209ff23fSmrg	  break;
382209ff23fSmrg	}
383209ff23fSmrg#endif
384209ff23fSmrg
385209ff23fSmrg	while (h) {
386209ff23fSmrg	    int oldhpass = hpass, i = 0;
387209ff23fSmrg
388209ff23fSmrg	    src = (uint8_t*)scratch->address + scratch_off;
389209ff23fSmrg
390209ff23fSmrg	    y += oldhpass;
391209ff23fSmrg	    h -= oldhpass;
392209ff23fSmrg	    hpass = min(h, scratch->total/2 / scratch_pitch);
393209ff23fSmrg
394209ff23fSmrg	    /* Prepare next blit if anything's left */
395209ff23fSmrg	    if (hpass) {
396209ff23fSmrg		scratch_off = scratch->total/2 - scratch_off;
397209ff23fSmrg		RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset + (scratch_off >> 10),
398209ff23fSmrg				x, y, 0, 0, w, hpass);
399209ff23fSmrg	    }
400209ff23fSmrg
401209ff23fSmrg	    /*
402209ff23fSmrg	     * Wait for previous blit to complete.
403209ff23fSmrg	     *
404209ff23fSmrg	     * XXX: Doing here essentially the same things this ioctl does in
405209ff23fSmrg	     * the DRM results in corruption with 'small' transfers, apparently
406209ff23fSmrg	     * because the data doesn't actually land in system RAM before the
407209ff23fSmrg	     * memcpy. I suspect the ioctl helps mostly due to its latency; what
408209ff23fSmrg	     * we'd really need is a way to reliably wait for the host interface
409209ff23fSmrg	     * to be done with pushing the data to the host.
410209ff23fSmrg	     */
411b7e1c893Smrg	    while ((drmCommandNone(info->dri->drmFD, DRM_RADEON_CP_IDLE) == -EBUSY)
412209ff23fSmrg		   && (i++ < RADEON_TIMEOUT))
413209ff23fSmrg		;
414209ff23fSmrg
415209ff23fSmrg	    /* Kick next blit */
416209ff23fSmrg	    if (hpass)
417209ff23fSmrg		FLUSH_RING();
418209ff23fSmrg
419209ff23fSmrg	    /* Copy out data from previous blit */
420209ff23fSmrg	    if (wpass == scratch_pitch && wpass == dst_pitch) {
421209ff23fSmrg		RADEONCopySwap((uint8_t*)dst, src, wpass * oldhpass, swap);
422209ff23fSmrg		dst += dst_pitch * oldhpass;
423209ff23fSmrg	    } else while (oldhpass--) {
424209ff23fSmrg		RADEONCopySwap((uint8_t*)dst, src, wpass, swap);
425209ff23fSmrg		src += scratch_pitch;
426209ff23fSmrg		dst += dst_pitch;
427209ff23fSmrg	    }
428209ff23fSmrg	}
429209ff23fSmrg
430209ff23fSmrg	indirect.idx = scratch->idx;
431209ff23fSmrg	indirect.start = indirect.end = 0;
432209ff23fSmrg	indirect.discard = 1;
433209ff23fSmrg
434b7e1c893Smrg	drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
435b7e1c893Smrg			    &indirect, sizeof(drm_radeon_indirect_t));
436209ff23fSmrg
437b7e1c893Smrg	info->accel_state->exaMarkerSynced = info->accel_state->exaSyncMarker;
438209ff23fSmrg
439209ff23fSmrg	return TRUE;
440209ff23fSmrg    }
441209ff23fSmrg
442b7e1c893Smrg    return FALSE;
443b7e1c893Smrg}
444209ff23fSmrg
445b7e1c893Smrg#endif	/* def ACCEL_CP */
446209ff23fSmrg
447209ff23fSmrg
448209ff23fSmrgBool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
449209ff23fSmrg{
450209ff23fSmrg    RINFO_FROM_SCREEN(pScreen);
451209ff23fSmrg
452b7e1c893Smrg    if (info->accel_state->exa == NULL) {
453209ff23fSmrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
454209ff23fSmrg	return FALSE;
455209ff23fSmrg    }
456209ff23fSmrg
457b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
458b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
459209ff23fSmrg
460b7e1c893Smrg    info->accel_state->exa->PrepareSolid = FUNC_NAME(RADEONPrepareSolid);
461b7e1c893Smrg    info->accel_state->exa->Solid = FUNC_NAME(RADEONSolid);
462b7e1c893Smrg    info->accel_state->exa->DoneSolid = FUNC_NAME(RADEONDoneSolid);
463209ff23fSmrg
464b7e1c893Smrg    info->accel_state->exa->PrepareCopy = FUNC_NAME(RADEONPrepareCopy);
465b7e1c893Smrg    info->accel_state->exa->Copy = FUNC_NAME(RADEONCopy);
466b7e1c893Smrg    info->accel_state->exa->DoneCopy = FUNC_NAME(RADEONDoneCopy);
467209ff23fSmrg
468b7e1c893Smrg    info->accel_state->exa->MarkSync = FUNC_NAME(RADEONMarkSync);
469b7e1c893Smrg    info->accel_state->exa->WaitMarker = FUNC_NAME(RADEONSync);
470b7e1c893Smrg#ifdef ACCEL_CP
471b7e1c893Smrg    info->accel_state->exa->UploadToScreen = RADEONUploadToScreenCP;
472b7e1c893Smrg    if (info->accelDFS)
473b7e1c893Smrg	info->accel_state->exa->DownloadFromScreen = RADEONDownloadFromScreenCP;
474b7e1c893Smrg#endif
475209ff23fSmrg
476209ff23fSmrg#if X_BYTE_ORDER == X_BIG_ENDIAN
477b7e1c893Smrg    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess;
478b7e1c893Smrg    info->accel_state->exa->FinishAccess = RADEONFinishAccess;
479209ff23fSmrg#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
480209ff23fSmrg
481b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
482b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
483b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
484b7e1c893Smrg#endif
485b7e1c893Smrg    info->accel_state->exa->pixmapOffsetAlign = RADEON_BUFFER_ALIGN + 1;
486b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 64;
487209ff23fSmrg
488209ff23fSmrg#ifdef RENDER
489209ff23fSmrg    if (info->RenderAccel) {
490209ff23fSmrg	if (info->ChipFamily >= CHIP_FAMILY_R600)
491209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
492209ff23fSmrg			       "unsupported on R600 and newer cards.\n");
493209ff23fSmrg	else if (IS_R300_3D || IS_R500_3D) {
494209ff23fSmrg	    if ((info->ChipFamily < CHIP_FAMILY_RS400)
495209ff23fSmrg#ifdef XF86DRI
496209ff23fSmrg		|| (info->directRenderingEnabled)
497209ff23fSmrg#endif
498209ff23fSmrg		) {
499209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
500209ff23fSmrg			       "enabled for R300/R400/R500 type cards.\n");
501b7e1c893Smrg		info->accel_state->exa->CheckComposite = R300CheckComposite;
502b7e1c893Smrg		info->accel_state->exa->PrepareComposite =
503209ff23fSmrg		    FUNC_NAME(R300PrepareComposite);
504b7e1c893Smrg		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
505b7e1c893Smrg		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
506209ff23fSmrg	    } else
507209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n");
508209ff23fSmrg	} else if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
509209ff23fSmrg		   (info->ChipFamily == CHIP_FAMILY_RV280) ||
510209ff23fSmrg		   (info->ChipFamily == CHIP_FAMILY_RS300) ||
511209ff23fSmrg		   (info->ChipFamily == CHIP_FAMILY_R200)) {
512209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
513209ff23fSmrg			       "enabled for R200 type cards.\n");
514b7e1c893Smrg		info->accel_state->exa->CheckComposite = R200CheckComposite;
515b7e1c893Smrg		info->accel_state->exa->PrepareComposite =
516209ff23fSmrg		    FUNC_NAME(R200PrepareComposite);
517b7e1c893Smrg		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
518b7e1c893Smrg		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
519209ff23fSmrg	} else {
520209ff23fSmrg		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
521209ff23fSmrg			       "enabled for R100 type cards.\n");
522b7e1c893Smrg		info->accel_state->exa->CheckComposite = R100CheckComposite;
523b7e1c893Smrg		info->accel_state->exa->PrepareComposite =
524209ff23fSmrg		    FUNC_NAME(R100PrepareComposite);
525b7e1c893Smrg		info->accel_state->exa->Composite = FUNC_NAME(RadeonComposite);
526b7e1c893Smrg		info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite);
527209ff23fSmrg	}
528209ff23fSmrg    }
529209ff23fSmrg#endif
530209ff23fSmrg
531209ff23fSmrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
532209ff23fSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
533209ff23fSmrg
534b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 16320;
535a4f79855Smrg    info->accel_state->exa->maxX = 8191;
536209ff23fSmrg#else
537b7e1c893Smrg    info->accel_state->exa->maxX = 16320 / 4;
538209ff23fSmrg#endif
539a4f79855Smrg    info->accel_state->exa->maxY = 8191;
540b7e1c893Smrg
541b7e1c893Smrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
542b7e1c893Smrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
543b7e1c893Smrg	info->accel_state->vsync = TRUE;
544b7e1c893Smrg    } else
545b7e1c893Smrg	info->accel_state->vsync = FALSE;
546209ff23fSmrg
547209ff23fSmrg    RADEONEngineInit(pScrn);
548209ff23fSmrg
549b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
550b7e1c893Smrg	xfree(info->accel_state->exa);
551209ff23fSmrg	return FALSE;
552209ff23fSmrg    }
553209ff23fSmrg    exaMarkSync(pScreen);
554209ff23fSmrg
555209ff23fSmrg    return TRUE;
556209ff23fSmrg}
557209ff23fSmrg
558209ff23fSmrg#undef FUNC_NAME
559